/* gcc/config/i386/avx512fintrin.h */
/* Copyright (C) 2013-2014 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED

#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
                  long long __D, long long __E, long long __F,
                  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
         { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H I J K L M N O P].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H,
                  int __I, int __J, int __K, int __L,
                  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
         { __P, __O, __N, __M, __L, __K, __J, __I,
           __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
               double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
         { __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H,
               float __I, float __J, float __K, float __L,
               float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
         { __P, __O, __N, __M, __L, __K, __J, __I,
           __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
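/* Illustrative sketch, not part of the original header: _mm512_set_epi32
   takes the highest-numbered element first, while the _mm512_setr_epi32
   wrapper above takes element 0 first, so the two initializations below
   build the same vector.  Guarded by "#if 0" so it has no effect.  */
#if 0
static __inline __m512i
__example_iota_epi32 (void)
{
  __m512i __a = _mm512_set_epi32 (15, 14, 13, 12, 11, 10, 9, 8,
                                  7, 6, 5, 4, 3, 2, 1, 0);
  __m512i __b = _mm512_setr_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                   8, 9, 10, 11, 12, 13, 14, 15);
  return __a;		/* __a and __b hold the same 16 lanes.  */
}
#endif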
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
  __m512 __Y = __Y;
  return __Y;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
  __m512d __Y = __Y;
  return __Y;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_si512 (void)
{
  __m512i __Y = __Y;
  return __Y;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
  return __extension__ (__m512i)(__v64qi)
         { __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
  return __extension__ (__m512i)(__v32hi)
         { __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A,
           __A, __A, __A, __A, __A, __A, __A, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
                                                  (__v2df) { __A, },
                                                  (__v8df)
                                                  _mm512_undefined_pd (),
                                                  (__mmask8) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
  return (__m512) __builtin_ia32_broadcastss512 (__extension__
                                                 (__v4sf) { __A, },
                                                 (__v16sf)
                                                 _mm512_undefined_ps (),
                                                 (__mmask16) -1);
}
/* Create the vector [A B C D A B C D A B C D A B C D].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
         { __D, __C, __B, __A, __D, __C, __B, __A,
           __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
                   long long __D)
{
  return __extension__ (__m512i) (__v8di)
         { __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
         { __D, __C, __B, __A, __D, __C, __B, __A };
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
         { __D, __C, __B, __A, __D, __C, __B, __A,
           __D, __C, __B, __A, __D, __C, __B, __A };
}

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps(e3,e2,e1,e0)
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
                                                  (__v8df)
                                                  _mm512_setzero_pd (),
                                                  (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
                                                 (__v16sf)
                                                 _mm512_setzero_ps (),
                                                 (__mmask16) __U);
}
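/* Illustrative sketch, not part of the original header: the _mm512_mask_*
   forms copy lanes whose mask bit is 0 from the first operand __W, while the
   _mm512_maskz_* forms zero those lanes.  Guarded by "#if 0".  */
#if 0
static __inline void
__example_mask_mov_pd (__m512d __w, __m512d __a, __m512d *__r1, __m512d *__r2)
{
  /* Lanes 0-3 come from __a, lanes 4-7 keep the values of __w.  */
  *__r1 = _mm512_mask_mov_pd (__w, (__mmask8) 0x0f, __a);
  /* Lanes 0-3 come from __a, lanes 4-7 become 0.0.  */
  *__r2 = _mm512_maskz_mov_pd ((__mmask8) 0x0f, __a);
}
#endif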
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
                                   (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
                                   (__mmask16) __U);
}
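/* Illustrative note, not part of the original header: _mm512_load_ps /
   _mm512_store_ps (and the _pd forms above) are the aligned load/store
   intrinsics, so the pointer is expected to be 64-byte aligned.  A
   hypothetical caller, guarded by "#if 0":  */
#if 0
static __inline void
__example_copy_ps (float *__dst, const float *__src)
{
  /* Both pointers are assumed to be 64-byte aligned by the caller.  */
  _mm512_store_ps (__dst, _mm512_load_ps (__src));
}
#endif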
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
                                                        (__v8di) __W,
                                                        (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
                                                        (__v8di)
                                                        _mm512_setzero_si512 (),
                                                        (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
                                        (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
                                                     (__v16si) __W,
                                                     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si) __W,
                                                        (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
                                                        (__v16si)
                                                        _mm512_setzero_si512 (),
                                                        (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
                                        (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
                                                  (__v16si) __B,
                                                  (__v16si) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
                                                 (__v8di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_undefined_pd (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
                                                 (__v8di) __Y,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
                                                  (__v16si) __Y,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
                                                 (__v16si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di)
                                                   _mm512_undefined_si512 (),
                                                   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
                                                   (__v16si) __Y,
                                                   (__v8di)
                                                   _mm512_setzero_si512 (),
                                                   __M);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
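/* Illustrative note, not part of the original header: the psllqi builtin
   requires the shift count to reach it as a compile-time constant.  With
   optimization the always-inline functions above constant-fold the count;
   without it the macro forms substitute the literal directly, which is why
   the immediate-count intrinsics here are split on __OPTIMIZE__.  A sketch
   of a valid call, guarded by "#if 0":  */
#if 0
static __inline __m512i
__example_shift_left_3 (__m512i __x)
{
  /* The count is expected to be an integer constant expression.  */
  return _mm512_slli_epi64 (__x, 3);
}
#endif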
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  (__v8di) __W,
                                                  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
                                                  (__v8di)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di) __W,
                                                 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
                                                 (__v2di) __B,
                                                 (__v8di)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}
#else
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_undefined_si512 (),
                                                  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  (__v16si) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
                                                  (__v16si)
                                                  _mm512_setzero_si512 (),
                                                  (__mmask16) __U);
}
#else
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_undefined_si512 (),
                                                 (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si) __W,
                                                 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
                                                 (__v4si) __B,
                                                 (__v16si)
                                                 _mm512_setzero_si512 (),
                                                 (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}
#else
#define _mm_add_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_add_round_ss(A, B, C) \
  (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_sub_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_sub_round_ss(A, B, C) \
  (__m128)__builtin_ia32_subss_round(A, B, C)
#endif
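/* Illustrative sketch, not part of the original header: the rounding-mode
   argument of the *_round_* intrinsics is built from the _MM_FROUND_*
   constants (defined alongside the SSE4.1 intrinsics), typically or'ed with
   _MM_FROUND_NO_EXC to suppress exceptions.  Guarded by "#if 0".  */
#if 0
static __inline __m128d
__example_add_round (__m128d __a, __m128d __b)
{
  return _mm_add_round_sd (__a, __b,
                           _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
#endif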
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
                                                     (__v8di) __B,
                                                     (__v8di) __C, imm,
                                                     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
                                                     (__v8di) __B,
                                                     (__v8di) __C, imm,
                                                     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
                                                      (__v8di) __B,
                                                      (__v8di) __C,
                                                      imm, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si) __C,
                                                     imm, (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
                                                     (__v16si) __B,
                                                     (__v16si) __C,
                                                     imm, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
                                                      (__v16si) __B,
                                                      (__v16si) __C,
                                                      imm, (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#endif
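/* Illustrative sketch, not part of the original header: the immediate of the
   ternarylogic intrinsics is the 8-entry truth table of the desired boolean
   function, indexed by (bit of A) << 2 | (bit of B) << 1 | (bit of C).  For
   example, 0xE8 encodes the bitwise majority of the three operands.
   Guarded by "#if 0".  */
#if 0
static __inline __m512i
__example_majority_epi32 (__m512i __a, __m512i __b, __m512i __c)
{
  /* Each result bit is 1 when at least two of the three input bits are 1.  */
  return _mm512_ternarylogic_epi32 (__a, __b, __c, 0xE8);
}
#endif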
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_undefined_pd (),
                                                   (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df) __W,
                                                   (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf)
                                                  _mm512_undefined_ps (),
                                                  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf) __W,
                                                  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
                                           (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
                                          (__v4sf) __A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_undefined_pd (),
                                                     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_setzero_pd (),
                                                     (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
                                             (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
                                            (__v4sf) __A);
}
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df)
                                                  _mm512_undefined_pd (),
                                                  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
                           const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df) __W,
                                                  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
                                                  (__v8df)
                                                  _mm512_setzero_pd (),
                                                  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf)
                                                 _mm512_undefined_ps (),
                                                 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf) __W,
                                                 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
                                                 (__v16sf)
                                                 _mm512_setzero_ps (),
                                                 (__mmask16) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
                                                (__v2df) __A,
                                                __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
                                               (__v4sf) __A,
                                               __R);
}
#else
#define _mm512_sqrt_round_pd(A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_sqrt_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)

#define _mm_sqrt_round_ss(A, B, C) \
  (__m128)__builtin_ia32_sqrtss_round(A, B, C)
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                                                    (__v16si)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                                                    (__v16si) __W,
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
                                                    (__v16si)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                                                    (__v8di)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
                                                    (__v8di)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi32 (__m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                                                    (__v16si)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                                                    (__v16si) __W,
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
                                                    (__v16si)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                                                    (__v8di)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
                                                    (__v8di)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi64 (__m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                                                    (__v8di)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                                                    (__v8di) __W,
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
                                                    (__v8di)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi32 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                                                    (__v16si)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                                                    (__v16si) __W,
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
                                                    (__v16si)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
                                                    (__v8di)
                                                    _mm512_undefined_si512 (),
                                                    (__mmask8) -1);
}

extern __inline __m512i
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1963 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964 (__v8di) __W,
1965 (__mmask8) __U);
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di)
1974 _mm512_setzero_si512 (),
1975 (__mmask8) __U);
1978 extern __inline __m512i
1979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980 _mm512_cvtepu16_epi32 (__m256i __A)
1982 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983 (__v16si)
1984 _mm512_undefined_si512 (),
1985 (__mmask16) -1);
1988 extern __inline __m512i
1989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1992 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993 (__v16si) __W,
1994 (__mmask16) __U);
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si)
2003 _mm512_setzero_si512 (),
2004 (__mmask16) __U);
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_cvtepu16_epi64 (__m128i __A)
2011 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012 (__v8di)
2013 _mm512_undefined_si512 (),
2014 (__mmask8) -1);
2017 extern __inline __m512i
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2021 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022 (__v8di) __W,
2023 (__mmask8) __U);
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di)
2032 _mm512_setzero_si512 (),
2033 (__mmask8) __U);
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_cvtepu32_epi64 (__m256i __X)
2040 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041 (__v8di)
2042 _mm512_undefined_si512 (),
2043 (__mmask8) -1);
2046 extern __inline __m512i
2047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2050 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051 (__v8di) __W,
2052 (__mmask8) __U);
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di)
2061 _mm512_setzero_si512 (),
2062 (__mmask8) __U);
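/* Usage sketch (illustrative only): the conversion wrappers above widen
   a 128- or 256-bit integer source into 512 bits; the epi forms
   sign-extend and the epu forms zero-extend.  The helper name is
   hypothetical.

     __m512i
     widen_bytes (__mmask16 m, __m128i bytes)
     {
       return _mm512_maskz_cvtepi8_epi32 (m, bytes);
     }  */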
2065 #ifdef __OPTIMIZE__
2066 extern __inline __m512d
2067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2070 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071 (__v8df) __B,
2072 (__v8df)
2073 _mm512_undefined_pd (),
2074 (__mmask8) -1, __R);
2077 extern __inline __m512d
2078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080 __m512d __B, const int __R)
2082 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083 (__v8df) __B,
2084 (__v8df) __W,
2085 (__mmask8) __U, __R);
2088 extern __inline __m512d
2089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091 const int __R)
2093 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094 (__v8df) __B,
2095 (__v8df)
2096 _mm512_setzero_pd (),
2097 (__mmask8) __U, __R);
2100 extern __inline __m512
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2104 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105 (__v16sf) __B,
2106 (__v16sf)
2107 _mm512_undefined_ps (),
2108 (__mmask16) -1, __R);
2111 extern __inline __m512
2112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114 __m512 __B, const int __R)
2116 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117 (__v16sf) __B,
2118 (__v16sf) __W,
2119 (__mmask16) __U, __R);
2122 extern __inline __m512
2123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2126 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127 (__v16sf) __B,
2128 (__v16sf)
2129 _mm512_setzero_ps (),
2130 (__mmask16) __U, __R);
2133 extern __inline __m512d
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2137 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
2140 _mm512_undefined_pd (),
2141 (__mmask8) -1, __R);
2144 extern __inline __m512d
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2149 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2155 extern __inline __m512d
2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2160 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2167 extern __inline __m512
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2171 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
2174 _mm512_undefined_ps (),
2175 (__mmask16) -1, __R);
2178 extern __inline __m512
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2183 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2189 extern __inline __m512
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2193 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2199 #else
2200 #define _mm512_add_round_pd(A, B, C) \
2201 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2203 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2204 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2206 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2207 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2209 #define _mm512_add_round_ps(A, B, C) \
2210 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2212 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2213 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2215 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2216 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2218 #define _mm512_sub_round_pd(A, B, C) \
2219 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2221 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2222 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2224 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2225 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2227 #define _mm512_sub_round_ps(A, B, C) \
2228 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2230 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2231 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2233 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2234 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2235 #endif
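/* Usage sketch (illustrative only): the _round_ arithmetic above takes
   a per-operation rounding mode instead of using MXCSR.  The constants
   are assumed to be the usual _MM_FROUND_* values from <smmintrin.h>;
   the helper name is hypothetical.

     __m512
     add_truncating (__m512 a, __m512 b)
     {
       return _mm512_add_round_ps (a, b,
                 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }  */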
2237 #ifdef __OPTIMIZE__
2238 extern __inline __m512d
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2242 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243 (__v8df) __B,
2244 (__v8df)
2245 _mm512_undefined_pd (),
2246 (__mmask8) -1, __R);
2249 extern __inline __m512d
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252 __m512d __B, const int __R)
2254 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255 (__v8df) __B,
2256 (__v8df) __W,
2257 (__mmask8) __U, __R);
2260 extern __inline __m512d
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263 const int __R)
2265 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266 (__v8df) __B,
2267 (__v8df)
2268 _mm512_setzero_pd (),
2269 (__mmask8) __U, __R);
2272 extern __inline __m512
2273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2276 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277 (__v16sf) __B,
2278 (__v16sf)
2279 _mm512_undefined_ps (),
2280 (__mmask16) -1, __R);
2283 extern __inline __m512
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286 __m512 __B, const int __R)
2288 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289 (__v16sf) __B,
2290 (__v16sf) __W,
2291 (__mmask16) __U, __R);
2294 extern __inline __m512
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2298 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299 (__v16sf) __B,
2300 (__v16sf)
2301 _mm512_setzero_ps (),
2302 (__mmask16) __U, __R);
2305 extern __inline __m512d
2306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2309 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310 (__v8df) __V,
2311 (__v8df)
2312 _mm512_undefined_pd (),
2313 (__mmask8) -1, __R);
2316 extern __inline __m512d
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319 __m512d __V, const int __R)
2321 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322 (__v8df) __V,
2323 (__v8df) __W,
2324 (__mmask8) __U, __R);
2327 extern __inline __m512d
2328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330 const int __R)
2332 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333 (__v8df) __V,
2334 (__v8df)
2335 _mm512_setzero_pd (),
2336 (__mmask8) __U, __R);
2339 extern __inline __m512
2340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2343 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 (__v16sf)
2346 _mm512_undefined_ps (),
2347 (__mmask16) -1, __R);
2350 extern __inline __m512
2351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353 __m512 __B, const int __R)
2355 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356 (__v16sf) __B,
2357 (__v16sf) __W,
2358 (__mmask16) __U, __R);
2361 extern __inline __m512
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2365 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366 (__v16sf) __B,
2367 (__v16sf)
2368 _mm512_setzero_ps (),
2369 (__mmask16) __U, __R);
2372 extern __inline __m128d
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2376 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377 (__v2df) __B,
2378 __R);
2381 extern __inline __m128
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2385 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386 (__v4sf) __B,
2387 __R);
2390 extern __inline __m128d
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2394 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395 (__v2df) __B,
2396 __R);
2399 extern __inline __m128
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2403 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404 (__v4sf) __B,
2405 __R);
2408 #else
2409 #define _mm512_mul_round_pd(A, B, C) \
2410 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2412 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2413 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2415 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2416 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2418 #define _mm512_mul_round_ps(A, B, C) \
2419 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2421 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2422 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2424 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2425 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2427 #define _mm512_div_round_pd(A, B, C) \
2428 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2430 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2431 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2433 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2434 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2436 #define _mm512_div_round_ps(A, B, C) \
2437 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2439 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2440 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2442 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2443 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2445 #define _mm_mul_round_sd(A, B, C) \
2446 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2448 #define _mm_mul_round_ss(A, B, C) \
2449 (__m128)__builtin_ia32_mulss_round(A, B, C)
2451 #define _mm_div_round_sd(A, B, C) \
2452 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2454 #define _mm_div_round_ss(A, B, C) \
2455 (__m128)__builtin_ia32_divss_round(A, B, C)
2456 #endif
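/* Usage sketch (illustrative only): the scalar _round_ forms operate on
   the low element only and copy the upper element from the first
   argument.  _MM_FROUND_CUR_DIRECTION (assumed from <smmintrin.h>)
   keeps the rounding mode currently set in MXCSR; the helper name is
   hypothetical.

     __m128d
     div_low (__m128d a, __m128d b)
     {
       return _mm_div_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
     }  */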
2458 #ifdef __OPTIMIZE__
2459 extern __inline __m512d
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2463 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464 (__v8df) __B,
2465 (__v8df)
2466 _mm512_undefined_pd (),
2467 (__mmask8) -1, __R);
2470 extern __inline __m512d
2471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473 __m512d __B, const int __R)
2475 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476 (__v8df) __B,
2477 (__v8df) __W,
2478 (__mmask8) __U, __R);
2481 extern __inline __m512d
2482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484 const int __R)
2486 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487 (__v8df) __B,
2488 (__v8df)
2489 _mm512_setzero_pd (),
2490 (__mmask8) __U, __R);
2493 extern __inline __m512
2494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2497 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498 (__v16sf) __B,
2499 (__v16sf)
2500 _mm512_undefined_ps (),
2501 (__mmask16) -1, __R);
2504 extern __inline __m512
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507 __m512 __B, const int __R)
2509 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510 (__v16sf) __B,
2511 (__v16sf) __W,
2512 (__mmask16) __U, __R);
2515 extern __inline __m512
2516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2519 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520 (__v16sf) __B,
2521 (__v16sf)
2522 _mm512_setzero_ps (),
2523 (__mmask16) __U, __R);
2526 extern __inline __m512d
2527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2530 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
2533 _mm512_undefined_pd (),
2534 (__mmask8) -1, __R);
2537 extern __inline __m512d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2542 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2548 extern __inline __m512d
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2553 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2560 extern __inline __m512
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2564 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
2567 _mm512_undefined_ps (),
2568 (__mmask16) -1, __R);
2571 extern __inline __m512
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2576 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2582 extern __inline __m512
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2586 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587 (__v16sf) __B,
2588 (__v16sf)
2589 _mm512_setzero_ps (),
2590 (__mmask16) __U, __R);
2592 #else
2593 #define _mm512_max_round_pd(A, B, R) \
2594 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2596 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2597 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2599 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2600 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2602 #define _mm512_max_round_ps(A, B, R) \
2603 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2605 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2606 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2608 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2609 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2611 #define _mm512_min_round_pd(A, B, R) \
2612 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2614 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2615 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2617 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2618 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2620 #define _mm512_min_round_ps(A, B, R) \
2621 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2623 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2624 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2626 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2627 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2628 #endif
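/* Usage sketch (illustrative only): max and min never round, so the
   last argument only affects exception behaviour; _MM_FROUND_NO_EXC
   requests the suppress-all-exceptions form.  The helper below clamps
   X into [LO, HI] and is hypothetical.

     __m512d
     clamp (__m512d x, __m512d lo, __m512d hi)
     {
       x = _mm512_max_round_pd (x, lo, _MM_FROUND_CUR_DIRECTION);
       return _mm512_min_round_pd (x, hi, _MM_FROUND_CUR_DIRECTION);
     }  */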
2630 #ifdef __OPTIMIZE__
2631 extern __inline __m512d
2632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2635 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636 (__v8df) __B,
2637 (__v8df)
2638 _mm512_undefined_pd (),
2639 (__mmask8) -1, __R);
2642 extern __inline __m512d
2643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645 __m512d __B, const int __R)
2647 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648 (__v8df) __B,
2649 (__v8df) __W,
2650 (__mmask8) __U, __R);
2653 extern __inline __m512d
2654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656 const int __R)
2658 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659 (__v8df) __B,
2660 (__v8df)
2661 _mm512_setzero_pd (),
2662 (__mmask8) __U, __R);
2665 extern __inline __m512
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2669 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670 (__v16sf) __B,
2671 (__v16sf)
2672 _mm512_undefined_ps (),
2673 (__mmask16) -1, __R);
2676 extern __inline __m512
2677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679 __m512 __B, const int __R)
2681 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682 (__v16sf) __B,
2683 (__v16sf) __W,
2684 (__mmask16) __U, __R);
2687 extern __inline __m512
2688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690 const int __R)
2692 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693 (__v16sf) __B,
2694 (__v16sf)
2695 _mm512_setzero_ps (),
2696 (__mmask16) __U, __R);
2699 extern __inline __m128d
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2703 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704 (__v2df) __B,
2705 __R);
2708 extern __inline __m128
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2712 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713 (__v4sf) __B,
2714 __R);
2716 #else
2717 #define _mm512_scalef_round_pd(A, B, C) \
2718 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2720 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2721 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
2723 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
2724 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2726 #define _mm512_scalef_round_ps(A, B, C) \
2727 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2729 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2730 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
2732 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
2733 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2735 #define _mm_scalef_round_sd(A, B, C) \
2736 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
2738 #define _mm_scalef_round_ss(A, B, C) \
2739 (__m128)__builtin_ia32_scalefss_round(A, B, C)
2740 #endif
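/* Usage sketch (illustrative only): scalef multiplies each element of
   the first operand by 2 raised to the floor of the corresponding
   element of the second, i.e. a vector ldexp when the exponents are
   integral.  The helper name is hypothetical.

     __m512d
     vector_ldexp (__m512d a, __m512d exponents)
     {
       return _mm512_scalef_round_pd (a, exponents,
                 _MM_FROUND_CUR_DIRECTION);
     }  */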
2742 #ifdef __OPTIMIZE__
2743 extern __inline __m512d
2744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2747 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748 (__v8df) __B,
2749 (__v8df) __C,
2750 (__mmask8) -1, __R);
2753 extern __inline __m512d
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756 __m512d __C, const int __R)
2758 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759 (__v8df) __B,
2760 (__v8df) __C,
2761 (__mmask8) __U, __R);
2764 extern __inline __m512d
2765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767 __mmask8 __U, const int __R)
2769 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770 (__v8df) __B,
2771 (__v8df) __C,
2772 (__mmask8) __U, __R);
2775 extern __inline __m512d
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778 __m512d __C, const int __R)
2780 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U, __R);
2786 extern __inline __m512
2787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 (__v16sf) __B,
2792 (__v16sf) __C,
2793 (__mmask16) -1, __R);
2796 extern __inline __m512
2797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799 __m512 __C, const int __R)
2801 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802 (__v16sf) __B,
2803 (__v16sf) __C,
2804 (__mmask16) __U, __R);
2807 extern __inline __m512
2808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810 __mmask16 __U, const int __R)
2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813 (__v16sf) __B,
2814 (__v16sf) __C,
2815 (__mmask16) __U, __R);
2818 extern __inline __m512
2819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821 __m512 __C, const int __R)
2823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824 (__v16sf) __B,
2825 (__v16sf) __C,
2826 (__mmask16) __U, __R);
2829 extern __inline __m512d
2830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2833 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834 (__v8df) __B,
2835 -(__v8df) __C,
2836 (__mmask8) -1, __R);
2839 extern __inline __m512d
2840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842 __m512d __C, const int __R)
2844 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845 (__v8df) __B,
2846 -(__v8df) __C,
2847 (__mmask8) __U, __R);
2850 extern __inline __m512d
2851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853 __mmask8 __U, const int __R)
2855 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856 (__v8df) __B,
2857 (__v8df) __C,
2858 (__mmask8) __U, __R);
2861 extern __inline __m512d
2862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864 __m512d __C, const int __R)
2866 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867 (__v8df) __B,
2868 -(__v8df) __C,
2869 (__mmask8) __U, __R);
2872 extern __inline __m512
2873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2876 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877 (__v16sf) __B,
2878 -(__v16sf) __C,
2879 (__mmask16) -1, __R);
2882 extern __inline __m512
2883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885 __m512 __C, const int __R)
2887 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888 (__v16sf) __B,
2889 -(__v16sf) __C,
2890 (__mmask16) __U, __R);
2893 extern __inline __m512
2894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896 __mmask16 __U, const int __R)
2898 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899 (__v16sf) __B,
2900 (__v16sf) __C,
2901 (__mmask16) __U, __R);
2904 extern __inline __m512
2905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907 __m512 __C, const int __R)
2909 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910 (__v16sf) __B,
2911 -(__v16sf) __C,
2912 (__mmask16) __U, __R);
2915 extern __inline __m512d
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 (__v8df) __B,
2921 (__v8df) __C,
2922 (__mmask8) -1, __R);
2925 extern __inline __m512d
2926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928 __m512d __C, const int __R)
2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931 (__v8df) __B,
2932 (__v8df) __C,
2933 (__mmask8) __U, __R);
2936 extern __inline __m512d
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939 __mmask8 __U, const int __R)
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942 (__v8df) __B,
2943 (__v8df) __C,
2944 (__mmask8) __U, __R);
2947 extern __inline __m512d
2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950 __m512d __C, const int __R)
2952 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953 (__v8df) __B,
2954 (__v8df) __C,
2955 (__mmask8) __U, __R);
2958 extern __inline __m512
2959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2962 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963 (__v16sf) __B,
2964 (__v16sf) __C,
2965 (__mmask16) -1, __R);
2968 extern __inline __m512
2969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971 __m512 __C, const int __R)
2973 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974 (__v16sf) __B,
2975 (__v16sf) __C,
2976 (__mmask16) __U, __R);
2979 extern __inline __m512
2980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982 __mmask16 __U, const int __R)
2984 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985 (__v16sf) __B,
2986 (__v16sf) __C,
2987 (__mmask16) __U, __R);
2990 extern __inline __m512
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993 __m512 __C, const int __R)
2995 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U, __R);
3001 extern __inline __m512d
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006 (__v8df) __B,
3007 -(__v8df) __C,
3008 (__mmask8) -1, __R);
3011 extern __inline __m512d
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014 __m512d __C, const int __R)
3016 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 -(__v8df) __C,
3019 (__mmask8) __U, __R);
3022 extern __inline __m512d
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025 __mmask8 __U, const int __R)
3027 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df) __C,
3030 (__mmask8) __U, __R);
3033 extern __inline __m512d
3034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036 __m512d __C, const int __R)
3038 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039 (__v8df) __B,
3040 -(__v8df) __C,
3041 (__mmask8) __U, __R);
3044 extern __inline __m512
3045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 (__v16sf) __B,
3050 -(__v16sf) __C,
3051 (__mmask16) -1, __R);
3054 extern __inline __m512
3055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057 __m512 __C, const int __R)
3059 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060 (__v16sf) __B,
3061 -(__v16sf) __C,
3062 (__mmask16) __U, __R);
3065 extern __inline __m512
3066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068 __mmask16 __U, const int __R)
3070 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071 (__v16sf) __B,
3072 (__v16sf) __C,
3073 (__mmask16) __U, __R);
3076 extern __inline __m512
3077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079 __m512 __C, const int __R)
3081 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082 (__v16sf) __B,
3083 -(__v16sf) __C,
3084 (__mmask16) __U, __R);
3087 extern __inline __m512d
3088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3091 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092 (__v8df) __B,
3093 (__v8df) __C,
3094 (__mmask8) -1, __R);
3097 extern __inline __m512d
3098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100 __m512d __C, const int __R)
3102 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103 (__v8df) __B,
3104 (__v8df) __C,
3105 (__mmask8) __U, __R);
3108 extern __inline __m512d
3109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111 __mmask8 __U, const int __R)
3113 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114 (__v8df) __B,
3115 (__v8df) __C,
3116 (__mmask8) __U, __R);
3119 extern __inline __m512d
3120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122 __m512d __C, const int __R)
3124 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125 (__v8df) __B,
3126 (__v8df) __C,
3127 (__mmask8) __U, __R);
3130 extern __inline __m512
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3134 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135 (__v16sf) __B,
3136 (__v16sf) __C,
3137 (__mmask16) -1, __R);
3140 extern __inline __m512
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143 __m512 __C, const int __R)
3145 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146 (__v16sf) __B,
3147 (__v16sf) __C,
3148 (__mmask16) __U, __R);
3151 extern __inline __m512
3152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154 __mmask16 __U, const int __R)
3156 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157 (__v16sf) __B,
3158 (__v16sf) __C,
3159 (__mmask16) __U, __R);
3162 extern __inline __m512
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165 __m512 __C, const int __R)
3167 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168 (__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U, __R);
3173 extern __inline __m512d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3177 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178 (__v8df) __B,
3179 -(__v8df) __C,
3180 (__mmask8) -1, __R);
3183 extern __inline __m512d
3184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186 __m512d __C, const int __R)
3188 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189 (__v8df) __B,
3190 (__v8df) __C,
3191 (__mmask8) __U, __R);
3194 extern __inline __m512d
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197 __mmask8 __U, const int __R)
3199 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200 (__v8df) __B,
3201 (__v8df) __C,
3202 (__mmask8) __U, __R);
3205 extern __inline __m512d
3206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208 __m512d __C, const int __R)
3210 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211 (__v8df) __B,
3212 -(__v8df) __C,
3213 (__mmask8) __U, __R);
3216 extern __inline __m512
3217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3220 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221 (__v16sf) __B,
3222 -(__v16sf) __C,
3223 (__mmask16) -1, __R);
3226 extern __inline __m512
3227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229 __m512 __C, const int __R)
3231 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232 (__v16sf) __B,
3233 (__v16sf) __C,
3234 (__mmask16) __U, __R);
3237 extern __inline __m512
3238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240 __mmask16 __U, const int __R)
3242 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243 (__v16sf) __B,
3244 (__v16sf) __C,
3245 (__mmask16) __U, __R);
3248 extern __inline __m512
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251 __m512 __C, const int __R)
3253 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254 (__v16sf) __B,
3255 -(__v16sf) __C,
3256 (__mmask16) __U, __R);
3258 #else
3259 #define _mm512_fmadd_round_pd(A, B, C, R) \
3260 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3262 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3263 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3265 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3266 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3268 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3269 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3271 #define _mm512_fmadd_round_ps(A, B, C, R) \
3272 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3274 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3275 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3277 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3278 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3280 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3281 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3283 #define _mm512_fmsub_round_pd(A, B, C, R) \
3284 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3286 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3287 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3289 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3290 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3292 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3293 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3295 #define _mm512_fmsub_round_ps(A, B, C, R) \
3296 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3298 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3299 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3301 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3302 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3304 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3305 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3307 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3308 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3310 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3311 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3313 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3314 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3316 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3317 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3319 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3320 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3322 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3323 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3325 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3326 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3328 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3329 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3331 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3332 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3334 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3335 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3337 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3338 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3340 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3341 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3343 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3344 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3346 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3347 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3349 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3350 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3352 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3353 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3355 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3356 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3358 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3359 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3361 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3362 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3364 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3365 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3367 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3368 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3370 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3371 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3373 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3374 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3376 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3377 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3379 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3380 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3382 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3383 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3385 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3386 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3388 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3389 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3391 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3392 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3394 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3395 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3397 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3398 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3400 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3401 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3402 #endif
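/* Usage sketch (illustrative only): in the fused multiply-add family
   the _mask_ form keeps the first operand in unselected lanes, _mask3_
   keeps the third, and _maskz_ zeroes them.  The helper name is
   hypothetical.

     __m512d
     masked_fma (__m512d a, __mmask8 m, __m512d b, __m512d c)
     {
       return _mm512_mask_fmadd_round_pd (a, m, b, c,
                 _MM_FROUND_CUR_DIRECTION);
     }  */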
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_abs_epi64 (__m512i __A)
3408 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409 (__v8di)
3410 _mm512_undefined_si512 (),
3411 (__mmask8) -1);
3414 extern __inline __m512i
3415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3418 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419 (__v8di) __W,
3420 (__mmask8) __U);
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di)
3429 _mm512_setzero_si512 (),
3430 (__mmask8) __U);
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_abs_epi32 (__m512i __A)
3437 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438 (__v16si)
3439 _mm512_undefined_si512 (),
3440 (__mmask16) -1);
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3447 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448 (__v16si) __W,
3449 (__mmask16) __U);
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si)
3458 _mm512_setzero_si512 (),
3459 (__mmask16) __U);
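/* Usage sketch (illustrative only): lane-wise absolute value of signed
   integers, merging with W where the mask bit is clear.  The helper
   name is hypothetical.

     __m512i
     masked_abs (__m512i w, __mmask16 m, __m512i x)
     {
       return _mm512_mask_abs_epi32 (w, m, x);
     }  */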
3462 extern __inline __m512
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm512_broadcastss_ps (__m128 __A)
3466 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467 (__v16sf)
3468 _mm512_undefined_ps (),
3469 (__mmask16) -1);
3472 extern __inline __m512
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3476 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477 (__v16sf) __O, __M);
3480 extern __inline __m512
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3484 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485 (__v16sf)
3486 _mm512_setzero_ps (),
3487 __M);
3490 extern __inline __m512d
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_broadcastsd_pd (__m128d __A)
3494 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495 (__v8df)
3496 _mm512_undefined_pd (),
3497 (__mmask8) -1);
3500 extern __inline __m512d
3501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3504 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505 (__v8df) __O, __M);
3508 extern __inline __m512d
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3512 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513 (__v8df)
3514 _mm512_setzero_pd (),
3515 __M);
3518 extern __inline __m512i
3519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520 _mm512_broadcastd_epi32 (__m128i __A)
3522 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523 (__v16si)
3524 _mm512_undefined_si512 (),
3525 (__mmask16) -1);
3528 extern __inline __m512i
3529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3532 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533 (__v16si) __O, __M);
3536 extern __inline __m512i
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3540 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541 (__v16si)
3542 _mm512_setzero_si512 (),
3543 __M);
3546 extern __inline __m512i
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm512_set1_epi32 (int __A)
3550 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551 (__v16si)
3552 _mm512_undefined_si512 (),
3553 (__mmask16)(-1));
3556 extern __inline __m512i
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3560 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561 __M);
3564 extern __inline __m512i
3565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3568 return (__m512i)
3569 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570 (__v16si) _mm512_setzero_si512 (),
3571 __M);
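/* Usage sketch (illustrative only): broadcast a scalar from a general
   register into every 32-bit lane, zeroing the lanes not selected by
   the mask.  The helper name is hypothetical.

     __m512i
     splat_masked (__mmask16 m, int v)
     {
       return _mm512_maskz_set1_epi32 (m, v);
     }  */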
3574 extern __inline __m512i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm512_broadcastq_epi64 (__m128i __A)
3578 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579 (__v8di)
3580 _mm512_undefined_si512 (),
3581 (__mmask8) -1);
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3588 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589 (__v8di) __O, __M);
3592 extern __inline __m512i
3593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3596 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597 (__v8di)
3598 _mm512_setzero_si512 (),
3599 __M);
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm512_set1_epi64 (long long __A)
3606 #ifdef __x86_64__
3607 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3608 (__v8di)
3609 _mm512_undefined_si512 (),
3610 (__mmask8)(-1));
3611 #else
3612 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
3613 (__v8di)
3614 _mm512_undefined_si512 (),
3615 (__mmask8)(-1));
3616 #endif
3619 extern __inline __m512i
3620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3621 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3624 #ifdef __x86_64__
3624 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3625 __M);
3626 #else
3627 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
3628 __M);
3629 #endif
3632 extern __inline __m512i
3633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3634 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3636 #ifdef __x86_64__
3637 return (__m512i)
3638 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3639 (__v8di) _mm512_setzero_si512 (),
3640 __M);
3641 #else
3642 return (__m512i)
3643 __builtin_ia32_pbroadcastq512_mem_mask (__A,
3644 (__v8di) _mm512_setzero_si512 (),
3645 __M);
3646 #endif
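/* A minimal usage sketch of the scalar broadcasts above (illustrative only;
   the variable names are assumptions).  Merge masking keeps the destination
   lanes whose mask bit is clear, zero masking clears them instead:

       __m512i src    = _mm512_set1_epi32 (1);
       __mmask16 k    = 0x00FF;
       __m512i merged = _mm512_mask_set1_epi32 (src, k, 42);
       __m512i zeroed = _mm512_maskz_set1_epi32 (k, 42);  */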
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_broadcast_f32x4 (__m128 __A)
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf)
3655 _mm512_undefined_ps (),
3656 (__mmask16) -1);
3659 extern __inline __m512
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3663 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3664 (__v16sf) __O,
3665 __M);
3668 extern __inline __m512
3669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3670 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3672 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3673 (__v16sf)
3674 _mm512_setzero_ps (),
3675 __M);
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_broadcast_i32x4 (__m128i __A)
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si)
3684 _mm512_undefined_si512 (),
3685 (__mmask16) -1);
3688 extern __inline __m512i
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3692 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3693 (__v16si) __O,
3694 __M);
3697 extern __inline __m512i
3698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3699 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3701 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3702 (__v16si)
3703 _mm512_setzero_si512 (),
3704 __M);
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_broadcast_f64x4 (__m256d __A)
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df)
3713 _mm512_undefined_pd (),
3714 (__mmask8) -1);
3717 extern __inline __m512d
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3721 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3722 (__v8df) __O,
3723 __M);
3726 extern __inline __m512d
3727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3728 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3730 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3731 (__v8df)
3732 _mm512_setzero_pd (),
3733 __M);
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_broadcast_i64x4 (__m256i __A)
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di)
3742 _mm512_undefined_si512 (),
3743 (__mmask8) -1);
3746 extern __inline __m512i
3747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3748 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3750 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3751 (__v8di) __O,
3752 __M);
3755 extern __inline __m512i
3756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3757 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3759 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3760 (__v8di)
3761 _mm512_setzero_si512 (),
3762 __M);
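/* A hedged usage sketch of the sub-vector broadcasts above (illustrative
   only; the variable names are assumptions).  A 128-bit or 256-bit source is
   replicated across the whole 512-bit destination:

       __m128 quad  = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
       __m512 rep4  = _mm512_broadcast_f32x4 (quad);
       __m256d pair = _mm256_set1_pd (2.0);
       __m512d rep2 = _mm512_broadcast_f64x4 (pair);  */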
3765 typedef enum
3767 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3768 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3769 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3770 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3771 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3772 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3773 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3774 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3775 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3776 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3777 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3778 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3779 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3780 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3781 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3782 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3783 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3784 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3785 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3786 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3787 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3788 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3789 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3790 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3791 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3792 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3793 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3794 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3795 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3796 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3797 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3798 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3799 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3800 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3801 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3802 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3803 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3804 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3805 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3806 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3807 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3808 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3809 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3810 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3811 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3812 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3813 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3814 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3815 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3816 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3817 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3818 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3819 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3820 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3821 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3822 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3823 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3824 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3825 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3826 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3827 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3828 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3829 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3830 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3831 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3832 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3833 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3834 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3835 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3836 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3837 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3838 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3839 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3840 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3841 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3842 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3843 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3844 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3845 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3846 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3847 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3848 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3849 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3850 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3851 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3852 _MM_PERM_DDDD = 0xFF
3853 } _MM_PERM_ENUM;
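/* Each letter in an _MM_PERM_* name appears to encode a 2-bit source element
   selector (A = element 0 ... D = element 3), with the leftmost letter in
   the highest bit pair.  Under that reading _MM_PERM_DCBA (0xE4) is the
   identity permutation within each 128-bit lane and _MM_PERM_AAAA (0x00)
   replicates element 0 of each lane.  A hedged sketch (V is an assumed
   __m512i value):

       __m512i identity  = _mm512_shuffle_epi32 (v, _MM_PERM_DCBA);
       __m512i broadcast = _mm512_shuffle_epi32 (v, _MM_PERM_AAAA);  */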
3855 #ifdef __OPTIMIZE__
3856 extern __inline __m512i
3857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3858 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3860 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3861 __mask,
3862 (__v16si)
3863 _mm512_undefined_si512 (),
3864 (__mmask16) -1);
3867 extern __inline __m512i
3868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3869 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3870 _MM_PERM_ENUM __mask)
3872 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3873 __mask,
3874 (__v16si) __W,
3875 (__mmask16) __U);
3878 extern __inline __m512i
3879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3880 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3882 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3883 __mask,
3884 (__v16si)
3885 _mm512_setzero_si512 (),
3886 (__mmask16) __U);
3889 extern __inline __m512i
3890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3891 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3893 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3894 (__v8di) __B, __imm,
3895 (__v8di)
3896 _mm512_undefined_si512 (),
3897 (__mmask8) -1);
3900 extern __inline __m512i
3901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3902 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3903 __m512i __B, const int __imm)
3905 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3906 (__v8di) __B, __imm,
3907 (__v8di) __W,
3908 (__mmask8) __U);
3911 extern __inline __m512i
3912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3913 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3914 const int __imm)
3916 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3917 (__v8di) __B, __imm,
3918 (__v8di)
3919 _mm512_setzero_si512 (),
3920 (__mmask8) __U);
3923 extern __inline __m512i
3924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3927 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3928 (__v16si) __B,
3929 __imm,
3930 (__v16si)
3931 _mm512_undefined_si512 (),
3932 (__mmask16) -1);
3935 extern __inline __m512i
3936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3937 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3938 __m512i __B, const int __imm)
3940 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3941 (__v16si) __B,
3942 __imm,
3943 (__v16si) __W,
3944 (__mmask16) __U);
3947 extern __inline __m512i
3948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3949 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3950 const int __imm)
3952 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3953 (__v16si) __B,
3954 __imm,
3955 (__v16si)
3956 _mm512_setzero_si512 (),
3957 (__mmask16) __U);
3960 extern __inline __m512d
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3964 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3965 (__v8df) __B, __imm,
3966 (__v8df)
3967 _mm512_undefined_pd (),
3968 (__mmask8) -1);
3971 extern __inline __m512d
3972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3973 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3974 __m512d __B, const int __imm)
3976 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3977 (__v8df) __B, __imm,
3978 (__v8df) __W,
3979 (__mmask8) __U);
3982 extern __inline __m512d
3983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3984 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3985 const int __imm)
3987 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3988 (__v8df) __B, __imm,
3989 (__v8df)
3990 _mm512_setzero_pd (),
3991 (__mmask8) __U);
3994 extern __inline __m512
3995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3996 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3998 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3999 (__v16sf) __B, __imm,
4000 (__v16sf)
4001 _mm512_undefined_ps (),
4002 (__mmask16) -1);
4005 extern __inline __m512
4006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4007 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
4008 __m512 __B, const int __imm)
4010 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4011 (__v16sf) __B, __imm,
4012 (__v16sf) __W,
4013 (__mmask16) __U);
4016 extern __inline __m512
4017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4018 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4019 const int __imm)
4021 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4022 (__v16sf) __B, __imm,
4023 (__v16sf)
4024 _mm512_setzero_ps (),
4025 (__mmask16) __U);
4028 #else
4029 #define _mm512_shuffle_epi32(X, C) \
4030 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4031 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4032 (__mmask16)-1))
4034 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4035 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4036 (__v16si)(__m512i)(W),\
4037 (__mmask16)(U)))
4039 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4040 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4041 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4042 (__mmask16)(U)))
4044 #define _mm512_shuffle_i64x2(X, Y, C) \
4045 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4046 (__v8di)(__m512i)(Y), (int)(C),\
4047 (__v8di)(__m512i)_mm512_undefined_si512 (),\
4048 (__mmask8)-1))
4050 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4051 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4052 (__v8di)(__m512i)(Y), (int)(C),\
4053 (__v8di)(__m512i)(W),\
4054 (__mmask8)(U)))
4056 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4057 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4058 (__v8di)(__m512i)(Y), (int)(C),\
4059 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4060 (__mmask8)(U)))
4062 #define _mm512_shuffle_i32x4(X, Y, C) \
4063 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4064 (__v16si)(__m512i)(Y), (int)(C),\
4065 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4066 (__mmask16)-1))
4068 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4069 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4070 (__v16si)(__m512i)(Y), (int)(C),\
4071 (__v16si)(__m512i)(W),\
4072 (__mmask16)(U)))
4074 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4075 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4076 (__v16si)(__m512i)(Y), (int)(C),\
4077 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4078 (__mmask16)(U)))
4080 #define _mm512_shuffle_f64x2(X, Y, C) \
4081 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4082 (__v8df)(__m512d)(Y), (int)(C),\
4083 (__v8df)(__m512d)_mm512_undefined_pd(),\
4084 (__mmask8)-1))
4086 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4087 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4088 (__v8df)(__m512d)(Y), (int)(C),\
4089 (__v8df)(__m512d)(W),\
4090 (__mmask8)(U)))
4092 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4093 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4094 (__v8df)(__m512d)(Y), (int)(C),\
4095 (__v8df)(__m512d)_mm512_setzero_pd(),\
4096 (__mmask8)(U)))
4098 #define _mm512_shuffle_f32x4(X, Y, C) \
4099 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4100 (__v16sf)(__m512)(Y), (int)(C),\
4101 (__v16sf)(__m512)_mm512_undefined_ps(),\
4102 (__mmask16)-1))
4104 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4105 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4106 (__v16sf)(__m512)(Y), (int)(C),\
4107 (__v16sf)(__m512)(W),\
4108 (__mmask16)(U)))
4110 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4111 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4112 (__v16sf)(__m512)(Y), (int)(C),\
4113 (__v16sf)(__m512)_mm512_setzero_ps(),\
4114 (__mmask16)(U)))
4115 #endif
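/* The macro forms above mirror the inline functions so that, without
   __OPTIMIZE__, the shuffle control still reaches the builtin as an integer
   constant expression.  A hedged sketch (A, B, W, K, X, Y are assumed values;
   the immediates are arbitrary constants):

       __m512i t = _mm512_shuffle_i32x4 (a, b, 0x4E);
       __m512d d = _mm512_mask_shuffle_f64x2 (w, k, x, y, 0x1B);  */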
4117 extern __inline __m512i
4118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4119 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4121 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4122 (__v16si) __B,
4123 (__v16si)
4124 _mm512_undefined_si512 (),
4125 (__mmask16) -1);
4128 extern __inline __m512i
4129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4130 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4132 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4133 (__v16si) __B,
4134 (__v16si) __W,
4135 (__mmask16) __U);
4138 extern __inline __m512i
4139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4140 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4142 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4143 (__v16si) __B,
4144 (__v16si)
4145 _mm512_setzero_si512 (),
4146 (__mmask16) __U);
4149 extern __inline __m512i
4150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4151 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4153 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4154 (__v16si) __B,
4155 (__v16si)
4156 _mm512_undefined_si512 (),
4157 (__mmask16) -1);
4160 extern __inline __m512i
4161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4162 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4164 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4165 (__v16si) __B,
4166 (__v16si) __W,
4167 (__mmask16) __U);
4170 extern __inline __m512i
4171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4172 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4174 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4175 (__v16si) __B,
4176 (__v16si)
4177 _mm512_setzero_si512 (),
4178 (__mmask16) __U);
4181 extern __inline __m512i
4182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4183 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4185 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4186 (__v8di) __B,
4187 (__v8di)
4188 _mm512_undefined_si512 (),
4189 (__mmask8) -1);
4192 extern __inline __m512i
4193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4194 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4196 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4197 (__v8di) __B,
4198 (__v8di) __W,
4199 (__mmask8) __U);
4202 extern __inline __m512i
4203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4204 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4206 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4207 (__v8di) __B,
4208 (__v8di)
4209 _mm512_setzero_si512 (),
4210 (__mmask8) __U);
4213 extern __inline __m512i
4214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4215 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4217 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4218 (__v8di) __B,
4219 (__v8di)
4220 _mm512_undefined_si512 (),
4221 (__mmask8) -1);
4224 extern __inline __m512i
4225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4226 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4228 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4229 (__v8di) __B,
4230 (__v8di) __W,
4231 (__mmask8) __U);
4234 extern __inline __m512i
4235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4236 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4238 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4239 (__v8di) __B,
4240 (__v8di)
4241 _mm512_setzero_si512 (),
4242 (__mmask8) __U);
4245 #ifdef __OPTIMIZE__
4246 extern __inline __m256i
4247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4248 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4250 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4251 (__v8si)
4252 _mm256_undefined_si256 (),
4253 (__mmask8) -1, __R);
4256 extern __inline __m256i
4257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4258 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4259 const int __R)
4261 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4262 (__v8si) __W,
4263 (__mmask8) __U, __R);
4266 extern __inline __m256i
4267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4268 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4270 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4271 (__v8si)
4272 _mm256_setzero_si256 (),
4273 (__mmask8) __U, __R);
4276 extern __inline __m256i
4277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4278 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4280 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4281 (__v8si)
4282 _mm256_undefined_si256 (),
4283 (__mmask8) -1, __R);
4286 extern __inline __m256i
4287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4288 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4289 const int __R)
4291 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4292 (__v8si) __W,
4293 (__mmask8) __U, __R);
4296 extern __inline __m256i
4297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4298 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4300 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4301 (__v8si)
4302 _mm256_setzero_si256 (),
4303 (__mmask8) __U, __R);
4305 #else
4306 #define _mm512_cvtt_roundpd_epi32(A, B) \
4307 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4309 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4310 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4312 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4313 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4315 #define _mm512_cvtt_roundpd_epu32(A, B) \
4316 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4318 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4319 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4321 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4322 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4323 #endif
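/* For the truncating conversions above the rounding direction is fixed
   (toward zero), so the __R argument is normally one of the exception
   controls such as _MM_FROUND_NO_EXC or _MM_FROUND_CUR_DIRECTION from
   <smmintrin.h>.  A hedged sketch (PD is an assumed __m512d value):

       __m256i i32 = _mm512_cvtt_roundpd_epi32 (pd, _MM_FROUND_NO_EXC);  */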
4325 #ifdef __OPTIMIZE__
4326 extern __inline __m256i
4327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4328 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4330 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4331 (__v8si)
4332 _mm256_undefined_si256 (),
4333 (__mmask8) -1, __R);
4336 extern __inline __m256i
4337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4338 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4339 const int __R)
4341 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4342 (__v8si) __W,
4343 (__mmask8) __U, __R);
4346 extern __inline __m256i
4347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4348 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4350 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4351 (__v8si)
4352 _mm256_setzero_si256 (),
4353 (__mmask8) __U, __R);
4356 extern __inline __m256i
4357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4358 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4360 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4361 (__v8si)
4362 _mm256_undefined_si256 (),
4363 (__mmask8) -1, __R);
4366 extern __inline __m256i
4367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4369 const int __R)
4371 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4372 (__v8si) __W,
4373 (__mmask8) __U, __R);
4376 extern __inline __m256i
4377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4378 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4380 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4381 (__v8si)
4382 _mm256_setzero_si256 (),
4383 (__mmask8) __U, __R);
4385 #else
4386 #define _mm512_cvt_roundpd_epi32(A, B) \
4387 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4389 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4390 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4392 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4393 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4395 #define _mm512_cvt_roundpd_epu32(A, B) \
4396 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
4398 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4399 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4401 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4402 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4403 #endif
4405 #ifdef __OPTIMIZE__
4406 extern __inline __m512i
4407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4408 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4410 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4411 (__v16si)
4412 _mm512_undefined_si512 (),
4413 (__mmask16) -1, __R);
4416 extern __inline __m512i
4417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4418 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4419 const int __R)
4421 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4422 (__v16si) __W,
4423 (__mmask16) __U, __R);
4426 extern __inline __m512i
4427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4428 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4430 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4431 (__v16si)
4432 _mm512_setzero_si512 (),
4433 (__mmask16) __U, __R);
4436 extern __inline __m512i
4437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4438 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4440 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4441 (__v16si)
4442 _mm512_undefined_si512 (),
4443 (__mmask16) -1, __R);
4446 extern __inline __m512i
4447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4448 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4449 const int __R)
4451 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4452 (__v16si) __W,
4453 (__mmask16) __U, __R);
4456 extern __inline __m512i
4457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4458 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4460 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4461 (__v16si)
4462 _mm512_setzero_si512 (),
4463 (__mmask16) __U, __R);
4465 #else
4466 #define _mm512_cvtt_roundps_epi32(A, B) \
4467 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4469 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4470 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4472 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4473 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4475 #define _mm512_cvtt_roundps_epu32(A, B) \
4476 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4478 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4479 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4481 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4482 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4483 #endif
4485 #ifdef __OPTIMIZE__
4486 extern __inline __m512i
4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4490 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4491 (__v16si)
4492 _mm512_undefined_si512 (),
4493 (__mmask16) -1, __R);
4496 extern __inline __m512i
4497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4498 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4499 const int __R)
4501 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4502 (__v16si) __W,
4503 (__mmask16) __U, __R);
4506 extern __inline __m512i
4507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4508 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4510 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4511 (__v16si)
4512 _mm512_setzero_si512 (),
4513 (__mmask16) __U, __R);
4516 extern __inline __m512i
4517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4518 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4520 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4521 (__v16si)
4522 _mm512_undefined_si512 (),
4523 (__mmask16) -1, __R);
4526 extern __inline __m512i
4527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4529 const int __R)
4531 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4532 (__v16si) __W,
4533 (__mmask16) __U, __R);
4536 extern __inline __m512i
4537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4538 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4540 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4541 (__v16si)
4542 _mm512_setzero_si512 (),
4543 (__mmask16) __U, __R);
4545 #else
4546 #define _mm512_cvt_roundps_epi32(A, B) \
4547 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4549 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4550 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4552 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4553 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4555 #define _mm512_cvt_roundps_epu32(A, B) \
4556 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
4558 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4559 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4561 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4562 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4563 #endif
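/* The non-truncating cvt_round* conversions above take an explicit rounding
   mode.  A hedged sketch using the _MM_FROUND_* controls from <smmintrin.h>
   (PS is an assumed __m512 value):

       __m512i rn = _mm512_cvt_roundps_epi32
	 (ps, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
       __m512i rd = _mm512_cvt_roundps_epi32
	 (ps, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);  */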
4565 extern __inline __m128d
4566 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4567 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4569 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4572 #ifdef __x86_64__
4573 #ifdef __OPTIMIZE__
4574 extern __inline __m128d
4575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4576 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4578 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4581 extern __inline __m128d
4582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4583 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4585 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4588 extern __inline __m128d
4589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4590 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4592 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4594 #else
4595 #define _mm_cvt_roundu64_sd(A, B, C) \
4596 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4598 #define _mm_cvt_roundi64_sd(A, B, C) \
4599 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4601 #define _mm_cvt_roundsi64_sd(A, B, C) \
4602 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4603 #endif
4605 #endif
4607 #ifdef __OPTIMIZE__
4608 extern __inline __m128
4609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4610 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4612 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4615 extern __inline __m128
4616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4617 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4619 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4622 extern __inline __m128
4623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4626 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4628 #else
4629 #define _mm_cvt_roundu32_ss(A, B, C) \
4630 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4632 #define _mm_cvt_roundi32_ss(A, B, C) \
4633 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4635 #define _mm_cvt_roundsi32_ss(A, B, C) \
4636 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4637 #endif
4639 #ifdef __x86_64__
4640 #ifdef __OPTIMIZE__
4641 extern __inline __m128
4642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4645 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4648 extern __inline __m128
4649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4650 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4652 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4655 extern __inline __m128
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4659 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4661 #else
4662 #define _mm_cvt_roundu64_ss(A, B, C) \
4663 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4665 #define _mm_cvt_roundi64_ss(A, B, C) \
4666 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4668 #define _mm_cvt_roundsi64_ss(A, B, C) \
4669 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4670 #endif
4672 #endif
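/* The scalar converts above place the converted integer in the low element
   of the first operand and pass the remaining elements through unchanged.
   A hedged 64-bit sketch (requires __x86_64__; ACC is an assumed __m128
   value):

       __m128 f = _mm_cvt_roundu64_ss (acc, 123456789ULL,
				       _MM_FROUND_TO_NEAREST_INT
				       | _MM_FROUND_NO_EXC);  */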
4674 extern __inline __m128i
4675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 _mm512_cvtepi32_epi8 (__m512i __A)
4678 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4679 (__v16qi)
4680 _mm_undefined_si128 (),
4681 (__mmask16) -1);
4684 extern __inline void
4685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4686 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4688 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4691 extern __inline __m128i
4692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4693 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4695 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4696 (__v16qi) __O, __M);
4699 extern __inline __m128i
4700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4701 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4703 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4704 (__v16qi)
4705 _mm_setzero_si128 (),
4706 __M);
4709 extern __inline __m128i
4710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4711 _mm512_cvtsepi32_epi8 (__m512i __A)
4713 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4714 (__v16qi)
4715 _mm_undefined_si128 (),
4716 (__mmask16) -1);
4719 extern __inline void
4720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4721 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4723 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4726 extern __inline __m128i
4727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4728 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4730 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4731 (__v16qi) __O, __M);
4734 extern __inline __m128i
4735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4736 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4738 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4739 (__v16qi)
4740 _mm_setzero_si128 (),
4741 __M);
4744 extern __inline __m128i
4745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4746 _mm512_cvtusepi32_epi8 (__m512i __A)
4748 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4749 (__v16qi)
4750 _mm_undefined_si128 (),
4751 (__mmask16) -1);
4754 extern __inline void
4755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4756 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4758 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4761 extern __inline __m128i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4765 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4766 (__v16qi) __O,
4767 __M);
4770 extern __inline __m128i
4771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4774 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4775 (__v16qi)
4776 _mm_setzero_si128 (),
4777 __M);
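/* The three dword-to-byte down-conversions above differ only in how
   out-of-range values are handled: cvtepi32_epi8 truncates, cvtsepi32_epi8
   saturates as signed bytes and cvtusepi32_epi8 saturates as unsigned bytes.
   A hedged sketch:

       __m512i v     = _mm512_set1_epi32 (300);
       __m128i trunc = _mm512_cvtepi32_epi8 (v);     each byte is 300 % 256 = 44
       __m128i ssat  = _mm512_cvtsepi32_epi8 (v);    each byte is 127
       __m128i usat  = _mm512_cvtusepi32_epi8 (v);   each byte is 255  */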
4780 extern __inline __m256i
4781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4782 _mm512_cvtepi32_epi16 (__m512i __A)
4784 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4785 (__v16hi)
4786 _mm256_undefined_si256 (),
4787 (__mmask16) -1);
4790 extern __inline void
4791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4792 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4794 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4797 extern __inline __m256i
4798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4799 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4801 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4802 (__v16hi) __O, __M);
4805 extern __inline __m256i
4806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4807 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4809 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4810 (__v16hi)
4811 _mm256_setzero_si256 (),
4812 __M);
4815 extern __inline __m256i
4816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4817 _mm512_cvtsepi32_epi16 (__m512i __A)
4819 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4820 (__v16hi)
4821 _mm256_undefined_si256 (),
4822 (__mmask16) -1);
4825 extern __inline void
4826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4829 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4832 extern __inline __m256i
4833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4834 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4836 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4837 (__v16hi) __O, __M);
4840 extern __inline __m256i
4841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4842 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4844 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4845 (__v16hi)
4846 _mm256_setzero_si256 (),
4847 __M);
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_cvtusepi32_epi16 (__m512i __A)
4854 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4855 (__v16hi)
4856 _mm256_undefined_si256 (),
4857 (__mmask16) -1);
4860 extern __inline void
4861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4862 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4864 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4867 extern __inline __m256i
4868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4871 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4872 (__v16hi) __O,
4873 __M);
4876 extern __inline __m256i
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4880 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4881 (__v16hi)
4882 _mm256_setzero_si256 (),
4883 __M);
4886 extern __inline __m256i
4887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4888 _mm512_cvtepi64_epi32 (__m512i __A)
4890 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4891 (__v8si)
4892 _mm256_undefined_si256 (),
4893 (__mmask8) -1);
4896 extern __inline void
4897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4898 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4900 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4903 extern __inline __m256i
4904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4905 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4907 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4908 (__v8si) __O, __M);
4911 extern __inline __m256i
4912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4913 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4915 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4916 (__v8si)
4917 _mm256_setzero_si256 (),
4918 __M);
4921 extern __inline __m256i
4922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4923 _mm512_cvtsepi64_epi32 (__m512i __A)
4926 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4927 (__v8si)
4928 _mm256_undefined_si256 (),
4929 (__mmask8) -1);
4932 extern __inline void
4933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4934 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4936 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4939 extern __inline __m256i
4940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4941 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4943 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4944 (__v8si) __O, __M);
4947 extern __inline __m256i
4948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4951 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4952 (__v8si)
4953 _mm256_setzero_si256 (),
4954 __M);
4957 extern __inline __m256i
4958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4959 _mm512_cvtusepi64_epi32 (__m512i __A)
4961 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4962 (__v8si)
4963 _mm256_undefined_si256 (),
4964 (__mmask8) -1);
4967 extern __inline void
4968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4969 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4971 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4974 extern __inline __m256i
4975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4976 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4978 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4979 (__v8si) __O, __M);
4982 extern __inline __m256i
4983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4984 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4986 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4987 (__v8si)
4988 _mm256_setzero_si256 (),
4989 __M);
4992 extern __inline __m128i
4993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4994 _mm512_cvtepi64_epi16 (__m512i __A)
4996 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4997 (__v8hi)
4998 _mm_undefined_si128 (),
4999 (__mmask8) -1);
5002 extern __inline void
5003 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5004 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5006 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5009 extern __inline __m128i
5010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5011 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5013 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5014 (__v8hi) __O, __M);
5017 extern __inline __m128i
5018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5019 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5021 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5022 (__v8hi)
5023 _mm_setzero_si128 (),
5024 __M);
5027 extern __inline __m128i
5028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5029 _mm512_cvtsepi64_epi16 (__m512i __A)
5031 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5032 (__v8hi)
5033 _mm_undefined_si128 (),
5034 (__mmask8) -1);
5037 extern __inline void
5038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5039 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5041 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5044 extern __inline __m128i
5045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5046 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5048 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5049 (__v8hi) __O, __M);
5052 extern __inline __m128i
5053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5056 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5057 (__v8hi)
5058 _mm_setzero_si128 (),
5059 __M);
5062 extern __inline __m128i
5063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064 _mm512_cvtusepi64_epi16 (__m512i __A)
5066 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5067 (__v8hi)
5068 _mm_undefined_si128 (),
5069 (__mmask8) -1);
5072 extern __inline void
5073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5074 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5076 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5079 extern __inline __m128i
5080 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5081 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5083 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5084 (__v8hi) __O, __M);
5087 extern __inline __m128i
5088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5089 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5091 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5092 (__v8hi)
5093 _mm_setzero_si128 (),
5094 __M);
5097 extern __inline __m128i
5098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5099 _mm512_cvtepi64_epi8 (__m512i __A)
5101 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5102 (__v16qi)
5103 _mm_undefined_si128 (),
5104 (__mmask8) -1);
5107 extern __inline void
5108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5109 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5111 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5114 extern __inline __m128i
5115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5116 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5118 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5119 (__v16qi) __O, __M);
5122 extern __inline __m128i
5123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5124 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5126 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5127 (__v16qi)
5128 _mm_setzero_si128 (),
5129 __M);
5132 extern __inline __m128i
5133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5134 _mm512_cvtsepi64_epi8 (__m512i __A)
5136 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5137 (__v16qi)
5138 _mm_undefined_si128 (),
5139 (__mmask8) -1);
5142 extern __inline void
5143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5144 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5146 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5149 extern __inline __m128i
5150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5151 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5153 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5154 (__v16qi) __O, __M);
5157 extern __inline __m128i
5158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5161 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5162 (__v16qi)
5163 _mm_setzero_si128 (),
5164 __M);
5167 extern __inline __m128i
5168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5169 _mm512_cvtusepi64_epi8 (__m512i __A)
5171 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5172 (__v16qi)
5173 _mm_undefined_si128 (),
5174 (__mmask8) -1);
5177 extern __inline void
5178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5179 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5181 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5184 extern __inline __m128i
5185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5188 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5189 (__v16qi) __O,
5190 __M);
5193 extern __inline __m128i
5194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5195 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5197 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5198 (__v16qi)
5199 _mm_setzero_si128 (),
5200 __M);
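/* Each down-conversion also has a mask_*_storeu_* form that writes the
   narrowed elements straight to memory under the mask instead of returning
   a vector.  A hedged sketch (Q is an assumed __m512i value; the exact
   number of bytes written depends on the mask):

       unsigned char out[16];
       _mm512_mask_cvtepi64_storeu_epi8 (out, (__mmask8) 0xFF, q);  */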
5203 extern __inline __m512d
5204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205 _mm512_cvtepi32_pd (__m256i __A)
5207 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5208 (__v8df)
5209 _mm512_undefined_pd (),
5210 (__mmask8) -1);
5213 extern __inline __m512d
5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5217 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5218 (__v8df) __W,
5219 (__mmask8) __U);
5222 extern __inline __m512d
5223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5224 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5226 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5227 (__v8df)
5228 _mm512_setzero_pd (),
5229 (__mmask8) __U);
5232 extern __inline __m512d
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_cvtepu32_pd (__m256i __A)
5236 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5237 (__v8df)
5238 _mm512_undefined_pd (),
5239 (__mmask8) -1);
5242 extern __inline __m512d
5243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5244 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5246 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5247 (__v8df) __W,
5248 (__mmask8) __U);
5251 extern __inline __m512d
5252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5253 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5255 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5256 (__v8df)
5257 _mm512_setzero_pd (),
5258 (__mmask8) __U);
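/* cvtepi32_pd / cvtepu32_pd widen eight signed / unsigned 32-bit integers to
   eight doubles; the unsigned form matters once the top bit is set.  A
   hedged sketch:

       __m256i u  = _mm256_set1_epi32 (-1);
       __m512d sd = _mm512_cvtepi32_pd (u);    every element is -1.0
       __m512d ud = _mm512_cvtepu32_pd (u);    every element is 4294967295.0  */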
5261 #ifdef __OPTIMIZE__
5262 extern __inline __m512
5263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5264 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5266 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5267 (__v16sf)
5268 _mm512_undefined_ps (),
5269 (__mmask16) -1, __R);
5272 extern __inline __m512
5273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5275 const int __R)
5277 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5278 (__v16sf) __W,
5279 (__mmask16) __U, __R);
5282 extern __inline __m512
5283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5284 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5286 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5287 (__v16sf)
5288 _mm512_setzero_ps (),
5289 (__mmask16) __U, __R);
5292 extern __inline __m512
5293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5294 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5296 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5297 (__v16sf)
5298 _mm512_undefined_ps (),
5299 (__mmask16) -1, __R);
5302 extern __inline __m512
5303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5304 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5305 const int __R)
5307 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5308 (__v16sf) __W,
5309 (__mmask16) __U, __R);
5312 extern __inline __m512
5313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5314 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5316 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5317 (__v16sf)
5318 _mm512_setzero_ps (),
5319 (__mmask16) __U, __R);
5322 #else
5323 #define _mm512_cvt_roundepi32_ps(A, B) \
5324 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5326 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5327 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)(W), (__mmask16)(U), B)
5329 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5330 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5332 #define _mm512_cvt_roundepu32_ps(A, B) \
5333 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5335 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5336 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)(W), (__mmask16)(U), B)
5338 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5339 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5340 #endif
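/* Illustrative usage (not part of the header): the rounding argument of the
   _round intrinsics must be a compile-time constant, e.g. for a
   caller-supplied __m512i v

     __m512 f = _mm512_cvt_roundepi32_ps (v,
                  _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

   When not optimizing, the macro forms above are used so the constant still
   reaches the builtin directly.  */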
5342 #ifdef __OPTIMIZE__
5343 extern __inline __m256d
5344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5345 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5347 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5348 __imm,
5349 (__v4df)
5350 _mm256_undefined_pd (),
5351 (__mmask8) -1);
5354 extern __inline __m256d
5355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5356 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5357 const int __imm)
5359 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5360 __imm,
5361 (__v4df) __W,
5362 (__mmask8) __U);
5365 extern __inline __m256d
5366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5367 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5369 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5370 __imm,
5371 (__v4df)
5372 _mm256_setzero_pd (),
5373 (__mmask8) __U);
5376 extern __inline __m128
5377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5378 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5380 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5381 __imm,
5382 (__v4sf)
5383 _mm_undefined_ps (),
5384 (__mmask8) -1);
5387 extern __inline __m128
5388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5389 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5390 const int __imm)
5392 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5393 __imm,
5394 (__v4sf) __W,
5395 (__mmask8) __U);
5398 extern __inline __m128
5399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5400 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5402 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5403 __imm,
5404 (__v4sf)
5405 _mm_setzero_ps (),
5406 (__mmask8) __U);
5409 extern __inline __m256i
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5413 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5414 __imm,
5415 (__v4di)
5416 _mm256_undefined_si256 (),
5417 (__mmask8) -1);
5420 extern __inline __m256i
5421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5422 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5423 const int __imm)
5425 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5426 __imm,
5427 (__v4di) __W,
5428 (__mmask8) __U);
5431 extern __inline __m256i
5432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5435 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5436 __imm,
5437 (__v4di)
5438 _mm256_setzero_si256 (),
5439 (__mmask8) __U);
5442 extern __inline __m128i
5443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5444 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5446 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5447 __imm,
5448 (__v4si)
5449 _mm_undefined_si128 (),
5450 (__mmask8) -1);
5453 extern __inline __m128i
5454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5455 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5456 const int __imm)
5458 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5459 __imm,
5460 (__v4si) __W,
5461 (__mmask8) __U);
5464 extern __inline __m128i
5465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5466 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5468 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5469 __imm,
5470 (__v4si)
5471 _mm_setzero_si128 (),
5472 (__mmask8) __U);
5474 #else
5476 #define _mm512_extractf64x4_pd(X, C) \
5477 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5478 (int) (C),\
5479 (__v4df)(__m256d)_mm256_undefined_pd(),\
5480 (__mmask8)-1))
5482 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5483 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5484 (int) (C),\
5485 (__v4df)(__m256d)(W),\
5486 (__mmask8)(U)))
5488 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5489 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5490 (int) (C),\
5491 (__v4df)(__m256d)_mm256_setzero_pd(),\
5492 (__mmask8)(U)))
5494 #define _mm512_extractf32x4_ps(X, C) \
5495 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5496 (int) (C),\
5497 (__v4sf)(__m128)_mm_undefined_ps(),\
5498 (__mmask8)-1))
5500 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5501 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5502 (int) (C),\
5503 (__v4sf)(__m128)(W),\
5504 (__mmask8)(U)))
5506 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5507 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5508 (int) (C),\
5509 (__v4sf)(__m128)_mm_setzero_ps(),\
5510 (__mmask8)(U)))
5512 #define _mm512_extracti64x4_epi64(X, C) \
5513 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5514 (int) (C),\
5515 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5516 (__mmask8)-1))
5518 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5519 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5520 (int) (C),\
5521 (__v4di)(__m256i)(W),\
5522 (__mmask8)(U)))
5524 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5525 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5526 (int) (C),\
5527 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5528 (__mmask8)(U)))
5530 #define _mm512_extracti32x4_epi32(X, C) \
5531 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5532 (int) (C),\
5533 (__v4si)(__m128i)_mm_undefined_si128 (),\
5534 (__mmask8)-1))
5536 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5537 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5538 (int) (C),\
5539 (__v4si)(__m128i)(W),\
5540 (__mmask8)(U)))
5542 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5543 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5544 (int) (C),\
5545 (__v4si)(__m128i)_mm_setzero_si128 (),\
5546 (__mmask8)(U)))
5547 #endif
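/* Illustrative usage (not part of the header): the extract immediate picks a
   256-bit (x4 of 64-bit) or 128-bit (x4 of 32-bit) lane, e.g. for a
   caller-supplied __m512d v

     __m256d hi = _mm512_extractf64x4_pd (v, 1);

   returns the upper four doubles.  */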
5549 #ifdef __OPTIMIZE__
5550 extern __inline __m512i
5551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5554 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5555 (__v4si) __B,
5556 __imm,
5557 (__v16si) __A, -1);
5560 extern __inline __m512
5561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5562 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5564 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5565 (__v4sf) __B,
5566 __imm,
5567 (__v16sf) __A, -1);
5570 extern __inline __m512i
5571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5572 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5574 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5575 (__v4di) __B,
5576 __imm,
5577 (__v8di)
5578 _mm512_undefined_si512 (),
5579 (__mmask8) -1);
5582 extern __inline __m512i
5583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5584 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5585 __m256i __B, const int __imm)
5587 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5588 (__v4di) __B,
5589 __imm,
5590 (__v8di) __W,
5591 (__mmask8) __U);
5594 extern __inline __m512i
5595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5596 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5597 const int __imm)
5599 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5600 (__v4di) __B,
5601 __imm,
5602 (__v8di)
5603 _mm512_setzero_si512 (),
5604 (__mmask8) __U);
5607 extern __inline __m512d
5608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5609 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5611 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5612 (__v4df) __B,
5613 __imm,
5614 (__v8df)
5615 _mm512_undefined_pd (),
5616 (__mmask8) -1);
5619 extern __inline __m512d
5620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5621 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5622 __m256d __B, const int __imm)
5624 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5625 (__v4df) __B,
5626 __imm,
5627 (__v8df) __W,
5628 (__mmask8) __U);
5631 extern __inline __m512d
5632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5633 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5634 const int __imm)
5636 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5637 (__v4df) __B,
5638 __imm,
5639 (__v8df)
5640 _mm512_setzero_pd (),
5641 (__mmask8) __U);
5643 #else
5644 #define _mm512_insertf32x4(X, Y, C) \
5645 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5646 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5648 #define _mm512_inserti32x4(X, Y, C) \
5649 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5650 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5652 #define _mm512_insertf64x4(X, Y, C) \
5653 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5654 (__v4df)(__m256d) (Y), (int) (C), \
5655 (__v8df)(__m512d)_mm512_undefined_pd(), \
5656 (__mmask8)-1))
5658 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5659 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5660 (__v4df)(__m256d) (Y), (int) (C), \
5661 (__v8df)(__m512d)(W), \
5662 (__mmask8)(U)))
5664 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5665 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5666 (__v4df)(__m256d) (Y), (int) (C), \
5667 (__v8df)(__m512d)_mm512_setzero_pd(), \
5668 (__mmask8)(U)))
5670 #define _mm512_inserti64x4(X, Y, C) \
5671 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5672 (__v4di)(__m256i) (Y), (int) (C), \
5673 (__v8di)(__m512i)_mm512_undefined_si512 (), \
5674 (__mmask8)-1))
5676 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5677 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5678 (__v4di)(__m256i) (Y), (int) (C),\
5679 (__v8di)(__m512i)(W),\
5680 (__mmask8)(U)))
5682 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
5683 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5684 (__v4di)(__m256i) (Y), (int) (C), \
5685 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5686 (__mmask8)(U)))
5687 #endif
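/* Illustrative usage (not part of the header): the inserts are the inverse
   of the extracts above; the immediate selects the destination lane, e.g.

     __m512d r = _mm512_insertf64x4 (v, hi, 1);

   replaces the upper 256 bits of a caller-supplied __m512d v with the
   __m256d value hi.  */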
5689 extern __inline __m512d
5690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691 _mm512_loadu_pd (void const *__P)
5693 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5694 (__v8df)
5695 _mm512_undefined_pd (),
5696 (__mmask8) -1);
5699 extern __inline __m512d
5700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5701 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5703 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5704 (__v8df) __W,
5705 (__mmask8) __U);
5708 extern __inline __m512d
5709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5710 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5712 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5713 (__v8df)
5714 _mm512_setzero_pd (),
5715 (__mmask8) __U);
5718 extern __inline void
5719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5720 _mm512_storeu_pd (void *__P, __m512d __A)
5722 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5723 (__mmask8) -1);
5726 extern __inline void
5727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5728 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5730 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5731 (__mmask8) __U);
5734 extern __inline __m512
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm512_loadu_ps (void const *__P)
5738 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5739 (__v16sf)
5740 _mm512_undefined_ps (),
5741 (__mmask16) -1);
5744 extern __inline __m512
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5748 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5749 (__v16sf) __W,
5750 (__mmask16) __U);
5753 extern __inline __m512
5754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5755 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5757 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5758 (__v16sf)
5759 _mm512_setzero_ps (),
5760 (__mmask16) __U);
5763 extern __inline void
5764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5765 _mm512_storeu_ps (void *__P, __m512 __A)
5767 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5768 (__mmask16) -1);
5771 extern __inline void
5772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5775 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5776 (__mmask16) __U);
5779 extern __inline __m512i
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5783 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5784 (__v8di) __W,
5785 (__mmask8) __U);
5788 extern __inline __m512i
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5792 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5793 (__v8di)
5794 _mm512_setzero_si512 (),
5795 (__mmask8) __U);
5798 extern __inline void
5799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5802 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5803 (__mmask8) __U);
5806 extern __inline __m512i
5807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808 _mm512_loadu_si512 (void const *__P)
5810 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5811 (__v16si)
5812 _mm512_setzero_si512 (),
5813 (__mmask16) -1);
5816 extern __inline __m512i
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5820 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5821 (__v16si) __W,
5822 (__mmask16) __U);
5825 extern __inline __m512i
5826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5827 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5829 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5830 (__v16si)
5831 _mm512_setzero_si512 (),
5832 (__mmask16) __U);
5835 extern __inline void
5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 _mm512_storeu_si512 (void *__P, __m512i __A)
5839 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5840 (__mmask16) -1);
5843 extern __inline void
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5847 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5848 (__mmask16) __U);
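/* Illustrative usage (not part of the header): the masked unaligned loads
   and stores above are convenient for loop remainders.  For a caller-chosen
   element count n < 16,

     __mmask16 k = (__mmask16) ((1u << n) - 1);
     __m512 t = _mm512_maskz_loadu_ps (k, src);
     t = _mm512_add_ps (t, t);
     _mm512_mask_storeu_ps (dst, k, t);

   where src and dst point to at least n floats; elements whose mask bit is
   clear are neither read nor written.  */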
5851 extern __inline __m512d
5852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5853 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5855 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5856 (__v8di) __C,
5857 (__v8df)
5858 _mm512_undefined_pd (),
5859 (__mmask8) -1);
5862 extern __inline __m512d
5863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5864 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5866 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5867 (__v8di) __C,
5868 (__v8df) __W,
5869 (__mmask8) __U);
5872 extern __inline __m512d
5873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5876 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5877 (__v8di) __C,
5878 (__v8df)
5879 _mm512_setzero_pd (),
5880 (__mmask8) __U);
5883 extern __inline __m512
5884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5885 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5887 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5888 (__v16si) __C,
5889 (__v16sf)
5890 _mm512_undefined_ps (),
5891 (__mmask16) -1);
5894 extern __inline __m512
5895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5896 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5898 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5899 (__v16si) __C,
5900 (__v16sf) __W,
5901 (__mmask16) __U);
5904 extern __inline __m512
5905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5906 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5908 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5909 (__v16si) __C,
5910 (__v16sf)
5911 _mm512_setzero_ps (),
5912 (__mmask16) __U);
5915 extern __inline __m512i
5916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5917 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5919 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5920 /* idx */ ,
5921 (__v8di) __A,
5922 (__v8di) __B,
5923 (__mmask8) -1);
5926 extern __inline __m512i
5927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5928 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5929 __m512i __B)
5931 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5932 /* idx */ ,
5933 (__v8di) __A,
5934 (__v8di) __B,
5935 (__mmask8) __U);
5938 extern __inline __m512i
5939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5941 __mmask8 __U, __m512i __B)
5943 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5944 (__v8di) __I
5945 /* idx */ ,
5946 (__v8di) __B,
5947 (__mmask8) __U);
5950 extern __inline __m512i
5951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5952 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5953 __m512i __I, __m512i __B)
5955 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5956 /* idx */ ,
5957 (__v8di) __A,
5958 (__v8di) __B,
5959 (__mmask8) __U);
5962 extern __inline __m512i
5963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5964 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5966 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5967 /* idx */ ,
5968 (__v16si) __A,
5969 (__v16si) __B,
5970 (__mmask16) -1);
5973 extern __inline __m512i
5974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5975 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5976 __m512i __I, __m512i __B)
5978 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5979 /* idx */ ,
5980 (__v16si) __A,
5981 (__v16si) __B,
5982 (__mmask16) __U);
5985 extern __inline __m512i
5986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5987 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5988 __mmask16 __U, __m512i __B)
5990 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5991 (__v16si) __I
5992 /* idx */ ,
5993 (__v16si) __B,
5994 (__mmask16) __U);
5997 extern __inline __m512i
5998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5999 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
6000 __m512i __I, __m512i __B)
6002 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
6003 /* idx */ ,
6004 (__v16si) __A,
6005 (__v16si) __B,
6006 (__mmask16) __U);
6009 extern __inline __m512d
6010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6011 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
6013 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6014 /* idx */ ,
6015 (__v8df) __A,
6016 (__v8df) __B,
6017 (__mmask8) -1);
6020 extern __inline __m512d
6021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6022 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6023 __m512d __B)
6025 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6026 /* idx */ ,
6027 (__v8df) __A,
6028 (__v8df) __B,
6029 (__mmask8) __U);
6032 extern __inline __m512d
6033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6034 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6035 __m512d __B)
6037 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6038 (__v8di) __I
6039 /* idx */ ,
6040 (__v8df) __B,
6041 (__mmask8) __U);
6044 extern __inline __m512d
6045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6046 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6047 __m512d __B)
6049 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6050 /* idx */ ,
6051 (__v8df) __A,
6052 (__v8df) __B,
6053 (__mmask8) __U);
6056 extern __inline __m512
6057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6058 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6060 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6061 /* idx */ ,
6062 (__v16sf) __A,
6063 (__v16sf) __B,
6064 (__mmask16) -1);
6067 extern __inline __m512
6068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6069 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6071 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6072 /* idx */ ,
6073 (__v16sf) __A,
6074 (__v16sf) __B,
6075 (__mmask16) __U);
6078 extern __inline __m512
6079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6080 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6081 __m512 __B)
6083 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6084 (__v16si) __I
6085 /* idx */ ,
6086 (__v16sf) __B,
6087 (__mmask16) __U);
6090 extern __inline __m512
6091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6092 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6093 __m512 __B)
6095 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6096 /* idx */ ,
6097 (__v16sf) __A,
6098 (__v16sf) __B,
6099 (__mmask16) __U);
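/* Illustrative usage (not part of the header): permutex2var selects each
   result element from the concatenation of two source vectors.  For the
   epi32 forms the low four index bits pick the element and bit 4 picks
   between __A (clear) and __B (set); e.g. interleaving the low halves of two
   caller-supplied vectors a and b:

     __m512i idx = _mm512_set_epi32 (23, 7, 22, 6, 21, 5, 20, 4,
                                     19, 3, 18, 2, 17, 1, 16, 0);
     __m512i r = _mm512_permutex2var_epi32 (a, idx, b);

   In the _mm512_mask_ forms masked-off elements keep __A, while in the
   _mm512_mask2_ forms they keep the index operand __I.  */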
6102 #ifdef __OPTIMIZE__
6103 extern __inline __m512d
6104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105 _mm512_permute_pd (__m512d __X, const int __C)
6107 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6108 (__v8df)
6109 _mm512_undefined_pd (),
6110 (__mmask8) -1);
6113 extern __inline __m512d
6114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6117 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6118 (__v8df) __W,
6119 (__mmask8) __U);
6122 extern __inline __m512d
6123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6126 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6127 (__v8df)
6128 _mm512_setzero_pd (),
6129 (__mmask8) __U);
6132 extern __inline __m512
6133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6134 _mm512_permute_ps (__m512 __X, const int __C)
6136 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6137 (__v16sf)
6138 _mm512_undefined_ps (),
6139 (__mmask16) -1);
6142 extern __inline __m512
6143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6144 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6146 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6147 (__v16sf) __W,
6148 (__mmask16) __U);
6151 extern __inline __m512
6152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6153 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6155 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6156 (__v16sf)
6157 _mm512_setzero_ps (),
6158 (__mmask16) __U);
6160 #else
6161 #define _mm512_permute_pd(X, C) \
6162 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6163 (__v8df)(__m512d)_mm512_undefined_pd(),\
6164 (__mmask8)(-1)))
6166 #define _mm512_mask_permute_pd(W, U, X, C) \
6167 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6168 (__v8df)(__m512d)(W), \
6169 (__mmask8)(U)))
6171 #define _mm512_maskz_permute_pd(U, X, C) \
6172 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6173 (__v8df)(__m512d)_mm512_setzero_pd(), \
6174 (__mmask8)(U)))
6176 #define _mm512_permute_ps(X, C) \
6177 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6178 (__v16sf)(__m512)_mm512_undefined_ps(),\
6179 (__mmask16)(-1)))
6181 #define _mm512_mask_permute_ps(W, U, X, C) \
6182 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6183 (__v16sf)(__m512)(W), \
6184 (__mmask16)(U)))
6186 #define _mm512_maskz_permute_ps(U, X, C) \
6187 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6188 (__v16sf)(__m512)_mm512_setzero_ps(), \
6189 (__mmask16)(U)))
6190 #endif
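/* Illustrative usage (not part of the header): _mm512_permute_pd/_ps apply
   the same in-lane selection (vpermilpd/vpermilps) to every 128-bit lane;
   for the pd form, bit i of the immediate selects the source qword for
   result element i within its lane.  For a caller-supplied __m512d v,

     __m512d r = _mm512_permute_pd (v, 0x55);

   swaps the two doubles inside each 128-bit lane.  */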
6192 #ifdef __OPTIMIZE__
6193 extern __inline __m512i
6194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6195 _mm512_permutex_epi64 (__m512i __X, const int __I)
6197 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6198 (__v8di)
6199 _mm512_undefined_si512 (),
6200 (__mmask8) (-1));
6203 extern __inline __m512i
6204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6205 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6206 __m512i __X, const int __I)
6208 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6209 (__v8di) __W,
6210 (__mmask8) __M);
6213 extern __inline __m512i
6214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6215 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6217 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6218 (__v8di)
6219 _mm512_setzero_si512 (),
6220 (__mmask8) __M);
6223 extern __inline __m512d
6224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225 _mm512_permutex_pd (__m512d __X, const int __M)
6227 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6228 (__v8df)
6229 _mm512_undefined_pd (),
6230 (__mmask8) -1);
6233 extern __inline __m512d
6234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6235 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6237 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6238 (__v8df) __W,
6239 (__mmask8) __U);
6242 extern __inline __m512d
6243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6246 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6247 (__v8df)
6248 _mm512_setzero_pd (),
6249 (__mmask8) __U);
6251 #else
6252 #define _mm512_permutex_pd(X, M) \
6253 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6254 (__v8df)(__m512d)_mm512_undefined_pd(),\
6255 (__mmask8)-1))
6257 #define _mm512_mask_permutex_pd(W, U, X, M) \
6258 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6259 (__v8df)(__m512d)(W), (__mmask8)(U)))
6261 #define _mm512_maskz_permutex_pd(U, X, M) \
6262 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6263 (__v8df)(__m512d)_mm512_setzero_pd(),\
6264 (__mmask8)(U)))
6266 #define _mm512_permutex_epi64(X, I) \
6267 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6268 (int)(I), \
6269 (__v8di)(__m512i) \
6270 (_mm512_undefined_si512 ()),\
6271 (__mmask8)(-1)))
6273 #define _mm512_maskz_permutex_epi64(M, X, I) \
6274 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6275 (int)(I), \
6276 (__v8di)(__m512i) \
6277 (_mm512_setzero_si512 ()),\
6278 (__mmask8)(M)))
6280 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6281 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6282 (int)(I), \
6283 (__v8di)(__m512i)(W), \
6284 (__mmask8)(M)))
6285 #endif
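/* Illustrative usage (not part of the header): _mm512_permutex_epi64/_pd
   reorder 64-bit elements with an immediate control applied to each 256-bit
   half independently, e.g. for a caller-supplied __m512i v

     __m512i r = _mm512_permutex_epi64 (v, _MM_SHUFFLE (0, 1, 2, 3));

   reverses the four quadwords within each half.  Use the permutexvar forms
   below for a permutation across the full 512 bits.  */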
6287 extern __inline __m512i
6288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6289 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6291 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6292 (__v8di) __X,
6293 (__v8di)
6294 _mm512_setzero_si512 (),
6295 __M);
6298 extern __inline __m512i
6299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6300 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6302 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6303 (__v8di) __X,
6304 (__v8di)
6305 _mm512_undefined_si512 (),
6306 (__mmask8) -1);
6309 extern __inline __m512i
6310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6311 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6312 __m512i __Y)
6314 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6315 (__v8di) __X,
6316 (__v8di) __W,
6317 __M);
6320 extern __inline __m512i
6321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6322 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6324 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6325 (__v16si) __X,
6326 (__v16si)
6327 _mm512_setzero_si512 (),
6328 __M);
6331 extern __inline __m512i
6332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6333 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6335 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6336 (__v16si) __X,
6337 (__v16si)
6338 _mm512_undefined_si512 (),
6339 (__mmask16) -1);
6342 extern __inline __m512i
6343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6344 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6345 __m512i __Y)
6347 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6348 (__v16si) __X,
6349 (__v16si) __W,
6350 __M);
6353 extern __inline __m512d
6354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6355 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6357 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6358 (__v8di) __X,
6359 (__v8df)
6360 _mm512_undefined_pd (),
6361 (__mmask8) -1);
6364 extern __inline __m512d
6365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6366 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6368 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6369 (__v8di) __X,
6370 (__v8df) __W,
6371 (__mmask8) __U);
6374 extern __inline __m512d
6375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6376 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6378 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6379 (__v8di) __X,
6380 (__v8df)
6381 _mm512_setzero_pd (),
6382 (__mmask8) __U);
6385 extern __inline __m512
6386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6387 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6389 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6390 (__v16si) __X,
6391 (__v16sf)
6392 _mm512_undefined_ps (),
6393 (__mmask16) -1);
6396 extern __inline __m512
6397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6400 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6401 (__v16si) __X,
6402 (__v16sf) __W,
6403 (__mmask16) __U);
6406 extern __inline __m512
6407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6408 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6410 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6411 (__v16si) __X,
6412 (__v16sf)
6413 _mm512_setzero_ps (),
6414 (__mmask16) __U);
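/* Illustrative usage (not part of the header): the permutexvar forms take
   the index vector as the first argument and permute across the full 512-bit
   register, e.g. reversing the sixteen dwords of a caller-supplied v:

     __m512i idx = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                     8, 9, 10, 11, 12, 13, 14, 15);
     __m512i r = _mm512_permutexvar_epi32 (idx, v);
*/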
6417 #ifdef __OPTIMIZE__
6418 extern __inline __m512
6419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6420 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6422 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6423 (__v16sf) __V, __imm,
6424 (__v16sf)
6425 _mm512_undefined_ps (),
6426 (__mmask16) -1);
6429 extern __inline __m512
6430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6431 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6432 __m512 __V, const int __imm)
6434 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6435 (__v16sf) __V, __imm,
6436 (__v16sf) __W,
6437 (__mmask16) __U);
6440 extern __inline __m512
6441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6442 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6444 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6445 (__v16sf) __V, __imm,
6446 (__v16sf)
6447 _mm512_setzero_ps (),
6448 (__mmask16) __U);
6451 extern __inline __m512d
6452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6453 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6455 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6456 (__v8df) __V, __imm,
6457 (__v8df)
6458 _mm512_undefined_pd (),
6459 (__mmask8) -1);
6462 extern __inline __m512d
6463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6465 __m512d __V, const int __imm)
6467 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6468 (__v8df) __V, __imm,
6469 (__v8df) __W,
6470 (__mmask8) __U);
6473 extern __inline __m512d
6474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6475 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6476 const int __imm)
6478 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6479 (__v8df) __V, __imm,
6480 (__v8df)
6481 _mm512_setzero_pd (),
6482 (__mmask8) __U);
6485 extern __inline __m512d
6486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6487 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6488 const int __imm, const int __R)
6490 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6491 (__v8df) __B,
6492 (__v8di) __C,
6493 __imm,
6494 (__mmask8) -1, __R);
6497 extern __inline __m512d
6498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6499 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6500 __m512i __C, const int __imm, const int __R)
6502 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6503 (__v8df) __B,
6504 (__v8di) __C,
6505 __imm,
6506 (__mmask8) __U, __R);
6509 extern __inline __m512d
6510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6511 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6512 __m512i __C, const int __imm, const int __R)
6514 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6515 (__v8df) __B,
6516 (__v8di) __C,
6517 __imm,
6518 (__mmask8) __U, __R);
6521 extern __inline __m512
6522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6523 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6524 const int __imm, const int __R)
6526 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6527 (__v16sf) __B,
6528 (__v16si) __C,
6529 __imm,
6530 (__mmask16) -1, __R);
6533 extern __inline __m512
6534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6535 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6536 __m512i __C, const int __imm, const int __R)
6538 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6539 (__v16sf) __B,
6540 (__v16si) __C,
6541 __imm,
6542 (__mmask16) __U, __R);
6545 extern __inline __m512
6546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6547 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6548 __m512i __C, const int __imm, const int __R)
6550 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6551 (__v16sf) __B,
6552 (__v16si) __C,
6553 __imm,
6554 (__mmask16) __U, __R);
6557 extern __inline __m128d
6558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6559 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6560 const int __imm, const int __R)
6562 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6563 (__v2df) __B,
6564 (__v2di) __C, __imm,
6565 (__mmask8) -1, __R);
6568 extern __inline __m128d
6569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6570 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6571 __m128i __C, const int __imm, const int __R)
6573 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6574 (__v2df) __B,
6575 (__v2di) __C, __imm,
6576 (__mmask8) __U, __R);
6579 extern __inline __m128d
6580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6581 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6582 __m128i __C, const int __imm, const int __R)
6584 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6585 (__v2df) __B,
6586 (__v2di) __C,
6587 __imm,
6588 (__mmask8) __U, __R);
6591 extern __inline __m128
6592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6593 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6594 const int __imm, const int __R)
6596 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6597 (__v4sf) __B,
6598 (__v4si) __C, __imm,
6599 (__mmask8) -1, __R);
6602 extern __inline __m128
6603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6604 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6605 __m128i __C, const int __imm, const int __R)
6607 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6608 (__v4sf) __B,
6609 (__v4si) __C, __imm,
6610 (__mmask8) __U, __R);
6613 extern __inline __m128
6614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6616 __m128i __C, const int __imm, const int __R)
6618 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6619 (__v4sf) __B,
6620 (__v4si) __C, __imm,
6621 (__mmask8) __U, __R);
6624 #else
6625 #define _mm512_shuffle_pd(X, Y, C) \
6626 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6627 (__v8df)(__m512d)(Y), (int)(C),\
6628 (__v8df)(__m512d)_mm512_undefined_pd(),\
6629 (__mmask8)-1))
6631 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6632 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6633 (__v8df)(__m512d)(Y), (int)(C),\
6634 (__v8df)(__m512d)(W),\
6635 (__mmask8)(U)))
6637 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6638 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6639 (__v8df)(__m512d)(Y), (int)(C),\
6640 (__v8df)(__m512d)_mm512_setzero_pd(),\
6641 (__mmask8)(U)))
6643 #define _mm512_shuffle_ps(X, Y, C) \
6644 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6645 (__v16sf)(__m512)(Y), (int)(C),\
6646 (__v16sf)(__m512)_mm512_undefined_ps(),\
6647 (__mmask16)-1))
6649 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6650 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6651 (__v16sf)(__m512)(Y), (int)(C),\
6652 (__v16sf)(__m512)(W),\
6653 (__mmask16)(U)))
6655 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6656 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6657 (__v16sf)(__m512)(Y), (int)(C),\
6658 (__v16sf)(__m512)_mm512_setzero_ps(),\
6659 (__mmask16)(U)))
6661 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6662 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6663 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6664 (__mmask8)(-1), (R)))
6666 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6667 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6668 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6669 (__mmask8)(U), (R)))
6671 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6672 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6673 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6674 (__mmask8)(U), (R)))
6676 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6677 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6678 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6679 (__mmask16)(-1), (R)))
6681 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6682 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6683 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6684 (__mmask16)(U), (R)))
6686 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6687 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6688 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6689 (__mmask16)(U), (R)))
6691 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6692 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6693 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6694 (__mmask8)(-1), (R)))
6696 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
6697 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6698 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6699 (__mmask8)(U), (R)))
6701 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
6702 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
6703 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6704 (__mmask8)(U), (R)))
6706 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
6707 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6708 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6709 (__mmask8)(-1), (R)))
6711 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
6712 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6713 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6714 (__mmask8)(U), (R)))
6716 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
6717 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
6718 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6719 (__mmask8)(U), (R)))
6720 #endif
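/* Illustrative usage (not part of the header): the shuffle immediates are
   applied per 128-bit lane, so for caller-supplied __m512 vectors a and b

     __m512 r = _mm512_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));

   keeps the two low floats of each lane of a and the two high floats of the
   same lane of b.  The fixupimm intrinsics instead patch special inputs
   (NaN, zero, infinity, ...) using a small lookup table held in the
   corresponding element of __C; the token encoding is described in the
   Intel SDM.  */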
6722 extern __inline __m512
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm512_movehdup_ps (__m512 __A)
6726 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6727 (__v16sf)
6728 _mm512_undefined_ps (),
6729 (__mmask16) -1);
6732 extern __inline __m512
6733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6736 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6737 (__v16sf) __W,
6738 (__mmask16) __U);
6741 extern __inline __m512
6742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6743 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6745 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6746 (__v16sf)
6747 _mm512_setzero_ps (),
6748 (__mmask16) __U);
6751 extern __inline __m512
6752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753 _mm512_moveldup_ps (__m512 __A)
6755 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6756 (__v16sf)
6757 _mm512_undefined_ps (),
6758 (__mmask16) -1);
6761 extern __inline __m512
6762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6765 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6766 (__v16sf) __W,
6767 (__mmask16) __U);
6770 extern __inline __m512
6771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6772 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6774 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6775 (__v16sf)
6776 _mm512_setzero_ps (),
6777 (__mmask16) __U);
6780 extern __inline __m512i
6781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6782 _mm512_or_si512 (__m512i __A, __m512i __B)
6784 return (__m512i) ((__v16su) __A | (__v16su) __B);
6787 extern __inline __m512i
6788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6789 _mm512_or_epi32 (__m512i __A, __m512i __B)
6791 return (__m512i) ((__v16su) __A | (__v16su) __B);
6794 extern __inline __m512i
6795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6796 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6798 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6799 (__v16si) __B,
6800 (__v16si) __W,
6801 (__mmask16) __U);
6804 extern __inline __m512i
6805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6806 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6808 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6809 (__v16si) __B,
6810 (__v16si)
6811 _mm512_setzero_si512 (),
6812 (__mmask16) __U);
6815 extern __inline __m512i
6816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6817 _mm512_or_epi64 (__m512i __A, __m512i __B)
6819 return (__m512i) ((__v8du) __A | (__v8du) __B);
6822 extern __inline __m512i
6823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6824 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6826 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6827 (__v8di) __B,
6828 (__v8di) __W,
6829 (__mmask8) __U);
6832 extern __inline __m512i
6833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6836 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6837 (__v8di) __B,
6838 (__v8di)
6839 _mm512_setzero_si512 (),
6840 (__mmask8) __U);
6843 extern __inline __m512i
6844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 _mm512_xor_si512 (__m512i __A, __m512i __B)
6847 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6850 extern __inline __m512i
6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6854 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6857 extern __inline __m512i
6858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6859 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6861 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6862 (__v16si) __B,
6863 (__v16si) __W,
6864 (__mmask16) __U);
6867 extern __inline __m512i
6868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6869 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6871 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6872 (__v16si) __B,
6873 (__v16si)
6874 _mm512_setzero_si512 (),
6875 (__mmask16) __U);
6878 extern __inline __m512i
6879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6882 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6885 extern __inline __m512i
6886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6887 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6889 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6890 (__v8di) __B,
6891 (__v8di) __W,
6892 (__mmask8) __U);
6895 extern __inline __m512i
6896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6897 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6899 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6900 (__v8di) __B,
6901 (__v8di)
6902 _mm512_setzero_si512 (),
6903 (__mmask8) __U);
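/* Illustrative note (not part of the header): the unmasked or/xor forms
   above are plain vector operations, and the _epi32/_epi64 spellings differ
   only in the granularity applied by the write-mask.  E.g. for
   caller-supplied a and b,

     __m512i r = _mm512_mask_xor_epi32 (a, (__mmask16) 0x00ff, a, b);

   XORs only the low eight 32-bit elements and keeps the high eight from a.  */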
6906 #ifdef __OPTIMIZE__
6907 extern __inline __m512i
6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909 _mm512_rol_epi32 (__m512i __A, const int __B)
6911 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6912 (__v16si)
6913 _mm512_undefined_si512 (),
6914 (__mmask16) -1);
6917 extern __inline __m512i
6918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6921 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6922 (__v16si) __W,
6923 (__mmask16) __U);
6926 extern __inline __m512i
6927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6928 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6930 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6931 (__v16si)
6932 _mm512_setzero_si512 (),
6933 (__mmask16) __U);
6936 extern __inline __m512i
6937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938 _mm512_ror_epi32 (__m512i __A, int __B)
6940 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6941 (__v16si)
6942 _mm512_undefined_si512 (),
6943 (__mmask16) -1);
6946 extern __inline __m512i
6947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6948 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6950 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6951 (__v16si) __W,
6952 (__mmask16) __U);
6955 extern __inline __m512i
6956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6957 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6959 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6960 (__v16si)
6961 _mm512_setzero_si512 (),
6962 (__mmask16) __U);
6965 extern __inline __m512i
6966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967 _mm512_rol_epi64 (__m512i __A, const int __B)
6969 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6970 (__v8di)
6971 _mm512_undefined_si512 (),
6972 (__mmask8) -1);
6975 extern __inline __m512i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6979 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6980 (__v8di) __W,
6981 (__mmask8) __U);
6984 extern __inline __m512i
6985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6986 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6988 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6989 (__v8di)
6990 _mm512_setzero_si512 (),
6991 (__mmask8) __U);
6994 extern __inline __m512i
6995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996 _mm512_ror_epi64 (__m512i __A, int __B)
6998 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6999 (__v8di)
7000 _mm512_undefined_si512 (),
7001 (__mmask8) -1);
7004 extern __inline __m512i
7005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7006 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
7008 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7009 (__v8di) __W,
7010 (__mmask8) __U);
7013 extern __inline __m512i
7014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7015 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
7017 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
7018 (__v8di)
7019 _mm512_setzero_si512 (),
7020 (__mmask8) __U);
7023 #else
7024 #define _mm512_rol_epi32(A, B) \
7025 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7026 (int)(B), \
7027 (__v16si)_mm512_undefined_si512 (), \
7028 (__mmask16)(-1)))
7029 #define _mm512_mask_rol_epi32(W, U, A, B) \
7030 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7031 (int)(B), \
7032 (__v16si)(__m512i)(W), \
7033 (__mmask16)(U)))
7034 #define _mm512_maskz_rol_epi32(U, A, B) \
7035 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7036 (int)(B), \
7037 (__v16si)_mm512_setzero_si512 (), \
7038 (__mmask16)(U)))
7039 #define _mm512_ror_epi32(A, B) \
7040 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7041 (int)(B), \
7042 (__v16si)_mm512_undefined_si512 (), \
7043 (__mmask16)(-1)))
7044 #define _mm512_mask_ror_epi32(W, U, A, B) \
7045 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7046 (int)(B), \
7047 (__v16si)(__m512i)(W), \
7048 (__mmask16)(U)))
7049 #define _mm512_maskz_ror_epi32(U, A, B) \
7050 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7051 (int)(B), \
7052 (__v16si)_mm512_setzero_si512 (), \
7053 (__mmask16)(U)))
7054 #define _mm512_rol_epi64(A, B) \
7055 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7056 (int)(B), \
7057 (__v8di)_mm512_undefined_si512 (), \
7058 (__mmask8)(-1)))
7059 #define _mm512_mask_rol_epi64(W, U, A, B) \
7060 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7061 (int)(B), \
7062 (__v8di)(__m512i)(W), \
7063 (__mmask8)(U)))
7064 #define _mm512_maskz_rol_epi64(U, A, B) \
7065 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7066 (int)(B), \
7067 (__v8di)_mm512_setzero_si512 (), \
7068 (__mmask8)(U)))
7070 #define _mm512_ror_epi64(A, B) \
7071 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7072 (int)(B), \
7073 (__v8di)_mm512_undefined_si512 (), \
7074 (__mmask8)(-1)))
7075 #define _mm512_mask_ror_epi64(W, U, A, B) \
7076 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7077 (int)(B), \
7078 (__v8di)(__m512i)(W), \
7079 (__mmask8)(U)))
7080 #define _mm512_maskz_ror_epi64(U, A, B) \
7081 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7082 (int)(B), \
7083 (__v8di)_mm512_setzero_si512 (), \
7084 (__mmask8)(U)))
7085 #endif
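/* Illustrative usage (not part of the header): the rotate count is an
   immediate taken modulo the element width, e.g.

     __m512i r = _mm512_rol_epi32 (v, 8);

   rotates every 32-bit element of a caller-supplied v left by eight bits.
   As with the rounding intrinsics, macro forms are provided so the count
   reaches the builtin as a constant when not optimizing.  */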
7087 extern __inline __m512i
7088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7089 _mm512_and_si512 (__m512i __A, __m512i __B)
7091 return (__m512i) ((__v16su) __A & (__v16su) __B);
7094 extern __inline __m512i
7095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7096 _mm512_and_epi32 (__m512i __A, __m512i __B)
7098 return (__m512i) ((__v16su) __A & (__v16su) __B);
7101 extern __inline __m512i
7102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7103 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7105 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7106 (__v16si) __B,
7107 (__v16si) __W,
7108 (__mmask16) __U);
7111 extern __inline __m512i
7112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7113 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7115 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7116 (__v16si) __B,
7117 (__v16si)
7118 _mm512_setzero_si512 (),
7119 (__mmask16) __U);
7122 extern __inline __m512i
7123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7124 _mm512_and_epi64 (__m512i __A, __m512i __B)
7126 return (__m512i) ((__v8du) __A & (__v8du) __B);
7129 extern __inline __m512i
7130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7133 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7134 (__v8di) __B,
7135 (__v8di) __W, __U);
7138 extern __inline __m512i
7139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7140 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7142 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7143 (__v8di) __B,
7144 (__v8di)
7145 _mm512_setzero_si512 (),
7146 __U);
7149 extern __inline __m512i
7150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7151 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7153 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7154 (__v16si) __B,
7155 (__v16si)
7156 _mm512_undefined_si512 (),
7157 (__mmask16) -1);
7160 extern __inline __m512i
7161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7162 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7164 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7165 (__v16si) __B,
7166 (__v16si)
7167 _mm512_undefined_si512 (),
7168 (__mmask16) -1);
7171 extern __inline __m512i
7172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7173 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7175 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7176 (__v16si) __B,
7177 (__v16si) __W,
7178 (__mmask16) __U);
7181 extern __inline __m512i
7182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7183 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7185 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7186 (__v16si) __B,
7187 (__v16si)
7188 _mm512_setzero_si512 (),
7189 (__mmask16) __U);
7192 extern __inline __m512i
7193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7194 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7196 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7197 (__v8di) __B,
7198 (__v8di)
7199 _mm512_undefined_si512 (),
7200 (__mmask8) -1);
7203 extern __inline __m512i
7204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7205 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7207 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7208 (__v8di) __B,
7209 (__v8di) __W, __U);
7212 extern __inline __m512i
7213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7214 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7216 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7217 (__v8di) __B,
7218 (__v8di)
7219 _mm512_setzero_si512 (),
7220 __U);
7223 extern __inline __mmask16
7224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7225 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7227 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7228 (__v16si) __B,
7229 (__mmask16) -1);
7232 extern __inline __mmask16
7233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7234 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7236 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7237 (__v16si) __B, __U);
7240 extern __inline __mmask8
7241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7242 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7244 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7245 (__v8di) __B,
7246 (__mmask8) -1);
7249 extern __inline __mmask8
7250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7251 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7253 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7256 extern __inline __mmask16
7257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7258 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7260 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7261 (__v16si) __B,
7262 (__mmask16) -1);
7265 extern __inline __mmask16
7266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7267 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7269 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7270 (__v16si) __B, __U);
7273 extern __inline __mmask8
7274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7275 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7277 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7278 (__v8di) __B,
7279 (__mmask8) -1);
7282 extern __inline __mmask8
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7286 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7287 (__v8di) __B, __U);
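/* Example (illustrative): the test/testn intrinsics above reduce a
   per-element AND to a mask bit;

     __mmask16 __nz = _mm512_test_epi32_mask (__a, __b);

   sets bit i when (__a[i] & __b[i]) != 0, while
   _mm512_testn_epi32_mask sets it when that AND is zero.  */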
7290 extern __inline __m512i
7291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7292 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7294 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7295 (__v16si) __B,
7296 (__v16si)
7297 _mm512_undefined_si512 (),
7298 (__mmask16) -1);
7301 extern __inline __m512i
7302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7303 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7304 __m512i __B)
7306 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7307 (__v16si) __B,
7308 (__v16si) __W,
7309 (__mmask16) __U);
7312 extern __inline __m512i
7313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7314 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7316 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7317 (__v16si) __B,
7318 (__v16si)
7319 _mm512_setzero_si512 (),
7320 (__mmask16) __U);
7323 extern __inline __m512i
7324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7325 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7327 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7328 (__v8di) __B,
7329 (__v8di)
7330 _mm512_undefined_si512 (),
7331 (__mmask8) -1);
7334 extern __inline __m512i
7335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7338 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7339 (__v8di) __B,
7340 (__v8di) __W,
7341 (__mmask8) __U);
7344 extern __inline __m512i
7345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7346 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7348 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7349 (__v8di) __B,
7350 (__v8di)
7351 _mm512_setzero_si512 (),
7352 (__mmask8) __U);
7355 extern __inline __m512i
7356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7357 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7359 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7360 (__v16si) __B,
7361 (__v16si)
7362 _mm512_undefined_si512 (),
7363 (__mmask16) -1);
7366 extern __inline __m512i
7367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7368 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7369 __m512i __B)
7371 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7372 (__v16si) __B,
7373 (__v16si) __W,
7374 (__mmask16) __U);
7377 extern __inline __m512i
7378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7379 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7381 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7382 (__v16si) __B,
7383 (__v16si)
7384 _mm512_setzero_si512 (),
7385 (__mmask16) __U);
7388 extern __inline __m512i
7389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7392 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7393 (__v8di) __B,
7394 (__v8di)
7395 _mm512_undefined_si512 (),
7396 (__mmask8) -1);
7399 extern __inline __m512i
7400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7401 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7403 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7404 (__v8di) __B,
7405 (__v8di) __W,
7406 (__mmask8) __U);
7409 extern __inline __m512i
7410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7411 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7413 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7414 (__v8di) __B,
7415 (__v8di)
7416 _mm512_setzero_si512 (),
7417 (__mmask8) __U);
7420 #ifdef __x86_64__
7421 #ifdef __OPTIMIZE__
7422 extern __inline unsigned long long
7423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7424 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7426 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7429 extern __inline long long
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7433 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7436 extern __inline long long
7437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7438 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7440 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7443 extern __inline unsigned long long
7444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7445 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7447 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7450 extern __inline long long
7451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7452 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7454 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7457 extern __inline long long
7458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7459 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7461 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7463 #else
7464 #define _mm_cvt_roundss_u64(A, B) \
7465 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7467 #define _mm_cvt_roundss_si64(A, B) \
7468 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7470 #define _mm_cvt_roundss_i64(A, B) \
7471 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7473 #define _mm_cvtt_roundss_u64(A, B) \
7474 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7476 #define _mm_cvtt_roundss_i64(A, B) \
7477 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7479 #define _mm_cvtt_roundss_si64(A, B) \
7480 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7481 #endif
7482 #endif
7484 #ifdef __OPTIMIZE__
7485 extern __inline unsigned
7486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7487 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7489 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7492 extern __inline int
7493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7494 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7496 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7499 extern __inline int
7500 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7503 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7506 extern __inline unsigned
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7510 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7513 extern __inline int
7514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7515 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7517 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7520 extern __inline int
7521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7524 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7526 #else
7527 #define _mm_cvt_roundss_u32(A, B) \
7528 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7530 #define _mm_cvt_roundss_si32(A, B) \
7531 ((int)__builtin_ia32_vcvtss2si32(A, B))
7533 #define _mm_cvt_roundss_i32(A, B) \
7534 ((int)__builtin_ia32_vcvtss2si32(A, B))
7536 #define _mm_cvtt_roundss_u32(A, B) \
7537 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7539 #define _mm_cvtt_roundss_si32(A, B) \
7540 ((int)__builtin_ia32_vcvttss2si32(A, B))
7542 #define _mm_cvtt_roundss_i32(A, B) \
7543 ((int)__builtin_ia32_vcvttss2si32(A, B))
7544 #endif
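/* Example (illustrative): the rounding argument of the *_round*
   conversions must be a compile-time constant, which is why each
   group falls back to macros when __OPTIMIZE__ is not defined.
   Assuming the _MM_FROUND_* controls from <smmintrin.h>, a typical
   call is

     int __i = _mm_cvt_roundss_si32 (__a, _MM_FROUND_TO_NEAREST_INT
                                          | _MM_FROUND_NO_EXC);  */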
7546 #ifdef __x86_64__
7547 #ifdef __OPTIMIZE__
7548 extern __inline unsigned long long
7549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7550 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7552 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7555 extern __inline long long
7556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7557 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7559 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7562 extern __inline long long
7563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7564 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7566 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7569 extern __inline unsigned long long
7570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7571 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7573 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7576 extern __inline long long
7577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7578 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7580 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7583 extern __inline long long
7584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7585 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7587 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7589 #else
7590 #define _mm_cvt_roundsd_u64(A, B) \
7591 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7593 #define _mm_cvt_roundsd_si64(A, B) \
7594 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7596 #define _mm_cvt_roundsd_i64(A, B) \
7597 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7599 #define _mm_cvtt_roundsd_u64(A, B) \
7600 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7602 #define _mm_cvtt_roundsd_si64(A, B) \
7603 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7605 #define _mm_cvtt_roundsd_i64(A, B) \
7606 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7607 #endif
7608 #endif
7610 #ifdef __OPTIMIZE__
7611 extern __inline unsigned
7612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7613 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7615 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7618 extern __inline int
7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7620 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7622 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7625 extern __inline int
7626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7627 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7629 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7632 extern __inline unsigned
7633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7634 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7636 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7639 extern __inline int
7640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7641 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7643 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7646 extern __inline int
7647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7648 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7650 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7652 #else
7653 #define _mm_cvt_roundsd_u32(A, B) \
7654 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7656 #define _mm_cvt_roundsd_si32(A, B) \
7657 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7659 #define _mm_cvt_roundsd_i32(A, B) \
7660 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7662 #define _mm_cvtt_roundsd_u32(A, B) \
7663 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7665 #define _mm_cvtt_roundsd_si32(A, B) \
7666 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7668 #define _mm_cvtt_roundsd_i32(A, B) \
7669 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7670 #endif
7672 extern __inline __m512d
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm512_movedup_pd (__m512d __A)
7676 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7677 (__v8df)
7678 _mm512_undefined_pd (),
7679 (__mmask8) -1);
7682 extern __inline __m512d
7683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7686 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7687 (__v8df) __W,
7688 (__mmask8) __U);
7691 extern __inline __m512d
7692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7693 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7695 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7696 (__v8df)
7697 _mm512_setzero_pd (),
7698 (__mmask8) __U);
7701 extern __inline __m512d
7702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7703 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7705 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7706 (__v8df) __B,
7707 (__v8df)
7708 _mm512_undefined_pd (),
7709 (__mmask8) -1);
7712 extern __inline __m512d
7713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7714 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7716 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7717 (__v8df) __B,
7718 (__v8df) __W,
7719 (__mmask8) __U);
7722 extern __inline __m512d
7723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7724 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7726 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7727 (__v8df) __B,
7728 (__v8df)
7729 _mm512_setzero_pd (),
7730 (__mmask8) __U);
7733 extern __inline __m512d
7734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7735 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7737 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7738 (__v8df) __B,
7739 (__v8df)
7740 _mm512_undefined_pd (),
7741 (__mmask8) -1);
7744 extern __inline __m512d
7745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7746 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7748 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7749 (__v8df) __B,
7750 (__v8df) __W,
7751 (__mmask8) __U);
7754 extern __inline __m512d
7755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7756 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7758 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7759 (__v8df) __B,
7760 (__v8df)
7761 _mm512_setzero_pd (),
7762 (__mmask8) __U);
7765 extern __inline __m512
7766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7767 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7769 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7770 (__v16sf) __B,
7771 (__v16sf)
7772 _mm512_undefined_ps (),
7773 (__mmask16) -1);
7776 extern __inline __m512
7777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7778 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7780 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7781 (__v16sf) __B,
7782 (__v16sf) __W,
7783 (__mmask16) __U);
7786 extern __inline __m512
7787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7788 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7790 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7791 (__v16sf) __B,
7792 (__v16sf)
7793 _mm512_setzero_ps (),
7794 (__mmask16) __U);
7797 #ifdef __OPTIMIZE__
7798 extern __inline __m512d
7799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7800 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7802 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7803 (__v8df)
7804 _mm512_undefined_pd (),
7805 (__mmask8) -1, __R);
7808 extern __inline __m512d
7809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7810 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7811 const int __R)
7813 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7814 (__v8df) __W,
7815 (__mmask8) __U, __R);
7818 extern __inline __m512d
7819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7820 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7822 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7823 (__v8df)
7824 _mm512_setzero_pd (),
7825 (__mmask8) __U, __R);
7828 extern __inline __m512
7829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7832 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7833 (__v16sf)
7834 _mm512_undefined_ps (),
7835 (__mmask16) -1, __R);
7838 extern __inline __m512
7839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7841 const int __R)
7843 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7844 (__v16sf) __W,
7845 (__mmask16) __U, __R);
7848 extern __inline __m512
7849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7850 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7852 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7853 (__v16sf)
7854 _mm512_setzero_ps (),
7855 (__mmask16) __U, __R);
7858 extern __inline __m256i
7859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7860 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7862 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7863 __I,
7864 (__v16hi)
7865 _mm256_undefined_si256 (),
7866 -1);
7869 extern __inline __m256i
7870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7871 _mm512_cvtps_ph (__m512 __A, const int __I)
7873 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7874 __I,
7875 (__v16hi)
7876 _mm256_undefined_si256 (),
7877 -1);
7880 extern __inline __m256i
7881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7882 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7883 const int __I)
7885 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7886 __I,
7887 (__v16hi) __U,
7888 (__mmask16) __W);
7891 extern __inline __m256i
7892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7893 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7895 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7896 __I,
7897 (__v16hi) __U,
7898 (__mmask16) __W);
7901 extern __inline __m256i
7902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7903 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7905 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7906 __I,
7907 (__v16hi)
7908 _mm256_setzero_si256 (),
7909 (__mmask16) __W);
7912 extern __inline __m256i
7913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7914 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7916 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7917 __I,
7918 (__v16hi)
7919 _mm256_setzero_si256 (),
7920 (__mmask16) __W);
7922 #else
7923 #define _mm512_cvt_roundps_pd(A, B) \
7924 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
7926 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7927 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
7929 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7930 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
7932 #define _mm512_cvt_roundph_ps(A, B) \
7933 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
7935 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7936 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
7938 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7939 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
7941 #define _mm512_cvt_roundps_ph(A, I) \
7942 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7943 (__v16hi)_mm256_undefined_si256 (), -1))
7944 #define _mm512_cvtps_ph(A, I) \
7945 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7946 (__v16hi)_mm256_undefined_si256 (), -1))
7947 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7948 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7949 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7950 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7951 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7952 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7953 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7954 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7955 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7956 #define _mm512_maskz_cvtps_ph(W, A, I) \
7957 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7958 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7959 #endif
7961 #ifdef __OPTIMIZE__
7962 extern __inline __m256
7963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7964 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7966 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7967 (__v8sf)
7968 _mm256_undefined_ps (),
7969 (__mmask8) -1, __R);
7972 extern __inline __m256
7973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7974 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7975 const int __R)
7977 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7978 (__v8sf) __W,
7979 (__mmask8) __U, __R);
7982 extern __inline __m256
7983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7984 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7986 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7987 (__v8sf)
7988 _mm256_setzero_ps (),
7989 (__mmask8) __U, __R);
7992 extern __inline __m128
7993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7994 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7996 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7997 (__v2df) __B,
7998 __R);
8001 extern __inline __m128d
8002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8003 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
8005 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
8006 (__v4sf) __B,
8007 __R);
8009 #else
8010 #define _mm512_cvt_roundpd_ps(A, B) \
8011 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
8013 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
8014 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
8016 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
8017 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
8019 #define _mm_cvt_roundsd_ss(A, B, C) \
8020 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8022 #define _mm_cvt_roundss_sd(A, B, C) \
8023 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8024 #endif
8026 extern __inline void
8027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8028 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8030 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8033 extern __inline void
8034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8035 _mm512_stream_ps (float *__P, __m512 __A)
8037 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8040 extern __inline void
8041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042 _mm512_stream_pd (double *__P, __m512d __A)
8044 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8047 extern __inline __m512i
8048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049 _mm512_stream_load_si512 (void *__P)
8051 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
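/* Example (illustrative): the streaming intrinsics above expect a
   64-byte-aligned address and bypass the cache hierarchy;

     _mm512_stream_si512 ((__m512i *) __dst, __v);
     _mm_sfence ();

   where _mm_sfence () (from <xmmintrin.h>) orders the non-temporal
   store before other agents read __dst.  */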
8054 /* Constants for mantissa extraction */
8055 typedef enum
8057 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8058 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8059 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8060 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8061 } _MM_MANTISSA_NORM_ENUM;
8063 typedef enum
8065 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8066 _MM_MANT_SIGN_zero, /* sign = 0 */
8067 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8068 } _MM_MANTISSA_SIGN_ENUM;
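/* Example (illustrative): the getmant intrinsics below pack the two
   enums into the instruction immediate as (sign << 2) | norm, so

     __m512d __m = _mm512_getmant_round_pd (__x, _MM_MANT_NORM_1_2,
                                            _MM_MANT_SIGN_zero,
                                            _MM_FROUND_CUR_DIRECTION);

   normalizes each mantissa of __x into [1, 2) with the sign forced
   to zero.  */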
8070 #ifdef __OPTIMIZE__
8071 extern __inline __m128
8072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8073 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8075 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8076 (__v4sf) __B,
8077 __R);
8080 extern __inline __m128d
8081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8084 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8085 (__v2df) __B,
8086 __R);
8089 extern __inline __m512
8090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8091 _mm512_getexp_round_ps (__m512 __A, const int __R)
8093 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8094 (__v16sf)
8095 _mm512_undefined_ps (),
8096 (__mmask16) -1, __R);
8099 extern __inline __m512
8100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8101 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8102 const int __R)
8104 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8105 (__v16sf) __W,
8106 (__mmask16) __U, __R);
8109 extern __inline __m512
8110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8111 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8113 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8114 (__v16sf)
8115 _mm512_setzero_ps (),
8116 (__mmask16) __U, __R);
8119 extern __inline __m512d
8120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8121 _mm512_getexp_round_pd (__m512d __A, const int __R)
8123 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8124 (__v8df)
8125 _mm512_undefined_pd (),
8126 (__mmask8) -1, __R);
8129 extern __inline __m512d
8130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8131 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8132 const int __R)
8134 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8135 (__v8df) __W,
8136 (__mmask8) __U, __R);
8139 extern __inline __m512d
8140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8141 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8143 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8144 (__v8df)
8145 _mm512_setzero_pd (),
8146 (__mmask8) __U, __R);
8149 extern __inline __m512d
8150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8151 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8152 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8154 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8155 (__C << 2) | __B,
8156 _mm512_undefined_pd (),
8157 (__mmask8) -1, __R);
8160 extern __inline __m512d
8161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8162 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8163 _MM_MANTISSA_NORM_ENUM __B,
8164 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8166 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8167 (__C << 2) | __B,
8168 (__v8df) __W, __U,
8169 __R);
8172 extern __inline __m512d
8173 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8174 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8175 _MM_MANTISSA_NORM_ENUM __B,
8176 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8178 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8179 (__C << 2) | __B,
8180 (__v8df)
8181 _mm512_setzero_pd (),
8182 __U, __R);
8185 extern __inline __m512
8186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8187 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8188 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8190 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8191 (__C << 2) | __B,
8192 _mm512_undefined_ps (),
8193 (__mmask16) -1, __R);
8196 extern __inline __m512
8197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8198 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8199 _MM_MANTISSA_NORM_ENUM __B,
8200 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8202 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8203 (__C << 2) | __B,
8204 (__v16sf) __W, __U,
8205 __R);
8208 extern __inline __m512
8209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8210 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8211 _MM_MANTISSA_NORM_ENUM __B,
8212 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8214 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8215 (__C << 2) | __B,
8216 (__v16sf)
8217 _mm512_setzero_ps (),
8218 __U, __R);
8221 extern __inline __m128d
8222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8223 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8224 _MM_MANTISSA_NORM_ENUM __C,
8225 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8227 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8228 (__v2df) __B,
8229 (__D << 2) | __C,
8230 __R);
8233 extern __inline __m128
8234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8235 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8236 _MM_MANTISSA_NORM_ENUM __C,
8237 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8239 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8240 (__v4sf) __B,
8241 (__D << 2) | __C,
8242 __R);
8245 #else
8246 #define _mm512_getmant_round_pd(X, B, C, R) \
8247 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8248 (int)(((C)<<2) | (B)), \
8249 (__v8df)(__m512d)_mm512_undefined_pd(), \
8250 (__mmask8)-1,\
8251 (R)))
8253 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8254 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8255 (int)(((C)<<2) | (B)), \
8256 (__v8df)(__m512d)(W), \
8257 (__mmask8)(U),\
8258 (R)))
8260 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8261 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8262 (int)(((C)<<2) | (B)), \
8263 (__v8df)(__m512d)_mm512_setzero_pd(), \
8264 (__mmask8)(U),\
8265 (R)))
8266 #define _mm512_getmant_round_ps(X, B, C, R) \
8267 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8268 (int)(((C)<<2) | (B)), \
8269 (__v16sf)(__m512)_mm512_undefined_ps(), \
8270 (__mmask16)-1,\
8271 (R)))
8273 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8274 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8275 (int)(((C)<<2) | (B)), \
8276 (__v16sf)(__m512)(W), \
8277 (__mmask16)(U),\
8278 (R)))
8280 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8281 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8282 (int)(((C)<<2) | (B)), \
8283 (__v16sf)(__m512)_mm512_setzero_ps(), \
8284 (__mmask16)(U),\
8285 (R)))
8286 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8287 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8288 (__v2df)(__m128d)(Y), \
8289 (int)(((D)<<2) | (C)), \
8290 (R)))
8292 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8293 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8294 (__v4sf)(__m128)(Y), \
8295 (int)(((D)<<2) | (C)), \
8296 (R)))
8298 #define _mm_getexp_round_ss(A, B, R) \
8299 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8301 #define _mm_getexp_round_sd(A, B, R) \
8302 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8304 #define _mm512_getexp_round_ps(A, R) \
8305 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8306 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8308 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8309 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8310 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8312 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8313 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8314 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8316 #define _mm512_getexp_round_pd(A, R) \
8317 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8318 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8320 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8321 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8322 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8324 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8325 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8326 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8327 #endif
8329 #ifdef __OPTIMIZE__
8330 extern __inline __m512
8331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8332 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8334 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8335 (__v16sf)
8336 _mm512_undefined_ps (),
8337 -1, __R);
8340 extern __inline __m512
8341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8342 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8343 const int __imm, const int __R)
8345 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8346 (__v16sf) __A,
8347 (__mmask16) __B, __R);
8350 extern __inline __m512
8351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8352 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8353 const int __imm, const int __R)
8355 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8356 __imm,
8357 (__v16sf)
8358 _mm512_setzero_ps (),
8359 (__mmask16) __A, __R);
8362 extern __inline __m512d
8363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8364 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8366 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8367 (__v8df)
8368 _mm512_undefined_pd (),
8369 -1, __R);
8372 extern __inline __m512d
8373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8374 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8375 __m512d __C, const int __imm, const int __R)
8377 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8378 (__v8df) __A,
8379 (__mmask8) __B, __R);
8382 extern __inline __m512d
8383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8384 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8385 const int __imm, const int __R)
8387 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8388 __imm,
8389 (__v8df)
8390 _mm512_setzero_pd (),
8391 (__mmask8) __A, __R);
8394 extern __inline __m128
8395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8396 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8398 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8399 (__v4sf) __B, __imm, __R);
8402 extern __inline __m128d
8403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8404 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8405 const int __R)
8407 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8408 (__v2df) __B, __imm, __R);
8411 #else
8412 #define _mm512_roundscale_round_ps(A, B, R) \
8413 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8414 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8415 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8416 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8417 (int)(D), \
8418 (__v16sf)(__m512)(A), \
8419 (__mmask16)(B), R))
8420 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8421 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8422 (int)(C), \
8423 (__v16sf)_mm512_setzero_ps(),\
8424 (__mmask16)(A), R))
8425 #define _mm512_roundscale_round_pd(A, B, R) \
8426 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8427 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8428 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8429 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8430 (int)(D), \
8431 (__v8df)(__m512d)(A), \
8432 (__mmask8)(B), R))
8433 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8434 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8435 (int)(C), \
8436 (__v8df)_mm512_setzero_pd(),\
8437 (__mmask8)(A), R))
8438 #define _mm_roundscale_round_ss(A, B, C, R) \
8439 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8440 (__v4sf)(__m128)(B), (int)(C), R))
8441 #define _mm_roundscale_round_sd(A, B, C, R) \
8442 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8443 (__v2df)(__m128d)(B), (int)(C), R))
8444 #endif
8446 extern __inline __m512
8447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8448 _mm512_floor_ps (__m512 __A)
8450 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8451 _MM_FROUND_FLOOR,
8452 (__v16sf) __A, -1,
8453 _MM_FROUND_CUR_DIRECTION);
8456 extern __inline __m512d
8457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8458 _mm512_floor_pd (__m512d __A)
8460 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8461 _MM_FROUND_FLOOR,
8462 (__v8df) __A, -1,
8463 _MM_FROUND_CUR_DIRECTION);
8466 extern __inline __m512
8467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8468 _mm512_ceil_ps (__m512 __A)
8470 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8471 _MM_FROUND_CEIL,
8472 (__v16sf) __A, -1,
8473 _MM_FROUND_CUR_DIRECTION);
8476 extern __inline __m512d
8477 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8478 _mm512_ceil_pd (__m512d __A)
8480 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8481 _MM_FROUND_CEIL,
8482 (__v8df) __A, -1,
8483 _MM_FROUND_CUR_DIRECTION);
8486 extern __inline __m512
8487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8488 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8490 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8491 _MM_FROUND_FLOOR,
8492 (__v16sf) __W, __U,
8493 _MM_FROUND_CUR_DIRECTION);
8496 extern __inline __m512d
8497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8498 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8500 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8501 _MM_FROUND_FLOOR,
8502 (__v8df) __W, __U,
8503 _MM_FROUND_CUR_DIRECTION);
8506 extern __inline __m512
8507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8508 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8510 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8511 _MM_FROUND_CEIL,
8512 (__v16sf) __W, __U,
8513 _MM_FROUND_CUR_DIRECTION);
8516 extern __inline __m512d
8517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8518 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8520 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8521 _MM_FROUND_CEIL,
8522 (__v8df) __W, __U,
8523 _MM_FROUND_CUR_DIRECTION);
8526 #ifdef __OPTIMIZE__
8527 extern __inline __m512i
8528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8529 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8531 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8532 (__v16si) __B, __imm,
8533 (__v16si)
8534 _mm512_undefined_si512 (),
8535 (__mmask16) -1);
8538 extern __inline __m512i
8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8541 __m512i __B, const int __imm)
8543 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8544 (__v16si) __B, __imm,
8545 (__v16si) __W,
8546 (__mmask16) __U);
8549 extern __inline __m512i
8550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8551 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8552 const int __imm)
8554 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8555 (__v16si) __B, __imm,
8556 (__v16si)
8557 _mm512_setzero_si512 (),
8558 (__mmask16) __U);
8561 extern __inline __m512i
8562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8563 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8565 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8566 (__v8di) __B, __imm,
8567 (__v8di)
8568 _mm512_undefined_si512 (),
8569 (__mmask8) -1);
8572 extern __inline __m512i
8573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8574 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8575 __m512i __B, const int __imm)
8577 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8578 (__v8di) __B, __imm,
8579 (__v8di) __W,
8580 (__mmask8) __U);
8583 extern __inline __m512i
8584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8585 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8586 const int __imm)
8588 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8589 (__v8di) __B, __imm,
8590 (__v8di)
8591 _mm512_setzero_si512 (),
8592 (__mmask8) __U);
8594 #else
8595 #define _mm512_alignr_epi32(X, Y, C) \
8596 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8597 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
8598 (__mmask16)-1))
8600 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8601 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8602 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8603 (__mmask16)(U)))
8605 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8606 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8607 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
8608 (__mmask16)(U)))
8610 #define _mm512_alignr_epi64(X, Y, C) \
8611 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8612 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
8613 (__mmask8)-1))
8615 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8616 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8617 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8619 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8620 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8621 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
8622 (__mmask8)(U)))
8623 #endif
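/* Example (illustrative): _mm512_alignr_epi32 (A, B, C) concatenates
   A (upper half) and B (lower half) into a 1024-bit value, shifts it
   right by C 32-bit elements and returns the low 512 bits; C == 0
   yields B, and with C == 1 the result is B shifted down one element
   with the low element of A filling the top lane.  */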
8625 extern __inline __mmask16
8626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8627 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8629 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8630 (__v16si) __B,
8631 (__mmask16) -1);
8634 extern __inline __mmask16
8635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8636 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8638 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8639 (__v16si) __B, __U);
8642 extern __inline __mmask8
8643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8644 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8646 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8647 (__v8di) __B, __U);
8650 extern __inline __mmask8
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8654 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8655 (__v8di) __B,
8656 (__mmask8) -1);
8659 extern __inline __mmask16
8660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8661 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8663 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8664 (__v16si) __B,
8665 (__mmask16) -1);
8668 extern __inline __mmask16
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8672 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8673 (__v16si) __B, __U);
8676 extern __inline __mmask8
8677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8678 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8680 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8681 (__v8di) __B, __U);
8684 extern __inline __mmask8
8685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8686 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8688 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8689 (__v8di) __B,
8690 (__mmask8) -1);
8693 extern __inline __mmask16
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8697 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8698 (__v16si) __Y, 5,
8699 (__mmask16) -1);
8702 extern __inline __mmask16
8703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8704 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8706 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8707 (__v16si) __Y, 5,
8708 (__mmask16) __M);
8711 extern __inline __mmask16
8712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8713 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8715 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8716 (__v16si) __Y, 5,
8717 (__mmask16) __M);
8720 extern __inline __mmask16
8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8722 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8724 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8725 (__v16si) __Y, 5,
8726 (__mmask16) -1);
8729 extern __inline __mmask8
8730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8731 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8733 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8734 (__v8di) __Y, 5,
8735 (__mmask8) __M);
8738 extern __inline __mmask8
8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8742 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8743 (__v8di) __Y, 5,
8744 (__mmask8) -1);
8747 extern __inline __mmask8
8748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8749 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8751 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8752 (__v8di) __Y, 5,
8753 (__mmask8) __M);
8756 extern __inline __mmask8
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8760 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8761 (__v8di) __Y, 5,
8762 (__mmask8) -1);
8765 extern __inline __mmask16
8766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8769 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8770 (__v16si) __Y, 2,
8771 (__mmask16) __M);
8774 extern __inline __mmask16
8775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8776 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8778 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8779 (__v16si) __Y, 2,
8780 (__mmask16) -1);
8783 extern __inline __mmask16
8784 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8785 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8787 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8788 (__v16si) __Y, 2,
8789 (__mmask16) __M);
8792 extern __inline __mmask16
8793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8794 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8796 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8797 (__v16si) __Y, 2,
8798 (__mmask16) -1);
8801 extern __inline __mmask8
8802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8803 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8805 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8806 (__v8di) __Y, 2,
8807 (__mmask8) __M);
8810 extern __inline __mmask8
8811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8812 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8814 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8815 (__v8di) __Y, 2,
8816 (__mmask8) -1);
8819 extern __inline __mmask8
8820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8821 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8823 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8824 (__v8di) __Y, 2,
8825 (__mmask8) __M);
8828 extern __inline __mmask8
8829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8830 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8832 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8833 (__v8di) __Y, 2,
8834 (__mmask8) -1);
8837 extern __inline __mmask16
8838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8839 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8841 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8842 (__v16si) __Y, 1,
8843 (__mmask16) __M);
8846 extern __inline __mmask16
8847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8848 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8850 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8851 (__v16si) __Y, 1,
8852 (__mmask16) -1);
8855 extern __inline __mmask16
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8859 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8860 (__v16si) __Y, 1,
8861 (__mmask16) __M);
8864 extern __inline __mmask16
8865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8868 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8869 (__v16si) __Y, 1,
8870 (__mmask16) -1);
8873 extern __inline __mmask8
8874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8875 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8877 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8878 (__v8di) __Y, 1,
8879 (__mmask8) __M);
8882 extern __inline __mmask8
8883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8886 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8887 (__v8di) __Y, 1,
8888 (__mmask8) -1);
8891 extern __inline __mmask8
8892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8893 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8895 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8896 (__v8di) __Y, 1,
8897 (__mmask8) __M);
8900 extern __inline __mmask8
8901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8902 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8904 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8905 (__v8di) __Y, 1,
8906 (__mmask8) -1);
8909 extern __inline __mmask16
8910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8911 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8913 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8914 (__v16si) __Y, 4,
8915 (__mmask16) -1);
8918 extern __inline __mmask16
8919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8920 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8922 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8923 (__v16si) __Y, 4,
8924 (__mmask16) __M);
8927 extern __inline __mmask16
8928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8929 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8931 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8932 (__v16si) __Y, 4,
8933 (__mmask16) __M);
8936 extern __inline __mmask16
8937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8938 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8940 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8941 (__v16si) __Y, 4,
8942 (__mmask16) -1);
8945 extern __inline __mmask8
8946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8947 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8949 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8950 (__v8di) __Y, 4,
8951 (__mmask8) __M);
8954 extern __inline __mmask8
8955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8956 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8958 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8959 (__v8di) __Y, 4,
8960 (__mmask8) -1);
8963 extern __inline __mmask8
8964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8965 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8967 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8968 (__v8di) __Y, 4,
8969 (__mmask8) __M);
8972 extern __inline __mmask8
8973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8974 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8976 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8977 (__v8di) __Y, 4,
8978 (__mmask8) -1);
8981 #define _MM_CMPINT_EQ 0x0
8982 #define _MM_CMPINT_LT 0x1
8983 #define _MM_CMPINT_LE 0x2
8984 #define _MM_CMPINT_UNUSED 0x3
8985 #define _MM_CMPINT_NE 0x4
8986 #define _MM_CMPINT_NLT 0x5
8987 #define _MM_CMPINT_GE 0x5
8988 #define _MM_CMPINT_NLE 0x6
8989 #define _MM_CMPINT_GT 0x6
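/* Illustrative sketch, not part of the original header: the _MM_CMPINT_*
   values above are the immediate predicates accepted by the
   _mm512_cmp_*_mask intrinsics defined below.  Assuming two __m512i
   operands `a' and `b' (hypothetical names), a signed less-than compare
   could be written as

     __mmask16 m = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_LT);

   which produces the same mask as _mm512_cmplt_epi32_mask (a, b).  */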
8991 #ifdef __OPTIMIZE__
8992 extern __inline __mmask8
8993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8994 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8996 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8997 (__v8di) __Y, __P,
8998 (__mmask8) -1);
9001 extern __inline __mmask16
9002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9003 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
9005 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9006 (__v16si) __Y, __P,
9007 (__mmask16) -1);
9010 extern __inline __mmask8
9011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9012 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
9014 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9015 (__v8di) __Y, __P,
9016 (__mmask8) -1);
9019 extern __inline __mmask16
9020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9021 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9023 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9024 (__v16si) __Y, __P,
9025 (__mmask16) -1);
9028 extern __inline __mmask8
9029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9031 const int __R)
9033 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9034 (__v8df) __Y, __P,
9035 (__mmask8) -1, __R);
9038 extern __inline __mmask16
9039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9040 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9042 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9043 (__v16sf) __Y, __P,
9044 (__mmask16) -1, __R);
9047 extern __inline __mmask8
9048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9049 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9050 const int __P)
9052 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9053 (__v8di) __Y, __P,
9054 (__mmask8) __U);
9057 extern __inline __mmask16
9058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9059 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9060 const int __P)
9062 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9063 (__v16si) __Y, __P,
9064 (__mmask16) __U);
9067 extern __inline __mmask8
9068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9069 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9070 const int __P)
9072 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9073 (__v8di) __Y, __P,
9074 (__mmask8) __U);
9077 extern __inline __mmask16
9078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9079 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9080 const int __P)
9082 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9083 (__v16si) __Y, __P,
9084 (__mmask16) __U);
9087 extern __inline __mmask8
9088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9089 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9090 const int __P, const int __R)
9092 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9093 (__v8df) __Y, __P,
9094 (__mmask8) __U, __R);
9097 extern __inline __mmask16
9098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9100 const int __P, const int __R)
9102 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9103 (__v16sf) __Y, __P,
9104 (__mmask16) __U, __R);
9107 extern __inline __mmask8
9108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9109 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9111 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9112 (__v2df) __Y, __P,
9113 (__mmask8) -1, __R);
9116 extern __inline __mmask8
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9119 const int __P, const int __R)
9121 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9122 (__v2df) __Y, __P,
9123 (__mmask8) __M, __R);
9126 extern __inline __mmask8
9127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9128 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9130 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9131 (__v4sf) __Y, __P,
9132 (__mmask8) -1, __R);
9135 extern __inline __mmask8
9136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9137 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9138 const int __P, const int __R)
9140 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9141 (__v4sf) __Y, __P,
9142 (__mmask8) __M, __R);
9145 #else
9146 #define _mm512_cmp_epi64_mask(X, Y, P) \
9147 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9148 (__v8di)(__m512i)(Y), (int)(P),\
9149 (__mmask8)-1))
9151 #define _mm512_cmp_epi32_mask(X, Y, P) \
9152 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9153 (__v16si)(__m512i)(Y), (int)(P),\
9154 (__mmask16)-1))
9156 #define _mm512_cmp_epu64_mask(X, Y, P) \
9157 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9158 (__v8di)(__m512i)(Y), (int)(P),\
9159 (__mmask8)-1))
9161 #define _mm512_cmp_epu32_mask(X, Y, P) \
9162 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9163 (__v16si)(__m512i)(Y), (int)(P),\
9164 (__mmask16)-1))
9166 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9167 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9168 (__v8df)(__m512d)(Y), (int)(P),\
9169 (__mmask8)-1, R))
9171 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9172 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9173 (__v16sf)(__m512)(Y), (int)(P),\
9174 (__mmask16)-1, R))
9176 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9177 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9178 (__v8di)(__m512i)(Y), (int)(P),\
9179 (__mmask8)M))
9181 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9182 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9183 (__v16si)(__m512i)(Y), (int)(P),\
9184 (__mmask16)M))
9186 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9187 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9188 (__v8di)(__m512i)(Y), (int)(P),\
9189 (__mmask8)M))
9191 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9192 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9193 (__v16si)(__m512i)(Y), (int)(P),\
9194 (__mmask16)M))
9196 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9197 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9198 (__v8df)(__m512d)(Y), (int)(P),\
9199 (__mmask8)M, R))
9201 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9202 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9203 (__v16sf)(__m512)(Y), (int)(P),\
9204 (__mmask16)M, R))
9206 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9207 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9208 (__v2df)(__m128d)(Y), (int)(P),\
9209 (__mmask8)-1, R))
9211 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9212 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9213 (__v2df)(__m128d)(Y), (int)(P),\
9214 (M), R))
9216 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9217 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9218 (__v4sf)(__m128)(Y), (int)(P), \
9219 (__mmask8)-1, R))
9221 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9222 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9223 (__v4sf)(__m128)(Y), (int)(P), \
9224 (M), R))
9225 #endif
9227 #ifdef __OPTIMIZE__
9228 extern __inline __m512
9229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9230 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9232 __m512 v1_old = _mm512_undefined_ps ();
9233 __mmask16 mask = 0xFFFF;
9235 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9236 __addr,
9237 (__v16si) __index,
9238 mask, __scale);
9241 extern __inline __m512
9242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9243 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9244 __m512i __index, float const *__addr, int __scale)
9246 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9247 __addr,
9248 (__v16si) __index,
9249 __mask, __scale);
9252 extern __inline __m512d
9253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9254 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9256 __m512d v1_old = _mm512_undefined_pd ();
9257 __mmask8 mask = 0xFF;
9259 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9260 __addr,
9261 (__v8si) __index, mask,
9262 __scale);
9265 extern __inline __m512d
9266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9267 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9268 __m256i __index, double const *__addr, int __scale)
9270 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9271 __addr,
9272 (__v8si) __index,
9273 __mask, __scale);
9276 extern __inline __m256
9277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9278 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9280 __m256 v1_old = _mm256_undefined_ps ();
9281 __mmask8 mask = 0xFF;
9283 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9284 __addr,
9285 (__v8di) __index, mask,
9286 __scale);
9289 extern __inline __m256
9290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9291 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9292 __m512i __index, float const *__addr, int __scale)
9294 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9295 __addr,
9296 (__v8di) __index,
9297 __mask, __scale);
9300 extern __inline __m512d
9301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9302 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9304 __m512d v1_old = _mm512_undefined_pd ();
9305 __mmask8 mask = 0xFF;
9307 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9308 __addr,
9309 (__v8di) __index, mask,
9310 __scale);
9313 extern __inline __m512d
9314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9315 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9316 __m512i __index, double const *__addr, int __scale)
9318 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9319 __addr,
9320 (__v8di) __index,
9321 __mask, __scale);
9324 extern __inline __m512i
9325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9326 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9328 __m512i v1_old = _mm512_undefined_si512 ();
9329 __mmask16 mask = 0xFFFF;
9331 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9332 __addr,
9333 (__v16si) __index,
9334 mask, __scale);
9337 extern __inline __m512i
9338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9339 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9340 __m512i __index, int const *__addr, int __scale)
9342 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9343 __addr,
9344 (__v16si) __index,
9345 __mask, __scale);
9348 extern __inline __m512i
9349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9350 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9352 __m512i v1_old = _mm512_undefined_si512 ();
9353 __mmask8 mask = 0xFF;
9355 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9356 __addr,
9357 (__v8si) __index, mask,
9358 __scale);
9361 extern __inline __m512i
9362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9363 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9364 __m256i __index, long long const *__addr,
9365 int __scale)
9367 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9368 __addr,
9369 (__v8si) __index,
9370 __mask, __scale);
9373 extern __inline __m256i
9374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9375 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9377 __m256i v1_old = _mm256_undefined_si256 ();
9378 __mmask8 mask = 0xFF;
9380 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9381 __addr,
9382 (__v8di) __index,
9383 mask, __scale);
9386 extern __inline __m256i
9387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9388 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9389 __m512i __index, int const *__addr, int __scale)
9391 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9392 __addr,
9393 (__v8di) __index,
9394 __mask, __scale);
9397 extern __inline __m512i
9398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9399 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9401 __m512i v1_old = _mm512_undefined_si512 ();
9402 __mmask8 mask = 0xFF;
9404 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9405 __addr,
9406 (__v8di) __index, mask,
9407 __scale);
9410 extern __inline __m512i
9411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9412 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9413 __m512i __index, long long const *__addr,
9414 int __scale)
9416 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9417 __addr,
9418 (__v8di) __index,
9419 __mask, __scale);
9422 extern __inline void
9423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9424 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9426 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9427 (__v16si) __index, (__v16sf) __v1, __scale);
9430 extern __inline void
9431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9432 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9433 __m512i __index, __m512 __v1, int __scale)
9435 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9436 (__v16sf) __v1, __scale);
9439 extern __inline void
9440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9441 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9442 int __scale)
9444 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9445 (__v8si) __index, (__v8df) __v1, __scale);
9448 extern __inline void
9449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9450 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9451 __m256i __index, __m512d __v1, int __scale)
9453 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9454 (__v8df) __v1, __scale);
9457 extern __inline void
9458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9459 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9461 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9462 (__v8di) __index, (__v8sf) __v1, __scale);
9465 extern __inline void
9466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9467 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9468 __m512i __index, __m256 __v1, int __scale)
9470 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9471 (__v8sf) __v1, __scale);
9474 extern __inline void
9475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9476 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9477 int __scale)
9479 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9480 (__v8di) __index, (__v8df) __v1, __scale);
9483 extern __inline void
9484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9485 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9486 __m512i __index, __m512d __v1, int __scale)
9488 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9489 (__v8df) __v1, __scale);
9492 extern __inline void
9493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9494 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9495 __m512i __v1, int __scale)
9497 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9498 (__v16si) __index, (__v16si) __v1, __scale);
9501 extern __inline void
9502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9503 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9504 __m512i __index, __m512i __v1, int __scale)
9506 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9507 (__v16si) __v1, __scale);
9510 extern __inline void
9511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9512 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9513 __m512i __v1, int __scale)
9515 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9516 (__v8si) __index, (__v8di) __v1, __scale);
9519 extern __inline void
9520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9521 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9522 __m256i __index, __m512i __v1, int __scale)
9524 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9525 (__v8di) __v1, __scale);
9528 extern __inline void
9529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9530 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9531 __m256i __v1, int __scale)
9533 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9534 (__v8di) __index, (__v8si) __v1, __scale);
9537 extern __inline void
9538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9539 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9540 __m512i __index, __m256i __v1, int __scale)
9542 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9543 (__v8si) __v1, __scale);
9546 extern __inline void
9547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9548 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9549 __m512i __v1, int __scale)
9551 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9552 (__v8di) __index, (__v8di) __v1, __scale);
9555 extern __inline void
9556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9557 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9558 __m512i __index, __m512i __v1, int __scale)
9560 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9561 (__v8di) __v1, __scale);
9563 #else
9564 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9565 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
9566 (float const *)ADDR, \
9567 (__v16si)(__m512i)INDEX, \
9568 (__mmask16)0xFFFF, (int)SCALE)
9570 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9571 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9572 (float const *)ADDR, \
9573 (__v16si)(__m512i)INDEX, \
9574 (__mmask16)MASK, (int)SCALE)
9576 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9577 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
9578 (double const *)ADDR, \
9579 (__v8si)(__m256i)INDEX, \
9580 (__mmask8)0xFF, (int)SCALE)
9582 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9583 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9584 (double const *)ADDR, \
9585 (__v8si)(__m256i)INDEX, \
9586 (__mmask8)MASK, (int)SCALE)
9588 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9589 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
9590 (float const *)ADDR, \
9591 (__v8di)(__m512i)INDEX, \
9592 (__mmask8)0xFF, (int)SCALE)
9594 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9595 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9596 (float const *)ADDR, \
9597 (__v8di)(__m512i)INDEX, \
9598 (__mmask8)MASK, (int)SCALE)
9600 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9601 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
9602 (double const *)ADDR, \
9603 (__v8di)(__m512i)INDEX, \
9604 (__mmask8)0xFF, (int)SCALE)
9606 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9607 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9608 (double const *)ADDR, \
9609 (__v8di)(__m512i)INDEX, \
9610 (__mmask8)MASK, (int)SCALE)
9612 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9613 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
9614 (int const *)ADDR, \
9615 (__v16si)(__m512i)INDEX, \
9616 (__mmask16)0xFFFF, (int)SCALE)
9618 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9619 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9620 (int const *)ADDR, \
9621 (__v16si)(__m512i)INDEX, \
9622 (__mmask16)MASK, (int)SCALE)
9624 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9625 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
9626 (long long const *)ADDR, \
9627 (__v8si)(__m256i)INDEX, \
9628 (__mmask8)0xFF, (int)SCALE)
9630 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9631 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9632 (long long const *)ADDR, \
9633 (__v8si)(__m256i)INDEX, \
9634 (__mmask8)MASK, (int)SCALE)
9636 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9637 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
9638 (int const *)ADDR, \
9639 (__v8di)(__m512i)INDEX, \
9640 (__mmask8)0xFF, (int)SCALE)
9642 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9643 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
9644 (int const *)ADDR, \
9645 (__v8di)(__m512i)INDEX, \
9646 (__mmask8)MASK, (int)SCALE)
9648 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
9649 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
9650 (long long const *)ADDR, \
9651 (__v8di)(__m512i)INDEX, \
9652 (__mmask8)0xFF, (int)SCALE)
9654 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9655 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
9656 (long long const *)ADDR, \
9657 (__v8di)(__m512i)INDEX, \
9658 (__mmask8)MASK, (int)SCALE)
9660 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
9661 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \
9662 (__v16si)(__m512i)INDEX, \
9663 (__v16sf)(__m512)V1, (int)SCALE)
9665 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9666 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \
9667 (__v16si)(__m512i)INDEX, \
9668 (__v16sf)(__m512)V1, (int)SCALE)
9670 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
9671 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \
9672 (__v8si)(__m256i)INDEX, \
9673 (__v8df)(__m512d)V1, (int)SCALE)
9675 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9676 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \
9677 (__v8si)(__m256i)INDEX, \
9678 (__v8df)(__m512d)V1, (int)SCALE)
9680 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
9681 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \
9682 (__v8di)(__m512i)INDEX, \
9683 (__v8sf)(__m256)V1, (int)SCALE)
9685 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9686 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \
9687 (__v8di)(__m512i)INDEX, \
9688 (__v8sf)(__m256)V1, (int)SCALE)
9690 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
9691 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \
9692 (__v8di)(__m512i)INDEX, \
9693 (__v8df)(__m512d)V1, (int)SCALE)
9695 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9696 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \
9697 (__v8di)(__m512i)INDEX, \
9698 (__v8df)(__m512d)V1, (int)SCALE)
9700 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
9701 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \
9702 (__v16si)(__m512i)INDEX, \
9703 (__v16si)(__m512i)V1, (int)SCALE)
9705 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9706 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \
9707 (__v16si)(__m512i)INDEX, \
9708 (__v16si)(__m512i)V1, (int)SCALE)
9710 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
9711 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9712 (__v8si)(__m256i)INDEX, \
9713 (__v8di)(__m512i)V1, (int)SCALE)
9715 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9716 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \
9717 (__v8si)(__m256i)INDEX, \
9718 (__v8di)(__m512i)V1, (int)SCALE)
9720 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
9721 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \
9722 (__v8di)(__m512i)INDEX, \
9723 (__v8si)(__m256i)V1, (int)SCALE)
9725 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9726 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \
9727 (__v8di)(__m512i)INDEX, \
9728 (__v8si)(__m256i)V1, (int)SCALE)
9730 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
9731 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9732 (__v8di)(__m512i)INDEX, \
9733 (__v8di)(__m512i)V1, (int)SCALE)
9735 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9736 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \
9737 (__v8di)(__m512i)INDEX, \
9738 (__v8di)(__m512i)V1, (int)SCALE)
9739 #endif
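/* Illustrative sketch, not part of the original header: a masked gather
   merges loaded elements into the pass-through vector wherever the mask
   bit is clear.  Assuming hypothetical names `old', `k', `idx' and
   `base':

     __m512 r = _mm512_mask_i32gather_ps (old, k, idx, base, 4);

   loads base[idx[i]] (index scaled by 4 bytes) into lane i when bit i of
   `k' is set and copies old[i] otherwise; the scale argument must be a
   compile-time constant of 1, 2, 4 or 8.  */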
9741 extern __inline __m512d
9742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9745 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9746 (__v8df) __W,
9747 (__mmask8) __U);
9750 extern __inline __m512d
9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9754 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9755 (__v8df)
9756 _mm512_setzero_pd (),
9757 (__mmask8) __U);
9760 extern __inline void
9761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9762 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9764 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9765 (__mmask8) __U);
9768 extern __inline __m512
9769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9772 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9773 (__v16sf) __W,
9774 (__mmask16) __U);
9777 extern __inline __m512
9778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9779 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9781 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9782 (__v16sf)
9783 _mm512_setzero_ps (),
9784 (__mmask16) __U);
9787 extern __inline void
9788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9791 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9792 (__mmask16) __U);
9795 extern __inline __m512i
9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9799 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9800 (__v8di) __W,
9801 (__mmask8) __U);
9804 extern __inline __m512i
9805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9806 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9808 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9809 (__v8di)
9810 _mm512_setzero_si512 (),
9811 (__mmask8) __U);
9814 extern __inline void
9815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9816 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9818 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9819 (__mmask8) __U);
9822 extern __inline __m512i
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9826 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9827 (__v16si) __W,
9828 (__mmask16) __U);
9831 extern __inline __m512i
9832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9833 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9835 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9836 (__v16si)
9837 _mm512_setzero_si512 (),
9838 (__mmask16) __U);
9841 extern __inline void
9842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9843 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9845 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9846 (__mmask16) __U);
9849 extern __inline __m512d
9850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9853 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9854 (__v8df) __W,
9855 (__mmask8) __U);
9858 extern __inline __m512d
9859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9862 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9863 (__v8df)
9864 _mm512_setzero_pd (),
9865 (__mmask8) __U);
9868 extern __inline __m512d
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9872 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9873 (__v8df) __W,
9874 (__mmask8) __U);
9877 extern __inline __m512d
9878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9881 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9882 (__v8df)
9883 _mm512_setzero_pd (),
9884 (__mmask8) __U);
9887 extern __inline __m512
9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9891 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9892 (__v16sf) __W,
9893 (__mmask16) __U);
9896 extern __inline __m512
9897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9900 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9901 (__v16sf)
9902 _mm512_setzero_ps (),
9903 (__mmask16) __U);
9906 extern __inline __m512
9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9910 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9911 (__v16sf) __W,
9912 (__mmask16) __U);
9915 extern __inline __m512
9916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9919 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9920 (__v16sf)
9921 _mm512_setzero_ps (),
9922 (__mmask16) __U);
9925 extern __inline __m512i
9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9929 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9930 (__v8di) __W,
9931 (__mmask8) __U);
9934 extern __inline __m512i
9935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9938 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9939 (__v8di)
9940 _mm512_setzero_si512 (),
9941 (__mmask8) __U);
9944 extern __inline __m512i
9945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9946 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9948 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9949 (__v8di) __W,
9950 (__mmask8) __U);
9953 extern __inline __m512i
9954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9955 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9957 return (__m512i)
9958 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9959 (__v8di)
9960 _mm512_setzero_si512 (),
9961 (__mmask8) __U);
9964 extern __inline __m512i
9965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9966 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9968 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9969 (__v16si) __W,
9970 (__mmask16) __U);
9973 extern __inline __m512i
9974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9977 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9978 (__v16si)
9979 _mm512_setzero_si512 (),
9980 (__mmask16) __U);
9983 extern __inline __m512i
9984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9985 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9987 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9988 (__v16si) __W,
9989 (__mmask16) __U);
9992 extern __inline __m512i
9993 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9994 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9996 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9997 (__v16si)
9998 _mm512_setzero_si512
9999 (), (__mmask16) __U);
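/* Illustrative sketch, not part of the original header: compress and
   expand are complementary.  Assuming hypothetical names `v' and `k':

     __m512i packed = _mm512_maskz_compress_epi32 (k, v);

   packs the lanes of `v' selected by `k' into the low lanes of the
   result and zeroes the rest, while _mm512_maskz_expand_epi32 (k, packed)
   distributes consecutive low lanes back to the positions selected by
   `k'.  */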
10002 /* Mask arithmetic operations */
10003 extern __inline __mmask16
10004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10005 _mm512_kand (__mmask16 __A, __mmask16 __B)
10007 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
10010 extern __inline __mmask16
10011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10012 _mm512_kandn (__mmask16 __A, __mmask16 __B)
10014 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
10017 extern __inline __mmask16
10018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10019 _mm512_kor (__mmask16 __A, __mmask16 __B)
10021 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10024 extern __inline int
10025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10026 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10028 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10029 (__mmask16) __B);
10032 extern __inline int
10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10036 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10037 (__mmask16) __B);
10040 extern __inline __mmask16
10041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10042 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10044 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10047 extern __inline __mmask16
10048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10049 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10051 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10054 extern __inline __mmask16
10055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10056 _mm512_knot (__mmask16 __A)
10058 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10061 extern __inline __mmask16
10062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10063 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10065 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
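/* Illustrative sketch, not part of the original header: the mask
   operations above combine __mmask16 values without touching vector
   registers.  Assuming two comparison results `k1' and `k2'
   (hypothetical names):

     __mmask16 both = _mm512_kand (k1, k2);
     int none_set = _mm512_kortestz (k1, k2);

   where `none_set' is non-zero only when k1 | k2 has no bits set.  */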
10068 #ifdef __OPTIMIZE__
10069 extern __inline __m512i
10070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10071 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10072 const int __imm)
10074 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10075 (__v4si) __D,
10076 __imm,
10077 (__v16si)
10078 _mm512_setzero_si512 (),
10079 __B);
10082 extern __inline __m512
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10085 const int __imm)
10087 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10088 (__v4sf) __D,
10089 __imm,
10090 (__v16sf)
10091 _mm512_setzero_ps (), __B);
10094 extern __inline __m512i
10095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10096 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10097 __m128i __D, const int __imm)
10099 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10100 (__v4si) __D,
10101 __imm,
10102 (__v16si) __A,
10103 __B);
10106 extern __inline __m512
10107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10108 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10109 __m128 __D, const int __imm)
10111 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10112 (__v4sf) __D,
10113 __imm,
10114 (__v16sf) __A, __B);
10116 #else
10117 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10118 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10119 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10120 (__mmask16)(A)))
10122 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10123 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10124 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10125 (__mmask16)(A)))
10127 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10128 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10129 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10130 (__mmask16)(B)))
10132 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10133 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10134 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10135 (__mmask16)(B)))
10136 #endif
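/* Illustrative sketch, not part of the original header: insertf32x4
   replaces one of the four 128-bit lanes of a 512-bit vector, selected
   by the immediate, and then blends the result with the source operand
   under the mask.  Assuming hypothetical names `z' (__m512), `x'
   (__m128) and `k' (__mmask16):

     __m512 r = _mm512_mask_insertf32x4 (z, k, z, x, 3);

   writes `x' into the highest 128-bit lane of a copy of `z' and keeps
   the elements of `z' wherever the corresponding bit of `k' is clear.  */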
10138 extern __inline __m512i
10139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10140 _mm512_max_epi64 (__m512i __A, __m512i __B)
10142 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10143 (__v8di) __B,
10144 (__v8di)
10145 _mm512_undefined_si512 (),
10146 (__mmask8) -1);
10149 extern __inline __m512i
10150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10151 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10153 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10154 (__v8di) __B,
10155 (__v8di)
10156 _mm512_setzero_si512 (),
10157 __M);
10160 extern __inline __m512i
10161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10162 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10164 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10165 (__v8di) __B,
10166 (__v8di) __W, __M);
10169 extern __inline __m512i
10170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171 _mm512_min_epi64 (__m512i __A, __m512i __B)
10173 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10174 (__v8di) __B,
10175 (__v8di)
10176 _mm512_undefined_si512 (),
10177 (__mmask8) -1);
10180 extern __inline __m512i
10181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10182 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10184 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10185 (__v8di) __B,
10186 (__v8di) __W, __M);
10189 extern __inline __m512i
10190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10191 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10193 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10194 (__v8di) __B,
10195 (__v8di)
10196 _mm512_setzero_si512 (),
10197 __M);
10200 extern __inline __m512i
10201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10202 _mm512_max_epu64 (__m512i __A, __m512i __B)
10204 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10205 (__v8di) __B,
10206 (__v8di)
10207 _mm512_undefined_si512 (),
10208 (__mmask8) -1);
10211 extern __inline __m512i
10212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10213 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10215 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10216 (__v8di) __B,
10217 (__v8di)
10218 _mm512_setzero_si512 (),
10219 __M);
10222 extern __inline __m512i
10223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10224 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10226 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10227 (__v8di) __B,
10228 (__v8di) __W, __M);
10231 extern __inline __m512i
10232 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10233 _mm512_min_epu64 (__m512i __A, __m512i __B)
10235 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10236 (__v8di) __B,
10237 (__v8di)
10238 _mm512_undefined_si512 (),
10239 (__mmask8) -1);
10242 extern __inline __m512i
10243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10244 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10246 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10247 (__v8di) __B,
10248 (__v8di) __W, __M);
10251 extern __inline __m512i
10252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10253 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10255 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10256 (__v8di) __B,
10257 (__v8di)
10258 _mm512_setzero_si512 (),
10259 __M);
10262 extern __inline __m512i
10263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10264 _mm512_max_epi32 (__m512i __A, __m512i __B)
10266 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10267 (__v16si) __B,
10268 (__v16si)
10269 _mm512_undefined_si512 (),
10270 (__mmask16) -1);
10273 extern __inline __m512i
10274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10275 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10277 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10278 (__v16si) __B,
10279 (__v16si)
10280 _mm512_setzero_si512 (),
10281 __M);
10284 extern __inline __m512i
10285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10286 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10288 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10289 (__v16si) __B,
10290 (__v16si) __W, __M);
10293 extern __inline __m512i
10294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10295 _mm512_min_epi32 (__m512i __A, __m512i __B)
10297 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10298 (__v16si) __B,
10299 (__v16si)
10300 _mm512_undefined_si512 (),
10301 (__mmask16) -1);
10304 extern __inline __m512i
10305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10306 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10308 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10309 (__v16si) __B,
10310 (__v16si)
10311 _mm512_setzero_si512 (),
10312 __M);
10315 extern __inline __m512i
10316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10319 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10320 (__v16si) __B,
10321 (__v16si) __W, __M);
10324 extern __inline __m512i
10325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10326 _mm512_max_epu32 (__m512i __A, __m512i __B)
10328 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10329 (__v16si) __B,
10330 (__v16si)
10331 _mm512_undefined_si512 (),
10332 (__mmask16) -1);
10335 extern __inline __m512i
10336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10337 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10339 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10340 (__v16si) __B,
10341 (__v16si)
10342 _mm512_setzero_si512 (),
10343 __M);
10346 extern __inline __m512i
10347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10348 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10350 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10351 (__v16si) __B,
10352 (__v16si) __W, __M);
10355 extern __inline __m512i
10356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10357 _mm512_min_epu32 (__m512i __A, __m512i __B)
10359 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10360 (__v16si) __B,
10361 (__v16si)
10362 _mm512_undefined_si512 (),
10363 (__mmask16) -1);
10366 extern __inline __m512i
10367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10370 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10371 (__v16si) __B,
10372 (__v16si)
10373 _mm512_setzero_si512 (),
10374 __M);
10377 extern __inline __m512i
10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10379 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10381 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10382 (__v16si) __B,
10383 (__v16si) __W, __M);
10386 extern __inline __m512
10387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10388 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10390 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10391 (__v16sf) __B,
10392 (__v16sf)
10393 _mm512_undefined_ps (),
10394 (__mmask16) -1);
10397 extern __inline __m512
10398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10399 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10401 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10402 (__v16sf) __B,
10403 (__v16sf) __W,
10404 (__mmask16) __U);
10407 extern __inline __m512
10408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10409 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10411 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10412 (__v16sf) __B,
10413 (__v16sf)
10414 _mm512_setzero_ps (),
10415 (__mmask16) __U);
10418 #ifdef __OPTIMIZE__
10419 extern __inline __m128d
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10423 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10424 (__v2df) __B,
10425 __R);
10428 extern __inline __m128
10429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10430 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10432 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10433 (__v4sf) __B,
10434 __R);
10437 extern __inline __m128d
10438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10439 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10441 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10442 (__v2df) __B,
10443 __R);
10446 extern __inline __m128
10447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10450 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10451 (__v4sf) __B,
10452 __R);
10455 #else
10456 #define _mm_max_round_sd(A, B, C) \
10457 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10459 #define _mm_max_round_ss(A, B, C) \
10460 (__m128)__builtin_ia32_maxss_round(A, B, C)
10462 #define _mm_min_round_sd(A, B, C) \
10463 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10465 #define _mm_min_round_ss(A, B, C) \
10466 (__m128)__builtin_ia32_minss_round(A, B, C)
10467 #endif
10469 extern __inline __m512d
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10473 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10474 (__v8df) __W,
10475 (__mmask8) __U);
10478 extern __inline __m512
10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10482 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10483 (__v16sf) __W,
10484 (__mmask16) __U);
10487 extern __inline __m512i
10488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10491 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10492 (__v8di) __W,
10493 (__mmask8) __U);
10496 extern __inline __m512i
10497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10498 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10500 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10501 (__v16si) __W,
10502 (__mmask16) __U);
10505 #ifdef __OPTIMIZE__
10506 extern __inline __m128d
10507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10508 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10510 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10511 (__v2df) __A,
10512 (__v2df) __B,
10513 __R);
10516 extern __inline __m128
10517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10518 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10520 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10521 (__v4sf) __A,
10522 (__v4sf) __B,
10523 __R);
10526 extern __inline __m128d
10527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10528 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10530 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10531 (__v2df) __A,
10532 -(__v2df) __B,
10533 __R);
10536 extern __inline __m128
10537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10538 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10540 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10541 (__v4sf) __A,
10542 -(__v4sf) __B,
10543 __R);
10546 extern __inline __m128d
10547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10548 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10550 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10551 -(__v2df) __A,
10552 (__v2df) __B,
10553 __R);
10556 extern __inline __m128
10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10558 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10560 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10561 -(__v4sf) __A,
10562 (__v4sf) __B,
10563 __R);
10566 extern __inline __m128d
10567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10568 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10570 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10571 -(__v2df) __A,
10572 -(__v2df) __B,
10573 __R);
10576 extern __inline __m128
10577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10578 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10580 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10581 -(__v4sf) __A,
10582 -(__v4sf) __B,
10583 __R);
10585 #else
10586 #define _mm_fmadd_round_sd(A, B, C, R) \
10587 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
10589 #define _mm_fmadd_round_ss(A, B, C, R) \
10590 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
10592 #define _mm_fmsub_round_sd(A, B, C, R) \
10593 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
10595 #define _mm_fmsub_round_ss(A, B, C, R) \
10596 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
10598 #define _mm_fnmadd_round_sd(A, B, C, R) \
10599 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
10601 #define _mm_fnmadd_round_ss(A, B, C, R) \
10602 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
10604 #define _mm_fnmsub_round_sd(A, B, C, R) \
10605 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
10607 #define _mm_fnmsub_round_ss(A, B, C, R) \
10608 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
10609 #endif
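/* Illustrative sketch, not part of the original header: the scalar
   *_round forms take an explicit rounding-mode immediate in addition to
   the operands.  Assuming hypothetical __m128d values `a', `b' and `c':

     __m128d r = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_CUR_DIRECTION);

   computes a[0] * b[0] + c[0] in the low lane under the current rounding
   mode and carries the upper lane of `a' through unchanged.  */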
10611 #ifdef __OPTIMIZE__
10612 extern __inline int
10613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10614 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10616 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10619 extern __inline int
10620 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10621 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10623 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10625 #else
10626 #define _mm_comi_round_ss(A, B, C, D)\
10627 __builtin_ia32_vcomiss(A, B, C, D)
10628 #define _mm_comi_round_sd(A, B, C, D)\
10629 __builtin_ia32_vcomisd(A, B, C, D)
10630 #endif
10632 extern __inline __m512d
10633 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10634 _mm512_sqrt_pd (__m512d __A)
10636 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10637 (__v8df)
10638 _mm512_undefined_pd (),
10639 (__mmask8) -1,
10640 _MM_FROUND_CUR_DIRECTION);
10643 extern __inline __m512d
10644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10645 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10647 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10648 (__v8df) __W,
10649 (__mmask8) __U,
10650 _MM_FROUND_CUR_DIRECTION);
10653 extern __inline __m512d
10654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10655 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10657 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10658 (__v8df)
10659 _mm512_setzero_pd (),
10660 (__mmask8) __U,
10661 _MM_FROUND_CUR_DIRECTION);
10664 extern __inline __m512
10665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10666 _mm512_sqrt_ps (__m512 __A)
10668 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10669 (__v16sf)
10670 _mm512_undefined_ps (),
10671 (__mmask16) -1,
10672 _MM_FROUND_CUR_DIRECTION);
10675 extern __inline __m512
10676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10677 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10679 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10680 (__v16sf) __W,
10681 (__mmask16) __U,
10682 _MM_FROUND_CUR_DIRECTION);
10685 extern __inline __m512
10686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10687 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10689 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10690 (__v16sf)
10691 _mm512_setzero_ps (),
10692 (__mmask16) __U,
10693 _MM_FROUND_CUR_DIRECTION);
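/* Usage sketch (illustrative only): the three sqrt forms above differ in how
   lanes with a clear mask bit are produced -- unspecified for the plain
   form, copied from __W for the mask_ form, zeroed for the maskz_ form.
   A minimal example with a hypothetical helper, assuming -mavx512f:

     #include <immintrin.h>

     static __m512
     sqrt_where_selected (__m512 fallback, __mmask16 m, __m512 x)
     {
       // Lanes whose mask bit is 0 keep the corresponding lane of 'fallback'.
       return _mm512_mask_sqrt_ps (fallback, m, x);
     }
*/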
10696 extern __inline __m512d
10697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10698 _mm512_add_pd (__m512d __A, __m512d __B)
10700 return (__m512d) ((__v8df)__A + (__v8df)__B);
10703 extern __inline __m512d
10704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10705 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10707 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10708 (__v8df) __B,
10709 (__v8df) __W,
10710 (__mmask8) __U,
10711 _MM_FROUND_CUR_DIRECTION);
10714 extern __inline __m512d
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10718 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10719 (__v8df) __B,
10720 (__v8df)
10721 _mm512_setzero_pd (),
10722 (__mmask8) __U,
10723 _MM_FROUND_CUR_DIRECTION);
10726 extern __inline __m512
10727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10728 _mm512_add_ps (__m512 __A, __m512 __B)
10730 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10733 extern __inline __m512
10734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10735 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10737 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10738 (__v16sf) __B,
10739 (__v16sf) __W,
10740 (__mmask16) __U,
10741 _MM_FROUND_CUR_DIRECTION);
10744 extern __inline __m512
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10748 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10749 (__v16sf) __B,
10750 (__v16sf)
10751 _mm512_setzero_ps (),
10752 (__mmask16) __U,
10753 _MM_FROUND_CUR_DIRECTION);
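/* Usage sketch (illustrative): the unmasked additions above are expressed
   with the GCC vector '+' operator, while the mask_/maskz_ forms merge
   with __W or zero the unselected lanes.  A hypothetical conditional
   accumulate, assuming -mavx512f:

     #include <immintrin.h>

     static __m512
     accumulate_selected (__m512 acc, __mmask16 m, __m512 x)
     {
       // acc[i] + x[i] where bit i of m is set; other lanes keep acc[i].
       return _mm512_mask_add_ps (acc, m, acc, x);
     }
*/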
10756 extern __inline __m512d
10757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10758 _mm512_sub_pd (__m512d __A, __m512d __B)
10760 return (__m512d) ((__v8df)__A - (__v8df)__B);
10763 extern __inline __m512d
10764 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10765 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10767 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10768 (__v8df) __B,
10769 (__v8df) __W,
10770 (__mmask8) __U,
10771 _MM_FROUND_CUR_DIRECTION);
10774 extern __inline __m512d
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10778 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10779 (__v8df) __B,
10780 (__v8df)
10781 _mm512_setzero_pd (),
10782 (__mmask8) __U,
10783 _MM_FROUND_CUR_DIRECTION);
10786 extern __inline __m512
10787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10788 _mm512_sub_ps (__m512 __A, __m512 __B)
10790 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10793 extern __inline __m512
10794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10797 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10798 (__v16sf) __B,
10799 (__v16sf) __W,
10800 (__mmask16) __U,
10801 _MM_FROUND_CUR_DIRECTION);
10804 extern __inline __m512
10805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10808 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10809 (__v16sf) __B,
10810 (__v16sf)
10811 _mm512_setzero_ps (),
10812 (__mmask16) __U,
10813 _MM_FROUND_CUR_DIRECTION);
10816 extern __inline __m512d
10817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818 _mm512_mul_pd (__m512d __A, __m512d __B)
10820 return (__m512d) ((__v8df)__A * (__v8df)__B);
10823 extern __inline __m512d
10824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10825 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10827 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10828 (__v8df) __B,
10829 (__v8df) __W,
10830 (__mmask8) __U,
10831 _MM_FROUND_CUR_DIRECTION);
10834 extern __inline __m512d
10835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10838 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10839 (__v8df) __B,
10840 (__v8df)
10841 _mm512_setzero_pd (),
10842 (__mmask8) __U,
10843 _MM_FROUND_CUR_DIRECTION);
10846 extern __inline __m512
10847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10848 _mm512_mul_ps (__m512 __A, __m512 __B)
10850 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10853 extern __inline __m512
10854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10855 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10857 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10858 (__v16sf) __B,
10859 (__v16sf) __W,
10860 (__mmask16) __U,
10861 _MM_FROUND_CUR_DIRECTION);
10864 extern __inline __m512
10865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10868 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10869 (__v16sf) __B,
10870 (__v16sf)
10871 _mm512_setzero_ps (),
10872 (__mmask16) __U,
10873 _MM_FROUND_CUR_DIRECTION);
10876 extern __inline __m512d
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm512_div_pd (__m512d __M, __m512d __V)
10880 return (__m512d) ((__v8df)__M / (__v8df)__V);
10883 extern __inline __m512d
10884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10885 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10887 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10888 (__v8df) __V,
10889 (__v8df) __W,
10890 (__mmask8) __U,
10891 _MM_FROUND_CUR_DIRECTION);
10894 extern __inline __m512d
10895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10898 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10899 (__v8df) __V,
10900 (__v8df)
10901 _mm512_setzero_pd (),
10902 (__mmask8) __U,
10903 _MM_FROUND_CUR_DIRECTION);
10906 extern __inline __m512
10907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10908 _mm512_div_ps (__m512 __A, __m512 __B)
10910 return (__m512) ((__v16sf)__A / (__v16sf)__B);
10913 extern __inline __m512
10914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10915 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10917 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10918 (__v16sf) __B,
10919 (__v16sf) __W,
10920 (__mmask16) __U,
10921 _MM_FROUND_CUR_DIRECTION);
10924 extern __inline __m512
10925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10926 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10928 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10929 (__v16sf) __B,
10930 (__v16sf)
10931 _mm512_setzero_ps (),
10932 (__mmask16) __U,
10933 _MM_FROUND_CUR_DIRECTION);
10936 extern __inline __m512d
10937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10938 _mm512_max_pd (__m512d __A, __m512d __B)
10940 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10941 (__v8df) __B,
10942 (__v8df)
10943 _mm512_undefined_pd (),
10944 (__mmask8) -1,
10945 _MM_FROUND_CUR_DIRECTION);
10948 extern __inline __m512d
10949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10950 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10952 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10953 (__v8df) __B,
10954 (__v8df) __W,
10955 (__mmask8) __U,
10956 _MM_FROUND_CUR_DIRECTION);
10959 extern __inline __m512d
10960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10961 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10963 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10964 (__v8df) __B,
10965 (__v8df)
10966 _mm512_setzero_pd (),
10967 (__mmask8) __U,
10968 _MM_FROUND_CUR_DIRECTION);
10971 extern __inline __m512
10972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10973 _mm512_max_ps (__m512 __A, __m512 __B)
10975 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10976 (__v16sf) __B,
10977 (__v16sf)
10978 _mm512_undefined_ps (),
10979 (__mmask16) -1,
10980 _MM_FROUND_CUR_DIRECTION);
10983 extern __inline __m512
10984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10985 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10987 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10988 (__v16sf) __B,
10989 (__v16sf) __W,
10990 (__mmask16) __U,
10991 _MM_FROUND_CUR_DIRECTION);
10994 extern __inline __m512
10995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10996 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10998 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10999 (__v16sf) __B,
11000 (__v16sf)
11001 _mm512_setzero_ps (),
11002 (__mmask16) __U,
11003 _MM_FROUND_CUR_DIRECTION);
11006 extern __inline __m512d
11007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008 _mm512_min_pd (__m512d __A, __m512d __B)
11010 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11011 (__v8df) __B,
11012 (__v8df)
11013 _mm512_undefined_pd (),
11014 (__mmask8) -1,
11015 _MM_FROUND_CUR_DIRECTION);
11018 extern __inline __m512d
11019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11020 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11022 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11023 (__v8df) __B,
11024 (__v8df) __W,
11025 (__mmask8) __U,
11026 _MM_FROUND_CUR_DIRECTION);
11029 extern __inline __m512d
11030 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11031 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11033 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11034 (__v8df) __B,
11035 (__v8df)
11036 _mm512_setzero_pd (),
11037 (__mmask8) __U,
11038 _MM_FROUND_CUR_DIRECTION);
11041 extern __inline __m512
11042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11043 _mm512_min_ps (__m512 __A, __m512 __B)
11045 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11046 (__v16sf) __B,
11047 (__v16sf)
11048 _mm512_undefined_ps (),
11049 (__mmask16) -1,
11050 _MM_FROUND_CUR_DIRECTION);
11053 extern __inline __m512
11054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11055 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11057 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11058 (__v16sf) __B,
11059 (__v16sf) __W,
11060 (__mmask16) __U,
11061 _MM_FROUND_CUR_DIRECTION);
11064 extern __inline __m512
11065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11066 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11068 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11069 (__v16sf) __B,
11070 (__v16sf)
11071 _mm512_setzero_ps (),
11072 (__mmask16) __U,
11073 _MM_FROUND_CUR_DIRECTION);
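/* Usage sketch (illustrative): a common use of the min/max intrinsics above
   is clamping each lane into a range.  A hypothetical helper, assuming
   -mavx512f:

     #include <immintrin.h>

     static __m512
     clamp_ps (__m512 x, __m512 lo, __m512 hi)
     {
       // Per lane: lo <= result <= hi.
       return _mm512_max_ps (_mm512_min_ps (x, hi), lo);
     }
*/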
11076 extern __inline __m512d
11077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11078 _mm512_scalef_pd (__m512d __A, __m512d __B)
11080 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11081 (__v8df) __B,
11082 (__v8df)
11083 _mm512_undefined_pd (),
11084 (__mmask8) -1,
11085 _MM_FROUND_CUR_DIRECTION);
11088 extern __inline __m512d
11089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11090 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11092 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11093 (__v8df) __B,
11094 (__v8df) __W,
11095 (__mmask8) __U,
11096 _MM_FROUND_CUR_DIRECTION);
11099 extern __inline __m512d
11100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11101 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11103 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11104 (__v8df) __B,
11105 (__v8df)
11106 _mm512_setzero_pd (),
11107 (__mmask8) __U,
11108 _MM_FROUND_CUR_DIRECTION);
11111 extern __inline __m512
11112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11113 _mm512_scalef_ps (__m512 __A, __m512 __B)
11115 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11116 (__v16sf) __B,
11117 (__v16sf)
11118 _mm512_undefined_ps (),
11119 (__mmask16) -1,
11120 _MM_FROUND_CUR_DIRECTION);
11123 extern __inline __m512
11124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11125 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11127 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11128 (__v16sf) __B,
11129 (__v16sf) __W,
11130 (__mmask16) __U,
11131 _MM_FROUND_CUR_DIRECTION);
11134 extern __inline __m512
11135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11136 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11138 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11139 (__v16sf) __B,
11140 (__v16sf)
11141 _mm512_setzero_ps (),
11142 (__mmask16) __U,
11143 _MM_FROUND_CUR_DIRECTION);
11146 extern __inline __m128d
11147 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11148 _mm_scalef_sd (__m128d __A, __m128d __B)
11150 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11151 (__v2df) __B,
11152 _MM_FROUND_CUR_DIRECTION);
11155 extern __inline __m128
11156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11157 _mm_scalef_ss (__m128 __A, __m128 __B)
11159 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11160 (__v4sf) __B,
11161 _MM_FROUND_CUR_DIRECTION);
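/* Usage sketch (illustrative): per the VSCALEF definition, these intrinsics
   compute __A * 2^floor(__B) per lane (or for the low element in the scalar
   forms), which behaves like a vectorised ldexp.  A hypothetical helper,
   assuming -mavx512f:

     #include <immintrin.h>

     static __m512
     apply_binary_exponents (__m512 x, __m512 exponents)
     {
       // Per lane: x * 2^floor(exponents); exponents need not be integral.
       return _mm512_scalef_ps (x, exponents);
     }
*/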
11164 extern __inline __m512d
11165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11166 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11168 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11169 (__v8df) __B,
11170 (__v8df) __C,
11171 (__mmask8) -1,
11172 _MM_FROUND_CUR_DIRECTION);
11175 extern __inline __m512d
11176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11177 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11179 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11180 (__v8df) __B,
11181 (__v8df) __C,
11182 (__mmask8) __U,
11183 _MM_FROUND_CUR_DIRECTION);
11186 extern __inline __m512d
11187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11188 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11190 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11191 (__v8df) __B,
11192 (__v8df) __C,
11193 (__mmask8) __U,
11194 _MM_FROUND_CUR_DIRECTION);
11197 extern __inline __m512d
11198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11199 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11201 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11202 (__v8df) __B,
11203 (__v8df) __C,
11204 (__mmask8) __U,
11205 _MM_FROUND_CUR_DIRECTION);
11208 extern __inline __m512
11209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11210 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11212 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11213 (__v16sf) __B,
11214 (__v16sf) __C,
11215 (__mmask16) -1,
11216 _MM_FROUND_CUR_DIRECTION);
11219 extern __inline __m512
11220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11221 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11223 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11224 (__v16sf) __B,
11225 (__v16sf) __C,
11226 (__mmask16) __U,
11227 _MM_FROUND_CUR_DIRECTION);
11230 extern __inline __m512
11231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11232 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11234 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11235 (__v16sf) __B,
11236 (__v16sf) __C,
11237 (__mmask16) __U,
11238 _MM_FROUND_CUR_DIRECTION);
11241 extern __inline __m512
11242 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11243 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11245 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11246 (__v16sf) __B,
11247 (__v16sf) __C,
11248 (__mmask16) __U,
11249 _MM_FROUND_CUR_DIRECTION);
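/* Usage sketch (illustrative): every fmadd form above computes
   __A * __B + __C with a single rounding; the variants differ only in which
   operand fills a masked-off lane (the first source for mask_, the addend
   for mask3_, zero for maskz_).  A plain fused update with a hypothetical
   helper, assuming -mavx512f:

     #include <immintrin.h>

     static __m512
     fused_muladd (__m512 a, __m512 b, __m512 acc)
     {
       // Per lane: acc + a*b, fused (one rounding step).
       return _mm512_fmadd_ps (a, b, acc);
     }
*/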
11252 extern __inline __m512d
11253 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11254 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11256 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11257 (__v8df) __B,
11258 -(__v8df) __C,
11259 (__mmask8) -1,
11260 _MM_FROUND_CUR_DIRECTION);
11263 extern __inline __m512d
11264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11265 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11267 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11268 (__v8df) __B,
11269 -(__v8df) __C,
11270 (__mmask8) __U,
11271 _MM_FROUND_CUR_DIRECTION);
11274 extern __inline __m512d
11275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11276 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11278 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11279 (__v8df) __B,
11280 (__v8df) __C,
11281 (__mmask8) __U,
11282 _MM_FROUND_CUR_DIRECTION);
11285 extern __inline __m512d
11286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11287 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11289 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11290 (__v8df) __B,
11291 -(__v8df) __C,
11292 (__mmask8) __U,
11293 _MM_FROUND_CUR_DIRECTION);
11296 extern __inline __m512
11297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11298 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11300 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11301 (__v16sf) __B,
11302 -(__v16sf) __C,
11303 (__mmask16) -1,
11304 _MM_FROUND_CUR_DIRECTION);
11307 extern __inline __m512
11308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11309 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11311 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11312 (__v16sf) __B,
11313 -(__v16sf) __C,
11314 (__mmask16) __U,
11315 _MM_FROUND_CUR_DIRECTION);
11318 extern __inline __m512
11319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11320 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11322 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11323 (__v16sf) __B,
11324 (__v16sf) __C,
11325 (__mmask16) __U,
11326 _MM_FROUND_CUR_DIRECTION);
11329 extern __inline __m512
11330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11331 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11333 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11334 (__v16sf) __B,
11335 -(__v16sf) __C,
11336 (__mmask16) __U,
11337 _MM_FROUND_CUR_DIRECTION);
11340 extern __inline __m512d
11341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11342 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11344 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11345 (__v8df) __B,
11346 (__v8df) __C,
11347 (__mmask8) -1,
11348 _MM_FROUND_CUR_DIRECTION);
11351 extern __inline __m512d
11352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11353 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11355 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11356 (__v8df) __B,
11357 (__v8df) __C,
11358 (__mmask8) __U,
11359 _MM_FROUND_CUR_DIRECTION);
11362 extern __inline __m512d
11363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11364 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11366 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11367 (__v8df) __B,
11368 (__v8df) __C,
11369 (__mmask8) __U,
11370 _MM_FROUND_CUR_DIRECTION);
11373 extern __inline __m512d
11374 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11375 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11377 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11378 (__v8df) __B,
11379 (__v8df) __C,
11380 (__mmask8) __U,
11381 _MM_FROUND_CUR_DIRECTION);
11384 extern __inline __m512
11385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11386 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11388 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11389 (__v16sf) __B,
11390 (__v16sf) __C,
11391 (__mmask16) -1,
11392 _MM_FROUND_CUR_DIRECTION);
11395 extern __inline __m512
11396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11397 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11399 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11400 (__v16sf) __B,
11401 (__v16sf) __C,
11402 (__mmask16) __U,
11403 _MM_FROUND_CUR_DIRECTION);
11406 extern __inline __m512
11407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11408 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11410 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11411 (__v16sf) __B,
11412 (__v16sf) __C,
11413 (__mmask16) __U,
11414 _MM_FROUND_CUR_DIRECTION);
11417 extern __inline __m512
11418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11419 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11421 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11422 (__v16sf) __B,
11423 (__v16sf) __C,
11424 (__mmask16) __U,
11425 _MM_FROUND_CUR_DIRECTION);
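/* Usage sketch (illustrative): fmaddsub computes __A*__B - __C in
   even-indexed lanes and __A*__B + __C in odd-indexed lanes (fmsubadd,
   below, uses the opposite pairing); the alternating signs are the usual
   building block for interleaved complex arithmetic.  A direct call with a
   hypothetical helper, assuming -mavx512f:

     #include <immintrin.h>

     static __m512d
     mul_then_alternate (__m512d a, __m512d b, __m512d c)
     {
       // Lane i: a[i]*b[i] - c[i] for even i, a[i]*b[i] + c[i] for odd i.
       return _mm512_fmaddsub_pd (a, b, c);
     }
*/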
11428 extern __inline __m512d
11429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11430 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11432 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11433 (__v8df) __B,
11434 -(__v8df) __C,
11435 (__mmask8) -1,
11436 _MM_FROUND_CUR_DIRECTION);
11439 extern __inline __m512d
11440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11441 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11443 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11444 (__v8df) __B,
11445 -(__v8df) __C,
11446 (__mmask8) __U,
11447 _MM_FROUND_CUR_DIRECTION);
11450 extern __inline __m512d
11451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11454 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11455 (__v8df) __B,
11456 (__v8df) __C,
11457 (__mmask8) __U,
11458 _MM_FROUND_CUR_DIRECTION);
11461 extern __inline __m512d
11462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11463 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11465 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11466 (__v8df) __B,
11467 -(__v8df) __C,
11468 (__mmask8) __U,
11469 _MM_FROUND_CUR_DIRECTION);
11472 extern __inline __m512
11473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11474 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11476 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11477 (__v16sf) __B,
11478 -(__v16sf) __C,
11479 (__mmask16) -1,
11480 _MM_FROUND_CUR_DIRECTION);
11483 extern __inline __m512
11484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11485 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11487 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11488 (__v16sf) __B,
11489 -(__v16sf) __C,
11490 (__mmask16) __U,
11491 _MM_FROUND_CUR_DIRECTION);
11494 extern __inline __m512
11495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11496 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11498 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11499 (__v16sf) __B,
11500 (__v16sf) __C,
11501 (__mmask16) __U,
11502 _MM_FROUND_CUR_DIRECTION);
11505 extern __inline __m512
11506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11507 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11509 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11510 (__v16sf) __B,
11511 -(__v16sf) __C,
11512 (__mmask16) __U,
11513 _MM_FROUND_CUR_DIRECTION);
11516 extern __inline __m512d
11517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11518 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11520 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11521 (__v8df) __B,
11522 (__v8df) __C,
11523 (__mmask8) -1,
11524 _MM_FROUND_CUR_DIRECTION);
11527 extern __inline __m512d
11528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11529 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11531 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11532 (__v8df) __B,
11533 (__v8df) __C,
11534 (__mmask8) __U,
11535 _MM_FROUND_CUR_DIRECTION);
11538 extern __inline __m512d
11539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11540 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11542 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11543 (__v8df) __B,
11544 (__v8df) __C,
11545 (__mmask8) __U,
11546 _MM_FROUND_CUR_DIRECTION);
11549 extern __inline __m512d
11550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11551 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11553 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11554 (__v8df) __B,
11555 (__v8df) __C,
11556 (__mmask8) __U,
11557 _MM_FROUND_CUR_DIRECTION);
11560 extern __inline __m512
11561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11564 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11565 (__v16sf) __B,
11566 (__v16sf) __C,
11567 (__mmask16) -1,
11568 _MM_FROUND_CUR_DIRECTION);
11571 extern __inline __m512
11572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11573 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11575 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11576 (__v16sf) __B,
11577 (__v16sf) __C,
11578 (__mmask16) __U,
11579 _MM_FROUND_CUR_DIRECTION);
11582 extern __inline __m512
11583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11584 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11586 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11587 (__v16sf) __B,
11588 (__v16sf) __C,
11589 (__mmask16) __U,
11590 _MM_FROUND_CUR_DIRECTION);
11593 extern __inline __m512
11594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11595 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11597 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11598 (__v16sf) __B,
11599 (__v16sf) __C,
11600 (__mmask16) __U,
11601 _MM_FROUND_CUR_DIRECTION);
11604 extern __inline __m512d
11605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11606 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11608 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11609 (__v8df) __B,
11610 -(__v8df) __C,
11611 (__mmask8) -1,
11612 _MM_FROUND_CUR_DIRECTION);
11615 extern __inline __m512d
11616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11617 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11619 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11620 (__v8df) __B,
11621 (__v8df) __C,
11622 (__mmask8) __U,
11623 _MM_FROUND_CUR_DIRECTION);
11626 extern __inline __m512d
11627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11628 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11630 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11631 (__v8df) __B,
11632 (__v8df) __C,
11633 (__mmask8) __U,
11634 _MM_FROUND_CUR_DIRECTION);
11637 extern __inline __m512d
11638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11639 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11641 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11642 (__v8df) __B,
11643 -(__v8df) __C,
11644 (__mmask8) __U,
11645 _MM_FROUND_CUR_DIRECTION);
11648 extern __inline __m512
11649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11650 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11652 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11653 (__v16sf) __B,
11654 -(__v16sf) __C,
11655 (__mmask16) -1,
11656 _MM_FROUND_CUR_DIRECTION);
11659 extern __inline __m512
11660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11661 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11663 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11664 (__v16sf) __B,
11665 (__v16sf) __C,
11666 (__mmask16) __U,
11667 _MM_FROUND_CUR_DIRECTION);
11670 extern __inline __m512
11671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11674 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11675 (__v16sf) __B,
11676 (__v16sf) __C,
11677 (__mmask16) __U,
11678 _MM_FROUND_CUR_DIRECTION);
11681 extern __inline __m512
11682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11683 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11685 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11686 (__v16sf) __B,
11687 -(__v16sf) __C,
11688 (__mmask16) __U,
11689 _MM_FROUND_CUR_DIRECTION);
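/* Reference sketch (added for exposition): the four fused families above
   differ only in operand signs, each with a single rounding step:

     fmadd :   __A * __B + __C
     fmsub :   __A * __B - __C
     fnmadd: -(__A * __B) + __C
     fnmsub: -(__A * __B) - __C

   For instance, c - a*b can be written with a hypothetical helper
   (assuming -mavx512f) as:

     #include <immintrin.h>

     static __m512d
     sub_product (__m512d a, __m512d b, __m512d c)
     {
       return _mm512_fnmadd_pd (a, b, c);   // -(a*b) + c, single rounding
     }
*/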
11692 extern __inline __m256i
11693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11694 _mm512_cvttpd_epi32 (__m512d __A)
11696 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11697 (__v8si)
11698 _mm256_undefined_si256 (),
11699 (__mmask8) -1,
11700 _MM_FROUND_CUR_DIRECTION);
11703 extern __inline __m256i
11704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11705 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11707 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11708 (__v8si) __W,
11709 (__mmask8) __U,
11710 _MM_FROUND_CUR_DIRECTION);
11713 extern __inline __m256i
11714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11715 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11717 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11718 (__v8si)
11719 _mm256_setzero_si256 (),
11720 (__mmask8) __U,
11721 _MM_FROUND_CUR_DIRECTION);
11724 extern __inline __m256i
11725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11726 _mm512_cvttpd_epu32 (__m512d __A)
11728 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11729 (__v8si)
11730 _mm256_undefined_si256 (),
11731 (__mmask8) -1,
11732 _MM_FROUND_CUR_DIRECTION);
11735 extern __inline __m256i
11736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11737 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11739 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11740 (__v8si) __W,
11741 (__mmask8) __U,
11742 _MM_FROUND_CUR_DIRECTION);
11745 extern __inline __m256i
11746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11747 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11749 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11750 (__v8si)
11751 _mm256_setzero_si256 (),
11752 (__mmask8) __U,
11753 _MM_FROUND_CUR_DIRECTION);
11756 extern __inline __m256i
11757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11758 _mm512_cvtpd_epi32 (__m512d __A)
11760 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11761 (__v8si)
11762 _mm256_undefined_si256 (),
11763 (__mmask8) -1,
11764 _MM_FROUND_CUR_DIRECTION);
11767 extern __inline __m256i
11768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11769 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11771 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11772 (__v8si) __W,
11773 (__mmask8) __U,
11774 _MM_FROUND_CUR_DIRECTION);
11777 extern __inline __m256i
11778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11779 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11781 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11782 (__v8si)
11783 _mm256_setzero_si256 (),
11784 (__mmask8) __U,
11785 _MM_FROUND_CUR_DIRECTION);
11788 extern __inline __m256i
11789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11790 _mm512_cvtpd_epu32 (__m512d __A)
11792 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11793 (__v8si)
11794 _mm256_undefined_si256 (),
11795 (__mmask8) -1,
11796 _MM_FROUND_CUR_DIRECTION);
11799 extern __inline __m256i
11800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11801 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11803 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11804 (__v8si) __W,
11805 (__mmask8) __U,
11806 _MM_FROUND_CUR_DIRECTION);
11809 extern __inline __m256i
11810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11811 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11813 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11814 (__v8si)
11815 _mm256_setzero_si256 (),
11816 (__mmask8) __U,
11817 _MM_FROUND_CUR_DIRECTION);
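/* Usage sketch (illustrative): the conversions above narrow eight doubles
   to eight 32-bit lanes in a __m256i; the cvtt* forms truncate toward zero
   while the cvt* forms use the current MXCSR rounding mode, and the
   *_epu32 forms produce unsigned results.  A hypothetical helper, assuming
   -mavx512f:

     #include <immintrin.h>

     static __m256i
     double_to_int_trunc (__m512d x)
     {
       // C-style (truncating) conversion of each lane to a signed int32.
       return _mm512_cvttpd_epi32 (x);
     }
*/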
11820 extern __inline __m512i
11821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11822 _mm512_cvttps_epi32 (__m512 __A)
11824 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11825 (__v16si)
11826 _mm512_undefined_si512 (),
11827 (__mmask16) -1,
11828 _MM_FROUND_CUR_DIRECTION);
11831 extern __inline __m512i
11832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11833 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11835 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11836 (__v16si) __W,
11837 (__mmask16) __U,
11838 _MM_FROUND_CUR_DIRECTION);
11841 extern __inline __m512i
11842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11843 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11845 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11846 (__v16si)
11847 _mm512_setzero_si512 (),
11848 (__mmask16) __U,
11849 _MM_FROUND_CUR_DIRECTION);
11852 extern __inline __m512i
11853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11854 _mm512_cvttps_epu32 (__m512 __A)
11856 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11857 (__v16si)
11858 _mm512_undefined_si512 (),
11859 (__mmask16) -1,
11860 _MM_FROUND_CUR_DIRECTION);
11863 extern __inline __m512i
11864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11865 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11867 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11868 (__v16si) __W,
11869 (__mmask16) __U,
11870 _MM_FROUND_CUR_DIRECTION);
11873 extern __inline __m512i
11874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11875 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11877 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11878 (__v16si)
11879 _mm512_setzero_si512 (),
11880 (__mmask16) __U,
11881 _MM_FROUND_CUR_DIRECTION);
11884 extern __inline __m512i
11885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11886 _mm512_cvtps_epi32 (__m512 __A)
11888 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11889 (__v16si)
11890 _mm512_undefined_si512 (),
11891 (__mmask16) -1,
11892 _MM_FROUND_CUR_DIRECTION);
11895 extern __inline __m512i
11896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11897 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11899 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11900 (__v16si) __W,
11901 (__mmask16) __U,
11902 _MM_FROUND_CUR_DIRECTION);
11905 extern __inline __m512i
11906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11907 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11909 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11910 (__v16si)
11911 _mm512_setzero_si512 (),
11912 (__mmask16) __U,
11913 _MM_FROUND_CUR_DIRECTION);
11916 extern __inline __m512i
11917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918 _mm512_cvtps_epu32 (__m512 __A)
11920 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11921 (__v16si)
11922 _mm512_undefined_si512 (),
11923 (__mmask16) -1,
11924 _MM_FROUND_CUR_DIRECTION);
11927 extern __inline __m512i
11928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11929 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11931 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11932 (__v16si) __W,
11933 (__mmask16) __U,
11934 _MM_FROUND_CUR_DIRECTION);
11937 extern __inline __m512i
11938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11939 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11941 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11942 (__v16si)
11943 _mm512_setzero_si512 (),
11944 (__mmask16) __U,
11945 _MM_FROUND_CUR_DIRECTION);
11948 #ifdef __x86_64__
11949 extern __inline __m128
11950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11951 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11953 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11954 _MM_FROUND_CUR_DIRECTION);
11957 extern __inline __m128d
11958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11959 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11961 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11962 _MM_FROUND_CUR_DIRECTION);
11964 #endif
11966 extern __inline __m128
11967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11970 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11971 _MM_FROUND_CUR_DIRECTION);
11974 extern __inline __m512
11975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11976 _mm512_cvtepi32_ps (__m512i __A)
11978 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11979 (__v16sf)
11980 _mm512_undefined_ps (),
11981 (__mmask16) -1,
11982 _MM_FROUND_CUR_DIRECTION);
11985 extern __inline __m512
11986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11987 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11989 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11990 (__v16sf) __W,
11991 (__mmask16) __U,
11992 _MM_FROUND_CUR_DIRECTION);
11995 extern __inline __m512
11996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11997 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11999 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
12000 (__v16sf)
12001 _mm512_setzero_ps (),
12002 (__mmask16) __U,
12003 _MM_FROUND_CUR_DIRECTION);
12006 extern __inline __m512
12007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008 _mm512_cvtepu32_ps (__m512i __A)
12010 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12011 (__v16sf)
12012 _mm512_undefined_ps (),
12013 (__mmask16) -1,
12014 _MM_FROUND_CUR_DIRECTION);
12017 extern __inline __m512
12018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12019 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12021 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12022 (__v16sf) __W,
12023 (__mmask16) __U,
12024 _MM_FROUND_CUR_DIRECTION);
12027 extern __inline __m512
12028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12029 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12031 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12032 (__v16sf)
12033 _mm512_setzero_ps (),
12034 (__mmask16) __U,
12035 _MM_FROUND_CUR_DIRECTION);
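/* Usage sketch (illustrative): cvtepi32_ps and cvtepu32_ps convert the same
   32-bit lanes to float, interpreting them as signed or unsigned integers
   respectively; only the unsigned form is correct for values above INT_MAX.
   A hypothetical helper, assuming -mavx512f:

     #include <immintrin.h>

     static __m512
     counts_to_float (__m512i counts)
     {
       // Treat each lane as an unsigned 32-bit integer.
       return _mm512_cvtepu32_ps (counts);
     }
*/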
12038 #ifdef __OPTIMIZE__
12039 extern __inline __m512d
12040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12041 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12043 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12044 (__v8df) __B,
12045 (__v8di) __C,
12046 __imm,
12047 (__mmask8) -1,
12048 _MM_FROUND_CUR_DIRECTION);
12051 extern __inline __m512d
12052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12053 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12054 __m512i __C, const int __imm)
12056 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12057 (__v8df) __B,
12058 (__v8di) __C,
12059 __imm,
12060 (__mmask8) __U,
12061 _MM_FROUND_CUR_DIRECTION);
12064 extern __inline __m512d
12065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12067 __m512i __C, const int __imm)
12069 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12070 (__v8df) __B,
12071 (__v8di) __C,
12072 __imm,
12073 (__mmask8) __U,
12074 _MM_FROUND_CUR_DIRECTION);
12077 extern __inline __m512
12078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12079 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12081 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12082 (__v16sf) __B,
12083 (__v16si) __C,
12084 __imm,
12085 (__mmask16) -1,
12086 _MM_FROUND_CUR_DIRECTION);
12089 extern __inline __m512
12090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12091 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12092 __m512i __C, const int __imm)
12094 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12095 (__v16sf) __B,
12096 (__v16si) __C,
12097 __imm,
12098 (__mmask16) __U,
12099 _MM_FROUND_CUR_DIRECTION);
12102 extern __inline __m512
12103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12104 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12105 __m512i __C, const int __imm)
12107 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12108 (__v16sf) __B,
12109 (__v16si) __C,
12110 __imm,
12111 (__mmask16) __U,
12112 _MM_FROUND_CUR_DIRECTION);
12115 extern __inline __m128d
12116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12117 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12119 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12120 (__v2df) __B,
12121 (__v2di) __C, __imm,
12122 (__mmask8) -1,
12123 _MM_FROUND_CUR_DIRECTION);
12126 extern __inline __m128d
12127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12128 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12129 __m128i __C, const int __imm)
12131 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12132 (__v2df) __B,
12133 (__v2di) __C, __imm,
12134 (__mmask8) __U,
12135 _MM_FROUND_CUR_DIRECTION);
12138 extern __inline __m128d
12139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12140 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12141 __m128i __C, const int __imm)
12143 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12144 (__v2df) __B,
12145 (__v2di) __C,
12146 __imm,
12147 (__mmask8) __U,
12148 _MM_FROUND_CUR_DIRECTION);
12151 extern __inline __m128
12152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12153 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12155 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12156 (__v4sf) __B,
12157 (__v4si) __C, __imm,
12158 (__mmask8) -1,
12159 _MM_FROUND_CUR_DIRECTION);
12162 extern __inline __m128
12163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12164 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12165 __m128i __C, const int __imm)
12167 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12168 (__v4sf) __B,
12169 (__v4si) __C, __imm,
12170 (__mmask8) __U,
12171 _MM_FROUND_CUR_DIRECTION);
12174 extern __inline __m128
12175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12176 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12177 __m128i __C, const int __imm)
12179 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12180 (__v4sf) __B,
12181 (__v4si) __C, __imm,
12182 (__mmask8) __U,
12183 _MM_FROUND_CUR_DIRECTION);
12185 #else
12186 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12187 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12188 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12189 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12191 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12192 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12193 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12194 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12196 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12197 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12198 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12199 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12201 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12202 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12203 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12204 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12206 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12207 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12208 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12209 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12211 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12212 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12213 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12214 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12216 #define _mm_fixupimm_sd(X, Y, Z, C) \
12217 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12218 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12219 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12221 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12222 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12223 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12224 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12226 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12227 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12228 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12229 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12231 #define _mm_fixupimm_ss(X, Y, Z, C) \
12232 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12233 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12234 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12236 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12237 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12238 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12239 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12241 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12242 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12243 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12244 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12245 #endif
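/* Usage sketch (illustrative and heavily simplified): the fixupimm family
   classifies each lane of __B (quiet NaN, zero, infinity, and so on) and
   replaces the corresponding destination lane with a response selected by
   the 4-bit tokens packed into the integer table operand __C; the token
   encodings and the fault-reporting bits of the immediate are defined by
   the VFIXUPIMM instruction and are not restated here.  The call shape,
   with a hypothetical helper and assuming -mavx512f:

     #include <immintrin.h>

     static __m512d
     fixup_with_table (__m512d dst, __m512d src, __m512i table)
     {
       // Immediate 0: no additional fault reporting requested.
       return _mm512_fixupimm_pd (dst, src, table, 0);
     }
*/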
12247 #ifdef __x86_64__
12248 extern __inline unsigned long long
12249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12250 _mm_cvtss_u64 (__m128 __A)
12252 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12253 __A,
12254 _MM_FROUND_CUR_DIRECTION);
12257 extern __inline unsigned long long
12258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12259 _mm_cvttss_u64 (__m128 __A)
12261 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12262 __A,
12263 _MM_FROUND_CUR_DIRECTION);
12266 extern __inline long long
12267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12268 _mm_cvttss_i64 (__m128 __A)
12270 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12271 _MM_FROUND_CUR_DIRECTION);
12273 #endif /* __x86_64__ */
12275 extern __inline unsigned
12276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12277 _mm_cvtss_u32 (__m128 __A)
12279 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12280 _MM_FROUND_CUR_DIRECTION);
12283 extern __inline unsigned
12284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12285 _mm_cvttss_u32 (__m128 __A)
12287 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12288 _MM_FROUND_CUR_DIRECTION);
12291 extern __inline int
12292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12293 _mm_cvttss_i32 (__m128 __A)
12295 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12296 _MM_FROUND_CUR_DIRECTION);
12299 #ifdef __x86_64__
12300 extern __inline unsigned long long
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm_cvtsd_u64 (__m128d __A)
12304 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12305 __A,
12306 _MM_FROUND_CUR_DIRECTION);
12309 extern __inline unsigned long long
12310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12311 _mm_cvttsd_u64 (__m128d __A)
12313 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12314 __A,
12315 _MM_FROUND_CUR_DIRECTION);
12318 extern __inline long long
12319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12320 _mm_cvttsd_i64 (__m128d __A)
12322 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12323 _MM_FROUND_CUR_DIRECTION);
12325 #endif /* __x86_64__ */
12327 extern __inline unsigned
12328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12329 _mm_cvtsd_u32 (__m128d __A)
12331 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12332 _MM_FROUND_CUR_DIRECTION);
12335 extern __inline unsigned
12336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12337 _mm_cvttsd_u32 (__m128d __A)
12339 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12340 _MM_FROUND_CUR_DIRECTION);
12343 extern __inline int
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345 _mm_cvttsd_i32 (__m128d __A)
12347 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12348 _MM_FROUND_CUR_DIRECTION);
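/* Usage sketch (illustrative): the scalar conversions above read only the
   low element; cvt* forms round according to MXCSR, cvtt* forms truncate,
   and the *_u32/*_u64 forms produce unsigned results.  A hypothetical
   helper, assuming -mavx512f:

     #include <immintrin.h>

     static unsigned
     low_double_to_u32 (__m128d x)
     {
       // Element 0 converted to unsigned 32-bit, current rounding mode.
       return _mm_cvtsd_u32 (x);
     }
*/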
12351 extern __inline __m512d
12352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12353 _mm512_cvtps_pd (__m256 __A)
12355 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12356 (__v8df)
12357 _mm512_undefined_pd (),
12358 (__mmask8) -1,
12359 _MM_FROUND_CUR_DIRECTION);
12362 extern __inline __m512d
12363 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12364 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12366 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12367 (__v8df) __W,
12368 (__mmask8) __U,
12369 _MM_FROUND_CUR_DIRECTION);
12372 extern __inline __m512d
12373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12374 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12376 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12377 (__v8df)
12378 _mm512_setzero_pd (),
12379 (__mmask8) __U,
12380 _MM_FROUND_CUR_DIRECTION);
12383 extern __inline __m512
12384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12385 _mm512_cvtph_ps (__m256i __A)
12387 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12388 (__v16sf)
12389 _mm512_undefined_ps (),
12390 (__mmask16) -1,
12391 _MM_FROUND_CUR_DIRECTION);
12394 extern __inline __m512
12395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12398 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12399 (__v16sf) __W,
12400 (__mmask16) __U,
12401 _MM_FROUND_CUR_DIRECTION);
12404 extern __inline __m512
12405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12408 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12409 (__v16sf)
12410 _mm512_setzero_ps (),
12411 (__mmask16) __U,
12412 _MM_FROUND_CUR_DIRECTION);
12415 extern __inline __m256
12416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12417 _mm512_cvtpd_ps (__m512d __A)
12419 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12420 (__v8sf)
12421 _mm256_undefined_ps (),
12422 (__mmask8) -1,
12423 _MM_FROUND_CUR_DIRECTION);
12426 extern __inline __m256
12427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12428 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12430 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12431 (__v8sf) __W,
12432 (__mmask8) __U,
12433 _MM_FROUND_CUR_DIRECTION);
12436 extern __inline __m256
12437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12438 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12440 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12441 (__v8sf)
12442 _mm256_setzero_ps (),
12443 (__mmask8) __U,
12444 _MM_FROUND_CUR_DIRECTION);
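/* Usage sketch (illustrative): these conversions change element width, so
   the narrow side lives in a 256-bit vector -- _mm512_cvtps_pd widens eight
   floats to eight doubles, _mm512_cvtph_ps expands sixteen half-precision
   values, and _mm512_cvtpd_ps narrows back down.  A round trip through
   double precision with a hypothetical helper, assuming -mavx512f:

     #include <immintrin.h>

     static __m256
     through_double (__m256 x)
     {
       __m512d wide = _mm512_cvtps_pd (x);   // 8 x float -> 8 x double
       return _mm512_cvtpd_ps (wide);        // back to 8 x float
     }
*/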
12447 #ifdef __OPTIMIZE__
12448 extern __inline __m512
12449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12450 _mm512_getexp_ps (__m512 __A)
12452 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12453 (__v16sf)
12454 _mm512_undefined_ps (),
12455 (__mmask16) -1,
12456 _MM_FROUND_CUR_DIRECTION);
12459 extern __inline __m512
12460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12461 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12463 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12464 (__v16sf) __W,
12465 (__mmask16) __U,
12466 _MM_FROUND_CUR_DIRECTION);
12469 extern __inline __m512
12470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12471 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12473 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12474 (__v16sf)
12475 _mm512_setzero_ps (),
12476 (__mmask16) __U,
12477 _MM_FROUND_CUR_DIRECTION);
12480 extern __inline __m512d
12481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12482 _mm512_getexp_pd (__m512d __A)
12484 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12485 (__v8df)
12486 _mm512_undefined_pd (),
12487 (__mmask8) -1,
12488 _MM_FROUND_CUR_DIRECTION);
12491 extern __inline __m512d
12492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12493 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12495 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12496 (__v8df) __W,
12497 (__mmask8) __U,
12498 _MM_FROUND_CUR_DIRECTION);
12501 extern __inline __m512d
12502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12503 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12505 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12506 (__v8df)
12507 _mm512_setzero_pd (),
12508 (__mmask8) __U,
12509 _MM_FROUND_CUR_DIRECTION);
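/* Scalar forms: compute the exponent of the low element of __B and copy the
   upper elements of the result from __A.  */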
12512 extern __inline __m128
12513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12514 _mm_getexp_ss (__m128 __A, __m128 __B)
12516 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12517 (__v4sf) __B,
12518 _MM_FROUND_CUR_DIRECTION);
12521 extern __inline __m128d
12522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12523 _mm_getexp_sd (__m128d __A, __m128d __B)
12525 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12526 (__v2df) __B,
12527 _MM_FROUND_CUR_DIRECTION);
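/* _mm512_getmant_* normalize the mantissa of each element into the interval
   selected by __B (an _MM_MANT_NORM_* value) with the sign treatment
   selected by __C (an _MM_MANT_SIGN_* value).  The builtin takes the two
   fields packed into a single immediate, sign control in bits 3:2 and
   interval in bits 1:0, hence (__C << 2) | __B.  */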
12530 extern __inline __m512d
12531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12532 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12533 _MM_MANTISSA_SIGN_ENUM __C)
12535 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12536 (__C << 2) | __B,
12537 _mm512_undefined_pd (),
12538 (__mmask8) -1,
12539 _MM_FROUND_CUR_DIRECTION);
12542 extern __inline __m512d
12543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12544 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12545 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12547 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12548 (__C << 2) | __B,
12549 (__v8df) __W, __U,
12550 _MM_FROUND_CUR_DIRECTION);
12553 extern __inline __m512d
12554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12555 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12556 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12558 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12559 (__C << 2) | __B,
12560 (__v8df)
12561 _mm512_setzero_pd (),
12562 __U,
12563 _MM_FROUND_CUR_DIRECTION);
12566 extern __inline __m512
12567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12568 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12569 _MM_MANTISSA_SIGN_ENUM __C)
12571 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12572 (__C << 2) | __B,
12573 _mm512_undefined_ps (),
12574 (__mmask16) -1,
12575 _MM_FROUND_CUR_DIRECTION);
12578 extern __inline __m512
12579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12580 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12581 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12583 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12584 (__C << 2) | __B,
12585 (__v16sf) __W, __U,
12586 _MM_FROUND_CUR_DIRECTION);
12589 extern __inline __m512
12590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12591 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12592 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12594 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12595 (__C << 2) | __B,
12596 (__v16sf)
12597 _mm512_setzero_ps (),
12598 __U,
12599 _MM_FROUND_CUR_DIRECTION);
12602 extern __inline __m128d
12603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12604 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12605 _MM_MANTISSA_SIGN_ENUM __D)
12607 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12608 (__v2df) __B,
12609 (__D << 2) | __C,
12610 _MM_FROUND_CUR_DIRECTION);
12613 extern __inline __m128
12614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12615 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12616 _MM_MANTISSA_SIGN_ENUM __D)
12618 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12619 (__v4sf) __B,
12620 (__D << 2) | __C,
12621 _MM_FROUND_CUR_DIRECTION);
12624 #else
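/* Macro forms of the intrinsics above, used when not optimizing; the
   arguments that map to immediates must still be compile-time constants.  */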
12625 #define _mm512_getmant_pd(X, B, C) \
12626 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12627 (int)(((C)<<2) | (B)), \
12628 (__v8df)_mm512_undefined_pd(), \
12629 (__mmask8)-1,\
12630 _MM_FROUND_CUR_DIRECTION))
12632 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
12633 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12634 (int)(((C)<<2) | (B)), \
12635 (__v8df)(__m512d)(W), \
12636 (__mmask8)(U),\
12637 _MM_FROUND_CUR_DIRECTION))
12639 #define _mm512_maskz_getmant_pd(U, X, B, C) \
12640 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12641 (int)(((C)<<2) | (B)), \
12642 (__v8df)_mm512_setzero_pd(), \
12643 (__mmask8)(U),\
12644 _MM_FROUND_CUR_DIRECTION))
12645 #define _mm512_getmant_ps(X, B, C) \
12646 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12647 (int)(((C)<<2) | (B)), \
12648 (__v16sf)_mm512_undefined_ps(), \
12649 (__mmask16)-1,\
12650 _MM_FROUND_CUR_DIRECTION))
12652 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
12653 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12654 (int)(((C)<<2) | (B)), \
12655 (__v16sf)(__m512)(W), \
12656 (__mmask16)(U),\
12657 _MM_FROUND_CUR_DIRECTION))
12659 #define _mm512_maskz_getmant_ps(U, X, B, C) \
12660 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12661 (int)(((C)<<2) | (B)), \
12662 (__v16sf)_mm512_setzero_ps(), \
12663 (__mmask16)(U),\
12664 _MM_FROUND_CUR_DIRECTION))
12665 #define _mm_getmant_sd(X, Y, C, D) \
12666 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
12667 (__v2df)(__m128d)(Y), \
12668 (int)(((D)<<2) | (C)), \
12669 _MM_FROUND_CUR_DIRECTION))
12671 #define _mm_getmant_ss(X, Y, C, D) \
12672 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
12673 (__v4sf)(__m128)(Y), \
12674 (int)(((D)<<2) | (C)), \
12675 _MM_FROUND_CUR_DIRECTION))
12677 #define _mm_getexp_ss(A, B) \
12679 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
12679 _MM_FROUND_CUR_DIRECTION))
12681 #define _mm_getexp_sd(A, B) \
12683 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
12683 _MM_FROUND_CUR_DIRECTION))
12685 #define _mm512_getexp_ps(A) \
12686 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12687 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
12689 #define _mm512_mask_getexp_ps(W, U, A) \
12690 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12691 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12693 #define _mm512_maskz_getexp_ps(U, A) \
12694 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12695 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12697 #define _mm512_getexp_pd(A) \
12698 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12699 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
12701 #define _mm512_mask_getexp_pd(W, U, A) \
12702 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12703 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12705 #define _mm512_maskz_getexp_pd(U, A) \
12706 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12707 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12708 #endif
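/* _mm512_roundscale_* round each element to the precision given by the
   immediate: bits 7:4 select how many fraction bits to keep and bits 3:0
   select the rounding behaviour.  For example (illustrative only),
   _mm512_roundscale_ps (__x, 0x01) rounds every element toward minus
   infinity keeping no fraction bits, i.e. floor.  */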
12710 #ifdef __OPTIMIZE__
12711 extern __inline __m512
12712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12713 _mm512_roundscale_ps (__m512 __A, const int __imm)
12715 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
12716 (__v16sf)
12717 _mm512_undefined_ps (),
12718 -1,
12719 _MM_FROUND_CUR_DIRECTION);
12722 extern __inline __m512
12723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12724 _mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
12725 const int __imm)
12727 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
12728 (__v16sf) __A,
12729 (__mmask16) __B,
12730 _MM_FROUND_CUR_DIRECTION);
12733 extern __inline __m512
12734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12735 _mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
12737 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
12738 __imm,
12739 (__v16sf)
12740 _mm512_setzero_ps (),
12741 (__mmask16) __A,
12742 _MM_FROUND_CUR_DIRECTION);
12745 extern __inline __m512d
12746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12747 _mm512_roundscale_pd (__m512d __A, const int __imm)
12749 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
12750 (__v8df)
12751 _mm512_undefined_pd (),
12752 -1,
12753 _MM_FROUND_CUR_DIRECTION);
12756 extern __inline __m512d
12757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12758 _mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
12759 const int __imm)
12761 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
12762 (__v8df) __A,
12763 (__mmask8) __B,
12764 _MM_FROUND_CUR_DIRECTION);
12767 extern __inline __m512d
12768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12769 _mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
12771 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
12772 __imm,
12773 (__v8df)
12774 _mm512_setzero_pd (),
12775 (__mmask8) __A,
12776 _MM_FROUND_CUR_DIRECTION);
12779 extern __inline __m128
12780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12781 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
12783 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
12784 (__v4sf) __B, __imm,
12785 _MM_FROUND_CUR_DIRECTION);
12788 extern __inline __m128d
12789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12790 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
12792 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
12793 (__v2df) __B, __imm,
12794 _MM_FROUND_CUR_DIRECTION);
12797 #else
12798 #define _mm512_roundscale_ps(A, B) \
12799 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
12800 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12801 #define _mm512_mask_roundscale_ps(A, B, C, D) \
12802 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
12803 (int)(D), \
12804 (__v16sf)(__m512)(A), \
12805 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
12806 #define _mm512_maskz_roundscale_ps(A, B, C) \
12807 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
12808 (int)(C), \
12809 (__v16sf)_mm512_setzero_ps(),\
12810 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
12811 #define _mm512_roundscale_pd(A, B) \
12812 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
12813 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12814 #define _mm512_mask_roundscale_pd(A, B, C, D) \
12815 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
12816 (int)(D), \
12817 (__v8df)(__m512d)(A), \
12818 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
12819 #define _mm512_maskz_roundscale_pd(A, B, C) \
12820 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
12821 (int)(C), \
12822 (__v8df)_mm512_setzero_pd(),\
12823 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
12824 #define _mm_roundscale_ss(A, B, C) \
12825 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
12826 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12827 #define _mm_roundscale_sd(A, B, C) \
12828 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
12829 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
12830 #endif
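/* Compare intrinsics: __P selects the predicate, one of the _CMP_* constants
   such as _CMP_LT_OS, and the result is returned as a bit mask with one bit
   per element.  Illustrative use:

     __mmask8 __m = _mm512_cmp_pd_mask (__a, __b, _CMP_LT_OS);  */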
12832 #ifdef __OPTIMIZE__
12833 extern __inline __mmask8
12834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12835 _mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
12837 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12838 (__v8df) __Y, __P,
12839 (__mmask8) -1,
12840 _MM_FROUND_CUR_DIRECTION);
12843 extern __inline __mmask16
12844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12845 _mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
12847 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12848 (__v16sf) __Y, __P,
12849 (__mmask16) -1,
12850 _MM_FROUND_CUR_DIRECTION);
12853 extern __inline __mmask16
12854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12855 _mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
12857 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12858 (__v16sf) __Y, __P,
12859 (__mmask16) __U,
12860 _MM_FROUND_CUR_DIRECTION);
12863 extern __inline __mmask8
12864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12865 _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
12867 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12868 (__v8df) __Y, __P,
12869 (__mmask8) __U,
12870 _MM_FROUND_CUR_DIRECTION);
12873 extern __inline __mmask8
12874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12875 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
12877 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12878 (__v2df) __Y, __P,
12879 (__mmask8) -1,
12880 _MM_FROUND_CUR_DIRECTION);
12883 extern __inline __mmask8
12884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12885 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
12887 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
12888 (__v2df) __Y, __P,
12889 (__mmask8) __M,
12890 _MM_FROUND_CUR_DIRECTION);
12893 extern __inline __mmask8
12894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12895 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
12897 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12898 (__v4sf) __Y, __P,
12899 (__mmask8) -1,
12900 _MM_FROUND_CUR_DIRECTION);
12903 extern __inline __mmask8
12904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12905 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
12907 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
12908 (__v4sf) __Y, __P,
12909 (__mmask8) __M,
12910 _MM_FROUND_CUR_DIRECTION);
12913 #else
12914 #define _mm512_cmp_pd_mask(X, Y, P) \
12915 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12916 (__v8df)(__m512d)(Y), (int)(P),\
12917 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12919 #define _mm512_cmp_ps_mask(X, Y, P) \
12920 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12921 (__v16sf)(__m512)(Y), (int)(P),\
12922 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))
12924 #define _mm512_mask_cmp_pd_mask(M, X, Y, P) \
12925 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12926 (__v8df)(__m512d)(Y), (int)(P),\
12927 (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
12929 #define _mm512_mask_cmp_ps_mask(M, X, Y, P) \
12930 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12931 (__v16sf)(__m512)(Y), (int)(P),\
12932 (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
12934 #define _mm_cmp_sd_mask(X, Y, P) \
12935 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12936 (__v2df)(__m128d)(Y), (int)(P),\
12937 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12939 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
12940 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
12941 (__v2df)(__m128d)(Y), (int)(P),\
12942 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
12944 #define _mm_cmp_ss_mask(X, Y, P) \
12945 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12946 (__v4sf)(__m128)(Y), (int)(P), \
12947 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
12949 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
12950 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
12951 (__v4sf)(__m128)(Y), (int)(P), \
12952 (__mmask8)(M),_MM_FROUND_CUR_DIRECTION))
12953 #endif
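/* _mm512_kmov copies a 16-bit mask value through a mask register.  The
   _mm512_cast* functions that follow reinterpret a 512-bit vector as another
   512-bit type without changing any bits.  */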
12955 extern __inline __mmask16
12956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12957 _mm512_kmov (__mmask16 __A)
12959 return __builtin_ia32_kmov16 (__A);
12962 extern __inline __m512
12963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12964 _mm512_castpd_ps (__m512d __A)
12966 return (__m512) (__A);
12969 extern __inline __m512i
12970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12971 _mm512_castpd_si512 (__m512d __A)
12973 return (__m512i) (__A);
12976 extern __inline __m512d
12977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12978 _mm512_castps_pd (__m512 __A)
12980 return (__m512d) (__A);
12983 extern __inline __m512i
12984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12985 _mm512_castps_si512 (__m512 __A)
12987 return (__m512i) (__A);
12990 extern __inline __m512
12991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12992 _mm512_castsi512_ps (__m512i __A)
12994 return (__m512) (__A);
12997 extern __inline __m512d
12998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12999 _mm512_castsi512_pd (__m512i __A)
13001 return (__m512d) (__A);
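/* Casts that return the low 128 or 256 bits of a 512-bit vector.  */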
13004 extern __inline __m128d
13005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13006 _mm512_castpd512_pd128 (__m512d __A)
13008 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
13011 extern __inline __m128
13012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13013 _mm512_castps512_ps128 (__m512 __A)
13015 return _mm512_extractf32x4_ps(__A, 0);
13018 extern __inline __m128i
13019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13020 _mm512_castsi512_si128 (__m512i __A)
13022 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
13025 extern __inline __m256d
13026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13027 _mm512_castpd512_pd256 (__m512d __A)
13029 return _mm512_extractf64x4_pd(__A, 0);
13032 extern __inline __m256
13033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13034 _mm512_castps512_ps256 (__m512 __A)
13036 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
13039 extern __inline __m256i
13040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13041 _mm512_castsi512_si256 (__m512i __A)
13043 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
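/* Casts that extend a 128- or 256-bit vector to 512 bits; the contents of
   the upper bits of the result are undefined.  */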
13046 extern __inline __m512d
13047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13048 _mm512_castpd128_pd512 (__m128d __A)
13050 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
13053 extern __inline __m512
13054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13055 _mm512_castps128_ps512 (__m128 __A)
13057 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
13060 extern __inline __m512i
13061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13062 _mm512_castsi128_si512 (__m128i __A)
13064 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
13067 extern __inline __m512d
13068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13069 _mm512_castpd256_pd512 (__m256d __A)
13071 return __builtin_ia32_pd512_256pd (__A);
13074 extern __inline __m512
13075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13076 _mm512_castps256_ps512 (__m256 __A)
13078 return __builtin_ia32_ps512_256ps (__A);
13081 extern __inline __m512i
13082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13083 _mm512_castsi256_si512 (__m256i __A)
13085 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
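/* Unsigned integer comparisons; the third operand of the ucmp builtins is
   the predicate, 0 for equal and 6 for not-less-or-equal (unsigned
   greater-than).  */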
13088 extern __inline __mmask16
13089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13090 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
13092 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13093 (__v16si) __B, 0,
13094 (__mmask16) -1);
13097 extern __inline __mmask16
13098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13099 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13101 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13102 (__v16si) __B, 0, __U);
13105 extern __inline __mmask8
13106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13107 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13109 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13110 (__v8di) __B, 0, __U);
13113 extern __inline __mmask8
13114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13115 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
13117 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13118 (__v8di) __B, 0,
13119 (__mmask8) -1);
13122 extern __inline __mmask16
13123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13124 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
13126 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13127 (__v16si) __B, 6,
13128 (__mmask16) -1);
13131 extern __inline __mmask16
13132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13133 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
13135 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
13136 (__v16si) __B, 6, __U);
13139 extern __inline __mmask8
13140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13141 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
13143 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13144 (__v8di) __B, 6, __U);
13147 extern __inline __mmask8
13148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
13149 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
13151 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
13152 (__v8di) __B, 6,
13153 (__mmask8) -1);
13156 #ifdef __DISABLE_AVX512F__
13157 #undef __DISABLE_AVX512F__
13158 #pragma GCC pop_options
13159 #endif /* __DISABLE_AVX512F__ */
13161 #endif /* _AVX512FINTRIN_H_INCLUDED */