[AVX-512] Enable QI-mode mask logic patterns on non-AVX-512DQ targets.
[official-gcc.git] / gcc / config / i386 / avx512fintrin.h
blobd7e7020577bbb956358d5c065fe7eb206998f08a
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
31 #ifndef __AVX512F__
32 #pragma GCC push_options
33 #pragma GCC target("avx512f")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40 typedef long long __v8di __attribute__ ((__vector_size__ (64)));
41 typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
42 typedef int __v16si __attribute__ ((__vector_size__ (64)));
43 typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
44 typedef short __v32hi __attribute__ ((__vector_size__ (64)));
45 typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
46 typedef char __v64qi __attribute__ ((__vector_size__ (64)));
47 typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
49 /* The Intel API is flexible enough that we must allow aliasing with other
50 vector types, and their scalar components. */
51 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
52 typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
53 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
55 typedef unsigned char __mmask8;
56 typedef unsigned short __mmask16;
58 extern __inline __m512i
59 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
60 _mm512_set_epi64 (long long __A, long long __B, long long __C,
61 long long __D, long long __E, long long __F,
62 long long __G, long long __H)
64 return __extension__ (__m512i) (__v8di)
65 { __H, __G, __F, __E, __D, __C, __B, __A };
68 /* Create the vector [A B C D E F G H I J K L M N O P]. */
69 extern __inline __m512i
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
72 int __E, int __F, int __G, int __H,
73 int __I, int __J, int __K, int __L,
74 int __M, int __N, int __O, int __P)
76 return __extension__ (__m512i)(__v16si)
77 { __P, __O, __N, __M, __L, __K, __J, __I,
78 __H, __G, __F, __E, __D, __C, __B, __A };
81 extern __inline __m512d
82 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
83 _mm512_set_pd (double __A, double __B, double __C, double __D,
84 double __E, double __F, double __G, double __H)
86 return __extension__ (__m512d)
87 { __H, __G, __F, __E, __D, __C, __B, __A };
90 extern __inline __m512
91 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
92 _mm512_set_ps (float __A, float __B, float __C, float __D,
93 float __E, float __F, float __G, float __H,
94 float __I, float __J, float __K, float __L,
95 float __M, float __N, float __O, float __P)
97 return __extension__ (__m512)
98 { __P, __O, __N, __M, __L, __K, __J, __I,
99 __H, __G, __F, __E, __D, __C, __B, __A };
102 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
103 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
105 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
106 e8,e9,e10,e11,e12,e13,e14,e15) \
107 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
109 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
110 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
112 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
113 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
115 extern __inline __m512
116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
117 _mm512_undefined_ps (void)
119 __m512 __Y = __Y;
120 return __Y;
123 extern __inline __m512d
124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
125 _mm512_undefined_pd (void)
127 __m512d __Y = __Y;
128 return __Y;
131 extern __inline __m512i
132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
133 _mm512_undefined_si512 (void)
135 __m512i __Y = __Y;
136 return __Y;
139 extern __inline __m512i
140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
141 _mm512_set1_epi8 (char __A)
143 return __extension__ (__m512i)(__v64qi)
144 { __A, __A, __A, __A, __A, __A, __A, __A,
145 __A, __A, __A, __A, __A, __A, __A, __A,
146 __A, __A, __A, __A, __A, __A, __A, __A,
147 __A, __A, __A, __A, __A, __A, __A, __A,
148 __A, __A, __A, __A, __A, __A, __A, __A,
149 __A, __A, __A, __A, __A, __A, __A, __A,
150 __A, __A, __A, __A, __A, __A, __A, __A,
151 __A, __A, __A, __A, __A, __A, __A, __A };
154 extern __inline __m512i
155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
156 _mm512_set1_epi16 (short __A)
158 return __extension__ (__m512i)(__v32hi)
159 { __A, __A, __A, __A, __A, __A, __A, __A,
160 __A, __A, __A, __A, __A, __A, __A, __A,
161 __A, __A, __A, __A, __A, __A, __A, __A,
162 __A, __A, __A, __A, __A, __A, __A, __A };
165 extern __inline __m512d
166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
167 _mm512_set1_pd (double __A)
169 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
170 (__v2df) { __A, },
171 (__v8df)
172 _mm512_undefined_pd (),
173 (__mmask8) -1);
176 extern __inline __m512
177 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
178 _mm512_set1_ps (float __A)
180 return (__m512) __builtin_ia32_broadcastss512 (__extension__
181 (__v4sf) { __A, },
182 (__v16sf)
183 _mm512_undefined_ps (),
184 (__mmask16) -1);
187 /* Create the vector [A B C D A B C D A B C D A B C D]. */
188 extern __inline __m512i
189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
190 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
192 return __extension__ (__m512i)(__v16si)
193 { __D, __C, __B, __A, __D, __C, __B, __A,
194 __D, __C, __B, __A, __D, __C, __B, __A };
197 extern __inline __m512i
198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
199 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
200 long long __D)
202 return __extension__ (__m512i) (__v8di)
203 { __D, __C, __B, __A, __D, __C, __B, __A };
206 extern __inline __m512d
207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
208 _mm512_set4_pd (double __A, double __B, double __C, double __D)
210 return __extension__ (__m512d)
211 { __D, __C, __B, __A, __D, __C, __B, __A };
214 extern __inline __m512
215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
216 _mm512_set4_ps (float __A, float __B, float __C, float __D)
218 return __extension__ (__m512)
219 { __D, __C, __B, __A, __D, __C, __B, __A,
220 __D, __C, __B, __A, __D, __C, __B, __A };
223 #define _mm512_setr4_epi64(e0,e1,e2,e3) \
224 _mm512_set4_epi64(e3,e2,e1,e0)
226 #define _mm512_setr4_epi32(e0,e1,e2,e3) \
227 _mm512_set4_epi32(e3,e2,e1,e0)
229 #define _mm512_setr4_pd(e0,e1,e2,e3) \
230 _mm512_set4_pd(e3,e2,e1,e0)
232 #define _mm512_setr4_ps(e0,e1,e2,e3) \
233 _mm512_set4_ps(e3,e2,e1,e0)
235 extern __inline __m512
236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
237 _mm512_setzero_ps (void)
239 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
240 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
243 extern __inline __m512d
244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
245 _mm512_setzero_pd (void)
247 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
250 extern __inline __m512i
251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
252 _mm512_setzero_epi32 (void)
254 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
257 extern __inline __m512i
258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
259 _mm512_setzero_si512 (void)
261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
264 extern __inline __m512d
265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
266 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
268 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
269 (__v8df) __W,
270 (__mmask8) __U);
273 extern __inline __m512d
274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
275 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
277 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
278 (__v8df)
279 _mm512_setzero_pd (),
280 (__mmask8) __U);
283 extern __inline __m512
284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
285 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
287 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
288 (__v16sf) __W,
289 (__mmask16) __U);
292 extern __inline __m512
293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
294 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
296 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
297 (__v16sf)
298 _mm512_setzero_ps (),
299 (__mmask16) __U);
302 extern __inline __m512d
303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
304 _mm512_load_pd (void const *__P)
306 return *(__m512d *) __P;
309 extern __inline __m512d
310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
311 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
313 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
314 (__v8df) __W,
315 (__mmask8) __U);
318 extern __inline __m512d
319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320 _mm512_maskz_load_pd (__mmask8 __U, void const *__P)
322 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
323 (__v8df)
324 _mm512_setzero_pd (),
325 (__mmask8) __U);
328 extern __inline void
329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
330 _mm512_store_pd (void *__P, __m512d __A)
332 *(__m512d *) __P = __A;
335 extern __inline void
336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337 _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
339 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
340 (__mmask8) __U);
343 extern __inline __m512
344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
345 _mm512_load_ps (void const *__P)
347 return *(__m512 *) __P;
350 extern __inline __m512
351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
352 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
354 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
355 (__v16sf) __W,
356 (__mmask16) __U);
359 extern __inline __m512
360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
361 _mm512_maskz_load_ps (__mmask16 __U, void const *__P)
363 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
364 (__v16sf)
365 _mm512_setzero_ps (),
366 (__mmask16) __U);
369 extern __inline void
370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
371 _mm512_store_ps (void *__P, __m512 __A)
373 *(__m512 *) __P = __A;
376 extern __inline void
377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
378 _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
380 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
381 (__mmask16) __U);
384 extern __inline __m512i
385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
386 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
388 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
389 (__v8di) __W,
390 (__mmask8) __U);
393 extern __inline __m512i
394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
395 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
397 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
398 (__v8di)
399 _mm512_setzero_si512 (),
400 (__mmask8) __U);
403 extern __inline __m512i
404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
405 _mm512_load_epi64 (void const *__P)
407 return *(__m512i *) __P;
410 extern __inline __m512i
411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
412 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
414 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
415 (__v8di) __W,
416 (__mmask8) __U);
419 extern __inline __m512i
420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
423 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
424 (__v8di)
425 _mm512_setzero_si512 (),
426 (__mmask8) __U);
429 extern __inline void
430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
431 _mm512_store_epi64 (void *__P, __m512i __A)
433 *(__m512i *) __P = __A;
436 extern __inline void
437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
438 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
440 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
441 (__mmask8) __U);
444 extern __inline __m512i
445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
446 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
448 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
449 (__v16si) __W,
450 (__mmask16) __U);
453 extern __inline __m512i
454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
455 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
457 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
458 (__v16si)
459 _mm512_setzero_si512 (),
460 (__mmask16) __U);
463 extern __inline __m512i
464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
465 _mm512_load_si512 (void const *__P)
467 return *(__m512i *) __P;
470 extern __inline __m512i
471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
472 _mm512_load_epi32 (void const *__P)
474 return *(__m512i *) __P;
477 extern __inline __m512i
478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
481 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
482 (__v16si) __W,
483 (__mmask16) __U);
486 extern __inline __m512i
487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
488 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
490 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
491 (__v16si)
492 _mm512_setzero_si512 (),
493 (__mmask16) __U);
496 extern __inline void
497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
498 _mm512_store_si512 (void *__P, __m512i __A)
500 *(__m512i *) __P = __A;
503 extern __inline void
504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
505 _mm512_store_epi32 (void *__P, __m512i __A)
507 *(__m512i *) __P = __A;
510 extern __inline void
511 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
514 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
515 (__mmask16) __U);
518 extern __inline __m512i
519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
520 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
522 return (__m512i) ((__v16su) __A * (__v16su) __B);
525 extern __inline __m512i
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
530 (__v16si) __B,
531 (__v16si)
532 _mm512_setzero_si512 (),
533 __M);
536 extern __inline __m512i
537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
538 _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
541 (__v16si) __B,
542 (__v16si) __W, __M);
545 extern __inline __m512i
546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
547 _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
550 (__v16si) __Y,
551 (__v16si)
552 _mm512_undefined_si512 (),
553 (__mmask16) -1);
556 extern __inline __m512i
557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
558 _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
561 (__v16si) __Y,
562 (__v16si) __W,
563 (__mmask16) __U);
566 extern __inline __m512i
567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
568 _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
571 (__v16si) __Y,
572 (__v16si)
573 _mm512_setzero_si512 (),
574 (__mmask16) __U);
577 extern __inline __m512i
578 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
579 _mm512_srav_epi32 (__m512i __X, __m512i __Y)
581 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
582 (__v16si) __Y,
583 (__v16si)
584 _mm512_undefined_si512 (),
585 (__mmask16) -1);
588 extern __inline __m512i
589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
590 _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
593 (__v16si) __Y,
594 (__v16si) __W,
595 (__mmask16) __U);
598 extern __inline __m512i
599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
600 _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
603 (__v16si) __Y,
604 (__v16si)
605 _mm512_setzero_si512 (),
606 (__mmask16) __U);
609 extern __inline __m512i
610 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
611 _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
614 (__v16si) __Y,
615 (__v16si)
616 _mm512_undefined_si512 (),
617 (__mmask16) -1);
620 extern __inline __m512i
621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
622 _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
625 (__v16si) __Y,
626 (__v16si) __W,
627 (__mmask16) __U);
630 extern __inline __m512i
631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
632 _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
635 (__v16si) __Y,
636 (__v16si)
637 _mm512_setzero_si512 (),
638 (__mmask16) __U);
641 extern __inline __m512i
642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
643 _mm512_add_epi64 (__m512i __A, __m512i __B)
645 return (__m512i) ((__v8du) __A + (__v8du) __B);
648 extern __inline __m512i
649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
650 _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
652 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
653 (__v8di) __B,
654 (__v8di) __W,
655 (__mmask8) __U);
658 extern __inline __m512i
659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
660 _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
662 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
663 (__v8di) __B,
664 (__v8di)
665 _mm512_setzero_si512 (),
666 (__mmask8) __U);
669 extern __inline __m512i
670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
671 _mm512_sub_epi64 (__m512i __A, __m512i __B)
673 return (__m512i) ((__v8du) __A - (__v8du) __B);
676 extern __inline __m512i
677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
678 _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
680 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
681 (__v8di) __B,
682 (__v8di) __W,
683 (__mmask8) __U);
686 extern __inline __m512i
687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
688 _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
690 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
691 (__v8di) __B,
692 (__v8di)
693 _mm512_setzero_si512 (),
694 (__mmask8) __U);
697 extern __inline __m512i
698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
699 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
701 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
702 (__v8di) __Y,
703 (__v8di)
704 _mm512_undefined_pd (),
705 (__mmask8) -1);
708 extern __inline __m512i
709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
710 _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
712 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
713 (__v8di) __Y,
714 (__v8di) __W,
715 (__mmask8) __U);
718 extern __inline __m512i
719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
720 _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
722 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
723 (__v8di) __Y,
724 (__v8di)
725 _mm512_setzero_si512 (),
726 (__mmask8) __U);
729 extern __inline __m512i
730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731 _mm512_srav_epi64 (__m512i __X, __m512i __Y)
733 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
734 (__v8di) __Y,
735 (__v8di)
736 _mm512_undefined_si512 (),
737 (__mmask8) -1);
740 extern __inline __m512i
741 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
742 _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
744 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
745 (__v8di) __Y,
746 (__v8di) __W,
747 (__mmask8) __U);
750 extern __inline __m512i
751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
752 _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
754 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
755 (__v8di) __Y,
756 (__v8di)
757 _mm512_setzero_si512 (),
758 (__mmask8) __U);
761 extern __inline __m512i
762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
763 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
765 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
766 (__v8di) __Y,
767 (__v8di)
768 _mm512_undefined_si512 (),
769 (__mmask8) -1);
772 extern __inline __m512i
773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
774 _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
776 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
777 (__v8di) __Y,
778 (__v8di) __W,
779 (__mmask8) __U);
782 extern __inline __m512i
783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
784 _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
786 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
787 (__v8di) __Y,
788 (__v8di)
789 _mm512_setzero_si512 (),
790 (__mmask8) __U);
793 extern __inline __m512i
794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
795 _mm512_add_epi32 (__m512i __A, __m512i __B)
797 return (__m512i) ((__v16su) __A + (__v16su) __B);
800 extern __inline __m512i
801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
802 _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
804 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
805 (__v16si) __B,
806 (__v16si) __W,
807 (__mmask16) __U);
810 extern __inline __m512i
811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
812 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
814 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
815 (__v16si) __B,
816 (__v16si)
817 _mm512_setzero_si512 (),
818 (__mmask16) __U);
821 extern __inline __m512i
822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
823 _mm512_mul_epi32 (__m512i __X, __m512i __Y)
825 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
826 (__v16si) __Y,
827 (__v8di)
828 _mm512_undefined_si512 (),
829 (__mmask8) -1);
832 extern __inline __m512i
833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834 _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
836 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
837 (__v16si) __Y,
838 (__v8di) __W, __M);
841 extern __inline __m512i
842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843 _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
845 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
846 (__v16si) __Y,
847 (__v8di)
848 _mm512_setzero_si512 (),
849 __M);
852 extern __inline __m512i
853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
854 _mm512_sub_epi32 (__m512i __A, __m512i __B)
856 return (__m512i) ((__v16su) __A - (__v16su) __B);
859 extern __inline __m512i
860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
861 _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
863 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
864 (__v16si) __B,
865 (__v16si) __W,
866 (__mmask16) __U);
869 extern __inline __m512i
870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
871 _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
873 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
874 (__v16si) __B,
875 (__v16si)
876 _mm512_setzero_si512 (),
877 (__mmask16) __U);
880 extern __inline __m512i
881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882 _mm512_mul_epu32 (__m512i __X, __m512i __Y)
884 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
885 (__v16si) __Y,
886 (__v8di)
887 _mm512_undefined_si512 (),
888 (__mmask8) -1);
891 extern __inline __m512i
892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
893 _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
895 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
896 (__v16si) __Y,
897 (__v8di) __W, __M);
900 extern __inline __m512i
901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
902 _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
904 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
905 (__v16si) __Y,
906 (__v8di)
907 _mm512_setzero_si512 (),
908 __M);
911 #ifdef __OPTIMIZE__
912 extern __inline __m512i
913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
914 _mm512_slli_epi64 (__m512i __A, unsigned int __B)
916 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
917 (__v8di)
918 _mm512_undefined_si512 (),
919 (__mmask8) -1);
922 extern __inline __m512i
923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
924 _mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
925 unsigned int __B)
927 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
928 (__v8di) __W,
929 (__mmask8) __U);
932 extern __inline __m512i
933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
934 _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
936 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
937 (__v8di)
938 _mm512_setzero_si512 (),
939 (__mmask8) __U);
941 #else
942 #define _mm512_slli_epi64(X, C) \
943 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
944 (__v8di)(__m512i)_mm512_undefined_si512 (),\
945 (__mmask8)-1))
947 #define _mm512_mask_slli_epi64(W, U, X, C) \
948 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
949 (__v8di)(__m512i)(W),\
950 (__mmask8)(U)))
952 #define _mm512_maskz_slli_epi64(U, X, C) \
953 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
954 (__v8di)(__m512i)_mm512_setzero_si512 (),\
955 (__mmask8)(U)))
956 #endif
958 extern __inline __m512i
959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
960 _mm512_sll_epi64 (__m512i __A, __m128i __B)
962 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
963 (__v2di) __B,
964 (__v8di)
965 _mm512_undefined_si512 (),
966 (__mmask8) -1);
969 extern __inline __m512i
970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
971 _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
973 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
974 (__v2di) __B,
975 (__v8di) __W,
976 (__mmask8) __U);
979 extern __inline __m512i
980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
981 _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
983 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
984 (__v2di) __B,
985 (__v8di)
986 _mm512_setzero_si512 (),
987 (__mmask8) __U);
990 #ifdef __OPTIMIZE__
991 extern __inline __m512i
992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
993 _mm512_srli_epi64 (__m512i __A, unsigned int __B)
995 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
996 (__v8di)
997 _mm512_undefined_si512 (),
998 (__mmask8) -1);
1001 extern __inline __m512i
1002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1003 _mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
1004 __m512i __A, unsigned int __B)
1006 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1007 (__v8di) __W,
1008 (__mmask8) __U);
1011 extern __inline __m512i
1012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1013 _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1015 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
1016 (__v8di)
1017 _mm512_setzero_si512 (),
1018 (__mmask8) __U);
1020 #else
1021 #define _mm512_srli_epi64(X, C) \
1022 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1023 (__v8di)(__m512i)_mm512_undefined_si512 (),\
1024 (__mmask8)-1))
1026 #define _mm512_mask_srli_epi64(W, U, X, C) \
1027 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1028 (__v8di)(__m512i)(W),\
1029 (__mmask8)(U)))
1031 #define _mm512_maskz_srli_epi64(U, X, C) \
1032 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1033 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1034 (__mmask8)(U)))
1035 #endif
1037 extern __inline __m512i
1038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1039 _mm512_srl_epi64 (__m512i __A, __m128i __B)
1041 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1042 (__v2di) __B,
1043 (__v8di)
1044 _mm512_undefined_si512 (),
1045 (__mmask8) -1);
1048 extern __inline __m512i
1049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1050 _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1052 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1053 (__v2di) __B,
1054 (__v8di) __W,
1055 (__mmask8) __U);
1058 extern __inline __m512i
1059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1060 _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1062 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
1063 (__v2di) __B,
1064 (__v8di)
1065 _mm512_setzero_si512 (),
1066 (__mmask8) __U);
1069 #ifdef __OPTIMIZE__
1070 extern __inline __m512i
1071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1072 _mm512_srai_epi64 (__m512i __A, unsigned int __B)
1074 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1075 (__v8di)
1076 _mm512_undefined_si512 (),
1077 (__mmask8) -1);
1080 extern __inline __m512i
1081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1082 _mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
1083 unsigned int __B)
1085 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1086 (__v8di) __W,
1087 (__mmask8) __U);
1090 extern __inline __m512i
1091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1092 _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
1094 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
1095 (__v8di)
1096 _mm512_setzero_si512 (),
1097 (__mmask8) __U);
1099 #else
1100 #define _mm512_srai_epi64(X, C) \
1101 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1102 (__v8di)(__m512i)_mm512_undefined_si512 (),\
1103 (__mmask8)-1))
1105 #define _mm512_mask_srai_epi64(W, U, X, C) \
1106 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1107 (__v8di)(__m512i)(W),\
1108 (__mmask8)(U)))
1110 #define _mm512_maskz_srai_epi64(U, X, C) \
1111 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
1112 (__v8di)(__m512i)_mm512_setzero_si512 (),\
1113 (__mmask8)(U)))
1114 #endif
1116 extern __inline __m512i
1117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1118 _mm512_sra_epi64 (__m512i __A, __m128i __B)
1120 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1121 (__v2di) __B,
1122 (__v8di)
1123 _mm512_undefined_si512 (),
1124 (__mmask8) -1);
1127 extern __inline __m512i
1128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1129 _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
1131 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1132 (__v2di) __B,
1133 (__v8di) __W,
1134 (__mmask8) __U);
1137 extern __inline __m512i
1138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1139 _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
1141 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
1142 (__v2di) __B,
1143 (__v8di)
1144 _mm512_setzero_si512 (),
1145 (__mmask8) __U);
1148 #ifdef __OPTIMIZE__
1149 extern __inline __m512i
1150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 _mm512_slli_epi32 (__m512i __A, unsigned int __B)
1153 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1154 (__v16si)
1155 _mm512_undefined_si512 (),
1156 (__mmask16) -1);
1159 extern __inline __m512i
1160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 _mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1162 unsigned int __B)
1164 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1165 (__v16si) __W,
1166 (__mmask16) __U);
1169 extern __inline __m512i
1170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1171 _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1173 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
1174 (__v16si)
1175 _mm512_setzero_si512 (),
1176 (__mmask16) __U);
1178 #else
1179 #define _mm512_slli_epi32(X, C) \
1180 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1181 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1182 (__mmask16)-1))
1184 #define _mm512_mask_slli_epi32(W, U, X, C) \
1185 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1186 (__v16si)(__m512i)(W),\
1187 (__mmask16)(U)))
1189 #define _mm512_maskz_slli_epi32(U, X, C) \
1190 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1191 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1192 (__mmask16)(U)))
1193 #endif
1195 extern __inline __m512i
1196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1197 _mm512_sll_epi32 (__m512i __A, __m128i __B)
1199 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1200 (__v4si) __B,
1201 (__v16si)
1202 _mm512_undefined_si512 (),
1203 (__mmask16) -1);
1206 extern __inline __m512i
1207 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208 _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1210 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1211 (__v4si) __B,
1212 (__v16si) __W,
1213 (__mmask16) __U);
1216 extern __inline __m512i
1217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1220 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
1221 (__v4si) __B,
1222 (__v16si)
1223 _mm512_setzero_si512 (),
1224 (__mmask16) __U);
1227 #ifdef __OPTIMIZE__
1228 extern __inline __m512i
1229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1230 _mm512_srli_epi32 (__m512i __A, unsigned int __B)
1232 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1233 (__v16si)
1234 _mm512_undefined_si512 (),
1235 (__mmask16) -1);
1238 extern __inline __m512i
1239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1240 _mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
1241 __m512i __A, unsigned int __B)
1243 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1244 (__v16si) __W,
1245 (__mmask16) __U);
1248 extern __inline __m512i
1249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1250 _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1252 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
1253 (__v16si)
1254 _mm512_setzero_si512 (),
1255 (__mmask16) __U);
1257 #else
1258 #define _mm512_srli_epi32(X, C) \
1259 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1260 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1261 (__mmask16)-1))
1263 #define _mm512_mask_srli_epi32(W, U, X, C) \
1264 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1265 (__v16si)(__m512i)(W),\
1266 (__mmask16)(U)))
1268 #define _mm512_maskz_srli_epi32(U, X, C) \
1269 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1270 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1271 (__mmask16)(U)))
1272 #endif
1274 extern __inline __m512i
1275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1276 _mm512_srl_epi32 (__m512i __A, __m128i __B)
1278 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1279 (__v4si) __B,
1280 (__v16si)
1281 _mm512_undefined_si512 (),
1282 (__mmask16) -1);
1285 extern __inline __m512i
1286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1287 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1289 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1290 (__v4si) __B,
1291 (__v16si) __W,
1292 (__mmask16) __U);
1295 extern __inline __m512i
1296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1297 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1299 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
1300 (__v4si) __B,
1301 (__v16si)
1302 _mm512_setzero_si512 (),
1303 (__mmask16) __U);
1306 #ifdef __OPTIMIZE__
1307 extern __inline __m512i
1308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1309 _mm512_srai_epi32 (__m512i __A, unsigned int __B)
1311 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1312 (__v16si)
1313 _mm512_undefined_si512 (),
1314 (__mmask16) -1);
1317 extern __inline __m512i
1318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1319 _mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
1320 unsigned int __B)
1322 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1323 (__v16si) __W,
1324 (__mmask16) __U);
1327 extern __inline __m512i
1328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1329 _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
1331 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
1332 (__v16si)
1333 _mm512_setzero_si512 (),
1334 (__mmask16) __U);
1336 #else
1337 #define _mm512_srai_epi32(X, C) \
1338 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1339 (__v16si)(__m512i)_mm512_undefined_si512 (),\
1340 (__mmask16)-1))
1342 #define _mm512_mask_srai_epi32(W, U, X, C) \
1343 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1344 (__v16si)(__m512i)(W),\
1345 (__mmask16)(U)))
1347 #define _mm512_maskz_srai_epi32(U, X, C) \
1348 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
1349 (__v16si)(__m512i)_mm512_setzero_si512 (),\
1350 (__mmask16)(U)))
1351 #endif
1353 extern __inline __m512i
1354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1355 _mm512_sra_epi32 (__m512i __A, __m128i __B)
1357 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1358 (__v4si) __B,
1359 (__v16si)
1360 _mm512_undefined_si512 (),
1361 (__mmask16) -1);
1364 extern __inline __m512i
1365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1366 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
1368 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1369 (__v4si) __B,
1370 (__v16si) __W,
1371 (__mmask16) __U);
1374 extern __inline __m512i
1375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1376 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
1378 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
1379 (__v4si) __B,
1380 (__v16si)
1381 _mm512_setzero_si512 (),
1382 (__mmask16) __U);
1385 #ifdef __OPTIMIZE__
1386 extern __inline __m128d
1387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1388 _mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
1390 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
1391 (__v2df) __B,
1392 __R);
1395 extern __inline __m128
1396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1397 _mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
1399 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
1400 (__v4sf) __B,
1401 __R);
1404 extern __inline __m128d
1405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1406 _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
1408 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
1409 (__v2df) __B,
1410 __R);
1413 extern __inline __m128
1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
1417 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
1418 (__v4sf) __B,
1419 __R);
1422 #else
1423 #define _mm_add_round_sd(A, B, C) \
1424 (__m128d)__builtin_ia32_addsd_round(A, B, C)
1426 #define _mm_add_round_ss(A, B, C) \
1427 (__m128)__builtin_ia32_addss_round(A, B, C)
1429 #define _mm_sub_round_sd(A, B, C) \
1430 (__m128d)__builtin_ia32_subsd_round(A, B, C)
1432 #define _mm_sub_round_ss(A, B, C) \
1433 (__m128)__builtin_ia32_subss_round(A, B, C)
1434 #endif
1436 #ifdef __OPTIMIZE__
1437 extern __inline __m512i
1438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1439 _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1441 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1442 (__v8di) __B,
1443 (__v8di) __C, imm,
1444 (__mmask8) -1);
1447 extern __inline __m512i
1448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1449 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
1450 __m512i __C, const int imm)
1452 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
1453 (__v8di) __B,
1454 (__v8di) __C, imm,
1455 (__mmask8) __U);
1458 extern __inline __m512i
1459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1460 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
1461 __m512i __C, const int imm)
1463 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
1464 (__v8di) __B,
1465 (__v8di) __C,
1466 imm, (__mmask8) __U);
1469 extern __inline __m512i
1470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1471 _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
1473 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1474 (__v16si) __B,
1475 (__v16si) __C,
1476 imm, (__mmask16) -1);
1479 extern __inline __m512i
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
1482 __m512i __C, const int imm)
1484 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
1485 (__v16si) __B,
1486 (__v16si) __C,
1487 imm, (__mmask16) __U);
1490 extern __inline __m512i
1491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1492 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
1493 __m512i __C, const int imm)
1495 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
1496 (__v16si) __B,
1497 (__v16si) __C,
1498 imm, (__mmask16) __U);
1500 #else
1501 #define _mm512_ternarylogic_epi64(A, B, C, I) \
1502 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1503 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
1504 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
1505 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
1506 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1507 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
1508 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
1509 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
1510 #define _mm512_ternarylogic_epi32(A, B, C, I) \
1511 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1512 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1513 (__mmask16)-1))
1514 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
1515 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
1516 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1517 (__mmask16)(U)))
1518 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
1519 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
1520 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
1521 (__mmask16)(U)))
1522 #endif
1524 extern __inline __m512d
1525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1526 _mm512_rcp14_pd (__m512d __A)
1528 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1529 (__v8df)
1530 _mm512_undefined_pd (),
1531 (__mmask8) -1);
1534 extern __inline __m512d
1535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1538 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1539 (__v8df) __W,
1540 (__mmask8) __U);
1543 extern __inline __m512d
1544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1545 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1548 (__v8df)
1549 _mm512_setzero_pd (),
1550 (__mmask8) __U);
1553 extern __inline __m512
1554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1555 _mm512_rcp14_ps (__m512 __A)
1557 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1558 (__v16sf)
1559 _mm512_undefined_ps (),
1560 (__mmask16) -1);
1563 extern __inline __m512
1564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1565 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1567 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1568 (__v16sf) __W,
1569 (__mmask16) __U);
1572 extern __inline __m512
1573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1574 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1577 (__v16sf)
1578 _mm512_setzero_ps (),
1579 (__mmask16) __U);
1582 extern __inline __m128d
1583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1584 _mm_rcp14_sd (__m128d __A, __m128d __B)
1586 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
1587 (__v2df) __A);
1590 extern __inline __m128
1591 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1592 _mm_rcp14_ss (__m128 __A, __m128 __B)
1594 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
1595 (__v4sf) __A);
1598 extern __inline __m512d
1599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1600 _mm512_rsqrt14_pd (__m512d __A)
1602 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1603 (__v8df)
1604 _mm512_undefined_pd (),
1605 (__mmask8) -1);
1608 extern __inline __m512d
1609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1610 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1612 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1613 (__v8df) __W,
1614 (__mmask8) __U);
1617 extern __inline __m512d
1618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1619 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1622 (__v8df)
1623 _mm512_setzero_pd (),
1624 (__mmask8) __U);
1627 extern __inline __m512
1628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1629 _mm512_rsqrt14_ps (__m512 __A)
1631 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1632 (__v16sf)
1633 _mm512_undefined_ps (),
1634 (__mmask16) -1);
1637 extern __inline __m512
1638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1639 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1641 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1642 (__v16sf) __W,
1643 (__mmask16) __U);
1646 extern __inline __m512
1647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1648 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1651 (__v16sf)
1652 _mm512_setzero_ps (),
1653 (__mmask16) __U);
1656 extern __inline __m128d
1657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1658 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
1660 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
1661 (__v2df) __A);
1664 extern __inline __m128
1665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1666 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
1668 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
1669 (__v4sf) __A);
1672 #ifdef __OPTIMIZE__
1673 extern __inline __m512d
1674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1675 _mm512_sqrt_round_pd (__m512d __A, const int __R)
1677 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1678 (__v8df)
1679 _mm512_undefined_pd (),
1680 (__mmask8) -1, __R);
1683 extern __inline __m512d
1684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685 _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1686 const int __R)
1688 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1689 (__v8df) __W,
1690 (__mmask8) __U, __R);
1693 extern __inline __m512d
1694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1698 (__v8df)
1699 _mm512_setzero_pd (),
1700 (__mmask8) __U, __R);
1703 extern __inline __m512
1704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 _mm512_sqrt_round_ps (__m512 __A, const int __R)
1707 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1708 (__v16sf)
1709 _mm512_undefined_ps (),
1710 (__mmask16) -1, __R);
1713 extern __inline __m512
1714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1715 _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
1717 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1718 (__v16sf) __W,
1719 (__mmask16) __U, __R);
1722 extern __inline __m512
1723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1724 _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
1727 (__v16sf)
1728 _mm512_setzero_ps (),
1729 (__mmask16) __U, __R);
1732 extern __inline __m128d
1733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1734 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
1736 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
1737 (__v2df) __A,
1738 __R);
1741 extern __inline __m128
1742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1743 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
1745 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
1746 (__v4sf) __A,
1747 __R);
1749 #else
1750 #define _mm512_sqrt_round_pd(A, C) \
1751 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
1753 #define _mm512_mask_sqrt_round_pd(W, U, A, C) \
1754 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
1756 #define _mm512_maskz_sqrt_round_pd(U, A, C) \
1757 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
1759 #define _mm512_sqrt_round_ps(A, C) \
1760 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
1762 #define _mm512_mask_sqrt_round_ps(W, U, A, C) \
1763 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
1765 #define _mm512_maskz_sqrt_round_ps(U, A, C) \
1766 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
1768 #define _mm_sqrt_round_sd(A, B, C) \
1769 (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)
1771 #define _mm_sqrt_round_ss(A, B, C) \
1772 (__m128)__builtin_ia32_sqrtss_round(A, B, C)
1773 #endif
1775 extern __inline __m512i
1776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1777 _mm512_cvtepi8_epi32 (__m128i __A)
1779 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1780 (__v16si)
1781 _mm512_undefined_si512 (),
1782 (__mmask16) -1);
1785 extern __inline __m512i
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1789 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1790 (__v16si) __W,
1791 (__mmask16) __U);
1794 extern __inline __m512i
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1799 (__v16si)
1800 _mm512_setzero_si512 (),
1801 (__mmask16) __U);
1804 extern __inline __m512i
1805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1806 _mm512_cvtepi8_epi64 (__m128i __A)
1808 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1809 (__v8di)
1810 _mm512_undefined_si512 (),
1811 (__mmask8) -1);
1814 extern __inline __m512i
1815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1816 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1818 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1819 (__v8di) __W,
1820 (__mmask8) __U);
1823 extern __inline __m512i
1824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1825 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1827 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1828 (__v8di)
1829 _mm512_setzero_si512 (),
1830 (__mmask8) __U);
1833 extern __inline __m512i
1834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835 _mm512_cvtepi16_epi32 (__m256i __A)
1837 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1838 (__v16si)
1839 _mm512_undefined_si512 (),
1840 (__mmask16) -1);
1843 extern __inline __m512i
1844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1845 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1847 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1848 (__v16si) __W,
1849 (__mmask16) __U);
1852 extern __inline __m512i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1857 (__v16si)
1858 _mm512_setzero_si512 (),
1859 (__mmask16) __U);
1862 extern __inline __m512i
1863 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1864 _mm512_cvtepi16_epi64 (__m128i __A)
1866 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1867 (__v8di)
1868 _mm512_undefined_si512 (),
1869 (__mmask8) -1);
1872 extern __inline __m512i
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1876 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1877 (__v8di) __W,
1878 (__mmask8) __U);
1881 extern __inline __m512i
1882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1883 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1886 (__v8di)
1887 _mm512_setzero_si512 (),
1888 (__mmask8) __U);
1891 extern __inline __m512i
1892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893 _mm512_cvtepi32_epi64 (__m256i __X)
1895 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1896 (__v8di)
1897 _mm512_undefined_si512 (),
1898 (__mmask8) -1);
1901 extern __inline __m512i
1902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1903 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1905 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1906 (__v8di) __W,
1907 (__mmask8) __U);
1910 extern __inline __m512i
1911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1912 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1915 (__v8di)
1916 _mm512_setzero_si512 (),
1917 (__mmask8) __U);
1920 extern __inline __m512i
1921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1922 _mm512_cvtepu8_epi32 (__m128i __A)
1924 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1925 (__v16si)
1926 _mm512_undefined_si512 (),
1927 (__mmask16) -1);
1930 extern __inline __m512i
1931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1932 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1934 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1935 (__v16si) __W,
1936 (__mmask16) __U);
1939 extern __inline __m512i
1940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1944 (__v16si)
1945 _mm512_setzero_si512 (),
1946 (__mmask16) __U);
1949 extern __inline __m512i
1950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951 _mm512_cvtepu8_epi64 (__m128i __A)
1953 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1954 (__v8di)
1955 _mm512_undefined_si512 (),
1956 (__mmask8) -1);
1959 extern __inline __m512i
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1963 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1964 (__v8di) __W,
1965 (__mmask8) __U);
1968 extern __inline __m512i
1969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1970 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1973 (__v8di)
1974 _mm512_setzero_si512 (),
1975 (__mmask8) __U);
1978 extern __inline __m512i
1979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1980 _mm512_cvtepu16_epi32 (__m256i __A)
1982 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1983 (__v16si)
1984 _mm512_undefined_si512 (),
1985 (__mmask16) -1);
1988 extern __inline __m512i
1989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1990 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1992 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1993 (__v16si) __W,
1994 (__mmask16) __U);
1997 extern __inline __m512i
1998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1999 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
2002 (__v16si)
2003 _mm512_setzero_si512 (),
2004 (__mmask16) __U);
2007 extern __inline __m512i
2008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2009 _mm512_cvtepu16_epi64 (__m128i __A)
2011 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2012 (__v8di)
2013 _mm512_undefined_si512 (),
2014 (__mmask8) -1);
2017 extern __inline __m512i
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
2021 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2022 (__v8di) __W,
2023 (__mmask8) __U);
2026 extern __inline __m512i
2027 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
2031 (__v8di)
2032 _mm512_setzero_si512 (),
2033 (__mmask8) __U);
2036 extern __inline __m512i
2037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2038 _mm512_cvtepu32_epi64 (__m256i __X)
2040 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2041 (__v8di)
2042 _mm512_undefined_si512 (),
2043 (__mmask8) -1);
2046 extern __inline __m512i
2047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2048 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
2050 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2051 (__v8di) __W,
2052 (__mmask8) __U);
2055 extern __inline __m512i
2056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
2060 (__v8di)
2061 _mm512_setzero_si512 (),
2062 (__mmask8) __U);
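/* Illustrative usage sketch (not part of the original header) for the
   sign/zero-extending conversions above; the helper name is hypothetical.

     #include <immintrin.h>

     __m512i
     widen_bytes (__m512i src, __mmask16 m, __m128i b)
     {
       // Sign-extend 16 bytes to 16 dwords; lanes whose mask bit is clear
       // keep the corresponding element of SRC (merge masking).
       return _mm512_mask_cvtepi8_epi32 (src, m, b);
     }
*/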
2065 #ifdef __OPTIMIZE__
2066 extern __inline __m512d
2067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2068 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
2070 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2071 (__v8df) __B,
2072 (__v8df)
2073 _mm512_undefined_pd (),
2074 (__mmask8) -1, __R);
2077 extern __inline __m512d
2078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2079 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2080 __m512d __B, const int __R)
2082 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2083 (__v8df) __B,
2084 (__v8df) __W,
2085 (__mmask8) __U, __R);
2088 extern __inline __m512d
2089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2090 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2091 const int __R)
2093 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
2094 (__v8df) __B,
2095 (__v8df)
2096 _mm512_setzero_pd (),
2097 (__mmask8) __U, __R);
2100 extern __inline __m512
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
2104 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2105 (__v16sf) __B,
2106 (__v16sf)
2107 _mm512_undefined_ps (),
2108 (__mmask16) -1, __R);
2111 extern __inline __m512
2112 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2113 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2114 __m512 __B, const int __R)
2116 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2117 (__v16sf) __B,
2118 (__v16sf) __W,
2119 (__mmask16) __U, __R);
2122 extern __inline __m512
2123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2124 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2126 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2127 (__v16sf) __B,
2128 (__v16sf)
2129 _mm512_setzero_ps (),
2130 (__mmask16) __U, __R);
2133 extern __inline __m512d
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2137 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2138 (__v8df) __B,
2139 (__v8df)
2140 _mm512_undefined_pd (),
2141 (__mmask8) -1, __R);
2144 extern __inline __m512d
2145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2146 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2147 __m512d __B, const int __R)
2149 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2150 (__v8df) __B,
2151 (__v8df) __W,
2152 (__mmask8) __U, __R);
2155 extern __inline __m512d
2156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2157 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2158 const int __R)
2160 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2161 (__v8df) __B,
2162 (__v8df)
2163 _mm512_setzero_pd (),
2164 (__mmask8) __U, __R);
2167 extern __inline __m512
2168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2169 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2171 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2172 (__v16sf) __B,
2173 (__v16sf)
2174 _mm512_undefined_ps (),
2175 (__mmask16) -1, __R);
2178 extern __inline __m512
2179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2180 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2181 __m512 __B, const int __R)
2183 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2184 (__v16sf) __B,
2185 (__v16sf) __W,
2186 (__mmask16) __U, __R);
2189 extern __inline __m512
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2193 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2194 (__v16sf) __B,
2195 (__v16sf)
2196 _mm512_setzero_ps (),
2197 (__mmask16) __U, __R);
2199 #else
2200 #define _mm512_add_round_pd(A, B, C) \
2201 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2203 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2204 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2206 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2207 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2209 #define _mm512_add_round_ps(A, B, C) \
2210 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2212 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2213 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2215 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2216 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2218 #define _mm512_sub_round_pd(A, B, C) \
2219 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2221 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2222 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2224 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2225 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2227 #define _mm512_sub_round_ps(A, B, C) \
2228 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2230 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2231 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2233 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2234 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2235 #endif
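/* Illustrative usage sketch (not part of the original header): the rounded
   add/sub intrinsics encode the rounding mode in the instruction itself
   instead of reading MXCSR.  The helper name is hypothetical.

     #include <immintrin.h>

     __m512d
     add_nearest_even (__m512d a, __m512d b)
     {
       // Add with round-to-nearest-even and exceptions suppressed.
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }
*/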
2237 #ifdef __OPTIMIZE__
2238 extern __inline __m512d
2239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2240 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2242 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2243 (__v8df) __B,
2244 (__v8df)
2245 _mm512_undefined_pd (),
2246 (__mmask8) -1, __R);
2249 extern __inline __m512d
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2252 __m512d __B, const int __R)
2254 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2255 (__v8df) __B,
2256 (__v8df) __W,
2257 (__mmask8) __U, __R);
2260 extern __inline __m512d
2261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2262 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2263 const int __R)
2265 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2266 (__v8df) __B,
2267 (__v8df)
2268 _mm512_setzero_pd (),
2269 (__mmask8) __U, __R);
2272 extern __inline __m512
2273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2274 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2276 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2277 (__v16sf) __B,
2278 (__v16sf)
2279 _mm512_undefined_ps (),
2280 (__mmask16) -1, __R);
2283 extern __inline __m512
2284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2285 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2286 __m512 __B, const int __R)
2288 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2289 (__v16sf) __B,
2290 (__v16sf) __W,
2291 (__mmask16) __U, __R);
2294 extern __inline __m512
2295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2296 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2298 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2299 (__v16sf) __B,
2300 (__v16sf)
2301 _mm512_setzero_ps (),
2302 (__mmask16) __U, __R);
2305 extern __inline __m512d
2306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2309 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2310 (__v8df) __V,
2311 (__v8df)
2312 _mm512_undefined_pd (),
2313 (__mmask8) -1, __R);
2316 extern __inline __m512d
2317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2318 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2319 __m512d __V, const int __R)
2321 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2322 (__v8df) __V,
2323 (__v8df) __W,
2324 (__mmask8) __U, __R);
2327 extern __inline __m512d
2328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2330 const int __R)
2332 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2333 (__v8df) __V,
2334 (__v8df)
2335 _mm512_setzero_pd (),
2336 (__mmask8) __U, __R);
2339 extern __inline __m512
2340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2343 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2344 (__v16sf) __B,
2345 (__v16sf)
2346 _mm512_undefined_ps (),
2347 (__mmask16) -1, __R);
2350 extern __inline __m512
2351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2353 __m512 __B, const int __R)
2355 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2356 (__v16sf) __B,
2357 (__v16sf) __W,
2358 (__mmask16) __U, __R);
2361 extern __inline __m512
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2365 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2366 (__v16sf) __B,
2367 (__v16sf)
2368 _mm512_setzero_ps (),
2369 (__mmask16) __U, __R);
2372 extern __inline __m128d
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2376 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2377 (__v2df) __B,
2378 __R);
2381 extern __inline __m128
2382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2383 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2385 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2386 (__v4sf) __B,
2387 __R);
2390 extern __inline __m128d
2391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2392 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2394 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2395 (__v2df) __B,
2396 __R);
2399 extern __inline __m128
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2403 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2404 (__v4sf) __B,
2405 __R);
2408 #else
2409 #define _mm512_mul_round_pd(A, B, C) \
2410 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2412 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2413 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2415 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2416 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2418 #define _mm512_mul_round_ps(A, B, C) \
2419 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2421 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2422 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2424 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2425 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2427 #define _mm512_div_round_pd(A, B, C) \
2428 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2430 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2431 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2433 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2434 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2436 #define _mm512_div_round_ps(A, B, C) \
2437 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2439 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2440 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2442 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2443 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2445 #define _mm_mul_round_sd(A, B, C) \
2446 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2448 #define _mm_mul_round_ss(A, B, C) \
2449 (__m128)__builtin_ia32_mulss_round(A, B, C)
2451 #define _mm_div_round_sd(A, B, C) \
2452 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2454 #define _mm_div_round_ss(A, B, C) \
2455 (__m128)__builtin_ia32_divss_round(A, B, C)
2456 #endif
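/* Illustrative usage sketch (not part of the original header) for the
   rounded multiply/divide intrinsics; the helper name is hypothetical.

     #include <immintrin.h>

     __m512
     div_current_mode (__m512 a, __m512 b)
     {
       // Divide using whatever rounding mode MXCSR currently selects.
       return _mm512_div_round_ps (a, b, _MM_FROUND_CUR_DIRECTION);
     }
*/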
2458 #ifdef __OPTIMIZE__
2459 extern __inline __m512d
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2463 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2464 (__v8df) __B,
2465 (__v8df)
2466 _mm512_undefined_pd (),
2467 (__mmask8) -1, __R);
2470 extern __inline __m512d
2471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2472 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2473 __m512d __B, const int __R)
2475 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2476 (__v8df) __B,
2477 (__v8df) __W,
2478 (__mmask8) __U, __R);
2481 extern __inline __m512d
2482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2483 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2484 const int __R)
2486 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2487 (__v8df) __B,
2488 (__v8df)
2489 _mm512_setzero_pd (),
2490 (__mmask8) __U, __R);
2493 extern __inline __m512
2494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2497 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2498 (__v16sf) __B,
2499 (__v16sf)
2500 _mm512_undefined_ps (),
2501 (__mmask16) -1, __R);
2504 extern __inline __m512
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2507 __m512 __B, const int __R)
2509 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2510 (__v16sf) __B,
2511 (__v16sf) __W,
2512 (__mmask16) __U, __R);
2515 extern __inline __m512
2516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2517 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2519 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2520 (__v16sf) __B,
2521 (__v16sf)
2522 _mm512_setzero_ps (),
2523 (__mmask16) __U, __R);
2526 extern __inline __m512d
2527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2528 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2530 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2531 (__v8df) __B,
2532 (__v8df)
2533 _mm512_undefined_pd (),
2534 (__mmask8) -1, __R);
2537 extern __inline __m512d
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2540 __m512d __B, const int __R)
2542 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2543 (__v8df) __B,
2544 (__v8df) __W,
2545 (__mmask8) __U, __R);
2548 extern __inline __m512d
2549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2550 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2551 const int __R)
2553 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2554 (__v8df) __B,
2555 (__v8df)
2556 _mm512_setzero_pd (),
2557 (__mmask8) __U, __R);
2560 extern __inline __m512
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2564 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2565 (__v16sf) __B,
2566 (__v16sf)
2567 _mm512_undefined_ps (),
2568 (__mmask16) -1, __R);
2571 extern __inline __m512
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2574 __m512 __B, const int __R)
2576 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2577 (__v16sf) __B,
2578 (__v16sf) __W,
2579 (__mmask16) __U, __R);
2582 extern __inline __m512
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2586 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2587 (__v16sf) __B,
2588 (__v16sf)
2589 _mm512_setzero_ps (),
2590 (__mmask16) __U, __R);
2592 #else
2593 #define _mm512_max_round_pd(A, B, R) \
2594 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2596 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2597 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2599 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2600 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2602 #define _mm512_max_round_ps(A, B, R) \
2603 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2605 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2606 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2608 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2609 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2611 #define _mm512_min_round_pd(A, B, R) \
2612 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
2614 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2615 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2617 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2618 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2620 #define _mm512_min_round_ps(A, B, R) \
2621 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
2623 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2624 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2626 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2627 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2628 #endif
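/* Illustrative usage sketch (not part of the original header): the rounded
   max/min intrinsics only honour the SAE bit, so _MM_FROUND_CUR_DIRECTION
   (or _MM_FROUND_NO_EXC) is the usual last argument.  The helper name is
   hypothetical.

     #include <immintrin.h>

     __m512d
     clamp_pd (__m512d v, __m512d lo, __m512d hi)
     {
       // Clamp each double in V to the range [LO, HI].
       v = _mm512_max_round_pd (v, lo, _MM_FROUND_CUR_DIRECTION);
       return _mm512_min_round_pd (v, hi, _MM_FROUND_CUR_DIRECTION);
     }
*/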
2630 #ifdef __OPTIMIZE__
2631 extern __inline __m512d
2632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2633 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2635 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2636 (__v8df) __B,
2637 (__v8df)
2638 _mm512_undefined_pd (),
2639 (__mmask8) -1, __R);
2642 extern __inline __m512d
2643 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2644 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2645 __m512d __B, const int __R)
2647 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2648 (__v8df) __B,
2649 (__v8df) __W,
2650 (__mmask8) __U, __R);
2653 extern __inline __m512d
2654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2656 const int __R)
2658 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2659 (__v8df) __B,
2660 (__v8df)
2661 _mm512_setzero_pd (),
2662 (__mmask8) __U, __R);
2665 extern __inline __m512
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2669 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2670 (__v16sf) __B,
2671 (__v16sf)
2672 _mm512_undefined_ps (),
2673 (__mmask16) -1, __R);
2676 extern __inline __m512
2677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2678 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2679 __m512 __B, const int __R)
2681 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2682 (__v16sf) __B,
2683 (__v16sf) __W,
2684 (__mmask16) __U, __R);
2687 extern __inline __m512
2688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2689 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2690 const int __R)
2692 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2693 (__v16sf) __B,
2694 (__v16sf)
2695 _mm512_setzero_ps (),
2696 (__mmask16) __U, __R);
2699 extern __inline __m128d
2700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2701 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2703 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2704 (__v2df) __B,
2705 __R);
2708 extern __inline __m128
2709 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2712 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2713 (__v4sf) __B,
2714 __R);
2716 #else
2717 #define _mm512_scalef_round_pd(A, B, C) \
2718 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
2720 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2721 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
2723 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
2724 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2726 #define _mm512_scalef_round_ps(A, B, C) \
2727 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
2729 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2730 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
2732 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
2733 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2735 #define _mm_scalef_round_sd(A, B, C) \
2736 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
2738 #define _mm_scalef_round_ss(A, B, C) \
2739 (__m128)__builtin_ia32_scalefss_round(A, B, C)
2740 #endif
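/* Illustrative usage sketch (not part of the original header): vscalef
   computes A * 2^floor(B) element-wise, which makes it a vector ldexp.
   The helper name is hypothetical.

     #include <immintrin.h>

     __m512
     ldexp_ps (__m512 x, __m512 e)
     {
       // x * 2^floor(e) for each of the sixteen floats.
       return _mm512_scalef_round_ps (x, e, _MM_FROUND_CUR_DIRECTION);
     }
*/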
2742 #ifdef __OPTIMIZE__
2743 extern __inline __m512d
2744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2745 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2747 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2748 (__v8df) __B,
2749 (__v8df) __C,
2750 (__mmask8) -1, __R);
2753 extern __inline __m512d
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2756 __m512d __C, const int __R)
2758 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759 (__v8df) __B,
2760 (__v8df) __C,
2761 (__mmask8) __U, __R);
2764 extern __inline __m512d
2765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2766 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2767 __mmask8 __U, const int __R)
2769 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2770 (__v8df) __B,
2771 (__v8df) __C,
2772 (__mmask8) __U, __R);
2775 extern __inline __m512d
2776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2777 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2778 __m512d __C, const int __R)
2780 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2781 (__v8df) __B,
2782 (__v8df) __C,
2783 (__mmask8) __U, __R);
2786 extern __inline __m512
2787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2791 (__v16sf) __B,
2792 (__v16sf) __C,
2793 (__mmask16) -1, __R);
2796 extern __inline __m512
2797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2798 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2799 __m512 __C, const int __R)
2801 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2802 (__v16sf) __B,
2803 (__v16sf) __C,
2804 (__mmask16) __U, __R);
2807 extern __inline __m512
2808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2810 __mmask16 __U, const int __R)
2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2813 (__v16sf) __B,
2814 (__v16sf) __C,
2815 (__mmask16) __U, __R);
2818 extern __inline __m512
2819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2820 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2821 __m512 __C, const int __R)
2823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2824 (__v16sf) __B,
2825 (__v16sf) __C,
2826 (__mmask16) __U, __R);
2829 extern __inline __m512d
2830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2831 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2833 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2834 (__v8df) __B,
2835 -(__v8df) __C,
2836 (__mmask8) -1, __R);
2839 extern __inline __m512d
2840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2841 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2842 __m512d __C, const int __R)
2844 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2845 (__v8df) __B,
2846 -(__v8df) __C,
2847 (__mmask8) __U, __R);
2850 extern __inline __m512d
2851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2852 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2853 __mmask8 __U, const int __R)
2855 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2856 (__v8df) __B,
2857 (__v8df) __C,
2858 (__mmask8) __U, __R);
2861 extern __inline __m512d
2862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2863 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2864 __m512d __C, const int __R)
2866 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2867 (__v8df) __B,
2868 -(__v8df) __C,
2869 (__mmask8) __U, __R);
2872 extern __inline __m512
2873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2874 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2876 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2877 (__v16sf) __B,
2878 -(__v16sf) __C,
2879 (__mmask16) -1, __R);
2882 extern __inline __m512
2883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2884 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2885 __m512 __C, const int __R)
2887 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2888 (__v16sf) __B,
2889 -(__v16sf) __C,
2890 (__mmask16) __U, __R);
2893 extern __inline __m512
2894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2895 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2896 __mmask16 __U, const int __R)
2898 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2899 (__v16sf) __B,
2900 (__v16sf) __C,
2901 (__mmask16) __U, __R);
2904 extern __inline __m512
2905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2906 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2907 __m512 __C, const int __R)
2909 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2910 (__v16sf) __B,
2911 -(__v16sf) __C,
2912 (__mmask16) __U, __R);
2915 extern __inline __m512d
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2920 (__v8df) __B,
2921 (__v8df) __C,
2922 (__mmask8) -1, __R);
2925 extern __inline __m512d
2926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2927 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2928 __m512d __C, const int __R)
2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2931 (__v8df) __B,
2932 (__v8df) __C,
2933 (__mmask8) __U, __R);
2936 extern __inline __m512d
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2939 __mmask8 __U, const int __R)
2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2942 (__v8df) __B,
2943 (__v8df) __C,
2944 (__mmask8) __U, __R);
2947 extern __inline __m512d
2948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2950 __m512d __C, const int __R)
2952 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2953 (__v8df) __B,
2954 (__v8df) __C,
2955 (__mmask8) __U, __R);
2958 extern __inline __m512
2959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2960 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2962 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2963 (__v16sf) __B,
2964 (__v16sf) __C,
2965 (__mmask16) -1, __R);
2968 extern __inline __m512
2969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2970 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2971 __m512 __C, const int __R)
2973 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2974 (__v16sf) __B,
2975 (__v16sf) __C,
2976 (__mmask16) __U, __R);
2979 extern __inline __m512
2980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2981 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2982 __mmask16 __U, const int __R)
2984 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2985 (__v16sf) __B,
2986 (__v16sf) __C,
2987 (__mmask16) __U, __R);
2990 extern __inline __m512
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2993 __m512 __C, const int __R)
2995 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2996 (__v16sf) __B,
2997 (__v16sf) __C,
2998 (__mmask16) __U, __R);
3001 extern __inline __m512d
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3006 (__v8df) __B,
3007 -(__v8df) __C,
3008 (__mmask8) -1, __R);
3011 extern __inline __m512d
3012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3013 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3014 __m512d __C, const int __R)
3016 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3017 (__v8df) __B,
3018 -(__v8df) __C,
3019 (__mmask8) __U, __R);
3022 extern __inline __m512d
3023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3024 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3025 __mmask8 __U, const int __R)
3027 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3028 (__v8df) __B,
3029 (__v8df) __C,
3030 (__mmask8) __U, __R);
3033 extern __inline __m512d
3034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3036 __m512d __C, const int __R)
3038 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3039 (__v8df) __B,
3040 -(__v8df) __C,
3041 (__mmask8) __U, __R);
3044 extern __inline __m512
3045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3046 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3049 (__v16sf) __B,
3050 -(__v16sf) __C,
3051 (__mmask16) -1, __R);
3054 extern __inline __m512
3055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3056 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3057 __m512 __C, const int __R)
3059 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3060 (__v16sf) __B,
3061 -(__v16sf) __C,
3062 (__mmask16) __U, __R);
3065 extern __inline __m512
3066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3068 __mmask16 __U, const int __R)
3070 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3071 (__v16sf) __B,
3072 (__v16sf) __C,
3073 (__mmask16) __U, __R);
3076 extern __inline __m512
3077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3079 __m512 __C, const int __R)
3081 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3082 (__v16sf) __B,
3083 -(__v16sf) __C,
3084 (__mmask16) __U, __R);
3087 extern __inline __m512d
3088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3089 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3091 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3092 (__v8df) __B,
3093 (__v8df) __C,
3094 (__mmask8) -1, __R);
3097 extern __inline __m512d
3098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3099 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3100 __m512d __C, const int __R)
3102 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3103 (__v8df) __B,
3104 (__v8df) __C,
3105 (__mmask8) __U, __R);
3108 extern __inline __m512d
3109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3110 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
3111 __mmask8 __U, const int __R)
3113 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3114 (__v8df) __B,
3115 (__v8df) __C,
3116 (__mmask8) __U, __R);
3119 extern __inline __m512d
3120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3121 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3122 __m512d __C, const int __R)
3124 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3125 (__v8df) __B,
3126 (__v8df) __C,
3127 (__mmask8) __U, __R);
3130 extern __inline __m512
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3134 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3135 (__v16sf) __B,
3136 (__v16sf) __C,
3137 (__mmask16) -1, __R);
3140 extern __inline __m512
3141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3142 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3143 __m512 __C, const int __R)
3145 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3146 (__v16sf) __B,
3147 (__v16sf) __C,
3148 (__mmask16) __U, __R);
3151 extern __inline __m512
3152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3153 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3154 __mmask16 __U, const int __R)
3156 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3157 (__v16sf) __B,
3158 (__v16sf) __C,
3159 (__mmask16) __U, __R);
3162 extern __inline __m512
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3165 __m512 __C, const int __R)
3167 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3168 (__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U, __R);
3173 extern __inline __m512d
3174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3175 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3177 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3178 (__v8df) __B,
3179 -(__v8df) __C,
3180 (__mmask8) -1, __R);
3183 extern __inline __m512d
3184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3185 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3186 __m512d __C, const int __R)
3188 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3189 (__v8df) __B,
3190 (__v8df) __C,
3191 (__mmask8) __U, __R);
3194 extern __inline __m512d
3195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3196 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3197 __mmask8 __U, const int __R)
3199 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3200 (__v8df) __B,
3201 (__v8df) __C,
3202 (__mmask8) __U, __R);
3205 extern __inline __m512d
3206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3208 __m512d __C, const int __R)
3210 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3211 (__v8df) __B,
3212 -(__v8df) __C,
3213 (__mmask8) __U, __R);
3216 extern __inline __m512
3217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3218 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3220 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3221 (__v16sf) __B,
3222 -(__v16sf) __C,
3223 (__mmask16) -1, __R);
3226 extern __inline __m512
3227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3228 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3229 __m512 __C, const int __R)
3231 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3232 (__v16sf) __B,
3233 (__v16sf) __C,
3234 (__mmask16) __U, __R);
3237 extern __inline __m512
3238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3239 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3240 __mmask16 __U, const int __R)
3242 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3243 (__v16sf) __B,
3244 (__v16sf) __C,
3245 (__mmask16) __U, __R);
3248 extern __inline __m512
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3251 __m512 __C, const int __R)
3253 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3254 (__v16sf) __B,
3255 -(__v16sf) __C,
3256 (__mmask16) __U, __R);
3258 #else
3259 #define _mm512_fmadd_round_pd(A, B, C, R) \
3260 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3262 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3263 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3265 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3266 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3268 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3269 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3271 #define _mm512_fmadd_round_ps(A, B, C, R) \
3272 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3274 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3275 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3277 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3278 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3280 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3281 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3283 #define _mm512_fmsub_round_pd(A, B, C, R) \
3284 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3286 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3287 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3289 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3290 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3292 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3293 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3295 #define _mm512_fmsub_round_ps(A, B, C, R) \
3296 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3298 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3299 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3301 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3302 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3304 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3305 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3307 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3308 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3310 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3311 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3313 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3314 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3316 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3317 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3319 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3320 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3322 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3323 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3325 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3326 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3328 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3329 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3331 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3332 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3334 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3335 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3337 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3338 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3340 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3341 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3343 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3344 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3346 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3347 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3349 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3350 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3352 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3353 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3355 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3356 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3358 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3359 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3361 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3362 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3364 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3365 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3367 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3368 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3370 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3371 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3373 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3374 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3376 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3377 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3379 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3380 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3382 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3383 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3385 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3386 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3388 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3389 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3391 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3392 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3394 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3395 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3397 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3398 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3400 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3401 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3402 #endif
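/* Illustrative usage sketch (not part of the original header): in the
   merge-masked FMA forms the first operand doubles as the pass-through
   value for lanes whose mask bit is clear.  The helper name is hypothetical.

     #include <immintrin.h>

     __m512d
     masked_fma (__m512d a, __mmask8 m, __m512d x, __m512d y)
     {
       // Lanes selected by M become a*x + y; the rest keep A unchanged.
       return _mm512_mask_fmadd_round_pd (a, m, x, y,
                                          _MM_FROUND_TO_NEAREST_INT
                                          | _MM_FROUND_NO_EXC);
     }
*/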
3404 extern __inline __m512i
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm512_abs_epi64 (__m512i __A)
3408 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3409 (__v8di)
3410 _mm512_undefined_si512 (),
3411 (__mmask8) -1);
3414 extern __inline __m512i
3415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3416 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3418 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3419 (__v8di) __W,
3420 (__mmask8) __U);
3423 extern __inline __m512i
3424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3425 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3427 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3428 (__v8di)
3429 _mm512_setzero_si512 (),
3430 (__mmask8) __U);
3433 extern __inline __m512i
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm512_abs_epi32 (__m512i __A)
3437 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3438 (__v16si)
3439 _mm512_undefined_si512 (),
3440 (__mmask16) -1);
3443 extern __inline __m512i
3444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3445 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3447 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3448 (__v16si) __W,
3449 (__mmask16) __U);
3452 extern __inline __m512i
3453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3454 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3457 (__v16si)
3458 _mm512_setzero_si512 (),
3459 (__mmask16) __U);
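/* Illustrative usage sketch (not part of the original header) for the
   masked absolute-value intrinsics above; the helper name is hypothetical.

     #include <immintrin.h>

     __m512i
     abs_selected (__mmask16 m, __m512i v)
     {
       // |v| for the dword lanes selected by M; the remaining lanes are zeroed.
       return _mm512_maskz_abs_epi32 (m, v);
     }
*/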
3462 extern __inline __m512
3463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3464 _mm512_broadcastss_ps (__m128 __A)
3466 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3467 (__v16sf)
3468 _mm512_undefined_ps (),
3469 (__mmask16) -1);
3472 extern __inline __m512
3473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3474 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3476 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3477 (__v16sf) __O, __M);
3480 extern __inline __m512
3481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3482 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3484 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3485 (__v16sf)
3486 _mm512_setzero_ps (),
3487 __M);
3490 extern __inline __m512d
3491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3492 _mm512_broadcastsd_pd (__m128d __A)
3494 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3495 (__v8df)
3496 _mm512_undefined_pd (),
3497 (__mmask8) -1);
3500 extern __inline __m512d
3501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3502 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3504 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3505 (__v8df) __O, __M);
3508 extern __inline __m512d
3509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3510 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3512 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3513 (__v8df)
3514 _mm512_setzero_pd (),
3515 __M);
3518 extern __inline __m512i
3519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3520 _mm512_broadcastd_epi32 (__m128i __A)
3522 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3523 (__v16si)
3524 _mm512_undefined_si512 (),
3525 (__mmask16) -1);
3528 extern __inline __m512i
3529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3532 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3533 (__v16si) __O, __M);
3536 extern __inline __m512i
3537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3538 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3540 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3541 (__v16si)
3542 _mm512_setzero_si512 (),
3543 __M);
3546 extern __inline __m512i
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm512_set1_epi32 (int __A)
3550 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3551 (__v16si)
3552 _mm512_undefined_si512 (),
3553 (__mmask16)(-1));
3556 extern __inline __m512i
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3560 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3561 __M);
3564 extern __inline __m512i
3565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3566 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3568 return (__m512i)
3569 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3570 (__v16si) _mm512_setzero_si512 (),
3571 __M);
3574 extern __inline __m512i
3575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3576 _mm512_broadcastq_epi64 (__m128i __A)
3578 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3579 (__v8di)
3580 _mm512_undefined_si512 (),
3581 (__mmask8) -1);
3584 extern __inline __m512i
3585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3586 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3588 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3589 (__v8di) __O, __M);
3592 extern __inline __m512i
3593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3594 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3596 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3597 (__v8di)
3598 _mm512_setzero_si512 (),
3599 __M);
3602 extern __inline __m512i
3603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3604 _mm512_set1_epi64 (long long __A)
3606 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3607 (__v8di)
3608 _mm512_undefined_si512 (),
3609 (__mmask8)(-1));
3612 extern __inline __m512i
3613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3614 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3616 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3617 __M);
3620 extern __inline __m512i
3621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3624 return (__m512i)
3625 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3626 (__v8di) _mm512_setzero_si512 (),
3627 __M);
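/* Usage sketch (illustrative; the variable names are hypothetical):
   splat a scalar across all lanes, then merge a second value under a
   write-mask.

     __m512i ones   = _mm512_set1_epi32 (1);
     __m512i merged = _mm512_mask_set1_epi32 (ones, (__mmask16) 0x00FF, 7);

   Lanes 0..7 of MERGED hold 7; lanes 8..15 keep the value 1.  */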
3630 extern __inline __m512
3631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3632 _mm512_broadcast_f32x4 (__m128 __A)
3634 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3635 (__v16sf)
3636 _mm512_undefined_ps (),
3637 (__mmask16) -1);
3640 extern __inline __m512
3641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3642 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3644 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3645 (__v16sf) __O,
3646 __M);
3649 extern __inline __m512
3650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3651 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3654 (__v16sf)
3655 _mm512_setzero_ps (),
3656 __M);
3659 extern __inline __m512i
3660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3661 _mm512_broadcast_i32x4 (__m128i __A)
3663 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3664 (__v16si)
3665 _mm512_undefined_si512 (),
3666 (__mmask16) -1);
3669 extern __inline __m512i
3670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3671 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3673 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3674 (__v16si) __O,
3675 __M);
3678 extern __inline __m512i
3679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3680 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3683 (__v16si)
3684 _mm512_setzero_si512 (),
3685 __M);
3688 extern __inline __m512d
3689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3690 _mm512_broadcast_f64x4 (__m256d __A)
3692 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3693 (__v8df)
3694 _mm512_undefined_pd (),
3695 (__mmask8) -1);
3698 extern __inline __m512d
3699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3700 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3702 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3703 (__v8df) __O,
3704 __M);
3707 extern __inline __m512d
3708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3709 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3712 (__v8df)
3713 _mm512_setzero_pd (),
3714 __M);
3717 extern __inline __m512i
3718 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3719 _mm512_broadcast_i64x4 (__m256i __A)
3721 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3722 (__v8di)
3723 _mm512_undefined_si512 (),
3724 (__mmask8) -1);
3727 extern __inline __m512i
3728 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3729 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3731 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3732 (__v8di) __O,
3733 __M);
3736 extern __inline __m512i
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3741 (__v8di)
3742 _mm512_setzero_si512 (),
3743 __M);
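/* Usage sketch (illustrative): replicate a 128-bit group across the
   whole 512-bit destination.

     __m128 quad = _mm_set_ps (3.0f, 2.0f, 1.0f, 0.0f);
     __m512 rep  = _mm512_broadcast_f32x4 (quad);

   REP holds the pattern {0,1,2,3} in each of its four 128-bit lanes;
   the masked forms merge or zero lanes exactly as for the element
   broadcasts above.  */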
3746 typedef enum
3748 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3749 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3750 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3751 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3752 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3753 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3754 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3755 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3756 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3757 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3758 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3759 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3760 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3761 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3762 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3763 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3764 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3765 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3766 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3767 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3768 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3769 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3770 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3771 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3772 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3773 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3774 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3775 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3776 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3777 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3778 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3779 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3780 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3781 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3782 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3783 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3784 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3785 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3786 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3787 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3788 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3789 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3790 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3791 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3792 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3793 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3794 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3795 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3796 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3797 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3798 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3799 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3800 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3801 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3802 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3803 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3804 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3805 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3806 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3807 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3808 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3809 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3810 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3811 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3812 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3813 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3814 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3815 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3816 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3817 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3818 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3819 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3820 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3821 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3822 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3823 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3824 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3825 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3826 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3827 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3828 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3829 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3830 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3831 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3832 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3833 _MM_PERM_DDDD = 0xFF
3834 } _MM_PERM_ENUM;
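/* Each _MM_PERM_* name encodes a four-way dword selection: the letters
   A..D stand for source positions 0..3, written with the selector for
   the highest dword first.  _MM_PERM_DCBA (0xE4) is therefore the
   identity permutation and _MM_PERM_AAAA (0x00) replicates element 0.  */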
3836 #ifdef __OPTIMIZE__
3837 extern __inline __m512i
3838 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3839 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3841 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3842 __mask,
3843 (__v16si)
3844 _mm512_undefined_si512 (),
3845 (__mmask16) -1);
3848 extern __inline __m512i
3849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3850 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3851 _MM_PERM_ENUM __mask)
3853 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3854 __mask,
3855 (__v16si) __W,
3856 (__mmask16) __U);
3859 extern __inline __m512i
3860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3861 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3863 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3864 __mask,
3865 (__v16si)
3866 _mm512_setzero_si512 (),
3867 (__mmask16) __U);
3870 extern __inline __m512i
3871 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3872 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3874 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3875 (__v8di) __B, __imm,
3876 (__v8di)
3877 _mm512_undefined_si512 (),
3878 (__mmask8) -1);
3881 extern __inline __m512i
3882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3883 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3884 __m512i __B, const int __imm)
3886 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3887 (__v8di) __B, __imm,
3888 (__v8di) __W,
3889 (__mmask8) __U);
3892 extern __inline __m512i
3893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3894 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3895 const int __imm)
3897 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3898 (__v8di) __B, __imm,
3899 (__v8di)
3900 _mm512_setzero_si512 (),
3901 (__mmask8) __U);
3904 extern __inline __m512i
3905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3906 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3908 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3909 (__v16si) __B,
3910 __imm,
3911 (__v16si)
3912 _mm512_undefined_si512 (),
3913 (__mmask16) -1);
3916 extern __inline __m512i
3917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3918 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3919 __m512i __B, const int __imm)
3921 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3922 (__v16si) __B,
3923 __imm,
3924 (__v16si) __W,
3925 (__mmask16) __U);
3928 extern __inline __m512i
3929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3930 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3931 const int __imm)
3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3934 (__v16si) __B,
3935 __imm,
3936 (__v16si)
3937 _mm512_setzero_si512 (),
3938 (__mmask16) __U);
3941 extern __inline __m512d
3942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3943 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3945 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3946 (__v8df) __B, __imm,
3947 (__v8df)
3948 _mm512_undefined_pd (),
3949 (__mmask8) -1);
3952 extern __inline __m512d
3953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3954 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3955 __m512d __B, const int __imm)
3957 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3958 (__v8df) __B, __imm,
3959 (__v8df) __W,
3960 (__mmask8) __U);
3963 extern __inline __m512d
3964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3965 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3966 const int __imm)
3968 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3969 (__v8df) __B, __imm,
3970 (__v8df)
3971 _mm512_setzero_pd (),
3972 (__mmask8) __U);
3975 extern __inline __m512
3976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3977 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3979 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3980 (__v16sf) __B, __imm,
3981 (__v16sf)
3982 _mm512_undefined_ps (),
3983 (__mmask16) -1);
3986 extern __inline __m512
3987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3988 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3989 __m512 __B, const int __imm)
3991 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3992 (__v16sf) __B, __imm,
3993 (__v16sf) __W,
3994 (__mmask16) __U);
3997 extern __inline __m512
3998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3999 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
4000 const int __imm)
4002 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
4003 (__v16sf) __B, __imm,
4004 (__v16sf)
4005 _mm512_setzero_ps (),
4006 (__mmask16) __U);
4009 #else
4010 #define _mm512_shuffle_epi32(X, C) \
4011 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4012 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4013 (__mmask16)-1))
4015 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
4016 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4017 (__v16si)(__m512i)(W),\
4018 (__mmask16)(U)))
4020 #define _mm512_maskz_shuffle_epi32(U, X, C) \
4021 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
4022 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4023 (__mmask16)(U)))
4025 #define _mm512_shuffle_i64x2(X, Y, C) \
4026 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4027 (__v8di)(__m512i)(Y), (int)(C),\
4028 (__v8di)(__m512i)_mm512_undefined_si512 (),\
4029 (__mmask8)-1))
4031 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
4032 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4033 (__v8di)(__m512i)(Y), (int)(C),\
4034 (__v8di)(__m512i)(W),\
4035 (__mmask8)(U)))
4037 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
4038 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
4039 (__v8di)(__m512i)(Y), (int)(C),\
4040 (__v8di)(__m512i)_mm512_setzero_si512 (),\
4041 (__mmask8)(U)))
4043 #define _mm512_shuffle_i32x4(X, Y, C) \
4044 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4045 (__v16si)(__m512i)(Y), (int)(C),\
4046 (__v16si)(__m512i)_mm512_undefined_si512 (),\
4047 (__mmask16)-1))
4049 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
4050 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4051 (__v16si)(__m512i)(Y), (int)(C),\
4052 (__v16si)(__m512i)(W),\
4053 (__mmask16)(U)))
4055 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
4056 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
4057 (__v16si)(__m512i)(Y), (int)(C),\
4058 (__v16si)(__m512i)_mm512_setzero_si512 (),\
4059 (__mmask16)(U)))
4061 #define _mm512_shuffle_f64x2(X, Y, C) \
4062 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4063 (__v8df)(__m512d)(Y), (int)(C),\
4064 (__v8df)(__m512d)_mm512_undefined_pd(),\
4065 (__mmask8)-1))
4067 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
4068 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4069 (__v8df)(__m512d)(Y), (int)(C),\
4070 (__v8df)(__m512d)(W),\
4071 (__mmask8)(U)))
4073 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
4074 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
4075 (__v8df)(__m512d)(Y), (int)(C),\
4076 (__v8df)(__m512d)_mm512_setzero_pd(),\
4077 (__mmask8)(U)))
4079 #define _mm512_shuffle_f32x4(X, Y, C) \
4080 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4081 (__v16sf)(__m512)(Y), (int)(C),\
4082 (__v16sf)(__m512)_mm512_undefined_ps(),\
4083 (__mmask16)-1))
4085 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
4086 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4087 (__v16sf)(__m512)(Y), (int)(C),\
4088 (__v16sf)(__m512)(W),\
4089 (__mmask16)(U)))
4091 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
4092 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
4093 (__v16sf)(__m512)(Y), (int)(C),\
4094 (__v16sf)(__m512)_mm512_setzero_ps(),\
4095 (__mmask16)(U)))
4096 #endif
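/* Usage sketch (illustrative; V is a hypothetical __m512i value):

     __m512i lane_dups = _mm512_shuffle_epi32 (V, _MM_PERM_AAAA);
     __m512i halves    = _mm512_shuffle_i64x2 (V, V, _MM_PERM_BADC);

   The first call broadcasts dword 0 within each 128-bit lane; the
   second treats the immediate as a selector of 128-bit blocks, so
   _MM_PERM_BADC (0x4E) exchanges the low and high 256-bit halves.  */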
4098 extern __inline __m512i
4099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4100 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
4102 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4103 (__v16si) __B,
4104 (__v16si)
4105 _mm512_undefined_si512 (),
4106 (__mmask16) -1);
4109 extern __inline __m512i
4110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4111 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4113 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4114 (__v16si) __B,
4115 (__v16si) __W,
4116 (__mmask16) __U);
4119 extern __inline __m512i
4120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4121 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4123 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4124 (__v16si) __B,
4125 (__v16si)
4126 _mm512_setzero_si512 (),
4127 (__mmask16) __U);
4130 extern __inline __m512i
4131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4132 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4134 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4135 (__v16si) __B,
4136 (__v16si)
4137 _mm512_undefined_si512 (),
4138 (__mmask16) -1);
4141 extern __inline __m512i
4142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4143 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4145 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4146 (__v16si) __B,
4147 (__v16si) __W,
4148 (__mmask16) __U);
4151 extern __inline __m512i
4152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4153 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4155 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4156 (__v16si) __B,
4157 (__v16si)
4158 _mm512_setzero_si512 (),
4159 (__mmask16) __U);
4162 extern __inline __m512i
4163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4164 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4166 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4167 (__v8di) __B,
4168 (__v8di)
4169 _mm512_undefined_si512 (),
4170 (__mmask8) -1);
4173 extern __inline __m512i
4174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4175 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4177 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4178 (__v8di) __B,
4179 (__v8di) __W,
4180 (__mmask8) __U);
4183 extern __inline __m512i
4184 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4185 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4187 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4188 (__v8di) __B,
4189 (__v8di)
4190 _mm512_setzero_si512 (),
4191 (__mmask8) __U);
4194 extern __inline __m512i
4195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4196 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4198 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4199 (__v8di) __B,
4200 (__v8di)
4201 _mm512_undefined_si512 (),
4202 (__mmask8) -1);
4205 extern __inline __m512i
4206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4207 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4209 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4210 (__v8di) __B,
4211 (__v8di) __W,
4212 (__mmask8) __U);
4215 extern __inline __m512i
4216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4217 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4219 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4220 (__v8di) __B,
4221 (__v8di)
4222 _mm512_setzero_si512 (),
4223 (__mmask8) __U);
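/* Usage sketch (illustrative; V is a hypothetical __m512i value):
   rotate every dword left by a per-element count.

     __m512i by8 = _mm512_rolv_epi32 (V, _mm512_set1_epi32 (8));

   For 32-bit lanes a left rotate by 8 equals a right rotate by 24, so
   _mm512_rorv_epi32 (V, _mm512_set1_epi32 (24)) produces the same
   result.  */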
4226 #ifdef __OPTIMIZE__
4227 extern __inline __m256i
4228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4229 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4231 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4232 (__v8si)
4233 _mm256_undefined_si256 (),
4234 (__mmask8) -1, __R);
4237 extern __inline __m256i
4238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4239 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4240 const int __R)
4242 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4243 (__v8si) __W,
4244 (__mmask8) __U, __R);
4247 extern __inline __m256i
4248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4249 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4252 (__v8si)
4253 _mm256_setzero_si256 (),
4254 (__mmask8) __U, __R);
4257 extern __inline __m256i
4258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4259 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4261 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4262 (__v8si)
4263 _mm256_undefined_si256 (),
4264 (__mmask8) -1, __R);
4267 extern __inline __m256i
4268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4269 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4270 const int __R)
4272 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4273 (__v8si) __W,
4274 (__mmask8) __U, __R);
4277 extern __inline __m256i
4278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4282 (__v8si)
4283 _mm256_setzero_si256 (),
4284 (__mmask8) __U, __R);
4286 #else
4287 #define _mm512_cvtt_roundpd_epi32(A, B) \
4288 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), -1, B))
4290 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4291 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)(W), (__mmask8)(U), B))
4293 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4294 ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), B))
4296 #define _mm512_cvtt_roundpd_epu32(A, B) \
4297 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), -1, B))
4299 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4300 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)(W), (__mmask8)(U), B))
4302 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4303 ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), B))
4304 #endif
4306 #ifdef __OPTIMIZE__
4307 extern __inline __m256i
4308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4309 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4311 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4312 (__v8si)
4313 _mm256_undefined_si256 (),
4314 (__mmask8) -1, __R);
4317 extern __inline __m256i
4318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4319 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4320 const int __R)
4322 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4323 (__v8si) __W,
4324 (__mmask8) __U, __R);
4327 extern __inline __m256i
4328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4329 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4332 (__v8si)
4333 _mm256_setzero_si256 (),
4334 (__mmask8) __U, __R);
4337 extern __inline __m256i
4338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4339 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4341 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4342 (__v8si)
4343 _mm256_undefined_si256 (),
4344 (__mmask8) -1, __R);
4347 extern __inline __m256i
4348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4349 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4350 const int __R)
4352 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4353 (__v8si) __W,
4354 (__mmask8) __U, __R);
4357 extern __inline __m256i
4358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4359 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4362 (__v8si)
4363 _mm256_setzero_si256 (),
4364 (__mmask8) __U, __R);
4366 #else
4367 #define _mm512_cvt_roundpd_epi32(A, B) \
4368 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), -1, B))
4370 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4371 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)(W), (__mmask8)(U), B))
4373 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4374 ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), B))
4376 #define _mm512_cvt_roundpd_epu32(A, B) \
4377 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_undefined_si256(), -1, B))
4379 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4380 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)(W), (__mmask8)(U), B))
4382 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4383 ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), B))
4384 #endif
4386 #ifdef __OPTIMIZE__
4387 extern __inline __m512i
4388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4389 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4391 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4392 (__v16si)
4393 _mm512_undefined_si512 (),
4394 (__mmask16) -1, __R);
4397 extern __inline __m512i
4398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4399 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4400 const int __R)
4402 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4403 (__v16si) __W,
4404 (__mmask16) __U, __R);
4407 extern __inline __m512i
4408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4409 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4412 (__v16si)
4413 _mm512_setzero_si512 (),
4414 (__mmask16) __U, __R);
4417 extern __inline __m512i
4418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4419 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4421 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4422 (__v16si)
4423 _mm512_undefined_si512 (),
4424 (__mmask16) -1, __R);
4427 extern __inline __m512i
4428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4429 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4430 const int __R)
4432 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4433 (__v16si) __W,
4434 (__mmask16) __U, __R);
4437 extern __inline __m512i
4438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4439 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4442 (__v16si)
4443 _mm512_setzero_si512 (),
4444 (__mmask16) __U, __R);
4446 #else
4447 #define _mm512_cvtt_roundps_epi32(A, B) \
4448 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_si512 (), -1, B))
4450 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4451 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)(W), (__mmask16)(U), B))
4453 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4454 ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), B))
4456 #define _mm512_cvtt_roundps_epu32(A, B) \
4457 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_si512 (), -1, B))
4459 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4460 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)(W), (__mmask16)(U), B))
4462 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4463 ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), B))
4464 #endif
4466 #ifdef __OPTIMIZE__
4467 extern __inline __m512i
4468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4469 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4471 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4472 (__v16si)
4473 _mm512_undefined_si512 (),
4474 (__mmask16) -1, __R);
4477 extern __inline __m512i
4478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4479 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4480 const int __R)
4482 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4483 (__v16si) __W,
4484 (__mmask16) __U, __R);
4487 extern __inline __m512i
4488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4489 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4492 (__v16si)
4493 _mm512_setzero_si512 (),
4494 (__mmask16) __U, __R);
4497 extern __inline __m512i
4498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4499 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4501 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4502 (__v16si)
4503 _mm512_undefined_si512 (),
4504 (__mmask16) -1, __R);
4507 extern __inline __m512i
4508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4509 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4510 const int __R)
4512 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4513 (__v16si) __W,
4514 (__mmask16) __U, __R);
4517 extern __inline __m512i
4518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4519 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4522 (__v16si)
4523 _mm512_setzero_si512 (),
4524 (__mmask16) __U, __R);
4526 #else
4527 #define _mm512_cvt_roundps_epi32(A, B) \
4528 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_si512 (), -1, B))
4530 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4531 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)(W), (__mmask16)(U), B))
4533 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4534 ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), B))
4536 #define _mm512_cvt_roundps_epu32(A, B) \
4537 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_undefined_si512 (), -1, B))
4539 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4540 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)(W), (__mmask16)(U), B))
4542 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4543 ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), (__v16si)_mm512_setzero_si512 (), (__mmask16)(U), B))
4544 #endif
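/* Usage sketch (illustrative; D and S are hypothetical __m512d and
   __m512 values): the _round_ conversions take an explicit rounding
   argument built from the _MM_FROUND_* constants.

     __m256i t = _mm512_cvtt_roundpd_epi32 (D, _MM_FROUND_CUR_DIRECTION);
     __m512i n = _mm512_cvt_roundps_epi32 (S, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);

   The truncating forms ignore the rounding direction and are normally
   given _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC only.  */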
4546 extern __inline __m128d
4547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4548 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4550 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4553 #ifdef __x86_64__
4554 #ifdef __OPTIMIZE__
4555 extern __inline __m128d
4556 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4557 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4559 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4562 extern __inline __m128d
4563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4564 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4566 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4569 extern __inline __m128d
4570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4571 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4573 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4575 #else
4576 #define _mm_cvt_roundu64_sd(A, B, C) \
4577 ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), B, C))
4579 #define _mm_cvt_roundi64_sd(A, B, C) \
4580 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), B, C))
4582 #define _mm_cvt_roundsi64_sd(A, B, C) \
4583 ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), B, C))
4584 #endif
4586 #endif
4588 #ifdef __OPTIMIZE__
4589 extern __inline __m128
4590 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4593 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4596 extern __inline __m128
4597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4598 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4600 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4603 extern __inline __m128
4604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4605 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4607 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4609 #else
4610 #define _mm_cvt_roundu32_ss(A, B, C) \
4611 ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), B, C))
4613 #define _mm_cvt_roundi32_ss(A, B, C) \
4614 ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), B, C))
4616 #define _mm_cvt_roundsi32_ss(A, B, C) \
4617 ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), B, C))
4618 #endif
4620 #ifdef __x86_64__
4621 #ifdef __OPTIMIZE__
4622 extern __inline __m128
4623 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4624 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4626 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4629 extern __inline __m128
4630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4633 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4636 extern __inline __m128
4637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4640 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4642 #else
4643 #define _mm_cvt_roundu64_ss(A, B, C) \
4644 ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), B, C))
4646 #define _mm_cvt_roundi64_ss(A, B, C) \
4647 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), B, C))
4649 #define _mm_cvt_roundsi64_ss(A, B, C) \
4650 ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), B, C))
4651 #endif
4653 #endif
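/* Usage sketch (illustrative): the scalar conversions write the
   converted value into the low element and pass the upper elements of
   the first operand through unchanged.

     __m128d lo = _mm_cvtu32_sd (_mm_setzero_pd (), 7U);
     __m128  f  = _mm_cvt_roundu32_ss (_mm_setzero_ps (), 7U,
                                       _MM_FROUND_TO_NEAREST_INT
                                       | _MM_FROUND_NO_EXC);
  */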
4655 extern __inline __m128i
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_cvtepi32_epi8 (__m512i __A)
4659 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4660 (__v16qi)
4661 _mm_undefined_si128 (),
4662 (__mmask16) -1);
4665 extern __inline void
4666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4667 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4669 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4672 extern __inline __m128i
4673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4674 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4676 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4677 (__v16qi) __O, __M);
4680 extern __inline __m128i
4681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4682 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4685 (__v16qi)
4686 _mm_setzero_si128 (),
4687 __M);
4690 extern __inline __m128i
4691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4692 _mm512_cvtsepi32_epi8 (__m512i __A)
4694 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4695 (__v16qi)
4696 _mm_undefined_si128 (),
4697 (__mmask16) -1);
4700 extern __inline void
4701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4702 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4704 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4707 extern __inline __m128i
4708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4711 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4712 (__v16qi) __O, __M);
4715 extern __inline __m128i
4716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4717 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4720 (__v16qi)
4721 _mm_setzero_si128 (),
4722 __M);
4725 extern __inline __m128i
4726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4727 _mm512_cvtusepi32_epi8 (__m512i __A)
4729 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4730 (__v16qi)
4731 _mm_undefined_si128 (),
4732 (__mmask16) -1);
4735 extern __inline void
4736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4737 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4739 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4742 extern __inline __m128i
4743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4744 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4746 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4747 (__v16qi) __O,
4748 __M);
4751 extern __inline __m128i
4752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4753 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4756 (__v16qi)
4757 _mm_setzero_si128 (),
4758 __M);
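/* Usage sketch (illustrative): the three dword-to-byte narrowings
   differ only in how out-of-range values are treated.  With every
   source lane equal to 300, _mm512_cvtepi32_epi8 truncates (each byte
   becomes 0x2C), _mm512_cvtsepi32_epi8 saturates to the signed range
   (0x7F) and _mm512_cvtusepi32_epi8 saturates to the unsigned range
   (0xFF).

     __m512i v   = _mm512_set1_epi32 (300);
     __m128i trn = _mm512_cvtepi32_epi8 (v);
     __m128i sat = _mm512_cvtsepi32_epi8 (v);  */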
4761 extern __inline __m256i
4762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4763 _mm512_cvtepi32_epi16 (__m512i __A)
4765 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4766 (__v16hi)
4767 _mm256_undefined_si256 (),
4768 (__mmask16) -1);
4771 extern __inline void
4772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4773 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4775 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4778 extern __inline __m256i
4779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4780 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4782 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4783 (__v16hi) __O, __M);
4786 extern __inline __m256i
4787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4788 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4791 (__v16hi)
4792 _mm256_setzero_si256 (),
4793 __M);
4796 extern __inline __m256i
4797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4798 _mm512_cvtsepi32_epi16 (__m512i __A)
4800 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4801 (__v16hi)
4802 _mm256_undefined_si256 (),
4803 (__mmask16) -1);
4806 extern __inline void
4807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4808 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4810 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4813 extern __inline __m256i
4814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4815 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4817 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4818 (__v16hi) __O, __M);
4821 extern __inline __m256i
4822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4823 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4826 (__v16hi)
4827 _mm256_setzero_si256 (),
4828 __M);
4831 extern __inline __m256i
4832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4833 _mm512_cvtusepi32_epi16 (__m512i __A)
4835 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4836 (__v16hi)
4837 _mm256_undefined_si256 (),
4838 (__mmask16) -1);
4841 extern __inline void
4842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4843 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4845 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4848 extern __inline __m256i
4849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4850 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4852 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4853 (__v16hi) __O,
4854 __M);
4857 extern __inline __m256i
4858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4859 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4862 (__v16hi)
4863 _mm256_setzero_si256 (),
4864 __M);
4867 extern __inline __m256i
4868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4869 _mm512_cvtepi64_epi32 (__m512i __A)
4871 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4872 (__v8si)
4873 _mm256_undefined_si256 (),
4874 (__mmask8) -1);
4877 extern __inline void
4878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4879 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4881 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4884 extern __inline __m256i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4888 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4889 (__v8si) __O, __M);
4892 extern __inline __m256i
4893 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4894 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4897 (__v8si)
4898 _mm256_setzero_si256 (),
4899 __M);
4902 extern __inline __m256i
4903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 _mm512_cvtsepi64_epi32 (__m512i __A)
4907 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4908 (__v8si)
4909 _mm256_undefined_si256 (),
4910 (__mmask8) -1);
4913 extern __inline void
4914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4915 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4917 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4920 extern __inline __m256i
4921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4922 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4924 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4925 (__v8si) __O, __M);
4928 extern __inline __m256i
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4933 (__v8si)
4934 _mm256_setzero_si256 (),
4935 __M);
4938 extern __inline __m256i
4939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4940 _mm512_cvtusepi64_epi32 (__m512i __A)
4942 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4943 (__v8si)
4944 _mm256_undefined_si256 (),
4945 (__mmask8) -1);
4948 extern __inline void
4949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4950 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4952 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4955 extern __inline __m256i
4956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4957 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4959 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4960 (__v8si) __O, __M);
4963 extern __inline __m256i
4964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4965 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4968 (__v8si)
4969 _mm256_setzero_si256 (),
4970 __M);
4973 extern __inline __m128i
4974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4975 _mm512_cvtepi64_epi16 (__m512i __A)
4977 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4978 (__v8hi)
4979 _mm_undefined_si128 (),
4980 (__mmask8) -1);
4983 extern __inline void
4984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4985 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4987 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4990 extern __inline __m128i
4991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4992 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4994 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4995 (__v8hi) __O, __M);
4998 extern __inline __m128i
4999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5000 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
5003 (__v8hi)
5004 _mm_setzero_si128 (),
5005 __M);
5008 extern __inline __m128i
5009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5010 _mm512_cvtsepi64_epi16 (__m512i __A)
5012 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5013 (__v8hi)
5014 _mm_undefined_si128 (),
5015 (__mmask8) -1);
5018 extern __inline void
5019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5020 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
5022 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
5025 extern __inline __m128i
5026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5027 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5029 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5030 (__v8hi) __O, __M);
5033 extern __inline __m128i
5034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
5038 (__v8hi)
5039 _mm_setzero_si128 (),
5040 __M);
5043 extern __inline __m128i
5044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5045 _mm512_cvtusepi64_epi16 (__m512i __A)
5047 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5048 (__v8hi)
5049 _mm_undefined_si128 (),
5050 (__mmask8) -1);
5053 extern __inline void
5054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5055 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
5057 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
5060 extern __inline __m128i
5061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5062 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
5064 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5065 (__v8hi) __O, __M);
5068 extern __inline __m128i
5069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5070 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
5073 (__v8hi)
5074 _mm_setzero_si128 (),
5075 __M);
5078 extern __inline __m128i
5079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5080 _mm512_cvtepi64_epi8 (__m512i __A)
5082 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5083 (__v16qi)
5084 _mm_undefined_si128 (),
5085 (__mmask8) -1);
5088 extern __inline void
5089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5090 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5092 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5095 extern __inline __m128i
5096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5097 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5099 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5100 (__v16qi) __O, __M);
5103 extern __inline __m128i
5104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5105 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
5108 (__v16qi)
5109 _mm_setzero_si128 (),
5110 __M);
5113 extern __inline __m128i
5114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115 _mm512_cvtsepi64_epi8 (__m512i __A)
5117 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5118 (__v16qi)
5119 _mm_undefined_si128 (),
5120 (__mmask8) -1);
5123 extern __inline void
5124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5125 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5127 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5130 extern __inline __m128i
5131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5132 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5134 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5135 (__v16qi) __O, __M);
5138 extern __inline __m128i
5139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5143 (__v16qi)
5144 _mm_setzero_si128 (),
5145 __M);
5148 extern __inline __m128i
5149 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5150 _mm512_cvtusepi64_epi8 (__m512i __A)
5152 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5153 (__v16qi)
5154 _mm_undefined_si128 (),
5155 (__mmask8) -1);
5158 extern __inline void
5159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5160 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5165 extern __inline __m128i
5166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5167 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5169 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5170 (__v16qi) __O,
5171 __M);
5174 extern __inline __m128i
5175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5178 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5179 (__v16qi)
5180 _mm_setzero_si128 (),
5181 __M);
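/* Usage sketch (illustrative; V is a hypothetical __m512i value): the
   *_storeu_* narrowings write only the unmasked converted elements to
   memory, so with mask 0x0F only the first four bytes of BUF are
   touched.

     unsigned char buf[8];
     _mm512_mask_cvtepi64_storeu_epi8 (buf, (__mmask8) 0x0F, V);  */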
5184 extern __inline __m512d
5185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186 _mm512_cvtepi32_pd (__m256i __A)
5188 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5189 (__v8df)
5190 _mm512_undefined_pd (),
5191 (__mmask8) -1);
5194 extern __inline __m512d
5195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5196 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5198 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5199 (__v8df) __W,
5200 (__mmask8) __U);
5203 extern __inline __m512d
5204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5205 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5207 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5208 (__v8df)
5209 _mm512_setzero_pd (),
5210 (__mmask8) __U);
5213 extern __inline __m512d
5214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5215 _mm512_cvtepu32_pd (__m256i __A)
5217 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5218 (__v8df)
5219 _mm512_undefined_pd (),
5220 (__mmask8) -1);
5223 extern __inline __m512d
5224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5225 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5227 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5228 (__v8df) __W,
5229 (__mmask8) __U);
5232 extern __inline __m512d
5233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5234 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5236 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5237 (__v8df)
5238 _mm512_setzero_pd (),
5239 (__mmask8) __U);
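/* Usage sketch (illustrative): widening eight dwords to doubles; the
   unsigned variant matters once the source exceeds INT_MAX.  For an
   all-ones source, _mm512_cvtepi32_pd gives -1.0 in every lane while
   _mm512_cvtepu32_pd gives 4294967295.0.

     __m256i x  = _mm256_set1_epi32 (-1);
     __m512d sd = _mm512_cvtepi32_pd (x);
     __m512d ud = _mm512_cvtepu32_pd (x);  */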
5242 #ifdef __OPTIMIZE__
5243 extern __inline __m512
5244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5245 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5247 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5248 (__v16sf)
5249 _mm512_undefined_ps (),
5250 (__mmask16) -1, __R);
5253 extern __inline __m512
5254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5255 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5256 const int __R)
5258 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5259 (__v16sf) __W,
5260 (__mmask16) __U, __R);
5263 extern __inline __m512
5264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5265 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5267 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5268 (__v16sf)
5269 _mm512_setzero_ps (),
5270 (__mmask16) __U, __R);
5273 extern __inline __m512
5274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5275 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5277 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5278 (__v16sf)
5279 _mm512_undefined_ps (),
5280 (__mmask16) -1, __R);
5283 extern __inline __m512
5284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5286 const int __R)
5288 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5289 (__v16sf) __W,
5290 (__mmask16) __U, __R);
5293 extern __inline __m512
5294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5297 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5298 (__v16sf)
5299 _mm512_setzero_ps (),
5300 (__mmask16) __U, __R);
5303 #else
5304 #define _mm512_cvt_roundepi32_ps(A, B) \
5305 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5307 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5308 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
5310 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5311 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5313 #define _mm512_cvt_roundepu32_ps(A, B) \
5314 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
5316 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5317   (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)(W), (__mmask16)(U), (B))
5319 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5320 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5321 #endif
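/* Extract a 256-bit (four 64-bit elements) or 128-bit (four 32-bit
   elements) field from a 512-bit vector; the field is selected by the
   constant __imm.  The _mask forms merge unselected result elements
   from __W, the _maskz forms zero them. */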
5323 #ifdef __OPTIMIZE__
5324 extern __inline __m256d
5325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5326 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5328 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5329 __imm,
5330 (__v4df)
5331 _mm256_undefined_pd (),
5332 (__mmask8) -1);
5335 extern __inline __m256d
5336 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5337 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5338 const int __imm)
5340 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5341 __imm,
5342 (__v4df) __W,
5343 (__mmask8) __U);
5346 extern __inline __m256d
5347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5348 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5350 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5351 __imm,
5352 (__v4df)
5353 _mm256_setzero_pd (),
5354 (__mmask8) __U);
5357 extern __inline __m128
5358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5359 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5361 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5362 __imm,
5363 (__v4sf)
5364 _mm_undefined_ps (),
5365 (__mmask8) -1);
5368 extern __inline __m128
5369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5370 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5371 const int __imm)
5373 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5374 __imm,
5375 (__v4sf) __W,
5376 (__mmask8) __U);
5379 extern __inline __m128
5380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5381 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5383 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5384 __imm,
5385 (__v4sf)
5386 _mm_setzero_ps (),
5387 (__mmask8) __U);
5390 extern __inline __m256i
5391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5392 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5394 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5395 __imm,
5396 (__v4di)
5397 _mm256_undefined_si256 (),
5398 (__mmask8) -1);
5401 extern __inline __m256i
5402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5403 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5404 const int __imm)
5406 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5407 __imm,
5408 (__v4di) __W,
5409 (__mmask8) __U);
5412 extern __inline __m256i
5413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5416 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5417 __imm,
5418 (__v4di)
5419 _mm256_setzero_si256 (),
5420 (__mmask8) __U);
5423 extern __inline __m128i
5424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5427 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5428 __imm,
5429 (__v4si)
5430 _mm_undefined_si128 (),
5431 (__mmask8) -1);
5434 extern __inline __m128i
5435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5436 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5437 const int __imm)
5439 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5440 __imm,
5441 (__v4si) __W,
5442 (__mmask8) __U);
5445 extern __inline __m128i
5446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5447 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5449 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5450 __imm,
5451 (__v4si)
5452 _mm_setzero_si128 (),
5453 (__mmask8) __U);
5455 #else
5457 #define _mm512_extractf64x4_pd(X, C) \
5458 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5459 (int) (C),\
5460 (__v4df)(__m256d)_mm256_undefined_pd(),\
5461 (__mmask8)-1))
5463 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5464 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5465 (int) (C),\
5466 (__v4df)(__m256d)(W),\
5467 (__mmask8)(U)))
5469 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5470 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5471 (int) (C),\
5472 (__v4df)(__m256d)_mm256_setzero_pd(),\
5473 (__mmask8)(U)))
5475 #define _mm512_extractf32x4_ps(X, C) \
5476 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5477 (int) (C),\
5478 (__v4sf)(__m128)_mm_undefined_ps(),\
5479 (__mmask8)-1))
5481 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5482 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5483 (int) (C),\
5484 (__v4sf)(__m128)(W),\
5485 (__mmask8)(U)))
5487 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5488 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5489 (int) (C),\
5490 (__v4sf)(__m128)_mm_setzero_ps(),\
5491 (__mmask8)(U)))
5493 #define _mm512_extracti64x4_epi64(X, C) \
5494 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5495 (int) (C),\
5496 (__v4di)(__m256i)_mm256_undefined_si256 (),\
5497 (__mmask8)-1))
5499 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5500 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5501 (int) (C),\
5502 (__v4di)(__m256i)(W),\
5503 (__mmask8)(U)))
5505 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5506 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5507 (int) (C),\
5508 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5509 (__mmask8)(U)))
5511 #define _mm512_extracti32x4_epi32(X, C) \
5512 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5513 (int) (C),\
5514 (__v4si)(__m128i)_mm_undefined_si128 (),\
5515 (__mmask8)-1))
5517 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5518 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5519 (int) (C),\
5520 (__v4si)(__m128i)(W),\
5521 (__mmask8)(U)))
5523 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5524 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5525 (int) (C),\
5526 (__v4si)(__m128i)_mm_setzero_si128 (),\
5527 (__mmask8)(U)))
5528 #endif
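/* Insert a 128-bit or 256-bit block into a 512-bit vector at the
   position selected by the constant __imm. */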
5530 #ifdef __OPTIMIZE__
5531 extern __inline __m512i
5532 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5533 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5535 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5536 (__v4si) __B,
5537 __imm,
5538 (__v16si) __A, -1);
5541 extern __inline __m512
5542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5545 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5546 (__v4sf) __B,
5547 __imm,
5548 (__v16sf) __A, -1);
5551 extern __inline __m512i
5552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5555 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5556 (__v4di) __B,
5557 __imm,
5558 (__v8di)
5559 _mm512_undefined_si512 (),
5560 (__mmask8) -1);
5563 extern __inline __m512i
5564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5565 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5566 __m256i __B, const int __imm)
5568 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5569 (__v4di) __B,
5570 __imm,
5571 (__v8di) __W,
5572 (__mmask8) __U);
5575 extern __inline __m512i
5576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5578 const int __imm)
5580 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5581 (__v4di) __B,
5582 __imm,
5583 (__v8di)
5584 _mm512_setzero_si512 (),
5585 (__mmask8) __U);
5588 extern __inline __m512d
5589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5592 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5593 (__v4df) __B,
5594 __imm,
5595 (__v8df)
5596 _mm512_undefined_pd (),
5597 (__mmask8) -1);
5600 extern __inline __m512d
5601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5602 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5603 __m256d __B, const int __imm)
5605 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5606 (__v4df) __B,
5607 __imm,
5608 (__v8df) __W,
5609 (__mmask8) __U);
5612 extern __inline __m512d
5613 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5614 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5615 const int __imm)
5617 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5618 (__v4df) __B,
5619 __imm,
5620 (__v8df)
5621 _mm512_setzero_pd (),
5622 (__mmask8) __U);
5624 #else
5625 #define _mm512_insertf32x4(X, Y, C) \
5626 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5627 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5629 #define _mm512_inserti32x4(X, Y, C) \
5630 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5631 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5633 #define _mm512_insertf64x4(X, Y, C) \
5634 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5635 (__v4df)(__m256d) (Y), (int) (C), \
5636 (__v8df)(__m512d)_mm512_undefined_pd(), \
5637 (__mmask8)-1))
5639 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5640 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5641 (__v4df)(__m256d) (Y), (int) (C), \
5642 (__v8df)(__m512d)(W), \
5643 (__mmask8)(U)))
5645 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5646 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5647 (__v4df)(__m256d) (Y), (int) (C), \
5648 (__v8df)(__m512d)_mm512_setzero_pd(), \
5649 (__mmask8)(U)))
5651 #define _mm512_inserti64x4(X, Y, C) \
5652 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5653 (__v4di)(__m256i) (Y), (int) (C), \
5654 (__v8di)(__m512i)_mm512_undefined_si512 (), \
5655 (__mmask8)-1))
5657 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5658 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5659 (__v4di)(__m256i) (Y), (int) (C),\
5660 (__v8di)(__m512i)(W),\
5661 (__mmask8)(U)))
5663 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
5664 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5665 (__v4di)(__m256i) (Y), (int) (C), \
5666 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5667 (__mmask8)(U)))
5668 #endif
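/* Unaligned loads and stores.  The _mask and _maskz forms touch only
   the elements whose mask bit is set: remaining result elements are
   merged from __W or zeroed, and masked-off store elements are left
   unmodified in memory.  For example, to load just the first __n
   doubles from __P:
     _mm512_maskz_loadu_pd ((__mmask8) ((1 << __n) - 1), __P); */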
5670 extern __inline __m512d
5671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5672 _mm512_loadu_pd (void const *__P)
5674 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5675 (__v8df)
5676 _mm512_undefined_pd (),
5677 (__mmask8) -1);
5680 extern __inline __m512d
5681 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5682 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5684 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5685 (__v8df) __W,
5686 (__mmask8) __U);
5689 extern __inline __m512d
5690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5691 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5693 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5694 (__v8df)
5695 _mm512_setzero_pd (),
5696 (__mmask8) __U);
5699 extern __inline void
5700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5701 _mm512_storeu_pd (void *__P, __m512d __A)
5703 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5704 (__mmask8) -1);
5707 extern __inline void
5708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5709 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5711 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5712 (__mmask8) __U);
5715 extern __inline __m512
5716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5717 _mm512_loadu_ps (void const *__P)
5719 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5720 (__v16sf)
5721 _mm512_undefined_ps (),
5722 (__mmask16) -1);
5725 extern __inline __m512
5726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5727 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5729 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5730 (__v16sf) __W,
5731 (__mmask16) __U);
5734 extern __inline __m512
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5738 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5739 (__v16sf)
5740 _mm512_setzero_ps (),
5741 (__mmask16) __U);
5744 extern __inline void
5745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5746 _mm512_storeu_ps (void *__P, __m512 __A)
5748 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5749 (__mmask16) -1);
5752 extern __inline void
5753 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5754 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5756 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5757 (__mmask16) __U);
5760 extern __inline __m512i
5761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5762 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5765 (__v8di) __W,
5766 (__mmask8) __U);
5769 extern __inline __m512i
5770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5771 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5774 (__v8di)
5775 _mm512_setzero_si512 (),
5776 (__mmask8) __U);
5779 extern __inline void
5780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5783 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5784 (__mmask8) __U);
5787 extern __inline __m512i
5788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5789 _mm512_loadu_si512 (void const *__P)
5791 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5792 (__v16si)
5793 _mm512_setzero_si512 (),
5794 (__mmask16) -1);
5797 extern __inline __m512i
5798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5799 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5801 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5802 (__v16si) __W,
5803 (__mmask16) __U);
5806 extern __inline __m512i
5807 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5808 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5810 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5811 (__v16si)
5812 _mm512_setzero_si512 (),
5813 (__mmask16) __U);
5816 extern __inline void
5817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 _mm512_storeu_si512 (void *__P, __m512i __A)
5820 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5821 (__mmask16) -1);
5824 extern __inline void
5825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5826 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5828 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5829 (__mmask16) __U);
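/* VPERMILPD/VPERMILPS with a variable control vector: each result
   element is picked from within its own 128-bit lane of __A, using
   the low bits of the corresponding element of __C. */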
5832 extern __inline __m512d
5833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5834 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5836 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5837 (__v8di) __C,
5838 (__v8df)
5839 _mm512_undefined_pd (),
5840 (__mmask8) -1);
5843 extern __inline __m512d
5844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5845 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5847 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5848 (__v8di) __C,
5849 (__v8df) __W,
5850 (__mmask8) __U);
5853 extern __inline __m512d
5854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5855 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5857 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5858 (__v8di) __C,
5859 (__v8df)
5860 _mm512_setzero_pd (),
5861 (__mmask8) __U);
5864 extern __inline __m512
5865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5866 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5868 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5869 (__v16si) __C,
5870 (__v16sf)
5871 _mm512_undefined_ps (),
5872 (__mmask16) -1);
5875 extern __inline __m512
5876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5877 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5879 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5880 (__v16si) __C,
5881 (__v16sf) __W,
5882 (__mmask16) __U);
5885 extern __inline __m512
5886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5887 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5889 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5890 (__v16si) __C,
5891 (__v16sf)
5892 _mm512_setzero_ps (),
5893 (__mmask16) __U);
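/* Two-source (full table) permutes.  Each result element is selected
   from the concatenation of __A and __B by the corresponding index in
   __I.  In the _mask forms, elements whose mask bit is clear keep the
   value from __A; in the _mask2 forms they keep the index value from
   __I.  For instance, with __I = _mm512_set_epi64 (14, 12, 10, 8, 6,
   4, 2, 0), _mm512_permutex2var_epi64 (__A, __I, __B) gathers the
   even-indexed elements of __A followed by those of __B. */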
5896 extern __inline __m512i
5897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5898 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5900 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5901 /* idx */ ,
5902 (__v8di) __A,
5903 (__v8di) __B,
5904 (__mmask8) -1);
5907 extern __inline __m512i
5908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5909 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5910 __m512i __B)
5912 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5913 /* idx */ ,
5914 (__v8di) __A,
5915 (__v8di) __B,
5916 (__mmask8) __U);
5919 extern __inline __m512i
5920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5922 __mmask8 __U, __m512i __B)
5924 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5925 (__v8di) __I
5926 /* idx */ ,
5927 (__v8di) __B,
5928 (__mmask8) __U);
5931 extern __inline __m512i
5932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5933 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5934 __m512i __I, __m512i __B)
5936 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5937 /* idx */ ,
5938 (__v8di) __A,
5939 (__v8di) __B,
5940 (__mmask8) __U);
5943 extern __inline __m512i
5944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5945 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5947 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5948 /* idx */ ,
5949 (__v16si) __A,
5950 (__v16si) __B,
5951 (__mmask16) -1);
5954 extern __inline __m512i
5955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5956 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5957 __m512i __I, __m512i __B)
5959 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5960 /* idx */ ,
5961 (__v16si) __A,
5962 (__v16si) __B,
5963 (__mmask16) __U);
5966 extern __inline __m512i
5967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5969 __mmask16 __U, __m512i __B)
5971 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5972 (__v16si) __I
5973 /* idx */ ,
5974 (__v16si) __B,
5975 (__mmask16) __U);
5978 extern __inline __m512i
5979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5980 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5981 __m512i __I, __m512i __B)
5983 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5984 /* idx */ ,
5985 (__v16si) __A,
5986 (__v16si) __B,
5987 (__mmask16) __U);
5990 extern __inline __m512d
5991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5992 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5994 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5995 /* idx */ ,
5996 (__v8df) __A,
5997 (__v8df) __B,
5998 (__mmask8) -1);
6001 extern __inline __m512d
6002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6003 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
6004 __m512d __B)
6006 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6007 /* idx */ ,
6008 (__v8df) __A,
6009 (__v8df) __B,
6010 (__mmask8) __U);
6013 extern __inline __m512d
6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6016 __m512d __B)
6018 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6019 (__v8di) __I
6020 /* idx */ ,
6021 (__v8df) __B,
6022 (__mmask8) __U);
6025 extern __inline __m512d
6026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6027 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6028 __m512d __B)
6030 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6031 /* idx */ ,
6032 (__v8df) __A,
6033 (__v8df) __B,
6034 (__mmask8) __U);
6037 extern __inline __m512
6038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6039 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
6041 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6042 /* idx */ ,
6043 (__v16sf) __A,
6044 (__v16sf) __B,
6045 (__mmask16) -1);
6048 extern __inline __m512
6049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6050 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6052 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6053 /* idx */ ,
6054 (__v16sf) __A,
6055 (__v16sf) __B,
6056 (__mmask16) __U);
6059 extern __inline __m512
6060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6061 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6062 __m512 __B)
6064 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6065 (__v16si) __I
6066 /* idx */ ,
6067 (__v16sf) __B,
6068 (__mmask16) __U);
6071 extern __inline __m512
6072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6073 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6074 __m512 __B)
6076 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6077 /* idx */ ,
6078 (__v16sf) __A,
6079 (__v16sf) __B,
6080 (__mmask16) __U);
6083 #ifdef __OPTIMIZE__
6084 extern __inline __m512d
6085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6086 _mm512_permute_pd (__m512d __X, const int __C)
6088 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6089 (__v8df)
6090 _mm512_undefined_pd (),
6091 (__mmask8) -1);
6094 extern __inline __m512d
6095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6096 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
6098 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6099 (__v8df) __W,
6100 (__mmask8) __U);
6103 extern __inline __m512d
6104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
6107 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
6108 (__v8df)
6109 _mm512_setzero_pd (),
6110 (__mmask8) __U);
6113 extern __inline __m512
6114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6115 _mm512_permute_ps (__m512 __X, const int __C)
6117 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6118 (__v16sf)
6119 _mm512_undefined_ps (),
6120 (__mmask16) -1);
6123 extern __inline __m512
6124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6125 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6127 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6128 (__v16sf) __W,
6129 (__mmask16) __U);
6132 extern __inline __m512
6133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6134 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6136 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6137 (__v16sf)
6138 _mm512_setzero_ps (),
6139 (__mmask16) __U);
6141 #else
6142 #define _mm512_permute_pd(X, C) \
6143 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6144 (__v8df)(__m512d)_mm512_undefined_pd(),\
6145 (__mmask8)(-1)))
6147 #define _mm512_mask_permute_pd(W, U, X, C) \
6148 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6149 (__v8df)(__m512d)(W), \
6150 (__mmask8)(U)))
6152 #define _mm512_maskz_permute_pd(U, X, C) \
6153 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6154 (__v8df)(__m512d)_mm512_setzero_pd(), \
6155 (__mmask8)(U)))
6157 #define _mm512_permute_ps(X, C) \
6158 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6159 (__v16sf)(__m512)_mm512_undefined_ps(),\
6160 (__mmask16)(-1)))
6162 #define _mm512_mask_permute_ps(W, U, X, C) \
6163 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6164 (__v16sf)(__m512)(W), \
6165 (__mmask16)(U)))
6167 #define _mm512_maskz_permute_ps(U, X, C) \
6168 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6169 (__v16sf)(__m512)_mm512_setzero_ps(), \
6170 (__mmask16)(U)))
6171 #endif
6173 #ifdef __OPTIMIZE__
6174 extern __inline __m512i
6175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6176 _mm512_permutex_epi64 (__m512i __X, const int __I)
6178 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6179 (__v8di)
6180 _mm512_undefined_si512 (),
6181 (__mmask8) (-1));
6184 extern __inline __m512i
6185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6186 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6187 __m512i __X, const int __I)
6189 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6190 (__v8di) __W,
6191 (__mmask8) __M);
6194 extern __inline __m512i
6195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6196 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6198 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6199 (__v8di)
6200 _mm512_setzero_si512 (),
6201 (__mmask8) __M);
6204 extern __inline __m512d
6205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6206 _mm512_permutex_pd (__m512d __X, const int __M)
6208 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6209 (__v8df)
6210 _mm512_undefined_pd (),
6211 (__mmask8) -1);
6214 extern __inline __m512d
6215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6216 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6218 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6219 (__v8df) __W,
6220 (__mmask8) __U);
6223 extern __inline __m512d
6224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6225 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6227 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6228 (__v8df)
6229 _mm512_setzero_pd (),
6230 (__mmask8) __U);
6232 #else
6233 #define _mm512_permutex_pd(X, M) \
6234 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6235 (__v8df)(__m512d)_mm512_undefined_pd(),\
6236 (__mmask8)-1))
6238 #define _mm512_mask_permutex_pd(W, U, X, M) \
6239 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6240 (__v8df)(__m512d)(W), (__mmask8)(U)))
6242 #define _mm512_maskz_permutex_pd(U, X, M) \
6243 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6244 (__v8df)(__m512d)_mm512_setzero_pd(),\
6245 (__mmask8)(U)))
6247 #define _mm512_permutex_epi64(X, I) \
6248 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6249 (int)(I), \
6250 (__v8di)(__m512i) \
6251 (_mm512_undefined_si512 ()),\
6252 (__mmask8)(-1)))
6254 #define _mm512_maskz_permutex_epi64(M, X, I) \
6255 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6256 (int)(I), \
6257 (__v8di)(__m512i) \
6258 (_mm512_setzero_si512 ()),\
6259 (__mmask8)(M)))
6261 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6262 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6263 (int)(I), \
6264 (__v8di)(__m512i)(W), \
6265 (__mmask8)(M)))
6266 #endif
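/* Full cross-lane permutes with a variable index vector: result
   element i is __Y[__X[i]].  Note that the underlying builtins take
   the source operand first and the index vector second. */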
6268 extern __inline __m512i
6269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6270 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6272 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6273 (__v8di) __X,
6274 (__v8di)
6275 _mm512_setzero_si512 (),
6276 __M);
6279 extern __inline __m512i
6280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6281 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6283 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6284 (__v8di) __X,
6285 (__v8di)
6286 _mm512_undefined_si512 (),
6287 (__mmask8) -1);
6290 extern __inline __m512i
6291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6292 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6293 __m512i __Y)
6295 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6296 (__v8di) __X,
6297 (__v8di) __W,
6298 __M);
6301 extern __inline __m512i
6302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6303 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6305 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6306 (__v16si) __X,
6307 (__v16si)
6308 _mm512_setzero_si512 (),
6309 __M);
6312 extern __inline __m512i
6313 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6316 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6317 (__v16si) __X,
6318 (__v16si)
6319 _mm512_undefined_si512 (),
6320 (__mmask16) -1);
6323 extern __inline __m512i
6324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6326 __m512i __Y)
6328 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6329 (__v16si) __X,
6330 (__v16si) __W,
6331 __M);
6334 extern __inline __m512d
6335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6338 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6339 (__v8di) __X,
6340 (__v8df)
6341 _mm512_undefined_pd (),
6342 (__mmask8) -1);
6345 extern __inline __m512d
6346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6349 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6350 (__v8di) __X,
6351 (__v8df) __W,
6352 (__mmask8) __U);
6355 extern __inline __m512d
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6359 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6360 (__v8di) __X,
6361 (__v8df)
6362 _mm512_setzero_pd (),
6363 (__mmask8) __U);
6366 extern __inline __m512
6367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6368 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6370 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6371 (__v16si) __X,
6372 (__v16sf)
6373 _mm512_undefined_ps (),
6374 (__mmask16) -1);
6377 extern __inline __m512
6378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6379 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6381 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6382 (__v16si) __X,
6383 (__v16sf) __W,
6384 (__mmask16) __U);
6387 extern __inline __m512
6388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6389 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6391 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6392 (__v16si) __X,
6393 (__v16sf)
6394 _mm512_setzero_ps (),
6395 (__mmask16) __U);
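/* Immediate-controlled shuffles and the FIXUPIMM family.  The shuffle
   intrinsics combine elements of __M and __V as directed by the
   constant __imm; the fixupimm intrinsics patch special floating-point
   values (NaN, infinities, zero, ...) according to a per-element
   lookup table supplied in __C and the constant __imm. */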
6398 #ifdef __OPTIMIZE__
6399 extern __inline __m512
6400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6401 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6403 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6404 (__v16sf) __V, __imm,
6405 (__v16sf)
6406 _mm512_undefined_ps (),
6407 (__mmask16) -1);
6410 extern __inline __m512
6411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6412 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6413 __m512 __V, const int __imm)
6415 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6416 (__v16sf) __V, __imm,
6417 (__v16sf) __W,
6418 (__mmask16) __U);
6421 extern __inline __m512
6422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6425 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6426 (__v16sf) __V, __imm,
6427 (__v16sf)
6428 _mm512_setzero_ps (),
6429 (__mmask16) __U);
6432 extern __inline __m512d
6433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6436 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6437 (__v8df) __V, __imm,
6438 (__v8df)
6439 _mm512_undefined_pd (),
6440 (__mmask8) -1);
6443 extern __inline __m512d
6444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6445 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6446 __m512d __V, const int __imm)
6448 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6449 (__v8df) __V, __imm,
6450 (__v8df) __W,
6451 (__mmask8) __U);
6454 extern __inline __m512d
6455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6456 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6457 const int __imm)
6459 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6460 (__v8df) __V, __imm,
6461 (__v8df)
6462 _mm512_setzero_pd (),
6463 (__mmask8) __U);
6466 extern __inline __m512d
6467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6468 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6469 const int __imm, const int __R)
6471 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6472 (__v8df) __B,
6473 (__v8di) __C,
6474 __imm,
6475 (__mmask8) -1, __R);
6478 extern __inline __m512d
6479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6480 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6481 __m512i __C, const int __imm, const int __R)
6483 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6484 (__v8df) __B,
6485 (__v8di) __C,
6486 __imm,
6487 (__mmask8) __U, __R);
6490 extern __inline __m512d
6491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6492 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6493 __m512i __C, const int __imm, const int __R)
6495 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6496 (__v8df) __B,
6497 (__v8di) __C,
6498 __imm,
6499 (__mmask8) __U, __R);
6502 extern __inline __m512
6503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6504 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6505 const int __imm, const int __R)
6507 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6508 (__v16sf) __B,
6509 (__v16si) __C,
6510 __imm,
6511 (__mmask16) -1, __R);
6514 extern __inline __m512
6515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6516 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6517 __m512i __C, const int __imm, const int __R)
6519 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6520 (__v16sf) __B,
6521 (__v16si) __C,
6522 __imm,
6523 (__mmask16) __U, __R);
6526 extern __inline __m512
6527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6528 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6529 __m512i __C, const int __imm, const int __R)
6531 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6532 (__v16sf) __B,
6533 (__v16si) __C,
6534 __imm,
6535 (__mmask16) __U, __R);
6538 extern __inline __m128d
6539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6541 const int __imm, const int __R)
6543 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6544 (__v2df) __B,
6545 (__v2di) __C, __imm,
6546 (__mmask8) -1, __R);
6549 extern __inline __m128d
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6552 __m128i __C, const int __imm, const int __R)
6554 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6555 (__v2df) __B,
6556 (__v2di) __C, __imm,
6557 (__mmask8) __U, __R);
6560 extern __inline __m128d
6561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6563 __m128i __C, const int __imm, const int __R)
6565 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6566 (__v2df) __B,
6567 (__v2di) __C,
6568 __imm,
6569 (__mmask8) __U, __R);
6572 extern __inline __m128
6573 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6574 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6575 const int __imm, const int __R)
6577 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6578 (__v4sf) __B,
6579 (__v4si) __C, __imm,
6580 (__mmask8) -1, __R);
6583 extern __inline __m128
6584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6585 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6586 __m128i __C, const int __imm, const int __R)
6588 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6589 (__v4sf) __B,
6590 (__v4si) __C, __imm,
6591 (__mmask8) __U, __R);
6594 extern __inline __m128
6595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6596 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6597 __m128i __C, const int __imm, const int __R)
6599 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6600 (__v4sf) __B,
6601 (__v4si) __C, __imm,
6602 (__mmask8) __U, __R);
6605 #else
6606 #define _mm512_shuffle_pd(X, Y, C) \
6607 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6608 (__v8df)(__m512d)(Y), (int)(C),\
6609 (__v8df)(__m512d)_mm512_undefined_pd(),\
6610 (__mmask8)-1))
6612 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6613 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6614 (__v8df)(__m512d)(Y), (int)(C),\
6615 (__v8df)(__m512d)(W),\
6616 (__mmask8)(U)))
6618 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6619 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6620 (__v8df)(__m512d)(Y), (int)(C),\
6621 (__v8df)(__m512d)_mm512_setzero_pd(),\
6622 (__mmask8)(U)))
6624 #define _mm512_shuffle_ps(X, Y, C) \
6625 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6626 (__v16sf)(__m512)(Y), (int)(C),\
6627 (__v16sf)(__m512)_mm512_undefined_ps(),\
6628 (__mmask16)-1))
6630 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6631 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6632 (__v16sf)(__m512)(Y), (int)(C),\
6633 (__v16sf)(__m512)(W),\
6634 (__mmask16)(U)))
6636 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6637 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6638 (__v16sf)(__m512)(Y), (int)(C),\
6639 (__v16sf)(__m512)_mm512_setzero_ps(),\
6640 (__mmask16)(U)))
6642 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6643 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6644 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6645 (__mmask8)(-1), (R)))
6647 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6648 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6649 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6650 (__mmask8)(U), (R)))
6652 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6653 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6654 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6655 (__mmask8)(U), (R)))
6657 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6658 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6659 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6660 (__mmask16)(-1), (R)))
6662 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6663 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6664 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6665 (__mmask16)(U), (R)))
6667 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6668 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6669 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6670 (__mmask16)(U), (R)))
6672 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6673 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6674 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6675 (__mmask8)(-1), (R)))
6677 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
6678 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6679 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6680 (__mmask8)(U), (R)))
6682 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
6683 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
6684 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6685 (__mmask8)(U), (R)))
6687 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
6688 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6689 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6690 (__mmask8)(-1), (R)))
6692 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
6693 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6694 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6695 (__mmask8)(U), (R)))
6697 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
6698 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
6699 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6700 (__mmask8)(U), (R)))
6701 #endif
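/* MOVSHDUP / MOVSLDUP: duplicate the odd-indexed (movehdup) or the
   even-indexed (moveldup) single-precision elements of __A. */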
6703 extern __inline __m512
6704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6705 _mm512_movehdup_ps (__m512 __A)
6707 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6708 (__v16sf)
6709 _mm512_undefined_ps (),
6710 (__mmask16) -1);
6713 extern __inline __m512
6714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6715 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6717 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6718 (__v16sf) __W,
6719 (__mmask16) __U);
6722 extern __inline __m512
6723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6726 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6727 (__v16sf)
6728 _mm512_setzero_ps (),
6729 (__mmask16) __U);
6732 extern __inline __m512
6733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734 _mm512_moveldup_ps (__m512 __A)
6736 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6737 (__v16sf)
6738 _mm512_undefined_ps (),
6739 (__mmask16) -1);
6742 extern __inline __m512
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6746 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6747 (__v16sf) __W,
6748 (__mmask16) __U);
6751 extern __inline __m512
6752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6753 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6755 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6756 (__v16sf)
6757 _mm512_setzero_ps (),
6758 (__mmask16) __U);
6761 extern __inline __m512i
6762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6763 _mm512_or_si512 (__m512i __A, __m512i __B)
6765 return (__m512i) ((__v16su) __A | (__v16su) __B);
6768 extern __inline __m512i
6769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6770 _mm512_or_epi32 (__m512i __A, __m512i __B)
6772 return (__m512i) ((__v16su) __A | (__v16su) __B);
6775 extern __inline __m512i
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6779 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6780 (__v16si) __B,
6781 (__v16si) __W,
6782 (__mmask16) __U);
6785 extern __inline __m512i
6786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6789 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6790 (__v16si) __B,
6791 (__v16si)
6792 _mm512_setzero_si512 (),
6793 (__mmask16) __U);
6796 extern __inline __m512i
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_or_epi64 (__m512i __A, __m512i __B)
6800 return (__m512i) ((__v8du) __A | (__v8du) __B);
6803 extern __inline __m512i
6804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6805 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6807 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6808 (__v8di) __B,
6809 (__v8di) __W,
6810 (__mmask8) __U);
6813 extern __inline __m512i
6814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6815 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6817 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6818 (__v8di) __B,
6819 (__v8di)
6820 _mm512_setzero_si512 (),
6821 (__mmask8) __U);
6824 extern __inline __m512i
6825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6826 _mm512_xor_si512 (__m512i __A, __m512i __B)
6828 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6831 extern __inline __m512i
6832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6833 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6835 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
6838 extern __inline __m512i
6839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6840 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6842 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6843 (__v16si) __B,
6844 (__v16si) __W,
6845 (__mmask16) __U);
6848 extern __inline __m512i
6849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6850 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6852 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6853 (__v16si) __B,
6854 (__v16si)
6855 _mm512_setzero_si512 (),
6856 (__mmask16) __U);
6859 extern __inline __m512i
6860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6863 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
6866 extern __inline __m512i
6867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6868 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6870 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6871 (__v8di) __B,
6872 (__v8di) __W,
6873 (__mmask8) __U);
6876 extern __inline __m512i
6877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6880 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6881 (__v8di) __B,
6882 (__v8di)
6883 _mm512_setzero_si512 (),
6884 (__mmask8) __U);
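/* Rotate each 32-bit or 64-bit element left (rol) or right (ror) by
   the constant amount __B. */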
6887 #ifdef __OPTIMIZE__
6888 extern __inline __m512i
6889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890 _mm512_rol_epi32 (__m512i __A, const int __B)
6892 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6893 (__v16si)
6894 _mm512_undefined_si512 (),
6895 (__mmask16) -1);
6898 extern __inline __m512i
6899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6902 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6903 (__v16si) __W,
6904 (__mmask16) __U);
6907 extern __inline __m512i
6908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6909 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6911 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6912 (__v16si)
6913 _mm512_setzero_si512 (),
6914 (__mmask16) __U);
6917 extern __inline __m512i
6918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6919 _mm512_ror_epi32 (__m512i __A, int __B)
6921 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6922 (__v16si)
6923 _mm512_undefined_si512 (),
6924 (__mmask16) -1);
6927 extern __inline __m512i
6928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6929 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6931 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6932 (__v16si) __W,
6933 (__mmask16) __U);
6936 extern __inline __m512i
6937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6940 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6941 (__v16si)
6942 _mm512_setzero_si512 (),
6943 (__mmask16) __U);
6946 extern __inline __m512i
6947 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6948 _mm512_rol_epi64 (__m512i __A, const int __B)
6950 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6951 (__v8di)
6952 _mm512_undefined_si512 (),
6953 (__mmask8) -1);
6956 extern __inline __m512i
6957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6958 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6960 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6961 (__v8di) __W,
6962 (__mmask8) __U);
6965 extern __inline __m512i
6966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6967 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6969 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6970 (__v8di)
6971 _mm512_setzero_si512 (),
6972 (__mmask8) __U);
6975 extern __inline __m512i
6976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 _mm512_ror_epi64 (__m512i __A, int __B)
6979 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6980 (__v8di)
6981 _mm512_undefined_si512 (),
6982 (__mmask8) -1);
6985 extern __inline __m512i
6986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6987 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6989 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6990 (__v8di) __W,
6991 (__mmask8) __U);
6994 extern __inline __m512i
6995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6996 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6998 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6999 (__v8di)
7000 _mm512_setzero_si512 (),
7001 (__mmask8) __U);
7004 #else
7005 #define _mm512_rol_epi32(A, B) \
7006 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7007 (int)(B), \
7008 (__v16si)_mm512_undefined_si512 (), \
7009 (__mmask16)(-1)))
7010 #define _mm512_mask_rol_epi32(W, U, A, B) \
7011 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7012 (int)(B), \
7013 (__v16si)(__m512i)(W), \
7014 (__mmask16)(U)))
7015 #define _mm512_maskz_rol_epi32(U, A, B) \
7016 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
7017 (int)(B), \
7018 (__v16si)_mm512_setzero_si512 (), \
7019 (__mmask16)(U)))
7020 #define _mm512_ror_epi32(A, B) \
7021 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7022 (int)(B), \
7023 (__v16si)_mm512_undefined_si512 (), \
7024 (__mmask16)(-1)))
7025 #define _mm512_mask_ror_epi32(W, U, A, B) \
7026 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7027 (int)(B), \
7028 (__v16si)(__m512i)(W), \
7029 (__mmask16)(U)))
7030 #define _mm512_maskz_ror_epi32(U, A, B) \
7031 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
7032 (int)(B), \
7033 (__v16si)_mm512_setzero_si512 (), \
7034 (__mmask16)(U)))
7035 #define _mm512_rol_epi64(A, B) \
7036 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7037 (int)(B), \
7038 (__v8di)_mm512_undefined_si512 (), \
7039 (__mmask8)(-1)))
7040 #define _mm512_mask_rol_epi64(W, U, A, B) \
7041 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7042 (int)(B), \
7043 (__v8di)(__m512i)(W), \
7044 (__mmask8)(U)))
7045 #define _mm512_maskz_rol_epi64(U, A, B) \
7046 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
7047 (int)(B), \
7048 (__v8di)_mm512_setzero_si512 (), \
7049 (__mmask8)(U)))
7051 #define _mm512_ror_epi64(A, B) \
7052 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7053 (int)(B), \
7054 (__v8di)_mm512_undefined_si512 (), \
7055 (__mmask8)(-1)))
7056 #define _mm512_mask_ror_epi64(W, U, A, B) \
7057 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7058 (int)(B), \
7059 (__v8di)(__m512i)(W), \
7060 (__mmask8)(U)))
7061 #define _mm512_maskz_ror_epi64(U, A, B) \
7062 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
7063 (int)(B), \
7064 (__v8di)_mm512_setzero_si512 (), \
7065 (__mmask8)(U)))
7066 #endif
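/* Bitwise AND and ANDNOT.  The unmasked AND forms are expressed as
   plain vector operations; ANDNOT computes (~__A) & __B.  The masked
   forms merge from __W or zero the elements whose mask bit is
   clear. */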
7068 extern __inline __m512i
7069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7070 _mm512_and_si512 (__m512i __A, __m512i __B)
7072 return (__m512i) ((__v16su) __A & (__v16su) __B);
7075 extern __inline __m512i
7076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7077 _mm512_and_epi32 (__m512i __A, __m512i __B)
7079 return (__m512i) ((__v16su) __A & (__v16su) __B);
7082 extern __inline __m512i
7083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7084 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7086 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7087 (__v16si) __B,
7088 (__v16si) __W,
7089 (__mmask16) __U);
7092 extern __inline __m512i
7093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7094 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7096 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7097 (__v16si) __B,
7098 (__v16si)
7099 _mm512_setzero_si512 (),
7100 (__mmask16) __U);
7103 extern __inline __m512i
7104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7105 _mm512_and_epi64 (__m512i __A, __m512i __B)
7107 return (__m512i) ((__v8du) __A & (__v8du) __B);
7110 extern __inline __m512i
7111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7112 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7114 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7115 (__v8di) __B,
7116 (__v8di) __W, __U);
7119 extern __inline __m512i
7120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7121 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7123 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7124 (__v8di) __B,
7125 (__v8di)
7126 _mm512_setzero_si512 (),
7127 __U);
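/* Usage sketch (editorial addition): _mm512_and_si512/_mm512_and_epi32 are a
   plain lane-wise AND; the _mask_/_maskz_ forms select, per mask bit, between
   the AND result and either the pass-through operand __W or zero.  A minimal
   sketch assuming -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static __m512i
keep_low_nibble_of_even_lanes (__m512i data)
{
  __m512i nib = _mm512_set1_epi32 (0xF);
  /* Even lanes (mask bits 0, 2, 4, ...): data & 0xF.
     Odd lanes: taken unchanged from 'data'.  */
  return _mm512_mask_and_epi32 (data, (__mmask16) 0x5555, data, nib);
}
#endif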
7130 extern __inline __m512i
7131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7132 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7134 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7135 (__v16si) __B,
7136 (__v16si)
7137 _mm512_undefined_si512 (),
7138 (__mmask16) -1);
7141 extern __inline __m512i
7142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7143 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7145 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7146 (__v16si) __B,
7147 (__v16si)
7148 _mm512_undefined_si512 (),
7149 (__mmask16) -1);
7152 extern __inline __m512i
7153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7156 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7157 (__v16si) __B,
7158 (__v16si) __W,
7159 (__mmask16) __U);
7162 extern __inline __m512i
7163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7166 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7167 (__v16si) __B,
7168 (__v16si)
7169 _mm512_setzero_si512 (),
7170 (__mmask16) __U);
7173 extern __inline __m512i
7174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7177 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7178 (__v8di) __B,
7179 (__v8di)
7180 _mm512_undefined_si512 (),
7181 (__mmask8) -1);
7184 extern __inline __m512i
7185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7188 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7189 (__v8di) __B,
7190 (__v8di) __W, __U);
7193 extern __inline __m512i
7194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7195 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7197 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7198 (__v8di) __B,
7199 (__v8di)
7200 _mm512_setzero_si512 (),
7201 __U);
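/* Usage sketch (editorial addition): the pandn-based intrinsics compute
   (~__A) & __B, i.e. the *first* operand is the one that gets complemented,
   which makes them a natural "clear these bits" primitive.  Sketch assuming
   -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static __m512i
clear_sign_bits (__m512i values)
{
  __m512i sign = _mm512_set1_epi32 ((int) 0x80000000u);
  /* (~sign) & values == values with bit 31 of every lane cleared.  */
  return _mm512_andnot_epi32 (sign, values);
}
#endif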
7204 extern __inline __mmask16
7205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7206 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7208 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7209 (__v16si) __B,
7210 (__mmask16) -1);
7213 extern __inline __mmask16
7214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7215 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7217 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7218 (__v16si) __B, __U);
7221 extern __inline __mmask8
7222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7223 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7225 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7226 (__v8di) __B,
7227 (__mmask8) -1);
7230 extern __inline __mmask8
7231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7232 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7234 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7237 extern __inline __mmask16
7238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7239 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7241 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7242 (__v16si) __B,
7243 (__mmask16) -1);
7246 extern __inline __mmask16
7247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7248 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7250 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7251 (__v16si) __B, __U);
7254 extern __inline __mmask8
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7258 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7259 (__v8di) __B,
7260 (__mmask8) -1);
7263 extern __inline __mmask8
7264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7265 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7267 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7268 (__v8di) __B, __U);
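/* Usage sketch (editorial addition): _mm512_test_epi32_mask sets a mask bit
   where (__A & __B) is non-zero in that lane, while _mm512_testn_epi32_mask
   sets it where the AND is zero, so the two results are complementary for the
   same inputs.  Sketch assuming -mavx512f; the helper name is illustrative
   only.  */
#if 0
#include <immintrin.h>

static __mmask16
lanes_with_flag (__m512i words, int flag)
{
  __m512i bit = _mm512_set1_epi32 (flag);
  /* One mask bit per 32-bit lane that has 'flag' set.  */
  return _mm512_test_epi32_mask (words, bit);
}
#endif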
7271 extern __inline __m512i
7272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7275 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7276 (__v16si) __B,
7277 (__v16si)
7278 _mm512_undefined_si512 (),
7279 (__mmask16) -1);
7282 extern __inline __m512i
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7285 __m512i __B)
7287 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7288 (__v16si) __B,
7289 (__v16si) __W,
7290 (__mmask16) __U);
7293 extern __inline __m512i
7294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7295 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7297 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7298 (__v16si) __B,
7299 (__v16si)
7300 _mm512_setzero_si512 (),
7301 (__mmask16) __U);
7304 extern __inline __m512i
7305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7306 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7308 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7309 (__v8di) __B,
7310 (__v8di)
7311 _mm512_undefined_si512 (),
7312 (__mmask8) -1);
7315 extern __inline __m512i
7316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7317 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7319 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7320 (__v8di) __B,
7321 (__v8di) __W,
7322 (__mmask8) __U);
7325 extern __inline __m512i
7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7329 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7330 (__v8di) __B,
7331 (__v8di)
7332 _mm512_setzero_si512 (),
7333 (__mmask8) __U);
7336 extern __inline __m512i
7337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7338 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7340 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7341 (__v16si) __B,
7342 (__v16si)
7343 _mm512_undefined_si512 (),
7344 (__mmask16) -1);
7347 extern __inline __m512i
7348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7349 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7350 __m512i __B)
7352 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7353 (__v16si) __B,
7354 (__v16si) __W,
7355 (__mmask16) __U);
7358 extern __inline __m512i
7359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7360 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7362 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7363 (__v16si) __B,
7364 (__v16si)
7365 _mm512_setzero_si512 (),
7366 (__mmask16) __U);
7369 extern __inline __m512i
7370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7371 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7373 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7374 (__v8di) __B,
7375 (__v8di)
7376 _mm512_undefined_si512 (),
7377 (__mmask8) -1);
7380 extern __inline __m512i
7381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7382 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7384 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7385 (__v8di) __B,
7386 (__v8di) __W,
7387 (__mmask8) __U);
7390 extern __inline __m512i
7391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7394 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7395 (__v8di) __B,
7396 (__v8di)
7397 _mm512_setzero_si512 (),
7398 (__mmask8) __U);
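/* Usage sketch (editorial addition): as with the SSE/AVX versions, the 512-bit
   unpack intrinsics interleave within each 128-bit lane, not across the whole
   register; _mm512_unpacklo_epi32 yields
   A0 B0 A1 B1 | A4 B4 A5 B5 | A8 B8 A9 B9 | A12 B12 A13 B13.  Sketch assuming
   -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static void
interleave_demo (const int *a, const int *b, int *lo, int *hi)
{
  __m512i va = _mm512_loadu_si512 ((const void *) a);
  __m512i vb = _mm512_loadu_si512 ((const void *) b);
  _mm512_storeu_si512 ((void *) lo, _mm512_unpacklo_epi32 (va, vb));
  _mm512_storeu_si512 ((void *) hi, _mm512_unpackhi_epi32 (va, vb));
}
#endif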
7401 #ifdef __x86_64__
7402 #ifdef __OPTIMIZE__
7403 extern __inline unsigned long long
7404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7405 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7407 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7410 extern __inline long long
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7414 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7417 extern __inline long long
7418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7419 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7421 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7424 extern __inline unsigned long long
7425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7426 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7428 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7431 extern __inline long long
7432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7433 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7435 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7438 extern __inline long long
7439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7440 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7442 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7444 #else
7445 #define _mm_cvt_roundss_u64(A, B) \
7446 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7448 #define _mm_cvt_roundss_si64(A, B) \
7449 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7451 #define _mm_cvt_roundss_i64(A, B) \
7452 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7454 #define _mm_cvtt_roundss_u64(A, B) \
7455 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7457 #define _mm_cvtt_roundss_i64(A, B) \
7458 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7460 #define _mm_cvtt_roundss_si64(A, B) \
7461 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7462 #endif
7463 #endif
7465 #ifdef __OPTIMIZE__
7466 extern __inline unsigned
7467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7468 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7470 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7473 extern __inline int
7474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7475 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7477 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7480 extern __inline int
7481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7482 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7484 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7487 extern __inline unsigned
7488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7491 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7494 extern __inline int
7495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7496 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7498 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7501 extern __inline int
7502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7503 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7505 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7507 #else
7508 #define _mm_cvt_roundss_u32(A, B) \
7509 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7511 #define _mm_cvt_roundss_si32(A, B) \
7512 ((int)__builtin_ia32_vcvtss2si32(A, B))
7514 #define _mm_cvt_roundss_i32(A, B) \
7515 ((int)__builtin_ia32_vcvtss2si32(A, B))
7517 #define _mm_cvtt_roundss_u32(A, B) \
7518 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7520 #define _mm_cvtt_roundss_si32(A, B) \
7521 ((int)__builtin_ia32_vcvttss2si32(A, B))
7523 #define _mm_cvtt_roundss_i32(A, B) \
7524 ((int)__builtin_ia32_vcvttss2si32(A, B))
7525 #endif
7527 #ifdef __x86_64__
7528 #ifdef __OPTIMIZE__
7529 extern __inline unsigned long long
7530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7531 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7533 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7536 extern __inline long long
7537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7538 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7540 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7543 extern __inline long long
7544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7545 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7547 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7550 extern __inline unsigned long long
7551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7552 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7554 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7557 extern __inline long long
7558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7559 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7561 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7564 extern __inline long long
7565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7566 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7568 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7570 #else
7571 #define _mm_cvt_roundsd_u64(A, B) \
7572 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7574 #define _mm_cvt_roundsd_si64(A, B) \
7575 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7577 #define _mm_cvt_roundsd_i64(A, B) \
7578 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7580 #define _mm_cvtt_roundsd_u64(A, B) \
7581 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7583 #define _mm_cvtt_roundsd_si64(A, B) \
7584 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7586 #define _mm_cvtt_roundsd_i64(A, B) \
7587 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7588 #endif
7589 #endif
7591 #ifdef __OPTIMIZE__
7592 extern __inline unsigned
7593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7594 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7596 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7599 extern __inline int
7600 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7601 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7603 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7606 extern __inline int
7607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7608 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7610 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7613 extern __inline unsigned
7614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7615 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7617 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7620 extern __inline int
7621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7622 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7624 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7627 extern __inline int
7628 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7631 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7633 #else
7634 #define _mm_cvt_roundsd_u32(A, B) \
7635 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7637 #define _mm_cvt_roundsd_si32(A, B) \
7638 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7640 #define _mm_cvt_roundsd_i32(A, B) \
7641 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7643 #define _mm_cvtt_roundsd_u32(A, B) \
7644 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7646 #define _mm_cvtt_roundsd_si32(A, B) \
7647 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7649 #define _mm_cvtt_roundsd_i32(A, B) \
7650 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7651 #endif
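/* Usage sketch (editorial addition): the *_round* scalar conversions take the
   rounding control as an immediate instead of reading MXCSR, and the cvtt
   (truncating) forms only accept the exception-suppression flag.  Sketch
   assuming -mavx512f and the _MM_FROUND_* macros that <immintrin.h> pulls in
   from <smmintrin.h>; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static void
scalar_convert_demo (double d, unsigned *nearest, int *truncated)
{
  __m128d x = _mm_set_sd (d);
  /* Round-to-nearest-even regardless of the current MXCSR rounding mode.  */
  *nearest = _mm_cvt_roundsd_u32 (x, _MM_FROUND_TO_NEAREST_INT
				     | _MM_FROUND_NO_EXC);
  /* Truncating form; only SAE (suppress-all-exceptions) can be requested.  */
  *truncated = _mm_cvtt_roundsd_i32 (x, _MM_FROUND_NO_EXC);
}
#endif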
7653 extern __inline __m512d
7654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7655 _mm512_movedup_pd (__m512d __A)
7657 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7658 (__v8df)
7659 _mm512_undefined_pd (),
7660 (__mmask8) -1);
7663 extern __inline __m512d
7664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7665 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7667 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7668 (__v8df) __W,
7669 (__mmask8) __U);
7672 extern __inline __m512d
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7676 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7677 (__v8df)
7678 _mm512_setzero_pd (),
7679 (__mmask8) __U);
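/* Usage sketch (editorial addition): _mm512_movedup_pd copies each
   even-indexed double into the odd slot above it, so the result is
   A0 A0 A2 A2 A4 A4 A6 A6; the masked forms merge or zero per lane.  Sketch
   assuming -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static void
movedup_demo (const double *src, double *dst)
{
  __m512d v = _mm512_loadu_pd (src);
  _mm512_storeu_pd (dst, _mm512_movedup_pd (v));  /* dst = s0 s0 s2 s2 ...  */
}
#endif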
7682 extern __inline __m512d
7683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7686 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7687 (__v8df) __B,
7688 (__v8df)
7689 _mm512_undefined_pd (),
7690 (__mmask8) -1);
7693 extern __inline __m512d
7694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7695 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7697 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7698 (__v8df) __B,
7699 (__v8df) __W,
7700 (__mmask8) __U);
7703 extern __inline __m512d
7704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7705 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7707 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7708 (__v8df) __B,
7709 (__v8df)
7710 _mm512_setzero_pd (),
7711 (__mmask8) __U);
7714 extern __inline __m512d
7715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7716 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7718 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7719 (__v8df) __B,
7720 (__v8df)
7721 _mm512_undefined_pd (),
7722 (__mmask8) -1);
7725 extern __inline __m512d
7726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7727 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7729 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7730 (__v8df) __B,
7731 (__v8df) __W,
7732 (__mmask8) __U);
7735 extern __inline __m512d
7736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7737 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7739 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7740 (__v8df) __B,
7741 (__v8df)
7742 _mm512_setzero_pd (),
7743 (__mmask8) __U);
7746 extern __inline __m512
7747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7748 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7750 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7751 (__v16sf) __B,
7752 (__v16sf)
7753 _mm512_undefined_ps (),
7754 (__mmask16) -1);
7757 extern __inline __m512
7758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7759 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7761 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7762 (__v16sf) __B,
7763 (__v16sf) __W,
7764 (__mmask16) __U);
7767 extern __inline __m512
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7771 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7772 (__v16sf) __B,
7773 (__v16sf)
7774 _mm512_setzero_ps (),
7775 (__mmask16) __U);
7778 #ifdef __OPTIMIZE__
7779 extern __inline __m512d
7780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7781 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7783 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7784 (__v8df)
7785 _mm512_undefined_pd (),
7786 (__mmask8) -1, __R);
7789 extern __inline __m512d
7790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7791 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7792 const int __R)
7794 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7795 (__v8df) __W,
7796 (__mmask8) __U, __R);
7799 extern __inline __m512d
7800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7801 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7803 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7804 (__v8df)
7805 _mm512_setzero_pd (),
7806 (__mmask8) __U, __R);
7809 extern __inline __m512
7810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7811 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7813 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7814 (__v16sf)
7815 _mm512_undefined_ps (),
7816 (__mmask16) -1, __R);
7819 extern __inline __m512
7820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7821 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7822 const int __R)
7824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7825 (__v16sf) __W,
7826 (__mmask16) __U, __R);
7829 extern __inline __m512
7830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7831 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7833 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7834 (__v16sf)
7835 _mm512_setzero_ps (),
7836 (__mmask16) __U, __R);
7839 extern __inline __m256i
7840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7841 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7843 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7844 __I,
7845 (__v16hi)
7846 _mm256_undefined_si256 (),
7847 -1);
7850 extern __inline __m256i
7851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7852 _mm512_cvtps_ph (__m512 __A, const int __I)
7854 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7855 __I,
7856 (__v16hi)
7857 _mm256_undefined_si256 (),
7858 -1);
7861 extern __inline __m256i
7862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7863 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7864 const int __I)
7866 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7867 __I,
7868 (__v16hi) __U,
7869 (__mmask16) __W);
7872 extern __inline __m256i
7873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7874 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7876 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7877 __I,
7878 (__v16hi) __U,
7879 (__mmask16) __W);
7882 extern __inline __m256i
7883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7886 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7887 __I,
7888 (__v16hi)
7889 _mm256_setzero_si256 (),
7890 (__mmask16) __W);
7893 extern __inline __m256i
7894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7895 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7897 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7898 __I,
7899 (__v16hi)
7900 _mm256_setzero_si256 (),
7901 (__mmask16) __W);
7903 #else
7904 #define _mm512_cvt_roundps_pd(A, B) \
7905 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
7907 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7908 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
7910 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7911 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
7913 #define _mm512_cvt_roundph_ps(A, B) \
7914 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
7916 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7917 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
7919 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7920 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
7922 #define _mm512_cvt_roundps_ph(A, I) \
7923 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7924 (__v16hi)_mm256_undefined_si256 (), -1))
7925 #define _mm512_cvtps_ph(A, I) \
7926 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7927 (__v16hi)_mm256_undefined_si256 (), -1))
7928 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7929 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7930 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7931 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7932 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7933 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7934 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7935 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7936 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7937 #define _mm512_maskz_cvtps_ph(W, A, I) \
7938 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7939 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7940 #endif
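/* Usage sketch (editorial addition): _mm512_cvtps_ph narrows sixteen floats to
   sixteen IEEE half-precision values in a __m256i, and _mm512_cvt_roundph_ps
   widens them back; the immediate selects the narrowing rounding mode.  Sketch
   assuming -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static __m512
compress_to_fp16_and_back (__m512 x)
{
  /* Narrow with round-to-nearest-even, then widen again; the difference
     from 'x' is the half-precision quantisation error.  */
  __m256i h = _mm512_cvtps_ph (x, _MM_FROUND_TO_NEAREST_INT);
  return _mm512_cvt_roundph_ps (h, _MM_FROUND_CUR_DIRECTION);
}
#endif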
7942 #ifdef __OPTIMIZE__
7943 extern __inline __m256
7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7947 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7948 (__v8sf)
7949 _mm256_undefined_ps (),
7950 (__mmask8) -1, __R);
7953 extern __inline __m256
7954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7956 const int __R)
7958 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7959 (__v8sf) __W,
7960 (__mmask8) __U, __R);
7963 extern __inline __m256
7964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7965 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7967 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7968 (__v8sf)
7969 _mm256_setzero_ps (),
7970 (__mmask8) __U, __R);
7973 extern __inline __m128
7974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7975 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7977 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7978 (__v2df) __B,
7979 __R);
7982 extern __inline __m128d
7983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7984 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7986 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7987 (__v4sf) __B,
7988 __R);
7990 #else
7991 #define _mm512_cvt_roundpd_ps(A, B) \
7992 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
7994 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
7995 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
7997 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
7998 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
8000 #define _mm_cvt_roundsd_ss(A, B, C) \
8001 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
8003 #define _mm_cvt_roundss_sd(A, B, C) \
8004 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
8005 #endif
8007 extern __inline void
8008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8009 _mm512_stream_si512 (__m512i * __P, __m512i __A)
8011 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
8014 extern __inline void
8015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8016 _mm512_stream_ps (float *__P, __m512 __A)
8018 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
8021 extern __inline void
8022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8023 _mm512_stream_pd (double *__P, __m512d __A)
8025 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
8028 extern __inline __m512i
8029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8030 _mm512_stream_load_si512 (void *__P)
8032 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
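/* Usage sketch (editorial addition): the stream intrinsics issue non-temporal
   stores and loads that bypass the cache hierarchy; the address must be
   64-byte aligned, and an _mm_sfence () is needed before other threads read
   the data through ordinary loads.  Sketch assuming -mavx512f; the buffer and
   helper names are illustrative only.  */
#if 0
#include <immintrin.h>
#include <stddef.h>

static float big_buffer[1 << 20] __attribute__ ((aligned (64)));

static void
fill_without_polluting_cache (float value)
{
  __m512 v = _mm512_set1_ps (value);
  for (size_t i = 0; i < sizeof (big_buffer) / sizeof (big_buffer[0]); i += 16)
    _mm512_stream_ps (&big_buffer[i], v);
  _mm_sfence ();  /* Make the non-temporal stores globally visible.  */
}
#endif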
8035 /* Constants for mantissa extraction.  */
8036 typedef enum
8038 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8039 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8040 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8041 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8042 } _MM_MANTISSA_NORM_ENUM;
8044 typedef enum
8046 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8047 _MM_MANT_SIGN_zero, /* sign = 0 */
8048 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8049 } _MM_MANTISSA_SIGN_ENUM;
8051 #ifdef __OPTIMIZE__
8052 extern __inline __m128
8053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8054 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
8056 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
8057 (__v4sf) __B,
8058 __R);
8061 extern __inline __m128d
8062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
8065 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
8066 (__v2df) __B,
8067 __R);
8070 extern __inline __m512
8071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8072 _mm512_getexp_round_ps (__m512 __A, const int __R)
8074 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8075 (__v16sf)
8076 _mm512_undefined_ps (),
8077 (__mmask16) -1, __R);
8080 extern __inline __m512
8081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8082 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8083 const int __R)
8085 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8086 (__v16sf) __W,
8087 (__mmask16) __U, __R);
8090 extern __inline __m512
8091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8092 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
8094 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8095 (__v16sf)
8096 _mm512_setzero_ps (),
8097 (__mmask16) __U, __R);
8100 extern __inline __m512d
8101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8102 _mm512_getexp_round_pd (__m512d __A, const int __R)
8104 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8105 (__v8df)
8106 _mm512_undefined_pd (),
8107 (__mmask8) -1, __R);
8110 extern __inline __m512d
8111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8112 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8113 const int __R)
8115 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8116 (__v8df) __W,
8117 (__mmask8) __U, __R);
8120 extern __inline __m512d
8121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8122 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8124 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8125 (__v8df)
8126 _mm512_setzero_pd (),
8127 (__mmask8) __U, __R);
8130 extern __inline __m512d
8131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8132 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8133 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8135 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8136 (__C << 2) | __B,
8137 _mm512_undefined_pd (),
8138 (__mmask8) -1, __R);
8141 extern __inline __m512d
8142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8143 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8144 _MM_MANTISSA_NORM_ENUM __B,
8145 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8147 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8148 (__C << 2) | __B,
8149 (__v8df) __W, __U,
8150 __R);
8153 extern __inline __m512d
8154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8155 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8156 _MM_MANTISSA_NORM_ENUM __B,
8157 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8159 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8160 (__C << 2) | __B,
8161 (__v8df)
8162 _mm512_setzero_pd (),
8163 __U, __R);
8166 extern __inline __m512
8167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8168 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8169 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8171 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8172 (__C << 2) | __B,
8173 _mm512_undefined_ps (),
8174 (__mmask16) -1, __R);
8177 extern __inline __m512
8178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8179 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8180 _MM_MANTISSA_NORM_ENUM __B,
8181 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8183 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8184 (__C << 2) | __B,
8185 (__v16sf) __W, __U,
8186 __R);
8189 extern __inline __m512
8190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8192 _MM_MANTISSA_NORM_ENUM __B,
8193 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8195 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8196 (__C << 2) | __B,
8197 (__v16sf)
8198 _mm512_setzero_ps (),
8199 __U, __R);
8202 extern __inline __m128d
8203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8204 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8205 _MM_MANTISSA_NORM_ENUM __C,
8206 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8208 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8209 (__v2df) __B,
8210 (__D << 2) | __C,
8211 __R);
8214 extern __inline __m128
8215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8216 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8217 _MM_MANTISSA_NORM_ENUM __C,
8218 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8220 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8221 (__v4sf) __B,
8222 (__D << 2) | __C,
8223 __R);
8226 #else
8227 #define _mm512_getmant_round_pd(X, B, C, R) \
8228 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8229 (int)(((C)<<2) | (B)), \
8230 (__v8df)(__m512d)_mm512_undefined_pd(), \
8231 (__mmask8)-1,\
8232 (R)))
8234 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8235 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8236 (int)(((C)<<2) | (B)), \
8237 (__v8df)(__m512d)(W), \
8238 (__mmask8)(U),\
8239 (R)))
8241 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8242 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8243 (int)(((C)<<2) | (B)), \
8244 (__v8df)(__m512d)_mm512_setzero_pd(), \
8245 (__mmask8)(U),\
8246 (R)))
8247 #define _mm512_getmant_round_ps(X, B, C, R) \
8248 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8249 (int)(((C)<<2) | (B)), \
8250 (__v16sf)(__m512)_mm512_undefined_ps(), \
8251 (__mmask16)-1,\
8252 (R)))
8254 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8255 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8256 (int)(((C)<<2) | (B)), \
8257 (__v16sf)(__m512)(W), \
8258 (__mmask16)(U),\
8259 (R)))
8261 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8262 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8263 (int)(((C)<<2) | (B)), \
8264 (__v16sf)(__m512)_mm512_setzero_ps(), \
8265 (__mmask16)(U),\
8266 (R)))
8267 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8268 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8269 (__v2df)(__m128d)(Y), \
8270 (int)(((D)<<2) | (C)), \
8271 (R)))
8273 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8274 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8275 (__v4sf)(__m128)(Y), \
8276 (int)(((D)<<2) | (C)), \
8277 (R)))
8279 #define _mm_getexp_round_ss(A, B, R) \
8280 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8282 #define _mm_getexp_round_sd(A, B, R) \
8283 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8285 #define _mm512_getexp_round_ps(A, R) \
8286 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8287 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
8289 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8290 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8291 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8293 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8294 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8295 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8297 #define _mm512_getexp_round_pd(A, R) \
8298 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8299 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
8301 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8302 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8303 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8305 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8306 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8307 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8308 #endif
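/* Usage sketch (editorial addition): getexp and getmant decompose each element
   into exponent and mantissa, so that for finite non-zero x,
   x == getmant (x, [1,2), sign_src) * 2^getexp (x); the *_round_* variants
   above additionally take an SAE/rounding immediate.  Sketch assuming
   -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static void
decompose_demo (const double *src, double *exponents, double *mantissas)
{
  __m512d x = _mm512_loadu_pd (src);
  _mm512_storeu_pd (exponents,
		    _mm512_getexp_round_pd (x, _MM_FROUND_CUR_DIRECTION));
  _mm512_storeu_pd (mantissas,
		    _mm512_getmant_round_pd (x, _MM_MANT_NORM_1_2,
					     _MM_MANT_SIGN_src,
					     _MM_FROUND_CUR_DIRECTION));
}
#endif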
8310 #ifdef __OPTIMIZE__
8311 extern __inline __m512
8312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8313 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8315 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8316 (__v16sf)
8317 _mm512_undefined_ps (),
8318 -1, __R);
8321 extern __inline __m512
8322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8323 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8324 const int __imm, const int __R)
8326 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8327 (__v16sf) __A,
8328 (__mmask16) __B, __R);
8331 extern __inline __m512
8332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8333 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8334 const int __imm, const int __R)
8336 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8337 __imm,
8338 (__v16sf)
8339 _mm512_setzero_ps (),
8340 (__mmask16) __A, __R);
8343 extern __inline __m512d
8344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8347 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8348 (__v8df)
8349 _mm512_undefined_pd (),
8350 -1, __R);
8353 extern __inline __m512d
8354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8356 __m512d __C, const int __imm, const int __R)
8358 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8359 (__v8df) __A,
8360 (__mmask8) __B, __R);
8363 extern __inline __m512d
8364 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8366 const int __imm, const int __R)
8368 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8369 __imm,
8370 (__v8df)
8371 _mm512_setzero_pd (),
8372 (__mmask8) __A, __R);
8375 extern __inline __m128
8376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8377 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8379 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8380 (__v4sf) __B, __imm, __R);
8383 extern __inline __m128d
8384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8385 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8386 const int __R)
8388 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8389 (__v2df) __B, __imm, __R);
8392 #else
8393 #define _mm512_roundscale_round_ps(A, B, R) \
8394 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8395 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
8396 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8397 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8398 (int)(D), \
8399 (__v16sf)(__m512)(A), \
8400 (__mmask16)(B), R))
8401 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8402 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8403 (int)(C), \
8404 (__v16sf)_mm512_setzero_ps(),\
8405 (__mmask16)(A), R))
8406 #define _mm512_roundscale_round_pd(A, B, R) \
8407 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8408 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
8409 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8410 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8411 (int)(D), \
8412 (__v8df)(__m512d)(A), \
8413 (__mmask8)(B), R))
8414 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8415 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8416 (int)(C), \
8417 (__v8df)_mm512_setzero_pd(),\
8418 (__mmask8)(A), R))
8419 #define _mm_roundscale_round_ss(A, B, C, R) \
8420 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8421 (__v4sf)(__m128)(B), (int)(C), R))
8422 #define _mm_roundscale_round_sd(A, B, C, R) \
8423 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8424 (__v2df)(__m128d)(B), (int)(C), R))
8425 #endif
8427 extern __inline __m512
8428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8429 _mm512_floor_ps (__m512 __A)
8431 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8432 _MM_FROUND_FLOOR,
8433 (__v16sf) __A, -1,
8434 _MM_FROUND_CUR_DIRECTION);
8437 extern __inline __m512d
8438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8439 _mm512_floor_pd (__m512d __A)
8441 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8442 _MM_FROUND_FLOOR,
8443 (__v8df) __A, -1,
8444 _MM_FROUND_CUR_DIRECTION);
8447 extern __inline __m512
8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449 _mm512_ceil_ps (__m512 __A)
8451 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8452 _MM_FROUND_CEIL,
8453 (__v16sf) __A, -1,
8454 _MM_FROUND_CUR_DIRECTION);
8457 extern __inline __m512d
8458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8459 _mm512_ceil_pd (__m512d __A)
8461 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8462 _MM_FROUND_CEIL,
8463 (__v8df) __A, -1,
8464 _MM_FROUND_CUR_DIRECTION);
8467 extern __inline __m512
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8471 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8472 _MM_FROUND_FLOOR,
8473 (__v16sf) __W, __U,
8474 _MM_FROUND_CUR_DIRECTION);
8477 extern __inline __m512d
8478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8479 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8481 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8482 _MM_FROUND_FLOOR,
8483 (__v8df) __W, __U,
8484 _MM_FROUND_CUR_DIRECTION);
8487 extern __inline __m512
8488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8489 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8491 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8492 _MM_FROUND_CEIL,
8493 (__v16sf) __W, __U,
8494 _MM_FROUND_CUR_DIRECTION);
8497 extern __inline __m512d
8498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8499 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8501 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8502 _MM_FROUND_CEIL,
8503 (__v8df) __W, __U,
8504 _MM_FROUND_CUR_DIRECTION);
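/* Usage sketch (editorial addition): _mm512_floor_ps/_mm512_ceil_ps are thin
   wrappers around VRNDSCALE with _MM_FROUND_FLOOR/_MM_FROUND_CEIL, rounding
   every lane to an integral value without changing the element type.  Sketch
   assuming -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static void
round_both_ways (const float *src, float *lo, float *hi)
{
  __m512 v = _mm512_loadu_ps (src);
  _mm512_storeu_ps (lo, _mm512_floor_ps (v));  /* round toward -inf  */
  _mm512_storeu_ps (hi, _mm512_ceil_ps (v));   /* round toward +inf  */
}
#endif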
8507 #ifdef __OPTIMIZE__
8508 extern __inline __m512i
8509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8510 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8512 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8513 (__v16si) __B, __imm,
8514 (__v16si)
8515 _mm512_undefined_si512 (),
8516 (__mmask16) -1);
8519 extern __inline __m512i
8520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8521 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8522 __m512i __B, const int __imm)
8524 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8525 (__v16si) __B, __imm,
8526 (__v16si) __W,
8527 (__mmask16) __U);
8530 extern __inline __m512i
8531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8532 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8533 const int __imm)
8535 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8536 (__v16si) __B, __imm,
8537 (__v16si)
8538 _mm512_setzero_si512 (),
8539 (__mmask16) __U);
8542 extern __inline __m512i
8543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8546 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8547 (__v8di) __B, __imm,
8548 (__v8di)
8549 _mm512_undefined_si512 (),
8550 (__mmask8) -1);
8553 extern __inline __m512i
8554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8556 __m512i __B, const int __imm)
8558 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8559 (__v8di) __B, __imm,
8560 (__v8di) __W,
8561 (__mmask8) __U);
8564 extern __inline __m512i
8565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8567 const int __imm)
8569 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8570 (__v8di) __B, __imm,
8571 (__v8di)
8572 _mm512_setzero_si512 (),
8573 (__mmask8) __U);
8575 #else
8576 #define _mm512_alignr_epi32(X, Y, C) \
8577 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8578 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
8579 (__mmask16)-1))
8581 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8582 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8583 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8584 (__mmask16)(U)))
8586 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8587 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8588 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
8589 (__mmask16)(U)))
8591 #define _mm512_alignr_epi64(X, Y, C) \
8592 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8593 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
8594 (__mmask8)-1))
8596 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8597 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8598 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8600 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8601 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8602 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
8603 (__mmask8)(U)))
8604 #endif
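/* Usage sketch (editorial addition): _mm512_alignr_epi32 concatenates __A:__B
   (with __A in the upper half), shifts the 32-element pair right by the
   immediate number of 32-bit elements and keeps the low 16, which makes it a
   one-instruction sliding window across two consecutive loads.  Sketch
   assuming -mavx512f; the helper name is illustrative only.  */
#if 0
#include <immintrin.h>

static __m512i
window_shifted_by_three (const int *p)
{
  __m512i cur = _mm512_loadu_si512 ((const void *) p);
  __m512i next = _mm512_loadu_si512 ((const void *) (p + 16));
  /* Result lanes are p[3] .. p[18]: cur[3..15] followed by next[0..2].  */
  return _mm512_alignr_epi32 (next, cur, 3);
}
#endif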
8606 extern __inline __mmask16
8607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8608 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8610 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8611 (__v16si) __B,
8612 (__mmask16) -1);
8615 extern __inline __mmask16
8616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8619 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8620 (__v16si) __B, __U);
8623 extern __inline __mmask8
8624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8625 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8627 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8628 (__v8di) __B, __U);
8631 extern __inline __mmask8
8632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8633 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8635 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8636 (__v8di) __B,
8637 (__mmask8) -1);
8640 extern __inline __mmask16
8641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8644 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8645 (__v16si) __B,
8646 (__mmask16) -1);
8649 extern __inline __mmask16
8650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8653 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8654 (__v16si) __B, __U);
8657 extern __inline __mmask8
8658 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8659 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8661 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8662 (__v8di) __B, __U);
8665 extern __inline __mmask8
8666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8667 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8669 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8670 (__v8di) __B,
8671 (__mmask8) -1);
8674 extern __inline __mmask16
8675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8678 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8679 (__v16si) __Y, 5,
8680 (__mmask16) -1);
8683 extern __inline __mmask16
8684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8687 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8688 (__v16si) __Y, 5,
8689 (__mmask16) __M);
8692 extern __inline __mmask16
8693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8696 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8697 (__v16si) __Y, 5,
8698 (__mmask16) __M);
8701 extern __inline __mmask16
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8705 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8706 (__v16si) __Y, 5,
8707 (__mmask16) -1);
8710 extern __inline __mmask8
8711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8714 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8715 (__v8di) __Y, 5,
8716 (__mmask8) __M);
8719 extern __inline __mmask8
8720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8723 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8724 (__v8di) __Y, 5,
8725 (__mmask8) -1);
8728 extern __inline __mmask8
8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8732 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8733 (__v8di) __Y, 5,
8734 (__mmask8) __M);
8737 extern __inline __mmask8
8738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8739 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8741 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8742 (__v8di) __Y, 5,
8743 (__mmask8) -1);
8746 extern __inline __mmask16
8747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8748 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8750 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8751 (__v16si) __Y, 2,
8752 (__mmask16) __M);
8755 extern __inline __mmask16
8756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8757 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8759 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8760 (__v16si) __Y, 2,
8761 (__mmask16) -1);
8764 extern __inline __mmask16
8765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8768 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8769 (__v16si) __Y, 2,
8770 (__mmask16) __M);
8773 extern __inline __mmask16
8774 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8775 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8777 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8778 (__v16si) __Y, 2,
8779 (__mmask16) -1);
8782 extern __inline __mmask8
8783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8784 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8786 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8787 (__v8di) __Y, 2,
8788 (__mmask8) __M);
8791 extern __inline __mmask8
8792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8795 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8796 (__v8di) __Y, 2,
8797 (__mmask8) -1);
8800 extern __inline __mmask8
8801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8802 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8804 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8805 (__v8di) __Y, 2,
8806 (__mmask8) __M);
8809 extern __inline __mmask8
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8813 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8814 (__v8di) __Y, 2,
8815 (__mmask8) -1);
8818 extern __inline __mmask16
8819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8820 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8822 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8823 (__v16si) __Y, 1,
8824 (__mmask16) __M);
8827 extern __inline __mmask16
8828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8829 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8831 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8832 (__v16si) __Y, 1,
8833 (__mmask16) -1);
8836 extern __inline __mmask16
8837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8838 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8840 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8841 (__v16si) __Y, 1,
8842 (__mmask16) __M);
8845 extern __inline __mmask16
8846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8849 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8850 (__v16si) __Y, 1,
8851 (__mmask16) -1);
8854 extern __inline __mmask8
8855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8858 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8859 (__v8di) __Y, 1,
8860 (__mmask8) __M);
8863 extern __inline __mmask8
8864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8867 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8868 (__v8di) __Y, 1,
8869 (__mmask8) -1);
8872 extern __inline __mmask8
8873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8874 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8876 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8877 (__v8di) __Y, 1,
8878 (__mmask8) __M);
8881 extern __inline __mmask8
8882 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8883 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8885 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8886 (__v8di) __Y, 1,
8887 (__mmask8) -1);
8890 extern __inline __mmask16
8891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8892 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8894 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8895 (__v16si) __Y, 4,
8896 (__mmask16) -1);
8899 extern __inline __mmask16
8900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8901 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8903 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8904 (__v16si) __Y, 4,
8905 (__mmask16) __M);
8908 extern __inline __mmask16
8909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
8912 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8913 (__v16si) __Y, 4,
8914 (__mmask16) __M);
8917 extern __inline __mmask16
8918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8919 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8921 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8922 (__v16si) __Y, 4,
8923 (__mmask16) -1);
8926 extern __inline __mmask8
8927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8928 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8930 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8931 (__v8di) __Y, 4,
8932 (__mmask8) __M);
8935 extern __inline __mmask8
8936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8937 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8939 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8940 (__v8di) __Y, 4,
8941 (__mmask8) -1);
8944 extern __inline __mmask8
8945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8946 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
8948 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8949 (__v8di) __Y, 4,
8950 (__mmask8) __M);
8953 extern __inline __mmask8
8954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8955 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8957 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8958 (__v8di) __Y, 4,
8959 (__mmask8) -1);
8962 #define _MM_CMPINT_EQ 0x0
8963 #define _MM_CMPINT_LT 0x1
8964 #define _MM_CMPINT_LE 0x2
8965 #define _MM_CMPINT_UNUSED 0x3
8966 #define _MM_CMPINT_NE 0x4
8967 #define _MM_CMPINT_NLT 0x5
8968 #define _MM_CMPINT_GE 0x5
8969 #define _MM_CMPINT_NLE 0x6
8970 #define _MM_CMPINT_GT 0x6
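/* Illustrative usage (added for exposition; not part of the upstream
   header): the _MM_CMPINT_* predicates above are intended as the
   immediate operand of the generic integer compares defined below,
   e.g. _mm512_cmp_epi32_mask.  Assuming <immintrin.h> is included and
   the target supports AVX-512F:

     __m512i a = _mm512_set1_epi32 (1);
     __m512i b = _mm512_set1_epi32 (2);
     __mmask16 k = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_LT);
     // k == 0xFFFF: every 32-bit lane of a is less than the one in b.  */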
8972 #ifdef __OPTIMIZE__
8973 extern __inline __mmask8
8974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8975 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8977 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8978 (__v8di) __Y, __P,
8979 (__mmask8) -1);
8982 extern __inline __mmask16
8983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8984 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8986 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8987 (__v16si) __Y, __P,
8988 (__mmask16) -1);
8991 extern __inline __mmask8
8992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8993 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8995 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8996 (__v8di) __Y, __P,
8997 (__mmask8) -1);
9000 extern __inline __mmask16
9001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9002 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
9004 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9005 (__v16si) __Y, __P,
9006 (__mmask16) -1);
9009 extern __inline __mmask8
9010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
9012 const int __R)
9014 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9015 (__v8df) __Y, __P,
9016 (__mmask8) -1, __R);
9019 extern __inline __mmask16
9020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9021 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
9023 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9024 (__v16sf) __Y, __P,
9025 (__mmask16) -1, __R);
9028 extern __inline __mmask8
9029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9030 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9031 const int __P)
9033 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
9034 (__v8di) __Y, __P,
9035 (__mmask8) __U);
9038 extern __inline __mmask16
9039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9040 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9041 const int __P)
9043 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
9044 (__v16si) __Y, __P,
9045 (__mmask16) __U);
9048 extern __inline __mmask8
9049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9050 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
9051 const int __P)
9053 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
9054 (__v8di) __Y, __P,
9055 (__mmask8) __U);
9058 extern __inline __mmask16
9059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9060 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
9061 const int __P)
9063 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
9064 (__v16si) __Y, __P,
9065 (__mmask16) __U);
9068 extern __inline __mmask8
9069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9070 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
9071 const int __P, const int __R)
9073 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
9074 (__v8df) __Y, __P,
9075 (__mmask8) __U, __R);
9078 extern __inline __mmask16
9079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9080 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
9081 const int __P, const int __R)
9083 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
9084 (__v16sf) __Y, __P,
9085 (__mmask16) __U, __R);
9088 extern __inline __mmask8
9089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9090 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
9092 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9093 (__v2df) __Y, __P,
9094 (__mmask8) -1, __R);
9097 extern __inline __mmask8
9098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9099 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
9100 const int __P, const int __R)
9102 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
9103 (__v2df) __Y, __P,
9104 (__mmask8) __M, __R);
9107 extern __inline __mmask8
9108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9109 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
9111 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9112 (__v4sf) __Y, __P,
9113 (__mmask8) -1, __R);
9116 extern __inline __mmask8
9117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
9119 const int __P, const int __R)
9121 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
9122 (__v4sf) __Y, __P,
9123 (__mmask8) __M, __R);
9126 #else
9127 #define _mm512_cmp_epi64_mask(X, Y, P) \
9128 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9129 (__v8di)(__m512i)(Y), (int)(P),\
9130 (__mmask8)-1))
9132 #define _mm512_cmp_epi32_mask(X, Y, P) \
9133 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9134 (__v16si)(__m512i)(Y), (int)(P),\
9135 (__mmask16)-1))
9137 #define _mm512_cmp_epu64_mask(X, Y, P) \
9138 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9139 (__v8di)(__m512i)(Y), (int)(P),\
9140 (__mmask8)-1))
9142 #define _mm512_cmp_epu32_mask(X, Y, P) \
9143 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9144 (__v16si)(__m512i)(Y), (int)(P),\
9145 (__mmask16)-1))
9147 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
9148 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9149 (__v8df)(__m512d)(Y), (int)(P),\
9150 (__mmask8)-1, R))
9152 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
9153 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9154 (__v16sf)(__m512)(Y), (int)(P),\
9155 (__mmask16)-1, R))
9157 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
9158 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
9159 (__v8di)(__m512i)(Y), (int)(P),\
9160 (__mmask8)M))
9162 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
9163 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
9164 (__v16si)(__m512i)(Y), (int)(P),\
9165 (__mmask16)M))
9167 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
9168 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
9169 (__v8di)(__m512i)(Y), (int)(P),\
9170 (__mmask8)M))
9172 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
9173 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
9174 (__v16si)(__m512i)(Y), (int)(P),\
9175 (__mmask16)M))
9177 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
9178 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
9179 (__v8df)(__m512d)(Y), (int)(P),\
9180 (__mmask8)M, R))
9182 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
9183 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
9184 (__v16sf)(__m512)(Y), (int)(P),\
9185 (__mmask16)M, R))
9187 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
9188 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9189 (__v2df)(__m128d)(Y), (int)(P),\
9190 (__mmask8)-1, R))
9192 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
9193 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
9194 (__v2df)(__m128d)(Y), (int)(P),\
9195 (M), R))
9197 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
9198 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9199 (__v4sf)(__m128)(Y), (int)(P), \
9200 (__mmask8)-1, R))
9202 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
9203 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
9204 (__v4sf)(__m128)(Y), (int)(P), \
9205 (M), R))
9206 #endif
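/* Illustrative usage (added for exposition; not part of the upstream
   header): the masked compare forms AND the comparison result with a
   prior mask, so predicates can be chained without extra k-register
   instructions:

     __mmask16 k1 = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_NE);
     __mmask16 k2 = _mm512_mask_cmp_epi32_mask (k1, a, c, _MM_CMPINT_LT);
     // k2 is set only in lanes where a != b and a < c.
   Here a, b and c are assumed to be __m512i values.  */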
9208 #ifdef __OPTIMIZE__
9209 extern __inline __m512
9210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9211 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
9213 __m512 v1_old = _mm512_undefined_ps ();
9214 __mmask16 mask = 0xFFFF;
9216 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9217 __addr,
9218 (__v16si) __index,
9219 mask, __scale);
9222 extern __inline __m512
9223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9224 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
9225 __m512i __index, float const *__addr, int __scale)
9227 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
9228 __addr,
9229 (__v16si) __index,
9230 __mask, __scale);
9233 extern __inline __m512d
9234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9235 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9237 __m512d v1_old = _mm512_undefined_pd ();
9238 __mmask8 mask = 0xFF;
9240 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9241 __addr,
9242 (__v8si) __index, mask,
9243 __scale);
9246 extern __inline __m512d
9247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9248 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9249 __m256i __index, double const *__addr, int __scale)
9251 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9252 __addr,
9253 (__v8si) __index,
9254 __mask, __scale);
9257 extern __inline __m256
9258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9261 __m256 v1_old = _mm256_undefined_ps ();
9262 __mmask8 mask = 0xFF;
9264 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9265 __addr,
9266 (__v8di) __index, mask,
9267 __scale);
9270 extern __inline __m256
9271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9272 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9273 __m512i __index, float const *__addr, int __scale)
9275 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9276 __addr,
9277 (__v8di) __index,
9278 __mask, __scale);
9281 extern __inline __m512d
9282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9285 __m512d v1_old = _mm512_undefined_pd ();
9286 __mmask8 mask = 0xFF;
9288 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9289 __addr,
9290 (__v8di) __index, mask,
9291 __scale);
9294 extern __inline __m512d
9295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9296 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9297 __m512i __index, double const *__addr, int __scale)
9299 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9300 __addr,
9301 (__v8di) __index,
9302 __mask, __scale);
9305 extern __inline __m512i
9306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9307 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9309 __m512i v1_old = _mm512_undefined_si512 ();
9310 __mmask16 mask = 0xFFFF;
9312 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9313 __addr,
9314 (__v16si) __index,
9315 mask, __scale);
9318 extern __inline __m512i
9319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9320 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9321 __m512i __index, int const *__addr, int __scale)
9323 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9324 __addr,
9325 (__v16si) __index,
9326 __mask, __scale);
9329 extern __inline __m512i
9330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9331 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9333 __m512i v1_old = _mm512_undefined_si512 ();
9334 __mmask8 mask = 0xFF;
9336 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9337 __addr,
9338 (__v8si) __index, mask,
9339 __scale);
9342 extern __inline __m512i
9343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9344 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9345 __m256i __index, long long const *__addr,
9346 int __scale)
9348 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9349 __addr,
9350 (__v8si) __index,
9351 __mask, __scale);
9354 extern __inline __m256i
9355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9356 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9358 __m256i v1_old = _mm256_undefined_si256 ();
9359 __mmask8 mask = 0xFF;
9361 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9362 __addr,
9363 (__v8di) __index,
9364 mask, __scale);
9367 extern __inline __m256i
9368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9369 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9370 __m512i __index, int const *__addr, int __scale)
9372 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9373 __addr,
9374 (__v8di) __index,
9375 __mask, __scale);
9378 extern __inline __m512i
9379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9380 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9382 __m512i v1_old = _mm512_undefined_si512 ();
9383 __mmask8 mask = 0xFF;
9385 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9386 __addr,
9387 (__v8di) __index, mask,
9388 __scale);
9391 extern __inline __m512i
9392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9393 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9394 __m512i __index, long long const *__addr,
9395 int __scale)
9397 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9398 __addr,
9399 (__v8di) __index,
9400 __mask, __scale);
9403 extern __inline void
9404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9405 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9407 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9408 (__v16si) __index, (__v16sf) __v1, __scale);
9411 extern __inline void
9412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9413 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9414 __m512i __index, __m512 __v1, int __scale)
9416 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9417 (__v16sf) __v1, __scale);
9420 extern __inline void
9421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9422 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9423 int __scale)
9425 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9426 (__v8si) __index, (__v8df) __v1, __scale);
9429 extern __inline void
9430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9432 __m256i __index, __m512d __v1, int __scale)
9434 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9435 (__v8df) __v1, __scale);
9438 extern __inline void
9439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9440 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9442 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9443 (__v8di) __index, (__v8sf) __v1, __scale);
9446 extern __inline void
9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9449 __m512i __index, __m256 __v1, int __scale)
9451 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9452 (__v8sf) __v1, __scale);
9455 extern __inline void
9456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9457 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9458 int __scale)
9460 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9461 (__v8di) __index, (__v8df) __v1, __scale);
9464 extern __inline void
9465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9466 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9467 __m512i __index, __m512d __v1, int __scale)
9469 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9470 (__v8df) __v1, __scale);
9473 extern __inline void
9474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9475 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9476 __m512i __v1, int __scale)
9478 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9479 (__v16si) __index, (__v16si) __v1, __scale);
9482 extern __inline void
9483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9484 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9485 __m512i __index, __m512i __v1, int __scale)
9487 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9488 (__v16si) __v1, __scale);
9491 extern __inline void
9492 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9493 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9494 __m512i __v1, int __scale)
9496 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9497 (__v8si) __index, (__v8di) __v1, __scale);
9500 extern __inline void
9501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9502 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9503 __m256i __index, __m512i __v1, int __scale)
9505 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9506 (__v8di) __v1, __scale);
9509 extern __inline void
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9512 __m256i __v1, int __scale)
9514 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9515 (__v8di) __index, (__v8si) __v1, __scale);
9518 extern __inline void
9519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9520 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9521 __m512i __index, __m256i __v1, int __scale)
9523 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9524 (__v8si) __v1, __scale);
9527 extern __inline void
9528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9529 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9530 __m512i __v1, int __scale)
9532 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9533 (__v8di) __index, (__v8di) __v1, __scale);
9536 extern __inline void
9537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9538 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9539 __m512i __index, __m512i __v1, int __scale)
9541 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9542 (__v8di) __v1, __scale);
9544 #else
9545 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9546 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
9547 (float const *)ADDR, \
9548 (__v16si)(__m512i)INDEX, \
9549 (__mmask16)0xFFFF, (int)SCALE)
9551 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9552 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9553 (float const *)ADDR, \
9554 (__v16si)(__m512i)INDEX, \
9555 (__mmask16)MASK, (int)SCALE)
9557 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9558 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
9559 (double const *)ADDR, \
9560 (__v8si)(__m256i)INDEX, \
9561 (__mmask8)0xFF, (int)SCALE)
9563 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9564 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9565 (double const *)ADDR, \
9566 (__v8si)(__m256i)INDEX, \
9567 (__mmask8)MASK, (int)SCALE)
9569 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9570 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
9571 (float const *)ADDR, \
9572 (__v8di)(__m512i)INDEX, \
9573 (__mmask8)0xFF, (int)SCALE)
9575 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9576 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9577 (float const *)ADDR, \
9578 (__v8di)(__m512i)INDEX, \
9579 (__mmask8)MASK, (int)SCALE)
9581 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9582 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
9583 (double const *)ADDR, \
9584 (__v8di)(__m512i)INDEX, \
9585 (__mmask8)0xFF, (int)SCALE)
9587 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9588 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9589 (double const *)ADDR, \
9590 (__v8di)(__m512i)INDEX, \
9591 (__mmask8)MASK, (int)SCALE)
9593 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9594 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
9595 (int const *)ADDR, \
9596 (__v16si)(__m512i)INDEX, \
9597 (__mmask16)0xFFFF, (int)SCALE)
9599 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9600 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9601 (int const *)ADDR, \
9602 (__v16si)(__m512i)INDEX, \
9603 (__mmask16)MASK, (int)SCALE)
9605 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9606 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
9607 (long long const *)ADDR, \
9608 (__v8si)(__m256i)INDEX, \
9609 (__mmask8)0xFF, (int)SCALE)
9611 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9612 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9613 (long long const *)ADDR, \
9614 (__v8si)(__m256i)INDEX, \
9615 (__mmask8)MASK, (int)SCALE)
9617 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9618 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
9619 (int const *)ADDR, \
9620 (__v8di)(__m512i)INDEX, \
9621 (__mmask8)0xFF, (int)SCALE)
9623 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9624 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
9625 (int const *)ADDR, \
9626 (__v8di)(__m512i)INDEX, \
9627 (__mmask8)MASK, (int)SCALE)
9629 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
9630 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
9631 (long long const *)ADDR, \
9632 (__v8di)(__m512i)INDEX, \
9633 (__mmask8)0xFF, (int)SCALE)
9635 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9636 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
9637 (long long const *)ADDR, \
9638 (__v8di)(__m512i)INDEX, \
9639 (__mmask8)MASK, (int)SCALE)
9641 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
9642 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \
9643 (__v16si)(__m512i)INDEX, \
9644 (__v16sf)(__m512)V1, (int)SCALE)
9646 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9647 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \
9648 (__v16si)(__m512i)INDEX, \
9649 (__v16sf)(__m512)V1, (int)SCALE)
9651 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
9652 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \
9653 (__v8si)(__m256i)INDEX, \
9654 (__v8df)(__m512d)V1, (int)SCALE)
9656 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9657 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \
9658 (__v8si)(__m256i)INDEX, \
9659 (__v8df)(__m512d)V1, (int)SCALE)
9661 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
9662 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \
9663 (__v8di)(__m512i)INDEX, \
9664 (__v8sf)(__m256)V1, (int)SCALE)
9666 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9667 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \
9668 (__v8di)(__m512i)INDEX, \
9669 (__v8sf)(__m256)V1, (int)SCALE)
9671 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
9672 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \
9673 (__v8di)(__m512i)INDEX, \
9674 (__v8df)(__m512d)V1, (int)SCALE)
9676 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9677 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \
9678 (__v8di)(__m512i)INDEX, \
9679 (__v8df)(__m512d)V1, (int)SCALE)
9681 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
9682 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \
9683 (__v16si)(__m512i)INDEX, \
9684 (__v16si)(__m512i)V1, (int)SCALE)
9686 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9687 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \
9688 (__v16si)(__m512i)INDEX, \
9689 (__v16si)(__m512i)V1, (int)SCALE)
9691 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
9692 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9693 (__v8si)(__m256i)INDEX, \
9694 (__v8di)(__m512i)V1, (int)SCALE)
9696 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9697 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \
9698 (__v8si)(__m256i)INDEX, \
9699 (__v8di)(__m512i)V1, (int)SCALE)
9701 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
9702 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \
9703 (__v8di)(__m512i)INDEX, \
9704 (__v8si)(__m256i)V1, (int)SCALE)
9706 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9707 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \
9708 (__v8di)(__m512i)INDEX, \
9709 (__v8si)(__m256i)V1, (int)SCALE)
9711 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
9712 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9713 (__v8di)(__m512i)INDEX, \
9714 (__v8di)(__m512i)V1, (int)SCALE)
9716 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9717 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \
9718 (__v8di)(__m512i)INDEX, \
9719 (__v8di)(__m512i)V1, (int)SCALE)
9720 #endif
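/* Illustrative usage (added for exposition; not part of the upstream
   header): gathers read, and scatters write, SCALE * index bytes past
   the base pointer for each element.  For example, assuming a float
   array `table` and an __m512i vector `idx` holding 16 indices:

     __m512 v = _mm512_i32gather_ps (idx, table, 4);   // v[i] = table[idx[i]]
     _mm512_i32scatter_ps (table, idx, v, 4);          // table[idx[i]] = v[i]

   SCALE must be 1, 2, 4 or 8.  */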
9722 extern __inline __m512d
9723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9726 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9727 (__v8df) __W,
9728 (__mmask8) __U);
9731 extern __inline __m512d
9732 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9735 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9736 (__v8df)
9737 _mm512_setzero_pd (),
9738 (__mmask8) __U);
9741 extern __inline void
9742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9743 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9745 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9746 (__mmask8) __U);
9749 extern __inline __m512
9750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9753 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9754 (__v16sf) __W,
9755 (__mmask16) __U);
9758 extern __inline __m512
9759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9762 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9763 (__v16sf)
9764 _mm512_setzero_ps (),
9765 (__mmask16) __U);
9768 extern __inline void
9769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9770 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9772 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9773 (__mmask16) __U);
9776 extern __inline __m512i
9777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9780 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9781 (__v8di) __W,
9782 (__mmask8) __U);
9785 extern __inline __m512i
9786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9789 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9790 (__v8di)
9791 _mm512_setzero_si512 (),
9792 (__mmask8) __U);
9795 extern __inline void
9796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9797 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9799 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9800 (__mmask8) __U);
9803 extern __inline __m512i
9804 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9807 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9808 (__v16si) __W,
9809 (__mmask16) __U);
9812 extern __inline __m512i
9813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9816 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9817 (__v16si)
9818 _mm512_setzero_si512 (),
9819 (__mmask16) __U);
9822 extern __inline void
9823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9826 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9827 (__mmask16) __U);
9830 extern __inline __m512d
9831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9832 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9834 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9835 (__v8df) __W,
9836 (__mmask8) __U);
9839 extern __inline __m512d
9840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9841 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9843 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9844 (__v8df)
9845 _mm512_setzero_pd (),
9846 (__mmask8) __U);
9849 extern __inline __m512d
9850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9851 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9853 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9854 (__v8df) __W,
9855 (__mmask8) __U);
9858 extern __inline __m512d
9859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9860 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9862 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9863 (__v8df)
9864 _mm512_setzero_pd (),
9865 (__mmask8) __U);
9868 extern __inline __m512
9869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9870 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9872 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9873 (__v16sf) __W,
9874 (__mmask16) __U);
9877 extern __inline __m512
9878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9879 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9881 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9882 (__v16sf)
9883 _mm512_setzero_ps (),
9884 (__mmask16) __U);
9887 extern __inline __m512
9888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9889 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9891 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9892 (__v16sf) __W,
9893 (__mmask16) __U);
9896 extern __inline __m512
9897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9900 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9901 (__v16sf)
9902 _mm512_setzero_ps (),
9903 (__mmask16) __U);
9906 extern __inline __m512i
9907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9908 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9910 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9911 (__v8di) __W,
9912 (__mmask8) __U);
9915 extern __inline __m512i
9916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9917 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9919 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9920 (__v8di)
9921 _mm512_setzero_si512 (),
9922 (__mmask8) __U);
9925 extern __inline __m512i
9926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9927 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9929 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9930 (__v8di) __W,
9931 (__mmask8) __U);
9934 extern __inline __m512i
9935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9936 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9938 return (__m512i)
9939 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9940 (__v8di)
9941 _mm512_setzero_si512 (),
9942 (__mmask8) __U);
9945 extern __inline __m512i
9946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9949 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9950 (__v16si) __W,
9951 (__mmask16) __U);
9954 extern __inline __m512i
9955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9956 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9958 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9959 (__v16si)
9960 _mm512_setzero_si512 (),
9961 (__mmask16) __U);
9964 extern __inline __m512i
9965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9966 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9968 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9969 (__v16si) __W,
9970 (__mmask16) __U);
9973 extern __inline __m512i
9974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9975 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9977 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9978 (__v16si)
9979 _mm512_setzero_si512
9980 (), (__mmask16) __U);
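/* Illustrative usage (added for exposition; not part of the upstream
   header): compress packs the selected elements into the low lanes (or
   contiguously into memory for the storeu forms), while expand performs
   the inverse placement from consecutive source elements.  E.g.:

     float out[16];
     __mmask16 k = _mm512_cmp_ps_mask (v, limit, _CMP_LT_OS);
     _mm512_mask_compressstoreu_ps (out, k, v);
     // out[0 .. popcount(k)-1] now hold the elements of v below limit.
   Here v and limit are assumed to be __m512 values.  */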
9983 /* Mask arithmetic operations */
9984 extern __inline __mmask16
9985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9986 _mm512_kand (__mmask16 __A, __mmask16 __B)
9988 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9991 extern __inline __mmask16
9992 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9993 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9995 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9998 extern __inline __mmask16
9999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000 _mm512_kor (__mmask16 __A, __mmask16 __B)
10002 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
10005 extern __inline int
10006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10007 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
10009 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
10010 (__mmask16) __B);
10013 extern __inline int
10014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10015 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
10017 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
10018 (__mmask16) __B);
10021 extern __inline __mmask16
10022 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10023 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
10025 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
10028 extern __inline __mmask16
10029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10030 _mm512_kxor (__mmask16 __A, __mmask16 __B)
10032 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
10035 extern __inline __mmask16
10036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10037 _mm512_knot (__mmask16 __A)
10039 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
10042 extern __inline __mmask16
10043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10044 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
10046 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
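/* Illustrative usage (added for exposition; not part of the upstream
   header): the k-register operations above combine compare results
   without touching vector registers, e.g.

     __mmask16 k1 = _mm512_cmplt_epi32_mask (a, b);
     __mmask16 k2 = _mm512_cmpneq_epi32_mask (a, c);
     __mmask16 k  = _mm512_kand (k1, k2);   // lanes with a < b and a != c

   Here a, b and c are assumed to be __m512i values.  */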
10049 #ifdef __OPTIMIZE__
10050 extern __inline __m512i
10051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10052 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
10053 const int __imm)
10055 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10056 (__v4si) __D,
10057 __imm,
10058 (__v16si)
10059 _mm512_setzero_si512 (),
10060 __B);
10063 extern __inline __m512
10064 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10065 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
10066 const int __imm)
10068 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10069 (__v4sf) __D,
10070 __imm,
10071 (__v16sf)
10072 _mm512_setzero_ps (), __B);
10075 extern __inline __m512i
10076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10077 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
10078 __m128i __D, const int __imm)
10080 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
10081 (__v4si) __D,
10082 __imm,
10083 (__v16si) __A,
10084 __B);
10087 extern __inline __m512
10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
10090 __m128 __D, const int __imm)
10092 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
10093 (__v4sf) __D,
10094 __imm,
10095 (__v16sf) __A, __B);
10097 #else
10098 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
10099 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10100 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
10101 (__mmask8)(A)))
10103 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
10104 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10105 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
10106 (__mmask8)(A)))
10108 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
10109 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
10110 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
10111 (__mmask8)(B)))
10113 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
10114 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
10115 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
10116 (__mmask8)(B)))
10117 #endif
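/* Illustrative usage (added for exposition; not part of the upstream
   header): the immediate selects which 128-bit chunk of the 512-bit
   destination is replaced, e.g.

     __m512 r = _mm512_maskz_insertf32x4 ((__mmask16) -1, v, x, 3);
     // r equals v with its uppermost 128 bits replaced by x; the
     // all-ones mask keeps every resulting lane.
   Here v is assumed to be an __m512 and x an __m128.  */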
10119 extern __inline __m512i
10120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121 _mm512_max_epi64 (__m512i __A, __m512i __B)
10123 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10124 (__v8di) __B,
10125 (__v8di)
10126 _mm512_undefined_si512 (),
10127 (__mmask8) -1);
10130 extern __inline __m512i
10131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10132 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10134 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10135 (__v8di) __B,
10136 (__v8di)
10137 _mm512_setzero_si512 (),
10138 __M);
10141 extern __inline __m512i
10142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10143 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10145 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
10146 (__v8di) __B,
10147 (__v8di) __W, __M);
10150 extern __inline __m512i
10151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10152 _mm512_min_epi64 (__m512i __A, __m512i __B)
10154 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10155 (__v8di) __B,
10156 (__v8di)
10157 _mm512_undefined_si512 (),
10158 (__mmask8) -1);
10161 extern __inline __m512i
10162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10163 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10165 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10166 (__v8di) __B,
10167 (__v8di) __W, __M);
10170 extern __inline __m512i
10171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10172 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
10174 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
10175 (__v8di) __B,
10176 (__v8di)
10177 _mm512_setzero_si512 (),
10178 __M);
10181 extern __inline __m512i
10182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183 _mm512_max_epu64 (__m512i __A, __m512i __B)
10185 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10186 (__v8di) __B,
10187 (__v8di)
10188 _mm512_undefined_si512 (),
10189 (__mmask8) -1);
10192 extern __inline __m512i
10193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10194 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10196 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10197 (__v8di) __B,
10198 (__v8di)
10199 _mm512_setzero_si512 (),
10200 __M);
10203 extern __inline __m512i
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10207 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
10208 (__v8di) __B,
10209 (__v8di) __W, __M);
10212 extern __inline __m512i
10213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10214 _mm512_min_epu64 (__m512i __A, __m512i __B)
10216 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10217 (__v8di) __B,
10218 (__v8di)
10219 _mm512_undefined_si512 (),
10220 (__mmask8) -1);
10223 extern __inline __m512i
10224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10225 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
10227 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10228 (__v8di) __B,
10229 (__v8di) __W, __M);
10232 extern __inline __m512i
10233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10234 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10236 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10237 (__v8di) __B,
10238 (__v8di)
10239 _mm512_setzero_si512 (),
10240 __M);
10243 extern __inline __m512i
10244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245 _mm512_max_epi32 (__m512i __A, __m512i __B)
10247 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10248 (__v16si) __B,
10249 (__v16si)
10250 _mm512_undefined_si512 (),
10251 (__mmask16) -1);
10254 extern __inline __m512i
10255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10256 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10258 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10259 (__v16si) __B,
10260 (__v16si)
10261 _mm512_setzero_si512 (),
10262 __M);
10265 extern __inline __m512i
10266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10267 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10269 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10270 (__v16si) __B,
10271 (__v16si) __W, __M);
10274 extern __inline __m512i
10275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10276 _mm512_min_epi32 (__m512i __A, __m512i __B)
10278 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10279 (__v16si) __B,
10280 (__v16si)
10281 _mm512_undefined_si512 (),
10282 (__mmask16) -1);
10285 extern __inline __m512i
10286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10287 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10289 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10290 (__v16si) __B,
10291 (__v16si)
10292 _mm512_setzero_si512 (),
10293 __M);
10296 extern __inline __m512i
10297 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10298 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10300 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10301 (__v16si) __B,
10302 (__v16si) __W, __M);
10305 extern __inline __m512i
10306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10307 _mm512_max_epu32 (__m512i __A, __m512i __B)
10309 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10310 (__v16si) __B,
10311 (__v16si)
10312 _mm512_undefined_si512 (),
10313 (__mmask16) -1);
10316 extern __inline __m512i
10317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10318 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10320 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10321 (__v16si) __B,
10322 (__v16si)
10323 _mm512_setzero_si512 (),
10324 __M);
10327 extern __inline __m512i
10328 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10331 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10332 (__v16si) __B,
10333 (__v16si) __W, __M);
10336 extern __inline __m512i
10337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10338 _mm512_min_epu32 (__m512i __A, __m512i __B)
10340 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10341 (__v16si) __B,
10342 (__v16si)
10343 _mm512_undefined_si512 (),
10344 (__mmask16) -1);
10347 extern __inline __m512i
10348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10349 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10351 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10352 (__v16si) __B,
10353 (__v16si)
10354 _mm512_setzero_si512 (),
10355 __M);
10358 extern __inline __m512i
10359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10360 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10362 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10363 (__v16si) __B,
10364 (__v16si) __W, __M);
10367 extern __inline __m512
10368 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10369 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10371 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10372 (__v16sf) __B,
10373 (__v16sf)
10374 _mm512_undefined_ps (),
10375 (__mmask16) -1);
10378 extern __inline __m512
10379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10380 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10382 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10383 (__v16sf) __B,
10384 (__v16sf) __W,
10385 (__mmask16) __U);
10388 extern __inline __m512
10389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10390 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10392 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10393 (__v16sf) __B,
10394 (__v16sf)
10395 _mm512_setzero_ps (),
10396 (__mmask16) __U);
10399 #ifdef __OPTIMIZE__
10400 extern __inline __m128d
10401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10402 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10404 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10405 (__v2df) __B,
10406 __R);
10409 extern __inline __m128
10410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10411 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10413 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10414 (__v4sf) __B,
10415 __R);
10418 extern __inline __m128d
10419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10420 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10422 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10423 (__v2df) __B,
10424 __R);
10427 extern __inline __m128
10428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10429 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10431 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10432 (__v4sf) __B,
10433 __R);
10436 #else
10437 #define _mm_max_round_sd(A, B, C) \
10438 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10440 #define _mm_max_round_ss(A, B, C) \
10441 (__m128)__builtin_ia32_maxss_round(A, B, C)
10443 #define _mm_min_round_sd(A, B, C) \
10444 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10446 #define _mm_min_round_ss(A, B, C) \
10447 (__m128)__builtin_ia32_minss_round(A, B, C)
10448 #endif
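/* Illustrative usage (added for exposition; not part of the upstream
   header): the scalar min/max *_round forms take a suppress-all-exceptions
   (SAE) control rather than a rounding mode, e.g.

     __m128d m = _mm_max_round_sd (a, b, _MM_FROUND_NO_EXC);
     // Maximum of the low doubles of a and b, without raising FP flags.
   Here a and b are assumed to be __m128d values.  */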
10450 extern __inline __m512d
10451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10452 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10454 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10455 (__v8df) __W,
10456 (__mmask8) __U);
10459 extern __inline __m512
10460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10461 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10463 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10464 (__v16sf) __W,
10465 (__mmask16) __U);
10468 extern __inline __m512i
10469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10470 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10472 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10473 (__v8di) __W,
10474 (__mmask8) __U);
10477 extern __inline __m512i
10478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10481 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10482 (__v16si) __W,
10483 (__mmask16) __U);
10486 #ifdef __OPTIMIZE__
10487 extern __inline __m128d
10488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10489 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10491 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10492 (__v2df) __A,
10493 (__v2df) __B,
10494 __R);
10497 extern __inline __m128
10498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10499 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10501 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10502 (__v4sf) __A,
10503 (__v4sf) __B,
10504 __R);
10507 extern __inline __m128d
10508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10509 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10511 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10512 (__v2df) __A,
10513 -(__v2df) __B,
10514 __R);
10517 extern __inline __m128
10518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10521 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10522 (__v4sf) __A,
10523 -(__v4sf) __B,
10524 __R);
10527 extern __inline __m128d
10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10531 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10532 -(__v2df) __A,
10533 (__v2df) __B,
10534 __R);
10537 extern __inline __m128
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10541 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10542 -(__v4sf) __A,
10543 (__v4sf) __B,
10544 __R);
10547 extern __inline __m128d
10548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10549 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10551 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10552 -(__v2df) __A,
10553 -(__v2df) __B,
10554 __R);
10557 extern __inline __m128
10558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10559 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10561 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10562 -(__v4sf) __A,
10563 -(__v4sf) __B,
10564 __R);
10566 #else
10567 #define _mm_fmadd_round_sd(A, B, C, R) \
10568 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
10570 #define _mm_fmadd_round_ss(A, B, C, R) \
10571 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
10573 #define _mm_fmsub_round_sd(A, B, C, R) \
10574 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
10576 #define _mm_fmsub_round_ss(A, B, C, R) \
10577 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
10579 #define _mm_fnmadd_round_sd(A, B, C, R) \
10580 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
10582 #define _mm_fnmadd_round_ss(A, B, C, R) \
10583 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
10585 #define _mm_fnmsub_round_sd(A, B, C, R) \
10586 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
10588 #define _mm_fnmsub_round_ss(A, B, C, R) \
10589 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
10590 #endif
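/* Illustrative usage (added for exposition; not part of the upstream
   header): the scalar FMA *_round forms accept an embedded rounding
   override, e.g.

     __m128d r = _mm_fmadd_round_sd (w, a, b,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);
     // Low lane: r = w * a + b, rounded to nearest regardless of MXCSR.
   Here w, a and b are assumed to be __m128d values.  */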
10592 #ifdef __OPTIMIZE__
10593 extern __inline int
10594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10595 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10597 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10600 extern __inline int
10601 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10602 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10604 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10606 #else
10607 #define _mm_comi_round_ss(A, B, C, D)\
10608 __builtin_ia32_vcomiss(A, B, C, D)
10609 #define _mm_comi_round_sd(A, B, C, D)\
10610 __builtin_ia32_vcomisd(A, B, C, D)
10611 #endif
10613 extern __inline __m512d
10614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10615 _mm512_sqrt_pd (__m512d __A)
10617 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10618 (__v8df)
10619 _mm512_undefined_pd (),
10620 (__mmask8) -1,
10621 _MM_FROUND_CUR_DIRECTION);
10624 extern __inline __m512d
10625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10626 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10628 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10629 (__v8df) __W,
10630 (__mmask8) __U,
10631 _MM_FROUND_CUR_DIRECTION);
10634 extern __inline __m512d
10635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10636 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10638 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10639 (__v8df)
10640 _mm512_setzero_pd (),
10641 (__mmask8) __U,
10642 _MM_FROUND_CUR_DIRECTION);
10645 extern __inline __m512
10646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10647 _mm512_sqrt_ps (__m512 __A)
10649 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10650 (__v16sf)
10651 _mm512_undefined_ps (),
10652 (__mmask16) -1,
10653 _MM_FROUND_CUR_DIRECTION);
10656 extern __inline __m512
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10660 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10661 (__v16sf) __W,
10662 (__mmask16) __U,
10663 _MM_FROUND_CUR_DIRECTION);
10666 extern __inline __m512
10667 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10668 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10670 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10671 (__v16sf)
10672 _mm512_setzero_ps (),
10673 (__mmask16) __U,
10674 _MM_FROUND_CUR_DIRECTION);
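/* Packed add, subtract, multiply and divide.  The unmasked forms use
   the GNU vector-arithmetic extension directly; the masked forms go
   through the *_mask builtins so that inactive elements can be taken
   from __W or zeroed.  */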
10677 extern __inline __m512d
10678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10679 _mm512_add_pd (__m512d __A, __m512d __B)
10681 return (__m512d) ((__v8df)__A + (__v8df)__B);
10684 extern __inline __m512d
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10688 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10689 (__v8df) __B,
10690 (__v8df) __W,
10691 (__mmask8) __U,
10692 _MM_FROUND_CUR_DIRECTION);
10695 extern __inline __m512d
10696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10697 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10699 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10700 (__v8df) __B,
10701 (__v8df)
10702 _mm512_setzero_pd (),
10703 (__mmask8) __U,
10704 _MM_FROUND_CUR_DIRECTION);
10707 extern __inline __m512
10708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10709 _mm512_add_ps (__m512 __A, __m512 __B)
10711 return (__m512) ((__v16sf)__A + (__v16sf)__B);
10714 extern __inline __m512
10715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10716 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10718 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10719 (__v16sf) __B,
10720 (__v16sf) __W,
10721 (__mmask16) __U,
10722 _MM_FROUND_CUR_DIRECTION);
10725 extern __inline __m512
10726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10727 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10729 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10730 (__v16sf) __B,
10731 (__v16sf)
10732 _mm512_setzero_ps (),
10733 (__mmask16) __U,
10734 _MM_FROUND_CUR_DIRECTION);
10737 extern __inline __m512d
10738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10739 _mm512_sub_pd (__m512d __A, __m512d __B)
10741 return (__m512d) ((__v8df)__A - (__v8df)__B);
10744 extern __inline __m512d
10745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10746 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10748 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10749 (__v8df) __B,
10750 (__v8df) __W,
10751 (__mmask8) __U,
10752 _MM_FROUND_CUR_DIRECTION);
10755 extern __inline __m512d
10756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10757 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10759 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10760 (__v8df) __B,
10761 (__v8df)
10762 _mm512_setzero_pd (),
10763 (__mmask8) __U,
10764 _MM_FROUND_CUR_DIRECTION);
10767 extern __inline __m512
10768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10769 _mm512_sub_ps (__m512 __A, __m512 __B)
10771 return (__m512) ((__v16sf)__A - (__v16sf)__B);
10774 extern __inline __m512
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10778 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10779 (__v16sf) __B,
10780 (__v16sf) __W,
10781 (__mmask16) __U,
10782 _MM_FROUND_CUR_DIRECTION);
10785 extern __inline __m512
10786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10789 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10790 (__v16sf) __B,
10791 (__v16sf)
10792 _mm512_setzero_ps (),
10793 (__mmask16) __U,
10794 _MM_FROUND_CUR_DIRECTION);
10797 extern __inline __m512d
10798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10799 _mm512_mul_pd (__m512d __A, __m512d __B)
10801 return (__m512d) ((__v8df)__A * (__v8df)__B);
10804 extern __inline __m512d
10805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10806 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10808 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10809 (__v8df) __B,
10810 (__v8df) __W,
10811 (__mmask8) __U,
10812 _MM_FROUND_CUR_DIRECTION);
10815 extern __inline __m512d
10816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10817 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10819 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10820 (__v8df) __B,
10821 (__v8df)
10822 _mm512_setzero_pd (),
10823 (__mmask8) __U,
10824 _MM_FROUND_CUR_DIRECTION);
10827 extern __inline __m512
10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829 _mm512_mul_ps (__m512 __A, __m512 __B)
10831 return (__m512) ((__v16sf)__A * (__v16sf)__B);
10834 extern __inline __m512
10835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10836 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10838 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10839 (__v16sf) __B,
10840 (__v16sf) __W,
10841 (__mmask16) __U,
10842 _MM_FROUND_CUR_DIRECTION);
10845 extern __inline __m512
10846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10847 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10849 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10850 (__v16sf) __B,
10851 (__v16sf)
10852 _mm512_setzero_ps (),
10853 (__mmask16) __U,
10854 _MM_FROUND_CUR_DIRECTION);
10857 extern __inline __m512d
10858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10859 _mm512_div_pd (__m512d __M, __m512d __V)
10861 return (__m512d) ((__v8df)__M / (__v8df)__V);
10864 extern __inline __m512d
10865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10866 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10868 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10869 (__v8df) __V,
10870 (__v8df) __W,
10871 (__mmask8) __U,
10872 _MM_FROUND_CUR_DIRECTION);
10875 extern __inline __m512d
10876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10879 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10880 (__v8df) __V,
10881 (__v8df)
10882 _mm512_setzero_pd (),
10883 (__mmask8) __U,
10884 _MM_FROUND_CUR_DIRECTION);
10887 extern __inline __m512
10888 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10889 _mm512_div_ps (__m512 __A, __m512 __B)
10891 return (__m512) ((__v16sf)__A / (__v16sf)__B);
10894 extern __inline __m512
10895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10896 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10898 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10899 (__v16sf) __B,
10900 (__v16sf) __W,
10901 (__mmask16) __U,
10902 _MM_FROUND_CUR_DIRECTION);
10905 extern __inline __m512
10906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10907 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10909 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10910 (__v16sf) __B,
10911 (__v16sf)
10912 _mm512_setzero_ps (),
10913 (__mmask16) __U,
10914 _MM_FROUND_CUR_DIRECTION);
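/* Packed maximum and minimum.  As with the legacy MAXPD/MINPD
   instructions, the second source operand is returned when the
   operands are unordered (NaN) or when both are zero.  */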
10917 extern __inline __m512d
10918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10919 _mm512_max_pd (__m512d __A, __m512d __B)
10921 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10922 (__v8df) __B,
10923 (__v8df)
10924 _mm512_undefined_pd (),
10925 (__mmask8) -1,
10926 _MM_FROUND_CUR_DIRECTION);
10929 extern __inline __m512d
10930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10933 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10934 (__v8df) __B,
10935 (__v8df) __W,
10936 (__mmask8) __U,
10937 _MM_FROUND_CUR_DIRECTION);
10940 extern __inline __m512d
10941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10942 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10944 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10945 (__v8df) __B,
10946 (__v8df)
10947 _mm512_setzero_pd (),
10948 (__mmask8) __U,
10949 _MM_FROUND_CUR_DIRECTION);
10952 extern __inline __m512
10953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10954 _mm512_max_ps (__m512 __A, __m512 __B)
10956 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10957 (__v16sf) __B,
10958 (__v16sf)
10959 _mm512_undefined_ps (),
10960 (__mmask16) -1,
10961 _MM_FROUND_CUR_DIRECTION);
10964 extern __inline __m512
10965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10966 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10968 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10969 (__v16sf) __B,
10970 (__v16sf) __W,
10971 (__mmask16) __U,
10972 _MM_FROUND_CUR_DIRECTION);
10975 extern __inline __m512
10976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10977 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10979 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10980 (__v16sf) __B,
10981 (__v16sf)
10982 _mm512_setzero_ps (),
10983 (__mmask16) __U,
10984 _MM_FROUND_CUR_DIRECTION);
10987 extern __inline __m512d
10988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10989 _mm512_min_pd (__m512d __A, __m512d __B)
10991 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10992 (__v8df) __B,
10993 (__v8df)
10994 _mm512_undefined_pd (),
10995 (__mmask8) -1,
10996 _MM_FROUND_CUR_DIRECTION);
10999 extern __inline __m512d
11000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11001 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11003 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11004 (__v8df) __B,
11005 (__v8df) __W,
11006 (__mmask8) __U,
11007 _MM_FROUND_CUR_DIRECTION);
11010 extern __inline __m512d
11011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11012 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
11014 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
11015 (__v8df) __B,
11016 (__v8df)
11017 _mm512_setzero_pd (),
11018 (__mmask8) __U,
11019 _MM_FROUND_CUR_DIRECTION);
11022 extern __inline __m512
11023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11024 _mm512_min_ps (__m512 __A, __m512 __B)
11026 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11027 (__v16sf) __B,
11028 (__v16sf)
11029 _mm512_undefined_ps (),
11030 (__mmask16) -1,
11031 _MM_FROUND_CUR_DIRECTION);
11034 extern __inline __m512
11035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11039 (__v16sf) __B,
11040 (__v16sf) __W,
11041 (__mmask16) __U,
11042 _MM_FROUND_CUR_DIRECTION);
11045 extern __inline __m512
11046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11047 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
11049 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
11050 (__v16sf) __B,
11051 (__v16sf)
11052 _mm512_setzero_ps (),
11053 (__mmask16) __U,
11054 _MM_FROUND_CUR_DIRECTION);
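/* VSCALEF: scale each element of __A by a power of two, computing
   __A[i] * 2^floor(__B[i]).  */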
11057 extern __inline __m512d
11058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11059 _mm512_scalef_pd (__m512d __A, __m512d __B)
11061 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11062 (__v8df) __B,
11063 (__v8df)
11064 _mm512_undefined_pd (),
11065 (__mmask8) -1,
11066 _MM_FROUND_CUR_DIRECTION);
11069 extern __inline __m512d
11070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11071 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11073 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11074 (__v8df) __B,
11075 (__v8df) __W,
11076 (__mmask8) __U,
11077 _MM_FROUND_CUR_DIRECTION);
11080 extern __inline __m512d
11081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11082 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
11084 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
11085 (__v8df) __B,
11086 (__v8df)
11087 _mm512_setzero_pd (),
11088 (__mmask8) __U,
11089 _MM_FROUND_CUR_DIRECTION);
11092 extern __inline __m512
11093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11094 _mm512_scalef_ps (__m512 __A, __m512 __B)
11096 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11097 (__v16sf) __B,
11098 (__v16sf)
11099 _mm512_undefined_ps (),
11100 (__mmask16) -1,
11101 _MM_FROUND_CUR_DIRECTION);
11104 extern __inline __m512
11105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11106 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11108 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11109 (__v16sf) __B,
11110 (__v16sf) __W,
11111 (__mmask16) __U,
11112 _MM_FROUND_CUR_DIRECTION);
11115 extern __inline __m512
11116 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11117 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
11119 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
11120 (__v16sf) __B,
11121 (__v16sf)
11122 _mm512_setzero_ps (),
11123 (__mmask16) __U,
11124 _MM_FROUND_CUR_DIRECTION);
11127 extern __inline __m128d
11128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11129 _mm_scalef_sd (__m128d __A, __m128d __B)
11131 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
11132 (__v2df) __B,
11133 _MM_FROUND_CUR_DIRECTION);
11136 extern __inline __m128
11137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11138 _mm_scalef_ss (__m128 __A, __m128 __B)
11140 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
11141 (__v4sf) __B,
11142 _MM_FROUND_CUR_DIRECTION);
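/* 512-bit fused multiply-add family.  In the _mask forms the first
   source operand also supplies the result for elements whose mask bit
   is clear, in the _mask3 forms the addend __C does, and the _maskz
   forms zero those elements.  For example (illustrative names):

     acc = _mm512_mask_fmadd_pd (acc, m, x, y);

   computes acc*x + y in the lanes selected by M and leaves the other
   lanes of ACC unchanged.  */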
11145 extern __inline __m512d
11146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11147 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11149 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11150 (__v8df) __B,
11151 (__v8df) __C,
11152 (__mmask8) -1,
11153 _MM_FROUND_CUR_DIRECTION);
11156 extern __inline __m512d
11157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11158 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11160 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11161 (__v8df) __B,
11162 (__v8df) __C,
11163 (__mmask8) __U,
11164 _MM_FROUND_CUR_DIRECTION);
11167 extern __inline __m512d
11168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11169 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11171 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
11172 (__v8df) __B,
11173 (__v8df) __C,
11174 (__mmask8) __U,
11175 _MM_FROUND_CUR_DIRECTION);
11178 extern __inline __m512d
11179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11182 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11183 (__v8df) __B,
11184 (__v8df) __C,
11185 (__mmask8) __U,
11186 _MM_FROUND_CUR_DIRECTION);
11189 extern __inline __m512
11190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11191 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11193 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11194 (__v16sf) __B,
11195 (__v16sf) __C,
11196 (__mmask16) -1,
11197 _MM_FROUND_CUR_DIRECTION);
11200 extern __inline __m512
11201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11202 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11204 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11205 (__v16sf) __B,
11206 (__v16sf) __C,
11207 (__mmask16) __U,
11208 _MM_FROUND_CUR_DIRECTION);
11211 extern __inline __m512
11212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11213 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11215 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11216 (__v16sf) __B,
11217 (__v16sf) __C,
11218 (__mmask16) __U,
11219 _MM_FROUND_CUR_DIRECTION);
11222 extern __inline __m512
11223 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11224 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11226 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11227 (__v16sf) __B,
11228 (__v16sf) __C,
11229 (__mmask16) __U,
11230 _MM_FROUND_CUR_DIRECTION);
11233 extern __inline __m512d
11234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11235 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11237 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11238 (__v8df) __B,
11239 -(__v8df) __C,
11240 (__mmask8) -1,
11241 _MM_FROUND_CUR_DIRECTION);
11244 extern __inline __m512d
11245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11246 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11248 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11249 (__v8df) __B,
11250 -(__v8df) __C,
11251 (__mmask8) __U,
11252 _MM_FROUND_CUR_DIRECTION);
11255 extern __inline __m512d
11256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11257 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11259 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11260 (__v8df) __B,
11261 (__v8df) __C,
11262 (__mmask8) __U,
11263 _MM_FROUND_CUR_DIRECTION);
11266 extern __inline __m512d
11267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11268 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11270 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11271 (__v8df) __B,
11272 -(__v8df) __C,
11273 (__mmask8) __U,
11274 _MM_FROUND_CUR_DIRECTION);
11277 extern __inline __m512
11278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11279 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11281 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11282 (__v16sf) __B,
11283 -(__v16sf) __C,
11284 (__mmask16) -1,
11285 _MM_FROUND_CUR_DIRECTION);
11288 extern __inline __m512
11289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11292 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11293 (__v16sf) __B,
11294 -(__v16sf) __C,
11295 (__mmask16) __U,
11296 _MM_FROUND_CUR_DIRECTION);
11299 extern __inline __m512
11300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11301 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11303 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11304 (__v16sf) __B,
11305 (__v16sf) __C,
11306 (__mmask16) __U,
11307 _MM_FROUND_CUR_DIRECTION);
11310 extern __inline __m512
11311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11312 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11314 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11315 (__v16sf) __B,
11316 -(__v16sf) __C,
11317 (__mmask16) __U,
11318 _MM_FROUND_CUR_DIRECTION);
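/* VFMADDSUB / VFMSUBADD: the addsub forms subtract __C in the
   even-numbered elements and add it in the odd-numbered ones; the
   subadd forms do the opposite.  */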
11321 extern __inline __m512d
11322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11323 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11325 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11326 (__v8df) __B,
11327 (__v8df) __C,
11328 (__mmask8) -1,
11329 _MM_FROUND_CUR_DIRECTION);
11332 extern __inline __m512d
11333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11334 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11336 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11337 (__v8df) __B,
11338 (__v8df) __C,
11339 (__mmask8) __U,
11340 _MM_FROUND_CUR_DIRECTION);
11343 extern __inline __m512d
11344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11345 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11347 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11348 (__v8df) __B,
11349 (__v8df) __C,
11350 (__mmask8) __U,
11351 _MM_FROUND_CUR_DIRECTION);
11354 extern __inline __m512d
11355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11358 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11359 (__v8df) __B,
11360 (__v8df) __C,
11361 (__mmask8) __U,
11362 _MM_FROUND_CUR_DIRECTION);
11365 extern __inline __m512
11366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11367 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11369 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11370 (__v16sf) __B,
11371 (__v16sf) __C,
11372 (__mmask16) -1,
11373 _MM_FROUND_CUR_DIRECTION);
11376 extern __inline __m512
11377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11378 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11380 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11381 (__v16sf) __B,
11382 (__v16sf) __C,
11383 (__mmask16) __U,
11384 _MM_FROUND_CUR_DIRECTION);
11387 extern __inline __m512
11388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11389 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11391 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11392 (__v16sf) __B,
11393 (__v16sf) __C,
11394 (__mmask16) __U,
11395 _MM_FROUND_CUR_DIRECTION);
11398 extern __inline __m512
11399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11402 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11403 (__v16sf) __B,
11404 (__v16sf) __C,
11405 (__mmask16) __U,
11406 _MM_FROUND_CUR_DIRECTION);
11409 extern __inline __m512d
11410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11411 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11413 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11414 (__v8df) __B,
11415 -(__v8df) __C,
11416 (__mmask8) -1,
11417 _MM_FROUND_CUR_DIRECTION);
11420 extern __inline __m512d
11421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11422 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11424 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11425 (__v8df) __B,
11426 -(__v8df) __C,
11427 (__mmask8) __U,
11428 _MM_FROUND_CUR_DIRECTION);
11431 extern __inline __m512d
11432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11433 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11435 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11436 (__v8df) __B,
11437 (__v8df) __C,
11438 (__mmask8) __U,
11439 _MM_FROUND_CUR_DIRECTION);
11442 extern __inline __m512d
11443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11444 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11446 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11447 (__v8df) __B,
11448 -(__v8df) __C,
11449 (__mmask8) __U,
11450 _MM_FROUND_CUR_DIRECTION);
11453 extern __inline __m512
11454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11455 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11457 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11458 (__v16sf) __B,
11459 -(__v16sf) __C,
11460 (__mmask16) -1,
11461 _MM_FROUND_CUR_DIRECTION);
11464 extern __inline __m512
11465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11466 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11468 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11469 (__v16sf) __B,
11470 -(__v16sf) __C,
11471 (__mmask16) __U,
11472 _MM_FROUND_CUR_DIRECTION);
11475 extern __inline __m512
11476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11477 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11479 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11480 (__v16sf) __B,
11481 (__v16sf) __C,
11482 (__mmask16) __U,
11483 _MM_FROUND_CUR_DIRECTION);
11486 extern __inline __m512
11487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11488 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11490 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11491 (__v16sf) __B,
11492 -(__v16sf) __C,
11493 (__mmask16) __U,
11494 _MM_FROUND_CUR_DIRECTION);
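/* VFNMADD / VFNMSUB: negate the product before the final addition or
   subtraction, i.e. -(__A * __B) + __C and -(__A * __B) - __C.  */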
11497 extern __inline __m512d
11498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11499 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11501 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11502 (__v8df) __B,
11503 (__v8df) __C,
11504 (__mmask8) -1,
11505 _MM_FROUND_CUR_DIRECTION);
11508 extern __inline __m512d
11509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11510 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11512 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11513 (__v8df) __B,
11514 (__v8df) __C,
11515 (__mmask8) __U,
11516 _MM_FROUND_CUR_DIRECTION);
11519 extern __inline __m512d
11520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11521 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11523 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11524 (__v8df) __B,
11525 (__v8df) __C,
11526 (__mmask8) __U,
11527 _MM_FROUND_CUR_DIRECTION);
11530 extern __inline __m512d
11531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11534 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11535 (__v8df) __B,
11536 (__v8df) __C,
11537 (__mmask8) __U,
11538 _MM_FROUND_CUR_DIRECTION);
11541 extern __inline __m512
11542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11543 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11545 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11546 (__v16sf) __B,
11547 (__v16sf) __C,
11548 (__mmask16) -1,
11549 _MM_FROUND_CUR_DIRECTION);
11552 extern __inline __m512
11553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11554 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11556 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11557 (__v16sf) __B,
11558 (__v16sf) __C,
11559 (__mmask16) __U,
11560 _MM_FROUND_CUR_DIRECTION);
11563 extern __inline __m512
11564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11565 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11567 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11568 (__v16sf) __B,
11569 (__v16sf) __C,
11570 (__mmask16) __U,
11571 _MM_FROUND_CUR_DIRECTION);
11574 extern __inline __m512
11575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11576 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11578 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11579 (__v16sf) __B,
11580 (__v16sf) __C,
11581 (__mmask16) __U,
11582 _MM_FROUND_CUR_DIRECTION);
11585 extern __inline __m512d
11586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11587 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11589 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11590 (__v8df) __B,
11591 -(__v8df) __C,
11592 (__mmask8) -1,
11593 _MM_FROUND_CUR_DIRECTION);
11596 extern __inline __m512d
11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11600 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11601 (__v8df) __B,
11602 (__v8df) __C,
11603 (__mmask8) __U,
11604 _MM_FROUND_CUR_DIRECTION);
11607 extern __inline __m512d
11608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11611 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11612 (__v8df) __B,
11613 (__v8df) __C,
11614 (__mmask8) __U,
11615 _MM_FROUND_CUR_DIRECTION);
11618 extern __inline __m512d
11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11622 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11623 (__v8df) __B,
11624 -(__v8df) __C,
11625 (__mmask8) __U,
11626 _MM_FROUND_CUR_DIRECTION);
11629 extern __inline __m512
11630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11631 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11633 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11634 (__v16sf) __B,
11635 -(__v16sf) __C,
11636 (__mmask16) -1,
11637 _MM_FROUND_CUR_DIRECTION);
11640 extern __inline __m512
11641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11644 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11645 (__v16sf) __B,
11646 (__v16sf) __C,
11647 (__mmask16) __U,
11648 _MM_FROUND_CUR_DIRECTION);
11651 extern __inline __m512
11652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11653 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11655 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11656 (__v16sf) __B,
11657 (__v16sf) __C,
11658 (__mmask16) __U,
11659 _MM_FROUND_CUR_DIRECTION);
11662 extern __inline __m512
11663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11664 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11666 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11667 (__v16sf) __B,
11668 -(__v16sf) __C,
11669 (__mmask16) __U,
11670 _MM_FROUND_CUR_DIRECTION);
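/* Conversions between 512-bit floating-point vectors and packed 32-bit
   integers.  The cvtt* forms truncate toward zero; the plain cvt*
   forms round according to the current rounding mode.  */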
11673 extern __inline __m256i
11674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11675 _mm512_cvttpd_epi32 (__m512d __A)
11677 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11678 (__v8si)
11679 _mm256_undefined_si256 (),
11680 (__mmask8) -1,
11681 _MM_FROUND_CUR_DIRECTION);
11684 extern __inline __m256i
11685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11686 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11688 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11689 (__v8si) __W,
11690 (__mmask8) __U,
11691 _MM_FROUND_CUR_DIRECTION);
11694 extern __inline __m256i
11695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11696 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11698 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11699 (__v8si)
11700 _mm256_setzero_si256 (),
11701 (__mmask8) __U,
11702 _MM_FROUND_CUR_DIRECTION);
11705 extern __inline __m256i
11706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11707 _mm512_cvttpd_epu32 (__m512d __A)
11709 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11710 (__v8si)
11711 _mm256_undefined_si256 (),
11712 (__mmask8) -1,
11713 _MM_FROUND_CUR_DIRECTION);
11716 extern __inline __m256i
11717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11718 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11720 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11721 (__v8si) __W,
11722 (__mmask8) __U,
11723 _MM_FROUND_CUR_DIRECTION);
11726 extern __inline __m256i
11727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11728 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11730 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11731 (__v8si)
11732 _mm256_setzero_si256 (),
11733 (__mmask8) __U,
11734 _MM_FROUND_CUR_DIRECTION);
11737 extern __inline __m256i
11738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11739 _mm512_cvtpd_epi32 (__m512d __A)
11741 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11742 (__v8si)
11743 _mm256_undefined_si256 (),
11744 (__mmask8) -1,
11745 _MM_FROUND_CUR_DIRECTION);
11748 extern __inline __m256i
11749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11752 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11753 (__v8si) __W,
11754 (__mmask8) __U,
11755 _MM_FROUND_CUR_DIRECTION);
11758 extern __inline __m256i
11759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11760 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11762 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11763 (__v8si)
11764 _mm256_setzero_si256 (),
11765 (__mmask8) __U,
11766 _MM_FROUND_CUR_DIRECTION);
11769 extern __inline __m256i
11770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11771 _mm512_cvtpd_epu32 (__m512d __A)
11773 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11774 (__v8si)
11775 _mm256_undefined_si256 (),
11776 (__mmask8) -1,
11777 _MM_FROUND_CUR_DIRECTION);
11780 extern __inline __m256i
11781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11782 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11784 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11785 (__v8si) __W,
11786 (__mmask8) __U,
11787 _MM_FROUND_CUR_DIRECTION);
11790 extern __inline __m256i
11791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11792 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11794 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11795 (__v8si)
11796 _mm256_setzero_si256 (),
11797 (__mmask8) __U,
11798 _MM_FROUND_CUR_DIRECTION);
11801 extern __inline __m512i
11802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11803 _mm512_cvttps_epi32 (__m512 __A)
11805 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11806 (__v16si)
11807 _mm512_undefined_si512 (),
11808 (__mmask16) -1,
11809 _MM_FROUND_CUR_DIRECTION);
11812 extern __inline __m512i
11813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11814 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11816 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11817 (__v16si) __W,
11818 (__mmask16) __U,
11819 _MM_FROUND_CUR_DIRECTION);
11822 extern __inline __m512i
11823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11826 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11827 (__v16si)
11828 _mm512_setzero_si512 (),
11829 (__mmask16) __U,
11830 _MM_FROUND_CUR_DIRECTION);
11833 extern __inline __m512i
11834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11835 _mm512_cvttps_epu32 (__m512 __A)
11837 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11838 (__v16si)
11839 _mm512_undefined_si512 (),
11840 (__mmask16) -1,
11841 _MM_FROUND_CUR_DIRECTION);
11844 extern __inline __m512i
11845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11846 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11848 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11849 (__v16si) __W,
11850 (__mmask16) __U,
11851 _MM_FROUND_CUR_DIRECTION);
11854 extern __inline __m512i
11855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11856 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11858 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11859 (__v16si)
11860 _mm512_setzero_si512 (),
11861 (__mmask16) __U,
11862 _MM_FROUND_CUR_DIRECTION);
11865 extern __inline __m512i
11866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867 _mm512_cvtps_epi32 (__m512 __A)
11869 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11870 (__v16si)
11871 _mm512_undefined_si512 (),
11872 (__mmask16) -1,
11873 _MM_FROUND_CUR_DIRECTION);
11876 extern __inline __m512i
11877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11880 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11881 (__v16si) __W,
11882 (__mmask16) __U,
11883 _MM_FROUND_CUR_DIRECTION);
11886 extern __inline __m512i
11887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11890 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11891 (__v16si)
11892 _mm512_setzero_si512 (),
11893 (__mmask16) __U,
11894 _MM_FROUND_CUR_DIRECTION);
11897 extern __inline __m512i
11898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11899 _mm512_cvtps_epu32 (__m512 __A)
11901 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11902 (__v16si)
11903 _mm512_undefined_si512 (),
11904 (__mmask16) -1,
11905 _MM_FROUND_CUR_DIRECTION);
11908 extern __inline __m512i
11909 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11910 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11912 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11913 (__v16si) __W,
11914 (__mmask16) __U,
11915 _MM_FROUND_CUR_DIRECTION);
11918 extern __inline __m512i
11919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11920 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11922 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11923 (__v16si)
11924 _mm512_setzero_si512 (),
11925 (__mmask16) __U,
11926 _MM_FROUND_CUR_DIRECTION);
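/* Scalar conversions from unsigned integers into the low element of an
   SSE register; the 64-bit variants are only available when compiling
   for x86-64.  */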
11929 #ifdef __x86_64__
11930 extern __inline __m128
11931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11932 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11934 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11935 _MM_FROUND_CUR_DIRECTION);
11938 extern __inline __m128d
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11942 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11943 _MM_FROUND_CUR_DIRECTION);
11945 #endif
11947 extern __inline __m128
11948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11949 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11951 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11952 _MM_FROUND_CUR_DIRECTION);
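/* Convert packed signed (cvtepi32) and unsigned (cvtepu32) 32-bit
   integers to single precision.  */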
11955 extern __inline __m512
11956 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11957 _mm512_cvtepi32_ps (__m512i __A)
11959 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11960 (__v16sf)
11961 _mm512_undefined_ps (),
11962 (__mmask16) -1,
11963 _MM_FROUND_CUR_DIRECTION);
11966 extern __inline __m512
11967 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11968 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11970 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11971 (__v16sf) __W,
11972 (__mmask16) __U,
11973 _MM_FROUND_CUR_DIRECTION);
11976 extern __inline __m512
11977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11978 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11980 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11981 (__v16sf)
11982 _mm512_setzero_ps (),
11983 (__mmask16) __U,
11984 _MM_FROUND_CUR_DIRECTION);
11987 extern __inline __m512
11988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11989 _mm512_cvtepu32_ps (__m512i __A)
11991 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11992 (__v16sf)
11993 _mm512_undefined_ps (),
11994 (__mmask16) -1,
11995 _MM_FROUND_CUR_DIRECTION);
11998 extern __inline __m512
11999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12000 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
12002 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12003 (__v16sf) __W,
12004 (__mmask16) __U,
12005 _MM_FROUND_CUR_DIRECTION);
12008 extern __inline __m512
12009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12010 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
12012 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
12013 (__v16sf)
12014 _mm512_setzero_ps (),
12015 (__mmask16) __U,
12016 _MM_FROUND_CUR_DIRECTION);
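/* VFIXUPIMM: fix up special values (+/-0, +/-Inf, NaN, denormals)
   using the per-element lookup table in __C, with __imm controlling
   exception reporting.  The immediate must be a compile-time constant,
   hence the macro fall-backs when __OPTIMIZE__ is not defined.  */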
12019 #ifdef __OPTIMIZE__
12020 extern __inline __m512d
12021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12022 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
12024 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12025 (__v8df) __B,
12026 (__v8di) __C,
12027 __imm,
12028 (__mmask8) -1,
12029 _MM_FROUND_CUR_DIRECTION);
12032 extern __inline __m512d
12033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12034 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
12035 __m512i __C, const int __imm)
12037 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
12038 (__v8df) __B,
12039 (__v8di) __C,
12040 __imm,
12041 (__mmask8) __U,
12042 _MM_FROUND_CUR_DIRECTION);
12045 extern __inline __m512d
12046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12047 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
12048 __m512i __C, const int __imm)
12050 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
12051 (__v8df) __B,
12052 (__v8di) __C,
12053 __imm,
12054 (__mmask8) __U,
12055 _MM_FROUND_CUR_DIRECTION);
12058 extern __inline __m512
12059 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12060 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
12062 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12063 (__v16sf) __B,
12064 (__v16si) __C,
12065 __imm,
12066 (__mmask16) -1,
12067 _MM_FROUND_CUR_DIRECTION);
12070 extern __inline __m512
12071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
12073 __m512i __C, const int __imm)
12075 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
12076 (__v16sf) __B,
12077 (__v16si) __C,
12078 __imm,
12079 (__mmask16) __U,
12080 _MM_FROUND_CUR_DIRECTION);
12083 extern __inline __m512
12084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12085 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
12086 __m512i __C, const int __imm)
12088 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
12089 (__v16sf) __B,
12090 (__v16si) __C,
12091 __imm,
12092 (__mmask16) __U,
12093 _MM_FROUND_CUR_DIRECTION);
12096 extern __inline __m128d
12097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
12100 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12101 (__v2df) __B,
12102 (__v2di) __C, __imm,
12103 (__mmask8) -1,
12104 _MM_FROUND_CUR_DIRECTION);
12107 extern __inline __m128d
12108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12109 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
12110 __m128i __C, const int __imm)
12112 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
12113 (__v2df) __B,
12114 (__v2di) __C, __imm,
12115 (__mmask8) __U,
12116 _MM_FROUND_CUR_DIRECTION);
12119 extern __inline __m128d
12120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12121 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
12122 __m128i __C, const int __imm)
12124 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
12125 (__v2df) __B,
12126 (__v2di) __C,
12127 __imm,
12128 (__mmask8) __U,
12129 _MM_FROUND_CUR_DIRECTION);
12132 extern __inline __m128
12133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
12136 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12137 (__v4sf) __B,
12138 (__v4si) __C, __imm,
12139 (__mmask8) -1,
12140 _MM_FROUND_CUR_DIRECTION);
12143 extern __inline __m128
12144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
12146 __m128i __C, const int __imm)
12148 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
12149 (__v4sf) __B,
12150 (__v4si) __C, __imm,
12151 (__mmask8) __U,
12152 _MM_FROUND_CUR_DIRECTION);
12155 extern __inline __m128
12156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12157 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
12158 __m128i __C, const int __imm)
12160 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
12161 (__v4sf) __B,
12162 (__v4si) __C, __imm,
12163 (__mmask8) __U,
12164 _MM_FROUND_CUR_DIRECTION);
12166 #else
12167 #define _mm512_fixupimm_pd(X, Y, Z, C) \
12168 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12169 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12170 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12172 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
12173 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
12174 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12175 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12177 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
12178 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
12179 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
12180 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12182 #define _mm512_fixupimm_ps(X, Y, Z, C) \
12183 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12184 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12185 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
12187 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
12188 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
12189 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12190 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12192 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
12193 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12194 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12195 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12197 #define _mm_fixupimm_sd(X, Y, Z, C) \
12198 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12199 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12200 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12202 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12203 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12204 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12205 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12207 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12208 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12209 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12210 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12212 #define _mm_fixupimm_ss(X, Y, Z, C) \
12213 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12214 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12215 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12217 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12218 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12219 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12220 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12222 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12223 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12224 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12225 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12226 #endif
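/* Scalar conversions from the low element to signed and unsigned
   integers; the cvtt* forms truncate, the cvt* forms use the current
   rounding mode.  */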
12228 #ifdef __x86_64__
12229 extern __inline unsigned long long
12230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12231 _mm_cvtss_u64 (__m128 __A)
12233 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12234 __A,
12235 _MM_FROUND_CUR_DIRECTION);
12238 extern __inline unsigned long long
12239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12240 _mm_cvttss_u64 (__m128 __A)
12242 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12243 __A,
12244 _MM_FROUND_CUR_DIRECTION);
12247 extern __inline long long
12248 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12249 _mm_cvttss_i64 (__m128 __A)
12251 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12252 _MM_FROUND_CUR_DIRECTION);
12254 #endif /* __x86_64__ */
12256 extern __inline unsigned
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm_cvtss_u32 (__m128 __A)
12260 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12261 _MM_FROUND_CUR_DIRECTION);
12264 extern __inline unsigned
12265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12266 _mm_cvttss_u32 (__m128 __A)
12268 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12269 _MM_FROUND_CUR_DIRECTION);
12272 extern __inline int
12273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12274 _mm_cvttss_i32 (__m128 __A)
12276 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12277 _MM_FROUND_CUR_DIRECTION);
12280 #ifdef __x86_64__
12281 extern __inline unsigned long long
12282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12283 _mm_cvtsd_u64 (__m128d __A)
12285 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12286 __A,
12287 _MM_FROUND_CUR_DIRECTION);
12290 extern __inline unsigned long long
12291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292 _mm_cvttsd_u64 (__m128d __A)
12294 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12295 __A,
12296 _MM_FROUND_CUR_DIRECTION);
12299 extern __inline long long
12300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301 _mm_cvttsd_i64 (__m128d __A)
12303 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12304 _MM_FROUND_CUR_DIRECTION);
12306 #endif /* __x86_64__ */
12308 extern __inline unsigned
12309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310 _mm_cvtsd_u32 (__m128d __A)
12312 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12313 _MM_FROUND_CUR_DIRECTION);
12316 extern __inline unsigned
12317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12318 _mm_cvttsd_u32 (__m128d __A)
12320 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12321 _MM_FROUND_CUR_DIRECTION);
12324 extern __inline int
12325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12326 _mm_cvttsd_i32 (__m128d __A)
12328 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12329 _MM_FROUND_CUR_DIRECTION);
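/* Widening and narrowing conversions: single <-> double precision and
   half precision (cvtph) to single.  */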
12332 extern __inline __m512d
12333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12334 _mm512_cvtps_pd (__m256 __A)
12336 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12337 (__v8df)
12338 _mm512_undefined_pd (),
12339 (__mmask8) -1,
12340 _MM_FROUND_CUR_DIRECTION);
12343 extern __inline __m512d
12344 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12345 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12347 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12348 (__v8df) __W,
12349 (__mmask8) __U,
12350 _MM_FROUND_CUR_DIRECTION);
12353 extern __inline __m512d
12354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12357 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12358 (__v8df)
12359 _mm512_setzero_pd (),
12360 (__mmask8) __U,
12361 _MM_FROUND_CUR_DIRECTION);
12364 extern __inline __m512
12365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12366 _mm512_cvtph_ps (__m256i __A)
12368 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12369 (__v16sf)
12370 _mm512_undefined_ps (),
12371 (__mmask16) -1,
12372 _MM_FROUND_CUR_DIRECTION);
12375 extern __inline __m512
12376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12377 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12379 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12380 (__v16sf) __W,
12381 (__mmask16) __U,
12382 _MM_FROUND_CUR_DIRECTION);
12385 extern __inline __m512
12386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12387 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12389 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12390 (__v16sf)
12391 _mm512_setzero_ps (),
12392 (__mmask16) __U,
12393 _MM_FROUND_CUR_DIRECTION);
12396 extern __inline __m256
12397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12398 _mm512_cvtpd_ps (__m512d __A)
12400 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12401 (__v8sf)
12402 _mm256_undefined_ps (),
12403 (__mmask8) -1,
12404 _MM_FROUND_CUR_DIRECTION);
12407 extern __inline __m256
12408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12411 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12412 (__v8sf) __W,
12413 (__mmask8) __U,
12414 _MM_FROUND_CUR_DIRECTION);
12417 extern __inline __m256
12418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12419 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12421 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12422 (__v8sf)
12423 _mm256_setzero_ps (),
12424 (__mmask8) __U,
12425 _MM_FROUND_CUR_DIRECTION);
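/* VGETEXP / VGETMANT: extract the unbiased exponent (as a
   floating-point value) and the normalized mantissa of each element.
   For getmant the normalization interval is selected by __B and the
   sign treatment by __C, combined as (__C << 2) | __B.  */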
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_undefined_pd (),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                     (__v8df) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                                                     (__v8df)
                                                     _mm512_setzero_pd (),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
                                                     (__v4sf) __B,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
                                                      (__v2df) __B,
                                                      _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
                   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
                                                      (__C << 2) | __B,
                                                      _mm512_undefined_pd (),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
                        _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
                                                      (__C << 2) | __B,
                                                      (__v8df) __W, __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
                                                      (__C << 2) | __B,
                                                      (__v8df)
                                                      _mm512_setzero_pd (),
                                                      __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
                   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
                                                     (__C << 2) | __B,
                                                     _mm512_undefined_ps (),
                                                     (__mmask16) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
                        _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
                                                     (__C << 2) | __B,
                                                     (__v16sf) __W, __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
                         _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
                                                     (__C << 2) | __B,
                                                     (__v16sf)
                                                     _mm512_setzero_ps (),
                                                     __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
                _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__D << 2) | __C,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
                _MM_MANTISSA_SIGN_ENUM __D)
{
  return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__D << 2) | __C,
                                                   _MM_FROUND_CUR_DIRECTION);
}
#else
#define _mm512_getmant_pd(X, B, C)                                        \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v8df)_mm512_undefined_pd(), \
                                              (__mmask8)-1,\
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C)                             \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v8df)(__m512d)(W),       \
                                              (__mmask8)(U),\
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C)                               \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),       \
                                              (int)(((C)<<2) | (B)),      \
                                              (__v8df)_mm512_setzero_pd(), \
                                              (__mmask8)(U),\
                                              _MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_ps(X, B, C)                                        \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),        \
                                             (int)(((C)<<2) | (B)),       \
                                             (__v16sf)_mm512_undefined_ps(), \
                                             (__mmask16)-1,\
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C)                             \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),        \
                                             (int)(((C)<<2) | (B)),       \
                                             (__v16sf)(__m512)(W),        \
                                             (__mmask16)(U),\
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C)                               \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),        \
                                             (int)(((C)<<2) | (B)),       \
                                             (__v16sf)_mm512_setzero_ps(), \
                                             (__mmask16)(U),\
                                             _MM_FROUND_CUR_DIRECTION))
#define _mm_getmant_sd(X, Y, C, D)                                        \
  ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X),         \
                                            (__v2df)(__m128d)(Y),         \
                                            (int)(((D)<<2) | (C)),        \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm_getmant_ss(X, Y, C, D)                                        \
  ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X),           \
                                           (__v4sf)(__m128)(Y),           \
                                           (int)(((D)<<2) | (C)),         \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_getexp_ss(A, B)                                               \
  ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_getexp_sd(A, B)                                               \
  ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_getexp_ps(A)                                               \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),          \
    (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A)                                    \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),          \
    (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A)                                      \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),          \
    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_getexp_pd(A)                                               \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),         \
    (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A)                                    \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),         \
    (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A)                                      \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),         \
    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
#endif
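/* A worked example for the getexp/getmant families above (illustrative,
   hypothetical values): together they decompose x into a normalized
   mantissa and an exponent such that x == mant * 2^exp for normal inputs.

     __m512d x = _mm512_set1_pd (24.0);
     __m512d e = _mm512_getexp_pd (x);                   // 4.0, i.e. floor(log2 (24))
     __m512d m = _mm512_getmant_pd (x, _MM_MANT_NORM_1_2,
                                    _MM_MANT_SIGN_src);  // 1.5, i.e. 24 / 2^4
*/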
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ps (__m512 __A, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
                                                   (__v16sf)
                                                   _mm512_undefined_ps (),
                                                   -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
                           const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
                                                   (__v16sf) __A,
                                                   (__mmask16) __B,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
                                                   __imm,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (),
                                                   (__mmask16) __A,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
                                                    (__v8df)
                                                    _mm512_undefined_pd (),
                                                    -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
                           const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
                                                    (__v8df) __A,
                                                    (__mmask8) __B,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
                                                    __imm,
                                                    (__v8df)
                                                    _mm512_setzero_pd (),
                                                    (__mmask8) __A,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
{
  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
                                                    (__v4sf) __B, __imm,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
{
  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
                                                     (__v2df) __B, __imm,
                                                     _MM_FROUND_CUR_DIRECTION);
}
#else
#define _mm512_roundscale_ps(A, B)                                        \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),         \
                                            (int)(D),                     \
                                            (__v16sf)(__m512)(A),         \
                                            (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C)                               \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),         \
                                            (int)(C),                     \
                                            (__v16sf)_mm512_setzero_ps(),\
                                            (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B)                                        \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),        \
                                             (int)(D),                    \
                                             (__v8df)(__m512d)(A),        \
                                             (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C)                               \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),        \
                                             (int)(C),                    \
                                             (__v8df)_mm512_setzero_pd(),\
                                             (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_ss(A, B, C)                                        \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),         \
    (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
#define _mm_roundscale_sd(A, B, C)                                        \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),       \
    (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
#endif
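/* Sketch of the roundscale immediate (illustrative, hypothetical values):
   bits 7:4 give the number of binary fraction bits M to keep and the low
   bits select the rounding mode, so the result is roughly
   2^-M * round (2^M * x).

     __m512 x = _mm512_set1_ps (2.70f);
     __m512 f = _mm512_roundscale_ps (x, _MM_FROUND_TO_NEG_INF);        // 2.0  (M = 0, floor)
     __m512 h = _mm512_roundscale_ps (x, (1 << 4)
                                         | _MM_FROUND_TO_NEAREST_INT);  // 2.5  (M = 1)
*/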
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
                                                   (__v8df) __Y, __P,
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
                                                    (__v16sf) __Y, __P,
                                                    (__mmask16) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
                                                    (__v16sf) __Y, __P,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
                                                   (__v8df) __Y, __P,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
                                               (__v2df) __Y, __P,
                                               (__mmask8) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
                                               (__v2df) __Y, __P,
                                               (__mmask8) __M,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
                                               (__v4sf) __Y, __P,
                                               (__mmask8) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
                                               (__v4sf) __Y, __P,
                                               (__mmask8) __M,
                                               _MM_FROUND_CUR_DIRECTION);
}
#else
#define _mm512_cmp_pd_mask(X, Y, P)                                       \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)                                       \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                               \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)M, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                               \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)M,_MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_sd_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y), (int)(P),\
                                         (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y), (int)(P),\
                                         M,_MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         M,_MM_FROUND_CUR_DIRECTION))
#endif
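/* Minimal sketch for the comparison intrinsics above (hypothetical values):
   the predicate is one of the _CMP_* constants from <immintrin.h> and one
   mask bit is produced per lane.

     __m512d a  = _mm512_set1_pd (1.0);
     __m512d b  = _mm512_set1_pd (2.0);
     __mmask8 k = _mm512_cmp_pd_mask (a, b, _CMP_LT_OQ);  // 0xff, every lane satisfies 1.0 < 2.0
*/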
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kmov (__mmask16 __A)
{
  return __builtin_ia32_kmov16 (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd128 (__m512d __A)
{
  return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps128 (__m512 __A)
{
  return _mm512_extractf32x4_ps(__A, 0);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd512_pd256 (__m512d __A)
{
  return _mm512_extractf64x4_pd(__A, 0);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps512_ps256 (__m512 __A)
{
  return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd128_pd512 (__m128d __A)
{
  return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps128_ps512 (__m128 __A)
{
  return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi128_si512 (__m128i __A)
{
  return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castpd256_pd512 (__m256d __A)
{
  return __builtin_ia32_pd512_256pd (__A);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castps256_ps512 (__m256 __A)
{
  return __builtin_ia32_ps512_256ps (__A);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_castsi256_si512 (__m256i __A)
{
  return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
}
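/* The cast intrinsics above reinterpret registers without converting any
   values; a brief sketch (hypothetical values): narrowing casts return the
   low lanes, and the upper lanes of widening casts are undefined.

     __m512d w  = _mm512_set1_pd (3.0);
     __m256d lo = _mm512_castpd512_pd256 (w);   // the low four doubles of w
     __m512d up = _mm512_castpd256_pd512 (lo);  // low half = lo, upper half undefined
*/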
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
                                                    (__v16si) __B, 0,
                                                    (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
                                                    (__v16si) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
                                                   (__v8di) __B, 0, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
                                                   (__v8di) __B, 0,
                                                   (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
                                                    (__v16si) __B, 6,
                                                    (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
                                                    (__v16si) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
                                                   (__v8di) __B, 6, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
{
  return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
                                                   (__v8di) __B, 6,
                                                   (__mmask8) -1);
}
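/* Illustrative sketch for the unsigned comparisons above (hypothetical
   values): the result is an ordinary bit mask, so matching lanes can be
   counted directly.

     __m512i a = _mm512_set1_epi32 (7);
     __m512i b = _mm512_set1_epi32 (7);
     __mmask16 eq = _mm512_cmpeq_epu32_mask (a, b);  // 0xffff
     int hits = __builtin_popcount (eq);             // 16 equal lanes
*/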
#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */

#endif /* _AVX512FINTRIN_H_INCLUDED */