/* Copyright (C) 2013-2014 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED

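/* When AVX-512F is not enabled on the command line, the pragmas below
   enable it locally for this header; __DISABLE_AVX512F__ records that the
   target options are expected to be popped again at the end of the file.  */
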
#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */

/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

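/* __mmask8 and __mmask16 are the write-mask types used by the masked
   intrinsics below: bit i of the mask selects whether element i of the
   destination is written.  The mask variants merge from the old value,
   the maskz variants zero the unselected elements instead.  */
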
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
                  long long __D, long long __E, long long __F,
                  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

/* Create the vector [A B C D E F G H I J K L M N O P].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H,
                  int __I, int __J, int __K, int __L,
                  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i) (__v16si)
    { __P, __O, __N, __M, __L, __K, __J, __I,
      __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
               double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
    { __H, __G, __F, __E, __D, __C, __B, __A };
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H,
               float __I, float __J, float __K, float __L,
               float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
    { __P, __O, __N, __M, __L, __K, __J, __I,
      __H, __G, __F, __E, __D, __C, __B, __A };
}

#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

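/* The _mm512_setr_* forms take their arguments in memory order (element 0
   first); they simply reverse the argument list and forward to the
   corresponding _mm512_set_* intrinsic, which takes the highest element
   first.  */
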
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
            (__v8df) __W, (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
            (__v16sf) __W, (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

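/* A minimal usage sketch of the mask/maskz move forms (illustrative only,
   not part of the header interface):

     __m512d src = _mm512_set_pd (8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
     __m512d old = _mm512_setzero_pd ();
     __mmask8 k = 0x0f;
     __m512d a = _mm512_mask_mov_pd (old, k, src);   element i = bit i of k ? src[i] : old[i]
     __m512d b = _mm512_maskz_mov_pd (k, src);       element i = bit i of k ? src[i] : 0.0   */
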
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
            (__v8df) __W, (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
            (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
            (__v16sf) __W, (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

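/* Note: the _mm512_load_* and _mm512_store_* intrinsics above (and their
   epi32/epi64 counterparts below) are the aligned forms; the address must
   be 64-byte aligned.  The masked variants merge into or zero the
   unselected elements under control of the write mask.  */
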
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
            (__v16si) __B, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
            (__v16si) __B, (__v16si) _mm512_setzero_si512 (), __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
            (__v16si) __B, (__v16si) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
            (__v16si) __Y, (__v16si) _mm512_setzero_si512 (),
            (__mmask16) __U);
}

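/* In the variable-shift forms each element of __X is shifted by the count
   held in the corresponding element of __Y; counts of 32 or more produce 0
   for the logical shifts (sllv/srlv) and copies of the sign bit for the
   arithmetic shift (srav).  */
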
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) _mm512_setzero_pd (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, (__v8di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_pd (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, (__v8di) __Y,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) _mm512_setzero_si512 (), __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, (__v16si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
            (__v16si) __Y, (__v8di) _mm512_setzero_si512 (), __M);
}

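/* _mm512_mul_epi32 and _mm512_mul_epu32 multiply the low (even-numbered)
   32-bit element of each 64-bit lane, producing eight 64-bit products,
   signed and unsigned respectively; hence the __v8di result and the 8-bit
   write mask.  */
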
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

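/* The shift-by-immediate builtins require a compile-time constant count.
   When optimizing, the always-inline functions above fold their count
   argument to a constant; without optimization they might not, so macro
   forms that pass the count straight through are used instead.  The same
   pattern is used for the other immediate-count shifts below.  */
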
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) __W, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, (__v2di) __B,
            (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
#else
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
#else
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) __W, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, (__v4si) __B,
            (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}

#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B,
            __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B,
            __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B,
            __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B,
            __R);
}
#else
#define _mm_add_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_add_round_ss(A, B, C) \
  (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_sub_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_sub_round_ss(A, B, C) \
  (__m128)__builtin_ia32_subss_round(A, B, C)
#endif

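/* A minimal usage sketch of the explicit-rounding forms (illustrative only):
   the rounding argument must be a compile-time constant built from the
   _MM_FROUND_* macros, e.g.

     __m128d r = _mm_add_round_sd (a, b, _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);

   or _MM_FROUND_CUR_DIRECTION to use the current MXCSR rounding mode.  */
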
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
            (__v8di) __B, (__v8di) __C, imm, (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
            (__v8di) __B, (__v8di) __C, imm, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
            (__v8di) __B, (__v8di) __C, imm, (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
            (__v16si) __B, (__v16si) __C, imm, (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
            (__v16si) __B, (__v16si) __C, imm, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
            (__v16si) __B, (__v16si) __C, imm, (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#endif

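/* The 8-bit immediate of the ternarylogic intrinsics is a truth table
   indexed by the corresponding bits of A, B and C; for example 0x96
   computes A ^ B ^ C and 0xE8 computes the bitwise majority of the three
   operands.  */
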
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
            (__v8df) __W, (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
            (__v16sf) __W, (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, (__v4sf) __A);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
            (__v8df) __W, (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
            (__v16sf) __W, (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, (__v4sf) __A);
}

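/* The rcp14/rsqrt14 intrinsics above return hardware approximations of the
   reciprocal and reciprocal square root with a maximum relative error of
   2^-14.  */
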
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
                           const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
            (__v8df) __W, (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
            (__v8df) _mm512_setzero_pd (), (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
            (__v16sf) __W, (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
            (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B, (__v2df) __A,
            __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B, (__v4sf) __A,
            __R);
}
#else
#define _mm512_sqrt_round_pd(A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_sqrt_round_sd(A, B, C) \
  (__m128d)__builtin_ia32_sqrtsd_round(A, B, C)

#define _mm_sqrt_round_ss(A, B, C) \
  (__m128)__builtin_ia32_sqrtss_round(A, B, C)
#endif

1662 extern __inline __m512i
1663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1664 _mm512_cvtepi8_epi32 (__m128i __A)
1666 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1667 (__v16si)
1668 _mm512_setzero_si512 (),
1669 (__mmask16) -1);
1672 extern __inline __m512i
1673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1674 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1676 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1677 (__v16si) __W,
1678 (__mmask16) __U);
1681 extern __inline __m512i
1682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1683 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
1685 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
1686 (__v16si)
1687 _mm512_setzero_si512 (),
1688 (__mmask16) __U);
1691 extern __inline __m512i
1692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1693 _mm512_cvtepi8_epi64 (__m128i __A)
1695 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1696 (__v8di)
1697 _mm512_setzero_si512 (),
1698 (__mmask8) -1);
1701 extern __inline __m512i
1702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1703 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1705 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1706 (__v8di) __W,
1707 (__mmask8) __U);
1710 extern __inline __m512i
1711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
1714 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
1715 (__v8di)
1716 _mm512_setzero_si512 (),
1717 (__mmask8) __U);
1720 extern __inline __m512i
1721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1722 _mm512_cvtepi16_epi32 (__m256i __A)
1724 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1725 (__v16si)
1726 _mm512_setzero_si512 (),
1727 (__mmask16) -1);
1730 extern __inline __m512i
1731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1732 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1734 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1735 (__v16si) __W,
1736 (__mmask16) __U);
1739 extern __inline __m512i
1740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1741 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
1743 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
1744 (__v16si)
1745 _mm512_setzero_si512 (),
1746 (__mmask16) __U);
1749 extern __inline __m512i
1750 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1751 _mm512_cvtepi16_epi64 (__m128i __A)
1753 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1754 (__v8di)
1755 _mm512_setzero_si512 (),
1756 (__mmask8) -1);
1759 extern __inline __m512i
1760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1761 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1763 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1764 (__v8di) __W,
1765 (__mmask8) __U);
1768 extern __inline __m512i
1769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1770 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
1772 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
1773 (__v8di)
1774 _mm512_setzero_si512 (),
1775 (__mmask8) __U);
1778 extern __inline __m512i
1779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1780 _mm512_cvtepi32_epi64 (__m256i __X)
1782 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1783 (__v8di)
1784 _mm512_setzero_si512 (),
1785 (__mmask8) -1);
1788 extern __inline __m512i
1789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1790 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1792 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1793 (__v8di) __W,
1794 (__mmask8) __U);
1797 extern __inline __m512i
1798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1799 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
1801 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
1802 (__v8di)
1803 _mm512_setzero_si512 (),
1804 (__mmask8) __U);
1807 extern __inline __m512i
1808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1809 _mm512_cvtepu8_epi32 (__m128i __A)
1811 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1812 (__v16si)
1813 _mm512_setzero_si512 (),
1814 (__mmask16) -1);
1817 extern __inline __m512i
1818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
1821 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1822 (__v16si) __W,
1823 (__mmask16) __U);
1826 extern __inline __m512i
1827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1828 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
1830 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
1831 (__v16si)
1832 _mm512_setzero_si512 (),
1833 (__mmask16) __U);
1836 extern __inline __m512i
1837 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1838 _mm512_cvtepu8_epi64 (__m128i __A)
1840 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1841 (__v8di)
1842 _mm512_setzero_si512 (),
1843 (__mmask8) -1);
1846 extern __inline __m512i
1847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1848 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1850 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1851 (__v8di) __W,
1852 (__mmask8) __U);
1855 extern __inline __m512i
1856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1857 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
1859 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
1860 (__v8di)
1861 _mm512_setzero_si512 (),
1862 (__mmask8) __U);
1865 extern __inline __m512i
1866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1867 _mm512_cvtepu16_epi32 (__m256i __A)
1869 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1870 (__v16si)
1871 _mm512_setzero_si512 (),
1872 (__mmask16) -1);
1875 extern __inline __m512i
1876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1877 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
1879 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1880 (__v16si) __W,
1881 (__mmask16) __U);
1884 extern __inline __m512i
1885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1886 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
1888 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
1889 (__v16si)
1890 _mm512_setzero_si512 (),
1891 (__mmask16) __U);
1894 extern __inline __m512i
1895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896 _mm512_cvtepu16_epi64 (__m128i __A)
1898 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1899 (__v8di)
1900 _mm512_setzero_si512 (),
1901 (__mmask8) -1);
1904 extern __inline __m512i
1905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
1908 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1909 (__v8di) __W,
1910 (__mmask8) __U);
1913 extern __inline __m512i
1914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1915 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
1917 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
1918 (__v8di)
1919 _mm512_setzero_si512 (),
1920 (__mmask8) __U);
1923 extern __inline __m512i
1924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1925 _mm512_cvtepu32_epi64 (__m256i __X)
1927 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1928 (__v8di)
1929 _mm512_setzero_si512 (),
1930 (__mmask8) -1);
1933 extern __inline __m512i
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1935 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
1937 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1938 (__v8di) __W,
1939 (__mmask8) __U);
1942 extern __inline __m512i
1943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1944 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
1946 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
1947 (__v8di)
1948 _mm512_setzero_si512 (),
1949 (__mmask8) __U);
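/* Arithmetic with an explicit rounding-mode operand.  __R must be a
   compile-time constant; the #ifdef __OPTIMIZE__ split below provides macro
   definitions when not optimizing, so that the constant still reaches the
   builtin as an immediate.  */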
1952 #ifdef __OPTIMIZE__
1953 extern __inline __m512d
1954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1955 _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
1957 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1958 (__v8df) __B,
1959 (__v8df)
1960 _mm512_setzero_pd (),
1961 (__mmask8) -1, __R);
1964 extern __inline __m512d
1965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966 _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
1967 __m512d __B, const int __R)
1969 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1970 (__v8df) __B,
1971 (__v8df) __W,
1972 (__mmask8) __U, __R);
1975 extern __inline __m512d
1976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1977 _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
1978 const int __R)
1980 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
1981 (__v8df) __B,
1982 (__v8df)
1983 _mm512_setzero_pd (),
1984 (__mmask8) __U, __R);
1987 extern __inline __m512
1988 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1989 _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
1991 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
1992 (__v16sf) __B,
1993 (__v16sf)
1994 _mm512_setzero_ps (),
1995 (__mmask16) -1, __R);
1998 extern __inline __m512
1999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2000 _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2001 __m512 __B, const int __R)
2003 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2004 (__v16sf) __B,
2005 (__v16sf) __W,
2006 (__mmask16) __U, __R);
2009 extern __inline __m512
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2013 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
2014 (__v16sf) __B,
2015 (__v16sf)
2016 _mm512_setzero_ps (),
2017 (__mmask16) __U, __R);
2020 extern __inline __m512d
2021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2022 _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
2024 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2025 (__v8df) __B,
2026 (__v8df)
2027 _mm512_setzero_pd (),
2028 (__mmask8) -1, __R);
2031 extern __inline __m512d
2032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2033 _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2034 __m512d __B, const int __R)
2036 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2037 (__v8df) __B,
2038 (__v8df) __W,
2039 (__mmask8) __U, __R);
2042 extern __inline __m512d
2043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2044 _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2045 const int __R)
2047 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
2048 (__v8df) __B,
2049 (__v8df)
2050 _mm512_setzero_pd (),
2051 (__mmask8) __U, __R);
2054 extern __inline __m512
2055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2056 _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
2058 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2059 (__v16sf) __B,
2060 (__v16sf)
2061 _mm512_setzero_ps (),
2062 (__mmask16) -1, __R);
2065 extern __inline __m512
2066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2067 _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2068 __m512 __B, const int __R)
2070 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2071 (__v16sf) __B,
2072 (__v16sf) __W,
2073 (__mmask16) __U, __R);
2076 extern __inline __m512
2077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2078 _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2080 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
2081 (__v16sf) __B,
2082 (__v16sf)
2083 _mm512_setzero_ps (),
2084 (__mmask16) __U, __R);
2086 #else
2087 #define _mm512_add_round_pd(A, B, C) \
2088 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2090 #define _mm512_mask_add_round_pd(W, U, A, B, C) \
2091 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
2093 #define _mm512_maskz_add_round_pd(U, A, B, C) \
2094 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2096 #define _mm512_add_round_ps(A, B, C) \
2097 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2099 #define _mm512_mask_add_round_ps(W, U, A, B, C) \
2100 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
2102 #define _mm512_maskz_add_round_ps(U, A, B, C) \
2103 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2105 #define _mm512_sub_round_pd(A, B, C) \
2106 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2108 #define _mm512_mask_sub_round_pd(W, U, A, B, C) \
2109 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
2111 #define _mm512_maskz_sub_round_pd(U, A, B, C) \
2112 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2114 #define _mm512_sub_round_ps(A, B, C) \
2115 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2117 #define _mm512_mask_sub_round_ps(W, U, A, B, C) \
2118 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
2120 #define _mm512_maskz_sub_round_ps(U, A, B, C) \
2121 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2122 #endif
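/* Usage sketch (illustrative only, assuming the _MM_FROUND_* constants that
   <immintrin.h> makes available):

     __m512d sum = _mm512_add_round_pd (x, y, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);

   adds x and y with round-to-nearest and suppressed exceptions, regardless
   of the current MXCSR setting.  */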
2124 #ifdef __OPTIMIZE__
2125 extern __inline __m512d
2126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2127 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
2129 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2130 (__v8df) __B,
2131 (__v8df)
2132 _mm512_setzero_pd (),
2133 (__mmask8) -1, __R);
2136 extern __inline __m512d
2137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2138 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2139 __m512d __B, const int __R)
2141 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2142 (__v8df) __B,
2143 (__v8df) __W,
2144 (__mmask8) __U, __R);
2147 extern __inline __m512d
2148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2149 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2150 const int __R)
2152 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
2153 (__v8df) __B,
2154 (__v8df)
2155 _mm512_setzero_pd (),
2156 (__mmask8) __U, __R);
2159 extern __inline __m512
2160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2161 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
2163 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2164 (__v16sf) __B,
2165 (__v16sf)
2166 _mm512_setzero_ps (),
2167 (__mmask16) -1, __R);
2170 extern __inline __m512
2171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2172 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2173 __m512 __B, const int __R)
2175 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2176 (__v16sf) __B,
2177 (__v16sf) __W,
2178 (__mmask16) __U, __R);
2181 extern __inline __m512
2182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2183 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2185 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
2186 (__v16sf) __B,
2187 (__v16sf)
2188 _mm512_setzero_ps (),
2189 (__mmask16) __U, __R);
2192 extern __inline __m512d
2193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2194 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
2196 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2197 (__v8df) __V,
2198 (__v8df)
2199 _mm512_setzero_pd (),
2200 (__mmask8) -1, __R);
2203 extern __inline __m512d
2204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2205 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
2206 __m512d __V, const int __R)
2208 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2209 (__v8df) __V,
2210 (__v8df) __W,
2211 (__mmask8) __U, __R);
2214 extern __inline __m512d
2215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2216 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
2217 const int __R)
2219 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
2220 (__v8df) __V,
2221 (__v8df)
2222 _mm512_setzero_pd (),
2223 (__mmask8) __U, __R);
2226 extern __inline __m512
2227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
2230 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2231 (__v16sf) __B,
2232 (__v16sf)
2233 _mm512_setzero_ps (),
2234 (__mmask16) -1, __R);
2237 extern __inline __m512
2238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2239 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2240 __m512 __B, const int __R)
2242 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2243 (__v16sf) __B,
2244 (__v16sf) __W,
2245 (__mmask16) __U, __R);
2248 extern __inline __m512
2249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2250 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2252 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
2253 (__v16sf) __B,
2254 (__v16sf)
2255 _mm512_setzero_ps (),
2256 (__mmask16) __U, __R);
2259 extern __inline __m128d
2260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2261 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
2263 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
2264 (__v2df) __B,
2265 __R);
2268 extern __inline __m128
2269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2270 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
2272 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
2273 (__v4sf) __B,
2274 __R);
2277 extern __inline __m128d
2278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2279 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
2281 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
2282 (__v2df) __B,
2283 __R);
2286 extern __inline __m128
2287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2288 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
2290 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
2291 (__v4sf) __B,
2292 __R);
2295 #else
2296 #define _mm512_mul_round_pd(A, B, C) \
2297 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2299 #define _mm512_mask_mul_round_pd(W, U, A, B, C) \
2300 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
2302 #define _mm512_maskz_mul_round_pd(U, A, B, C) \
2303 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2305 #define _mm512_mul_round_ps(A, B, C) \
2306 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2308 #define _mm512_mask_mul_round_ps(W, U, A, B, C) \
2309 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
2311 #define _mm512_maskz_mul_round_ps(U, A, B, C) \
2312 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2314 #define _mm512_div_round_pd(A, B, C) \
2315 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2317 #define _mm512_mask_div_round_pd(W, U, A, B, C) \
2318 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
2320 #define _mm512_maskz_div_round_pd(U, A, B, C) \
2321 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2323 #define _mm512_div_round_ps(A, B, C) \
2324 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2326 #define _mm512_mask_div_round_ps(W, U, A, B, C) \
2327 (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
2329 #define _mm512_maskz_div_round_ps(U, A, B, C) \
2330 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2332 #define _mm_mul_round_sd(A, B, C) \
2333 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
2335 #define _mm_mul_round_ss(A, B, C) \
2336 (__m128)__builtin_ia32_mulss_round(A, B, C)
2338 #define _mm_div_round_sd(A, B, C) \
2339 (__m128d)__builtin_ia32_divsd_round(A, B, C)
2341 #define _mm_div_round_ss(A, B, C) \
2342 (__m128)__builtin_ia32_divss_round(A, B, C)
2343 #endif
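/* Maximum and minimum with a rounding-mode operand.  Max/min do not round
   their results; the extra argument allows _MM_FROUND_NO_EXC (SAE) to be
   passed to suppress floating-point exceptions.  */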
2345 #ifdef __OPTIMIZE__
2346 extern __inline __m512d
2347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2348 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
2350 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2351 (__v8df) __B,
2352 (__v8df)
2353 _mm512_setzero_pd (),
2354 (__mmask8) -1, __R);
2357 extern __inline __m512d
2358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2359 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2360 __m512d __B, const int __R)
2362 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2363 (__v8df) __B,
2364 (__v8df) __W,
2365 (__mmask8) __U, __R);
2368 extern __inline __m512d
2369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2370 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2371 const int __R)
2373 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
2374 (__v8df) __B,
2375 (__v8df)
2376 _mm512_setzero_pd (),
2377 (__mmask8) __U, __R);
2380 extern __inline __m512
2381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2382 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
2384 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2385 (__v16sf) __B,
2386 (__v16sf)
2387 _mm512_setzero_ps (),
2388 (__mmask16) -1, __R);
2391 extern __inline __m512
2392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2393 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2394 __m512 __B, const int __R)
2396 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2397 (__v16sf) __B,
2398 (__v16sf) __W,
2399 (__mmask16) __U, __R);
2402 extern __inline __m512
2403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2404 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2406 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
2407 (__v16sf) __B,
2408 (__v16sf)
2409 _mm512_setzero_ps (),
2410 (__mmask16) __U, __R);
2413 extern __inline __m512d
2414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2415 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
2417 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2418 (__v8df) __B,
2419 (__v8df)
2420 _mm512_setzero_pd (),
2421 (__mmask8) -1, __R);
2424 extern __inline __m512d
2425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2426 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2427 __m512d __B, const int __R)
2429 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2430 (__v8df) __B,
2431 (__v8df) __W,
2432 (__mmask8) __U, __R);
2435 extern __inline __m512d
2436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2437 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2438 const int __R)
2440 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
2441 (__v8df) __B,
2442 (__v8df)
2443 _mm512_setzero_pd (),
2444 (__mmask8) __U, __R);
2447 extern __inline __m512
2448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2449 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
2451 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2452 (__v16sf) __B,
2453 (__v16sf)
2454 _mm512_setzero_ps (),
2455 (__mmask16) -1, __R);
2458 extern __inline __m512
2459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2461 __m512 __B, const int __R)
2463 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2464 (__v16sf) __B,
2465 (__v16sf) __W,
2466 (__mmask16) __U, __R);
2469 extern __inline __m512
2470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2471 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
2473 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
2474 (__v16sf) __B,
2475 (__v16sf)
2476 _mm512_setzero_ps (),
2477 (__mmask16) __U, __R);
2479 #else
2480 #define _mm512_max_round_pd(A, B, R) \
2481 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
2483 #define _mm512_mask_max_round_pd(W, U, A, B, R) \
2484 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
2486 #define _mm512_maskz_max_round_pd(U, A, B, R) \
2487 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2489 #define _mm512_max_round_ps(A, B, R) \
2490 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
2492 #define _mm512_mask_max_round_ps(W, U, A, B, R) \
2493 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
2495 #define _mm512_maskz_max_round_ps(U, A, B, R) \
2496 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2498 #define _mm512_min_round_pd(A, B, R) \
2499 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
2501 #define _mm512_mask_min_round_pd(W, U, A, B, R) \
2502 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
2504 #define _mm512_maskz_min_round_pd(U, A, B, R) \
2505 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
2507 #define _mm512_min_round_ps(A, B, R) \
2508 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
2510 #define _mm512_mask_min_round_ps(W, U, A, B, R) \
2511 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
2513 #define _mm512_maskz_min_round_ps(U, A, B, R) \
2514 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
2515 #endif
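/* VSCALEFPD/VSCALEFPS: scale the first operand by a power of two, computing
   __A * 2^floor(__B) element-wise.  The scalar _mm_scalef_round_sd/_ss forms
   operate on the low element and copy the upper elements from __A.  */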
2517 #ifdef __OPTIMIZE__
2518 extern __inline __m512d
2519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2520 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
2522 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2523 (__v8df) __B,
2524 (__v8df)
2525 _mm512_setzero_pd (),
2526 (__mmask8) -1, __R);
2529 extern __inline __m512d
2530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2531 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
2532 __m512d __B, const int __R)
2534 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2535 (__v8df) __B,
2536 (__v8df) __W,
2537 (__mmask8) __U, __R);
2540 extern __inline __m512d
2541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2542 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2543 const int __R)
2545 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
2546 (__v8df) __B,
2547 (__v8df)
2548 _mm512_setzero_pd (),
2549 (__mmask8) __U, __R);
2552 extern __inline __m512
2553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2554 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
2556 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2557 (__v16sf) __B,
2558 (__v16sf)
2559 _mm512_setzero_ps (),
2560 (__mmask16) -1, __R);
2563 extern __inline __m512
2564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2565 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
2566 __m512 __B, const int __R)
2568 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2569 (__v16sf) __B,
2570 (__v16sf) __W,
2571 (__mmask16) __U, __R);
2574 extern __inline __m512
2575 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2576 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2577 const int __R)
2579 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
2580 (__v16sf) __B,
2581 (__v16sf)
2582 _mm512_setzero_ps (),
2583 (__mmask16) __U, __R);
2586 extern __inline __m128d
2587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2588 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
2590 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
2591 (__v2df) __B,
2592 __R);
2595 extern __inline __m128
2596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
2599 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
2600 (__v4sf) __B,
2601 __R);
2603 #else
2604 #define _mm512_scalef_round_pd(A, B, C) \
2605 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
2607 #define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
2608 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
2610 #define _mm512_maskz_scalef_round_pd(U, A, B, C) \
2611 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
2613 #define _mm512_scalef_round_ps(A, B, C) \
2614 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
2616 #define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
2617 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
2619 #define _mm512_maskz_scalef_round_ps(U, A, B, C) \
2620 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
2622 #define _mm_scalef_round_sd(A, B, C) \
2623 (__m128d)__builtin_ia32_scalefsd_round(A, B, C)
2625 #define _mm_scalef_round_ss(A, B, C) \
2626 (__m128)__builtin_ia32_scalefss_round(A, B, C)
2627 #endif
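/* Fused multiply-add family with rounding control.  fmadd computes A*B+C,
   fmsub A*B-C, fnmadd -(A*B)+C and fnmsub -(A*B)-C; fmaddsub adds to the
   odd-indexed and subtracts from the even-indexed elements, fmsubadd the
   reverse.  In the masked forms, _mask_ merges masked-off elements from the
   first operand, _mask3_ from the third operand and _maskz_ zeroes them.  */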
2629 #ifdef __OPTIMIZE__
2630 extern __inline __m512d
2631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2632 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2634 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2635 (__v8df) __B,
2636 (__v8df) __C,
2637 (__mmask8) -1, __R);
2640 extern __inline __m512d
2641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2642 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2643 __m512d __C, const int __R)
2645 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2646 (__v8df) __B,
2647 (__v8df) __C,
2648 (__mmask8) __U, __R);
2651 extern __inline __m512d
2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2654 __mmask8 __U, const int __R)
2656 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2657 (__v8df) __B,
2658 (__v8df) __C,
2659 (__mmask8) __U, __R);
2662 extern __inline __m512d
2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2665 __m512d __C, const int __R)
2667 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2668 (__v8df) __B,
2669 (__v8df) __C,
2670 (__mmask8) __U, __R);
2673 extern __inline __m512
2674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2675 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2677 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2678 (__v16sf) __B,
2679 (__v16sf) __C,
2680 (__mmask16) -1, __R);
2683 extern __inline __m512
2684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2685 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2686 __m512 __C, const int __R)
2688 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2689 (__v16sf) __B,
2690 (__v16sf) __C,
2691 (__mmask16) __U, __R);
2694 extern __inline __m512
2695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2696 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2697 __mmask16 __U, const int __R)
2699 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2700 (__v16sf) __B,
2701 (__v16sf) __C,
2702 (__mmask16) __U, __R);
2705 extern __inline __m512
2706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2707 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2708 __m512 __C, const int __R)
2710 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2711 (__v16sf) __B,
2712 (__v16sf) __C,
2713 (__mmask16) __U, __R);
2716 extern __inline __m512d
2717 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2718 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2720 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2721 (__v8df) __B,
2722 -(__v8df) __C,
2723 (__mmask8) -1, __R);
2726 extern __inline __m512d
2727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2729 __m512d __C, const int __R)
2731 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2732 (__v8df) __B,
2733 -(__v8df) __C,
2734 (__mmask8) __U, __R);
2737 extern __inline __m512d
2738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2739 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2740 __mmask8 __U, const int __R)
2742 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
2743 (__v8df) __B,
2744 (__v8df) __C,
2745 (__mmask8) __U, __R);
2748 extern __inline __m512d
2749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2750 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2751 __m512d __C, const int __R)
2753 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2754 (__v8df) __B,
2755 -(__v8df) __C,
2756 (__mmask8) __U, __R);
2759 extern __inline __m512
2760 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2761 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2763 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2764 (__v16sf) __B,
2765 -(__v16sf) __C,
2766 (__mmask16) -1, __R);
2769 extern __inline __m512
2770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2771 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2772 __m512 __C, const int __R)
2774 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2775 (__v16sf) __B,
2776 -(__v16sf) __C,
2777 (__mmask16) __U, __R);
2780 extern __inline __m512
2781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2782 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2783 __mmask16 __U, const int __R)
2785 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
2786 (__v16sf) __B,
2787 (__v16sf) __C,
2788 (__mmask16) __U, __R);
2791 extern __inline __m512
2792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2793 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2794 __m512 __C, const int __R)
2796 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2797 (__v16sf) __B,
2798 -(__v16sf) __C,
2799 (__mmask16) __U, __R);
2802 extern __inline __m512d
2803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2804 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2806 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2807 (__v8df) __B,
2808 (__v8df) __C,
2809 (__mmask8) -1, __R);
2812 extern __inline __m512d
2813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2814 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2815 __m512d __C, const int __R)
2817 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2818 (__v8df) __B,
2819 (__v8df) __C,
2820 (__mmask8) __U, __R);
2823 extern __inline __m512d
2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2825 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
2826 __mmask8 __U, const int __R)
2828 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2829 (__v8df) __B,
2830 (__v8df) __C,
2831 (__mmask8) __U, __R);
2834 extern __inline __m512d
2835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2836 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2837 __m512d __C, const int __R)
2839 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2840 (__v8df) __B,
2841 (__v8df) __C,
2842 (__mmask8) __U, __R);
2845 extern __inline __m512
2846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2847 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2849 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2850 (__v16sf) __B,
2851 (__v16sf) __C,
2852 (__mmask16) -1, __R);
2855 extern __inline __m512
2856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2857 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2858 __m512 __C, const int __R)
2860 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2861 (__v16sf) __B,
2862 (__v16sf) __C,
2863 (__mmask16) __U, __R);
2866 extern __inline __m512
2867 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2868 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
2869 __mmask16 __U, const int __R)
2871 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
2872 (__v16sf) __B,
2873 (__v16sf) __C,
2874 (__mmask16) __U, __R);
2877 extern __inline __m512
2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2879 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2880 __m512 __C, const int __R)
2882 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2883 (__v16sf) __B,
2884 (__v16sf) __C,
2885 (__mmask16) __U, __R);
2888 extern __inline __m512d
2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2892 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2893 (__v8df) __B,
2894 -(__v8df) __C,
2895 (__mmask8) -1, __R);
2898 extern __inline __m512d
2899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2900 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2901 __m512d __C, const int __R)
2903 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2904 (__v8df) __B,
2905 -(__v8df) __C,
2906 (__mmask8) __U, __R);
2909 extern __inline __m512d
2910 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2912 __mmask8 __U, const int __R)
2914 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
2915 (__v8df) __B,
2916 (__v8df) __C,
2917 (__mmask8) __U, __R);
2920 extern __inline __m512d
2921 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2922 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
2923 __m512d __C, const int __R)
2925 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2926 (__v8df) __B,
2927 -(__v8df) __C,
2928 (__mmask8) __U, __R);
2931 extern __inline __m512
2932 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2933 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
2935 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2936 (__v16sf) __B,
2937 -(__v16sf) __C,
2938 (__mmask16) -1, __R);
2941 extern __inline __m512
2942 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2943 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
2944 __m512 __C, const int __R)
2946 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
2947 (__v16sf) __B,
2948 -(__v16sf) __C,
2949 (__mmask16) __U, __R);
2952 extern __inline __m512
2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2954 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
2955 __mmask16 __U, const int __R)
2957 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
2958 (__v16sf) __B,
2959 (__v16sf) __C,
2960 (__mmask16) __U, __R);
2963 extern __inline __m512
2964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2965 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
2966 __m512 __C, const int __R)
2968 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
2969 (__v16sf) __B,
2970 -(__v16sf) __C,
2971 (__mmask16) __U, __R);
2974 extern __inline __m512d
2975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2976 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
2978 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2979 (__v8df) __B,
2980 (__v8df) __C,
2981 (__mmask8) -1, __R);
2984 extern __inline __m512d
2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
2987 __m512d __C, const int __R)
2989 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
2990 (__v8df) __B,
2991 (__v8df) __C,
2992 (__mmask8) __U, __R);
2995 extern __inline __m512d
2996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
2998 __mmask8 __U, const int __R)
3000 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
3001 (__v8df) __B,
3002 (__v8df) __C,
3003 (__mmask8) __U, __R);
3006 extern __inline __m512d
3007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3008 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3009 __m512d __C, const int __R)
3011 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3012 (__v8df) __B,
3013 (__v8df) __C,
3014 (__mmask8) __U, __R);
3017 extern __inline __m512
3018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3019 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3021 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3022 (__v16sf) __B,
3023 (__v16sf) __C,
3024 (__mmask16) -1, __R);
3027 extern __inline __m512
3028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3029 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3030 __m512 __C, const int __R)
3032 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3033 (__v16sf) __B,
3034 (__v16sf) __C,
3035 (__mmask16) __U, __R);
3038 extern __inline __m512
3039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3040 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
3041 __mmask16 __U, const int __R)
3043 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3044 (__v16sf) __B,
3045 (__v16sf) __C,
3046 (__mmask16) __U, __R);
3049 extern __inline __m512
3050 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3051 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3052 __m512 __C, const int __R)
3054 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3055 (__v16sf) __B,
3056 (__v16sf) __C,
3057 (__mmask16) __U, __R);
3060 extern __inline __m512d
3061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3062 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
3064 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
3065 (__v8df) __B,
3066 -(__v8df) __C,
3067 (__mmask8) -1, __R);
3070 extern __inline __m512d
3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3072 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
3073 __m512d __C, const int __R)
3075 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3076 (__v8df) __B,
3077 (__v8df) __C,
3078 (__mmask8) __U, __R);
3081 extern __inline __m512d
3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3083 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
3084 __mmask8 __U, const int __R)
3086 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3087 (__v8df) __B,
3088 (__v8df) __C,
3089 (__mmask8) __U, __R);
3092 extern __inline __m512d
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
3095 __m512d __C, const int __R)
3097 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
3098 (__v8df) __B,
3099 -(__v8df) __C,
3100 (__mmask8) __U, __R);
3103 extern __inline __m512
3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
3107 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3108 (__v16sf) __B,
3109 -(__v16sf) __C,
3110 (__mmask16) -1, __R);
3113 extern __inline __m512
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
3116 __m512 __C, const int __R)
3118 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3119 (__v16sf) __B,
3120 (__v16sf) __C,
3121 (__mmask16) __U, __R);
3124 extern __inline __m512
3125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3126 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
3127 __mmask16 __U, const int __R)
3129 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3130 (__v16sf) __B,
3131 (__v16sf) __C,
3132 (__mmask16) __U, __R);
3135 extern __inline __m512
3136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3137 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
3138 __m512 __C, const int __R)
3140 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3141 (__v16sf) __B,
3142 -(__v16sf) __C,
3143 (__mmask16) __U, __R);
3145 #else
3146 #define _mm512_fmadd_round_pd(A, B, C, R) \
3147 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
3149 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
3150 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
3152 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
3153 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
3155 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
3156 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
3158 #define _mm512_fmadd_round_ps(A, B, C, R) \
3159 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
3161 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
3162 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
3164 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
3165 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
3167 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
3168 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
3170 #define _mm512_fmsub_round_pd(A, B, C, R) \
3171 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
3173 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
3174 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
3176 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3177 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
3179 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
3180 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
3182 #define _mm512_fmsub_round_ps(A, B, C, R) \
3183 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
3185 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
3186 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
3188 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3189 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
3191 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
3192 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
3194 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
3195 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
3197 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
3198 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
3200 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
3201 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
3203 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
3204 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
3206 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
3207 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
3209 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
3210 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
3212 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
3213 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
3215 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
3216 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
3218 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
3219 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
3221 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
3222 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
3224 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3225 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
3227 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
3228 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
3230 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
3231 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
3233 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
3234 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
3236 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3237 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
3239 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
3240 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
3242 #define _mm512_fnmadd_round_pd(A, B, C, R) \
3243 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R)
3245 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3246 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
3248 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
3249 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R)
3251 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
3252 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R)
3254 #define _mm512_fnmadd_round_ps(A, B, C, R) \
3255 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R)
3257 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3258 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
3260 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
3261 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R)
3263 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
3264 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R)
3266 #define _mm512_fnmsub_round_pd(A, B, C, R) \
3267 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
3269 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3270 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
3272 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3273 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
3275 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
3276 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
3278 #define _mm512_fnmsub_round_ps(A, B, C, R) \
3279 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
3281 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3282 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
3284 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3285 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
3287 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
3288 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
3289 #endif
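/* Absolute value of packed 64-bit (VPABSQ) and 32-bit (VPABSD) integers.  */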
3291 extern __inline __m512i
3292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3293 _mm512_abs_epi64 (__m512i __A)
3295 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3296 (__v8di)
3297 _mm512_setzero_si512 (),
3298 (__mmask8) -1);
3301 extern __inline __m512i
3302 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3303 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
3305 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3306 (__v8di) __W,
3307 (__mmask8) __U);
3310 extern __inline __m512i
3311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
3314 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
3315 (__v8di)
3316 _mm512_setzero_si512 (),
3317 (__mmask8) __U);
3320 extern __inline __m512i
3321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3322 _mm512_abs_epi32 (__m512i __A)
3324 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3325 (__v16si)
3326 _mm512_setzero_si512 (),
3327 (__mmask16) -1);
3330 extern __inline __m512i
3331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3332 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
3334 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3335 (__v16si) __W,
3336 (__mmask16) __U);
3339 extern __inline __m512i
3340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3341 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
3343 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
3344 (__v16si)
3345 _mm512_setzero_si512 (),
3346 (__mmask16) __U);
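/* Scalar broadcasts: broadcastss_ps and broadcastsd_pd replicate the low
   float or double of a 128-bit source, broadcastd_epi32 and broadcastq_epi64
   the low 32- or 64-bit integer, and set1_epi32/set1_epi64 broadcast a value
   taken from a general-purpose register (or from memory where no 64-bit
   general registers exist).  */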
3349 extern __inline __m512
3350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3351 _mm512_broadcastss_ps (__m128 __A)
3353 __v16sf __O;
3354 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
3355 (__mmask16) -1);
3358 extern __inline __m512
3359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3360 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
3362 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3363 (__v16sf) __O, __M);
3366 extern __inline __m512
3367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3368 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
3370 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
3371 (__v16sf)
3372 _mm512_setzero_ps (),
3373 __M);
3376 extern __inline __m512d
3377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3378 _mm512_broadcastsd_pd (__m128d __A)
3380 __v8df __O;
3381 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
3382 (__mmask8) -1);
3385 extern __inline __m512d
3386 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3387 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
3389 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3390 (__v8df) __O, __M);
3393 extern __inline __m512d
3394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3395 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
3397 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
3398 (__v8df)
3399 _mm512_setzero_pd (),
3400 __M);
3403 extern __inline __m512i
3404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3405 _mm512_broadcastd_epi32 (__m128i __A)
3407 __v16si __O;
3408 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
3409 (__mmask16) -1);
3412 extern __inline __m512i
3413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3414 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
3416 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3417 (__v16si) __O, __M);
3420 extern __inline __m512i
3421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3422 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
3424 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
3425 (__v16si)
3426 _mm512_setzero_si512 (),
3427 __M);
3430 extern __inline __m512i
3431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3432 _mm512_set1_epi32 (int __A)
3434 __v16si __O;
3435 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
3436 (__mmask16)(-1));
3439 extern __inline __m512i
3440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3441 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
3443 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
3444 __M);
3447 extern __inline __m512i
3448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3449 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
3451 return (__m512i)
3452 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
3453 (__v16si) _mm512_setzero_si512 (),
3454 __M);
3457 extern __inline __m512i
3458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3459 _mm512_broadcastq_epi64 (__m128i __A)
3461 __v8di __O;
3462 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
3463 (__mmask8) -1);
3466 extern __inline __m512i
3467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
3470 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3471 (__v8di) __O, __M);
3474 extern __inline __m512i
3475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3476 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
3478 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
3479 (__v8di)
3480 _mm512_setzero_si512 (),
3481 __M);
3484 extern __inline __m512i
3485 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3486 _mm512_set1_epi64 (long long __A)
3488 __v8di __O;
3489 #ifdef __x86_64__
3490 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
3491 (__mmask8)(-1));
3492 #else
3493 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
3494 (__mmask8)(-1));
3495 #endif
3498 extern __inline __m512i
3499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3500 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
3502 #ifdef __x86_64__
3503 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
3504 __M);
3505 #else
3506 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, (__v8di) __O,
3507 __M);
3508 #endif
3511 extern __inline __m512i
3512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3513 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
3515 #ifdef __x86_64__
3516 return (__m512i)
3517 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
3518 (__v8di) _mm512_setzero_si512 (),
3519 __M);
3520 #else
3521 return (__m512i)
3522 __builtin_ia32_pbroadcastq512_mem_mask (__A,
3523 (__v8di) _mm512_setzero_si512 (),
3524 __M);
3525 #endif
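/* Whole-lane broadcasts: replicate a 128-bit (f32x4/i32x4) or 256-bit
   (f64x4/i64x4) source across the 512-bit destination.  */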
3528 extern __inline __m512
3529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3530 _mm512_broadcast_f32x4 (__m128 __A)
3532 __v16sf __O;
3533 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
3534 (__mmask16) -1);
3537 extern __inline __m512
3538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3539 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
3541 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3542 (__v16sf) __O,
3543 __M);
3546 extern __inline __m512
3547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3548 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
3550 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
3551 (__v16sf)
3552 _mm512_setzero_ps (),
3553 __M);
3556 extern __inline __m512i
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm512_broadcast_i32x4 (__m128i __A)
3560 __v16si __O;
3561 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3562 __O,
3563 (__mmask16) -1);
3566 extern __inline __m512i
3567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3568 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
3570 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3571 (__v16si) __O,
3572 __M);
3575 extern __inline __m512i
3576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3577 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
3579 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
3580 (__v16si)
3581 _mm512_setzero_si512 (),
3582 __M);
3585 extern __inline __m512d
3586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3587 _mm512_broadcast_f64x4 (__m256d __A)
3589 __v8df __O;
3590 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3591 __O,
3592 (__mmask8) -1);
3595 extern __inline __m512d
3596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3597 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
3599 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3600 (__v8df) __O,
3601 __M);
3604 extern __inline __m512d
3605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3606 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
3608 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
3609 (__v8df)
3610 _mm512_setzero_pd (),
3611 __M);
3614 extern __inline __m512i
3615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3616 _mm512_broadcast_i64x4 (__m256i __A)
3618 __v8di __O;
3619 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3620 __O,
3621 (__mmask8) -1);
3624 extern __inline __m512i
3625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3626 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
3628 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3629 (__v8di) __O,
3630 __M);
3633 extern __inline __m512i
3634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3635 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
3637 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
3638 (__v8di)
3639 _mm512_setzero_si512 (),
3640 __M);
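/* Editorial usage sketch (illustrative, not part of the original
   header): the broadcast_*32x4 intrinsics repeat a 128-bit vector four
   times and the broadcast_*64x4 intrinsics repeat a 256-bit vector
   twice; the _mask/_maskz forms then keep or zero individual
   destination elements.

     __m128 lane = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
     __m512 all  = _mm512_broadcast_f32x4 (lane);
     __m512 low  = _mm512_maskz_broadcast_f32x4 ((__mmask16) 0x00FF, lane);

   all holds the 1,2,3,4 pattern in every 128-bit lane, while low keeps
   it only in the lower eight elements and zeroes the upper eight.  */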
3643 typedef enum
3645 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
3646 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
3647 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
3648 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
3649 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
3650 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
3651 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
3652 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
3653 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
3654 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
3655 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
3656 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
3657 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
3658 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
3659 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
3660 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
3661 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
3662 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
3663 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
3664 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
3665 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
3666 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
3667 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
3668 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
3669 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
3670 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
3671 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
3672 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
3673 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
3674 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
3675 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
3676 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
3677 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
3678 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
3679 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
3680 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
3681 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
3682 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
3683 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
3684 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
3685 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
3686 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
3687 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
3688 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
3689 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
3690 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
3691 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
3692 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
3693 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
3694 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
3695 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
3696 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
3697 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
3698 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
3699 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
3700 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
3701 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
3702 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
3703 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
3704 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
3705 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
3706 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
3707 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
3708 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
3709 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
3710 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
3711 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
3712 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
3713 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
3714 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
3715 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
3716 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
3717 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
3718 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
3719 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
3720 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
3721 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
3722 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
3723 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
3724 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
3725 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
3726 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
3727 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
3728 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
3729 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
3730 _MM_PERM_DDDD = 0xFF
3731 } _MM_PERM_ENUM;
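/* Editorial usage sketch (illustrative, not part of the original
   header): each letter of an _MM_PERM_ENUM value names the source
   dword (A=0, B=1, C=2, D=3) copied to one destination dword, reading
   from the highest destination dword down to the lowest, and
   _mm512_shuffle_epi32 applies the pattern to every 128-bit lane
   independently.  Hence _MM_PERM_DCBA (0xE4) is the identity and
   _MM_PERM_ABCD (0x1B) reverses the four dwords of each lane:

     __m512i r = _mm512_shuffle_epi32 (v, _MM_PERM_ABCD);

   for any __m512i v.  */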
3733 #ifdef __OPTIMIZE__
3734 extern __inline __m512i
3735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3736 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
3738 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3739 __mask,
3740 (__v16si)
3741 _mm512_setzero_si512 (),
3742 (__mmask16) -1);
3745 extern __inline __m512i
3746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3747 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
3748 _MM_PERM_ENUM __mask)
3750 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3751 __mask,
3752 (__v16si) __W,
3753 (__mmask16) __U);
3756 extern __inline __m512i
3757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3758 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
3760 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
3761 __mask,
3762 (__v16si)
3763 _mm512_setzero_si512 (),
3764 (__mmask16) __U);
3767 extern __inline __m512i
3768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3769 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
3771 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3772 (__v8di) __B, __imm,
3773 (__v8di)
3774 _mm512_setzero_si512 (),
3775 (__mmask8) -1);
3778 extern __inline __m512i
3779 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3780 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
3781 __m512i __B, const int __imm)
3783 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3784 (__v8di) __B, __imm,
3785 (__v8di) __W,
3786 (__mmask8) __U);
3789 extern __inline __m512i
3790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3791 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
3792 const int __imm)
3794 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
3795 (__v8di) __B, __imm,
3796 (__v8di)
3797 _mm512_setzero_si512 (),
3798 (__mmask8) __U);
3801 extern __inline __m512i
3802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3803 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
3805 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3806 (__v16si) __B,
3807 __imm,
3808 (__v16si)
3809 _mm512_setzero_si512 (),
3810 (__mmask16) -1);
3813 extern __inline __m512i
3814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
3816 __m512i __B, const int __imm)
3818 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3819 (__v16si) __B,
3820 __imm,
3821 (__v16si) __W,
3822 (__mmask16) __U);
3825 extern __inline __m512i
3826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3827 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
3828 const int __imm)
3830 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
3831 (__v16si) __B,
3832 __imm,
3833 (__v16si)
3834 _mm512_setzero_si512 (),
3835 (__mmask16) __U);
3838 extern __inline __m512d
3839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3840 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
3842 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3843 (__v8df) __B, __imm,
3844 (__v8df)
3845 _mm512_setzero_pd (),
3846 (__mmask8) -1);
3849 extern __inline __m512d
3850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3851 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
3852 __m512d __B, const int __imm)
3854 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3855 (__v8df) __B, __imm,
3856 (__v8df) __W,
3857 (__mmask8) __U);
3860 extern __inline __m512d
3861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3862 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
3863 const int __imm)
3865 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
3866 (__v8df) __B, __imm,
3867 (__v8df)
3868 _mm512_setzero_pd (),
3869 (__mmask8) __U);
3872 extern __inline __m512
3873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3874 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
3876 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3877 (__v16sf) __B, __imm,
3878 (__v16sf)
3879 _mm512_setzero_ps (),
3880 (__mmask16) -1);
3883 extern __inline __m512
3884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3885 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
3886 __m512 __B, const int __imm)
3888 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3889 (__v16sf) __B, __imm,
3890 (__v16sf) __W,
3891 (__mmask16) __U);
3894 extern __inline __m512
3895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3896 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
3897 const int __imm)
3899 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
3900 (__v16sf) __B, __imm,
3901 (__v16sf)
3902 _mm512_setzero_ps (),
3903 (__mmask16) __U);
3906 #else
3907 #define _mm512_shuffle_epi32(X, C) \
3908 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3909 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3910 (__mmask16)-1))
3912 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
3913 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3914 (__v16si)(__m512i)(W),\
3915 (__mmask16)(U)))
3917 #define _mm512_maskz_shuffle_epi32(U, X, C) \
3918 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
3919 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3920 (__mmask16)(U)))
3922 #define _mm512_shuffle_i64x2(X, Y, C) \
3923 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3924 (__v8di)(__m512i)(Y), (int)(C),\
3925 (__v8di)(__m512i)_mm512_setzero_si512 (),\
3926 (__mmask8)-1))
3928 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
3929 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3930 (__v8di)(__m512i)(Y), (int)(C),\
3931 (__v8di)(__m512i)(W),\
3932 (__mmask8)(U)))
3934 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
3935 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
3936 (__v8di)(__m512i)(Y), (int)(C),\
3937 (__v8di)(__m512i)_mm512_setzero_si512 (),\
3938 (__mmask8)(U)))
3940 #define _mm512_shuffle_i32x4(X, Y, C) \
3941 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3942 (__v16si)(__m512i)(Y), (int)(C),\
3943 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3944 (__mmask16)-1))
3946 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
3947 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3948 (__v16si)(__m512i)(Y), (int)(C),\
3949 (__v16si)(__m512i)(W),\
3950 (__mmask16)(U)))
3952 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
3953 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
3954 (__v16si)(__m512i)(Y), (int)(C),\
3955 (__v16si)(__m512i)_mm512_setzero_si512 (),\
3956 (__mmask16)(U)))
3958 #define _mm512_shuffle_f64x2(X, Y, C) \
3959 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3960 (__v8df)(__m512d)(Y), (int)(C),\
3961 (__v8df)(__m512d)_mm512_setzero_pd(),\
3962 (__mmask8)-1))
3964 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
3965 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3966 (__v8df)(__m512d)(Y), (int)(C),\
3967 (__v8df)(__m512d)(W),\
3968 (__mmask8)(U)))
3970 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
3971 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
3972 (__v8df)(__m512d)(Y), (int)(C),\
3973 (__v8df)(__m512d)_mm512_setzero_pd(),\
3974 (__mmask8)(U)))
3976 #define _mm512_shuffle_f32x4(X, Y, C) \
3977 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3978 (__v16sf)(__m512)(Y), (int)(C),\
3979 (__v16sf)(__m512)_mm512_setzero_ps(),\
3980 (__mmask16)-1))
3982 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
3983 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3984 (__v16sf)(__m512)(Y), (int)(C),\
3985 (__v16sf)(__m512)(W),\
3986 (__mmask16)(U)))
3988 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
3989 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
3990 (__v16sf)(__m512)(Y), (int)(C),\
3991 (__v16sf)(__m512)_mm512_setzero_ps(),\
3992 (__mmask16)(U)))
3993 #endif
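/* Editorial usage sketch (illustrative, not part of the original
   header): the shuffle_[fi]32x4 and shuffle_[fi]64x2 intrinsics
   rearrange whole 128-bit lanes.  Each two-bit field of the immediate
   picks a source lane; the two low destination lanes come from the
   first operand and the two high ones from the second.  With both
   operands equal, 0x4E (binary 01 00 11 10) swaps the 256-bit halves
   of a vector:

     __m512i swapped = _mm512_shuffle_i64x2 (v, v, 0x4E);

   The immediate must be a compile-time constant, which is why the
   non-__OPTIMIZE__ build provides these as macros.  */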
3995 extern __inline __m512i
3996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3997 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
3999 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4000 (__v16si) __B,
4001 (__v16si)
4002 _mm512_setzero_si512 (),
4003 (__mmask16) -1);
4006 extern __inline __m512i
4007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4008 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4010 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4011 (__v16si) __B,
4012 (__v16si) __W,
4013 (__mmask16) __U);
4016 extern __inline __m512i
4017 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4018 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4020 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
4021 (__v16si) __B,
4022 (__v16si)
4023 _mm512_setzero_si512 (),
4024 (__mmask16) __U);
4027 extern __inline __m512i
4028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4029 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
4031 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4032 (__v16si) __B,
4033 (__v16si)
4034 _mm512_setzero_si512 (),
4035 (__mmask16) -1);
4038 extern __inline __m512i
4039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4040 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4042 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4043 (__v16si) __B,
4044 (__v16si) __W,
4045 (__mmask16) __U);
4048 extern __inline __m512i
4049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4050 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4052 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
4053 (__v16si) __B,
4054 (__v16si)
4055 _mm512_setzero_si512 (),
4056 (__mmask16) __U);
4059 extern __inline __m512i
4060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4061 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
4063 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4064 (__v8di) __B,
4065 (__v8di)
4066 _mm512_setzero_si512 (),
4067 (__mmask8) -1);
4070 extern __inline __m512i
4071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4072 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4074 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4075 (__v8di) __B,
4076 (__v8di) __W,
4077 (__mmask8) __U);
4080 extern __inline __m512i
4081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4082 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4084 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
4085 (__v8di) __B,
4086 (__v8di)
4087 _mm512_setzero_si512 (),
4088 (__mmask8) __U);
4091 extern __inline __m512i
4092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4093 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
4095 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4096 (__v8di) __B,
4097 (__v8di)
4098 _mm512_setzero_si512 (),
4099 (__mmask8) -1);
4102 extern __inline __m512i
4103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4104 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4106 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4107 (__v8di) __B,
4108 (__v8di) __W,
4109 (__mmask8) __U);
4112 extern __inline __m512i
4113 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4114 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4116 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
4117 (__v8di) __B,
4118 (__v8di)
4119 _mm512_setzero_si512 (),
4120 (__mmask8) __U);
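/* Editorial usage sketch (illustrative, not part of the original
   header): rolv/rorv rotate every element by the count held in the
   matching element of the second operand.  For any __m512i data,
   rotating each 32-bit element left by eight bits:

     __m512i counts  = _mm512_set_epi32 (8, 8, 8, 8, 8, 8, 8, 8,
                                         8, 8, 8, 8, 8, 8, 8, 8);
     __m512i rotated = _mm512_rolv_epi32 (data, counts);  */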
4123 #ifdef __OPTIMIZE__
4124 extern __inline __m256i
4125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4126 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
4128 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4129 (__v8si)
4130 _mm256_setzero_si256 (),
4131 (__mmask8) -1, __R);
4134 extern __inline __m256i
4135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4136 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4137 const int __R)
4139 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4140 (__v8si) __W,
4141 (__mmask8) __U, __R);
4144 extern __inline __m256i
4145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4146 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4148 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4149 (__v8si)
4150 _mm256_setzero_si256 (),
4151 (__mmask8) __U, __R);
4154 extern __inline __m256i
4155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4156 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
4158 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4159 (__v8si)
4160 _mm256_setzero_si256 (),
4161 (__mmask8) -1, __R);
4164 extern __inline __m256i
4165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4166 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4167 const int __R)
4169 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4170 (__v8si) __W,
4171 (__mmask8) __U, __R);
4174 extern __inline __m256i
4175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4176 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4178 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
4179 (__v8si)
4180 _mm256_setzero_si256 (),
4181 (__mmask8) __U, __R);
4183 #else
4184 #define _mm512_cvtt_roundpd_epi32(A, B) \
4185 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4187 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
4188 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
4190 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
4191 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4193 #define _mm512_cvtt_roundpd_epu32(A, B) \
4194 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4196 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
4197 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
4199 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
4200 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4201 #endif
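/* Editorial usage sketch (illustrative, not part of the original
   header): the cvtt_* forms always truncate toward zero, so their
   rounding argument only controls exception suppression and is
   normally _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC (both
   available once <immintrin.h> is included):

     __m512d x = _mm512_set_pd (2.7, 2.7, 2.7, 2.7, 2.7, 2.7, 2.7, 2.7);
     __m256i t = _mm512_cvtt_roundpd_epi32 (x, _MM_FROUND_NO_EXC);

   Every element of t is 2.  */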
4203 #ifdef __OPTIMIZE__
4204 extern __inline __m256i
4205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4206 _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
4208 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4209 (__v8si)
4210 _mm256_setzero_si256 (),
4211 (__mmask8) -1, __R);
4214 extern __inline __m256i
4215 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4216 _mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
4217 const int __R)
4219 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4220 (__v8si) __W,
4221 (__mmask8) __U, __R);
4224 extern __inline __m256i
4225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4226 _mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
4228 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4229 (__v8si)
4230 _mm256_setzero_si256 (),
4231 (__mmask8) __U, __R);
4234 extern __inline __m256i
4235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4236 _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
4238 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4239 (__v8si)
4240 _mm256_setzero_si256 (),
4241 (__mmask8) -1, __R);
4244 extern __inline __m256i
4245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4246 _mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
4247 const int __R)
4249 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4250 (__v8si) __W,
4251 (__mmask8) __U, __R);
4254 extern __inline __m256i
4255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4256 _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
4258 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4259 (__v8si)
4260 _mm256_setzero_si256 (),
4261 (__mmask8) __U, __R);
4263 #else
4264 #define _mm512_cvt_roundpd_epi32(A, B) \
4265 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4267 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
4268 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
4270 #define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
4271 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4273 #define _mm512_cvt_roundpd_epu32(A, B) \
4274 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
4276 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
4277 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
4279 #define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
4280 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
4281 #endif
4283 #ifdef __OPTIMIZE__
4284 extern __inline __m512i
4285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4286 _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
4288 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4289 (__v16si)
4290 _mm512_setzero_si512 (),
4291 (__mmask16) -1, __R);
4294 extern __inline __m512i
4295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4296 _mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4297 const int __R)
4299 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4300 (__v16si) __W,
4301 (__mmask16) __U, __R);
4304 extern __inline __m512i
4305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4306 _mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4308 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4309 (__v16si)
4310 _mm512_setzero_si512 (),
4311 (__mmask16) __U, __R);
4314 extern __inline __m512i
4315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4316 _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
4318 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4319 (__v16si)
4320 _mm512_setzero_si512 (),
4321 (__mmask16) -1, __R);
4324 extern __inline __m512i
4325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4326 _mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4327 const int __R)
4329 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4330 (__v16si) __W,
4331 (__mmask16) __U, __R);
4334 extern __inline __m512i
4335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336 _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4338 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
4339 (__v16si)
4340 _mm512_setzero_si512 (),
4341 (__mmask16) __U, __R);
4343 #else
4344 #define _mm512_cvtt_roundps_epi32(A, B) \
4345 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4347 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
4348 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
4350 #define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
4351 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4353 #define _mm512_cvtt_roundps_epu32(A, B) \
4354 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4356 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
4357 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
4359 #define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
4360 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4361 #endif
4363 #ifdef __OPTIMIZE__
4364 extern __inline __m512i
4365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4366 _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
4368 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4369 (__v16si)
4370 _mm512_setzero_si512 (),
4371 (__mmask16) -1, __R);
4374 extern __inline __m512i
4375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4376 _mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
4377 const int __R)
4379 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4380 (__v16si) __W,
4381 (__mmask16) __U, __R);
4384 extern __inline __m512i
4385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4386 _mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
4388 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4389 (__v16si)
4390 _mm512_setzero_si512 (),
4391 (__mmask16) __U, __R);
4394 extern __inline __m512i
4395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4396 _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
4398 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4399 (__v16si)
4400 _mm512_setzero_si512 (),
4401 (__mmask16) -1, __R);
4404 extern __inline __m512i
4405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4406 _mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
4407 const int __R)
4409 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4410 (__v16si) __W,
4411 (__mmask16) __U, __R);
4414 extern __inline __m512i
4415 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4416 _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
4418 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4419 (__v16si)
4420 _mm512_setzero_si512 (),
4421 (__mmask16) __U, __R);
4423 #else
4424 #define _mm512_cvt_roundps_epi32(A, B) \
4425 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4427 #define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
4428 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
4430 #define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
4431 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4433 #define _mm512_cvt_roundps_epu32(A, B) \
4434 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
4436 #define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
4437 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
4439 #define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
4440 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
4441 #endif
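/* Editorial usage sketch (illustrative, not part of the original
   header): the non-truncating cvt_round* conversions honour an
   explicit rounding mode built from the _MM_FROUND_* constants,
   e.g. round-to-nearest-even with floating-point exceptions
   suppressed:

     __m512i n = _mm512_cvt_roundps_epi32 (x, _MM_FROUND_TO_NEAREST_INT
                                              | _MM_FROUND_NO_EXC);

   For an __m512 x holding 2.5f in every element, each element of n is
   2 (ties round to even).  */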
4443 extern __inline __m128d
4444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4445 _mm_cvtu32_sd (__m128d __A, unsigned __B)
4447 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
4450 #ifdef __x86_64__
4451 #ifdef __OPTIMIZE__
4452 extern __inline __m128d
4453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4454 _mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
4456 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
4459 extern __inline __m128d
4460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4461 _mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
4463 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4466 extern __inline __m128d
4467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4468 _mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
4470 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
4472 #else
4473 #define _mm_cvt_roundu64_sd(A, B, C) \
4474 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)
4476 #define _mm_cvt_roundi64_sd(A, B, C) \
4477 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4479 #define _mm_cvt_roundsi64_sd(A, B, C) \
4480 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
4481 #endif
4483 #endif
4485 #ifdef __OPTIMIZE__
4486 extern __inline __m128
4487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4488 _mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
4490 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
4493 extern __inline __m128
4494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4495 _mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
4497 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4500 extern __inline __m128
4501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4502 _mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
4504 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
4506 #else
4507 #define _mm_cvt_roundu32_ss(A, B, C) \
4508 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)
4510 #define _mm_cvt_roundi32_ss(A, B, C) \
4511 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4513 #define _mm_cvt_roundsi32_ss(A, B, C) \
4514 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
4515 #endif
4517 #ifdef __x86_64__
4518 #ifdef __OPTIMIZE__
4519 extern __inline __m128
4520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4521 _mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
4523 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
4526 extern __inline __m128
4527 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4528 _mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
4530 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4533 extern __inline __m128
4534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4535 _mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
4537 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
4539 #else
4540 #define _mm_cvt_roundu64_ss(A, B, C) \
4541 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)
4543 #define _mm_cvt_roundi64_ss(A, B, C) \
4544 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4546 #define _mm_cvt_roundsi64_ss(A, B, C) \
4547 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
4548 #endif
4550 #endif
4552 extern __inline __m128i
4553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4554 _mm512_cvtepi32_epi8 (__m512i __A)
4556 __v16qi __O;
4557 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O,
4558 (__mmask16) -1);
4561 extern __inline void
4562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4563 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4565 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4568 extern __inline __m128i
4569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4570 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4572 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4573 (__v16qi) __O, __M);
4576 extern __inline __m128i
4577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4578 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
4580 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
4581 (__v16qi)
4582 _mm_setzero_si128 (),
4583 __M);
4586 extern __inline __m128i
4587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4588 _mm512_cvtsepi32_epi8 (__m512i __A)
4590 __v16qi __O;
4591 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O,
4592 (__mmask16) -1);
4595 extern __inline void
4596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4597 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4599 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4602 extern __inline __m128i
4603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4604 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4606 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4607 (__v16qi) __O, __M);
4610 extern __inline __m128i
4611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4612 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
4614 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
4615 (__v16qi)
4616 _mm_setzero_si128 (),
4617 __M);
4620 extern __inline __m128i
4621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4622 _mm512_cvtusepi32_epi8 (__m512i __A)
4624 __v16qi __O;
4625 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O,
4626 (__mmask16) -1);
4629 extern __inline void
4630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4631 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
4633 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
4636 extern __inline __m128i
4637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4638 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
4640 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4641 (__v16qi) __O,
4642 __M);
4645 extern __inline __m128i
4646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4647 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
4649 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
4650 (__v16qi)
4651 _mm_setzero_si128 (),
4652 __M);
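/* Editorial usage sketch (illustrative, not part of the original
   header): the three down-conversion families differ only in how they
   treat values that do not fit the narrower type.  For a __m512i w
   whose 32-bit elements all hold 300:

     __m128i t = _mm512_cvtepi32_epi8 (w);
     __m128i s = _mm512_cvtsepi32_epi8 (w);
     __m128i u = _mm512_cvtusepi32_epi8 (w);

   t holds 44 (plain truncation, 300 & 0xff), s holds 127 (signed
   saturation) and u holds 255 (unsigned saturation).  The *_storeu_*
   variants write the same result straight to memory.  */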
4655 extern __inline __m256i
4656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4657 _mm512_cvtepi32_epi16 (__m512i __A)
4659 __v16hi __O;
4660 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O,
4661 (__mmask16) -1);
4664 extern __inline void
4665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4666 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
4668 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
4671 extern __inline __m256i
4672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4673 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4675 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4676 (__v16hi) __O, __M);
4679 extern __inline __m256i
4680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4681 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
4683 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
4684 (__v16hi)
4685 _mm256_setzero_si256 (),
4686 __M);
4689 extern __inline __m256i
4690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4691 _mm512_cvtsepi32_epi16 (__m512i __A)
4693 __v16hi __O;
4694 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O,
4695 (__mmask16) -1);
4698 extern __inline void
4699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4700 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4702 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4705 extern __inline __m256i
4706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4707 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4709 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4710 (__v16hi) __O, __M);
4713 extern __inline __m256i
4714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4715 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
4717 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
4718 (__v16hi)
4719 _mm256_setzero_si256 (),
4720 __M);
4723 extern __inline __m256i
4724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4725 _mm512_cvtusepi32_epi16 (__m512i __A)
4727 __v16hi __O;
4728 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O,
4729 (__mmask16) -1);
4732 extern __inline void
4733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4734 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
4736 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
4739 extern __inline __m256i
4740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4741 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
4743 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4744 (__v16hi) __O,
4745 __M);
4748 extern __inline __m256i
4749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4750 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
4752 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
4753 (__v16hi)
4754 _mm256_setzero_si256 (),
4755 __M);
4758 extern __inline __m256i
4759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4760 _mm512_cvtepi64_epi32 (__m512i __A)
4762 __v8si __O;
4763 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O,
4764 (__mmask8) -1);
4767 extern __inline void
4768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4769 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4771 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4774 extern __inline __m256i
4775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4776 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4778 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4779 (__v8si) __O, __M);
4782 extern __inline __m256i
4783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4784 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
4786 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
4787 (__v8si)
4788 _mm256_setzero_si256 (),
4789 __M);
4792 extern __inline __m256i
4793 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 _mm512_cvtsepi64_epi32 (__m512i __A)
4796 __v8si __O;
4797 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O,
4798 (__mmask8) -1);
4801 extern __inline void
4802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4803 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
4805 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
4808 extern __inline __m256i
4809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4810 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4812 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4813 (__v8si) __O, __M);
4816 extern __inline __m256i
4817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4818 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
4820 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
4821 (__v8si)
4822 _mm256_setzero_si256 (),
4823 __M);
4826 extern __inline __m256i
4827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4828 _mm512_cvtusepi64_epi32 (__m512i __A)
4830 __v8si __O;
4831 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O,
4832 (__mmask8) -1);
4835 extern __inline void
4836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4837 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
4839 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
4842 extern __inline __m256i
4843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4844 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
4846 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4847 (__v8si) __O, __M);
4850 extern __inline __m256i
4851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4852 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
4854 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
4855 (__v8si)
4856 _mm256_setzero_si256 (),
4857 __M);
4860 extern __inline __m128i
4861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4862 _mm512_cvtepi64_epi16 (__m512i __A)
4864 __v8hi __O;
4865 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O,
4866 (__mmask8) -1);
4869 extern __inline void
4870 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4871 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4873 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4876 extern __inline __m128i
4877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4878 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4880 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4881 (__v8hi) __O, __M);
4884 extern __inline __m128i
4885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4886 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
4888 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
4889 (__v8hi)
4890 _mm_setzero_si128 (),
4891 __M);
4894 extern __inline __m128i
4895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4896 _mm512_cvtsepi64_epi16 (__m512i __A)
4898 __v8hi __O;
4899 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O,
4900 (__mmask8) -1);
4903 extern __inline void
4904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4905 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
4907 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
4910 extern __inline __m128i
4911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4912 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4914 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4915 (__v8hi) __O, __M);
4918 extern __inline __m128i
4919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4920 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
4922 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
4923 (__v8hi)
4924 _mm_setzero_si128 (),
4925 __M);
4928 extern __inline __m128i
4929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4930 _mm512_cvtusepi64_epi16 (__m512i __A)
4932 __v8hi __O;
4933 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O,
4934 (__mmask8) -1);
4937 extern __inline void
4938 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4939 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
4941 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
4944 extern __inline __m128i
4945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4946 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
4948 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4949 (__v8hi) __O, __M);
4952 extern __inline __m128i
4953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4954 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
4956 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
4957 (__v8hi)
4958 _mm_setzero_si128 (),
4959 __M);
4962 extern __inline __m128i
4963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4964 _mm512_cvtepi64_epi8 (__m512i __A)
4966 __v16qi __O;
4967 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O,
4968 (__mmask8) -1);
4971 extern __inline void
4972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4973 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
4975 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
4978 extern __inline __m128i
4979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4980 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
4982 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4983 (__v16qi) __O, __M);
4986 extern __inline __m128i
4987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4988 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
4990 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
4991 (__v16qi)
4992 _mm_setzero_si128 (),
4993 __M);
4996 extern __inline __m128i
4997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4998 _mm512_cvtsepi64_epi8 (__m512i __A)
5000 __v16qi __O;
5001 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O,
5002 (__mmask8) -1);
5005 extern __inline void
5006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5007 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5009 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5012 extern __inline __m128i
5013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5014 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5016 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5017 (__v16qi) __O, __M);
5020 extern __inline __m128i
5021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5022 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
5024 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
5025 (__v16qi)
5026 _mm_setzero_si128 (),
5027 __M);
5030 extern __inline __m128i
5031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5032 _mm512_cvtusepi64_epi8 (__m512i __A)
5034 __v16qi __O;
5035 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O,
5036 (__mmask8) -1);
5039 extern __inline void
5040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5041 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
5043 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
5046 extern __inline __m128i
5047 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5048 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
5050 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5051 (__v16qi) __O,
5052 __M);
5055 extern __inline __m128i
5056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5057 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
5059 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
5060 (__v16qi)
5061 _mm_setzero_si128 (),
5062 __M);
5065 extern __inline __m512d
5066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5067 _mm512_cvtepi32_pd (__m256i __A)
5069 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5070 (__v8df)
5071 _mm512_setzero_pd (),
5072 (__mmask8) -1);
5075 extern __inline __m512d
5076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5077 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5079 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5080 (__v8df) __W,
5081 (__mmask8) __U);
5084 extern __inline __m512d
5085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5086 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
5088 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
5089 (__v8df)
5090 _mm512_setzero_pd (),
5091 (__mmask8) __U);
5094 extern __inline __m512d
5095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5096 _mm512_cvtepu32_pd (__m256i __A)
5098 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5099 (__v8df)
5100 _mm512_setzero_pd (),
5101 (__mmask8) -1);
5104 extern __inline __m512d
5105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5106 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
5108 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5109 (__v8df) __W,
5110 (__mmask8) __U);
5113 extern __inline __m512d
5114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5115 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
5117 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
5118 (__v8df)
5119 _mm512_setzero_pd (),
5120 (__mmask8) __U);
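/* Editorial usage sketch (illustrative, not part of the original
   header): cvtepi32_pd and cvtepu32_pd widen eight 32-bit integers to
   eight doubles; the epu form reads the source as unsigned, so an
   all-ones element becomes 4294967295.0 instead of -1.0:

     __m256i i = _mm256_set1_epi32 (-1);
     __m512d s = _mm512_cvtepi32_pd (i);
     __m512d u = _mm512_cvtepu32_pd (i);

   Every element of s is -1.0 and every element of u is
   4294967295.0.  */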
5123 #ifdef __OPTIMIZE__
5124 extern __inline __m512
5125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5126 _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
5128 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5129 (__v16sf)
5130 _mm512_setzero_ps (),
5131 (__mmask16) -1, __R);
5134 extern __inline __m512
5135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5136 _mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5137 const int __R)
5139 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5140 (__v16sf) __W,
5141 (__mmask16) __U, __R);
5144 extern __inline __m512
5145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5146 _mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
5148 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
5149 (__v16sf)
5150 _mm512_setzero_ps (),
5151 (__mmask16) __U, __R);
5154 extern __inline __m512
5155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5156 _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
5158 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5159 (__v16sf)
5160 _mm512_setzero_ps (),
5161 (__mmask16) -1, __R);
5164 extern __inline __m512
5165 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5166 _mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
5167 const int __R)
5169 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5170 (__v16sf) __W,
5171 (__mmask16) __U, __R);
5174 extern __inline __m512
5175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176 _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
5178 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
5179 (__v16sf)
5180 _mm512_setzero_ps (),
5181 (__mmask16) __U, __R);
5184 #else
5185 #define _mm512_cvt_roundepi32_ps(A, B) \
5186 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
5188 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
5189 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)(W), U, B)
5191 #define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
5192 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5194 #define _mm512_cvt_roundepu32_ps(A, B) \
5195 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
5197 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
5198 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)(W), U, B)
5200 #define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
5201 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
5202 #endif
5204 #ifdef __OPTIMIZE__
5205 extern __inline __m256d
5206 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5207 _mm512_extractf64x4_pd (__m512d __A, const int __imm)
5209 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5210 __imm,
5211 (__v4df)
5212 _mm256_setzero_pd (),
5213 (__mmask8) -1);
5216 extern __inline __m256d
5217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5218 _mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
5219 const int __imm)
5221 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5222 __imm,
5223 (__v4df) __W,
5224 (__mmask8) __U);
5227 extern __inline __m256d
5228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5229 _mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
5231 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
5232 __imm,
5233 (__v4df)
5234 _mm256_setzero_pd (),
5235 (__mmask8) __U);
5238 extern __inline __m128
5239 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5240 _mm512_extractf32x4_ps (__m512 __A, const int __imm)
5242 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5243 __imm,
5244 (__v4sf)
5245 _mm_setzero_ps (),
5246 (__mmask8) -1);
5249 extern __inline __m128
5250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5251 _mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
5252 const int __imm)
5254 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5255 __imm,
5256 (__v4sf) __W,
5257 (__mmask8) __U);
5260 extern __inline __m128
5261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5262 _mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
5264 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
5265 __imm,
5266 (__v4sf)
5267 _mm_setzero_ps (),
5268 (__mmask8) __U);
5271 extern __inline __m256i
5272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5273 _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
5275 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5276 __imm,
5277 (__v4di)
5278 _mm256_setzero_si256 (),
5279 (__mmask8) -1);
5282 extern __inline __m256i
5283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5284 _mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
5285 const int __imm)
5287 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5288 __imm,
5289 (__v4di) __W,
5290 (__mmask8) __U);
5293 extern __inline __m256i
5294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5295 _mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
5297 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
5298 __imm,
5299 (__v4di)
5300 _mm256_setzero_si256 (),
5301 (__mmask8) __U);
5304 extern __inline __m128i
5305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5306 _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
5308 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5309 __imm,
5310 (__v4si)
5311 _mm_setzero_si128 (),
5312 (__mmask8) -1);
5315 extern __inline __m128i
5316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5317 _mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
5318 const int __imm)
5320 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5321 __imm,
5322 (__v4si) __W,
5323 (__mmask8) __U);
5326 extern __inline __m128i
5327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5328 _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
5330 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
5331 __imm,
5332 (__v4si)
5333 _mm_setzero_si128 (),
5334 (__mmask8) __U);
5336 #else
5338 #define _mm512_extractf64x4_pd(X, C) \
5339 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5340 (int) (C),\
5341 (__v4df)(__m256d)_mm256_setzero_pd(),\
5342 (__mmask8)-1))
5344 #define _mm512_mask_extractf64x4_pd(W, U, X, C) \
5345 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5346 (int) (C),\
5347 (__v4df)(__m256d)(W),\
5348 (__mmask8)(U)))
5350 #define _mm512_maskz_extractf64x4_pd(U, X, C) \
5351 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
5352 (int) (C),\
5353 (__v4df)(__m256d)_mm256_setzero_pd(),\
5354 (__mmask8)(U)))
5356 #define _mm512_extractf32x4_ps(X, C) \
5357 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5358 (int) (C),\
5359 (__v4sf)(__m128)_mm_setzero_ps(),\
5360 (__mmask8)-1))
5362 #define _mm512_mask_extractf32x4_ps(W, U, X, C) \
5363 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5364 (int) (C),\
5365 (__v4sf)(__m128)(W),\
5366 (__mmask8)(U)))
5368 #define _mm512_maskz_extractf32x4_ps(U, X, C) \
5369 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
5370 (int) (C),\
5371 (__v4sf)(__m128)_mm_setzero_ps(),\
5372 (__mmask8)(U)))
5374 #define _mm512_extracti64x4_epi64(X, C) \
5375 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5376 (int) (C),\
5377 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5378 (__mmask8)-1))
5380 #define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
5381 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5382 (int) (C),\
5383 (__v4di)(__m256i)(W),\
5384 (__mmask8)(U)))
5386 #define _mm512_maskz_extracti64x4_epi64(U, X, C) \
5387 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
5388 (int) (C),\
5389 (__v4di)(__m256i)_mm256_setzero_si256 (),\
5390 (__mmask8)(U)))
5392 #define _mm512_extracti32x4_epi32(X, C) \
5393 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5394 (int) (C),\
5395 (__v4si)(__m128i)_mm_setzero_si128 (),\
5396 (__mmask8)-1))
5398 #define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
5399 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5400 (int) (C),\
5401 (__v4si)(__m128i)(W),\
5402 (__mmask8)(U)))
5404 #define _mm512_maskz_extracti32x4_epi32(U, X, C) \
5405 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
5406 (int) (C),\
5407 (__v4si)(__m128i)_mm_setzero_si128 (),\
5408 (__mmask8)(U)))
5409 #endif
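/* Usage sketch for the extract intrinsics above.  The lane selector must be
   a compile-time constant, which is why the macro forms are provided when
   __OPTIMIZE__ is not defined.  Variable names are placeholders.

     __m512d v  = _mm512_set_pd (8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
     __m256d lo = _mm512_extractf64x4_pd (v, 0);   // elements 0..3: 1.0..4.0
     __m256d hi = _mm512_extractf64x4_pd (v, 1);   // elements 4..7: 5.0..8.0  */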
5411 #ifdef __OPTIMIZE__
5412 extern __inline __m512i
5413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5414 _mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
5416 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
5417 (__v4si) __B,
5418 __imm,
5419 (__v16si) __A, -1);
5422 extern __inline __m512
5423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5424 _mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
5426 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
5427 (__v4sf) __B,
5428 __imm,
5429 (__v16sf) __A, -1);
5432 extern __inline __m512i
5433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5434 _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
5436 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5437 (__v4di) __B,
5438 __imm,
5439 (__v8di)
5440 _mm512_setzero_si512 (),
5441 (__mmask8) -1);
5444 extern __inline __m512i
5445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5446 _mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
5447 __m256i __B, const int __imm)
5449 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5450 (__v4di) __B,
5451 __imm,
5452 (__v8di) __W,
5453 (__mmask8) __U);
5456 extern __inline __m512i
5457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5458 _mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
5459 const int __imm)
5461 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
5462 (__v4di) __B,
5463 __imm,
5464 (__v8di)
5465 _mm512_setzero_si512 (),
5466 (__mmask8) __U);
5469 extern __inline __m512d
5470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5471 _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
5473 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5474 (__v4df) __B,
5475 __imm,
5476 (__v8df)
5477 _mm512_setzero_pd (),
5478 (__mmask8) -1);
5481 extern __inline __m512d
5482 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5483 _mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
5484 __m256d __B, const int __imm)
5486 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5487 (__v4df) __B,
5488 __imm,
5489 (__v8df) __W,
5490 (__mmask8) __U);
5493 extern __inline __m512d
5494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5495 _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
5496 const int __imm)
5498 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
5499 (__v4df) __B,
5500 __imm,
5501 (__v8df)
5502 _mm512_setzero_pd (),
5503 (__mmask8) __U);
5505 #else
5506 #define _mm512_insertf32x4(X, Y, C) \
5507 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
5508 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))
5510 #define _mm512_inserti32x4(X, Y, C) \
5511 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
5512 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))
5514 #define _mm512_insertf64x4(X, Y, C) \
5515 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5516 (__v4df)(__m256d) (Y), (int) (C), \
5517 (__v8df)(__m512d)_mm512_setzero_pd(), \
5518 (__mmask8)-1))
5520 #define _mm512_mask_insertf64x4(W, U, X, Y, C) \
5521 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5522 (__v4df)(__m256d) (Y), (int) (C), \
5523 (__v8df)(__m512d)(W), \
5524 (__mmask8)(U)))
5526 #define _mm512_maskz_insertf64x4(U, X, Y, C) \
5527 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
5528 (__v4df)(__m256d) (Y), (int) (C), \
5529 (__v8df)(__m512d)_mm512_setzero_pd(), \
5530 (__mmask8)(U)))
5532 #define _mm512_inserti64x4(X, Y, C) \
5533 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5534 (__v4di)(__m256i) (Y), (int) (C), \
5535 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5536 (__mmask8)-1))
5538 #define _mm512_mask_inserti64x4(W, U, X, Y, C) \
5539 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5540 (__v4di)(__m256i) (Y), (int) (C),\
5541 (__v8di)(__m512i)(W),\
5542 (__mmask8)(U)))
5544 #define _mm512_maskz_inserti64x4(U, X, Y, C) \
5545 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
5546 (__v4di)(__m256i) (Y), (int) (C), \
5547 (__v8di)(__m512i)_mm512_setzero_si512 (), \
5548 (__mmask8)(U)))
5549 #endif
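/* Usage sketch for the insert intrinsics above: build a 512-bit value from
   two 256-bit halves.  Names are placeholders; _mm256_set1_pd is assumed to
   come in via <immintrin.h>.

     __m256d lo = _mm256_set1_pd (1.0);
     __m256d hi = _mm256_set1_pd (2.0);
     __m512d v  = _mm512_insertf64x4 (_mm512_setzero_pd (), lo, 0);
     v = _mm512_insertf64x4 (v, hi, 1);                                        */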
5551 extern __inline __m512d
5552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5553 _mm512_loadu_pd (void const *__P)
5555 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5556 (__v8df)
5557 _mm512_setzero_pd (),
5558 (__mmask8) -1);
5561 extern __inline __m512d
5562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5563 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
5565 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5566 (__v8df) __W,
5567 (__mmask8) __U);
5570 extern __inline __m512d
5571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5572 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
5574 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
5575 (__v8df)
5576 _mm512_setzero_pd (),
5577 (__mmask8) __U);
5580 extern __inline void
5581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5582 _mm512_storeu_pd (void *__P, __m512d __A)
5584 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5585 (__mmask8) -1);
5588 extern __inline void
5589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5590 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
5592 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
5593 (__mmask8) __U);
5596 extern __inline __m512
5597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5598 _mm512_loadu_ps (void const *__P)
5600 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5601 (__v16sf)
5602 _mm512_setzero_ps (),
5603 (__mmask16) -1);
5606 extern __inline __m512
5607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5608 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
5610 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5611 (__v16sf) __W,
5612 (__mmask16) __U);
5615 extern __inline __m512
5616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5617 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
5619 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
5620 (__v16sf)
5621 _mm512_setzero_ps (),
5622 (__mmask16) __U);
5625 extern __inline void
5626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627 _mm512_storeu_ps (void *__P, __m512 __A)
5629 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5630 (__mmask16) -1);
5633 extern __inline void
5634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5635 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
5637 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
5638 (__mmask16) __U);
5641 extern __inline __m512i
5642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5643 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5645 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5646 (__v8di) __W,
5647 (__mmask8) __U);
5650 extern __inline __m512i
5651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5652 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5654 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
5655 (__v8di)
5656 _mm512_setzero_si512 (),
5657 (__mmask8) __U);
5660 extern __inline void
5661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5662 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
5664 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
5665 (__mmask8) __U);
5668 extern __inline __m512i
5669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5670 _mm512_loadu_si512 (void const *__P)
5672 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5673 (__v16si)
5674 _mm512_setzero_si512 (),
5675 (__mmask16) -1);
5678 extern __inline __m512i
5679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5680 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5682 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5683 (__v16si) __W,
5684 (__mmask16) __U);
5687 extern __inline __m512i
5688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5689 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
5691 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
5692 (__v16si)
5693 _mm512_setzero_si512 (),
5694 (__mmask16) __U);
5697 extern __inline void
5698 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5699 _mm512_storeu_si512 (void *__P, __m512i __A)
5701 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5702 (__mmask16) -1);
5705 extern __inline void
5706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5707 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
5709 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
5710 (__mmask16) __U);
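/* Usage sketch for the unaligned masked loads/stores above: process a tail
   of n < 16 floats without touching memory past the end of the buffer.
   src, dst and n are placeholders.

     __mmask16 k = (__mmask16) ((1u << n) - 1);
     __m512 x = _mm512_maskz_loadu_ps (k, src);
     x = _mm512_add_ps (x, x);
     _mm512_mask_storeu_ps (dst, k, x);                                        */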
5713 extern __inline __m512d
5714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5715 _mm512_permutevar_pd (__m512d __A, __m512i __C)
5717 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5718 (__v8di) __C,
5719 (__v8df)
5720 _mm512_setzero_pd (),
5721 (__mmask8) -1);
5724 extern __inline __m512d
5725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5726 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
5728 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5729 (__v8di) __C,
5730 (__v8df) __W,
5731 (__mmask8) __U);
5734 extern __inline __m512d
5735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5736 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
5738 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
5739 (__v8di) __C,
5740 (__v8df)
5741 _mm512_setzero_pd (),
5742 (__mmask8) __U);
5745 extern __inline __m512
5746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 _mm512_permutevar_ps (__m512 __A, __m512i __C)
5749 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5750 (__v16si) __C,
5751 (__v16sf)
5752 _mm512_setzero_ps (),
5753 (__mmask16) -1);
5756 extern __inline __m512
5757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5758 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
5760 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5761 (__v16si) __C,
5762 (__v16sf) __W,
5763 (__mmask16) __U);
5766 extern __inline __m512
5767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5768 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
5770 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
5771 (__v16si) __C,
5772 (__v16sf)
5773 _mm512_setzero_ps (),
5774 (__mmask16) __U);
5777 extern __inline __m512i
5778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5779 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
5781 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5782 /* idx */ ,
5783 (__v8di) __A,
5784 (__v8di) __B,
5785 (__mmask8) -1);
5788 extern __inline __m512i
5789 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
5791 __m512i __B)
5793 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
5794 /* idx */ ,
5795 (__v8di) __A,
5796 (__v8di) __B,
5797 (__mmask8) __U);
5800 extern __inline __m512i
5801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5802 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
5803 __mmask8 __U, __m512i __B)
5805 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
5806 (__v8di) __I
5807 /* idx */ ,
5808 (__v8di) __B,
5809 (__mmask8) __U);
5812 extern __inline __m512i
5813 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5814 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
5815 __m512i __I, __m512i __B)
5817 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
5818 /* idx */ ,
5819 (__v8di) __A,
5820 (__v8di) __B,
5821 (__mmask8) __U);
5824 extern __inline __m512i
5825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5826 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
5828 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5829 /* idx */ ,
5830 (__v16si) __A,
5831 (__v16si) __B,
5832 (__mmask16) -1);
5835 extern __inline __m512i
5836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
5838 __m512i __I, __m512i __B)
5840 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
5841 /* idx */ ,
5842 (__v16si) __A,
5843 (__v16si) __B,
5844 (__mmask16) __U);
5847 extern __inline __m512i
5848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5849 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5850 __mmask16 __U, __m512i __B)
5852 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5853 (__v16si) __I
5854 /* idx */ ,
5855 (__v16si) __B,
5856 (__mmask16) __U);
5859 extern __inline __m512i
5860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5861 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
5862 __m512i __I, __m512i __B)
5864 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
5865 /* idx */ ,
5866 (__v16si) __A,
5867 (__v16si) __B,
5868 (__mmask16) __U);
5871 extern __inline __m512d
5872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5873 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
5875 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5876 /* idx */ ,
5877 (__v8df) __A,
5878 (__v8df) __B,
5879 (__mmask8) -1);
5882 extern __inline __m512d
5883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
5885 __m512d __B)
5887 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
5888 /* idx */ ,
5889 (__v8df) __A,
5890 (__v8df) __B,
5891 (__mmask8) __U);
5894 extern __inline __m512d
5895 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5896 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
5897 __m512d __B)
5899 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
5900 (__v8di) __I
5901 /* idx */ ,
5902 (__v8df) __B,
5903 (__mmask8) __U);
5906 extern __inline __m512d
5907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5908 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
5909 __m512d __B)
5911 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
5912 /* idx */ ,
5913 (__v8df) __A,
5914 (__v8df) __B,
5915 (__mmask8) __U);
5918 extern __inline __m512
5919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5920 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
5922 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5923 /* idx */ ,
5924 (__v16sf) __A,
5925 (__v16sf) __B,
5926 (__mmask16) -1);
5929 extern __inline __m512
5930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5931 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
5933 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
5934 /* idx */ ,
5935 (__v16sf) __A,
5936 (__v16sf) __B,
5937 (__mmask16) __U);
5940 extern __inline __m512
5941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5942 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
5943 __m512 __B)
5945 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
5946 (__v16si) __I
5947 /* idx */ ,
5948 (__v16sf) __B,
5949 (__mmask16) __U);
5952 extern __inline __m512
5953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5954 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
5955 __m512 __B)
5957 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
5958 /* idx */ ,
5959 (__v16sf) __A,
5960 (__v16sf) __B,
5961 (__mmask16) __U);
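/* Usage sketch for the two-source permutes above: bit 4 of each index
   selects between the two data operands and the low four bits pick the
   element.  a and b are placeholders.

     const __m512i idx = _mm512_set_epi32 (23, 7, 22, 6, 21, 5, 20, 4,
                                           19, 3, 18, 2, 17, 1, 16, 0);
     __m512 r = _mm512_permutex2var_ps (a, idx, b);   // a0,b0,a1,b1,...,a7,b7 */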
5964 #ifdef __OPTIMIZE__
5965 extern __inline __m512d
5966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5967 _mm512_permute_pd (__m512d __X, const int __C)
5969 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5970 (__v8df)
5971 _mm512_setzero_pd (),
5972 (__mmask8) -1);
5975 extern __inline __m512d
5976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977 _mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
5979 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5980 (__v8df) __W,
5981 (__mmask8) __U);
5984 extern __inline __m512d
5985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 _mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
5988 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
5989 (__v8df)
5990 _mm512_setzero_pd (),
5991 (__mmask8) __U);
5994 extern __inline __m512
5995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996 _mm512_permute_ps (__m512 __X, const int __C)
5998 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
5999 (__v16sf)
6000 _mm512_setzero_ps (),
6001 (__mmask16) -1);
6004 extern __inline __m512
6005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6006 _mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
6008 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6009 (__v16sf) __W,
6010 (__mmask16) __U);
6013 extern __inline __m512
6014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6015 _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
6017 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
6018 (__v16sf)
6019 _mm512_setzero_ps (),
6020 (__mmask16) __U);
6022 #else
6023 #define _mm512_permute_pd(X, C) \
6024 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6025 (__v8df)(__m512d)(X), \
6026 (__mmask8)(-1)))
6028 #define _mm512_mask_permute_pd(W, U, X, C) \
6029 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6030 (__v8df)(__m512d)(W), \
6031 (__mmask8)(U)))
6033 #define _mm512_maskz_permute_pd(U, X, C) \
6034 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
6035 (__v8df)(__m512d)_mm512_setzero_pd(), \
6036 (__mmask8)(U)))
6038 #define _mm512_permute_ps(X, C) \
6039 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6040 (__v16sf)(__m512)(X), \
6041 (__mmask16)(-1)))
6043 #define _mm512_mask_permute_ps(W, U, X, C) \
6044 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6045 (__v16sf)(__m512)(W), \
6046 (__mmask16)(U)))
6048 #define _mm512_maskz_permute_ps(U, X, C) \
6049 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
6050 (__v16sf)(__m512)_mm512_setzero_ps(), \
6051 (__mmask16)(U)))
6052 #endif
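/* Usage sketch for the in-lane permutes above (vpermilpd with an immediate):
   one control bit per double selects within its own 128-bit lane, so 0x55
   swaps the two doubles of every lane.  x is a placeholder.

     __m512d swapped = _mm512_permute_pd (x, 0x55);                            */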
6054 #ifdef __OPTIMIZE__
6055 extern __inline __m512i
6056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6057 _mm512_permutex_epi64 (__m512i __X, const int __I)
6059 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6060 (__v8di)
6061 _mm512_setzero_si512 (),
6062 (__mmask8) (-1));
6065 extern __inline __m512i
6066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6067 _mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
6068 __m512i __X, const int __I)
6070 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6071 (__v8di) __W,
6072 (__mmask8) __M);
6075 extern __inline __m512i
6076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6077 _mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
6079 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
6080 (__v8di)
6081 _mm512_setzero_si512 (),
6082 (__mmask8) __M);
6085 extern __inline __m512d
6086 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6087 _mm512_permutex_pd (__m512d __X, const int __M)
6089 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6090 (__v8df)
6091 _mm512_setzero_pd (),
6092 (__mmask8) -1);
6095 extern __inline __m512d
6096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6097 _mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
6099 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6100 (__v8df) __W,
6101 (__mmask8) __U);
6104 extern __inline __m512d
6105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6106 _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
6108 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
6109 (__v8df)
6110 _mm512_setzero_pd (),
6111 (__mmask8) __U);
6113 #else
6114 #define _mm512_permutex_pd(X, M) \
6115 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6116 (__v8df)(__m512d)(X), (__mmask8)-1))
6118 #define _mm512_mask_permutex_pd(W, U, X, M) \
6119 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6120 (__v8df)(__m512d)(W), (__mmask8)(U)))
6122 #define _mm512_maskz_permutex_pd(U, X, M) \
6123 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
6124 (__v8df)(__m512d)_mm512_setzero_pd(),\
6125 (__mmask8)(U)))
6127 #define _mm512_permutex_epi64(X, I) \
6128 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6129 (int)(I), \
6130 (__v8di)(__m512i)(X), \
6131 (__mmask8)(-1)))
6133 #define _mm512_maskz_permutex_epi64(M, X, I) \
6134 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6135 (int)(I), \
6136 (__v8di)(__m512i) \
6137 (_mm512_setzero_si512 ()),\
6138 (__mmask8)(M)))
6140 #define _mm512_mask_permutex_epi64(W, M, X, I) \
6141 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
6142 (int)(I), \
6143 (__v8di)(__m512i)(W), \
6144 (__mmask8)(M)))
6145 #endif
6147 extern __inline __m512i
6148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6149 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
6151 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6152 (__v8di) __X,
6153 (__v8di)
6154 _mm512_setzero_si512 (),
6155 __M);
6158 extern __inline __m512i
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
6162 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6163 (__v8di) __X,
6164 (__v8di)
6165 _mm512_setzero_si512 (),
6166 (__mmask8) -1);
6169 extern __inline __m512i
6170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6171 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
6172 __m512i __Y)
6174 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
6175 (__v8di) __X,
6176 (__v8di) __W,
6177 __M);
6180 extern __inline __m512i
6181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
6184 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6185 (__v16si) __X,
6186 (__v16si)
6187 _mm512_setzero_si512 (),
6188 __M);
6191 extern __inline __m512i
6192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6193 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
6195 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6196 (__v16si) __X,
6197 (__v16si)
6198 _mm512_setzero_si512 (),
6199 (__mmask16) -1);
6202 extern __inline __m512i
6203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
6205 __m512i __Y)
6207 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
6208 (__v16si) __X,
6209 (__v16si) __W,
6210 __M);
6213 extern __inline __m512d
6214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6215 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
6217 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6218 (__v8di) __X,
6219 (__v8df)
6220 _mm512_setzero_pd (),
6221 (__mmask8) -1);
6224 extern __inline __m512d
6225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6226 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
6228 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6229 (__v8di) __X,
6230 (__v8df) __W,
6231 (__mmask8) __U);
6234 extern __inline __m512d
6235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6236 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
6238 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
6239 (__v8di) __X,
6240 (__v8df)
6241 _mm512_setzero_pd (),
6242 (__mmask8) __U);
6245 extern __inline __m512
6246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6247 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
6249 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6250 (__v16si) __X,
6251 (__v16sf)
6252 _mm512_setzero_ps (),
6253 (__mmask16) -1);
6256 extern __inline __m512
6257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6258 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
6260 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6261 (__v16si) __X,
6262 (__v16sf) __W,
6263 (__mmask16) __U);
6266 extern __inline __m512
6267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6268 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
6270 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
6271 (__v16si) __X,
6272 (__v16sf)
6273 _mm512_setzero_ps (),
6274 (__mmask16) __U);
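/* Usage sketch for the full-width variable permutes above: element i of the
   result is taken from element idx[i] of the data operand, so an index
   vector of 15..0 reverses all sixteen floats.  x is a placeholder.

     const __m512i rev = _mm512_set_epi32 (0, 1, 2, 3, 4, 5, 6, 7,
                                           8, 9, 10, 11, 12, 13, 14, 15);
     __m512 r = _mm512_permutexvar_ps (rev, x);                                */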
6277 #ifdef __OPTIMIZE__
6278 extern __inline __m512
6279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6280 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
6282 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6283 (__v16sf) __V, __imm,
6284 (__v16sf)
6285 _mm512_setzero_ps (),
6286 (__mmask16) -1);
6289 extern __inline __m512
6290 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6291 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
6292 __m512 __V, const int __imm)
6294 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6295 (__v16sf) __V, __imm,
6296 (__v16sf) __W,
6297 (__mmask16) __U);
6300 extern __inline __m512
6301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6302 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
6304 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
6305 (__v16sf) __V, __imm,
6306 (__v16sf)
6307 _mm512_setzero_ps (),
6308 (__mmask16) __U);
6311 extern __inline __m512d
6312 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6313 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
6315 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6316 (__v8df) __V, __imm,
6317 (__v8df)
6318 _mm512_setzero_pd (),
6319 (__mmask8) -1);
6322 extern __inline __m512d
6323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6324 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
6325 __m512d __V, const int __imm)
6327 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6328 (__v8df) __V, __imm,
6329 (__v8df) __W,
6330 (__mmask8) __U);
6333 extern __inline __m512d
6334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6335 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
6336 const int __imm)
6338 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
6339 (__v8df) __V, __imm,
6340 (__v8df)
6341 _mm512_setzero_pd (),
6342 (__mmask8) __U);
6345 extern __inline __m512d
6346 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6347 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
6348 const int __imm, const int __R)
6350 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6351 (__v8df) __B,
6352 (__v8di) __C,
6353 __imm,
6354 (__mmask8) -1, __R);
6357 extern __inline __m512d
6358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6359 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6360 __m512i __C, const int __imm, const int __R)
6362 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
6363 (__v8df) __B,
6364 (__v8di) __C,
6365 __imm,
6366 (__mmask8) __U, __R);
6369 extern __inline __m512d
6370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6371 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6372 __m512i __C, const int __imm, const int __R)
6374 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
6375 (__v8df) __B,
6376 (__v8di) __C,
6377 __imm,
6378 (__mmask8) __U, __R);
6381 extern __inline __m512
6382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6383 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
6384 const int __imm, const int __R)
6386 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6387 (__v16sf) __B,
6388 (__v16si) __C,
6389 __imm,
6390 (__mmask16) -1, __R);
6393 extern __inline __m512
6394 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6395 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6396 __m512i __C, const int __imm, const int __R)
6398 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
6399 (__v16sf) __B,
6400 (__v16si) __C,
6401 __imm,
6402 (__mmask16) __U, __R);
6405 extern __inline __m512
6406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6407 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6408 __m512i __C, const int __imm, const int __R)
6410 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
6411 (__v16sf) __B,
6412 (__v16si) __C,
6413 __imm,
6414 (__mmask16) __U, __R);
6417 extern __inline __m128d
6418 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6419 _mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
6420 const int __imm, const int __R)
6422 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6423 (__v2df) __B,
6424 (__v2di) __C, __imm,
6425 (__mmask8) -1, __R);
6428 extern __inline __m128d
6429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6430 _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
6431 __m128i __C, const int __imm, const int __R)
6433 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
6434 (__v2df) __B,
6435 (__v2di) __C, __imm,
6436 (__mmask8) __U, __R);
6439 extern __inline __m128d
6440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6441 _mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
6442 __m128i __C, const int __imm, const int __R)
6444 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
6445 (__v2df) __B,
6446 (__v2di) __C,
6447 __imm,
6448 (__mmask8) __U, __R);
6451 extern __inline __m128
6452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6453 _mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
6454 const int __imm, const int __R)
6456 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6457 (__v4sf) __B,
6458 (__v4si) __C, __imm,
6459 (__mmask8) -1, __R);
6462 extern __inline __m128
6463 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6464 _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
6465 __m128i __C, const int __imm, const int __R)
6467 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
6468 (__v4sf) __B,
6469 (__v4si) __C, __imm,
6470 (__mmask8) __U, __R);
6473 extern __inline __m128
6474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6475 _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
6476 __m128i __C, const int __imm, const int __R)
6478 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
6479 (__v4sf) __B,
6480 (__v4si) __C, __imm,
6481 (__mmask8) __U, __R);
6484 #else
6485 #define _mm512_shuffle_pd(X, Y, C) \
6486 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6487 (__v8df)(__m512d)(Y), (int)(C),\
6488 (__v8df)(__m512d)_mm512_setzero_pd(),\
6489 (__mmask8)-1))
6491 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
6492 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6493 (__v8df)(__m512d)(Y), (int)(C),\
6494 (__v8df)(__m512d)(W),\
6495 (__mmask8)(U)))
6497 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
6498 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
6499 (__v8df)(__m512d)(Y), (int)(C),\
6500 (__v8df)(__m512d)_mm512_setzero_pd(),\
6501 (__mmask8)(U)))
6503 #define _mm512_shuffle_ps(X, Y, C) \
6504 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6505 (__v16sf)(__m512)(Y), (int)(C),\
6506 (__v16sf)(__m512)_mm512_setzero_ps(),\
6507 (__mmask16)-1))
6509 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
6510 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6511 (__v16sf)(__m512)(Y), (int)(C),\
6512 (__v16sf)(__m512)(W),\
6513 (__mmask16)(U)))
6515 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
6516 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
6517 (__v16sf)(__m512)(Y), (int)(C),\
6518 (__v16sf)(__m512)_mm512_setzero_ps(),\
6519 (__mmask16)(U)))
6521 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
6522 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6523 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6524 (__mmask8)(-1), (R)))
6526 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
6527 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
6528 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6529 (__mmask8)(U), (R)))
6531 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
6532 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
6533 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
6534 (__mmask8)(U), (R)))
6536 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
6537 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6538 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6539 (__mmask16)(-1), (R)))
6541 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
6542 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
6543 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6544 (__mmask16)(U), (R)))
6546 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
6547 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
6548 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
6549 (__mmask16)(U), (R)))
6551 #define _mm_fixupimm_round_sd(X, Y, Z, C, R) \
6552 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6553 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6554 (__mmask8)(-1), (R)))
6556 #define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \
6557 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
6558 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6559 (__mmask8)(U), (R)))
6561 #define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \
6562 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
6563 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
6564 (__mmask8)(U), (R)))
6566 #define _mm_fixupimm_round_ss(X, Y, Z, C, R) \
6567 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6568 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6569 (__mmask8)(-1), (R)))
6571 #define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \
6572 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
6573 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6574 (__mmask8)(U), (R)))
6576 #define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \
6577 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
6578 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
6579 (__mmask8)(U), (R)))
6580 #endif
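/* Usage sketch for _mm512_shuffle_ps above: the 8-bit immediate is applied
   per 128-bit lane; the low two selectors index into the first operand and
   the high two into the second.  a and b are placeholders; _MM_SHUFFLE is
   assumed to come in via <immintrin.h>.

     // per lane: a[0], a[1], b[2], b[3]
     __m512 r = _mm512_shuffle_ps (a, b, _MM_SHUFFLE (3, 2, 1, 0));            */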
6582 extern __inline __m512
6583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6584 _mm512_movehdup_ps (__m512 __A)
6586 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6587 (__v16sf)
6588 _mm512_setzero_ps (),
6589 (__mmask16) -1);
6592 extern __inline __m512
6593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6594 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6596 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6597 (__v16sf) __W,
6598 (__mmask16) __U);
6601 extern __inline __m512
6602 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6603 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
6605 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
6606 (__v16sf)
6607 _mm512_setzero_ps (),
6608 (__mmask16) __U);
6611 extern __inline __m512
6612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6613 _mm512_moveldup_ps (__m512 __A)
6615 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6616 (__v16sf)
6617 _mm512_setzero_ps (),
6618 (__mmask16) -1);
6621 extern __inline __m512
6622 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6623 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
6625 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6626 (__v16sf) __W,
6627 (__mmask16) __U);
6630 extern __inline __m512
6631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6632 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
6634 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
6635 (__v16sf)
6636 _mm512_setzero_ps (),
6637 (__mmask16) __U);
6640 extern __inline __m512i
6641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6642 _mm512_or_si512 (__m512i __A, __m512i __B)
6644 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6645 (__v16si) __B,
6646 (__v16si)
6647 _mm512_setzero_si512 (),
6648 (__mmask16) -1);
6651 extern __inline __m512i
6652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6653 _mm512_or_epi32 (__m512i __A, __m512i __B)
6655 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6656 (__v16si) __B,
6657 (__v16si)
6658 _mm512_setzero_si512 (),
6659 (__mmask16) -1);
6662 extern __inline __m512i
6663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6664 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6666 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6667 (__v16si) __B,
6668 (__v16si) __W,
6669 (__mmask16) __U);
6672 extern __inline __m512i
6673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6674 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6676 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
6677 (__v16si) __B,
6678 (__v16si)
6679 _mm512_setzero_si512 (),
6680 (__mmask16) __U);
6683 extern __inline __m512i
6684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6685 _mm512_or_epi64 (__m512i __A, __m512i __B)
6687 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6688 (__v8di) __B,
6689 (__v8di)
6690 _mm512_setzero_si512 (),
6691 (__mmask8) -1);
6694 extern __inline __m512i
6695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6696 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6698 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6699 (__v8di) __B,
6700 (__v8di) __W,
6701 (__mmask8) __U);
6704 extern __inline __m512i
6705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6706 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6708 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
6709 (__v8di) __B,
6710 (__v8di)
6711 _mm512_setzero_si512 (),
6712 (__mmask8) __U);
6715 extern __inline __m512i
6716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6717 _mm512_xor_si512 (__m512i __A, __m512i __B)
6719 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6720 (__v16si) __B,
6721 (__v16si)
6722 _mm512_setzero_si512 (),
6723 (__mmask16) -1);
6726 extern __inline __m512i
6727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6728 _mm512_xor_epi32 (__m512i __A, __m512i __B)
6730 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6731 (__v16si) __B,
6732 (__v16si)
6733 _mm512_setzero_si512 (),
6734 (__mmask16) -1);
6737 extern __inline __m512i
6738 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6739 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6741 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6742 (__v16si) __B,
6743 (__v16si) __W,
6744 (__mmask16) __U);
6747 extern __inline __m512i
6748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6749 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
6751 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
6752 (__v16si) __B,
6753 (__v16si)
6754 _mm512_setzero_si512 (),
6755 (__mmask16) __U);
6758 extern __inline __m512i
6759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6760 _mm512_xor_epi64 (__m512i __A, __m512i __B)
6762 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6763 (__v8di) __B,
6764 (__v8di)
6765 _mm512_setzero_si512 (),
6766 (__mmask8) -1);
6769 extern __inline __m512i
6770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6771 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
6773 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6774 (__v8di) __B,
6775 (__v8di) __W,
6776 (__mmask8) __U);
6779 extern __inline __m512i
6780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6781 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
6783 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
6784 (__v8di) __B,
6785 (__v8di)
6786 _mm512_setzero_si512 (),
6787 (__mmask8) __U);
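/* Usage sketch contrasting merge- and zero-masking for the bitwise
   intrinsics above.  w, a and b are placeholders.

     __m512i merged = _mm512_mask_xor_epi64 (w, 0x0f, a, b);  // high 4 keep w
     __m512i zeroed = _mm512_maskz_xor_epi64 (0x0f, a, b);    // high 4 are 0  */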
6790 #ifdef __OPTIMIZE__
6791 extern __inline __m512i
6792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6793 _mm512_rol_epi32 (__m512i __A, const int __B)
6795 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6796 (__v16si)
6797 _mm512_setzero_si512 (),
6798 (__mmask16) -1);
6801 extern __inline __m512i
6802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6803 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
6805 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6806 (__v16si) __W,
6807 (__mmask16) __U);
6810 extern __inline __m512i
6811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
6814 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
6815 (__v16si)
6816 _mm512_setzero_si512 (),
6817 (__mmask16) __U);
6820 extern __inline __m512i
6821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6822 _mm512_ror_epi32 (__m512i __A, int __B)
6824 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6825 (__v16si)
6826 _mm512_setzero_si512 (),
6827 (__mmask16) -1);
6830 extern __inline __m512i
6831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6832 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
6834 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6835 (__v16si) __W,
6836 (__mmask16) __U);
6839 extern __inline __m512i
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
6843 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
6844 (__v16si)
6845 _mm512_setzero_si512 (),
6846 (__mmask16) __U);
6849 extern __inline __m512i
6850 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6851 _mm512_rol_epi64 (__m512i __A, const int __B)
6853 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6854 (__v8di)
6855 _mm512_setzero_si512 (),
6856 (__mmask8) -1);
6859 extern __inline __m512i
6860 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6861 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
6863 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6864 (__v8di) __W,
6865 (__mmask8) __U);
6868 extern __inline __m512i
6869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6870 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
6872 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
6873 (__v8di)
6874 _mm512_setzero_si512 (),
6875 (__mmask8) __U);
6878 extern __inline __m512i
6879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6880 _mm512_ror_epi64 (__m512i __A, int __B)
6882 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6883 (__v8di)
6884 _mm512_setzero_si512 (),
6885 (__mmask8) -1);
6888 extern __inline __m512i
6889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6890 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
6892 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6893 (__v8di) __W,
6894 (__mmask8) __U);
6897 extern __inline __m512i
6898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6899 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
6901 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
6902 (__v8di)
6903 _mm512_setzero_si512 (),
6904 (__mmask8) __U);
6907 #else
6908 #define _mm512_rol_epi32(A, B) \
6909 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6910 (int)(B), \
6911 (__v16si)_mm512_setzero_si512 (), \
6912 (__mmask16)(-1)))
6913 #define _mm512_mask_rol_epi32(W, U, A, B) \
6914 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6915 (int)(B), \
6916 (__v16si)(__m512i)(W), \
6917 (__mmask16)(U)))
6918 #define _mm512_maskz_rol_epi32(U, A, B) \
6919 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
6920 (int)(B), \
6921 (__v16si)_mm512_setzero_si512 (), \
6922 (__mmask16)(U)))
6923 #define _mm512_ror_epi32(A, B) \
6924 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6925 (int)(B), \
6926 (__v16si)_mm512_setzero_si512 (), \
6927 (__mmask16)(-1)))
6928 #define _mm512_mask_ror_epi32(W, U, A, B) \
6929 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6930 (int)(B), \
6931 (__v16si)(__m512i)(W), \
6932 (__mmask16)(U)))
6933 #define _mm512_maskz_ror_epi32(U, A, B) \
6934 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
6935 (int)(B), \
6936 (__v16si)_mm512_setzero_si512 (), \
6937 (__mmask16)(U)))
6938 #define _mm512_rol_epi64(A, B) \
6939 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6940 (int)(B), \
6941 (__v8di)_mm512_setzero_si512 (), \
6942 (__mmask8)(-1)))
6943 #define _mm512_mask_rol_epi64(W, U, A, B) \
6944 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6945 (int)(B), \
6946 (__v8di)(__m512i)(W), \
6947 (__mmask8)(U)))
6948 #define _mm512_maskz_rol_epi64(U, A, B) \
6949 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
6950 (int)(B), \
6951 (__v8di)_mm512_setzero_si512 (), \
6952 (__mmask8)(U)))
6954 #define _mm512_ror_epi64(A, B) \
6955 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6956 (int)(B), \
6957 (__v8di)_mm512_setzero_si512 (), \
6958 (__mmask8)(-1)))
6959 #define _mm512_mask_ror_epi64(W, U, A, B) \
6960 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6961 (int)(B), \
6962 (__v8di)(__m512i)(W), \
6963 (__mmask8)(U)))
6964 #define _mm512_maskz_ror_epi64(U, A, B) \
6965 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
6966 (int)(B), \
6967 (__v8di)_mm512_setzero_si512 (), \
6968 (__mmask8)(U)))
6969 #endif
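/* Usage sketch for the rotate intrinsics above: the count is an immediate,
   hence the macro forms when __OPTIMIZE__ is not defined.  x is a
   placeholder.

     __m512i r = _mm512_rol_epi32 (x, 7);   // rotate each dword left by 7     */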
6971 extern __inline __m512i
6972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6973 _mm512_and_si512 (__m512i __A, __m512i __B)
6975 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6976 (__v16si) __B,
6977 (__v16si)
6978 _mm512_setzero_si512 (),
6979 (__mmask16) -1);
6982 extern __inline __m512i
6983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6984 _mm512_and_epi32 (__m512i __A, __m512i __B)
6986 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6987 (__v16si) __B,
6988 (__v16si)
6989 _mm512_setzero_si512 (),
6990 (__mmask16) -1);
6993 extern __inline __m512i
6994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6995 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
6997 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
6998 (__v16si) __B,
6999 (__v16si) __W,
7000 (__mmask16) __U);
7003 extern __inline __m512i
7004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7005 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7007 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
7008 (__v16si) __B,
7009 (__v16si)
7010 _mm512_setzero_si512 (),
7011 (__mmask16) __U);
7014 extern __inline __m512i
7015 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7016 _mm512_and_epi64 (__m512i __A, __m512i __B)
7018 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7019 (__v8di) __B,
7020 (__v8di)
7021 _mm512_setzero_si512 (),
7022 (__mmask8) -1);
7025 extern __inline __m512i
7026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7027 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7029 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7030 (__v8di) __B,
7031 (__v8di) __W, __U);
7034 extern __inline __m512i
7035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7036 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7038 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
7039 (__v8di) __B,
7040 (__v8di)
7041 _mm512_setzero_si512 (),
7042 (__mmask8) __U);
7045 extern __inline __m512i
7046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7047 _mm512_andnot_si512 (__m512i __A, __m512i __B)
7049 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7050 (__v16si) __B,
7051 (__v16si)
7052 _mm512_setzero_si512 (),
7053 (__mmask16) -1);
7056 extern __inline __m512i
7057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7058 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
7060 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7061 (__v16si) __B,
7062 (__v16si)
7063 _mm512_setzero_si512 (),
7064 (__mmask16) -1);
7067 extern __inline __m512i
7068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7069 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7071 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7072 (__v16si) __B,
7073 (__v16si) __W,
7074 (__mmask16) __U);
7077 extern __inline __m512i
7078 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7079 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7081 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
7082 (__v16si) __B,
7083 (__v16si)
7084 _mm512_setzero_si512 (),
7085 (__mmask16) __U);
7088 extern __inline __m512i
7089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7090 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
7092 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7093 (__v8di) __B,
7094 (__v8di)
7095 _mm512_setzero_si512 (),
7096 (__mmask8) -1);
7099 extern __inline __m512i
7100 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7101 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7103 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7104 (__v8di) __B,
7105 (__v8di) __W, __U);
7108 extern __inline __m512i
7109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7110 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7112 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
7113 (__v8di) __B,
7114 (__v8di)
7115 _mm512_setzero_si512 (),
7116 (__mmask8) __U);
7119 extern __inline __mmask16
7120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7121 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
7123 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7124 (__v16si) __B,
7125 (__mmask16) -1);
7128 extern __inline __mmask16
7129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7130 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7132 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
7133 (__v16si) __B, __U);
7136 extern __inline __mmask8
7137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7138 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
7140 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
7141 (__v8di) __B,
7142 (__mmask8) -1);
7145 extern __inline __mmask8
7146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7147 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7149 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
7152 extern __inline __mmask16
7153 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7154 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
7156 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7157 (__v16si) __B,
7158 (__mmask16) -1);
7161 extern __inline __mmask16
7162 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7163 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
7165 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
7166 (__v16si) __B, __U);
7169 extern __inline __mmask8
7170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7171 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
7173 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7174 (__v8di) __B,
7175 (__mmask8) -1);
7178 extern __inline __mmask8
7179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7180 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
7182 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
7183 (__v8di) __B, __U);
7186 extern __inline __m512i
7187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7188 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
7190 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7191 (__v16si) __B,
7192 (__v16si)
7193 _mm512_setzero_si512 (),
7194 (__mmask16) -1);
7197 extern __inline __m512i
7198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7199 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7200 __m512i __B)
7202 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7203 (__v16si) __B,
7204 (__v16si) __W,
7205 (__mmask16) __U);
7208 extern __inline __m512i
7209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7210 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7212 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
7213 (__v16si) __B,
7214 (__v16si)
7215 _mm512_setzero_si512 (),
7216 (__mmask16) __U);
7219 extern __inline __m512i
7220 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7221 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
7223 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7224 (__v8di) __B,
7225 (__v8di)
7226 _mm512_setzero_si512 (),
7227 (__mmask8) -1);
7230 extern __inline __m512i
7231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7232 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7234 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7235 (__v8di) __B,
7236 (__v8di) __W,
7237 (__mmask8) __U);
7240 extern __inline __m512i
7241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7242 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7244 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
7245 (__v8di) __B,
7246 (__v8di)
7247 _mm512_setzero_si512 (),
7248 (__mmask8) __U);
7251 extern __inline __m512i
7252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7253 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
7255 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7256 (__v16si) __B,
7257 (__v16si)
7258 _mm512_setzero_si512 (),
7259 (__mmask16) -1);
7262 extern __inline __m512i
7263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7264 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7265 __m512i __B)
7267 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7268 (__v16si) __B,
7269 (__v16si) __W,
7270 (__mmask16) __U);
7273 extern __inline __m512i
7274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7275 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7277 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
7278 (__v16si) __B,
7279 (__v16si)
7280 _mm512_setzero_si512 (),
7281 (__mmask16) __U);
7284 extern __inline __m512i
7285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7286 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
7288 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7289 (__v8di) __B,
7290 (__v8di)
7291 _mm512_setzero_si512 (),
7292 (__mmask8) -1);
7295 extern __inline __m512i
7296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7297 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7299 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7300 (__v8di) __B,
7301 (__v8di) __W,
7302 (__mmask8) __U);
7305 extern __inline __m512i
7306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7307 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7309 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
7310 (__v8di) __B,
7311 (__v8di)
7312 _mm512_setzero_si512 (),
7313 (__mmask8) __U);
7316 #ifdef __x86_64__
7317 #ifdef __OPTIMIZE__
7318 extern __inline unsigned long long
7319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7320 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
7322 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
7325 extern __inline long long
7326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7327 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
7329 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7332 extern __inline long long
7333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7334 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
7336 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
7339 extern __inline unsigned long long
7340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7341 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
7343 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
7346 extern __inline long long
7347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7348 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
7350 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7353 extern __inline long long
7354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7355 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
7357 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
7359 #else
7360 #define _mm_cvt_roundss_u64(A, B) \
7361 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
7363 #define _mm_cvt_roundss_si64(A, B) \
7364 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7366 #define _mm_cvt_roundss_i64(A, B) \
7367 ((long long)__builtin_ia32_vcvtss2si64(A, B))
7369 #define _mm_cvtt_roundss_u64(A, B) \
7370 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
7372 #define _mm_cvtt_roundss_i64(A, B) \
7373 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7375 #define _mm_cvtt_roundss_si64(A, B) \
7376 ((long long)__builtin_ia32_vcvttss2si64(A, B))
7377 #endif
7378 #endif
7380 #ifdef __OPTIMIZE__
7381 extern __inline unsigned
7382 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7383 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
7385 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
7388 extern __inline int
7389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7390 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
7392 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7395 extern __inline int
7396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7397 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
7399 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
7402 extern __inline unsigned
7403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7404 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
7406 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
7409 extern __inline int
7410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7411 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
7413 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7416 extern __inline int
7417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7418 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
7420 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
7422 #else
7423 #define _mm_cvt_roundss_u32(A, B) \
7424 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
7426 #define _mm_cvt_roundss_si32(A, B) \
7427 ((int)__builtin_ia32_vcvtss2si32(A, B))
7429 #define _mm_cvt_roundss_i32(A, B) \
7430 ((int)__builtin_ia32_vcvtss2si32(A, B))
7432 #define _mm_cvtt_roundss_u32(A, B) \
7433 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
7435 #define _mm_cvtt_roundss_si32(A, B) \
7436 ((int)__builtin_ia32_vcvttss2si32(A, B))
7438 #define _mm_cvtt_roundss_i32(A, B) \
7439 ((int)__builtin_ia32_vcvttss2si32(A, B))
7440 #endif
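/* Usage sketch (illustrative only, not part of the API): the rounding
   argument __R of the *_round* conversions must be a compile-time constant
   built from the SSE4.1 rounding macros, which I assume are visible via
   <immintrin.h>, e.g.

     __m128 __x = _mm_set_ss (2.5f);
     unsigned __u = _mm_cvt_roundss_u32 (__x, _MM_FROUND_TO_NEAREST_INT
					      | _MM_FROUND_NO_EXC);

   converts the low single-precision element to unsigned while suppressing
   exceptions; _mm_cvtt_roundss_u32 would truncate toward zero instead.  */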
7442 #ifdef __x86_64__
7443 #ifdef __OPTIMIZE__
7444 extern __inline unsigned long long
7445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7446 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
7448 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
7451 extern __inline long long
7452 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7453 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
7455 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7458 extern __inline long long
7459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7460 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
7462 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
7465 extern __inline unsigned long long
7466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7467 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
7469 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
7472 extern __inline long long
7473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7474 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
7476 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7479 extern __inline long long
7480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7481 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
7483 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
7485 #else
7486 #define _mm_cvt_roundsd_u64(A, B) \
7487 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
7489 #define _mm_cvt_roundsd_si64(A, B) \
7490 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7492 #define _mm_cvt_roundsd_i64(A, B) \
7493 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
7495 #define _mm_cvtt_roundsd_u64(A, B) \
7496 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
7498 #define _mm_cvtt_roundsd_si64(A, B) \
7499 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7501 #define _mm_cvtt_roundsd_i64(A, B) \
7502 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
7503 #endif
7504 #endif
7506 #ifdef __OPTIMIZE__
7507 extern __inline unsigned
7508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7509 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
7511 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
7514 extern __inline int
7515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7516 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
7518 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7521 extern __inline int
7522 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7523 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
7525 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
7528 extern __inline unsigned
7529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7530 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
7532 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
7535 extern __inline int
7536 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7537 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
7539 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7542 extern __inline int
7543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7544 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
7546 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
7548 #else
7549 #define _mm_cvt_roundsd_u32(A, B) \
7550 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
7552 #define _mm_cvt_roundsd_si32(A, B) \
7553 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7555 #define _mm_cvt_roundsd_i32(A, B) \
7556 ((int)__builtin_ia32_vcvtsd2si32(A, B))
7558 #define _mm_cvtt_roundsd_u32(A, B) \
7559 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
7561 #define _mm_cvtt_roundsd_si32(A, B) \
7562 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7564 #define _mm_cvtt_roundsd_i32(A, B) \
7565 ((int)__builtin_ia32_vcvttsd2si32(A, B))
7566 #endif
7568 extern __inline __m512d
7569 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7570 _mm512_movedup_pd (__m512d __A)
7572 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7573 (__v8df)
7574 _mm512_setzero_pd (),
7575 (__mmask8) -1);
7578 extern __inline __m512d
7579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7580 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
7582 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7583 (__v8df) __W,
7584 (__mmask8) __U);
7587 extern __inline __m512d
7588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7589 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
7591 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
7592 (__v8df)
7593 _mm512_setzero_pd (),
7594 (__mmask8) __U);
7597 extern __inline __m512d
7598 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7599 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
7601 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7602 (__v8df) __B,
7603 (__v8df)
7604 _mm512_setzero_pd (),
7605 (__mmask8) -1);
7608 extern __inline __m512d
7609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7610 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7612 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7613 (__v8df) __B,
7614 (__v8df) __W,
7615 (__mmask8) __U);
7618 extern __inline __m512d
7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7620 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
7622 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
7623 (__v8df) __B,
7624 (__v8df)
7625 _mm512_setzero_pd (),
7626 (__mmask8) __U);
7629 extern __inline __m512d
7630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7631 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
7633 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7634 (__v8df) __B,
7635 (__v8df)
7636 _mm512_setzero_pd (),
7637 (__mmask8) -1);
7640 extern __inline __m512d
7641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7642 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
7644 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7645 (__v8df) __B,
7646 (__v8df) __W,
7647 (__mmask8) __U);
7650 extern __inline __m512d
7651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7652 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
7654 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
7655 (__v8df) __B,
7656 (__v8df)
7657 _mm512_setzero_pd (),
7658 (__mmask8) __U);
7661 extern __inline __m512
7662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7663 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
7665 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7666 (__v16sf) __B,
7667 (__v16sf)
7668 _mm512_setzero_ps (),
7669 (__mmask16) -1);
7672 extern __inline __m512
7673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7674 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7676 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7677 (__v16sf) __B,
7678 (__v16sf) __W,
7679 (__mmask16) __U);
7682 extern __inline __m512
7683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7684 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
7686 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
7687 (__v16sf) __B,
7688 (__v16sf)
7689 _mm512_setzero_ps (),
7690 (__mmask16) __U);
7693 #ifdef __OPTIMIZE__
7694 extern __inline __m512d
7695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7696 _mm512_cvt_roundps_pd (__m256 __A, const int __R)
7698 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7699 (__v8df)
7700 _mm512_setzero_pd (),
7701 (__mmask8) -1, __R);
7704 extern __inline __m512d
7705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7706 _mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
7707 const int __R)
7709 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7710 (__v8df) __W,
7711 (__mmask8) __U, __R);
7714 extern __inline __m512d
7715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7716 _mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
7718 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
7719 (__v8df)
7720 _mm512_setzero_pd (),
7721 (__mmask8) __U, __R);
7724 extern __inline __m512
7725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7726 _mm512_cvt_roundph_ps (__m256i __A, const int __R)
7728 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7729 (__v16sf)
7730 _mm512_setzero_ps (),
7731 (__mmask16) -1, __R);
7734 extern __inline __m512
7735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7736 _mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
7737 const int __R)
7739 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7740 (__v16sf) __W,
7741 (__mmask16) __U, __R);
7744 extern __inline __m512
7745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7746 _mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
7748 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
7749 (__v16sf)
7750 _mm512_setzero_ps (),
7751 (__mmask16) __U, __R);
7754 extern __inline __m256i
7755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7756 _mm512_cvt_roundps_ph (__m512 __A, const int __I)
7758 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7759 __I,
7760 (__v16hi)
7761 _mm256_setzero_si256 (),
7762 -1);
7765 extern __inline __m256i
7766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7767 _mm512_cvtps_ph (__m512 __A, const int __I)
7769 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7770 __I,
7771 (__v16hi)
7772 _mm256_setzero_si256 (),
7773 -1);
7776 extern __inline __m256i
7777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7778 _mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
7779 const int __I)
7781 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7782 __I,
7783 (__v16hi) __U,
7784 (__mmask16) __W);
7787 extern __inline __m256i
7788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7789 _mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
7791 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7792 __I,
7793 (__v16hi) __U,
7794 (__mmask16) __W);
7797 extern __inline __m256i
7798 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7799 _mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
7801 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7802 __I,
7803 (__v16hi)
7804 _mm256_setzero_si256 (),
7805 (__mmask16) __W);
7808 extern __inline __m256i
7809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7810 _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
7812 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
7813 __I,
7814 (__v16hi)
7815 _mm256_setzero_si256 (),
7816 (__mmask16) __W);
7818 #else
7819 #define _mm512_cvt_roundps_pd(A, B) \
7820 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, B)
7822 #define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
7823 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
7825 #define _mm512_maskz_cvt_roundps_pd(U, A, B) \
7826 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
7828 #define _mm512_cvt_roundph_ps(A, B) \
7829 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
7831 #define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
7832 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
7834 #define _mm512_maskz_cvt_roundph_ps(U, A, B) \
7835 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)
7837 #define _mm512_cvt_roundps_ph(A, I) \
7838 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7839 (__v16hi)_mm256_setzero_si256 (), -1))
7840 #define _mm512_cvtps_ph(A, I) \
7841 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7842 (__v16hi)_mm256_setzero_si256 (), -1))
7843 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
7844 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7845 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7846 #define _mm512_mask_cvtps_ph(U, W, A, I) \
7847 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7848 (__v16hi)(__m256i)(U), (__mmask16) (W)))
7849 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \
7850 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7851 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7852 #define _mm512_maskz_cvtps_ph(W, A, I) \
7853 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
7854 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
7855 #endif
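/* Usage sketch (illustrative only, not part of the API): a float-to-half
   round trip through the conversions above.  _MM_FROUND_TO_NEAREST_INT
   selects the rounding of the float-to-half step and is assumed to come
   from the SSE4.1 rounding macros via <immintrin.h>.

     __m512 __v = _mm512_setzero_ps ();
     __m256i __h = _mm512_cvtps_ph (__v, _MM_FROUND_TO_NEAREST_INT);
     __m512 __r = _mm512_cvt_roundph_ps (__h, _MM_FROUND_CUR_DIRECTION);  */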
7857 #ifdef __OPTIMIZE__
7858 extern __inline __m256
7859 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7860 _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
7862 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7863 (__v8sf)
7864 _mm256_setzero_ps (),
7865 (__mmask8) -1, __R);
7868 extern __inline __m256
7869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870 _mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
7871 const int __R)
7873 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7874 (__v8sf) __W,
7875 (__mmask8) __U, __R);
7878 extern __inline __m256
7879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7880 _mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
7882 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
7883 (__v8sf)
7884 _mm256_setzero_ps (),
7885 (__mmask8) __U, __R);
7888 extern __inline __m128
7889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7890 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
7892 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
7893 (__v2df) __B,
7894 __R);
7897 extern __inline __m128d
7898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7899 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
7901 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
7902 (__v4sf) __B,
7903 __R);
7905 #else
7906 #define _mm512_cvt_roundpd_ps(A, B) \
7907 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B)
7909 #define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
7910 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
7912 #define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
7913 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)
7915 #define _mm_cvt_roundsd_ss(A, B, C) \
7916 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
7918 #define _mm_cvt_roundss_sd(A, B, C) \
7919 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
7920 #endif
7922 extern __inline void
7923 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7924 _mm512_stream_si512 (__m512i * __P, __m512i __A)
7926 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
7929 extern __inline void
7930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7931 _mm512_stream_ps (float *__P, __m512 __A)
7933 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
7936 extern __inline void
7937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7938 _mm512_stream_pd (double *__P, __m512d __A)
7940 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
7943 extern __inline __m512i
7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945 _mm512_stream_load_si512 (void *__P)
7947 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
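/* Usage sketch (illustrative only, not part of the API): the non-temporal
   store/load intrinsics above require 64-byte-aligned addresses, and the
   stores are weakly ordered, so a fence is normally needed before the data
   is consumed by another agent.

     float __buf[16] __attribute__ ((aligned (64)));
     _mm512_stream_ps (__buf, _mm512_setzero_ps ());
     _mm_sfence ();  */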
7950 #ifdef __OPTIMIZE__
7951 extern __inline __m128
7952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7953 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
7955 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
7956 (__v4sf) __B,
7957 __R);
7960 extern __inline __m128d
7961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7962 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
7964 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
7965 (__v2df) __B,
7966 __R);
7969 extern __inline __m512
7970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7971 _mm512_getexp_round_ps (__m512 __A, const int __R)
7973 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7974 (__v16sf)
7975 _mm512_setzero_ps (),
7976 (__mmask16) -1, __R);
7979 extern __inline __m512
7980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7981 _mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
7982 const int __R)
7984 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7985 (__v16sf) __W,
7986 (__mmask16) __U, __R);
7989 extern __inline __m512
7990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7991 _mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
7993 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7994 (__v16sf)
7995 _mm512_setzero_ps (),
7996 (__mmask16) __U, __R);
7999 extern __inline __m512d
8000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8001 _mm512_getexp_round_pd (__m512d __A, const int __R)
8003 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8004 (__v8df)
8005 _mm512_setzero_pd (),
8006 (__mmask8) -1, __R);
8009 extern __inline __m512d
8010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8011 _mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8012 const int __R)
8014 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8015 (__v8df) __W,
8016 (__mmask8) __U, __R);
8019 extern __inline __m512d
8020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8021 _mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
8023 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8024 (__v8df)
8025 _mm512_setzero_pd (),
8026 (__mmask8) __U, __R);
8029 /* Constants for mantissa extraction.  */
8030 typedef enum
8032 _MM_MANT_NORM_1_2, /* interval [1, 2) */
8033 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */
8034 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */
8035 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */
8036 } _MM_MANTISSA_NORM_ENUM;
8038 typedef enum
8040 _MM_MANT_SIGN_src, /* sign = sign(SRC) */
8041 _MM_MANT_SIGN_zero, /* sign = 0 */
8042 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
8043 } _MM_MANTISSA_SIGN_ENUM;
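/* Usage sketch (illustrative only, not part of the API): the two selectors
   are packed into a single immediate as (sign << 2) | norm, matching the
   (__C << 2) | __B expressions in the getmant intrinsics below.  For some
   __m512d value __x,

     __m512d __m = _mm512_getmant_round_pd (__x, _MM_MANT_NORM_1_2,
					    _MM_MANT_SIGN_zero,
					    _MM_FROUND_CUR_DIRECTION);

   extracts each mantissa normalized into [1, 2) with the sign cleared.  */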
8045 extern __inline __m512d
8046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8047 _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
8048 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8050 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8051 (__C << 2) | __B,
8052 (__v8df) _mm512_setzero_pd (),
8053 (__mmask8) -1, __R);
8056 extern __inline __m512d
8057 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8058 _mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
8059 _MM_MANTISSA_NORM_ENUM __B,
8060 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8062 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8063 (__C << 2) | __B,
8064 (__v8df) __W, __U,
8065 __R);
8068 extern __inline __m512d
8069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8070 _mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
8071 _MM_MANTISSA_NORM_ENUM __B,
8072 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8074 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
8075 (__C << 2) | __B,
8076 (__v8df)
8077 _mm512_setzero_pd (),
8078 __U, __R);
8081 extern __inline __m512
8082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8083 _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
8084 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8086 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8087 (__C << 2) | __B,
8088 (__v16sf) _mm512_setzero_ps (),
8089 (__mmask16) -1, __R);
8092 extern __inline __m512
8093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8094 _mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
8095 _MM_MANTISSA_NORM_ENUM __B,
8096 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8098 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8099 (__C << 2) | __B,
8100 (__v16sf) __W, __U,
8101 __R);
8104 extern __inline __m512
8105 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106 _mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
8107 _MM_MANTISSA_NORM_ENUM __B,
8108 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
8110 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
8111 (__C << 2) | __B,
8112 (__v16sf)
8113 _mm512_setzero_ps (),
8114 __U, __R);
8117 extern __inline __m128d
8118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8119 _mm_getmant_round_sd (__m128d __A, __m128d __B,
8120 _MM_MANTISSA_NORM_ENUM __C,
8121 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8123 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
8124 (__v2df) __B,
8125 (__D << 2) | __C,
8126 __R);
8129 extern __inline __m128
8130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8131 _mm_getmant_round_ss (__m128 __A, __m128 __B,
8132 _MM_MANTISSA_NORM_ENUM __C,
8133 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
8135 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
8136 (__v4sf) __B,
8137 (__D << 2) | __C,
8138 __R);
8141 #else
8142 #define _mm512_getmant_round_pd(X, B, C, R) \
8143 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8144 (int)(((C)<<2) | (B)), \
8145 (__v8df)(__m512d)_mm512_setzero_pd(), \
8146 (__mmask8)-1,\
8147 (R)))
8149 #define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \
8150 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8151 (int)(((C)<<2) | (B)), \
8152 (__v8df)(__m512d)(W), \
8153 (__mmask8)(U),\
8154 (R)))
8156 #define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \
8157 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
8158 (int)(((C)<<2) | (B)), \
8159 (__v8df)(__m512d)_mm512_setzero_pd(), \
8160 (__mmask8)(U),\
8161 (R)))
8162 #define _mm512_getmant_round_ps(X, B, C, R) \
8163 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8164 (int)(((C)<<2) | (B)), \
8165 (__v16sf)(__m512)_mm512_setzero_ps(), \
8166 (__mmask16)-1,\
8167 (R)))
8169 #define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \
8170 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8171 (int)(((C)<<2) | (B)), \
8172 (__v16sf)(__m512)(W), \
8173 (__mmask16)(U),\
8174 (R)))
8176 #define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \
8177 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
8178 (int)(((C)<<2) | (B)), \
8179 (__v16sf)(__m512)_mm512_setzero_ps(), \
8180 (__mmask16)(U),\
8181 (R)))
8182 #define _mm_getmant_round_sd(X, Y, C, D, R) \
8183 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
8184 (__v2df)(__m128d)(Y), \
8185 (int)(((D)<<2) | (C)), \
8186 (R)))
8188 #define _mm_getmant_round_ss(X, Y, C, D, R) \
8189 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
8190 (__v4sf)(__m128)(Y), \
8191 (int)(((D)<<2) | (C)), \
8192 (R)))
8194 #define _mm_getexp_round_ss(A, B, R) \
8195 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
8197 #define _mm_getexp_round_sd(A, B, R) \
8198 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
8200 #define _mm512_getexp_round_ps(A, R) \
8201 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8202 (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R))
8204 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
8205 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8206 (__v16sf)(__m512)(W), (__mmask16)(U), R))
8208 #define _mm512_maskz_getexp_round_ps(U, A, R) \
8209 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8210 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))
8212 #define _mm512_getexp_round_pd(A, R) \
8213 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8214 (__v8df)_mm512_setzero_pd(), (__mmask8)-1, R))
8216 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
8217 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8218 (__v8df)(__m512d)(W), (__mmask8)(U), R))
8220 #define _mm512_maskz_getexp_round_pd(U, A, R) \
8221 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8222 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
8223 #endif
8225 #ifdef __OPTIMIZE__
8226 extern __inline __m512
8227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 _mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
8230 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
8231 (__v16sf) __A, -1, __R);
8234 extern __inline __m512
8235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8236 _mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
8237 const int __imm, const int __R)
8239 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
8240 (__v16sf) __A,
8241 (__mmask16) __B, __R);
8244 extern __inline __m512
8245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8246 _mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
8247 const int __imm, const int __R)
8249 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
8250 __imm,
8251 (__v16sf)
8252 _mm512_setzero_ps (),
8253 (__mmask16) __A, __R);
8256 extern __inline __m512d
8257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8258 _mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
8260 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
8261 (__v8df) __A, -1, __R);
8264 extern __inline __m512d
8265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266 _mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
8267 __m512d __C, const int __imm, const int __R)
8269 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
8270 (__v8df) __A,
8271 (__mmask8) __B, __R);
8274 extern __inline __m512d
8275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8276 _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
8277 const int __imm, const int __R)
8279 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
8280 __imm,
8281 (__v8df)
8282 _mm512_setzero_pd (),
8283 (__mmask8) __A, __R);
8286 extern __inline __m128
8287 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8288 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R)
8290 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
8291 (__v4sf) __B, __imm, __R);
8294 extern __inline __m128d
8295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8296 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
8297 const int __R)
8299 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
8300 (__v2df) __B, __imm, __R);
8303 #else
8304 #define _mm512_roundscale_round_ps(A, B, R) \
8305 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
8306 (__v16sf)(__m512)(A), (__mmask16)(-1), R))
8307 #define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
8308 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
8309 (int)(D), \
8310 (__v16sf)(__m512)(A), \
8311 (__mmask16)(B), R))
8312 #define _mm512_maskz_roundscale_round_ps(A, B, C, R) \
8313 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \
8314 (int)(C), \
8315 (__v16sf)_mm512_setzero_ps(),\
8316 (__mmask16)(A), R))
8317 #define _mm512_roundscale_round_pd(A, B, R) \
8318 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
8319 (__v8df)(__m512d)(A), (__mmask8)(-1), R))
8320 #define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
8321 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
8322 (int)(D), \
8323 (__v8df)(__m512d)(A), \
8324 (__mmask8)(B), R))
8325 #define _mm512_maskz_roundscale_round_pd(A, B, C, R) \
8326 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \
8327 (int)(C), \
8328 (__v8df)_mm512_setzero_pd(),\
8329 (__mmask8)(A), R))
8330 #define _mm_roundscale_round_ss(A, B, C, R) \
8331 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \
8332 (__v4sf)(__m128)(B), (int)(C), R))
8333 #define _mm_roundscale_round_sd(A, B, C, R) \
8334 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \
8335 (__v2df)(__m128d)(B), (int)(C), R))
8336 #endif
8338 extern __inline __m512
8339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8340 _mm512_floor_ps (__m512 __A)
8342 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8343 _MM_FROUND_FLOOR,
8344 (__v16sf) __A, -1,
8345 _MM_FROUND_CUR_DIRECTION);
8348 extern __inline __m512d
8349 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8350 _mm512_floor_pd (__m512d __A)
8352 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8353 _MM_FROUND_FLOOR,
8354 (__v8df) __A, -1,
8355 _MM_FROUND_CUR_DIRECTION);
8358 extern __inline __m512
8359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8360 _mm512_ceil_ps (__m512 __A)
8362 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8363 _MM_FROUND_CEIL,
8364 (__v16sf) __A, -1,
8365 _MM_FROUND_CUR_DIRECTION);
8368 extern __inline __m512d
8369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8370 _mm512_ceil_pd (__m512d __A)
8372 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8373 _MM_FROUND_CEIL,
8374 (__v8df) __A, -1,
8375 _MM_FROUND_CUR_DIRECTION);
8378 extern __inline __m512
8379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8380 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
8382 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8383 _MM_FROUND_FLOOR,
8384 (__v16sf) __W, __U,
8385 _MM_FROUND_CUR_DIRECTION);
8388 extern __inline __m512d
8389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8390 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
8392 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8393 _MM_FROUND_FLOOR,
8394 (__v8df) __W, __U,
8395 _MM_FROUND_CUR_DIRECTION);
8398 extern __inline __m512
8399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8400 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
8402 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
8403 _MM_FROUND_CEIL,
8404 (__v16sf) __W, __U,
8405 _MM_FROUND_CUR_DIRECTION);
8408 extern __inline __m512d
8409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8410 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
8412 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
8413 _MM_FROUND_CEIL,
8414 (__v8df) __W, __U,
8415 _MM_FROUND_CUR_DIRECTION);
8418 #ifdef __OPTIMIZE__
8419 extern __inline __m512i
8420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8421 _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
8423 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8424 (__v16si) __B, __imm,
8425 (__v16si)
8426 _mm512_setzero_si512 (),
8427 (__mmask16) -1);
8430 extern __inline __m512i
8431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8432 _mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
8433 __m512i __B, const int __imm)
8435 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8436 (__v16si) __B, __imm,
8437 (__v16si) __W,
8438 (__mmask16) __U);
8441 extern __inline __m512i
8442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8443 _mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
8444 const int __imm)
8446 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
8447 (__v16si) __B, __imm,
8448 (__v16si)
8449 _mm512_setzero_si512 (),
8450 (__mmask16) __U);
8453 extern __inline __m512i
8454 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8455 _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
8457 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8458 (__v8di) __B, __imm,
8459 (__v8di)
8460 _mm512_setzero_si512 (),
8461 (__mmask8) -1);
8464 extern __inline __m512i
8465 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8466 _mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
8467 __m512i __B, const int __imm)
8469 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8470 (__v8di) __B, __imm,
8471 (__v8di) __W,
8472 (__mmask8) __U);
8475 extern __inline __m512i
8476 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8477 _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
8478 const int __imm)
8480 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
8481 (__v8di) __B, __imm,
8482 (__v8di)
8483 _mm512_setzero_si512 (),
8484 (__mmask8) __U);
8486 #else
8487 #define _mm512_alignr_epi32(X, Y, C) \
8488 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8489 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \
8490 (__mmask16)-1))
8492 #define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
8493 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8494 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \
8495 (__mmask16)(U)))
8497 #define _mm512_maskz_alignr_epi32(U, X, Y, C) \
8498 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
8499 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\
8500 (__mmask16)(U)))
8502 #define _mm512_alignr_epi64(X, Y, C) \
8503 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8504 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1))
8506 #define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
8507 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8508 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))
8510 #define _mm512_maskz_alignr_epi64(U, X, Y, C) \
8511 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
8512 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\
8513 (__mmask8)(U)))
8514 #endif
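/* Usage sketch (illustrative only, not part of the API): as I read the
   valignd semantics, _mm512_alignr_epi32 (__A, __B, __imm) concatenates
   __A (upper half) with __B (lower half), shifts the 1024-bit value right
   by __imm 32-bit elements, and keeps the low 512 bits.  For some __m512i
   values __a and __b,

     __m512i __r = _mm512_alignr_epi32 (__a, __b, 4);

   yields elements 4..15 of __b followed by elements 0..3 of __a.  */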
8516 extern __inline __mmask16
8517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8518 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
8520 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8521 (__v16si) __B,
8522 (__mmask16) -1);
8525 extern __inline __mmask16
8526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8527 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8529 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
8530 (__v16si) __B, __U);
8533 extern __inline __mmask8
8534 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8535 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8537 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8538 (__v8di) __B, __U);
8541 extern __inline __mmask8
8542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8543 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
8545 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
8546 (__v8di) __B,
8547 (__mmask8) -1);
8550 extern __inline __mmask16
8551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8552 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
8554 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8555 (__v16si) __B,
8556 (__mmask16) -1);
8559 extern __inline __mmask16
8560 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8561 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8563 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
8564 (__v16si) __B, __U);
8567 extern __inline __mmask8
8568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8569 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8571 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8572 (__v8di) __B, __U);
8575 extern __inline __mmask8
8576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
8579 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
8580 (__v8di) __B,
8581 (__mmask8) -1);
8584 extern __inline __mmask16
8585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8586 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
8588 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8589 (__v16si) __Y, 5,
8590 (__mmask16) -1);
8593 extern __inline __mmask16
8594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8595 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
8597 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8598 (__v16si) __Y, 5,
8599 (__mmask16) -1);
8602 extern __inline __mmask8
8603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8604 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
8606 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8607 (__v8di) __Y, 5,
8608 (__mmask8) -1);
8611 extern __inline __mmask8
8612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8613 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
8615 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8616 (__v8di) __Y, 5,
8617 (__mmask8) -1);
8620 extern __inline __mmask16
8621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8622 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
8624 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8625 (__v16si) __Y, 2,
8626 (__mmask16) -1);
8629 extern __inline __mmask16
8630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
8633 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8634 (__v16si) __Y, 2,
8635 (__mmask16) -1);
8638 extern __inline __mmask8
8639 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8640 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
8642 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8643 (__v8di) __Y, 2,
8644 (__mmask8) -1);
8647 extern __inline __mmask8
8648 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8649 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
8651 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8652 (__v8di) __Y, 2,
8653 (__mmask8) -1);
8656 extern __inline __mmask16
8657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8658 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
8660 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8661 (__v16si) __Y, 1,
8662 (__mmask16) -1);
8665 extern __inline __mmask16
8666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8667 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
8669 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8670 (__v16si) __Y, 1,
8671 (__mmask16) -1);
8674 extern __inline __mmask8
8675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8676 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
8678 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8679 (__v8di) __Y, 1,
8680 (__mmask8) -1);
8683 extern __inline __mmask8
8684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8685 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
8687 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8688 (__v8di) __Y, 1,
8689 (__mmask8) -1);
8692 extern __inline __mmask16
8693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8694 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
8696 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8697 (__v16si) __Y, 4,
8698 (__mmask16) -1);
8701 extern __inline __mmask16
8702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8703 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
8705 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8706 (__v16si) __Y, 4,
8707 (__mmask16) -1);
8710 extern __inline __mmask8
8711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
8714 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8715 (__v8di) __Y, 4,
8716 (__mmask8) -1);
8719 extern __inline __mmask8
8720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8721 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
8723 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8724 (__v8di) __Y, 4,
8725 (__mmask8) -1);
8728 #define _MM_CMPINT_EQ 0x0
8729 #define _MM_CMPINT_LT 0x1
8730 #define _MM_CMPINT_LE 0x2
8731 #define _MM_CMPINT_UNUSED 0x3
8732 #define _MM_CMPINT_NE 0x4
8733 #define _MM_CMPINT_NLT 0x5
8734 #define _MM_CMPINT_GE 0x5
8735 #define _MM_CMPINT_NLE 0x6
8736 #define _MM_CMPINT_GT 0x6
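/* Usage sketch (illustrative only, not part of the API): the predicate
   codes above are the immediate operand of the generic compares defined
   below, for which the fixed-predicate intrinsics above are shorthands,
   e.g.

     __m512i __a = _mm512_setzero_si512 ();
     __mmask16 __k = _mm512_cmp_epi32_mask (__a, __a, _MM_CMPINT_EQ);

   sets all sixteen mask bits and is equivalent to
   _mm512_cmpeq_epi32_mask (__a, __a).  */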
8738 #ifdef __OPTIMIZE__
8739 extern __inline __mmask8
8740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8741 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
8743 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8744 (__v8di) __Y, __P,
8745 (__mmask8) -1);
8748 extern __inline __mmask16
8749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8750 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
8752 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8753 (__v16si) __Y, __P,
8754 (__mmask16) -1);
8757 extern __inline __mmask8
8758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8759 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
8761 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8762 (__v8di) __Y, __P,
8763 (__mmask8) -1);
8766 extern __inline __mmask16
8767 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8768 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
8770 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8771 (__v16si) __Y, __P,
8772 (__mmask16) -1);
8775 extern __inline __mmask8
8776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8777 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
8778 const int __R)
8780 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
8781 (__v8df) __Y, __P,
8782 (__mmask8) -1, __R);
8785 extern __inline __mmask16
8786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8787 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
8789 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
8790 (__v16sf) __Y, __P,
8791 (__mmask16) -1, __R);
8794 extern __inline __mmask8
8795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8796 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
8797 const int __P)
8799 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
8800 (__v8di) __Y, __P,
8801 (__mmask8) __U);
8804 extern __inline __mmask16
8805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8806 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
8807 const int __P)
8809 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
8810 (__v16si) __Y, __P,
8811 (__mmask16) __U);
8814 extern __inline __mmask8
8815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8816 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
8817 const int __P)
8819 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
8820 (__v8di) __Y, __P,
8821 (__mmask8) __U);
8824 extern __inline __mmask16
8825 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8826 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
8827 const int __P)
8829 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
8830 (__v16si) __Y, __P,
8831 (__mmask16) __U);
8834 extern __inline __mmask8
8835 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8836 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
8837 const int __P, const int __R)
8839 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
8840 (__v8df) __Y, __P,
8841 (__mmask8) __U, __R);
8844 extern __inline __mmask16
8845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8846 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
8847 const int __P, const int __R)
8849 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
8850 (__v16sf) __Y, __P,
8851 (__mmask16) __U, __R);
8854 extern __inline __mmask8
8855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8856 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
8858 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
8859 (__v2df) __Y, __P,
8860 (__mmask8) -1, __R);
8863 extern __inline __mmask8
8864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8865 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
8866 const int __P, const int __R)
8868 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
8869 (__v2df) __Y, __P,
8870 (__mmask8) __M, __R);
8873 extern __inline __mmask8
8874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8875 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
8877 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
8878 (__v4sf) __Y, __P,
8879 (__mmask8) -1, __R);
8882 extern __inline __mmask8
8883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8884 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
8885 const int __P, const int __R)
8887 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
8888 (__v4sf) __Y, __P,
8889 (__mmask8) __M, __R);
8892 #else
8893 #define _mm512_cmp_epi64_mask(X, Y, P) \
8894 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
8895 (__v8di)(__m512i)(Y), (int)(P),\
8896 (__mmask8)-1))
8898 #define _mm512_cmp_epi32_mask(X, Y, P) \
8899 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
8900 (__v16si)(__m512i)(Y), (int)(P),\
8901 (__mmask16)-1))
8903 #define _mm512_cmp_epu64_mask(X, Y, P) \
8904 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
8905 (__v8di)(__m512i)(Y), (int)(P),\
8906 (__mmask8)-1))
8908 #define _mm512_cmp_epu32_mask(X, Y, P) \
8909 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
8910 (__v16si)(__m512i)(Y), (int)(P),\
8911 (__mmask16)-1))
8913 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
8914 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
8915 (__v8df)(__m512d)(Y), (int)(P),\
8916 (__mmask8)-1, R))
8918 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
8919 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
8920 (__v16sf)(__m512)(Y), (int)(P),\
8921 (__mmask16)-1, R))
8923 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
8924 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
8925 (__v8di)(__m512i)(Y), (int)(P),\
8926 (__mmask8)M))
8928 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
8929 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
8930 (__v16si)(__m512i)(Y), (int)(P),\
8931 (__mmask16)M))
8933 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
8934 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
8935 (__v8di)(__m512i)(Y), (int)(P),\
8936 (__mmask8)M))
8938 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
8939 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
8940 (__v16si)(__m512i)(Y), (int)(P),\
8941 (__mmask16)M))
8943 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
8944 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
8945 (__v8df)(__m512d)(Y), (int)(P),\
8946 (__mmask8)M, R))
8948 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
8949 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
8950 (__v16sf)(__m512)(Y), (int)(P),\
8951 (__mmask16)M, R))
8953 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
8954 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
8955 (__v2df)(__m128d)(Y), (int)(P),\
8956 (__mmask8)-1, R))
8958 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
8959 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
8960 (__v2df)(__m128d)(Y), (int)(P),\
8961 (M), R))
8963 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
8964 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
8965 (__v4sf)(__m128)(Y), (int)(P), \
8966 (__mmask8)-1, R))
8968 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
8969 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
8970 (__v4sf)(__m128)(Y), (int)(P), \
8971 (M), R))
8972 #endif
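/* Illustrative usage sketch (not part of this header): count how many 32-bit
   lanes of one vector compare below another.  It assumes the _MM_CMPINT_LT
   predicate (signed less-than) from the _MM_CMPINT_ENUM that accompanies
   these compare intrinsics; the helper name is hypothetical.

     static __inline int
     count_lt_lanes (__m512i a, __m512i b)
     {
       __mmask16 lt = _mm512_cmp_epi32_mask (a, b, _MM_CMPINT_LT);
       return __builtin_popcount ((unsigned int) lt);
     }

   The epu32/epu64 variants take the same predicates but compare the lanes as
   unsigned integers.  */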
8974 #ifdef __OPTIMIZE__
8975 extern __inline __m512
8976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
8979 __m512 v1_old = _mm512_setzero_ps ();
8980 __mmask16 mask = 0xFFFF;
8982 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
8983 __addr,
8984 (__v16si) __index,
8985 mask, __scale);
8988 extern __inline __m512
8989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8990 _mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
8991 __m512i __index, float const *__addr, int __scale)
8993 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
8994 __addr,
8995 (__v16si) __index,
8996 __mask, __scale);
8999 extern __inline __m512d
9000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9001 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
9003 __m512d v1_old = _mm512_setzero_pd ();
9004 __mmask8 mask = 0xFF;
9006 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
9007 __addr,
9008 (__v8si) __index, mask,
9009 __scale);
9012 extern __inline __m512d
9013 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9014 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
9015 __m256i __index, double const *__addr, int __scale)
9017 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
9018 __addr,
9019 (__v8si) __index,
9020 __mask, __scale);
9023 extern __inline __m256
9024 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9025 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
9027 __m256 v1_old = _mm256_setzero_ps ();
9028 __mmask8 mask = 0xFF;
9030 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
9031 __addr,
9032 (__v8di) __index, mask,
9033 __scale);
9036 extern __inline __m256
9037 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9038 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
9039 __m512i __index, float const *__addr, int __scale)
9041 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
9042 __addr,
9043 (__v8di) __index,
9044 __mask, __scale);
9047 extern __inline __m512d
9048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9049 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
9051 __m512d v1_old = _mm512_setzero_pd ();
9052 __mmask8 mask = 0xFF;
9054 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
9055 __addr,
9056 (__v8di) __index, mask,
9057 __scale);
9060 extern __inline __m512d
9061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9062 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
9063 __m512i __index, double const *__addr, int __scale)
9065 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
9066 __addr,
9067 (__v8di) __index,
9068 __mask, __scale);
9071 extern __inline __m512i
9072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9073 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
9075 __m512i v1_old = _mm512_setzero_si512 ();
9076 __mmask16 mask = 0xFFFF;
9078 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
9079 __addr,
9080 (__v16si) __index,
9081 mask, __scale);
9084 extern __inline __m512i
9085 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9086 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
9087 __m512i __index, int const *__addr, int __scale)
9089 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
9090 __addr,
9091 (__v16si) __index,
9092 __mask, __scale);
9095 extern __inline __m512i
9096 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9097 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
9099 __m512i v1_old = _mm512_setzero_si512 ();
9100 __mmask8 mask = 0xFF;
9102 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
9103 __addr,
9104 (__v8si) __index, mask,
9105 __scale);
9108 extern __inline __m512i
9109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9110 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9111 __m256i __index, long long const *__addr,
9112 int __scale)
9114 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
9115 __addr,
9116 (__v8si) __index,
9117 __mask, __scale);
9120 extern __inline __m256i
9121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9122 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
9124 __m256i v1_old = _mm256_setzero_si256 ();
9125 __mmask8 mask = 0xFF;
9127 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
9128 __addr,
9129 (__v8di) __index,
9130 mask, __scale);
9133 extern __inline __m256i
9134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9135 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
9136 __m512i __index, int const *__addr, int __scale)
9138 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
9139 __addr,
9140 (__v8di) __index,
9141 __mask, __scale);
9144 extern __inline __m512i
9145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9146 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
9148 __m512i v1_old = _mm512_setzero_si512 ();
9149 __mmask8 mask = 0xFF;
9151 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
9152 __addr,
9153 (__v8di) __index, mask,
9154 __scale);
9157 extern __inline __m512i
9158 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9159 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
9160 __m512i __index, long long const *__addr,
9161 int __scale)
9163 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
9164 __addr,
9165 (__v8di) __index,
9166 __mask, __scale);
9169 extern __inline void
9170 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9171 _mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale)
9173 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
9174 (__v16si) __index, (__v16sf) __v1, __scale);
9177 extern __inline void
9178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9179 _mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask,
9180 __m512i __index, __m512 __v1, int __scale)
9182 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
9183 (__v16sf) __v1, __scale);
9186 extern __inline void
9187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9188 _mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1,
9189 int __scale)
9191 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
9192 (__v8si) __index, (__v8df) __v1, __scale);
9195 extern __inline void
9196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9197 _mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask,
9198 __m256i __index, __m512d __v1, int __scale)
9200 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
9201 (__v8df) __v1, __scale);
9204 extern __inline void
9205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9206 _mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale)
9208 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
9209 (__v8di) __index, (__v8sf) __v1, __scale);
9212 extern __inline void
9213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214 _mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask,
9215 __m512i __index, __m256 __v1, int __scale)
9217 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
9218 (__v8sf) __v1, __scale);
9221 extern __inline void
9222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9223 _mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1,
9224 int __scale)
9226 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
9227 (__v8di) __index, (__v8df) __v1, __scale);
9230 extern __inline void
9231 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9232 _mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask,
9233 __m512i __index, __m512d __v1, int __scale)
9235 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
9236 (__v8df) __v1, __scale);
9239 extern __inline void
9240 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9241 _mm512_i32scatter_epi32 (int *__addr, __m512i __index,
9242 __m512i __v1, int __scale)
9244 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
9245 (__v16si) __index, (__v16si) __v1, __scale);
9248 extern __inline void
9249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9250 _mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask,
9251 __m512i __index, __m512i __v1, int __scale)
9253 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
9254 (__v16si) __v1, __scale);
9257 extern __inline void
9258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9259 _mm512_i32scatter_epi64 (long long *__addr, __m256i __index,
9260 __m512i __v1, int __scale)
9262 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
9263 (__v8si) __index, (__v8di) __v1, __scale);
9266 extern __inline void
9267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9268 _mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask,
9269 __m256i __index, __m512i __v1, int __scale)
9271 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
9272 (__v8di) __v1, __scale);
9275 extern __inline void
9276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9277 _mm512_i64scatter_epi32 (int *__addr, __m512i __index,
9278 __m256i __v1, int __scale)
9280 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
9281 (__v8di) __index, (__v8si) __v1, __scale);
9284 extern __inline void
9285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9286 _mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask,
9287 __m512i __index, __m256i __v1, int __scale)
9289 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
9290 (__v8si) __v1, __scale);
9293 extern __inline void
9294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9295 _mm512_i64scatter_epi64 (long long *__addr, __m512i __index,
9296 __m512i __v1, int __scale)
9298 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
9299 (__v8di) __index, (__v8di) __v1, __scale);
9302 extern __inline void
9303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9304 _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
9305 __m512i __index, __m512i __v1, int __scale)
9307 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
9308 (__v8di) __v1, __scale);
9310 #else
9311 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
9312 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \
9313 (float const *)ADDR, \
9314 (__v16si)(__m512i)INDEX, \
9315 (__mmask16)0xFFFF, (int)SCALE)
9317 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9318 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \
9319 (float const *)ADDR, \
9320 (__v16si)(__m512i)INDEX, \
9321 (__mmask16)MASK, (int)SCALE)
9323 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
9324 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \
9325 (double const *)ADDR, \
9326 (__v8si)(__m256i)INDEX, \
9327 (__mmask8)0xFF, (int)SCALE)
9329 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9330 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \
9331 (double const *)ADDR, \
9332 (__v8si)(__m256i)INDEX, \
9333 (__mmask8)MASK, (int)SCALE)
9335 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
9336 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \
9337 (float const *)ADDR, \
9338 (__v8di)(__m512i)INDEX, \
9339 (__mmask8)0xFF, (int)SCALE)
9341 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
9342 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \
9343 (float const *)ADDR, \
9344 (__v8di)(__m512i)INDEX, \
9345 (__mmask8)MASK, (int)SCALE)
9347 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
9348 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \
9349 (double const *)ADDR, \
9350 (__v8di)(__m512i)INDEX, \
9351 (__mmask8)0xFF, (int)SCALE)
9353 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
9354 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \
9355 (double const *)ADDR, \
9356 (__v8di)(__m512i)INDEX, \
9357 (__mmask8)MASK, (int)SCALE)
9359 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
9360 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \
9361 (int const *)ADDR, \
9362 (__v16si)(__m512i)INDEX, \
9363 (__mmask16)0xFFFF, (int)SCALE)
9365 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9366 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \
9367 (int const *)ADDR, \
9368 (__v16si)(__m512i)INDEX, \
9369 (__mmask16)MASK, (int)SCALE)
9371 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
9372 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \
9373 (long long const *)ADDR, \
9374 (__v8si)(__m256i)INDEX, \
9375 (__mmask8)0xFF, (int)SCALE)
9377 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9378 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \
9379 (long long const *)ADDR, \
9380 (__v8si)(__m256i)INDEX, \
9381 (__mmask8)MASK, (int)SCALE)
9383 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
9384 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \
9385 (int const *)ADDR, \
9386 (__v8di)(__m512i)INDEX, \
9387 (__mmask8)0xFF, (int)SCALE)
9389 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
9390 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \
9391 (int const *)ADDR, \
9392 (__v8di)(__m512i)INDEX, \
9393 (__mmask8)MASK, (int)SCALE)
9395 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
9396 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \
9397 (long long const *)ADDR, \
9398 (__v8di)(__m512i)INDEX, \
9399 (__mmask8)0xFF, (int)SCALE)
9401 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
9402 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \
9403 (long long const *)ADDR, \
9404 (__v8di)(__m512i)INDEX, \
9405 (__mmask8)MASK, (int)SCALE)
9407 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
9408 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \
9409 (__v16si)(__m512i)INDEX, \
9410 (__v16sf)(__m512)V1, (int)SCALE)
9412 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9413 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \
9414 (__v16si)(__m512i)INDEX, \
9415 (__v16sf)(__m512)V1, (int)SCALE)
9417 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
9418 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \
9419 (__v8si)(__m256i)INDEX, \
9420 (__v8df)(__m512d)V1, (int)SCALE)
9422 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9423 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \
9424 (__v8si)(__m256i)INDEX, \
9425 (__v8df)(__m512d)V1, (int)SCALE)
9427 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
9428 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \
9429 (__v8di)(__m512i)INDEX, \
9430 (__v8sf)(__m256)V1, (int)SCALE)
9432 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
9433 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)MASK, \
9434 (__v8di)(__m512i)INDEX, \
9435 (__v8sf)(__m256)V1, (int)SCALE)
9437 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
9438 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \
9439 (__v8di)(__m512i)INDEX, \
9440 (__v8df)(__m512d)V1, (int)SCALE)
9442 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
9443 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \
9444 (__v8di)(__m512i)INDEX, \
9445 (__v8df)(__m512d)V1, (int)SCALE)
9447 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
9448 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \
9449 (__v16si)(__m512i)INDEX, \
9450 (__v16si)(__m512i)V1, (int)SCALE)
9452 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9453 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \
9454 (__v16si)(__m512i)INDEX, \
9455 (__v16si)(__m512i)V1, (int)SCALE)
9457 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
9458 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9459 (__v8si)(__m256i)INDEX, \
9460 (__v8di)(__m512i)V1, (int)SCALE)
9462 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9463 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \
9464 (__v8si)(__m256i)INDEX, \
9465 (__v8di)(__m512i)V1, (int)SCALE)
9467 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
9468 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \
9469 (__v8di)(__m512i)INDEX, \
9470 (__v8si)(__m256i)V1, (int)SCALE)
9472 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
9473 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \
9474 (__v8di)(__m512i)INDEX, \
9475 (__v8si)(__m256i)V1, (int)SCALE)
9477 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
9478 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \
9479 (__v8di)(__m512i)INDEX, \
9480 (__v8di)(__m512i)V1, (int)SCALE)
9482 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
9483 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \
9484 (__v8di)(__m512i)INDEX, \
9485 (__v8di)(__m512i)V1, (int)SCALE)
9486 #endif
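/* Illustrative usage sketch (not part of this header): gather floats through
   32-bit indices scaled by sizeof (float), then scatter them back under the
   same mask.  Only lanes whose mask bit is set are touched, and the scale
   argument must be 1, 2, 4 or 8.  The helper name is hypothetical.

     static __inline void
     gather_scatter_example (float *base, __m512i idx, __mmask16 m)
     {
       __m512 v = _mm512_mask_i32gather_ps (_mm512_setzero_ps (), m,
                                            idx, base, 4);
       _mm512_mask_i32scatter_ps (base, m, idx, v, 4);
     }

   The unmasked forms use an all-ones mask, as the definitions above show.  */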
9488 extern __inline __m512d
9489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9490 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9492 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9493 (__v8df) __W,
9494 (__mmask8) __U);
9497 extern __inline __m512d
9498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9501 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9502 (__v8df)
9503 _mm512_setzero_pd (),
9504 (__mmask8) __U);
9507 extern __inline void
9508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9509 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9511 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9512 (__mmask8) __U);
9515 extern __inline __m512
9516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9517 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9519 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9520 (__v16sf) __W,
9521 (__mmask16) __U);
9524 extern __inline __m512
9525 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9526 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9528 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9529 (__v16sf)
9530 _mm512_setzero_ps (),
9531 (__mmask16) __U);
9534 extern __inline void
9535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9536 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9538 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9539 (__mmask16) __U);
9542 extern __inline __m512i
9543 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9544 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9546 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9547 (__v8di) __W,
9548 (__mmask8) __U);
9551 extern __inline __m512i
9552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9555 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9556 (__v8di)
9557 _mm512_setzero_si512 (),
9558 (__mmask8) __U);
9561 extern __inline void
9562 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9563 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9565 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9566 (__mmask8) __U);
9569 extern __inline __m512i
9570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9571 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9573 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9574 (__v16si) __W,
9575 (__mmask16) __U);
9578 extern __inline __m512i
9579 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9582 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9583 (__v16si)
9584 _mm512_setzero_si512 (),
9585 (__mmask16) __U);
9588 extern __inline void
9589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9590 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9592 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9593 (__mmask16) __U);
9596 extern __inline __m512d
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9600 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9601 (__v8df) __W,
9602 (__mmask8) __U);
9605 extern __inline __m512d
9606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9607 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9609 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
9610 (__v8df)
9611 _mm512_setzero_pd (),
9612 (__mmask8) __U);
9615 extern __inline __m512d
9616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9617 _mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9619 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
9620 (__v8df) __W,
9621 (__mmask8) __U);
9624 extern __inline __m512d
9625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9626 _mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
9628 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
9629 (__v8df)
9630 _mm512_setzero_pd (),
9631 (__mmask8) __U);
9634 extern __inline __m512
9635 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9636 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9638 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9639 (__v16sf) __W,
9640 (__mmask16) __U);
9643 extern __inline __m512
9644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9645 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9647 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
9648 (__v16sf)
9649 _mm512_setzero_ps (),
9650 (__mmask16) __U);
9653 extern __inline __m512
9654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9655 _mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9657 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
9658 (__v16sf) __W,
9659 (__mmask16) __U);
9662 extern __inline __m512
9663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9664 _mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
9666 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
9667 (__v16sf)
9668 _mm512_setzero_ps (),
9669 (__mmask16) __U);
9672 extern __inline __m512i
9673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9674 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9676 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9677 (__v8di) __W,
9678 (__mmask8) __U);
9681 extern __inline __m512i
9682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9683 _mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
9685 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
9686 (__v8di)
9687 _mm512_setzero_si512 (),
9688 (__mmask8) __U);
9691 extern __inline __m512i
9692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9693 _mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9695 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
9696 (__v8di) __W,
9697 (__mmask8) __U);
9700 extern __inline __m512i
9701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9702 _mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
9704 return (__m512i)
9705 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
9706 (__v8di)
9707 _mm512_setzero_si512 (),
9708 (__mmask8) __U);
9711 extern __inline __m512i
9712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9713 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9715 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9716 (__v16si) __W,
9717 (__mmask16) __U);
9720 extern __inline __m512i
9721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9722 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9724 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
9725 (__v16si)
9726 _mm512_setzero_si512 (),
9727 (__mmask16) __U);
9730 extern __inline __m512i
9731 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9732 _mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9734 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
9735 (__v16si) __W,
9736 (__mmask16) __U);
9739 extern __inline __m512i
9740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9741 _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
9743 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
9744 (__v16si)
9745 _mm512_setzero_si512
9746 (), (__mmask16) __U);
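/* Illustrative usage sketch (not part of this header): compress-store packs
   the selected lanes contiguously starting at the destination pointer, and
   expand-load performs the inverse, spreading consecutive memory elements
   into the selected lane positions (zeroing the rest in the maskz form).
   The helper name is hypothetical.

     static __inline __m512i
     compress_then_expand (int *scratch, __mmask16 m, __m512i v)
     {
       _mm512_mask_compressstoreu_epi32 (scratch, m, v);
       return _mm512_maskz_expandloadu_epi32 (m, scratch);
     }
*/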
9749 /* Mask arithmetic operations */
9750 extern __inline __mmask16
9751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9752 _mm512_kand (__mmask16 __A, __mmask16 __B)
9754 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
9757 extern __inline __mmask16
9758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9759 _mm512_kandn (__mmask16 __A, __mmask16 __B)
9761 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
9764 extern __inline __mmask16
9765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9766 _mm512_kor (__mmask16 __A, __mmask16 __B)
9768 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
9771 extern __inline int
9772 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9773 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
9775 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
9776 (__mmask16) __B);
9779 extern __inline int
9780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9781 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
9783 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
9784 (__mmask16) __B);
9787 extern __inline __mmask16
9788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9789 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
9791 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
9794 extern __inline __mmask16
9795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 _mm512_kxor (__mmask16 __A, __mmask16 __B)
9798 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
9801 extern __inline __mmask16
9802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9803 _mm512_knot (__mmask16 __A)
9805 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
9808 extern __inline __mmask16
9809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9810 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
9812 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
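/* Illustrative usage sketch (not part of this header): the mask operations
   above combine compare results without leaving the mask registers;
   _mm512_kortestz returns nonzero when the OR of its operands is all zeros.
   The helper name is hypothetical.

     static __inline int
     any_lane_in_both (__mmask16 a, __mmask16 b)
     {
       __mmask16 both = _mm512_kand (a, b);
       return !_mm512_kortestz (both, both);
     }
*/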
9815 #ifdef __OPTIMIZE__
9816 extern __inline __m512i
9817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9818 _mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
9819 const int __imm)
9821 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
9822 (__v4si) __D,
9823 __imm,
9824 (__v16si)
9825 _mm512_setzero_si512 (),
9826 __B);
9829 extern __inline __m512
9830 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9831 _mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
9832 const int __imm)
9834 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
9835 (__v4sf) __D,
9836 __imm,
9837 (__v16sf)
9838 _mm512_setzero_ps (), __B);
9841 extern __inline __m512i
9842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9843 _mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
9844 __m128i __D, const int __imm)
9846 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
9847 (__v4si) __D,
9848 __imm,
9849 (__v16si) __A,
9850 __B);
9853 extern __inline __m512
9854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9855 _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
9856 __m128 __D, const int __imm)
9858 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
9859 (__v4sf) __D,
9860 __imm,
9861 (__v16sf) __A, __B);
9863 #else
9864 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
9865 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
9866 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
9867 (__mmask16)(A)))
9869 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
9870 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
9871 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
9872 (__mmask16)(A)))
9874 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
9875 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
9876 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
9877 (__mmask16)(B)))
9879 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
9880 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
9881 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
9882 (__mmask16)(B)))
9883 #endif
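/* Illustrative usage sketch (not part of this header): insertf32x4 places a
   128-bit chunk into the lane selected by the immediate (0..3); in the mask
   form, result elements whose mask bit is clear are taken from the first
   operand instead.  The helper name is hypothetical.

     static __inline __m512
     insert_third_lane (__m512 old, __mmask16 m, __m512 src, __m128 chunk)
     {
       return _mm512_mask_insertf32x4 (old, m, src, chunk, 2);
     }
*/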
9885 extern __inline __m512i
9886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9887 _mm512_max_epi64 (__m512i __A, __m512i __B)
9889 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9890 (__v8di) __B,
9891 (__v8di)
9892 _mm512_setzero_si512 (),
9893 (__mmask8) -1);
9896 extern __inline __m512i
9897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9898 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
9900 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9901 (__v8di) __B,
9902 (__v8di)
9903 _mm512_setzero_si512 (),
9904 __M);
9907 extern __inline __m512i
9908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9909 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9911 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
9912 (__v8di) __B,
9913 (__v8di) __W, __M);
9916 extern __inline __m512i
9917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9918 _mm512_min_epi64 (__m512i __A, __m512i __B)
9920 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9921 (__v8di) __B,
9922 (__v8di)
9923 _mm512_setzero_si512 (),
9924 (__mmask8) -1);
9927 extern __inline __m512i
9928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9929 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9931 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9932 (__v8di) __B,
9933 (__v8di) __W, __M);
9936 extern __inline __m512i
9937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9938 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
9940 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
9941 (__v8di) __B,
9942 (__v8di)
9943 _mm512_setzero_si512 (),
9944 __M);
9947 extern __inline __m512i
9948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 _mm512_max_epu64 (__m512i __A, __m512i __B)
9951 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9952 (__v8di) __B,
9953 (__v8di)
9954 _mm512_setzero_si512 (),
9955 (__mmask8) -1);
9958 extern __inline __m512i
9959 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9960 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
9962 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9963 (__v8di) __B,
9964 (__v8di)
9965 _mm512_setzero_si512 (),
9966 __M);
9969 extern __inline __m512i
9970 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9971 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9973 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
9974 (__v8di) __B,
9975 (__v8di) __W, __M);
9978 extern __inline __m512i
9979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9980 _mm512_min_epu64 (__m512i __A, __m512i __B)
9982 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
9983 (__v8di) __B,
9984 (__v8di)
9985 _mm512_setzero_si512 (),
9986 (__mmask8) -1);
9989 extern __inline __m512i
9990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9991 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
9993 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
9994 (__v8di) __B,
9995 (__v8di) __W, __M);
9998 extern __inline __m512i
9999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
10002 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
10003 (__v8di) __B,
10004 (__v8di)
10005 _mm512_setzero_si512 (),
10006 __M);
10009 extern __inline __m512i
10010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10011 _mm512_max_epi32 (__m512i __A, __m512i __B)
10013 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10014 (__v16si) __B,
10015 (__v16si)
10016 _mm512_setzero_si512 (),
10017 (__mmask16) -1);
10020 extern __inline __m512i
10021 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10022 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10024 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10025 (__v16si) __B,
10026 (__v16si)
10027 _mm512_setzero_si512 (),
10028 __M);
10031 extern __inline __m512i
10032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10033 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10035 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
10036 (__v16si) __B,
10037 (__v16si) __W, __M);
10040 extern __inline __m512i
10041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10042 _mm512_min_epi32 (__m512i __A, __m512i __B)
10044 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10045 (__v16si) __B,
10046 (__v16si)
10047 _mm512_setzero_si512 (),
10048 (__mmask16) -1);
10051 extern __inline __m512i
10052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10053 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
10055 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10056 (__v16si) __B,
10057 (__v16si)
10058 _mm512_setzero_si512 (),
10059 __M);
10062 extern __inline __m512i
10063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10064 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10066 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
10067 (__v16si) __B,
10068 (__v16si) __W, __M);
10071 extern __inline __m512i
10072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10073 _mm512_max_epu32 (__m512i __A, __m512i __B)
10075 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10076 (__v16si) __B,
10077 (__v16si)
10078 _mm512_setzero_si512 (),
10079 (__mmask16) -1);
10082 extern __inline __m512i
10083 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10086 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10087 (__v16si) __B,
10088 (__v16si)
10089 _mm512_setzero_si512 (),
10090 __M);
10093 extern __inline __m512i
10094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10095 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10097 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
10098 (__v16si) __B,
10099 (__v16si) __W, __M);
10102 extern __inline __m512i
10103 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10104 _mm512_min_epu32 (__m512i __A, __m512i __B)
10106 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10107 (__v16si) __B,
10108 (__v16si)
10109 _mm512_setzero_si512 (),
10110 (__mmask16) -1);
10113 extern __inline __m512i
10114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10115 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
10117 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10118 (__v16si) __B,
10119 (__v16si)
10120 _mm512_setzero_si512 (),
10121 __M);
10124 extern __inline __m512i
10125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10126 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
10128 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
10129 (__v16si) __B,
10130 (__v16si) __W, __M);
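/* Illustrative usage sketch (not part of this header): the packed min/max
   above compose naturally into a per-lane clamp.  The helper name is
   hypothetical.

     static __inline __m512i
     clamp_epi32 (__m512i v, __m512i lo, __m512i hi)
     {
       return _mm512_min_epi32 (_mm512_max_epi32 (v, lo), hi);
     }
*/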
10133 extern __inline __m512
10134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10135 _mm512_unpacklo_ps (__m512 __A, __m512 __B)
10137 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10138 (__v16sf) __B,
10139 (__v16sf)
10140 _mm512_setzero_ps (),
10141 (__mmask16) -1);
10144 extern __inline __m512
10145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10146 _mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10148 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10149 (__v16sf) __B,
10150 (__v16sf) __W,
10151 (__mmask16) __U);
10154 extern __inline __m512
10155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10156 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
10158 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
10159 (__v16sf) __B,
10160 (__v16sf)
10161 _mm512_setzero_ps (),
10162 (__mmask16) __U);
10165 #ifdef __OPTIMIZE__
10166 extern __inline __m128d
10167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10168 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
10170 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
10171 (__v2df) __B,
10172 __R);
10175 extern __inline __m128
10176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10177 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
10179 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
10180 (__v4sf) __B,
10181 __R);
10184 extern __inline __m128d
10185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10186 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
10188 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
10189 (__v2df) __B,
10190 __R);
10193 extern __inline __m128
10194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
10197 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
10198 (__v4sf) __B,
10199 __R);
10202 #else
10203 #define _mm_max_round_sd(A, B, C) \
10204 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
10206 #define _mm_max_round_ss(A, B, C) \
10207 (__m128)__builtin_ia32_maxss_round(A, B, C)
10209 #define _mm_min_round_sd(A, B, C) \
10210 (__m128d)__builtin_ia32_minsd_round(A, B, C)
10212 #define _mm_min_round_ss(A, B, C) \
10213 (__m128)__builtin_ia32_minss_round(A, B, C)
10214 #endif
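/* Illustrative usage sketch (not part of this header): the scalar
   *_round_sd/_round_ss forms take an extra exception/rounding-control
   argument; _MM_FROUND_CUR_DIRECTION keeps the current MXCSR settings.
   The helper name is hypothetical.

     static __inline __m128d
     max_low_double (__m128d a, __m128d b)
     {
       return _mm_max_round_sd (a, b, _MM_FROUND_CUR_DIRECTION);
     }
*/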
10216 extern __inline __m512d
10217 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10218 _mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
10220 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
10221 (__v8df) __W,
10222 (__mmask8) __U);
10225 extern __inline __m512
10226 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10227 _mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
10229 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
10230 (__v16sf) __W,
10231 (__mmask16) __U);
10234 extern __inline __m512i
10235 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10236 _mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
10238 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
10239 (__v8di) __W,
10240 (__mmask8) __U);
10243 extern __inline __m512i
10244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245 _mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
10247 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
10248 (__v16si) __W,
10249 (__mmask16) __U);
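/* Illustrative usage sketch (not part of this header): the blend intrinsics
   above select per lane between their two vector operands; where the mask
   bit is set the result comes from the second vector operand, otherwise from
   the first.  The helper name is hypothetical.

     static __inline __m512
     select_ps (__mmask16 m, __m512 if_clear, __m512 if_set)
     {
       return _mm512_mask_blend_ps (m, if_clear, if_set);
     }
*/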
10252 #ifdef __OPTIMIZE__
10253 extern __inline __m128d
10254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10255 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10257 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10258 (__v2df) __A,
10259 (__v2df) __B,
10260 __R);
10263 extern __inline __m128
10264 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10265 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10267 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10268 (__v4sf) __A,
10269 (__v4sf) __B,
10270 __R);
10273 extern __inline __m128d
10274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10275 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10277 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10278 (__v2df) __A,
10279 -(__v2df) __B,
10280 __R);
10283 extern __inline __m128
10284 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10285 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10287 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10288 (__v4sf) __A,
10289 -(__v4sf) __B,
10290 __R);
10293 extern __inline __m128d
10294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10295 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10297 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10298 -(__v2df) __A,
10299 (__v2df) __B,
10300 __R);
10303 extern __inline __m128
10304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10307 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10308 -(__v4sf) __A,
10309 (__v4sf) __B,
10310 __R);
10313 extern __inline __m128d
10314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10315 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
10317 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
10318 -(__v2df) __A,
10319 -(__v2df) __B,
10320 __R);
10323 extern __inline __m128
10324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
10327 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
10328 -(__v4sf) __A,
10329 -(__v4sf) __B,
10330 __R);
10332 #else
10333 #define _mm_fmadd_round_sd(A, B, C, R) \
10334 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
10336 #define _mm_fmadd_round_ss(A, B, C, R) \
10337 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
10339 #define _mm_fmsub_round_sd(A, B, C, R) \
10340 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
10342 #define _mm_fmsub_round_ss(A, B, C, R) \
10343 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
10345 #define _mm_fnmadd_round_sd(A, B, C, R) \
10346 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
10348 #define _mm_fnmadd_round_ss(A, B, C, R) \
10349 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
10351 #define _mm_fnmsub_round_sd(A, B, C, R) \
10352 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
10354 #define _mm_fnmsub_round_ss(A, B, C, R) \
10355 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
10356 #endif
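/* Illustrative usage sketch (not part of this header): the scalar FMA forms
   above operate on the low element only, with the upper elements passed
   through from the first operand; the last argument selects an explicit
   rounding mode.  The helper name is hypothetical.

     static __inline __m128d
     fma_low_nearest (__m128d w, __m128d a, __m128d b)
     {
       return _mm_fmadd_round_sd (w, a, b,
                                  _MM_FROUND_TO_NEAREST_INT
                                  | _MM_FROUND_NO_EXC);
     }
*/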
10358 #ifdef __OPTIMIZE__
10359 extern __inline int
10360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10361 _mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
10363 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
10366 extern __inline int
10367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368 _mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
10370 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
10372 #else
10373 #define _mm_comi_round_ss(A, B, C, D)\
10374 __builtin_ia32_vcomiss(A, B, C, D)
10375 #define _mm_comi_round_sd(A, B, C, D)\
10376 __builtin_ia32_vcomisd(A, B, C, D)
10377 #endif
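/* Illustrative usage sketch (not part of this header): the comi forms return
   an int for a scalar comparison chosen by one of the _CMP_* predicates that
   <immintrin.h> already provides; the final argument controls exception
   suppression.  Predicate choice and helper name are assumptions made for
   the example.

     static __inline int
     low_float_less (__m128 a, __m128 b)
     {
       return _mm_comi_round_ss (a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
     }
*/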
10379 extern __inline __m512d
10380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10381 _mm512_sqrt_pd (__m512d __A)
10383 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10384 (__v8df)
10385 _mm512_setzero_pd (),
10386 (__mmask8) -1,
10387 _MM_FROUND_CUR_DIRECTION);
10390 extern __inline __m512d
10391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10392 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
10394 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10395 (__v8df) __W,
10396 (__mmask8) __U,
10397 _MM_FROUND_CUR_DIRECTION);
10400 extern __inline __m512d
10401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10402 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
10404 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
10405 (__v8df)
10406 _mm512_setzero_pd (),
10407 (__mmask8) __U,
10408 _MM_FROUND_CUR_DIRECTION);
10411 extern __inline __m512
10412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10413 _mm512_sqrt_ps (__m512 __A)
10415 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10416 (__v16sf)
10417 _mm512_setzero_ps (),
10418 (__mmask16) -1,
10419 _MM_FROUND_CUR_DIRECTION);
10422 extern __inline __m512
10423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10424 _mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
10426 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10427 (__v16sf) __W,
10428 (__mmask16) __U,
10429 _MM_FROUND_CUR_DIRECTION);
10432 extern __inline __m512
10433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10434 _mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
10436 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
10437 (__v16sf)
10438 _mm512_setzero_ps (),
10439 (__mmask16) __U,
10440 _MM_FROUND_CUR_DIRECTION);
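/* Illustrative usage sketch (not part of this header): the maskz variants
   above compute the operation only for lanes whose mask bit is set and zero
   the remaining lanes, avoiding a separate merge step.  The helper name is
   hypothetical.

     static __inline __m512
     sqrt_selected (__mmask16 m, __m512 v)
     {
       return _mm512_maskz_sqrt_ps (m, v);
     }
*/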
10443 extern __inline __m512d
10444 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10445 _mm512_add_pd (__m512d __A, __m512d __B)
10447 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10448 (__v8df) __B,
10449 (__v8df)
10450 _mm512_setzero_pd (),
10451 (__mmask8) -1,
10452 _MM_FROUND_CUR_DIRECTION);
10455 extern __inline __m512d
10456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10457 _mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10459 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10460 (__v8df) __B,
10461 (__v8df) __W,
10462 (__mmask8) __U,
10463 _MM_FROUND_CUR_DIRECTION);
10466 extern __inline __m512d
10467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10468 _mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
10470 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
10471 (__v8df) __B,
10472 (__v8df)
10473 _mm512_setzero_pd (),
10474 (__mmask8) __U,
10475 _MM_FROUND_CUR_DIRECTION);
10478 extern __inline __m512
10479 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10480 _mm512_add_ps (__m512 __A, __m512 __B)
10482 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10483 (__v16sf) __B,
10484 (__v16sf)
10485 _mm512_setzero_ps (),
10486 (__mmask16) -1,
10487 _MM_FROUND_CUR_DIRECTION);
10490 extern __inline __m512
10491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10492 _mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10494 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10495 (__v16sf) __B,
10496 (__v16sf) __W,
10497 (__mmask16) __U,
10498 _MM_FROUND_CUR_DIRECTION);
10501 extern __inline __m512
10502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10503 _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
10505 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
10506 (__v16sf) __B,
10507 (__v16sf)
10508 _mm512_setzero_ps (),
10509 (__mmask16) __U,
10510 _MM_FROUND_CUR_DIRECTION);
10513 extern __inline __m512d
10514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10515 _mm512_sub_pd (__m512d __A, __m512d __B)
10517 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10518 (__v8df) __B,
10519 (__v8df)
10520 _mm512_setzero_pd (),
10521 (__mmask8) -1,
10522 _MM_FROUND_CUR_DIRECTION);
10525 extern __inline __m512d
10526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10527 _mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10529 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10530 (__v8df) __B,
10531 (__v8df) __W,
10532 (__mmask8) __U,
10533 _MM_FROUND_CUR_DIRECTION);
10536 extern __inline __m512d
10537 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10538 _mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
10540 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
10541 (__v8df) __B,
10542 (__v8df)
10543 _mm512_setzero_pd (),
10544 (__mmask8) __U,
10545 _MM_FROUND_CUR_DIRECTION);
10548 extern __inline __m512
10549 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10550 _mm512_sub_ps (__m512 __A, __m512 __B)
10552 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10553 (__v16sf) __B,
10554 (__v16sf)
10555 _mm512_setzero_ps (),
10556 (__mmask16) -1,
10557 _MM_FROUND_CUR_DIRECTION);
10560 extern __inline __m512
10561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10562 _mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10564 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10565 (__v16sf) __B,
10566 (__v16sf) __W,
10567 (__mmask16) __U,
10568 _MM_FROUND_CUR_DIRECTION);
10571 extern __inline __m512
10572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10573 _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
10575 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
10576 (__v16sf) __B,
10577 (__v16sf)
10578 _mm512_setzero_ps (),
10579 (__mmask16) __U,
10580 _MM_FROUND_CUR_DIRECTION);
10583 extern __inline __m512d
10584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585 _mm512_mul_pd (__m512d __A, __m512d __B)
10587 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10588 (__v8df) __B,
10589 (__v8df)
10590 _mm512_setzero_pd (),
10591 (__mmask8) -1,
10592 _MM_FROUND_CUR_DIRECTION);
10595 extern __inline __m512d
10596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10597 _mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10599 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10600 (__v8df) __B,
10601 (__v8df) __W,
10602 (__mmask8) __U,
10603 _MM_FROUND_CUR_DIRECTION);
10606 extern __inline __m512d
10607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10608 _mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
10610 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
10611 (__v8df) __B,
10612 (__v8df)
10613 _mm512_setzero_pd (),
10614 (__mmask8) __U,
10615 _MM_FROUND_CUR_DIRECTION);
10618 extern __inline __m512
10619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10620 _mm512_mul_ps (__m512 __A, __m512 __B)
10622 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10623 (__v16sf) __B,
10624 (__v16sf)
10625 _mm512_setzero_ps (),
10626 (__mmask16) -1,
10627 _MM_FROUND_CUR_DIRECTION);
10630 extern __inline __m512
10631 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10632 _mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10634 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10635 (__v16sf) __B,
10636 (__v16sf) __W,
10637 (__mmask16) __U,
10638 _MM_FROUND_CUR_DIRECTION);
10641 extern __inline __m512
10642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10643 _mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
10645 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
10646 (__v16sf) __B,
10647 (__v16sf)
10648 _mm512_setzero_ps (),
10649 (__mmask16) __U,
10650 _MM_FROUND_CUR_DIRECTION);
10653 extern __inline __m512d
10654 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10655 _mm512_div_pd (__m512d __M, __m512d __V)
10657 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10658 (__v8df) __V,
10659 (__v8df)
10660 _mm512_setzero_pd (),
10661 (__mmask8) -1,
10662 _MM_FROUND_CUR_DIRECTION);
10665 extern __inline __m512d
10666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10667 _mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
10669 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10670 (__v8df) __V,
10671 (__v8df) __W,
10672 (__mmask8) __U,
10673 _MM_FROUND_CUR_DIRECTION);
10676 extern __inline __m512d
10677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10678 _mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
10680 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
10681 (__v8df) __V,
10682 (__v8df)
10683 _mm512_setzero_pd (),
10684 (__mmask8) __U,
10685 _MM_FROUND_CUR_DIRECTION);
10688 extern __inline __m512
10689 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10690 _mm512_div_ps (__m512 __A, __m512 __B)
10692 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10693 (__v16sf) __B,
10694 (__v16sf)
10695 _mm512_setzero_ps (),
10696 (__mmask16) -1,
10697 _MM_FROUND_CUR_DIRECTION);
10700 extern __inline __m512
10701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10702 _mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10704 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10705 (__v16sf) __B,
10706 (__v16sf) __W,
10707 (__mmask16) __U,
10708 _MM_FROUND_CUR_DIRECTION);
10711 extern __inline __m512
10712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713 _mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
10715 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
10716 (__v16sf) __B,
10717 (__v16sf)
10718 _mm512_setzero_ps (),
10719 (__mmask16) __U,
10720 _MM_FROUND_CUR_DIRECTION);
10723 extern __inline __m512d
10724 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10725 _mm512_max_pd (__m512d __A, __m512d __B)
10727 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10728 (__v8df) __B,
10729 (__v8df)
10730 _mm512_setzero_pd (),
10731 (__mmask8) -1,
10732 _MM_FROUND_CUR_DIRECTION);
10735 extern __inline __m512d
10736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10737 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10739 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10740 (__v8df) __B,
10741 (__v8df) __W,
10742 (__mmask8) __U,
10743 _MM_FROUND_CUR_DIRECTION);
10746 extern __inline __m512d
10747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10748 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
10750 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
10751 (__v8df) __B,
10752 (__v8df)
10753 _mm512_setzero_pd (),
10754 (__mmask8) __U,
10755 _MM_FROUND_CUR_DIRECTION);
10758 extern __inline __m512
10759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10760 _mm512_max_ps (__m512 __A, __m512 __B)
10762 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10763 (__v16sf) __B,
10764 (__v16sf)
10765 _mm512_setzero_ps (),
10766 (__mmask16) -1,
10767 _MM_FROUND_CUR_DIRECTION);
10770 extern __inline __m512
10771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10772 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10774 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10775 (__v16sf) __B,
10776 (__v16sf) __W,
10777 (__mmask16) __U,
10778 _MM_FROUND_CUR_DIRECTION);
10781 extern __inline __m512
10782 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10783 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
10785 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
10786 (__v16sf) __B,
10787 (__v16sf)
10788 _mm512_setzero_ps (),
10789 (__mmask16) __U,
10790 _MM_FROUND_CUR_DIRECTION);
10793 extern __inline __m512d
10794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10795 _mm512_min_pd (__m512d __A, __m512d __B)
10797 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10798 (__v8df) __B,
10799 (__v8df)
10800 _mm512_setzero_pd (),
10801 (__mmask8) -1,
10802 _MM_FROUND_CUR_DIRECTION);
10805 extern __inline __m512d
10806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10807 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10809 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10810 (__v8df) __B,
10811 (__v8df) __W,
10812 (__mmask8) __U,
10813 _MM_FROUND_CUR_DIRECTION);
10816 extern __inline __m512d
10817 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10818 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
10820 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
10821 (__v8df) __B,
10822 (__v8df)
10823 _mm512_setzero_pd (),
10824 (__mmask8) __U,
10825 _MM_FROUND_CUR_DIRECTION);
10828 extern __inline __m512
10829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10830 _mm512_min_ps (__m512 __A, __m512 __B)
10832 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10833 (__v16sf) __B,
10834 (__v16sf)
10835 _mm512_setzero_ps (),
10836 (__mmask16) -1,
10837 _MM_FROUND_CUR_DIRECTION);
10840 extern __inline __m512
10841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10842 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10844 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10845 (__v16sf) __B,
10846 (__v16sf) __W,
10847 (__mmask16) __U,
10848 _MM_FROUND_CUR_DIRECTION);
10851 extern __inline __m512
10852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10853 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
10855 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
10856 (__v16sf) __B,
10857 (__v16sf)
10858 _mm512_setzero_ps (),
10859 (__mmask16) __U,
10860 _MM_FROUND_CUR_DIRECTION);
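/* VSCALEF: each result element is __A * 2**floor(__B), i.e. __B supplies a
   (possibly fractional) binary exponent by which __A is scaled.  */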
10863 extern __inline __m512d
10864 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10865 _mm512_scalef_pd (__m512d __A, __m512d __B)
10867 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10868 (__v8df) __B,
10869 (__v8df)
10870 _mm512_setzero_pd (),
10871 (__mmask8) -1,
10872 _MM_FROUND_CUR_DIRECTION);
10875 extern __inline __m512d
10876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10877 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
10879 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10880 (__v8df) __B,
10881 (__v8df) __W,
10882 (__mmask8) __U,
10883 _MM_FROUND_CUR_DIRECTION);
10886 extern __inline __m512d
10887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
10890 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
10891 (__v8df) __B,
10892 (__v8df)
10893 _mm512_setzero_pd (),
10894 (__mmask8) __U,
10895 _MM_FROUND_CUR_DIRECTION);
10898 extern __inline __m512
10899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900 _mm512_scalef_ps (__m512 __A, __m512 __B)
10902 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10903 (__v16sf) __B,
10904 (__v16sf)
10905 _mm512_setzero_ps (),
10906 (__mmask16) -1,
10907 _MM_FROUND_CUR_DIRECTION);
10910 extern __inline __m512
10911 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10912 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
10914 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10915 (__v16sf) __B,
10916 (__v16sf) __W,
10917 (__mmask16) __U,
10918 _MM_FROUND_CUR_DIRECTION);
10921 extern __inline __m512
10922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10923 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
10925 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
10926 (__v16sf) __B,
10927 (__v16sf)
10928 _mm512_setzero_ps (),
10929 (__mmask16) __U,
10930 _MM_FROUND_CUR_DIRECTION);
10933 extern __inline __m128d
10934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10935 _mm_scalef_sd (__m128d __A, __m128d __B)
10937 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
10938 (__v2df) __B,
10939 _MM_FROUND_CUR_DIRECTION);
10942 extern __inline __m128
10943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10944 _mm_scalef_ss (__m128 __A, __m128 __B)
10946 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
10947 (__v4sf) __B,
10948 _MM_FROUND_CUR_DIRECTION);
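/* Fused multiply-add family.  Per element, with a single rounding:
     fmadd      __A * __B + __C
     fmsub      __A * __B - __C
     fnmadd    -(__A * __B) + __C
     fnmsub    -(__A * __B) - __C
     fmaddsub   subtracts __C in even-indexed elements, adds it in odd ones
     fmsubadd   adds __C in even-indexed elements, subtracts it in odd ones
   The _mask_ forms take the pass-through value from the first operand, the
   _mask3_ forms from the third, and the _maskz_ forms zero masked-off
   elements.  Illustrative use only (operands hypothetical):

     __m512d __r = _mm512_fmadd_pd (__a, __b, __c);  */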
10951 extern __inline __m512d
10952 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10953 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
10955 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
10956 (__v8df) __B,
10957 (__v8df) __C,
10958 (__mmask8) -1,
10959 _MM_FROUND_CUR_DIRECTION);
10962 extern __inline __m512d
10963 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10964 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
10966 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
10967 (__v8df) __B,
10968 (__v8df) __C,
10969 (__mmask8) __U,
10970 _MM_FROUND_CUR_DIRECTION);
10973 extern __inline __m512d
10974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10975 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
10977 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
10978 (__v8df) __B,
10979 (__v8df) __C,
10980 (__mmask8) __U,
10981 _MM_FROUND_CUR_DIRECTION);
10984 extern __inline __m512d
10985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10986 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
10988 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
10989 (__v8df) __B,
10990 (__v8df) __C,
10991 (__mmask8) __U,
10992 _MM_FROUND_CUR_DIRECTION);
10995 extern __inline __m512
10996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10997 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
10999 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11000 (__v16sf) __B,
11001 (__v16sf) __C,
11002 (__mmask16) -1,
11003 _MM_FROUND_CUR_DIRECTION);
11006 extern __inline __m512
11007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11010 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11011 (__v16sf) __B,
11012 (__v16sf) __C,
11013 (__mmask16) __U,
11014 _MM_FROUND_CUR_DIRECTION);
11017 extern __inline __m512
11018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11019 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11021 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
11022 (__v16sf) __B,
11023 (__v16sf) __C,
11024 (__mmask16) __U,
11025 _MM_FROUND_CUR_DIRECTION);
11028 extern __inline __m512
11029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11030 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11032 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11033 (__v16sf) __B,
11034 (__v16sf) __C,
11035 (__mmask16) __U,
11036 _MM_FROUND_CUR_DIRECTION);
11039 extern __inline __m512d
11040 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11041 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11043 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11044 (__v8df) __B,
11045 -(__v8df) __C,
11046 (__mmask8) -1,
11047 _MM_FROUND_CUR_DIRECTION);
11050 extern __inline __m512d
11051 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11052 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11054 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
11055 (__v8df) __B,
11056 -(__v8df) __C,
11057 (__mmask8) __U,
11058 _MM_FROUND_CUR_DIRECTION);
11061 extern __inline __m512d
11062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11063 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11065 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
11066 (__v8df) __B,
11067 (__v8df) __C,
11068 (__mmask8) __U,
11069 _MM_FROUND_CUR_DIRECTION);
11072 extern __inline __m512d
11073 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11074 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11076 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
11077 (__v8df) __B,
11078 -(__v8df) __C,
11079 (__mmask8) __U,
11080 _MM_FROUND_CUR_DIRECTION);
11083 extern __inline __m512
11084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11085 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11087 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11088 (__v16sf) __B,
11089 -(__v16sf) __C,
11090 (__mmask16) -1,
11091 _MM_FROUND_CUR_DIRECTION);
11094 extern __inline __m512
11095 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11096 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11098 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
11099 (__v16sf) __B,
11100 -(__v16sf) __C,
11101 (__mmask16) __U,
11102 _MM_FROUND_CUR_DIRECTION);
11105 extern __inline __m512
11106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11107 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11109 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
11110 (__v16sf) __B,
11111 (__v16sf) __C,
11112 (__mmask16) __U,
11113 _MM_FROUND_CUR_DIRECTION);
11116 extern __inline __m512
11117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11118 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11120 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
11121 (__v16sf) __B,
11122 -(__v16sf) __C,
11123 (__mmask16) __U,
11124 _MM_FROUND_CUR_DIRECTION);
11127 extern __inline __m512d
11128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11129 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
11131 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11132 (__v8df) __B,
11133 (__v8df) __C,
11134 (__mmask8) -1,
11135 _MM_FROUND_CUR_DIRECTION);
11138 extern __inline __m512d
11139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11140 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11142 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11143 (__v8df) __B,
11144 (__v8df) __C,
11145 (__mmask8) __U,
11146 _MM_FROUND_CUR_DIRECTION);
11149 extern __inline __m512d
11150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11151 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11153 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
11154 (__v8df) __B,
11155 (__v8df) __C,
11156 (__mmask8) __U,
11157 _MM_FROUND_CUR_DIRECTION);
11160 extern __inline __m512d
11161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11162 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11164 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11165 (__v8df) __B,
11166 (__v8df) __C,
11167 (__mmask8) __U,
11168 _MM_FROUND_CUR_DIRECTION);
11171 extern __inline __m512
11172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11173 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
11175 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11176 (__v16sf) __B,
11177 (__v16sf) __C,
11178 (__mmask16) -1,
11179 _MM_FROUND_CUR_DIRECTION);
11182 extern __inline __m512
11183 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11184 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11186 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11187 (__v16sf) __B,
11188 (__v16sf) __C,
11189 (__mmask16) __U,
11190 _MM_FROUND_CUR_DIRECTION);
11193 extern __inline __m512
11194 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11195 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11197 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
11198 (__v16sf) __B,
11199 (__v16sf) __C,
11200 (__mmask16) __U,
11201 _MM_FROUND_CUR_DIRECTION);
11204 extern __inline __m512
11205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11206 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11208 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11209 (__v16sf) __B,
11210 (__v16sf) __C,
11211 (__mmask16) __U,
11212 _MM_FROUND_CUR_DIRECTION);
11215 extern __inline __m512d
11216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11217 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
11219 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11220 (__v8df) __B,
11221 -(__v8df) __C,
11222 (__mmask8) -1,
11223 _MM_FROUND_CUR_DIRECTION);
11226 extern __inline __m512d
11227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11228 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11230 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
11231 (__v8df) __B,
11232 -(__v8df) __C,
11233 (__mmask8) __U,
11234 _MM_FROUND_CUR_DIRECTION);
11237 extern __inline __m512d
11238 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11239 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11241 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
11242 (__v8df) __B,
11243 (__v8df) __C,
11244 (__mmask8) __U,
11245 _MM_FROUND_CUR_DIRECTION);
11248 extern __inline __m512d
11249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11250 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11252 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
11253 (__v8df) __B,
11254 -(__v8df) __C,
11255 (__mmask8) __U,
11256 _MM_FROUND_CUR_DIRECTION);
11259 extern __inline __m512
11260 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11261 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
11263 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11264 (__v16sf) __B,
11265 -(__v16sf) __C,
11266 (__mmask16) -1,
11267 _MM_FROUND_CUR_DIRECTION);
11270 extern __inline __m512
11271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11272 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11274 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
11275 (__v16sf) __B,
11276 -(__v16sf) __C,
11277 (__mmask16) __U,
11278 _MM_FROUND_CUR_DIRECTION);
11281 extern __inline __m512
11282 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11283 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11285 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
11286 (__v16sf) __B,
11287 (__v16sf) __C,
11288 (__mmask16) __U,
11289 _MM_FROUND_CUR_DIRECTION);
11292 extern __inline __m512
11293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11294 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11296 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
11297 (__v16sf) __B,
11298 -(__v16sf) __C,
11299 (__mmask16) __U,
11300 _MM_FROUND_CUR_DIRECTION);
11303 extern __inline __m512d
11304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11305 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
11307 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11308 (__v8df) __B,
11309 (__v8df) __C,
11310 (__mmask8) -1,
11311 _MM_FROUND_CUR_DIRECTION);
11314 extern __inline __m512d
11315 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11316 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11318 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
11319 (__v8df) __B,
11320 (__v8df) __C,
11321 (__mmask8) __U,
11322 _MM_FROUND_CUR_DIRECTION);
11325 extern __inline __m512d
11326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11327 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11329 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
11330 (__v8df) __B,
11331 (__v8df) __C,
11332 (__mmask8) __U,
11333 _MM_FROUND_CUR_DIRECTION);
11336 extern __inline __m512d
11337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11338 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11340 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11341 (__v8df) __B,
11342 (__v8df) __C,
11343 (__mmask8) __U,
11344 _MM_FROUND_CUR_DIRECTION);
11347 extern __inline __m512
11348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11349 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
11351 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11352 (__v16sf) __B,
11353 (__v16sf) __C,
11354 (__mmask16) -1,
11355 _MM_FROUND_CUR_DIRECTION);
11358 extern __inline __m512
11359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11360 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11362 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
11363 (__v16sf) __B,
11364 (__v16sf) __C,
11365 (__mmask16) __U,
11366 _MM_FROUND_CUR_DIRECTION);
11369 extern __inline __m512
11370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11371 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11373 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
11374 (__v16sf) __B,
11375 (__v16sf) __C,
11376 (__mmask16) __U,
11377 _MM_FROUND_CUR_DIRECTION);
11380 extern __inline __m512
11381 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11382 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11384 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11385 (__v16sf) __B,
11386 (__v16sf) __C,
11387 (__mmask16) __U,
11388 _MM_FROUND_CUR_DIRECTION);
11391 extern __inline __m512d
11392 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11393 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
11395 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
11396 (__v8df) __B,
11397 -(__v8df) __C,
11398 (__mmask8) -1,
11399 _MM_FROUND_CUR_DIRECTION);
11402 extern __inline __m512d
11403 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11404 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
11406 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
11407 (__v8df) __B,
11408 (__v8df) __C,
11409 (__mmask8) __U,
11410 _MM_FROUND_CUR_DIRECTION);
11413 extern __inline __m512d
11414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11415 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
11417 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
11418 (__v8df) __B,
11419 (__v8df) __C,
11420 (__mmask8) __U,
11421 _MM_FROUND_CUR_DIRECTION);
11424 extern __inline __m512d
11425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11426 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
11428 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
11429 (__v8df) __B,
11430 -(__v8df) __C,
11431 (__mmask8) __U,
11432 _MM_FROUND_CUR_DIRECTION);
11435 extern __inline __m512
11436 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11437 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
11439 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
11440 (__v16sf) __B,
11441 -(__v16sf) __C,
11442 (__mmask16) -1,
11443 _MM_FROUND_CUR_DIRECTION);
11446 extern __inline __m512
11447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11448 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
11450 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
11451 (__v16sf) __B,
11452 (__v16sf) __C,
11453 (__mmask16) __U,
11454 _MM_FROUND_CUR_DIRECTION);
11457 extern __inline __m512
11458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11459 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
11461 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
11462 (__v16sf) __B,
11463 (__v16sf) __C,
11464 (__mmask16) __U,
11465 _MM_FROUND_CUR_DIRECTION);
11468 extern __inline __m512
11469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11470 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
11472 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
11473 (__v16sf) __B,
11474 -(__v16sf) __C,
11475 (__mmask16) __U,
11476 _MM_FROUND_CUR_DIRECTION);
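/* Conversions from packed floating point to 32-bit integers.  The cvtt*
   forms truncate toward zero; the cvt* forms round according to the current
   rounding direction (_MM_FROUND_CUR_DIRECTION).  The *_epu32 forms produce
   unsigned results.  Eight doubles narrow to a __m256i of eight ints;
   sixteen floats fill a full __m512i.  */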
11479 extern __inline __m256i
11480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11481 _mm512_cvttpd_epi32 (__m512d __A)
11483 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11484 (__v8si)
11485 _mm256_setzero_si256 (),
11486 (__mmask8) -1,
11487 _MM_FROUND_CUR_DIRECTION);
11490 extern __inline __m256i
11491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11492 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11494 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11495 (__v8si) __W,
11496 (__mmask8) __U,
11497 _MM_FROUND_CUR_DIRECTION);
11500 extern __inline __m256i
11501 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
11504 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
11505 (__v8si)
11506 _mm256_setzero_si256 (),
11507 (__mmask8) __U,
11508 _MM_FROUND_CUR_DIRECTION);
11511 extern __inline __m256i
11512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11513 _mm512_cvttpd_epu32 (__m512d __A)
11515 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11516 (__v8si)
11517 _mm256_setzero_si256 (),
11518 (__mmask8) -1,
11519 _MM_FROUND_CUR_DIRECTION);
11522 extern __inline __m256i
11523 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11524 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11526 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11527 (__v8si) __W,
11528 (__mmask8) __U,
11529 _MM_FROUND_CUR_DIRECTION);
11532 extern __inline __m256i
11533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11534 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
11536 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
11537 (__v8si)
11538 _mm256_setzero_si256 (),
11539 (__mmask8) __U,
11540 _MM_FROUND_CUR_DIRECTION);
11543 extern __inline __m256i
11544 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11545 _mm512_cvtpd_epi32 (__m512d __A)
11547 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11548 (__v8si)
11549 _mm256_setzero_si256 (),
11550 (__mmask8) -1,
11551 _MM_FROUND_CUR_DIRECTION);
11554 extern __inline __m256i
11555 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11556 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
11558 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11559 (__v8si) __W,
11560 (__mmask8) __U,
11561 _MM_FROUND_CUR_DIRECTION);
11564 extern __inline __m256i
11565 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11566 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
11568 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
11569 (__v8si)
11570 _mm256_setzero_si256 (),
11571 (__mmask8) __U,
11572 _MM_FROUND_CUR_DIRECTION);
11575 extern __inline __m256i
11576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11577 _mm512_cvtpd_epu32 (__m512d __A)
11579 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11580 (__v8si)
11581 _mm256_setzero_si256 (),
11582 (__mmask8) -1,
11583 _MM_FROUND_CUR_DIRECTION);
11586 extern __inline __m256i
11587 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11588 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
11590 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11591 (__v8si) __W,
11592 (__mmask8) __U,
11593 _MM_FROUND_CUR_DIRECTION);
11596 extern __inline __m256i
11597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11598 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
11600 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
11601 (__v8si)
11602 _mm256_setzero_si256 (),
11603 (__mmask8) __U,
11604 _MM_FROUND_CUR_DIRECTION);
11607 extern __inline __m512i
11608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11609 _mm512_cvttps_epi32 (__m512 __A)
11611 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11612 (__v16si)
11613 _mm512_setzero_si512 (),
11614 (__mmask16) -1,
11615 _MM_FROUND_CUR_DIRECTION);
11618 extern __inline __m512i
11619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11620 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11622 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11623 (__v16si) __W,
11624 (__mmask16) __U,
11625 _MM_FROUND_CUR_DIRECTION);
11628 extern __inline __m512i
11629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11630 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
11632 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
11633 (__v16si)
11634 _mm512_setzero_si512 (),
11635 (__mmask16) __U,
11636 _MM_FROUND_CUR_DIRECTION);
11639 extern __inline __m512i
11640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11641 _mm512_cvttps_epu32 (__m512 __A)
11643 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11644 (__v16si)
11645 _mm512_setzero_si512 (),
11646 (__mmask16) -1,
11647 _MM_FROUND_CUR_DIRECTION);
11650 extern __inline __m512i
11651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11654 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11655 (__v16si) __W,
11656 (__mmask16) __U,
11657 _MM_FROUND_CUR_DIRECTION);
11660 extern __inline __m512i
11661 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11662 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
11664 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
11665 (__v16si)
11666 _mm512_setzero_si512 (),
11667 (__mmask16) __U,
11668 _MM_FROUND_CUR_DIRECTION);
11671 extern __inline __m512i
11672 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11673 _mm512_cvtps_epi32 (__m512 __A)
11675 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11676 (__v16si)
11677 _mm512_setzero_si512 (),
11678 (__mmask16) -1,
11679 _MM_FROUND_CUR_DIRECTION);
11682 extern __inline __m512i
11683 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11684 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
11686 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11687 (__v16si) __W,
11688 (__mmask16) __U,
11689 _MM_FROUND_CUR_DIRECTION);
11692 extern __inline __m512i
11693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11694 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
11696 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
11697 (__v16si)
11698 _mm512_setzero_si512 (),
11699 (__mmask16) __U,
11700 _MM_FROUND_CUR_DIRECTION);
11703 extern __inline __m512i
11704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11705 _mm512_cvtps_epu32 (__m512 __A)
11707 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11708 (__v16si)
11709 _mm512_setzero_si512 (),
11710 (__mmask16) -1,
11711 _MM_FROUND_CUR_DIRECTION);
11714 extern __inline __m512i
11715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
11718 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11719 (__v16si) __W,
11720 (__mmask16) __U,
11721 _MM_FROUND_CUR_DIRECTION);
11724 extern __inline __m512i
11725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11726 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
11728 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
11729 (__v16si)
11730 _mm512_setzero_si512 (),
11731 (__mmask16) __U,
11732 _MM_FROUND_CUR_DIRECTION);
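/* Scalar unsigned-integer to float conversions.  The 64-bit variants are
   available only when compiling for x86-64.  */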
11735 #ifdef __x86_64__
11736 extern __inline __m128
11737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11738 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
11740 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
11741 _MM_FROUND_CUR_DIRECTION);
11744 extern __inline __m128d
11745 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11746 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
11748 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
11749 _MM_FROUND_CUR_DIRECTION);
11751 #endif
11753 extern __inline __m128
11754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11755 _mm_cvtu32_ss (__m128 __A, unsigned __B)
11757 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
11758 _MM_FROUND_CUR_DIRECTION);
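/* Packed 32-bit integer to float conversions: cvtepi32_ps treats the source
   elements as signed, cvtepu32_ps as unsigned.  */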
11761 extern __inline __m512
11762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11763 _mm512_cvtepi32_ps (__m512i __A)
11765 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11766 (__v16sf)
11767 _mm512_setzero_ps (),
11768 (__mmask16) -1,
11769 _MM_FROUND_CUR_DIRECTION);
11772 extern __inline __m512
11773 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11774 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11776 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11777 (__v16sf) __W,
11778 (__mmask16) __U,
11779 _MM_FROUND_CUR_DIRECTION);
11782 extern __inline __m512
11783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11784 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
11786 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
11787 (__v16sf)
11788 _mm512_setzero_ps (),
11789 (__mmask16) __U,
11790 _MM_FROUND_CUR_DIRECTION);
11793 extern __inline __m512
11794 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11795 _mm512_cvtepu32_ps (__m512i __A)
11797 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11798 (__v16sf)
11799 _mm512_setzero_ps (),
11800 (__mmask16) -1,
11801 _MM_FROUND_CUR_DIRECTION);
11804 extern __inline __m512
11805 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
11808 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11809 (__v16sf) __W,
11810 (__mmask16) __U,
11811 _MM_FROUND_CUR_DIRECTION);
11814 extern __inline __m512
11815 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11816 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
11818 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
11819 (__v16sf)
11820 _mm512_setzero_ps (),
11821 (__mmask16) __U,
11822 _MM_FROUND_CUR_DIRECTION);
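/* VFIXUPIMM: "fix up" special values (NaN, +/-0, +/-Inf, ...) in __A/__B,
   choosing each element's replacement from the 32-bit lookup table held in
   the corresponding element of __C; __imm controls which special cases
   report exceptions.  The immediate must be a compile-time constant, so
   inline definitions are provided only when __OPTIMIZE__ is defined and
   equivalent macros are used otherwise.  */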
11825 #ifdef __OPTIMIZE__
11826 extern __inline __m512d
11827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11828 _mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
11830 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
11831 (__v8df) __B,
11832 (__v8di) __C,
11833 __imm,
11834 (__mmask8) -1,
11835 _MM_FROUND_CUR_DIRECTION);
11838 extern __inline __m512d
11839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11840 _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
11841 __m512i __C, const int __imm)
11843 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
11844 (__v8df) __B,
11845 (__v8di) __C,
11846 __imm,
11847 (__mmask8) __U,
11848 _MM_FROUND_CUR_DIRECTION);
11851 extern __inline __m512d
11852 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11853 _mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
11854 __m512i __C, const int __imm)
11856 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
11857 (__v8df) __B,
11858 (__v8di) __C,
11859 __imm,
11860 (__mmask8) __U,
11861 _MM_FROUND_CUR_DIRECTION);
11864 extern __inline __m512
11865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11866 _mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
11868 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
11869 (__v16sf) __B,
11870 (__v16si) __C,
11871 __imm,
11872 (__mmask16) -1,
11873 _MM_FROUND_CUR_DIRECTION);
11876 extern __inline __m512
11877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878 _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
11879 __m512i __C, const int __imm)
11881 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
11882 (__v16sf) __B,
11883 (__v16si) __C,
11884 __imm,
11885 (__mmask16) __U,
11886 _MM_FROUND_CUR_DIRECTION);
11889 extern __inline __m512
11890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11891 _mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
11892 __m512i __C, const int __imm)
11894 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
11895 (__v16sf) __B,
11896 (__v16si) __C,
11897 __imm,
11898 (__mmask16) __U,
11899 _MM_FROUND_CUR_DIRECTION);
11902 extern __inline __m128d
11903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11904 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
11906 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
11907 (__v2df) __B,
11908 (__v2di) __C, __imm,
11909 (__mmask8) -1,
11910 _MM_FROUND_CUR_DIRECTION);
11913 extern __inline __m128d
11914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11915 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
11916 __m128i __C, const int __imm)
11918 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
11919 (__v2df) __B,
11920 (__v2di) __C, __imm,
11921 (__mmask8) __U,
11922 _MM_FROUND_CUR_DIRECTION);
11925 extern __inline __m128d
11926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11927 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
11928 __m128i __C, const int __imm)
11930 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
11931 (__v2df) __B,
11932 (__v2di) __C,
11933 __imm,
11934 (__mmask8) __U,
11935 _MM_FROUND_CUR_DIRECTION);
11938 extern __inline __m128
11939 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11940 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
11942 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
11943 (__v4sf) __B,
11944 (__v4si) __C, __imm,
11945 (__mmask8) -1,
11946 _MM_FROUND_CUR_DIRECTION);
11949 extern __inline __m128
11950 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11951 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
11952 __m128i __C, const int __imm)
11954 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
11955 (__v4sf) __B,
11956 (__v4si) __C, __imm,
11957 (__mmask8) __U,
11958 _MM_FROUND_CUR_DIRECTION);
11961 extern __inline __m128
11962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11963 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
11964 __m128i __C, const int __imm)
11966 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
11967 (__v4sf) __B,
11968 (__v4si) __C, __imm,
11969 (__mmask8) __U,
11970 _MM_FROUND_CUR_DIRECTION);
11972 #else
11973 #define _mm512_fixupimm_pd(X, Y, Z, C) \
11974 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
11975 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
11976 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
11978 #define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \
11979 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
11980 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
11981 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
11983 #define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \
11984 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
11985 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
11986 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
11988 #define _mm512_fixupimm_ps(X, Y, Z, C) \
11989 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
11990 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
11991 (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
11993 #define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \
11994 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
11995 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
11996 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
11998 #define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \
11999 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
12000 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
12001 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12003 #define _mm_fixupimm_sd(X, Y, Z, C) \
12004 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12005 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12006 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12008 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
12009 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
12010 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12011 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12013 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
12014 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
12015 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
12016 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12018 #define _mm_fixupimm_ss(X, Y, Z, C) \
12019 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12020 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12021 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
12023 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
12024 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
12025 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12026 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12028 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
12029 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
12030 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
12031 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12032 #endif
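/* Scalar float to integer conversions.  The _mm_cvt*_u32/_u64 forms round
   according to the current direction; the _mm_cvtt* forms truncate toward
   zero.  The 64-bit variants are again limited to x86-64.  */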
12034 #ifdef __x86_64__
12035 extern __inline unsigned long long
12036 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12037 _mm_cvtss_u64 (__m128 __A)
12039 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
12040 __A,
12041 _MM_FROUND_CUR_DIRECTION);
12044 extern __inline unsigned long long
12045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046 _mm_cvttss_u64 (__m128 __A)
12048 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
12049 __A,
12050 _MM_FROUND_CUR_DIRECTION);
12053 extern __inline long long
12054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12055 _mm_cvttss_i64 (__m128 __A)
12057 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
12058 _MM_FROUND_CUR_DIRECTION);
12060 #endif /* __x86_64__ */
12062 extern __inline unsigned
12063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12064 _mm_cvtss_u32 (__m128 __A)
12066 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
12067 _MM_FROUND_CUR_DIRECTION);
12070 extern __inline unsigned
12071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072 _mm_cvttss_u32 (__m128 __A)
12074 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
12075 _MM_FROUND_CUR_DIRECTION);
12078 extern __inline int
12079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080 _mm_cvttss_i32 (__m128 __A)
12082 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
12083 _MM_FROUND_CUR_DIRECTION);
12086 #ifdef __x86_64__
12087 extern __inline unsigned long long
12088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12089 _mm_cvtsd_u64 (__m128d __A)
12091 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
12092 __A,
12093 _MM_FROUND_CUR_DIRECTION);
12096 extern __inline unsigned long long
12097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098 _mm_cvttsd_u64 (__m128d __A)
12100 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
12101 __A,
12102 _MM_FROUND_CUR_DIRECTION);
12105 extern __inline long long
12106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12107 _mm_cvttsd_i64 (__m128d __A)
12109 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
12110 _MM_FROUND_CUR_DIRECTION);
12112 #endif /* __x86_64__ */
12114 extern __inline unsigned
12115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116 _mm_cvtsd_u32 (__m128d __A)
12118 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
12119 _MM_FROUND_CUR_DIRECTION);
12122 extern __inline unsigned
12123 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12124 _mm_cvttsd_u32 (__m128d __A)
12126 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
12127 _MM_FROUND_CUR_DIRECTION);
12130 extern __inline int
12131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12132 _mm_cvttsd_i32 (__m128d __A)
12134 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
12135 _MM_FROUND_CUR_DIRECTION);
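/* Width-changing floating-point conversions: cvtps_pd widens eight floats
   to eight doubles, cvtph_ps expands sixteen half-precision values to
   single precision, and cvtpd_ps narrows eight doubles to a __m256 of
   floats.  */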
12138 extern __inline __m512d
12139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12140 _mm512_cvtps_pd (__m256 __A)
12142 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12143 (__v8df)
12144 _mm512_setzero_pd (),
12145 (__mmask8) -1,
12146 _MM_FROUND_CUR_DIRECTION);
12149 extern __inline __m512d
12150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12151 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
12153 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12154 (__v8df) __W,
12155 (__mmask8) __U,
12156 _MM_FROUND_CUR_DIRECTION);
12159 extern __inline __m512d
12160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12161 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
12163 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
12164 (__v8df)
12165 _mm512_setzero_pd (),
12166 (__mmask8) __U,
12167 _MM_FROUND_CUR_DIRECTION);
12170 extern __inline __m512
12171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172 _mm512_cvtph_ps (__m256i __A)
12174 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12175 (__v16sf)
12176 _mm512_setzero_ps (),
12177 (__mmask16) -1,
12178 _MM_FROUND_CUR_DIRECTION);
12181 extern __inline __m512
12182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12183 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
12185 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12186 (__v16sf) __W,
12187 (__mmask16) __U,
12188 _MM_FROUND_CUR_DIRECTION);
12191 extern __inline __m512
12192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12193 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
12195 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
12196 (__v16sf)
12197 _mm512_setzero_ps (),
12198 (__mmask16) __U,
12199 _MM_FROUND_CUR_DIRECTION);
12202 extern __inline __m256
12203 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12204 _mm512_cvtpd_ps (__m512d __A)
12206 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12207 (__v8sf)
12208 _mm256_setzero_ps (),
12209 (__mmask8) -1,
12210 _MM_FROUND_CUR_DIRECTION);
12213 extern __inline __m256
12214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12215 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
12217 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12218 (__v8sf) __W,
12219 (__mmask8) __U,
12220 _MM_FROUND_CUR_DIRECTION);
12223 extern __inline __m256
12224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12225 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
12227 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
12228 (__v8sf)
12229 _mm256_setzero_ps (),
12230 (__mmask8) __U,
12231 _MM_FROUND_CUR_DIRECTION);
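/* VGETEXP returns, for each element, floor(log2(|x|)) as a floating-point
   value.  VGETMANT extracts the mantissa normalized to the interval chosen
   by the _MM_MANTISSA_NORM_ENUM argument, with sign handling selected by
   the _MM_MANTISSA_SIGN_ENUM argument; the two controls are packed into the
   immediate as (__C << 2) | __B.  As with fixupimm, the inline forms below
   require __OPTIMIZE__ and macro equivalents follow for the general case.  */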
12234 #ifdef __OPTIMIZE__
12235 extern __inline __m512
12236 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12237 _mm512_getexp_ps (__m512 __A)
12239 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12240 (__v16sf)
12241 _mm512_setzero_ps (),
12242 (__mmask16) -1,
12243 _MM_FROUND_CUR_DIRECTION);
12246 extern __inline __m512
12247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12248 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
12250 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12251 (__v16sf) __W,
12252 (__mmask16) __U,
12253 _MM_FROUND_CUR_DIRECTION);
12256 extern __inline __m512
12257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12258 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
12260 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
12261 (__v16sf)
12262 _mm512_setzero_ps (),
12263 (__mmask16) __U,
12264 _MM_FROUND_CUR_DIRECTION);
12267 extern __inline __m512d
12268 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12269 _mm512_getexp_pd (__m512d __A)
12271 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12272 (__v8df)
12273 _mm512_setzero_pd (),
12274 (__mmask8) -1,
12275 _MM_FROUND_CUR_DIRECTION);
12278 extern __inline __m512d
12279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12280 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
12282 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12283 (__v8df) __W,
12284 (__mmask8) __U,
12285 _MM_FROUND_CUR_DIRECTION);
12288 extern __inline __m512d
12289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12290 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
12292 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
12293 (__v8df)
12294 _mm512_setzero_pd (),
12295 (__mmask8) __U,
12296 _MM_FROUND_CUR_DIRECTION);
12299 extern __inline __m128
12300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12301 _mm_getexp_ss (__m128 __A, __m128 __B)
12303 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
12304 (__v4sf) __B,
12305 _MM_FROUND_CUR_DIRECTION);
12308 extern __inline __m128d
12309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12310 _mm_getexp_sd (__m128d __A, __m128d __B)
12312 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
12313 (__v2df) __B,
12314 _MM_FROUND_CUR_DIRECTION);
12317 extern __inline __m512d
12318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12319 _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
12320 _MM_MANTISSA_SIGN_ENUM __C)
12322 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12323 (__C << 2) | __B,
12324 _mm512_setzero_pd (),
12325 (__mmask8) -1,
12326 _MM_FROUND_CUR_DIRECTION);
12329 extern __inline __m512d
12330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331 _mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
12332 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12334 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12335 (__C << 2) | __B,
12336 (__v8df) __W, __U,
12337 _MM_FROUND_CUR_DIRECTION);
12340 extern __inline __m512d
12341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12342 _mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
12343 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12345 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
12346 (__C << 2) | __B,
12347 (__v8df)
12348 _mm512_setzero_pd (),
12349 __U,
12350 _MM_FROUND_CUR_DIRECTION);
12353 extern __inline __m512
12354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12355 _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
12356 _MM_MANTISSA_SIGN_ENUM __C)
12358 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12359 (__C << 2) | __B,
12360 _mm512_setzero_ps (),
12361 (__mmask16) -1,
12362 _MM_FROUND_CUR_DIRECTION);
12365 extern __inline __m512
12366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367 _mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
12368 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12370 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12371 (__C << 2) | __B,
12372 (__v16sf) __W, __U,
12373 _MM_FROUND_CUR_DIRECTION);
12376 extern __inline __m512
12377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12378 _mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
12379 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
12381 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
12382 (__C << 2) | __B,
12383 (__v16sf)
12384 _mm512_setzero_ps (),
12385 __U,
12386 _MM_FROUND_CUR_DIRECTION);
12389 extern __inline __m128d
12390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12391 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
12392 _MM_MANTISSA_SIGN_ENUM __D)
12394 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
12395 (__v2df) __B,
12396 (__D << 2) | __C,
12397 _MM_FROUND_CUR_DIRECTION);
12400 extern __inline __m128
12401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12402 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
12403 _MM_MANTISSA_SIGN_ENUM __D)
12405 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
12406 (__v4sf) __B,
12407 (__D << 2) | __C,
12408 _MM_FROUND_CUR_DIRECTION);
12411 #else
12412 #define _mm512_getmant_pd(X, B, C) \
12413 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12414 (int)(((C)<<2) | (B)), \
12415 (__v8df)(__m512d)_mm512_setzero_pd(), \
12416 (__mmask8)-1,\
12417 _MM_FROUND_CUR_DIRECTION))
12419 #define _mm512_mask_getmant_pd(W, U, X, B, C) \
12420 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12421 (int)(((C)<<2) | (B)), \
12422 (__v8df)(__m512d)(W), \
12423 (__mmask8)(U),\
12424 _MM_FROUND_CUR_DIRECTION))
12426 #define _mm512_maskz_getmant_pd(U, X, B, C) \
12427 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
12428 (int)(((C)<<2) | (B)), \
12429 (__v8df)(__m512d)_mm512_setzero_pd(), \
12430 (__mmask8)(U),\
12431 _MM_FROUND_CUR_DIRECTION))
12432 #define _mm512_getmant_ps(X, B, C) \
12433 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12434 (int)(((C)<<2) | (B)), \
12435 (__v16sf)(__m512)_mm512_setzero_ps(), \
12436 (__mmask16)-1,\
12437 _MM_FROUND_CUR_DIRECTION))
12439 #define _mm512_mask_getmant_ps(W, U, X, B, C) \
12440 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12441 (int)(((C)<<2) | (B)), \
12442 (__v16sf)(__m512)(W), \
12443 (__mmask16)(U),\
12444 _MM_FROUND_CUR_DIRECTION))
12446 #define _mm512_maskz_getmant_ps(U, X, B, C) \
12447 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
12448 (int)(((C)<<2) | (B)), \
12449 (__v16sf)(__m512)_mm512_setzero_ps(), \
12450 (__mmask16)(U),\
12451 _MM_FROUND_CUR_DIRECTION))
12452 #define _mm_getmant_sd(X, Y, C, D) \
12453 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
12454 (__v2df)(__m128d)(Y), \
12455 (int)(((D)<<2) | (C)), \
12456 _MM_FROUND_CUR_DIRECTION))
12458 #define _mm_getmant_ss(X, Y, C, D) \
12459 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
12460 (__v4sf)(__m128)(Y), \
12461 (int)(((D)<<2) | (C)), \
12462 _MM_FROUND_CUR_DIRECTION))
12464 #define _mm_getexp_ss(A, B) \
12465 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
12466 _MM_FROUND_CUR_DIRECTION))
12468 #define _mm_getexp_sd(A, B) \
12469 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
12470 _MM_FROUND_CUR_DIRECTION))
12472 #define _mm512_getexp_ps(A) \
12473 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12474 (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
12476 #define _mm512_mask_getexp_ps(W, U, A) \
12477 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12478 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12480 #define _mm512_maskz_getexp_ps(U, A) \
12481 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
12482 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
12484 #define _mm512_getexp_pd(A) \
12485 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12486 (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
12488 #define _mm512_mask_getexp_pd(W, U, A) \
12489 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12490 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12492 #define _mm512_maskz_getexp_pd(U, A) \
12493 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
12494 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
12495 #endif
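
/* Illustrative usage sketch -- not part of the original header or of the
   Intel API; the helper name __avx512f_example_split_mant_exp is
   hypothetical.  The getmant/getexp intrinsics decompose each element so
   that __X == mantissa * 2**exponent, with the mantissa normalised to the
   chosen interval; e.g. for lanes holding 12.0f the results are 1.5f and
   3.0f, since 12.0 = 1.5 * 2**3.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_split_mant_exp (__m512 __X, __m512 *__Mant, __m512 *__Exp)
{
  /* Mantissa normalised to [1, 2), sign taken from the source operand.  */
  *__Mant = _mm512_getmant_ps (__X, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
  /* Exponent returned as a floating-point value in each lane.  */
  *__Exp = _mm512_getexp_ps (__X);
}
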
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ps (__m512 __A, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
                                                  (__v16sf) __A, -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
                           const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
                                                  (__v16sf) __A,
                                                  (__mmask16) __B,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, __imm,
                                                  (__v16sf) _mm512_setzero_ps (),
                                                  (__mmask16) __A,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
                                                   (__v8df) __A, -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
                           const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
                                                   (__v8df) __A,
                                                   (__mmask8) __B,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, __imm,
                                                   (__v8df) _mm512_setzero_pd (),
                                                   (__mmask8) __A,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
{
  return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A,
                                                   (__v4sf) __B, __imm,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
{
  return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A,
                                                    (__v2df) __B, __imm,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_roundscale_ps(A, B)                                        \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B), \
    (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ps(A, B, C, D)                             \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),         \
                                            (int)(D),                     \
                                            (__v16sf)(__m512)(A),         \
                                            (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ps(A, B, C)                               \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),         \
                                            (int)(C),                     \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))

#define _mm512_roundscale_pd(A, B)                                        \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B), \
    (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_pd(A, B, C, D)                             \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),        \
                                             (int)(D),                    \
                                             (__v8df)(__m512d)(A),        \
                                             (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_pd(A, B, C)                               \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),        \
                                             (int)(C),                    \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_ss(A, B, C)                                        \
  ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A),         \
    (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_roundscale_sd(A, B, C)                                        \
  ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A),       \
    (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION))
#endif
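
/* Illustrative usage sketch -- not part of the original header; the helper
   name __avx512f_example_round_to_half is hypothetical.  The roundscale
   immediate keeps M fraction bits (imm[7:4]) and applies the rounding mode
   encoded in the low bits, i.e. the result is 2**-M * round (2**M * x).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_round_to_half (__m512 __X)
{
  /* M = 1 keeps one fraction bit, so each lane is rounded to the nearest
     multiple of 0.5, e.g. 1.3f -> 1.5f and 2.1f -> 2.0f.  */
  return _mm512_roundscale_ps (__X, (1 << 4) | _MM_FROUND_TO_NEAREST_INT);
}
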
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
                                                  (__v8df) __Y, __P,
                                                  (__mmask8) -1,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
                                                   (__v16sf) __Y, __P,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
                                                   (__v16sf) __Y, __P,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
                                                  (__v8df) __Y, __P,
                                                  (__mmask8) __U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
                                               (__v2df) __Y, __P,
                                               (__mmask8) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
                                               (__v2df) __Y, __P,
                                               (__mmask8) __M,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
                                               (__v4sf) __Y, __P,
                                               (__mmask8) -1,
                                               _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
                                               (__v4sf) __Y, __P,
                                               (__mmask8) __M,
                                               _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_cmp_pd_mask(X, Y, P)                                       \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)                                       \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P)                               \
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),         \
                                            (__v8df)(__m512d)(Y), (int)(P), \
                                            (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P)                               \
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),        \
                                             (__v16sf)(__m512)(Y), (int)(P), \
                                             (__mmask16)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_sd_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y), (int)(P),  \
                                         (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),            \
                                         (__v2df)(__m128d)(Y), (int)(P),  \
                                         (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm_cmp_ss_mask(X, Y, P)                                          \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y), (int)(P),   \
                                         (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P)                                  \
  ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),             \
                                         (__v4sf)(__m128)(Y), (int)(P),   \
                                         (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))
#endif
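
/* Illustrative usage sketch -- not part of the original header; the helper
   name __avx512f_example_count_less is hypothetical.  The vector compares
   return a bit mask with one bit per element, selected by a _CMP_*
   predicate from <avxintrin.h>, so counting matches is simply a popcount
   of the mask.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_count_less (__m512d __X, __m512d __Y)
{
  /* Number of double-precision lanes of __X that compare less than __Y.  */
  return __builtin_popcount (_mm512_cmp_pd_mask (__X, __Y, _CMP_LT_OS));
}
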
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kmov (__mmask16 __A)
{
  return __builtin_ia32_kmov16 (__A);
}
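
/* Illustrative usage sketch -- not part of the original header; the helper
   name __avx512f_example_combine_masks is hypothetical.  _mm512_kmov copies
   a mask value through the mask-register file (KMOVW), which is useful when
   a mask should stay in k-registers for further mask arithmetic such as
   _mm512_kand.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__avx512f_example_combine_masks (__mmask16 __A, __mmask16 __B)
{
  /* Intersect two element masks entirely within mask registers.  */
  return _mm512_kand (_mm512_kmov (__A), _mm512_kmov (__B));
}
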
#ifdef __DISABLE_AVX512F__
#undef __DISABLE_AVX512F__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512F__ */

#endif /* _AVX512FINTRIN_H_INCLUDED */