/* gcc/config/i386/avx512erintrin.h  */
/* Copyright (C) 2013
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED

#ifndef __AVX512ER__
#pragma GCC push_options
#pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
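
/* Editorial note (not in the upstream header): bit i of a mask selects
   lane i of the vector, so __mmask8 pairs with the 8-element double
   vectors and __mmask16 with the 16-element float vectors.  The
   __may_alias__ attribute is what makes the casts in the wrappers
   below legal, e.g.:

     __m512d __x;
     __v8df  __raw = (__v8df) __x;   (a reinterpretation, not a copy)
*/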
#ifdef __OPTIMIZE__
/* 2^x, approximated to 23 bits of relative accuracy (VEXP2PD/VEXP2PS).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  /* __W is intentionally left uninitialized: with an all-ones mask every
     result element is written, so the pass-through operand is never
     read.  */
  __m512d __W;
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
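
/* Usage sketch (editorial, illustrative only; `__src', `__x' and the
   0xF0 mask are hypothetical): merge-masked 2^x over eight doubles.
   Lanes whose mask bit is set receive 2^__x[i]; the others are copied
   from `__src':

     __m512d __r = _mm512_mask_exp2a23_round_pd (__src, 0xF0, __x,
						 _MM_FROUND_CUR_DIRECTION);
*/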
/* Reciprocal 1/x, approximated to 28 bits of relative accuracy
   (VRCP28PD/VRCP28PS).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
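
/* Usage sketch (editorial, illustrative only; `__a' and `__b' are
   hypothetical): __a / __b can be approximated as __a * rcp28(__b),
   replacing a long-latency divide with a multiply at about 2^-28
   relative error (_mm512_mul_ps comes from the AVX-512F intrinsics):

     __m512 __q = _mm512_mul_ps (__a,
				 _mm512_rcp28_round_ps
				   (__b, _MM_FROUND_CUR_DIRECTION));
*/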
/* Reciprocal square root 1/sqrt(x), approximated to 28 bits of relative
   accuracy (VRSQRT28PD/VRSQRT28PS).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
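
/* Usage sketch (editorial, illustrative only; `__len2' holds
   hypothetical squared lengths): rsqrt28 yields 1/sqrt(x) directly,
   avoiding a separate sqrt and divide when normalizing vectors:

     __m512 __inv = _mm512_rsqrt28_round_ps (__len2,
					     _MM_FROUND_CUR_DIRECTION);
*/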
#else
#define _mm512_exp2a23_round_pd(A, C) \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C) \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C) \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C) \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
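
/* Editorial note: the rounding-mode operand of these builtins must be
   a compile-time constant.  With __OPTIMIZE__ the always-inline
   wrappers above fold their __R argument down to such a constant;
   without optimization they would not, which is why the macro forms
   pass the constant through verbatim instead.  */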
#define _mm512_exp2a23_pd(A) \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
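
/* Usage sketch (editorial, illustrative only; `__x' is hypothetical):
   the convenience forms above just fix the rounding argument, so

     __m512 __r = _mm512_rsqrt28_ps (__x);

   is equivalent to calling _mm512_rsqrt28_round_ps with
   _MM_FROUND_CUR_DIRECTION.  */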
#ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512ER__ */

#endif /* _AVX512ERINTRIN_H_INCLUDED */