Check if SSE is available with HAS_CPU_FEATURE
[glibc.git] / sysdeps / x86 / cpu-features.h
blob8ec1562fe7f9b701a590b37bb627734af981c849
/* This file is part of the GNU C Library.
   Copyright (C) 2008-2017 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef cpu_features_h
#define cpu_features_h

/* x86 CPU feature description shared by C and assembler sources.

   Every feature NAME is described by a small family of macros:

     bit_cpu_NAME / bit_arch_NAME      mask of the feature bit
     index_cpu_NAME / index_arch_NAME  which word holds that bit
     reg_NAME (C only)                 which cpuid register holds it

   HAS_CPU_FEATURE (NAME) and HAS_ARCH_FEATURE (NAME) paste these
   together.  In C they yield a boolean expression; in assembler they
   expand to a `testl' instruction.  */

/* Masks for the internal "arch" feature word selected by
   index_arch_*.  Bit 3 is not assigned in this list.  */
#define bit_arch_Fast_Rep_String		(1 << 0)
#define bit_arch_Fast_Copy_Backward		(1 << 1)
#define bit_arch_Slow_BSF			(1 << 2)
#define bit_arch_Fast_Unaligned_Load		(1 << 4)
#define bit_arch_Prefer_PMINUB_for_stringop	(1 << 5)
#define bit_arch_AVX_Usable			(1 << 6)
#define bit_arch_FMA_Usable			(1 << 7)
#define bit_arch_FMA4_Usable			(1 << 8)
#define bit_arch_Slow_SSE4_2			(1 << 9)
#define bit_arch_AVX2_Usable			(1 << 10)
#define bit_arch_AVX_Fast_Unaligned_Load	(1 << 11)
#define bit_arch_AVX512F_Usable			(1 << 12)
#define bit_arch_AVX512DQ_Usable		(1 << 13)
#define bit_arch_I586				(1 << 14)
#define bit_arch_I686				(1 << 15)
#define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
#define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
#define bit_arch_Fast_Unaligned_Copy		(1 << 18)
#define bit_arch_Prefer_ERMS			(1 << 19)
#define bit_arch_Use_dl_runtime_resolve_opt	(1 << 20)
#define bit_arch_Use_dl_runtime_resolve_slow	(1 << 21)

/* CPUID Feature flags.  The register each mask applies to is given by
   the matching reg_* / index_cpu_* definition further down, so equal
   masks (e.g. AVX and HTT) do not clash.  */

/* COMMON_CPUID_INDEX_1.  */
#define bit_cpu_CX8		(1 << 8)
#define bit_cpu_CMOV		(1 << 15)
#define bit_cpu_SSE		(1 << 25)
#define bit_cpu_SSE2		(1 << 26)
#define bit_cpu_SSSE3		(1 << 9)
#define bit_cpu_SSE4_1		(1 << 19)
#define bit_cpu_SSE4_2		(1 << 20)
#define bit_cpu_OSXSAVE		(1 << 27)
#define bit_cpu_AVX		(1 << 28)
#define bit_cpu_POPCOUNT	(1 << 23)
#define bit_cpu_FMA		(1 << 12)
/* Listed here, but index_cpu_FMA4 selects COMMON_CPUID_INDEX_80000001.  */
#define bit_cpu_FMA4		(1 << 16)
#define bit_cpu_HTT		(1 << 28)

/* COMMON_CPUID_INDEX_7.  */
#define bit_cpu_ERMS		(1 << 9)
#define bit_cpu_RTM		(1 << 11)
#define bit_cpu_AVX2		(1 << 5)
#define bit_cpu_AVX512F		(1 << 16)
#define bit_cpu_AVX512DQ	(1 << 17)

/* XCR0 Feature flags.  */
#define bit_XMM_state		(1 << 1)
#define bit_YMM_state		(1 << 2)
#define bit_Opmask_state	(1 << 5)
#define bit_ZMM0_15_state	(1 << 6)
#define bit_ZMM16_31_state	(1 << 7)

/* The integer bit array index for the first set of internal feature bits.  */
#define FEATURE_INDEX_1 0

/* The current maximum size of the feature integer bit array.  */
#define FEATURE_INDEX_MAX 1

#ifdef __ASSEMBLER__

/* NOTE(review): CPUID_SIZE, CPUID_E?X_OFFSET, FEATURE_SIZE, CPUID_OFFSET,
   FEATURE_OFFSET and RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET are not
   defined in this file; presumably they come from this header.  */
# include <cpu-features-offsets.h>

/* Byte offset of the cpuid register holding each feature bit, relative
   to the start of the cpuid array.  Only the features listed here can
   be tested with HAS_CPU_FEATURE from assembler.  */
# define index_cpu_CX8	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_CMOV	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSE2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET
# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_cpu_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_ERMS	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET

/* Byte offset of the feature word holding each arch bit, relative to
   the start of the feature array.  */
# define index_arch_Fast_Rep_String	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_BSF		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512F_Usable	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_AVX512DQ_Usable	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I586		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_I686		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_ERMS		FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE

/* HAS_FEATURE (offset, field, name) expands to a `testl' of
   bit_<field>_<name> against the word at OFFSET + index_<field>_<name>
   inside the CPU features object.  How that object is addressed depends
   on the target and on where the code is linked:

     x86_64, SHARED, in rtld    _rtld_local_ro, %rip-relative
     x86_64, SHARED, elsewhere  via %rdx; LOAD_RTLD_GLOBAL_RO_RDX must
				have loaded the _rtld_global_ro address
     x86_64, static             _dl_x86_cpu_features, %rip-relative
     i386, SHARED, in rtld      _rtld_local_ro@GOTOFF(%edx)
     i386, SHARED, elsewhere    via %ecx, loaded from the GOT
     i386, static               _dl_x86_cpu_features, absolute

   On i386, LOAD_GOT_AND_RTLD_GLOBAL_RO sets up the registers used above
   and LOAD_FUNC_GOT_EAX (func) puts the address of FUNC in %eax.  The
   setup macros are empty where no register is needed.

   Outside libc this branch ends in #error either way, so the condition
   is left as a single expression.  */
# if defined (_LIBC) && !IS_IN (nonlib)
#  ifdef __x86_64__
#   ifdef SHARED
#    if IS_IN (rtld)
#     define LOAD_RTLD_GLOBAL_RO_RDX
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    _rtld_local_ro+offset+(index_##field##_##name)(%rip)
#    else
#     define LOAD_RTLD_GLOBAL_RO_RDX \
  mov _rtld_global_ro@GOTPCREL(%rip), %RDX_LP
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%rdx)
#    endif
#   else /* !SHARED */
#    define LOAD_RTLD_GLOBAL_RO_RDX
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    _dl_x86_cpu_features+offset+(index_##field##_##name)(%rip)
#   endif /* !SHARED */
#  else /* !__x86_64__ */
#   ifdef SHARED
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func@GOTOFF(%edx), %eax
#    if IS_IN (rtld)
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx)
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    offset+(index_##field##_##name)+_rtld_local_ro@GOTOFF(%edx)
#    else
#     define LOAD_GOT_AND_RTLD_GLOBAL_RO \
  LOAD_PIC_REG(dx); \
  mov _rtld_global_ro@GOT(%edx), %ecx
#     define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+offset+(index_##field##_##name)(%ecx)
#    endif
#   else /* !SHARED */
#    define LOAD_FUNC_GOT_EAX(func) \
  leal func, %eax
#    define LOAD_GOT_AND_RTLD_GLOBAL_RO
#    define HAS_FEATURE(offset, field, name) \
  testl $(bit_##field##_##name), \
    _dl_x86_cpu_features+offset+(index_##field##_##name)
#   endif /* !SHARED */
#  endif /* !__x86_64__ */
# else /* !_LIBC || nonlib */
#  error "Sorry, <cpu-features.h> is unimplemented for assembler"
# endif /* !_LIBC || nonlib */

/* HAS_* evaluates to true if we may use the feature at runtime.
   In assembler that means a `testl' instruction; the caller is
   expected to branch on the resulting flags.  */
# define HAS_CPU_FEATURE(name)	HAS_FEATURE (CPUID_OFFSET, cpu, name)
# define HAS_ARCH_FEATURE(name) HAS_FEATURE (FEATURE_OFFSET, arch, name)

#else	/* !__ASSEMBLER__ */

/* Slots of cpu_features.cpuid[].  Each holds one set of
   eax/ebx/ecx/edx values.  */
enum
  {
    COMMON_CPUID_INDEX_1 = 0,
    COMMON_CPUID_INDEX_7,
    COMMON_CPUID_INDEX_80000001,	/* for AMD */
    /* Keep the following line at the end.  */
    COMMON_CPUID_INDEX_MAX
  };

/* Feature data tested by the CPU_FEATURES_*_P macros.
   NOTE(review): the assembler branch addresses this object through
   offsets from <cpu-features-offsets.h>; presumably those are derived
   from this layout -- confirm before reordering or resizing members.  */
struct cpu_features
{
  enum cpu_features_kind
    {
      arch_kind_unknown = 0,
      arch_kind_intel,
      arch_kind_amd,
      arch_kind_other
    } kind;
  int max_cpuid;
  /* One register set per COMMON_CPUID_INDEX_* slot.  */
  struct cpuid_registers
  {
    unsigned int eax;
    unsigned int ebx;
    unsigned int ecx;
    unsigned int edx;
  } cpuid[COMMON_CPUID_INDEX_MAX];
  unsigned int family;
  unsigned int model;
  /* Words holding the bit_arch_* flags.  */
  unsigned int feature[FEATURE_INDEX_MAX];
};

/* Used from outside of glibc to get access to the CPU features
   structure.  */
extern const struct cpu_features *__get_cpu_features (void)
     __attribute__ ((const));

/* Inside libc proper the accessor is replaced by a direct reference.
   The test is nested rather than written as
   `defined (_LIBC) && !IS_IN (nonlib)': `&&' does not stop the
   preprocessor from parsing `IS_IN (nonlib)', which is an error when
   IS_IN is not a macro.  A nested #if in a skipped group is never
   evaluated.  */
# ifdef _LIBC
#  if !IS_IN (nonlib)
/* Unused for x86.  */
#   define INIT_ARCH()
#   define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
#  endif
# endif


/* Only used directly in cpu-features.c.
   PTR points to a struct cpu_features; NAME is a bare feature name.
   PTR is parenthesised so that arguments such as `&obj' work.  */
# define CPU_FEATURES_CPU_P(ptr, name) \
  (((ptr)->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
# define CPU_FEATURES_ARCH_P(ptr, name) \
  (((ptr)->feature[index_arch_##name] & (bit_arch_##name)) != 0)

/* HAS_* evaluates to true if we may use the feature at runtime.  */
# define HAS_CPU_FEATURE(name) \
  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
# define HAS_ARCH_FEATURE(name) \
  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)

/* cpuid[] slot holding each CPU feature bit.  */
# define index_cpu_CX8		COMMON_CPUID_INDEX_1
# define index_cpu_CMOV		COMMON_CPUID_INDEX_1
# define index_cpu_SSE		COMMON_CPUID_INDEX_1
# define index_cpu_SSE2		COMMON_CPUID_INDEX_1
# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1
# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1
# define index_cpu_AVX		COMMON_CPUID_INDEX_1
# define index_cpu_AVX2		COMMON_CPUID_INDEX_7
# define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
# define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
# define index_cpu_ERMS		COMMON_CPUID_INDEX_7
# define index_cpu_RTM		COMMON_CPUID_INDEX_7
# define index_cpu_FMA		COMMON_CPUID_INDEX_1
# define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
# define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
# define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
# define index_cpu_HTT		COMMON_CPUID_INDEX_1

/* struct cpuid_registers member holding each CPU feature bit.  */
# define reg_CX8		edx
# define reg_CMOV		edx
# define reg_SSE		edx
# define reg_SSE2		edx
# define reg_SSSE3		ecx
# define reg_SSE4_1		ecx
# define reg_SSE4_2		ecx
# define reg_AVX		ecx
# define reg_AVX2		ebx
# define reg_AVX512F		ebx
# define reg_AVX512DQ		ebx
# define reg_ERMS		ebx
# define reg_RTM		ebx
# define reg_FMA		ecx
# define reg_FMA4		ecx
# define reg_POPCOUNT		ecx
# define reg_OSXSAVE		ecx
# define reg_HTT		edx

/* feature[] word holding each arch bit.  */
# define index_arch_Fast_Rep_String	FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1
# define index_arch_Slow_BSF		FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Load	FEATURE_INDEX_1
# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
# define index_arch_AVX_Usable		FEATURE_INDEX_1
# define index_arch_FMA_Usable		FEATURE_INDEX_1
# define index_arch_FMA4_Usable		FEATURE_INDEX_1
# define index_arch_Slow_SSE4_2		FEATURE_INDEX_1
# define index_arch_AVX2_Usable		FEATURE_INDEX_1
# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
# define index_arch_AVX512F_Usable	FEATURE_INDEX_1
# define index_arch_AVX512DQ_Usable	FEATURE_INDEX_1
# define index_arch_I586		FEATURE_INDEX_1
# define index_arch_I686		FEATURE_INDEX_1
# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
# define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1
# define index_arch_Prefer_ERMS		FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1

#endif	/* !__ASSEMBLER__ */

/* HAS_CPUID, HAS_I586 and HAS_I686 are constants when the compiler's
   target macros already settle the answer; otherwise they fall back to
   a HAS_ARCH_FEATURE test.  On x86_64 only HAS_CPUID is defined.  */
#ifdef __x86_64__
# define HAS_CPUID 1
#elif defined __i586__ || defined __pentium__
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#elif (defined __i686__ || defined __pentiumpro__		\
       || defined __pentium4__ || defined __nocona__	\
       || defined __atom__ || defined __core2__		\
       || defined __corei7__ || defined __corei7_avx__	\
       || defined __core_avx2__	|| defined __nehalem__	\
       || defined __sandybridge__ || defined __haswell__	\
       || defined __knl__ || defined __bonnell__		\
       || defined __silvermont__				\
       || defined __k6__ || defined __k8__			\
       || defined __athlon__ || defined __amdfam10__	\
       || defined __bdver1__ || defined __bdver2__		\
       || defined __bdver3__ || defined __bdver4__		\
       || defined __btver1__ || defined __btver2__)
# define HAS_CPUID 1
# define HAS_I586 1
# define HAS_I686 1
#else
# define HAS_CPUID 0
# define HAS_I586 HAS_ARCH_FEATURE (I586)
# define HAS_I686 HAS_ARCH_FEATURE (I686)
#endif

#endif  /* cpu_features_h */