/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>
#include <dl-hwcap2.h>
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
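/* NB: with this preference bit set, executable pages may be mapped with
   MAP_32BIT, which keeps them in the low 2 GB of the address space at
   the cost of reduced ASLR entropy.  */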
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
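  /* NB: CPU_FEATURE_SET_ACTIVE copies a feature's raw CPUID bit into
     its active ("usable") bit, CPU_FEATURE_SET forces the active bit
     on, and CPU_FEATURES_CPU_P tests only the raw CPUID bit.  */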
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);
  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
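      /* NB: xgetbv with ecx == 0 reads XCR0, which reports the register
         states the kernel saves and restores on context switch; the
         checks below only mark a vector feature active once the states
         it needs are OS-enabled.  */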
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are
                     faster on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-IFMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
              /* Determine if AVX-NE-CONVERT is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if AVX-VNNI-INT8 is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }
          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state
                  | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }
      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
          /* Determine if AMX_FP16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
          /* Determine if AMX_COMPLEX is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
        }
      /* APX is usable only if the APX state is supported by kernel.  */
      if ((xcrlow & bit_APX_state) != 0)
        CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);
      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);
      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;
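              /* NB: XSAVE requires a 64-byte aligned buffer;
                 _dl_runtime_resolve reserves this many bytes of stack,
                 saving the integer registers below STATE_SAVE_OFFSET and
                 the extended register state above it.  */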
              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;
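                  /* NB: components 0 (x87) and 1 (SSE) live in the
                     512-byte legacy area; component 2 (AVX) starts at
                     offset 576, right after the 64-byte XSAVE header
                     that follows the legacy area (512 + 64 = 576).  */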
                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }
  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }
  dl_check_hwcap2 (cpu_features);

  cpu_features->isa_1 = get_isa_level (cpu_features);
}
static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}
static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }
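  /* NB: CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits
     7:4, family in bits 11:8, extended model in bits 19:16 (already
     shifted left by 4 above) and extended family in bits 27:20.
     Architecturally the extended family only applies to family 0x0f;
     the extended model also extends family 0x06, which the callers
     handle per vendor.  */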
  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }
  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);
  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);
  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");
/* Intel Family-6 microarch list.  */
enum
{
  /* Atom processors.  */
  INTEL_ATOM_BONNELL,
  INTEL_ATOM_SILVERMONT,
  INTEL_ATOM_AIRMONT,
  INTEL_ATOM_GOLDMONT,
  INTEL_ATOM_GOLDMONT_PLUS,
  INTEL_ATOM_SIERRAFOREST,
  INTEL_ATOM_GRANDRIDGE,
  INTEL_ATOM_TREMONT,

  /* Bigcore processors.  */
  INTEL_BIGCORE_MEROM,
  INTEL_BIGCORE_PENRYN,
  INTEL_BIGCORE_DUNNINGTON,
  INTEL_BIGCORE_NEHALEM,
  INTEL_BIGCORE_WESTMERE,
  INTEL_BIGCORE_SANDYBRIDGE,
  INTEL_BIGCORE_IVYBRIDGE,
  INTEL_BIGCORE_HASWELL,
  INTEL_BIGCORE_BROADWELL,
  INTEL_BIGCORE_SKYLAKE,
  INTEL_BIGCORE_KABYLAKE,
  INTEL_BIGCORE_COMETLAKE,
  INTEL_BIGCORE_SKYLAKE_AVX512,
  INTEL_BIGCORE_CANNONLAKE,
  INTEL_BIGCORE_ICELAKE,
  INTEL_BIGCORE_TIGERLAKE,
  INTEL_BIGCORE_ROCKETLAKE,
  INTEL_BIGCORE_SAPPHIRERAPIDS,
  INTEL_BIGCORE_RAPTORLAKE,
  INTEL_BIGCORE_EMERALDRAPIDS,
  INTEL_BIGCORE_METEORLAKE,
  INTEL_BIGCORE_LUNARLAKE,
  INTEL_BIGCORE_ARROWLAKE,
  INTEL_BIGCORE_GRANITERAPIDS,

  /* Mixed (bigcore + atom SOC).  */
  INTEL_MIXED_LAKEFIELD,
  INTEL_MIXED_ALDERLAKE,

  /* KNL.  */
  INTEL_KNIGHTS_MILL,
  INTEL_KNIGHTS_LANDING,

  INTEL_UNKNOWN,
};
static unsigned int
intel_get_fam6_microarch (unsigned int model,
                          __attribute__ ((unused)) unsigned int stepping)
{
  switch (model)
    {
    case 0x1C:
    case 0x26:
      return INTEL_ATOM_BONNELL;
    case 0x27:
    case 0x35:
    case 0x36:
      /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
         (microarchitecturally identical).  */
      return INTEL_ATOM_BONNELL;
    case 0x37:
    case 0x4A:
    case 0x4D:
    case 0x5D:
      return INTEL_ATOM_SILVERMONT;
    case 0x4C:
    case 0x5A:
    case 0x75:
      return INTEL_ATOM_AIRMONT;
    case 0x5C:
    case 0x5F:
      return INTEL_ATOM_GOLDMONT;
    case 0x7A:
      return INTEL_ATOM_GOLDMONT_PLUS;
    case 0xAF:
      return INTEL_ATOM_SIERRAFOREST;
    case 0xB6:
      return INTEL_ATOM_GRANDRIDGE;
    case 0x86:
    case 0x96:
    case 0x9C:
      return INTEL_ATOM_TREMONT;
    case 0x0F:
    case 0x16:
      return INTEL_BIGCORE_MEROM;
    case 0x17:
      return INTEL_BIGCORE_PENRYN;
    case 0x1D:
      return INTEL_BIGCORE_DUNNINGTON;
    case 0x1A:
    case 0x1E:
    case 0x1F:
    case 0x2E:
      return INTEL_BIGCORE_NEHALEM;
    case 0x25:
    case 0x2C:
    case 0x2F:
      return INTEL_BIGCORE_WESTMERE;
    case 0x2A:
    case 0x2D:
      return INTEL_BIGCORE_SANDYBRIDGE;
    case 0x3A:
    case 0x3E:
      return INTEL_BIGCORE_IVYBRIDGE;
    case 0x3C:
    case 0x3F:
    case 0x45:
    case 0x46:
      return INTEL_BIGCORE_HASWELL;
    case 0x3D:
    case 0x47:
    case 0x4F:
    case 0x56:
      return INTEL_BIGCORE_BROADWELL;
    case 0x4E:
    case 0x5E:
      return INTEL_BIGCORE_SKYLAKE;
    case 0x8E:
      /* Amberlake, Whiskeylake and Coffeelake share this model number.
         All of these are derivatives of Kabylake (Skylake client).  */
      return INTEL_BIGCORE_KABYLAKE;
    case 0x9E:
      /* Stepping = {10, 11, 12, 13}
            -> Coffeelake
         Coffeelake is a derivative of Kabylake (Skylake client).  */
      return INTEL_BIGCORE_KABYLAKE;
    case 0xA5:
    case 0xA6:
      return INTEL_BIGCORE_COMETLAKE;
    case 0x66:
      return INTEL_BIGCORE_CANNONLAKE;
    case 0x55:
      /* Cascadelake and Cooperlake also use model 0x55.
         These are all microarchitecturally identical, so use
         Skylake-avx512 for all of them.  */
      return INTEL_BIGCORE_SKYLAKE_AVX512;
    case 0x6A:
    case 0x6C:
    case 0x7D:
    case 0x7E:
      return INTEL_BIGCORE_ICELAKE;
    case 0x8C:
    case 0x8D:
      return INTEL_BIGCORE_TIGERLAKE;
    case 0xA7:
      return INTEL_BIGCORE_ROCKETLAKE;
    case 0x8F:
      return INTEL_BIGCORE_SAPPHIRERAPIDS;
    case 0xB7:
    case 0xBA:
    case 0xBF:
      return INTEL_BIGCORE_RAPTORLAKE;
    case 0xCF:
      return INTEL_BIGCORE_EMERALDRAPIDS;
    case 0xAA:
    case 0xAC:
      return INTEL_BIGCORE_METEORLAKE;
    case 0xBD:
      return INTEL_BIGCORE_LUNARLAKE;
    case 0xC6:
      return INTEL_BIGCORE_ARROWLAKE;
    case 0xAD:
    case 0xAE:
      return INTEL_BIGCORE_GRANITERAPIDS;
    case 0x8A:
      return INTEL_MIXED_LAKEFIELD;
    case 0x97:
    case 0x9A:
    case 0xBE:
      return INTEL_MIXED_ALDERLAKE;
    case 0x85:
      return INTEL_KNIGHTS_MILL;
    case 0x57:
      return INTEL_KNIGHTS_LANDING;
    default:
      return INTEL_UNKNOWN;
    }
}
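/* NB: the case values above are CPUID family-6 model numbers; the same
   IDs appear in the Linux kernel's arch/x86/include/asm/intel-family.h.  */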
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

  cpu_features->cachesize_non_temporal_divisor = 4;

  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
654 /* This spells out "GenuineIntel". */
655 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
657 unsigned int extended_model
;
659 kind
= arch_kind_intel
;
661 get_common_indices (cpu_features
, &family
, &model
, &extended_model
,
664 get_extended_indices (cpu_features
);
666 update_active (cpu_features
);
670 model
+= extended_model
;
671 unsigned int microarch
672 = intel_get_fam6_microarch (model
, stepping
);
          switch (microarch)
            {
            /* Atom / KNL tuning.  */
            case INTEL_ATOM_BONNELL:
              /* BSF is slow on Bonnell.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;
            /* Unaligned load versions are faster than SSSE3
               on Airmont, Silvermont, Goldmont, and Goldmont Plus.  */
            case INTEL_ATOM_AIRMONT:
            case INTEL_ATOM_SILVERMONT:
            case INTEL_ATOM_GOLDMONT:
            case INTEL_ATOM_GOLDMONT_PLUS:

            /* Knights Landing.  Enable Silvermont optimizations.  */
            case INTEL_KNIGHTS_LANDING:

              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;
            case INTEL_ATOM_TREMONT:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;
            /*
             Default tuned Knights microarch.
             case INTEL_KNIGHTS_MILL:
             */

            /*
             Default tuned atom microarch.
             case INTEL_ATOM_SIERRAFOREST:
             case INTEL_ATOM_GRANDRIDGE:
             */
            /* Bigcore/Default Tuning.  */
            default:
            default_tuning:
              /* Unknown family 0x06 processors.  Assuming this is one
                 of Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;

            enable_modern_features:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            case INTEL_BIGCORE_NEHALEM:
            case INTEL_BIGCORE_WESTMERE:
              /* Older CPUs prefer non-temporal stores at lower threshold.  */
              cpu_features->cachesize_non_temporal_divisor = 8;
              goto enable_modern_features;
            /* Older Bigcore microarch (smaller non-temporal store
               threshold).  */
            case INTEL_BIGCORE_SANDYBRIDGE:
            case INTEL_BIGCORE_IVYBRIDGE:
            case INTEL_BIGCORE_HASWELL:
            case INTEL_BIGCORE_BROADWELL:
              cpu_features->cachesize_non_temporal_divisor = 8;
              goto default_tuning;
            /* Newer Bigcore microarch (larger non-temporal store
               threshold).  */
            case INTEL_BIGCORE_SKYLAKE:
            case INTEL_BIGCORE_KABYLAKE:
            case INTEL_BIGCORE_COMETLAKE:
            case INTEL_BIGCORE_SKYLAKE_AVX512:
            case INTEL_BIGCORE_CANNONLAKE:
            case INTEL_BIGCORE_ICELAKE:
            case INTEL_BIGCORE_TIGERLAKE:
            case INTEL_BIGCORE_ROCKETLAKE:
            case INTEL_BIGCORE_RAPTORLAKE:
            case INTEL_BIGCORE_METEORLAKE:
            case INTEL_BIGCORE_LUNARLAKE:
            case INTEL_BIGCORE_ARROWLAKE:
            case INTEL_BIGCORE_SAPPHIRERAPIDS:
            case INTEL_BIGCORE_EMERALDRAPIDS:
            case INTEL_BIGCORE_GRANITERAPIDS:
              cpu_features->cachesize_non_temporal_divisor = 2;
              goto default_tuning;
            /* Default tuned Mixed (bigcore + atom SOC).  */
            case INTEL_MIXED_LAKEFIELD:
            case INTEL_MIXED_ALDERLAKE:
              cpu_features->cachesize_non_temporal_divisor = 2;
              goto default_tuning;
            }
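          /* NB: dl-cacheinfo.h divides the shared cache size by
             cachesize_non_temporal_divisor to derive the default
             non-temporal store threshold, so a smaller divisor means
             memcpy switches to non-temporal stores only at larger
             sizes.  */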
          /* Disable TSX on some processors to avoid TSX on kernels that
             weren't updated with the latest microcode package (which
             disables broken feature by default).  */
          switch (microarch)
            {
            case INTEL_BIGCORE_SKYLAKE_AVX512:
              /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX.  */
              if (stepping <= 5)
                goto disable_tsx;
              break;

            case INTEL_BIGCORE_KABYLAKE:
              /* NB: Although the errata documents that for model == 0x8e
                 (kabylake skylake client), only 0xb stepping or lower are
                 impacted, the intention of the errata was to disable TSX on
                 all client processors on all steppings.  Include 0xc
                 stepping which is an Intel Core i7-8665U, a client mobile
                 processor.  */
              if (stepping > 0xc)
                break;
              /* Fall through.  */

            case INTEL_BIGCORE_SKYLAKE:
              /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
                 processors listed in:

https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
               */
            disable_tsx:
              CPU_FEATURE_UNSET (cpu_features, HLE);
              CPU_FEATURE_UNSET (cpu_features, RTM);
              CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
              break;
            case INTEL_BIGCORE_HASWELL:
              /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
                 TSX.  Haswell also includes other model numbers that have
                 broken TSX.  */
              if (model == 0x3f && stepping >= 4)
                break;

              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }
      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
             when ZMM load and store instructions are used.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
            cpu_features->preferred[index_arch_Prefer_No_AVX512]
              |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
        }
      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
854 /* This spells out "AuthenticAMD" or "HygonGenuine". */
855 else if ((ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
856 || (ebx
== 0x6f677948 && ecx
== 0x656e6975 && edx
== 0x6e65476e))
858 unsigned int extended_model
;
860 kind
= arch_kind_amd
;
862 get_common_indices (cpu_features
, &family
, &model
, &extended_model
,
865 get_extended_indices (cpu_features
);
867 update_active (cpu_features
);
      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
893 /* This spells out "CentaurHauls" or " Shanghai ". */
894 else if ((ebx
== 0x746e6543 && ecx
== 0x736c7561 && edx
== 0x48727561)
895 || (ebx
== 0x68532020 && ecx
== 0x20206961 && edx
== 0x68676e61))
897 unsigned int extended_model
, stepping
;
899 kind
= arch_kind_zhaoxin
;
901 get_common_indices (cpu_features
, &family
, &model
, &extended_model
,
904 get_extended_indices (cpu_features
);
906 update_active (cpu_features
);
908 model
+= extended_model
;
911 if (model
== 0xf || model
== 0x19)
913 CPU_FEATURE_UNSET (cpu_features
, AVX
);
914 CPU_FEATURE_UNSET (cpu_features
, AVX2
);
916 cpu_features
->preferred
[index_arch_Slow_SSE4_2
]
917 |= bit_arch_Slow_SSE4_2
;
919 cpu_features
->preferred
[index_arch_AVX_Fast_Unaligned_Load
]
920 &= ~bit_arch_AVX_Fast_Unaligned_Load
;
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }
  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;
no_cpuid:
  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
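  /* NB: the callbacks above run as the tunables framework parses
     GLIBC_TUNABLES, so e.g. GLIBC_TUNABLES=glibc.cpu.hwcaps=-AVX2 can
     mask a feature bit off before any IFUNC selection happens.  */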
  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }
  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;
      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }
#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT);
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK);

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;
      /* Check if IBT and SHSTK are enabled by kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
          || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
        {
          /* Disable IBT and/or SHSTK if they are enabled by kernel, but
             disabled by environment variable:

             GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
           */
          unsigned int cet_feature = 0;
          if (!CPU_FEATURE_USABLE (IBT))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
          if (!CPU_FEATURE_USABLE (SHSTK))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

          if (cet_feature)
            {
              int res = dl_cet_disable_cet (cet_feature);

              /* Clear the disabled bits in dl_x86_feature_1.  */
              if (res == 0)
                GL(dl_x86_feature_1) &= ~cet_feature;
            }

          /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
             lock CET if IBT or SHSTK is enabled permissively.  */
          if (GL(dl_x86_feature_control).ibt != cet_permissive
              && GL(dl_x86_feature_control).shstk != cet_permissive)
            dl_cet_lock_cet ();
        }
    }

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}