/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>

#if HAVE_TUNABLES
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

# ifdef __LP64__
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
# endif

# if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
# endif
#endif

#if CET_ENABLED
# include <dl-cet.h>
#endif

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
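      /* "xgetbv" with ECX == 0 reads XCR0, where the OS reports which
         register-state components it saves and restores; the bit_*_state
         masks tested below correspond to those XCR0 bits.  */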
      /* Are YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
        }

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;
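
          /* CPUID leaf 0xd, sub-leaf 0: EBX is the size of the XSAVE
             area required, in the standard (non-compacted) format, for
             all features currently enabled in XCR0.  */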
          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;
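
                  /* The hard-coded entries describe the legacy region:
                     x87 state at offset 0 (160 bytes counted here) and
                     XMM state at offset 160 (256 bytes); 576 is the
                     512-byte legacy area plus the 64-byte XSAVE header,
                     where the compacted extended area begins.  */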
                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;

                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
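                          /* ECX bit 1 of CPUID leaf 0xd, sub-leaf i means
                             the component must start on a 64-byte boundary
                             in the compacted format.  */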
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  cpu_features->isa_1 = get_isa_level (cpu_features);
}

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
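      /* CPUID leaf 1 EAX layout: stepping in bits 3:0, model in bits 7:4,
         family in bits 11:8, extended model in bits 19:16 and extended
         family in bits 27:20.  The extended model is kept shifted into
         the high nibble so it can simply be added to the base model.  */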
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}
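
/* The code below ORs several bit_arch_* flags into a single preferred[]
   element at a time, which is only correct if all of those flags live at
   the same array index; the assertion checks that assumption.  */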
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
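  /* CPUID leaf 0 returns the highest supported standard leaf in EAX
     (saved as basic.max_cpuid) and the vendor string in EBX, EDX and
     ECX, which the hexadecimal constants below are compared against.  */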

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
        {
          model += extended_model;
          switch (model)
            {
            case 0x1c:
            case 0x26:
              /* BSF is slow on Atom.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

            case 0x57:
              /* Knights Landing.  Enable Silvermont optimizations.  */

            case 0x7a:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont Plus.  */

            case 0x5c:
            case 0x5f:
              /* Unaligned load versions are faster than SSSE3
                 on Goldmont.  */

            case 0x4c:
            case 0x5a:
            case 0x75:
              /* Airmont is a die shrink of Silvermont.  */

            case 0x37:
            case 0x4a:
            case 0x4d:
            case 0x5d:
              /* Unaligned load versions are faster than SSSE3
                 on Silvermont.  */
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            case 0x86:
            case 0x96:
            case 0x9c:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            default:
              /* Unknown family 0x06 processors.  Assume this is one of
                 the Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;
              /* Fall through.  */

            case 0x1a:
            case 0x1e:
            case 0x1f:
            case 0x25:
            case 0x2c:
            case 0x2e:
            case 0x2f:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;
            }

          /* Disable TSX on some processors to avoid TSX on kernels that
             weren't updated with the latest microcode package (which
             disables the broken feature by default).  */
          switch (model)
            {
            case 0x55:
              if (stepping <= 5)
                goto disable_tsx;
              break;
            case 0x8e:
              /* NB: Although the errata documents that for model == 0x8e,
                 only 0xb stepping or lower are impacted, the intention of
                 the errata was to disable TSX on all client processors on
                 all steppings.  Include 0xc stepping which is an Intel
                 Core i7-8665U, a client mobile processor.  */
            case 0x9e:
              if (stepping > 0xc)
                break;
              /* Fall through.  */
            case 0x4e:
            case 0x5e:
              {
                /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
                   processors listed in:

                   https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
                 */
              disable_tsx:
                CPU_FEATURE_UNSET (cpu_features, HLE);
                CPU_FEATURE_UNSET (cpu_features, RTM);
                CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
              }
              break;
            case 0x3f:
              /* Xeon E7 v3 with stepping >= 4 has working TSX.  */
              if (stepping >= 4)
                break;
              /* Fall through.  */
            case 0x3c:
            case 0x45:
            case 0x46:
              /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
                 with stepping >= 4) to avoid TSX on kernels that weren't
                 updated with the latest microcode package (which disables
                 the broken feature by default).  */
              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
             when ZMM load and store instructions are used.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
            cpu_features->preferred[index_arch_Prefer_No_AVX512]
              |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
        }

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

#if HAVE_TUNABLES
  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
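  /* glibc.cpu.hwcaps takes a comma-separated list of feature names; a
     leading '-' turns a feature off, as in the
     GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK example further down.  */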

# ifdef __LP64__
  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
# endif

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

#elif defined SHARED
  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
     glibc.cpu.hwcap_mask tunable is initialized already, so no
     need to do this.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;
#endif

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
# if HAVE_TUNABLES
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
# endif

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();
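  /* get_cet_status reports which of IBT and SHSTK the kernel enabled for
     this process, as GNU_PROPERTY_X86_FEATURE_1_* bits.  */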

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT)
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK)

  if (cet_status)
    {
      GL(dl_x86_feature_1) = cet_status;

# ifndef SHARED
      /* Check if IBT and SHSTK are enabled by kernel.  */
      if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
          || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
        {
          /* Disable IBT and/or SHSTK if they are enabled by kernel, but
             disabled by environment variable:

             GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
           */
          unsigned int cet_feature = 0;
          if (!CPU_FEATURE_USABLE (IBT))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
          if (!CPU_FEATURE_USABLE (SHSTK))
            cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

          if (cet_feature)
            {
              int res = dl_cet_disable_cet (cet_feature);

              /* Clear the disabled bits in dl_x86_feature_1.  */
              if (res == 0)
                GL(dl_x86_feature_1) &= ~cet_feature;
            }

          /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
             lock CET if IBT or SHSTK is enabled permissively.  */
          if (GL(dl_x86_feature_control).ibt != cet_permissive
              && GL(dl_x86_feature_control).shstk != cet_permissive)
            dl_cet_lock_cet ();
        }
# endif
    }
#endif

#ifndef SHARED
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}