/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2023 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <libc-pointer-arith.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>
extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
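
/* Note: this callback is only registered below in init_cpu_features via
   TUNABLE_GET (prefer_map_32bit_exec, ...).  It merely records the request
   in the preferred[] bits; code that maps executable pages can then test
   Prefer_MAP_32BIT_EXEC and add MAP_32BIT where appropriate.  */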
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;
static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);
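
  /* Everything above comes straight from CPUID: these instructions only
     touch register state the kernel always saves and restores, so the
     cpuid bit alone is enough to mark a feature active.  The AVX, AVX-512
     and AMX families below additionally need OS-managed state, which is
     why they are gated on OSXSAVE and XCR0 further down.  */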

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);

  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));

      /* Are YMM and XMM states usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;

                  /* Determine if AVX-VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
                }
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
        }

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);

          unsigned int xsave_state_full_size
            = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

          cpu_features->xsave_state_size
            = xsave_state_full_size;
          cpu_features->xsave_state_full_size
            = xsave_state_full_size;
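
          /* CPUID leaf 0xd, sub-leaf 0: EBX is the size in bytes of the
             XSAVE area covering the state components currently enabled in
             XCR0, so adding STATE_SAVE_OFFSET (the integer-register save
             area used by _dl_runtime_resolve) and rounding up to 64 bytes
             gives the full save-area size.  The XSAVEC path below
             recomputes a usually smaller size for the compacted format by
             walking the individual sub-leaves.  */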

          /* Check if XSAVEC is available.  */
          if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
            {
              unsigned int xstate_comp_offsets[32];
              unsigned int xstate_comp_sizes[32];
              unsigned int i;

              xstate_comp_offsets[0] = 0;
              xstate_comp_offsets[1] = 160;
              xstate_comp_offsets[2] = 576;
              xstate_comp_sizes[0] = 160;
              xstate_comp_sizes[1] = 256;

              for (i = 2; i < 32; i++)
                {
                  if ((STATE_SAVE_MASK & (1 << i)) != 0)
                    {
                      __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                      xstate_comp_sizes[i] = eax;
                    }
                  else
                    {
                      ecx = 0;
                      xstate_comp_sizes[i] = 0;
                    }

                  if (i > 2)
                    {
                      xstate_comp_offsets[i]
                        = (xstate_comp_offsets[i - 1]
                           + xstate_comp_sizes[i - 1]);
                      if ((ecx & (1 << 1)) != 0)
                        xstate_comp_offsets[i]
                          = ALIGN_UP (xstate_comp_offsets[i], 64);
                    }
                }

              unsigned int size
                = xstate_comp_offsets[31] + xstate_comp_sizes[31];
              cpu_features->xsave_state_size
                = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
              CPU_FEATURE_SET (cpu_features, XSAVEC);
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  cpu_features->isa_1 = get_isa_level (cpu_features);
}
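
/* Note: get_isa_level (from <get-isa-level.h>) condenses the active
   feature bits computed by update_active into the x86-64 ISA level
   (baseline, v2, v3, v4) mask; the dynamic loader can later compare it
   against a binary's GNU_PROPERTY_X86_ISA_1_NEEDED note.  */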

static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}
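
/* The extended leaves read above carry, among other things, the
   LAHF64_SAHF64, LZCNT, SSE4A, PREFETCHW and TBM bits (0x80000001),
   invariant-TSC data (0x80000007) and the WBNOINVD bit plus address-size
   information (0x80000008) that feature checks such as those in
   update_active consume.  */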

static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);
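
  /* The three leaves above are optional: 0xd/ECX=1 enumerates
     XSAVEOPT/XSAVEC/XGETBV-with-ECX=1/XFD, 0x14 is the Intel Processor
     Trace leaf (PTWRITE), and 0x19 is the Key Locker leaf; hence the
     max_cpuid guards.  */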

  dl_check_minsigstacksize (cpu_features);
}

_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
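
  /* The vendor string comes back in EBX, EDX, ECX (in that order), four
     little-endian bytes per register: e.g. EBX == 0x756e6547 is "Genu",
     EDX == 0x49656e69 is "ineI" and ECX == 0x6c65746e is "ntel".  */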

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;

      /* BSF is slow on Atom.  */
      cpu_features->preferred[index_arch_Slow_BSF]
        |= bit_arch_Slow_BSF;

      /* Knights Landing.  Enable Silvermont optimizations.  */

      /* Unaligned load versions are faster than SSSE3.  */

      /* Unaligned load versions are faster than SSSE3.  */

      /* Airmont is a die shrink of Silvermont.  */

      /* Unaligned load versions are faster than SSSE3.  */
      cpu_features->preferred[index_arch_Fast_Unaligned_Load]
        |= (bit_arch_Fast_Unaligned_Load
            | bit_arch_Fast_Unaligned_Copy
            | bit_arch_Prefer_PMINUB_for_stringop
            | bit_arch_Slow_SSE4_2);

      /* Enable rep string instructions, unaligned load, unaligned
         copy, pminub and avoid SSE 4.2 on Tremont.  */
      cpu_features->preferred[index_arch_Fast_Rep_String]
        |= (bit_arch_Fast_Rep_String
            | bit_arch_Fast_Unaligned_Load
            | bit_arch_Fast_Unaligned_Copy
            | bit_arch_Prefer_PMINUB_for_stringop
            | bit_arch_Slow_SSE4_2);

      /* Unknown family 0x06 processors.  Assuming this is one
         of Core i3/i5/i7 processors if AVX is available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX))
        {
          /* Rep string instructions, unaligned load, unaligned copy,
             and pminub are fast on Intel Core i3, i5 and i7.  */
          cpu_features->preferred[index_arch_Fast_Rep_String]
            |= (bit_arch_Fast_Rep_String
                | bit_arch_Fast_Unaligned_Load
                | bit_arch_Fast_Unaligned_Copy
                | bit_arch_Prefer_PMINUB_for_stringop);
        }

      /* Disable TSX on some processors to avoid TSX on kernels that
         weren't updated with the latest microcode package (which
         disables the broken feature by default).  */

      /* NB: Although the errata documents that for model == 0x8e,
         only 0xb stepping or lower are impacted, the intention of
         the errata was to disable TSX on all client processors on
         all steppings.  Include 0xc stepping which is an Intel
         Core i7-8665U, a client mobile processor.  */

      /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
         processors listed in:

         https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
       */
      CPU_FEATURE_UNSET (cpu_features, HLE);
      CPU_FEATURE_UNSET (cpu_features, RTM);
      CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);

      /* Xeon E7 v3 with stepping >= 4 has working TSX.  */

      /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
         with stepping >= 4) to avoid TSX on kernels that weren't
         updated with the latest microcode package (which disables
         the broken feature by default).  */
      CPU_FEATURE_UNSET (cpu_features, RTM);

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;

      /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
         when ZMM load and store instructions are used.  */
      if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
        cpu_features->preferred[index_arch_Prefer_No_AVX512]
          |= bit_arch_Prefer_No_AVX512;

      /* Avoid RTM abort triggered by VZEROUPPER inside a
         transactionally executing RTM region.  */
      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
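
      /* The preferred[] bits set in this Intel block are glibc-internal
         hints rather than CPUID bits; the string/memory IFUNC selectors
         read them through CPU_FEATURES_ARCH_P (and they can also be
         adjusted with the glibc.cpu.hwcaps tunable) when choosing an
         implementation.  */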
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (model >= 0x60 && model <= 0x7f)
        {
          cpu_features->preferred[index_arch_Fast_Unaligned_Load]
            |= (bit_arch_Fast_Unaligned_Load
                | bit_arch_Fast_Copy_Backward);

          /* Unaligned AVX loads are slower.  */
          cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
            &= ~bit_arch_AVX_Fast_Unaligned_Load;
        }
    }
  /* This spells out "CentaurHauls" or " Shanghai ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
      if (model == 0xf || model == 0x19)
        {
          CPU_FEATURE_UNSET (cpu_features, AVX);
          CPU_FEATURE_UNSET (cpu_features, AVX2);

          cpu_features->preferred[index_arch_Slow_SSE4_2]
            |= bit_arch_Slow_SSE4_2;

          cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
            &= ~bit_arch_AVX_Fast_Unaligned_Load;
        }
      else if (family == 0x7)
        {
          CPU_FEATURE_UNSET (cpu_features, AVX);
          CPU_FEATURE_UNSET (cpu_features, AVX2);

          cpu_features->preferred[index_arch_Slow_SSE4_2]
            |= bit_arch_Slow_SSE4_2;

          cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
            &= ~bit_arch_AVX_Fast_Unaligned_Load;
        }
      else if (model == 0x3b)
        {
          CPU_FEATURE_UNSET (cpu_features, AVX);
          CPU_FEATURE_UNSET (cpu_features, AVX2);

          cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
            &= ~bit_arch_AVX_Fast_Unaligned_Load;
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

no_cpuid:

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
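
  /* Both callbacks are driven by the GLIBC_TUNABLES environment variable,
     e.g. GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK to mask individual
     features, or glibc.cpu.prefer_map_32bit_exec=1 to set the
     Prefer_MAP_32BIT_EXEC bit handled above.  */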

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if neither XSAVE nor XSAVEC is usable.  */
      cpu_features->xsave_state_size = 0;
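
      /* Everything cleared below operates on AVX, AVX-512 or AMX register
         state that exists only when the kernel manages it via XSAVE, so
         none of it can be usable without a working xsave area.  */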

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

  /* Reuse dl_platform, dl_hwcap and dl_hwcap_mask for x86.  The
     glibc.cpu.hwcap_mask tunable is initialized already, so no
     need to do it here.  */
  GLRO(dl_hwcap_mask) = HWCAP_IMPORTANT;

  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }

          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
              && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
              && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
            GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }

  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";

  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));

  /* Check CET status.  */
  unsigned int cet_status = get_cet_status ();

  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
    CPU_FEATURE_UNSET (cpu_features, IBT);
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
    CPU_FEATURE_UNSET (cpu_features, SHSTK);

  GL(dl_x86_feature_1) = cet_status;

  /* Check if IBT and SHSTK are enabled by kernel.  */
  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT)
      || (cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK))
    {
      /* Disable IBT and/or SHSTK if they are enabled by kernel, but
         disabled by environment variable:

         GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK  */
      unsigned int cet_feature = 0;
      if (!CPU_FEATURE_USABLE (IBT))
        cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
      if (!CPU_FEATURE_USABLE (SHSTK))
        cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;

      int res = dl_cet_disable_cet (cet_feature);

      /* Clear the disabled bits in dl_x86_feature_1.  */
      if (res == 0)
        GL(dl_x86_feature_1) &= ~cet_feature;

      /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
         lock CET if IBT or SHSTK is enabled permissively.  */
      if (GL(dl_x86_feature_control).ibt != cet_permissive
          && GL(dl_x86_feature_control).shstk != cet_permissive)
        dl_cet_lock_cet ();
    }

  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
}