x86-64: Don't use SSE resolvers for ISA level 3 or above
[glibc.git] / sysdeps / x86 / cpu-features.c
blob 0ad0a78f67a2c3d26630d065d805a4809aa3f2c5
/* Initialize CPU feature data.
   This file is part of the GNU C Library.
   Copyright (C) 2008-2024 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
#include <isa-level.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
#include <dl-minsigstacksize.h>
#include <dl-hwcap2.h>

extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
  attribute_hidden;

#if defined SHARED
extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden;
extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden;
extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden;

# ifdef __x86_64__
#  include <dl-plt-rewrite.h>

static void
TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
{
  /* We must be careful about where we put the call to
     dl_plt_rewrite_supported() since it may generate
     spurious SELinux log entries.  It should only be
     attempted if the user requested a PLT rewrite.  */
  if (valp->numval != 0 && dl_plt_rewrite_supported ())
    {
      /* Use JMPABS only on APX processors.  */
      const struct cpu_features *cpu_features = __get_cpu_features ();
      GL (dl_x86_feature_control).plt_rewrite
        = ((valp->numval > 1 && CPU_FEATURE_PRESENT_P (cpu_features, APX_F))
           ? plt_rewrite_jmpabs
           : plt_rewrite_jmp);
    }
}
# else
extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden;
# endif
#endif

#ifdef __x86_64__
extern void _dl_runtime_resolve_fxsave (void) attribute_hidden;
extern void _dl_runtime_resolve_xsave (void) attribute_hidden;
extern void _dl_runtime_resolve_xsavec (void) attribute_hidden;
#endif

#ifdef __LP64__
static void
TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
{
  if (valp->numval)
    GLRO(dl_x86_cpu_features).preferred[index_arch_Prefer_MAP_32BIT_EXEC]
      |= bit_arch_Prefer_MAP_32BIT_EXEC;
}
#endif

#if CET_ENABLED
extern void TUNABLE_CALLBACK (set_x86_ibt) (tunable_val_t *)
  attribute_hidden;
extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *)
  attribute_hidden;

# include <dl-cet.h>
#endif

static void
update_active (struct cpu_features *cpu_features)
{
  /* Copy the cpuid bits to active bits for CPU features whose usability
     in user space can be detected without additional OS support.  */
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PCLMULQDQ);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSSE3);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPXCHG16B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4_2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVBE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, POPCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, AES);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSXSAVE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSC);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CX8);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMOV);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFSH);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MMX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FXSR);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HTT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, HLE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, BMI2);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ERMS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDSEED);
  CPU_FEATURE_SET_ACTIVE (cpu_features, ADX);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLFLUSHOPT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLWB);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHA);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHWT1);
  CPU_FEATURE_SET_ACTIVE (cpu_features, OSPKE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WAITPKG);
  CPU_FEATURE_SET_ACTIVE (cpu_features, GFNI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDPID);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDRAND);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CLDEMOTE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIRI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, MOVDIR64B);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RTM_ALWAYS_ABORT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SERIALIZE);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TSXLDTRK);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LAHF64_SAHF64);
  CPU_FEATURE_SET_ACTIVE (cpu_features, LZCNT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SSE4A);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHW);
  CPU_FEATURE_SET_ACTIVE (cpu_features, TBM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RDTSCP);
  CPU_FEATURE_SET_ACTIVE (cpu_features, WBNOINVD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, RAO_INT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, CMPCCXADD);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FZLRM);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, FSRCS);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PREFETCHI);
  CPU_FEATURE_SET_ACTIVE (cpu_features, PTWRITE);

  if (!CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
    CPU_FEATURE_SET_ACTIVE (cpu_features, RTM);

#if CET_ENABLED && 0
  CPU_FEATURE_SET_ACTIVE (cpu_features, IBT);
  CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif
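
  /* Track which vector register states the OS has enabled in XCR0; this is
     used further below to decide which AVX10 vector lengths are usable.  */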
  enum
  {
    os_xmm = 1,
    os_ymm = 2,
    os_zmm = 4
  } os_vector_size = os_xmm;
  /* Can we call xgetbv?  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
    {
      unsigned int xcrlow;
      unsigned int xcrhigh;
      CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
      asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
      /* Is YMM and XMM state usable?  */
      if ((xcrlow & (bit_YMM_state | bit_XMM_state))
          == (bit_YMM_state | bit_XMM_state))
        {
          /* Determine if AVX is usable.  */
          if (CPU_FEATURES_CPU_P (cpu_features, AVX))
            {
              os_vector_size |= os_ymm;
              CPU_FEATURE_SET (cpu_features, AVX);
              /* The following features depend on AVX being usable.  */
              /* Determine if AVX2 is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX2))
                {
                  CPU_FEATURE_SET (cpu_features, AVX2);

                  /* Unaligned loads with 256-bit AVX registers are faster
                     on Intel/AMD processors with AVX2.  */
                  cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                    |= bit_arch_AVX_Fast_Unaligned_Load;
                }
              /* Determine if AVX-IFMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_IFMA);
              /* Determine if AVX-NE-CONVERT is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_NE_CONVERT);
              /* Determine if AVX-VNNI is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI);
              /* Determine if AVX-VNNI-INT8 is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, AVX_VNNI_INT8);
              /* Determine if FMA is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, FMA);
              /* Determine if VAES is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VAES);
              /* Determine if VPCLMULQDQ is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, VPCLMULQDQ);
              /* Determine if XOP is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, XOP);
              /* Determine if F16C is usable.  */
              CPU_FEATURE_SET_ACTIVE (cpu_features, F16C);
            }

          /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
             ZMM16-ZMM31 state are enabled.  */
          if ((xcrlow & (bit_Opmask_state | bit_ZMM0_15_state
                         | bit_ZMM16_31_state))
              == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
            {
              os_vector_size |= os_zmm;
              /* Determine if AVX512F is usable.  */
              if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
                {
                  CPU_FEATURE_SET (cpu_features, AVX512F);
                  /* Determine if AVX512CD is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512CD);
                  /* Determine if AVX512ER is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512ER);
                  /* Determine if AVX512PF is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512PF);
                  /* Determine if AVX512VL is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512VL);
                  /* Determine if AVX512DQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512DQ);
                  /* Determine if AVX512BW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512BW);
                  /* Determine if AVX512_4FMAPS is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4FMAPS);
                  /* Determine if AVX512_4VNNIW is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_4VNNIW);
                  /* Determine if AVX512_BITALG is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BITALG);
                  /* Determine if AVX512_IFMA is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_IFMA);
                  /* Determine if AVX512_VBMI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI);
                  /* Determine if AVX512_VBMI2 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VBMI2);
                  /* Determine if AVX512_VNNI is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_VNNI);
                  /* Determine if AVX512_VPOPCNTDQ is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VPOPCNTDQ);
                  /* Determine if AVX512_VP2INTERSECT is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features,
                                          AVX512_VP2INTERSECT);
                  /* Determine if AVX512_BF16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_BF16);
                  /* Determine if AVX512_FP16 is usable.  */
                  CPU_FEATURE_SET_ACTIVE (cpu_features, AVX512_FP16);
                }
            }
        }
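
      /* CPUID leaf 0x24 enumerates AVX10 details; report only the vector
         lengths that the OS has enabled in XCR0 (tracked in os_vector_size
         above).  */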
      if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
          && cpu_features->basic.max_cpuid >= 0x24)
        {
          __cpuid_count (
              0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
              cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
          if (os_vector_size & os_xmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
          if (os_vector_size & os_ymm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
          if (os_vector_size & os_zmm)
            CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
        }

      /* Are XTILECFG and XTILEDATA states usable?  */
      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
          == (bit_XTILECFG_state | bit_XTILEDATA_state))
        {
          /* Determine if AMX_BF16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_BF16);
          /* Determine if AMX_TILE is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_TILE);
          /* Determine if AMX_INT8 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_INT8);
          /* Determine if AMX_FP16 is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_FP16);
          /* Determine if AMX_COMPLEX is usable.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, AMX_COMPLEX);
        }

      /* APX is usable only if the APX state is supported by kernel.  */
      if ((xcrlow & bit_APX_state) != 0)
        CPU_FEATURE_SET_ACTIVE (cpu_features, APX_F);

      /* These features are usable only when OSXSAVE is enabled.  */
      CPU_FEATURE_SET (cpu_features, XSAVE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEOPT);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XSAVEC);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_SET_ACTIVE (cpu_features, XFD);

      /* For _dl_runtime_resolve, set xsave_state_size to xsave area
         size + integer register save size and align it to 64 bytes.  */
      if (cpu_features->basic.max_cpuid >= 0xd)
        {
          unsigned int eax, ebx, ecx, edx;

          __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
          if (ebx != 0)
            {
              unsigned int xsave_state_full_size
                = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);

              cpu_features->xsave_state_size
                = xsave_state_full_size;
              cpu_features->xsave_state_full_size
                = xsave_state_full_size;

              /* Check if XSAVEC is available.  */
              if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
                {
                  unsigned int xstate_comp_offsets[32];
                  unsigned int xstate_comp_sizes[32];
                  unsigned int i;

                  xstate_comp_offsets[0] = 0;
                  xstate_comp_offsets[1] = 160;
                  xstate_comp_offsets[2] = 576;
                  xstate_comp_sizes[0] = 160;
                  xstate_comp_sizes[1] = 256;
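
                  /* Compute the compacted xsave layout: walk the remaining
                     state components, reading each enabled component's size
                     from CPUID leaf 0xd and aligning the running offset to
                     64 bytes when the component requires it.  */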
                  for (i = 2; i < 32; i++)
                    {
                      if ((STATE_SAVE_MASK & (1 << i)) != 0)
                        {
                          __cpuid_count (0xd, i, eax, ebx, ecx, edx);
                          xstate_comp_sizes[i] = eax;
                        }
                      else
                        {
                          ecx = 0;
                          xstate_comp_sizes[i] = 0;
                        }

                      if (i > 2)
                        {
                          xstate_comp_offsets[i]
                            = (xstate_comp_offsets[i - 1]
                               + xstate_comp_sizes[i - 1]);
                          if ((ecx & (1 << 1)) != 0)
                            xstate_comp_offsets[i]
                              = ALIGN_UP (xstate_comp_offsets[i], 64);
                        }
                    }

                  /* Use XSAVEC.  */
                  unsigned int size
                    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
                  if (size)
                    {
                      cpu_features->xsave_state_size
                        = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
                      CPU_FEATURE_SET (cpu_features, XSAVEC);
                    }
                }
            }
        }
    }

  /* Determine if PKU is usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
    CPU_FEATURE_SET (cpu_features, PKU);

  /* Determine if Key Locker instructions are usable.  */
  if (CPU_FEATURES_CPU_P (cpu_features, AESKLE))
    {
      CPU_FEATURE_SET (cpu_features, AESKLE);
      CPU_FEATURE_SET_ACTIVE (cpu_features, KL);
      CPU_FEATURE_SET_ACTIVE (cpu_features, WIDE_KL);
    }

  dl_check_hwcap2 (cpu_features);

  cpu_features->isa_1 = get_isa_level (cpu_features);
}
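
/* Query the extended CPUID leaves (0x80000001, 0x80000007 and 0x80000008)
   when the processor reports them.  */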
static void
get_extended_indices (struct cpu_features *cpu_features)
{
  unsigned int eax, ebx, ecx, edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax >= 0x80000001)
    __cpuid (0x80000001,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000001].cpuid.edx);
  if (eax >= 0x80000007)
    __cpuid (0x80000007,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000007].cpuid.edx);
  if (eax >= 0x80000008)
    __cpuid (0x80000008,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.eax,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ebx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.ecx,
             cpu_features->features[CPUID_INDEX_80000008].cpuid.edx);
}
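
/* Query the basic CPUID leaves (1, 7, 0xd, 0x14 and 0x19) and, when FAMILY
   is non-NULL, extract the family, model and stepping from leaf 1.  */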
static void
get_common_indices (struct cpu_features *cpu_features,
                    unsigned int *family, unsigned int *model,
                    unsigned int *extended_model, unsigned int *stepping)
{
  if (family)
    {
      unsigned int eax;
      __cpuid (1, eax,
               cpu_features->features[CPUID_INDEX_1].cpuid.ebx,
               cpu_features->features[CPUID_INDEX_1].cpuid.ecx,
               cpu_features->features[CPUID_INDEX_1].cpuid.edx);
      cpu_features->features[CPUID_INDEX_1].cpuid.eax = eax;
      *family = (eax >> 8) & 0x0f;
      *model = (eax >> 4) & 0x0f;
      *extended_model = (eax >> 12) & 0xf0;
      *stepping = eax & 0x0f;
      if (*family == 0x0f)
        {
          *family += (eax >> 20) & 0xff;
          *model += *extended_model;
        }
    }

  if (cpu_features->basic.max_cpuid >= 7)
    {
      __cpuid_count (7, 0,
                     cpu_features->features[CPUID_INDEX_7].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7].cpuid.edx);
      __cpuid_count (7, 1,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.eax,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ebx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.ecx,
                     cpu_features->features[CPUID_INDEX_7_ECX_1].cpuid.edx);
    }

  if (cpu_features->basic.max_cpuid >= 0xd)
    __cpuid_count (0xd, 1,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_D_ECX_1].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x14)
    __cpuid_count (0x14, 0,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_14_ECX_0].cpuid.edx);

  if (cpu_features->basic.max_cpuid >= 0x19)
    __cpuid_count (0x19, 0,
                   cpu_features->features[CPUID_INDEX_19].cpuid.eax,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ebx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.ecx,
                   cpu_features->features[CPUID_INDEX_19].cpuid.edx);

  dl_check_minsigstacksize (cpu_features);
}
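
/* Several bit_arch_* tunable bits below are ORed into one preferred[]
   element in a single statement, so the corresponding index_arch_* values
   must all be identical.  */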
_Static_assert (((index_arch_Fast_Unaligned_Load
                  == index_arch_Fast_Unaligned_Copy)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Prefer_PMINUB_for_stringop)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Slow_SSE4_2)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Rep_String)
                 && (index_arch_Fast_Unaligned_Load
                     == index_arch_Fast_Copy_Backward)),
                "Incorrect index_arch_Fast_Unaligned_Load");

/* Intel Family-6 microarch list.  */
enum
{
  /* Atom processors.  */
  INTEL_ATOM_BONNELL,
  INTEL_ATOM_SILVERMONT,
  INTEL_ATOM_AIRMONT,
  INTEL_ATOM_GOLDMONT,
  INTEL_ATOM_GOLDMONT_PLUS,
  INTEL_ATOM_SIERRAFOREST,
  INTEL_ATOM_GRANDRIDGE,
  INTEL_ATOM_TREMONT,

  /* Bigcore processors.  */
  INTEL_BIGCORE_MEROM,
  INTEL_BIGCORE_PENRYN,
  INTEL_BIGCORE_DUNNINGTON,
  INTEL_BIGCORE_NEHALEM,
  INTEL_BIGCORE_WESTMERE,
  INTEL_BIGCORE_SANDYBRIDGE,
  INTEL_BIGCORE_IVYBRIDGE,
  INTEL_BIGCORE_HASWELL,
  INTEL_BIGCORE_BROADWELL,
  INTEL_BIGCORE_SKYLAKE,
  INTEL_BIGCORE_KABYLAKE,
  INTEL_BIGCORE_COMETLAKE,
  INTEL_BIGCORE_SKYLAKE_AVX512,
  INTEL_BIGCORE_CANNONLAKE,
  INTEL_BIGCORE_ICELAKE,
  INTEL_BIGCORE_TIGERLAKE,
  INTEL_BIGCORE_ROCKETLAKE,
  INTEL_BIGCORE_SAPPHIRERAPIDS,
  INTEL_BIGCORE_RAPTORLAKE,
  INTEL_BIGCORE_EMERALDRAPIDS,
  INTEL_BIGCORE_METEORLAKE,
  INTEL_BIGCORE_LUNARLAKE,
  INTEL_BIGCORE_ARROWLAKE,
  INTEL_BIGCORE_GRANITERAPIDS,

  /* Mixed (bigcore + atom SOC).  */
  INTEL_MIXED_LAKEFIELD,
  INTEL_MIXED_ALDERLAKE,

  /* KNL.  */
  INTEL_KNIGHTS_MILL,
  INTEL_KNIGHTS_LANDING,

  /* Unknown.  */
  INTEL_UNKNOWN,
};
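
/* Map an Intel family 6 CPUID model number to one of the microarchitectures
   above.  The stepping argument is currently unused.  */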
static unsigned int
intel_get_fam6_microarch (unsigned int model,
                          __attribute__ ((unused)) unsigned int stepping)
{
  switch (model)
    {
    case 0x1C:
    case 0x26:
      return INTEL_ATOM_BONNELL;
    case 0x27:
    case 0x35:
    case 0x36:
      /* Really Saltwell, but Saltwell is just a die shrink of Bonnell
         (microarchitecturally identical).  */
      return INTEL_ATOM_BONNELL;
    case 0x37:
    case 0x4A:
    case 0x4D:
    case 0x5D:
      return INTEL_ATOM_SILVERMONT;
    case 0x4C:
    case 0x5A:
    case 0x75:
      return INTEL_ATOM_AIRMONT;
    case 0x5C:
    case 0x5F:
      return INTEL_ATOM_GOLDMONT;
    case 0x7A:
      return INTEL_ATOM_GOLDMONT_PLUS;
    case 0xAF:
      return INTEL_ATOM_SIERRAFOREST;
    case 0xB6:
      return INTEL_ATOM_GRANDRIDGE;
    case 0x86:
    case 0x96:
    case 0x9C:
      return INTEL_ATOM_TREMONT;
    case 0x0F:
    case 0x16:
      return INTEL_BIGCORE_MEROM;
    case 0x17:
      return INTEL_BIGCORE_PENRYN;
    case 0x1D:
      return INTEL_BIGCORE_DUNNINGTON;
    case 0x1A:
    case 0x1E:
    case 0x1F:
    case 0x2E:
      return INTEL_BIGCORE_NEHALEM;
    case 0x25:
    case 0x2C:
    case 0x2F:
      return INTEL_BIGCORE_WESTMERE;
    case 0x2A:
    case 0x2D:
      return INTEL_BIGCORE_SANDYBRIDGE;
    case 0x3A:
    case 0x3E:
      return INTEL_BIGCORE_IVYBRIDGE;
    case 0x3C:
    case 0x3F:
    case 0x45:
    case 0x46:
      return INTEL_BIGCORE_HASWELL;
    case 0x3D:
    case 0x47:
    case 0x4F:
    case 0x56:
      return INTEL_BIGCORE_BROADWELL;
    case 0x4E:
    case 0x5E:
      return INTEL_BIGCORE_SKYLAKE;
    case 0x8E:
      /*
         Stepping = {9}
          -> Amberlake
         Stepping = {10}
          -> Coffeelake
         Stepping = {11, 12}
          -> Whiskeylake
         else
          -> Kabylake

         All of these are derivatives of Kabylake (Skylake client).
       */
      return INTEL_BIGCORE_KABYLAKE;
    case 0x9E:
      /*
         Stepping = {10, 11, 12, 13}
          -> Coffeelake
         else
          -> Kabylake

         Coffeelake is a derivative of Kabylake (Skylake client).
       */
      return INTEL_BIGCORE_KABYLAKE;
    case 0xA5:
    case 0xA6:
      return INTEL_BIGCORE_COMETLAKE;
    case 0x66:
      return INTEL_BIGCORE_CANNONLAKE;
    case 0x55:
      /*
         Stepping = {6, 7}
          -> Cascadelake
         Stepping = {11}
          -> Cooperlake
         else
          -> Skylake-avx512

         These are all microarchitecturally identical, so use
         Skylake-avx512 for all of them.
       */
      return INTEL_BIGCORE_SKYLAKE_AVX512;
    case 0x6A:
    case 0x6C:
    case 0x7D:
    case 0x7E:
    case 0x9D:
      return INTEL_BIGCORE_ICELAKE;
    case 0x8C:
    case 0x8D:
      return INTEL_BIGCORE_TIGERLAKE;
    case 0xA7:
      return INTEL_BIGCORE_ROCKETLAKE;
    case 0x8F:
      return INTEL_BIGCORE_SAPPHIRERAPIDS;
    case 0xB7:
    case 0xBA:
    case 0xBF:
      return INTEL_BIGCORE_RAPTORLAKE;
    case 0xCF:
      return INTEL_BIGCORE_EMERALDRAPIDS;
    case 0xAA:
    case 0xAC:
      return INTEL_BIGCORE_METEORLAKE;
    case 0xbd:
      return INTEL_BIGCORE_LUNARLAKE;
    case 0xc6:
      return INTEL_BIGCORE_ARROWLAKE;
    case 0xAD:
    case 0xAE:
      return INTEL_BIGCORE_GRANITERAPIDS;
    case 0x8A:
      return INTEL_MIXED_LAKEFIELD;
    case 0x97:
    case 0x9A:
    case 0xBE:
      return INTEL_MIXED_ALDERLAKE;
    case 0x85:
      return INTEL_KNIGHTS_MILL;
    case 0x57:
      return INTEL_KNIGHTS_LANDING;
    default:
      return INTEL_UNKNOWN;
    }
}
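
/* Detect the CPU vendor, fill in CPU_FEATURES, apply per-microarchitecture
   tuning and select the dynamic linker trampolines.  */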
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
  unsigned int ebx, ecx, edx;
  unsigned int family = 0;
  unsigned int model = 0;
  unsigned int stepping = 0;
  enum cpu_features_kind kind;

  cpu_features->cachesize_non_temporal_divisor = 4;
#if !HAS_CPUID
  if (__get_cpuid_max (0, 0) == 0)
    {
      kind = arch_kind_other;
      goto no_cpuid;
    }
#endif

  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);

  /* This spells out "GenuineIntel".  */
  if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
    {
      unsigned int extended_model;

      kind = arch_kind_intel;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      if (family == 0x06)
        {
          model += extended_model;
          unsigned int microarch
            = intel_get_fam6_microarch (model, stepping);

          switch (microarch)
            {
              /* Atom / KNL tuning.  */
            case INTEL_ATOM_BONNELL:
              /* BSF is slow on Bonnell.  */
              cpu_features->preferred[index_arch_Slow_BSF]
                |= bit_arch_Slow_BSF;
              break;

              /* Unaligned load versions are faster than SSSE3
                 on Airmont, Silvermont, Goldmont, and Goldmont Plus.  */
            case INTEL_ATOM_AIRMONT:
            case INTEL_ATOM_SILVERMONT:
            case INTEL_ATOM_GOLDMONT:
            case INTEL_ATOM_GOLDMONT_PLUS:

              /* Knights Landing.  Enable Silvermont optimizations.  */
            case INTEL_KNIGHTS_LANDING:

              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            case INTEL_ATOM_TREMONT:
              /* Enable rep string instructions, unaligned load, unaligned
                 copy, pminub and avoid SSE 4.2 on Tremont.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop
                    | bit_arch_Slow_SSE4_2);
              break;

            /*
               Default tuned Knights microarch.
               case INTEL_KNIGHTS_MILL:
             */

            /*
               Default tuned atom microarch.
               case INTEL_ATOM_SIERRAFOREST:
               case INTEL_ATOM_GRANDRIDGE:
             */

              /* Bigcore/Default Tuning.  */
            default:
            default_tuning:
              /* Unknown family 0x06 processors.  Assuming this is one
                 of Core i3/i5/i7 processors if AVX is available.  */
              if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
                break;

            enable_modern_features:
              /* Rep string instructions, unaligned load, unaligned copy,
                 and pminub are fast on Intel Core i3, i5 and i7.  */
              cpu_features->preferred[index_arch_Fast_Rep_String]
                |= (bit_arch_Fast_Rep_String
                    | bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Unaligned_Copy
                    | bit_arch_Prefer_PMINUB_for_stringop);
              break;

            case INTEL_BIGCORE_NEHALEM:
            case INTEL_BIGCORE_WESTMERE:
              /* Older CPUs prefer non-temporal stores at lower threshold.  */
              cpu_features->cachesize_non_temporal_divisor = 8;
              goto enable_modern_features;

              /* Older Bigcore microarch (smaller non-temporal store
                 threshold).  */
            case INTEL_BIGCORE_SANDYBRIDGE:
            case INTEL_BIGCORE_IVYBRIDGE:
            case INTEL_BIGCORE_HASWELL:
            case INTEL_BIGCORE_BROADWELL:
              cpu_features->cachesize_non_temporal_divisor = 8;
              goto default_tuning;

              /* Newer Bigcore microarch (larger non-temporal store
                 threshold).  */
            case INTEL_BIGCORE_SKYLAKE:
            case INTEL_BIGCORE_KABYLAKE:
            case INTEL_BIGCORE_COMETLAKE:
            case INTEL_BIGCORE_SKYLAKE_AVX512:
            case INTEL_BIGCORE_CANNONLAKE:
            case INTEL_BIGCORE_ICELAKE:
            case INTEL_BIGCORE_TIGERLAKE:
            case INTEL_BIGCORE_ROCKETLAKE:
            case INTEL_BIGCORE_RAPTORLAKE:
            case INTEL_BIGCORE_METEORLAKE:
            case INTEL_BIGCORE_LUNARLAKE:
            case INTEL_BIGCORE_ARROWLAKE:
            case INTEL_BIGCORE_SAPPHIRERAPIDS:
            case INTEL_BIGCORE_EMERALDRAPIDS:
            case INTEL_BIGCORE_GRANITERAPIDS:
              cpu_features->cachesize_non_temporal_divisor = 2;
              goto default_tuning;

              /* Default tuned Mixed (bigcore + atom SOC).  */
            case INTEL_MIXED_LAKEFIELD:
            case INTEL_MIXED_ALDERLAKE:
              cpu_features->cachesize_non_temporal_divisor = 2;
              goto default_tuning;
            }

          /* Disable TSX on some processors to avoid TSX on kernels that
             weren't updated with the latest microcode package (which
             disables the broken feature by default).  */
          switch (microarch)
            {
            case INTEL_BIGCORE_SKYLAKE_AVX512:
              /* 0x55 (Skylake-avx512) && stepping <= 5 disable TSX.  */
              if (stepping <= 5)
                goto disable_tsx;
              break;

            case INTEL_BIGCORE_KABYLAKE:
              /* NB: Although the errata documents that for model == 0x8e
                 (kabylake skylake client), only 0xb stepping or lower are
                 impacted, the intention of the errata was to disable TSX on
                 all client processors on all steppings.  Include 0xc
                 stepping which is an Intel Core i7-8665U, a client mobile
                 processor.  */
              if (stepping > 0xc)
                break;
              /* Fall through.  */
            case INTEL_BIGCORE_SKYLAKE:
              /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
                 processors listed in:

                 https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
               */
            disable_tsx:
              CPU_FEATURE_UNSET (cpu_features, HLE);
              CPU_FEATURE_UNSET (cpu_features, RTM);
              CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
              break;

            case INTEL_BIGCORE_HASWELL:
              /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
                 TSX.  Haswell also includes other model numbers that have
                 working TSX.  */
              if (model == 0x3f && stepping >= 4)
                break;

              CPU_FEATURE_UNSET (cpu_features, RTM);
              break;
            }
        }

      /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
         if AVX512ER is available.  Don't use AVX512 to avoid lower CPU
         frequency if AVX512ER isn't available.  */
      if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
        cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
          |= bit_arch_Prefer_No_VZEROUPPER;
      else
        {
          /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
             when ZMM load and store instructions are used.  */
          if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
            cpu_features->preferred[index_arch_Prefer_No_AVX512]
              |= bit_arch_Prefer_No_AVX512;

          /* Avoid RTM abort triggered by VZEROUPPER inside a
             transactionally executing RTM region.  */
          if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
            cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
              |= bit_arch_Prefer_No_VZEROUPPER;
        }

      /* Avoid short distance REP MOVSB on processors with FSRM.  */
      if (CPU_FEATURES_CPU_P (cpu_features, FSRM))
        cpu_features->preferred[index_arch_Avoid_Short_Distance_REP_MOVSB]
          |= bit_arch_Avoid_Short_Distance_REP_MOVSB;
    }
  /* This spells out "AuthenticAMD" or "HygonGenuine".  */
  else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
           || (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e))
    {
      unsigned int extended_model;

      kind = arch_kind_amd;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      ecx = cpu_features->features[CPUID_INDEX_1].cpuid.ecx;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
        {
          /* Since the FMA4 bit is in CPUID_INDEX_80000001 and
             FMA4 requires AVX, determine if FMA4 is usable here.  */
          CPU_FEATURE_SET_ACTIVE (cpu_features, FMA4);
        }

      if (family == 0x15)
        {
          /* "Excavator"  */
          if (model >= 0x60 && model <= 0x7f)
            {
              cpu_features->preferred[index_arch_Fast_Unaligned_Load]
                |= (bit_arch_Fast_Unaligned_Load
                    | bit_arch_Fast_Copy_Backward);

              /* Unaligned AVX loads are slower.  */
              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  /* This spells out "CentaurHauls" or "  Shanghai  ".  */
  else if ((ebx == 0x746e6543 && ecx == 0x736c7561 && edx == 0x48727561)
           || (ebx == 0x68532020 && ecx == 0x20206961 && edx == 0x68676e61))
    {
      unsigned int extended_model, stepping;

      kind = arch_kind_zhaoxin;

      get_common_indices (cpu_features, &family, &model, &extended_model,
                          &stepping);

      get_extended_indices (cpu_features);

      update_active (cpu_features);

      model += extended_model;
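
      /* On these models AVX and AVX2 are turned off and the unaligned AVX
         load preference is cleared so that the SSE based implementations
         are used instead.  */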
      if (family == 0x6)
        {
          if (model == 0xf || model == 0x19)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
      else if (family == 0x7)
        {
          if (model == 0x1b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_Slow_SSE4_2]
                |= bit_arch_Slow_SSE4_2;

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
          else if (model == 0x3b)
            {
              CPU_FEATURE_UNSET (cpu_features, AVX);
              CPU_FEATURE_UNSET (cpu_features, AVX2);

              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
                &= ~bit_arch_AVX_Fast_Unaligned_Load;
            }
        }
    }
  else
    {
      kind = arch_kind_other;
      get_common_indices (cpu_features, NULL, NULL, NULL, NULL);
      update_active (cpu_features);
    }

  /* Support i586 if CX8 is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CX8))
    cpu_features->preferred[index_arch_I586] |= bit_arch_I586;

  /* Support i686 if CMOV is available.  */
  if (CPU_FEATURES_CPU_P (cpu_features, CMOV))
    cpu_features->preferred[index_arch_I686] |= bit_arch_I686;

#if !HAS_CPUID
no_cpuid:
#endif

  cpu_features->basic.kind = kind;
  cpu_features->basic.family = family;
  cpu_features->basic.model = model;
  cpu_features->basic.stepping = stepping;

  dl_init_cacheinfo (cpu_features);

  TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));

#ifdef __LP64__
  TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
               TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
#endif

  bool disable_xsave_features = false;

  if (!CPU_FEATURE_USABLE_P (cpu_features, OSXSAVE))
    {
      /* These features are usable only if OSXSAVE is usable.  */
      CPU_FEATURE_UNSET (cpu_features, XSAVE);
      CPU_FEATURE_UNSET (cpu_features, XSAVEOPT);
      CPU_FEATURE_UNSET (cpu_features, XSAVEC);
      CPU_FEATURE_UNSET (cpu_features, XGETBV_ECX_1);
      CPU_FEATURE_UNSET (cpu_features, XFD);

      disable_xsave_features = true;
    }

  if (disable_xsave_features
      || (!CPU_FEATURE_USABLE_P (cpu_features, XSAVE)
          && !CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)))
    {
      /* Clear xsave_state_size if both XSAVE and XSAVEC aren't usable.  */
      cpu_features->xsave_state_size = 0;

      CPU_FEATURE_UNSET (cpu_features, AVX);
      CPU_FEATURE_UNSET (cpu_features, AVX2);
      CPU_FEATURE_UNSET (cpu_features, AVX_VNNI);
      CPU_FEATURE_UNSET (cpu_features, FMA);
      CPU_FEATURE_UNSET (cpu_features, VAES);
      CPU_FEATURE_UNSET (cpu_features, VPCLMULQDQ);
      CPU_FEATURE_UNSET (cpu_features, XOP);
      CPU_FEATURE_UNSET (cpu_features, F16C);
      CPU_FEATURE_UNSET (cpu_features, AVX512F);
      CPU_FEATURE_UNSET (cpu_features, AVX512CD);
      CPU_FEATURE_UNSET (cpu_features, AVX512ER);
      CPU_FEATURE_UNSET (cpu_features, AVX512PF);
      CPU_FEATURE_UNSET (cpu_features, AVX512VL);
      CPU_FEATURE_UNSET (cpu_features, AVX512DQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512BW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4FMAPS);
      CPU_FEATURE_UNSET (cpu_features, AVX512_4VNNIW);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BITALG);
      CPU_FEATURE_UNSET (cpu_features, AVX512_IFMA);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VBMI2);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VNNI);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VPOPCNTDQ);
      CPU_FEATURE_UNSET (cpu_features, AVX512_VP2INTERSECT);
      CPU_FEATURE_UNSET (cpu_features, AVX512_BF16);
      CPU_FEATURE_UNSET (cpu_features, AVX512_FP16);
      CPU_FEATURE_UNSET (cpu_features, AMX_BF16);
      CPU_FEATURE_UNSET (cpu_features, AMX_TILE);
      CPU_FEATURE_UNSET (cpu_features, AMX_INT8);

      CPU_FEATURE_UNSET (cpu_features, FMA4);
    }

#ifdef __x86_64__
  GLRO(dl_hwcap) = HWCAP_X86_64;
  if (cpu_features->basic.kind == arch_kind_intel)
    {
      const char *platform = NULL;

      if (CPU_FEATURE_USABLE_P (cpu_features, AVX512CD))
        {
          if (CPU_FEATURE_USABLE_P (cpu_features, AVX512ER))
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512PF))
                platform = "xeon_phi";
            }
          else
            {
              if (CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
                  && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
                GLRO(dl_hwcap) |= HWCAP_X86_AVX512_1;
            }
        }

      if (platform == NULL
          && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
          && CPU_FEATURE_USABLE_P (cpu_features, FMA)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI1)
          && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
          && CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
          && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
          && CPU_FEATURE_USABLE_P (cpu_features, POPCNT))
        platform = "haswell";

      if (platform != NULL)
        GLRO(dl_platform) = platform;
    }
#else
  GLRO(dl_hwcap) = 0;
  if (CPU_FEATURE_USABLE_P (cpu_features, SSE2))
    GLRO(dl_hwcap) |= HWCAP_X86_SSE2;

  if (CPU_FEATURES_ARCH_P (cpu_features, I686))
    GLRO(dl_platform) = "i686";
  else if (CPU_FEATURES_ARCH_P (cpu_features, I586))
    GLRO(dl_platform) = "i586";
#endif

#if CET_ENABLED
  TUNABLE_GET (x86_ibt, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_ibt));
  TUNABLE_GET (x86_shstk, tunable_val_t *,
               TUNABLE_CALLBACK (set_x86_shstk));
#endif
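
  /* When glibc is built for ISA level 3 or above, AVX is part of the
     baseline, so xsave_state_size is always non-zero and the FXSAVE based
     (SSE register only) resolver fallback below is compiled out.  */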
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
  if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
#endif
    {
      if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
        {
#ifdef __x86_64__
          GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec;
#endif
#ifdef SHARED
          GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec;
#endif
        }
      else
        {
#ifdef __x86_64__
          GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave;
#endif
#ifdef SHARED
          GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave;
#endif
        }
    }
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
  else
    {
# ifdef __x86_64__
      GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
#  ifdef SHARED
      GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
#  endif
# else
#  ifdef SHARED
      if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
        GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
      else
        GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
#  endif
# endif
    }
#endif

#ifdef SHARED
# ifdef __x86_64__
  TUNABLE_GET (plt_rewrite, tunable_val_t *,
               TUNABLE_CALLBACK (set_plt_rewrite));
# endif
#else
  /* NB: In libc.a, call init_cacheinfo.  */
  init_cacheinfo ();
#endif
}