1 /* Print CPU diagnostics data in ld.so. x86 version.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
#include <dl-diagnostics.h>
#include <ldsodefs.h>

#include <array_length.h>
#include <cpu-features.h>
#include <cpuid.h>
#include <dl-iterate_cpu.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
30 /* The generic CPUID dumping code. */
31 static void _dl_diagnostics_cpuid (void);
/* Print a single uint64_t member of struct cpu_features under the
   "x86.cpu_features." key prefix.  */
static void
print_cpu_features_value (const char *label, uint64_t value)
{
  _dl_printf ("x86.cpu_features.");
  _dl_diagnostics_print_labeled_value (label, value);
}
/* Print one CPUID register value from the cpu_features feature array.
   INDEX is the CPUID_INDEX_* slot, KIND is "cpuid" (raw) or "active"
   (usable), REG selects EAX..EDX (0..3), VALUE is the register
   contents.  */
static void
print_cpu_feature_internal (unsigned int index, const char *kind,
                            unsigned int reg, uint32_t value)
{
  _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n",
              index, kind, reg, value);
}
/* Print one "preferred" feature indicator.  FLAG is the raw
   CPU_FEATURE_PREFERRED_P result (0 or 1).  */
static void
print_cpu_feature_preferred (const char *label, unsigned int flag)
{
  _dl_printf("x86.cpu_features.preferred.%s=0x%x\n", label, flag);
}
55 _dl_diagnostics_cpu (void)
57 const struct cpu_features
*cpu_features
= __get_cpu_features ();
59 print_cpu_features_value ("basic.kind", cpu_features
->basic
.kind
);
60 print_cpu_features_value ("basic.max_cpuid", cpu_features
->basic
.max_cpuid
);
61 print_cpu_features_value ("basic.family", cpu_features
->basic
.family
);
62 print_cpu_features_value ("basic.model", cpu_features
->basic
.model
);
63 print_cpu_features_value ("basic.stepping", cpu_features
->basic
.stepping
);
65 for (unsigned int index
= 0; index
< CPUID_INDEX_MAX
; ++index
)
67 /* The index values are part of the ABI via
68 <sys/platform/x86.h>, so translating them to strings is not
70 for (unsigned int reg
= 0; reg
< 4; ++reg
)
71 print_cpu_feature_internal
73 cpu_features
->features
[index
].cpuid_array
[reg
]);
74 for (unsigned int reg
= 0; reg
< 4; ++reg
)
75 print_cpu_feature_internal
76 (index
, "active", reg
,
77 cpu_features
->features
[index
].active_array
[reg
]);
80 /* The preferred indicators are not part of the ABI and need to be
83 print_cpu_feature_preferred (#x, CPU_FEATURE_PREFERRED_P (cpu_features, x));
84 #include "cpu-features-preferred_feature_index_1.def"
87 print_cpu_features_value ("isa_1", cpu_features
->isa_1
);
88 print_cpu_features_value ("xsave_state_size",
89 cpu_features
->xsave_state_size
);
90 print_cpu_features_value ("xsave_state_full_size",
91 cpu_features
->xsave_state_full_size
);
92 print_cpu_features_value ("data_cache_size", cpu_features
->data_cache_size
);
93 print_cpu_features_value ("shared_cache_size",
94 cpu_features
->shared_cache_size
);
95 print_cpu_features_value ("non_temporal_threshold",
96 cpu_features
->non_temporal_threshold
);
97 print_cpu_features_value ("memset_non_temporal_threshold",
98 cpu_features
->memset_non_temporal_threshold
);
99 print_cpu_features_value ("rep_movsb_threshold",
100 cpu_features
->rep_movsb_threshold
);
101 print_cpu_features_value ("rep_movsb_stop_threshold",
102 cpu_features
->rep_movsb_stop_threshold
);
103 print_cpu_features_value ("rep_stosb_threshold",
104 cpu_features
->rep_stosb_threshold
);
105 print_cpu_features_value ("level1_icache_size",
106 cpu_features
->level1_icache_size
);
107 print_cpu_features_value ("level1_icache_linesize",
108 cpu_features
->level1_icache_linesize
);
109 print_cpu_features_value ("level1_dcache_size",
110 cpu_features
->level1_dcache_size
);
111 print_cpu_features_value ("level1_dcache_assoc",
112 cpu_features
->level1_dcache_assoc
);
113 print_cpu_features_value ("level1_dcache_linesize",
114 cpu_features
->level1_dcache_linesize
);
115 print_cpu_features_value ("level2_cache_size",
116 cpu_features
->level2_cache_size
);
117 print_cpu_features_value ("level2_cache_assoc",
118 cpu_features
->level2_cache_assoc
);
119 print_cpu_features_value ("level2_cache_linesize",
120 cpu_features
->level2_cache_linesize
);
121 print_cpu_features_value ("level3_cache_size",
122 cpu_features
->level3_cache_size
);
123 print_cpu_features_value ("level3_cache_assoc",
124 cpu_features
->level3_cache_assoc
);
125 print_cpu_features_value ("level3_cache_linesize",
126 cpu_features
->level3_cache_linesize
);
127 print_cpu_features_value ("level4_cache_size",
128 cpu_features
->level4_cache_size
);
129 print_cpu_features_value ("cachesize_non_temporal_divisor",
130 cpu_features
->cachesize_non_temporal_divisor
);
132 offsetof (struct cpu_features
, cachesize_non_temporal_divisor
)
133 + sizeof (cpu_features
->cachesize_non_temporal_divisor
)
134 == sizeof (*cpu_features
),
135 "last cpu_features field has been printed");
137 _dl_diagnostics_cpuid ();
/* The following code implements a generic CPUID dumper that tries to
   gather CPUID data without knowing about CPUID implementation
   details.  */

/* Register arguments to CPUID.  Multiple ECX subleaf values yielding
   the same result are combined, to shorten the output.  Both
   identical matches (EAX to EDX are the same) and matches where EAX,
   EBX, EDX, and ECX are equal except in the lower byte, which must
   match the query ECX value.  The latter is needed to compress ranges
   on CPUs which preserve the lowest byte in ECX if an unknown leaf is
   queried.  */
struct cpuid_query
{
  unsigned int eax;             /* Queried CPUID leaf.  */
  int ecx_first;                /* First subleaf of the merged range.  */
  int ecx_last;                 /* Last subleaf of the merged range.  */
  /* True if only the low byte of the result ECX tracked the query ECX
     value, so the range was compressed on that basis.  */
  bool ecx_preserves_query_byte;
};
159 /* Single integer value that can be used for sorting/ordering
160 comparisons. Uses Q->eax and Q->ecx_first only because ecx_last is
161 always greater than the previous ecx_first value and less than the
163 static inline unsigned long long int
164 cpuid_query_combined (struct cpuid_query
*q
)
166 /* ecx can be -1 (that is, ~0U). If this happens, this the only ecx
167 value for this eax value, so the ordering does not matter. */
168 return ((unsigned long long int) q
->eax
<< 32) | (unsigned int) q
->ecx_first
;
171 /* Used for differential reporting of zero/non-zero values. */
172 static const struct cpuid_registers cpuid_registers_zero
;
174 /* Register arguments to CPUID paired with the results that came back. */
175 struct cpuid_query_result
177 struct cpuid_query q
;
178 struct cpuid_registers r
;
/* During a first enumeration pass, we try to collect data for
   cpuid_initial_subleaf_limit subleaves per leaf/EAX value.  If we run
   out of space, we try once more with applying the lower limit.  */
enum { cpuid_main_leaf_limit = 128 };
enum { cpuid_initial_subleaf_limit = 512 };
enum { cpuid_subleaf_limit = 32 };

/* Offset of the extended leaf area.  */
enum { cpuid_extended_leaf_offset = 0x80000000 };
191 /* Collected CPUID data. Everything is stored in a statically sized
192 array that is sized so that the second pass will collect some data
193 for all leaves, after the limit is applied. On the second pass,
194 ecx_limit is set to cpuid_subleaf_limit. */
195 struct cpuid_collected_data
198 unsigned int ecx_limit
;
199 uint64_t xgetbv_ecx_0
;
200 struct cpuid_query_result qr
[cpuid_main_leaf_limit
201 * 2 * cpuid_subleaf_limit
];
204 /* Fill in the result of a CPUID query. Returns true if there is
205 room, false if nothing could be stored. */
207 _dl_diagnostics_cpuid_store (struct cpuid_collected_data
*ccd
,
208 unsigned eax
, int ecx
)
210 if (ccd
->used
>= array_length (ccd
->qr
))
213 /* Tentatively fill in the next value. */
214 __cpuid_count (eax
, ecx
,
215 ccd
->qr
[ccd
->used
].r
.eax
,
216 ccd
->qr
[ccd
->used
].r
.ebx
,
217 ccd
->qr
[ccd
->used
].r
.ecx
,
218 ccd
->qr
[ccd
->used
].r
.edx
);
220 /* If the ECX subleaf is next subleaf after the previous one (for
221 the same leaf), and the values are the same, merge the result
222 with the already-stored one. Do this before skipping zero
223 leaves, which avoids artifiacts for ECX == 256 queries. */
225 && ccd
->qr
[ccd
->used
- 1].q
.eax
== eax
226 && ccd
->qr
[ccd
->used
- 1].q
.ecx_last
+ 1 == ecx
)
228 /* Exact match of the previous result. Ignore the value of
229 ecx_preserves_query_byte if this is a singleton range so far
230 because we can treat ECX as fixed if the same value repeats. */
231 if ((!ccd
->qr
[ccd
->used
- 1].q
.ecx_preserves_query_byte
232 || (ccd
->qr
[ccd
->used
- 1].q
.ecx_first
233 == ccd
->qr
[ccd
->used
- 1].q
.ecx_last
))
234 && memcmp (&ccd
->qr
[ccd
->used
- 1].r
, &ccd
->qr
[ccd
->used
].r
,
235 sizeof (ccd
->qr
[ccd
->used
].r
)) == 0)
237 ccd
->qr
[ccd
->used
- 1].q
.ecx_last
= ecx
;
238 /* ECX is now fixed because the same value has been observed
239 twice, even if we had a low-byte match before. */
240 ccd
->qr
[ccd
->used
- 1].q
.ecx_preserves_query_byte
= false;
243 /* Match except for the low byte in ECX, which must match the
244 incoming ECX value. */
245 if (ccd
->qr
[ccd
->used
- 1].q
.ecx_preserves_query_byte
246 && (ecx
& 0xff) == (ccd
->qr
[ccd
->used
].r
.ecx
& 0xff)
247 && ccd
->qr
[ccd
->used
].r
.eax
== ccd
->qr
[ccd
->used
- 1].r
.eax
248 && ccd
->qr
[ccd
->used
].r
.ebx
== ccd
->qr
[ccd
->used
- 1].r
.ebx
249 && ((ccd
->qr
[ccd
->used
].r
.ecx
& 0xffffff00)
250 == (ccd
->qr
[ccd
->used
- 1].r
.ecx
& 0xffffff00))
251 && ccd
->qr
[ccd
->used
].r
.edx
== ccd
->qr
[ccd
->used
- 1].r
.edx
)
253 ccd
->qr
[ccd
->used
- 1].q
.ecx_last
= ecx
;
258 /* Do not store zero results. All-zero values usually mean that the
259 subleaf is unsupported. */
260 if (ccd
->qr
[ccd
->used
].r
.eax
== 0
261 && ccd
->qr
[ccd
->used
].r
.ebx
== 0
262 && ccd
->qr
[ccd
->used
].r
.ecx
== 0
263 && ccd
->qr
[ccd
->used
].r
.edx
== 0)
266 /* The result needs to be stored. Fill in the query parameters and
267 consume the storage. */
268 ccd
->qr
[ccd
->used
].q
.eax
= eax
;
269 ccd
->qr
[ccd
->used
].q
.ecx_first
= ecx
;
270 ccd
->qr
[ccd
->used
].q
.ecx_last
= ecx
;
271 ccd
->qr
[ccd
->used
].q
.ecx_preserves_query_byte
272 = (ecx
& 0xff) == (ccd
->qr
[ccd
->used
].r
.ecx
& 0xff);
277 /* Collected CPUID data into *CCD. If LIMIT, apply per-leaf limits to
278 avoid exceeding the pre-allocated space. Return true if all data
279 could be stored, false if the retrying without a limit is
282 _dl_diagnostics_cpuid_collect_1 (struct cpuid_collected_data
*ccd
, bool limit
)
286 = (limit
? cpuid_subleaf_limit
: cpuid_initial_subleaf_limit
) - 1;
287 _dl_diagnostics_cpuid_store (ccd
, 0x00, 0x00);
289 /* CPUID reported all 0. Should not happen. */
291 unsigned int maximum_leaf
= ccd
->qr
[0x00].r
.eax
;
292 if (limit
&& maximum_leaf
>= cpuid_main_leaf_limit
)
293 maximum_leaf
= cpuid_main_leaf_limit
- 1;
295 for (unsigned int eax
= 1; eax
<= maximum_leaf
; ++eax
)
297 for (unsigned int ecx
= 0; ecx
<= ccd
->ecx_limit
; ++ecx
)
298 if (!_dl_diagnostics_cpuid_store (ccd
, eax
, ecx
))
302 if (!_dl_diagnostics_cpuid_store (ccd
, cpuid_extended_leaf_offset
, 0x00))
304 maximum_leaf
= ccd
->qr
[ccd
->used
- 1].r
.eax
;
305 if (maximum_leaf
< cpuid_extended_leaf_offset
)
306 /* No extended CPUID information. */
309 && maximum_leaf
- cpuid_extended_leaf_offset
>= cpuid_main_leaf_limit
)
310 maximum_leaf
= cpuid_extended_leaf_offset
+ cpuid_main_leaf_limit
- 1;
311 for (unsigned int eax
= cpuid_extended_leaf_offset
+ 1;
312 eax
<= maximum_leaf
; ++eax
)
314 for (unsigned int ecx
= 0; ecx
<= ccd
->ecx_limit
; ++ecx
)
315 if (!_dl_diagnostics_cpuid_store (ccd
, eax
, ecx
))
321 /* Call _dl_diagnostics_cpuid_collect_1 twice if necessary, the
322 second time with the limit applied. */
324 _dl_diagnostics_cpuid_collect (struct cpuid_collected_data
*ccd
)
326 if (!_dl_diagnostics_cpuid_collect_1 (ccd
, false))
327 _dl_diagnostics_cpuid_collect_1 (ccd
, true);
329 /* Re-use the result of the official feature probing here. */
330 const struct cpu_features
*cpu_features
= __get_cpu_features ();
331 if (CPU_FEATURES_CPU_P (cpu_features
, OSXSAVE
))
334 unsigned int xcrhigh
;
335 asm ("xgetbv" : "=a" (xcrlow
), "=d" (xcrhigh
) : "c" (0));
336 ccd
->xgetbv_ecx_0
= ((uint64_t) xcrhigh
<< 32) + xcrlow
;
339 ccd
->xgetbv_ecx_0
= 0;
342 /* Print a CPUID register value (passed as REG_VALUE) if it differs
343 from the expected REG_REFERENCE value. PROCESSOR_INDEX is the
344 process sequence number (always starting at zero; not a kernel ID). */
346 _dl_diagnostics_cpuid_print_reg (unsigned int processor_index
,
347 const struct cpuid_query
*q
,
348 const char *reg_label
, unsigned int reg_value
,
352 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
353 ".ecx[0x%x].%s=0x%x\n",
354 processor_index
, q
->eax
, q
->ecx_first
, reg_label
, reg_value
);
356 _dl_printf ("x86.processor[0x%x].cpuid.eax[0x%x].%s=0x%x\n",
357 processor_index
, q
->eax
, reg_label
, reg_value
);
360 /* Print CPUID result values in *RESULT for the query in
361 CCD->qr[CCD_IDX]. PROCESSOR_INDEX is the process sequence number
362 (always starting at zero; not a kernel ID). */
364 _dl_diagnostics_cpuid_print_query (unsigned int processor_index
,
365 struct cpuid_collected_data
*ccd
,
366 unsigned int ccd_idx
,
367 const struct cpuid_registers
*result
)
369 /* Treat this as a value if subleaves if ecx isn't zero (maybe
370 within the [ecx_fist, ecx_last] range), or if eax matches its
371 neighbors. If the range is [0, ecx_limit], then the subleaves
372 are not distinct (independently of ecx_preserves_query_byte),
373 so do not report them separately. */
374 struct cpuid_query
*q
= &ccd
->qr
[ccd_idx
].q
;
375 bool subleaf
= (q
->ecx_first
> 0
376 || (q
->ecx_first
!= q
->ecx_last
377 && !(q
->ecx_first
== 0 && q
->ecx_last
== ccd
->ecx_limit
))
378 || (ccd_idx
> 0 && q
->eax
== ccd
->qr
[ccd_idx
- 1].q
.eax
)
379 || (ccd_idx
+ 1 < ccd
->used
380 && q
->eax
== ccd
->qr
[ccd_idx
+ 1].q
.eax
));
381 _dl_diagnostics_cpuid_print_reg (processor_index
, q
, "eax", result
->eax
,
383 _dl_diagnostics_cpuid_print_reg (processor_index
, q
, "ebx", result
->ebx
,
385 _dl_diagnostics_cpuid_print_reg (processor_index
, q
, "ecx", result
->ecx
,
387 _dl_diagnostics_cpuid_print_reg (processor_index
, q
, "edx", result
->edx
,
390 if (subleaf
&& q
->ecx_first
!= q
->ecx_last
)
392 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
393 ".ecx[0x%x].until_ecx=0x%x\n",
394 processor_index
, q
->eax
, q
->ecx_first
, q
->ecx_last
);
395 if (q
->ecx_preserves_query_byte
)
396 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
397 ".ecx[0x%x].ecx_query_mask=0xff\n",
398 processor_index
, q
->eax
, q
->ecx_first
);
402 /* Perform differential reporting of the data in *CURRENT against
403 *BASE. REQUESTED_CPU is the kernel CPU ID the thread was
404 configured to run on, or -1 if no configuration was possible.
405 PROCESSOR_INDEX is the process sequence number (always starting at
406 zero; not a kernel ID). */
408 _dl_diagnostics_cpuid_report (struct dl_iterate_cpu
*dci
,
409 struct cpuid_collected_data
*current
,
410 struct cpuid_collected_data
*base
)
412 if (dci
->requested_cpu
>= 0)
413 _dl_printf ("x86.processor[0x%x].requested=0x%x\n",
414 dci
->processor_index
, dci
->requested_cpu
);
415 if (dci
->actual_cpu
>= 0)
416 _dl_printf ("x86.processor[0x%x].observed=0x%x\n",
417 dci
->processor_index
, dci
->actual_cpu
);
418 if (dci
->actual_node
>= 0)
419 _dl_printf ("x86.processor[0x%x].observed_node=0x%x\n",
420 dci
->processor_index
, dci
->actual_node
);
422 _dl_printf ("x86.processor[0x%x].cpuid_leaves=0x%x\n",
423 dci
->processor_index
, current
->used
);
424 _dl_printf ("x86.processor[0x%x].ecx_limit=0x%x\n",
425 dci
->processor_index
, current
->ecx_limit
);
427 unsigned int base_idx
= 0;
428 for (unsigned int current_idx
= 0; current_idx
< current
->used
;
431 /* Report missing data on the current CPU as 0. */
432 unsigned long long int current_query
433 = cpuid_query_combined (¤t
->qr
[current_idx
].q
);
434 while (base_idx
< base
->used
435 && cpuid_query_combined (&base
->qr
[base_idx
].q
) < current_query
)
437 _dl_diagnostics_cpuid_print_query (dci
->processor_index
,
439 &cpuid_registers_zero
);
443 if (base_idx
< base
->used
444 && cpuid_query_combined (&base
->qr
[base_idx
].q
) == current_query
)
446 _Static_assert (sizeof (struct cpuid_registers
) == 4 * 4,
447 "no padding in struct cpuid_registers");
448 if (current
->qr
[current_idx
].q
.ecx_last
449 != base
->qr
[base_idx
].q
.ecx_last
450 || memcmp (¤t
->qr
[current_idx
].r
,
451 &base
->qr
[base_idx
].r
,
452 sizeof (struct cpuid_registers
)) != 0)
453 /* The ECX range or the values have changed. Show the
455 _dl_diagnostics_cpuid_print_query (dci
->processor_index
,
456 current
, current_idx
,
457 ¤t
->qr
[current_idx
].r
);
461 /* Data is absent in the base reference. Report the new data. */
462 _dl_diagnostics_cpuid_print_query (dci
->processor_index
,
463 current
, current_idx
,
464 ¤t
->qr
[current_idx
].r
);
467 if (current
->xgetbv_ecx_0
!= base
->xgetbv_ecx_0
)
469 /* Re-use the 64-bit printing routine. */
470 _dl_printf ("x86.processor[0x%x].", dci
->processor_index
);
471 _dl_diagnostics_print_labeled_value ("xgetbv.ecx[0x0]",
472 current
->xgetbv_ecx_0
);
477 _dl_diagnostics_cpuid (void)
480 /* CPUID is not supported, so there is nothing to dump. */
481 if (__get_cpuid_max (0, 0) == 0)
485 struct dl_iterate_cpu dic
;
486 _dl_iterate_cpu_init (&dic
);
488 /* Two copies of the data are used. Data is written to the index
489 (dic.processor_index & 1). The previous version against which the
490 data dump is reported is at index !(processor_index & 1). */
491 struct cpuid_collected_data ccd
[2];
493 /* The initial data is presumed to be all zero. Zero results are
496 ccd
[1].xgetbv_ecx_0
= 0;
498 /* Run the CPUID probing on a specific CPU. There are expected
499 differences for encoding core IDs and topology information in
500 CPUID output, but some firmware/kernel bugs also may result in
501 asymmetric data across CPUs in some cases. */
502 while (_dl_iterate_cpu_next (&dic
))
504 _dl_diagnostics_cpuid_collect (&ccd
[dic
.processor_index
& 1]);
505 _dl_diagnostics_cpuid_report
506 (&dic
, &ccd
[dic
.processor_index
& 1],
507 &ccd
[!(dic
.processor_index
& 1)]);