MIPS: Regenerate ulps
[glibc.git] / sysdeps / x86 / dl-diagnostics-cpu.c
blob49eeb5f70a2d4eacd6a84eae3228c940f2afe39c
1 /* Print CPU diagnostics data in ld.so. x86 version.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
19 #include <dl-diagnostics.h>
21 #include <array_length.h>
22 #include <cpu-features.h>
23 #include <cpuid.h>
24 #include <dl-iterate_cpu.h>
25 #include <ldsodefs.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #include <sysdep.h>
/* The generic CPUID dumping code, implemented below.  Called at the
   end of _dl_diagnostics_cpu.  */
static void _dl_diagnostics_cpuid (void);
/* Print one 64-bit member of struct cpu_features as
   "x86.cpu_features.LABEL=VALUE" (value formatting is delegated to
   _dl_diagnostics_print_labeled_value).  */
static void
print_cpu_features_value (const char *label, uint64_t value)
{
  _dl_printf ("x86.cpu_features.");
  _dl_diagnostics_print_labeled_value (label, value);
}
/* Print one CPUID feature register value.  INDEX is the feature word
   index, KIND is "cpuid" (raw CPUID bits) or "active" (bits enabled
   after glibc processing), REG is the register number within the
   word.  */
static void
print_cpu_feature_internal (unsigned int index, const char *kind,
                            unsigned int reg, uint32_t value)
{
  _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n",
              index, kind, reg, value);
}
/* Print one preferred-feature indicator (FLAG is 0 or the feature
   bit) under the "x86.cpu_features.preferred." prefix.  */
static void
print_cpu_feature_preferred (const char *label, unsigned int flag)
{
  _dl_printf ("x86.cpu_features.preferred.%s=0x%x\n", label, flag);
}
54 void
55 _dl_diagnostics_cpu (void)
57 const struct cpu_features *cpu_features = __get_cpu_features ();
59 print_cpu_features_value ("basic.kind", cpu_features->basic.kind);
60 print_cpu_features_value ("basic.max_cpuid", cpu_features->basic.max_cpuid);
61 print_cpu_features_value ("basic.family", cpu_features->basic.family);
62 print_cpu_features_value ("basic.model", cpu_features->basic.model);
63 print_cpu_features_value ("basic.stepping", cpu_features->basic.stepping);
65 for (unsigned int index = 0; index < CPUID_INDEX_MAX; ++index)
67 /* The index values are part of the ABI via
68 <sys/platform/x86.h>, so translating them to strings is not
69 necessary. */
70 for (unsigned int reg = 0; reg < 4; ++reg)
71 print_cpu_feature_internal
72 (index, "cpuid", reg,
73 cpu_features->features[index].cpuid_array[reg]);
74 for (unsigned int reg = 0; reg < 4; ++reg)
75 print_cpu_feature_internal
76 (index, "active", reg,
77 cpu_features->features[index].active_array[reg]);
80 /* The preferred indicators are not part of the ABI and need to be
81 translated. */
82 #define BIT(x) \
83 print_cpu_feature_preferred (#x, CPU_FEATURE_PREFERRED_P (cpu_features, x));
84 #include "cpu-features-preferred_feature_index_1.def"
85 #undef BIT
87 print_cpu_features_value ("isa_1", cpu_features->isa_1);
88 print_cpu_features_value ("xsave_state_size",
89 cpu_features->xsave_state_size);
90 print_cpu_features_value ("xsave_state_full_size",
91 cpu_features->xsave_state_full_size);
92 print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size);
93 print_cpu_features_value ("shared_cache_size",
94 cpu_features->shared_cache_size);
95 print_cpu_features_value ("non_temporal_threshold",
96 cpu_features->non_temporal_threshold);
97 print_cpu_features_value ("memset_non_temporal_threshold",
98 cpu_features->memset_non_temporal_threshold);
99 print_cpu_features_value ("rep_movsb_threshold",
100 cpu_features->rep_movsb_threshold);
101 print_cpu_features_value ("rep_movsb_stop_threshold",
102 cpu_features->rep_movsb_stop_threshold);
103 print_cpu_features_value ("rep_stosb_threshold",
104 cpu_features->rep_stosb_threshold);
105 print_cpu_features_value ("level1_icache_size",
106 cpu_features->level1_icache_size);
107 print_cpu_features_value ("level1_icache_linesize",
108 cpu_features->level1_icache_linesize);
109 print_cpu_features_value ("level1_dcache_size",
110 cpu_features->level1_dcache_size);
111 print_cpu_features_value ("level1_dcache_assoc",
112 cpu_features->level1_dcache_assoc);
113 print_cpu_features_value ("level1_dcache_linesize",
114 cpu_features->level1_dcache_linesize);
115 print_cpu_features_value ("level2_cache_size",
116 cpu_features->level2_cache_size);
117 print_cpu_features_value ("level2_cache_assoc",
118 cpu_features->level2_cache_assoc);
119 print_cpu_features_value ("level2_cache_linesize",
120 cpu_features->level2_cache_linesize);
121 print_cpu_features_value ("level3_cache_size",
122 cpu_features->level3_cache_size);
123 print_cpu_features_value ("level3_cache_assoc",
124 cpu_features->level3_cache_assoc);
125 print_cpu_features_value ("level3_cache_linesize",
126 cpu_features->level3_cache_linesize);
127 print_cpu_features_value ("level4_cache_size",
128 cpu_features->level4_cache_size);
129 print_cpu_features_value ("cachesize_non_temporal_divisor",
130 cpu_features->cachesize_non_temporal_divisor);
131 _Static_assert (
132 offsetof (struct cpu_features, cachesize_non_temporal_divisor)
133 + sizeof (cpu_features->cachesize_non_temporal_divisor)
134 == sizeof (*cpu_features),
135 "last cpu_features field has been printed");
137 _dl_diagnostics_cpuid ();
/* The following code implements a generic CPUID dumper that tries to
   gather CPUID data without knowing about CPUID implementation
   details.  */
/* Register arguments to CPUID.  Multiple ECX subleaf values yielding
   the same result are combined, to shorten the output.  Both
   identical matches (EAX to EDX are the same) and matches where EAX,
   EBX, EDX, and ECX are equal except in the lower byte, which must
   match the query ECX value.  The latter is needed to compress ranges
   on CPUs which preserve the lowest byte in ECX if an unknown leaf is
   queried.  */
struct cpuid_query
{
  unsigned int eax;             /* CPUID leaf.  */
  unsigned ecx_first;           /* First subleaf of the merged range.  */
  unsigned ecx_last;            /* Last subleaf of the merged range.  */
  /* True if only the low byte of the result ECX tracked the query
     ECX value (see comment above).  */
  bool ecx_preserves_query_byte;
};
159 /* Single integer value that can be used for sorting/ordering
160 comparisons. Uses Q->eax and Q->ecx_first only because ecx_last is
161 always greater than the previous ecx_first value and less than the
162 subsequent one. */
163 static inline unsigned long long int
164 cpuid_query_combined (struct cpuid_query *q)
166 /* ecx can be -1 (that is, ~0U). If this happens, this the only ecx
167 value for this eax value, so the ordering does not matter. */
168 return ((unsigned long long int) q->eax << 32) | (unsigned int) q->ecx_first;
171 /* Used for differential reporting of zero/non-zero values. */
172 static const struct cpuid_registers cpuid_registers_zero;
174 /* Register arguments to CPUID paired with the results that came back. */
175 struct cpuid_query_result
177 struct cpuid_query q;
178 struct cpuid_registers r;
/* During a first enumeration pass, we try to collect data for
   cpuid_initial_subleaf_limit subleaves per leaf/EAX value.  If we run
   out of space, we try once more with applying the lower limit.  */
enum { cpuid_main_leaf_limit = 128 };
enum { cpuid_initial_subleaf_limit = 512 };
enum { cpuid_subleaf_limit = 32 };

/* Offset of the extended leaf area.  */
enum { cpuid_extended_leaf_offset = 0x80000000 };
191 /* Collected CPUID data. Everything is stored in a statically sized
192 array that is sized so that the second pass will collect some data
193 for all leaves, after the limit is applied. On the second pass,
194 ecx_limit is set to cpuid_subleaf_limit. */
195 struct cpuid_collected_data
197 unsigned int used;
198 unsigned int ecx_limit;
199 uint64_t xgetbv_ecx_0;
200 struct cpuid_query_result qr[cpuid_main_leaf_limit
201 * 2 * cpuid_subleaf_limit];
204 /* Fill in the result of a CPUID query. Returns true if there is
205 room, false if nothing could be stored. */
206 static bool
207 _dl_diagnostics_cpuid_store (struct cpuid_collected_data *ccd,
208 unsigned eax, int ecx)
210 if (ccd->used >= array_length (ccd->qr))
211 return false;
213 /* Tentatively fill in the next value. */
214 __cpuid_count (eax, ecx,
215 ccd->qr[ccd->used].r.eax,
216 ccd->qr[ccd->used].r.ebx,
217 ccd->qr[ccd->used].r.ecx,
218 ccd->qr[ccd->used].r.edx);
220 /* If the ECX subleaf is next subleaf after the previous one (for
221 the same leaf), and the values are the same, merge the result
222 with the already-stored one. Do this before skipping zero
223 leaves, which avoids artifiacts for ECX == 256 queries. */
224 if (ccd->used > 0
225 && ccd->qr[ccd->used - 1].q.eax == eax
226 && ccd->qr[ccd->used - 1].q.ecx_last + 1 == ecx)
228 /* Exact match of the previous result. Ignore the value of
229 ecx_preserves_query_byte if this is a singleton range so far
230 because we can treat ECX as fixed if the same value repeats. */
231 if ((!ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte
232 || (ccd->qr[ccd->used - 1].q.ecx_first
233 == ccd->qr[ccd->used - 1].q.ecx_last))
234 && memcmp (&ccd->qr[ccd->used - 1].r, &ccd->qr[ccd->used].r,
235 sizeof (ccd->qr[ccd->used].r)) == 0)
237 ccd->qr[ccd->used - 1].q.ecx_last = ecx;
238 /* ECX is now fixed because the same value has been observed
239 twice, even if we had a low-byte match before. */
240 ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte = false;
241 return true;
243 /* Match except for the low byte in ECX, which must match the
244 incoming ECX value. */
245 if (ccd->qr[ccd->used - 1].q.ecx_preserves_query_byte
246 && (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff)
247 && ccd->qr[ccd->used].r.eax == ccd->qr[ccd->used - 1].r.eax
248 && ccd->qr[ccd->used].r.ebx == ccd->qr[ccd->used - 1].r.ebx
249 && ((ccd->qr[ccd->used].r.ecx & 0xffffff00)
250 == (ccd->qr[ccd->used - 1].r.ecx & 0xffffff00))
251 && ccd->qr[ccd->used].r.edx == ccd->qr[ccd->used - 1].r.edx)
253 ccd->qr[ccd->used - 1].q.ecx_last = ecx;
254 return true;
258 /* Do not store zero results. All-zero values usually mean that the
259 subleaf is unsupported. */
260 if (ccd->qr[ccd->used].r.eax == 0
261 && ccd->qr[ccd->used].r.ebx == 0
262 && ccd->qr[ccd->used].r.ecx == 0
263 && ccd->qr[ccd->used].r.edx == 0)
264 return true;
266 /* The result needs to be stored. Fill in the query parameters and
267 consume the storage. */
268 ccd->qr[ccd->used].q.eax = eax;
269 ccd->qr[ccd->used].q.ecx_first = ecx;
270 ccd->qr[ccd->used].q.ecx_last = ecx;
271 ccd->qr[ccd->used].q.ecx_preserves_query_byte
272 = (ecx & 0xff) == (ccd->qr[ccd->used].r.ecx & 0xff);
273 ++ccd->used;
274 return true;
277 /* Collected CPUID data into *CCD. If LIMIT, apply per-leaf limits to
278 avoid exceeding the pre-allocated space. Return true if all data
279 could be stored, false if the retrying without a limit is
280 requested. */
281 static bool
282 _dl_diagnostics_cpuid_collect_1 (struct cpuid_collected_data *ccd, bool limit)
284 ccd->used = 0;
285 ccd->ecx_limit
286 = (limit ? cpuid_subleaf_limit : cpuid_initial_subleaf_limit) - 1;
287 _dl_diagnostics_cpuid_store (ccd, 0x00, 0x00);
288 if (ccd->used == 0)
289 /* CPUID reported all 0. Should not happen. */
290 return true;
291 unsigned int maximum_leaf = ccd->qr[0x00].r.eax;
292 if (limit && maximum_leaf >= cpuid_main_leaf_limit)
293 maximum_leaf = cpuid_main_leaf_limit - 1;
295 for (unsigned int eax = 1; eax <= maximum_leaf; ++eax)
297 for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx)
298 if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx))
299 return false;
302 if (!_dl_diagnostics_cpuid_store (ccd, cpuid_extended_leaf_offset, 0x00))
303 return false;
304 maximum_leaf = ccd->qr[ccd->used - 1].r.eax;
305 if (maximum_leaf < cpuid_extended_leaf_offset)
306 /* No extended CPUID information. */
307 return true;
308 if (limit
309 && maximum_leaf - cpuid_extended_leaf_offset >= cpuid_main_leaf_limit)
310 maximum_leaf = cpuid_extended_leaf_offset + cpuid_main_leaf_limit - 1;
311 for (unsigned int eax = cpuid_extended_leaf_offset + 1;
312 eax <= maximum_leaf; ++eax)
314 for (unsigned int ecx = 0; ecx <= ccd->ecx_limit; ++ecx)
315 if (!_dl_diagnostics_cpuid_store (ccd, eax, ecx))
316 return false;
318 return true;
321 /* Call _dl_diagnostics_cpuid_collect_1 twice if necessary, the
322 second time with the limit applied. */
323 static void
324 _dl_diagnostics_cpuid_collect (struct cpuid_collected_data *ccd)
326 if (!_dl_diagnostics_cpuid_collect_1 (ccd, false))
327 _dl_diagnostics_cpuid_collect_1 (ccd, true);
329 /* Re-use the result of the official feature probing here. */
330 const struct cpu_features *cpu_features = __get_cpu_features ();
331 if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
333 unsigned int xcrlow;
334 unsigned int xcrhigh;
335 asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
336 ccd->xgetbv_ecx_0 = ((uint64_t) xcrhigh << 32) + xcrlow;
338 else
339 ccd->xgetbv_ecx_0 = 0;
342 /* Print a CPUID register value (passed as REG_VALUE) if it differs
343 from the expected REG_REFERENCE value. PROCESSOR_INDEX is the
344 process sequence number (always starting at zero; not a kernel ID). */
345 static void
346 _dl_diagnostics_cpuid_print_reg (unsigned int processor_index,
347 const struct cpuid_query *q,
348 const char *reg_label, unsigned int reg_value,
349 bool subleaf)
351 if (subleaf)
352 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
353 ".ecx[0x%x].%s=0x%x\n",
354 processor_index, q->eax, q->ecx_first, reg_label, reg_value);
355 else
356 _dl_printf ("x86.processor[0x%x].cpuid.eax[0x%x].%s=0x%x\n",
357 processor_index, q->eax, reg_label, reg_value);
360 /* Print CPUID result values in *RESULT for the query in
361 CCD->qr[CCD_IDX]. PROCESSOR_INDEX is the process sequence number
362 (always starting at zero; not a kernel ID). */
363 static void
364 _dl_diagnostics_cpuid_print_query (unsigned int processor_index,
365 struct cpuid_collected_data *ccd,
366 unsigned int ccd_idx,
367 const struct cpuid_registers *result)
369 /* Treat this as a value if subleaves if ecx isn't zero (maybe
370 within the [ecx_fist, ecx_last] range), or if eax matches its
371 neighbors. If the range is [0, ecx_limit], then the subleaves
372 are not distinct (independently of ecx_preserves_query_byte),
373 so do not report them separately. */
374 struct cpuid_query *q = &ccd->qr[ccd_idx].q;
375 bool subleaf = (q->ecx_first > 0
376 || (q->ecx_first != q->ecx_last
377 && !(q->ecx_first == 0 && q->ecx_last == ccd->ecx_limit))
378 || (ccd_idx > 0 && q->eax == ccd->qr[ccd_idx - 1].q.eax)
379 || (ccd_idx + 1 < ccd->used
380 && q->eax == ccd->qr[ccd_idx + 1].q.eax));
381 _dl_diagnostics_cpuid_print_reg (processor_index, q, "eax", result->eax,
382 subleaf);
383 _dl_diagnostics_cpuid_print_reg (processor_index, q, "ebx", result->ebx,
384 subleaf);
385 _dl_diagnostics_cpuid_print_reg (processor_index, q, "ecx", result->ecx,
386 subleaf);
387 _dl_diagnostics_cpuid_print_reg (processor_index, q, "edx", result->edx,
388 subleaf);
390 if (subleaf && q->ecx_first != q->ecx_last)
392 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
393 ".ecx[0x%x].until_ecx=0x%x\n",
394 processor_index, q->eax, q->ecx_first, q->ecx_last);
395 if (q->ecx_preserves_query_byte)
396 _dl_printf ("x86.processor[0x%x].cpuid.subleaf_eax[0x%x]"
397 ".ecx[0x%x].ecx_query_mask=0xff\n",
398 processor_index, q->eax, q->ecx_first);
402 /* Perform differential reporting of the data in *CURRENT against
403 *BASE. REQUESTED_CPU is the kernel CPU ID the thread was
404 configured to run on, or -1 if no configuration was possible.
405 PROCESSOR_INDEX is the process sequence number (always starting at
406 zero; not a kernel ID). */
407 static void
408 _dl_diagnostics_cpuid_report (struct dl_iterate_cpu *dci,
409 struct cpuid_collected_data *current,
410 struct cpuid_collected_data *base)
412 if (dci->requested_cpu >= 0)
413 _dl_printf ("x86.processor[0x%x].requested=0x%x\n",
414 dci->processor_index, dci->requested_cpu);
415 if (dci->actual_cpu >= 0)
416 _dl_printf ("x86.processor[0x%x].observed=0x%x\n",
417 dci->processor_index, dci->actual_cpu);
418 if (dci->actual_node >= 0)
419 _dl_printf ("x86.processor[0x%x].observed_node=0x%x\n",
420 dci->processor_index, dci->actual_node);
422 _dl_printf ("x86.processor[0x%x].cpuid_leaves=0x%x\n",
423 dci->processor_index, current->used);
424 _dl_printf ("x86.processor[0x%x].ecx_limit=0x%x\n",
425 dci->processor_index, current->ecx_limit);
427 unsigned int base_idx = 0;
428 for (unsigned int current_idx = 0; current_idx < current->used;
429 ++current_idx)
431 /* Report missing data on the current CPU as 0. */
432 unsigned long long int current_query
433 = cpuid_query_combined (&current->qr[current_idx].q);
434 while (base_idx < base->used
435 && cpuid_query_combined (&base->qr[base_idx].q) < current_query)
437 _dl_diagnostics_cpuid_print_query (dci->processor_index,
438 base, base_idx,
439 &cpuid_registers_zero);
440 ++base_idx;
443 if (base_idx < base->used
444 && cpuid_query_combined (&base->qr[base_idx].q) == current_query)
446 _Static_assert (sizeof (struct cpuid_registers) == 4 * 4,
447 "no padding in struct cpuid_registers");
448 if (current->qr[current_idx].q.ecx_last
449 != base->qr[base_idx].q.ecx_last
450 || memcmp (&current->qr[current_idx].r,
451 &base->qr[base_idx].r,
452 sizeof (struct cpuid_registers)) != 0)
453 /* The ECX range or the values have changed. Show the
454 new values. */
455 _dl_diagnostics_cpuid_print_query (dci->processor_index,
456 current, current_idx,
457 &current->qr[current_idx].r);
458 ++base_idx;
460 else
461 /* Data is absent in the base reference. Report the new data. */
462 _dl_diagnostics_cpuid_print_query (dci->processor_index,
463 current, current_idx,
464 &current->qr[current_idx].r);
467 if (current->xgetbv_ecx_0 != base->xgetbv_ecx_0)
469 /* Re-use the 64-bit printing routine. */
470 _dl_printf ("x86.processor[0x%x].", dci->processor_index);
471 _dl_diagnostics_print_labeled_value ("xgetbv.ecx[0x0]",
472 current->xgetbv_ecx_0);
476 static void
477 _dl_diagnostics_cpuid (void)
479 #if !HAS_CPUID
480 /* CPUID is not supported, so there is nothing to dump. */
481 if (__get_cpuid_max (0, 0) == 0)
482 return;
483 #endif
485 struct dl_iterate_cpu dic;
486 _dl_iterate_cpu_init (&dic);
488 /* Two copies of the data are used. Data is written to the index
489 (dic.processor_index & 1). The previous version against which the
490 data dump is reported is at index !(processor_index & 1). */
491 struct cpuid_collected_data ccd[2];
493 /* The initial data is presumed to be all zero. Zero results are
494 not recorded. */
495 ccd[1].used = 0;
496 ccd[1].xgetbv_ecx_0 = 0;
498 /* Run the CPUID probing on a specific CPU. There are expected
499 differences for encoding core IDs and topology information in
500 CPUID output, but some firmware/kernel bugs also may result in
501 asymmetric data across CPUs in some cases. */
502 while (_dl_iterate_cpu_next (&dic))
504 _dl_diagnostics_cpuid_collect (&ccd[dic.processor_index & 1]);
505 _dl_diagnostics_cpuid_report
506 (&dic, &ccd[dic.processor_index & 1],
507 &ccd[!(dic.processor_index & 1)]);