Update hwloc to v1.11.12
[charm.git] / contrib / hwloc / src / topology-x86.c
/*
 * Copyright © 2010-2018 Inria. All rights reserved.
 * Copyright © 2010-2013 Université Bordeaux
 * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
 * See COPYING in top-level directory.
 *
 *
 * This backend is only used when the operating system does not export
 * the necessary hardware topology information to user-space applications.
 * Currently, only the FreeBSD backend relies on this x86 backend.
 *
 * Other backends such as Linux have their own way to retrieve various
 * pieces of hardware topology information from the operating system
 * on various architectures, without having to use this x86-specific code.
 */

#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <private/misc.h>

#include <private/cpuid-x86.h>

#ifdef HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
#endif
struct hwloc_x86_backend_data_s {
  unsigned nbprocs;
  hwloc_bitmap_t apicid_set;
  int apicid_unique;
  int is_knl;
};

#define has_topoext(features) ((features)[6] & (1 << 22))
#define has_x2apic(features) ((features)[4] & (1 << 21))
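/* The feature words above follow the Linux cpufeature indexing used when
 * features[] is filled in hwloc_look_x86() below: word 4 is CPUID 0x1 ECX
 * (bit 21 = x2APIC) and word 6 is CPUID 0x80000001 ECX (bit 22 = AMD TOPOEXT).
 */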
struct cacheinfo {
  unsigned type;
  unsigned level;
  unsigned nbthreads_sharing;
  unsigned cacheid;

  unsigned linesize;
  unsigned linepart;
  int inclusive;
  int ways;
  unsigned sets;
  unsigned long size;
};
struct procinfo {
  unsigned present;
  unsigned apicid;
  unsigned max_log_proc;
  unsigned max_nbcores;
  unsigned max_nbthreads;
  unsigned packageid;
  unsigned nodeid;
  unsigned unitid;
  unsigned logprocid;
  unsigned threadid;
  unsigned coreid;
  unsigned *otherids;
  unsigned levels;
  unsigned numcaches;
  struct cacheinfo *cache;
  char cpuvendor[13];
  char cpumodel[3*4*4+1];
  unsigned cpustepping;
  unsigned cpumodelnumber;
  unsigned cpufamilynumber;
};
enum cpuid_type {
  intel,
  amd,
  zhaoxin,
  hygon,
  unknown
};
static void fill_amd_cache(struct procinfo *infos, unsigned level, int type, unsigned cpuid)
{
  struct cacheinfo *cache, *tmpcaches;
  unsigned cachenum;
  unsigned long size = 0;

  if (level == 1)
    size = ((cpuid >> 24)) << 10;
  else if (level == 2)
    size = ((cpuid >> 16)) << 10;
  else if (level == 3)
    size = ((cpuid >> 18)) << 19;
  if (!size)
    return;

  tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
  if (!tmpcaches)
    /* failed to allocate, ignore that cache */
    return;
  infos->cache = tmpcaches;
  cachenum = infos->numcaches++;

  cache = &infos->cache[cachenum];

  cache->type = type;
  cache->level = level;
  if (level <= 2)
    cache->nbthreads_sharing = 1;
  else
    cache->nbthreads_sharing = infos->max_log_proc;
  cache->linesize = cpuid & 0xff;
  cache->linepart = 0;
  cache->inclusive = 0; /* old AMD (K8-K10) caches are supposed to be exclusive */

  if (level == 1) {
    cache->ways = (cpuid >> 16) & 0xff;
    if (cache->ways == 0xff)
      /* Fully associative */
      cache->ways = -1;
  } else {
    static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
    unsigned ways = (cpuid >> 12) & 0xf;
    cache->ways = ways_tab[ways];
  }
  cache->size = size;
  cache->sets = 0;

  hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}
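/* Illustration of the decoding above with a hypothetical CPUID 0x80000006 ECX
 * value of 0x02006140: size = 0x0200 << 10 = 512KB, ways field = 6 so
 * ways_tab[6] = 8, and linesize = 0x40 = 64 bytes, i.e. a 512KB 8-way L2 with
 * 64-byte lines.
 */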
/* Fetch information from the processor itself using cpuid and store it in
 * infos for summarize() to analyze globally */
static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned cachenum;
  struct cacheinfo *cache;
  unsigned regs[4];
  unsigned _model, _extendedmodel, _family, _extendedfamily;

  infos->present = 1;

  /* on return from this function, the following fields must be set in infos:
   * packageid, nodeid, unitid, coreid, threadid, or -1
   * apicid
   * levels and levels slots in otherids[]
   * numcaches and numcaches slots in caches[]
   *
   * max_log_proc, max_nbthreads, max_nbcores, logprocid
   * are only used temporarily inside this function and its callees.
   */

  /* Get apicid, max_log_proc, packageid, logprocid from cpuid 0x01 */
  eax = 0x01;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  infos->apicid = ebx >> 24;
  if (edx & (1 << 28))
    infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
  else
    infos->max_log_proc = 1;
  hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc);
  infos->packageid = infos->apicid / infos->max_log_proc;
  infos->logprocid = infos->apicid % infos->max_log_proc;
  hwloc_debug("phys %u thread %u\n", infos->packageid, infos->logprocid);
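  /* Illustration with hypothetical leaf 0x01 values: if EBX[23:16] = 8 then
   * max_log_proc = 1 << hwloc_flsl(7) = 8, and APIC ID 0x1a (26) yields
   * packageid = 26 / 8 = 3 and logprocid = 26 % 8 = 2.
   */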
  /* Get cpu model/family/stepping numbers from same cpuid */
  _model = (eax>>4) & 0xf;
  _extendedmodel = (eax>>16) & 0xf;
  _family = (eax>>8) & 0xf;
  _extendedfamily = (eax>>20) & 0xff;
  if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == hygon) && _family == 0xf) {
    infos->cpufamilynumber = _family + _extendedfamily;
  } else {
    infos->cpufamilynumber = _family;
  }
  if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
      || ((cpuid_type == amd || cpuid_type == hygon) && _family == 0xf)
      || (cpuid_type == zhaoxin && (_family == 0x6 || _family == 0x7))) {
    infos->cpumodelnumber = _model + (_extendedmodel << 4);
  } else {
    infos->cpumodelnumber = _model;
  }
  infos->cpustepping = eax & 0xf;

  if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
      (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
    data->is_knl = 1; /* KNM is the same as KNL */

  /* Get cpu vendor string from cpuid 0x00 */
  memset(regs, 0, sizeof(regs));
  regs[0] = 0;
  hwloc_x86_cpuid(&regs[0], &regs[1], &regs[3], &regs[2]);
  memcpy(infos->cpuvendor, regs+1, 4*3);
  /* infos was calloc'ed, already ends with \0 */

  /* Get cpu model string from cpuid 0x80000002-4 */
  if (highest_ext_cpuid >= 0x80000004) {
    memset(regs, 0, sizeof(regs));
    regs[0] = 0x80000002;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel, regs, 4*4);
    regs[0] = 0x80000003;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel + 4*4, regs, 4*4);
    regs[0] = 0x80000004;
    hwloc_x86_cpuid(&regs[0], &regs[1], &regs[2], &regs[3]);
    memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
    /* infos was calloc'ed, already ends with \0 */
  }

  /* Get core/thread information from cpuid 0x80000008
   * (not supported on Intel)
   */
  if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) {
    unsigned coreidsize;
    eax = 0x80000008;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    coreidsize = (ecx >> 12) & 0xf;
    hwloc_debug("core ID size: %u\n", coreidsize);
    if (!coreidsize) {
      infos->max_nbcores = (ecx & 0xff) + 1;
    } else
      infos->max_nbcores = 1 << coreidsize;
    hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores);
    /* Still no multithreaded AMD */
    infos->max_nbthreads = 1;
    hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads);
    /* The legacy max_log_proc is deprecated; it can be smaller than max_nbcores,
     * which is the maximum number of cores that the processor could theoretically support
     * (see "Multiple Core Calculation" in the AMD CPUID specification).
     * Recompute packageid/logprocid/threadid/coreid accordingly.
     */
    infos->packageid = infos->apicid / infos->max_nbcores;
    infos->logprocid = infos->apicid % infos->max_nbcores;
    infos->threadid = infos->logprocid % infos->max_nbthreads;
    infos->coreid = infos->logprocid / infos->max_nbthreads;
    hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
  }
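  /* Illustration with a hypothetical leaf 0x80000008 ECX: if ApicIdCoreIdSize
   * (ECX[15:12]) is 4, max_nbcores = 1 << 4 = 16; with max_nbthreads = 1,
   * APIC ID 26 then maps to packageid 1, coreid 10, threadid 0.
   */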
  infos->numcaches = 0;
  infos->cache = NULL;

  /* Get apicid, nodeid, unitid from cpuid 0x8000001e
   * and cache information from cpuid 0x8000001d
   * (AMD topology extension)
   */
  if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
    unsigned apic_id, node_id, nodes_per_proc;

    /* the code below doesn't want any other cache yet */
    assert(!infos->numcaches);

    eax = 0x8000001e;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    infos->apicid = apic_id = eax;

    if (infos->cpufamilynumber == 0x16) {
      /* ecx is reserved */
      node_id = 0;
      nodes_per_proc = 1;
    } else {
      /* AMD other families or Hygon family 18h */
      node_id = ecx & 0xff;
      nodes_per_proc = ((ecx >> 8) & 7) + 1;
    }
    infos->nodeid = node_id;
    if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
        || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
      hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
    }

    if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
      unsigned unit_id, cores_per_unit;
      infos->unitid = unit_id = ebx & 0xff;
      cores_per_unit = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
    } else {
      unsigned core_id, threads_per_core;
      infos->coreid = core_id = ebx & 0xff;
      threads_per_core = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
    }

    for (cachenum = 0; ; cachenum++) {
      unsigned type;
      eax = 0x8000001d;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      type = eax & 0x1f;
      if (type == 0)
        break;
      infos->numcaches++;
    }

    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
    if (cache) {
      for (cachenum = 0; ; cachenum++) {
        unsigned long linesize, linepart, ways, sets;
        unsigned type;
        eax = 0x8000001d;
        ecx = cachenum;
        hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

        type = eax & 0x1f;

        if (type == 0)
          break;

        cache->type = type;
        cache->level = (eax >> 5) & 0x7;
        /* Note: actually number of cores */
        cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

        cache->linesize = linesize = (ebx & 0xfff) + 1;
        cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
        ways = ((ebx >> 22) & 0x3ff) + 1;

        if (eax & (1 << 9))
          /* Fully associative */
          cache->ways = -1;
        else
          cache->ways = ways;
        cache->sets = sets = ecx + 1;
        cache->size = linesize * linepart * ways * sets;
        cache->inclusive = edx & 0x2;

        hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);

        cache++;
      }
    } else {
      infos->numcaches = 0;
    }
  } else {
    /* If there's no topoext,
     * get cache information from cpuid 0x80000005 and 0x80000006
     * (not supported on Intel)
     */
    if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) {
      eax = 0x80000005;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      fill_amd_cache(infos, 1, 1, ecx); /* L1d */
      fill_amd_cache(infos, 1, 2, edx); /* L1i */
    }
    if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) {
      eax = 0x80000006;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      if (ecx & 0xf000)
        /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
         * Could be useful if some Intels (at least before Core micro-architecture)
         * support this leaf without leaf 0x4.
         */
        fill_amd_cache(infos, 2, 3, ecx); /* L2u */
      if (edx & 0xf000)
        fill_amd_cache(infos, 3, 3, edx); /* L3u */
    }
  }

  /* Get thread/core + cache information from cpuid 0x04
   * (not supported on AMD)
   */
  if ((cpuid_type != amd && cpuid_type != hygon) && highest_cpuid >= 0x04) {
    unsigned level;
    struct cacheinfo *tmpcaches;
    unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */

    for (cachenum = 0; ; cachenum++) {
      unsigned type;
      eax = 0x04;
      ecx = cachenum;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

      type = eax & 0x1f;

      hwloc_debug("cache %u type %u\n", cachenum, type);

      if (type == 0)
        break;
      level = (eax >> 5) & 0x7;
      if (data->is_knl && level == 3)
        /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
        break;
      infos->numcaches++;

      if (!cachenum) {
        /* by the way, get thread/core information from the first cache */
        infos->max_nbcores = ((eax >> 26) & 0x3f) + 1;
        infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores;
        hwloc_debug("thus %u threads\n", infos->max_nbthreads);
        infos->threadid = infos->logprocid % infos->max_nbthreads;
        infos->coreid = infos->logprocid / infos->max_nbthreads;
        hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
      }
    }
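    /* Illustration with hypothetical leaf 0x04 values: if EAX[31:26] = 7 then
     * max_nbcores = 8; with max_log_proc = 16 this gives max_nbthreads = 2, so
     * logprocid 5 becomes threadid 1 of coreid 2.
     */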
    tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache));
    if (!tmpcaches) {
      infos->numcaches = oldnumcaches;
    } else {
      infos->cache = tmpcaches;
      cache = &infos->cache[oldnumcaches];

      for (cachenum = 0; ; cachenum++) {
        unsigned long linesize, linepart, ways, sets;
        unsigned type;
        eax = 0x04;
        ecx = cachenum;
        hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);

        type = eax & 0x1f;

        if (type == 0)
          break;
        level = (eax >> 5) & 0x7;
        if (data->is_knl && level == 3)
          /* KNL reports wrong L3 information (size always 0, cpuset always the entire machine), ignore it */
          break;

        cache->type = type;
        cache->level = level;
        cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

        cache->linesize = linesize = (ebx & 0xfff) + 1;
        cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
        ways = ((ebx >> 22) & 0x3ff) + 1;
        if (eax & (1 << 9))
          /* Fully associative */
          cache->ways = -1;
        else
          cache->ways = ways;
        cache->sets = sets = ecx + 1;
        cache->size = linesize * linepart * ways * sets;
        cache->inclusive = edx & 0x2;

        hwloc_debug("cache %u type %u L%u t%u c%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10);

        cache++;
      }
    }
  }
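  /* Illustration of the size computation above with hypothetical values:
   * linesize 64, linepart 1, ways 8, sets 64 gives 64*1*8*64 = 32KB,
   * a typical L1 data cache.
   */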
  /* Get package/core/thread information from cpuid 0x0b
   * (Intel x2APIC)
   */
  if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) {
    unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
    for (level = 0; ; level++) {
      ecx = level;
      eax = 0x0b;
      hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
      if (!eax && !ebx)
        break;
    }
    if (level) {
      infos->otherids = malloc(level * sizeof(*infos->otherids));
      if (infos->otherids) {
        infos->levels = level;
        for (level = 0; ; level++) {
          ecx = level;
          eax = 0x0b;
          hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
          if (!eax && !ebx)
            break;
          apic_nextshift = eax & 0x1f;
          apic_number = ebx & 0xffff;
          apic_type = (ecx & 0xff00) >> 8;
          apic_id = edx;
          id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
          hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
          infos->apicid = apic_id;
          infos->otherids[level] = UINT_MAX;
          switch (apic_type) {
          case 1:
            infos->threadid = id;
            break;
          case 2:
            infos->coreid = id;
            break;
          default:
            hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
            infos->otherids[level] = apic_id >> apic_shift;
            break;
          }
          apic_shift = apic_nextshift;
        }
        infos->apicid = apic_id;
        infos->packageid = apic_id >> apic_shift;
        hwloc_debug("x2APIC remainder: %u\n", infos->packageid);
        hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
      }
    }
  }
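  /* Illustration with a hypothetical x2APIC ID of 0x35 (53): an SMT level with
   * nextshift 1 gives threadid = 53 & 0x1 = 1, a core level with nextshift 4
   * gives coreid = (53 >> 1) & 0x7 = 2, and the remaining bits give
   * packageid = 53 >> 4 = 3.
   */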
  /* Now that we have all info, compute cacheids and apply quirks */
  for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
    cache = &infos->cache[cachenum];

    /* default cacheid value */
    cache->cacheid = infos->apicid / cache->nbthreads_sharing;

    if (cpuid_type == amd) {
      /* AMD quirks */
      if (infos->cpufamilynumber == 0x17
          && cache->level == 3 && cache->nbthreads_sharing == 6) {
        /* AMD family 0x17 always shares L3 between 8 APIC ids,
         * even when only 6 APIC ids are enabled and reported in nbthreads_sharing
         * (on 24-core CPUs).
         */
        cache->cacheid = infos->apicid / 8;

      } else if (infos->cpufamilynumber == 0x10 && infos->cpumodelnumber == 0x9
                 && cache->level == 3
                 && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
        /* Fix AMD family 0x10 model 0x9 (Magny-Cours) with 8 or 12 cores.
         * The L3 (and its associativity) is actually split into two halves.
         */
        if (cache->nbthreads_sharing == 16)
          cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
        cache->nbthreads_sharing /= 2;
        cache->size /= 2;
        if (cache->ways != -1)
          cache->ways /= 2;
        /* AMD Magny-Cours 12-core processors reserve APIC ids as AAAAAABBBBBB....
         * among the first L3 (A), the second L3 (B), and non-existing cores (.).
         * On multi-socket servers, L3 in non-first sockets may have APIC id ranges
         * such as [16-21] that are not aligned on multiples of nbthreads_sharing (6).
         * That means we can't just compare apicid/nbthreads_sharing to identify siblings.
         */
        cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
          + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */

      } else if (infos->cpufamilynumber == 0x15
                 && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
                 && cache->level == 3 && cache->nbthreads_sharing == 6) {
        /* AMD Bulldozer and Piledriver 12-core processors have the same APIC ids as Magny-Cours above,
         * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
         */
        cache->cacheid = (infos->apicid % infos->max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
          + 2 * (infos->apicid / infos->max_log_proc); /* add 2 caches per previous package */
      }
    } else if (cpuid_type == hygon) {
      if (infos->cpufamilynumber == 0x18
          && cache->level == 3 && cache->nbthreads_sharing == 6) {
        /* Hygon family 0x18 always shares L3 between 8 APIC ids,
         * even when only 6 APIC ids are enabled and reported in nbthreads_sharing
         * (on 24-core CPUs).
         */
        cache->cacheid = infos->apicid / 8;
      }
    }
  }

  if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
    data->apicid_unique = 0;
  else
    hwloc_bitmap_set(data->apicid_set, infos->apicid);
}
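/* Illustration of the Magny-Cours L3 quirk above with hypothetical values:
 * with max_log_proc = 16 and nbthreads_sharing halved to 6, APIC ID 19 on the
 * second package gives cacheid = (19 % 16) / 6 + 2 * (19 / 16) = 0 + 2 = 2,
 * i.e. the first L3 half of that package.
 */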
static void
hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int nodup)
{
  char number[8];
  hwloc_obj_add_info_nodup(obj, "CPUVendor", info->cpuvendor, nodup);
  snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
  hwloc_obj_add_info_nodup(obj, "CPUFamilyNumber", number, nodup);
  snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
  hwloc_obj_add_info_nodup(obj, "CPUModelNumber", number, nodup);
  if (info->cpumodel[0]) {
    const char *c = info->cpumodel;
    while (*c == ' ')
      c++;
    hwloc_obj_add_info_nodup(obj, "CPUModel", c, nodup);
  }
  snprintf(number, sizeof(number), "%u", info->cpustepping);
  hwloc_obj_add_info_nodup(obj, "CPUStepping", number, nodup);
}
/* Analyse information stored in infos, and build/annotate topology levels accordingly */
static int summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
  unsigned i, j, l, level, type;
  unsigned nbpackages = 0;
  int one = -1;
  unsigned next_group_depth = topology->next_group_depth;
  int caches_added = 0;
  hwloc_bitmap_t remaining_cpuset;

  for (i = 0; i < nbprocs; i++)
    if (infos[i].present) {
      hwloc_bitmap_set(complete_cpuset, i);
      one = i;
    }

  if (one == -1) {
    hwloc_bitmap_free(complete_cpuset);
    return 0;
  }

  remaining_cpuset = hwloc_bitmap_alloc();

  /* Ideally, when fulldiscovery=0, we could add any object that doesn't exist yet.
   * But what if the x86 and the native backends disagree because one is buggy? Which one to trust?
   * Only annotate existing objects for now.
   */

  /* Look for packages */
  if (fulldiscovery) {
    hwloc_bitmap_t package_cpuset;
    hwloc_obj_t package;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;

      package_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].packageid == packageid) {
          hwloc_bitmap_set(package_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      package = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, packageid);
      package->cpuset = package_cpuset;

      hwloc_x86_add_cpuinfos(package, &infos[i], 0);

      hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
                              packageid, package_cpuset);
      hwloc_insert_object_by_cpuset(topology, package);
      nbpackages++;
    }

  } else {
    /* Annotate previously-existing packages */
    hwloc_obj_t package = NULL;
    int same = 1;
    nbpackages = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PACKAGE);
    /* check whether all packages have the same info */
    for(i=1; i<nbprocs; i++) {
      if (strcmp(infos[i].cpumodel, infos[0].cpumodel)) {
        same = 0;
        break;
      }
    }
    /* now iterate over packages and annotate them */
    while ((package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PACKAGE, package)) != NULL) {
      if (package->os_index == (unsigned) -1) {
        /* try to fix the package OS index if unknown.
         * FIXME: ideally, we should check all bits in case x86 and the native backend disagree.
         */
        for(i=0; i<nbprocs; i++) {
          if (hwloc_bitmap_isset(package->cpuset, i)) {
            package->os_index = infos[i].packageid;
            break;
          }
        }
      }
      for(i=0; i<nbprocs; i++) {
        /* if there's a single package, it's the one we want.
         * if the index is ok, it's the one we want.
         * if the index is unknown but all packages have the same id, that's fine.
         */
        if (nbpackages == 1 || infos[i].packageid == package->os_index || (same && package->os_index == (unsigned) -1)) {
          hwloc_x86_add_cpuinfos(package, &infos[i], 1);
          break;
        }
      }
    }
  }
  /* If there was no package, annotate the Machine instead */
  if ((!nbpackages) && infos[0].cpumodel[0]) {
    hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[0], 1);
  }
  /* Look for NUMA nodes inside packages */
  if (fulldiscovery) {
    hwloc_bitmap_t node_cpuset;
    hwloc_obj_t node;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned nodeid = infos[i].nodeid;

      if (nodeid == (unsigned)-1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      node_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].nodeid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
          hwloc_bitmap_set(node_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      node = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, nodeid);
      node->cpuset = node_cpuset;
      node->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(node->nodeset, nodeid);
      hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
                              nodeid, node_cpuset);
      hwloc_insert_object_by_cpuset(topology, node);
    }
  }
  if (fulldiscovery) {
    hwloc_bitmap_t unit_cpuset;
    hwloc_obj_t unit;

    /* Look for Compute units inside packages */
    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned unitid = infos[i].unitid;

      if (unitid == (unsigned)-1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      unit_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].unitid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
          hwloc_bitmap_set(unit_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      unit = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unitid);
      unit->cpuset = unit_cpuset;
      hwloc_obj_add_info(unit, "Type", "ComputeUnit");
      hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
                              unitid, unit_cpuset);
      hwloc_insert_object_by_cpuset(topology, unit);
    }
  }
  /* Look for unknown objects */
  if (infos[one].otherids) {
    /* level is unsigned: iterate downwards until it wraps past zero */
    for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
      if (infos[one].otherids[level] != UINT_MAX) {
        hwloc_bitmap_t unknown_cpuset;
        hwloc_obj_t unknown_obj;

        hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
        while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
          unsigned unknownid = infos[i].otherids[level];

          unknown_cpuset = hwloc_bitmap_alloc();
          for (j = i; j < nbprocs; j++) {
            if (infos[j].otherids[level] == unknownid) {
              hwloc_bitmap_set(unknown_cpuset, j);
              hwloc_bitmap_clr(remaining_cpuset, j);
            }
          }
          unknown_obj = hwloc_alloc_setup_object(HWLOC_OBJ_GROUP, unknownid);
          unknown_obj->cpuset = unknown_cpuset;
          unknown_obj->os_level = level;
          unknown_obj->attr->group.depth = topology->next_group_depth + level;
          if (next_group_depth <= topology->next_group_depth + level)
            next_group_depth = topology->next_group_depth + level + 1;
          hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
                                   level, unknownid, unknown_cpuset);
          hwloc_insert_object_by_cpuset(topology, unknown_obj);
        }
      }
    }
  }
  /* Look for cores */
  if (fulldiscovery) {
    hwloc_bitmap_t core_cpuset;
    hwloc_obj_t core;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned nodeid = infos[i].nodeid;
      unsigned coreid = infos[i].coreid;

      if (coreid == (unsigned) -1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      core_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].coreid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
          hwloc_bitmap_set(core_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid);
      core->cpuset = core_cpuset;
      hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
                              coreid, core_cpuset);
      hwloc_insert_object_by_cpuset(topology, core);
    }
  }

  /* Look for PUs */
  if (fulldiscovery) {
    hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
    for (i=0; i<nbprocs; i++)
      if (infos[i].present) { /* Only add present PU. We don't know if others actually exist */
        struct hwloc_obj *obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, i);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_only(obj->cpuset, i);
        hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
  }
  /* Look for caches */
  /* First find max level */
  level = 0;
  for (i = 0; i < nbprocs; i++)
    for (j = 0; j < infos[i].numcaches; j++)
      if (infos[i].cache[j].level > level)
        level = infos[i].cache[j].level;
  while (level > 0) {
    for (type = 1; type <= 3; type++) {
      /* Look for caches of that type at level level */
      hwloc_obj_t cache;

      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
        hwloc_bitmap_t puset;
        int depth;

        for (l = 0; l < infos[i].numcaches; l++) {
          if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
            break;
        }
        if (l == infos[i].numcaches) {
          /* no cache Llevel of that type in i */
          hwloc_bitmap_clr(remaining_cpuset, i);
          continue;
        }

        puset = hwloc_bitmap_alloc();
        hwloc_bitmap_set(puset, i);
        depth = hwloc_get_cache_type_depth(topology, level,
                                           type == 1 ? HWLOC_OBJ_CACHE_DATA : type == 2 ? HWLOC_OBJ_CACHE_INSTRUCTION : HWLOC_OBJ_CACHE_UNIFIED);
        if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
          cache = hwloc_get_next_obj_covering_cpuset_by_depth(topology, puset, depth, NULL);
        else
          cache = NULL;
        hwloc_bitmap_free(puset);

        if (cache) {
          /* Found cache above that PU, annotate if no such attribute yet */
          if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
            hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
          hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
        } else {
          /* Add the missing cache */
          hwloc_bitmap_t cache_cpuset;
          unsigned packageid = infos[i].packageid;
          unsigned cacheid = infos[i].cache[l].cacheid;
          /* Now look for others sharing it */
          cache_cpuset = hwloc_bitmap_alloc();
          for (j = i; j < nbprocs; j++) {
            unsigned l2;
            for (l2 = 0; l2 < infos[j].numcaches; l2++) {
              if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
                break;
            }
            if (l2 == infos[j].numcaches) {
              /* no cache Llevel of that type in j */
              hwloc_bitmap_clr(remaining_cpuset, j);
              continue;
            }
            if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
              hwloc_bitmap_set(cache_cpuset, j);
              hwloc_bitmap_clr(remaining_cpuset, j);
            }
          }
          cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid);
          cache->attr->cache.depth = level;
          cache->attr->cache.size = infos[i].cache[l].size;
          cache->attr->cache.linesize = infos[i].cache[l].linesize;
          cache->attr->cache.associativity = infos[i].cache[l].ways;
          switch (infos[i].cache[l].type) {
          case 1:
            cache->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            break;
          case 2:
            cache->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
            break;
          case 3:
            cache->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
            break;
          }
          cache->cpuset = cache_cpuset;
          hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
          hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
                                   level, cacheid, cache_cpuset);
          hwloc_insert_object_by_cpuset(topology, cache);
          caches_added++;
        }
      }
    }
    level--;
  }

  hwloc_bitmap_free(remaining_cpuset);
  hwloc_bitmap_free(complete_cpuset);
  topology->next_group_depth = next_group_depth;

  return fulldiscovery || caches_added;
}
static int
look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
           unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
           int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
           int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t orig_cpuset = hwloc_bitmap_alloc();
  hwloc_bitmap_t set;
  unsigned i;
  int ret = 0;

  if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
    hwloc_bitmap_free(orig_cpuset);
    return -1;
  }

  set = hwloc_bitmap_alloc();

  for (i = 0; i < nbprocs; i++) {
    hwloc_bitmap_only(set, i);
    hwloc_debug("binding to CPU%u\n", i);
    if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
      hwloc_debug("could not bind to CPU%u: %s\n", i, strerror(errno));
      continue;
    }
    look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
  }

  set_cpubind(topology, orig_cpuset, 0);
  hwloc_bitmap_free(set);
  hwloc_bitmap_free(orig_cpuset);

  if (!data->apicid_unique)
    fulldiscovery = 0;
  else
    ret = summarize(backend, infos, fulldiscovery);
  return ret;
}
#if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
#include <sys/param.h>
#include <sys/cpuset.h>
typedef cpusetid_t hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state)
{
  /* temporarily make all cpus available during discovery */
  cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
  cpuset_setid(CPU_WHICH_PID, -1, 0);
}
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state)
{
  /* restore initial cpuset */
  cpuset_setid(CPU_WHICH_PID, -1, *state);
}
#else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
typedef void * hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused) { }
#endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
/* GenuineIntel */
#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))

/* AuthenticAMD */
#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))

/* HYGON "HygonGenuine" */
#define HYGON_EBX ('H' | ('y'<<8) | ('g'<<16) | ('o'<<24))
#define HYGON_EDX ('n' | ('G'<<8) | ('e'<<16) | ('n'<<24))
#define HYGON_ECX ('u' | ('i'<<8) | ('n'<<16) | ('e'<<24))

/* (Zhaoxin) CentaurHauls */
#define ZX_EBX ('C' | ('e'<<8) | ('n'<<16) | ('t'<<24))
#define ZX_EDX ('a' | ('u'<<8) | ('r'<<16) | ('H'<<24))
#define ZX_ECX ('a' | ('u'<<8) | ('l'<<16) | ('s'<<24))
/* (Zhaoxin) Shanghai */
#define SH_EBX (' ' | (' '<<8) | ('S'<<16) | ('h'<<24))
#define SH_EDX ('a' | ('n'<<8) | ('g'<<16) | ('h'<<24))
#define SH_ECX ('a' | ('i'<<8) | (' '<<16) | (' '<<24))
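/* CPUID leaf 0 returns the vendor string in EBX, EDX, ECX order, e.g. "Genu",
 * "ineI", "ntel" for "GenuineIntel", which is why the constants above are
 * grouped per register.
 */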
/* fake cpubind for when nbprocs=1 and no binding support */
static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}
static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_const_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}
static
int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned i;
  unsigned highest_cpuid;
  unsigned highest_ext_cpuid;
  /* This stores cpuid features with the same indexing as Linux */
  unsigned features[10] = { 0 };
  struct procinfo *infos = NULL;
  enum cpuid_type cpuid_type = unknown;
  hwloc_x86_os_state_t os_state;
  struct hwloc_binding_hooks hooks;
  struct hwloc_topology_support support;
  struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
  int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
  int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
  int ret = -1;

  /* check if binding works */
  memset(&hooks, 0, sizeof(hooks));
  support.membind = &memsupport;
  hwloc_set_native_binding_hooks(&hooks, &support);
  if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
    get_cpubind = hooks.get_thisthread_cpubind;
    set_cpubind = hooks.set_thisthread_cpubind;
  } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
    get_cpubind = hooks.get_thisproc_cpubind;
    set_cpubind = hooks.set_thisproc_cpubind;
  } else {
    /* we need binding support if there are multiple PUs */
    if (nbprocs > 1)
      goto out;
    get_cpubind = fake_get_cpubind;
    set_cpubind = fake_set_cpubind;
  }

  if (!hwloc_have_x86_cpuid())
    goto out;

  infos = calloc(nbprocs, sizeof(struct procinfo));
  if (NULL == infos)
    goto out;
  for (i = 0; i < nbprocs; i++) {
    infos[i].nodeid = (unsigned) -1;
    infos[i].packageid = (unsigned) -1;
    infos[i].unitid = (unsigned) -1;
    infos[i].coreid = (unsigned) -1;
    infos[i].threadid = (unsigned) -1;
  }

  eax = 0x00;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  highest_cpuid = eax;
  if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
    cpuid_type = intel;
  if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
    cpuid_type = amd;
  /* support for zhaoxin x86 cpu vendor id */
  if (ebx == ZX_EBX && ecx == ZX_ECX && edx == ZX_EDX)
    cpuid_type = zhaoxin;
  if (ebx == SH_EBX && ecx == SH_ECX && edx == SH_EDX)
    cpuid_type = zhaoxin;
  else if (ebx == HYGON_EBX && ecx == HYGON_ECX && edx == HYGON_EDX)
    cpuid_type = hygon;

  hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
  if (highest_cpuid < 0x01) {
    goto out_with_infos;
  }

  eax = 0x01;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  features[0] = edx;
  features[4] = ecx;

  eax = 0x80000000;
  hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
  highest_ext_cpuid = eax;

  hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);

  if (highest_cpuid >= 0x7) {
    eax = 0x7;
    ecx = 0;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    features[9] = ebx;
  }

  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
    eax = 0x80000001;
    hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx);
    features[1] = edx;
    features[6] = ecx;
  }

  hwloc_x86_os_state_save(&os_state);

  ret = look_procs(backend, infos, fulldiscovery,
                   highest_cpuid, highest_ext_cpuid, features, cpuid_type,
                   get_cpubind, set_cpubind);
  if (ret >= 0)
    /* success, we're done */
    goto out_with_os_state;

  if (nbprocs == 1) {
    /* only one processor, no need to bind */
    look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type);
    ret = summarize(backend, infos, fulldiscovery);
  }

out_with_os_state:
  hwloc_x86_os_state_restore(&os_state);

out_with_infos:
  if (NULL != infos) {
    for (i = 0; i < nbprocs; i++) {
      free(infos[i].cache);
      if (infos[i].otherids)
        free(infos[i].otherids);
    }
    free(infos);
  }

out:
  return ret;
}
static int
hwloc_x86_discover(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  int alreadypus = 0;
  int ret;

#if HAVE_DECL_RUNNING_ON_VALGRIND
  if (RUNNING_ON_VALGRIND) {
    fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n");
    return 0;
  }
#endif

  data->nbprocs = hwloc_fallback_nbprocessors(topology);

  if (!topology->is_thissystem) {
    hwloc_debug("%s", "\nno x86 detection (not thissystem)\n");
    return 0;
  }

  if (topology->levels[0][0]->cpuset) {
    /* somebody else discovered things */
    if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
      /* only PUs were discovered, as much as we would have; complete the topology with everything else */
      alreadypus = 1;
      goto fulldiscovery;
    }

    /* several object types were added, we can't easily complete, just do partial discovery */
    ret = hwloc_look_x86(backend, 0);
    if (ret)
      hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
    return ret;
  } else {
    /* topology is empty, initialize it */
    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
  }

fulldiscovery:
  if (hwloc_look_x86(backend, 1) < 0) {
    /* if it failed, create PUs */
    if (!alreadypus)
      hwloc_setup_pu_level(topology, data->nbprocs);
  }

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");

#ifdef HAVE_UNAME
  hwloc_add_uname_info(topology, NULL); /* we already know is_thissystem() is true */
#else
  /* uname isn't available, manually setup the "Architecture" info */
#ifdef HWLOC_X86_64_ARCH
  hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
#else
  hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
#endif
#endif

  return 1;
}
static void
hwloc_x86_backend_disable(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  hwloc_bitmap_free(data->apicid_set);
  free(data);
}
static struct hwloc_backend *
hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
                                const void *_data1 __hwloc_attribute_unused,
                                const void *_data2 __hwloc_attribute_unused,
                                const void *_data3 __hwloc_attribute_unused)
{
  struct hwloc_backend *backend;
  struct hwloc_x86_backend_data_s *data;

  backend = hwloc_backend_alloc(component);
  if (!backend)
    goto out;

  data = malloc(sizeof(*data));
  if (!data) {
    errno = ENOMEM;
    goto out_with_backend;
  }

  backend->private_data = data;
  backend->flags = HWLOC_BACKEND_FLAG_NEED_LEVELS;
  backend->discover = hwloc_x86_discover;
  backend->disable = hwloc_x86_backend_disable;

  /* default values */
  data->is_knl = 0;
  data->apicid_set = hwloc_bitmap_alloc();
  data->apicid_unique = 1;

  return backend;

 out_with_backend:
  free(backend);
 out:
  return NULL;
}
static struct hwloc_disc_component hwloc_x86_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_CPU,
  "x86",
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
  hwloc_x86_component_instantiate,
  45, /* between native and no_os */
  NULL
};

const struct hwloc_component hwloc_x86_component = {
  HWLOC_COMPONENT_ABI,
  NULL, NULL,
  HWLOC_COMPONENT_TYPE_DISC,
  0, /* flags */
  &hwloc_x86_disc_component
};