2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
14 * 3. Neither the name of The DragonFly Project nor the names of its
15 * contributors may be used to endorse or promote products derived
16 * from this software without specific, prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysctl.h>
38 #include <sys/cpu_topology.h>
40 #include <machine/smp.h>
/*
 * Size, in bytes, of the indentation scratch buffer used when printing
 * the topology tree (three bytes per topology level).
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand when it appears inside a larger expression such as
 * "x / INDENT_BUF_SIZE" or "x % INDENT_BUF_SIZE".
 */
#define INDENT_BUF_SIZE	(LEVEL_NO * 3)
49 /* Per-cpu sysctl nodes and info */
50 struct per_cpu_sysctl_info
{
51 struct sysctl_ctx_list sysctl_ctx
;
52 struct sysctl_oid
*sysctl_tree
;
56 char physical_siblings
[8*MAXCPU
];
57 char core_siblings
[8*MAXCPU
];
59 typedef struct per_cpu_sysctl_info per_cpu_sysctl_info_t
;
61 static cpu_node_t cpu_topology_nodes
[MAXCPU
]; /* Memory for topology */
62 static cpu_node_t
*cpu_root_node
; /* Root node pointer */
64 static struct sysctl_ctx_list cpu_topology_sysctl_ctx
;
65 static struct sysctl_oid
*cpu_topology_sysctl_tree
;
66 static char cpu_topology_members
[8*MAXCPU
];
67 static per_cpu_sysctl_info_t
*pcpu_sysctl
;
68 static void sbuf_print_cpuset(struct sbuf
*sb
, cpumask_t
*mask
);
70 int cpu_topology_levels_number
= 1;
71 int cpu_topology_core_ids
;
72 int cpu_topology_phys_ids
;
73 cpu_node_t
*root_cpu_node
;
75 MALLOC_DEFINE(M_PCPUSYS
, "pcpusys", "pcpu sysctl topology");
77 SYSCTL_INT(_hw
, OID_AUTO
, cpu_topology_core_ids
, CTLFLAG_RW
,
78 &cpu_topology_core_ids
, 0, "# of real cores per package");
79 SYSCTL_INT(_hw
, OID_AUTO
, cpu_topology_phys_ids
, CTLFLAG_RW
,
80 &cpu_topology_phys_ids
, 0, "# of physical packages");
82 /* Get the next valid apicid starting
83 * from current apicid (curr_apicid
86 get_next_valid_apicid(int curr_apicid
)
88 int next_apicid
= curr_apicid
;
92 while(get_cpuid_from_apicid(next_apicid
) == -1 &&
93 next_apicid
< NAPICID
);
94 if (next_apicid
== NAPICID
) {
95 kprintf("Warning: No next valid APICID found. Returning -1\n");
101 /* Generic topology tree. The parameters have the following meaning:
102 * - children_no_per_level : the number of children on each level
103 * - level_types : the type of the level (THREAD, CORE, CHIP, etc)
104 * - cur_level : the current level of the tree
105 * - node : the current node
106 * - last_free_node : the last free node in the global array.
107 * - cpuid : basically these are the ids of the leaves
110 build_topology_tree(int *children_no_per_level
,
111 uint8_t *level_types
,
114 cpu_node_t
**last_free_node
,
119 node
->child_no
= children_no_per_level
[cur_level
];
120 node
->type
= level_types
[cur_level
];
121 CPUMASK_ASSZERO(node
->members
);
122 node
->compute_unit_id
= -1;
124 if (node
->child_no
== 0) {
125 *apicid
= get_next_valid_apicid(*apicid
);
126 CPUMASK_ASSBIT(node
->members
, get_cpuid_from_apicid(*apicid
));
130 if (node
->parent_node
== NULL
)
131 root_cpu_node
= node
;
133 for (i
= 0; i
< node
->child_no
; i
++) {
134 node
->child_node
[i
] = *last_free_node
;
137 node
->child_node
[i
]->parent_node
= node
;
139 build_topology_tree(children_no_per_level
,
146 CPUMASK_ORMASK(node
->members
, node
->child_node
[i
]->members
);
150 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
152 migrate_elements(cpu_node_t
**a
, int n
, int pos
)
156 for (i
= pos
; i
< n
- 1 ; i
++) {
163 /* Build CPU topology. The detection is made by comparing the
164 * chip, core and logical IDs of each CPU with the IDs of the
165 * BSP. When we find a match, the CPUs are siblings at that level.
168 build_cpu_topology(int assumed_ncpus
)
170 detect_cpu_topology();
173 int threads_per_core
= 0;
174 int cores_per_chip
= 0;
175 int chips_per_package
= 0;
176 int children_no_per_level
[LEVEL_NO
];
177 uint8_t level_types
[LEVEL_NO
];
180 cpu_node_t
*root
= &cpu_topology_nodes
[0];
181 cpu_node_t
*last_free_node
= root
+ 1;
184 * Assume that the topology is uniform.
185 * Find the number of siblings within chip
186 * and within core to build up the topology.
188 for (i
= 0; i
< assumed_ncpus
; i
++) {
191 CPUMASK_ASSBIT(mask
, i
);
194 /* smp_active_mask has not been initialized yet, ignore */
195 if (CPUMASK_TESTMASK(mask
, smp_active_mask
) == 0)
199 if (get_chip_ID(BSPID
) == get_chip_ID(i
))
204 if (get_core_number_within_chip(BSPID
) ==
205 get_core_number_within_chip(i
))
209 cores_per_chip
/= threads_per_core
;
210 chips_per_package
= assumed_ncpus
/ (cores_per_chip
* threads_per_core
);
213 kprintf("CPU Topology: cores_per_chip: %d; threads_per_core: %d; chips_per_package: %d;\n",
214 cores_per_chip
, threads_per_core
, chips_per_package
);
216 if (threads_per_core
> 1) { /* HT available - 4 levels */
218 children_no_per_level
[0] = chips_per_package
;
219 children_no_per_level
[1] = cores_per_chip
;
220 children_no_per_level
[2] = threads_per_core
;
221 children_no_per_level
[3] = 0;
223 level_types
[0] = PACKAGE_LEVEL
;
224 level_types
[1] = CHIP_LEVEL
;
225 level_types
[2] = CORE_LEVEL
;
226 level_types
[3] = THREAD_LEVEL
;
228 build_topology_tree(children_no_per_level
,
235 cpu_topology_levels_number
= 4;
237 } else if (cores_per_chip
> 1) { /* No HT available - 3 levels */
239 children_no_per_level
[0] = chips_per_package
;
240 children_no_per_level
[1] = cores_per_chip
;
241 children_no_per_level
[2] = 0;
243 level_types
[0] = PACKAGE_LEVEL
;
244 level_types
[1] = CHIP_LEVEL
;
245 level_types
[2] = CORE_LEVEL
;
247 build_topology_tree(children_no_per_level
,
254 cpu_topology_levels_number
= 3;
256 } else { /* No HT and no Multi-Core - 2 levels */
258 children_no_per_level
[0] = chips_per_package
;
259 children_no_per_level
[1] = 0;
261 level_types
[0] = PACKAGE_LEVEL
;
262 level_types
[1] = CHIP_LEVEL
;
264 build_topology_tree(children_no_per_level
,
271 cpu_topology_levels_number
= 2;
275 cpu_root_node
= root
;
278 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL)
279 if (fix_amd_topology() == 0) {
280 int visited
[MAXCPU
], i
, j
, pos
, cpuid
;
281 cpu_node_t
*leaf
, *parent
;
283 bzero(visited
, MAXCPU
* sizeof(int));
285 for (i
= 0; i
< assumed_ncpus
; i
++) {
286 if (visited
[i
] == 0) {
289 leaf
= get_cpu_node_by_cpuid(i
);
291 if (leaf
->type
== CORE_LEVEL
) {
292 parent
= leaf
->parent_node
;
294 last_free_node
->child_node
[0] = leaf
;
295 last_free_node
->child_no
= 1;
296 last_free_node
->members
= leaf
->members
;
297 last_free_node
->compute_unit_id
= leaf
->compute_unit_id
;
298 last_free_node
->parent_node
= parent
;
299 last_free_node
->type
= CORE_LEVEL
;
302 for (j
= 0; j
< parent
->child_no
; j
++) {
303 if (parent
->child_node
[j
] != leaf
) {
305 cpuid
= BSFCPUMASK(parent
->child_node
[j
]->members
);
306 if (visited
[cpuid
] == 0 &&
307 parent
->child_node
[j
]->compute_unit_id
== leaf
->compute_unit_id
) {
309 last_free_node
->child_node
[last_free_node
->child_no
] = parent
->child_node
[j
];
310 last_free_node
->child_no
++;
311 CPUMASK_ORMASK(last_free_node
->members
, parent
->child_node
[j
]->members
);
313 parent
->child_node
[j
]->type
= THREAD_LEVEL
;
314 parent
->child_node
[j
]->parent_node
= last_free_node
;
317 migrate_elements(parent
->child_node
, parent
->child_no
, j
);
325 if (last_free_node
->child_no
> 1) {
326 parent
->child_node
[pos
] = last_free_node
;
327 leaf
->type
= THREAD_LEVEL
;
328 leaf
->parent_node
= last_free_node
;
338 /* Recursive function helper to print the CPU topology tree */
340 print_cpu_topology_tree_sysctl_helper(cpu_node_t
*node
,
349 sbuf_bcat(sb
, buf
, buf_len
);
351 sbuf_printf(sb
, "\\-");
352 buf
[buf_len
] = ' ';buf_len
++;
353 buf
[buf_len
] = ' ';buf_len
++;
355 sbuf_printf(sb
, "|-");
356 buf
[buf_len
] = '|';buf_len
++;
357 buf
[buf_len
] = ' ';buf_len
++;
360 bsr_member
= BSRCPUMASK(node
->members
);
362 if (node
->type
== PACKAGE_LEVEL
) {
363 sbuf_printf(sb
,"PACKAGE MEMBERS: ");
364 } else if (node
->type
== CHIP_LEVEL
) {
365 sbuf_printf(sb
,"CHIP ID %d: ",
366 get_chip_ID(bsr_member
));
367 } else if (node
->type
== CORE_LEVEL
) {
368 if (node
->compute_unit_id
!= (uint8_t)-1) {
369 sbuf_printf(sb
,"Compute Unit ID %d: ",
370 node
->compute_unit_id
);
372 sbuf_printf(sb
,"CORE ID %d: ",
373 get_core_number_within_chip(bsr_member
));
375 } else if (node
->type
== THREAD_LEVEL
) {
376 if (node
->compute_unit_id
!= (uint8_t)-1) {
377 sbuf_printf(sb
,"CORE ID %d: ",
378 get_core_number_within_chip(bsr_member
));
380 sbuf_printf(sb
,"THREAD ID %d: ",
381 get_logical_CPU_number_within_core(bsr_member
));
384 sbuf_printf(sb
,"UNKNOWN: ");
386 sbuf_print_cpuset(sb
, &node
->members
);
387 sbuf_printf(sb
,"\n");
389 for (i
= 0; i
< node
->child_no
; i
++) {
390 print_cpu_topology_tree_sysctl_helper(node
->child_node
[i
],
391 sb
, buf
, buf_len
, i
== (node
->child_no
-1));
395 /* SYSCTL PROCEDURE for printing the CPU Topology tree */
397 print_cpu_topology_tree_sysctl(SYSCTL_HANDLER_ARGS
)
401 char buf
[INDENT_BUF_SIZE
];
403 KASSERT(cpu_root_node
!= NULL
, ("cpu_root_node isn't initialized"));
405 sb
= sbuf_new(NULL
, NULL
, 500, SBUF_AUTOEXTEND
);
409 sbuf_printf(sb
,"\n");
410 print_cpu_topology_tree_sysctl_helper(cpu_root_node
, sb
, buf
, 0, 1);
414 ret
= SYSCTL_OUT(req
, sbuf_data(sb
), sbuf_len(sb
));
421 /* SYSCTL PROCEDURE for printing the CPU Topology level description */
423 print_cpu_topology_level_description_sysctl(SYSCTL_HANDLER_ARGS
)
428 sb
= sbuf_new(NULL
, NULL
, 500, SBUF_AUTOEXTEND
);
432 if (cpu_topology_levels_number
== 4) /* HT available */
433 sbuf_printf(sb
, "0 - thread; 1 - core; 2 - socket; 3 - anything");
434 else if (cpu_topology_levels_number
== 3) /* No HT available */
435 sbuf_printf(sb
, "0 - core; 1 - socket; 2 - anything");
436 else if (cpu_topology_levels_number
== 2) /* No HT and no Multi-Core */
437 sbuf_printf(sb
, "0 - socket; 1 - anything");
439 sbuf_printf(sb
, "Unknown");
443 ret
= SYSCTL_OUT(req
, sbuf_data(sb
), sbuf_len(sb
));
450 /* Find a cpu_node_t by a mask */
452 get_cpu_node_by_cpumask(cpu_node_t
* node
,
455 cpu_node_t
* found
= NULL
;
458 if (CPUMASK_CMPMASKEQ(node
->members
, mask
))
461 for (i
= 0; i
< node
->child_no
; i
++) {
462 found
= get_cpu_node_by_cpumask(node
->child_node
[i
], mask
);
471 get_cpu_node_by_cpuid(int cpuid
) {
474 CPUMASK_ASSBIT(mask
, cpuid
);
476 KASSERT(cpu_root_node
!= NULL
, ("cpu_root_node isn't initialized"));
478 return get_cpu_node_by_cpumask(cpu_root_node
, mask
);
481 /* Get the mask of siblings for level_type of a cpuid */
483 get_cpumask_from_level(int cpuid
,
489 CPUMASK_ASSBIT(mask
, cpuid
);
491 KASSERT(cpu_root_node
!= NULL
, ("cpu_root_node isn't initialized"));
493 node
= get_cpu_node_by_cpumask(cpu_root_node
, mask
);
496 CPUMASK_ASSZERO(mask
);
500 while (node
!= NULL
) {
501 if (node
->type
== level_type
) {
502 return node
->members
;
504 node
= node
->parent_node
;
506 CPUMASK_ASSZERO(mask
);
511 static const cpu_node_t
*
512 get_cpu_node_by_chipid2(const cpu_node_t
*node
, int chip_id
)
516 if (node
->type
!= CHIP_LEVEL
) {
517 const cpu_node_t
*ret
= NULL
;
520 for (i
= 0; i
< node
->child_no
; ++i
) {
521 ret
= get_cpu_node_by_chipid2(node
->child_node
[i
],
529 cpuid
= BSRCPUMASK(node
->members
);
530 if (get_chip_ID(cpuid
) == chip_id
)
536 get_cpu_node_by_chipid(int chip_id
)
538 KASSERT(cpu_root_node
!= NULL
, ("cpu_root_node isn't initialized"));
539 return get_cpu_node_by_chipid2(cpu_root_node
, chip_id
);
542 /* init pcpu_sysctl structure info */
544 init_pcpu_topology_sysctl(int assumed_ncpus
)
553 pcpu_sysctl
= kmalloc(sizeof(*pcpu_sysctl
) * MAXCPU
, M_PCPUSYS
,
556 for (i
= 0; i
< assumed_ncpus
; i
++) {
557 sbuf_new(&sb
, pcpu_sysctl
[i
].cpu_name
,
558 sizeof(pcpu_sysctl
[i
].cpu_name
), SBUF_FIXEDLEN
);
559 sbuf_printf(&sb
,"cpu%d", i
);
563 /* Get physical siblings */
564 mask
= get_cpumask_from_level(i
, CHIP_LEVEL
);
565 if (CPUMASK_TESTZERO(mask
)) {
566 pcpu_sysctl
[i
].physical_id
= INVALID_ID
;
570 sbuf_new(&sb
, pcpu_sysctl
[i
].physical_siblings
,
571 sizeof(pcpu_sysctl
[i
].physical_siblings
), SBUF_FIXEDLEN
);
572 sbuf_print_cpuset(&sb
, &mask
);
576 phys_id
= get_chip_ID(i
);
577 pcpu_sysctl
[i
].physical_id
= phys_id
;
578 if (min_id
< 0 || min_id
> phys_id
)
580 if (max_id
< 0 || max_id
< phys_id
)
583 /* Get core siblings */
584 mask
= get_cpumask_from_level(i
, CORE_LEVEL
);
585 if (CPUMASK_TESTZERO(mask
)) {
586 pcpu_sysctl
[i
].core_id
= INVALID_ID
;
590 sbuf_new(&sb
, pcpu_sysctl
[i
].core_siblings
,
591 sizeof(pcpu_sysctl
[i
].core_siblings
), SBUF_FIXEDLEN
);
592 sbuf_print_cpuset(&sb
, &mask
);
596 pcpu_sysctl
[i
].core_id
= get_core_number_within_chip(i
);
597 if (cpu_topology_core_ids
< pcpu_sysctl
[i
].core_id
)
598 cpu_topology_core_ids
= pcpu_sysctl
[i
].core_id
+ 1;
603 * Normalize physical ids so they can be used by the VM system.
604 * Some systems number starting at 0 others number starting at 1.
606 cpu_topology_phys_ids
= max_id
- min_id
+ 1;
607 if (cpu_topology_phys_ids
<= 0) /* don't crash */
608 cpu_topology_phys_ids
= 1;
609 for (i
= 0; i
< assumed_ncpus
; i
++) {
610 pcpu_sysctl
[i
].physical_id
%= cpu_topology_phys_ids
;
614 /* Build SYSCTL structure for revealing
615 * the CPU Topology to user-space.
618 build_sysctl_cpu_topology(int assumed_ncpus
)
623 /* SYSCTL new leaf for "cpu_topology" */
624 sysctl_ctx_init(&cpu_topology_sysctl_ctx
);
625 cpu_topology_sysctl_tree
= SYSCTL_ADD_NODE(&cpu_topology_sysctl_ctx
,
626 SYSCTL_STATIC_CHILDREN(_hw
),
631 /* SYSCTL cpu_topology "tree" entry */
632 SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx
,
633 SYSCTL_CHILDREN(cpu_topology_sysctl_tree
),
634 OID_AUTO
, "tree", CTLTYPE_STRING
| CTLFLAG_RD
,
635 NULL
, 0, print_cpu_topology_tree_sysctl
, "A",
636 "Tree print of CPU topology");
638 /* SYSCTL cpu_topology "level_description" entry */
639 SYSCTL_ADD_PROC(&cpu_topology_sysctl_ctx
,
640 SYSCTL_CHILDREN(cpu_topology_sysctl_tree
),
641 OID_AUTO
, "level_description", CTLTYPE_STRING
| CTLFLAG_RD
,
642 NULL
, 0, print_cpu_topology_level_description_sysctl
, "A",
643 "Level description of CPU topology");
645 /* SYSCTL cpu_topology "members" entry */
646 sbuf_new(&sb
, cpu_topology_members
,
647 sizeof(cpu_topology_members
), SBUF_FIXEDLEN
);
648 sbuf_print_cpuset(&sb
, &cpu_root_node
->members
);
651 SYSCTL_ADD_STRING(&cpu_topology_sysctl_ctx
,
652 SYSCTL_CHILDREN(cpu_topology_sysctl_tree
),
653 OID_AUTO
, "members", CTLFLAG_RD
,
654 cpu_topology_members
, 0,
655 "Members of the CPU Topology");
657 /* SYSCTL per_cpu info */
658 for (i
= 0; i
< assumed_ncpus
; i
++) {
659 /* New leaf : hw.cpu_topology.cpux */
660 sysctl_ctx_init(&pcpu_sysctl
[i
].sysctl_ctx
);
661 pcpu_sysctl
[i
].sysctl_tree
= SYSCTL_ADD_NODE(&pcpu_sysctl
[i
].sysctl_ctx
,
662 SYSCTL_CHILDREN(cpu_topology_sysctl_tree
),
664 pcpu_sysctl
[i
].cpu_name
,
667 /* Check if the physical_id found is valid */
668 if (pcpu_sysctl
[i
].physical_id
== INVALID_ID
) {
672 /* Add physical id info */
673 SYSCTL_ADD_INT(&pcpu_sysctl
[i
].sysctl_ctx
,
674 SYSCTL_CHILDREN(pcpu_sysctl
[i
].sysctl_tree
),
675 OID_AUTO
, "physical_id", CTLFLAG_RD
,
676 &pcpu_sysctl
[i
].physical_id
, 0,
679 /* Add physical siblings */
680 SYSCTL_ADD_STRING(&pcpu_sysctl
[i
].sysctl_ctx
,
681 SYSCTL_CHILDREN(pcpu_sysctl
[i
].sysctl_tree
),
682 OID_AUTO
, "physical_siblings", CTLFLAG_RD
,
683 pcpu_sysctl
[i
].physical_siblings
, 0,
684 "Physical siblings");
686 /* Check if the core_id found is valid */
687 if (pcpu_sysctl
[i
].core_id
== INVALID_ID
) {
691 /* Add core id info */
692 SYSCTL_ADD_INT(&pcpu_sysctl
[i
].sysctl_ctx
,
693 SYSCTL_CHILDREN(pcpu_sysctl
[i
].sysctl_tree
),
694 OID_AUTO
, "core_id", CTLFLAG_RD
,
695 &pcpu_sysctl
[i
].core_id
, 0,
698 /* Add core siblings */
699 SYSCTL_ADD_STRING(&pcpu_sysctl
[i
].sysctl_ctx
,
700 SYSCTL_CHILDREN(pcpu_sysctl
[i
].sysctl_tree
),
701 OID_AUTO
, "core_siblings", CTLFLAG_RD
,
702 pcpu_sysctl
[i
].core_siblings
, 0,
709 sbuf_print_cpuset(struct sbuf
*sb
, cpumask_t
*mask
)
716 sbuf_printf(sb
, "cpus(");
717 CPUSET_FOREACH(i
, *mask
) {
728 sbuf_printf(sb
, ", ");
730 sbuf_printf(sb
, "%d", b
);
732 sbuf_printf(sb
, "%d-%d", b
, e
- 1);
739 sbuf_printf(sb
, ", ");
742 sbuf_printf(sb
, "%d", b
);
744 sbuf_printf(sb
, "%d-%d", b
, e
- 1);
747 sbuf_printf(sb
, ") ");
751 get_cpu_core_id(int cpuid
)
754 return(pcpu_sysctl
[cpuid
].core_id
);
759 get_cpu_phys_id(int cpuid
)
762 return(pcpu_sysctl
[cpuid
].physical_id
);
768 /* Build the CPU Topology and SYSCTL Topology tree */
770 init_cpu_topology(void)
774 assumed_ncpus
= naps
+ 1;
776 build_cpu_topology(assumed_ncpus
);
777 init_pcpu_topology_sysctl(assumed_ncpus
);
778 build_sysctl_cpu_topology(assumed_ncpus
);
/*
 * Register init_cpu_topology() with SYSINIT so it runs at the
 * SI_BOOT2_CPU_TOPOLOGY stage, first in order within that stage.
 */
SYSINIT(cpu_topology, SI_BOOT2_CPU_TOPOLOGY, SI_ORDER_FIRST,
	init_cpu_topology, NULL);