2 * NUMA parameter parsing routines
4 * Copyright (c) 2014 Fujitsu Ltd.
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "sysemu/sysemu.h"
26 #include "exec/cpu-common.h"
27 #include "qemu/bitmap.h"
29 #include "qemu/error-report.h"
30 #include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */
31 #include "qapi-visit.h"
32 #include "qapi/opts-visitor.h"
33 #include "qapi/dealloc-visitor.h"
34 #include "qapi/qmp/qerror.h"
/* QemuOptsList instance for the NUMA options.  A bare first value is taken
 * as the "type" option (implied_opt_name), and the descriptor table is left
 * empty because validation is delegated to OptsVisitor.
 */
36 QemuOptsList qemu_numa_opts
= {
38 .implied_opt_name
= "type",
39 .head
= QTAILQ_HEAD_INITIALIZER(qemu_numa_opts
.head
),
40 .desc
= { { 0 } } /* validated with OptsVisitor */
/*
 * Record one parsed NUMA node description in the global numa_info[] table.
 *
 * @node: parsed node options; has_nodeid, nodeid, cpus and mem are read
 * @opts: raw option group, consulted only for the text of the "mem" option
 * @errp: set via error_setg() when the node index or a CPU number is
 *        rejected by the range checks below
 */
43 static void numa_node_parse(NumaNodeOptions
*node
, QemuOpts
*opts
, Error
**errp
)
46 uint16List
*cpus
= NULL
;
/* An explicitly supplied nodeid selects the numa_info[] slot ... */
48 if (node
->has_nodeid
) {
49 nodenr
= node
->nodeid
;
/* ... otherwise the current nb_numa_nodes value is used as the index. */
51 nodenr
= nb_numa_nodes
;
/* Refuse node indexes at or beyond MAX_NODES. */
54 if (nodenr
>= MAX_NODES
) {
55 error_setg(errp
, "Max number of NUMA nodes reached: %"
/* Walk the CPU list and set one bit per listed CPU in this node's
 * node_cpu bitmap.
 */
60 for (cpus
= node
->cpus
; cpus
; cpus
= cpus
->next
) {
/* NOTE(review): '>' lets a value equal to MAX_CPUMASK_BITS through.
 * set_numa_nodes() scans node_cpu as a MAX_CPUMASK_BITS-bit map, so that
 * index would be one past the last valid bit; '>=' looks like the
 * intended check -- confirm against the node_cpu declaration.
 */
61 if (cpus
->value
> MAX_CPUMASK_BITS
) {
62 error_setg(errp
, "CPU number %" PRIu16
" is bigger than %d",
63 cpus
->value
, MAX_CPUMASK_BITS
);
66 bitmap_set(numa_info
[nodenr
].node_cpu
, cpus
->value
, 1);
/* mem_size is the already-parsed value; mem_str is the original option
 * text, fetched so that its last character can be inspected.
 * NOTE(review): mem_str is dereferenced without a NULL or empty-string
 * check here -- assumes "mem" is present and non-empty; confirm the
 * enclosing guard.
 */
70 uint64_t mem_size
= node
->mem
;
71 const char *mem_str
= qemu_opt_get(opts
, "mem");
72 /* Fix up legacy suffix-less format */
/* A trailing digit means the user gave no unit suffix. */
73 if (g_ascii_isdigit(mem_str
[strlen(mem_str
) - 1])) {
/* Store the resulting size for this node. */
76 numa_info
[nodenr
].node_mem
= mem_size
;
/*
 * Process one NUMA option group.
 *
 * Builds a NumaOptions object from @opts with an OptsVisitor, then
 * dispatches on the object's kind; NUMA_OPTIONS_KIND_NODE is handed to
 * numa_node_parse().  The error handling shown here reports err through
 * qerror_report_err() and releases the object with the QAPI dealloc
 * visitor.
 *
 * @opts: option group to parse
 * @opaque: not referenced by the statements shown here
 */
80 int numa_init_func(QemuOpts
*opts
, void *opaque
)
82 NumaOptions
*object
= NULL
;
/* Convert the option group into a NumaOptions object; the visitor is only
 * needed for this one call and is cleaned up right after it.
 */
86 OptsVisitor
*ov
= opts_visitor_new(opts
);
87 visit_type_NumaOptions(opts_get_visitor(ov
), &object
, NULL
, &err
);
88 opts_visitor_cleanup(ov
);
/* Dispatch on the kind of NUMA option that was parsed. */
95 switch (object
->kind
) {
96 case NUMA_OPTIONS_KIND_NODE
:
97 numa_node_parse(object
->node
, opts
, &err
);
/* Report the collected error. */
110 qerror_report_err(err
);
/* Free the NumaOptions object by running the same visit function with a
 * dealloc visitor.
 */
114 QapiDeallocVisitor
*dv
= qapi_dealloc_visitor_new();
115 visit_type_NumaOptions(qapi_dealloc_get_visitor(dv
),
116 &object
, NULL
, NULL
);
117 qapi_dealloc_visitor_cleanup(dv
);
/*
 * Finalize the numa_info[] table once all NUMA options have been parsed.
 *
 * When nb_numa_nodes is greater than zero this:
 *  - clamps nb_numa_nodes to MAX_NODES;
 *  - if every node's node_mem is zero, gives each node but the last an
 *    aligned share of ram_size / nb_numa_nodes and gives the last node
 *    whatever remains of ram_size;
 *  - sums node_mem over all nodes and reports an error when the total
 *    differs from ram_size;
 *  - if every node's node_cpu bitmap is empty, assigns CPUs 0..max_cpus-1
 *    round-robin (CPU i goes to node i % nb_numa_nodes).
 */
123 void set_numa_nodes(void)
/* The checks below apply when at least one node is configured. */
125 if (nb_numa_nodes
> 0) {
/* Never work with more nodes than MAX_NODES. */
129 if (nb_numa_nodes
> MAX_NODES
) {
130 nb_numa_nodes
= MAX_NODES
;
133 /* If no memory size is given for any node, assume the default case
134 * and distribute the available memory equally across all nodes
136 for (i
= 0; i
< nb_numa_nodes
; i
++) {
137 if (numa_info
[i
].node_mem
!= 0) {
141 if (i
== nb_numa_nodes
) {
142 uint64_t usedmem
= 0;
144 /* On Linux, each node's border has to be 8MB aligned,
145 * the final node gets the rest.
147 for (i
= 0; i
< nb_numa_nodes
- 1; i
++) {
148 numa_info
[i
].node_mem
= (ram_size
/ nb_numa_nodes
) &
150 usedmem
+= numa_info
[i
].node_mem
;
152 numa_info
[i
].node_mem
= ram_size
- usedmem
;
156 for (i
= 0; i
< nb_numa_nodes
; i
++) {
157 numa_total
+= numa_info
[i
].node_mem
;
159 if (numa_total
!= ram_size
) {
160 error_report("total memory for NUMA nodes (%" PRIu64
")"
161 " should equal RAM size (" RAM_ADDR_FMT
")",
162 numa_total
, ram_size
);
166 for (i
= 0; i
< nb_numa_nodes
; i
++) {
167 if (!bitmap_empty(numa_info
[i
].node_cpu
, MAX_CPUMASK_BITS
)) {
171 /* assigning the VCPUs round-robin is easier to implement, guest OSes
172 * must cope with this anyway, because there are BIOSes out there in
173 * real machines which also use this scheme.
175 if (i
== nb_numa_nodes
) {
176 for (i
= 0; i
< max_cpus
; i
++) {
177 set_bit(i
, numa_info
[i
% nb_numa_nodes
].node_cpu
);
183 void set_numa_modes(void)
189 for (i
= 0; i
< nb_numa_nodes
; i
++) {
190 if (test_bit(cpu
->cpu_index
, numa_info
[i
].node_cpu
)) {