/*
 * Copyright (c) 2018 Intel Corporation
 * Copyright (c) 2019 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
18 #include "qemu/osdep.h"
19 #include "qemu/error-report.h"
20 #include "qemu/cutils.h"
21 #include "qemu/units.h"
22 #include "qapi/error.h"
23 #include "qapi/visitor.h"
24 #include "qapi/qapi-visit-common.h"
25 #include "sysemu/sysemu.h"
26 #include "sysemu/cpus.h"
27 #include "sysemu/numa.h"
28 #include "sysemu/reset.h"
30 #include "hw/loader.h"
32 #include "hw/kvm/clock.h"
33 #include "hw/i386/microvm.h"
34 #include "hw/i386/x86.h"
35 #include "target/i386/cpu.h"
36 #include "hw/intc/i8259.h"
37 #include "hw/timer/i8254.h"
38 #include "hw/rtc/mc146818rtc.h"
39 #include "hw/char/serial.h"
40 #include "hw/i386/topology.h"
41 #include "hw/i386/e820_memory_layout.h"
42 #include "hw/i386/fw_cfg.h"
43 #include "hw/virtio/virtio-mmio.h"
48 #include "hw/xen/start_info.h"
/* Firmware image name used when bios_name has not been set. */
#define MICROVM_BIOS_FILENAME "bios-microvm.bin"
52 static void microvm_set_rtc(MicrovmMachineState
*mms
, ISADevice
*s
)
54 X86MachineState
*x86ms
= X86_MACHINE(mms
);
57 val
= MIN(x86ms
->below_4g_mem_size
/ KiB
, 640);
58 rtc_set_memory(s
, 0x15, val
);
59 rtc_set_memory(s
, 0x16, val
>> 8);
60 /* extended memory (next 64MiB) */
61 if (x86ms
->below_4g_mem_size
> 1 * MiB
) {
62 val
= (x86ms
->below_4g_mem_size
- 1 * MiB
) / KiB
;
69 rtc_set_memory(s
, 0x17, val
);
70 rtc_set_memory(s
, 0x18, val
>> 8);
71 rtc_set_memory(s
, 0x30, val
);
72 rtc_set_memory(s
, 0x31, val
>> 8);
73 /* memory between 16MiB and 4GiB */
74 if (x86ms
->below_4g_mem_size
> 16 * MiB
) {
75 val
= (x86ms
->below_4g_mem_size
- 16 * MiB
) / (64 * KiB
);
82 rtc_set_memory(s
, 0x34, val
);
83 rtc_set_memory(s
, 0x35, val
>> 8);
84 /* memory above 4GiB */
85 val
= x86ms
->above_4g_mem_size
/ 65536;
86 rtc_set_memory(s
, 0x5b, val
);
87 rtc_set_memory(s
, 0x5c, val
>> 8);
88 rtc_set_memory(s
, 0x5d, val
>> 16);
91 static void microvm_gsi_handler(void *opaque
, int n
, int level
)
95 qemu_set_irq(s
->ioapic_irq
[n
], level
);
98 static void microvm_devices_init(MicrovmMachineState
*mms
)
100 X86MachineState
*x86ms
= X86_MACHINE(mms
);
102 ISADevice
*rtc_state
;
106 /* Core components */
108 gsi_state
= g_malloc0(sizeof(*gsi_state
));
109 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
110 x86ms
->gsi
= qemu_allocate_irqs(gsi_handler
, gsi_state
, GSI_NUM_PINS
);
112 x86ms
->gsi
= qemu_allocate_irqs(microvm_gsi_handler
,
113 gsi_state
, GSI_NUM_PINS
);
116 isa_bus
= isa_bus_new(NULL
, get_system_memory(), get_system_io(),
118 isa_bus_irqs(isa_bus
, x86ms
->gsi
);
120 ioapic_init_gsi(gsi_state
, "machine");
124 for (i
= 0; i
< VIRTIO_NUM_TRANSPORTS
; i
++) {
125 sysbus_create_simple("virtio-mmio",
126 VIRTIO_MMIO_BASE
+ i
* 512,
127 x86ms
->gsi
[VIRTIO_IRQ_BASE
+ i
]);
130 /* Optional and legacy devices */
132 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
135 i8259
= i8259_init(isa_bus
, x86_allocate_cpu_irq());
136 for (i
= 0; i
< ISA_NUM_IRQS
; i
++) {
137 gsi_state
->i8259_irq
[i
] = i8259
[i
];
142 if (mms
->pit
== ON_OFF_AUTO_ON
|| mms
->pit
== ON_OFF_AUTO_AUTO
) {
143 if (kvm_pit_in_kernel()) {
144 kvm_pit_init(isa_bus
, 0x40);
146 i8254_pit_init(isa_bus
, 0x40, 0, NULL
);
150 if (mms
->rtc
== ON_OFF_AUTO_ON
||
151 (mms
->rtc
== ON_OFF_AUTO_AUTO
&& !kvm_enabled())) {
152 rtc_state
= mc146818_rtc_init(isa_bus
, 2000, NULL
);
153 microvm_set_rtc(mms
, rtc_state
);
156 if (mms
->isa_serial
) {
157 serial_hds_isa_init(isa_bus
, 0, 1);
160 if (bios_name
== NULL
) {
161 bios_name
= MICROVM_BIOS_FILENAME
;
163 x86_bios_rom_init(get_system_memory(), true);
166 static void microvm_memory_init(MicrovmMachineState
*mms
)
168 MachineState
*machine
= MACHINE(mms
);
169 X86MachineState
*x86ms
= X86_MACHINE(mms
);
170 MemoryRegion
*ram
, *ram_below_4g
, *ram_above_4g
;
171 MemoryRegion
*system_memory
= get_system_memory();
177 * Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
178 * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
179 * also known as MMCFG).
180 * If it doesn't, we need to split it in chunks below and above 4G.
181 * In any case, try to make sure that guest addresses aligned at
182 * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
184 if (machine
->ram_size
>= 0xb0000000) {
191 * Handle the machine opt max-ram-below-4g. It is basically doing
192 * min(qemu limit, user limit).
194 if (!x86ms
->max_ram_below_4g
) {
195 x86ms
->max_ram_below_4g
= 4 * GiB
;
197 if (lowmem
> x86ms
->max_ram_below_4g
) {
198 lowmem
= x86ms
->max_ram_below_4g
;
199 if (machine
->ram_size
- lowmem
> lowmem
&&
200 lowmem
& (1 * GiB
- 1)) {
201 warn_report("There is possibly poor performance as the ram size "
202 " (0x%" PRIx64
") is more then twice the size of"
203 " max-ram-below-4g (%"PRIu64
") and"
204 " max-ram-below-4g is not a multiple of 1G.",
205 (uint64_t)machine
->ram_size
, x86ms
->max_ram_below_4g
);
209 if (machine
->ram_size
> lowmem
) {
210 x86ms
->above_4g_mem_size
= machine
->ram_size
- lowmem
;
211 x86ms
->below_4g_mem_size
= lowmem
;
213 x86ms
->above_4g_mem_size
= 0;
214 x86ms
->below_4g_mem_size
= machine
->ram_size
;
217 ram
= g_malloc(sizeof(*ram
));
218 memory_region_allocate_system_memory(ram
, NULL
, "microvm.ram",
221 ram_below_4g
= g_malloc(sizeof(*ram_below_4g
));
222 memory_region_init_alias(ram_below_4g
, NULL
, "ram-below-4g", ram
,
223 0, x86ms
->below_4g_mem_size
);
224 memory_region_add_subregion(system_memory
, 0, ram_below_4g
);
226 e820_add_entry(0, x86ms
->below_4g_mem_size
, E820_RAM
);
228 if (x86ms
->above_4g_mem_size
> 0) {
229 ram_above_4g
= g_malloc(sizeof(*ram_above_4g
));
230 memory_region_init_alias(ram_above_4g
, NULL
, "ram-above-4g", ram
,
231 x86ms
->below_4g_mem_size
,
232 x86ms
->above_4g_mem_size
);
233 memory_region_add_subregion(system_memory
, 0x100000000ULL
,
235 e820_add_entry(0x100000000ULL
, x86ms
->above_4g_mem_size
, E820_RAM
);
238 fw_cfg
= fw_cfg_init_io_dma(FW_CFG_IO_BASE
, FW_CFG_IO_BASE
+ 4,
239 &address_space_memory
);
241 fw_cfg_add_i16(fw_cfg
, FW_CFG_NB_CPUS
, machine
->smp
.cpus
);
242 fw_cfg_add_i16(fw_cfg
, FW_CFG_MAX_CPUS
, machine
->smp
.max_cpus
);
243 fw_cfg_add_i64(fw_cfg
, FW_CFG_RAM_SIZE
, (uint64_t)machine
->ram_size
);
244 fw_cfg_add_i32(fw_cfg
, FW_CFG_IRQ0_OVERRIDE
, kvm_allows_irq0_override());
245 fw_cfg_add_bytes(fw_cfg
, FW_CFG_E820_TABLE
,
246 &e820_reserve
, sizeof(e820_reserve
));
247 fw_cfg_add_file(fw_cfg
, "etc/e820", e820_table
,
248 sizeof(struct e820_entry
) * e820_get_num_entries());
252 if (machine
->kernel_filename
!= NULL
) {
253 x86_load_linux(x86ms
, fw_cfg
, 0, true, true);
256 if (mms
->option_roms
) {
257 for (i
= 0; i
< nb_option_roms
; i
++) {
258 rom_add_option(option_rom
[i
].name
, option_rom
[i
].bootindex
);
262 x86ms
->fw_cfg
= fw_cfg
;
263 x86ms
->ioapic_as
= &address_space_memory
;
266 static gchar
*microvm_get_mmio_cmdline(gchar
*name
)
273 separator
= g_strrstr(name
, ".");
278 if (qemu_strtol(separator
+ 1, NULL
, 10, &index
) != 0) {
282 cmdline
= g_malloc0(VIRTIO_CMDLINE_MAXLEN
);
283 ret
= g_snprintf(cmdline
, VIRTIO_CMDLINE_MAXLEN
,
284 " virtio_mmio.device=512@0x%lx:%ld",
285 VIRTIO_MMIO_BASE
+ index
* 512,
286 VIRTIO_IRQ_BASE
+ index
);
287 if (ret
< 0 || ret
>= VIRTIO_CMDLINE_MAXLEN
) {
295 static void microvm_fix_kernel_cmdline(MachineState
*machine
)
297 X86MachineState
*x86ms
= X86_MACHINE(machine
);
303 * Find MMIO transports with attached devices, and add them to the kernel
306 * Yes, this is a hack, but one that heavily improves the UX without
307 * introducing any significant issues.
309 cmdline
= g_strdup(machine
->kernel_cmdline
);
310 bus
= sysbus_get_default();
311 QTAILQ_FOREACH(kid
, &bus
->children
, sibling
) {
312 DeviceState
*dev
= kid
->child
;
313 ObjectClass
*class = object_get_class(OBJECT(dev
));
315 if (class == object_class_by_name(TYPE_VIRTIO_MMIO
)) {
316 VirtIOMMIOProxy
*mmio
= VIRTIO_MMIO(OBJECT(dev
));
317 VirtioBusState
*mmio_virtio_bus
= &mmio
->bus
;
318 BusState
*mmio_bus
= &mmio_virtio_bus
->parent_obj
;
320 if (!QTAILQ_EMPTY(&mmio_bus
->children
)) {
321 gchar
*mmio_cmdline
= microvm_get_mmio_cmdline(mmio_bus
->name
);
323 char *newcmd
= g_strjoin(NULL
, cmdline
, mmio_cmdline
, NULL
);
324 g_free(mmio_cmdline
);
332 fw_cfg_modify_i32(x86ms
->fw_cfg
, FW_CFG_CMDLINE_SIZE
, strlen(cmdline
) + 1);
333 fw_cfg_modify_string(x86ms
->fw_cfg
, FW_CFG_CMDLINE_DATA
, cmdline
);
338 static void microvm_machine_state_init(MachineState
*machine
)
340 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
341 X86MachineState
*x86ms
= X86_MACHINE(machine
);
342 Error
*local_err
= NULL
;
344 microvm_memory_init(mms
);
346 x86_cpus_init(x86ms
, CPU_VERSION_LATEST
);
348 error_report_err(local_err
);
352 microvm_devices_init(mms
);
355 static void microvm_machine_reset(MachineState
*machine
)
357 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
361 if (machine
->kernel_filename
!= NULL
&&
362 mms
->auto_kernel_cmdline
&& !mms
->kernel_cmdline_fixed
) {
363 microvm_fix_kernel_cmdline(machine
);
364 mms
->kernel_cmdline_fixed
= true;
367 qemu_devices_reset();
372 if (cpu
->apic_state
) {
373 device_reset(cpu
->apic_state
);
378 static void microvm_machine_get_pic(Object
*obj
, Visitor
*v
, const char *name
,
379 void *opaque
, Error
**errp
)
381 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
382 OnOffAuto pic
= mms
->pic
;
384 visit_type_OnOffAuto(v
, name
, &pic
, errp
);
387 static void microvm_machine_set_pic(Object
*obj
, Visitor
*v
, const char *name
,
388 void *opaque
, Error
**errp
)
390 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
392 visit_type_OnOffAuto(v
, name
, &mms
->pic
, errp
);
395 static void microvm_machine_get_pit(Object
*obj
, Visitor
*v
, const char *name
,
396 void *opaque
, Error
**errp
)
398 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
399 OnOffAuto pit
= mms
->pit
;
401 visit_type_OnOffAuto(v
, name
, &pit
, errp
);
404 static void microvm_machine_set_pit(Object
*obj
, Visitor
*v
, const char *name
,
405 void *opaque
, Error
**errp
)
407 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
409 visit_type_OnOffAuto(v
, name
, &mms
->pit
, errp
);
412 static void microvm_machine_get_rtc(Object
*obj
, Visitor
*v
, const char *name
,
413 void *opaque
, Error
**errp
)
415 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
416 OnOffAuto rtc
= mms
->rtc
;
418 visit_type_OnOffAuto(v
, name
, &rtc
, errp
);
421 static void microvm_machine_set_rtc(Object
*obj
, Visitor
*v
, const char *name
,
422 void *opaque
, Error
**errp
)
424 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
426 visit_type_OnOffAuto(v
, name
, &mms
->rtc
, errp
);
429 static bool microvm_machine_get_isa_serial(Object
*obj
, Error
**errp
)
431 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
433 return mms
->isa_serial
;
436 static void microvm_machine_set_isa_serial(Object
*obj
, bool value
,
439 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
441 mms
->isa_serial
= value
;
444 static bool microvm_machine_get_option_roms(Object
*obj
, Error
**errp
)
446 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
448 return mms
->option_roms
;
451 static void microvm_machine_set_option_roms(Object
*obj
, bool value
,
454 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
456 mms
->option_roms
= value
;
459 static bool microvm_machine_get_auto_kernel_cmdline(Object
*obj
, Error
**errp
)
461 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
463 return mms
->auto_kernel_cmdline
;
466 static void microvm_machine_set_auto_kernel_cmdline(Object
*obj
, bool value
,
469 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
471 mms
->auto_kernel_cmdline
= value
;
474 static void microvm_machine_initfn(Object
*obj
)
476 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
479 mms
->pic
= ON_OFF_AUTO_AUTO
;
480 mms
->pit
= ON_OFF_AUTO_AUTO
;
481 mms
->rtc
= ON_OFF_AUTO_AUTO
;
482 mms
->isa_serial
= true;
483 mms
->option_roms
= true;
484 mms
->auto_kernel_cmdline
= true;
487 mms
->kernel_cmdline_fixed
= false;
490 static void microvm_class_init(ObjectClass
*oc
, void *data
)
492 MachineClass
*mc
= MACHINE_CLASS(oc
);
494 mc
->init
= microvm_machine_state_init
;
496 mc
->family
= "microvm_i386";
497 mc
->desc
= "microvm (i386)";
498 mc
->units_per_default_bus
= 1;
501 mc
->has_hotpluggable_cpus
= false;
502 mc
->auto_enable_numa_with_memhp
= false;
503 mc
->default_cpu_type
= TARGET_DEFAULT_CPU_TYPE
;
504 mc
->nvdimm_supported
= false;
506 /* Avoid relying too much on kernel components */
507 mc
->default_kernel_irqchip_split
= true;
509 /* Machine class handlers */
510 mc
->reset
= microvm_machine_reset
;
512 object_class_property_add(oc
, MICROVM_MACHINE_PIC
, "OnOffAuto",
513 microvm_machine_get_pic
,
514 microvm_machine_set_pic
,
515 NULL
, NULL
, &error_abort
);
516 object_class_property_set_description(oc
, MICROVM_MACHINE_PIC
,
517 "Enable i8259 PIC", &error_abort
);
519 object_class_property_add(oc
, MICROVM_MACHINE_PIT
, "OnOffAuto",
520 microvm_machine_get_pit
,
521 microvm_machine_set_pit
,
522 NULL
, NULL
, &error_abort
);
523 object_class_property_set_description(oc
, MICROVM_MACHINE_PIT
,
524 "Enable i8254 PIT", &error_abort
);
526 object_class_property_add(oc
, MICROVM_MACHINE_RTC
, "OnOffAuto",
527 microvm_machine_get_rtc
,
528 microvm_machine_set_rtc
,
529 NULL
, NULL
, &error_abort
);
530 object_class_property_set_description(oc
, MICROVM_MACHINE_RTC
,
531 "Enable MC146818 RTC", &error_abort
);
533 object_class_property_add_bool(oc
, MICROVM_MACHINE_ISA_SERIAL
,
534 microvm_machine_get_isa_serial
,
535 microvm_machine_set_isa_serial
,
537 object_class_property_set_description(oc
, MICROVM_MACHINE_ISA_SERIAL
,
538 "Set off to disable the instantiation an ISA serial port",
541 object_class_property_add_bool(oc
, MICROVM_MACHINE_OPTION_ROMS
,
542 microvm_machine_get_option_roms
,
543 microvm_machine_set_option_roms
,
545 object_class_property_set_description(oc
, MICROVM_MACHINE_OPTION_ROMS
,
546 "Set off to disable loading option ROMs", &error_abort
);
548 object_class_property_add_bool(oc
, MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
549 microvm_machine_get_auto_kernel_cmdline
,
550 microvm_machine_set_auto_kernel_cmdline
,
552 object_class_property_set_description(oc
,
553 MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
554 "Set off to disable adding virtio-mmio devices to the kernel cmdline",
558 static const TypeInfo microvm_machine_info
= {
559 .name
= TYPE_MICROVM_MACHINE
,
560 .parent
= TYPE_X86_MACHINE
,
561 .instance_size
= sizeof(MicrovmMachineState
),
562 .instance_init
= microvm_machine_initfn
,
563 .class_size
= sizeof(MicrovmMachineClass
),
564 .class_init
= microvm_class_init
,
565 .interfaces
= (InterfaceInfo
[]) {
570 static void microvm_machine_init(void)
572 type_register_static(µvm_machine_info
);
574 type_init(microvm_machine_init
);