2 * Copyright (c) 2018 Intel Corporation
3 * Copyright (c) 2019 Red Hat, Inc.
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2 or later, as published by the Free Software Foundation.
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * You should have received a copy of the GNU General Public License along with
15 * this program. If not, see <http://www.gnu.org/licenses/>.
18 #include "qemu/osdep.h"
19 #include "qemu/error-report.h"
20 #include "qemu/cutils.h"
21 #include "qemu/units.h"
22 #include "qapi/error.h"
23 #include "qapi/visitor.h"
24 #include "qapi/qapi-visit-common.h"
25 #include "sysemu/sysemu.h"
26 #include "sysemu/cpus.h"
27 #include "sysemu/numa.h"
28 #include "sysemu/reset.h"
29 #include "sysemu/runstate.h"
30 #include "acpi-microvm.h"
32 #include "hw/loader.h"
34 #include "hw/kvm/clock.h"
35 #include "hw/i386/microvm.h"
36 #include "hw/i386/x86.h"
37 #include "target/i386/cpu.h"
38 #include "hw/intc/i8259.h"
39 #include "hw/timer/i8254.h"
40 #include "hw/rtc/mc146818rtc.h"
41 #include "hw/char/serial.h"
42 #include "hw/i386/topology.h"
43 #include "hw/i386/e820_memory_layout.h"
44 #include "hw/i386/fw_cfg.h"
45 #include "hw/virtio/virtio-mmio.h"
46 #include "hw/acpi/acpi.h"
47 #include "hw/acpi/generic_event_device.h"
52 #include "hw/xen/start_info.h"
/*
 * Default firmware image names. microvm_devices_init() falls back to one of
 * these when no bios_name was supplied: the BIOS image when ACPI is enabled,
 * the qboot image otherwise.
 */
#define MICROVM_QBOOT_FILENAME "qboot.rom"
#define MICROVM_BIOS_FILENAME "bios-microvm.bin"
/*
 * microvm_set_rtc: publish the machine's memory sizes through
 * rtc_set_memory() on the RTC device `s`.
 *
 * Writes visible below (each value is stored as `val`, then `val >> 8`, and
 * for the last group `val >> 16`, at consecutive addresses):
 *   0x15/0x16       MIN(below_4g_mem_size / KiB, 640)
 *   0x17/0x18 and
 *   0x30/0x31       (below_4g_mem_size - 1 MiB) / KiB when above 1 MiB
 *   0x34/0x35       (below_4g_mem_size - 16 MiB) / 64 KiB when above 16 MiB
 *   0x5b/0x5c/0x5d  above_4g_mem_size / 65536
 *
 * NOTE(review): this text is a lossy extraction. The embedded line numbers
 * jump 59->62, 67->74 and 80->87, so the declaration of `val`, the braces,
 * and whatever follows each `if` assignment (an else arm and/or clamping,
 * presumably) are not visible here. Restore them from the upstream file
 * before compiling.
 */
57 static void microvm_set_rtc(MicrovmMachineState
*mms
, ISADevice
*s
)
59 X86MachineState
*x86ms
= X86_MACHINE(mms
);
/* Low memory size in KiB, capped at 640. */
62 val
= MIN(x86ms
->below_4g_mem_size
/ KiB
, 640);
63 rtc_set_memory(s
, 0x15, val
);
64 rtc_set_memory(s
, 0x16, val
>> 8);
65 /* extended memory (next 64MiB) */
66 if (x86ms
->below_4g_mem_size
> 1 * MiB
) {
67 val
= (x86ms
->below_4g_mem_size
- 1 * MiB
) / KiB
;
/* NOTE(review): original lines 68-73 are missing from this listing. */
74 rtc_set_memory(s
, 0x17, val
);
75 rtc_set_memory(s
, 0x18, val
>> 8);
76 rtc_set_memory(s
, 0x30, val
);
77 rtc_set_memory(s
, 0x31, val
>> 8);
78 /* memory between 16MiB and 4GiB */
79 if (x86ms
->below_4g_mem_size
> 16 * MiB
) {
80 val
= (x86ms
->below_4g_mem_size
- 16 * MiB
) / (64 * KiB
);
/* NOTE(review): original lines 81-86 are missing from this listing. */
87 rtc_set_memory(s
, 0x34, val
);
88 rtc_set_memory(s
, 0x35, val
>> 8);
89 /* memory above 4GiB */
90 val
= x86ms
->above_4g_mem_size
/ 65536;
91 rtc_set_memory(s
, 0x5b, val
);
92 rtc_set_memory(s
, 0x5c, val
>> 8);
93 rtc_set_memory(s
, 0x5d, val
>> 16);
/*
 * microvm_gsi_handler: IRQ handler that forwards `level` for pin `n` to
 * s->ioapic_irq[n] with qemu_set_irq(). microvm_devices_init() installs it
 * with gsi_state as the opaque pointer.
 *
 * NOTE(review): the line that declares `s` (presumably from `opaque`) and
 * the function braces were dropped by the extraction.
 */
96 static void microvm_gsi_handler(void *opaque
, int n
, int level
)
100 qemu_set_irq(s
->ioapic_irq
[n
], level
);
/*
 * microvm_devices_init: create the board's interrupt plumbing and devices.
 *
 * Steps visible in this listing, in order:
 *   - allocate a zeroed gsi_state and GSI_NUM_PINS GSI lines in x86ms->gsi,
 *     handled by gsi_handler or by microvm_gsi_handler;
 *   - create the ISA bus, attach the GSI lines to it, call ioapic_init_gsi();
 *   - set mms->virtio_irq_base to 16 with ACPI enabled, 5 otherwise, and
 *     create VIRTIO_NUM_TRANSPORTS "virtio-mmio" devices at
 *     VIRTIO_MMIO_BASE + i * 512 on GSI virtio_irq_base + i;
 *   - with ACPI enabled, create and realize a TYPE_ACPI_GED_X86 device and
 *     store it in x86ms->acpi_dev;
 *   - depending on the pic/pit/rtc/isa_serial settings, create the i8259,
 *     the PIT (KVM or emulated), the MC146818 RTC (then microvm_set_rtc())
 *     and the ISA serial port;
 *   - default bios_name when it is NULL and call x86_bios_rom_init().
 *
 * NOTE(review): this text is a lossy extraction. Declarations of gsi_state,
 * isa_bus, i8259 and i, all closing braces, the `else` arms, and the tails of
 * some calls (e.g. isa_bus_new) are missing; see the gaps in the embedded
 * line numbers. Restore them from the upstream file before compiling.
 */
103 static void microvm_devices_init(MicrovmMachineState
*mms
)
105 X86MachineState
*x86ms
= X86_MACHINE(mms
);
107 ISADevice
*rtc_state
;
111 /* Core components */
113 gsi_state
= g_malloc0(sizeof(*gsi_state
));
114 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
115 x86ms
->gsi
= qemu_allocate_irqs(gsi_handler
, gsi_state
, GSI_NUM_PINS
);
/* NOTE(review): original line 116 (presumably the else) is missing. */
117 x86ms
->gsi
= qemu_allocate_irqs(microvm_gsi_handler
,
118 gsi_state
, GSI_NUM_PINS
);
121 isa_bus
= isa_bus_new(NULL
, get_system_memory(), get_system_io(),
/* NOTE(review): the remaining isa_bus_new() argument(s) are missing. */
123 isa_bus_irqs(isa_bus
, x86ms
->gsi
);
125 ioapic_init_gsi(gsi_state
, "machine");
/* virtio-mmio transports start at GSI 16 with ACPI, GSI 5 without. */
129 mms
->virtio_irq_base
= x86_machine_is_acpi_enabled(x86ms
) ? 16 : 5;
130 for (i
= 0; i
< VIRTIO_NUM_TRANSPORTS
; i
++) {
131 sysbus_create_simple("virtio-mmio",
132 VIRTIO_MMIO_BASE
+ i
* 512,
133 x86ms
->gsi
[mms
->virtio_irq_base
+ i
]);
136 /* Optional and legacy devices */
137 if (x86_machine_is_acpi_enabled(x86ms
)) {
138 DeviceState
*dev
= qdev_new(TYPE_ACPI_GED_X86
);
139 qdev_prop_set_uint32(dev
, "ged-event", ACPI_GED_PWR_DOWN_EVT
);
140 sysbus_mmio_map(SYS_BUS_DEVICE(dev
), 0, GED_MMIO_BASE
);
141 /* sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, GED_MMIO_BASE_MEMHP); */
142 sysbus_mmio_map(SYS_BUS_DEVICE(dev
), 2, GED_MMIO_BASE_REGS
);
143 sysbus_connect_irq(SYS_BUS_DEVICE(dev
), 0,
144 x86ms
->gsi
[GED_MMIO_IRQ
]);
145 sysbus_realize(SYS_BUS_DEVICE(dev
), &error_fatal
);
146 x86ms
->acpi_dev
= HOTPLUG_HANDLER(dev
);
149 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
152 i8259
= i8259_init(isa_bus
, x86_allocate_cpu_irq());
/* Copy the PIC's lines into gsi_state. */
153 for (i
= 0; i
< ISA_NUM_IRQS
; i
++) {
154 gsi_state
->i8259_irq
[i
] = i8259
[i
];
159 if (mms
->pit
== ON_OFF_AUTO_ON
|| mms
->pit
== ON_OFF_AUTO_AUTO
) {
160 if (kvm_pit_in_kernel()) {
161 kvm_pit_init(isa_bus
, 0x40);
/* NOTE(review): original line 162 (presumably the else) is missing. */
163 i8254_pit_init(isa_bus
, 0x40, 0, NULL
);
/* The RTC is created when forced on, or in auto mode when KVM is not enabled. */
167 if (mms
->rtc
== ON_OFF_AUTO_ON
||
168 (mms
->rtc
== ON_OFF_AUTO_AUTO
&& !kvm_enabled())) {
169 rtc_state
= mc146818_rtc_init(isa_bus
, 2000, NULL
);
170 microvm_set_rtc(mms
, rtc_state
);
173 if (mms
->isa_serial
) {
174 serial_hds_isa_init(isa_bus
, 0, 1);
/* Pick a default firmware image only when none was configured. */
177 if (bios_name
== NULL
) {
178 bios_name
= x86_machine_is_acpi_enabled(x86ms
)
179 ? MICROVM_BIOS_FILENAME
180 : MICROVM_QBOOT_FILENAME
;
182 x86_bios_rom_init(get_system_memory(), true);
/*
 * microvm_memory_init: lay out guest RAM and populate fw_cfg.
 *
 * Steps visible in this listing, in order:
 *   - split machine->ram_size at lowmem (0xc0000000): when RAM exceeds it,
 *     below_4g_mem_size = lowmem and above_4g_mem_size = the remainder;
 *     the other pair of assignments sets above to 0 and below to ram_size;
 *   - alias the first below_4g_mem_size bytes of machine->ram as
 *     "ram-below-4g" at address 0 and add an E820_RAM entry for it;
 *   - when above_4g_mem_size > 0, create the "ram-above-4g" alias, map it at
 *     0x100000000 and add an E820_RAM entry for it;
 *   - create fw_cfg at FW_CFG_IO_BASE and add CPU counts, RAM size, the
 *     IRQ0-override flag, the e820 reserve table and the "etc/e820" file;
 *   - load the Linux kernel when machine->kernel_filename is set;
 *   - register every option ROM when mms->option_roms is set;
 *   - store fw_cfg and &address_space_memory in x86ms.
 *
 * NOTE(review): this text is a lossy extraction. Declarations of fw_cfg and
 * i, the `else` between the two size assignments, closing braces, and some
 * call arguments (original lines 213 and 217) are missing. Restore them from
 * the upstream file before compiling.
 */
185 static void microvm_memory_init(MicrovmMachineState
*mms
)
187 MachineState
*machine
= MACHINE(mms
);
188 X86MachineState
*x86ms
= X86_MACHINE(mms
);
189 MemoryRegion
*ram_below_4g
, *ram_above_4g
;
190 MemoryRegion
*system_memory
= get_system_memory();
192 ram_addr_t lowmem
= 0xc0000000; /* 3G */
195 if (machine
->ram_size
> lowmem
) {
196 x86ms
->above_4g_mem_size
= machine
->ram_size
- lowmem
;
197 x86ms
->below_4g_mem_size
= lowmem
;
/* NOTE(review): original line 198 (presumably the else) is missing. */
199 x86ms
->above_4g_mem_size
= 0;
200 x86ms
->below_4g_mem_size
= machine
->ram_size
;
203 ram_below_4g
= g_malloc(sizeof(*ram_below_4g
));
204 memory_region_init_alias(ram_below_4g
, NULL
, "ram-below-4g", machine
->ram
,
205 0, x86ms
->below_4g_mem_size
);
206 memory_region_add_subregion(system_memory
, 0, ram_below_4g
);
208 e820_add_entry(0, x86ms
->below_4g_mem_size
, E820_RAM
);
210 if (x86ms
->above_4g_mem_size
> 0) {
211 ram_above_4g
= g_malloc(sizeof(*ram_above_4g
));
212 memory_region_init_alias(ram_above_4g
, NULL
, "ram-above-4g",
/* NOTE(review): original line 213 (an argument of this call) is missing. */
214 x86ms
->below_4g_mem_size
,
215 x86ms
->above_4g_mem_size
);
216 memory_region_add_subregion(system_memory
, 0x100000000ULL
,
/* NOTE(review): original line 217 (last argument of this call) is missing. */
218 e820_add_entry(0x100000000ULL
, x86ms
->above_4g_mem_size
, E820_RAM
);
221 fw_cfg
= fw_cfg_init_io_dma(FW_CFG_IO_BASE
, FW_CFG_IO_BASE
+ 4,
222 &address_space_memory
);
224 fw_cfg_add_i16(fw_cfg
, FW_CFG_NB_CPUS
, machine
->smp
.cpus
);
225 fw_cfg_add_i16(fw_cfg
, FW_CFG_MAX_CPUS
, machine
->smp
.max_cpus
);
226 fw_cfg_add_i64(fw_cfg
, FW_CFG_RAM_SIZE
, (uint64_t)machine
->ram_size
);
227 fw_cfg_add_i32(fw_cfg
, FW_CFG_IRQ0_OVERRIDE
, kvm_allows_irq0_override());
228 fw_cfg_add_bytes(fw_cfg
, FW_CFG_E820_TABLE
,
229 &e820_reserve
, sizeof(e820_reserve
));
230 fw_cfg_add_file(fw_cfg
, "etc/e820", e820_table
,
231 sizeof(struct e820_entry
) * e820_get_num_entries());
235 if (machine
->kernel_filename
!= NULL
) {
236 x86_load_linux(x86ms
, fw_cfg
, 0, true, true);
239 if (mms
->option_roms
) {
240 for (i
= 0; i
< nb_option_roms
; i
++) {
241 rom_add_option(option_rom
[i
].name
, option_rom
[i
].bootindex
);
245 x86ms
->fw_cfg
= fw_cfg
;
246 x86ms
->ioapic_as
= &address_space_memory
;
/*
 * microvm_get_mmio_cmdline: build the kernel command-line fragment for one
 * virtio-mmio transport.
 *
 * Visible behaviour: locate the last "." in `name`, parse the text after it
 * as a base-10 number into `index` with qemu_strtol(), allocate a zeroed
 * buffer of VIRTIO_CMDLINE_MAXLEN bytes and format
 * " virtio_mmio.device=512@0x<addr>:<irq>" into it, where
 * addr = VIRTIO_MMIO_BASE + index * 512 and irq = virtio_irq_base + index.
 * The g_snprintf() result is checked for error or truncation.
 *
 * NOTE(review): this text is a lossy extraction. Local declarations, the
 * handling of a missing separator, every error path and the final return
 * are not visible; ownership of the returned string cannot be confirmed
 * from this listing (the caller in microvm_fix_kernel_cmdline g_free()s it).
 */
249 static gchar
*microvm_get_mmio_cmdline(gchar
*name
, uint32_t virtio_irq_base
)
256 separator
= g_strrstr(name
, ".");
261 if (qemu_strtol(separator
+ 1, NULL
, 10, &index
) != 0) {
265 cmdline
= g_malloc0(VIRTIO_CMDLINE_MAXLEN
);
266 ret
= g_snprintf(cmdline
, VIRTIO_CMDLINE_MAXLEN
,
267 " virtio_mmio.device=512@0x%lx:%ld",
268 VIRTIO_MMIO_BASE
+ index
* 512,
269 virtio_irq_base
+ index
);
270 if (ret
< 0 || ret
>= VIRTIO_CMDLINE_MAXLEN
) {
/*
 * microvm_fix_kernel_cmdline: append virtio-mmio device descriptions to the
 * kernel command line and push the result into fw_cfg.
 *
 * Visible behaviour: duplicate machine->kernel_cmdline, walk the children of
 * the default sysbus, and for each device whose class is exactly the
 * TYPE_VIRTIO_MMIO class and whose virtio bus has at least one child, obtain
 * a fragment from microvm_get_mmio_cmdline() and join it onto the command
 * line. Finally update FW_CFG_CMDLINE_SIZE (strlen + 1) and
 * FW_CFG_CMDLINE_DATA.
 *
 * NOTE(review): this text is a lossy extraction. Declarations of bus, kid
 * and cmdline, the comment delimiters around the explanatory text, the
 * statements that replace `cmdline` with `newcmd`, and all closing braces
 * are missing. Restore them from the upstream file before compiling.
 */
278 static void microvm_fix_kernel_cmdline(MachineState
*machine
)
280 X86MachineState
*x86ms
= X86_MACHINE(machine
);
281 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
287 * Find MMIO transports with attached devices, and add them to the kernel
290 * Yes, this is a hack, but one that heavily improves the UX without
291 * introducing any significant issues.
293 cmdline
= g_strdup(machine
->kernel_cmdline
);
294 bus
= sysbus_get_default();
295 QTAILQ_FOREACH(kid
, &bus
->children
, sibling
) {
296 DeviceState
*dev
= kid
->child
;
297 ObjectClass
*class = object_get_class(OBJECT(dev
));
/* Exact class match: only devices whose class is TYPE_VIRTIO_MMIO itself. */
299 if (class == object_class_by_name(TYPE_VIRTIO_MMIO
)) {
300 VirtIOMMIOProxy
*mmio
= VIRTIO_MMIO(OBJECT(dev
));
301 VirtioBusState
*mmio_virtio_bus
= &mmio
->bus
;
302 BusState
*mmio_bus
= &mmio_virtio_bus
->parent_obj
;
/* Skip transports that have no device plugged in. */
304 if (!QTAILQ_EMPTY(&mmio_bus
->children
)) {
305 gchar
*mmio_cmdline
= microvm_get_mmio_cmdline
306 (mmio_bus
->name
, mms
->virtio_irq_base
);
308 char *newcmd
= g_strjoin(NULL
, cmdline
, mmio_cmdline
, NULL
);
309 g_free(mmio_cmdline
);
/* NOTE(review): original lines 310-316 are missing from this listing. */
317 fw_cfg_modify_i32(x86ms
->fw_cfg
, FW_CFG_CMDLINE_SIZE
, strlen(cmdline
) + 1);
318 fw_cfg_modify_string(x86ms
->fw_cfg
, FW_CFG_CMDLINE_DATA
, cmdline
);
/*
 * microvm_machine_state_init: machine init hook (installed as mc->init in
 * microvm_class_init). Runs, in order, microvm_memory_init(),
 * x86_cpus_init() with CPU_VERSION_LATEST, and microvm_devices_init().
 *
 * NOTE(review): this text is a lossy extraction. The condition guarding
 * error_report_err(local_err), whatever follows it, and the braces are
 * missing; nothing visible here ever assigns local_err.
 */
323 static void microvm_machine_state_init(MachineState
*machine
)
325 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
326 X86MachineState
*x86ms
= X86_MACHINE(machine
);
327 Error
*local_err
= NULL
;
329 microvm_memory_init(mms
);
331 x86_cpus_init(x86ms
, CPU_VERSION_LATEST
);
333 error_report_err(local_err
);
337 microvm_devices_init(mms
);
/*
 * microvm_machine_reset: machine reset hook (installed as mc->reset in
 * microvm_class_init).
 *
 * Visible behaviour: when ACPI is not enabled, a kernel filename is set,
 * auto_kernel_cmdline is on and the command line has not been fixed yet,
 * call microvm_fix_kernel_cmdline() and set kernel_cmdline_fixed so this
 * happens only once. Then reset all devices with qemu_devices_reset() and
 * call device_legacy_reset() on cpu->apic_state when it is set.
 *
 * NOTE(review): this text is a lossy extraction. The declaration of `cpu`,
 * the loop header that presumably iterates over CPUs, and all closing braces
 * are missing.
 */
340 static void microvm_machine_reset(MachineState
*machine
)
342 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
346 if (!x86_machine_is_acpi_enabled(X86_MACHINE(machine
)) &&
347 machine
->kernel_filename
!= NULL
&&
348 mms
->auto_kernel_cmdline
&& !mms
->kernel_cmdline_fixed
) {
349 microvm_fix_kernel_cmdline(machine
);
350 mms
->kernel_cmdline_fixed
= true;
353 qemu_devices_reset();
358 if (cpu
->apic_state
) {
359 device_legacy_reset(cpu
->apic_state
);
364 static void microvm_machine_get_pic(Object
*obj
, Visitor
*v
, const char *name
,
365 void *opaque
, Error
**errp
)
367 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
368 OnOffAuto pic
= mms
->pic
;
370 visit_type_OnOffAuto(v
, name
, &pic
, errp
);
373 static void microvm_machine_set_pic(Object
*obj
, Visitor
*v
, const char *name
,
374 void *opaque
, Error
**errp
)
376 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
378 visit_type_OnOffAuto(v
, name
, &mms
->pic
, errp
);
381 static void microvm_machine_get_pit(Object
*obj
, Visitor
*v
, const char *name
,
382 void *opaque
, Error
**errp
)
384 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
385 OnOffAuto pit
= mms
->pit
;
387 visit_type_OnOffAuto(v
, name
, &pit
, errp
);
390 static void microvm_machine_set_pit(Object
*obj
, Visitor
*v
, const char *name
,
391 void *opaque
, Error
**errp
)
393 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
395 visit_type_OnOffAuto(v
, name
, &mms
->pit
, errp
);
398 static void microvm_machine_get_rtc(Object
*obj
, Visitor
*v
, const char *name
,
399 void *opaque
, Error
**errp
)
401 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
402 OnOffAuto rtc
= mms
->rtc
;
404 visit_type_OnOffAuto(v
, name
, &rtc
, errp
);
407 static void microvm_machine_set_rtc(Object
*obj
, Visitor
*v
, const char *name
,
408 void *opaque
, Error
**errp
)
410 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
412 visit_type_OnOffAuto(v
, name
, &mms
->rtc
, errp
);
415 static bool microvm_machine_get_isa_serial(Object
*obj
, Error
**errp
)
417 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
419 return mms
->isa_serial
;
422 static void microvm_machine_set_isa_serial(Object
*obj
, bool value
,
425 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
427 mms
->isa_serial
= value
;
430 static bool microvm_machine_get_option_roms(Object
*obj
, Error
**errp
)
432 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
434 return mms
->option_roms
;
437 static void microvm_machine_set_option_roms(Object
*obj
, bool value
,
440 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
442 mms
->option_roms
= value
;
445 static bool microvm_machine_get_auto_kernel_cmdline(Object
*obj
, Error
**errp
)
447 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
449 return mms
->auto_kernel_cmdline
;
452 static void microvm_machine_set_auto_kernel_cmdline(Object
*obj
, bool value
,
455 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
457 mms
->auto_kernel_cmdline
= value
;
460 static void microvm_machine_done(Notifier
*notifier
, void *data
)
462 MicrovmMachineState
*mms
= container_of(notifier
, MicrovmMachineState
,
465 acpi_setup_microvm(mms
);
468 static void microvm_powerdown_req(Notifier
*notifier
, void *data
)
470 MicrovmMachineState
*mms
= container_of(notifier
, MicrovmMachineState
,
472 X86MachineState
*x86ms
= X86_MACHINE(mms
);
474 if (x86ms
->acpi_dev
) {
475 Object
*obj
= OBJECT(x86ms
->acpi_dev
);
476 AcpiDeviceIfClass
*adevc
= ACPI_DEVICE_IF_GET_CLASS(obj
);
477 adevc
->send_event(ACPI_DEVICE_IF(x86ms
->acpi_dev
),
478 ACPI_POWER_DOWN_STATUS
);
482 static void microvm_machine_initfn(Object
*obj
)
484 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
487 mms
->pic
= ON_OFF_AUTO_AUTO
;
488 mms
->pit
= ON_OFF_AUTO_AUTO
;
489 mms
->rtc
= ON_OFF_AUTO_AUTO
;
490 mms
->isa_serial
= true;
491 mms
->option_roms
= true;
492 mms
->auto_kernel_cmdline
= true;
495 mms
->kernel_cmdline_fixed
= false;
497 mms
->machine_done
.notify
= microvm_machine_done
;
498 qemu_add_machine_init_done_notifier(&mms
->machine_done
);
499 mms
->powerdown_req
.notify
= microvm_powerdown_req
;
500 qemu_register_powerdown_notifier(&mms
->powerdown_req
);
/*
 * microvm_class_init: class initializer for the microvm machine type.
 *
 * Visible behaviour: fill in MachineClass fields (init hook, family and
 * description strings, one unit per default bus, CPU hotplug / NUMA
 * auto-enable / NVDIMM support turned off, default CPU type, default RAM id
 * "microvm.ram", split kernel irqchip by default, reset hook), then register
 * the machine properties: three OnOffAuto properties (PIC, PIT, RTC) and
 * three boolean properties (ISA serial, option ROMs, auto kernel cmdline),
 * each followed by a description.
 *
 * NOTE(review): this text is a lossy extraction. Original lines 506, 508,
 * 512-513, the trailing arguments of the three object_class_property_add()
 * calls, the PIC and PIT description strings, and the braces are missing.
 * Restore them from the upstream file before compiling.
 */
503 static void microvm_class_init(ObjectClass
*oc
, void *data
)
505 MachineClass
*mc
= MACHINE_CLASS(oc
);
507 mc
->init
= microvm_machine_state_init
;
509 mc
->family
= "microvm_i386";
510 mc
->desc
= "microvm (i386)";
511 mc
->units_per_default_bus
= 1;
514 mc
->has_hotpluggable_cpus
= false;
515 mc
->auto_enable_numa_with_memhp
= false;
516 mc
->auto_enable_numa_with_memdev
= false;
517 mc
->default_cpu_type
= TARGET_DEFAULT_CPU_TYPE
;
518 mc
->nvdimm_supported
= false;
519 mc
->default_ram_id
= "microvm.ram";
521 /* Avoid relying too much on kernel components */
522 mc
->default_kernel_irqchip_split
= true;
524 /* Machine class handlers */
525 mc
->reset
= microvm_machine_reset
;
/* NOTE(review): the trailing arguments of this call are missing. */
527 object_class_property_add(oc
, MICROVM_MACHINE_PIC
, "OnOffAuto",
528 microvm_machine_get_pic
,
529 microvm_machine_set_pic
,
/* NOTE(review): the description string of this call is missing. */
531 object_class_property_set_description(oc
, MICROVM_MACHINE_PIC
,
/* NOTE(review): the trailing arguments of this call are missing. */
534 object_class_property_add(oc
, MICROVM_MACHINE_PIT
, "OnOffAuto",
535 microvm_machine_get_pit
,
536 microvm_machine_set_pit
,
/* NOTE(review): the description string of this call is missing. */
538 object_class_property_set_description(oc
, MICROVM_MACHINE_PIT
,
/* NOTE(review): the trailing arguments of this call are missing. */
541 object_class_property_add(oc
, MICROVM_MACHINE_RTC
, "OnOffAuto",
542 microvm_machine_get_rtc
,
543 microvm_machine_set_rtc
,
545 object_class_property_set_description(oc
, MICROVM_MACHINE_RTC
,
546 "Enable MC146818 RTC");
548 object_class_property_add_bool(oc
, MICROVM_MACHINE_ISA_SERIAL
,
549 microvm_machine_get_isa_serial
,
550 microvm_machine_set_isa_serial
);
551 object_class_property_set_description(oc
, MICROVM_MACHINE_ISA_SERIAL
,
552 "Set off to disable the instantiation an ISA serial port");
554 object_class_property_add_bool(oc
, MICROVM_MACHINE_OPTION_ROMS
,
555 microvm_machine_get_option_roms
,
556 microvm_machine_set_option_roms
);
557 object_class_property_set_description(oc
, MICROVM_MACHINE_OPTION_ROMS
,
558 "Set off to disable loading option ROMs");
560 object_class_property_add_bool(oc
, MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
561 microvm_machine_get_auto_kernel_cmdline
,
562 microvm_machine_set_auto_kernel_cmdline
);
563 object_class_property_set_description(oc
,
564 MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
565 "Set off to disable adding virtio-mmio devices to the kernel cmdline");
/*
 * QOM type description for the microvm machine: named TYPE_MICROVM_MACHINE,
 * derived from TYPE_X86_MACHINE, with MicrovmMachineState instances
 * initialized by microvm_machine_initfn() and a MicrovmMachineClass class
 * initialized by microvm_class_init().
 *
 * NOTE(review): the listing is cut off inside the `.interfaces` array; its
 * entries and the closing braces are not visible here.
 */
568 static const TypeInfo microvm_machine_info
= {
569 .name
= TYPE_MICROVM_MACHINE
,
570 .parent
= TYPE_X86_MACHINE
,
571 .instance_size
= sizeof(MicrovmMachineState
),
572 .instance_init
= microvm_machine_initfn
,
573 .class_size
= sizeof(MicrovmMachineClass
),
574 .class_init
= microvm_class_init
,
575 .interfaces
= (InterfaceInfo
[]) {
580 static void microvm_machine_init(void)
582 type_register_static(µvm_machine_info
);
584 type_init(microvm_machine_init
);