/*
 * Copyright (c) 2018 Intel Corporation
 * Copyright (c) 2019 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 #include "qemu/osdep.h"
19 #include "qemu/error-report.h"
20 #include "qemu/cutils.h"
21 #include "qemu/units.h"
22 #include "qapi/error.h"
23 #include "qapi/visitor.h"
24 #include "qapi/qapi-visit-common.h"
25 #include "sysemu/sysemu.h"
26 #include "sysemu/cpus.h"
27 #include "sysemu/numa.h"
28 #include "sysemu/reset.h"
30 #include "hw/loader.h"
32 #include "hw/kvm/clock.h"
33 #include "hw/i386/microvm.h"
34 #include "hw/i386/x86.h"
35 #include "target/i386/cpu.h"
36 #include "hw/intc/i8259.h"
37 #include "hw/timer/i8254.h"
38 #include "hw/rtc/mc146818rtc.h"
39 #include "hw/char/serial.h"
40 #include "hw/i386/topology.h"
41 #include "hw/i386/e820_memory_layout.h"
42 #include "hw/i386/fw_cfg.h"
43 #include "hw/virtio/virtio-mmio.h"
48 #include "hw/xen/start_info.h"
/* BIOS image used when no BIOS name has been set (see microvm_devices_init). */
#define MICROVM_BIOS_FILENAME "bios-microvm.bin"
/*
 * Publish the guest RAM layout through the RTC's CMOS memory.
 *
 * Values derived from x86ms->below_4g_mem_size and
 * x86ms->above_4g_mem_size are passed to rtc_set_memory() for RTC @s:
 *   0x15, 0x16        RAM below 4G in KiB, capped at 640
 *   0x17, 0x18 and
 *   0x30, 0x31        RAM above the first MiB, in KiB
 *   0x34, 0x35        RAM above 16 MiB, in units of 64 KiB
 *   0x5b, 0x5c, 0x5d  RAM above 4G, in units of 65536 bytes
 * Successive registers of a group receive val, val >> 8 (and val >> 16).
 *
 * NOTE(review): the declaration of `val` and the lines that follow each
 * size test (embedded numbers 63-68 and 76-81) are absent from this
 * listing, so what `val` holds when a test is false cannot be told here.
 */
52 static void microvm_set_rtc(MicrovmMachineState
*mms
, ISADevice
*s
)
54 X86MachineState
*x86ms
= X86_MACHINE(mms
);
/* base memory: RAM below 4G expressed in KiB, never more than 640 */
57 val
= MIN(x86ms
->below_4g_mem_size
/ KiB
, 640);
58 rtc_set_memory(s
, 0x15, val
);
59 rtc_set_memory(s
, 0x16, val
>> 8);
60 /* extended memory (next 64MiB) */
61 if (x86ms
->below_4g_mem_size
> 1 * MiB
) {
62 val
= (x86ms
->below_4g_mem_size
- 1 * MiB
) / KiB
;
/* the same value is written to two register pairs: 0x17/0x18 and 0x30/0x31 */
69 rtc_set_memory(s
, 0x17, val
);
70 rtc_set_memory(s
, 0x18, val
>> 8);
71 rtc_set_memory(s
, 0x30, val
);
72 rtc_set_memory(s
, 0x31, val
>> 8);
73 /* memory between 16MiB and 4GiB */
74 if (x86ms
->below_4g_mem_size
> 16 * MiB
) {
75 val
= (x86ms
->below_4g_mem_size
- 16 * MiB
) / (64 * KiB
);
82 rtc_set_memory(s
, 0x34, val
);
83 rtc_set_memory(s
, 0x35, val
>> 8);
84 /* memory above 4GiB */
85 val
= x86ms
->above_4g_mem_size
/ 65536;
86 rtc_set_memory(s
, 0x5b, val
);
87 rtc_set_memory(s
, 0x5c, val
>> 8);
88 rtc_set_memory(s
, 0x5d, val
>> 16);
/*
 * GSI line handler: forwards the change on line @n to s->ioapic_irq[n]
 * with the given @level.
 *
 * It is one of the two handlers handed to qemu_allocate_irqs() in
 * microvm_devices_init(), which passes gsi_state as the opaque pointer.
 *
 * NOTE(review): the declaration of `s` is not visible in this listing;
 * presumably it is obtained from @opaque - confirm.
 */
91 static void microvm_gsi_handler(void *opaque
, int n
, int level
)
95 qemu_set_irq(s
->ioapic_irq
[n
], level
);
/*
 * Create the board devices of the microvm machine.
 *
 * Visible steps, in order:
 *  - allocate a zeroed gsi_state and the x86ms->gsi IRQ array of
 *    GSI_NUM_PINS lines (gsi_handler and microvm_gsi_handler are the two
 *    handlers passed to qemu_allocate_irqs());
 *  - create the ISA bus and attach x86ms->gsi as its IRQs;
 *  - call ioapic_init_gsi();
 *  - create VIRTIO_NUM_TRANSPORTS "virtio-mmio" sysbus devices at
 *    VIRTIO_MMIO_BASE + i * 512, each wired to gsi[VIRTIO_IRQ_BASE + i];
 *  - instantiate the i8259, the PIT, the MC146818 RTC and the ISA serial
 *    port depending on mms->pic, mms->pit, mms->rtc and mms->isa_serial;
 *  - default bios_name to MICROVM_BIOS_FILENAME and call
 *    x86_bios_rom_init().
 *
 * NOTE(review): the declarations of isa_bus, gsi_state, i8259 and i, the
 * closing/else lines of the conditionals and a few argument lines are
 * absent from this listing.
 */
98 static void microvm_devices_init(MicrovmMachineState
*mms
)
100 X86MachineState
*x86ms
= X86_MACHINE(mms
);
102 ISADevice
*rtc_state
;
106 /* Core components */
108 gsi_state
= g_malloc0(sizeof(*gsi_state
));
/*
 * Two allocations of x86ms->gsi follow, one per handler.
 * NOTE(review): presumably the second one is the else branch of this
 * test (embedded line 111 is missing) - confirm.
 */
109 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
110 x86ms
->gsi
= qemu_allocate_irqs(gsi_handler
, gsi_state
, GSI_NUM_PINS
);
112 x86ms
->gsi
= qemu_allocate_irqs(microvm_gsi_handler
,
113 gsi_state
, GSI_NUM_PINS
);
116 isa_bus
= isa_bus_new(NULL
, get_system_memory(), get_system_io(),
118 isa_bus_irqs(isa_bus
, x86ms
->gsi
);
120 ioapic_init_gsi(gsi_state
, "machine");
/* one virtio-mmio transport per index, spaced 512 apart in the address map */
124 for (i
= 0; i
< VIRTIO_NUM_TRANSPORTS
; i
++) {
125 sysbus_create_simple("virtio-mmio",
126 VIRTIO_MMIO_BASE
+ i
* 512,
127 x86ms
->gsi
[VIRTIO_IRQ_BASE
+ i
]);
130 /* Optional and legacy devices */
/* PIC: created for both "on" and "auto"; its lines are copied into gsi_state */
132 if (mms
->pic
== ON_OFF_AUTO_ON
|| mms
->pic
== ON_OFF_AUTO_AUTO
) {
135 i8259
= i8259_init(isa_bus
, x86_allocate_cpu_irq());
136 for (i
= 0; i
< ISA_NUM_IRQS
; i
++) {
137 gsi_state
->i8259_irq
[i
] = i8259
[i
];
/* PIT: created for both "on" and "auto"; kvm_pit_in_kernel() picks the variant */
142 if (mms
->pit
== ON_OFF_AUTO_ON
|| mms
->pit
== ON_OFF_AUTO_AUTO
) {
143 if (kvm_pit_in_kernel()) {
144 kvm_pit_init(isa_bus
, 0x40);
146 i8254_pit_init(isa_bus
, 0x40, 0, NULL
);
/* RTC: created for "on", and for "auto" only when KVM is not enabled */
150 if (mms
->rtc
== ON_OFF_AUTO_ON
||
151 (mms
->rtc
== ON_OFF_AUTO_AUTO
&& !kvm_enabled())) {
152 rtc_state
= mc146818_rtc_init(isa_bus
, 2000, NULL
);
153 microvm_set_rtc(mms
, rtc_state
);
156 if (mms
->isa_serial
) {
157 serial_hds_isa_init(isa_bus
, 0, 1);
/* fall back to the microvm BIOS image when no BIOS name was set */
160 if (bios_name
== NULL
) {
161 bios_name
= MICROVM_BIOS_FILENAME
;
163 x86_bios_rom_init(get_system_memory(), true);
/*
 * Lay out guest RAM and create the fw_cfg device.
 *
 * Visible steps:
 *  - split machine->ram_size into x86ms->below_4g_mem_size and
 *    x86ms->above_4g_mem_size around `lowmem`, which is clamped to
 *    x86ms->max_ram_below_4g (4 GiB when that option is zero);
 *  - alias machine->ram into system memory as "ram-below-4g" at 0 and,
 *    when above_4g_mem_size is non-zero, "ram-above-4g" at 0x100000000,
 *    adding an E820_RAM entry for each range;
 *  - create fw_cfg and add CPU counts, RAM size, the IRQ0 override flag
 *    and the e820 data;
 *  - call x86_load_linux() when a kernel file name is set, and add the
 *    option ROMs when mms->option_roms is set;
 *  - store fw_cfg and &address_space_memory in x86ms.
 *
 * NOTE(review): the declarations of lowmem, fw_cfg and i, the value given
 * to lowmem, the comment delimiters and a few argument lines are absent
 * from this listing.
 */
166 static void microvm_memory_init(MicrovmMachineState
*mms
)
168 MachineState
*machine
= MACHINE(mms
);
169 X86MachineState
*x86ms
= X86_MACHINE(mms
);
170 MemoryRegion
*ram_below_4g
, *ram_above_4g
;
171 MemoryRegion
*system_memory
= get_system_memory();
177 * Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
178 * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
179 * also known as MMCFG).
180 * If it doesn't, we need to split it in chunks below and above 4G.
181 * In any case, try to make sure that guest addresses aligned at
182 * 1G boundaries get mapped to host addresses aligned at 1G boundaries.
184 if (machine
->ram_size
>= 0xb0000000) {
191 * Handle the machine opt max-ram-below-4g. It is basically doing
192 * min(qemu limit, user limit).
/* an unset (zero) max-ram-below-4g is treated as 4 GiB */
194 if (!x86ms
->max_ram_below_4g
) {
195 x86ms
->max_ram_below_4g
= 4 * GiB
;
197 if (lowmem
> x86ms
->max_ram_below_4g
) {
198 lowmem
= x86ms
->max_ram_below_4g
;
/* warn when more RAM sits above lowmem than below it and lowmem is not 1 GiB aligned */
199 if (machine
->ram_size
- lowmem
> lowmem
&&
200 lowmem
& (1 * GiB
- 1)) {
201 warn_report("There is possibly poor performance as the ram size "
202 " (0x%" PRIx64
") is more then twice the size of"
203 " max-ram-below-4g (%"PRIu64
") and"
204 " max-ram-below-4g is not a multiple of 1G.",
205 (uint64_t)machine
->ram_size
, x86ms
->max_ram_below_4g
);
/* RAM beyond lowmem goes above 4G; two assignment pairs are visible, one per case */
209 if (machine
->ram_size
> lowmem
) {
210 x86ms
->above_4g_mem_size
= machine
->ram_size
- lowmem
;
211 x86ms
->below_4g_mem_size
= lowmem
;
213 x86ms
->above_4g_mem_size
= 0;
214 x86ms
->below_4g_mem_size
= machine
->ram_size
;
/* low RAM: alias of machine->ram from offset 0, mapped at guest address 0 */
217 ram_below_4g
= g_malloc(sizeof(*ram_below_4g
));
218 memory_region_init_alias(ram_below_4g
, NULL
, "ram-below-4g", machine
->ram
,
219 0, x86ms
->below_4g_mem_size
);
220 memory_region_add_subregion(system_memory
, 0, ram_below_4g
);
222 e820_add_entry(0, x86ms
->below_4g_mem_size
, E820_RAM
);
/* high RAM: alias starting at offset below_4g_mem_size, mapped at 0x100000000 */
224 if (x86ms
->above_4g_mem_size
> 0) {
225 ram_above_4g
= g_malloc(sizeof(*ram_above_4g
));
226 memory_region_init_alias(ram_above_4g
, NULL
, "ram-above-4g",
228 x86ms
->below_4g_mem_size
,
229 x86ms
->above_4g_mem_size
);
230 memory_region_add_subregion(system_memory
, 0x100000000ULL
,
232 e820_add_entry(0x100000000ULL
, x86ms
->above_4g_mem_size
, E820_RAM
);
/* fw_cfg device and the entries added to it */
235 fw_cfg
= fw_cfg_init_io_dma(FW_CFG_IO_BASE
, FW_CFG_IO_BASE
+ 4,
236 &address_space_memory
);
238 fw_cfg_add_i16(fw_cfg
, FW_CFG_NB_CPUS
, machine
->smp
.cpus
);
239 fw_cfg_add_i16(fw_cfg
, FW_CFG_MAX_CPUS
, machine
->smp
.max_cpus
);
240 fw_cfg_add_i64(fw_cfg
, FW_CFG_RAM_SIZE
, (uint64_t)machine
->ram_size
);
241 fw_cfg_add_i32(fw_cfg
, FW_CFG_IRQ0_OVERRIDE
, kvm_allows_irq0_override());
242 fw_cfg_add_bytes(fw_cfg
, FW_CFG_E820_TABLE
,
243 &e820_reserve
, sizeof(e820_reserve
));
244 fw_cfg_add_file(fw_cfg
, "etc/e820", e820_table
,
245 sizeof(struct e820_entry
) * e820_get_num_entries());
/* direct kernel boot, only when a kernel file name was supplied */
249 if (machine
->kernel_filename
!= NULL
) {
250 x86_load_linux(x86ms
, fw_cfg
, 0, true, true);
253 if (mms
->option_roms
) {
254 for (i
= 0; i
< nb_option_roms
; i
++) {
255 rom_add_option(option_rom
[i
].name
, option_rom
[i
].bootindex
);
259 x86ms
->fw_cfg
= fw_cfg
;
260 x86ms
->ioapic_as
= &address_space_memory
;
/*
 * Build the " virtio_mmio.device=..." kernel command line fragment for
 * the virtio-mmio bus called @name.
 *
 * The transport index is parsed in base 10 from the text after the last
 * "." in @name. The fragment is formatted into a zeroed buffer of
 * VIRTIO_CMDLINE_MAXLEN bytes and encodes VIRTIO_MMIO_BASE + index * 512
 * and VIRTIO_IRQ_BASE + index.
 *
 * NOTE(review): the local declarations, the error paths and the return
 * statements are absent from this listing. The caller in
 * microvm_fix_kernel_cmdline() releases the result with g_free().
 */
263 static gchar
*microvm_get_mmio_cmdline(gchar
*name
)
270 separator
= g_strrstr(name
, ".");
/* non-zero from qemu_strtol() means the suffix is not a valid number */
275 if (qemu_strtol(separator
+ 1, NULL
, 10, &index
) != 0) {
279 cmdline
= g_malloc0(VIRTIO_CMDLINE_MAXLEN
);
280 ret
= g_snprintf(cmdline
, VIRTIO_CMDLINE_MAXLEN
,
281 " virtio_mmio.device=512@0x%lx:%ld",
282 VIRTIO_MMIO_BASE
+ index
* 512,
283 VIRTIO_IRQ_BASE
+ index
);
/* formatting error, or output that did not fit in the buffer */
284 if (ret
< 0 || ret
>= VIRTIO_CMDLINE_MAXLEN
) {
/*
 * Extend the kernel command line with the virtio-mmio devices in use.
 *
 * Starts from a copy of machine->kernel_cmdline, walks the children of
 * the default sysbus and, for every TYPE_VIRTIO_MMIO device whose bus has
 * at least one child, joins the fragment returned by
 * microvm_get_mmio_cmdline() onto the command line. The result is then
 * written to fw_cfg as FW_CFG_CMDLINE_SIZE (length + 1) and
 * FW_CFG_CMDLINE_DATA.
 *
 * NOTE(review): the declarations of cmdline, bus and kid, and the lines
 * that install `newcmd` as the new command line, are absent from this
 * listing.
 */
292 static void microvm_fix_kernel_cmdline(MachineState
*machine
)
294 X86MachineState
*x86ms
= X86_MACHINE(machine
);
300 * Find MMIO transports with attached devices, and add them to the kernel
303 * Yes, this is a hack, but one that heavily improves the UX without
304 * introducing any significant issues.
306 cmdline
= g_strdup(machine
->kernel_cmdline
);
307 bus
= sysbus_get_default();
308 QTAILQ_FOREACH(kid
, &bus
->children
, sibling
) {
309 DeviceState
*dev
= kid
->child
;
310 ObjectClass
*class = object_get_class(OBJECT(dev
));
/* only devices whose class is exactly TYPE_VIRTIO_MMIO are considered */
312 if (class == object_class_by_name(TYPE_VIRTIO_MMIO
)) {
313 VirtIOMMIOProxy
*mmio
= VIRTIO_MMIO(OBJECT(dev
));
314 VirtioBusState
*mmio_virtio_bus
= &mmio
->bus
;
315 BusState
*mmio_bus
= &mmio_virtio_bus
->parent_obj
;
/* a transport with no child has nothing attached and is skipped */
317 if (!QTAILQ_EMPTY(&mmio_bus
->children
)) {
318 gchar
*mmio_cmdline
= microvm_get_mmio_cmdline(mmio_bus
->name
);
320 char *newcmd
= g_strjoin(NULL
, cmdline
, mmio_cmdline
, NULL
);
321 g_free(mmio_cmdline
);
/* the size entry counts the terminating NUL */
329 fw_cfg_modify_i32(x86ms
->fw_cfg
, FW_CFG_CMDLINE_SIZE
, strlen(cmdline
) + 1);
330 fw_cfg_modify_string(x86ms
->fw_cfg
, FW_CFG_CMDLINE_DATA
, cmdline
);
/*
 * Machine init callback (installed as mc->init in microvm_class_init()).
 *
 * Runs the three bring-up stages in order: microvm_memory_init(),
 * x86_cpus_init() with CPU_VERSION_LATEST, then microvm_devices_init().
 *
 * NOTE(review): the lines around error_report_err() are absent from this
 * listing; nothing visible here assigns local_err, so the condition under
 * which the error is reported cannot be told from here.
 */
335 static void microvm_machine_state_init(MachineState
*machine
)
337 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
338 X86MachineState
*x86ms
= X86_MACHINE(machine
);
339 Error
*local_err
= NULL
;
341 microvm_memory_init(mms
);
343 x86_cpus_init(x86ms
, CPU_VERSION_LATEST
);
345 error_report_err(local_err
);
349 microvm_devices_init(mms
);
/*
 * Machine reset callback (installed as mc->reset in microvm_class_init()).
 *
 * On the first reset with a kernel file name set and auto_kernel_cmdline
 * enabled, rewrites the kernel command line via
 * microvm_fix_kernel_cmdline() and records that in
 * mms->kernel_cmdline_fixed so later resets skip it. It then resets all
 * devices and calls device_legacy_reset() on a CPU's APIC when present.
 *
 * NOTE(review): the declaration of `cpu` and the iteration that yields it
 * are absent from this listing.
 */
352 static void microvm_machine_reset(MachineState
*machine
)
354 MicrovmMachineState
*mms
= MICROVM_MACHINE(machine
);
/* the command line fix-up runs at most once per machine instance */
358 if (machine
->kernel_filename
!= NULL
&&
359 mms
->auto_kernel_cmdline
&& !mms
->kernel_cmdline_fixed
) {
360 microvm_fix_kernel_cmdline(machine
);
361 mms
->kernel_cmdline_fixed
= true;
364 qemu_devices_reset();
369 if (cpu
->apic_state
) {
370 device_legacy_reset(cpu
->apic_state
);
375 static void microvm_machine_get_pic(Object
*obj
, Visitor
*v
, const char *name
,
376 void *opaque
, Error
**errp
)
378 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
379 OnOffAuto pic
= mms
->pic
;
381 visit_type_OnOffAuto(v
, name
, &pic
, errp
);
384 static void microvm_machine_set_pic(Object
*obj
, Visitor
*v
, const char *name
,
385 void *opaque
, Error
**errp
)
387 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
389 visit_type_OnOffAuto(v
, name
, &mms
->pic
, errp
);
392 static void microvm_machine_get_pit(Object
*obj
, Visitor
*v
, const char *name
,
393 void *opaque
, Error
**errp
)
395 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
396 OnOffAuto pit
= mms
->pit
;
398 visit_type_OnOffAuto(v
, name
, &pit
, errp
);
401 static void microvm_machine_set_pit(Object
*obj
, Visitor
*v
, const char *name
,
402 void *opaque
, Error
**errp
)
404 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
406 visit_type_OnOffAuto(v
, name
, &mms
->pit
, errp
);
409 static void microvm_machine_get_rtc(Object
*obj
, Visitor
*v
, const char *name
,
410 void *opaque
, Error
**errp
)
412 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
413 OnOffAuto rtc
= mms
->rtc
;
415 visit_type_OnOffAuto(v
, name
, &rtc
, errp
);
418 static void microvm_machine_set_rtc(Object
*obj
, Visitor
*v
, const char *name
,
419 void *opaque
, Error
**errp
)
421 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
423 visit_type_OnOffAuto(v
, name
, &mms
->rtc
, errp
);
426 static bool microvm_machine_get_isa_serial(Object
*obj
, Error
**errp
)
428 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
430 return mms
->isa_serial
;
433 static void microvm_machine_set_isa_serial(Object
*obj
, bool value
,
436 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
438 mms
->isa_serial
= value
;
441 static bool microvm_machine_get_option_roms(Object
*obj
, Error
**errp
)
443 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
445 return mms
->option_roms
;
448 static void microvm_machine_set_option_roms(Object
*obj
, bool value
,
451 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
453 mms
->option_roms
= value
;
456 static bool microvm_machine_get_auto_kernel_cmdline(Object
*obj
, Error
**errp
)
458 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
460 return mms
->auto_kernel_cmdline
;
463 static void microvm_machine_set_auto_kernel_cmdline(Object
*obj
, bool value
,
466 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
468 mms
->auto_kernel_cmdline
= value
;
471 static void microvm_machine_initfn(Object
*obj
)
473 MicrovmMachineState
*mms
= MICROVM_MACHINE(obj
);
476 mms
->pic
= ON_OFF_AUTO_AUTO
;
477 mms
->pit
= ON_OFF_AUTO_AUTO
;
478 mms
->rtc
= ON_OFF_AUTO_AUTO
;
479 mms
->isa_serial
= true;
480 mms
->option_roms
= true;
481 mms
->auto_kernel_cmdline
= true;
484 mms
->kernel_cmdline_fixed
= false;
/*
 * Class initialiser for the microvm machine type.
 *
 * Fills in the MachineClass fields (init and reset callbacks, family and
 * description strings, default CPU type, default RAM id, and the feature
 * flags set below) and registers the machine properties: three OnOffAuto
 * properties (PIC, PIT, RTC) and three boolean properties (ISA serial,
 * option ROMs, automatic kernel command line), each with its accessors
 * and a description.
 *
 * NOTE(review): the trailing arguments of the object_class_property_add()
 * calls, two description strings and some assignments are absent from
 * this listing.
 */
487 static void microvm_class_init(ObjectClass
*oc
, void *data
)
489 MachineClass
*mc
= MACHINE_CLASS(oc
);
491 mc
->init
= microvm_machine_state_init
;
493 mc
->family
= "microvm_i386";
494 mc
->desc
= "microvm (i386)";
495 mc
->units_per_default_bus
= 1;
498 mc
->has_hotpluggable_cpus
= false;
499 mc
->auto_enable_numa_with_memhp
= false;
500 mc
->default_cpu_type
= TARGET_DEFAULT_CPU_TYPE
;
501 mc
->nvdimm_supported
= false;
502 mc
->default_ram_id
= "microvm.ram";
504 /* Avoid relying too much on kernel components */
505 mc
->default_kernel_irqchip_split
= true;
507 /* Machine class handlers */
508 mc
->reset
= microvm_machine_reset
;
/* OnOffAuto properties: PIC, PIT and RTC */
510 object_class_property_add(oc
, MICROVM_MACHINE_PIC
, "OnOffAuto",
511 microvm_machine_get_pic
,
512 microvm_machine_set_pic
,
514 object_class_property_set_description(oc
, MICROVM_MACHINE_PIC
,
517 object_class_property_add(oc
, MICROVM_MACHINE_PIT
, "OnOffAuto",
518 microvm_machine_get_pit
,
519 microvm_machine_set_pit
,
521 object_class_property_set_description(oc
, MICROVM_MACHINE_PIT
,
524 object_class_property_add(oc
, MICROVM_MACHINE_RTC
, "OnOffAuto",
525 microvm_machine_get_rtc
,
526 microvm_machine_set_rtc
,
528 object_class_property_set_description(oc
, MICROVM_MACHINE_RTC
,
529 "Enable MC146818 RTC");
/* boolean properties: ISA serial, option ROMs, automatic kernel cmdline */
531 object_class_property_add_bool(oc
, MICROVM_MACHINE_ISA_SERIAL
,
532 microvm_machine_get_isa_serial
,
533 microvm_machine_set_isa_serial
);
534 object_class_property_set_description(oc
, MICROVM_MACHINE_ISA_SERIAL
,
535 "Set off to disable the instantiation an ISA serial port");
537 object_class_property_add_bool(oc
, MICROVM_MACHINE_OPTION_ROMS
,
538 microvm_machine_get_option_roms
,
539 microvm_machine_set_option_roms
);
540 object_class_property_set_description(oc
, MICROVM_MACHINE_OPTION_ROMS
,
541 "Set off to disable loading option ROMs");
543 object_class_property_add_bool(oc
, MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
544 microvm_machine_get_auto_kernel_cmdline
,
545 microvm_machine_set_auto_kernel_cmdline
);
546 object_class_property_set_description(oc
,
547 MICROVM_MACHINE_AUTO_KERNEL_CMDLINE
,
548 "Set off to disable adding virtio-mmio devices to the kernel cmdline");
/*
 * QOM type description of the microvm machine: a TYPE_X86_MACHINE
 * subtype whose instances are MicrovmMachineState objects initialised by
 * microvm_machine_initfn() and whose class is a MicrovmMachineClass
 * initialised by microvm_class_init().
 *
 * NOTE(review): the entries of the .interfaces array and the closing
 * lines of the initialiser are absent from this listing.
 */
551 static const TypeInfo microvm_machine_info
= {
552 .name
= TYPE_MICROVM_MACHINE
,
553 .parent
= TYPE_X86_MACHINE
,
554 .instance_size
= sizeof(MicrovmMachineState
),
555 .instance_init
= microvm_machine_initfn
,
556 .class_size
= sizeof(MicrovmMachineClass
),
557 .class_init
= microvm_class_init
,
558 .interfaces
= (InterfaceInfo
[]) {
563 static void microvm_machine_init(void)
565 type_register_static(µvm_machine_info
);
567 type_init(microvm_machine_init
);