/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc. 2012
 */
17 #include "qemu/osdep.h"
18 #include CONFIG_DEVICES /* CONFIG_IOMMUFD */
19 #include "qapi/error.h"
20 #include <sys/ioctl.h>
21 #include <linux/vfio.h>
23 #include "hw/vfio/vfio-platform.h"
24 #include "sysemu/iommufd.h"
25 #include "migration/vmstate.h"
26 #include "qemu/error-report.h"
27 #include "qemu/lockable.h"
28 #include "qemu/main-loop.h"
29 #include "qemu/module.h"
30 #include "qemu/range.h"
31 #include "exec/memory.h"
32 #include "exec/address-spaces.h"
33 #include "qemu/queue.h"
34 #include "hw/sysbus.h"
37 #include "hw/platform-bus.h"
38 #include "hw/qdev-properties.h"
39 #include "sysemu/kvm.h"
/*
 * Functions used regardless of the injection method
 */
45 static inline bool vfio_irq_is_automasked(VFIOINTp
*intp
)
47 return intp
->flags
& VFIO_IRQ_INFO_AUTOMASKED
;
51 * vfio_init_intp - allocate, initialize the IRQ struct pointer
52 * and add it into the list of IRQs
53 * @vbasedev: the VFIO device handle
54 * @info: irq info struct retrieved from VFIO driver
57 static VFIOINTp
*vfio_init_intp(VFIODevice
*vbasedev
,
58 struct vfio_irq_info info
, Error
**errp
)
61 VFIOPlatformDevice
*vdev
=
62 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
63 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(vdev
);
66 intp
= g_malloc0(sizeof(*intp
));
68 intp
->pin
= info
.index
;
69 intp
->flags
= info
.flags
;
70 intp
->state
= VFIO_IRQ_INACTIVE
;
71 intp
->kvm_accel
= false;
73 sysbus_init_irq(sbdev
, &intp
->qemuirq
);
75 /* Get an eventfd for trigger */
76 intp
->interrupt
= g_new0(EventNotifier
, 1);
77 ret
= event_notifier_init(intp
->interrupt
, 0);
79 g_free(intp
->interrupt
);
81 error_setg_errno(errp
, -ret
,
82 "failed to initialize trigger eventfd notifier");
85 if (vfio_irq_is_automasked(intp
)) {
86 /* Get an eventfd for resample/unmask */
87 intp
->unmask
= g_new0(EventNotifier
, 1);
88 ret
= event_notifier_init(intp
->unmask
, 0);
90 g_free(intp
->interrupt
);
93 error_setg_errno(errp
, -ret
,
94 "failed to initialize resample eventfd notifier");
99 QLIST_INSERT_HEAD(&vdev
->intp_list
, intp
, next
);
104 * vfio_set_trigger_eventfd - set VFIO eventfd handling
106 * @intp: IRQ struct handle
107 * @handler: handler to be called on eventfd signaling
109 * Setup VFIO signaling and attach an optional user-side handler
112 static int vfio_set_trigger_eventfd(VFIOINTp
*intp
,
113 eventfd_user_side_handler_t handler
)
115 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
116 int32_t fd
= event_notifier_get_fd(intp
->interrupt
);
119 qemu_set_fd_handler(fd
, (IOHandler
*)handler
, NULL
, intp
);
121 if (!vfio_set_irq_signaling(vbasedev
, intp
->pin
, 0,
122 VFIO_IRQ_SET_ACTION_TRIGGER
, fd
, &err
)) {
123 error_reportf_err(err
, VFIO_MSG_PREFIX
, vbasedev
->name
);
124 qemu_set_fd_handler(fd
, NULL
, NULL
, NULL
);
/*
 * Functions only used when eventfds are handled on user-side
 */
137 * vfio_mmap_set_enabled - enable/disable the fast path mode
138 * @vdev: the VFIO platform device
139 * @enabled: the target mmap state
141 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
142 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
143 * are called; slow path enables to trap the device IRQ status register reset
146 static void vfio_mmap_set_enabled(VFIOPlatformDevice
*vdev
, bool enabled
)
150 for (i
= 0; i
< vdev
->vbasedev
.num_regions
; i
++) {
151 vfio_region_mmaps_set_enabled(vdev
->regions
[i
], enabled
);
156 * vfio_intp_mmap_enable - timer function, restores the fast path
157 * if there is no more active IRQ
158 * @opaque: actually points to the VFIO platform device
160 * Called on mmap timer timeout, this function checks whether the
161 * IRQ is still active and if not, restores the fast path.
162 * by construction a single eventfd is handled at a time.
163 * if the IRQ is still active, the timer is re-programmed.
165 static void vfio_intp_mmap_enable(void *opaque
)
168 VFIOPlatformDevice
*vdev
= (VFIOPlatformDevice
*)opaque
;
170 QEMU_LOCK_GUARD(&vdev
->intp_mutex
);
171 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
172 if (tmp
->state
== VFIO_IRQ_ACTIVE
) {
173 trace_vfio_platform_intp_mmap_enable(tmp
->pin
);
174 /* re-program the timer to check active status later */
175 timer_mod(vdev
->mmap_timer
,
176 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
181 vfio_mmap_set_enabled(vdev
, true);
185 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
186 * @opaque: opaque pointer, in practice the VFIOINTp handle
188 * The function is called on a previous IRQ completion, from
189 * vfio_platform_eoi, while the intp_mutex is locked.
190 * Also in such situation, the slow path already is set and
191 * the mmap timer was already programmed.
193 static void vfio_intp_inject_pending_lockheld(VFIOINTp
*intp
)
195 trace_vfio_platform_intp_inject_pending_lockheld(intp
->pin
,
196 event_notifier_get_fd(intp
->interrupt
));
198 intp
->state
= VFIO_IRQ_ACTIVE
;
200 /* trigger the virtual IRQ */
201 qemu_set_irq(intp
->qemuirq
, 1);
205 * vfio_intp_interrupt - The user-side eventfd handler
206 * @opaque: opaque pointer which in practice is the VFIOINTp handle
208 * the function is entered in event handler context:
209 * the vIRQ is injected into the guest if there is no other active
212 static void vfio_intp_interrupt(VFIOINTp
*intp
)
216 VFIOPlatformDevice
*vdev
= intp
->vdev
;
217 bool delay_handling
= false;
219 QEMU_LOCK_GUARD(&vdev
->intp_mutex
);
220 if (intp
->state
== VFIO_IRQ_INACTIVE
) {
221 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
222 if (tmp
->state
== VFIO_IRQ_ACTIVE
||
223 tmp
->state
== VFIO_IRQ_PENDING
) {
224 delay_handling
= true;
229 if (delay_handling
) {
231 * the new IRQ gets a pending status and is pushed in
234 intp
->state
= VFIO_IRQ_PENDING
;
235 trace_vfio_intp_interrupt_set_pending(intp
->pin
);
236 QSIMPLEQ_INSERT_TAIL(&vdev
->pending_intp_queue
,
238 event_notifier_test_and_clear(intp
->interrupt
);
242 trace_vfio_platform_intp_interrupt(intp
->pin
,
243 event_notifier_get_fd(intp
->interrupt
));
245 ret
= event_notifier_test_and_clear(intp
->interrupt
);
247 error_report("Error when clearing fd=%d (ret = %d)",
248 event_notifier_get_fd(intp
->interrupt
), ret
);
251 intp
->state
= VFIO_IRQ_ACTIVE
;
254 vfio_mmap_set_enabled(vdev
, false);
256 /* trigger the virtual IRQ */
257 qemu_set_irq(intp
->qemuirq
, 1);
260 * Schedule the mmap timer which will restore fastpath when no IRQ
263 if (vdev
->mmap_timeout
) {
264 timer_mod(vdev
->mmap_timer
,
265 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
271 * vfio_platform_eoi - IRQ completion routine
272 * @vbasedev: the VFIO device handle
274 * De-asserts the active virtual IRQ and unmasks the physical IRQ
275 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
276 * Then it handles next pending IRQ if any.
277 * eoi function is called on the first access to any MMIO region
278 * after an IRQ was triggered, trapped since slow path was set.
279 * It is assumed this access corresponds to the IRQ status
280 * register reset. With such a mechanism, a single IRQ can be
281 * handled at a time since there is no way to know which IRQ
282 * was completed by the guest (we would need additional details
283 * about the IRQ status register mask).
285 static void vfio_platform_eoi(VFIODevice
*vbasedev
)
288 VFIOPlatformDevice
*vdev
=
289 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
291 QEMU_LOCK_GUARD(&vdev
->intp_mutex
);
292 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
293 if (intp
->state
== VFIO_IRQ_ACTIVE
) {
294 trace_vfio_platform_eoi(intp
->pin
,
295 event_notifier_get_fd(intp
->interrupt
));
296 intp
->state
= VFIO_IRQ_INACTIVE
;
298 /* deassert the virtual IRQ */
299 qemu_set_irq(intp
->qemuirq
, 0);
301 if (vfio_irq_is_automasked(intp
)) {
302 /* unmasks the physical level-sensitive IRQ */
303 vfio_unmask_single_irqindex(vbasedev
, intp
->pin
);
306 /* a single IRQ can be active at a time */
310 /* in case there are pending IRQs, handle the first one */
311 if (!QSIMPLEQ_EMPTY(&vdev
->pending_intp_queue
)) {
312 intp
= QSIMPLEQ_FIRST(&vdev
->pending_intp_queue
);
313 vfio_intp_inject_pending_lockheld(intp
);
314 QSIMPLEQ_REMOVE_HEAD(&vdev
->pending_intp_queue
, pqnext
);
319 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
320 * user-side handled eventfds
321 * @sbdev: the sysbus device handle
322 * @irq: the qemu irq handle
325 static void vfio_start_eventfd_injection(SysBusDevice
*sbdev
, qemu_irq irq
)
327 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(sbdev
);
330 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
331 if (intp
->qemuirq
== irq
) {
337 if (vfio_set_trigger_eventfd(intp
, vfio_intp_interrupt
)) {
/*
 * Functions used for irqfd
 */
347 * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
348 * @intp: the IRQ struct handle
349 * programs the VFIO driver to unmask this IRQ when the
350 * intp->unmask eventfd is triggered
352 static int vfio_set_resample_eventfd(VFIOINTp
*intp
)
354 int32_t fd
= event_notifier_get_fd(intp
->unmask
);
355 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
358 qemu_set_fd_handler(fd
, NULL
, NULL
, NULL
);
359 if (!vfio_set_irq_signaling(vbasedev
, intp
->pin
, 0,
360 VFIO_IRQ_SET_ACTION_UNMASK
, fd
, &err
)) {
361 error_reportf_err(err
, VFIO_MSG_PREFIX
, vbasedev
->name
);
368 * vfio_start_irqfd_injection - starts the virtual IRQ injection using
371 * @sbdev: the sysbus device handle
372 * @irq: the qemu irq handle
374 * In case the irqfd setup fails, we fallback to userspace handled eventfd
376 static void vfio_start_irqfd_injection(SysBusDevice
*sbdev
, qemu_irq irq
)
378 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(sbdev
);
381 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
382 !vdev
->irqfd_allowed
) {
386 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
387 if (intp
->qemuirq
== irq
) {
393 if (kvm_irqchip_add_irqfd_notifier(kvm_state
, intp
->interrupt
,
394 intp
->unmask
, irq
) < 0) {
398 if (vfio_set_trigger_eventfd(intp
, NULL
) < 0) {
401 if (vfio_irq_is_automasked(intp
)) {
402 if (vfio_set_resample_eventfd(intp
) < 0) {
405 trace_vfio_platform_start_level_irqfd_injection(intp
->pin
,
406 event_notifier_get_fd(intp
->interrupt
),
407 event_notifier_get_fd(intp
->unmask
));
409 trace_vfio_platform_start_edge_irqfd_injection(intp
->pin
,
410 event_notifier_get_fd(intp
->interrupt
));
413 intp
->kvm_accel
= true;
417 kvm_irqchip_remove_irqfd_notifier(kvm_state
, intp
->interrupt
, irq
);
420 vfio_start_eventfd_injection(sbdev
, irq
);
426 static void vfio_platform_compute_needs_reset(VFIODevice
*vbasedev
)
428 vbasedev
->needs_reset
= true;
431 /* not implemented yet */
432 static int vfio_platform_hot_reset_multi(VFIODevice
*vbasedev
)
438 * vfio_populate_device - Allocate and populate MMIO region
439 * and IRQ structs according to driver returned information
440 * @vbasedev: the VFIO device handle
441 * @errp: error object
444 static bool vfio_populate_device(VFIODevice
*vbasedev
, Error
**errp
)
446 VFIOINTp
*intp
, *tmp
;
448 VFIOPlatformDevice
*vdev
=
449 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
451 if (!(vbasedev
->flags
& VFIO_DEVICE_FLAGS_PLATFORM
)) {
452 error_setg(errp
, "this isn't a platform device");
456 vdev
->regions
= g_new0(VFIORegion
*, vbasedev
->num_regions
);
458 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
459 char *name
= g_strdup_printf("VFIO %s region %d\n", vbasedev
->name
, i
);
461 vdev
->regions
[i
] = g_new0(VFIORegion
, 1);
462 ret
= vfio_region_setup(OBJECT(vdev
), vbasedev
,
463 vdev
->regions
[i
], i
, name
);
466 error_setg_errno(errp
, -ret
, "failed to get region %d info", i
);
471 vdev
->mmap_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL
,
472 vfio_intp_mmap_enable
, vdev
);
474 QSIMPLEQ_INIT(&vdev
->pending_intp_queue
);
476 for (i
= 0; i
< vbasedev
->num_irqs
; i
++) {
477 struct vfio_irq_info irq
= { .argsz
= sizeof(irq
) };
480 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_IRQ_INFO
, &irq
);
482 error_setg_errno(errp
, -ret
, "failed to get device irq info");
485 trace_vfio_platform_populate_interrupts(irq
.index
,
488 intp
= vfio_init_intp(vbasedev
, irq
, errp
);
496 timer_del(vdev
->mmap_timer
);
497 QLIST_FOREACH_SAFE(intp
, &vdev
->intp_list
, next
, tmp
) {
498 QLIST_REMOVE(intp
, next
);
502 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
503 if (vdev
->regions
[i
]) {
504 vfio_region_finalize(vdev
->regions
[i
]);
506 g_free(vdev
->regions
[i
]);
508 g_free(vdev
->regions
);
512 /* specialized functions for VFIO Platform devices */
513 static VFIODeviceOps vfio_platform_ops
= {
514 .vfio_compute_needs_reset
= vfio_platform_compute_needs_reset
,
515 .vfio_hot_reset_multi
= vfio_platform_hot_reset_multi
,
516 .vfio_eoi
= vfio_platform_eoi
,
520 * vfio_base_device_init - perform preliminary VFIO setup
521 * @vbasedev: the VFIO device handle
522 * @errp: error object
524 * Implement the VFIO command sequence that allows to discover
525 * assigned device resources: group extraction, device
526 * fd retrieval, resource query.
527 * Precondition: the device name must be initialized
529 static bool vfio_base_device_init(VFIODevice
*vbasedev
, Error
**errp
)
531 /* @fd takes precedence over @sysfsdev which takes precedence over @host */
532 if (vbasedev
->fd
< 0 && vbasedev
->sysfsdev
) {
533 g_free(vbasedev
->name
);
534 vbasedev
->name
= g_path_get_basename(vbasedev
->sysfsdev
);
535 } else if (vbasedev
->fd
< 0) {
536 if (!vbasedev
->name
|| strchr(vbasedev
->name
, '/')) {
537 error_setg(errp
, "wrong host device name");
541 vbasedev
->sysfsdev
= g_strdup_printf("/sys/bus/platform/devices/%s",
545 if (!vfio_device_get_name(vbasedev
, errp
)) {
549 if (!vfio_attach_device(vbasedev
->name
, vbasedev
,
550 &address_space_memory
, errp
)) {
554 if (vfio_populate_device(vbasedev
, errp
)) {
558 vfio_detach_device(vbasedev
);
563 * vfio_platform_realize - the device realize function
564 * @dev: device state pointer
567 * initialize the device, its memory regions and IRQ structures
568 * IRQ are started separately
570 static void vfio_platform_realize(DeviceState
*dev
, Error
**errp
)
573 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(dev
);
574 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(dev
);
575 VFIODevice
*vbasedev
= &vdev
->vbasedev
;
578 qemu_mutex_init(&vdev
->intp_mutex
);
580 trace_vfio_platform_realize(vbasedev
->sysfsdev
?
581 vbasedev
->sysfsdev
: vbasedev
->name
,
584 if (!vfio_base_device_init(vbasedev
, errp
)) {
594 path
= g_strdup_printf("%s/of_node/compatible", vbasedev
->sysfsdev
);
595 if (!g_file_get_contents(path
, &contents
, &length
, &gerr
)) {
596 error_setg(errp
, "%s", gerr
->message
);
602 vdev
->compat
= contents
;
603 for (vdev
->num_compat
= 0; length
; vdev
->num_compat
++) {
604 size_t skip
= strlen(contents
) + 1;
610 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
611 if (vfio_region_mmap(vdev
->regions
[i
])) {
612 warn_report("%s mmap unsupported, performance may be slow",
613 memory_region_name(vdev
->regions
[i
]->mem
));
615 sysbus_init_mmio(sbdev
, vdev
->regions
[i
]->mem
);
620 if (vdev
->vbasedev
.name
) {
621 error_prepend(errp
, VFIO_MSG_PREFIX
, vdev
->vbasedev
.name
);
623 error_prepend(errp
, "vfio error: ");
627 static const VMStateDescription vfio_platform_vmstate
= {
628 .name
= "vfio-platform",
632 static Property vfio_platform_dev_properties
[] = {
633 DEFINE_PROP_STRING("host", VFIOPlatformDevice
, vbasedev
.name
),
634 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice
, vbasedev
.sysfsdev
),
635 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice
, vbasedev
.no_mmap
, false),
636 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice
,
638 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice
, irqfd_allowed
, true),
639 #ifdef CONFIG_IOMMUFD
640 DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice
, vbasedev
.iommufd
,
641 TYPE_IOMMUFD_BACKEND
, IOMMUFDBackend
*),
643 DEFINE_PROP_END_OF_LIST(),
646 static void vfio_platform_instance_init(Object
*obj
)
648 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(obj
);
649 VFIODevice
*vbasedev
= &vdev
->vbasedev
;
651 vfio_device_init(vbasedev
, VFIO_DEVICE_TYPE_PLATFORM
, &vfio_platform_ops
,
652 DEVICE(vdev
), false);
#ifdef CONFIG_IOMMUFD
/* Setter of the "fd" class property: forwards @str to vfio_device_set_fd. */
static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
{
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);

    vfio_device_set_fd(&vdev->vbasedev, str, errp);
}
#endif
662 static void vfio_platform_class_init(ObjectClass
*klass
, void *data
)
664 DeviceClass
*dc
= DEVICE_CLASS(klass
);
665 SysBusDeviceClass
*sbc
= SYS_BUS_DEVICE_CLASS(klass
);
667 dc
->realize
= vfio_platform_realize
;
668 device_class_set_props(dc
, vfio_platform_dev_properties
);
669 #ifdef CONFIG_IOMMUFD
670 object_class_property_add_str(klass
, "fd", NULL
, vfio_platform_set_fd
);
672 dc
->vmsd
= &vfio_platform_vmstate
;
673 dc
->desc
= "VFIO-based platform device assignment";
674 sbc
->connect_irq_notifier
= vfio_start_irqfd_injection
;
675 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
676 /* Supported by TYPE_VIRT_MACHINE */
677 dc
->user_creatable
= true;
680 static const TypeInfo vfio_platform_dev_info
= {
681 .name
= TYPE_VFIO_PLATFORM
,
682 .parent
= TYPE_SYS_BUS_DEVICE
,
683 .instance_size
= sizeof(VFIOPlatformDevice
),
684 .instance_init
= vfio_platform_instance_init
,
685 .class_init
= vfio_platform_class_init
,
686 .class_size
= sizeof(VFIOPlatformDeviceClass
),
689 static void register_vfio_platform_dev_type(void)
691 type_register_static(&vfio_platform_dev_info
);
694 type_init(register_vfio_platform_dev_type
)