/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc.  2012
 */
17 #include "qemu/osdep.h"
18 #include <sys/ioctl.h>
19 #include <linux/vfio.h>
21 #include "hw/vfio/vfio-platform.h"
22 #include "qemu/error-report.h"
23 #include "qemu/range.h"
24 #include "sysemu/sysemu.h"
25 #include "exec/memory.h"
26 #include "qemu/queue.h"
27 #include "hw/sysbus.h"
29 #include "hw/platform-bus.h"
30 #include "sysemu/kvm.h"
/*
 * Functions used whatever the injection method
 */
36 static inline bool vfio_irq_is_automasked(VFIOINTp
*intp
)
38 return intp
->flags
& VFIO_IRQ_INFO_AUTOMASKED
;
42 * vfio_init_intp - allocate, initialize the IRQ struct pointer
43 * and add it into the list of IRQs
44 * @vbasedev: the VFIO device handle
45 * @info: irq info struct retrieved from VFIO driver
47 static VFIOINTp
*vfio_init_intp(VFIODevice
*vbasedev
,
48 struct vfio_irq_info info
)
51 VFIOPlatformDevice
*vdev
=
52 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
53 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(vdev
);
56 intp
= g_malloc0(sizeof(*intp
));
58 intp
->pin
= info
.index
;
59 intp
->flags
= info
.flags
;
60 intp
->state
= VFIO_IRQ_INACTIVE
;
61 intp
->kvm_accel
= false;
63 sysbus_init_irq(sbdev
, &intp
->qemuirq
);
65 /* Get an eventfd for trigger */
66 intp
->interrupt
= g_malloc0(sizeof(EventNotifier
));
67 ret
= event_notifier_init(intp
->interrupt
, 0);
69 g_free(intp
->interrupt
);
71 error_report("vfio: Error: trigger event_notifier_init failed ");
74 if (vfio_irq_is_automasked(intp
)) {
75 /* Get an eventfd for resample/unmask */
76 intp
->unmask
= g_malloc0(sizeof(EventNotifier
));
77 ret
= event_notifier_init(intp
->unmask
, 0);
79 g_free(intp
->interrupt
);
82 error_report("vfio: Error: resamplefd event_notifier_init failed");
87 QLIST_INSERT_HEAD(&vdev
->intp_list
, intp
, next
);
92 * vfio_set_trigger_eventfd - set VFIO eventfd handling
94 * @intp: IRQ struct handle
95 * @handler: handler to be called on eventfd signaling
97 * Setup VFIO signaling and attach an optional user-side handler
100 static int vfio_set_trigger_eventfd(VFIOINTp
*intp
,
101 eventfd_user_side_handler_t handler
)
103 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
104 struct vfio_irq_set
*irq_set
;
108 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
109 irq_set
= g_malloc0(argsz
);
110 irq_set
->argsz
= argsz
;
111 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_TRIGGER
;
112 irq_set
->index
= intp
->pin
;
115 pfd
= (int32_t *)&irq_set
->data
;
116 *pfd
= event_notifier_get_fd(intp
->interrupt
);
117 qemu_set_fd_handler(*pfd
, (IOHandler
*)handler
, NULL
, intp
);
118 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
);
121 error_report("vfio: Failed to set trigger eventfd: %m");
122 qemu_set_fd_handler(*pfd
, NULL
, NULL
, NULL
);
/*
 * Functions only used when eventfds are handled on user-side
 * ie. without irqfd
 */
133 * vfio_mmap_set_enabled - enable/disable the fast path mode
134 * @vdev: the VFIO platform device
135 * @enabled: the target mmap state
137 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
138 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
139 * are called; slow path enables to trap the device IRQ status register reset
142 static void vfio_mmap_set_enabled(VFIOPlatformDevice
*vdev
, bool enabled
)
146 for (i
= 0; i
< vdev
->vbasedev
.num_regions
; i
++) {
147 vfio_region_mmaps_set_enabled(vdev
->regions
[i
], enabled
);
152 * vfio_intp_mmap_enable - timer function, restores the fast path
153 * if there is no more active IRQ
154 * @opaque: actually points to the VFIO platform device
156 * Called on mmap timer timout, this function checks whether the
157 * IRQ is still active and if not, restores the fast path.
158 * by construction a single eventfd is handled at a time.
159 * if the IRQ is still active, the timer is re-programmed.
161 static void vfio_intp_mmap_enable(void *opaque
)
164 VFIOPlatformDevice
*vdev
= (VFIOPlatformDevice
*)opaque
;
166 qemu_mutex_lock(&vdev
->intp_mutex
);
167 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
168 if (tmp
->state
== VFIO_IRQ_ACTIVE
) {
169 trace_vfio_platform_intp_mmap_enable(tmp
->pin
);
170 /* re-program the timer to check active status later */
171 timer_mod(vdev
->mmap_timer
,
172 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
174 qemu_mutex_unlock(&vdev
->intp_mutex
);
178 vfio_mmap_set_enabled(vdev
, true);
179 qemu_mutex_unlock(&vdev
->intp_mutex
);
183 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
184 * @opaque: opaque pointer, in practice the VFIOINTp handle
186 * The function is called on a previous IRQ completion, from
187 * vfio_platform_eoi, while the intp_mutex is locked.
188 * Also in such situation, the slow path already is set and
189 * the mmap timer was already programmed.
191 static void vfio_intp_inject_pending_lockheld(VFIOINTp
*intp
)
193 trace_vfio_platform_intp_inject_pending_lockheld(intp
->pin
,
194 event_notifier_get_fd(intp
->interrupt
));
196 intp
->state
= VFIO_IRQ_ACTIVE
;
198 /* trigger the virtual IRQ */
199 qemu_set_irq(intp
->qemuirq
, 1);
203 * vfio_intp_interrupt - The user-side eventfd handler
204 * @opaque: opaque pointer which in practice is the VFIOINTp handle
206 * the function is entered in event handler context:
207 * the vIRQ is injected into the guest if there is no other active
210 static void vfio_intp_interrupt(VFIOINTp
*intp
)
214 VFIOPlatformDevice
*vdev
= intp
->vdev
;
215 bool delay_handling
= false;
217 qemu_mutex_lock(&vdev
->intp_mutex
);
218 if (intp
->state
== VFIO_IRQ_INACTIVE
) {
219 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
220 if (tmp
->state
== VFIO_IRQ_ACTIVE
||
221 tmp
->state
== VFIO_IRQ_PENDING
) {
222 delay_handling
= true;
227 if (delay_handling
) {
229 * the new IRQ gets a pending status and is pushed in
232 intp
->state
= VFIO_IRQ_PENDING
;
233 trace_vfio_intp_interrupt_set_pending(intp
->pin
);
234 QSIMPLEQ_INSERT_TAIL(&vdev
->pending_intp_queue
,
236 ret
= event_notifier_test_and_clear(intp
->interrupt
);
237 qemu_mutex_unlock(&vdev
->intp_mutex
);
241 trace_vfio_platform_intp_interrupt(intp
->pin
,
242 event_notifier_get_fd(intp
->interrupt
));
244 ret
= event_notifier_test_and_clear(intp
->interrupt
);
246 error_report("Error when clearing fd=%d (ret = %d)",
247 event_notifier_get_fd(intp
->interrupt
), ret
);
250 intp
->state
= VFIO_IRQ_ACTIVE
;
253 vfio_mmap_set_enabled(vdev
, false);
255 /* trigger the virtual IRQ */
256 qemu_set_irq(intp
->qemuirq
, 1);
259 * Schedule the mmap timer which will restore fastpath when no IRQ
262 if (vdev
->mmap_timeout
) {
263 timer_mod(vdev
->mmap_timer
,
264 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
267 qemu_mutex_unlock(&vdev
->intp_mutex
);
271 * vfio_platform_eoi - IRQ completion routine
272 * @vbasedev: the VFIO device handle
274 * De-asserts the active virtual IRQ and unmasks the physical IRQ
275 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
276 * Then it handles next pending IRQ if any.
277 * eoi function is called on the first access to any MMIO region
278 * after an IRQ was triggered, trapped since slow path was set.
279 * It is assumed this access corresponds to the IRQ status
280 * register reset. With such a mechanism, a single IRQ can be
281 * handled at a time since there is no way to know which IRQ
282 * was completed by the guest (we would need additional details
283 * about the IRQ status register mask).
285 static void vfio_platform_eoi(VFIODevice
*vbasedev
)
288 VFIOPlatformDevice
*vdev
=
289 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
291 qemu_mutex_lock(&vdev
->intp_mutex
);
292 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
293 if (intp
->state
== VFIO_IRQ_ACTIVE
) {
294 trace_vfio_platform_eoi(intp
->pin
,
295 event_notifier_get_fd(intp
->interrupt
));
296 intp
->state
= VFIO_IRQ_INACTIVE
;
298 /* deassert the virtual IRQ */
299 qemu_set_irq(intp
->qemuirq
, 0);
301 if (vfio_irq_is_automasked(intp
)) {
302 /* unmasks the physical level-sensitive IRQ */
303 vfio_unmask_single_irqindex(vbasedev
, intp
->pin
);
306 /* a single IRQ can be active at a time */
310 /* in case there are pending IRQs, handle the first one */
311 if (!QSIMPLEQ_EMPTY(&vdev
->pending_intp_queue
)) {
312 intp
= QSIMPLEQ_FIRST(&vdev
->pending_intp_queue
);
313 vfio_intp_inject_pending_lockheld(intp
);
314 QSIMPLEQ_REMOVE_HEAD(&vdev
->pending_intp_queue
, pqnext
);
316 qemu_mutex_unlock(&vdev
->intp_mutex
);
320 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
321 * user-side handled eventfds
322 * @sbdev: the sysbus device handle
323 * @irq: the qemu irq handle
326 static void vfio_start_eventfd_injection(SysBusDevice
*sbdev
, qemu_irq irq
)
329 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(sbdev
);
332 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
333 if (intp
->qemuirq
== irq
) {
339 ret
= vfio_set_trigger_eventfd(intp
, vfio_intp_interrupt
);
341 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
/*
 * Functions used for irqfd
 */
352 * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
353 * @intp: the IRQ struct handle
354 * programs the VFIO driver to unmask this IRQ when the
355 * intp->unmask eventfd is triggered
357 static int vfio_set_resample_eventfd(VFIOINTp
*intp
)
359 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
360 struct vfio_irq_set
*irq_set
;
364 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
365 irq_set
= g_malloc0(argsz
);
366 irq_set
->argsz
= argsz
;
367 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_UNMASK
;
368 irq_set
->index
= intp
->pin
;
371 pfd
= (int32_t *)&irq_set
->data
;
372 *pfd
= event_notifier_get_fd(intp
->unmask
);
373 qemu_set_fd_handler(*pfd
, NULL
, NULL
, NULL
);
374 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
);
377 error_report("vfio: Failed to set resample eventfd: %m");
383 * vfio_start_irqfd_injection - starts the virtual IRQ injection using
386 * @sbdev: the sysbus device handle
387 * @irq: the qemu irq handle
389 * In case the irqfd setup fails, we fallback to userspace handled eventfd
391 static void vfio_start_irqfd_injection(SysBusDevice
*sbdev
, qemu_irq irq
)
393 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(sbdev
);
396 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
397 !vdev
->irqfd_allowed
) {
401 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
402 if (intp
->qemuirq
== irq
) {
408 if (kvm_irqchip_add_irqfd_notifier(kvm_state
, intp
->interrupt
,
409 intp
->unmask
, irq
) < 0) {
413 if (vfio_set_trigger_eventfd(intp
, NULL
) < 0) {
416 if (vfio_irq_is_automasked(intp
)) {
417 if (vfio_set_resample_eventfd(intp
) < 0) {
420 trace_vfio_platform_start_level_irqfd_injection(intp
->pin
,
421 event_notifier_get_fd(intp
->interrupt
),
422 event_notifier_get_fd(intp
->unmask
));
424 trace_vfio_platform_start_edge_irqfd_injection(intp
->pin
,
425 event_notifier_get_fd(intp
->interrupt
));
428 intp
->kvm_accel
= true;
432 kvm_irqchip_remove_irqfd_notifier(kvm_state
, intp
->interrupt
, irq
);
433 error_report("vfio: failed to start eventfd signaling for IRQ %d: %m",
437 vfio_start_eventfd_injection(sbdev
, irq
);
443 static void vfio_platform_compute_needs_reset(VFIODevice
*vbasedev
)
445 vbasedev
->needs_reset
= true;
448 /* not implemented yet */
449 static int vfio_platform_hot_reset_multi(VFIODevice
*vbasedev
)
455 * vfio_populate_device - Allocate and populate MMIO region
456 * and IRQ structs according to driver returned information
457 * @vbasedev: the VFIO device handle
460 static int vfio_populate_device(VFIODevice
*vbasedev
)
462 VFIOINTp
*intp
, *tmp
;
464 VFIOPlatformDevice
*vdev
=
465 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
467 if (!(vbasedev
->flags
& VFIO_DEVICE_FLAGS_PLATFORM
)) {
468 error_report("vfio: Um, this isn't a platform device");
472 vdev
->regions
= g_new0(VFIORegion
*, vbasedev
->num_regions
);
474 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
475 char *name
= g_strdup_printf("VFIO %s region %d\n", vbasedev
->name
, i
);
477 vdev
->regions
[i
] = g_new0(VFIORegion
, 1);
478 ret
= vfio_region_setup(OBJECT(vdev
), vbasedev
,
479 vdev
->regions
[i
], i
, name
);
482 error_report("vfio: Error getting region %d info: %m", i
);
487 vdev
->mmap_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL
,
488 vfio_intp_mmap_enable
, vdev
);
490 QSIMPLEQ_INIT(&vdev
->pending_intp_queue
);
492 for (i
= 0; i
< vbasedev
->num_irqs
; i
++) {
493 struct vfio_irq_info irq
= { .argsz
= sizeof(irq
) };
496 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_IRQ_INFO
, &irq
);
498 error_printf("vfio: error getting device %s irq info",
502 trace_vfio_platform_populate_interrupts(irq
.index
,
505 intp
= vfio_init_intp(vbasedev
, irq
);
507 error_report("vfio: Error installing IRQ %d up", i
);
514 timer_del(vdev
->mmap_timer
);
515 QLIST_FOREACH_SAFE(intp
, &vdev
->intp_list
, next
, tmp
) {
516 QLIST_REMOVE(intp
, next
);
520 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
521 if (vdev
->regions
[i
]) {
522 vfio_region_finalize(vdev
->regions
[i
]);
524 g_free(vdev
->regions
[i
]);
526 g_free(vdev
->regions
);
530 /* specialized functions for VFIO Platform devices */
531 static VFIODeviceOps vfio_platform_ops
= {
532 .vfio_compute_needs_reset
= vfio_platform_compute_needs_reset
,
533 .vfio_hot_reset_multi
= vfio_platform_hot_reset_multi
,
534 .vfio_eoi
= vfio_platform_eoi
,
538 * vfio_base_device_init - perform preliminary VFIO setup
539 * @vbasedev: the VFIO device handle
541 * Implement the VFIO command sequence that allows to discover
542 * assigned device resources: group extraction, device
543 * fd retrieval, resource query.
544 * Precondition: the device name must be initialized
546 static int vfio_base_device_init(VFIODevice
*vbasedev
)
549 VFIODevice
*vbasedev_iter
;
550 char *tmp
, group_path
[PATH_MAX
], *group_name
;
556 /* @sysfsdev takes precedence over @host */
557 if (vbasedev
->sysfsdev
) {
558 g_free(vbasedev
->name
);
559 vbasedev
->name
= g_strdup(basename(vbasedev
->sysfsdev
));
561 if (!vbasedev
->name
|| strchr(vbasedev
->name
, '/')) {
565 vbasedev
->sysfsdev
= g_strdup_printf("/sys/bus/platform/devices/%s",
569 if (stat(vbasedev
->sysfsdev
, &st
) < 0) {
570 error_report("vfio: error: no such host device: %s",
575 tmp
= g_strdup_printf("%s/iommu_group", vbasedev
->sysfsdev
);
576 len
= readlink(tmp
, group_path
, sizeof(group_path
));
579 if (len
< 0 || len
>= sizeof(group_path
)) {
580 error_report("vfio: error no iommu_group for device");
581 return len
< 0 ? -errno
: -ENAMETOOLONG
;
586 group_name
= basename(group_path
);
587 if (sscanf(group_name
, "%d", &groupid
) != 1) {
588 error_report("vfio: error reading %s: %m", group_path
);
592 trace_vfio_platform_base_device_init(vbasedev
->name
, groupid
);
594 group
= vfio_get_group(groupid
, &address_space_memory
);
596 error_report("vfio: failed to get group %d", groupid
);
600 QLIST_FOREACH(vbasedev_iter
, &group
->device_list
, next
) {
601 if (strcmp(vbasedev_iter
->name
, vbasedev
->name
) == 0) {
602 error_report("vfio: error: device %s is already attached",
604 vfio_put_group(group
);
608 ret
= vfio_get_device(group
, vbasedev
->name
, vbasedev
);
610 error_report("vfio: failed to get device %s", vbasedev
->name
);
611 vfio_put_group(group
);
615 ret
= vfio_populate_device(vbasedev
);
617 error_report("vfio: failed to populate device %s", vbasedev
->name
);
618 vfio_put_group(group
);
625 * vfio_platform_realize - the device realize function
626 * @dev: device state pointer
629 * initialize the device, its memory regions and IRQ structures
630 * IRQ are started separately
632 static void vfio_platform_realize(DeviceState
*dev
, Error
**errp
)
634 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(dev
);
635 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(dev
);
636 VFIODevice
*vbasedev
= &vdev
->vbasedev
;
639 vbasedev
->type
= VFIO_DEVICE_TYPE_PLATFORM
;
640 vbasedev
->ops
= &vfio_platform_ops
;
642 trace_vfio_platform_realize(vbasedev
->sysfsdev
?
643 vbasedev
->sysfsdev
: vbasedev
->name
,
646 ret
= vfio_base_device_init(vbasedev
);
648 error_setg(errp
, "vfio: vfio_base_device_init failed for %s",
653 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
654 if (vfio_region_mmap(vdev
->regions
[i
])) {
655 error_report("%s mmap unsupported. Performance may be slow",
656 memory_region_name(vdev
->regions
[i
]->mem
));
658 sysbus_init_mmio(sbdev
, vdev
->regions
[i
]->mem
);
662 static const VMStateDescription vfio_platform_vmstate
= {
663 .name
= TYPE_VFIO_PLATFORM
,
667 static Property vfio_platform_dev_properties
[] = {
668 DEFINE_PROP_STRING("host", VFIOPlatformDevice
, vbasedev
.name
),
669 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice
, vbasedev
.sysfsdev
),
670 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice
, vbasedev
.no_mmap
, false),
671 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice
,
673 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice
, irqfd_allowed
, true),
674 DEFINE_PROP_END_OF_LIST(),
677 static void vfio_platform_class_init(ObjectClass
*klass
, void *data
)
679 DeviceClass
*dc
= DEVICE_CLASS(klass
);
680 SysBusDeviceClass
*sbc
= SYS_BUS_DEVICE_CLASS(klass
);
682 dc
->realize
= vfio_platform_realize
;
683 dc
->props
= vfio_platform_dev_properties
;
684 dc
->vmsd
= &vfio_platform_vmstate
;
685 dc
->desc
= "VFIO-based platform device assignment";
686 sbc
->connect_irq_notifier
= vfio_start_irqfd_injection
;
687 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
690 static const TypeInfo vfio_platform_dev_info
= {
691 .name
= TYPE_VFIO_PLATFORM
,
692 .parent
= TYPE_SYS_BUS_DEVICE
,
693 .instance_size
= sizeof(VFIOPlatformDevice
),
694 .class_init
= vfio_platform_class_init
,
695 .class_size
= sizeof(VFIOPlatformDeviceClass
),
699 static void register_vfio_platform_dev_type(void)
701 type_register_static(&vfio_platform_dev_info
);
704 type_init(register_vfio_platform_dev_type
)