/*
 * vfio based device assignment support - platform devices
 *
 * Copyright Linaro Limited, 2014
 *
 * Authors:
 *  Kim Phillips <kim.phillips@linaro.org>
 *  Eric Auger <eric.auger@linaro.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Based on vfio based PCI device assignment support:
 *  Copyright Red Hat, Inc. 2012
 */
#include <sys/ioctl.h>
#include <linux/vfio.h>

#include "hw/vfio/vfio-platform.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/sysemu.h"
#include "exec/memory.h"
#include "qemu/queue.h"
#include "hw/sysbus.h"
#include "trace.h"
#include "hw/platform-bus.h"
#include "sysemu/kvm.h"
32 * Functions used whatever the injection method
36 * vfio_init_intp - allocate, initialize the IRQ struct pointer
37 * and add it into the list of IRQs
38 * @vbasedev: the VFIO device handle
39 * @info: irq info struct retrieved from VFIO driver
41 static VFIOINTp
*vfio_init_intp(VFIODevice
*vbasedev
,
42 struct vfio_irq_info info
)
45 VFIOPlatformDevice
*vdev
=
46 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
47 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(vdev
);
50 intp
= g_malloc0(sizeof(*intp
));
52 intp
->pin
= info
.index
;
53 intp
->flags
= info
.flags
;
54 intp
->state
= VFIO_IRQ_INACTIVE
;
55 intp
->kvm_accel
= false;
57 sysbus_init_irq(sbdev
, &intp
->qemuirq
);
59 /* Get an eventfd for trigger */
60 ret
= event_notifier_init(&intp
->interrupt
, 0);
63 error_report("vfio: Error: trigger event_notifier_init failed ");
66 /* Get an eventfd for resample/unmask */
67 ret
= event_notifier_init(&intp
->unmask
, 0);
70 error_report("vfio: Error: resamplefd event_notifier_init failed");
74 QLIST_INSERT_HEAD(&vdev
->intp_list
, intp
, next
);
79 * vfio_set_trigger_eventfd - set VFIO eventfd handling
81 * @intp: IRQ struct handle
82 * @handler: handler to be called on eventfd signaling
84 * Setup VFIO signaling and attach an optional user-side handler
87 static int vfio_set_trigger_eventfd(VFIOINTp
*intp
,
88 eventfd_user_side_handler_t handler
)
90 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
91 struct vfio_irq_set
*irq_set
;
95 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
96 irq_set
= g_malloc0(argsz
);
97 irq_set
->argsz
= argsz
;
98 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_TRIGGER
;
99 irq_set
->index
= intp
->pin
;
102 pfd
= (int32_t *)&irq_set
->data
;
103 *pfd
= event_notifier_get_fd(&intp
->interrupt
);
104 qemu_set_fd_handler(*pfd
, (IOHandler
*)handler
, NULL
, intp
);
105 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
);
108 error_report("vfio: Failed to set trigger eventfd: %m");
109 qemu_set_fd_handler(*pfd
, NULL
, NULL
, NULL
);
115 * Functions only used when eventfds are handled on user-side
120 * vfio_mmap_set_enabled - enable/disable the fast path mode
121 * @vdev: the VFIO platform device
122 * @enabled: the target mmap state
124 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
125 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
126 * are called; slow path enables to trap the device IRQ status register reset
129 static void vfio_mmap_set_enabled(VFIOPlatformDevice
*vdev
, bool enabled
)
133 trace_vfio_platform_mmap_set_enabled(enabled
);
135 for (i
= 0; i
< vdev
->vbasedev
.num_regions
; i
++) {
136 VFIORegion
*region
= vdev
->regions
[i
];
138 memory_region_set_enabled(®ion
->mmap_mem
, enabled
);
143 * vfio_intp_mmap_enable - timer function, restores the fast path
144 * if there is no more active IRQ
145 * @opaque: actually points to the VFIO platform device
147 * Called on mmap timer timout, this function checks whether the
148 * IRQ is still active and if not, restores the fast path.
149 * by construction a single eventfd is handled at a time.
150 * if the IRQ is still active, the timer is re-programmed.
152 static void vfio_intp_mmap_enable(void *opaque
)
155 VFIOPlatformDevice
*vdev
= (VFIOPlatformDevice
*)opaque
;
157 qemu_mutex_lock(&vdev
->intp_mutex
);
158 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
159 if (tmp
->state
== VFIO_IRQ_ACTIVE
) {
160 trace_vfio_platform_intp_mmap_enable(tmp
->pin
);
161 /* re-program the timer to check active status later */
162 timer_mod(vdev
->mmap_timer
,
163 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
165 qemu_mutex_unlock(&vdev
->intp_mutex
);
169 vfio_mmap_set_enabled(vdev
, true);
170 qemu_mutex_unlock(&vdev
->intp_mutex
);
174 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
175 * @opaque: opaque pointer, in practice the VFIOINTp handle
177 * The function is called on a previous IRQ completion, from
178 * vfio_platform_eoi, while the intp_mutex is locked.
179 * Also in such situation, the slow path already is set and
180 * the mmap timer was already programmed.
182 static void vfio_intp_inject_pending_lockheld(VFIOINTp
*intp
)
184 trace_vfio_platform_intp_inject_pending_lockheld(intp
->pin
,
185 event_notifier_get_fd(&intp
->interrupt
));
187 intp
->state
= VFIO_IRQ_ACTIVE
;
189 /* trigger the virtual IRQ */
190 qemu_set_irq(intp
->qemuirq
, 1);
194 * vfio_intp_interrupt - The user-side eventfd handler
195 * @opaque: opaque pointer which in practice is the VFIOINTp handle
197 * the function is entered in event handler context:
198 * the vIRQ is injected into the guest if there is no other active
201 static void vfio_intp_interrupt(VFIOINTp
*intp
)
205 VFIOPlatformDevice
*vdev
= intp
->vdev
;
206 bool delay_handling
= false;
208 qemu_mutex_lock(&vdev
->intp_mutex
);
209 if (intp
->state
== VFIO_IRQ_INACTIVE
) {
210 QLIST_FOREACH(tmp
, &vdev
->intp_list
, next
) {
211 if (tmp
->state
== VFIO_IRQ_ACTIVE
||
212 tmp
->state
== VFIO_IRQ_PENDING
) {
213 delay_handling
= true;
218 if (delay_handling
) {
220 * the new IRQ gets a pending status and is pushed in
223 intp
->state
= VFIO_IRQ_PENDING
;
224 trace_vfio_intp_interrupt_set_pending(intp
->pin
);
225 QSIMPLEQ_INSERT_TAIL(&vdev
->pending_intp_queue
,
227 ret
= event_notifier_test_and_clear(&intp
->interrupt
);
228 qemu_mutex_unlock(&vdev
->intp_mutex
);
232 trace_vfio_platform_intp_interrupt(intp
->pin
,
233 event_notifier_get_fd(&intp
->interrupt
));
235 ret
= event_notifier_test_and_clear(&intp
->interrupt
);
237 error_report("Error when clearing fd=%d (ret = %d)\n",
238 event_notifier_get_fd(&intp
->interrupt
), ret
);
241 intp
->state
= VFIO_IRQ_ACTIVE
;
244 vfio_mmap_set_enabled(vdev
, false);
246 /* trigger the virtual IRQ */
247 qemu_set_irq(intp
->qemuirq
, 1);
250 * Schedule the mmap timer which will restore fastpath when no IRQ
253 if (vdev
->mmap_timeout
) {
254 timer_mod(vdev
->mmap_timer
,
255 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL
) +
258 qemu_mutex_unlock(&vdev
->intp_mutex
);
262 * vfio_platform_eoi - IRQ completion routine
263 * @vbasedev: the VFIO device handle
265 * De-asserts the active virtual IRQ and unmasks the physical IRQ
266 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
267 * Then it handles next pending IRQ if any.
268 * eoi function is called on the first access to any MMIO region
269 * after an IRQ was triggered, trapped since slow path was set.
270 * It is assumed this access corresponds to the IRQ status
271 * register reset. With such a mechanism, a single IRQ can be
272 * handled at a time since there is no way to know which IRQ
273 * was completed by the guest (we would need additional details
274 * about the IRQ status register mask).
276 static void vfio_platform_eoi(VFIODevice
*vbasedev
)
279 VFIOPlatformDevice
*vdev
=
280 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
282 qemu_mutex_lock(&vdev
->intp_mutex
);
283 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
284 if (intp
->state
== VFIO_IRQ_ACTIVE
) {
285 trace_vfio_platform_eoi(intp
->pin
,
286 event_notifier_get_fd(&intp
->interrupt
));
287 intp
->state
= VFIO_IRQ_INACTIVE
;
289 /* deassert the virtual IRQ */
290 qemu_set_irq(intp
->qemuirq
, 0);
292 if (intp
->flags
& VFIO_IRQ_INFO_AUTOMASKED
) {
293 /* unmasks the physical level-sensitive IRQ */
294 vfio_unmask_single_irqindex(vbasedev
, intp
->pin
);
297 /* a single IRQ can be active at a time */
301 /* in case there are pending IRQs, handle the first one */
302 if (!QSIMPLEQ_EMPTY(&vdev
->pending_intp_queue
)) {
303 intp
= QSIMPLEQ_FIRST(&vdev
->pending_intp_queue
);
304 vfio_intp_inject_pending_lockheld(intp
);
305 QSIMPLEQ_REMOVE_HEAD(&vdev
->pending_intp_queue
, pqnext
);
307 qemu_mutex_unlock(&vdev
->intp_mutex
);
311 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
312 * user-side handled eventfds
313 * @intp: the IRQ struct pointer
316 static int vfio_start_eventfd_injection(VFIOINTp
*intp
)
320 ret
= vfio_set_trigger_eventfd(intp
, vfio_intp_interrupt
);
322 error_report("vfio: Error: Failed to pass IRQ fd to the driver: %m");
328 * Functions used for irqfd
332 * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
333 * @intp: the IRQ struct handle
334 * programs the VFIO driver to unmask this IRQ when the
335 * intp->unmask eventfd is triggered
337 static int vfio_set_resample_eventfd(VFIOINTp
*intp
)
339 VFIODevice
*vbasedev
= &intp
->vdev
->vbasedev
;
340 struct vfio_irq_set
*irq_set
;
344 argsz
= sizeof(*irq_set
) + sizeof(*pfd
);
345 irq_set
= g_malloc0(argsz
);
346 irq_set
->argsz
= argsz
;
347 irq_set
->flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_UNMASK
;
348 irq_set
->index
= intp
->pin
;
351 pfd
= (int32_t *)&irq_set
->data
;
352 *pfd
= event_notifier_get_fd(&intp
->unmask
);
353 qemu_set_fd_handler(*pfd
, NULL
, NULL
, NULL
);
354 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_SET_IRQS
, irq_set
);
357 error_report("vfio: Failed to set resample eventfd: %m");
362 static void vfio_start_irqfd_injection(SysBusDevice
*sbdev
, qemu_irq irq
)
364 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(sbdev
);
367 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
368 !vdev
->irqfd_allowed
) {
372 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
373 if (intp
->qemuirq
== irq
) {
379 /* Get to a known interrupt state */
380 qemu_set_fd_handler(event_notifier_get_fd(&intp
->interrupt
),
383 vfio_mask_single_irqindex(&vdev
->vbasedev
, intp
->pin
);
384 qemu_set_irq(intp
->qemuirq
, 0);
386 if (kvm_irqchip_add_irqfd_notifier(kvm_state
, &intp
->interrupt
,
387 &intp
->unmask
, irq
) < 0) {
391 if (vfio_set_trigger_eventfd(intp
, NULL
) < 0) {
394 if (vfio_set_resample_eventfd(intp
) < 0) {
398 /* Let's resume injection with irqfd setup */
399 vfio_unmask_single_irqindex(&vdev
->vbasedev
, intp
->pin
);
401 intp
->kvm_accel
= true;
403 trace_vfio_platform_start_irqfd_injection(intp
->pin
,
404 event_notifier_get_fd(&intp
->interrupt
),
405 event_notifier_get_fd(&intp
->unmask
));
408 kvm_irqchip_remove_irqfd_notifier(kvm_state
, &intp
->interrupt
, irq
);
410 vfio_start_eventfd_injection(intp
);
411 vfio_unmask_single_irqindex(&vdev
->vbasedev
, intp
->pin
);
417 static void vfio_platform_compute_needs_reset(VFIODevice
*vbasedev
)
419 vbasedev
->needs_reset
= true;
422 /* not implemented yet */
423 static int vfio_platform_hot_reset_multi(VFIODevice
*vbasedev
)
429 * vfio_populate_device - Allocate and populate MMIO region
430 * and IRQ structs according to driver returned information
431 * @vbasedev: the VFIO device handle
434 static int vfio_populate_device(VFIODevice
*vbasedev
)
436 VFIOINTp
*intp
, *tmp
;
438 VFIOPlatformDevice
*vdev
=
439 container_of(vbasedev
, VFIOPlatformDevice
, vbasedev
);
441 if (!(vbasedev
->flags
& VFIO_DEVICE_FLAGS_PLATFORM
)) {
442 error_report("vfio: Um, this isn't a platform device");
446 vdev
->regions
= g_new0(VFIORegion
*, vbasedev
->num_regions
);
448 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
449 struct vfio_region_info reg_info
= { .argsz
= sizeof(reg_info
) };
452 vdev
->regions
[i
] = g_malloc0(sizeof(VFIORegion
));
453 ptr
= vdev
->regions
[i
];
455 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_REGION_INFO
, ®_info
);
457 error_report("vfio: Error getting region %d info: %m", i
);
460 ptr
->flags
= reg_info
.flags
;
461 ptr
->size
= reg_info
.size
;
462 ptr
->fd_offset
= reg_info
.offset
;
464 ptr
->vbasedev
= vbasedev
;
466 trace_vfio_platform_populate_regions(ptr
->nr
,
467 (unsigned long)ptr
->flags
,
468 (unsigned long)ptr
->size
,
470 (unsigned long)ptr
->fd_offset
);
473 vdev
->mmap_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL
,
474 vfio_intp_mmap_enable
, vdev
);
476 QSIMPLEQ_INIT(&vdev
->pending_intp_queue
);
478 for (i
= 0; i
< vbasedev
->num_irqs
; i
++) {
479 struct vfio_irq_info irq
= { .argsz
= sizeof(irq
) };
482 ret
= ioctl(vbasedev
->fd
, VFIO_DEVICE_GET_IRQ_INFO
, &irq
);
484 error_printf("vfio: error getting device %s irq info",
488 trace_vfio_platform_populate_interrupts(irq
.index
,
491 intp
= vfio_init_intp(vbasedev
, irq
);
493 error_report("vfio: Error installing IRQ %d up", i
);
500 timer_del(vdev
->mmap_timer
);
501 QLIST_FOREACH_SAFE(intp
, &vdev
->intp_list
, next
, tmp
) {
502 QLIST_REMOVE(intp
, next
);
506 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
507 g_free(vdev
->regions
[i
]);
509 g_free(vdev
->regions
);
513 /* specialized functions for VFIO Platform devices */
514 static VFIODeviceOps vfio_platform_ops
= {
515 .vfio_compute_needs_reset
= vfio_platform_compute_needs_reset
,
516 .vfio_hot_reset_multi
= vfio_platform_hot_reset_multi
,
517 .vfio_eoi
= vfio_platform_eoi
,
521 * vfio_base_device_init - perform preliminary VFIO setup
522 * @vbasedev: the VFIO device handle
524 * Implement the VFIO command sequence that allows to discover
525 * assigned device resources: group extraction, device
526 * fd retrieval, resource query.
527 * Precondition: the device name must be initialized
529 static int vfio_base_device_init(VFIODevice
*vbasedev
)
532 VFIODevice
*vbasedev_iter
;
533 char path
[PATH_MAX
], iommu_group_path
[PATH_MAX
], *group_name
;
539 /* name must be set prior to the call */
540 if (!vbasedev
->name
|| strchr(vbasedev
->name
, '/')) {
544 /* Check that the host device exists */
545 g_snprintf(path
, sizeof(path
), "/sys/bus/platform/devices/%s/",
548 if (stat(path
, &st
) < 0) {
549 error_report("vfio: error: no such host device: %s", path
);
553 g_strlcat(path
, "iommu_group", sizeof(path
));
554 len
= readlink(path
, iommu_group_path
, sizeof(iommu_group_path
));
555 if (len
< 0 || len
>= sizeof(iommu_group_path
)) {
556 error_report("vfio: error no iommu_group for device");
557 return len
< 0 ? -errno
: -ENAMETOOLONG
;
560 iommu_group_path
[len
] = 0;
561 group_name
= basename(iommu_group_path
);
563 if (sscanf(group_name
, "%d", &groupid
) != 1) {
564 error_report("vfio: error reading %s: %m", path
);
568 trace_vfio_platform_base_device_init(vbasedev
->name
, groupid
);
570 group
= vfio_get_group(groupid
, &address_space_memory
);
572 error_report("vfio: failed to get group %d", groupid
);
576 g_snprintf(path
, sizeof(path
), "%s", vbasedev
->name
);
578 QLIST_FOREACH(vbasedev_iter
, &group
->device_list
, next
) {
579 if (strcmp(vbasedev_iter
->name
, vbasedev
->name
) == 0) {
580 error_report("vfio: error: device %s is already attached", path
);
581 vfio_put_group(group
);
585 ret
= vfio_get_device(group
, path
, vbasedev
);
587 error_report("vfio: failed to get device %s", path
);
588 vfio_put_group(group
);
592 ret
= vfio_populate_device(vbasedev
);
594 error_report("vfio: failed to populate device %s", path
);
595 vfio_put_group(group
);
602 * vfio_map_region - initialize the 2 memory regions for a given
604 * @vdev: the VFIO platform device handle
605 * @nr: the index of the region
607 * Init the top memory region and the mmapped memory region beneath
608 * VFIOPlatformDevice is used since VFIODevice is not a QOM Object
609 * and could not be passed to memory region functions
611 static void vfio_map_region(VFIOPlatformDevice
*vdev
, int nr
)
613 VFIORegion
*region
= vdev
->regions
[nr
];
614 uint64_t size
= region
->size
;
621 g_snprintf(name
, sizeof(name
), "VFIO %s region %d",
622 vdev
->vbasedev
.name
, nr
);
624 /* A "slow" read/write mapping underlies all regions */
625 memory_region_init_io(®ion
->mem
, OBJECT(vdev
), &vfio_region_ops
,
628 g_strlcat(name
, " mmap", sizeof(name
));
630 if (vfio_mmap_region(OBJECT(vdev
), region
, ®ion
->mem
,
631 ®ion
->mmap_mem
, ®ion
->mmap
, size
, 0, name
)) {
632 error_report("%s unsupported. Performance may be slow", name
);
637 * vfio_platform_realize - the device realize function
638 * @dev: device state pointer
641 * initialize the device, its memory regions and IRQ structures
642 * IRQ are started separately
644 static void vfio_platform_realize(DeviceState
*dev
, Error
**errp
)
646 VFIOPlatformDevice
*vdev
= VFIO_PLATFORM_DEVICE(dev
);
647 SysBusDevice
*sbdev
= SYS_BUS_DEVICE(dev
);
648 VFIODevice
*vbasedev
= &vdev
->vbasedev
;
652 vbasedev
->type
= VFIO_DEVICE_TYPE_PLATFORM
;
653 vbasedev
->ops
= &vfio_platform_ops
;
655 trace_vfio_platform_realize(vbasedev
->name
, vdev
->compat
);
657 ret
= vfio_base_device_init(vbasedev
);
659 error_setg(errp
, "vfio: vfio_base_device_init failed for %s",
664 for (i
= 0; i
< vbasedev
->num_regions
; i
++) {
665 vfio_map_region(vdev
, i
);
666 sysbus_init_mmio(sbdev
, &vdev
->regions
[i
]->mem
);
669 QLIST_FOREACH(intp
, &vdev
->intp_list
, next
) {
670 vfio_start_eventfd_injection(intp
);
674 static const VMStateDescription vfio_platform_vmstate
= {
675 .name
= TYPE_VFIO_PLATFORM
,
679 static Property vfio_platform_dev_properties
[] = {
680 DEFINE_PROP_STRING("host", VFIOPlatformDevice
, vbasedev
.name
),
681 DEFINE_PROP_BOOL("x-mmap", VFIOPlatformDevice
, vbasedev
.allow_mmap
, true),
682 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice
,
684 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice
, irqfd_allowed
, true),
685 DEFINE_PROP_END_OF_LIST(),
688 static void vfio_platform_class_init(ObjectClass
*klass
, void *data
)
690 DeviceClass
*dc
= DEVICE_CLASS(klass
);
691 SysBusDeviceClass
*sbc
= SYS_BUS_DEVICE_CLASS(klass
);
693 dc
->realize
= vfio_platform_realize
;
694 dc
->props
= vfio_platform_dev_properties
;
695 dc
->vmsd
= &vfio_platform_vmstate
;
696 dc
->desc
= "VFIO-based platform device assignment";
697 sbc
->connect_irq_notifier
= vfio_start_irqfd_injection
;
698 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
701 static const TypeInfo vfio_platform_dev_info
= {
702 .name
= TYPE_VFIO_PLATFORM
,
703 .parent
= TYPE_SYS_BUS_DEVICE
,
704 .instance_size
= sizeof(VFIOPlatformDevice
),
705 .class_init
= vfio_platform_class_init
,
706 .class_size
= sizeof(VFIOPlatformDeviceClass
),
710 static void register_vfio_platform_dev_type(void)
712 type_register_static(&vfio_platform_dev_info
);
715 type_init(register_vfio_platform_dev_type
)