Merge tag 'v9.1.0'
[qemu/ar7.git] / hw / vfio / platform.c
bloba85c199c76aa25559acf931621b84dd3dac6184d
1 /*
2 * vfio based device assignment support - platform devices
4 * Copyright Linaro Limited, 2014
6 * Authors:
7 * Kim Phillips <kim.phillips@linaro.org>
8 * Eric Auger <eric.auger@linaro.org>
10 * This work is licensed under the terms of the GNU GPL, version 2. See
11 * the COPYING file in the top-level directory.
13 * Based on vfio based PCI device assignment support:
14 * Copyright Red Hat, Inc. 2012
17 #include "qemu/osdep.h"
18 #include CONFIG_DEVICES /* CONFIG_IOMMUFD */
19 #include "qapi/error.h"
20 #include <sys/ioctl.h>
21 #include <linux/vfio.h>
23 #include "hw/vfio/vfio-platform.h"
24 #include "sysemu/iommufd.h"
25 #include "migration/vmstate.h"
26 #include "qemu/error-report.h"
27 #include "qemu/lockable.h"
28 #include "qemu/main-loop.h"
29 #include "qemu/module.h"
30 #include "qemu/range.h"
31 #include "exec/memory.h"
32 #include "exec/address-spaces.h"
33 #include "qemu/queue.h"
34 #include "hw/sysbus.h"
35 #include "trace.h"
36 #include "hw/irq.h"
37 #include "hw/platform-bus.h"
38 #include "hw/qdev-properties.h"
39 #include "sysemu/kvm.h"
/*
 * Functions used whatever the injection method
 */
45 static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
47 return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
50 /**
51 * vfio_init_intp - allocate, initialize the IRQ struct pointer
52 * and add it into the list of IRQs
53 * @vbasedev: the VFIO device handle
54 * @info: irq info struct retrieved from VFIO driver
55 * @errp: error object
57 static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
58 struct vfio_irq_info info, Error **errp)
60 int ret;
61 VFIOPlatformDevice *vdev =
62 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
63 SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
64 VFIOINTp *intp;
66 intp = g_malloc0(sizeof(*intp));
67 intp->vdev = vdev;
68 intp->pin = info.index;
69 intp->flags = info.flags;
70 intp->state = VFIO_IRQ_INACTIVE;
71 intp->kvm_accel = false;
73 sysbus_init_irq(sbdev, &intp->qemuirq);
75 /* Get an eventfd for trigger */
76 intp->interrupt = g_new0(EventNotifier, 1);
77 ret = event_notifier_init(intp->interrupt, 0);
78 if (ret) {
79 g_free(intp->interrupt);
80 g_free(intp);
81 error_setg_errno(errp, -ret,
82 "failed to initialize trigger eventfd notifier");
83 return NULL;
85 if (vfio_irq_is_automasked(intp)) {
86 /* Get an eventfd for resample/unmask */
87 intp->unmask = g_new0(EventNotifier, 1);
88 ret = event_notifier_init(intp->unmask, 0);
89 if (ret) {
90 g_free(intp->interrupt);
91 g_free(intp->unmask);
92 g_free(intp);
93 error_setg_errno(errp, -ret,
94 "failed to initialize resample eventfd notifier");
95 return NULL;
99 QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
100 return intp;
104 * vfio_set_trigger_eventfd - set VFIO eventfd handling
106 * @intp: IRQ struct handle
107 * @handler: handler to be called on eventfd signaling
109 * Setup VFIO signaling and attach an optional user-side handler
110 * to the eventfd
112 static int vfio_set_trigger_eventfd(VFIOINTp *intp,
113 eventfd_user_side_handler_t handler)
115 VFIODevice *vbasedev = &intp->vdev->vbasedev;
116 int32_t fd = event_notifier_get_fd(intp->interrupt);
117 Error *err = NULL;
119 qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
121 if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
122 VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
123 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
124 qemu_set_fd_handler(fd, NULL, NULL, NULL);
125 return -EINVAL;
128 return 0;
/*
 * Functions only used when eventfds are handled on user-side,
 * i.e. without irqfd
 */
137 * vfio_mmap_set_enabled - enable/disable the fast path mode
138 * @vdev: the VFIO platform device
139 * @enabled: the target mmap state
141 * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
142 * enabled = false ~ slow path = MMIO region is trapped and region callbacks
143 * are called; slow path enables to trap the device IRQ status register reset
146 static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
148 int i;
150 for (i = 0; i < vdev->vbasedev.num_regions; i++) {
151 vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
156 * vfio_intp_mmap_enable - timer function, restores the fast path
157 * if there is no more active IRQ
158 * @opaque: actually points to the VFIO platform device
160 * Called on mmap timer timeout, this function checks whether the
161 * IRQ is still active and if not, restores the fast path.
162 * by construction a single eventfd is handled at a time.
163 * if the IRQ is still active, the timer is re-programmed.
165 static void vfio_intp_mmap_enable(void *opaque)
167 VFIOINTp *tmp;
168 VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
170 QEMU_LOCK_GUARD(&vdev->intp_mutex);
171 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
172 if (tmp->state == VFIO_IRQ_ACTIVE) {
173 trace_vfio_platform_intp_mmap_enable(tmp->pin);
174 /* re-program the timer to check active status later */
175 timer_mod(vdev->mmap_timer,
176 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
177 vdev->mmap_timeout);
178 return;
181 vfio_mmap_set_enabled(vdev, true);
185 * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
186 * @opaque: opaque pointer, in practice the VFIOINTp handle
188 * The function is called on a previous IRQ completion, from
189 * vfio_platform_eoi, while the intp_mutex is locked.
190 * Also in such situation, the slow path already is set and
191 * the mmap timer was already programmed.
193 static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
195 trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
196 event_notifier_get_fd(intp->interrupt));
198 intp->state = VFIO_IRQ_ACTIVE;
200 /* trigger the virtual IRQ */
201 qemu_set_irq(intp->qemuirq, 1);
205 * vfio_intp_interrupt - The user-side eventfd handler
206 * @opaque: opaque pointer which in practice is the VFIOINTp handle
208 * the function is entered in event handler context:
209 * the vIRQ is injected into the guest if there is no other active
210 * or pending IRQ.
212 static void vfio_intp_interrupt(VFIOINTp *intp)
214 int ret;
215 VFIOINTp *tmp;
216 VFIOPlatformDevice *vdev = intp->vdev;
217 bool delay_handling = false;
219 QEMU_LOCK_GUARD(&vdev->intp_mutex);
220 if (intp->state == VFIO_IRQ_INACTIVE) {
221 QLIST_FOREACH(tmp, &vdev->intp_list, next) {
222 if (tmp->state == VFIO_IRQ_ACTIVE ||
223 tmp->state == VFIO_IRQ_PENDING) {
224 delay_handling = true;
225 break;
229 if (delay_handling) {
231 * the new IRQ gets a pending status and is pushed in
232 * the pending queue
234 intp->state = VFIO_IRQ_PENDING;
235 trace_vfio_intp_interrupt_set_pending(intp->pin);
236 QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
237 intp, pqnext);
238 event_notifier_test_and_clear(intp->interrupt);
239 return;
242 trace_vfio_platform_intp_interrupt(intp->pin,
243 event_notifier_get_fd(intp->interrupt));
245 ret = event_notifier_test_and_clear(intp->interrupt);
246 if (!ret) {
247 error_report("Error when clearing fd=%d (ret = %d)",
248 event_notifier_get_fd(intp->interrupt), ret);
251 intp->state = VFIO_IRQ_ACTIVE;
253 /* sets slow path */
254 vfio_mmap_set_enabled(vdev, false);
256 /* trigger the virtual IRQ */
257 qemu_set_irq(intp->qemuirq, 1);
260 * Schedule the mmap timer which will restore fastpath when no IRQ
261 * is active anymore
263 if (vdev->mmap_timeout) {
264 timer_mod(vdev->mmap_timer,
265 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
266 vdev->mmap_timeout);
271 * vfio_platform_eoi - IRQ completion routine
272 * @vbasedev: the VFIO device handle
274 * De-asserts the active virtual IRQ and unmasks the physical IRQ
275 * (effective for level sensitive IRQ auto-masked by the VFIO driver).
276 * Then it handles next pending IRQ if any.
277 * eoi function is called on the first access to any MMIO region
278 * after an IRQ was triggered, trapped since slow path was set.
279 * It is assumed this access corresponds to the IRQ status
280 * register reset. With such a mechanism, a single IRQ can be
281 * handled at a time since there is no way to know which IRQ
282 * was completed by the guest (we would need additional details
283 * about the IRQ status register mask).
285 static void vfio_platform_eoi(VFIODevice *vbasedev)
287 VFIOINTp *intp;
288 VFIOPlatformDevice *vdev =
289 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
291 QEMU_LOCK_GUARD(&vdev->intp_mutex);
292 QLIST_FOREACH(intp, &vdev->intp_list, next) {
293 if (intp->state == VFIO_IRQ_ACTIVE) {
294 trace_vfio_platform_eoi(intp->pin,
295 event_notifier_get_fd(intp->interrupt));
296 intp->state = VFIO_IRQ_INACTIVE;
298 /* deassert the virtual IRQ */
299 qemu_set_irq(intp->qemuirq, 0);
301 if (vfio_irq_is_automasked(intp)) {
302 /* unmasks the physical level-sensitive IRQ */
303 vfio_unmask_single_irqindex(vbasedev, intp->pin);
306 /* a single IRQ can be active at a time */
307 break;
310 /* in case there are pending IRQs, handle the first one */
311 if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
312 intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
313 vfio_intp_inject_pending_lockheld(intp);
314 QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
319 * vfio_start_eventfd_injection - starts the virtual IRQ injection using
320 * user-side handled eventfds
321 * @sbdev: the sysbus device handle
322 * @irq: the qemu irq handle
325 static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
327 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
328 VFIOINTp *intp;
330 QLIST_FOREACH(intp, &vdev->intp_list, next) {
331 if (intp->qemuirq == irq) {
332 break;
335 assert(intp);
337 if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
338 abort();
/*
 * Functions used for irqfd
 */
347 * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
348 * @intp: the IRQ struct handle
349 * programs the VFIO driver to unmask this IRQ when the
350 * intp->unmask eventfd is triggered
352 static int vfio_set_resample_eventfd(VFIOINTp *intp)
354 int32_t fd = event_notifier_get_fd(intp->unmask);
355 VFIODevice *vbasedev = &intp->vdev->vbasedev;
356 Error *err = NULL;
358 qemu_set_fd_handler(fd, NULL, NULL, NULL);
359 if (!vfio_set_irq_signaling(vbasedev, intp->pin, 0,
360 VFIO_IRQ_SET_ACTION_UNMASK, fd, &err)) {
361 error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
362 return -EINVAL;
364 return 0;
368 * vfio_start_irqfd_injection - starts the virtual IRQ injection using
369 * irqfd
371 * @sbdev: the sysbus device handle
372 * @irq: the qemu irq handle
374 * In case the irqfd setup fails, we fallback to userspace handled eventfd
376 static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
378 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
379 VFIOINTp *intp;
381 if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
382 !vdev->irqfd_allowed) {
383 goto fail_irqfd;
386 QLIST_FOREACH(intp, &vdev->intp_list, next) {
387 if (intp->qemuirq == irq) {
388 break;
391 assert(intp);
393 if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
394 intp->unmask, irq) < 0) {
395 goto fail_irqfd;
398 if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
399 goto fail_vfio;
401 if (vfio_irq_is_automasked(intp)) {
402 if (vfio_set_resample_eventfd(intp) < 0) {
403 goto fail_vfio;
405 trace_vfio_platform_start_level_irqfd_injection(intp->pin,
406 event_notifier_get_fd(intp->interrupt),
407 event_notifier_get_fd(intp->unmask));
408 } else {
409 trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
410 event_notifier_get_fd(intp->interrupt));
413 intp->kvm_accel = true;
415 return;
416 fail_vfio:
417 kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
418 abort();
419 fail_irqfd:
420 vfio_start_eventfd_injection(sbdev, irq);
421 return;
424 /* VFIO skeleton */
426 static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
428 vbasedev->needs_reset = true;
431 /* not implemented yet */
432 static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
434 return -1;
438 * vfio_populate_device - Allocate and populate MMIO region
439 * and IRQ structs according to driver returned information
440 * @vbasedev: the VFIO device handle
441 * @errp: error object
444 static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp)
446 VFIOINTp *intp, *tmp;
447 int i, ret = -1;
448 VFIOPlatformDevice *vdev =
449 container_of(vbasedev, VFIOPlatformDevice, vbasedev);
451 if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
452 error_setg(errp, "this isn't a platform device");
453 return false;
456 vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
458 for (i = 0; i < vbasedev->num_regions; i++) {
459 char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
461 vdev->regions[i] = g_new0(VFIORegion, 1);
462 ret = vfio_region_setup(OBJECT(vdev), vbasedev,
463 vdev->regions[i], i, name);
464 g_free(name);
465 if (ret) {
466 error_setg_errno(errp, -ret, "failed to get region %d info", i);
467 goto reg_error;
471 vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
472 vfio_intp_mmap_enable, vdev);
474 QSIMPLEQ_INIT(&vdev->pending_intp_queue);
476 for (i = 0; i < vbasedev->num_irqs; i++) {
477 struct vfio_irq_info irq = { .argsz = sizeof(irq) };
479 irq.index = i;
480 ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq);
481 if (ret) {
482 error_setg_errno(errp, -ret, "failed to get device irq info");
483 goto irq_err;
484 } else {
485 trace_vfio_platform_populate_interrupts(irq.index,
486 irq.count,
487 irq.flags);
488 intp = vfio_init_intp(vbasedev, irq, errp);
489 if (!intp) {
490 goto irq_err;
494 return true;
495 irq_err:
496 timer_del(vdev->mmap_timer);
497 QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
498 QLIST_REMOVE(intp, next);
499 g_free(intp);
501 reg_error:
502 for (i = 0; i < vbasedev->num_regions; i++) {
503 if (vdev->regions[i]) {
504 vfio_region_finalize(vdev->regions[i]);
506 g_free(vdev->regions[i]);
508 g_free(vdev->regions);
509 return false;
512 /* specialized functions for VFIO Platform devices */
513 static VFIODeviceOps vfio_platform_ops = {
514 .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
515 .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
516 .vfio_eoi = vfio_platform_eoi,
520 * vfio_base_device_init - perform preliminary VFIO setup
521 * @vbasedev: the VFIO device handle
522 * @errp: error object
524 * Implement the VFIO command sequence that allows to discover
525 * assigned device resources: group extraction, device
526 * fd retrieval, resource query.
527 * Precondition: the device name must be initialized
529 static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
531 /* @fd takes precedence over @sysfsdev which takes precedence over @host */
532 if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
533 g_free(vbasedev->name);
534 vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
535 } else if (vbasedev->fd < 0) {
536 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
537 error_setg(errp, "wrong host device name");
538 return false;
541 vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
542 vbasedev->name);
545 if (!vfio_device_get_name(vbasedev, errp)) {
546 return false;
549 if (!vfio_attach_device(vbasedev->name, vbasedev,
550 &address_space_memory, errp)) {
551 return false;
554 if (vfio_populate_device(vbasedev, errp)) {
555 return true;
558 vfio_detach_device(vbasedev);
559 return false;
563 * vfio_platform_realize - the device realize function
564 * @dev: device state pointer
565 * @errp: error
567 * initialize the device, its memory regions and IRQ structures
568 * IRQ are started separately
570 static void vfio_platform_realize(DeviceState *dev, Error **errp)
572 ERRP_GUARD();
573 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
574 SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
575 VFIODevice *vbasedev = &vdev->vbasedev;
576 int i;
578 qemu_mutex_init(&vdev->intp_mutex);
580 trace_vfio_platform_realize(vbasedev->sysfsdev ?
581 vbasedev->sysfsdev : vbasedev->name,
582 vdev->compat);
584 if (!vfio_base_device_init(vbasedev, errp)) {
585 goto init_err;
588 if (!vdev->compat) {
589 GError *gerr = NULL;
590 gchar *contents;
591 gsize length;
592 char *path;
594 path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
595 if (!g_file_get_contents(path, &contents, &length, &gerr)) {
596 error_setg(errp, "%s", gerr->message);
597 g_error_free(gerr);
598 g_free(path);
599 return;
601 g_free(path);
602 vdev->compat = contents;
603 for (vdev->num_compat = 0; length; vdev->num_compat++) {
604 size_t skip = strlen(contents) + 1;
605 contents += skip;
606 length -= skip;
610 for (i = 0; i < vbasedev->num_regions; i++) {
611 if (vfio_region_mmap(vdev->regions[i])) {
612 warn_report("%s mmap unsupported, performance may be slow",
613 memory_region_name(vdev->regions[i]->mem));
615 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
617 return;
619 init_err:
620 if (vdev->vbasedev.name) {
621 error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
622 } else {
623 error_prepend(errp, "vfio error: ");
627 static const VMStateDescription vfio_platform_vmstate = {
628 .name = "vfio-platform",
629 .unmigratable = 1,
632 static Property vfio_platform_dev_properties[] = {
633 DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
634 DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
635 DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
636 DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
637 mmap_timeout, 1100),
638 DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
639 #ifdef CONFIG_IOMMUFD
640 DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
641 TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
642 #endif
643 DEFINE_PROP_END_OF_LIST(),
646 static void vfio_platform_instance_init(Object *obj)
648 VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
649 VFIODevice *vbasedev = &vdev->vbasedev;
651 vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
652 DEVICE(vdev), false);
#ifdef CONFIG_IOMMUFD
/* Setter of the "fd" property: forwards @str to vfio_device_set_fd(). */
static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
{
    VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);

    vfio_device_set_fd(&vdev->vbasedev, str, errp);
}
#endif
662 static void vfio_platform_class_init(ObjectClass *klass, void *data)
664 DeviceClass *dc = DEVICE_CLASS(klass);
665 SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
667 dc->realize = vfio_platform_realize;
668 device_class_set_props(dc, vfio_platform_dev_properties);
669 #ifdef CONFIG_IOMMUFD
670 object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
671 #endif
672 dc->vmsd = &vfio_platform_vmstate;
673 dc->desc = "VFIO-based platform device assignment";
674 sbc->connect_irq_notifier = vfio_start_irqfd_injection;
675 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
676 /* Supported by TYPE_VIRT_MACHINE */
677 dc->user_creatable = true;
680 static const TypeInfo vfio_platform_dev_info = {
681 .name = TYPE_VFIO_PLATFORM,
682 .parent = TYPE_SYS_BUS_DEVICE,
683 .instance_size = sizeof(VFIOPlatformDevice),
684 .instance_init = vfio_platform_instance_init,
685 .class_init = vfio_platform_class_init,
686 .class_size = sizeof(VFIOPlatformDeviceClass),
689 static void register_vfio_platform_dev_type(void)
691 type_register_static(&vfio_platform_dev_info);
694 type_init(register_vfio_platform_dev_type)