/*
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"
static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	struct device			*dev;
	dev_t				devt;
	struct cdev			cdev;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	atomic_t			opened;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};
/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
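
/*
 * Example (illustrative sketch, not part of this file): an IOMMU backend
 * such as vfio_iommu_type1 supplies a vfio_iommu_driver_ops table and
 * registers it at module init.  The "foo" names below are hypothetical
 * placeholders, not the real type1 implementation:
 *
 *	static const struct vfio_iommu_driver_ops foo_iommu_ops = {
 *		.name		= "vfio-iommu-foo",
 *		.owner		= THIS_MODULE,
 *		.open		= foo_iommu_open,
 *		.release	= foo_iommu_release,
 *		.ioctl		= foo_iommu_ioctl,
 *		.attach_group	= foo_iommu_attach_group,
 *		.detach_group	= foo_iommu_detach_group,
 *	};
 *
 *	static int __init foo_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&foo_iommu_ops);
 *	}
 *
 *	static void __exit foo_iommu_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&foo_iommu_ops);
 *	}
 */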
/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	/* index 0 is used by /dev/vfio/vfio */
	return idr_alloc(&vfio.group_idr, group, 1, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}
/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_free_group_minor(minor);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.devt), minor),
			    group, "%d", iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}
/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);

	WARN_ON(!list_empty(&group->device_list));

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}
/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}
/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;
	int ret;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;

	ret = dev_set_drvdata(dev, device);
	if (ret) {
		kfree(device);
		return ERR_PTR(ret);
	}

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}
/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}
/*
 * Whitelist some drivers that we know are safe (no dma) or just sit on
 * a device.  It's not always practical to leave a device within a group
 * driverless as it could get re-bound to something unsafe.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };

static bool vfio_whitelisted_driver(struct device_driver *drv)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are either
 * driver-less or bound to a vfio or whitelisted driver.  We test the
 * latter by the existence of a struct vfio_device matching the dev.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);

	if (!drv || vfio_whitelisted_driver(drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return -EINVAL;
}
/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN("Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_del_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/*
	 * Expect to fall out here.  If a device was in use, it would
	 * have been bound to a vfio sub-driver, which would have blocked
	 * in .remove at vfio_del_group_dev.  Sanity check that we no
	 * longer track the device, so it's safe to remove.
	 */
	device = vfio_group_get_device(group, dev);
	if (likely(!device))
		return 0;

	WARN("Device %s removed from live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	vfio_device_put(device);
	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}
static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;

	/*
	 * Need to go through a group_lock lookup to get a reference or
	 * we risk racing a group being removed.  Leave a WARN_ON for
	 * debugging, but if the group no longer exists, a spurious notify
	 * is harmless.
	 */
	group = vfio_group_try_get(group);
	if (WARN_ON(!group))
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		vfio_group_nb_del_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return PTR_ERR(device);
	}

	/*
	 * Added device holds reference to iommu_group and vfio_device
	 * (which in turn holds reference to vfio_group).  Drop extra
	 * group reference used while acquiring device.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
/**
 * Get a reference to the vfio_device for a device that is known to
 * be bound to a vfio driver.  The driver implicitly holds a
 * vfio_device reference between vfio_add_group_dev and
 * vfio_del_group_dev.  We can therefore use drvdata to increment
 * that reference from the struct device.  This additional
 * reference must be released by calling vfio_device_put.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);

	vfio_device_get(device);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
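
/*
 * Example (illustrative sketch, not part of this file): a vfio bus driver
 * that only has a bare struct device in hand, for instance in a bus-specific
 * error handler, can reach its per-device state through the two helpers
 * above.  The "foo" names are hypothetical:
 *
 *	static void foo_handle_error(struct device *dev)
 *	{
 *		struct vfio_device *device = vfio_device_get_from_dev(dev);
 *		struct foo_private *priv = vfio_device_data(device);
 *
 *		foo_report_error(priv);
 *		vfio_device_put(device);
 *	}
 *
 * This is only valid while the driver is bound, i.e. between
 * vfio_add_group_dev() and vfio_del_group_dev().
 */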
/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group = group->iommu_group;
	void *device_data = device->device_data;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	vfio_device_put(device);

	/* TODO send a signal to encourage this to be released */
	wait_event(vfio.release_q, !vfio_dev_present(group, dev));

	vfio_group_put(group);

	iommu_group_put(iommu_group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
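
/*
 * Example (illustrative sketch, not part of this file): a vfio bus driver
 * (vfio-pci is the in-tree user) brackets a device's vfio lifetime with
 * vfio_add_group_dev() in probe and vfio_del_group_dev() in remove.  The
 * "foo" names are hypothetical:
 *
 *	static int foo_probe(struct foo_dev *fdev)
 *	{
 *		struct foo_private *priv;
 *		int ret;
 *
 *		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 *		if (!priv)
 *			return -ENOMEM;
 *
 *		priv->fdev = fdev;
 *		ret = vfio_add_group_dev(&fdev->dev, &foo_vfio_ops, priv);
 *		if (ret)
 *			kfree(priv);
 *		return ret;
 *	}
 *
 *	static void foo_remove(struct foo_dev *fdev)
 *	{
 *		struct foo_private *priv = vfio_del_group_dev(&fdev->dev);
 *
 *		kfree(priv);
 *	}
 *
 * vfio_del_group_dev() blocks on vfio.release_q until every reference to
 * the vfio_device is gone, then hands back the device_data pointer that
 * was passed to vfio_add_group_dev().
 */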
/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}
/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}
static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		/* module reference holds the driver we're working on */
		mutex_unlock(&vfio.iommu_drivers_lock);

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			goto skip_drivers_unlock;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (!ret) {
			container->iommu_driver = driver;
			container->iommu_data = data;
		} else {
			driver->ops->release(data);
			module_put(driver->ops->owner);
		}

		goto skip_drivers_unlock;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);

skip_drivers_unlock:
	up_write(&container->group_lock);

	return ret;
}
static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */
static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}
/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}
static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};
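
/*
 * Example (illustrative sketch, not part of this file): the userspace view
 * of the container fd implemented above, combined with a group fd from the
 * section below.  Group number 26, the device name and the type1 IOMMU are
 * assumptions made for the example:
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		return -1;
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 *	int device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 *
 * Adding the group is what privileges the container to enable an IOMMU;
 * checking VFIO_GROUP_GET_STATUS for VFIO_GROUP_FLAGS_VIABLE first is
 * recommended, since the device fd request fails unless every device in
 * the group is bound to vfio or a whitelisted driver.
 */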
/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}
static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;
static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret = -ENODEV;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (strcmp(dev_name(device->dev), buf))
			continue;

		ret = device->ops->open(device->device_data);
		if (ret)
			break;

		/*
		 * We can't use anon_inode_getfd() because we need to modify
		 * the f_mode flags directly to allow more than just ioctls
		 */
		ret = get_unused_fd();
		if (ret < 0) {
			device->ops->release(device->device_data);
			break;
		}

		filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
					   device, O_RDWR);
		if (IS_ERR(filep)) {
			put_unused_fd(ret);
			ret = PTR_ERR(filep);
			device->ops->release(device->device_data);
			break;
		}

		/*
		 * TODO: add an anon_inode interface to do this.
		 * Appears to be missing by lack of need rather than
		 * explicitly prevented.  Now there's need.
		 */
		filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

		vfio_device_get(device);
		atomic_inc(&group->container_users);

		fd_install(ret, filep);
		break;
	}
	mutex_unlock(&group->device_lock);

	return ret;
}
static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};
/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	if (mode && (MINOR(dev->devt) == 0))
		*mode = S_IRUGO | S_IWUGO;

	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_base_chrdev;

	cdev_init(&vfio.cdev, &vfio_fops);
	ret = cdev_add(&vfio.cdev, vfio.devt, 1);
	if (ret)
		goto err_base_cdev;

	vfio.dev = device_create(vfio.class, NULL, vfio.devt, NULL, "vfio");
	if (IS_ERR(vfio.dev)) {
		ret = PTR_ERR(vfio.dev);
		goto err_base_dev;
	}

	/* /dev/vfio/$GROUP */
	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev,
		       MKDEV(MAJOR(vfio.devt), 1), MINORMASK - 1);
	if (ret)
		goto err_groups_cdev;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");
	request_module_nowait("vfio_iommu_spapr_tce");

	return 0;

err_groups_cdev:
	device_destroy(vfio.class, vfio.devt);
err_base_dev:
	cdev_del(&vfio.cdev);
err_base_cdev:
	unregister_chrdev_region(vfio.devt, MINORMASK);
err_base_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	return ret;
}
static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	device_destroy(vfio.class, vfio.devt);
	cdev_del(&vfio.cdev);
	unregister_chrdev_region(vfio.devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
}
module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);