vfio/iommufd: Add support for iova_ranges and pgsizes
[qemu/ar7.git] / hw / vfio / iommufd.c
blob01b448e840581e0dd6d3df1897169665f79dcbe3
1 /*
2 * iommufd container backend
4 * Copyright (C) 2023 Intel Corporation.
5 * Copyright Red Hat, Inc. 2023
7 * Authors: Yi Liu <yi.l.liu@intel.com>
8 * Eric Auger <eric.auger@redhat.com>
10 * SPDX-License-Identifier: GPL-2.0-or-later
13 #include "qemu/osdep.h"
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include <linux/iommufd.h>
18 #include "hw/vfio/vfio-common.h"
19 #include "qemu/error-report.h"
20 #include "trace.h"
21 #include "qapi/error.h"
22 #include "sysemu/iommufd.h"
23 #include "hw/qdev-core.h"
24 #include "sysemu/reset.h"
25 #include "qemu/cutils.h"
26 #include "qemu/chardev_open.h"
28 static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
29 ram_addr_t size, void *vaddr, bool readonly)
31 VFIOIOMMUFDContainer *container =
32 container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
34 return iommufd_backend_map_dma(container->be,
35 container->ioas_id,
36 iova, size, vaddr, readonly);
39 static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer,
40 hwaddr iova, ram_addr_t size,
41 IOMMUTLBEntry *iotlb)
43 VFIOIOMMUFDContainer *container =
44 container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
46 /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
47 return iommufd_backend_unmap_dma(container->be,
48 container->ioas_id, iova, size);
51 static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
53 return vfio_kvm_device_add_fd(vbasedev->fd, errp);
56 static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
58 Error *err = NULL;
60 if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
61 error_report_err(err);
65 static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
67 IOMMUFDBackend *iommufd = vbasedev->iommufd;
68 struct vfio_device_bind_iommufd bind = {
69 .argsz = sizeof(bind),
70 .flags = 0,
72 int ret;
74 ret = iommufd_backend_connect(iommufd, errp);
75 if (ret) {
76 return ret;
80 * Add device to kvm-vfio to be prepared for the tracking
81 * in KVM. Especially for some emulated devices, it requires
82 * to have kvm information in the device open.
84 ret = iommufd_cdev_kvm_device_add(vbasedev, errp);
85 if (ret) {
86 goto err_kvm_device_add;
89 /* Bind device to iommufd */
90 bind.iommufd = iommufd->fd;
91 ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
92 if (ret) {
93 error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
94 vbasedev->fd, bind.iommufd);
95 goto err_bind;
98 vbasedev->devid = bind.out_devid;
99 trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
100 vbasedev->fd, vbasedev->devid);
101 return ret;
102 err_bind:
103 iommufd_cdev_kvm_device_del(vbasedev);
104 err_kvm_device_add:
105 iommufd_backend_disconnect(iommufd);
106 return ret;
109 static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
111 /* Unbind is automatically conducted when device fd is closed */
112 iommufd_cdev_kvm_device_del(vbasedev);
113 iommufd_backend_disconnect(vbasedev->iommufd);
116 static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
118 long int ret = -ENOTTY;
119 char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
120 DIR *dir = NULL;
121 struct dirent *dent;
122 gchar *contents;
123 struct stat st;
124 gsize length;
125 int major, minor;
126 dev_t vfio_devt;
128 path = g_strdup_printf("%s/vfio-dev", sysfs_path);
129 if (stat(path, &st) < 0) {
130 error_setg_errno(errp, errno, "no such host device");
131 goto out_free_path;
134 dir = opendir(path);
135 if (!dir) {
136 error_setg_errno(errp, errno, "couldn't open directory %s", path);
137 goto out_free_path;
140 while ((dent = readdir(dir))) {
141 if (!strncmp(dent->d_name, "vfio", 4)) {
142 vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
143 break;
147 if (!vfio_dev_path) {
148 error_setg(errp, "failed to find vfio-dev/vfioX/dev");
149 goto out_close_dir;
152 if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
153 error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
154 goto out_free_dev_path;
157 if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
158 error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
159 goto out_free_dev_path;
161 g_free(contents);
162 vfio_devt = makedev(major, minor);
164 vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
165 ret = open_cdev(vfio_path, vfio_devt);
166 if (ret < 0) {
167 error_setg(errp, "Failed to open %s", vfio_path);
170 trace_iommufd_cdev_getfd(vfio_path, ret);
171 g_free(vfio_path);
173 out_free_dev_path:
174 g_free(vfio_dev_path);
175 out_close_dir:
176 closedir(dir);
177 out_free_path:
178 if (*errp) {
179 error_prepend(errp, VFIO_MSG_PREFIX, path);
181 g_free(path);
183 return ret;
186 static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
187 Error **errp)
189 int ret, iommufd = vbasedev->iommufd->fd;
190 struct vfio_device_attach_iommufd_pt attach_data = {
191 .argsz = sizeof(attach_data),
192 .flags = 0,
193 .pt_id = id,
196 /* Attach device to an IOAS or hwpt within iommufd */
197 ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data);
198 if (ret) {
199 error_setg_errno(errp, errno,
200 "[iommufd=%d] error attach %s (%d) to id=%d",
201 iommufd, vbasedev->name, vbasedev->fd, id);
202 } else {
203 trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
204 vbasedev->fd, id);
206 return ret;
209 static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
211 int ret, iommufd = vbasedev->iommufd->fd;
212 struct vfio_device_detach_iommufd_pt detach_data = {
213 .argsz = sizeof(detach_data),
214 .flags = 0,
217 ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data);
218 if (ret) {
219 error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
220 } else {
221 trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
223 return ret;
226 static int iommufd_cdev_attach_container(VFIODevice *vbasedev,
227 VFIOIOMMUFDContainer *container,
228 Error **errp)
230 return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
233 static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
234 VFIOIOMMUFDContainer *container)
236 Error *err = NULL;
238 if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
239 error_report_err(err);
243 static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
245 VFIOContainerBase *bcontainer = &container->bcontainer;
247 if (!QLIST_EMPTY(&bcontainer->device_list)) {
248 return;
250 memory_listener_unregister(&bcontainer->listener);
251 vfio_container_destroy(bcontainer);
252 iommufd_backend_free_id(container->be, container->ioas_id);
253 g_free(container);
/*
 * Thin wrapper that forwards @state to
 * ram_block_uncoordinated_discard_disable() and returns its result.
 */
static int iommufd_cdev_ram_block_discard_disable(bool state)
{
    int rc;

    /*
     * We support coordinated discarding of RAM via the RamDiscardManager.
     */
    rc = ram_block_uncoordinated_discard_disable(state);
    return rc;
}
264 static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
265 uint32_t ioas_id, Error **errp)
267 VFIOContainerBase *bcontainer = &container->bcontainer;
268 struct iommu_ioas_iova_ranges *info;
269 struct iommu_iova_range *iova_ranges;
270 int ret, sz, fd = container->be->fd;
272 info = g_malloc0(sizeof(*info));
273 info->size = sizeof(*info);
274 info->ioas_id = ioas_id;
276 ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
277 if (ret && errno != EMSGSIZE) {
278 goto error;
281 sz = info->num_iovas * sizeof(struct iommu_iova_range);
282 info = g_realloc(info, sizeof(*info) + sz);
283 info->allowed_iovas = (uintptr_t)(info + 1);
285 ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
286 if (ret) {
287 goto error;
290 iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;
292 for (int i = 0; i < info->num_iovas; i++) {
293 Range *range = g_new(Range, 1);
295 range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
296 bcontainer->iova_ranges =
297 range_list_insert(bcontainer->iova_ranges, range);
299 bcontainer->pgsizes = info->out_iova_alignment;
301 g_free(info);
302 return 0;
304 error:
305 ret = -errno;
306 g_free(info);
307 error_setg_errno(errp, errno, "Cannot get IOVA ranges");
308 return ret;
311 static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
312 AddressSpace *as, Error **errp)
314 VFIOContainerBase *bcontainer;
315 VFIOIOMMUFDContainer *container;
316 VFIOAddressSpace *space;
317 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
318 int ret, devfd;
319 uint32_t ioas_id;
320 Error *err = NULL;
322 devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
323 if (devfd < 0) {
324 return devfd;
326 vbasedev->fd = devfd;
328 ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
329 if (ret) {
330 goto err_connect_bind;
333 space = vfio_get_address_space(as);
335 /* try to attach to an existing container in this space */
336 QLIST_FOREACH(bcontainer, &space->containers, next) {
337 container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
338 if (bcontainer->ops != &vfio_iommufd_ops ||
339 vbasedev->iommufd != container->be) {
340 continue;
342 if (iommufd_cdev_attach_container(vbasedev, container, &err)) {
343 const char *msg = error_get_pretty(err);
345 trace_iommufd_cdev_fail_attach_existing_container(msg);
346 error_free(err);
347 err = NULL;
348 } else {
349 ret = iommufd_cdev_ram_block_discard_disable(true);
350 if (ret) {
351 error_setg(errp,
352 "Cannot set discarding of RAM broken (%d)", ret);
353 goto err_discard_disable;
355 goto found_container;
359 /* Need to allocate a new dedicated container */
360 ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp);
361 if (ret < 0) {
362 goto err_alloc_ioas;
365 trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
367 container = g_malloc0(sizeof(*container));
368 container->be = vbasedev->iommufd;
369 container->ioas_id = ioas_id;
371 bcontainer = &container->bcontainer;
372 vfio_container_init(bcontainer, space, &vfio_iommufd_ops);
373 QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
375 ret = iommufd_cdev_attach_container(vbasedev, container, errp);
376 if (ret) {
377 goto err_attach_container;
380 ret = iommufd_cdev_ram_block_discard_disable(true);
381 if (ret) {
382 goto err_discard_disable;
385 ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err);
386 if (ret) {
387 error_append_hint(&err,
388 "Fallback to default 64bit IOVA range and 4K page size\n");
389 warn_report_err(err);
390 err = NULL;
391 bcontainer->pgsizes = qemu_real_host_page_size();
394 bcontainer->listener = vfio_memory_listener;
395 memory_listener_register(&bcontainer->listener, bcontainer->space->as);
397 if (bcontainer->error) {
398 ret = -1;
399 error_propagate_prepend(errp, bcontainer->error,
400 "memory listener initialization failed: ");
401 goto err_listener_register;
404 bcontainer->initialized = true;
406 found_container:
407 ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
408 if (ret) {
409 error_setg_errno(errp, errno, "error getting device info");
410 goto err_listener_register;
414 * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level
415 * for discarding incompatibility check as well?
417 if (vbasedev->ram_block_discard_allowed) {
418 iommufd_cdev_ram_block_discard_disable(false);
421 vbasedev->group = 0;
422 vbasedev->num_irqs = dev_info.num_irqs;
423 vbasedev->num_regions = dev_info.num_regions;
424 vbasedev->flags = dev_info.flags;
425 vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
426 vbasedev->bcontainer = bcontainer;
427 QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
428 QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
430 trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
431 vbasedev->num_regions, vbasedev->flags);
432 return 0;
434 err_listener_register:
435 iommufd_cdev_ram_block_discard_disable(false);
436 err_discard_disable:
437 iommufd_cdev_detach_container(vbasedev, container);
438 err_attach_container:
439 iommufd_cdev_container_destroy(container);
440 err_alloc_ioas:
441 vfio_put_address_space(space);
442 iommufd_cdev_unbind_and_disconnect(vbasedev);
443 err_connect_bind:
444 close(vbasedev->fd);
445 return ret;
448 static void iommufd_cdev_detach(VFIODevice *vbasedev)
450 VFIOContainerBase *bcontainer = vbasedev->bcontainer;
451 VFIOAddressSpace *space = bcontainer->space;
452 VFIOIOMMUFDContainer *container = container_of(bcontainer,
453 VFIOIOMMUFDContainer,
454 bcontainer);
455 QLIST_REMOVE(vbasedev, global_next);
456 QLIST_REMOVE(vbasedev, container_next);
457 vbasedev->bcontainer = NULL;
459 if (!vbasedev->ram_block_discard_allowed) {
460 iommufd_cdev_ram_block_discard_disable(false);
463 iommufd_cdev_detach_container(vbasedev, container);
464 iommufd_cdev_container_destroy(container);
465 vfio_put_address_space(space);
467 iommufd_cdev_unbind_and_disconnect(vbasedev);
468 close(vbasedev->fd);
471 const VFIOIOMMUOps vfio_iommufd_ops = {
472 .dma_map = iommufd_cdev_map,
473 .dma_unmap = iommufd_cdev_unmap,
474 .attach_device = iommufd_cdev_attach,
475 .detach_device = iommufd_cdev_detach,