/*
 * VFIO utility
 *
 * Copyright 2016 - 2018 Red Hat, Inc.
 *
 * Authors:
 *   Fam Zheng <famz@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
#include "qapi/error.h"
#include "exec/ramlist.h"
#include "exec/cpu-common.h"
#include "trace.h"
#include "qemu/error-report.h"
#include "standard-headers/linux/pci_regs.h"
#include "qemu/event_notifier.h"
#include "qemu/vfio-helpers.h"
#include "qemu/lockable.h"
#define QEMU_VFIO_DEBUG 0

#define QEMU_VFIO_IOVA_MIN 0x10000ULL
/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
 * we can use a runtime limit; alternatively it's also possible to do platform
 * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
 */
#define QEMU_VFIO_IOVA_MAX (1ULL << 39)
typedef struct {
    /* Page aligned addr. */
    void *host;
    size_t size;
    uint64_t iova;
} IOVAMapping;

struct IOVARange {
    uint64_t start;
    uint64_t end;
};
struct QEMUVFIOState {
    QemuMutex lock;

    /* These fields are protected by BQL */
    int container;
    int group;
    int device;
    RAMBlockNotifier ram_notifier;
    struct vfio_region_info config_region_info, bar_region_info[6];
    struct IOVARange *usable_iova_ranges;
    uint8_t nb_iova_ranges;

    /* These fields are protected by @lock */
    /* VFIO's IO virtual address space is managed by splitting into a few
     * sections:
     *
     * ---------------       <= 0
     * |xxxxxxxxxxxxx|
     * |-------------|       <= QEMU_VFIO_IOVA_MIN
     * |             |
     * |    Fixed    |
     * |             |
     * |-------------|       <= low_water_mark
     * |             |
     * |    Free     |
     * |             |
     * |-------------|       <= high_water_mark
     * |             |
     * |    Temp     |
     * |             |
     * |-------------|       <= QEMU_VFIO_IOVA_MAX
     * |xxxxxxxxxxxxx|
     * |xxxxxxxxxxxxx|
     * ---------------
     *
     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
     *
     * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
     *   [QEMU_VFIO_IOVA_MIN, low_water_mark). Once allocated they will not be
     *   reclaimed - low_water_mark never shrinks;
     *
     * - IOVAs in range [low_water_mark, high_water_mark) are free;
     *
     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
     *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
     *   is recycled. The caller should make sure I/O's depending on these
     *   mappings are completed before calling.
     */
    uint64_t low_water_mark;
    uint64_t high_water_mark;
    IOVAMapping *mappings;
    int nr_mappings;
};
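
/*
 * Illustrative example of the scheme above (hypothetical numbers): with a
 * 4K page size, a fixed mapping of 3 pages takes
 * [low_water_mark, low_water_mark + 0x3000) and advances low_water_mark by
 * 0x3000, while a temporary mapping of 2 pages takes
 * [high_water_mark - 0x2000, high_water_mark) and lowers high_water_mark
 * by 0x2000. Allocation fails with -ENOMEM once the two watermarks would
 * cross.
 */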
/**
 * Find the VFIO group file for the PCI device at address @device and return
 * the path. The returned string is owned by the caller and should be
 * g_free()'d later.
 */
static char *sysfs_find_group_file(const char *device, Error **errp)
{
    char *sysfs_link;
    char *sysfs_group;
    char *p;
    char *path = NULL;

    sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
    sysfs_group = g_malloc0(PATH_MAX);
    if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) {
        error_setg_errno(errp, errno, "Failed to find iommu group sysfs path");
        goto out;
    }
    p = strrchr(sysfs_group, '/');
    if (!p) {
        error_setg(errp, "Failed to find iommu group number");
        goto out;
    }

    path = g_strdup_printf("/dev/vfio/%s", p + 1);
out:
    g_free(sysfs_link);
    g_free(sysfs_group);
    return path;
}
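
/*
 * Illustrative example (hypothetical values): for @device "0000:01:00.0",
 * the readlink() above might yield "../../../kernel/iommu_groups/15", in
 * which case the function returns "/dev/vfio/15".
 */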
static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
{
    assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
}

static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
{
    assert_bar_index_valid(s, index);
    s->bar_region_info[index] = (struct vfio_region_info) {
        .index = VFIO_PCI_BAR0_REGION_INDEX + index,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->bar_region_info[index])) {
        error_setg_errno(errp, errno, "Failed to get BAR region info");
        return -errno;
    }

    return 0;
}
/**
 * Map a PCI BAR area.
 */
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
                            uint64_t offset, uint64_t size, int prot,
                            Error **errp)
{
    void *p;

    assert_bar_index_valid(s, index);
    p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
             prot, MAP_SHARED,
             s->device, s->bar_region_info[index].offset + offset);
    if (p == MAP_FAILED) {
        error_setg_errno(errp, errno, "Failed to map BAR region");
        p = NULL;
    }
    return p;
}
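
/*
 * A minimal usage sketch (hypothetical caller, not part of this file),
 * mapping the first 4KB of BAR0 read/write:
 *
 *     void *regs = qemu_vfio_pci_map_bar(s, 0, 0, 0x1000,
 *                                        PROT_READ | PROT_WRITE, &local_err);
 *     if (regs) {
 *         ...access device registers through regs...
 *         qemu_vfio_pci_unmap_bar(s, 0, regs, 0, 0x1000);
 *     }
 */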
/**
 * Unmap a PCI BAR area.
 */
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
                             uint64_t offset, uint64_t size)
{
    if (bar) {
        munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
    }
}
/**
 * Initialize device IRQ with @irq_type and register an event notifier.
 */
int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                           int irq_type, Error **errp)
{
    int r;
    struct vfio_irq_set *irq_set;
    size_t irq_set_size;
    struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };

    irq_info.index = irq_type;
    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
        error_setg_errno(errp, errno, "Failed to get device interrupt info");
        return -errno;
    }
    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
        error_setg(errp, "Device interrupt doesn't support eventfd");
        return -EINVAL;
    }

    irq_set_size = sizeof(*irq_set) + sizeof(int);
    irq_set = g_malloc0(irq_set_size);

    /* Get to a known IRQ state */
    *irq_set = (struct vfio_irq_set) {
        .argsz = irq_set_size,
        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = irq_info.index,
        .start = 0,
        .count = 1,
    };

    *(int *)&irq_set->data = event_notifier_get_fd(e);
    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (r) {
        error_setg_errno(errp, errno, "Failed to setup device interrupt");
        return -errno;
    }
    return 0;
}
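
/*
 * A minimal usage sketch (hypothetical caller): wire an eventfd to the
 * device's first MSI-X vector.
 *
 *     EventNotifier e;
 *
 *     if (event_notifier_init(&e, 0) == 0 &&
 *         qemu_vfio_pci_init_irq(s, &e, VFIO_PCI_MSIX_IRQ_INDEX,
 *                                &local_err) == 0) {
 *         ...poll or aio-attach event_notifier_get_fd(&e)...
 *     }
 */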
static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                     int size, int ofs)
{
    int ret;

    do {
        ret = pread(s->device, buf, size, s->config_region_info.offset + ofs);
    } while (ret == -1 && errno == EINTR);
    return ret == size ? 0 : -errno;
}

static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int ofs)
{
    int ret;

    do {
        ret = pwrite(s->device, buf, size, s->config_region_info.offset + ofs);
    } while (ret == -1 && errno == EINTR);
    return ret == size ? 0 : -errno;
}
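
/*
 * Walk the capability chain of the VFIO_IOMMU_GET_INFO result in @buf and,
 * if a VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE capability is present, copy the
 * usable IOVA ranges it reports into @s.
 */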
static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
{
    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
    struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
    int i;

    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
        if (!cap->next) {
            return;
        }
        cap = (struct vfio_info_cap_header *)(buf + cap->next);
    }

    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;

    s->nb_iova_ranges = cap_iova_range->nr_iovas;
    if (s->nb_iova_ranges > 1) {
        s->usable_iova_ranges =
            g_realloc(s->usable_iova_ranges,
                      s->nb_iova_ranges * sizeof(struct IOVARange));
    }

    for (i = 0; i < s->nb_iova_ranges; i++) {
        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
    }
}
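
/*
 * Open the VFIO container, group and device for @device, enable the type1
 * IOMMU, collect the usable IOVA ranges, query the config space and BAR
 * regions, and enable PCI bus mastering.
 */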
static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
                              Error **errp)
{
    int ret;
    int i;
    uint16_t pci_cmd;
    struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
    struct vfio_iommu_type1_info *iommu_info = NULL;
    size_t iommu_info_size = sizeof(*iommu_info);
    struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
    char *group_file = NULL;

    s->usable_iova_ranges = NULL;

    /* Create a new container */
    s->container = open("/dev/vfio/vfio", O_RDWR);

    if (s->container == -1) {
        error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio");
        return -errno;
    }
    if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) {
        error_setg(errp, "Invalid VFIO version");
        ret = -EINVAL;
        goto fail_container;
    }

    if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "VFIO IOMMU check failed");
        ret = -EINVAL;
        goto fail_container;
    }

    /* Open the group */
    group_file = sysfs_find_group_file(device, errp);
    if (!group_file) {
        ret = -EINVAL;
        goto fail_container;
    }

    s->group = open(group_file, O_RDWR);
    if (s->group == -1) {
        error_setg_errno(errp, errno, "Failed to open VFIO group file: %s",
                         group_file);
        ret = -errno;
        g_free(group_file);
        goto fail_container;
    }
    g_free(group_file);

    /* Test the group is viable and available */
    if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) {
        error_setg_errno(errp, errno, "Failed to get VFIO group status");
        ret = -errno;
        goto fail;
    }

    if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_setg(errp, "VFIO group is not viable");
        ret = -EINVAL;
        goto fail;
    }

    /* Add the group to the container */
    if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) {
        error_setg_errno(errp, errno, "Failed to add group to VFIO container");
        ret = -errno;
        goto fail;
    }

    /* Enable the IOMMU model we want */
    if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type");
        ret = -errno;
        goto fail;
    }

    iommu_info = g_malloc0(iommu_info_size);
    iommu_info->argsz = iommu_info_size;

    /* Get additional IOMMU info */
    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
        error_setg_errno(errp, errno, "Failed to get IOMMU info");
        ret = -errno;
        goto fail;
    }

    /*
     * If the kernel does not report usable IOVA regions, choose
     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX - 1] region
     */
    s->nb_iova_ranges = 1;
    s->usable_iova_ranges = g_new0(struct IOVARange, 1);
    s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
    s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;

    if (iommu_info->argsz > iommu_info_size) {
        iommu_info_size = iommu_info->argsz;
        iommu_info = g_realloc(iommu_info, iommu_info_size);
        if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
            ret = -errno;
            goto fail;
        }
        collect_usable_iova_ranges(s, iommu_info);
    }

    s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);

    if (s->device < 0) {
        error_setg_errno(errp, errno, "Failed to get device fd");
        ret = -errno;
        goto fail;
    }

    /* Test and setup the device */
    if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) {
        error_setg_errno(errp, errno, "Failed to get device info");
        ret = -errno;
        goto fail;
    }

    if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) {
        error_setg(errp, "Invalid device regions");
        ret = -EINVAL;
        goto fail;
    }

    s->config_region_info = (struct vfio_region_info) {
        .index = VFIO_PCI_CONFIG_REGION_INDEX,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) {
        error_setg_errno(errp, errno, "Failed to get config region info");
        ret = -errno;
        goto fail;
    }

    for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
        ret = qemu_vfio_pci_init_bar(s, i, errp);
        if (ret) {
            goto fail;
        }
    }

    /* Enable bus master */
    ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    pci_cmd |= PCI_COMMAND_MASTER;
    ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    g_free(iommu_info);
    return 0;
fail:
    g_free(s->usable_iova_ranges);
    s->usable_iova_ranges = NULL;
    s->nb_iova_ranges = 0;
    g_free(iommu_info);
    close(s->group);
fail_container:
    close(s->container);
    return ret;
}
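
/*
 * RAMBlock notifier callbacks: keep the VFIO DMA mappings in sync with
 * guest RAM, creating a fixed mapping when a block is added and tearing it
 * down when the block is removed.
 */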
static void qemu_vfio_ram_block_added(RAMBlockNotifier *n,
                                      void *host, size_t size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
    trace_qemu_vfio_ram_block_added(s, host, size);
    qemu_vfio_dma_map(s, host, size, false, NULL);
}

static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n,
                                        void *host, size_t size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
    if (host) {
        trace_qemu_vfio_ram_block_removed(s, host, size);
        qemu_vfio_dma_unmap(s, host);
    }
}
static int qemu_vfio_init_ramblock(RAMBlock *rb, void *opaque)
{
    void *host_addr = qemu_ram_get_host_addr(rb);
    ram_addr_t length = qemu_ram_get_used_length(rb);
    int ret;
    QEMUVFIOState *s = opaque;

    if (!host_addr) {
        return 0;
    }
    ret = qemu_vfio_dma_map(s, host_addr, length, false, NULL);
    if (ret) {
        fprintf(stderr, "qemu_vfio_init_ramblock: failed %p %" PRIu64 "\n",
                host_addr, (uint64_t)length);
    }
    return 0;
}
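
/*
 * Initialize the mapping lock, register the RAM block notifier, set the
 * IOVA watermarks, and create fixed DMA mappings for all existing RAM
 * blocks.
 */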
static void qemu_vfio_open_common(QEMUVFIOState *s)
{
    qemu_mutex_init(&s->lock);
    s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
    s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
    ram_block_notifier_add(&s->ram_notifier);
    s->low_water_mark = QEMU_VFIO_IOVA_MIN;
    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
    qemu_ram_foreach_block(qemu_vfio_init_ramblock, s);
}
/**
 * Open a PCI device, e.g. "0000:00:01.0".
 */
QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
{
    int r;
    QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);

    r = qemu_vfio_init_pci(s, device, errp);
    if (r) {
        g_free(s);
        return NULL;
    }
    qemu_vfio_open_common(s);
    return s;
}
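
/*
 * A minimal usage sketch (hypothetical caller):
 *
 *     Error *local_err = NULL;
 *     QEMUVFIOState *s = qemu_vfio_open_pci("0000:00:01.0", &local_err);
 *
 *     if (!s) {
 *         error_report_err(local_err);
 *     } else {
 *         ...use the device, then qemu_vfio_close(s)...
 *     }
 */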
static void qemu_vfio_dump_mapping(IOVAMapping *m)
{
    if (QEMU_VFIO_DEBUG) {
        printf("  vfio mapping %p %" PRIx64 " to %" PRIx64 "\n", m->host,
               (uint64_t)m->size, (uint64_t)m->iova);
    }
}

static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
{
    int i;

    if (QEMU_VFIO_DEBUG) {
        printf("vfio mappings\n");
        for (i = 0; i < s->nr_mappings; ++i) {
            qemu_vfio_dump_mapping(&s->mappings[i]);
        }
    }
}
/**
 * Find the mapping entry that contains @host and set @index to its
 * position. If no entry contains it, @index is the position _after_ which
 * to insert the new mapping. IOW, it is the index of the largest element
 * that is smaller than @host, or -1 if there is no such entry.
 */
static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
                                           int *index)
{
    IOVAMapping *p = s->mappings;
    IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
    IOVAMapping *mid;

    trace_qemu_vfio_find_mapping(s, host);
    if (!p) {
        *index = -1;
        return NULL;
    }
    /* Binary search for the entry whose ->host is closest to @host */
    while (true) {
        mid = p + (q - p) / 2;
        if (mid == p) {
            break;
        }
        if (mid->host > host) {
            q = mid;
        } else if (mid->host < host) {
            p = mid;
        } else {
            break;
        }
    }
    if (mid->host > host) {
        mid--;
    } else if (mid < &s->mappings[s->nr_mappings - 1]
               && (mid + 1)->host <= host) {
        mid++;
    }
    *index = mid - &s->mappings[0];
    if (mid >= &s->mappings[0] &&
        mid->host <= host && mid->host + mid->size > host) {
        assert(mid < &s->mappings[s->nr_mappings]);
        return mid;
    }
    /* At this point *index + 1 is the right position to insert the new
     * mapping.*/
    return NULL;
}
/**
 * Create a new mapping record for [host, host + size) at @iova and insert
 * it at @index in @s->mappings.
 */
static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
                                          void *host, size_t size,
                                          int index, uint64_t iova)
{
    int shift;
    IOVAMapping m = {.host = host, .size = size, .iova = iova};
    IOVAMapping *insert;

    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
    trace_qemu_vfio_new_mapping(s, host, size, index, iova);

    assert(index >= 0);
    s->nr_mappings++;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
    insert = &s->mappings[index];
    shift = s->nr_mappings - index - 1;
    if (shift) {
        memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
    }
    *insert = m;
    return insert;
}
/* Do the DMA mapping with VFIO. */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
                                uint64_t iova)
{
    struct vfio_iommu_type1_dma_map dma_map = {
        .argsz = sizeof(dma_map),
        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
        .iova = iova,
        .vaddr = (uintptr_t)host,
        .size = size,
    };

    trace_qemu_vfio_do_mapping(s, host, size, iova);

    if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
        error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
        return -errno;
    }
    return 0;
}
/**
 * Undo the DMA mapping from @s with VFIO, and remove it from the mapping
 * list.
 */
static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
                                   Error **errp)
{
    int index;
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = mapping->iova,
        .size = mapping->size,
    };

    index = mapping - s->mappings;
    assert(mapping->size > 0);
    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size));
    assert(index >= 0 && index < s->nr_mappings);
    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
    }
    memmove(mapping, &s->mappings[index + 1],
            sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
    s->nr_mappings--;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
}
/* Check that the mapping list is sorted in ascending order with no overlaps. */
static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
{
    int i;

    if (QEMU_VFIO_DEBUG) {
        for (i = 0; i < s->nr_mappings - 1; ++i) {
            if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
                fprintf(stderr, "item %d not sorted!\n", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
            if (!(s->mappings[i].host + s->mappings[i].size <=
                  s->mappings[i + 1].host)) {
                fprintf(stderr, "item %d overlaps with next!\n", i);
                qemu_vfio_dump_mappings(s);
                return false;
            }
        }
    }
    return true;
}
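
/*
 * IOVA allocators. Fixed IOVAs are carved bottom-up from the first usable
 * range at or above low_water_mark; temporary IOVAs are carved top-down
 * from the last usable range at or below high_water_mark. Illustrative
 * example (hypothetical values): with a single usable range
 * [0x10000, (1 << 39) - 1], a 0x3000-byte fixed allocation returns 0x10000
 * and advances low_water_mark to 0x13000.
 */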
static int
qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
{
    int i;

    for (i = 0; i < s->nb_iova_ranges; i++) {
        if (s->usable_iova_ranges[i].end < s->low_water_mark) {
            continue;
        }
        s->low_water_mark =
            MAX(s->low_water_mark, s->usable_iova_ranges[i].start);

        /*
         * The second test catches the case where the available-size
         * computation wraps to 0 because the range spans the whole
         * 64-bit space.
         */
        if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
            s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {
            *iova = s->low_water_mark;
            s->low_water_mark += size;
            return 0;
        }
    }
    return -ENOMEM;
}
static int
qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
{
    int i;

    for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
        if (s->usable_iova_ranges[i].start > s->high_water_mark) {
            continue;
        }
        s->high_water_mark =
            MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);

        /* As above, the second test catches wraparound of the size to 0 */
        if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
            s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
            *iova = s->high_water_mark - size;
            s->high_water_mark = *iova;
            return 0;
        }
    }
    return -ENOMEM;
}
/* Map [host, host + size) area into a contiguous IOVA address space, and store
 * the result in @iova if not NULL. The caller needs to make sure the area is
 * aligned to page size, and mustn't overlap with existing mapping areas (split
 * mapping status within this area is not allowed).
 */
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
                      bool temporary, uint64_t *iova)
{
    int ret = 0;
    int index;
    IOVAMapping *mapping;
    uint64_t iova0;

    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
    trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
    qemu_mutex_lock(&s->lock);
    mapping = qemu_vfio_find_mapping(s, host, &index);
    if (mapping) {
        iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
    } else {
        if (s->high_water_mark - s->low_water_mark + 1 < size) {
            ret = -ENOMEM;
            goto out;
        }
        if (!temporary) {
            if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
                ret = -ENOMEM;
                goto out;
            }

            mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
            if (!mapping) {
                ret = -ENOMEM;
                goto out;
            }
            assert(qemu_vfio_verify_mappings(s));
            ret = qemu_vfio_do_mapping(s, host, size, iova0);
            if (ret) {
                qemu_vfio_undo_mapping(s, mapping, NULL);
                goto out;
            }
            qemu_vfio_dump_mappings(s);
        } else {
            if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
                ret = -ENOMEM;
                goto out;
            }
            ret = qemu_vfio_do_mapping(s, host, size, iova0);
            if (ret) {
                goto out;
            }
        }
    }
    if (iova) {
        *iova = iova0;
    }
out:
    qemu_mutex_unlock(&s->lock);
    return ret;
}
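
/*
 * A minimal usage sketch (hypothetical caller): map a page-aligned buffer
 * for DMA and retrieve its IOVA.
 *
 *     uint64_t iova;
 *     void *buf = qemu_memalign(qemu_real_host_page_size,
 *                               qemu_real_host_page_size);
 *
 *     if (qemu_vfio_dma_map(s, buf, qemu_real_host_page_size,
 *                           false, &iova) == 0) {
 *         ...program the device with iova...
 *         qemu_vfio_dma_unmap(s, buf);
 *     }
 */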
/* Reset the high watermark and free all "temporary" mappings. */
int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
{
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = s->high_water_mark,
        .size = QEMU_VFIO_IOVA_MAX - s->high_water_mark,
    };

    trace_qemu_vfio_dma_reset_temporary(s);
    QEMU_LOCK_GUARD(&s->lock);
    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
        return -errno;
    }
    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
    return 0;
}
/* Unmap the whole area that was previously mapped with
 * qemu_vfio_dma_map(). */
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
{
    int index = 0;
    IOVAMapping *m;

    if (!host) {
        return;
    }

    trace_qemu_vfio_dma_unmap(s, host);
    qemu_mutex_lock(&s->lock);
    m = qemu_vfio_find_mapping(s, host, &index);
    if (!m) {
        goto out;
    }
    qemu_vfio_undo_mapping(s, m, NULL);
out:
    qemu_mutex_unlock(&s->lock);
}
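
/* Reset the device via VFIO_DEVICE_RESET; any failure is ignored. */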
static void qemu_vfio_reset(QEMUVFIOState *s)
{
    ioctl(s->device, VFIO_DEVICE_RESET);
}
/* Close and free the VFIO resources. */
void qemu_vfio_close(QEMUVFIOState *s)
{
    if (!s) {
        return;
    }
    /*
     * Remove from the end so that qemu_vfio_undo_mapping()'s memmove and
     * nr_mappings decrement don't skip entries.
     */
    while (s->nr_mappings) {
        qemu_vfio_undo_mapping(s, &s->mappings[s->nr_mappings - 1], NULL);
    }
    ram_block_notifier_remove(&s->ram_notifier);
    g_free(s->usable_iova_ranges);
    s->nb_iova_ranges = 0;
    qemu_vfio_reset(s);
    close(s->device);
    close(s->group);
    close(s->container);
}