4 * Copyright 2016 - 2018 Red Hat, Inc.
7 * Fam Zheng <famz@redhat.com>
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
13 #include "qemu/osdep.h"
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include "qapi/error.h"
17 #include "exec/ramlist.h"
18 #include "exec/cpu-common.h"
19 #include "exec/memory.h"
21 #include "qemu/error-report.h"
22 #include "standard-headers/linux/pci_regs.h"
23 #include "qemu/event_notifier.h"
24 #include "qemu/vfio-helpers.h"
25 #include "qemu/lockable.h"
28 #define QEMU_VFIO_DEBUG 0
30 #define QEMU_VFIO_IOVA_MIN 0x10000ULL
31 /* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
32 * we can use a runtime limit; alternatively it's also possible to do platform
33 * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
35 #define QEMU_VFIO_IOVA_MAX (1ULL << 39)
38 /* Page aligned addr. */
49 struct QEMUVFIOState
{
52 /* These fields are protected by BQL */
56 RAMBlockNotifier ram_notifier
;
57 struct vfio_region_info config_region_info
, bar_region_info
[6];
58 struct IOVARange
*usable_iova_ranges
;
59 uint8_t nb_iova_ranges
;
61 /* These fields are protected by @lock */
62 /* VFIO's IO virtual address space is managed by splitting into a few
65 * --------------- <= 0
67 * |-------------| <= QEMU_VFIO_IOVA_MIN
71 * |-------------| <= low_water_mark
75 * |-------------| <= high_water_mark
79 * |-------------| <= QEMU_VFIO_IOVA_MAX
84 * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
86 * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
87 * [QEMU_VFIO_IOVA_MIN, low_water_mark). Once allocated they will not be
88 * reclaimed - low_water_mark never shrinks;
90 * - IOVAs in range [low_water_mark, high_water_mark) are free;
92 * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
93 * mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
94 * is recycled. The caller should make sure I/O's depending on these
95 * mappings are completed before calling.
97 uint64_t low_water_mark
;
98 uint64_t high_water_mark
;
99 IOVAMapping
*mappings
;
104 * Find group file by PCI device address as specified @device, and return the
105 * path. The returned string is owned by caller and should be g_free'ed later.
107 static char *sysfs_find_group_file(const char *device
, Error
**errp
)
114 sysfs_link
= g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device
);
115 sysfs_group
= g_malloc0(PATH_MAX
);
116 if (readlink(sysfs_link
, sysfs_group
, PATH_MAX
- 1) == -1) {
117 error_setg_errno(errp
, errno
, "Failed to find iommu group sysfs path");
120 p
= strrchr(sysfs_group
, '/');
122 error_setg(errp
, "Failed to find iommu group number");
126 path
= g_strdup_printf("/dev/vfio/%s", p
+ 1);
133 static inline void assert_bar_index_valid(QEMUVFIOState
*s
, int index
)
135 assert(index
>= 0 && index
< ARRAY_SIZE(s
->bar_region_info
));
138 static int qemu_vfio_pci_init_bar(QEMUVFIOState
*s
, int index
, Error
**errp
)
140 g_autofree
char *barname
= NULL
;
141 assert_bar_index_valid(s
, index
);
142 s
->bar_region_info
[index
] = (struct vfio_region_info
) {
143 .index
= VFIO_PCI_BAR0_REGION_INDEX
+ index
,
144 .argsz
= sizeof(struct vfio_region_info
),
146 if (ioctl(s
->device
, VFIO_DEVICE_GET_REGION_INFO
, &s
->bar_region_info
[index
])) {
147 error_setg_errno(errp
, errno
, "Failed to get BAR region info");
150 barname
= g_strdup_printf("bar[%d]", index
);
151 trace_qemu_vfio_region_info(barname
, s
->bar_region_info
[index
].offset
,
152 s
->bar_region_info
[index
].size
,
153 s
->bar_region_info
[index
].cap_offset
);
159 * Map a PCI bar area.
161 void *qemu_vfio_pci_map_bar(QEMUVFIOState
*s
, int index
,
162 uint64_t offset
, uint64_t size
, int prot
,
166 assert(QEMU_IS_ALIGNED(offset
, qemu_real_host_page_size
));
167 assert_bar_index_valid(s
, index
);
168 p
= mmap(NULL
, MIN(size
, s
->bar_region_info
[index
].size
- offset
),
170 s
->device
, s
->bar_region_info
[index
].offset
+ offset
);
171 trace_qemu_vfio_pci_map_bar(index
, s
->bar_region_info
[index
].offset
,
173 if (p
== MAP_FAILED
) {
174 error_setg_errno(errp
, errno
, "Failed to map BAR region");
181 * Unmap a PCI bar area.
183 void qemu_vfio_pci_unmap_bar(QEMUVFIOState
*s
, int index
, void *bar
,
184 uint64_t offset
, uint64_t size
)
187 munmap(bar
, MIN(size
, s
->bar_region_info
[index
].size
- offset
));
192 * Initialize device IRQ with @irq_type and register an event notifier.
194 int qemu_vfio_pci_init_irq(QEMUVFIOState
*s
, EventNotifier
*e
,
195 int irq_type
, Error
**errp
)
198 struct vfio_irq_set
*irq_set
;
200 struct vfio_irq_info irq_info
= { .argsz
= sizeof(irq_info
) };
202 irq_info
.index
= irq_type
;
203 if (ioctl(s
->device
, VFIO_DEVICE_GET_IRQ_INFO
, &irq_info
)) {
204 error_setg_errno(errp
, errno
, "Failed to get device interrupt info");
207 if (!(irq_info
.flags
& VFIO_IRQ_INFO_EVENTFD
)) {
208 error_setg(errp
, "Device interrupt doesn't support eventfd");
212 irq_set_size
= sizeof(*irq_set
) + sizeof(int);
213 irq_set
= g_malloc0(irq_set_size
);
215 /* Get to a known IRQ state */
216 *irq_set
= (struct vfio_irq_set
) {
217 .argsz
= irq_set_size
,
218 .flags
= VFIO_IRQ_SET_DATA_EVENTFD
| VFIO_IRQ_SET_ACTION_TRIGGER
,
219 .index
= irq_info
.index
,
224 *(int *)&irq_set
->data
= event_notifier_get_fd(e
);
225 r
= ioctl(s
->device
, VFIO_DEVICE_SET_IRQS
, irq_set
);
228 error_setg_errno(errp
, errno
, "Failed to setup device interrupt");
234 static int qemu_vfio_pci_read_config(QEMUVFIOState
*s
, void *buf
,
239 trace_qemu_vfio_pci_read_config(buf
, ofs
, size
,
240 s
->config_region_info
.offset
,
241 s
->config_region_info
.size
);
242 assert(QEMU_IS_ALIGNED(s
->config_region_info
.offset
+ ofs
, size
));
244 ret
= pread(s
->device
, buf
, size
, s
->config_region_info
.offset
+ ofs
);
245 } while (ret
== -1 && errno
== EINTR
);
246 return ret
== size
? 0 : -errno
;
249 static int qemu_vfio_pci_write_config(QEMUVFIOState
*s
, void *buf
, int size
, int ofs
)
253 trace_qemu_vfio_pci_write_config(buf
, ofs
, size
,
254 s
->config_region_info
.offset
,
255 s
->config_region_info
.size
);
256 assert(QEMU_IS_ALIGNED(s
->config_region_info
.offset
+ ofs
, size
));
258 ret
= pwrite(s
->device
, buf
, size
, s
->config_region_info
.offset
+ ofs
);
259 } while (ret
== -1 && errno
== EINTR
);
260 return ret
== size
? 0 : -errno
;
263 static void collect_usable_iova_ranges(QEMUVFIOState
*s
, void *buf
)
265 struct vfio_iommu_type1_info
*info
= (struct vfio_iommu_type1_info
*)buf
;
266 struct vfio_info_cap_header
*cap
= (void *)buf
+ info
->cap_offset
;
267 struct vfio_iommu_type1_info_cap_iova_range
*cap_iova_range
;
270 while (cap
->id
!= VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE
) {
274 cap
= (struct vfio_info_cap_header
*)(buf
+ cap
->next
);
277 cap_iova_range
= (struct vfio_iommu_type1_info_cap_iova_range
*)cap
;
279 s
->nb_iova_ranges
= cap_iova_range
->nr_iovas
;
280 if (s
->nb_iova_ranges
> 1) {
281 s
->usable_iova_ranges
=
282 g_realloc(s
->usable_iova_ranges
,
283 s
->nb_iova_ranges
* sizeof(struct IOVARange
));
286 for (i
= 0; i
< s
->nb_iova_ranges
; i
++) {
287 s
->usable_iova_ranges
[i
].start
= cap_iova_range
->iova_ranges
[i
].start
;
288 s
->usable_iova_ranges
[i
].end
= cap_iova_range
->iova_ranges
[i
].end
;
292 static int qemu_vfio_init_pci(QEMUVFIOState
*s
, const char *device
,
298 struct vfio_group_status group_status
= { .argsz
= sizeof(group_status
) };
299 struct vfio_iommu_type1_info
*iommu_info
= NULL
;
300 size_t iommu_info_size
= sizeof(*iommu_info
);
301 struct vfio_device_info device_info
= { .argsz
= sizeof(device_info
) };
302 char *group_file
= NULL
;
304 s
->usable_iova_ranges
= NULL
;
306 /* Create a new container */
307 s
->container
= open("/dev/vfio/vfio", O_RDWR
);
309 if (s
->container
== -1) {
310 error_setg_errno(errp
, errno
, "Failed to open /dev/vfio/vfio");
313 if (ioctl(s
->container
, VFIO_GET_API_VERSION
) != VFIO_API_VERSION
) {
314 error_setg(errp
, "Invalid VFIO version");
319 if (!ioctl(s
->container
, VFIO_CHECK_EXTENSION
, VFIO_TYPE1_IOMMU
)) {
320 error_setg_errno(errp
, errno
, "VFIO IOMMU Type1 is not supported");
326 group_file
= sysfs_find_group_file(device
, errp
);
332 s
->group
= open(group_file
, O_RDWR
);
333 if (s
->group
== -1) {
334 error_setg_errno(errp
, errno
, "Failed to open VFIO group file: %s",
342 /* Test the group is viable and available */
343 if (ioctl(s
->group
, VFIO_GROUP_GET_STATUS
, &group_status
)) {
344 error_setg_errno(errp
, errno
, "Failed to get VFIO group status");
349 if (!(group_status
.flags
& VFIO_GROUP_FLAGS_VIABLE
)) {
350 error_setg(errp
, "VFIO group is not viable");
355 /* Add the group to the container */
356 if (ioctl(s
->group
, VFIO_GROUP_SET_CONTAINER
, &s
->container
)) {
357 error_setg_errno(errp
, errno
, "Failed to add group to VFIO container");
362 /* Enable the IOMMU model we want */
363 if (ioctl(s
->container
, VFIO_SET_IOMMU
, VFIO_TYPE1_IOMMU
)) {
364 error_setg_errno(errp
, errno
, "Failed to set VFIO IOMMU type");
369 iommu_info
= g_malloc0(iommu_info_size
);
370 iommu_info
->argsz
= iommu_info_size
;
372 /* Get additional IOMMU info */
373 if (ioctl(s
->container
, VFIO_IOMMU_GET_INFO
, iommu_info
)) {
374 error_setg_errno(errp
, errno
, "Failed to get IOMMU info");
380 * if the kernel does not report usable IOVA regions, choose
381 * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX -1] region
383 s
->nb_iova_ranges
= 1;
384 s
->usable_iova_ranges
= g_new0(struct IOVARange
, 1);
385 s
->usable_iova_ranges
[0].start
= QEMU_VFIO_IOVA_MIN
;
386 s
->usable_iova_ranges
[0].end
= QEMU_VFIO_IOVA_MAX
- 1;
388 if (iommu_info
->argsz
> iommu_info_size
) {
389 iommu_info_size
= iommu_info
->argsz
;
390 iommu_info
= g_realloc(iommu_info
, iommu_info_size
);
391 if (ioctl(s
->container
, VFIO_IOMMU_GET_INFO
, iommu_info
)) {
395 collect_usable_iova_ranges(s
, iommu_info
);
398 s
->device
= ioctl(s
->group
, VFIO_GROUP_GET_DEVICE_FD
, device
);
401 error_setg_errno(errp
, errno
, "Failed to get device fd");
406 /* Test and setup the device */
407 if (ioctl(s
->device
, VFIO_DEVICE_GET_INFO
, &device_info
)) {
408 error_setg_errno(errp
, errno
, "Failed to get device info");
413 if (device_info
.num_regions
< VFIO_PCI_CONFIG_REGION_INDEX
) {
414 error_setg(errp
, "Invalid device regions");
419 s
->config_region_info
= (struct vfio_region_info
) {
420 .index
= VFIO_PCI_CONFIG_REGION_INDEX
,
421 .argsz
= sizeof(struct vfio_region_info
),
423 if (ioctl(s
->device
, VFIO_DEVICE_GET_REGION_INFO
, &s
->config_region_info
)) {
424 error_setg_errno(errp
, errno
, "Failed to get config region info");
428 trace_qemu_vfio_region_info("config", s
->config_region_info
.offset
,
429 s
->config_region_info
.size
,
430 s
->config_region_info
.cap_offset
);
432 for (i
= 0; i
< ARRAY_SIZE(s
->bar_region_info
); i
++) {
433 ret
= qemu_vfio_pci_init_bar(s
, i
, errp
);
439 /* Enable bus master */
440 ret
= qemu_vfio_pci_read_config(s
, &pci_cmd
, sizeof(pci_cmd
), PCI_COMMAND
);
444 pci_cmd
|= PCI_COMMAND_MASTER
;
445 ret
= qemu_vfio_pci_write_config(s
, &pci_cmd
, sizeof(pci_cmd
), PCI_COMMAND
);
452 g_free(s
->usable_iova_ranges
);
453 s
->usable_iova_ranges
= NULL
;
454 s
->nb_iova_ranges
= 0;
462 static void qemu_vfio_ram_block_added(RAMBlockNotifier
*n
,
463 void *host
, size_t size
)
465 QEMUVFIOState
*s
= container_of(n
, QEMUVFIOState
, ram_notifier
);
466 trace_qemu_vfio_ram_block_added(s
, host
, size
);
467 qemu_vfio_dma_map(s
, host
, size
, false, NULL
);
470 static void qemu_vfio_ram_block_removed(RAMBlockNotifier
*n
,
471 void *host
, size_t size
)
473 QEMUVFIOState
*s
= container_of(n
, QEMUVFIOState
, ram_notifier
);
475 trace_qemu_vfio_ram_block_removed(s
, host
, size
);
476 qemu_vfio_dma_unmap(s
, host
);
480 static int qemu_vfio_init_ramblock(RAMBlock
*rb
, void *opaque
)
482 void *host_addr
= qemu_ram_get_host_addr(rb
);
483 ram_addr_t length
= qemu_ram_get_used_length(rb
);
485 QEMUVFIOState
*s
= opaque
;
490 ret
= qemu_vfio_dma_map(s
, host_addr
, length
, false, NULL
);
492 fprintf(stderr
, "qemu_vfio_init_ramblock: failed %p %" PRId64
"\n",
493 host_addr
, (uint64_t)length
);
498 static void qemu_vfio_open_common(QEMUVFIOState
*s
)
500 qemu_mutex_init(&s
->lock
);
501 s
->ram_notifier
.ram_block_added
= qemu_vfio_ram_block_added
;
502 s
->ram_notifier
.ram_block_removed
= qemu_vfio_ram_block_removed
;
503 ram_block_notifier_add(&s
->ram_notifier
);
504 s
->low_water_mark
= QEMU_VFIO_IOVA_MIN
;
505 s
->high_water_mark
= QEMU_VFIO_IOVA_MAX
;
506 qemu_ram_foreach_block(qemu_vfio_init_ramblock
, s
);
510 * Open a PCI device, e.g. "0000:00:01.0".
512 QEMUVFIOState
*qemu_vfio_open_pci(const char *device
, Error
**errp
)
515 QEMUVFIOState
*s
= g_new0(QEMUVFIOState
, 1);
518 * VFIO may pin all memory inside mappings, resulting it in pinning
519 * all memory inside RAM blocks unconditionally.
521 r
= ram_block_discard_disable(true);
523 error_setg_errno(errp
, -r
, "Cannot set discarding of RAM broken");
528 r
= qemu_vfio_init_pci(s
, device
, errp
);
530 ram_block_discard_disable(false);
534 qemu_vfio_open_common(s
);
538 static void qemu_vfio_dump_mappings(QEMUVFIOState
*s
)
540 for (int i
= 0; i
< s
->nr_mappings
; ++i
) {
541 trace_qemu_vfio_dump_mapping(s
->mappings
[i
].host
,
543 s
->mappings
[i
].size
);
548 * Find the mapping entry that contains [host, host + size) and set @index to
549 * the position. If no entry contains it, @index is the position _after_ which
550 * to insert the new mapping. IOW, it is the index of the largest element that
551 * is smaller than @host, or -1 if no entry is.
553 static IOVAMapping
*qemu_vfio_find_mapping(QEMUVFIOState
*s
, void *host
,
556 IOVAMapping
*p
= s
->mappings
;
557 IOVAMapping
*q
= p
? p
+ s
->nr_mappings
- 1 : NULL
;
559 trace_qemu_vfio_find_mapping(s
, host
);
565 mid
= p
+ (q
- p
) / 2;
569 if (mid
->host
> host
) {
571 } else if (mid
->host
< host
) {
577 if (mid
->host
> host
) {
579 } else if (mid
< &s
->mappings
[s
->nr_mappings
- 1]
580 && (mid
+ 1)->host
<= host
) {
583 *index
= mid
- &s
->mappings
[0];
584 if (mid
>= &s
->mappings
[0] &&
585 mid
->host
<= host
&& mid
->host
+ mid
->size
> host
) {
586 assert(mid
< &s
->mappings
[s
->nr_mappings
]);
589 /* At this point *index + 1 is the right position to insert the new
595 * Allocate IOVA and create a new mapping record and insert it in @s.
597 static IOVAMapping
*qemu_vfio_add_mapping(QEMUVFIOState
*s
,
598 void *host
, size_t size
,
599 int index
, uint64_t iova
)
602 IOVAMapping m
= {.host
= host
, .size
= size
, .iova
= iova
};
605 assert(QEMU_IS_ALIGNED(size
, qemu_real_host_page_size
));
606 assert(QEMU_IS_ALIGNED(s
->low_water_mark
, qemu_real_host_page_size
));
607 assert(QEMU_IS_ALIGNED(s
->high_water_mark
, qemu_real_host_page_size
));
608 trace_qemu_vfio_new_mapping(s
, host
, size
, index
, iova
);
612 s
->mappings
= g_renew(IOVAMapping
, s
->mappings
, s
->nr_mappings
);
613 insert
= &s
->mappings
[index
];
614 shift
= s
->nr_mappings
- index
- 1;
616 memmove(insert
+ 1, insert
, shift
* sizeof(s
->mappings
[0]));
622 /* Do the DMA mapping with VFIO. */
623 static int qemu_vfio_do_mapping(QEMUVFIOState
*s
, void *host
, size_t size
,
626 struct vfio_iommu_type1_dma_map dma_map
= {
627 .argsz
= sizeof(dma_map
),
628 .flags
= VFIO_DMA_MAP_FLAG_READ
| VFIO_DMA_MAP_FLAG_WRITE
,
630 .vaddr
= (uintptr_t)host
,
633 trace_qemu_vfio_do_mapping(s
, host
, iova
, size
);
635 if (ioctl(s
->container
, VFIO_IOMMU_MAP_DMA
, &dma_map
)) {
636 error_report("VFIO_MAP_DMA failed: %s", strerror(errno
));
643 * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
645 static void qemu_vfio_undo_mapping(QEMUVFIOState
*s
, IOVAMapping
*mapping
,
649 struct vfio_iommu_type1_dma_unmap unmap
= {
650 .argsz
= sizeof(unmap
),
652 .iova
= mapping
->iova
,
653 .size
= mapping
->size
,
656 index
= mapping
- s
->mappings
;
657 assert(mapping
->size
> 0);
658 assert(QEMU_IS_ALIGNED(mapping
->size
, qemu_real_host_page_size
));
659 assert(index
>= 0 && index
< s
->nr_mappings
);
660 if (ioctl(s
->container
, VFIO_IOMMU_UNMAP_DMA
, &unmap
)) {
661 error_setg_errno(errp
, errno
, "VFIO_UNMAP_DMA failed");
663 memmove(mapping
, &s
->mappings
[index
+ 1],
664 sizeof(s
->mappings
[0]) * (s
->nr_mappings
- index
- 1));
666 s
->mappings
= g_renew(IOVAMapping
, s
->mappings
, s
->nr_mappings
);
669 /* Check if the mapping list is (ascending) ordered. */
670 static bool qemu_vfio_verify_mappings(QEMUVFIOState
*s
)
673 if (QEMU_VFIO_DEBUG
) {
674 for (i
= 0; i
< s
->nr_mappings
- 1; ++i
) {
675 if (!(s
->mappings
[i
].host
< s
->mappings
[i
+ 1].host
)) {
676 fprintf(stderr
, "item %d not sorted!\n", i
);
677 qemu_vfio_dump_mappings(s
);
680 if (!(s
->mappings
[i
].host
+ s
->mappings
[i
].size
<=
681 s
->mappings
[i
+ 1].host
)) {
682 fprintf(stderr
, "item %d overlap with next!\n", i
);
683 qemu_vfio_dump_mappings(s
);
692 qemu_vfio_find_fixed_iova(QEMUVFIOState
*s
, size_t size
, uint64_t *iova
)
696 for (i
= 0; i
< s
->nb_iova_ranges
; i
++) {
697 if (s
->usable_iova_ranges
[i
].end
< s
->low_water_mark
) {
701 MAX(s
->low_water_mark
, s
->usable_iova_ranges
[i
].start
);
703 if (s
->usable_iova_ranges
[i
].end
- s
->low_water_mark
+ 1 >= size
||
704 s
->usable_iova_ranges
[i
].end
- s
->low_water_mark
+ 1 == 0) {
705 *iova
= s
->low_water_mark
;
706 s
->low_water_mark
+= size
;
714 qemu_vfio_find_temp_iova(QEMUVFIOState
*s
, size_t size
, uint64_t *iova
)
718 for (i
= s
->nb_iova_ranges
- 1; i
>= 0; i
--) {
719 if (s
->usable_iova_ranges
[i
].start
> s
->high_water_mark
) {
723 MIN(s
->high_water_mark
, s
->usable_iova_ranges
[i
].end
+ 1);
725 if (s
->high_water_mark
- s
->usable_iova_ranges
[i
].start
+ 1 >= size
||
726 s
->high_water_mark
- s
->usable_iova_ranges
[i
].start
+ 1 == 0) {
727 *iova
= s
->high_water_mark
- size
;
728 s
->high_water_mark
= *iova
;
735 /* Map [host, host + size) area into a contiguous IOVA address space, and store
736 * the result in @iova if not NULL. The caller need to make sure the area is
737 * aligned to page size, and mustn't overlap with existing mapping areas (split
738 * mapping status within this area is not allowed).
740 int qemu_vfio_dma_map(QEMUVFIOState
*s
, void *host
, size_t size
,
741 bool temporary
, uint64_t *iova
)
745 IOVAMapping
*mapping
;
748 assert(QEMU_PTR_IS_ALIGNED(host
, qemu_real_host_page_size
));
749 assert(QEMU_IS_ALIGNED(size
, qemu_real_host_page_size
));
750 trace_qemu_vfio_dma_map(s
, host
, size
, temporary
, iova
);
751 qemu_mutex_lock(&s
->lock
);
752 mapping
= qemu_vfio_find_mapping(s
, host
, &index
);
754 iova0
= mapping
->iova
+ ((uint8_t *)host
- (uint8_t *)mapping
->host
);
756 if (s
->high_water_mark
- s
->low_water_mark
+ 1 < size
) {
761 if (qemu_vfio_find_fixed_iova(s
, size
, &iova0
)) {
766 mapping
= qemu_vfio_add_mapping(s
, host
, size
, index
+ 1, iova0
);
771 assert(qemu_vfio_verify_mappings(s
));
772 ret
= qemu_vfio_do_mapping(s
, host
, size
, iova0
);
774 qemu_vfio_undo_mapping(s
, mapping
, NULL
);
777 qemu_vfio_dump_mappings(s
);
779 if (qemu_vfio_find_temp_iova(s
, size
, &iova0
)) {
783 ret
= qemu_vfio_do_mapping(s
, host
, size
, iova0
);
789 trace_qemu_vfio_dma_mapped(s
, host
, iova0
, size
);
794 qemu_mutex_unlock(&s
->lock
);
798 /* Reset the high watermark and free all "temporary" mappings. */
799 int qemu_vfio_dma_reset_temporary(QEMUVFIOState
*s
)
801 struct vfio_iommu_type1_dma_unmap unmap
= {
802 .argsz
= sizeof(unmap
),
804 .iova
= s
->high_water_mark
,
805 .size
= QEMU_VFIO_IOVA_MAX
- s
->high_water_mark
,
807 trace_qemu_vfio_dma_reset_temporary(s
);
808 QEMU_LOCK_GUARD(&s
->lock
);
809 if (ioctl(s
->container
, VFIO_IOMMU_UNMAP_DMA
, &unmap
)) {
810 error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno
));
813 s
->high_water_mark
= QEMU_VFIO_IOVA_MAX
;
817 /* Unmapping the whole area that was previously mapped with
818 * qemu_vfio_dma_map(). */
819 void qemu_vfio_dma_unmap(QEMUVFIOState
*s
, void *host
)
828 trace_qemu_vfio_dma_unmap(s
, host
);
829 qemu_mutex_lock(&s
->lock
);
830 m
= qemu_vfio_find_mapping(s
, host
, &index
);
834 qemu_vfio_undo_mapping(s
, m
, NULL
);
836 qemu_mutex_unlock(&s
->lock
);
839 static void qemu_vfio_reset(QEMUVFIOState
*s
)
841 ioctl(s
->device
, VFIO_DEVICE_RESET
);
844 /* Close and free the VFIO resources. */
845 void qemu_vfio_close(QEMUVFIOState
*s
)
852 for (i
= 0; i
< s
->nr_mappings
; ++i
) {
853 qemu_vfio_undo_mapping(s
, &s
->mappings
[i
], NULL
);
855 ram_block_notifier_remove(&s
->ram_notifier
);
856 g_free(s
->usable_iova_ranges
);
857 s
->nb_iova_ranges
= 0;
862 ram_block_discard_disable(false);