/*
 * Copyright 2016 - 2018 Red Hat, Inc.
 *
 * Authors:
 *   Fam Zheng <famz@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
13 #include "qemu/osdep.h"
14 #include <sys/ioctl.h>
15 #include <linux/vfio.h>
16 #include "qapi/error.h"
17 #include "exec/ramlist.h"
18 #include "exec/cpu-common.h"
19 #include "exec/memory.h"
21 #include "qemu/error-report.h"
22 #include "standard-headers/linux/pci_regs.h"
23 #include "qemu/event_notifier.h"
24 #include "qemu/vfio-helpers.h"
25 #include "qemu/lockable.h"

#define QEMU_VFIO_DEBUG 0

#define QEMU_VFIO_IOVA_MIN 0x10000ULL
/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
 * we can use a runtime limit; alternatively it's also possible to do platform
 * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
 */
#define QEMU_VFIO_IOVA_MAX (1ULL << 39)

typedef struct {
    /* Page aligned addr. */
    void *host;
    size_t size;
    uint64_t iova;
} IOVAMapping;

struct IOVARange {
    uint64_t start;
    uint64_t end;
};

struct QEMUVFIOState {
    QemuMutex lock;

    /* These fields are protected by BQL */
    int container;
    int group;
    int device;
    RAMBlockNotifier ram_notifier;
    struct vfio_region_info config_region_info, bar_region_info[6];
    struct IOVARange *usable_iova_ranges;
    uint8_t nb_iova_ranges;

    /* These fields are protected by @lock */
    /* VFIO's IO virtual address space is managed by splitting into a few
     * sections:
     *
     * ---------------       <= 0
     * |xxxxxxxxxxxxx|
     * |-------------|       <= QEMU_VFIO_IOVA_MIN
     * |    Fixed    |
     * |-------------|       <= low_water_mark
     * |    Free     |
     * |-------------|       <= high_water_mark
     * |    Temp     |
     * |-------------|       <= QEMU_VFIO_IOVA_MAX
     * |xxxxxxxxxxxxx|
     * ---------------
     *
     * - Addresses lower than QEMU_VFIO_IOVA_MIN are reserved as invalid;
     *
     * - Fixed mappings of HVAs are assigned "low" IOVAs in the range of
     *   [QEMU_VFIO_IOVA_MIN, low_water_mark). Once allocated they will not be
     *   reclaimed - low_water_mark never shrinks;
     *
     * - IOVAs in range [low_water_mark, high_water_mark) are free;
     *
     * - IOVAs in range [high_water_mark, QEMU_VFIO_IOVA_MAX) are volatile
     *   mappings. At each qemu_vfio_dma_reset_temporary() call, the whole area
     *   is recycled. The caller should make sure I/O's depending on these
     *   mappings are completed before calling.
     */
    uint64_t low_water_mark;
    uint64_t high_water_mark;
    IOVAMapping *mappings;
    int nr_mappings;
};
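
/*
 * Illustrative sketch (not part of the original file): how the two water
 * marks move under the policy described above. Assuming a fresh state where
 * low_water_mark == QEMU_VFIO_IOVA_MIN and high_water_mark ==
 * QEMU_VFIO_IOVA_MAX, and page-aligned buffers buf1/buf2:
 *
 *     uint64_t iova_fixed, iova_temp;
 *     Error *err = NULL;
 *
 *     qemu_vfio_dma_map(s, buf1, 0x200000, false, &iova_fixed, &err);
 *     // iova_fixed == old low_water_mark; low_water_mark grows by 0x200000
 *     qemu_vfio_dma_map(s, buf2, 0x200000, true, &iova_temp, &err);
 *     // iova_temp == high_water_mark - 0x200000; high_water_mark shrinks
 *
 * The marks only move toward each other; once the gap between them is
 * smaller than a requested size, qemu_vfio_dma_map() fails with -ENOMEM.
 */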

/**
 * Find group file by PCI device address as specified by @device, and return
 * the path. The returned string is owned by the caller and should be
 * g_free'ed later.
 */
static char *sysfs_find_group_file(const char *device, Error **errp)
{
    g_autoptr(GError) gerr = NULL;
    char *sysfs_link = NULL;
    char *sysfs_group = NULL;
    char *p;
    char *path = NULL;

    sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device);
    sysfs_group = g_file_read_link(sysfs_link, &gerr);
    if (gerr) {
        error_setg(errp, "Failed to find iommu group sysfs path: %s",
                   gerr->message);
        goto out;
    }
    p = strrchr(sysfs_group, '/');
    if (!p) {
        error_setg(errp, "Failed to find iommu group number");
        goto out;
    }

    path = g_strdup_printf("/dev/vfio/%s", p + 1);
out:
    g_free(sysfs_link);
    g_free(sysfs_group);
    return path;
}
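
/*
 * Illustrative example (not part of the original file): for a device at
 * "0000:01:00.0" whose sysfs iommu_group symlink resolves to
 * ".../kernel/iommu_groups/12", this returns "/dev/vfio/12"; on failure it
 * sets @errp and returns NULL.
 */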

static inline void assert_bar_index_valid(QEMUVFIOState *s, int index)
{
    assert(index >= 0 && index < ARRAY_SIZE(s->bar_region_info));
}

static int qemu_vfio_pci_init_bar(QEMUVFIOState *s, int index, Error **errp)
{
    g_autofree char *barname = NULL;
    assert_bar_index_valid(s, index);
    s->bar_region_info[index] = (struct vfio_region_info) {
        .index = VFIO_PCI_BAR0_REGION_INDEX + index,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->bar_region_info[index])) {
        error_setg_errno(errp, errno, "Failed to get BAR region info");
        return -errno;
    }
    barname = g_strdup_printf("bar[%d]", index);
    trace_qemu_vfio_region_info(barname, s->bar_region_info[index].offset,
                                s->bar_region_info[index].size,
                                s->bar_region_info[index].cap_offset);

    return 0;
}

/**
 * Map a PCI bar area.
 */
void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
                            uint64_t offset, uint64_t size, int prot,
                            Error **errp)
{
    void *p;
    assert(QEMU_IS_ALIGNED(offset, qemu_real_host_page_size()));
    assert_bar_index_valid(s, index);
    p = mmap(NULL, MIN(size, s->bar_region_info[index].size - offset),
             prot, MAP_SHARED,
             s->device, s->bar_region_info[index].offset + offset);
    trace_qemu_vfio_pci_map_bar(index, s->bar_region_info[index].offset,
                                size, offset, p);
    if (p == MAP_FAILED) {
        error_setg_errno(errp, errno, "Failed to map BAR region");
        p = NULL;
    }
    return p;
}

/**
 * Unmap a PCI bar area.
 */
void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
                             uint64_t offset, uint64_t size)
{
    if (bar) {
        munmap(bar, MIN(size, s->bar_region_info[index].size - offset));
    }
}
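
/*
 * Usage sketch (illustrative, not part of the original file): map the first
 * 0x1000 bytes of BAR 0 read/write, read a 32-bit register at a made-up
 * offset 0x1c, then unmap:
 *
 *     void *regs = qemu_vfio_pci_map_bar(s, 0, 0, 0x1000,
 *                                        PROT_READ | PROT_WRITE, errp);
 *     if (regs) {
 *         uint32_t val = *(volatile uint32_t *)((uintptr_t)regs + 0x1c);
 *         qemu_vfio_pci_unmap_bar(s, 0, regs, 0, 0x1000);
 *     }
 */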

/**
 * Initialize device IRQ with @irq_type and register an event notifier.
 */
int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
                           int irq_type, Error **errp)
{
    int r;
    struct vfio_irq_set *irq_set;
    size_t irq_set_size;
    struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };

    irq_info.index = irq_type;
    if (ioctl(s->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info)) {
        error_setg_errno(errp, errno, "Failed to get device interrupt info");
        return -errno;
    }
    if (!(irq_info.flags & VFIO_IRQ_INFO_EVENTFD)) {
        error_setg(errp, "Device interrupt doesn't support eventfd");
        return -EINVAL;
    }

    irq_set_size = sizeof(*irq_set) + sizeof(int);
    irq_set = g_malloc0(irq_set_size);

    /* Get to a known IRQ state */
    *irq_set = (struct vfio_irq_set) {
        .argsz = irq_set_size,
        .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
        .index = irq_info.index,
        .start = 0,
        .count = 1,
    };

    *(int *)&irq_set->data = event_notifier_get_fd(e);
    r = ioctl(s->device, VFIO_DEVICE_SET_IRQS, irq_set);
    g_free(irq_set);
    if (r) {
        error_setg_errno(errp, errno, "Failed to setup device interrupt");
        return -errno;
    }
    return 0;
}

static int qemu_vfio_pci_read_config(QEMUVFIOState *s, void *buf,
                                     int size, int ofs)
{
    int ret;

    trace_qemu_vfio_pci_read_config(buf, ofs, size,
                                    s->config_region_info.offset,
                                    s->config_region_info.size);
    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
    ret = RETRY_ON_EINTR(
        pread(s->device, buf, size, s->config_region_info.offset + ofs)
    );
    return ret == size ? 0 : -errno;
}

static int qemu_vfio_pci_write_config(QEMUVFIOState *s, void *buf, int size, int ofs)
{
    int ret;

    trace_qemu_vfio_pci_write_config(buf, ofs, size,
                                     s->config_region_info.offset,
                                     s->config_region_info.size);
    assert(QEMU_IS_ALIGNED(s->config_region_info.offset + ofs, size));
    ret = RETRY_ON_EINTR(
        pwrite(s->device, buf, size, s->config_region_info.offset + ofs)
    );
    return ret == size ? 0 : -errno;
}

static void collect_usable_iova_ranges(QEMUVFIOState *s, void *buf)
{
    struct vfio_iommu_type1_info *info = (struct vfio_iommu_type1_info *)buf;
    struct vfio_info_cap_header *cap = (void *)buf + info->cap_offset;
    struct vfio_iommu_type1_info_cap_iova_range *cap_iova_range;
    int i;

    /* Walk the capability chain until the IOVA range capability is found */
    while (cap->id != VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
        if (!cap->next) {
            return;
        }
        cap = buf + cap->next;
    }

    cap_iova_range = (struct vfio_iommu_type1_info_cap_iova_range *)cap;

    s->nb_iova_ranges = cap_iova_range->nr_iovas;
    if (s->nb_iova_ranges > 1) {
        s->usable_iova_ranges =
            g_renew(struct IOVARange, s->usable_iova_ranges,
                    s->nb_iova_ranges);
    }

    for (i = 0; i < s->nb_iova_ranges; i++) {
        s->usable_iova_ranges[i].start = cap_iova_range->iova_ranges[i].start;
        s->usable_iova_ranges[i].end = cap_iova_range->iova_ranges[i].end;
    }
}

static int qemu_vfio_init_pci(QEMUVFIOState *s, const char *device,
                              Error **errp)
{
    int ret;
    int i;
    uint16_t pci_cmd;
    struct vfio_group_status group_status = { .argsz = sizeof(group_status) };
    struct vfio_iommu_type1_info *iommu_info = NULL;
    size_t iommu_info_size = sizeof(*iommu_info);
    struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
    char *group_file = NULL;

    s->usable_iova_ranges = NULL;

    /* Create a new container */
    s->container = open("/dev/vfio/vfio", O_RDWR);

    if (s->container == -1) {
        error_setg_errno(errp, errno, "Failed to open /dev/vfio/vfio");
        return -errno;
    }
    if (ioctl(s->container, VFIO_GET_API_VERSION) != VFIO_API_VERSION) {
        error_setg(errp, "Invalid VFIO version");
        ret = -EINVAL;
        goto fail_container;
    }

    if (!ioctl(s->container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "VFIO IOMMU Type1 is not supported");
        ret = -EINVAL;
        goto fail_container;
    }

    /* Open the group */
    group_file = sysfs_find_group_file(device, errp);
    if (!group_file) {
        ret = -EINVAL;
        goto fail_container;
    }

    s->group = open(group_file, O_RDWR);
    if (s->group == -1) {
        error_setg_errno(errp, errno, "Failed to open VFIO group file: %s",
                         group_file);
        g_free(group_file);
        ret = -errno;
        goto fail_container;
    }
    g_free(group_file);

    /* Test the group is viable and available */
    if (ioctl(s->group, VFIO_GROUP_GET_STATUS, &group_status)) {
        error_setg_errno(errp, errno, "Failed to get VFIO group status");
        ret = -errno;
        goto fail;
    }

    if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
        error_setg(errp, "VFIO group is not viable");
        ret = -EINVAL;
        goto fail;
    }

    /* Add the group to the container */
    if (ioctl(s->group, VFIO_GROUP_SET_CONTAINER, &s->container)) {
        error_setg_errno(errp, errno, "Failed to add group to VFIO container");
        ret = -errno;
        goto fail;
    }

    /* Enable the IOMMU model we want */
    if (ioctl(s->container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)) {
        error_setg_errno(errp, errno, "Failed to set VFIO IOMMU type");
        ret = -errno;
        goto fail;
    }

    iommu_info = g_malloc0(iommu_info_size);
    iommu_info->argsz = iommu_info_size;

    /* Get additional IOMMU info */
    if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
        error_setg_errno(errp, errno, "Failed to get IOMMU info");
        ret = -errno;
        goto fail;
    }

    /*
     * If the kernel does not report usable IOVA regions, choose
     * the legacy [QEMU_VFIO_IOVA_MIN, QEMU_VFIO_IOVA_MAX - 1] region
     */
    s->nb_iova_ranges = 1;
    s->usable_iova_ranges = g_new0(struct IOVARange, 1);
    s->usable_iova_ranges[0].start = QEMU_VFIO_IOVA_MIN;
    s->usable_iova_ranges[0].end = QEMU_VFIO_IOVA_MAX - 1;

    if (iommu_info->argsz > iommu_info_size) {
        iommu_info_size = iommu_info->argsz;
        iommu_info = g_realloc(iommu_info, iommu_info_size);
        if (ioctl(s->container, VFIO_IOMMU_GET_INFO, iommu_info)) {
            ret = -errno;
            goto fail;
        }
        collect_usable_iova_ranges(s, iommu_info);
    }

    s->device = ioctl(s->group, VFIO_GROUP_GET_DEVICE_FD, device);

    if (s->device < 0) {
        error_setg_errno(errp, errno, "Failed to get device fd");
        ret = -errno;
        goto fail;
    }

    /* Test and setup the device */
    if (ioctl(s->device, VFIO_DEVICE_GET_INFO, &device_info)) {
        error_setg_errno(errp, errno, "Failed to get device info");
        ret = -errno;
        goto fail;
    }

    if (device_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX) {
        error_setg(errp, "Invalid device regions");
        ret = -EINVAL;
        goto fail;
    }

    s->config_region_info = (struct vfio_region_info) {
        .index = VFIO_PCI_CONFIG_REGION_INDEX,
        .argsz = sizeof(struct vfio_region_info),
    };
    if (ioctl(s->device, VFIO_DEVICE_GET_REGION_INFO, &s->config_region_info)) {
        error_setg_errno(errp, errno, "Failed to get config region info");
        ret = -errno;
        goto fail;
    }
    trace_qemu_vfio_region_info("config", s->config_region_info.offset,
                                s->config_region_info.size,
                                s->config_region_info.cap_offset);

    for (i = 0; i < ARRAY_SIZE(s->bar_region_info); i++) {
        ret = qemu_vfio_pci_init_bar(s, i, errp);
        if (ret) {
            goto fail;
        }
    }

    /* Enable bus master */
    ret = qemu_vfio_pci_read_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    pci_cmd |= PCI_COMMAND_MASTER;
    ret = qemu_vfio_pci_write_config(s, &pci_cmd, sizeof(pci_cmd), PCI_COMMAND);
    if (ret) {
        goto fail;
    }
    g_free(iommu_info);
    return 0;
fail:
    g_free(s->usable_iova_ranges);
    s->usable_iova_ranges = NULL;
    s->nb_iova_ranges = 0;
    g_free(iommu_info);
    close(s->group);
fail_container:
    close(s->container);
    return ret;
}

static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
                                      size_t size, size_t max_size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
    Error *local_err = NULL;
    int ret;

    trace_qemu_vfio_ram_block_added(s, host, max_size);
    ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err);
    if (ret) {
        error_reportf_err(local_err,
                          "qemu_vfio_dma_map(%p, %zu) failed: ",
                          host, max_size);
    }
}

static void qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host,
                                        size_t size, size_t max_size)
{
    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);

    if (host) {
        trace_qemu_vfio_ram_block_removed(s, host, max_size);
        qemu_vfio_dma_unmap(s, host);
    }
}

static void qemu_vfio_open_common(QEMUVFIOState *s)
{
    qemu_mutex_init(&s->lock);
    s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
    s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
    s->low_water_mark = QEMU_VFIO_IOVA_MIN;
    s->high_water_mark = QEMU_VFIO_IOVA_MAX;
    ram_block_notifier_add(&s->ram_notifier);
}

/**
 * Open a PCI device, e.g. "0000:00:01.0".
 */
QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp)
{
    int r;
    QEMUVFIOState *s = g_new0(QEMUVFIOState, 1);

    /*
     * VFIO may pin all memory inside mappings, which results in pinning
     * all memory inside RAM blocks unconditionally.
     */
    r = ram_block_discard_disable(true);
    if (r) {
        error_setg_errno(errp, -r, "Cannot set discarding of RAM broken");
        g_free(s);
        return NULL;
    }

    r = qemu_vfio_init_pci(s, device, errp);
    if (r) {
        ram_block_discard_disable(false);
        g_free(s);
        return NULL;
    }
    qemu_vfio_open_common(s);
    return s;
}
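
/*
 * Usage sketch (illustrative, not part of the original file; the PCI address
 * is made up):
 *
 *     Error *err = NULL;
 *     QEMUVFIOState *s = qemu_vfio_open_pci("0000:01:00.0", &err);
 *
 *     if (!s) {
 *         error_report_err(err);
 *         return;
 *     }
 *     // ... map BARs, register IRQs, map DMA buffers ...
 *     qemu_vfio_close(s);
 */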

static void qemu_vfio_dump_mappings(QEMUVFIOState *s)
{
    for (int i = 0; i < s->nr_mappings; ++i) {
        trace_qemu_vfio_dump_mapping(s->mappings[i].host,
                                     s->mappings[i].iova,
                                     s->mappings[i].size);
    }
}

/**
 * Find the mapping entry that contains [host, host + size) and set @index to
 * the position. If no entry contains it, @index is the position _after_ which
 * to insert the new mapping. IOW, it is the index of the largest element that
 * is smaller than @host, or -1 if there is no such entry.
 */
static IOVAMapping *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host,
                                           int *index)
{
    IOVAMapping *p = s->mappings;
    IOVAMapping *q = p ? p + s->nr_mappings - 1 : NULL;
    IOVAMapping *mid;

    trace_qemu_vfio_find_mapping(s, host);
    if (!p) {
        *index = -1;
        return NULL;
    }
    /* Binary search over the sorted mapping list */
    while (true) {
        mid = p + (q - p) / 2;
        if (mid == p) {
            break;
        }
        if (mid->host > host) {
            q = mid;
        } else if (mid->host < host) {
            p = mid;
        } else {
            break;
        }
    }
    if (mid->host > host) {
        mid--;
    } else if (mid < &s->mappings[s->nr_mappings - 1]
               && (mid + 1)->host <= host) {
        mid++;
    }
    *index = mid - &s->mappings[0];
    if (mid >= &s->mappings[0] &&
        mid->host <= host && mid->host + mid->size > host) {
        assert(mid < &s->mappings[s->nr_mappings]);
        return mid;
    }
    /* At this point *index + 1 is the right position to insert the new
     * mapping. */
    return NULL;
}

/**
 * Allocate IOVA and create a new mapping record and insert it in @s.
 */
static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
                                          void *host, size_t size,
                                          int index, uint64_t iova)
{
    int shift;
    IOVAMapping m = {.host = host, .size = size, .iova = iova};
    IOVAMapping *insert;

    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size()));
    trace_qemu_vfio_new_mapping(s, host, size, index, iova);

    assert(index >= 0);
    s->nr_mappings++;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
    insert = &s->mappings[index];
    shift = s->nr_mappings - index - 1;
    if (shift) {
        memmove(insert + 1, insert, shift * sizeof(s->mappings[0]));
    }
    *insert = m;
    return insert;
}

/* Do the DMA mapping with VFIO. */
static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
                                uint64_t iova, Error **errp)
{
    struct vfio_iommu_type1_dma_map dma_map = {
        .argsz = sizeof(dma_map),
        .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
        .iova = iova,
        .vaddr = (uintptr_t)host,
        .size = size,
    };
    trace_qemu_vfio_do_mapping(s, host, iova, size);

    if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
        error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
        return -errno;
    }
    return 0;
}

/**
 * Undo the DMA mapping from @s with VFIO, and remove from mapping list.
 */
static void qemu_vfio_undo_mapping(QEMUVFIOState *s, IOVAMapping *mapping,
                                   Error **errp)
{
    int index;
    struct vfio_iommu_type1_dma_unmap unmap = {
        .argsz = sizeof(unmap),
        .flags = 0,
        .iova = mapping->iova,
        .size = mapping->size,
    };

    index = mapping - s->mappings;
    assert(mapping->size > 0);
    assert(QEMU_IS_ALIGNED(mapping->size, qemu_real_host_page_size()));
    assert(index >= 0 && index < s->nr_mappings);
    if (ioctl(s->container, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
        error_setg_errno(errp, errno, "VFIO_UNMAP_DMA failed");
    }
    memmove(mapping, &s->mappings[index + 1],
            sizeof(s->mappings[0]) * (s->nr_mappings - index - 1));
    s->nr_mappings--;
    s->mappings = g_renew(IOVAMapping, s->mappings, s->nr_mappings);
}
661 static bool qemu_vfio_verify_mappings(QEMUVFIOState
*s
)
664 if (QEMU_VFIO_DEBUG
) {
665 for (i
= 0; i
< s
->nr_mappings
- 1; ++i
) {
666 if (!(s
->mappings
[i
].host
< s
->mappings
[i
+ 1].host
)) {
667 error_report("item %d not sorted!", i
);
668 qemu_vfio_dump_mappings(s
);
671 if (!(s
->mappings
[i
].host
+ s
->mappings
[i
].size
<=
672 s
->mappings
[i
+ 1].host
)) {
673 error_report("item %d overlap with next!", i
);
674 qemu_vfio_dump_mappings(s
);
682 static bool qemu_vfio_find_fixed_iova(QEMUVFIOState
*s
, size_t size
,
683 uint64_t *iova
, Error
**errp
)
687 for (i
= 0; i
< s
->nb_iova_ranges
; i
++) {
688 if (s
->usable_iova_ranges
[i
].end
< s
->low_water_mark
) {
692 MAX(s
->low_water_mark
, s
->usable_iova_ranges
[i
].start
);
694 if (s
->usable_iova_ranges
[i
].end
- s
->low_water_mark
+ 1 >= size
||
695 s
->usable_iova_ranges
[i
].end
- s
->low_water_mark
+ 1 == 0) {
696 *iova
= s
->low_water_mark
;
697 s
->low_water_mark
+= size
;
701 error_setg(errp
, "fixed iova range not found");

static bool qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size,
                                     uint64_t *iova, Error **errp)
{
    int i;

    for (i = s->nb_iova_ranges - 1; i >= 0; i--) {
        if (s->usable_iova_ranges[i].start > s->high_water_mark) {
            continue;
        }
        s->high_water_mark =
            MIN(s->high_water_mark, s->usable_iova_ranges[i].end + 1);

        /* The "+ 1 == 0" case covers a high water mark of 0 after wrap */
        if (s->high_water_mark - s->usable_iova_ranges[i].start + 1 >= size ||
            s->high_water_mark - s->usable_iova_ranges[i].start + 1 == 0) {
            *iova = s->high_water_mark - size;
            s->high_water_mark = *iova;
            return true;
        }
    }
    error_setg(errp, "temporary iova range not found");

    return false;
}

/**
 * qemu_vfio_water_mark_reached:
 *
 * Returns %true if high watermark has been reached, %false otherwise.
 */
static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, size_t size,
                                         Error **errp)
{
    if (s->high_water_mark - s->low_water_mark + 1 < size) {
        error_setg(errp, "iova exhausted (water mark reached)");
        return true;
    }
    return false;
}

/* Map [host, host + size) area into a contiguous IOVA address space, and store
 * the result in @iova if not NULL. The caller needs to make sure the area is
 * aligned to page size, and mustn't overlap with existing mapping areas (split
 * mapping status within this area is not allowed).
 */
int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
                      bool temporary, uint64_t *iova, Error **errp)
{
    int index;
    IOVAMapping *mapping;
    uint64_t iova0;

    assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size()));
    assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size()));
    trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
    QEMU_LOCK_GUARD(&s->lock);
    mapping = qemu_vfio_find_mapping(s, host, &index);
    if (mapping) {
        iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
    } else {
        int ret;

        if (qemu_vfio_water_mark_reached(s, size, errp)) {
            return -ENOMEM;
        }
        if (!temporary) {
            if (!qemu_vfio_find_fixed_iova(s, size, &iova0, errp)) {
                return -ENOMEM;
            }

            mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
            assert(qemu_vfio_verify_mappings(s));
            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
            if (ret) {
                qemu_vfio_undo_mapping(s, mapping, NULL);
                return ret;
            }
            qemu_vfio_dump_mappings(s);
        } else {
            if (!qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
                return -ENOMEM;
            }
            ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
            if (ret) {
                return ret;
            }
        }
    }
    trace_qemu_vfio_dma_mapped(s, host, iova0, size);
    if (iova) {
        *iova = iova0;
    }
    return 0;
}
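
/*
 * Usage sketch (illustrative, not part of the original file): map a
 * page-aligned buffer for DMA, program the device with the returned IOVA,
 * and unmap when done. Here @len is assumed to be a multiple of the host
 * page size:
 *
 *     uint64_t iova;
 *     void *buf = qemu_memalign(qemu_real_host_page_size(), len);
 *
 *     if (qemu_vfio_dma_map(s, buf, len, false, &iova, &err) == 0) {
 *         // ... hand @iova to the device and run the I/O ...
 *         qemu_vfio_dma_unmap(s, buf);
 *     }
 */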
800 /* Reset the high watermark and free all "temporary" mappings. */
801 int qemu_vfio_dma_reset_temporary(QEMUVFIOState
*s
)
803 struct vfio_iommu_type1_dma_unmap unmap
= {
804 .argsz
= sizeof(unmap
),
806 .iova
= s
->high_water_mark
,
807 .size
= QEMU_VFIO_IOVA_MAX
- s
->high_water_mark
,
809 trace_qemu_vfio_dma_reset_temporary(s
);
810 QEMU_LOCK_GUARD(&s
->lock
);
811 if (ioctl(s
->container
, VFIO_IOMMU_UNMAP_DMA
, &unmap
)) {
812 error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno
));
815 s
->high_water_mark
= QEMU_VFIO_IOVA_MAX
;
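
/*
 * Illustrative note (not part of the original file): temporary mappings
 * pair with the reset above. A caller maps short-lived bounce buffers with
 * temporary == true and recycles the whole region once every I/O using
 * those IOVAs has completed:
 *
 *     qemu_vfio_dma_map(s, bounce, len, true, &iova, &err);
 *     // ... submit and complete the I/O using @iova ...
 *     qemu_vfio_dma_reset_temporary(s);
 */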

/* Unmap the whole area that was previously mapped with
 * qemu_vfio_dma_map(). */
void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
{
    int index = 0;
    IOVAMapping *m;

    if (!host) {
        return;
    }

    trace_qemu_vfio_dma_unmap(s, host);
    QEMU_LOCK_GUARD(&s->lock);
    m = qemu_vfio_find_mapping(s, host, &index);
    if (!m) {
        return;
    }
    qemu_vfio_undo_mapping(s, m, NULL);
}

static void qemu_vfio_reset(QEMUVFIOState *s)
{
    ioctl(s->device, VFIO_DEVICE_RESET);
}

/* Close and free the VFIO resources. */
void qemu_vfio_close(QEMUVFIOState *s)
{
    int i;

    if (!s) {
        return;
    }

    ram_block_notifier_remove(&s->ram_notifier);

    for (i = 0; i < s->nr_mappings; ++i) {
        qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
    }

    g_free(s->usable_iova_ranges);
    s->nb_iova_ranges = 0;
    qemu_vfio_reset(s);
    close(s->device);
    close(s->group);
    close(s->container);
    ram_block_discard_disable(false);

    g_free(s);
}