/*
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "exec/target_page.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qapi/error.h"
/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section,
                                     int page_mask)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(page_mask));

    return llend;
}
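
/*
 * Worked example for the helper above (illustrative numbers only): with
 * offset_within_address_space = 0x1000, section->size = 0x1801 and a 4 KiB
 * page size (page_mask = -0x1000), the sum is 0x2801 and the masking rounds
 * it down to 0x2000, i.e. one past the last page boundary fully covered by
 * the section.
 */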
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max,
                                                int page_mask)
{
    Int128 llend;
    bool is_ram = memory_region_is_ram(section->mr);
    bool is_iommu = memory_region_is_iommu(section->mr);
    bool is_protected = memory_region_is_protected(section->mr);

    /* vhost-vDPA doesn't allow MMIO to be mapped */
    bool is_ram_device = memory_region_is_ram_device(section->mr);

    if ((!is_ram && !is_iommu) || is_protected || is_ram_device) {
        trace_vhost_vdpa_skipped_memory_section(is_ram, is_iommu, is_protected,
                                                is_ram_device, iova_min,
                                                iova_max, page_mask);
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    /*
     * While using vIOMMU, sometimes the section will be larger than iova_max,
     * but the memory that actually maps is smaller, so move the check to
     * vhost_vdpa_iommu_map_notify(). That function will use the actual size
     * that maps to the kernel.
     */
    if (!is_iommu) {
        llend = vhost_vdpa_section_end(section, page_mask);
        if (int128_gt(llend, int128_make64(iova_max))) {
            error_report("RAM section out of device range (max=0x%" PRIx64
                         ", end addr=0x%" PRIx64 ")",
                         iova_max, int128_get64(llend));
            return true;
        }
    }

    return false;
}
/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return 0;
}
/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return 0;
}
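
/*
 * Usage sketch for the two helpers above (illustrative only; the concrete
 * iova/size/vaddr values are hypothetical): a caller maps a host buffer
 * into the device IOVA space and removes it again when done. Devices that
 * do not support ASIDs must pass asid == 0, which matches the
 * zero-initialized default.
 *
 *     if (vhost_vdpa_dma_map(s, 0, iova, size, vaddr, false) == 0) {
 *         ... let the device DMA into [iova, iova + size) ...
 *         vhost_vdpa_dma_unmap(s, 0, iova, size);
 *     }
 */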
static void vhost_vdpa_listener_begin_batch(VhostVDPAShared *s)
{
    int fd = s->device_fd;
    struct vhost_msg_v2 msg = {
        .type = VHOST_IOTLB_MSG_V2,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s)
{
    if (s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !s->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(s);
    }

    s->iotlb_batch_begin_sent = true;
}
static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;

    if (!(s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!s->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    s->iotlb_batch_begin_sent = false;
}
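
/*
 * Message-flow sketch for the batching above (only when
 * VHOST_BACKEND_F_IOTLB_BATCH has been negotiated): the listener wraps the
 * per-region updates of one memory transaction as
 *
 *     VHOST_IOTLB_BATCH_BEGIN
 *     VHOST_IOTLB_UPDATE / VHOST_IOTLB_INVALIDATE   (one per region change)
 *     VHOST_IOTLB_BATCH_END
 *
 * each sent as a struct vhost_msg_v2 write to the device fd, so the kernel
 * side can treat the whole transaction as a single batch.
 */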
static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);

    hwaddr iova = iotlb->iova + iommu->iommu_offset;
    VhostVDPAShared *s = iommu->dev_shared;
    void *vaddr;
    int ret;
    Int128 llend;
    Error *local_err = NULL;

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        return;
    }
    RCU_READ_LOCK_GUARD();
    /* Check if the RAM section is out of the device range */
    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
    if (int128_gt(llend, int128_make64(s->iova_range.last))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     s->iova_range.last, int128_get64(llend));
        return;
    }

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
                                  &local_err)) {
            error_report_err(local_err);
            return;
        }
        ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                 iotlb->addr_mask + 1, vaddr, read_only);
        if (ret) {
            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, vaddr, ret);
        }
    } else {
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   iotlb->addr_mask + 1);
        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, ret);
        }
    }
}
static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    struct vdpa_iommu *iommu;
    Int128 end;
    int iommu_idx;
    IOMMUMemoryRegion *iommu_mr;
    int ret;

    iommu_mr = IOMMU_MEMORY_REGION(section->mr);

    iommu = g_malloc0(sizeof(*iommu));
    end = int128_add(int128_make64(section->offset_within_region),
                     section->size);
    end = int128_sub(end, int128_one());
    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                   MEMTXATTRS_UNSPECIFIED);
    iommu->iommu_mr = iommu_mr;
    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
                        IOMMU_NOTIFIER_IOTLB_EVENTS,
                        section->offset_within_region,
                        int128_get64(end),
                        iommu_idx);
    iommu->iommu_offset = section->offset_within_address_space -
                          section->offset_within_region;
    iommu->dev_shared = s;

    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
    if (ret) {
        g_free(iommu);
        return;
    }

    QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next);
    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);
}
static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    struct vdpa_iommu *iommu;

    QLIST_FOREACH(iommu, &s->iommu_list, iommu_next)
    {
        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
            iommu->n.start == section->offset_within_region) {
            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
            QLIST_REMOVE(iommu, iommu_next);
            g_free(iommu);
            break;
        }
    }
}
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_add(listener, section);
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_add_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */
    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(s, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (s->shadow_data) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(s);
    ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail_map;
    }

    return;

fail_map:
    if (s->shadow_data) {
        vhost_iova_tree_remove(s->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_del(listener, section);
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_del_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);

    trace_vhost_vdpa_listener_region_del(s, iova,
        int128_get64(int128_sub(llend, int128_one())));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (s->shadow_data) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(s->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(s);
    /*
     * The unmap ioctl doesn't accept a full 64-bit size; split the range in
     * two halves when needed.
     */
    if (int128_eq(llsize, int128_2_64())) {
        llsize = int128_rshift(llsize, 1);
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   int128_get64(llsize));
        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, int128_get64(llsize), ret);
        }
        iova += int128_get64(llsize);
    }
    ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                     "0x%" HWADDR_PRIx ") = %d (%m)",
                     s, iova, int128_get64(llsize), ret);
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->shared->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    int ret;
    uint8_t s;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }
    if ((s & status) == status) {
        /* Don't set bits already set */
        return 0;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    return 0;
}
int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}
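
/*
 * Usage sketch (illustrative only): query the usable IOVA window of a
 * freshly opened vhost-vdpa character device before building any mapping.
 * The first/last fields are the same ones consulted as s->iova_range
 * elsewhere in this file.
 *
 *     struct vhost_vdpa_iova_range range;
 *
 *     if (vhost_vdpa_get_iova_range(fd, &range) == 0) {
 *         ... only IOVAs in [range.first, range.last] are mappable ...
 *     }
 */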
/*
 * The use of this function is for requests that only need to be
 * applied once. Typically such a request occurs at the beginning
 * of operation, before the queues are set up. It should not be
 * used for requests that perform operations until all queues are
 * set, which would need to check dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

static bool vhost_vdpa_last_dev(struct vhost_dev *dev)
{
    return dev->vq_index + dev->nvqs == dev->vq_index_end;
}
static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);

    return ret;
}
static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v = opaque;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, v->shared, opaque);

    dev->opaque = opaque;
    v->shared->listener = vhost_vdpa_memory_listener;
    vhost_vdpa_init_svq(dev, v);

    error_propagate(&dev->migration_blocker, v->migration_blocker);
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    /*
     * If dev->shadow_vqs_enabled at initialization that means the device has
     * been started with x-svq=on, so don't block migration.
     */
    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
        /* We don't have dev->features yet */
        uint64_t features;

        ret = vhost_vdpa_get_dev_features(dev, &features);
        if (unlikely(ret)) {
            error_setg_errno(errp, -ret, "Could not get device features");
            return ret;
        }
        vhost_svq_valid_features(features, &dev->migration_blocker);
    }

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}
static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}
static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->shared->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        return -1;
    }
    n->addr = addr;

    return 0;
}
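
/*
 * Layout sketch implied by the mmap above (an assumption documented here,
 * not a new interface): the vhost-vdpa fd exposes one doorbell page per
 * virtqueue at file offset queue_index * host_page_size. Installing that
 * mapping as the host notifier memory region lets the guest kick the
 * device with a plain store into the page instead of an eventfd write.
 */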
static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid several updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid several updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
            break;
        }
    }

    memory_region_transaction_commit();
}
static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    if (vhost_vdpa_first_dev(dev)) {
        ram_block_discard_disable(false);
        memory_listener_unregister(&v->shared->listener);
    }

    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    vhost_vdpa_svq_cleanup(dev);

    return 0;
}
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;

        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }

    return 0;
}
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward it.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
        0x1ULL << VHOST_BACKEND_F_SUSPEND;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;
    v->shared->backend_cap = features;

    return 0;
}
static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev);
    v->suspended = false;

    return ret;
}
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_enable_one(struct vhost_vdpa *v, unsigned idx,
                                           int enable)
{
    struct vhost_dev *dev = v->dev;
    struct vhost_vring_state state = {
        .index = idx,
        .num = enable,
    };
    int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);

    trace_vhost_vdpa_set_vring_enable_one(dev, idx, enable, r);
    return r;
}

static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
{
    struct vhost_vdpa *v = dev->opaque;
    unsigned i;
    int ret;

    for (i = 0; i < dev->nvqs; ++i) {
        ret = vhost_vdpa_set_vring_enable_one(v, i, enable);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
{
    return vhost_vdpa_set_vring_enable_one(v, idx, 1);
}
static int vhost_vdpa_set_config_call(struct vhost_dev *dev, int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    g_autoptr(GString) str = g_string_sized_new(4 * 16);
    size_t b, len;

    for (b = 0; b < config_len; b += len) {
        len = MIN(config_len - b, 16);

        g_string_truncate(str, 0);
        qemu_hexdump_line(str, config + b, len, 1, 4);
        trace_vhost_vdpa_dump_config(dev, b, str->str);
    }
}
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}
static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    trace_vhost_vdpa_set_dev_vring_base(dev, ring->index, ring->num,
                                        v->shadow_vqs_enabled);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}
/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error pointer
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}
/**
 * Unmap an SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->shared->iova_tree,
                                                     &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
                             size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->shared->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}
/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search an IOVA for
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->shared->iova_tree, *needle);
    }

    return r == 0;
}
/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}
static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);

        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}
*dev
)
1300 struct vhost_vdpa
*v
= dev
->opaque
;
1302 if (!v
->shadow_vqs_enabled
) {
1306 for (unsigned i
= 0; i
< v
->shadow_vqs
->len
; ++i
) {
1307 VhostShadowVirtqueue
*svq
= g_ptr_array_index(v
->shadow_vqs
, i
);
1309 vhost_svq_stop(svq
);
1310 vhost_vdpa_svq_unmap_rings(dev
, svq
);
1312 event_notifier_cleanup(&svq
->hdev_kick
);
1313 event_notifier_cleanup(&svq
->hdev_call
);
1317 static void vhost_vdpa_suspend(struct vhost_dev
*dev
)
1319 struct vhost_vdpa
*v
= dev
->opaque
;
1322 if (!vhost_vdpa_first_dev(dev
)) {
1326 if (dev
->backend_cap
& BIT_ULL(VHOST_BACKEND_F_SUSPEND
)) {
1327 trace_vhost_vdpa_suspend(dev
);
1328 r
= ioctl(v
->shared
->device_fd
, VHOST_VDPA_SUSPEND
);
1330 error_report("Cannot suspend: %s(%d)", g_strerror(errno
), errno
);
1332 v
->suspended
= true;
1337 vhost_vdpa_reset_device(dev
);
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;

    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
    } else {
        vhost_vdpa_suspend(dev);
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (!vhost_vdpa_last_dev(dev)) {
        return 0;
    }

    if (started) {
        if (vhost_dev_has_iommu(dev) && v->shadow_vqs_enabled) {
            error_report("SVQ cannot work while IOMMU is enabled, please "
                         "disable IOMMU and try again");
            return -1;
        }

        memory_listener_register(&v->shared->listener, dev->vdev->dma_as);

        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    }

    return 0;
}
static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!vhost_vdpa_last_dev(dev)) {
        return;
    }

    vhost_vdpa_reset_device(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);
    memory_listener_unregister(&v->shared->listener);
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * the shadow virtqueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * the shadow virtqueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num, true);
        return 0;
    }

    if (!v->suspended) {
        /*
         * Cannot trust the value returned by the device; let vhost recover
         * the used idx from the guest.
         */
        return -1;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num, false);
    return ret;
}
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;
    VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

    /* Remember last call fd because we can switch to SVQ anytime. */
    vhost_svq_set_svq_call_fd(svq, file->fd);

    /*
     * When SVQ is transitioning to off, shadow_vqs_enabled has
     * not been set back to false yet, but the underlying call fd
     * will have to switch back to the guest notifier to signal the
     * passthrough virtqueues. In other situations, SVQ's own call
     * fd shall be used to signal the device model.
     */
    if (v->shadow_vqs_enabled &&
        v->shared->svq_switching != SVQ_TSTATE_DISABLING) {
        return 0;
    }

    return vhost_vdpa_set_vring_dev_call(dev, file);
}
static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}
static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
    .vhost_set_config_call = vhost_vdpa_set_config_call,
    .vhost_reset_status = vhost_vdpa_reset_status,
};