/*
 * vhost-vdpa
 *
 *  Copyright(c) 2017-2018 Intel Corporation.
 *  Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "exec/target_page.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section,
                                     int page_mask)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(page_mask));

    return llend;
}

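/*
 * Check whether a memory section should be skipped by the vhost-vdpa memory
 * listener: non-RAM/non-IOMMU regions, protected regions and RAM device
 * (MMIO-backed) regions cannot be mapped, and sections that start below the
 * device's usable IOVA range are rejected with an error.
 */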
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max,
                                                int page_mask)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }
    /*
     * While using vIOMMU, sometimes the section will be larger than iova_max,
     * but the memory that actually maps is smaller, so move the check to
     * vhost_vdpa_iommu_map_notify(), which uses the actual size that gets
     * mapped to the kernel.
     */

    if (!memory_region_is_iommu(section->mr)) {
        llend = vhost_vdpa_section_end(section, page_mask);
        if (int128_gt(llend, int128_make64(iova_max))) {
            error_report("RAM section out of device range (max=0x%" PRIx64
                         ", end addr=0x%" PRIx64 ")",
                         iova_max, int128_get64(llend));
            return true;
        }
    }

    return false;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;
    int ret = 0;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;
    int ret = 0;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(VhostVDPAShared *s)
{
    int fd = s->device_fd;
    struct vhost_msg_v2 msg = {
        .type = VHOST_IOTLB_MSG_V2,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

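/*
 * Send VHOST_IOTLB_BATCH_BEGIN at most once per listener transaction: if the
 * backend supports IOTLB batching and a batch has not been opened yet, open
 * one and remember that until the listener commit closes it.
 */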
static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s)
{
    if (s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !s->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(s);
    }

    s->iotlb_batch_begin_sent = true;
}

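/*
 * Memory listener commit callback: if an IOTLB batch was opened during this
 * transaction, close it with VHOST_IOTLB_BATCH_END so the kernel applies the
 * accumulated mapping updates.
 */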
static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;

    if (!(s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!s->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    s->iotlb_batch_begin_sent = false;
}

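/*
 * vIOMMU notifier callback, invoked when the guest IOMMU maps or unmaps a
 * range. Translates the IOTLB entry into a vhost-vdpa DMA map/unmap in the
 * guest physical address ASID.
 */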
static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);

    hwaddr iova = iotlb->iova + iommu->iommu_offset;
    VhostVDPAShared *s = iommu->dev_shared;
    void *vaddr;
    int ret;
    Int128 llend;

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        return;
    }
    RCU_READ_LOCK_GUARD();
    /* check if RAM section out of device range */
    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
    if (int128_gt(llend, int128_make64(s->iova_range.last))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     s->iova_range.last, int128_get64(llend));
        return;
    }

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
            return;
        }
        ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                 iotlb->addr_mask + 1, vaddr, read_only);
        if (ret) {
            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, vaddr, ret);
        }
    } else {
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   iotlb->addr_mask + 1);
        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, ret);
        }
    }
}

static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);

    struct vdpa_iommu *iommu;
    Int128 end;
    int iommu_idx;
    IOMMUMemoryRegion *iommu_mr;
    int ret;

    iommu_mr = IOMMU_MEMORY_REGION(section->mr);

    iommu = g_malloc0(sizeof(*iommu));
    end = int128_add(int128_make64(section->offset_within_region),
                     section->size);
    end = int128_sub(end, int128_one());
    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                   MEMTXATTRS_UNSPECIFIED);
    iommu->iommu_mr = iommu_mr;
    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
                        IOMMU_NOTIFIER_IOTLB_EVENTS,
                        section->offset_within_region,
                        int128_get64(end),
                        iommu_idx);
    iommu->iommu_offset = section->offset_within_address_space -
                          section->offset_within_region;
    iommu->dev_shared = s;

    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
    if (ret) {
        g_free(iommu);
        return;
    }

    QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next);
    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);

    return;
}

static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);

    struct vdpa_iommu *iommu;

    QLIST_FOREACH(iommu, &s->iommu_list, iommu_next)
    {
        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
            iommu->n.start == section->offset_within_region) {
            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
            QLIST_REMOVE(iommu, iommu_next);
            g_free(iommu);
            break;
        }
    }
}

static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_add(listener, section);
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_add_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(s, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (s->shadow_data) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(s);
    ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail_map;
    }

    return;

fail_map:
    if (s->shadow_data) {
        vhost_iova_tree_remove(s->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_del(listener, section);
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_del_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);

    trace_vhost_vdpa_listener_region_del(s, iova,
        int128_get64(int128_sub(llend, int128_one())));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (s->shadow_data) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(s->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(s);
    /*
     * The unmap ioctl doesn't accept a full 64-bit span, so split it in two
     * halves if needed.
     */
    if (int128_eq(llsize, int128_2_64())) {
        llsize = int128_rshift(llsize, 1);
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   int128_get64(llsize));

        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, int128_get64(llsize), ret);
        }
        iova += int128_get64(llsize);
    }
    ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));

    if (ret) {
        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                     "0x%" HWADDR_PRIx ") = %d (%m)",
                     s, iova, int128_get64(llsize), ret);
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API used by vhost-vdpa requires incremental updating of the
 * mappings, so we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->shared->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

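/*
 * Read the current device status, OR in the requested bits, write the result
 * back and re-read to verify that the device accepted them. Returns -EIO if
 * the bits did not stick.
 */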
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }
    if ((s & status) == status) {
        /* Don't set bits already set */
        return 0;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

/*
 * The use of this function is for requests that only need to be
 * applied once. Typically such a request occurs at the beginning
 * of operation, before setting up the queues. It should not be
 * used for requests that perform operations until all queues are
 * set; those would need to check dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v = opaque;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, v->shared, opaque);
    int ret;

    v->dev = dev;
    dev->opaque = opaque;
    v->shared->listener = vhost_vdpa_memory_listener;
    vhost_vdpa_init_svq(dev, v);

    error_propagate(&dev->migration_blocker, v->migration_blocker);
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    /*
     * If dev->shadow_vqs_enabled at initialization that means the device has
     * been started with x-svq=on, so don't block migration
     */
    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
        /* We don't have dev->features yet */
        uint64_t features;
        ret = vhost_vdpa_get_dev_features(dev, &features);
        if (unlikely(ret)) {
            error_setg_errno(errp, -ret, "Could not get device features");
            return ret;
        }
        vhost_svq_valid_features(features, &dev->migration_blocker);
    }

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

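/*
 * mmap() the per-queue notification page exposed by the vhost-vdpa device fd
 * and wire it up as a host notifier memory region, so guest kicks can reach
 * the device page directly.
 */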
static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->shared->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid multiple updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid multiple updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
            break;
        }
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    if (vhost_vdpa_first_dev(dev)) {
        ram_block_discard_disable(false);
        memory_listener_unregister(&v->shared->listener);
    }

    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward this.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

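/*
 * Negotiate the vhost backend features this backend cares about (IOTLB v2
 * messages, IOTLB batching, ASID support and suspend) and cache the result
 * in both the vhost device and the shared vhost-vdpa state.
 */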
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
        0x1ULL << VHOST_BACKEND_F_SUSPEND;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;
    v->shared->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev);
    v->suspended = false;
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
{
    struct vhost_dev *dev = v->dev;
    struct vhost_vring_state state = {
        .index = idx,
        .num = 1,
    };
    int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);

    trace_vhost_vdpa_set_vring_ready(dev, idx, r);
    return r;
}

static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                      int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}

/**
 * Unmap an SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->shared->iova_tree,
                                                     &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
                             size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->shared->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to allocate an IOVA for and map
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->shared->iova_tree, *needle);
    }

    return r == 0;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    ERRP_GUARD();
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

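/*
 * Start all shadow virtqueues: for each queue, reset the device vring base,
 * wire up the SVQ kick/call notifiers, map the SVQ rings into the device
 * IOVA space and point the device at them instead of the guest rings.
 * On failure, queues that were already started are unwound.
 */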
static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);

        vhost_svq_stop(svq);
        vhost_vdpa_svq_unmap_rings(dev, svq);

        event_notifier_cleanup(&svq->hdev_kick);
        event_notifier_cleanup(&svq->hdev_call);
    }
}

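/*
 * Stop the device from processing descriptors: use VHOST_VDPA_SUSPEND when
 * the backend advertises it, so the vring state is preserved and can still
 * be queried; otherwise fall back to a full device reset.
 */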
static void vhost_vdpa_suspend(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int r;

    if (!vhost_vdpa_first_dev(dev)) {
        return;
    }

    if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) {
        trace_vhost_vdpa_suspend(dev);
        r = ioctl(v->shared->device_fd, VHOST_VDPA_SUSPEND);
        if (unlikely(r)) {
            error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno);
        } else {
            v->suspended = true;
            return;
        }
    }

    vhost_vdpa_reset_device(dev);
}

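/*
 * Start/stop callback. On start, set up host notifiers and shadow
 * virtqueues; once the last virtqueue of the device (vq_index_end) is
 * reached, register the memory listener and set DRIVER_OK. On stop, suspend
 * the device first so the vring state can still be read back.
 */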
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
    } else {
        vhost_vdpa_suspend(dev);
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
            error_report("SVQ can not work while IOMMU is enabled, please "
                         "disable IOMMU and try again");
            return -1;
        }
        memory_listener_register(&v->shared->listener, dev->vdev->dma_as);

        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    }

    return 0;
}

static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return;
    }

    vhost_vdpa_reset_device(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);
    memory_listener_unregister(&v->shared->listener);
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;
    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

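/*
 * Return the last used index. With SVQ enabled it comes from the VirtQueue
 * state; otherwise it is queried from the device, which is only reliable
 * after a successful suspend.
 */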
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        return 0;
    }

    if (!v->suspended) {
        /*
         * Cannot trust the value returned by the device; let vhost recover
         * the used idx from the guest.
         */
        return -1;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;
    VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

    /* Remember last call fd because we can switch to SVQ anytime. */
    vhost_svq_set_svq_call_fd(svq, file->fd);
    if (v->shadow_vqs_enabled) {
        return 0;
    }

    return vhost_vdpa_set_vring_dev_call(dev, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
        .vhost_set_config_call = vhost_vdpa_set_config_call,
        .vhost_reset_status = vhost_vdpa_reset_status,
};