/*
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    return llend;
}

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

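/*
 * Illustrative sketch (not part of the original file): a caller that wants
 * to make a single guest page visible to the device could use the helper
 * above roughly like this, assuming a populated "struct vhost_vdpa *v" and
 * a host buffer "buf" backing guest-physical address "gpa":
 *
 *     if (vhost_vdpa_dma_map(v, 0, gpa, 4096, buf, false) < 0) {
 *         error_report("map failed");
 *     }
 *
 * The asid of 0 follows the comment above: devices without address space
 * id support must see asid == 0.
 */
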
/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v)
{
    int fd = v->device_fd;
    struct vhost_msg_v2 msg = {
        .type = v->msg_type,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v)
{
    if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !v->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(v);
    }

    v->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!v->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    v->iotlb_batch_begin_sent = false;
}

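/*
 * Sketch of the message flow implied by the two helpers above, assuming
 * VHOST_BACKEND_F_IOTLB_BATCH was negotiated (illustrative only):
 *
 *     vhost_vdpa_iotlb_batch_begin_once(v);   // VHOST_IOTLB_BATCH_BEGIN
 *     vhost_vdpa_dma_map(v, ...);             // VHOST_IOTLB_UPDATE
 *     vhost_vdpa_dma_unmap(v, ...);           // VHOST_IOTLB_INVALIDATE
 *     ...
 *     // memory listener commit:              // VHOST_IOTLB_BATCH_END
 *
 * Without the batch capability, each update is applied individually and
 * vhost_vdpa_listener_commit() returns early.
 */
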
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (v->shadow_vqs_enabled) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        goto fail_map;
    }

    return;

fail_map:
    if (v->shadow_vqs_enabled) {
        vhost_iova_tree_remove(v->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

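/*
 * Note on the shadow virtqueue branch above (sketch, based only on this
 * file): with SVQ enabled, the guest-physical address cannot be handed to
 * the device directly, so an IOVA is allocated from v->iova_tree and that
 * IOVA, rather than the GPA, is what vhost_vdpa_dma_map() programs into
 * the device. The same tree is consulted on region_del to find and free
 * the translation.
 */
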
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_vqs_enabled) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                            section->offset_within_region +
                            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we can not use the generic vhost memory listener,
 * which depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

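/*
 * Usage note (taken from later in this file): the listener above is copied
 * into each struct vhost_vdpa in vhost_vdpa_init() and is only registered
 * against &address_space_memory once the last virtqueue of the device is
 * started, in vhost_vdpa_dev_start(); it is unregistered again on stop and
 * in vhost_vdpa_cleanup().
 */
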
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

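/*
 * Illustrative call (not from the original file): reading the device
 * status through this wrapper, mirroring what vhost_vdpa_add_status()
 * does below:
 *
 *     uint8_t s;
 *     int r = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
 *     // r is the ioctl result, or -errno on failure
 */
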
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

/*
 * This function is used for requests that only need to be applied once.
 * Typically such requests occur at the beginning of operation, before
 * setting up queues. It should not be used for requests that must wait
 * until all queues are set up, which would need to check
 * dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

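/*
 * Background (sketch, assuming QEMU's usual multiqueue split): a multiqueue
 * device is modeled as several vhost_dev instances, each owning a slice of
 * the virtqueues. v->index == 0 therefore identifies the vhost_dev that
 * covers the first queue pair, which is the one allowed to issue the
 * device-wide requests guarded by this helper.
 */
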
static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot set discarding of RAM broken");
        return ret;
    }

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    vhost_vdpa_init_svq(dev, v);

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

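/*
 * Sketch of the virtio status handshake this backend participates in
 * (based on the status bits used in this file): init() advertises
 * ACKNOWLEDGE | DRIVER, set_features() adds FEATURES_OK after the feature
 * ioctl succeeds, and dev_start() finally adds DRIVER_OK. reset_device()
 * clears the status byte back to 0.
 */
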
static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

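/*
 * Layout note (sketch, inferred from the mmap() call above): the vhost-vdpa
 * character device appears to expose one doorbell page per virtqueue at
 * file offset queue_index * page_size. Mapping it PROT_WRITE | MAP_SHARED
 * lets the guest kick the device by writing to the page directly, bypassing
 * the eventfd path while the notifier memory region is in place.
 */
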
static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            goto err;
        }
    }

    return;

err:
    vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
    return;
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;
    ram_block_discard_disable(false);

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward this.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

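/*
 * Worked example of the SVQ logging special case above (illustrative):
 * with acked_features = A and an incoming features = A ^ BIT_ULL(VHOST_F_LOG_ALL),
 * the XOR test is true, so only the bookkeeping copy changes and nothing
 * is forwarded to the device; SVQ tracks dirty memory itself. In every
 * other case _F_LOG is stripped before the VHOST_SET_FEATURES call, since
 * the device must never ack it while SVQ is enabled.
 */
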
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;

    return 0;
}

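/*
 * Example of the capability mask built above (illustrative): with
 * VHOST_BACKEND_F_IOTLB_MSG_V2 = 1 and VHOST_BACKEND_F_IOTLB_BATCH = 2 as
 * bit positions, f == 0x6, so only those two bits can survive the
 * filtering of whatever the kernel reports, and dev->backend_cap ends up
 * as the negotiated intersection.
 */
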
static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
{
    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_svq_stop(svq);
    }
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    vhost_vdpa_reset_svq(v);

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error pointer
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}

/**
 * Unmap an SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search iova
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->iova_tree, *needle);
        return false;
    }

    return true;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    ERRP_GUARD();
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
        return false;
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

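/*
 * Resulting layout (sketch, derived from the two mappings above): the
 * descriptor and avail rings live in one contiguous "driver" allocation,
 * mapped read-only for the device, while the used ring gets its own
 * writable "device" allocation:
 *
 *     addr->desc_user_addr  = driver_region.iova
 *     addr->avail_user_addr = driver_region.iova + (avail - desc offset)
 *     addr->used_user_addr  = device_region.iova
 */
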
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_map;
        }
    }

    return true;

err_map:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        vhost_vdpa_svq_unmap_rings(dev, svq);

        event_notifier_cleanup(&svq->hdev_kick);
        event_notifier_cleanup(&svq->hdev_call);
    }
}

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

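/*
 * Ordering recap for vhost_vdpa_dev_start() (taken from the code above):
 * on start, host notifiers and shadow virtqueues come up first and the
 * rings are enabled, but the memory listener is registered and DRIVER_OK
 * is set only by the vhost_dev covering the last virtqueue
 * (dev->vq_index + dev->nvqs == dev->vq_index_end). Teardown mirrors this:
 * the device is reset and the listener unregistered at the same boundary.
 */
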
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;
    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * it.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);

    /*
     * vhost-vdpa devices do not support in-flight requests. Set all of them
     * as available.
     *
     * TODO: This is ok for networking, but other kinds of devices might
     * have problems with these retransmissions.
     */
    while (virtqueue_rewind(vq, 1)) {
        continue;
    }
    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * it.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        return 0;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        int vdpa_idx = file->index - dev->vq_index;
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

        vhost_svq_set_svq_call_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_call(dev, file);
    }
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0 && v->shadow_vqs_enabled) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};