/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "trace.h"
#include "qemu-common.h"
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space. These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}
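
/*
 * Send a single IOTLB update to the vhost-vdpa device: the request is a
 * vhost_msg_v2 written directly to the device fd, the same wire format
 * used by the vhost kernel IOTLB API.
 */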
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.iotlb.iova, msg.iotlb.size,
                             msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}
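
/*
 * Invalidate a previously mapped IOVA range, again sent as a
 * vhost_msg_v2 write to the device fd.
 */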
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;
    int ret = 0;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}
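
/*
 * If the backend advertises VHOST_BACKEND_F_IOTLB_BATCH, bracket the
 * listener's map/unmap sequence with BATCH_BEGIN/BATCH_END messages so
 * the kernel can apply the whole update in one go.
 */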
static void vhost_vdpa_listener_begin(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_BEGIN;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}
static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    struct vhost_dev *dev = v->dev;
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    msg.type = v->msg_type;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}
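
/*
 * Map the guest-visible range of a new MemoryRegionSection: clamp it to
 * page-aligned IOVA bounds, take a reference on the region, and map the
 * range to the region's host virtual address via vhost_vdpa_dma_map().
 */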
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report("map ram fail!");
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
}
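
/*
 * Mirror of region_add: compute the same page-aligned IOVA range,
 * invalidate it, then drop the reference taken on the memory region.
 */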
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental
 * updating of the mapping. So we can not use the generic vhost memory
 * listener, which depends on the addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .begin = vhost_vdpa_listener_begin,
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};
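
/*
 * All remaining backend operations funnel through this helper, which
 * issues a plain ioctl() on the vhost-vdpa character device fd.
 */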
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}
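
/*
 * Read-modify-write helper for the virtio device status byte: OR the
 * given bits into whatever status the device currently reports.
 */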
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    trace_vhost_vdpa_add_status(dev, status);
    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}
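
/*
 * vhost-vdpa does not consume the memory table directly; mappings are
 * driven by the memory listener instead. This hook only traces the
 * regions and validates the padding field.
 */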
static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -1;
    }

    return 0;
}
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}
static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;
    r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
    if (r) {
        return -EFAULT;
    }

    dev->backend_cap = features;

    return 0;
}
int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}
static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev, status);
    return ret;
}
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx - dev->vq_index);
    return idx - dev->vq_index;
}
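
/*
 * Enable every virtqueue owned by this vhost_dev. For
 * VHOST_VDPA_SET_VRING_ENABLE, the vhost_vring_state .num field carries
 * the enable flag.
 */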
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    trace_vhost_vdpa_set_vring_ready(dev);
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}
static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}
static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}
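
/*
 * Start/stop the device. On start, register the memory listener (which
 * populates the device IOTLB), enable the vrings and set DRIVER_OK; on
 * stop, reset the device and tear the listener down again.
 */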
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    trace_vhost_vdpa_dev_start(dev, started);
    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}
static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}
static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}
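
/*
 * Unlike vhost-kernel, vhost-vdpa devices address the rings by guest
 * physical address, so the "user" address fields are filled with the
 * vring's physical addresses here.
 */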
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}
static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
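
/*
 * The vhost-vdpa backend ops table wired into the generic vhost core.
 * Hooks that vhost-vdpa does not need (endianness, shm log, IOTLB
 * callbacks, ...) are left NULL so the core skips them.
 */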
const VhostOps vdpa_ops = {
    .backend_type = VHOST_BACKEND_TYPE_VDPA,
    .vhost_backend_init = vhost_vdpa_init,
    .vhost_backend_cleanup = vhost_vdpa_cleanup,
    .vhost_set_log_base = vhost_vdpa_set_log_base,
    .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
    .vhost_set_vring_num = vhost_vdpa_set_vring_num,
    .vhost_set_vring_base = vhost_vdpa_set_vring_base,
    .vhost_get_vring_base = vhost_vdpa_get_vring_base,
    .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
    .vhost_set_vring_call = vhost_vdpa_set_vring_call,
    .vhost_get_features = vhost_vdpa_get_features,
    .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
    .vhost_set_owner = vhost_vdpa_set_owner,
    .vhost_set_vring_endian = NULL,
    .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
    .vhost_set_mem_table = vhost_vdpa_set_mem_table,
    .vhost_set_features = vhost_vdpa_set_features,
    .vhost_reset_device = vhost_vdpa_reset_device,
    .vhost_get_vq_index = vhost_vdpa_get_vq_index,
    .vhost_get_config = vhost_vdpa_get_config,
    .vhost_set_config = vhost_vdpa_set_config,
    .vhost_requires_shm_log = NULL,
    .vhost_migration_done = NULL,
    .vhost_backend_can_merge = NULL,
    .vhost_net_set_mtu = NULL,
    .vhost_set_iotlb_callback = NULL,
    .vhost_send_device_iotlb_msg = NULL,
    .vhost_dev_start = vhost_vdpa_dev_start,
    .vhost_get_device_id = vhost_vdpa_get_device_id,
    .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
    .vhost_force_iommu = vhost_vdpa_force_iommu,
};