/*
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"

static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section)
{
    return (!memory_region_is_ram(section->mr) &&
            !memory_region_is_iommu(section->mr)) ||
           /*
            * Sizing an enabled 64-bit BAR can cause spurious mappings to
            * addresses in the upper part of the 64-bit address space. These
            * are never accessed by the CPU and beyond the address width of
            * some IOMMU hardware. TODO: VDPA should tell us the IOMMU width.
            */
           section->offset_within_address_space & (1ULL << 63);
}

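/*
 * Map [iova, iova + size) to the host VA vaddr by writing a
 * VHOST_IOTLB_UPDATE message to the vhost-vdpa device fd.
 */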
static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
                              void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return 0;
}

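/*
 * Drop the mapping for [iova, iova + size) by writing a
 * VHOST_IOTLB_INVALIDATE message to the vhost-vdpa device fd.
 */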
static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
                                hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = v->device_fd;

    msg.type = v->msg_type;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return 0;
}

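/*
 * MemoryListener callback: DMA-map a RAM section as it is added to the
 * guest address space.
 */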
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr) == true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    llsize = int128_sub(llend, int128_make64(iova));

    ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
                             vaddr, section->readonly);
    if (ret) {
        error_report("vhost vdpa map fail!");
        if (memory_region_is_ram_device(section->mr)) {
            /* Allow unexpected mappings not to be fatal for RAM devices */
            error_report("map ram fail!");
            return;
        }
        goto fail;
    }

    return;

fail:
    if (memory_region_is_ram_device(section->mr)) {
        error_report("failed to vdpa_dma_map. pci p2p may not work");
        return;
    }
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. Runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    bool try_unmap = true;

    if (vhost_vdpa_listener_skipped_section(section)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (try_unmap) {
        ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
        if (ret) {
            error_report("vhost_vdpa dma unmap error!");
        }
    }

    memory_region_unref(section->mr);
}

/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener,
 * which depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

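/* Forward a vhost request as an ioctl on the vdpa device fd. */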
static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->device_fd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    return ioctl(fd, request, arg);
}

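/* OR new bits into the device status byte (read-modify-write). */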
static void vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;

    if (vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s)) {
        return;
    }

    s |= status;

    vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
}

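/*
 * Backend init: stash the vhost_vdpa state in dev->opaque, cache the
 * backend features and announce ACKNOWLEDGE | DRIVER to the device.
 */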
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque)
{
    struct vhost_vdpa *v;
    uint64_t features;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = opaque;
    dev->opaque = opaque;
    vhost_vdpa_call(dev, VHOST_GET_FEATURES, &features);
    dev->backend_features = features;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    v = dev->opaque;
    memory_listener_unregister(&v->listener);

    dev->opaque = NULL;
    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    /* vDPA does not use vhost memory slots, so there is no real limit. */
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    /* Memory mapping is driven by the memory listener, not the mem table. */
    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    uint8_t status = 0;
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

    return !(status & VIRTIO_CONFIG_S_FEATURES_OK);
}

int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                             uint32_t *device_id)
{
    return vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    uint8_t status = 0;

    /* Writing a zero status resets the device. */
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx - dev->vq_index;
}

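/* Enable every virtqueue owned by this device (.num = 1 means "enabled"). */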
static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
{
    int i;

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = 1,
        };
        vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
    }
    return 0;
}

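/*
 * Config space accessors: marshal offset/len and the payload into a
 * struct vhost_vdpa_config before issuing the ioctl.
 */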
static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    return ret;
}

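/*
 * Start/stop the device: on start, register the memory listener, enable
 * the rings and set DRIVER_OK; on stop, reset the device and drop back
 * to ACKNOWLEDGE | DRIVER.
 */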
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;

    if (started) {
        uint8_t status = 0;

        memory_listener_register(&v->listener, &address_space_memory);
        vhost_vdpa_set_vring_ready(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
        vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);

        return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
        vhost_vdpa_reset_device(dev);
        vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                   VIRTIO_CONFIG_S_DRIVER);
        memory_listener_unregister(&v->listener);

        return 0;
    }
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    return vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    return vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                                  struct vhost_vring_addr *addr,
                                  struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    /* vDPA devices always go through the device IOMMU. */
    return true;
}

const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
};