Transmit vhost-user memory regions individually
/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_MAX_NREGIONS    8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_SLAVE_MAX_FDS     8

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint32_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                    + sizeof(c.size) \
                                    + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
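
/*
 * Wire format implied by the definitions above: every message is the
 * packed 12-byte VhostUserHeader (u32 request, u32 flags, u32 size)
 * followed by hdr.size bytes of payload. File descriptors never appear
 * in the payload itself; they travel out-of-band as SCM_RIGHTS ancillary
 * data on the unix socket (see qemu_chr_fe_set_msgfds() below).
 */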
/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_MEMORY_MAX_NREGIONS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};
static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return -1;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    return 0;
}
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    if (vhost_user_read_header(dev, msg) < 0) {
        return -1;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return -1;
        }
    }

    return 0;
}
static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    if (vhost_user_read(dev, &msg_reply) < 0) {
        return -1;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -1;
    }

    return msg_reply.payload.u64 ? -1 : 0;
}
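
/*
 * With VHOST_USER_PROTOCOL_F_REPLY_ACK negotiated, any request carrying
 * VHOST_USER_NEED_REPLY_MASK is acknowledged by the backend with a u64
 * payload: zero for success, non-zero for failure. That is the contract
 * process_message_reply() checks above; callers such as
 * vhost_user_net_set_mtu() set the flag and then collect the ack.
 */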
static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
        return true;
    default:
        return false;
    }
}
/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it once; any later such request is ignored.
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -1;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return -1;
    }

    return 0;
}
int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return -1;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -1;
        }
    }

    return 0;
}
static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
}
static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -1;
            }
            vhost_user_fill_msg_region(&region_buffer, reg);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            msg->payload.memory.regions[*fd_num].mmap_offset = offset;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 1;
}
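
/*
 * Worked example of the resulting VHOST_USER_SET_MEM_TABLE payload size:
 * with two fd-backed regions,
 *   hdr.size = sizeof(nregions) + sizeof(padding)
 *            + 2 * sizeof(VhostUserMemoryRegion)
 *            = 4 + 4 + 2 * 32 = 72 bytes,
 * and the two file descriptors travel alongside as SCM_RIGHTS ancillary
 * data, in the same order as the entries of the regions array.
 */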
static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_MEMORY_MAX_NREGIONS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}
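
/*
 * Example: if the shadow table holds regions {A, B} and the device's new
 * memory state is {B, C}, scrub_shadow_regions() reports A in rem_reg
 * (present only in the shadow) and C in add_reg (present only in the
 * device state); B is marked "found" and needs no message at all.
 */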
static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg);
            msg->payload.mem_reg.region = region_buffer;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is
         * now safe to remove it from the shadow table. Only the entries
         * after the removed one need to move down.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}
static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg);
            msg->payload.mem_reg.region = region_buffer;
            msg->payload.mem_reg.region.mmap_offset = offset;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                if (vhost_user_read(dev, &msg_reply) < 0) {
                    return -1;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type. "
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -1;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -1;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", "
                                 "expected %" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -1;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}
static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_MEMORY_MAX_NREGIONS];
    struct scrub_regions rem_reg[VHOST_MEMORY_MAX_NREGIONS];
    uint64_t shadow_pcb[VHOST_MEMORY_MAX_NREGIONS] = {};
    int nr_add_reg, nr_rem_reg;

    msg->hdr.size = sizeof(msg->payload.mem_reg.padding) +
        sizeof(VhostUserMemoryRegion);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                reply_supported) < 0)
    {
        goto err;
    }

    if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
                shadow_pcb, reply_supported, track_ramblocks) < 0)
    {
        goto err;
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        if (vhost_user_write(dev, msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
    }

    return -1;
}
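
/*
 * Taken together, the VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS path above
 * replaces one monolithic VHOST_USER_SET_MEM_TABLE message with a sequence
 * of per-region messages: one VHOST_USER_REM_MEM_REG per region that left
 * the guest memory map, then one VHOST_USER_ADD_MEM_REG per region that
 * entered it, each carrying a single file descriptor. Unchanged regions
 * generate no traffic, and the shadow table always mirrors what the
 * backend currently has mapped.
 */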
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          true) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              true) < 0) {
            return -1;
        }

        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (vhost_user_read(dev, &msg_reply) < 0) {
            return -1;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type. "
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -1;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -1;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);

        /*
         * They're in the same order as the regions that were sent,
         * but some of the regions were skipped (above) if they
         * didn't have fds.
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -1;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version.
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          false) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              false) < 0) {
            return -1;
        }

        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}
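
/*
 * Summary of the four memory-table update paths chosen above:
 *
 *   postcopy?  CONFIGURE_MEM_SLOTS?  behaviour
 *   no         no                    one SET_MEM_TABLE message
 *   no         yes                   per-region ADD/REM_MEM_REG messages
 *   yes        no                    SET_MEM_TABLE, replied to with the
 *                                    backend's mapped (client base) addresses
 *   yes        yes                   per-region messages, each ADD replied
 *                                    to with the backend's mapped address
 */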
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}

static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
                                             int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && !n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
        n->set = true;
    }
}

static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
                                            int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        n->set = false;
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    vhost_user_host_notifier_restore(dev, ring->index);

    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -1;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
    }

    return 0;
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    vhost_user_host_notifier_remove(dev, ring->index);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *ring = msg.payload.state;

    return 0;
}
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    return 0;
}
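
/*
 * In the message above the vring index occupies the low byte of the u64
 * payload (VHOST_USER_VRING_IDX_MASK); setting VHOST_USER_VRING_NOFD_MASK
 * (bit 8) tells the backend that no file descriptor accompanies the
 * request, e.g. because ioeventfds are unavailable or the fd is invalid.
 */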
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    msg.hdr.request = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
        ? VHOST_USER_RESET_DEVICE
        : VHOST_USER_RESET_OWNER;

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}
static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
{
    int ret = -1;

    if (!dev->config_ops) {
        return -1;
    }

    if (dev->config_ops->vhost_dev_config_notifier) {
        ret = dev->config_ops->vhost_dev_config_notifier(dev);
    }

    return ret;
}

static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size;
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -1;
    }

    n = &user->notifier[queue_idx];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -1;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        munmap(addr, page_size);
        return -1;
    }

    n->addr = addr;
    n->set = true;

    return 0;
}
static void slave_read(void *opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    int size, ret = 0;
    struct iovec iov;
    struct msghdr msgh;
    int fd[VHOST_USER_SLAVE_MAX_FDS];
    char control[CMSG_SPACE(sizeof(fd))];
    struct cmsghdr *cmsg;
    int i, fdsize = 0;

    memset(&msgh, 0, sizeof(msgh));
    msgh.msg_iov = &iov;
    msgh.msg_iovlen = 1;
    msgh.msg_control = control;
    msgh.msg_controllen = sizeof(control);

    memset(fd, -1, sizeof(fd));

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    do {
        size = recvmsg(u->slave_fd, &msgh, 0);
    } while (size < 0 && (errno == EINTR || errno == EAGAIN));

    if (size != VHOST_USER_HDR_SIZE) {
        error_report("Failed to read from slave.");
        goto err;
    }

    if (msgh.msg_flags & MSG_CTRUNC) {
        error_report("Truncated message.");
        goto err;
    }

    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_RIGHTS) {
            fdsize = cmsg->cmsg_len - CMSG_LEN(0);
            memcpy(fd, CMSG_DATA(cmsg), fdsize);
            break;
        }
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    do {
        size = read(u->slave_fd, &payload, hdr.size);
    } while (size < 0 && (errno == EINTR || errno == EAGAIN));

    if (size != hdr.size) {
        error_report("Failed to read payload from slave.");
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_SLAVE_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd[0]);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /* Close the remaining file descriptors. */
    for (i = 0; i < fdsize; i++) {
        if (fd[i] != -1) {
            close(fd[i]);
        }
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        do {
            size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
        } while (size < 0 && (errno == EINTR || errno == EAGAIN));

        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
            error_report("Failed to send msg reply to slave.");
            goto err;
        }
    }

    return;

err:
    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
    close(u->slave_fd);
    u->slave_fd = -1;
    for (i = 0; i < fdsize; i++) {
        if (fd[i] != -1) {
            close(fd[i]);
        }
    }
    return;
}
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_report("socketpair() failed");
        return -1;
    }

    u->slave_fd = sv[0];
    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }

    return ret;
}
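
/*
 * The slave channel set up above is a plain socketpair: QEMU keeps sv[0]
 * and polls it with slave_read(), while sv[1] is handed to the backend in
 * the VHOST_USER_SET_SLAVE_REQ_FD message. The backend then uses its end
 * to initiate requests of its own (IOTLB messages, config-change and host
 * notifier messages), in the reverse direction of the main channel.
 */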
#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }
    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif
/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -1;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -1;
    }
    qemu_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -1;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;
    trace_vhost_user_postcopy_listen();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
        /* We ignore notifications we don't know */
        break;
    }

    return 0;
}
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
{
    uint64_t features, protocol_features, ram_slots;
    struct vhost_user *u;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = opaque;
    u->slave_fd = -1;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            return err;
        }

        dev->protocol_features =
            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
            /* Don't acknowledge CONFIG feature if device doesn't support it */
            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
        } else if (!(protocol_features &
                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
                         "but backend does not support it.");
            return -1;
        }

        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            return err;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                      VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                  virtio_has_feature(dev->protocol_features,
                      VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_report("IOMMU support requires reply-ack and "
                         "slave-req protocol features.");
            return -1;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_MAX_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                return err;
            }

            if (ram_slots < u->user->memory_slots) {
                error_report("The backend specified a max ram slots limit "
                             "of %" PRIu64", when the prior validated limit was %d. "
                             "This limit should never decrease.", ram_slots,
                             u->user->memory_slots);
                return -1;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_MEMORY_MAX_NREGIONS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            return err;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}
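
/*
 * Init handshake as implemented above, in order:
 *
 *   1. VHOST_USER_GET_FEATURES.
 *   2. If VHOST_USER_F_PROTOCOL_FEATURES was offered:
 *      GET_PROTOCOL_FEATURES / SET_PROTOCOL_FEATURES, then
 *      GET_QUEUE_NUM (if F_MQ) and GET_MAX_MEM_SLOTS
 *      (if F_CONFIGURE_MEM_SLOTS).
 *   3. Only for the device handling vq_index 0, the slave channel is
 *      created via VHOST_USER_SET_SLAVE_REQ_FD.
 */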
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_fd >= 0) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}
static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If the guest supports GUEST_ANNOUNCE, do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -1;
}
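
/*
 * Note on the VHOST_USER_SEND_RARP payload above: the six MAC address
 * bytes are copied into the low bytes of the u64 payload field, so the
 * message still uses the standard 8-byte u64 payload layout on the wire.
 */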
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    /* If reply_ack is supported, the slave has to ack that the specified MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -EFAULT;
    }

    return process_message_reply(dev, &msg);
}


static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_report("Received bad msg size.");
        return -1;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}
static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    memcpy(&msg.payload.session.session_setup_data, sess_info,
           sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() failed, create session failed");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_report("vhost_user_read() failed, create session failed");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -1;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -1;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() failed, close session failed");
        return -1;
    }

    return 0;
}
static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    bool result;

    result = memory_region_get_fd(section->mr) >= 0;

    return result;
}

static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -1;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -1;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -1;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}
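
/*
 * The mmap'd buffer negotiated above is shared between QEMU and the
 * backend; per the design of VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, the
 * backend records in it which descriptors are still in flight, and the
 * same buffer is handed back via VHOST_USER_SET_INFLIGHT_FD below so a
 * restarted backend can resubmit requests that never completed.
 */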
static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
        return -1;
    }

    return 0;
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    int i;

    if (!user->chr) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (user->notifier[i].addr) {
            object_unparent(OBJECT(&user->notifier[i].mr));
            munmap(user->notifier[i].addr, qemu_real_host_page_size);
            user->notifier[i].addr = NULL;
        }
    }
    user->chr = NULL;
}

const VhostOps user_ops = {
    .backend_type = VHOST_BACKEND_TYPE_USER,
    .vhost_backend_init = vhost_user_backend_init,
    .vhost_backend_cleanup = vhost_user_backend_cleanup,
    .vhost_backend_memslots_limit = vhost_user_memslots_limit,
    .vhost_set_log_base = vhost_user_set_log_base,
    .vhost_set_mem_table = vhost_user_set_mem_table,
    .vhost_set_vring_addr = vhost_user_set_vring_addr,
    .vhost_set_vring_endian = vhost_user_set_vring_endian,
    .vhost_set_vring_num = vhost_user_set_vring_num,
    .vhost_set_vring_base = vhost_user_set_vring_base,
    .vhost_get_vring_base = vhost_user_get_vring_base,
    .vhost_set_vring_kick = vhost_user_set_vring_kick,
    .vhost_set_vring_call = vhost_user_set_vring_call,
    .vhost_set_features = vhost_user_set_features,
    .vhost_get_features = vhost_user_get_features,
    .vhost_set_owner = vhost_user_set_owner,
    .vhost_reset_device = vhost_user_reset_device,
    .vhost_get_vq_index = vhost_user_get_vq_index,
    .vhost_set_vring_enable = vhost_user_set_vring_enable,
    .vhost_requires_shm_log = vhost_user_requires_shm_log,
    .vhost_migration_done = vhost_user_migration_done,
    .vhost_backend_can_merge = vhost_user_can_merge,
    .vhost_net_set_mtu = vhost_user_net_set_mtu,
    .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
    .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
    .vhost_get_config = vhost_user_get_config,
    .vhost_set_config = vhost_user_set_config,
    .vhost_crypto_create_session = vhost_user_crypto_create_session,
    .vhost_crypto_close_session = vhost_user_crypto_close_session,
    .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
    .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
    .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};