Transmit vhost-user memory regions individually
/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-net.h"
#include "chardev/char-fe.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "sysemu/cryptodev.h"
#include "migration/migration.h"
#include "migration/postcopy-ram.h"
#include "trace.h"

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>

#include "standard-headers/linux/vhost_types.h"

#ifdef CONFIG_LINUX
#include <linux/userfaultfd.h>
#endif

#define VHOST_MEMORY_MAX_NREGIONS    8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_USER_SLAVE_MAX_FDS     8

/*
 * Maximum size of virtio device config space
 */
#define VHOST_USER_MAX_CONFIG_SIZE 256

enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,
    VHOST_USER_PROTOCOL_F_REPLY_ACK = 3,
    VHOST_USER_PROTOCOL_F_NET_MTU = 4,
    VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5,
    VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6,
    VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7,
    VHOST_USER_PROTOCOL_F_PAGEFAULT = 8,
    VHOST_USER_PROTOCOL_F_CONFIG = 9,
    VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10,
    VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
    VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
    VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
    /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
    VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
    VHOST_USER_PROTOCOL_F_MAX
};

#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_NET_SET_MTU = 20,
    VHOST_USER_SET_SLAVE_REQ_FD = 21,
    VHOST_USER_IOTLB_MSG = 22,
    VHOST_USER_SET_VRING_ENDIAN = 23,
    VHOST_USER_GET_CONFIG = 24,
    VHOST_USER_SET_CONFIG = 25,
    VHOST_USER_CREATE_CRYPTO_SESSION = 26,
    VHOST_USER_CLOSE_CRYPTO_SESSION = 27,
    VHOST_USER_POSTCOPY_ADVISE = 28,
    VHOST_USER_POSTCOPY_LISTEN = 29,
    VHOST_USER_POSTCOPY_END = 30,
    VHOST_USER_GET_INFLIGHT_FD = 31,
    VHOST_USER_SET_INFLIGHT_FD = 32,
    VHOST_USER_GPU_SET_SOCKET = 33,
    VHOST_USER_RESET_DEVICE = 34,
    /* Message number 35 reserved for VHOST_USER_VRING_KICK. */
    VHOST_USER_GET_MAX_MEM_SLOTS = 36,
    VHOST_USER_ADD_MEM_REG = 37,
    VHOST_USER_REM_MEM_REG = 38,
    VHOST_USER_MAX
} VhostUserRequest;

typedef enum VhostUserSlaveRequest {
    VHOST_USER_SLAVE_NONE = 0,
    VHOST_USER_SLAVE_IOTLB_MSG = 1,
    VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
    VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
    VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

typedef struct VhostUserMemRegMsg {
    uint32_t padding;
    VhostUserMemoryRegion region;
} VhostUserMemRegMsg;

typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

typedef struct VhostUserConfig {
    uint32_t offset;
    uint32_t size;
    uint32_t flags;
    uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
} VhostUserConfig;

#define VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN    512
#define VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN  64

typedef struct VhostUserCryptoSession {
    /* session id for success, -1 on errors */
    int64_t session_id;
    CryptoDevBackendSymSessionInfo session_setup_data;
    uint8_t key[VHOST_CRYPTO_SYM_CIPHER_MAX_KEY_LEN];
    uint8_t auth_key[VHOST_CRYPTO_SYM_HMAC_MAX_KEY_LEN];
} VhostUserCryptoSession;

static VhostUserConfig c __attribute__ ((unused));
#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
                                    + sizeof(c.size) \
                                    + sizeof(c.flags))

typedef struct VhostUserVringArea {
    uint64_t u64;
    uint64_t size;
    uint64_t offset;
} VhostUserVringArea;

typedef struct VhostUserInflight {
    uint64_t mmap_size;
    uint64_t mmap_offset;
    uint16_t num_queues;
    uint16_t queue_size;
} VhostUserInflight;
typedef struct {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1 << 2)
#define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
    uint32_t flags;
    uint32_t size; /* the following payload size */
} QEMU_PACKED VhostUserHeader;

typedef union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1 << 8)
    uint64_t u64;
    struct vhost_vring_state state;
    struct vhost_vring_addr addr;
    VhostUserMemory memory;
    VhostUserMemRegMsg mem_reg;
    VhostUserLog log;
    struct vhost_iotlb_msg iotlb;
    VhostUserConfig config;
    VhostUserCryptoSession session;
    VhostUserVringArea area;
    VhostUserInflight inflight;
} VhostUserPayload;

typedef struct VhostUserMsg {
    VhostUserHeader hdr;
    VhostUserPayload payload;
} QEMU_PACKED VhostUserMsg;

static VhostUserMsg m __attribute__ ((unused));
#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))

#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
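
/*
 * Wire format implied by the definitions above: every message is the
 * packed 12-byte VhostUserHeader (u32 request, u32 flags, u32 size)
 * followed by hdr.size bytes of payload. File descriptors never appear
 * in the payload itself; they travel out-of-band as SCM_RIGHTS ancillary
 * data on the unix socket (see qemu_chr_fe_set_msgfds() below).
 */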
/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

struct vhost_user {
    struct vhost_dev *dev;
    /* Shared between vhost devs of the same virtio device */
    VhostUserState *user;
    int slave_fd;
    NotifierWithReturn postcopy_notifier;
    struct PostCopyFD postcopy_fd;
    uint64_t postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
    /* Length of the region_rb and region_rb_offset arrays */
    size_t region_rb_len;
    /* RAMBlock associated with a given region */
    RAMBlock **region_rb;
    /*
     * The offset from the start of the RAMBlock to the start of the
     * vhost region.
     */
    ram_addr_t *region_rb_offset;

    /* True once we've entered postcopy_listen */
    bool postcopy_listen;

    /* Our current regions */
    int num_shadow_regions;
    struct vhost_memory_region shadow_regions[VHOST_MEMORY_MAX_NREGIONS];
};

struct scrub_regions {
    struct vhost_memory_region *region;
    int reg_idx;
    int fd_idx;
};
static bool ioeventfd_enabled(void)
{
    return !kvm_enabled() || kvm_eventfds_enabled();
}

static int vhost_user_read_header(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->hdr.request);
        return -1;
    }

    /* validate received flags */
    if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->hdr.flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    return 0;
}
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    uint8_t *p = (uint8_t *) msg;
    int r, size;

    if (vhost_user_read_header(dev, msg) < 0) {
        return -1;
    }

    /* validate message size is sane */
    if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", msg->hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->hdr.size) {
        p += VHOST_USER_HDR_SIZE;
        size = msg->hdr.size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, msg->hdr.size);
            return -1;
        }
    }

    return 0;
}
static int process_message_reply(struct vhost_dev *dev,
                                 const VhostUserMsg *msg)
{
    VhostUserMsg msg_reply;

    if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
        return 0;
    }

    if (vhost_user_read(dev, &msg_reply) < 0) {
        return -1;
    }

    if (msg_reply.hdr.request != msg->hdr.request) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     msg->hdr.request, msg_reply.hdr.request);
        return -1;
    }

    return msg_reply.payload.u64 ? -1 : 0;
}
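
/*
 * With VHOST_USER_PROTOCOL_F_REPLY_ACK negotiated, any request carrying
 * VHOST_USER_NEED_REPLY_MASK is acknowledged by the backend with a u64
 * payload: zero for success, non-zero for failure. That is the contract
 * process_message_reply() checks above; callers such as
 * vhost_user_net_set_mtu() set the flag and then collect the ack.
 */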
static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
    case VHOST_USER_NET_SET_MTU:
        return true;
    default:
        return false;
    }
}
/* most non-init callers ignore the error */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;

    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we only need to send it once; any later such request is ignored.
     */
    if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
        msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        return 0;
    }

    if (qemu_chr_fe_set_msgfds(chr, fds, fd_num) < 0) {
        error_report("Failed to set msg fds.");
        return -1;
    }

    ret = qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size);
    if (ret != size) {
        error_report("Failed to write msg."
                     " Wrote %d instead of %d.", ret, size);
        return -1;
    }

    return 0;
}
int vhost_user_gpu_set_socket(struct vhost_dev *dev, int fd)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GPU_SET_SOCKET,
        .hdr.flags = VHOST_USER_VERSION,
    };

    return vhost_user_write(dev, &msg, &fd, 1);
}

static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_LOG_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
        .payload.log.mmap_offset = 0,
        .hdr.size = sizeof(msg.payload.log),
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    if (shmfd) {
        msg.hdr.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return -1;
        }

        if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.hdr.request);
            return -1;
        }
    }

    return 0;
}
static MemoryRegion *vhost_user_get_mr_data(uint64_t addr, ram_addr_t *offset,
                                            int *fd)
{
    MemoryRegion *mr;

    assert((uintptr_t)addr == addr);
    mr = memory_region_from_host((void *)(uintptr_t)addr, offset);
    *fd = memory_region_get_fd(mr);

    return mr;
}

static void vhost_user_fill_msg_region(VhostUserMemoryRegion *dst,
                                       struct vhost_memory_region *src)
{
    assert(src != NULL && dst != NULL);
    dst->userspace_addr = src->userspace_addr;
    dst->memory_size = src->memory_size;
    dst->guest_phys_addr = src->guest_phys_addr;
}
static int vhost_user_fill_set_mem_table_msg(struct vhost_user *u,
                                             struct vhost_dev *dev,
                                             VhostUserMsg *msg,
                                             int *fds, size_t *fd_num,
                                             bool track_ramblocks)
{
    int i, fd;
    ram_addr_t offset;
    MemoryRegion *mr;
    struct vhost_memory_region *reg;
    VhostUserMemoryRegion region_buffer;

    msg->hdr.request = VHOST_USER_SET_MEM_TABLE;

    for (i = 0; i < dev->mem->nregions; ++i) {
        reg = dev->mem->regions + i;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            if (track_ramblocks) {
                assert(*fd_num < VHOST_MEMORY_MAX_NREGIONS);
                trace_vhost_user_set_mem_table_withfd(*fd_num, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[i] = offset;
                u->region_rb[i] = mr->ram_block;
            } else if (*fd_num == VHOST_MEMORY_MAX_NREGIONS) {
                error_report("Failed preparing vhost-user memory table msg");
                return -1;
            }
            vhost_user_fill_msg_region(&region_buffer, reg);
            msg->payload.memory.regions[*fd_num] = region_buffer;
            msg->payload.memory.regions[*fd_num].mmap_offset = offset;
            fds[(*fd_num)++] = fd;
        } else if (track_ramblocks) {
            u->region_rb_offset[i] = 0;
            u->region_rb[i] = NULL;
        }
    }

    msg->payload.memory.nregions = *fd_num;

    if (!*fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
    }

    msg->hdr.size = sizeof(msg->payload.memory.nregions);
    msg->hdr.size += sizeof(msg->payload.memory.padding);
    msg->hdr.size += *fd_num * sizeof(VhostUserMemoryRegion);

    return 1;
}
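
/*
 * Worked example of the resulting VHOST_USER_SET_MEM_TABLE payload size:
 * with two fd-backed regions,
 *   hdr.size = sizeof(nregions) + sizeof(padding)
 *            + 2 * sizeof(VhostUserMemoryRegion)
 *            = 4 + 4 + 2 * 32 = 72 bytes,
 * and the two file descriptors travel alongside as SCM_RIGHTS ancillary
 * data, in the same order as the entries of the regions array.
 */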
static inline bool reg_equal(struct vhost_memory_region *shadow_reg,
                             struct vhost_memory_region *vdev_reg)
{
    return shadow_reg->guest_phys_addr == vdev_reg->guest_phys_addr &&
        shadow_reg->userspace_addr == vdev_reg->userspace_addr &&
        shadow_reg->memory_size == vdev_reg->memory_size;
}

static void scrub_shadow_regions(struct vhost_dev *dev,
                                 struct scrub_regions *add_reg,
                                 int *nr_add_reg,
                                 struct scrub_regions *rem_reg,
                                 int *nr_rem_reg, uint64_t *shadow_pcb,
                                 bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    bool found[VHOST_MEMORY_MAX_NREGIONS] = {};
    struct vhost_memory_region *reg, *shadow_reg;
    int i, j, fd, add_idx = 0, rm_idx = 0, fd_num = 0;
    ram_addr_t offset;
    MemoryRegion *mr;
    bool matching;

    /*
     * Find memory regions present in our shadow state which are not in
     * the device's current memory state.
     *
     * Mark regions in both the shadow and device state as "found".
     */
    for (i = 0; i < u->num_shadow_regions; i++) {
        shadow_reg = &u->shadow_regions[i];
        matching = false;

        for (j = 0; j < dev->mem->nregions; j++) {
            reg = &dev->mem->regions[j];

            mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

            if (reg_equal(shadow_reg, reg)) {
                matching = true;
                found[j] = true;
                if (track_ramblocks) {
                    /*
                     * Reset postcopy client bases, region_rb, and
                     * region_rb_offset in case regions are removed.
                     */
                    if (fd > 0) {
                        u->region_rb_offset[j] = offset;
                        u->region_rb[j] = mr->ram_block;
                        shadow_pcb[j] = u->postcopy_client_bases[i];
                    } else {
                        u->region_rb_offset[j] = 0;
                        u->region_rb[j] = NULL;
                    }
                }
                break;
            }
        }

        /*
         * If the region was not found in the current device memory state
         * create an entry for it in the removed list.
         */
        if (!matching) {
            rem_reg[rm_idx].region = shadow_reg;
            rem_reg[rm_idx++].reg_idx = i;
        }
    }

    /*
     * For regions not marked "found", create entries in the added list.
     *
     * Note their indexes in the device memory state and the indexes of their
     * file descriptors.
     */
    for (i = 0; i < dev->mem->nregions; i++) {
        reg = &dev->mem->regions[i];
        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);
        if (fd > 0) {
            ++fd_num;
        }

        /*
         * If the region was in both the shadow and device state we don't
         * need to send a VHOST_USER_ADD_MEM_REG message for it.
         */
        if (found[i]) {
            continue;
        }

        add_reg[add_idx].region = reg;
        add_reg[add_idx].reg_idx = i;
        add_reg[add_idx++].fd_idx = fd_num;
    }
    *nr_rem_reg = rm_idx;
    *nr_add_reg = add_idx;

    return;
}
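
/*
 * Example: if the shadow table holds regions {A, B} and the device's new
 * memory state is {B, C}, scrub_shadow_regions() reports A in rem_reg
 * (present only in the shadow) and C in add_reg (present only in the
 * device state); B is marked "found" and needs no message at all.
 */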
static int send_remove_regions(struct vhost_dev *dev,
                               struct scrub_regions *remove_reg,
                               int nr_rem_reg, VhostUserMsg *msg,
                               bool reply_supported)
{
    struct vhost_user *u = dev->opaque;
    struct vhost_memory_region *shadow_reg;
    int i, fd, shadow_reg_idx, ret;
    ram_addr_t offset;
    VhostUserMemoryRegion region_buffer;

    /*
     * The regions in remove_reg appear in the same order they do in the
     * shadow table. Therefore we can minimize memory copies by iterating
     * through remove_reg backwards.
     */
    for (i = nr_rem_reg - 1; i >= 0; i--) {
        shadow_reg = remove_reg[i].region;
        shadow_reg_idx = remove_reg[i].reg_idx;

        vhost_user_get_mr_data(shadow_reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            msg->hdr.request = VHOST_USER_REM_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, shadow_reg);
            msg->payload.mem_reg.region = region_buffer;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        }

        /*
         * At this point we know the backend has unmapped the region. It is
         * now safe to remove it from the shadow table. Only the entries
         * after the removed one need to move down.
         */
        memmove(&u->shadow_regions[shadow_reg_idx],
                &u->shadow_regions[shadow_reg_idx + 1],
                sizeof(struct vhost_memory_region) *
                (u->num_shadow_regions - shadow_reg_idx - 1));
        u->num_shadow_regions--;
    }

    return 0;
}
static int send_add_regions(struct vhost_dev *dev,
                            struct scrub_regions *add_reg, int nr_add_reg,
                            VhostUserMsg *msg, uint64_t *shadow_pcb,
                            bool reply_supported, bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    int i, fd, ret, reg_idx, reg_fd_idx;
    struct vhost_memory_region *reg;
    MemoryRegion *mr;
    ram_addr_t offset;
    VhostUserMsg msg_reply;
    VhostUserMemoryRegion region_buffer;

    for (i = 0; i < nr_add_reg; i++) {
        reg = add_reg[i].region;
        reg_idx = add_reg[i].reg_idx;
        reg_fd_idx = add_reg[i].fd_idx;

        mr = vhost_user_get_mr_data(reg->userspace_addr, &offset, &fd);

        if (fd > 0) {
            if (track_ramblocks) {
                trace_vhost_user_set_mem_table_withfd(reg_fd_idx, mr->name,
                                                      reg->memory_size,
                                                      reg->guest_phys_addr,
                                                      reg->userspace_addr,
                                                      offset);
                u->region_rb_offset[reg_idx] = offset;
                u->region_rb[reg_idx] = mr->ram_block;
            }
            msg->hdr.request = VHOST_USER_ADD_MEM_REG;
            vhost_user_fill_msg_region(&region_buffer, reg);
            msg->payload.mem_reg.region = region_buffer;
            msg->payload.mem_reg.region.mmap_offset = offset;

            if (vhost_user_write(dev, msg, &fd, 1) < 0) {
                return -1;
            }

            if (track_ramblocks) {
                uint64_t reply_gpa;

                if (vhost_user_read(dev, &msg_reply) < 0) {
                    return -1;
                }

                reply_gpa = msg_reply.payload.mem_reg.region.guest_phys_addr;

                if (msg_reply.hdr.request != VHOST_USER_ADD_MEM_REG) {
                    error_report("%s: Received unexpected msg type. "
                                 "Expected %d received %d", __func__,
                                 VHOST_USER_ADD_MEM_REG,
                                 msg_reply.hdr.request);
                    return -1;
                }

                /*
                 * We're using the same structure, just reusing one of the
                 * fields, so it should be the same size.
                 */
                if (msg_reply.hdr.size != msg->hdr.size) {
                    error_report("%s: Unexpected size for postcopy reply "
                                 "%d vs %d", __func__, msg_reply.hdr.size,
                                 msg->hdr.size);
                    return -1;
                }

                /* Get the postcopy client base from the backend's reply. */
                if (reply_gpa == dev->mem->regions[reg_idx].guest_phys_addr) {
                    shadow_pcb[reg_idx] =
                        msg_reply.payload.mem_reg.region.userspace_addr;
                    trace_vhost_user_set_mem_table_postcopy(
                        msg_reply.payload.mem_reg.region.userspace_addr,
                        msg->payload.mem_reg.region.userspace_addr,
                        reg_fd_idx, reg_idx);
                } else {
                    error_report("%s: invalid postcopy reply for region. "
                                 "Got guest physical address %" PRIX64 ", "
                                 "expected %" PRIX64, __func__, reply_gpa,
                                 dev->mem->regions[reg_idx].guest_phys_addr);
                    return -1;
                }
            } else if (reply_supported) {
                ret = process_message_reply(dev, msg);
                if (ret) {
                    return ret;
                }
            }
        } else if (track_ramblocks) {
            u->region_rb_offset[reg_idx] = 0;
            u->region_rb[reg_idx] = NULL;
        }

        /*
         * At this point, we know the backend has mapped in the new
         * region, if the region has a valid file descriptor.
         *
         * The region should now be added to the shadow table.
         */
        u->shadow_regions[u->num_shadow_regions].guest_phys_addr =
            reg->guest_phys_addr;
        u->shadow_regions[u->num_shadow_regions].userspace_addr =
            reg->userspace_addr;
        u->shadow_regions[u->num_shadow_regions].memory_size =
            reg->memory_size;
        u->num_shadow_regions++;
    }

    return 0;
}
static int vhost_user_add_remove_regions(struct vhost_dev *dev,
                                         VhostUserMsg *msg,
                                         bool reply_supported,
                                         bool track_ramblocks)
{
    struct vhost_user *u = dev->opaque;
    struct scrub_regions add_reg[VHOST_MEMORY_MAX_NREGIONS];
    struct scrub_regions rem_reg[VHOST_MEMORY_MAX_NREGIONS];
    uint64_t shadow_pcb[VHOST_MEMORY_MAX_NREGIONS] = {};
    int nr_add_reg, nr_rem_reg;

    msg->hdr.size = sizeof(msg->payload.mem_reg.padding) +
        sizeof(VhostUserMemoryRegion);

    /* Find the regions which need to be removed or added. */
    scrub_shadow_regions(dev, add_reg, &nr_add_reg, rem_reg, &nr_rem_reg,
                         shadow_pcb, track_ramblocks);

    if (nr_rem_reg && send_remove_regions(dev, rem_reg, nr_rem_reg, msg,
                reply_supported) < 0)
    {
        goto err;
    }

    if (nr_add_reg && send_add_regions(dev, add_reg, nr_add_reg, msg,
                shadow_pcb, reply_supported, track_ramblocks) < 0)
    {
        goto err;
    }

    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal with
         * any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg->hdr.size = sizeof(msg->payload.u64);
        msg->payload.u64 = 0; /* OK */

        if (vhost_user_write(dev, msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;

err:
    if (track_ramblocks) {
        memcpy(u->postcopy_client_bases, shadow_pcb,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
    }

    return -1;
}
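
/*
 * Taken together, the VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS path above
 * replaces one monolithic VHOST_USER_SET_MEM_TABLE message with a sequence
 * of per-region messages: one VHOST_USER_REM_MEM_REG per region that left
 * the guest memory map, then one VHOST_USER_ADD_MEM_REG per region that
 * entered it, each carrying a single file descriptor. Unchanged regions
 * generate no traffic, and the shadow table always mirrors what the
 * backend currently has mapped.
 */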
static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev,
                                             struct vhost_memory *mem,
                                             bool reply_supported,
                                             bool config_mem_slots)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg_reply;
    int region_i, msg_i;

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (u->region_rb_len < dev->mem->nregions) {
        u->region_rb = g_renew(RAMBlock*, u->region_rb, dev->mem->nregions);
        u->region_rb_offset = g_renew(ram_addr_t, u->region_rb_offset,
                                      dev->mem->nregions);
        memset(&(u->region_rb[u->region_rb_len]), '\0',
               sizeof(RAMBlock *) * (dev->mem->nregions - u->region_rb_len));
        memset(&(u->region_rb_offset[u->region_rb_len]), '\0',
               sizeof(ram_addr_t) * (dev->mem->nregions - u->region_rb_len));
        u->region_rb_len = dev->mem->nregions;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          true) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              true) < 0) {
            return -1;
        }

        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (vhost_user_read(dev, &msg_reply) < 0) {
            return -1;
        }

        if (msg_reply.hdr.request != VHOST_USER_SET_MEM_TABLE) {
            error_report("%s: Received unexpected msg type. "
                         "Expected %d received %d", __func__,
                         VHOST_USER_SET_MEM_TABLE, msg_reply.hdr.request);
            return -1;
        }

        /*
         * We're using the same structure, just reusing one of the
         * fields, so it should be the same size.
         */
        if (msg_reply.hdr.size != msg.hdr.size) {
            error_report("%s: Unexpected size for postcopy reply "
                         "%d vs %d", __func__, msg_reply.hdr.size,
                         msg.hdr.size);
            return -1;
        }

        memset(u->postcopy_client_bases, 0,
               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);

        /*
         * They're in the same order as the regions that were sent,
         * but some of the regions were skipped (above) if they
         * didn't have fds.
         */
        for (msg_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            if (msg_i < fd_num &&
                msg_reply.payload.memory.regions[msg_i].guest_phys_addr ==
                dev->mem->regions[region_i].guest_phys_addr) {
                u->postcopy_client_bases[region_i] =
                    msg_reply.payload.memory.regions[msg_i].userspace_addr;
                trace_vhost_user_set_mem_table_postcopy(
                    msg_reply.payload.memory.regions[msg_i].userspace_addr,
                    msg.payload.memory.regions[msg_i].userspace_addr,
                    msg_i, region_i);
                msg_i++;
            }
        }
        if (msg_i != fd_num) {
            error_report("%s: postcopy reply not fully consumed "
                         "%d vs %zd",
                         __func__, msg_i, fd_num);
            return -1;
        }

        /*
         * Now we've registered this with the postcopy code, we ack to the
         * client, because now we're in the position to be able to deal
         * with any faults it generates.
         */
        /* TODO: Use this for failure cases as well with a bad value. */
        msg.hdr.size = sizeof(msg.payload.u64);
        msg.payload.u64 = 0; /* OK */
        if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
            return -1;
        }
    }

    return 0;
}
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    struct vhost_user *u = dev->opaque;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool do_postcopy = u->postcopy_listen && u->postcopy_fd.handler;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
    bool config_mem_slots =
        virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS);

    if (do_postcopy) {
        /*
         * Postcopy has enough differences that it's best done in its own
         * version.
         */
        return vhost_user_set_mem_table_postcopy(dev, mem, reply_supported,
                                                 config_mem_slots);
    }

    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (config_mem_slots) {
        if (vhost_user_add_remove_regions(dev, &msg, reply_supported,
                                          false) < 0) {
            return -1;
        }
    } else {
        if (vhost_user_fill_set_mem_table_msg(u, dev, &msg, fds, &fd_num,
                                              false) < 0) {
            return -1;
        }

        if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
            return -1;
        }

        if (reply_supported) {
            return process_message_reply(dev, &msg);
        }
    }

    return 0;
}
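
/*
 * Summary of the four memory-table update paths chosen above:
 *
 *   postcopy?  CONFIGURE_MEM_SLOTS?  behaviour
 *   no         no                    one SET_MEM_TABLE message
 *   no         yes                   per-region ADD/REM_MEM_REG messages
 *   yes        no                    SET_MEM_TABLE, replied to with the
 *                                    backend's mapped (client base) addresses
 *   yes        yes                   per-region messages, each ADD replied
 *                                    to with the backend's mapped address
 */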
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ADDR,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .hdr.size = sizeof(msg.payload.addr),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    bool cross_endian = virtio_has_feature(dev->protocol_features,
                                           VHOST_USER_PROTOCOL_F_CROSS_ENDIAN);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_VRING_ENDIAN,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (!cross_endian) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);
}

static void vhost_user_host_notifier_restore(struct vhost_dev *dev,
                                             int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && !n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true);
        n->set = true;
    }
}

static void vhost_user_host_notifier_remove(struct vhost_dev *dev,
                                            int queue_idx)
{
    struct vhost_user *u = dev->opaque;
    VhostUserHostNotifier *n = &u->user->notifier[queue_idx];
    VirtIODevice *vdev = dev->vdev;

    if (n->addr && n->set) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        n->set = false;
    }
}

static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    vhost_user_host_notifier_restore(dev, ring->index);

    return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    int i;

    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        return -1;
    }

    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num = enable,
        };

        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
    }

    return 0;
}

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_VRING_BASE,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .hdr.size = sizeof(msg.payload.state),
    };

    vhost_user_host_notifier_remove(dev, ring->index);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.state)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *ring = msg.payload.state;

    return 0;
}
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return -1;
    }

    return 0;
}
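
/*
 * In the message above the vring index occupies the low byte of the u64
 * payload (VHOST_USER_VRING_IDX_MASK); setting VHOST_USER_VRING_NOFD_MASK
 * (bit 8) tells the backend that no file descriptor accompanies the
 * request, e.g. because ioeventfds are unavailable or the fd is invalid.
 */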
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .hdr.size = sizeof(msg.payload.u64),
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);
}

static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .hdr.request = request,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.u64)) {
        error_report("Received bad msg size.");
        return -1;
    }

    *u64 = msg.payload.u64;

    return 0;
}

static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_OWNER,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}

static int vhost_user_get_max_memslots(struct vhost_dev *dev,
                                       uint64_t *max_memslots)
{
    uint64_t backend_max_memslots;
    int err;

    err = vhost_user_get_u64(dev, VHOST_USER_GET_MAX_MEM_SLOTS,
                             &backend_max_memslots);
    if (err < 0) {
        return err;
    }

    *max_memslots = backend_max_memslots;

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.flags = VHOST_USER_VERSION,
    };

    msg.hdr.request = virtio_has_feature(dev->protocol_features,
                                         VHOST_USER_PROTOCOL_F_RESET_DEVICE)
        ? VHOST_USER_RESET_DEVICE
        : VHOST_USER_RESET_OWNER;

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    return 0;
}
static int vhost_user_slave_handle_config_change(struct vhost_dev *dev)
{
    int ret = -1;

    if (!dev->config_ops) {
        return -1;
    }

    if (dev->config_ops->vhost_dev_config_notifier) {
        ret = dev->config_ops->vhost_dev_config_notifier(dev);
    }

    return ret;
}

static int vhost_user_slave_handle_vring_host_notifier(struct vhost_dev *dev,
                                                       VhostUserVringArea *area,
                                                       int fd)
{
    int queue_idx = area->u64 & VHOST_USER_VRING_IDX_MASK;
    size_t page_size = qemu_real_host_page_size;
    struct vhost_user *u = dev->opaque;
    VhostUserState *user = u->user;
    VirtIODevice *vdev = dev->vdev;
    VhostUserHostNotifier *n;
    void *addr;
    char *name;

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) ||
        vdev == NULL || queue_idx >= virtio_get_num_queues(vdev)) {
        return -1;
    }

    n = &user->notifier[queue_idx];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }

    if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
        return 0;
    }

    /* Sanity check. */
    if (area->size != page_size) {
        return -1;
    }

    addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                fd, area->offset);
    if (addr == MAP_FAILED) {
        return -1;
    }

    name = g_strdup_printf("vhost-user/host-notifier@%p mmaps[%d]",
                           user, queue_idx);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_idx, &n->mr, true)) {
        munmap(addr, page_size);
        return -1;
    }

    n->addr = addr;
    n->set = true;

    return 0;
}
static void slave_read(void *opaque)
{
    struct vhost_dev *dev = opaque;
    struct vhost_user *u = dev->opaque;
    VhostUserHeader hdr = { 0, };
    VhostUserPayload payload = { 0, };
    int size, ret = 0;
    struct iovec iov;
    struct msghdr msgh;
    int fd[VHOST_USER_SLAVE_MAX_FDS];
    char control[CMSG_SPACE(sizeof(fd))];
    struct cmsghdr *cmsg;
    int i, fdsize = 0;

    memset(&msgh, 0, sizeof(msgh));
    msgh.msg_iov = &iov;
    msgh.msg_iovlen = 1;
    msgh.msg_control = control;
    msgh.msg_controllen = sizeof(control);

    memset(fd, -1, sizeof(fd));

    /* Read header */
    iov.iov_base = &hdr;
    iov.iov_len = VHOST_USER_HDR_SIZE;

    do {
        size = recvmsg(u->slave_fd, &msgh, 0);
    } while (size < 0 && (errno == EINTR || errno == EAGAIN));

    if (size != VHOST_USER_HDR_SIZE) {
        error_report("Failed to read from slave.");
        goto err;
    }

    if (msgh.msg_flags & MSG_CTRUNC) {
        error_report("Truncated message.");
        goto err;
    }

    for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
         cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
        if (cmsg->cmsg_level == SOL_SOCKET &&
            cmsg->cmsg_type == SCM_RIGHTS) {
            fdsize = cmsg->cmsg_len - CMSG_LEN(0);
            memcpy(fd, CMSG_DATA(cmsg), fdsize);
            break;
        }
    }

    if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %d exceeds the maximum %zu.", hdr.size,
                     VHOST_USER_PAYLOAD_SIZE);
        goto err;
    }

    /* Read payload */
    do {
        size = read(u->slave_fd, &payload, hdr.size);
    } while (size < 0 && (errno == EINTR || errno == EAGAIN));

    if (size != hdr.size) {
        error_report("Failed to read payload from slave.");
        goto err;
    }

    switch (hdr.request) {
    case VHOST_USER_SLAVE_IOTLB_MSG:
        ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
        break;
    case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
        ret = vhost_user_slave_handle_config_change(dev);
        break;
    case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
        ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                          fd[0]);
        break;
    default:
        error_report("Received unexpected msg type: %d.", hdr.request);
        ret = -EINVAL;
    }

    /* Close the remaining file descriptors. */
    for (i = 0; i < fdsize; i++) {
        if (fd[i] != -1) {
            close(fd[i]);
        }
    }

    /*
     * REPLY_ACK feature handling. Other reply types have to be managed
     * directly in their request handlers.
     */
    if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
        struct iovec iovec[2];

        hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
        hdr.flags |= VHOST_USER_REPLY_MASK;

        payload.u64 = !!ret;
        hdr.size = sizeof(payload.u64);

        iovec[0].iov_base = &hdr;
        iovec[0].iov_len = VHOST_USER_HDR_SIZE;
        iovec[1].iov_base = &payload;
        iovec[1].iov_len = hdr.size;

        do {
            size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
        } while (size < 0 && (errno == EINTR || errno == EAGAIN));

        if (size != VHOST_USER_HDR_SIZE + hdr.size) {
            error_report("Failed to send msg reply to slave.");
            goto err;
        }
    }

    return;

err:
    qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
    close(u->slave_fd);
    u->slave_fd = -1;
    for (i = 0; i < fdsize; i++) {
        if (fd[i] != -1) {
            close(fd[i]);
        }
    }
    return;
}
static int vhost_setup_slave_channel(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD,
        .hdr.flags = VHOST_USER_VERSION,
    };
    struct vhost_user *u = dev->opaque;
    int sv[2], ret = 0;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
        return 0;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_report("socketpair() failed");
        return -1;
    }

    u->slave_fd = sv[0];
    qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev);

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    ret = vhost_user_write(dev, &msg, &sv[1], 1);
    if (ret) {
        goto out;
    }

    if (reply_supported) {
        ret = process_message_reply(dev, &msg);
    }

out:
    close(sv[1]);
    if (ret) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }

    return ret;
}
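
/*
 * The slave channel set up above is a plain socketpair: QEMU keeps sv[0]
 * and polls it with slave_read(), while sv[1] is handed to the backend in
 * the VHOST_USER_SET_SLAVE_REQ_FD message. The backend then uses its end
 * to initiate requests of its own (IOTLB messages, config-change and host
 * notifier messages), in the reverse direction of the main channel.
 */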
#ifdef CONFIG_LINUX
/*
 * Called back from the postcopy fault thread when a fault is received on our
 * ufd.
 * TODO: This is Linux specific
 */
static int vhost_user_postcopy_fault_handler(struct PostCopyFD *pcfd,
                                             void *ufd)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    struct uffd_msg *msg = ufd;
    uint64_t faultaddr = msg->arg.pagefault.address;
    RAMBlock *rb = NULL;
    uint64_t rb_offset;
    int i;

    trace_vhost_user_postcopy_fault_handler(pcfd->idstr, faultaddr,
                                            dev->mem->nregions);
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        trace_vhost_user_postcopy_fault_handler_loop(i,
                u->postcopy_client_bases[i], dev->mem->regions[i].memory_size);
        if (faultaddr >= u->postcopy_client_bases[i]) {
            /* Offset of the fault address in the vhost region */
            uint64_t region_offset = faultaddr - u->postcopy_client_bases[i];
            if (region_offset < dev->mem->regions[i].memory_size) {
                rb_offset = region_offset + u->region_rb_offset[i];
                trace_vhost_user_postcopy_fault_handler_found(i,
                        region_offset, rb_offset);
                rb = u->region_rb[i];
                return postcopy_request_shared_page(pcfd, rb, faultaddr,
                                                    rb_offset);
            }
        }
    }
    error_report("%s: Failed to find region for fault %" PRIx64,
                 __func__, faultaddr);
    return -1;
}
static int vhost_user_postcopy_waker(struct PostCopyFD *pcfd, RAMBlock *rb,
                                     uint64_t offset)
{
    struct vhost_dev *dev = pcfd->data;
    struct vhost_user *u = dev->opaque;
    int i;

    trace_vhost_user_postcopy_waker(qemu_ram_get_idstr(rb), offset);

    if (!u) {
        return 0;
    }
    /* Translate the offset into an address in the client's address space */
    for (i = 0; i < MIN(dev->mem->nregions, u->region_rb_len); i++) {
        if (u->region_rb[i] == rb &&
            offset >= u->region_rb_offset[i] &&
            offset < (u->region_rb_offset[i] +
                      dev->mem->regions[i].memory_size)) {
            uint64_t client_addr = (offset - u->region_rb_offset[i]) +
                                   u->postcopy_client_bases[i];
            trace_vhost_user_postcopy_waker_found(client_addr);
            return postcopy_wake_shared(pcfd, client_addr, rb);
        }
    }
    trace_vhost_user_postcopy_waker_nomatch(qemu_ram_get_idstr(rb), offset);
    return 0;
}
#endif
/*
 * Called at the start of an inbound postcopy on reception of the
 * 'advise' command.
 */
static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
{
#ifdef CONFIG_LINUX
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    int ufd;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_ADVISE,
        .hdr.flags = VHOST_USER_VERSION,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_advise to vhost");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_setg(errp, "Failed to get postcopy_advise reply from vhost");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_POSTCOPY_ADVISE) {
        error_setg(errp, "Unexpected msg type. Expected %d received %d",
                   VHOST_USER_POSTCOPY_ADVISE, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size) {
        error_setg(errp, "Received bad msg size.");
        return -1;
    }
    ufd = qemu_chr_fe_get_msgfd(chr);
    if (ufd < 0) {
        error_setg(errp, "%s: Failed to get ufd", __func__);
        return -1;
    }
    qemu_set_nonblock(ufd);

    /* register ufd with userfault thread */
    u->postcopy_fd.fd = ufd;
    u->postcopy_fd.data = dev;
    u->postcopy_fd.handler = vhost_user_postcopy_fault_handler;
    u->postcopy_fd.waker = vhost_user_postcopy_waker;
    u->postcopy_fd.idstr = "vhost-user"; /* Need to find unique name */
    postcopy_register_shared_ufd(&u->postcopy_fd);
    return 0;
#else
    error_setg(errp, "Postcopy not supported on non-Linux systems");
    return -1;
#endif
}

/*
 * Called at the switch to postcopy on reception of the 'listen' command.
 */
static int vhost_user_postcopy_listen(struct vhost_dev *dev, Error **errp)
{
    struct vhost_user *u = dev->opaque;
    int ret;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_LISTEN,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    u->postcopy_listen = true;
    trace_vhost_user_postcopy_listen();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_listen to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_listen");
        return ret;
    }

    return 0;
}

/*
 * Called at the end of postcopy
 */
static int vhost_user_postcopy_end(struct vhost_dev *dev, Error **errp)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_POSTCOPY_END,
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
    };
    int ret;
    struct vhost_user *u = dev->opaque;

    trace_vhost_user_postcopy_end_entry();
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_setg(errp, "Failed to send postcopy_end to vhost");
        return -1;
    }

    ret = process_message_reply(dev, &msg);
    if (ret) {
        error_setg(errp, "Failed to receive reply to postcopy_end");
        return ret;
    }
    postcopy_unregister_shared_ufd(&u->postcopy_fd);
    close(u->postcopy_fd.fd);
    u->postcopy_fd.handler = NULL;

    trace_vhost_user_postcopy_end_exit();

    return 0;
}

static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
                                        void *opaque)
{
    struct PostcopyNotifyData *pnd = opaque;
    struct vhost_user *u = container_of(notifier, struct vhost_user,
                                        postcopy_notifier);
    struct vhost_dev *dev = u->dev;

    switch (pnd->reason) {
    case POSTCOPY_NOTIFY_PROBE:
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_PAGEFAULT)) {
            /* TODO: Get the device name into this error somehow */
            error_setg(pnd->errp,
                       "vhost-user backend not capable of postcopy");
            return -ENOENT;
        }
        break;

    case POSTCOPY_NOTIFY_INBOUND_ADVISE:
        return vhost_user_postcopy_advise(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_LISTEN:
        return vhost_user_postcopy_listen(dev, pnd->errp);

    case POSTCOPY_NOTIFY_INBOUND_END:
        return vhost_user_postcopy_end(dev, pnd->errp);

    default:
        /* We ignore notifications we don't know */
        break;
    }

    return 0;
}
static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque)
{
    uint64_t features, protocol_features, ram_slots;
    struct vhost_user *u;
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = g_new0(struct vhost_user, 1);
    u->user = opaque;
    u->slave_fd = -1;
    u->dev = dev;
    dev->opaque = u;

    err = vhost_user_get_features(dev, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &protocol_features);
        if (err < 0) {
            return err;
        }

        dev->protocol_features =
            protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK;

        if (!dev->config_ops || !dev->config_ops->vhost_dev_config_notifier) {
            /* Don't acknowledge CONFIG feature if device doesn't support it */
            dev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
        } else if (!(protocol_features &
                    (1ULL << VHOST_USER_PROTOCOL_F_CONFIG))) {
            error_report("Device expects VHOST_USER_PROTOCOL_F_CONFIG "
                         "but backend does not support it.");
            return -1;
        }

        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
        if (err < 0) {
            return err;
        }

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }

        if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) &&
                !(virtio_has_feature(dev->protocol_features,
                      VHOST_USER_PROTOCOL_F_SLAVE_REQ) &&
                  virtio_has_feature(dev->protocol_features,
                      VHOST_USER_PROTOCOL_F_REPLY_ACK))) {
            error_report("IOMMU support requires reply-ack and "
                         "slave-req protocol features.");
            return -1;
        }

        /* get max memory regions if backend supports configurable RAM slots */
        if (!virtio_has_feature(dev->protocol_features,
                                VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) {
            u->user->memory_slots = VHOST_MEMORY_MAX_NREGIONS;
        } else {
            err = vhost_user_get_max_memslots(dev, &ram_slots);
            if (err < 0) {
                return err;
            }

            if (ram_slots < u->user->memory_slots) {
                error_report("The backend specified a max ram slots limit "
                             "of %" PRIu64", when the prior validated limit was %d. "
                             "This limit should never decrease.", ram_slots,
                             u->user->memory_slots);
                return -1;
            }

            u->user->memory_slots = MIN(ram_slots, VHOST_MEMORY_MAX_NREGIONS);
        }
    }

    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

    if (dev->vq_index == 0) {
        err = vhost_setup_slave_channel(dev);
        if (err < 0) {
            return err;
        }
    }

    u->postcopy_notifier.notify = vhost_user_postcopy_notifier;
    postcopy_add_notifier(&u->postcopy_notifier);

    return 0;
}
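
/*
 * Init handshake as implemented above, in order:
 *
 *   1. VHOST_USER_GET_FEATURES.
 *   2. If VHOST_USER_F_PROTOCOL_FEATURES was offered:
 *      GET_PROTOCOL_FEATURES / SET_PROTOCOL_FEATURES, then
 *      GET_QUEUE_NUM (if F_MQ) and GET_MAX_MEM_SLOTS
 *      (if F_CONFIGURE_MEM_SLOTS).
 *   3. Only for the device handling vq_index 0, the slave channel is
 *      created via VHOST_USER_SET_SLAVE_REQ_FD.
 */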
static int vhost_user_backend_cleanup(struct vhost_dev *dev)
{
    struct vhost_user *u;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    u = dev->opaque;
    if (u->postcopy_notifier.notify) {
        postcopy_remove_notifier(&u->postcopy_notifier);
        u->postcopy_notifier.notify = NULL;
    }
    u->postcopy_listen = false;
    if (u->postcopy_fd.handler) {
        postcopy_unregister_shared_ufd(&u->postcopy_fd);
        close(u->postcopy_fd.fd);
        u->postcopy_fd.handler = NULL;
    }
    if (u->slave_fd >= 0) {
        qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL);
        close(u->slave_fd);
        u->slave_fd = -1;
    }
    g_free(u->region_rb);
    u->region_rb = NULL;
    g_free(u->region_rb_offset);
    u->region_rb_offset = NULL;
    u->region_rb_len = 0;
    g_free(u);
    dev->opaque = 0;

    return 0;
}

static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    return u->user->memory_slots;
}

static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    return virtio_has_feature(dev->protocol_features,
                              VHOST_USER_PROTOCOL_F_LOG_SHMFD);
}
static int vhost_user_migration_done(struct vhost_dev *dev, char *mac_addr)
{
    VhostUserMsg msg = { };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If the guest supports GUEST_ANNOUNCE, do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* If the backend supports VHOST_USER_PROTOCOL_F_RARP, ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.hdr.request = VHOST_USER_SEND_RARP;
        msg.hdr.flags = VHOST_USER_VERSION;
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
        msg.hdr.size = sizeof(msg.payload.u64);

        return vhost_user_write(dev, &msg, NULL, 0);
    }
    return -1;
}
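
/*
 * Note on the VHOST_USER_SEND_RARP payload above: the six MAC address
 * bytes are copied into the low bytes of the u64 payload field, so the
 * message still uses the standard 8-byte u64 payload layout on the wire.
 */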
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
    ram_addr_t offset;
    int mfd, rfd;

    (void)vhost_user_get_mr_data(start1, &offset, &mfd);
    (void)vhost_user_get_mr_data(start2, &offset, &rfd);

    return mfd == rfd;
}

static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu)
{
    VhostUserMsg msg;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_NET_MTU))) {
        return 0;
    }

    msg.hdr.request = VHOST_USER_NET_SET_MTU;
    msg.payload.u64 = mtu;
    msg.hdr.size = sizeof(msg.payload.u64);
    msg.hdr.flags = VHOST_USER_VERSION;
    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    /* If reply_ack is supported, the slave has to ack that the specified MTU is valid */
    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev,
                                            struct vhost_iotlb_msg *imsg)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_IOTLB_MSG,
        .hdr.size = sizeof(msg.payload.iotlb),
        .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK,
        .payload.iotlb = *imsg,
    };

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -EFAULT;
    }

    return process_message_reply(dev, &msg);
}


static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled)
{
    /* No-op as the receive channel is not dedicated to IOTLB messages. */
}

static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (config_len > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    msg.payload.config.offset = 0;
    msg.payload.config.size = config_len;
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_CONFIG) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_CONFIG, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) {
        error_report("Received bad msg size.");
        return -1;
    }

    memcpy(config, msg.payload.config.region, config_len);

    return 0;
}
static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size, uint32_t flags)
{
    uint8_t *p;
    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);

    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_CONFIG,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size,
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_CONFIG)) {
        return -1;
    }

    if (reply_supported) {
        msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
    }

    if (size > VHOST_USER_MAX_CONFIG_SIZE) {
        return -1;
    }

    msg.payload.config.offset = offset;
    msg.payload.config.size = size;
    msg.payload.config.flags = flags;
    p = msg.payload.config.region;
    memcpy(p, data, size);

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (reply_supported) {
        return process_message_reply(dev, &msg);
    }

    return 0;
}
static int vhost_user_crypto_create_session(struct vhost_dev *dev,
                                            void *session_info,
                                            uint64_t *session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    CryptoDevBackendSymSessionInfo *sess_info = session_info;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CREATE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.session),
    };

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    memcpy(&msg.payload.session.session_setup_data, sess_info,
           sizeof(CryptoDevBackendSymSessionInfo));
    if (sess_info->key_len) {
        memcpy(&msg.payload.session.key, sess_info->cipher_key,
               sess_info->key_len);
    }
    if (sess_info->auth_key_len > 0) {
        memcpy(&msg.payload.session.auth_key, sess_info->auth_key,
               sess_info->auth_key_len);
    }
    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() failed, create session failed");
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        error_report("vhost_user_read() failed, create session failed");
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_CREATE_CRYPTO_SESSION) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_CREATE_CRYPTO_SESSION, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.session)) {
        error_report("Received bad msg size.");
        return -1;
    }

    if (msg.payload.session.session_id < 0) {
        error_report("Bad session id: %" PRId64 "",
                     msg.payload.session.session_id);
        return -1;
    }
    *session_id = msg.payload.session.session_id;

    return 0;
}

static int
vhost_user_crypto_close_session(struct vhost_dev *dev, uint64_t session_id)
{
    bool crypto_session = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_CRYPTO_SESSION);
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_CLOSE_CRYPTO_SESSION,
        .hdr.flags = VHOST_USER_VERSION,
        .hdr.size = sizeof(msg.payload.u64),
    };
    msg.payload.u64 = session_id;

    if (!crypto_session) {
        error_report("vhost-user trying to send unhandled ioctl");
        return -1;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        error_report("vhost_user_write() failed, close session failed");
        return -1;
    }

    return 0;
}
static bool vhost_user_mem_section_filter(struct vhost_dev *dev,
                                          MemoryRegionSection *section)
{
    bool result;

    result = memory_region_get_fd(section->mr) >= 0;

    return result;
}

static int vhost_user_get_inflight_fd(struct vhost_dev *dev,
                                      uint16_t queue_size,
                                      struct vhost_inflight *inflight)
{
    void *addr;
    int fd;
    struct vhost_user *u = dev->opaque;
    CharBackend *chr = u->user->chr;
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_GET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
        return -1;
    }

    if (vhost_user_read(dev, &msg) < 0) {
        return -1;
    }

    if (msg.hdr.request != VHOST_USER_GET_INFLIGHT_FD) {
        error_report("Received unexpected msg type. "
                     "Expected %d received %d",
                     VHOST_USER_GET_INFLIGHT_FD, msg.hdr.request);
        return -1;
    }

    if (msg.hdr.size != sizeof(msg.payload.inflight)) {
        error_report("Received bad msg size.");
        return -1;
    }

    if (!msg.payload.inflight.mmap_size) {
        return 0;
    }

    fd = qemu_chr_fe_get_msgfd(chr);
    if (fd < 0) {
        error_report("Failed to get mem fd");
        return -1;
    }

    addr = mmap(0, msg.payload.inflight.mmap_size, PROT_READ | PROT_WRITE,
                MAP_SHARED, fd, msg.payload.inflight.mmap_offset);

    if (addr == MAP_FAILED) {
        error_report("Failed to mmap mem fd");
        close(fd);
        return -1;
    }

    inflight->addr = addr;
    inflight->fd = fd;
    inflight->size = msg.payload.inflight.mmap_size;
    inflight->offset = msg.payload.inflight.mmap_offset;
    inflight->queue_size = queue_size;

    return 0;
}
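
/*
 * The mmap'd buffer negotiated above is shared between QEMU and the
 * backend; per the design of VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD, the
 * backend records in it which descriptors are still in flight, and the
 * same buffer is handed back via VHOST_USER_SET_INFLIGHT_FD below so a
 * restarted backend can resubmit requests that never completed.
 */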
static int vhost_user_set_inflight_fd(struct vhost_dev *dev,
                                      struct vhost_inflight *inflight)
{
    VhostUserMsg msg = {
        .hdr.request = VHOST_USER_SET_INFLIGHT_FD,
        .hdr.flags = VHOST_USER_VERSION,
        .payload.inflight.mmap_size = inflight->size,
        .payload.inflight.mmap_offset = inflight->offset,
        .payload.inflight.num_queues = dev->nvqs,
        .payload.inflight.queue_size = inflight->queue_size,
        .hdr.size = sizeof(msg.payload.inflight),
    };

    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)) {
        return 0;
    }

    if (vhost_user_write(dev, &msg, &inflight->fd, 1) < 0) {
        return -1;
    }

    return 0;
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
{
    if (user->chr) {
        error_setg(errp, "Cannot initialize vhost-user state");
        return false;
    }
    user->chr = chr;
    user->memory_slots = 0;
    return true;
}

void vhost_user_cleanup(VhostUserState *user)
{
    int i;

    if (!user->chr) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (user->notifier[i].addr) {
            object_unparent(OBJECT(&user->notifier[i].mr));
            munmap(user->notifier[i].addr, qemu_real_host_page_size);
            user->notifier[i].addr = NULL;
        }
    }
    user->chr = NULL;
}

const VhostOps user_ops = {
    .backend_type = VHOST_BACKEND_TYPE_USER,
    .vhost_backend_init = vhost_user_backend_init,
    .vhost_backend_cleanup = vhost_user_backend_cleanup,
    .vhost_backend_memslots_limit = vhost_user_memslots_limit,
    .vhost_set_log_base = vhost_user_set_log_base,
    .vhost_set_mem_table = vhost_user_set_mem_table,
    .vhost_set_vring_addr = vhost_user_set_vring_addr,
    .vhost_set_vring_endian = vhost_user_set_vring_endian,
    .vhost_set_vring_num = vhost_user_set_vring_num,
    .vhost_set_vring_base = vhost_user_set_vring_base,
    .vhost_get_vring_base = vhost_user_get_vring_base,
    .vhost_set_vring_kick = vhost_user_set_vring_kick,
    .vhost_set_vring_call = vhost_user_set_vring_call,
    .vhost_set_features = vhost_user_set_features,
    .vhost_get_features = vhost_user_get_features,
    .vhost_set_owner = vhost_user_set_owner,
    .vhost_reset_device = vhost_user_reset_device,
    .vhost_get_vq_index = vhost_user_get_vq_index,
    .vhost_set_vring_enable = vhost_user_set_vring_enable,
    .vhost_requires_shm_log = vhost_user_requires_shm_log,
    .vhost_migration_done = vhost_user_migration_done,
    .vhost_backend_can_merge = vhost_user_can_merge,
    .vhost_net_set_mtu = vhost_user_net_set_mtu,
    .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback,
    .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg,
    .vhost_get_config = vhost_user_get_config,
    .vhost_set_config = vhost_user_set_config,
    .vhost_crypto_create_session = vhost_user_crypto_create_session,
    .vhost_crypto_close_session = vhost_user_crypto_close_session,
    .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
    .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
    .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
};