hw/net/vhost_net.c

   1 /*
   2  * vhost-net support
   3  *
   4  * Copyright Red Hat, Inc. 2010
   5  *
   6  * Authors:
   7  *  Michael S. Tsirkin <mst@redhat.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  * Contributions after 2012-01-13 are licensed under the terms of the
  13  * GNU GPL, version 2 or (at your option) any later version.
  14  */
  15
  16 #include "qemu/osdep.h"
  17 #include "net/net.h"
  18 #include "net/tap.h"
  19 #include "net/vhost-user.h"
  20 #include "net/vhost-vdpa.h"
  21
  22 #include "standard-headers/linux/vhost_types.h"
  23 #include "hw/virtio/virtio-net.h"
  24 #include "net/vhost_net.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "qemu/main-loop.h"
  28
  29 #include <sys/socket.h>
  30 #include <net/if.h>
  31 #include <netinet/in.h>
  32
  33
  34 #include "standard-headers/linux/virtio_ring.h"
  35 #include "hw/virtio/vhost.h"
  36 #include "hw/virtio/virtio-bus.h"
  37 #include "linux-headers/linux/vhost.h"
  38
  39
  40 /* Features supported by host kernel. */
  41 static const int kernel_feature_bits[] = {
  42     VIRTIO_F_NOTIFY_ON_EMPTY,
  43     VIRTIO_RING_F_INDIRECT_DESC,
  44     VIRTIO_RING_F_EVENT_IDX,
  45     VIRTIO_NET_F_MRG_RXBUF,
  46     VIRTIO_F_VERSION_1,
  47     VIRTIO_NET_F_MTU,
  48     VIRTIO_F_IOMMU_PLATFORM,
  49     VIRTIO_F_RING_PACKED,
  50     VIRTIO_F_RING_RESET,
  51     VIRTIO_F_NOTIFICATION_DATA,
  52     VIRTIO_NET_F_HASH_REPORT,
  53     VHOST_INVALID_FEATURE_BIT
  54 };
  55
  56 /* Features supported by others. */
  57 static const int user_feature_bits[] = {
  58     VIRTIO_F_NOTIFY_ON_EMPTY,
  59     VIRTIO_F_NOTIFICATION_DATA,
  60     VIRTIO_RING_F_INDIRECT_DESC,
  61     VIRTIO_RING_F_EVENT_IDX,
  62
  63     VIRTIO_F_ANY_LAYOUT,
  64     VIRTIO_F_VERSION_1,
  65     VIRTIO_NET_F_CSUM,
  66     VIRTIO_NET_F_GUEST_CSUM,
  67     VIRTIO_NET_F_GSO,
  68     VIRTIO_NET_F_GUEST_TSO4,
  69     VIRTIO_NET_F_GUEST_TSO6,
  70     VIRTIO_NET_F_GUEST_ECN,
  71     VIRTIO_NET_F_GUEST_UFO,
  72     VIRTIO_NET_F_HOST_TSO4,
  73     VIRTIO_NET_F_HOST_TSO6,
  74     VIRTIO_NET_F_HOST_ECN,
  75     VIRTIO_NET_F_HOST_UFO,
  76     VIRTIO_NET_F_MRG_RXBUF,
  77     VIRTIO_NET_F_MTU,
  78     VIRTIO_F_IOMMU_PLATFORM,
  79     VIRTIO_F_RING_PACKED,
  80     VIRTIO_F_RING_RESET,
  81     VIRTIO_NET_F_RSS,
  82     VIRTIO_NET_F_HASH_REPORT,
  83     VIRTIO_NET_F_GUEST_USO4,
  84     VIRTIO_NET_F_GUEST_USO6,
  85     VIRTIO_NET_F_HOST_USO,
  86
  87     /* This bit implies RARP isn't sent by QEMU out of band */
  88     VIRTIO_NET_F_GUEST_ANNOUNCE,
  89
  90     VIRTIO_NET_F_MQ,
  91
  92     VHOST_INVALID_FEATURE_BIT
  93 };
  94
  95 static const int *vhost_net_get_feature_bits(struct vhost_net *net)
  96 {
  97     const int *feature_bits = 0;
  98
  99     switch (net->nc->info->type) {
 100     case NET_CLIENT_DRIVER_TAP:
 101         feature_bits = kernel_feature_bits;
 102         break;
 103     case NET_CLIENT_DRIVER_VHOST_USER:
 104         feature_bits = user_feature_bits;
 105         break;
 106 #ifdef CONFIG_VHOST_NET_VDPA
 107     case NET_CLIENT_DRIVER_VHOST_VDPA:
 108         feature_bits = vdpa_feature_bits;
 109         break;
 110 #endif
 111     default:
 112         error_report("Feature bits not defined for this type: %d",
 113                 net->nc->info->type);
 114         break;
 115     }
 116
 117     return feature_bits;
 118 }
 119
 120 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
 121 {
 122     return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
 123             features);
 124 }
 125 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
 126                          uint32_t config_len)
 127 {
 128     return vhost_dev_get_config(&net->dev, config, config_len, NULL);
 129 }
 130 int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
 131                          uint32_t offset, uint32_t size, uint32_t flags)
 132 {
 133     return vhost_dev_set_config(&net->dev, data, offset, size, flags);
 134 }
 135
 136 void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
 137 {
 138     net->dev.acked_features = net->dev.backend_features;
 139     vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
 140 }
 141
 142 uint64_t vhost_net_get_max_queues(VHostNetState *net)
 143 {
 144     return net->dev.max_queues;
 145 }
 146
 147 uint64_t vhost_net_get_acked_features(VHostNetState *net)
 148 {
 149     return net->dev.acked_features;
 150 }
 151
 152 void vhost_net_save_acked_features(NetClientState *nc)
 153 {
 154 #ifdef CONFIG_VHOST_NET_USER
 155     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 156         vhost_user_save_acked_features(nc);
 157     }
 158 #endif
 159 }
 160
 161 static int vhost_net_get_fd(NetClientState *backend)
 162 {
 163     switch (backend->info->type) {
 164     case NET_CLIENT_DRIVER_TAP:
 165         return tap_get_fd(backend);
 166     default:
 167         fprintf(stderr, "vhost-net requires tap backend\n");
 168         return -ENOSYS;
 169     }
 170 }
 171
 172 struct vhost_net *vhost_net_init(VhostNetOptions *options)
 173 {
 174     int r;
 175     bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
 176     struct vhost_net *net = g_new0(struct vhost_net, 1);
 177     uint64_t features = 0;
 178     Error *local_err = NULL;
 179
 180     if (!options->net_backend) {
 181         fprintf(stderr, "vhost-net requires net backend to be setup\n");
 182         goto fail;
 183     }
 184     net->nc = options->net_backend;
 185     net->dev.nvqs = options->nvqs;
 186
 187     net->dev.max_queues = 1;
 188     net->dev.vqs = net->vqs;
 189
 190     if (backend_kernel) {
 191         r = vhost_net_get_fd(options->net_backend);
 192         if (r < 0) {
 193             goto fail;
 194         }
 195         net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
 196             ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
 197         net->backend = r;
 198         net->dev.protocol_features = 0;
 199     } else {
 200         net->dev.backend_features = 0;
 201         net->dev.protocol_features = 0;
 202         net->backend = -1;
 203
 204         /* vhost-user needs vq_index to initiate a specific queue pair */
 205         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
 206     }
 207
 208     r = vhost_dev_init(&net->dev, options->opaque,
 209                        options->backend_type, options->busyloop_timeout,
 210                        &local_err);
 211     if (r < 0) {
 212         error_report_err(local_err);
 213         goto fail;
 214     }
 215     if (backend_kernel) {
 216         if (!qemu_has_vnet_hdr_len(options->net_backend,
 217                                sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
 218             net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
 219         }
 220         if (~net->dev.features & net->dev.backend_features) {
 221             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
 222                    " for backend\n",
 223                    (uint64_t)(~net->dev.features & net->dev.backend_features));
 224             goto fail;
 225         }
 226     }
 227
 228     /* Set sane init value. Override when guest acks. */
 229 #ifdef CONFIG_VHOST_NET_USER
 230     if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 231         features = vhost_user_get_acked_features(net->nc);
 232         if (~net->dev.features & features) {
 233             fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
 234                     " for backend\n",
 235                     (uint64_t)(~net->dev.features & features));
 236             goto fail;
 237         }
 238     }
 239 #endif
 240
 241     vhost_net_ack_features(net, features);
 242
 243     return net;
 244
 245 fail:
 246     vhost_dev_cleanup(&net->dev);
 247     g_free(net);
 248     return NULL;
 249 }
 250
 251 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
 252                                    int vq_index_end)
 253 {
 254     net->dev.vq_index = vq_index;
 255     net->dev.vq_index_end = vq_index_end;
 256 }
 257
 258 static int vhost_net_start_one(struct vhost_net *net,
 259                                VirtIODevice *dev)
 260 {
 261     struct vhost_vring_file file = { };
 262     int r;
 263
 264     if (net->nc->info->start) {
 265         r = net->nc->info->start(net->nc);
 266         if (r < 0) {
 267             return r;
 268         }
 269     }
 270
 271     r = vhost_dev_enable_notifiers(&net->dev, dev);
 272     if (r < 0) {
 273         goto fail_notifiers;
 274     }
 275
 276     r = vhost_dev_start(&net->dev, dev, false);
 277     if (r < 0) {
 278         goto fail_start;
 279     }
 280
 281     if (net->nc->info->poll) {
 282         net->nc->info->poll(net->nc, false);
 283     }
 284
 285     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 286         qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
 287         file.fd = net->backend;
 288         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
 289             if (!virtio_queue_enabled(dev, net->dev.vq_index +
 290                                       file.index)) {
 291                 /* Queue might not be ready for start */
 292                 continue;
 293             }
 294             r = vhost_net_set_backend(&net->dev, &file);
 295             if (r < 0) {
 296                 r = -errno;
 297                 goto fail;
 298             }
 299         }
 300     }
 301
 302     if (net->nc->info->load) {
 303         r = net->nc->info->load(net->nc);
 304         if (r < 0) {
 305             goto fail;
 306         }
 307     }
 308     return 0;
 309 fail:
 310     file.fd = -1;
 311     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 312         while (file.index-- > 0) {
 313             if (!virtio_queue_enabled(dev, net->dev.vq_index +
 314                                       file.index)) {
 315                 /* Queue might not be ready for start */
 316                 continue;
 317             }
 318             int ret = vhost_net_set_backend(&net->dev, &file);
 319             assert(ret >= 0);
 320         }
 321     }
 322     if (net->nc->info->poll) {
 323         net->nc->info->poll(net->nc, true);
 324     }
 325     vhost_dev_stop(&net->dev, dev, false);
 326 fail_start:
 327     vhost_dev_disable_notifiers(&net->dev, dev);
 328 fail_notifiers:
 329     return r;
 330 }
 331
 332 static void vhost_net_stop_one(struct vhost_net *net,
 333                                VirtIODevice *dev)
 334 {
 335     struct vhost_vring_file file = { .fd = -1 };
 336
 337     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 338         for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
 339             int r = vhost_net_set_backend(&net->dev, &file);
 340             assert(r >= 0);
 341         }
 342     }
 343     if (net->nc->info->poll) {
 344         net->nc->info->poll(net->nc, true);
 345     }
 346     vhost_dev_stop(&net->dev, dev, false);
 347     if (net->nc->info->stop) {
 348         net->nc->info->stop(net->nc);
 349     }
 350     vhost_dev_disable_notifiers(&net->dev, dev);
 351 }
 352
 353 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
 354                     int data_queue_pairs, int cvq)
 355 {
 356     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
 357     VirtioBusState *vbus = VIRTIO_BUS(qbus);
 358     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
 359     int total_notifiers = data_queue_pairs * 2 + cvq;
 360     VirtIONet *n = VIRTIO_NET(dev);
 361     int nvhosts = data_queue_pairs + cvq;
 362     struct vhost_net *net;
 363     int r, e, i, index_end = data_queue_pairs * 2;
 364     NetClientState *peer;
 365
 366     if (cvq) {
 367         index_end += 1;
 368     }
 369
 370     if (!k->set_guest_notifiers) {
 371         error_report("binding does not support guest notifiers");
 372         return -ENOSYS;
 373     }
 374
 375     for (i = 0; i < nvhosts; i++) {
 376
 377         if (i < data_queue_pairs) {
 378             peer = qemu_get_peer(ncs, i);
 379         } else { /* Control Virtqueue */
 380             peer = qemu_get_peer(ncs, n->max_queue_pairs);
 381         }
 382
 383         net = get_vhost_net(peer);
 384         vhost_net_set_vq_index(net, i * 2, index_end);
 385
 386         /* Suppress the masking guest notifiers on vhost user
 387          * because vhost user doesn't interrupt masking/unmasking
 388          * properly.
 389          */
 390         if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 391             dev->use_guest_notifier_mask = false;
 392         }
 393      }
 394
 395     r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
 396     if (r < 0) {
 397         error_report("Error binding guest notifier: %d", -r);
 398         goto err;
 399     }
 400
 401     for (i = 0; i < nvhosts; i++) {
 402         if (i < data_queue_pairs) {
 403             peer = qemu_get_peer(ncs, i);
 404         } else {
 405             peer = qemu_get_peer(ncs, n->max_queue_pairs);
 406         }
 407
 408         if (peer->vring_enable) {
 409             /* restore vring enable state */
 410             r = vhost_set_vring_enable(peer, peer->vring_enable);
 411
 412             if (r < 0) {
 413                 goto err_start;
 414             }
 415         }
 416
 417         r = vhost_net_start_one(get_vhost_net(peer), dev);
 418         if (r < 0) {
 419             goto err_start;
 420         }
 421     }
 422
 423     return 0;
 424
 425 err_start:
 426     while (--i >= 0) {
 427         peer = qemu_get_peer(ncs, i < data_queue_pairs ?
 428                                   i : n->max_queue_pairs);
 429         vhost_net_stop_one(get_vhost_net(peer), dev);
 430     }
 431     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
 432     if (e < 0) {
 433         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
 434         fflush(stderr);
 435     }
 436 err:
 437     return r;
 438 }
 439
 440 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
 441                     int data_queue_pairs, int cvq)
 442 {
 443     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
 444     VirtioBusState *vbus = VIRTIO_BUS(qbus);
 445     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
 446     VirtIONet *n = VIRTIO_NET(dev);
 447     NetClientState *peer;
 448     int total_notifiers = data_queue_pairs * 2 + cvq;
 449     int nvhosts = data_queue_pairs + cvq;
 450     int i, r;
 451
 452     for (i = 0; i < nvhosts; i++) {
 453         if (i < data_queue_pairs) {
 454             peer = qemu_get_peer(ncs, i);
 455         } else {
 456             peer = qemu_get_peer(ncs, n->max_queue_pairs);
 457         }
 458         vhost_net_stop_one(get_vhost_net(peer), dev);
 459     }
 460
 461     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
 462     if (r < 0) {
 463         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
 464         fflush(stderr);
 465     }
 466     assert(r >= 0);
 467 }
 468
 469 void vhost_net_cleanup(struct vhost_net *net)
 470 {
 471     vhost_dev_cleanup(&net->dev);
 472 }
 473
 474 int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
 475 {
 476     const VhostOps *vhost_ops = net->dev.vhost_ops;
 477
 478     assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
 479     assert(vhost_ops->vhost_migration_done);
 480
 481     return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
 482 }
 483
 484 bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
 485 {
 486     return vhost_virtqueue_pending(&net->dev, idx);
 487 }
 488
 489 void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
 490                               int idx, bool mask)
 491 {
 492     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
 493 }
 494
 495 bool vhost_net_config_pending(VHostNetState *net)
 496 {
 497     return vhost_config_pending(&net->dev);
 498 }
 499
 500 void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
 501 {
 502     vhost_config_mask(&net->dev, dev, mask);
 503 }
 504 VHostNetState *get_vhost_net(NetClientState *nc)
 505 {
 506     VHostNetState *vhost_net = 0;
 507
 508     if (!nc) {
 509         return 0;
 510     }
 511
 512     switch (nc->info->type) {
 513     case NET_CLIENT_DRIVER_TAP:
 514         vhost_net = tap_get_vhost_net(nc);
 515         /*
 516          * tap_get_vhost_net() can return NULL if a tap net-device backend is
 517          * created with 'vhost=off' option, 'vhostforce=off' or no vhost or
 518          * vhostforce or vhostfd options at all. Please see net_init_tap_one().
 519          * Hence, we omit the assertion here.
 520          */
 521         break;
 522 #ifdef CONFIG_VHOST_NET_USER
 523     case NET_CLIENT_DRIVER_VHOST_USER:
 524         vhost_net = vhost_user_get_vhost_net(nc);
 525         assert(vhost_net);
 526         break;
 527 #endif
 528 #ifdef CONFIG_VHOST_NET_VDPA
 529     case NET_CLIENT_DRIVER_VHOST_VDPA:
 530         vhost_net = vhost_vdpa_get_vhost_net(nc);
 531         assert(vhost_net);
 532         break;
 533 #endif
 534     default:
 535         break;
 536     }
 537
 538     return vhost_net;
 539 }
 540
 541 int vhost_set_vring_enable(NetClientState *nc, int enable)
 542 {
 543     VHostNetState *net = get_vhost_net(nc);
 544     const VhostOps *vhost_ops = net->dev.vhost_ops;
 545
 546     /*
 547      * vhost-vdpa network devices need to enable dataplane virtqueues after
 548      * DRIVER_OK, so they can recover device state before starting dataplane.
 549      * Because of that, we don't enable virtqueues here and leave it to
 550      * net/vhost-vdpa.c.
 551      */
 552     if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 553         return 0;
 554     }
 555
 556     nc->vring_enable = enable;
 557
 558     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
 559         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
 560     }
 561
 562     return 0;
 563 }
 564
 565 int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 566 {
 567     const VhostOps *vhost_ops = net->dev.vhost_ops;
 568
 569     if (!vhost_ops->vhost_net_set_mtu) {
 570         return 0;
 571     }
 572
 573     return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
 574 }
 575
 576 void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc,
 577                                int vq_index)
 578 {
 579     VHostNetState *net = get_vhost_net(nc->peer);
 580     const VhostOps *vhost_ops = net->dev.vhost_ops;
 581     struct vhost_vring_file file = { .fd = -1 };
 582     int idx;
 583
 584     /* should only be called after backend is connected */
 585     assert(vhost_ops);
 586
 587     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
 588
 589     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 590         file.index = idx;
 591         int r = vhost_net_set_backend(&net->dev, &file);
 592         assert(r >= 0);
 593     }
 594
 595     vhost_virtqueue_stop(&net->dev,
 596                          vdev,
 597                          net->dev.vqs + idx,
 598                          net->dev.vq_index + idx);
 599 }
 600
 601 int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc,
 602                                 int vq_index)
 603 {
 604     VHostNetState *net = get_vhost_net(nc->peer);
 605     const VhostOps *vhost_ops = net->dev.vhost_ops;
 606     struct vhost_vring_file file = { };
 607     int idx, r;
 608
 609     if (!net->dev.started) {
 610         return -EBUSY;
 611     }
 612
 613     /* should only be called after backend is connected */
 614     assert(vhost_ops);
 615
 616     idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index);
 617
 618     r = vhost_virtqueue_start(&net->dev,
 619                               vdev,
 620                               net->dev.vqs + idx,
 621                               net->dev.vq_index + idx);
 622     if (r < 0) {
 623         goto err_start;
 624     }
 625
 626     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 627         file.index = idx;
 628         file.fd = net->backend;
 629         r = vhost_net_set_backend(&net->dev, &file);
 630         if (r < 0) {
 631             r = -errno;
 632             goto err_start;
 633         }
 634     }
 635
 636     return 0;
 637
 638 err_start:
 639     error_report("Error when restarting the queue.");
 640
 641     if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 642         file.fd = VHOST_FILE_UNBIND;
 643         file.index = idx;
 644         int ret = vhost_net_set_backend(&net->dev, &file);
 645         assert(ret >= 0);
 646     }
 647
 648     vhost_dev_stop(&net->dev, vdev, false);
 649
 650     return r;
 651 }