hw/net/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/osdep.h"
  15 #include "qemu/atomic.h"
  16 #include "qemu/iov.h"
  17 #include "qemu/log.h"
  18 #include "qemu/main-loop.h"
  19 #include "qemu/module.h"
  20 #include "hw/virtio/virtio.h"
  21 #include "net/net.h"
  22 #include "net/checksum.h"
  23 #include "net/tap.h"
  24 #include "qemu/error-report.h"
  25 #include "qemu/timer.h"
  26 #include "qemu/option.h"
  27 #include "qemu/option_int.h"
  28 #include "qemu/config-file.h"
  29 #include "qapi/qmp/qdict.h"
  30 #include "hw/virtio/virtio-net.h"
  31 #include "net/vhost_net.h"
  32 #include "net/announce.h"
  33 #include "hw/virtio/virtio-bus.h"
  34 #include "qapi/error.h"
  35 #include "qapi/qapi-events-net.h"
  36 #include "hw/qdev-properties.h"
  37 #include "qapi/qapi-types-migration.h"
  38 #include "qapi/qapi-events-migration.h"
  39 #include "hw/virtio/virtio-access.h"
  40 #include "migration/misc.h"
  41 #include "standard-headers/linux/ethtool.h"
  42 #include "sysemu/sysemu.h"
  43 #include "trace.h"
  44 #include "monitor/qdev.h"
  45 #include "hw/pci/pci.h"
  46 #include "net_rx_pkt.h"
  47 #include "hw/virtio/vhost.h"
  48 #include "sysemu/qtest.h"
  49
  50 #define VIRTIO_NET_VM_VERSION    11
  51
  52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  53
  54 /* previously fixed value */
  55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  57
/* for now, only allow larger queues; with virtio-1, guest can downsize */
  59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  61
  62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  63
  64 #define VIRTIO_NET_TCP_FLAG         0x3F
  65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  66
  67 /* IPv4 max payload, 16 bits in the header */
  68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  70
  71 /* header length value in ip header without option */
  72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
  73
  74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  76
/*
 * Purge coalesced packets timer interval. This value affects performance
 * a lot and should be tuned carefully: '300000' (300us) is the recommended
 * value to pass the WHQL test, while '50000' can gain 2x netperf throughput
 * with tso/gso/gro 'off'.
 */
  81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  82
  83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  92
  93 static const VirtIOFeature feature_sizes[] = {
  94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
  95      .end = endof(struct virtio_net_config, mac)},
  96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
  97      .end = endof(struct virtio_net_config, status)},
  98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
  99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
 101      .end = endof(struct virtio_net_config, mtu)},
 102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
 103      .end = endof(struct virtio_net_config, duplex)},
 104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
 105      .end = endof(struct virtio_net_config, supported_hash_types)},
 106     {}
 107 };
 108
 109 static const VirtIOConfigSizeParams cfg_size_params = {
 110     .min_size = endof(struct virtio_net_config, mac),
 111     .max_size = sizeof(struct virtio_net_config),
 112     .feature_sizes = feature_sizes
 113 };
 114
 115 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 116 {
 117     VirtIONet *n = qemu_get_nic_opaque(nc);
 118
 119     return &n->vqs[nc->queue_index];
 120 }
 121
 122 static int vq2q(int queue_index)
 123 {
 124     return queue_index / 2;
 125 }
 126
 127 static void flush_or_purge_queued_packets(NetClientState *nc)
 128 {
 129     if (!nc->peer) {
 130         return;
 131     }
 132
 133     qemu_flush_or_purge_queued_packets(nc->peer, true);
 134     assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 135 }
 136
 137 /* TODO
 138  * - we could suppress RX interrupt if we were so inclined.
 139  */
 140
 141 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 142 {
 143     VirtIONet *n = VIRTIO_NET(vdev);
 144     struct virtio_net_config netcfg;
 145     NetClientState *nc = qemu_get_queue(n->nic);
 146     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 147
 148     int ret = 0;
 149     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 150     virtio_stw_p(vdev, &netcfg.status, n->status);
 151     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
 152     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 153     memcpy(netcfg.mac, n->mac, ETH_ALEN);
 154     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 155     netcfg.duplex = n->net_conf.duplex;
 156     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 157     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 158                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 159                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 160     virtio_stl_p(vdev, &netcfg.supported_hash_types,
 161                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
 162     memcpy(config, &netcfg, n->config_size);
 163
 164     /*
 165      * Is this VDPA? No peer means not VDPA: there's no way to
 166      * disconnect/reconnect a VDPA peer.
 167      */
 168     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 169         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 170                                    n->config_size);
 171         if (ret == -1) {
 172             return;
 173         }
 174
 175         /*
 176          * Some NIC/kernel combinations present 0 as the mac address.  As that
 177          * is not a legal address, try to proceed with the address from the
 178          * QEMU command line in the hope that the address has been configured
 179          * correctly elsewhere - just not reported by the device.
 180          */
 181         if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 182             info_report("Zero hardware mac address detected. Ignoring.");
 183             memcpy(netcfg.mac, n->mac, ETH_ALEN);
 184         }
 185
 186         netcfg.status |= virtio_tswap16(vdev,
 187                                         n->status & VIRTIO_NET_S_ANNOUNCE);
 188         memcpy(config, &netcfg, n->config_size);
 189     }
 190 }
 191
 192 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 193 {
 194     VirtIONet *n = VIRTIO_NET(vdev);
 195     struct virtio_net_config netcfg = {};
 196     NetClientState *nc = qemu_get_queue(n->nic);
 197
 198     memcpy(&netcfg, config, n->config_size);
 199
 200     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 201         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 202         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 203         memcpy(n->mac, netcfg.mac, ETH_ALEN);
 204         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 205     }
 206
 207     /*
 208      * Is this VDPA? No peer means not VDPA: there's no way to
 209      * disconnect/reconnect a VDPA peer.
 210      */
 211     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 212         vhost_net_set_config(get_vhost_net(nc->peer),
 213                              (uint8_t *)&netcfg, 0, n->config_size,
 214                              VHOST_SET_CONFIG_TYPE_MASTER);
 215       }
 216 }
 217
 218 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 219 {
 220     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 221     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 222         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 223 }
 224
 225 static void virtio_net_announce_notify(VirtIONet *net)
 226 {
 227     VirtIODevice *vdev = VIRTIO_DEVICE(net);
 228     trace_virtio_net_announce_notify();
 229
 230     net->status |= VIRTIO_NET_S_ANNOUNCE;
 231     virtio_notify_config(vdev);
 232 }
 233
 234 static void virtio_net_announce_timer(void *opaque)
 235 {
 236     VirtIONet *n = opaque;
 237     trace_virtio_net_announce_timer(n->announce_timer.round);
 238
 239     n->announce_timer.round--;
 240     virtio_net_announce_notify(n);
 241 }
 242
 243 static void virtio_net_announce(NetClientState *nc)
 244 {
 245     VirtIONet *n = qemu_get_nic_opaque(nc);
 246     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 247
 248     /*
 249      * Make sure the virtio migration announcement timer isn't running
 250      * If it is, let it trigger announcement so that we do not cause
 251      * confusion.
 252      */
 253     if (n->announce_timer.round) {
 254         return;
 255     }
 256
 257     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 258         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 259             virtio_net_announce_notify(n);
 260     }
 261 }
 262
 263 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 264 {
 265     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 266     NetClientState *nc = qemu_get_queue(n->nic);
 267     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 268     int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
 269               n->max_ncs - n->max_queue_pairs : 0;
 270
 271     if (!get_vhost_net(nc->peer)) {
 272         return;
 273     }
 274
 275     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 276         !!n->vhost_started) {
 277         return;
 278     }
 279     if (!n->vhost_started) {
 280         int r, i;
 281
 282         if (n->needs_vnet_hdr_swap) {
 283             error_report("backend does not support %s vnet headers; "
 284                          "falling back on userspace virtio",
 285                          virtio_is_big_endian(vdev) ? "BE" : "LE");
 286             return;
 287         }
 288
 289         /* Any packets outstanding? Purge them to avoid touching rings
 290          * when vhost is running.
 291          */
 292         for (i = 0;  i < queue_pairs; i++) {
 293             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 294
 295             /* Purge both directions: TX and RX. */
 296             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 297             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 298         }
 299
 300         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 301             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 302             if (r < 0) {
 303                 error_report("%uBytes MTU not supported by the backend",
 304                              n->net_conf.mtu);
 305
 306                 return;
 307             }
 308         }
 309
 310         n->vhost_started = 1;
 311         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
 312         if (r < 0) {
 313             error_report("unable to start vhost net: %d: "
 314                          "falling back on userspace virtio", -r);
 315             n->vhost_started = 0;
 316         }
 317     } else {
 318         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
 319         n->vhost_started = 0;
 320     }
 321 }
 322
 323 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 324                                           NetClientState *peer,
 325                                           bool enable)
 326 {
 327     if (virtio_is_big_endian(vdev)) {
 328         return qemu_set_vnet_be(peer, enable);
 329     } else {
 330         return qemu_set_vnet_le(peer, enable);
 331     }
 332 }
 333
 334 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 335                                        int queue_pairs, bool enable)
 336 {
 337     int i;
 338
 339     for (i = 0; i < queue_pairs; i++) {
 340         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 341             enable) {
 342             while (--i >= 0) {
 343                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 344             }
 345
 346             return true;
 347         }
 348     }
 349
 350     return false;
 351 }
 352
 353 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 354 {
 355     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 356     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 357
 358     if (virtio_net_started(n, status)) {
 359         /* Before using the device, we tell the network backend about the
 360          * endianness to use when parsing vnet headers. If the backend
 361          * can't do it, we fallback onto fixing the headers in the core
 362          * virtio-net code.
 363          */
 364         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 365                                                             queue_pairs, true);
 366     } else if (virtio_net_started(n, vdev->status)) {
 367         /* After using the device, we need to reset the network backend to
 368          * the default (guest native endianness), otherwise the guest may
 369          * lose network connectivity if it is rebooted into a different
 370          * endianness.
 371          */
 372         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
 373     }
 374 }
 375
 376 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 377 {
 378     unsigned int dropped = virtqueue_drop_all(vq);
 379     if (dropped) {
 380         virtio_notify(vdev, vq);
 381     }
 382 }
 383
 384 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 385 {
 386     VirtIONet *n = VIRTIO_NET(vdev);
 387     VirtIONetQueue *q;
 388     int i;
 389     uint8_t queue_status;
 390
 391     virtio_net_vnet_endian_status(n, status);
 392     virtio_net_vhost_status(n, status);
 393
 394     for (i = 0; i < n->max_queue_pairs; i++) {
 395         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 396         bool queue_started;
 397         q = &n->vqs[i];
 398
 399         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
 400             queue_status = 0;
 401         } else {
 402             queue_status = status;
 403         }
 404         queue_started =
 405             virtio_net_started(n, queue_status) && !n->vhost_started;
 406
 407         if (queue_started) {
 408             qemu_flush_queued_packets(ncs);
 409         }
 410
 411         if (!q->tx_waiting) {
 412             continue;
 413         }
 414
 415         if (queue_started) {
 416             if (q->tx_timer) {
 417                 timer_mod(q->tx_timer,
 418                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 419             } else {
 420                 qemu_bh_schedule(q->tx_bh);
 421             }
 422         } else {
 423             if (q->tx_timer) {
 424                 timer_del(q->tx_timer);
 425             } else {
 426                 qemu_bh_cancel(q->tx_bh);
 427             }
 428             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 429                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 430                 vdev->vm_running) {
 431                 /* if tx is waiting we are likely have some packets in tx queue
 432                  * and disabled notification */
 433                 q->tx_waiting = 0;
 434                 virtio_queue_set_notification(q->tx_vq, 1);
 435                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 436             }
 437         }
 438     }
 439 }
 440
 441 static void virtio_net_set_link_status(NetClientState *nc)
 442 {
 443     VirtIONet *n = qemu_get_nic_opaque(nc);
 444     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 445     uint16_t old_status = n->status;
 446
 447     if (nc->link_down)
 448         n->status &= ~VIRTIO_NET_S_LINK_UP;
 449     else
 450         n->status |= VIRTIO_NET_S_LINK_UP;
 451
 452     if (n->status != old_status)
 453         virtio_notify_config(vdev);
 454
 455     virtio_net_set_status(vdev, vdev->status);
 456 }
 457
 458 static void rxfilter_notify(NetClientState *nc)
 459 {
 460     VirtIONet *n = qemu_get_nic_opaque(nc);
 461
 462     if (nc->rxfilter_notify_enabled) {
 463         char *path = object_get_canonical_path(OBJECT(n->qdev));
 464         qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
 465         g_free(path);
 466
 467         /* disable event notification to avoid events flooding */
 468         nc->rxfilter_notify_enabled = 0;
 469     }
 470 }
 471
 472 static intList *get_vlan_table(VirtIONet *n)
 473 {
 474     intList *list;
 475     int i, j;
 476
 477     list = NULL;
 478     for (i = 0; i < MAX_VLAN >> 5; i++) {
 479         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 480             if (n->vlans[i] & (1U << j)) {
 481                 QAPI_LIST_PREPEND(list, (i << 5) + j);
 482             }
 483         }
 484     }
 485
 486     return list;
 487 }
 488
 489 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 490 {
 491     VirtIONet *n = qemu_get_nic_opaque(nc);
 492     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 493     RxFilterInfo *info;
 494     strList *str_list;
 495     int i;
 496
 497     info = g_malloc0(sizeof(*info));
 498     info->name = g_strdup(nc->name);
 499     info->promiscuous = n->promisc;
 500
 501     if (n->nouni) {
 502         info->unicast = RX_STATE_NONE;
 503     } else if (n->alluni) {
 504         info->unicast = RX_STATE_ALL;
 505     } else {
 506         info->unicast = RX_STATE_NORMAL;
 507     }
 508
 509     if (n->nomulti) {
 510         info->multicast = RX_STATE_NONE;
 511     } else if (n->allmulti) {
 512         info->multicast = RX_STATE_ALL;
 513     } else {
 514         info->multicast = RX_STATE_NORMAL;
 515     }
 516
 517     info->broadcast_allowed = n->nobcast;
 518     info->multicast_overflow = n->mac_table.multi_overflow;
 519     info->unicast_overflow = n->mac_table.uni_overflow;
 520
 521     info->main_mac = qemu_mac_strdup_printf(n->mac);
 522
 523     str_list = NULL;
 524     for (i = 0; i < n->mac_table.first_multi; i++) {
 525         QAPI_LIST_PREPEND(str_list,
 526                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 527     }
 528     info->unicast_table = str_list;
 529
 530     str_list = NULL;
 531     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 532         QAPI_LIST_PREPEND(str_list,
 533                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 534     }
 535     info->multicast_table = str_list;
 536     info->vlan_table = get_vlan_table(n);
 537
 538     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 539         info->vlan = RX_STATE_ALL;
 540     } else if (!info->vlan_table) {
 541         info->vlan = RX_STATE_NONE;
 542     } else {
 543         info->vlan = RX_STATE_NORMAL;
 544     }
 545
 546     /* enable event notification after query */
 547     nc->rxfilter_notify_enabled = 1;
 548
 549     return info;
 550 }
 551
 552 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
 553 {
 554     VirtIONet *n = VIRTIO_NET(vdev);
 555     NetClientState *nc;
 556
 557     /* validate queue_index and skip for cvq */
 558     if (queue_index >= n->max_queue_pairs * 2) {
 559         return;
 560     }
 561
 562     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
 563
 564     if (!nc->peer) {
 565         return;
 566     }
 567
 568     if (get_vhost_net(nc->peer) &&
 569         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
 570         vhost_net_virtqueue_reset(vdev, nc, queue_index);
 571     }
 572
 573     flush_or_purge_queued_packets(nc);
 574 }
 575
 576 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
 577 {
 578     VirtIONet *n = VIRTIO_NET(vdev);
 579     NetClientState *nc;
 580     int r;
 581
 582     /* validate queue_index and skip for cvq */
 583     if (queue_index >= n->max_queue_pairs * 2) {
 584         return;
 585     }
 586
 587     nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
 588
 589     if (!nc->peer || !vdev->vhost_started) {
 590         return;
 591     }
 592
 593     if (get_vhost_net(nc->peer) &&
 594         nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
 595         r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
 596         if (r < 0) {
 597             error_report("unable to restart vhost net virtqueue: %d, "
 598                             "when resetting the queue", queue_index);
 599         }
 600     }
 601 }
 602
 603 static void virtio_net_reset(VirtIODevice *vdev)
 604 {
 605     VirtIONet *n = VIRTIO_NET(vdev);
 606     int i;
 607
 608     /* Reset back to compatibility mode */
 609     n->promisc = 1;
 610     n->allmulti = 0;
 611     n->alluni = 0;
 612     n->nomulti = 0;
 613     n->nouni = 0;
 614     n->nobcast = 0;
 615     /* multiqueue is disabled by default */
 616     n->curr_queue_pairs = 1;
 617     timer_del(n->announce_timer.tm);
 618     n->announce_timer.round = 0;
 619     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 620
 621     /* Flush any MAC and VLAN filter table state */
 622     n->mac_table.in_use = 0;
 623     n->mac_table.first_multi = 0;
 624     n->mac_table.multi_overflow = 0;
 625     n->mac_table.uni_overflow = 0;
 626     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 627     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 628     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 629     memset(n->vlans, 0, MAX_VLAN >> 3);
 630
 631     /* Flush any async TX */
 632     for (i = 0;  i < n->max_queue_pairs; i++) {
 633         flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
 634     }
 635 }
 636
 637 static void peer_test_vnet_hdr(VirtIONet *n)
 638 {
 639     NetClientState *nc = qemu_get_queue(n->nic);
 640     if (!nc->peer) {
 641         return;
 642     }
 643
 644     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 645 }
 646
 647 static int peer_has_vnet_hdr(VirtIONet *n)
 648 {
 649     return n->has_vnet_hdr;
 650 }
 651
 652 static int peer_has_ufo(VirtIONet *n)
 653 {
 654     if (!peer_has_vnet_hdr(n))
 655         return 0;
 656
 657     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 658
 659     return n->has_ufo;
 660 }
 661
 662 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 663                                        int version_1, int hash_report)
 664 {
 665     int i;
 666     NetClientState *nc;
 667
 668     n->mergeable_rx_bufs = mergeable_rx_bufs;
 669
 670     if (version_1) {
 671         n->guest_hdr_len = hash_report ?
 672             sizeof(struct virtio_net_hdr_v1_hash) :
 673             sizeof(struct virtio_net_hdr_mrg_rxbuf);
 674         n->rss_data.populate_hash = !!hash_report;
 675     } else {
 676         n->guest_hdr_len = n->mergeable_rx_bufs ?
 677             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 678             sizeof(struct virtio_net_hdr);
 679     }
 680
 681     for (i = 0; i < n->max_queue_pairs; i++) {
 682         nc = qemu_get_subqueue(n->nic, i);
 683
 684         if (peer_has_vnet_hdr(n) &&
 685             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 686             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 687             n->host_hdr_len = n->guest_hdr_len;
 688         }
 689     }
 690 }
 691
 692 static int virtio_net_max_tx_queue_size(VirtIONet *n)
 693 {
 694     NetClientState *peer = n->nic_conf.peers.ncs[0];
 695
 696     /*
 697      * Backends other than vhost-user or vhost-vdpa don't support max queue
 698      * size.
 699      */
 700     if (!peer) {
 701         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 702     }
 703
 704     switch(peer->info->type) {
 705     case NET_CLIENT_DRIVER_VHOST_USER:
 706     case NET_CLIENT_DRIVER_VHOST_VDPA:
 707         return VIRTQUEUE_MAX_SIZE;
 708     default:
 709         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 710     };
 711 }
 712
 713 static int peer_attach(VirtIONet *n, int index)
 714 {
 715     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 716
 717     if (!nc->peer) {
 718         return 0;
 719     }
 720
 721     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 722         vhost_set_vring_enable(nc->peer, 1);
 723     }
 724
 725     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 726         return 0;
 727     }
 728
 729     if (n->max_queue_pairs == 1) {
 730         return 0;
 731     }
 732
 733     return tap_enable(nc->peer);
 734 }
 735
 736 static int peer_detach(VirtIONet *n, int index)
 737 {
 738     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 739
 740     if (!nc->peer) {
 741         return 0;
 742     }
 743
 744     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 745         vhost_set_vring_enable(nc->peer, 0);
 746     }
 747
 748     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 749         return 0;
 750     }
 751
 752     return tap_disable(nc->peer);
 753 }
 754
 755 static void virtio_net_set_queue_pairs(VirtIONet *n)
 756 {
 757     int i;
 758     int r;
 759
 760     if (n->nic->peer_deleted) {
 761         return;
 762     }
 763
 764     for (i = 0; i < n->max_queue_pairs; i++) {
 765         if (i < n->curr_queue_pairs) {
 766             r = peer_attach(n, i);
 767             assert(!r);
 768         } else {
 769             r = peer_detach(n, i);
 770             assert(!r);
 771         }
 772     }
 773 }
 774
 775 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 776
 777 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 778                                         Error **errp)
 779 {
 780     VirtIONet *n = VIRTIO_NET(vdev);
 781     NetClientState *nc = qemu_get_queue(n->nic);
 782
 783     /* Firstly sync all virtio-net possible supported features */
 784     features |= n->host_features;
 785
 786     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 787
 788     if (!peer_has_vnet_hdr(n)) {
 789         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 790         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 791         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 792         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 793
 794         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 795         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 796         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 797         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 798
 799         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 800     }
 801
 802     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 803         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 804         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 805     }
 806
 807     if (!get_vhost_net(nc->peer)) {
 808         virtio_add_feature(&features, VIRTIO_F_RING_RESET);
 809         return features;
 810     }
 811
 812     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 813         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 814     }
 815     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 816     vdev->backend_features = features;
 817
 818     if (n->mtu_bypass_backend &&
 819             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 820         features |= (1ULL << VIRTIO_NET_F_MTU);
 821     }
 822
 823     return features;
 824 }
 825
 826 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 827 {
 828     uint64_t features = 0;
 829
 830     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 831      * but also these: */
 832     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 833     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 834     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 835     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 836     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 837
 838     return features;
 839 }
 840
 841 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 842 {
 843     qemu_set_offload(qemu_get_queue(n->nic)->peer,
 844             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 845             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 846             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 847             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 848             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 849 }
 850
 851 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 852 {
 853     static const uint64_t guest_offloads_mask =
 854         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 855         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 856         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 857         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 858         (1ULL << VIRTIO_NET_F_GUEST_UFO);
 859
 860     return guest_offloads_mask & features;
 861 }
 862
 863 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 864 {
 865     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 866     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 867 }
 868
 869 typedef struct {
 870     VirtIONet *n;
 871     DeviceState *dev;
 872 } FailoverDevice;
 873
 874 /**
 875  * Set the failover primary device
 876  *
 877  * @opaque: FailoverId to setup
 878  * @opts: opts for device we are handling
 879  * @errp: returns an error if this function fails
 880  */
 881 static int failover_set_primary(DeviceState *dev, void *opaque)
 882 {
 883     FailoverDevice *fdev = opaque;
 884     PCIDevice *pci_dev = (PCIDevice *)
 885         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
 886
 887     if (!pci_dev) {
 888         return 0;
 889     }
 890
 891     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
 892         fdev->dev = dev;
 893         return 1;
 894     }
 895
 896     return 0;
 897 }
 898
 899 /**
 900  * Find the primary device for this failover virtio-net
 901  *
 902  * @n: VirtIONet device
 903  * @errp: returns an error if this function fails
 904  */
 905 static DeviceState *failover_find_primary_device(VirtIONet *n)
 906 {
 907     FailoverDevice fdev = {
 908         .n = n,
 909     };
 910
 911     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
 912                        NULL, NULL, &fdev);
 913     return fdev.dev;
 914 }
 915
 916 static void failover_add_primary(VirtIONet *n, Error **errp)
 917 {
 918     Error *err = NULL;
 919     DeviceState *dev = failover_find_primary_device(n);
 920
 921     if (dev) {
 922         return;
 923     }
 924
 925     if (!n->primary_opts) {
 926         error_setg(errp, "Primary device not found");
 927         error_append_hint(errp, "Virtio-net failover will not work. Make "
 928                           "sure primary device has parameter"
 929                           " failover_pair_id=%s\n", n->netclient_name);
 930         return;
 931     }
 932
 933     dev = qdev_device_add_from_qdict(n->primary_opts,
 934                                      n->primary_opts_from_json,
 935                                      &err);
 936     if (err) {
 937         qobject_unref(n->primary_opts);
 938         n->primary_opts = NULL;
 939     } else {
 940         object_unref(OBJECT(dev));
 941     }
 942     error_propagate(errp, err);
 943 }
 944
 945 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 946 {
 947     VirtIONet *n = VIRTIO_NET(vdev);
 948     Error *err = NULL;
 949     int i;
 950
 951     if (n->mtu_bypass_backend &&
 952             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 953         features &= ~(1ULL << VIRTIO_NET_F_MTU);
 954     }
 955
 956     virtio_net_set_multiqueue(n,
 957                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 958                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 959
 960     virtio_net_set_mrg_rx_bufs(n,
 961                                virtio_has_feature(features,
 962                                                   VIRTIO_NET_F_MRG_RXBUF),
 963                                virtio_has_feature(features,
 964                                                   VIRTIO_F_VERSION_1),
 965                                virtio_has_feature(features,
 966                                                   VIRTIO_NET_F_HASH_REPORT));
 967
 968     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 969         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 970     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 971         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 972     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 973
 974     if (n->has_vnet_hdr) {
 975         n->curr_guest_offloads =
 976             virtio_net_guest_offloads_by_features(features);
 977         virtio_net_apply_guest_offloads(n);
 978     }
 979
 980     for (i = 0;  i < n->max_queue_pairs; i++) {
 981         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 982
 983         if (!get_vhost_net(nc->peer)) {
 984             continue;
 985         }
 986         vhost_net_ack_features(get_vhost_net(nc->peer), features);
 987
 988         /*
 989          * keep acked_features in NetVhostUserState up-to-date so it
 990          * can't miss any features configured by guest virtio driver.
 991          */
 992         vhost_net_save_acked_features(nc->peer);
 993     }
 994
 995     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 996         memset(n->vlans, 0, MAX_VLAN >> 3);
 997     } else {
 998         memset(n->vlans, 0xff, MAX_VLAN >> 3);
 999     }
1000
1001     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1002         qapi_event_send_failover_negotiated(n->netclient_name);
1003         qatomic_set(&n->failover_primary_hidden, false);
1004         failover_add_primary(n, &err);
1005         if (err) {
1006             if (!qtest_enabled()) {
1007                 warn_report_err(err);
1008             } else {
1009                 error_free(err);
1010             }
1011         }
1012     }
1013 }
1014
1015 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1016                                      struct iovec *iov, unsigned int iov_cnt)
1017 {
1018     uint8_t on;
1019     size_t s;
1020     NetClientState *nc = qemu_get_queue(n->nic);
1021
1022     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1023     if (s != sizeof(on)) {
1024         return VIRTIO_NET_ERR;
1025     }
1026
1027     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1028         n->promisc = on;
1029     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1030         n->allmulti = on;
1031     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1032         n->alluni = on;
1033     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1034         n->nomulti = on;
1035     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1036         n->nouni = on;
1037     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1038         n->nobcast = on;
1039     } else {
1040         return VIRTIO_NET_ERR;
1041     }
1042
1043     rxfilter_notify(nc);
1044
1045     return VIRTIO_NET_OK;
1046 }
1047
1048 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1049                                      struct iovec *iov, unsigned int iov_cnt)
1050 {
1051     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1052     uint64_t offloads;
1053     size_t s;
1054
1055     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1056         return VIRTIO_NET_ERR;
1057     }
1058
1059     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1060     if (s != sizeof(offloads)) {
1061         return VIRTIO_NET_ERR;
1062     }
1063
1064     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1065         uint64_t supported_offloads;
1066
1067         offloads = virtio_ldq_p(vdev, &offloads);
1068
1069         if (!n->has_vnet_hdr) {
1070             return VIRTIO_NET_ERR;
1071         }
1072
1073         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1074             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1075         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1076             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1077         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1078
1079         supported_offloads = virtio_net_supported_guest_offloads(n);
1080         if (offloads & ~supported_offloads) {
1081             return VIRTIO_NET_ERR;
1082         }
1083
1084         n->curr_guest_offloads = offloads;
1085         virtio_net_apply_guest_offloads(n);
1086
1087         return VIRTIO_NET_OK;
1088     } else {
1089         return VIRTIO_NET_ERR;
1090     }
1091 }
1092
1093 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1094                                  struct iovec *iov, unsigned int iov_cnt)
1095 {
1096     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1097     struct virtio_net_ctrl_mac mac_data;
1098     size_t s;
1099     NetClientState *nc = qemu_get_queue(n->nic);
1100
1101     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1102         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1103             return VIRTIO_NET_ERR;
1104         }
1105         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1106         assert(s == sizeof(n->mac));
1107         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1108         rxfilter_notify(nc);
1109
1110         return VIRTIO_NET_OK;
1111     }
1112
1113     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1114         return VIRTIO_NET_ERR;
1115     }
1116
1117     int in_use = 0;
1118     int first_multi = 0;
1119     uint8_t uni_overflow = 0;
1120     uint8_t multi_overflow = 0;
1121     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1122
1123     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1124                    sizeof(mac_data.entries));
1125     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1126     if (s != sizeof(mac_data.entries)) {
1127         goto error;
1128     }
1129     iov_discard_front(&iov, &iov_cnt, s);
1130
1131     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1132         goto error;
1133     }
1134
1135     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1136         s = iov_to_buf(iov, iov_cnt, 0, macs,
1137                        mac_data.entries * ETH_ALEN);
1138         if (s != mac_data.entries * ETH_ALEN) {
1139             goto error;
1140         }
1141         in_use += mac_data.entries;
1142     } else {
1143         uni_overflow = 1;
1144     }
1145
1146     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1147
1148     first_multi = in_use;
1149
1150     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1151                    sizeof(mac_data.entries));
1152     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1153     if (s != sizeof(mac_data.entries)) {
1154         goto error;
1155     }
1156
1157     iov_discard_front(&iov, &iov_cnt, s);
1158
1159     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1160         goto error;
1161     }
1162
1163     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1164         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1165                        mac_data.entries * ETH_ALEN);
1166         if (s != mac_data.entries * ETH_ALEN) {
1167             goto error;
1168         }
1169         in_use += mac_data.entries;
1170     } else {
1171         multi_overflow = 1;
1172     }
1173
1174     n->mac_table.in_use = in_use;
1175     n->mac_table.first_multi = first_multi;
1176     n->mac_table.uni_overflow = uni_overflow;
1177     n->mac_table.multi_overflow = multi_overflow;
1178     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1179     g_free(macs);
1180     rxfilter_notify(nc);
1181
1182     return VIRTIO_NET_OK;
1183
1184 error:
1185     g_free(macs);
1186     return VIRTIO_NET_ERR;
1187 }
1188
1189 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1190                                         struct iovec *iov, unsigned int iov_cnt)
1191 {
1192     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1193     uint16_t vid;
1194     size_t s;
1195     NetClientState *nc = qemu_get_queue(n->nic);
1196
1197     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1198     vid = virtio_lduw_p(vdev, &vid);
1199     if (s != sizeof(vid)) {
1200         return VIRTIO_NET_ERR;
1201     }
1202
1203     if (vid >= MAX_VLAN)
1204         return VIRTIO_NET_ERR;
1205
1206     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1207         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1208     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1209         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1210     else
1211         return VIRTIO_NET_ERR;
1212
1213     rxfilter_notify(nc);
1214
1215     return VIRTIO_NET_OK;
1216 }
1217
1218 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1219                                       struct iovec *iov, unsigned int iov_cnt)
1220 {
1221     trace_virtio_net_handle_announce(n->announce_timer.round);
1222     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1223         n->status & VIRTIO_NET_S_ANNOUNCE) {
1224         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1225         if (n->announce_timer.round) {
1226             qemu_announce_timer_step(&n->announce_timer);
1227         }
1228         return VIRTIO_NET_OK;
1229     } else {
1230         return VIRTIO_NET_ERR;
1231     }
1232 }
1233
1234 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1235
1236 static void virtio_net_disable_rss(VirtIONet *n)
1237 {
1238     if (n->rss_data.enabled) {
1239         trace_virtio_net_rss_disable();
1240     }
1241     n->rss_data.enabled = false;
1242
1243     virtio_net_detach_epbf_rss(n);
1244 }
1245
1246 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1247 {
1248     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1249     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1250         return false;
1251     }
1252
1253     return nc->info->set_steering_ebpf(nc, prog_fd);
1254 }
1255
1256 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1257                                    struct EBPFRSSConfig *config)
1258 {
1259     config->redirect = data->redirect;
1260     config->populate_hash = data->populate_hash;
1261     config->hash_types = data->hash_types;
1262     config->indirections_len = data->indirections_len;
1263     config->default_queue = data->default_queue;
1264 }
1265
1266 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1267 {
1268     struct EBPFRSSConfig config = {};
1269
1270     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1271         return false;
1272     }
1273
1274     rss_data_to_rss_config(&n->rss_data, &config);
1275
1276     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1277                           n->rss_data.indirections_table, n->rss_data.key)) {
1278         return false;
1279     }
1280
1281     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1282         return false;
1283     }
1284
1285     return true;
1286 }
1287
1288 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1289 {
1290     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1291 }
1292
1293 static bool virtio_net_load_ebpf(VirtIONet *n)
1294 {
1295     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1296         /* backend does't support steering ebpf */
1297         return false;
1298     }
1299
1300     return ebpf_rss_load(&n->ebpf_rss);
1301 }
1302
1303 static void virtio_net_unload_ebpf(VirtIONet *n)
1304 {
1305     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1306     ebpf_rss_unload(&n->ebpf_rss);
1307 }
1308
1309 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1310                                       struct iovec *iov,
1311                                       unsigned int iov_cnt,
1312                                       bool do_rss)
1313 {
1314     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1315     struct virtio_net_rss_config cfg;
1316     size_t s, offset = 0, size_get;
1317     uint16_t queue_pairs, i;
1318     struct {
1319         uint16_t us;
1320         uint8_t b;
1321     } QEMU_PACKED temp;
1322     const char *err_msg = "";
1323     uint32_t err_value = 0;
1324
1325     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1326         err_msg = "RSS is not negotiated";
1327         goto error;
1328     }
1329     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1330         err_msg = "Hash report is not negotiated";
1331         goto error;
1332     }
1333     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1334     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1335     if (s != size_get) {
1336         err_msg = "Short command buffer";
1337         err_value = (uint32_t)s;
1338         goto error;
1339     }
1340     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1341     n->rss_data.indirections_len =
1342         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1343     n->rss_data.indirections_len++;
1344     if (!do_rss) {
1345         n->rss_data.indirections_len = 1;
1346     }
1347     if (!is_power_of_2(n->rss_data.indirections_len)) {
1348         err_msg = "Invalid size of indirection table";
1349         err_value = n->rss_data.indirections_len;
1350         goto error;
1351     }
1352     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1353         err_msg = "Too large indirection table";
1354         err_value = n->rss_data.indirections_len;
1355         goto error;
1356     }
1357     n->rss_data.default_queue = do_rss ?
1358         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1359     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1360         err_msg = "Invalid default queue";
1361         err_value = n->rss_data.default_queue;
1362         goto error;
1363     }
1364     offset += size_get;
1365     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1366     g_free(n->rss_data.indirections_table);
1367     n->rss_data.indirections_table = g_malloc(size_get);
1368     if (!n->rss_data.indirections_table) {
1369         err_msg = "Can't allocate indirections table";
1370         err_value = n->rss_data.indirections_len;
1371         goto error;
1372     }
1373     s = iov_to_buf(iov, iov_cnt, offset,
1374                    n->rss_data.indirections_table, size_get);
1375     if (s != size_get) {
1376         err_msg = "Short indirection table buffer";
1377         err_value = (uint32_t)s;
1378         goto error;
1379     }
1380     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1381         uint16_t val = n->rss_data.indirections_table[i];
1382         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1383     }
1384     offset += size_get;
1385     size_get = sizeof(temp);
1386     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1387     if (s != size_get) {
1388         err_msg = "Can't get queue_pairs";
1389         err_value = (uint32_t)s;
1390         goto error;
1391     }
1392     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1393     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1394         err_msg = "Invalid number of queue_pairs";
1395         err_value = queue_pairs;
1396         goto error;
1397     }
1398     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1399         err_msg = "Invalid key size";
1400         err_value = temp.b;
1401         goto error;
1402     }
1403     if (!temp.b && n->rss_data.hash_types) {
1404         err_msg = "No key provided";
1405         err_value = 0;
1406         goto error;
1407     }
1408     if (!temp.b && !n->rss_data.hash_types) {
1409         virtio_net_disable_rss(n);
1410         return queue_pairs;
1411     }
1412     offset += size_get;
1413     size_get = temp.b;
1414     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1415     if (s != size_get) {
1416         err_msg = "Can get key buffer";
1417         err_value = (uint32_t)s;
1418         goto error;
1419     }
1420     n->rss_data.enabled = true;
1421
1422     if (!n->rss_data.populate_hash) {
1423         if (!virtio_net_attach_epbf_rss(n)) {
1424             /* EBPF must be loaded for vhost */
1425             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1426                 warn_report("Can't load eBPF RSS for vhost");
1427                 goto error;
1428             }
1429             /* fallback to software RSS */
1430             warn_report("Can't load eBPF RSS - fallback to software RSS");
1431             n->rss_data.enabled_software_rss = true;
1432         }
1433     } else {
1434         /* use software RSS for hash populating */
1435         /* and detach eBPF if was loaded before */
1436         virtio_net_detach_epbf_rss(n);
1437         n->rss_data.enabled_software_rss = true;
1438     }
1439
1440     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1441                                 n->rss_data.indirections_len,
1442                                 temp.b);
1443     return queue_pairs;
1444 error:
1445     trace_virtio_net_rss_error(err_msg, err_value);
1446     virtio_net_disable_rss(n);
1447     return 0;
1448 }
1449
1450 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1451                                 struct iovec *iov, unsigned int iov_cnt)
1452 {
1453     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1454     uint16_t queue_pairs;
1455     NetClientState *nc = qemu_get_queue(n->nic);
1456
1457     virtio_net_disable_rss(n);
1458     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1459         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1460         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1461     }
1462     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1463         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1464     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1465         struct virtio_net_ctrl_mq mq;
1466         size_t s;
1467         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1468             return VIRTIO_NET_ERR;
1469         }
1470         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1471         if (s != sizeof(mq)) {
1472             return VIRTIO_NET_ERR;
1473         }
1474         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1475
1476     } else {
1477         return VIRTIO_NET_ERR;
1478     }
1479
1480     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1481         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1482         queue_pairs > n->max_queue_pairs ||
1483         !n->multiqueue) {
1484         return VIRTIO_NET_ERR;
1485     }
1486
1487     n->curr_queue_pairs = queue_pairs;
1488     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1489         /*
1490          * Avoid updating the backend for a vdpa device: We're only interested
1491          * in updating the device model queues.
1492          */
1493         return VIRTIO_NET_OK;
1494     }
1495     /* stop the backend before changing the number of queue_pairs to avoid handling a
1496      * disabled queue */
1497     virtio_net_set_status(vdev, vdev->status);
1498     virtio_net_set_queue_pairs(n);
1499
1500     return VIRTIO_NET_OK;
1501 }
1502
1503 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1504                                   const struct iovec *in_sg, unsigned in_num,
1505                                   const struct iovec *out_sg,
1506                                   unsigned out_num)
1507 {
1508     VirtIONet *n = VIRTIO_NET(vdev);
1509     struct virtio_net_ctrl_hdr ctrl;
1510     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1511     size_t s;
1512     struct iovec *iov, *iov2;
1513
1514     if (iov_size(in_sg, in_num) < sizeof(status) ||
1515         iov_size(out_sg, out_num) < sizeof(ctrl)) {
1516         virtio_error(vdev, "virtio-net ctrl missing headers");
1517         return 0;
1518     }
1519
1520     iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1521     s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1522     iov_discard_front(&iov, &out_num, sizeof(ctrl));
1523     if (s != sizeof(ctrl)) {
1524         status = VIRTIO_NET_ERR;
1525     } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1526         status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1527     } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1528         status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1529     } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1530         status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1531     } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1532         status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1533     } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1534         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1535     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1536         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1537     }
1538
1539     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1540     assert(s == sizeof(status));
1541
1542     g_free(iov2);
1543     return sizeof(status);
1544 }
1545
1546 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1547 {
1548     VirtQueueElement *elem;
1549
1550     for (;;) {
1551         size_t written;
1552         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1553         if (!elem) {
1554             break;
1555         }
1556
1557         written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1558                                              elem->out_sg, elem->out_num);
1559         if (written > 0) {
1560             virtqueue_push(vq, elem, written);
1561             virtio_notify(vdev, vq);
1562             g_free(elem);
1563         } else {
1564             virtqueue_detach_element(vq, elem, 0);
1565             g_free(elem);
1566             break;
1567         }
1568     }
1569 }
1570
1571 /* RX */
1572
1573 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1574 {
1575     VirtIONet *n = VIRTIO_NET(vdev);
1576     int queue_index = vq2q(virtio_get_queue_index(vq));
1577
1578     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1579 }
1580
1581 static bool virtio_net_can_receive(NetClientState *nc)
1582 {
1583     VirtIONet *n = qemu_get_nic_opaque(nc);
1584     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1585     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1586
1587     if (!vdev->vm_running) {
1588         return false;
1589     }
1590
1591     if (nc->queue_index >= n->curr_queue_pairs) {
1592         return false;
1593     }
1594
1595     if (!virtio_queue_ready(q->rx_vq) ||
1596         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1597         return false;
1598     }
1599
1600     return true;
1601 }
1602
1603 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1604 {
1605     VirtIONet *n = q->n;
1606     if (virtio_queue_empty(q->rx_vq) ||
1607         (n->mergeable_rx_bufs &&
1608          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1609         virtio_queue_set_notification(q->rx_vq, 1);
1610
1611         /* To avoid a race condition where the guest has made some buffers
1612          * available after the above check but before notification was
1613          * enabled, check for available buffers again.
1614          */
1615         if (virtio_queue_empty(q->rx_vq) ||
1616             (n->mergeable_rx_bufs &&
1617              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1618             return 0;
1619         }
1620     }
1621
1622     virtio_queue_set_notification(q->rx_vq, 0);
1623     return 1;
1624 }
1625
1626 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1627 {
1628     virtio_tswap16s(vdev, &hdr->hdr_len);
1629     virtio_tswap16s(vdev, &hdr->gso_size);
1630     virtio_tswap16s(vdev, &hdr->csum_start);
1631     virtio_tswap16s(vdev, &hdr->csum_offset);
1632 }
1633
1634 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1635  * it never finds out that the packets don't have valid checksums.  This
1636  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1637  * fix this with Xen but it hasn't appeared in an upstream release of
1638  * dhclient yet.
1639  *
1640  * To avoid breaking existing guests, we catch udp packets and add
1641  * checksums.  This is terrible but it's better than hacking the guest
1642  * kernels.
1643  *
1644  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1645  * we should provide a mechanism to disable it to avoid polluting the host
1646  * cache.
1647  */
1648 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1649                                         uint8_t *buf, size_t size)
1650 {
1651     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1652         (size > 27 && size < 1500) && /* normal sized MTU */
1653         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1654         (buf[23] == 17) && /* ip.protocol == UDP */
1655         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1656         net_checksum_calculate(buf, size, CSUM_UDP);
1657         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1658     }
1659 }
1660
1661 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1662                            const void *buf, size_t size)
1663 {
1664     if (n->has_vnet_hdr) {
1665         /* FIXME this cast is evil */
1666         void *wbuf = (void *)buf;
1667         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1668                                     size - n->host_hdr_len);
1669
1670         if (n->needs_vnet_hdr_swap) {
1671             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1672         }
1673         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1674     } else {
1675         struct virtio_net_hdr hdr = {
1676             .flags = 0,
1677             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1678         };
1679         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1680     }
1681 }
1682
1683 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1684 {
1685     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1686     static const uint8_t vlan[] = {0x81, 0x00};
1687     uint8_t *ptr = (uint8_t *)buf;
1688     int i;
1689
1690     if (n->promisc)
1691         return 1;
1692
1693     ptr += n->host_hdr_len;
1694
1695     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1696         int vid = lduw_be_p(ptr + 14) & 0xfff;
1697         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1698             return 0;
1699     }
1700
1701     if (ptr[0] & 1) { // multicast
1702         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1703             return !n->nobcast;
1704         } else if (n->nomulti) {
1705             return 0;
1706         } else if (n->allmulti || n->mac_table.multi_overflow) {
1707             return 1;
1708         }
1709
1710         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1711             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1712                 return 1;
1713             }
1714         }
1715     } else { // unicast
1716         if (n->nouni) {
1717             return 0;
1718         } else if (n->alluni || n->mac_table.uni_overflow) {
1719             return 1;
1720         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1721             return 1;
1722         }
1723
1724         for (i = 0; i < n->mac_table.first_multi; i++) {
1725             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1726                 return 1;
1727             }
1728         }
1729     }
1730
1731     return 0;
1732 }
1733
1734 static uint8_t virtio_net_get_hash_type(bool isip4,
1735                                         bool isip6,
1736                                         bool isudp,
1737                                         bool istcp,
1738                                         uint32_t types)
1739 {
1740     if (isip4) {
1741         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1742             return NetPktRssIpV4Tcp;
1743         }
1744         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1745             return NetPktRssIpV4Udp;
1746         }
1747         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1748             return NetPktRssIpV4;
1749         }
1750     } else if (isip6) {
1751         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1752                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1753
1754         if (istcp && (types & mask)) {
1755             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1756                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1757         }
1758         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1759         if (isudp && (types & mask)) {
1760             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1761                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1762         }
1763         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1764         if (types & mask) {
1765             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1766                 NetPktRssIpV6Ex : NetPktRssIpV6;
1767         }
1768     }
1769     return 0xff;
1770 }
1771
1772 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1773                                    uint32_t hash)
1774 {
1775     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1776     hdr->hash_value = hash;
1777     hdr->hash_report = report;
1778 }
1779
1780 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1781                                   size_t size)
1782 {
1783     VirtIONet *n = qemu_get_nic_opaque(nc);
1784     unsigned int index = nc->queue_index, new_index = index;
1785     struct NetRxPkt *pkt = n->rx_pkt;
1786     uint8_t net_hash_type;
1787     uint32_t hash;
1788     bool isip4, isip6, isudp, istcp;
1789     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1790         VIRTIO_NET_HASH_REPORT_IPv4,
1791         VIRTIO_NET_HASH_REPORT_TCPv4,
1792         VIRTIO_NET_HASH_REPORT_TCPv6,
1793         VIRTIO_NET_HASH_REPORT_IPv6,
1794         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1795         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1796         VIRTIO_NET_HASH_REPORT_UDPv4,
1797         VIRTIO_NET_HASH_REPORT_UDPv6,
1798         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1799     };
1800
1801     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1802                              size - n->host_hdr_len);
1803     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1804     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1805         istcp = isudp = false;
1806     }
1807     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1808         istcp = isudp = false;
1809     }
1810     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1811                                              n->rss_data.hash_types);
1812     if (net_hash_type > NetPktRssIpV6UdpEx) {
1813         if (n->rss_data.populate_hash) {
1814             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1815         }
1816         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1817     }
1818
1819     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1820
1821     if (n->rss_data.populate_hash) {
1822         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1823     }
1824
1825     if (n->rss_data.redirect) {
1826         new_index = hash & (n->rss_data.indirections_len - 1);
1827         new_index = n->rss_data.indirections_table[new_index];
1828     }
1829
1830     return (index == new_index) ? -1 : new_index;
1831 }
1832
1833 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1834                                       size_t size, bool no_rss)
1835 {
1836     VirtIONet *n = qemu_get_nic_opaque(nc);
1837     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1838     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1839     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1840     size_t lens[VIRTQUEUE_MAX_SIZE];
1841     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1842     struct virtio_net_hdr_mrg_rxbuf mhdr;
1843     unsigned mhdr_cnt = 0;
1844     size_t offset, i, guest_offset, j;
1845     ssize_t err;
1846
1847     if (!virtio_net_can_receive(nc)) {
1848         return -1;
1849     }
1850
1851     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1852         int index = virtio_net_process_rss(nc, buf, size);
1853         if (index >= 0) {
1854             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1855             return virtio_net_receive_rcu(nc2, buf, size, true);
1856         }
1857     }
1858
1859     /* hdr_len refers to the header we supply to the guest */
1860     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1861         return 0;
1862     }
1863
1864     if (!receive_filter(n, buf, size))
1865         return size;
1866
1867     offset = i = 0;
1868
1869     while (offset < size) {
1870         VirtQueueElement *elem;
1871         int len, total;
1872         const struct iovec *sg;
1873
1874         total = 0;
1875
1876         if (i == VIRTQUEUE_MAX_SIZE) {
1877             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1878             err = size;
1879             goto err;
1880         }
1881
1882         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1883         if (!elem) {
1884             if (i) {
1885                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1886                              "i %zd mergeable %d offset %zd, size %zd, "
1887                              "guest hdr len %zd, host hdr len %zd "
1888                              "guest features 0x%" PRIx64,
1889                              i, n->mergeable_rx_bufs, offset, size,
1890                              n->guest_hdr_len, n->host_hdr_len,
1891                              vdev->guest_features);
1892             }
1893             err = -1;
1894             goto err;
1895         }
1896
1897         if (elem->in_num < 1) {
1898             virtio_error(vdev,
1899                          "virtio-net receive queue contains no in buffers");
1900             virtqueue_detach_element(q->rx_vq, elem, 0);
1901             g_free(elem);
1902             err = -1;
1903             goto err;
1904         }
1905
1906         sg = elem->in_sg;
1907         if (i == 0) {
1908             assert(offset == 0);
1909             if (n->mergeable_rx_bufs) {
1910                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1911                                     sg, elem->in_num,
1912                                     offsetof(typeof(mhdr), num_buffers),
1913                                     sizeof(mhdr.num_buffers));
1914             }
1915
1916             receive_header(n, sg, elem->in_num, buf, size);
1917             if (n->rss_data.populate_hash) {
1918                 offset = sizeof(mhdr);
1919                 iov_from_buf(sg, elem->in_num, offset,
1920                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1921             }
1922             offset = n->host_hdr_len;
1923             total += n->guest_hdr_len;
1924             guest_offset = n->guest_hdr_len;
1925         } else {
1926             guest_offset = 0;
1927         }
1928
1929         /* copy in packet.  ugh */
1930         len = iov_from_buf(sg, elem->in_num, guest_offset,
1931                            buf + offset, size - offset);
1932         total += len;
1933         offset += len;
1934         /* If buffers can't be merged, at this point we
1935          * must have consumed the complete packet.
1936          * Otherwise, drop it. */
1937         if (!n->mergeable_rx_bufs && offset < size) {
1938             virtqueue_unpop(q->rx_vq, elem, total);
1939             g_free(elem);
1940             err = size;
1941             goto err;
1942         }
1943
1944         elems[i] = elem;
1945         lens[i] = total;
1946         i++;
1947     }
1948
1949     if (mhdr_cnt) {
1950         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1951         iov_from_buf(mhdr_sg, mhdr_cnt,
1952                      0,
1953                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1954     }
1955
1956     for (j = 0; j < i; j++) {
1957         /* signal other side */
1958         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1959         g_free(elems[j]);
1960     }
1961
1962     virtqueue_flush(q->rx_vq, i);
1963     virtio_notify(vdev, q->rx_vq);
1964
1965     return size;
1966
1967 err:
1968     for (j = 0; j < i; j++) {
1969         virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
1970         g_free(elems[j]);
1971     }
1972
1973     return err;
1974 }
1975
1976 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1977                                   size_t size)
1978 {
1979     RCU_READ_LOCK_GUARD();
1980
1981     return virtio_net_receive_rcu(nc, buf, size, false);
1982 }
1983
1984 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1985                                          const uint8_t *buf,
1986                                          VirtioNetRscUnit *unit)
1987 {
1988     uint16_t ip_hdrlen;
1989     struct ip_header *ip;
1990
1991     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1992                               + sizeof(struct eth_header));
1993     unit->ip = (void *)ip;
1994     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1995     unit->ip_plen = &ip->ip_len;
1996     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1997     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1998     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1999 }
2000
2001 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2002                                          const uint8_t *buf,
2003                                          VirtioNetRscUnit *unit)
2004 {
2005     struct ip6_header *ip6;
2006
2007     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2008                                  + sizeof(struct eth_header));
2009     unit->ip = ip6;
2010     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2011     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2012                                         + sizeof(struct ip6_header));
2013     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2014
2015     /* There is a difference between payload lenght in ipv4 and v6,
2016        ip header is excluded in ipv6 */
2017     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2018 }
2019
2020 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2021                                        VirtioNetRscSeg *seg)
2022 {
2023     int ret;
2024     struct virtio_net_hdr_v1 *h;
2025
2026     h = (struct virtio_net_hdr_v1 *)seg->buf;
2027     h->flags = 0;
2028     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2029
2030     if (seg->is_coalesced) {
2031         h->rsc.segments = seg->packets;
2032         h->rsc.dup_acks = seg->dup_ack;
2033         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2034         if (chain->proto == ETH_P_IP) {
2035             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2036         } else {
2037             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2038         }
2039     }
2040
2041     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2042     QTAILQ_REMOVE(&chain->buffers, seg, next);
2043     g_free(seg->buf);
2044     g_free(seg);
2045
2046     return ret;
2047 }
2048
2049 static void virtio_net_rsc_purge(void *opq)
2050 {
2051     VirtioNetRscSeg *seg, *rn;
2052     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2053
2054     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2055         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2056             chain->stat.purge_failed++;
2057             continue;
2058         }
2059     }
2060
2061     chain->stat.timer++;
2062     if (!QTAILQ_EMPTY(&chain->buffers)) {
2063         timer_mod(chain->drain_timer,
2064               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2065     }
2066 }
2067
2068 static void virtio_net_rsc_cleanup(VirtIONet *n)
2069 {
2070     VirtioNetRscChain *chain, *rn_chain;
2071     VirtioNetRscSeg *seg, *rn_seg;
2072
2073     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2074         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2075             QTAILQ_REMOVE(&chain->buffers, seg, next);
2076             g_free(seg->buf);
2077             g_free(seg);
2078         }
2079
2080         timer_free(chain->drain_timer);
2081         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2082         g_free(chain);
2083     }
2084 }
2085
2086 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2087                                      NetClientState *nc,
2088                                      const uint8_t *buf, size_t size)
2089 {
2090     uint16_t hdr_len;
2091     VirtioNetRscSeg *seg;
2092
2093     hdr_len = chain->n->guest_hdr_len;
2094     seg = g_new(VirtioNetRscSeg, 1);
2095     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2096         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2097     memcpy(seg->buf, buf, size);
2098     seg->size = size;
2099     seg->packets = 1;
2100     seg->dup_ack = 0;
2101     seg->is_coalesced = 0;
2102     seg->nc = nc;
2103
2104     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2105     chain->stat.cache++;
2106
2107     switch (chain->proto) {
2108     case ETH_P_IP:
2109         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2110         break;
2111     case ETH_P_IPV6:
2112         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2113         break;
2114     default:
2115         g_assert_not_reached();
2116     }
2117 }
2118
2119 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2120                                          VirtioNetRscSeg *seg,
2121                                          const uint8_t *buf,
2122                                          struct tcp_header *n_tcp,
2123                                          struct tcp_header *o_tcp)
2124 {
2125     uint32_t nack, oack;
2126     uint16_t nwin, owin;
2127
2128     nack = htonl(n_tcp->th_ack);
2129     nwin = htons(n_tcp->th_win);
2130     oack = htonl(o_tcp->th_ack);
2131     owin = htons(o_tcp->th_win);
2132
2133     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2134         chain->stat.ack_out_of_win++;
2135         return RSC_FINAL;
2136     } else if (nack == oack) {
2137         /* duplicated ack or window probe */
2138         if (nwin == owin) {
2139             /* duplicated ack, add dup ack count due to whql test up to 1 */
2140             chain->stat.dup_ack++;
2141             return RSC_FINAL;
2142         } else {
2143             /* Coalesce window update */
2144             o_tcp->th_win = n_tcp->th_win;
2145             chain->stat.win_update++;
2146             return RSC_COALESCE;
2147         }
2148     } else {
2149         /* pure ack, go to 'C', finalize*/
2150         chain->stat.pure_ack++;
2151         return RSC_FINAL;
2152     }
2153 }
2154
2155 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2156                                             VirtioNetRscSeg *seg,
2157                                             const uint8_t *buf,
2158                                             VirtioNetRscUnit *n_unit)
2159 {
2160     void *data;
2161     uint16_t o_ip_len;
2162     uint32_t nseq, oseq;
2163     VirtioNetRscUnit *o_unit;
2164
2165     o_unit = &seg->unit;
2166     o_ip_len = htons(*o_unit->ip_plen);
2167     nseq = htonl(n_unit->tcp->th_seq);
2168     oseq = htonl(o_unit->tcp->th_seq);
2169
2170     /* out of order or retransmitted. */
2171     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2172         chain->stat.data_out_of_win++;
2173         return RSC_FINAL;
2174     }
2175
2176     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2177     if (nseq == oseq) {
2178         if ((o_unit->payload == 0) && n_unit->payload) {
2179             /* From no payload to payload, normal case, not a dup ack or etc */
2180             chain->stat.data_after_pure_ack++;
2181             goto coalesce;
2182         } else {
2183             return virtio_net_rsc_handle_ack(chain, seg, buf,
2184                                              n_unit->tcp, o_unit->tcp);
2185         }
2186     } else if ((nseq - oseq) != o_unit->payload) {
2187         /* Not a consistent packet, out of order */
2188         chain->stat.data_out_of_order++;
2189         return RSC_FINAL;
2190     } else {
2191 coalesce:
2192         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2193             chain->stat.over_size++;
2194             return RSC_FINAL;
2195         }
2196
2197         /* Here comes the right data, the payload length in v4/v6 is different,
2198            so use the field value to update and record the new data len */
2199         o_unit->payload += n_unit->payload; /* update new data len */
2200
2201         /* update field in ip header */
2202         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2203
2204         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2205            for windows guest, while this may change the behavior for linux
2206            guest (only if it uses RSC feature). */
2207         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2208
2209         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2210         o_unit->tcp->th_win = n_unit->tcp->th_win;
2211
2212         memmove(seg->buf + seg->size, data, n_unit->payload);
2213         seg->size += n_unit->payload;
2214         seg->packets++;
2215         chain->stat.coalesced++;
2216         return RSC_COALESCE;
2217     }
2218 }
2219
2220 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2221                                         VirtioNetRscSeg *seg,
2222                                         const uint8_t *buf, size_t size,
2223                                         VirtioNetRscUnit *unit)
2224 {
2225     struct ip_header *ip1, *ip2;
2226
2227     ip1 = (struct ip_header *)(unit->ip);
2228     ip2 = (struct ip_header *)(seg->unit.ip);
2229     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2230         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2231         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2232         chain->stat.no_match++;
2233         return RSC_NO_MATCH;
2234     }
2235
2236     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2237 }
2238
2239 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2240                                         VirtioNetRscSeg *seg,
2241                                         const uint8_t *buf, size_t size,
2242                                         VirtioNetRscUnit *unit)
2243 {
2244     struct ip6_header *ip1, *ip2;
2245
2246     ip1 = (struct ip6_header *)(unit->ip);
2247     ip2 = (struct ip6_header *)(seg->unit.ip);
2248     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2249         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2250         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2251         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2252             chain->stat.no_match++;
2253             return RSC_NO_MATCH;
2254     }
2255
2256     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2257 }
2258
2259 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2260  * to prevent out of order */
2261 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2262                                          struct tcp_header *tcp)
2263 {
2264     uint16_t tcp_hdr;
2265     uint16_t tcp_flag;
2266
2267     tcp_flag = htons(tcp->th_offset_flags);
2268     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2269     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2270     if (tcp_flag & TH_SYN) {
2271         chain->stat.tcp_syn++;
2272         return RSC_BYPASS;
2273     }
2274
2275     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2276         chain->stat.tcp_ctrl_drain++;
2277         return RSC_FINAL;
2278     }
2279
2280     if (tcp_hdr > sizeof(struct tcp_header)) {
2281         chain->stat.tcp_all_opt++;
2282         return RSC_FINAL;
2283     }
2284
2285     return RSC_CANDIDATE;
2286 }
2287
2288 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2289                                          NetClientState *nc,
2290                                          const uint8_t *buf, size_t size,
2291                                          VirtioNetRscUnit *unit)
2292 {
2293     int ret;
2294     VirtioNetRscSeg *seg, *nseg;
2295
2296     if (QTAILQ_EMPTY(&chain->buffers)) {
2297         chain->stat.empty_cache++;
2298         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2299         timer_mod(chain->drain_timer,
2300               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2301         return size;
2302     }
2303
2304     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2305         if (chain->proto == ETH_P_IP) {
2306             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2307         } else {
2308             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2309         }
2310
2311         if (ret == RSC_FINAL) {
2312             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2313                 /* Send failed */
2314                 chain->stat.final_failed++;
2315                 return 0;
2316             }
2317
2318             /* Send current packet */
2319             return virtio_net_do_receive(nc, buf, size);
2320         } else if (ret == RSC_NO_MATCH) {
2321             continue;
2322         } else {
2323             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2324             seg->is_coalesced = 1;
2325             return size;
2326         }
2327     }
2328
2329     chain->stat.no_match_cache++;
2330     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2331     return size;
2332 }
2333
2334 /* Drain a connection data, this is to avoid out of order segments */
2335 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2336                                         NetClientState *nc,
2337                                         const uint8_t *buf, size_t size,
2338                                         uint16_t ip_start, uint16_t ip_size,
2339                                         uint16_t tcp_port)
2340 {
2341     VirtioNetRscSeg *seg, *nseg;
2342     uint32_t ppair1, ppair2;
2343
2344     ppair1 = *(uint32_t *)(buf + tcp_port);
2345     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2346         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2347         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2348             || (ppair1 != ppair2)) {
2349             continue;
2350         }
2351         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2352             chain->stat.drain_failed++;
2353         }
2354
2355         break;
2356     }
2357
2358     return virtio_net_do_receive(nc, buf, size);
2359 }
2360
2361 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2362                                             struct ip_header *ip,
2363                                             const uint8_t *buf, size_t size)
2364 {
2365     uint16_t ip_len;
2366
2367     /* Not an ipv4 packet */
2368     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2369         chain->stat.ip_option++;
2370         return RSC_BYPASS;
2371     }
2372
2373     /* Don't handle packets with ip option */
2374     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2375         chain->stat.ip_option++;
2376         return RSC_BYPASS;
2377     }
2378
2379     if (ip->ip_p != IPPROTO_TCP) {
2380         chain->stat.bypass_not_tcp++;
2381         return RSC_BYPASS;
2382     }
2383
2384     /* Don't handle packets with ip fragment */
2385     if (!(htons(ip->ip_off) & IP_DF)) {
2386         chain->stat.ip_frag++;
2387         return RSC_BYPASS;
2388     }
2389
2390     /* Don't handle packets with ecn flag */
2391     if (IPTOS_ECN(ip->ip_tos)) {
2392         chain->stat.ip_ecn++;
2393         return RSC_BYPASS;
2394     }
2395
2396     ip_len = htons(ip->ip_len);
2397     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2398         || ip_len > (size - chain->n->guest_hdr_len -
2399                      sizeof(struct eth_header))) {
2400         chain->stat.ip_hacked++;
2401         return RSC_BYPASS;
2402     }
2403
2404     return RSC_CANDIDATE;
2405 }
2406
2407 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2408                                       NetClientState *nc,
2409                                       const uint8_t *buf, size_t size)
2410 {
2411     int32_t ret;
2412     uint16_t hdr_len;
2413     VirtioNetRscUnit unit;
2414
2415     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2416
2417     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2418         + sizeof(struct tcp_header))) {
2419         chain->stat.bypass_not_tcp++;
2420         return virtio_net_do_receive(nc, buf, size);
2421     }
2422
2423     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2424     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2425         != RSC_CANDIDATE) {
2426         return virtio_net_do_receive(nc, buf, size);
2427     }
2428
2429     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2430     if (ret == RSC_BYPASS) {
2431         return virtio_net_do_receive(nc, buf, size);
2432     } else if (ret == RSC_FINAL) {
2433         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2434                 ((hdr_len + sizeof(struct eth_header)) + 12),
2435                 VIRTIO_NET_IP4_ADDR_SIZE,
2436                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2437     }
2438
2439     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2440 }
2441
2442 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2443                                             struct ip6_header *ip6,
2444                                             const uint8_t *buf, size_t size)
2445 {
2446     uint16_t ip_len;
2447
2448     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2449         != IP_HEADER_VERSION_6) {
2450         return RSC_BYPASS;
2451     }
2452
2453     /* Both option and protocol is checked in this */
2454     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2455         chain->stat.bypass_not_tcp++;
2456         return RSC_BYPASS;
2457     }
2458
2459     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2460     if (ip_len < sizeof(struct tcp_header) ||
2461         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2462                   - sizeof(struct ip6_header))) {
2463         chain->stat.ip_hacked++;
2464         return RSC_BYPASS;
2465     }
2466
2467     /* Don't handle packets with ecn flag */
2468     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2469         chain->stat.ip_ecn++;
2470         return RSC_BYPASS;
2471     }
2472
2473     return RSC_CANDIDATE;
2474 }
2475
2476 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2477                                       const uint8_t *buf, size_t size)
2478 {
2479     int32_t ret;
2480     uint16_t hdr_len;
2481     VirtioNetRscChain *chain;
2482     VirtioNetRscUnit unit;
2483
2484     chain = opq;
2485     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2486
2487     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2488         + sizeof(tcp_header))) {
2489         return virtio_net_do_receive(nc, buf, size);
2490     }
2491
2492     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2493     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2494                                                  unit.ip, buf, size)) {
2495         return virtio_net_do_receive(nc, buf, size);
2496     }
2497
2498     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2499     if (ret == RSC_BYPASS) {
2500         return virtio_net_do_receive(nc, buf, size);
2501     } else if (ret == RSC_FINAL) {
2502         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2503                 ((hdr_len + sizeof(struct eth_header)) + 8),
2504                 VIRTIO_NET_IP6_ADDR_SIZE,
2505                 hdr_len + sizeof(struct eth_header)
2506                 + sizeof(struct ip6_header));
2507     }
2508
2509     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2510 }
2511
2512 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2513                                                       NetClientState *nc,
2514                                                       uint16_t proto)
2515 {
2516     VirtioNetRscChain *chain;
2517
2518     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2519         return NULL;
2520     }
2521
2522     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2523         if (chain->proto == proto) {
2524             return chain;
2525         }
2526     }
2527
2528     chain = g_malloc(sizeof(*chain));
2529     chain->n = n;
2530     chain->proto = proto;
2531     if (proto == (uint16_t)ETH_P_IP) {
2532         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2533         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2534     } else {
2535         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2536         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2537     }
2538     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2539                                       virtio_net_rsc_purge, chain);
2540     memset(&chain->stat, 0, sizeof(chain->stat));
2541
2542     QTAILQ_INIT(&chain->buffers);
2543     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2544
2545     return chain;
2546 }
2547
2548 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2549                                       const uint8_t *buf,
2550                                       size_t size)
2551 {
2552     uint16_t proto;
2553     VirtioNetRscChain *chain;
2554     struct eth_header *eth;
2555     VirtIONet *n;
2556
2557     n = qemu_get_nic_opaque(nc);
2558     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2559         return virtio_net_do_receive(nc, buf, size);
2560     }
2561
2562     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2563     proto = htons(eth->h_proto);
2564
2565     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2566     if (chain) {
2567         chain->stat.received++;
2568         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2569             return virtio_net_rsc_receive4(chain, nc, buf, size);
2570         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2571             return virtio_net_rsc_receive6(chain, nc, buf, size);
2572         }
2573     }
2574     return virtio_net_do_receive(nc, buf, size);
2575 }
2576
2577 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2578                                   size_t size)
2579 {
2580     VirtIONet *n = qemu_get_nic_opaque(nc);
2581     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2582         return virtio_net_rsc_receive(nc, buf, size);
2583     } else {
2584         return virtio_net_do_receive(nc, buf, size);
2585     }
2586 }
2587
2588 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2589
2590 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2591 {
2592     VirtIONet *n = qemu_get_nic_opaque(nc);
2593     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2594     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2595     int ret;
2596
2597     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2598     virtio_notify(vdev, q->tx_vq);
2599
2600     g_free(q->async_tx.elem);
2601     q->async_tx.elem = NULL;
2602
2603     virtio_queue_set_notification(q->tx_vq, 1);
2604     ret = virtio_net_flush_tx(q);
2605     if (ret >= n->tx_burst) {
2606         /*
2607          * the flush has been stopped by tx_burst
2608          * we will not receive notification for the
2609          * remainining part, so re-schedule
2610          */
2611         virtio_queue_set_notification(q->tx_vq, 0);
2612         if (q->tx_bh) {
2613             qemu_bh_schedule(q->tx_bh);
2614         } else {
2615             timer_mod(q->tx_timer,
2616                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2617         }
2618         q->tx_waiting = 1;
2619     }
2620 }
2621
2622 /* TX */
2623 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2624 {
2625     VirtIONet *n = q->n;
2626     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2627     VirtQueueElement *elem;
2628     int32_t num_packets = 0;
2629     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2630     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2631         return num_packets;
2632     }
2633
2634     if (q->async_tx.elem) {
2635         virtio_queue_set_notification(q->tx_vq, 0);
2636         return num_packets;
2637     }
2638
2639     for (;;) {
2640         ssize_t ret;
2641         unsigned int out_num;
2642         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2643         struct virtio_net_hdr_mrg_rxbuf mhdr;
2644
2645         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2646         if (!elem) {
2647             break;
2648         }
2649
2650         out_num = elem->out_num;
2651         out_sg = elem->out_sg;
2652         if (out_num < 1) {
2653             virtio_error(vdev, "virtio-net header not in first element");
2654             virtqueue_detach_element(q->tx_vq, elem, 0);
2655             g_free(elem);
2656             return -EINVAL;
2657         }
2658
2659         if (n->has_vnet_hdr) {
2660             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2661                 n->guest_hdr_len) {
2662                 virtio_error(vdev, "virtio-net header incorrect");
2663                 virtqueue_detach_element(q->tx_vq, elem, 0);
2664                 g_free(elem);
2665                 return -EINVAL;
2666             }
2667             if (n->needs_vnet_hdr_swap) {
2668                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2669                 sg2[0].iov_base = &mhdr;
2670                 sg2[0].iov_len = n->guest_hdr_len;
2671                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2672                                    out_sg, out_num,
2673                                    n->guest_hdr_len, -1);
2674                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2675                     goto drop;
2676                 }
2677                 out_num += 1;
2678                 out_sg = sg2;
2679             }
2680         }
2681         /*
2682          * If host wants to see the guest header as is, we can
2683          * pass it on unchanged. Otherwise, copy just the parts
2684          * that host is interested in.
2685          */
2686         assert(n->host_hdr_len <= n->guest_hdr_len);
2687         if (n->host_hdr_len != n->guest_hdr_len) {
2688             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2689                                        out_sg, out_num,
2690                                        0, n->host_hdr_len);
2691             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2692                              out_sg, out_num,
2693                              n->guest_hdr_len, -1);
2694             out_num = sg_num;
2695             out_sg = sg;
2696         }
2697
2698         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2699                                       out_sg, out_num, virtio_net_tx_complete);
2700         if (ret == 0) {
2701             virtio_queue_set_notification(q->tx_vq, 0);
2702             q->async_tx.elem = elem;
2703             return -EBUSY;
2704         }
2705
2706 drop:
2707         virtqueue_push(q->tx_vq, elem, 0);
2708         virtio_notify(vdev, q->tx_vq);
2709         g_free(elem);
2710
2711         if (++num_packets >= n->tx_burst) {
2712             break;
2713         }
2714     }
2715     return num_packets;
2716 }
2717
2718 static void virtio_net_tx_timer(void *opaque);
2719
2720 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2721 {
2722     VirtIONet *n = VIRTIO_NET(vdev);
2723     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2724
2725     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2726         virtio_net_drop_tx_queue_data(vdev, vq);
2727         return;
2728     }
2729
2730     /* This happens when device was stopped but VCPU wasn't. */
2731     if (!vdev->vm_running) {
2732         q->tx_waiting = 1;
2733         return;
2734     }
2735
2736     if (q->tx_waiting) {
2737         /* We already have queued packets, immediately flush */
2738         timer_del(q->tx_timer);
2739         virtio_net_tx_timer(q);
2740     } else {
2741         /* re-arm timer to flush it (and more) on next tick */
2742         timer_mod(q->tx_timer,
2743                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2744         q->tx_waiting = 1;
2745         virtio_queue_set_notification(vq, 0);
2746     }
2747 }
2748
2749 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2750 {
2751     VirtIONet *n = VIRTIO_NET(vdev);
2752     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2753
2754     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2755         virtio_net_drop_tx_queue_data(vdev, vq);
2756         return;
2757     }
2758
2759     if (unlikely(q->tx_waiting)) {
2760         return;
2761     }
2762     q->tx_waiting = 1;
2763     /* This happens when device was stopped but VCPU wasn't. */
2764     if (!vdev->vm_running) {
2765         return;
2766     }
2767     virtio_queue_set_notification(vq, 0);
2768     qemu_bh_schedule(q->tx_bh);
2769 }
2770
2771 static void virtio_net_tx_timer(void *opaque)
2772 {
2773     VirtIONetQueue *q = opaque;
2774     VirtIONet *n = q->n;
2775     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2776     int ret;
2777
2778     /* This happens when device was stopped but BH wasn't. */
2779     if (!vdev->vm_running) {
2780         /* Make sure tx waiting is set, so we'll run when restarted. */
2781         assert(q->tx_waiting);
2782         return;
2783     }
2784
2785     q->tx_waiting = 0;
2786
2787     /* Just in case the driver is not ready on more */
2788     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2789         return;
2790     }
2791
2792     ret = virtio_net_flush_tx(q);
2793     if (ret == -EBUSY || ret == -EINVAL) {
2794         return;
2795     }
2796     /*
2797      * If we flush a full burst of packets, assume there are
2798      * more coming and immediately rearm
2799      */
2800     if (ret >= n->tx_burst) {
2801         q->tx_waiting = 1;
2802         timer_mod(q->tx_timer,
2803                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2804         return;
2805     }
2806     /*
2807      * If less than a full burst, re-enable notification and flush
2808      * anything that may have come in while we weren't looking.  If
2809      * we find something, assume the guest is still active and rearm
2810      */
2811     virtio_queue_set_notification(q->tx_vq, 1);
2812     ret = virtio_net_flush_tx(q);
2813     if (ret > 0) {
2814         virtio_queue_set_notification(q->tx_vq, 0);
2815         q->tx_waiting = 1;
2816         timer_mod(q->tx_timer,
2817                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2818     }
2819 }
2820
2821 static void virtio_net_tx_bh(void *opaque)
2822 {
2823     VirtIONetQueue *q = opaque;
2824     VirtIONet *n = q->n;
2825     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2826     int32_t ret;
2827
2828     /* This happens when device was stopped but BH wasn't. */
2829     if (!vdev->vm_running) {
2830         /* Make sure tx waiting is set, so we'll run when restarted. */
2831         assert(q->tx_waiting);
2832         return;
2833     }
2834
2835     q->tx_waiting = 0;
2836
2837     /* Just in case the driver is not ready on more */
2838     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2839         return;
2840     }
2841
2842     ret = virtio_net_flush_tx(q);
2843     if (ret == -EBUSY || ret == -EINVAL) {
2844         return; /* Notification re-enable handled by tx_complete or device
2845                  * broken */
2846     }
2847
2848     /* If we flush a full burst of packets, assume there are
2849      * more coming and immediately reschedule */
2850     if (ret >= n->tx_burst) {
2851         qemu_bh_schedule(q->tx_bh);
2852         q->tx_waiting = 1;
2853         return;
2854     }
2855
2856     /* If less than a full burst, re-enable notification and flush
2857      * anything that may have come in while we weren't looking.  If
2858      * we find something, assume the guest is still active and reschedule */
2859     virtio_queue_set_notification(q->tx_vq, 1);
2860     ret = virtio_net_flush_tx(q);
2861     if (ret == -EINVAL) {
2862         return;
2863     } else if (ret > 0) {
2864         virtio_queue_set_notification(q->tx_vq, 0);
2865         qemu_bh_schedule(q->tx_bh);
2866         q->tx_waiting = 1;
2867     }
2868 }
2869
2870 static void virtio_net_add_queue(VirtIONet *n, int index)
2871 {
2872     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2873
2874     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2875                                            virtio_net_handle_rx);
2876
2877     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2878         n->vqs[index].tx_vq =
2879             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2880                              virtio_net_handle_tx_timer);
2881         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2882                                               virtio_net_tx_timer,
2883                                               &n->vqs[index]);
2884     } else {
2885         n->vqs[index].tx_vq =
2886             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2887                              virtio_net_handle_tx_bh);
2888         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2889     }
2890
2891     n->vqs[index].tx_waiting = 0;
2892     n->vqs[index].n = n;
2893 }
2894
2895 static void virtio_net_del_queue(VirtIONet *n, int index)
2896 {
2897     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2898     VirtIONetQueue *q = &n->vqs[index];
2899     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2900
2901     qemu_purge_queued_packets(nc);
2902
2903     virtio_del_queue(vdev, index * 2);
2904     if (q->tx_timer) {
2905         timer_free(q->tx_timer);
2906         q->tx_timer = NULL;
2907     } else {
2908         qemu_bh_delete(q->tx_bh);
2909         q->tx_bh = NULL;
2910     }
2911     q->tx_waiting = 0;
2912     virtio_del_queue(vdev, index * 2 + 1);
2913 }
2914
2915 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2916 {
2917     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2918     int old_num_queues = virtio_get_num_queues(vdev);
2919     int new_num_queues = new_max_queue_pairs * 2 + 1;
2920     int i;
2921
2922     assert(old_num_queues >= 3);
2923     assert(old_num_queues % 2 == 1);
2924
2925     if (old_num_queues == new_num_queues) {
2926         return;
2927     }
2928
2929     /*
2930      * We always need to remove and add ctrl vq if
2931      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2932      * and then we only enter one of the following two loops.
2933      */
2934     virtio_del_queue(vdev, old_num_queues - 1);
2935
2936     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2937         /* new_num_queues < old_num_queues */
2938         virtio_net_del_queue(n, i / 2);
2939     }
2940
2941     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2942         /* new_num_queues > old_num_queues */
2943         virtio_net_add_queue(n, i / 2);
2944     }
2945
2946     /* add ctrl_vq last */
2947     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2948 }
2949
2950 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2951 {
2952     int max = multiqueue ? n->max_queue_pairs : 1;
2953
2954     n->multiqueue = multiqueue;
2955     virtio_net_change_num_queue_pairs(n, max);
2956
2957     virtio_net_set_queue_pairs(n);
2958 }
2959
2960 static int virtio_net_post_load_device(void *opaque, int version_id)
2961 {
2962     VirtIONet *n = opaque;
2963     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2964     int i, link_down;
2965
2966     trace_virtio_net_post_load_device();
2967     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2968                                virtio_vdev_has_feature(vdev,
2969                                                        VIRTIO_F_VERSION_1),
2970                                virtio_vdev_has_feature(vdev,
2971                                                        VIRTIO_NET_F_HASH_REPORT));
2972
2973     /* MAC_TABLE_ENTRIES may be different from the saved image */
2974     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2975         n->mac_table.in_use = 0;
2976     }
2977
2978     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2979         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2980     }
2981
2982     /*
2983      * curr_guest_offloads will be later overwritten by the
2984      * virtio_set_features_nocheck call done from the virtio_load.
2985      * Here we make sure it is preserved and restored accordingly
2986      * in the virtio_net_post_load_virtio callback.
2987      */
2988     n->saved_guest_offloads = n->curr_guest_offloads;
2989
2990     virtio_net_set_queue_pairs(n);
2991
2992     /* Find the first multicast entry in the saved MAC filter */
2993     for (i = 0; i < n->mac_table.in_use; i++) {
2994         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2995             break;
2996         }
2997     }
2998     n->mac_table.first_multi = i;
2999
3000     /* nc.link_down can't be migrated, so infer link_down according
3001      * to link status bit in n->status */
3002     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3003     for (i = 0; i < n->max_queue_pairs; i++) {
3004         qemu_get_subqueue(n->nic, i)->link_down = link_down;
3005     }
3006
3007     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3008         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3009         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3010                                   QEMU_CLOCK_VIRTUAL,
3011                                   virtio_net_announce_timer, n);
3012         if (n->announce_timer.round) {
3013             timer_mod(n->announce_timer.tm,
3014                       qemu_clock_get_ms(n->announce_timer.type));
3015         } else {
3016             qemu_announce_timer_del(&n->announce_timer, false);
3017         }
3018     }
3019
3020     if (n->rss_data.enabled) {
3021         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3022         if (!n->rss_data.populate_hash) {
3023             if (!virtio_net_attach_epbf_rss(n)) {
3024                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3025                     warn_report("Can't post-load eBPF RSS for vhost");
3026                 } else {
3027                     warn_report("Can't post-load eBPF RSS - "
3028                                 "fallback to software RSS");
3029                     n->rss_data.enabled_software_rss = true;
3030                 }
3031             }
3032         }
3033
3034         trace_virtio_net_rss_enable(n->rss_data.hash_types,
3035                                     n->rss_data.indirections_len,
3036                                     sizeof(n->rss_data.key));
3037     } else {
3038         trace_virtio_net_rss_disable();
3039     }
3040     return 0;
3041 }
3042
3043 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3044 {
3045     VirtIONet *n = VIRTIO_NET(vdev);
3046     /*
3047      * The actual needed state is now in saved_guest_offloads,
3048      * see virtio_net_post_load_device for detail.
3049      * Restore it back and apply the desired offloads.
3050      */
3051     n->curr_guest_offloads = n->saved_guest_offloads;
3052     if (peer_has_vnet_hdr(n)) {
3053         virtio_net_apply_guest_offloads(n);
3054     }
3055
3056     return 0;
3057 }
3058
3059 /* tx_waiting field of a VirtIONetQueue */
3060 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3061     .name = "virtio-net-queue-tx_waiting",
3062     .fields = (VMStateField[]) {
3063         VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3064         VMSTATE_END_OF_LIST()
3065    },
3066 };
3067
3068 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3069 {
3070     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3071 }
3072
3073 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3074 {
3075     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3076                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3077 }
3078
3079 static bool mac_table_fits(void *opaque, int version_id)
3080 {
3081     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3082 }
3083
/* vmstate field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
3088
3089 /* This temporary type is shared by all the WITH_TMP methods
3090  * although only some fields are used by each.
3091  */
3092 struct VirtIONetMigTmp {
3093     VirtIONet      *parent;
3094     VirtIONetQueue *vqs_1;
3095     uint16_t        curr_queue_pairs_1;
3096     uint8_t         has_ufo;
3097     uint32_t        has_vnet_hdr;
3098 };
3099
3100 /* The 2nd and subsequent tx_waiting flags are loaded later than
3101  * the 1st entry in the queue_pairs and only if there's more than one
3102  * entry.  We use the tmp mechanism to calculate a temporary
3103  * pointer and count and also validate the count.
3104  */
3105
3106 static int virtio_net_tx_waiting_pre_save(void *opaque)
3107 {
3108     struct VirtIONetMigTmp *tmp = opaque;
3109
3110     tmp->vqs_1 = tmp->parent->vqs + 1;
3111     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3112     if (tmp->parent->curr_queue_pairs == 0) {
3113         tmp->curr_queue_pairs_1 = 0;
3114     }
3115
3116     return 0;
3117 }
3118
3119 static int virtio_net_tx_waiting_pre_load(void *opaque)
3120 {
3121     struct VirtIONetMigTmp *tmp = opaque;
3122
3123     /* Reuse the pointer setup from save */
3124     virtio_net_tx_waiting_pre_save(opaque);
3125
3126     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3127         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3128             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3129
3130         return -EINVAL;
3131     }
3132
3133     return 0; /* all good */
3134 }
3135
3136 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3137     .name      = "virtio-net-tx_waiting",
3138     .pre_load  = virtio_net_tx_waiting_pre_load,
3139     .pre_save  = virtio_net_tx_waiting_pre_save,
3140     .fields    = (VMStateField[]) {
3141         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3142                                      curr_queue_pairs_1,
3143                                      vmstate_virtio_net_queue_tx_waiting,
3144                                      struct VirtIONetQueue),
3145         VMSTATE_END_OF_LIST()
3146     },
3147 };
3148
3149 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3150  * flag set we need to check that we have it
3151  */
3152 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3153 {
3154     struct VirtIONetMigTmp *tmp = opaque;
3155
3156     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3157         error_report("virtio-net: saved image requires TUN_F_UFO support");
3158         return -EINVAL;
3159     }
3160
3161     return 0;
3162 }
3163
3164 static int virtio_net_ufo_pre_save(void *opaque)
3165 {
3166     struct VirtIONetMigTmp *tmp = opaque;
3167
3168     tmp->has_ufo = tmp->parent->has_ufo;
3169
3170     return 0;
3171 }
3172
3173 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3174     .name      = "virtio-net-ufo",
3175     .post_load = virtio_net_ufo_post_load,
3176     .pre_save  = virtio_net_ufo_pre_save,
3177     .fields    = (VMStateField[]) {
3178         VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3179         VMSTATE_END_OF_LIST()
3180     },
3181 };
3182
3183 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3184  * flag set we need to check that we have it
3185  */
3186 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3187 {
3188     struct VirtIONetMigTmp *tmp = opaque;
3189
3190     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3191         error_report("virtio-net: saved image requires vnet_hdr=on");
3192         return -EINVAL;
3193     }
3194
3195     return 0;
3196 }
3197
3198 static int virtio_net_vnet_pre_save(void *opaque)
3199 {
3200     struct VirtIONetMigTmp *tmp = opaque;
3201
3202     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3203
3204     return 0;
3205 }
3206
3207 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3208     .name      = "virtio-net-vnet",
3209     .post_load = virtio_net_vnet_post_load,
3210     .pre_save  = virtio_net_vnet_pre_save,
3211     .fields    = (VMStateField[]) {
3212         VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3213         VMSTATE_END_OF_LIST()
3214     },
3215 };
3216
3217 static bool virtio_net_rss_needed(void *opaque)
3218 {
3219     return VIRTIO_NET(opaque)->rss_data.enabled;
3220 }
3221
3222 static const VMStateDescription vmstate_virtio_net_rss = {
3223     .name      = "virtio-net-device/rss",
3224     .version_id = 1,
3225     .minimum_version_id = 1,
3226     .needed = virtio_net_rss_needed,
3227     .fields = (VMStateField[]) {
3228         VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3229         VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3230         VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3231         VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3232         VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3233         VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3234         VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3235                             VIRTIO_NET_RSS_MAX_KEY_SIZE),
3236         VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3237                                     rss_data.indirections_len, 0,
3238                                     vmstate_info_uint16, uint16_t),
3239         VMSTATE_END_OF_LIST()
3240     },
3241 };
3242
3243 static const VMStateDescription vmstate_virtio_net_device = {
3244     .name = "virtio-net-device",
3245     .version_id = VIRTIO_NET_VM_VERSION,
3246     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3247     .post_load = virtio_net_post_load_device,
3248     .fields = (VMStateField[]) {
3249         VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3250         VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3251                                vmstate_virtio_net_queue_tx_waiting,
3252                                VirtIONetQueue),
3253         VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3254         VMSTATE_UINT16(status, VirtIONet),
3255         VMSTATE_UINT8(promisc, VirtIONet),
3256         VMSTATE_UINT8(allmulti, VirtIONet),
3257         VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3258
3259         /* Guarded pair: If it fits we load it, else we throw it away
3260          * - can happen if source has a larger MAC table.; post-load
3261          *  sets flags in this case.
3262          */
3263         VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3264                                 0, mac_table_fits, mac_table.in_use,
3265                                  ETH_ALEN),
3266         VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3267                                      mac_table.in_use, ETH_ALEN),
3268
3269         /* Note: This is an array of uint32's that's always been saved as a
3270          * buffer; hold onto your endiannesses; it's actually used as a bitmap
3271          * but based on the uint.
3272          */
3273         VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3274         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3275                          vmstate_virtio_net_has_vnet),
3276         VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3277         VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3278         VMSTATE_UINT8(alluni, VirtIONet),
3279         VMSTATE_UINT8(nomulti, VirtIONet),
3280         VMSTATE_UINT8(nouni, VirtIONet),
3281         VMSTATE_UINT8(nobcast, VirtIONet),
3282         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3283                          vmstate_virtio_net_has_ufo),
3284         VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3285                             vmstate_info_uint16_equal, uint16_t),
3286         VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3287         VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3288                          vmstate_virtio_net_tx_waiting),
3289         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3290                             has_ctrl_guest_offloads),
3291         VMSTATE_END_OF_LIST()
3292    },
3293     .subsections = (const VMStateDescription * []) {
3294         &vmstate_virtio_net_rss,
3295         NULL
3296     }
3297 };
3298
3299 static NetClientInfo net_virtio_info = {
3300     .type = NET_CLIENT_DRIVER_NIC,
3301     .size = sizeof(NICState),
3302     .can_receive = virtio_net_can_receive,
3303     .receive = virtio_net_receive,
3304     .link_status_changed = virtio_net_set_link_status,
3305     .query_rx_filter = virtio_net_query_rxfilter,
3306     .announce = virtio_net_announce,
3307 };
3308
3309 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3310 {
3311     VirtIONet *n = VIRTIO_NET(vdev);
3312     NetClientState *nc;
3313     assert(n->vhost_started);
3314     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3315         /* Must guard against invalid features and bogus queue index
3316          * from being set by malicious guest, or penetrated through
3317          * buggy migration stream.
3318          */
3319         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3320             qemu_log_mask(LOG_GUEST_ERROR,
3321                           "%s: bogus vq index ignored\n", __func__);
3322             return false;
3323         }
3324         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3325     } else {
3326         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3327     }
3328     /*
3329      * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3330      * as the Marco of configure interrupt's IDX, If this driver does not
3331      * support, the function will return false
3332      */
3333
3334     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3335         return false;
3336     }
3337     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3338 }
3339
3340 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3341                                            bool mask)
3342 {
3343     VirtIONet *n = VIRTIO_NET(vdev);
3344     NetClientState *nc;
3345     assert(n->vhost_started);
3346     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3347         /* Must guard against invalid features and bogus queue index
3348          * from being set by malicious guest, or penetrated through
3349          * buggy migration stream.
3350          */
3351         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3352             qemu_log_mask(LOG_GUEST_ERROR,
3353                           "%s: bogus vq index ignored\n", __func__);
3354             return;
3355         }
3356         nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3357     } else {
3358         nc = qemu_get_subqueue(n->nic, vq2q(idx));
3359     }
3360     /*
3361      *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3362      * as the Marco of configure interrupt's IDX, If this driver does not
3363      * support, the function will return
3364      */
3365
3366     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3367         return;
3368     }
3369
3370     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3371 }
3372
3373 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3374 {
3375     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3376
3377     n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3378 }
3379
3380 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3381                                    const char *type)
3382 {
3383     /*
3384      * The name can be NULL, the netclient name will be type.x.
3385      */
3386     assert(type != NULL);
3387
3388     g_free(n->netclient_name);
3389     g_free(n->netclient_type);
3390     n->netclient_name = g_strdup(name);
3391     n->netclient_type = g_strdup(type);
3392 }
3393
3394 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3395 {
3396     HotplugHandler *hotplug_ctrl;
3397     PCIDevice *pci_dev;
3398     Error *err = NULL;
3399
3400     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3401     if (hotplug_ctrl) {
3402         pci_dev = PCI_DEVICE(dev);
3403         pci_dev->partially_hotplugged = true;
3404         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3405         if (err) {
3406             error_report_err(err);
3407             return false;
3408         }
3409     } else {
3410         return false;
3411     }
3412     return true;
3413 }
3414
3415 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3416                                     Error **errp)
3417 {
3418     Error *err = NULL;
3419     HotplugHandler *hotplug_ctrl;
3420     PCIDevice *pdev = PCI_DEVICE(dev);
3421     BusState *primary_bus;
3422
3423     if (!pdev->partially_hotplugged) {
3424         return true;
3425     }
3426     primary_bus = dev->parent_bus;
3427     if (!primary_bus) {
3428         error_setg(errp, "virtio_net: couldn't find primary bus");
3429         return false;
3430     }
3431     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3432     qatomic_set(&n->failover_primary_hidden, false);
3433     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3434     if (hotplug_ctrl) {
3435         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3436         if (err) {
3437             goto out;
3438         }
3439         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3440     }
3441     pdev->partially_hotplugged = false;
3442
3443 out:
3444     error_propagate(errp, err);
3445     return !err;
3446 }
3447
3448 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3449 {
3450     bool should_be_hidden;
3451     Error *err = NULL;
3452     DeviceState *dev = failover_find_primary_device(n);
3453
3454     if (!dev) {
3455         return;
3456     }
3457
3458     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3459
3460     if (migration_in_setup(s) && !should_be_hidden) {
3461         if (failover_unplug_primary(n, dev)) {
3462             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3463             qapi_event_send_unplug_primary(dev->id);
3464             qatomic_set(&n->failover_primary_hidden, true);
3465         } else {
3466             warn_report("couldn't unplug primary device");
3467         }
3468     } else if (migration_has_failed(s)) {
3469         /* We already unplugged the device let's plug it back */
3470         if (!failover_replug_primary(n, dev, &err)) {
3471             if (err) {
3472                 error_report_err(err);
3473             }
3474         }
3475     }
3476 }
3477
3478 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3479 {
3480     MigrationState *s = data;
3481     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3482     virtio_net_handle_migration_primary(n, s);
3483 }
3484
3485 static bool failover_hide_primary_device(DeviceListener *listener,
3486                                          const QDict *device_opts,
3487                                          bool from_json,
3488                                          Error **errp)
3489 {
3490     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3491     const char *standby_id;
3492
3493     if (!device_opts) {
3494         return false;
3495     }
3496
3497     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3498         return false;
3499     }
3500
3501     if (!qdict_haskey(device_opts, "id")) {
3502         error_setg(errp, "Device with failover_pair_id needs to have id");
3503         return false;
3504     }
3505
3506     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3507     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3508         return false;
3509     }
3510
3511     /*
3512      * The hide helper can be called several times for a given device.
3513      * Check there is only one primary for a virtio-net device but
3514      * don't duplicate the qdict several times if it's called for the same
3515      * device.
3516      */
3517     if (n->primary_opts) {
3518         const char *old, *new;
3519         /* devices with failover_pair_id always have an id */
3520         old = qdict_get_str(n->primary_opts, "id");
3521         new = qdict_get_str(device_opts, "id");
3522         if (strcmp(old, new) != 0) {
3523             error_setg(errp, "Cannot attach more than one primary device to "
3524                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3525             return false;
3526         }
3527     } else {
3528         n->primary_opts = qdict_clone_shallow(device_opts);
3529         n->primary_opts_from_json = from_json;
3530     }
3531
3532     /* failover_primary_hidden is set during feature negotiation */
3533     return qatomic_read(&n->failover_primary_hidden);
3534 }
3535
3536 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3537 {
3538     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3539     VirtIONet *n = VIRTIO_NET(dev);
3540     NetClientState *nc;
3541     int i;
3542
3543     if (n->net_conf.mtu) {
3544         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3545     }
3546
3547     if (n->net_conf.duplex_str) {
3548         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3549             n->net_conf.duplex = DUPLEX_HALF;
3550         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3551             n->net_conf.duplex = DUPLEX_FULL;
3552         } else {
3553             error_setg(errp, "'duplex' must be 'half' or 'full'");
3554             return;
3555         }
3556         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3557     } else {
3558         n->net_conf.duplex = DUPLEX_UNKNOWN;
3559     }
3560
3561     if (n->net_conf.speed < SPEED_UNKNOWN) {
3562         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3563         return;
3564     }
3565     if (n->net_conf.speed >= 0) {
3566         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3567     }
3568
3569     if (n->failover) {
3570         n->primary_listener.hide_device = failover_hide_primary_device;
3571         qatomic_set(&n->failover_primary_hidden, true);
3572         device_listener_register(&n->primary_listener);
3573         n->migration_state.notify = virtio_net_migration_state_notifier;
3574         add_migration_state_change_notifier(&n->migration_state);
3575         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3576     }
3577
3578     virtio_net_set_config_size(n, n->host_features);
3579     virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3580
3581     /*
3582      * We set a lower limit on RX queue size to what it always was.
3583      * Guests that want a smaller ring can always resize it without
3584      * help from us (using virtio 1 and up).
3585      */
3586     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3587         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3588         !is_power_of_2(n->net_conf.rx_queue_size)) {
3589         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3590                    "must be a power of 2 between %d and %d.",
3591                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3592                    VIRTQUEUE_MAX_SIZE);
3593         virtio_cleanup(vdev);
3594         return;
3595     }
3596
3597     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3598         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3599         !is_power_of_2(n->net_conf.tx_queue_size)) {
3600         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3601                    "must be a power of 2 between %d and %d",
3602                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3603                    VIRTQUEUE_MAX_SIZE);
3604         virtio_cleanup(vdev);
3605         return;
3606     }
3607
3608     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3609
3610     /*
3611      * Figure out the datapath queue pairs since the backend could
3612      * provide control queue via peers as well.
3613      */
3614     if (n->nic_conf.peers.queues) {
3615         for (i = 0; i < n->max_ncs; i++) {
3616             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3617                 ++n->max_queue_pairs;
3618             }
3619         }
3620     }
3621     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3622
3623     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3624         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3625                    "must be a positive integer less than %d.",
3626                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3627         virtio_cleanup(vdev);
3628         return;
3629     }
3630     n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3631     n->curr_queue_pairs = 1;
3632     n->tx_timeout = n->net_conf.txtimer;
3633
3634     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3635                        && strcmp(n->net_conf.tx, "bh")) {
3636         warn_report("virtio-net: "
3637                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3638                     n->net_conf.tx);
3639         error_printf("Defaulting to \"bh\"");
3640     }
3641
3642     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3643                                     n->net_conf.tx_queue_size);
3644
3645     for (i = 0; i < n->max_queue_pairs; i++) {
3646         virtio_net_add_queue(n, i);
3647     }
3648
3649     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3650     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3651     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3652     n->status = VIRTIO_NET_S_LINK_UP;
3653     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3654                               QEMU_CLOCK_VIRTUAL,
3655                               virtio_net_announce_timer, n);
3656     n->announce_timer.round = 0;
3657
3658     if (n->netclient_type) {
3659         /*
3660          * Happen when virtio_net_set_netclient_name has been called.
3661          */
3662         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3663                               n->netclient_type, n->netclient_name, n);
3664     } else {
3665         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3666                               object_get_typename(OBJECT(dev)), dev->id, n);
3667     }
3668
3669     for (i = 0; i < n->max_queue_pairs; i++) {
3670         n->nic->ncs[i].do_not_pad = true;
3671     }
3672
3673     peer_test_vnet_hdr(n);
3674     if (peer_has_vnet_hdr(n)) {
3675         for (i = 0; i < n->max_queue_pairs; i++) {
3676             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3677         }
3678         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3679     } else {
3680         n->host_hdr_len = 0;
3681     }
3682
3683     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3684
3685     n->vqs[0].tx_waiting = 0;
3686     n->tx_burst = n->net_conf.txburst;
3687     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3688     n->promisc = 1; /* for compatibility */
3689
3690     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3691
3692     n->vlans = g_malloc0(MAX_VLAN >> 3);
3693
3694     nc = qemu_get_queue(n->nic);
3695     nc->rxfilter_notify_enabled = 1;
3696
3697    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3698         struct virtio_net_config netcfg = {};
3699         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3700         vhost_net_set_config(get_vhost_net(nc->peer),
3701             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3702     }
3703     QTAILQ_INIT(&n->rsc_chains);
3704     n->qdev = dev;
3705
3706     net_rx_pkt_init(&n->rx_pkt, false);
3707
3708     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3709         virtio_net_load_ebpf(n);
3710     }
3711 }
3712
3713 static void virtio_net_device_unrealize(DeviceState *dev)
3714 {
3715     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3716     VirtIONet *n = VIRTIO_NET(dev);
3717     int i, max_queue_pairs;
3718
3719     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3720         virtio_net_unload_ebpf(n);
3721     }
3722
3723     /* This will stop vhost backend if appropriate. */
3724     virtio_net_set_status(vdev, 0);
3725
3726     g_free(n->netclient_name);
3727     n->netclient_name = NULL;
3728     g_free(n->netclient_type);
3729     n->netclient_type = NULL;
3730
3731     g_free(n->mac_table.macs);
3732     g_free(n->vlans);
3733
3734     if (n->failover) {
3735         qobject_unref(n->primary_opts);
3736         device_listener_unregister(&n->primary_listener);
3737         remove_migration_state_change_notifier(&n->migration_state);
3738     } else {
3739         assert(n->primary_opts == NULL);
3740     }
3741
3742     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3743     for (i = 0; i < max_queue_pairs; i++) {
3744         virtio_net_del_queue(n, i);
3745     }
3746     /* delete also control vq */
3747     virtio_del_queue(vdev, max_queue_pairs * 2);
3748     qemu_announce_timer_del(&n->announce_timer, false);
3749     g_free(n->vqs);
3750     qemu_del_nic(n->nic);
3751     virtio_net_rsc_cleanup(n);
3752     g_free(n->rss_data.indirections_table);
3753     net_rx_pkt_uninit(n->rx_pkt);
3754     virtio_cleanup(vdev);
3755 }
3756
3757 static void virtio_net_instance_init(Object *obj)
3758 {
3759     VirtIONet *n = VIRTIO_NET(obj);
3760
3761     /*
3762      * The default config_size is sizeof(struct virtio_net_config).
3763      * Can be overriden with virtio_net_set_config_size.
3764      */
3765     n->config_size = sizeof(struct virtio_net_config);
3766     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3767                                   "bootindex", "/ethernet-phy@0",
3768                                   DEVICE(n));
3769
3770     ebpf_rss_init(&n->ebpf_rss);
3771 }
3772
3773 static int virtio_net_pre_save(void *opaque)
3774 {
3775     VirtIONet *n = opaque;
3776
3777     /* At this point, backend must be stopped, otherwise
3778      * it might keep writing to memory. */
3779     assert(!n->vhost_started);
3780
3781     return 0;
3782 }
3783
3784 static bool primary_unplug_pending(void *opaque)
3785 {
3786     DeviceState *dev = opaque;
3787     DeviceState *primary;
3788     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3789     VirtIONet *n = VIRTIO_NET(vdev);
3790
3791     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3792         return false;
3793     }
3794     primary = failover_find_primary_device(n);
3795     return primary ? primary->pending_deleted_event : false;
3796 }
3797
3798 static bool dev_unplug_pending(void *opaque)
3799 {
3800     DeviceState *dev = opaque;
3801     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3802
3803     return vdc->primary_unplug_pending(dev);
3804 }
3805
3806 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3807 {
3808     VirtIONet *n = VIRTIO_NET(vdev);
3809     NetClientState *nc = qemu_get_queue(n->nic);
3810     struct vhost_net *net = get_vhost_net(nc->peer);
3811     return &net->dev;
3812 }
3813
/*
 * Top-level migration description of the virtio-net device.
 *
 * Both the current and the minimum accepted version are
 * VIRTIO_NET_VM_VERSION.  The only field is the generic virtio device
 * state; pre_save and dev_unplug_pending hook in the callbacks defined
 * above.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};
3825
/*
 * User-configurable properties of the virtio-net device.
 *
 * The DEFINE_PROP_BIT64 entries each toggle one VIRTIO_NET_F_* bit in
 * host_features; the last argument is the default.  The remaining entries
 * configure the NIC, TX handling, queue sizes, MTU, link speed/duplex
 * and failover.
 */
static Property virtio_net_properties[] = {
    /* Offload feature bits, enabled by default */
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                    VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    /* Control virtqueue feature bits, enabled by default */
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    /* Feature bits that are disabled by default */
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                    VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                    VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                    VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX path tuning */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    /* Queue sizes; validated when the device is realized */
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    /* Link speed and duplex; validated when the device is realized */
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};
3892
3893 static void virtio_net_class_init(ObjectClass *klass, void *data)
3894 {
3895     DeviceClass *dc = DEVICE_CLASS(klass);
3896     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3897
3898     device_class_set_props(dc, virtio_net_properties);
3899     dc->vmsd = &vmstate_virtio_net;
3900     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3901     vdc->realize = virtio_net_device_realize;
3902     vdc->unrealize = virtio_net_device_unrealize;
3903     vdc->get_config = virtio_net_get_config;
3904     vdc->set_config = virtio_net_set_config;
3905     vdc->get_features = virtio_net_get_features;
3906     vdc->set_features = virtio_net_set_features;
3907     vdc->bad_features = virtio_net_bad_features;
3908     vdc->reset = virtio_net_reset;
3909     vdc->queue_reset = virtio_net_queue_reset;
3910     vdc->queue_enable = virtio_net_queue_enable;
3911     vdc->set_status = virtio_net_set_status;
3912     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3913     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3914     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3915     vdc->post_load = virtio_net_post_load_virtio;
3916     vdc->vmsd = &vmstate_virtio_net_device;
3917     vdc->primary_unplug_pending = primary_unplug_pending;
3918     vdc->get_vhost = virtio_net_get_vhost;
3919 }
3920
/*
 * QOM type description for virtio-net: a TYPE_VIRTIO_DEVICE subtype whose
 * instances are VirtIONet structures.
 */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};
3928
/* Register the virtio-net type described by virtio_net_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Hand the registration function to the type_init() macro. */
type_init(virtio_register_types)