hw/net/virtio-net.c

   1 /*
   2  * Virtio Network Device
   3  *
   4  * Copyright IBM, Corp. 2007
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #include "qemu/osdep.h"
  15 #include "qemu/atomic.h"
  16 #include "qemu/iov.h"
  17 #include "qemu/main-loop.h"
  18 #include "qemu/module.h"
  19 #include "hw/virtio/virtio.h"
  20 #include "net/net.h"
  21 #include "net/checksum.h"
  22 #include "net/tap.h"
  23 #include "qemu/error-report.h"
  24 #include "qemu/timer.h"
  25 #include "qemu/option.h"
  26 #include "qemu/option_int.h"
  27 #include "qemu/config-file.h"
  28 #include "qapi/qmp/qdict.h"
  29 #include "hw/virtio/virtio-net.h"
  30 #include "net/vhost_net.h"
  31 #include "net/announce.h"
  32 #include "hw/virtio/virtio-bus.h"
  33 #include "qapi/error.h"
  34 #include "qapi/qapi-events-net.h"
  35 #include "hw/qdev-properties.h"
  36 #include "qapi/qapi-types-migration.h"
  37 #include "qapi/qapi-events-migration.h"
  38 #include "hw/virtio/virtio-access.h"
  39 #include "migration/misc.h"
  40 #include "standard-headers/linux/ethtool.h"
  41 #include "sysemu/sysemu.h"
  42 #include "trace.h"
  43 #include "monitor/qdev.h"
  44 #include "hw/pci/pci.h"
  45 #include "net_rx_pkt.h"
  46 #include "hw/virtio/vhost.h"
  47 #include "sysemu/qtest.h"
  48
  49 #define VIRTIO_NET_VM_VERSION    11
  50
  51 #define MAC_TABLE_ENTRIES    64
  52 #define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
  53
  54 /* previously fixed value */
  55 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
  56 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
  57
  58 /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
  59 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
  60 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
  61
  62 #define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */
  63
  64 #define VIRTIO_NET_TCP_FLAG         0x3F
  65 #define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
  66
  67 /* IPv4 max payload, 16 bits in the header */
  68 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
  69 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
  70
  71 /* header length value in ip header without option */
  72 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
  73
  74 #define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
  75 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
  76
  77 /* Purge coalesced packets timer interval, This value affects the performance
  78    a lot, and should be tuned carefully, '300000'(300us) is the recommended
  79    value to pass the WHQL test, '50000' can gain 2x netperf throughput with
  80    tso/gso/gro 'off'. */
  81 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
  82
  83 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
  84                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
  85                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
  86                                          VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
  87                                          VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
  88                                          VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
  89                                          VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
  90                                          VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
  91                                          VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
  92
  93 static const VirtIOFeature feature_sizes[] = {
  94     {.flags = 1ULL << VIRTIO_NET_F_MAC,
  95      .end = endof(struct virtio_net_config, mac)},
  96     {.flags = 1ULL << VIRTIO_NET_F_STATUS,
  97      .end = endof(struct virtio_net_config, status)},
  98     {.flags = 1ULL << VIRTIO_NET_F_MQ,
  99      .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
 100     {.flags = 1ULL << VIRTIO_NET_F_MTU,
 101      .end = endof(struct virtio_net_config, mtu)},
 102     {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
 103      .end = endof(struct virtio_net_config, duplex)},
 104     {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
 105      .end = endof(struct virtio_net_config, supported_hash_types)},
 106     {}
 107 };
 108
 109 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
 110 {
 111     VirtIONet *n = qemu_get_nic_opaque(nc);
 112
 113     return &n->vqs[nc->queue_index];
 114 }
 115
 116 static int vq2q(int queue_index)
 117 {
 118     return queue_index / 2;
 119 }
 120
 121 /* TODO
 122  * - we could suppress RX interrupt if we were so inclined.
 123  */
 124
 125 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 126 {
 127     VirtIONet *n = VIRTIO_NET(vdev);
 128     struct virtio_net_config netcfg;
 129     NetClientState *nc = qemu_get_queue(n->nic);
 130     static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
 131
 132     int ret = 0;
 133     memset(&netcfg, 0 , sizeof(struct virtio_net_config));
 134     virtio_stw_p(vdev, &netcfg.status, n->status);
 135     virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
 136     virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
 137     memcpy(netcfg.mac, n->mac, ETH_ALEN);
 138     virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
 139     netcfg.duplex = n->net_conf.duplex;
 140     netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
 141     virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
 142                  virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
 143                  VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
 144     virtio_stl_p(vdev, &netcfg.supported_hash_types,
 145                  VIRTIO_NET_RSS_SUPPORTED_HASHES);
 146     memcpy(config, &netcfg, n->config_size);
 147
 148     /*
 149      * Is this VDPA? No peer means not VDPA: there's no way to
 150      * disconnect/reconnect a VDPA peer.
 151      */
 152     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 153         ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
 154                                    n->config_size);
 155         if (ret != -1) {
 156             /*
 157              * Some NIC/kernel combinations present 0 as the mac address.  As
 158              * that is not a legal address, try to proceed with the
 159              * address from the QEMU command line in the hope that the
 160              * address has been configured correctly elsewhere - just not
 161              * reported by the device.
 162              */
 163             if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
 164                 info_report("Zero hardware mac address detected. Ignoring.");
 165                 memcpy(netcfg.mac, n->mac, ETH_ALEN);
 166             }
 167             memcpy(config, &netcfg, n->config_size);
 168         }
 169     }
 170 }
 171
 172 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
 173 {
 174     VirtIONet *n = VIRTIO_NET(vdev);
 175     struct virtio_net_config netcfg = {};
 176     NetClientState *nc = qemu_get_queue(n->nic);
 177
 178     memcpy(&netcfg, config, n->config_size);
 179
 180     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
 181         !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
 182         memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
 183         memcpy(n->mac, netcfg.mac, ETH_ALEN);
 184         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 185     }
 186
 187     /*
 188      * Is this VDPA? No peer means not VDPA: there's no way to
 189      * disconnect/reconnect a VDPA peer.
 190      */
 191     if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
 192         vhost_net_set_config(get_vhost_net(nc->peer),
 193                              (uint8_t *)&netcfg, 0, n->config_size,
 194                              VHOST_SET_CONFIG_TYPE_MASTER);
 195       }
 196 }
 197
 198 static bool virtio_net_started(VirtIONet *n, uint8_t status)
 199 {
 200     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 201     return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 202         (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
 203 }
 204
 205 static void virtio_net_announce_notify(VirtIONet *net)
 206 {
 207     VirtIODevice *vdev = VIRTIO_DEVICE(net);
 208     trace_virtio_net_announce_notify();
 209
 210     net->status |= VIRTIO_NET_S_ANNOUNCE;
 211     virtio_notify_config(vdev);
 212 }
 213
 214 static void virtio_net_announce_timer(void *opaque)
 215 {
 216     VirtIONet *n = opaque;
 217     trace_virtio_net_announce_timer(n->announce_timer.round);
 218
 219     n->announce_timer.round--;
 220     virtio_net_announce_notify(n);
 221 }
 222
 223 static void virtio_net_announce(NetClientState *nc)
 224 {
 225     VirtIONet *n = qemu_get_nic_opaque(nc);
 226     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 227
 228     /*
 229      * Make sure the virtio migration announcement timer isn't running
 230      * If it is, let it trigger announcement so that we do not cause
 231      * confusion.
 232      */
 233     if (n->announce_timer.round) {
 234         return;
 235     }
 236
 237     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
 238         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
 239             virtio_net_announce_notify(n);
 240     }
 241 }
 242
 243 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
 244 {
 245     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 246     NetClientState *nc = qemu_get_queue(n->nic);
 247     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 248     int cvq = n->max_ncs - n->max_queue_pairs;
 249
 250     if (!get_vhost_net(nc->peer)) {
 251         return;
 252     }
 253
 254     if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
 255         !!n->vhost_started) {
 256         return;
 257     }
 258     if (!n->vhost_started) {
 259         int r, i;
 260
 261         if (n->needs_vnet_hdr_swap) {
 262             error_report("backend does not support %s vnet headers; "
 263                          "falling back on userspace virtio",
 264                          virtio_is_big_endian(vdev) ? "BE" : "LE");
 265             return;
 266         }
 267
 268         /* Any packets outstanding? Purge them to avoid touching rings
 269          * when vhost is running.
 270          */
 271         for (i = 0;  i < queue_pairs; i++) {
 272             NetClientState *qnc = qemu_get_subqueue(n->nic, i);
 273
 274             /* Purge both directions: TX and RX. */
 275             qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
 276             qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
 277         }
 278
 279         if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
 280             r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
 281             if (r < 0) {
 282                 error_report("%uBytes MTU not supported by the backend",
 283                              n->net_conf.mtu);
 284
 285                 return;
 286             }
 287         }
 288
 289         n->vhost_started = 1;
 290         r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
 291         if (r < 0) {
 292             error_report("unable to start vhost net: %d: "
 293                          "falling back on userspace virtio", -r);
 294             n->vhost_started = 0;
 295         }
 296     } else {
 297         vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
 298         n->vhost_started = 0;
 299     }
 300 }
 301
 302 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
 303                                           NetClientState *peer,
 304                                           bool enable)
 305 {
 306     if (virtio_is_big_endian(vdev)) {
 307         return qemu_set_vnet_be(peer, enable);
 308     } else {
 309         return qemu_set_vnet_le(peer, enable);
 310     }
 311 }
 312
 313 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
 314                                        int queue_pairs, bool enable)
 315 {
 316     int i;
 317
 318     for (i = 0; i < queue_pairs; i++) {
 319         if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
 320             enable) {
 321             while (--i >= 0) {
 322                 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
 323             }
 324
 325             return true;
 326         }
 327     }
 328
 329     return false;
 330 }
 331
 332 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
 333 {
 334     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 335     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
 336
 337     if (virtio_net_started(n, status)) {
 338         /* Before using the device, we tell the network backend about the
 339          * endianness to use when parsing vnet headers. If the backend
 340          * can't do it, we fallback onto fixing the headers in the core
 341          * virtio-net code.
 342          */
 343         n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
 344                                                             queue_pairs, true);
 345     } else if (virtio_net_started(n, vdev->status)) {
 346         /* After using the device, we need to reset the network backend to
 347          * the default (guest native endianness), otherwise the guest may
 348          * lose network connectivity if it is rebooted into a different
 349          * endianness.
 350          */
 351         virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
 352     }
 353 }
 354
 355 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
 356 {
 357     unsigned int dropped = virtqueue_drop_all(vq);
 358     if (dropped) {
 359         virtio_notify(vdev, vq);
 360     }
 361 }
 362
 363 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
 364 {
 365     VirtIONet *n = VIRTIO_NET(vdev);
 366     VirtIONetQueue *q;
 367     int i;
 368     uint8_t queue_status;
 369
 370     virtio_net_vnet_endian_status(n, status);
 371     virtio_net_vhost_status(n, status);
 372
 373     for (i = 0; i < n->max_queue_pairs; i++) {
 374         NetClientState *ncs = qemu_get_subqueue(n->nic, i);
 375         bool queue_started;
 376         q = &n->vqs[i];
 377
 378         if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
 379             queue_status = 0;
 380         } else {
 381             queue_status = status;
 382         }
 383         queue_started =
 384             virtio_net_started(n, queue_status) && !n->vhost_started;
 385
 386         if (queue_started) {
 387             qemu_flush_queued_packets(ncs);
 388         }
 389
 390         if (!q->tx_waiting) {
 391             continue;
 392         }
 393
 394         if (queue_started) {
 395             if (q->tx_timer) {
 396                 timer_mod(q->tx_timer,
 397                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
 398             } else {
 399                 qemu_bh_schedule(q->tx_bh);
 400             }
 401         } else {
 402             if (q->tx_timer) {
 403                 timer_del(q->tx_timer);
 404             } else {
 405                 qemu_bh_cancel(q->tx_bh);
 406             }
 407             if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
 408                 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
 409                 vdev->vm_running) {
 410                 /* if tx is waiting we are likely have some packets in tx queue
 411                  * and disabled notification */
 412                 q->tx_waiting = 0;
 413                 virtio_queue_set_notification(q->tx_vq, 1);
 414                 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
 415             }
 416         }
 417     }
 418 }
 419
 420 static void virtio_net_set_link_status(NetClientState *nc)
 421 {
 422     VirtIONet *n = qemu_get_nic_opaque(nc);
 423     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 424     uint16_t old_status = n->status;
 425
 426     if (nc->link_down)
 427         n->status &= ~VIRTIO_NET_S_LINK_UP;
 428     else
 429         n->status |= VIRTIO_NET_S_LINK_UP;
 430
 431     if (n->status != old_status)
 432         virtio_notify_config(vdev);
 433
 434     virtio_net_set_status(vdev, vdev->status);
 435 }
 436
 437 static void rxfilter_notify(NetClientState *nc)
 438 {
 439     VirtIONet *n = qemu_get_nic_opaque(nc);
 440
 441     if (nc->rxfilter_notify_enabled) {
 442         char *path = object_get_canonical_path(OBJECT(n->qdev));
 443         qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
 444                                               n->netclient_name, path);
 445         g_free(path);
 446
 447         /* disable event notification to avoid events flooding */
 448         nc->rxfilter_notify_enabled = 0;
 449     }
 450 }
 451
 452 static intList *get_vlan_table(VirtIONet *n)
 453 {
 454     intList *list;
 455     int i, j;
 456
 457     list = NULL;
 458     for (i = 0; i < MAX_VLAN >> 5; i++) {
 459         for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
 460             if (n->vlans[i] & (1U << j)) {
 461                 QAPI_LIST_PREPEND(list, (i << 5) + j);
 462             }
 463         }
 464     }
 465
 466     return list;
 467 }
 468
 469 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
 470 {
 471     VirtIONet *n = qemu_get_nic_opaque(nc);
 472     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 473     RxFilterInfo *info;
 474     strList *str_list;
 475     int i;
 476
 477     info = g_malloc0(sizeof(*info));
 478     info->name = g_strdup(nc->name);
 479     info->promiscuous = n->promisc;
 480
 481     if (n->nouni) {
 482         info->unicast = RX_STATE_NONE;
 483     } else if (n->alluni) {
 484         info->unicast = RX_STATE_ALL;
 485     } else {
 486         info->unicast = RX_STATE_NORMAL;
 487     }
 488
 489     if (n->nomulti) {
 490         info->multicast = RX_STATE_NONE;
 491     } else if (n->allmulti) {
 492         info->multicast = RX_STATE_ALL;
 493     } else {
 494         info->multicast = RX_STATE_NORMAL;
 495     }
 496
 497     info->broadcast_allowed = n->nobcast;
 498     info->multicast_overflow = n->mac_table.multi_overflow;
 499     info->unicast_overflow = n->mac_table.uni_overflow;
 500
 501     info->main_mac = qemu_mac_strdup_printf(n->mac);
 502
 503     str_list = NULL;
 504     for (i = 0; i < n->mac_table.first_multi; i++) {
 505         QAPI_LIST_PREPEND(str_list,
 506                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 507     }
 508     info->unicast_table = str_list;
 509
 510     str_list = NULL;
 511     for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
 512         QAPI_LIST_PREPEND(str_list,
 513                       qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
 514     }
 515     info->multicast_table = str_list;
 516     info->vlan_table = get_vlan_table(n);
 517
 518     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
 519         info->vlan = RX_STATE_ALL;
 520     } else if (!info->vlan_table) {
 521         info->vlan = RX_STATE_NONE;
 522     } else {
 523         info->vlan = RX_STATE_NORMAL;
 524     }
 525
 526     /* enable event notification after query */
 527     nc->rxfilter_notify_enabled = 1;
 528
 529     return info;
 530 }
 531
 532 static void virtio_net_reset(VirtIODevice *vdev)
 533 {
 534     VirtIONet *n = VIRTIO_NET(vdev);
 535     int i;
 536
 537     /* Reset back to compatibility mode */
 538     n->promisc = 1;
 539     n->allmulti = 0;
 540     n->alluni = 0;
 541     n->nomulti = 0;
 542     n->nouni = 0;
 543     n->nobcast = 0;
 544     /* multiqueue is disabled by default */
 545     n->curr_queue_pairs = 1;
 546     timer_del(n->announce_timer.tm);
 547     n->announce_timer.round = 0;
 548     n->status &= ~VIRTIO_NET_S_ANNOUNCE;
 549
 550     /* Flush any MAC and VLAN filter table state */
 551     n->mac_table.in_use = 0;
 552     n->mac_table.first_multi = 0;
 553     n->mac_table.multi_overflow = 0;
 554     n->mac_table.uni_overflow = 0;
 555     memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
 556     memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
 557     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
 558     memset(n->vlans, 0, MAX_VLAN >> 3);
 559
 560     /* Flush any async TX */
 561     for (i = 0;  i < n->max_queue_pairs; i++) {
 562         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 563
 564         if (nc->peer) {
 565             qemu_flush_or_purge_queued_packets(nc->peer, true);
 566             assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
 567         }
 568     }
 569 }
 570
 571 static void peer_test_vnet_hdr(VirtIONet *n)
 572 {
 573     NetClientState *nc = qemu_get_queue(n->nic);
 574     if (!nc->peer) {
 575         return;
 576     }
 577
 578     n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
 579 }
 580
 581 static int peer_has_vnet_hdr(VirtIONet *n)
 582 {
 583     return n->has_vnet_hdr;
 584 }
 585
 586 static int peer_has_ufo(VirtIONet *n)
 587 {
 588     if (!peer_has_vnet_hdr(n))
 589         return 0;
 590
 591     n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
 592
 593     return n->has_ufo;
 594 }
 595
 596 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
 597                                        int version_1, int hash_report)
 598 {
 599     int i;
 600     NetClientState *nc;
 601
 602     n->mergeable_rx_bufs = mergeable_rx_bufs;
 603
 604     if (version_1) {
 605         n->guest_hdr_len = hash_report ?
 606             sizeof(struct virtio_net_hdr_v1_hash) :
 607             sizeof(struct virtio_net_hdr_mrg_rxbuf);
 608         n->rss_data.populate_hash = !!hash_report;
 609     } else {
 610         n->guest_hdr_len = n->mergeable_rx_bufs ?
 611             sizeof(struct virtio_net_hdr_mrg_rxbuf) :
 612             sizeof(struct virtio_net_hdr);
 613     }
 614
 615     for (i = 0; i < n->max_queue_pairs; i++) {
 616         nc = qemu_get_subqueue(n->nic, i);
 617
 618         if (peer_has_vnet_hdr(n) &&
 619             qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
 620             qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
 621             n->host_hdr_len = n->guest_hdr_len;
 622         }
 623     }
 624 }
 625
 626 static int virtio_net_max_tx_queue_size(VirtIONet *n)
 627 {
 628     NetClientState *peer = n->nic_conf.peers.ncs[0];
 629
 630     /*
 631      * Backends other than vhost-user don't support max queue size.
 632      */
 633     if (!peer) {
 634         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 635     }
 636
 637     if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
 638         return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
 639     }
 640
 641     return VIRTQUEUE_MAX_SIZE;
 642 }
 643
 644 static int peer_attach(VirtIONet *n, int index)
 645 {
 646     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 647
 648     if (!nc->peer) {
 649         return 0;
 650     }
 651
 652     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 653         vhost_set_vring_enable(nc->peer, 1);
 654     }
 655
 656     if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
 657         return 0;
 658     }
 659
 660     if (n->max_queue_pairs == 1) {
 661         return 0;
 662     }
 663
 664     return tap_enable(nc->peer);
 665 }
 666
 667 static int peer_detach(VirtIONet *n, int index)
 668 {
 669     NetClientState *nc = qemu_get_subqueue(n->nic, index);
 670
 671     if (!nc->peer) {
 672         return 0;
 673     }
 674
 675     if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 676         vhost_set_vring_enable(nc->peer, 0);
 677     }
 678
 679     if (nc->peer->info->type !=  NET_CLIENT_DRIVER_TAP) {
 680         return 0;
 681     }
 682
 683     return tap_disable(nc->peer);
 684 }
 685
 686 static void virtio_net_set_queue_pairs(VirtIONet *n)
 687 {
 688     int i;
 689     int r;
 690
 691     if (n->nic->peer_deleted) {
 692         return;
 693     }
 694
 695     for (i = 0; i < n->max_queue_pairs; i++) {
 696         if (i < n->curr_queue_pairs) {
 697             r = peer_attach(n, i);
 698             assert(!r);
 699         } else {
 700             r = peer_detach(n, i);
 701             assert(!r);
 702         }
 703     }
 704 }
 705
 706 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
 707
 708 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
 709                                         Error **errp)
 710 {
 711     VirtIONet *n = VIRTIO_NET(vdev);
 712     NetClientState *nc = qemu_get_queue(n->nic);
 713
 714     /* Firstly sync all virtio-net possible supported features */
 715     features |= n->host_features;
 716
 717     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 718
 719     if (!peer_has_vnet_hdr(n)) {
 720         virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
 721         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 722         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 723         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
 724
 725         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
 726         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
 727         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
 728         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
 729
 730         virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
 731     }
 732
 733     if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
 734         virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
 735         virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
 736     }
 737
 738     if (!get_vhost_net(nc->peer)) {
 739         return features;
 740     }
 741
 742     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
 743         virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
 744     }
 745     features = vhost_net_get_features(get_vhost_net(nc->peer), features);
 746     vdev->backend_features = features;
 747
 748     if (n->mtu_bypass_backend &&
 749             (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
 750         features |= (1ULL << VIRTIO_NET_F_MTU);
 751     }
 752
 753     return features;
 754 }
 755
 756 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
 757 {
 758     uint64_t features = 0;
 759
 760     /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
 761      * but also these: */
 762     virtio_add_feature(&features, VIRTIO_NET_F_MAC);
 763     virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
 764     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
 765     virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
 766     virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
 767
 768     return features;
 769 }
 770
 771 static void virtio_net_apply_guest_offloads(VirtIONet *n)
 772 {
 773     qemu_set_offload(qemu_get_queue(n->nic)->peer,
 774             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
 775             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
 776             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
 777             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
 778             !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
 779 }
 780
 781 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
 782 {
 783     static const uint64_t guest_offloads_mask =
 784         (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
 785         (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
 786         (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
 787         (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 788         (1ULL << VIRTIO_NET_F_GUEST_UFO);
 789
 790     return guest_offloads_mask & features;
 791 }
 792
 793 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
 794 {
 795     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 796     return virtio_net_guest_offloads_by_features(vdev->guest_features);
 797 }
 798
 799 typedef struct {
 800     VirtIONet *n;
 801     DeviceState *dev;
 802 } FailoverDevice;
 803
 804 /**
 805  * Set the failover primary device
 806  *
 807  * @opaque: FailoverId to setup
 808  * @opts: opts for device we are handling
 809  * @errp: returns an error if this function fails
 810  */
 811 static int failover_set_primary(DeviceState *dev, void *opaque)
 812 {
 813     FailoverDevice *fdev = opaque;
 814     PCIDevice *pci_dev = (PCIDevice *)
 815         object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
 816
 817     if (!pci_dev) {
 818         return 0;
 819     }
 820
 821     if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
 822         fdev->dev = dev;
 823         return 1;
 824     }
 825
 826     return 0;
 827 }
 828
 829 /**
 830  * Find the primary device for this failover virtio-net
 831  *
 832  * @n: VirtIONet device
 833  * @errp: returns an error if this function fails
 834  */
 835 static DeviceState *failover_find_primary_device(VirtIONet *n)
 836 {
 837     FailoverDevice fdev = {
 838         .n = n,
 839     };
 840
 841     qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
 842                        NULL, NULL, &fdev);
 843     return fdev.dev;
 844 }
 845
 846 static void failover_add_primary(VirtIONet *n, Error **errp)
 847 {
 848     Error *err = NULL;
 849     DeviceState *dev = failover_find_primary_device(n);
 850
 851     if (dev) {
 852         return;
 853     }
 854
 855     if (!n->primary_opts) {
 856         error_setg(errp, "Primary device not found");
 857         error_append_hint(errp, "Virtio-net failover will not work. Make "
 858                           "sure primary device has parameter"
 859                           " failover_pair_id=%s\n", n->netclient_name);
 860         return;
 861     }
 862
 863     dev = qdev_device_add_from_qdict(n->primary_opts,
 864                                      n->primary_opts_from_json,
 865                                      &err);
 866     if (err) {
 867         qobject_unref(n->primary_opts);
 868         n->primary_opts = NULL;
 869     } else {
 870         object_unref(OBJECT(dev));
 871     }
 872     error_propagate(errp, err);
 873 }
 874
 875 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 876 {
 877     VirtIONet *n = VIRTIO_NET(vdev);
 878     Error *err = NULL;
 879     int i;
 880
 881     if (n->mtu_bypass_backend &&
 882             !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
 883         features &= ~(1ULL << VIRTIO_NET_F_MTU);
 884     }
 885
 886     virtio_net_set_multiqueue(n,
 887                               virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
 888                               virtio_has_feature(features, VIRTIO_NET_F_MQ));
 889
 890     virtio_net_set_mrg_rx_bufs(n,
 891                                virtio_has_feature(features,
 892                                                   VIRTIO_NET_F_MRG_RXBUF),
 893                                virtio_has_feature(features,
 894                                                   VIRTIO_F_VERSION_1),
 895                                virtio_has_feature(features,
 896                                                   VIRTIO_NET_F_HASH_REPORT));
 897
 898     n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 899         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
 900     n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
 901         virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
 902     n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
 903
 904     if (n->has_vnet_hdr) {
 905         n->curr_guest_offloads =
 906             virtio_net_guest_offloads_by_features(features);
 907         virtio_net_apply_guest_offloads(n);
 908     }
 909
 910     for (i = 0;  i < n->max_queue_pairs; i++) {
 911         NetClientState *nc = qemu_get_subqueue(n->nic, i);
 912
 913         if (!get_vhost_net(nc->peer)) {
 914             continue;
 915         }
 916         vhost_net_ack_features(get_vhost_net(nc->peer), features);
 917     }
 918
 919     if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
 920         memset(n->vlans, 0, MAX_VLAN >> 3);
 921     } else {
 922         memset(n->vlans, 0xff, MAX_VLAN >> 3);
 923     }
 924
 925     if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
 926         qapi_event_send_failover_negotiated(n->netclient_name);
 927         qatomic_set(&n->failover_primary_hidden, false);
 928         failover_add_primary(n, &err);
 929         if (err) {
 930             if (!qtest_enabled()) {
 931                 warn_report_err(err);
 932             } else {
 933                 error_free(err);
 934             }
 935         }
 936     }
 937 }
 938
 939 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
 940                                      struct iovec *iov, unsigned int iov_cnt)
 941 {
 942     uint8_t on;
 943     size_t s;
 944     NetClientState *nc = qemu_get_queue(n->nic);
 945
 946     s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
 947     if (s != sizeof(on)) {
 948         return VIRTIO_NET_ERR;
 949     }
 950
 951     if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
 952         n->promisc = on;
 953     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
 954         n->allmulti = on;
 955     } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
 956         n->alluni = on;
 957     } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
 958         n->nomulti = on;
 959     } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
 960         n->nouni = on;
 961     } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
 962         n->nobcast = on;
 963     } else {
 964         return VIRTIO_NET_ERR;
 965     }
 966
 967     rxfilter_notify(nc);
 968
 969     return VIRTIO_NET_OK;
 970 }
 971
 972 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
 973                                      struct iovec *iov, unsigned int iov_cnt)
 974 {
 975     VirtIODevice *vdev = VIRTIO_DEVICE(n);
 976     uint64_t offloads;
 977     size_t s;
 978
 979     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 980         return VIRTIO_NET_ERR;
 981     }
 982
 983     s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
 984     if (s != sizeof(offloads)) {
 985         return VIRTIO_NET_ERR;
 986     }
 987
 988     if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
 989         uint64_t supported_offloads;
 990
 991         offloads = virtio_ldq_p(vdev, &offloads);
 992
 993         if (!n->has_vnet_hdr) {
 994             return VIRTIO_NET_ERR;
 995         }
 996
 997         n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
 998             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
 999         n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1000             virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1001         virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1002
1003         supported_offloads = virtio_net_supported_guest_offloads(n);
1004         if (offloads & ~supported_offloads) {
1005             return VIRTIO_NET_ERR;
1006         }
1007
1008         n->curr_guest_offloads = offloads;
1009         virtio_net_apply_guest_offloads(n);
1010
1011         return VIRTIO_NET_OK;
1012     } else {
1013         return VIRTIO_NET_ERR;
1014     }
1015 }
1016
1017 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1018                                  struct iovec *iov, unsigned int iov_cnt)
1019 {
1020     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1021     struct virtio_net_ctrl_mac mac_data;
1022     size_t s;
1023     NetClientState *nc = qemu_get_queue(n->nic);
1024
1025     if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1026         if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1027             return VIRTIO_NET_ERR;
1028         }
1029         s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1030         assert(s == sizeof(n->mac));
1031         qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1032         rxfilter_notify(nc);
1033
1034         return VIRTIO_NET_OK;
1035     }
1036
1037     if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1038         return VIRTIO_NET_ERR;
1039     }
1040
1041     int in_use = 0;
1042     int first_multi = 0;
1043     uint8_t uni_overflow = 0;
1044     uint8_t multi_overflow = 0;
1045     uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1046
1047     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1048                    sizeof(mac_data.entries));
1049     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1050     if (s != sizeof(mac_data.entries)) {
1051         goto error;
1052     }
1053     iov_discard_front(&iov, &iov_cnt, s);
1054
1055     if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1056         goto error;
1057     }
1058
1059     if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1060         s = iov_to_buf(iov, iov_cnt, 0, macs,
1061                        mac_data.entries * ETH_ALEN);
1062         if (s != mac_data.entries * ETH_ALEN) {
1063             goto error;
1064         }
1065         in_use += mac_data.entries;
1066     } else {
1067         uni_overflow = 1;
1068     }
1069
1070     iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1071
1072     first_multi = in_use;
1073
1074     s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1075                    sizeof(mac_data.entries));
1076     mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1077     if (s != sizeof(mac_data.entries)) {
1078         goto error;
1079     }
1080
1081     iov_discard_front(&iov, &iov_cnt, s);
1082
1083     if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1084         goto error;
1085     }
1086
1087     if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1088         s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1089                        mac_data.entries * ETH_ALEN);
1090         if (s != mac_data.entries * ETH_ALEN) {
1091             goto error;
1092         }
1093         in_use += mac_data.entries;
1094     } else {
1095         multi_overflow = 1;
1096     }
1097
1098     n->mac_table.in_use = in_use;
1099     n->mac_table.first_multi = first_multi;
1100     n->mac_table.uni_overflow = uni_overflow;
1101     n->mac_table.multi_overflow = multi_overflow;
1102     memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1103     g_free(macs);
1104     rxfilter_notify(nc);
1105
1106     return VIRTIO_NET_OK;
1107
1108 error:
1109     g_free(macs);
1110     return VIRTIO_NET_ERR;
1111 }
1112
1113 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1114                                         struct iovec *iov, unsigned int iov_cnt)
1115 {
1116     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1117     uint16_t vid;
1118     size_t s;
1119     NetClientState *nc = qemu_get_queue(n->nic);
1120
1121     s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1122     vid = virtio_lduw_p(vdev, &vid);
1123     if (s != sizeof(vid)) {
1124         return VIRTIO_NET_ERR;
1125     }
1126
1127     if (vid >= MAX_VLAN)
1128         return VIRTIO_NET_ERR;
1129
1130     if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1131         n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1132     else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1133         n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1134     else
1135         return VIRTIO_NET_ERR;
1136
1137     rxfilter_notify(nc);
1138
1139     return VIRTIO_NET_OK;
1140 }
1141
1142 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1143                                       struct iovec *iov, unsigned int iov_cnt)
1144 {
1145     trace_virtio_net_handle_announce(n->announce_timer.round);
1146     if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1147         n->status & VIRTIO_NET_S_ANNOUNCE) {
1148         n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1149         if (n->announce_timer.round) {
1150             qemu_announce_timer_step(&n->announce_timer);
1151         }
1152         return VIRTIO_NET_OK;
1153     } else {
1154         return VIRTIO_NET_ERR;
1155     }
1156 }
1157
1158 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1159
1160 static void virtio_net_disable_rss(VirtIONet *n)
1161 {
1162     if (n->rss_data.enabled) {
1163         trace_virtio_net_rss_disable();
1164     }
1165     n->rss_data.enabled = false;
1166
1167     virtio_net_detach_epbf_rss(n);
1168 }
1169
1170 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1171 {
1172     NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1173     if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1174         return false;
1175     }
1176
1177     return nc->info->set_steering_ebpf(nc, prog_fd);
1178 }
1179
1180 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1181                                    struct EBPFRSSConfig *config)
1182 {
1183     config->redirect = data->redirect;
1184     config->populate_hash = data->populate_hash;
1185     config->hash_types = data->hash_types;
1186     config->indirections_len = data->indirections_len;
1187     config->default_queue = data->default_queue;
1188 }
1189
1190 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1191 {
1192     struct EBPFRSSConfig config = {};
1193
1194     if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1195         return false;
1196     }
1197
1198     rss_data_to_rss_config(&n->rss_data, &config);
1199
1200     if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1201                           n->rss_data.indirections_table, n->rss_data.key)) {
1202         return false;
1203     }
1204
1205     if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1206         return false;
1207     }
1208
1209     return true;
1210 }
1211
1212 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1213 {
1214     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1215 }
1216
1217 static bool virtio_net_load_ebpf(VirtIONet *n)
1218 {
1219     if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1220         /* backend does't support steering ebpf */
1221         return false;
1222     }
1223
1224     return ebpf_rss_load(&n->ebpf_rss);
1225 }
1226
1227 static void virtio_net_unload_ebpf(VirtIONet *n)
1228 {
1229     virtio_net_attach_ebpf_to_backend(n->nic, -1);
1230     ebpf_rss_unload(&n->ebpf_rss);
1231 }
1232
1233 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1234                                       struct iovec *iov,
1235                                       unsigned int iov_cnt,
1236                                       bool do_rss)
1237 {
1238     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1239     struct virtio_net_rss_config cfg;
1240     size_t s, offset = 0, size_get;
1241     uint16_t queue_pairs, i;
1242     struct {
1243         uint16_t us;
1244         uint8_t b;
1245     } QEMU_PACKED temp;
1246     const char *err_msg = "";
1247     uint32_t err_value = 0;
1248
1249     if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1250         err_msg = "RSS is not negotiated";
1251         goto error;
1252     }
1253     if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1254         err_msg = "Hash report is not negotiated";
1255         goto error;
1256     }
1257     size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1258     s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1259     if (s != size_get) {
1260         err_msg = "Short command buffer";
1261         err_value = (uint32_t)s;
1262         goto error;
1263     }
1264     n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1265     n->rss_data.indirections_len =
1266         virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1267     n->rss_data.indirections_len++;
1268     if (!do_rss) {
1269         n->rss_data.indirections_len = 1;
1270     }
1271     if (!is_power_of_2(n->rss_data.indirections_len)) {
1272         err_msg = "Invalid size of indirection table";
1273         err_value = n->rss_data.indirections_len;
1274         goto error;
1275     }
1276     if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1277         err_msg = "Too large indirection table";
1278         err_value = n->rss_data.indirections_len;
1279         goto error;
1280     }
1281     n->rss_data.default_queue = do_rss ?
1282         virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1283     if (n->rss_data.default_queue >= n->max_queue_pairs) {
1284         err_msg = "Invalid default queue";
1285         err_value = n->rss_data.default_queue;
1286         goto error;
1287     }
1288     offset += size_get;
1289     size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1290     g_free(n->rss_data.indirections_table);
1291     n->rss_data.indirections_table = g_malloc(size_get);
1292     if (!n->rss_data.indirections_table) {
1293         err_msg = "Can't allocate indirections table";
1294         err_value = n->rss_data.indirections_len;
1295         goto error;
1296     }
1297     s = iov_to_buf(iov, iov_cnt, offset,
1298                    n->rss_data.indirections_table, size_get);
1299     if (s != size_get) {
1300         err_msg = "Short indirection table buffer";
1301         err_value = (uint32_t)s;
1302         goto error;
1303     }
1304     for (i = 0; i < n->rss_data.indirections_len; ++i) {
1305         uint16_t val = n->rss_data.indirections_table[i];
1306         n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1307     }
1308     offset += size_get;
1309     size_get = sizeof(temp);
1310     s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1311     if (s != size_get) {
1312         err_msg = "Can't get queue_pairs";
1313         err_value = (uint32_t)s;
1314         goto error;
1315     }
1316     queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1317     if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1318         err_msg = "Invalid number of queue_pairs";
1319         err_value = queue_pairs;
1320         goto error;
1321     }
1322     if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1323         err_msg = "Invalid key size";
1324         err_value = temp.b;
1325         goto error;
1326     }
1327     if (!temp.b && n->rss_data.hash_types) {
1328         err_msg = "No key provided";
1329         err_value = 0;
1330         goto error;
1331     }
1332     if (!temp.b && !n->rss_data.hash_types) {
1333         virtio_net_disable_rss(n);
1334         return queue_pairs;
1335     }
1336     offset += size_get;
1337     size_get = temp.b;
1338     s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1339     if (s != size_get) {
1340         err_msg = "Can get key buffer";
1341         err_value = (uint32_t)s;
1342         goto error;
1343     }
1344     n->rss_data.enabled = true;
1345
1346     if (!n->rss_data.populate_hash) {
1347         if (!virtio_net_attach_epbf_rss(n)) {
1348             /* EBPF must be loaded for vhost */
1349             if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1350                 warn_report("Can't load eBPF RSS for vhost");
1351                 goto error;
1352             }
1353             /* fallback to software RSS */
1354             warn_report("Can't load eBPF RSS - fallback to software RSS");
1355             n->rss_data.enabled_software_rss = true;
1356         }
1357     } else {
1358         /* use software RSS for hash populating */
1359         /* and detach eBPF if was loaded before */
1360         virtio_net_detach_epbf_rss(n);
1361         n->rss_data.enabled_software_rss = true;
1362     }
1363
1364     trace_virtio_net_rss_enable(n->rss_data.hash_types,
1365                                 n->rss_data.indirections_len,
1366                                 temp.b);
1367     return queue_pairs;
1368 error:
1369     trace_virtio_net_rss_error(err_msg, err_value);
1370     virtio_net_disable_rss(n);
1371     return 0;
1372 }
1373
1374 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1375                                 struct iovec *iov, unsigned int iov_cnt)
1376 {
1377     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1378     uint16_t queue_pairs;
1379
1380     virtio_net_disable_rss(n);
1381     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1382         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1383         return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1384     }
1385     if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1386         queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1387     } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1388         struct virtio_net_ctrl_mq mq;
1389         size_t s;
1390         if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1391             return VIRTIO_NET_ERR;
1392         }
1393         s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1394         if (s != sizeof(mq)) {
1395             return VIRTIO_NET_ERR;
1396         }
1397         queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1398
1399     } else {
1400         return VIRTIO_NET_ERR;
1401     }
1402
1403     if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1404         queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1405         queue_pairs > n->max_queue_pairs ||
1406         !n->multiqueue) {
1407         return VIRTIO_NET_ERR;
1408     }
1409
1410     n->curr_queue_pairs = queue_pairs;
1411     /* stop the backend before changing the number of queue_pairs to avoid handling a
1412      * disabled queue */
1413     virtio_net_set_status(vdev, vdev->status);
1414     virtio_net_set_queue_pairs(n);
1415
1416     return VIRTIO_NET_OK;
1417 }
1418
1419 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1420 {
1421     VirtIONet *n = VIRTIO_NET(vdev);
1422     struct virtio_net_ctrl_hdr ctrl;
1423     virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1424     VirtQueueElement *elem;
1425     size_t s;
1426     struct iovec *iov, *iov2;
1427     unsigned int iov_cnt;
1428
1429     for (;;) {
1430         elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1431         if (!elem) {
1432             break;
1433         }
1434         if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1435             iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1436             virtio_error(vdev, "virtio-net ctrl missing headers");
1437             virtqueue_detach_element(vq, elem, 0);
1438             g_free(elem);
1439             break;
1440         }
1441
1442         iov_cnt = elem->out_num;
1443         iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1444         s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1445         iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1446         if (s != sizeof(ctrl)) {
1447             status = VIRTIO_NET_ERR;
1448         } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1449             status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1450         } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1451             status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1452         } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1453             status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1454         } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1455             status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1456         } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1457             status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1458         } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1459             status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1460         }
1461
1462         s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1463         assert(s == sizeof(status));
1464
1465         virtqueue_push(vq, elem, sizeof(status));
1466         virtio_notify(vdev, vq);
1467         g_free(iov2);
1468         g_free(elem);
1469     }
1470 }
1471
1472 /* RX */
1473
1474 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1475 {
1476     VirtIONet *n = VIRTIO_NET(vdev);
1477     int queue_index = vq2q(virtio_get_queue_index(vq));
1478
1479     qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1480 }
1481
1482 static bool virtio_net_can_receive(NetClientState *nc)
1483 {
1484     VirtIONet *n = qemu_get_nic_opaque(nc);
1485     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1486     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1487
1488     if (!vdev->vm_running) {
1489         return false;
1490     }
1491
1492     if (nc->queue_index >= n->curr_queue_pairs) {
1493         return false;
1494     }
1495
1496     if (!virtio_queue_ready(q->rx_vq) ||
1497         !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1498         return false;
1499     }
1500
1501     return true;
1502 }
1503
1504 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1505 {
1506     VirtIONet *n = q->n;
1507     if (virtio_queue_empty(q->rx_vq) ||
1508         (n->mergeable_rx_bufs &&
1509          !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1510         virtio_queue_set_notification(q->rx_vq, 1);
1511
1512         /* To avoid a race condition where the guest has made some buffers
1513          * available after the above check but before notification was
1514          * enabled, check for available buffers again.
1515          */
1516         if (virtio_queue_empty(q->rx_vq) ||
1517             (n->mergeable_rx_bufs &&
1518              !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1519             return 0;
1520         }
1521     }
1522
1523     virtio_queue_set_notification(q->rx_vq, 0);
1524     return 1;
1525 }
1526
1527 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1528 {
1529     virtio_tswap16s(vdev, &hdr->hdr_len);
1530     virtio_tswap16s(vdev, &hdr->gso_size);
1531     virtio_tswap16s(vdev, &hdr->csum_start);
1532     virtio_tswap16s(vdev, &hdr->csum_offset);
1533 }
1534
1535 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1536  * it never finds out that the packets don't have valid checksums.  This
1537  * causes dhclient to get upset.  Fedora's carried a patch for ages to
1538  * fix this with Xen but it hasn't appeared in an upstream release of
1539  * dhclient yet.
1540  *
1541  * To avoid breaking existing guests, we catch udp packets and add
1542  * checksums.  This is terrible but it's better than hacking the guest
1543  * kernels.
1544  *
1545  * N.B. if we introduce a zero-copy API, this operation is no longer free so
1546  * we should provide a mechanism to disable it to avoid polluting the host
1547  * cache.
1548  */
1549 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1550                                         uint8_t *buf, size_t size)
1551 {
1552     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1553         (size > 27 && size < 1500) && /* normal sized MTU */
1554         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1555         (buf[23] == 17) && /* ip.protocol == UDP */
1556         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1557         net_checksum_calculate(buf, size, CSUM_UDP);
1558         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1559     }
1560 }
1561
1562 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1563                            const void *buf, size_t size)
1564 {
1565     if (n->has_vnet_hdr) {
1566         /* FIXME this cast is evil */
1567         void *wbuf = (void *)buf;
1568         work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1569                                     size - n->host_hdr_len);
1570
1571         if (n->needs_vnet_hdr_swap) {
1572             virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1573         }
1574         iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1575     } else {
1576         struct virtio_net_hdr hdr = {
1577             .flags = 0,
1578             .gso_type = VIRTIO_NET_HDR_GSO_NONE
1579         };
1580         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1581     }
1582 }
1583
1584 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1585 {
1586     static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1587     static const uint8_t vlan[] = {0x81, 0x00};
1588     uint8_t *ptr = (uint8_t *)buf;
1589     int i;
1590
1591     if (n->promisc)
1592         return 1;
1593
1594     ptr += n->host_hdr_len;
1595
1596     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1597         int vid = lduw_be_p(ptr + 14) & 0xfff;
1598         if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1599             return 0;
1600     }
1601
1602     if (ptr[0] & 1) { // multicast
1603         if (!memcmp(ptr, bcast, sizeof(bcast))) {
1604             return !n->nobcast;
1605         } else if (n->nomulti) {
1606             return 0;
1607         } else if (n->allmulti || n->mac_table.multi_overflow) {
1608             return 1;
1609         }
1610
1611         for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1612             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1613                 return 1;
1614             }
1615         }
1616     } else { // unicast
1617         if (n->nouni) {
1618             return 0;
1619         } else if (n->alluni || n->mac_table.uni_overflow) {
1620             return 1;
1621         } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1622             return 1;
1623         }
1624
1625         for (i = 0; i < n->mac_table.first_multi; i++) {
1626             if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1627                 return 1;
1628             }
1629         }
1630     }
1631
1632     return 0;
1633 }
1634
1635 static uint8_t virtio_net_get_hash_type(bool isip4,
1636                                         bool isip6,
1637                                         bool isudp,
1638                                         bool istcp,
1639                                         uint32_t types)
1640 {
1641     if (isip4) {
1642         if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1643             return NetPktRssIpV4Tcp;
1644         }
1645         if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1646             return NetPktRssIpV4Udp;
1647         }
1648         if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1649             return NetPktRssIpV4;
1650         }
1651     } else if (isip6) {
1652         uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1653                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1654
1655         if (istcp && (types & mask)) {
1656             return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1657                 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1658         }
1659         mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1660         if (isudp && (types & mask)) {
1661             return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1662                 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1663         }
1664         mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1665         if (types & mask) {
1666             return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1667                 NetPktRssIpV6Ex : NetPktRssIpV6;
1668         }
1669     }
1670     return 0xff;
1671 }
1672
1673 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1674                                    uint32_t hash)
1675 {
1676     struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1677     hdr->hash_value = hash;
1678     hdr->hash_report = report;
1679 }
1680
1681 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1682                                   size_t size)
1683 {
1684     VirtIONet *n = qemu_get_nic_opaque(nc);
1685     unsigned int index = nc->queue_index, new_index = index;
1686     struct NetRxPkt *pkt = n->rx_pkt;
1687     uint8_t net_hash_type;
1688     uint32_t hash;
1689     bool isip4, isip6, isudp, istcp;
1690     static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1691         VIRTIO_NET_HASH_REPORT_IPv4,
1692         VIRTIO_NET_HASH_REPORT_TCPv4,
1693         VIRTIO_NET_HASH_REPORT_TCPv6,
1694         VIRTIO_NET_HASH_REPORT_IPv6,
1695         VIRTIO_NET_HASH_REPORT_IPv6_EX,
1696         VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1697         VIRTIO_NET_HASH_REPORT_UDPv4,
1698         VIRTIO_NET_HASH_REPORT_UDPv6,
1699         VIRTIO_NET_HASH_REPORT_UDPv6_EX
1700     };
1701
1702     net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1703                              size - n->host_hdr_len);
1704     net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1705     if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1706         istcp = isudp = false;
1707     }
1708     if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1709         istcp = isudp = false;
1710     }
1711     net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1712                                              n->rss_data.hash_types);
1713     if (net_hash_type > NetPktRssIpV6UdpEx) {
1714         if (n->rss_data.populate_hash) {
1715             virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1716         }
1717         return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1718     }
1719
1720     hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1721
1722     if (n->rss_data.populate_hash) {
1723         virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1724     }
1725
1726     if (n->rss_data.redirect) {
1727         new_index = hash & (n->rss_data.indirections_len - 1);
1728         new_index = n->rss_data.indirections_table[new_index];
1729     }
1730
1731     return (index == new_index) ? -1 : new_index;
1732 }
1733
1734 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1735                                       size_t size, bool no_rss)
1736 {
1737     VirtIONet *n = qemu_get_nic_opaque(nc);
1738     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1739     VirtIODevice *vdev = VIRTIO_DEVICE(n);
1740     VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1741     size_t lens[VIRTQUEUE_MAX_SIZE];
1742     struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1743     struct virtio_net_hdr_mrg_rxbuf mhdr;
1744     unsigned mhdr_cnt = 0;
1745     size_t offset, i, guest_offset, j;
1746     ssize_t err;
1747
1748     if (!virtio_net_can_receive(nc)) {
1749         return -1;
1750     }
1751
1752     if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1753         int index = virtio_net_process_rss(nc, buf, size);
1754         if (index >= 0) {
1755             NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1756             return virtio_net_receive_rcu(nc2, buf, size, true);
1757         }
1758     }
1759
1760     /* hdr_len refers to the header we supply to the guest */
1761     if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1762         return 0;
1763     }
1764
1765     if (!receive_filter(n, buf, size))
1766         return size;
1767
1768     offset = i = 0;
1769
1770     while (offset < size) {
1771         VirtQueueElement *elem;
1772         int len, total;
1773         const struct iovec *sg;
1774
1775         total = 0;
1776
1777         if (i == VIRTQUEUE_MAX_SIZE) {
1778             virtio_error(vdev, "virtio-net unexpected long buffer chain");
1779             err = size;
1780             goto err;
1781         }
1782
1783         elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1784         if (!elem) {
1785             if (i) {
1786                 virtio_error(vdev, "virtio-net unexpected empty queue: "
1787                              "i %zd mergeable %d offset %zd, size %zd, "
1788                              "guest hdr len %zd, host hdr len %zd "
1789                              "guest features 0x%" PRIx64,
1790                              i, n->mergeable_rx_bufs, offset, size,
1791                              n->guest_hdr_len, n->host_hdr_len,
1792                              vdev->guest_features);
1793             }
1794             err = -1;
1795             goto err;
1796         }
1797
1798         if (elem->in_num < 1) {
1799             virtio_error(vdev,
1800                          "virtio-net receive queue contains no in buffers");
1801             virtqueue_detach_element(q->rx_vq, elem, 0);
1802             g_free(elem);
1803             err = -1;
1804             goto err;
1805         }
1806
1807         sg = elem->in_sg;
1808         if (i == 0) {
1809             assert(offset == 0);
1810             if (n->mergeable_rx_bufs) {
1811                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1812                                     sg, elem->in_num,
1813                                     offsetof(typeof(mhdr), num_buffers),
1814                                     sizeof(mhdr.num_buffers));
1815             }
1816
1817             receive_header(n, sg, elem->in_num, buf, size);
1818             if (n->rss_data.populate_hash) {
1819                 offset = sizeof(mhdr);
1820                 iov_from_buf(sg, elem->in_num, offset,
1821                              buf + offset, n->host_hdr_len - sizeof(mhdr));
1822             }
1823             offset = n->host_hdr_len;
1824             total += n->guest_hdr_len;
1825             guest_offset = n->guest_hdr_len;
1826         } else {
1827             guest_offset = 0;
1828         }
1829
1830         /* copy in packet.  ugh */
1831         len = iov_from_buf(sg, elem->in_num, guest_offset,
1832                            buf + offset, size - offset);
1833         total += len;
1834         offset += len;
1835         /* If buffers can't be merged, at this point we
1836          * must have consumed the complete packet.
1837          * Otherwise, drop it. */
1838         if (!n->mergeable_rx_bufs && offset < size) {
1839             virtqueue_unpop(q->rx_vq, elem, total);
1840             g_free(elem);
1841             err = size;
1842             goto err;
1843         }
1844
1845         elems[i] = elem;
1846         lens[i] = total;
1847         i++;
1848     }
1849
1850     if (mhdr_cnt) {
1851         virtio_stw_p(vdev, &mhdr.num_buffers, i);
1852         iov_from_buf(mhdr_sg, mhdr_cnt,
1853                      0,
1854                      &mhdr.num_buffers, sizeof mhdr.num_buffers);
1855     }
1856
1857     for (j = 0; j < i; j++) {
1858         /* signal other side */
1859         virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
1860         g_free(elems[j]);
1861     }
1862
1863     virtqueue_flush(q->rx_vq, i);
1864     virtio_notify(vdev, q->rx_vq);
1865
1866     return size;
1867
1868 err:
1869     for (j = 0; j < i; j++) {
1870         g_free(elems[j]);
1871     }
1872
1873     return err;
1874 }
1875
1876 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1877                                   size_t size)
1878 {
1879     RCU_READ_LOCK_GUARD();
1880
1881     return virtio_net_receive_rcu(nc, buf, size, false);
1882 }
1883
1884 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1885                                          const uint8_t *buf,
1886                                          VirtioNetRscUnit *unit)
1887 {
1888     uint16_t ip_hdrlen;
1889     struct ip_header *ip;
1890
1891     ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1892                               + sizeof(struct eth_header));
1893     unit->ip = (void *)ip;
1894     ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1895     unit->ip_plen = &ip->ip_len;
1896     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1897     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1898     unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
1899 }
1900
1901 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1902                                          const uint8_t *buf,
1903                                          VirtioNetRscUnit *unit)
1904 {
1905     struct ip6_header *ip6;
1906
1907     ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1908                                  + sizeof(struct eth_header));
1909     unit->ip = ip6;
1910     unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1911     unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1912                                         + sizeof(struct ip6_header));
1913     unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1914
1915     /* There is a difference between payload lenght in ipv4 and v6,
1916        ip header is excluded in ipv6 */
1917     unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1918 }
1919
1920 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1921                                        VirtioNetRscSeg *seg)
1922 {
1923     int ret;
1924     struct virtio_net_hdr_v1 *h;
1925
1926     h = (struct virtio_net_hdr_v1 *)seg->buf;
1927     h->flags = 0;
1928     h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1929
1930     if (seg->is_coalesced) {
1931         h->rsc.segments = seg->packets;
1932         h->rsc.dup_acks = seg->dup_ack;
1933         h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1934         if (chain->proto == ETH_P_IP) {
1935             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1936         } else {
1937             h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1938         }
1939     }
1940
1941     ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1942     QTAILQ_REMOVE(&chain->buffers, seg, next);
1943     g_free(seg->buf);
1944     g_free(seg);
1945
1946     return ret;
1947 }
1948
1949 static void virtio_net_rsc_purge(void *opq)
1950 {
1951     VirtioNetRscSeg *seg, *rn;
1952     VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1953
1954     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1955         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1956             chain->stat.purge_failed++;
1957             continue;
1958         }
1959     }
1960
1961     chain->stat.timer++;
1962     if (!QTAILQ_EMPTY(&chain->buffers)) {
1963         timer_mod(chain->drain_timer,
1964               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1965     }
1966 }
1967
1968 static void virtio_net_rsc_cleanup(VirtIONet *n)
1969 {
1970     VirtioNetRscChain *chain, *rn_chain;
1971     VirtioNetRscSeg *seg, *rn_seg;
1972
1973     QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1974         QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1975             QTAILQ_REMOVE(&chain->buffers, seg, next);
1976             g_free(seg->buf);
1977             g_free(seg);
1978         }
1979
1980         timer_free(chain->drain_timer);
1981         QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1982         g_free(chain);
1983     }
1984 }
1985
1986 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1987                                      NetClientState *nc,
1988                                      const uint8_t *buf, size_t size)
1989 {
1990     uint16_t hdr_len;
1991     VirtioNetRscSeg *seg;
1992
1993     hdr_len = chain->n->guest_hdr_len;
1994     seg = g_malloc(sizeof(VirtioNetRscSeg));
1995     seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1996         + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1997     memcpy(seg->buf, buf, size);
1998     seg->size = size;
1999     seg->packets = 1;
2000     seg->dup_ack = 0;
2001     seg->is_coalesced = 0;
2002     seg->nc = nc;
2003
2004     QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2005     chain->stat.cache++;
2006
2007     switch (chain->proto) {
2008     case ETH_P_IP:
2009         virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2010         break;
2011     case ETH_P_IPV6:
2012         virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2013         break;
2014     default:
2015         g_assert_not_reached();
2016     }
2017 }
2018
2019 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2020                                          VirtioNetRscSeg *seg,
2021                                          const uint8_t *buf,
2022                                          struct tcp_header *n_tcp,
2023                                          struct tcp_header *o_tcp)
2024 {
2025     uint32_t nack, oack;
2026     uint16_t nwin, owin;
2027
2028     nack = htonl(n_tcp->th_ack);
2029     nwin = htons(n_tcp->th_win);
2030     oack = htonl(o_tcp->th_ack);
2031     owin = htons(o_tcp->th_win);
2032
2033     if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2034         chain->stat.ack_out_of_win++;
2035         return RSC_FINAL;
2036     } else if (nack == oack) {
2037         /* duplicated ack or window probe */
2038         if (nwin == owin) {
2039             /* duplicated ack, add dup ack count due to whql test up to 1 */
2040             chain->stat.dup_ack++;
2041             return RSC_FINAL;
2042         } else {
2043             /* Coalesce window update */
2044             o_tcp->th_win = n_tcp->th_win;
2045             chain->stat.win_update++;
2046             return RSC_COALESCE;
2047         }
2048     } else {
2049         /* pure ack, go to 'C', finalize*/
2050         chain->stat.pure_ack++;
2051         return RSC_FINAL;
2052     }
2053 }
2054
2055 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2056                                             VirtioNetRscSeg *seg,
2057                                             const uint8_t *buf,
2058                                             VirtioNetRscUnit *n_unit)
2059 {
2060     void *data;
2061     uint16_t o_ip_len;
2062     uint32_t nseq, oseq;
2063     VirtioNetRscUnit *o_unit;
2064
2065     o_unit = &seg->unit;
2066     o_ip_len = htons(*o_unit->ip_plen);
2067     nseq = htonl(n_unit->tcp->th_seq);
2068     oseq = htonl(o_unit->tcp->th_seq);
2069
2070     /* out of order or retransmitted. */
2071     if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2072         chain->stat.data_out_of_win++;
2073         return RSC_FINAL;
2074     }
2075
2076     data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2077     if (nseq == oseq) {
2078         if ((o_unit->payload == 0) && n_unit->payload) {
2079             /* From no payload to payload, normal case, not a dup ack or etc */
2080             chain->stat.data_after_pure_ack++;
2081             goto coalesce;
2082         } else {
2083             return virtio_net_rsc_handle_ack(chain, seg, buf,
2084                                              n_unit->tcp, o_unit->tcp);
2085         }
2086     } else if ((nseq - oseq) != o_unit->payload) {
2087         /* Not a consistent packet, out of order */
2088         chain->stat.data_out_of_order++;
2089         return RSC_FINAL;
2090     } else {
2091 coalesce:
2092         if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2093             chain->stat.over_size++;
2094             return RSC_FINAL;
2095         }
2096
2097         /* Here comes the right data, the payload length in v4/v6 is different,
2098            so use the field value to update and record the new data len */
2099         o_unit->payload += n_unit->payload; /* update new data len */
2100
2101         /* update field in ip header */
2102         *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2103
2104         /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2105            for windows guest, while this may change the behavior for linux
2106            guest (only if it uses RSC feature). */
2107         o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2108
2109         o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2110         o_unit->tcp->th_win = n_unit->tcp->th_win;
2111
2112         memmove(seg->buf + seg->size, data, n_unit->payload);
2113         seg->size += n_unit->payload;
2114         seg->packets++;
2115         chain->stat.coalesced++;
2116         return RSC_COALESCE;
2117     }
2118 }
2119
2120 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2121                                         VirtioNetRscSeg *seg,
2122                                         const uint8_t *buf, size_t size,
2123                                         VirtioNetRscUnit *unit)
2124 {
2125     struct ip_header *ip1, *ip2;
2126
2127     ip1 = (struct ip_header *)(unit->ip);
2128     ip2 = (struct ip_header *)(seg->unit.ip);
2129     if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2130         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2131         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2132         chain->stat.no_match++;
2133         return RSC_NO_MATCH;
2134     }
2135
2136     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2137 }
2138
2139 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2140                                         VirtioNetRscSeg *seg,
2141                                         const uint8_t *buf, size_t size,
2142                                         VirtioNetRscUnit *unit)
2143 {
2144     struct ip6_header *ip1, *ip2;
2145
2146     ip1 = (struct ip6_header *)(unit->ip);
2147     ip2 = (struct ip6_header *)(seg->unit.ip);
2148     if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2149         || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2150         || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2151         || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2152             chain->stat.no_match++;
2153             return RSC_NO_MATCH;
2154     }
2155
2156     return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2157 }
2158
2159 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2160  * to prevent out of order */
2161 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2162                                          struct tcp_header *tcp)
2163 {
2164     uint16_t tcp_hdr;
2165     uint16_t tcp_flag;
2166
2167     tcp_flag = htons(tcp->th_offset_flags);
2168     tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2169     tcp_flag &= VIRTIO_NET_TCP_FLAG;
2170     if (tcp_flag & TH_SYN) {
2171         chain->stat.tcp_syn++;
2172         return RSC_BYPASS;
2173     }
2174
2175     if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2176         chain->stat.tcp_ctrl_drain++;
2177         return RSC_FINAL;
2178     }
2179
2180     if (tcp_hdr > sizeof(struct tcp_header)) {
2181         chain->stat.tcp_all_opt++;
2182         return RSC_FINAL;
2183     }
2184
2185     return RSC_CANDIDATE;
2186 }
2187
2188 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2189                                          NetClientState *nc,
2190                                          const uint8_t *buf, size_t size,
2191                                          VirtioNetRscUnit *unit)
2192 {
2193     int ret;
2194     VirtioNetRscSeg *seg, *nseg;
2195
2196     if (QTAILQ_EMPTY(&chain->buffers)) {
2197         chain->stat.empty_cache++;
2198         virtio_net_rsc_cache_buf(chain, nc, buf, size);
2199         timer_mod(chain->drain_timer,
2200               qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2201         return size;
2202     }
2203
2204     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2205         if (chain->proto == ETH_P_IP) {
2206             ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2207         } else {
2208             ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2209         }
2210
2211         if (ret == RSC_FINAL) {
2212             if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2213                 /* Send failed */
2214                 chain->stat.final_failed++;
2215                 return 0;
2216             }
2217
2218             /* Send current packet */
2219             return virtio_net_do_receive(nc, buf, size);
2220         } else if (ret == RSC_NO_MATCH) {
2221             continue;
2222         } else {
2223             /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2224             seg->is_coalesced = 1;
2225             return size;
2226         }
2227     }
2228
2229     chain->stat.no_match_cache++;
2230     virtio_net_rsc_cache_buf(chain, nc, buf, size);
2231     return size;
2232 }
2233
2234 /* Drain a connection data, this is to avoid out of order segments */
2235 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2236                                         NetClientState *nc,
2237                                         const uint8_t *buf, size_t size,
2238                                         uint16_t ip_start, uint16_t ip_size,
2239                                         uint16_t tcp_port)
2240 {
2241     VirtioNetRscSeg *seg, *nseg;
2242     uint32_t ppair1, ppair2;
2243
2244     ppair1 = *(uint32_t *)(buf + tcp_port);
2245     QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2246         ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2247         if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2248             || (ppair1 != ppair2)) {
2249             continue;
2250         }
2251         if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2252             chain->stat.drain_failed++;
2253         }
2254
2255         break;
2256     }
2257
2258     return virtio_net_do_receive(nc, buf, size);
2259 }
2260
2261 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2262                                             struct ip_header *ip,
2263                                             const uint8_t *buf, size_t size)
2264 {
2265     uint16_t ip_len;
2266
2267     /* Not an ipv4 packet */
2268     if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2269         chain->stat.ip_option++;
2270         return RSC_BYPASS;
2271     }
2272
2273     /* Don't handle packets with ip option */
2274     if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2275         chain->stat.ip_option++;
2276         return RSC_BYPASS;
2277     }
2278
2279     if (ip->ip_p != IPPROTO_TCP) {
2280         chain->stat.bypass_not_tcp++;
2281         return RSC_BYPASS;
2282     }
2283
2284     /* Don't handle packets with ip fragment */
2285     if (!(htons(ip->ip_off) & IP_DF)) {
2286         chain->stat.ip_frag++;
2287         return RSC_BYPASS;
2288     }
2289
2290     /* Don't handle packets with ecn flag */
2291     if (IPTOS_ECN(ip->ip_tos)) {
2292         chain->stat.ip_ecn++;
2293         return RSC_BYPASS;
2294     }
2295
2296     ip_len = htons(ip->ip_len);
2297     if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2298         || ip_len > (size - chain->n->guest_hdr_len -
2299                      sizeof(struct eth_header))) {
2300         chain->stat.ip_hacked++;
2301         return RSC_BYPASS;
2302     }
2303
2304     return RSC_CANDIDATE;
2305 }
2306
2307 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2308                                       NetClientState *nc,
2309                                       const uint8_t *buf, size_t size)
2310 {
2311     int32_t ret;
2312     uint16_t hdr_len;
2313     VirtioNetRscUnit unit;
2314
2315     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2316
2317     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2318         + sizeof(struct tcp_header))) {
2319         chain->stat.bypass_not_tcp++;
2320         return virtio_net_do_receive(nc, buf, size);
2321     }
2322
2323     virtio_net_rsc_extract_unit4(chain, buf, &unit);
2324     if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2325         != RSC_CANDIDATE) {
2326         return virtio_net_do_receive(nc, buf, size);
2327     }
2328
2329     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2330     if (ret == RSC_BYPASS) {
2331         return virtio_net_do_receive(nc, buf, size);
2332     } else if (ret == RSC_FINAL) {
2333         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2334                 ((hdr_len + sizeof(struct eth_header)) + 12),
2335                 VIRTIO_NET_IP4_ADDR_SIZE,
2336                 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2337     }
2338
2339     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2340 }
2341
2342 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2343                                             struct ip6_header *ip6,
2344                                             const uint8_t *buf, size_t size)
2345 {
2346     uint16_t ip_len;
2347
2348     if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2349         != IP_HEADER_VERSION_6) {
2350         return RSC_BYPASS;
2351     }
2352
2353     /* Both option and protocol is checked in this */
2354     if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2355         chain->stat.bypass_not_tcp++;
2356         return RSC_BYPASS;
2357     }
2358
2359     ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2360     if (ip_len < sizeof(struct tcp_header) ||
2361         ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2362                   - sizeof(struct ip6_header))) {
2363         chain->stat.ip_hacked++;
2364         return RSC_BYPASS;
2365     }
2366
2367     /* Don't handle packets with ecn flag */
2368     if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2369         chain->stat.ip_ecn++;
2370         return RSC_BYPASS;
2371     }
2372
2373     return RSC_CANDIDATE;
2374 }
2375
2376 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2377                                       const uint8_t *buf, size_t size)
2378 {
2379     int32_t ret;
2380     uint16_t hdr_len;
2381     VirtioNetRscChain *chain;
2382     VirtioNetRscUnit unit;
2383
2384     chain = (VirtioNetRscChain *)opq;
2385     hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2386
2387     if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2388         + sizeof(tcp_header))) {
2389         return virtio_net_do_receive(nc, buf, size);
2390     }
2391
2392     virtio_net_rsc_extract_unit6(chain, buf, &unit);
2393     if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2394                                                  unit.ip, buf, size)) {
2395         return virtio_net_do_receive(nc, buf, size);
2396     }
2397
2398     ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2399     if (ret == RSC_BYPASS) {
2400         return virtio_net_do_receive(nc, buf, size);
2401     } else if (ret == RSC_FINAL) {
2402         return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2403                 ((hdr_len + sizeof(struct eth_header)) + 8),
2404                 VIRTIO_NET_IP6_ADDR_SIZE,
2405                 hdr_len + sizeof(struct eth_header)
2406                 + sizeof(struct ip6_header));
2407     }
2408
2409     return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2410 }
2411
2412 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2413                                                       NetClientState *nc,
2414                                                       uint16_t proto)
2415 {
2416     VirtioNetRscChain *chain;
2417
2418     if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2419         return NULL;
2420     }
2421
2422     QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2423         if (chain->proto == proto) {
2424             return chain;
2425         }
2426     }
2427
2428     chain = g_malloc(sizeof(*chain));
2429     chain->n = n;
2430     chain->proto = proto;
2431     if (proto == (uint16_t)ETH_P_IP) {
2432         chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2433         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2434     } else {
2435         chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2436         chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2437     }
2438     chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2439                                       virtio_net_rsc_purge, chain);
2440     memset(&chain->stat, 0, sizeof(chain->stat));
2441
2442     QTAILQ_INIT(&chain->buffers);
2443     QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2444
2445     return chain;
2446 }
2447
2448 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2449                                       const uint8_t *buf,
2450                                       size_t size)
2451 {
2452     uint16_t proto;
2453     VirtioNetRscChain *chain;
2454     struct eth_header *eth;
2455     VirtIONet *n;
2456
2457     n = qemu_get_nic_opaque(nc);
2458     if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2459         return virtio_net_do_receive(nc, buf, size);
2460     }
2461
2462     eth = (struct eth_header *)(buf + n->guest_hdr_len);
2463     proto = htons(eth->h_proto);
2464
2465     chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2466     if (chain) {
2467         chain->stat.received++;
2468         if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2469             return virtio_net_rsc_receive4(chain, nc, buf, size);
2470         } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2471             return virtio_net_rsc_receive6(chain, nc, buf, size);
2472         }
2473     }
2474     return virtio_net_do_receive(nc, buf, size);
2475 }
2476
2477 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2478                                   size_t size)
2479 {
2480     VirtIONet *n = qemu_get_nic_opaque(nc);
2481     if ((n->rsc4_enabled || n->rsc6_enabled)) {
2482         return virtio_net_rsc_receive(nc, buf, size);
2483     } else {
2484         return virtio_net_do_receive(nc, buf, size);
2485     }
2486 }
2487
2488 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2489
2490 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2491 {
2492     VirtIONet *n = qemu_get_nic_opaque(nc);
2493     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2494     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2495
2496     virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2497     virtio_notify(vdev, q->tx_vq);
2498
2499     g_free(q->async_tx.elem);
2500     q->async_tx.elem = NULL;
2501
2502     virtio_queue_set_notification(q->tx_vq, 1);
2503     virtio_net_flush_tx(q);
2504 }
2505
2506 /* TX */
2507 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2508 {
2509     VirtIONet *n = q->n;
2510     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2511     VirtQueueElement *elem;
2512     int32_t num_packets = 0;
2513     int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2514     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2515         return num_packets;
2516     }
2517
2518     if (q->async_tx.elem) {
2519         virtio_queue_set_notification(q->tx_vq, 0);
2520         return num_packets;
2521     }
2522
2523     for (;;) {
2524         ssize_t ret;
2525         unsigned int out_num;
2526         struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2527         struct virtio_net_hdr_mrg_rxbuf mhdr;
2528
2529         elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2530         if (!elem) {
2531             break;
2532         }
2533
2534         out_num = elem->out_num;
2535         out_sg = elem->out_sg;
2536         if (out_num < 1) {
2537             virtio_error(vdev, "virtio-net header not in first element");
2538             virtqueue_detach_element(q->tx_vq, elem, 0);
2539             g_free(elem);
2540             return -EINVAL;
2541         }
2542
2543         if (n->has_vnet_hdr) {
2544             if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2545                 n->guest_hdr_len) {
2546                 virtio_error(vdev, "virtio-net header incorrect");
2547                 virtqueue_detach_element(q->tx_vq, elem, 0);
2548                 g_free(elem);
2549                 return -EINVAL;
2550             }
2551             if (n->needs_vnet_hdr_swap) {
2552                 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2553                 sg2[0].iov_base = &mhdr;
2554                 sg2[0].iov_len = n->guest_hdr_len;
2555                 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2556                                    out_sg, out_num,
2557                                    n->guest_hdr_len, -1);
2558                 if (out_num == VIRTQUEUE_MAX_SIZE) {
2559                     goto drop;
2560                 }
2561                 out_num += 1;
2562                 out_sg = sg2;
2563             }
2564         }
2565         /*
2566          * If host wants to see the guest header as is, we can
2567          * pass it on unchanged. Otherwise, copy just the parts
2568          * that host is interested in.
2569          */
2570         assert(n->host_hdr_len <= n->guest_hdr_len);
2571         if (n->host_hdr_len != n->guest_hdr_len) {
2572             unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2573                                        out_sg, out_num,
2574                                        0, n->host_hdr_len);
2575             sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2576                              out_sg, out_num,
2577                              n->guest_hdr_len, -1);
2578             out_num = sg_num;
2579             out_sg = sg;
2580         }
2581
2582         ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2583                                       out_sg, out_num, virtio_net_tx_complete);
2584         if (ret == 0) {
2585             virtio_queue_set_notification(q->tx_vq, 0);
2586             q->async_tx.elem = elem;
2587             return -EBUSY;
2588         }
2589
2590 drop:
2591         virtqueue_push(q->tx_vq, elem, 0);
2592         virtio_notify(vdev, q->tx_vq);
2593         g_free(elem);
2594
2595         if (++num_packets >= n->tx_burst) {
2596             break;
2597         }
2598     }
2599     return num_packets;
2600 }
2601
2602 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2603 {
2604     VirtIONet *n = VIRTIO_NET(vdev);
2605     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2606
2607     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2608         virtio_net_drop_tx_queue_data(vdev, vq);
2609         return;
2610     }
2611
2612     /* This happens when device was stopped but VCPU wasn't. */
2613     if (!vdev->vm_running) {
2614         q->tx_waiting = 1;
2615         return;
2616     }
2617
2618     if (q->tx_waiting) {
2619         virtio_queue_set_notification(vq, 1);
2620         timer_del(q->tx_timer);
2621         q->tx_waiting = 0;
2622         if (virtio_net_flush_tx(q) == -EINVAL) {
2623             return;
2624         }
2625     } else {
2626         timer_mod(q->tx_timer,
2627                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2628         q->tx_waiting = 1;
2629         virtio_queue_set_notification(vq, 0);
2630     }
2631 }
2632
2633 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2634 {
2635     VirtIONet *n = VIRTIO_NET(vdev);
2636     VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2637
2638     if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2639         virtio_net_drop_tx_queue_data(vdev, vq);
2640         return;
2641     }
2642
2643     if (unlikely(q->tx_waiting)) {
2644         return;
2645     }
2646     q->tx_waiting = 1;
2647     /* This happens when device was stopped but VCPU wasn't. */
2648     if (!vdev->vm_running) {
2649         return;
2650     }
2651     virtio_queue_set_notification(vq, 0);
2652     qemu_bh_schedule(q->tx_bh);
2653 }
2654
2655 static void virtio_net_tx_timer(void *opaque)
2656 {
2657     VirtIONetQueue *q = opaque;
2658     VirtIONet *n = q->n;
2659     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2660     /* This happens when device was stopped but BH wasn't. */
2661     if (!vdev->vm_running) {
2662         /* Make sure tx waiting is set, so we'll run when restarted. */
2663         assert(q->tx_waiting);
2664         return;
2665     }
2666
2667     q->tx_waiting = 0;
2668
2669     /* Just in case the driver is not ready on more */
2670     if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2671         return;
2672     }
2673
2674     virtio_queue_set_notification(q->tx_vq, 1);
2675     virtio_net_flush_tx(q);
2676 }
2677
2678 static void virtio_net_tx_bh(void *opaque)
2679 {
2680     VirtIONetQueue *q = opaque;
2681     VirtIONet *n = q->n;
2682     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2683     int32_t ret;
2684
2685     /* This happens when device was stopped but BH wasn't. */
2686     if (!vdev->vm_running) {
2687         /* Make sure tx waiting is set, so we'll run when restarted. */
2688         assert(q->tx_waiting);
2689         return;
2690     }
2691
2692     q->tx_waiting = 0;
2693
2694     /* Just in case the driver is not ready on more */
2695     if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2696         return;
2697     }
2698
2699     ret = virtio_net_flush_tx(q);
2700     if (ret == -EBUSY || ret == -EINVAL) {
2701         return; /* Notification re-enable handled by tx_complete or device
2702                  * broken */
2703     }
2704
2705     /* If we flush a full burst of packets, assume there are
2706      * more coming and immediately reschedule */
2707     if (ret >= n->tx_burst) {
2708         qemu_bh_schedule(q->tx_bh);
2709         q->tx_waiting = 1;
2710         return;
2711     }
2712
2713     /* If less than a full burst, re-enable notification and flush
2714      * anything that may have come in while we weren't looking.  If
2715      * we find something, assume the guest is still active and reschedule */
2716     virtio_queue_set_notification(q->tx_vq, 1);
2717     ret = virtio_net_flush_tx(q);
2718     if (ret == -EINVAL) {
2719         return;
2720     } else if (ret > 0) {
2721         virtio_queue_set_notification(q->tx_vq, 0);
2722         qemu_bh_schedule(q->tx_bh);
2723         q->tx_waiting = 1;
2724     }
2725 }
2726
2727 static void virtio_net_add_queue(VirtIONet *n, int index)
2728 {
2729     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2730
2731     n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2732                                            virtio_net_handle_rx);
2733
2734     if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2735         n->vqs[index].tx_vq =
2736             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2737                              virtio_net_handle_tx_timer);
2738         n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2739                                               virtio_net_tx_timer,
2740                                               &n->vqs[index]);
2741     } else {
2742         n->vqs[index].tx_vq =
2743             virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2744                              virtio_net_handle_tx_bh);
2745         n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2746     }
2747
2748     n->vqs[index].tx_waiting = 0;
2749     n->vqs[index].n = n;
2750 }
2751
2752 static void virtio_net_del_queue(VirtIONet *n, int index)
2753 {
2754     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2755     VirtIONetQueue *q = &n->vqs[index];
2756     NetClientState *nc = qemu_get_subqueue(n->nic, index);
2757
2758     qemu_purge_queued_packets(nc);
2759
2760     virtio_del_queue(vdev, index * 2);
2761     if (q->tx_timer) {
2762         timer_free(q->tx_timer);
2763         q->tx_timer = NULL;
2764     } else {
2765         qemu_bh_delete(q->tx_bh);
2766         q->tx_bh = NULL;
2767     }
2768     q->tx_waiting = 0;
2769     virtio_del_queue(vdev, index * 2 + 1);
2770 }
2771
2772 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2773 {
2774     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2775     int old_num_queues = virtio_get_num_queues(vdev);
2776     int new_num_queues = new_max_queue_pairs * 2 + 1;
2777     int i;
2778
2779     assert(old_num_queues >= 3);
2780     assert(old_num_queues % 2 == 1);
2781
2782     if (old_num_queues == new_num_queues) {
2783         return;
2784     }
2785
2786     /*
2787      * We always need to remove and add ctrl vq if
2788      * old_num_queues != new_num_queues. Remove ctrl_vq first,
2789      * and then we only enter one of the following two loops.
2790      */
2791     virtio_del_queue(vdev, old_num_queues - 1);
2792
2793     for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2794         /* new_num_queues < old_num_queues */
2795         virtio_net_del_queue(n, i / 2);
2796     }
2797
2798     for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2799         /* new_num_queues > old_num_queues */
2800         virtio_net_add_queue(n, i / 2);
2801     }
2802
2803     /* add ctrl_vq last */
2804     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2805 }
2806
2807 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2808 {
2809     int max = multiqueue ? n->max_queue_pairs : 1;
2810
2811     n->multiqueue = multiqueue;
2812     virtio_net_change_num_queue_pairs(n, max);
2813
2814     virtio_net_set_queue_pairs(n);
2815 }
2816
2817 static int virtio_net_post_load_device(void *opaque, int version_id)
2818 {
2819     VirtIONet *n = opaque;
2820     VirtIODevice *vdev = VIRTIO_DEVICE(n);
2821     int i, link_down;
2822
2823     trace_virtio_net_post_load_device();
2824     virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2825                                virtio_vdev_has_feature(vdev,
2826                                                        VIRTIO_F_VERSION_1),
2827                                virtio_vdev_has_feature(vdev,
2828                                                        VIRTIO_NET_F_HASH_REPORT));
2829
2830     /* MAC_TABLE_ENTRIES may be different from the saved image */
2831     if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2832         n->mac_table.in_use = 0;
2833     }
2834
2835     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2836         n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2837     }
2838
2839     /*
2840      * curr_guest_offloads will be later overwritten by the
2841      * virtio_set_features_nocheck call done from the virtio_load.
2842      * Here we make sure it is preserved and restored accordingly
2843      * in the virtio_net_post_load_virtio callback.
2844      */
2845     n->saved_guest_offloads = n->curr_guest_offloads;
2846
2847     virtio_net_set_queue_pairs(n);
2848
2849     /* Find the first multicast entry in the saved MAC filter */
2850     for (i = 0; i < n->mac_table.in_use; i++) {
2851         if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2852             break;
2853         }
2854     }
2855     n->mac_table.first_multi = i;
2856
2857     /* nc.link_down can't be migrated, so infer link_down according
2858      * to link status bit in n->status */
2859     link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2860     for (i = 0; i < n->max_queue_pairs; i++) {
2861         qemu_get_subqueue(n->nic, i)->link_down = link_down;
2862     }
2863
2864     if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2865         virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2866         qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2867                                   QEMU_CLOCK_VIRTUAL,
2868                                   virtio_net_announce_timer, n);
2869         if (n->announce_timer.round) {
2870             timer_mod(n->announce_timer.tm,
2871                       qemu_clock_get_ms(n->announce_timer.type));
2872         } else {
2873             qemu_announce_timer_del(&n->announce_timer, false);
2874         }
2875     }
2876
2877     if (n->rss_data.enabled) {
2878         n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
2879         if (!n->rss_data.populate_hash) {
2880             if (!virtio_net_attach_epbf_rss(n)) {
2881                 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
2882                     warn_report("Can't post-load eBPF RSS for vhost");
2883                 } else {
2884                     warn_report("Can't post-load eBPF RSS - "
2885                                 "fallback to software RSS");
2886                     n->rss_data.enabled_software_rss = true;
2887                 }
2888             }
2889         }
2890
2891         trace_virtio_net_rss_enable(n->rss_data.hash_types,
2892                                     n->rss_data.indirections_len,
2893                                     sizeof(n->rss_data.key));
2894     } else {
2895         trace_virtio_net_rss_disable();
2896     }
2897     return 0;
2898 }
2899
2900 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2901 {
2902     VirtIONet *n = VIRTIO_NET(vdev);
2903     /*
2904      * The actual needed state is now in saved_guest_offloads,
2905      * see virtio_net_post_load_device for detail.
2906      * Restore it back and apply the desired offloads.
2907      */
2908     n->curr_guest_offloads = n->saved_guest_offloads;
2909     if (peer_has_vnet_hdr(n)) {
2910         virtio_net_apply_guest_offloads(n);
2911     }
2912
2913     return 0;
2914 }
2915
/*
 * Migration description of a single VirtIONetQueue: only its tx_waiting
 * field is transferred.  It is referenced both for the first queue (from
 * the device description) and for the remaining queues (from the
 * tmp-based tx_waiting description).
 */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
   },
};
2924
2925 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
2926 {
2927     return VIRTIO_NET(opaque)->max_queue_pairs > 1;
2928 }
2929
2930 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2931 {
2932     return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2933                                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2934 }
2935
2936 static bool mac_table_fits(void *opaque, int version_id)
2937 {
2938     return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2939 }
2940
/* Field test: exact complement of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    bool fits = mac_table_fits(opaque, version_id);

    return !fits;
}
2945
/*
 * Temporary structure shared by all the VMSTATE_WITH_TMP sections of the
 * virtio-net device description, although each section only uses some
 * of the fields.
 */
struct VirtIONetMigTmp {
    /* Device being migrated; every hook reaches the real state through it. */
    VirtIONet      *parent;
    /* parent->vqs + 1: start of the 2nd and subsequent queues. */
    VirtIONetQueue *vqs_1;
    /* parent->curr_queue_pairs - 1, or 0 when there are no queue pairs. */
    uint16_t        curr_queue_pairs_1;
    /* Copy of parent->has_ufo on save; only tested against the peer on load. */
    uint8_t         has_ufo;
    /* Copy of parent->has_vnet_hdr on save; only tested on load. */
    uint32_t        has_vnet_hdr;
};
2956
2957 /* The 2nd and subsequent tx_waiting flags are loaded later than
2958  * the 1st entry in the queue_pairs and only if there's more than one
2959  * entry.  We use the tmp mechanism to calculate a temporary
2960  * pointer and count and also validate the count.
2961  */
2962
2963 static int virtio_net_tx_waiting_pre_save(void *opaque)
2964 {
2965     struct VirtIONetMigTmp *tmp = opaque;
2966
2967     tmp->vqs_1 = tmp->parent->vqs + 1;
2968     tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
2969     if (tmp->parent->curr_queue_pairs == 0) {
2970         tmp->curr_queue_pairs_1 = 0;
2971     }
2972
2973     return 0;
2974 }
2975
2976 static int virtio_net_tx_waiting_pre_load(void *opaque)
2977 {
2978     struct VirtIONetMigTmp *tmp = opaque;
2979
2980     /* Reuse the pointer setup from save */
2981     virtio_net_tx_waiting_pre_save(opaque);
2982
2983     if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
2984         error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
2985             tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
2986
2987         return -EINVAL;
2988     }
2989
2990     return 0; /* all good */
2991 }
2992
/*
 * Tmp-based description of the tx_waiting flags of the 2nd and subsequent
 * queues: an array of curr_queue_pairs_1 VirtIONetQueue entries starting
 * at vqs_1, each encoded with vmstate_virtio_net_queue_tx_waiting.  Both
 * values are computed by the pre_save/pre_load hooks.
 */
static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                     curr_queue_pairs_1,
                                     vmstate_virtio_net_queue_tx_waiting,
                                     struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};
3005
3006 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3007  * flag set we need to check that we have it
3008  */
3009 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3010 {
3011     struct VirtIONetMigTmp *tmp = opaque;
3012
3013     if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3014         error_report("virtio-net: saved image requires TUN_F_UFO support");
3015         return -EINVAL;
3016     }
3017
3018     return 0;
3019 }
3020
3021 static int virtio_net_ufo_pre_save(void *opaque)
3022 {
3023     struct VirtIONetMigTmp *tmp = opaque;
3024
3025     tmp->has_ufo = tmp->parent->has_ufo;
3026
3027     return 0;
3028 }
3029
/*
 * Tmp-based description carrying the single has_ufo byte.  The value is
 * filled in by the pre_save hook and checked against the local peer by
 * the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3039
3040 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3041  * flag set we need to check that we have it
3042  */
3043 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3044 {
3045     struct VirtIONetMigTmp *tmp = opaque;
3046
3047     if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3048         error_report("virtio-net: saved image requires vnet_hdr=on");
3049         return -EINVAL;
3050     }
3051
3052     return 0;
3053 }
3054
3055 static int virtio_net_vnet_pre_save(void *opaque)
3056 {
3057     struct VirtIONetMigTmp *tmp = opaque;
3058
3059     tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3060
3061     return 0;
3062 }
3063
/*
 * Tmp-based description carrying the 32-bit has_vnet_hdr value.  It is
 * filled in by the pre_save hook and checked against the local peer by
 * the post_load hook.
 */
static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};
3073
3074 static bool virtio_net_rss_needed(void *opaque)
3075 {
3076     return VIRTIO_NET(opaque)->rss_data.enabled;
3077 }
3078
/*
 * Optional subsection with the RSS configuration (flags, hash types, key
 * and indirection table).  It is gated by virtio_net_rss_needed(), i.e.
 * by rss_data.enabled.  The indirection table is a variable-length array
 * of uint16_t sized by rss_data.indirections_len.
 */
static const VMStateDescription vmstate_virtio_net_rss = {
    .name      = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};
3099
/*
 * Top-level migration description of the virtio-net device state.
 * virtio_net_post_load_device() runs after the fields are loaded, and the
 * RSS state travels in a separate subsection.
 *
 * NOTE(review): the order of the entries presumably defines the stream
 * layout - confirm before reordering or inserting fields.
 */
static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        /* tx_waiting of the first queue; the others follow further down */
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: if the table fits we load it, else we throw it
         * away - this can happen if the source has a larger MAC table;
         * post-load resets in_use in that case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        /* has_vnet_hdr: checked against the local peer, not stored */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        /* has_ufo: checked against the local peer, not stored */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        /* The queue pair counts are only present for multi-pair devices */
        VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
        /* tx_waiting of the 2nd and subsequent queues */
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
   },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};
3155
/*
 * Net client callbacks of the virtio-net NIC.  This table is handed to
 * qemu_new_nic() when the device is realized.
 */
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};
3165
3166 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3167 {
3168     VirtIONet *n = VIRTIO_NET(vdev);
3169     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3170     assert(n->vhost_started);
3171     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3172 }
3173
3174 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3175                                            bool mask)
3176 {
3177     VirtIONet *n = VIRTIO_NET(vdev);
3178     NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3179     assert(n->vhost_started);
3180     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3181                              vdev, idx, mask);
3182 }
3183
3184 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3185 {
3186     virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3187
3188     n->config_size = virtio_feature_get_config_size(feature_sizes,
3189                                                     host_features);
3190 }
3191
3192 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3193                                    const char *type)
3194 {
3195     /*
3196      * The name can be NULL, the netclient name will be type.x.
3197      */
3198     assert(type != NULL);
3199
3200     g_free(n->netclient_name);
3201     g_free(n->netclient_type);
3202     n->netclient_name = g_strdup(name);
3203     n->netclient_type = g_strdup(type);
3204 }
3205
3206 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3207 {
3208     HotplugHandler *hotplug_ctrl;
3209     PCIDevice *pci_dev;
3210     Error *err = NULL;
3211
3212     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3213     if (hotplug_ctrl) {
3214         pci_dev = PCI_DEVICE(dev);
3215         pci_dev->partially_hotplugged = true;
3216         hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3217         if (err) {
3218             error_report_err(err);
3219             return false;
3220         }
3221     } else {
3222         return false;
3223     }
3224     return true;
3225 }
3226
3227 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3228                                     Error **errp)
3229 {
3230     Error *err = NULL;
3231     HotplugHandler *hotplug_ctrl;
3232     PCIDevice *pdev = PCI_DEVICE(dev);
3233     BusState *primary_bus;
3234
3235     if (!pdev->partially_hotplugged) {
3236         return true;
3237     }
3238     primary_bus = dev->parent_bus;
3239     if (!primary_bus) {
3240         error_setg(errp, "virtio_net: couldn't find primary bus");
3241         return false;
3242     }
3243     qdev_set_parent_bus(dev, primary_bus, &error_abort);
3244     qatomic_set(&n->failover_primary_hidden, false);
3245     hotplug_ctrl = qdev_get_hotplug_handler(dev);
3246     if (hotplug_ctrl) {
3247         hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3248         if (err) {
3249             goto out;
3250         }
3251         hotplug_handler_plug(hotplug_ctrl, dev, &err);
3252     }
3253     pdev->partially_hotplugged = false;
3254
3255 out:
3256     error_propagate(errp, err);
3257     return !err;
3258 }
3259
3260 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3261 {
3262     bool should_be_hidden;
3263     Error *err = NULL;
3264     DeviceState *dev = failover_find_primary_device(n);
3265
3266     if (!dev) {
3267         return;
3268     }
3269
3270     should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3271
3272     if (migration_in_setup(s) && !should_be_hidden) {
3273         if (failover_unplug_primary(n, dev)) {
3274             vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3275             qapi_event_send_unplug_primary(dev->id);
3276             qatomic_set(&n->failover_primary_hidden, true);
3277         } else {
3278             warn_report("couldn't unplug primary device");
3279         }
3280     } else if (migration_has_failed(s)) {
3281         /* We already unplugged the device let's plug it back */
3282         if (!failover_replug_primary(n, dev, &err)) {
3283             if (err) {
3284                 error_report_err(err);
3285             }
3286         }
3287     }
3288 }
3289
3290 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3291 {
3292     MigrationState *s = data;
3293     VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3294     virtio_net_handle_migration_primary(n, s);
3295 }
3296
3297 static bool failover_hide_primary_device(DeviceListener *listener,
3298                                          const QDict *device_opts,
3299                                          bool from_json,
3300                                          Error **errp)
3301 {
3302     VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3303     const char *standby_id;
3304
3305     if (!device_opts) {
3306         return false;
3307     }
3308
3309     if (!qdict_haskey(device_opts, "failover_pair_id")) {
3310         return false;
3311     }
3312
3313     if (!qdict_haskey(device_opts, "id")) {
3314         error_setg(errp, "Device with failover_pair_id needs to have id");
3315         return false;
3316     }
3317
3318     standby_id = qdict_get_str(device_opts, "failover_pair_id");
3319     if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3320         return false;
3321     }
3322
3323     /*
3324      * The hide helper can be called several times for a given device.
3325      * Check there is only one primary for a virtio-net device but
3326      * don't duplicate the qdict several times if it's called for the same
3327      * device.
3328      */
3329     if (n->primary_opts) {
3330         const char *old, *new;
3331         /* devices with failover_pair_id always have an id */
3332         old = qdict_get_str(n->primary_opts, "id");
3333         new = qdict_get_str(device_opts, "id");
3334         if (strcmp(old, new) != 0) {
3335             error_setg(errp, "Cannot attach more than one primary device to "
3336                        "'%s': '%s' and '%s'", n->netclient_name, old, new);
3337             return false;
3338         }
3339     } else {
3340         n->primary_opts = qdict_clone_shallow(device_opts);
3341         n->primary_opts_from_json = from_json;
3342     }
3343
3344     /* failover_primary_hidden is set during feature negotiation */
3345     return qatomic_read(&n->failover_primary_hidden);
3346 }
3347
3348 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3349 {
3350     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3351     VirtIONet *n = VIRTIO_NET(dev);
3352     NetClientState *nc;
3353     int i;
3354
3355     if (n->net_conf.mtu) {
3356         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3357     }
3358
3359     if (n->net_conf.duplex_str) {
3360         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3361             n->net_conf.duplex = DUPLEX_HALF;
3362         } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3363             n->net_conf.duplex = DUPLEX_FULL;
3364         } else {
3365             error_setg(errp, "'duplex' must be 'half' or 'full'");
3366             return;
3367         }
3368         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3369     } else {
3370         n->net_conf.duplex = DUPLEX_UNKNOWN;
3371     }
3372
3373     if (n->net_conf.speed < SPEED_UNKNOWN) {
3374         error_setg(errp, "'speed' must be between 0 and INT_MAX");
3375         return;
3376     }
3377     if (n->net_conf.speed >= 0) {
3378         n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3379     }
3380
3381     if (n->failover) {
3382         n->primary_listener.hide_device = failover_hide_primary_device;
3383         qatomic_set(&n->failover_primary_hidden, true);
3384         device_listener_register(&n->primary_listener);
3385         n->migration_state.notify = virtio_net_migration_state_notifier;
3386         add_migration_state_change_notifier(&n->migration_state);
3387         n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3388     }
3389
3390     virtio_net_set_config_size(n, n->host_features);
3391     virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3392
3393     /*
3394      * We set a lower limit on RX queue size to what it always was.
3395      * Guests that want a smaller ring can always resize it without
3396      * help from us (using virtio 1 and up).
3397      */
3398     if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3399         n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3400         !is_power_of_2(n->net_conf.rx_queue_size)) {
3401         error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3402                    "must be a power of 2 between %d and %d.",
3403                    n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3404                    VIRTQUEUE_MAX_SIZE);
3405         virtio_cleanup(vdev);
3406         return;
3407     }
3408
3409     if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3410         n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3411         !is_power_of_2(n->net_conf.tx_queue_size)) {
3412         error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3413                    "must be a power of 2 between %d and %d",
3414                    n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3415                    VIRTQUEUE_MAX_SIZE);
3416         virtio_cleanup(vdev);
3417         return;
3418     }
3419
3420     n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3421
3422     /*
3423      * Figure out the datapath queue pairs since the backend could
3424      * provide control queue via peers as well.
3425      */
3426     if (n->nic_conf.peers.queues) {
3427         for (i = 0; i < n->max_ncs; i++) {
3428             if (n->nic_conf.peers.ncs[i]->is_datapath) {
3429                 ++n->max_queue_pairs;
3430             }
3431         }
3432     }
3433     n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3434
3435     if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3436         error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3437                    "must be a positive integer less than %d.",
3438                    n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3439         virtio_cleanup(vdev);
3440         return;
3441     }
3442     n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queue_pairs);
3443     n->curr_queue_pairs = 1;
3444     n->tx_timeout = n->net_conf.txtimer;
3445
3446     if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3447                        && strcmp(n->net_conf.tx, "bh")) {
3448         warn_report("virtio-net: "
3449                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3450                     n->net_conf.tx);
3451         error_printf("Defaulting to \"bh\"");
3452     }
3453
3454     n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3455                                     n->net_conf.tx_queue_size);
3456
3457     for (i = 0; i < n->max_queue_pairs; i++) {
3458         virtio_net_add_queue(n, i);
3459     }
3460
3461     n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3462     qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3463     memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3464     n->status = VIRTIO_NET_S_LINK_UP;
3465     qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3466                               QEMU_CLOCK_VIRTUAL,
3467                               virtio_net_announce_timer, n);
3468     n->announce_timer.round = 0;
3469
3470     if (n->netclient_type) {
3471         /*
3472          * Happen when virtio_net_set_netclient_name has been called.
3473          */
3474         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3475                               n->netclient_type, n->netclient_name, n);
3476     } else {
3477         n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3478                               object_get_typename(OBJECT(dev)), dev->id, n);
3479     }
3480
3481     for (i = 0; i < n->max_queue_pairs; i++) {
3482         n->nic->ncs[i].do_not_pad = true;
3483     }
3484
3485     peer_test_vnet_hdr(n);
3486     if (peer_has_vnet_hdr(n)) {
3487         for (i = 0; i < n->max_queue_pairs; i++) {
3488             qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3489         }
3490         n->host_hdr_len = sizeof(struct virtio_net_hdr);
3491     } else {
3492         n->host_hdr_len = 0;
3493     }
3494
3495     qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3496
3497     n->vqs[0].tx_waiting = 0;
3498     n->tx_burst = n->net_conf.txburst;
3499     virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3500     n->promisc = 1; /* for compatibility */
3501
3502     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3503
3504     n->vlans = g_malloc0(MAX_VLAN >> 3);
3505
3506     nc = qemu_get_queue(n->nic);
3507     nc->rxfilter_notify_enabled = 1;
3508
3509    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3510         struct virtio_net_config netcfg = {};
3511         memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3512         vhost_net_set_config(get_vhost_net(nc->peer),
3513             (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3514     }
3515     QTAILQ_INIT(&n->rsc_chains);
3516     n->qdev = dev;
3517
3518     net_rx_pkt_init(&n->rx_pkt, false);
3519
3520     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3521         virtio_net_load_ebpf(n);
3522     }
3523 }
3524
3525 static void virtio_net_device_unrealize(DeviceState *dev)
3526 {
3527     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3528     VirtIONet *n = VIRTIO_NET(dev);
3529     int i, max_queue_pairs;
3530
3531     if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3532         virtio_net_unload_ebpf(n);
3533     }
3534
3535     /* This will stop vhost backend if appropriate. */
3536     virtio_net_set_status(vdev, 0);
3537
3538     g_free(n->netclient_name);
3539     n->netclient_name = NULL;
3540     g_free(n->netclient_type);
3541     n->netclient_type = NULL;
3542
3543     g_free(n->mac_table.macs);
3544     g_free(n->vlans);
3545
3546     if (n->failover) {
3547         qobject_unref(n->primary_opts);
3548         device_listener_unregister(&n->primary_listener);
3549         remove_migration_state_change_notifier(&n->migration_state);
3550     } else {
3551         assert(n->primary_opts == NULL);
3552     }
3553
3554     max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3555     for (i = 0; i < max_queue_pairs; i++) {
3556         virtio_net_del_queue(n, i);
3557     }
3558     /* delete also control vq */
3559     virtio_del_queue(vdev, max_queue_pairs * 2);
3560     qemu_announce_timer_del(&n->announce_timer, false);
3561     g_free(n->vqs);
3562     qemu_del_nic(n->nic);
3563     virtio_net_rsc_cleanup(n);
3564     g_free(n->rss_data.indirections_table);
3565     net_rx_pkt_uninit(n->rx_pkt);
3566     virtio_cleanup(vdev);
3567 }
3568
3569 static void virtio_net_instance_init(Object *obj)
3570 {
3571     VirtIONet *n = VIRTIO_NET(obj);
3572
3573     /*
3574      * The default config_size is sizeof(struct virtio_net_config).
3575      * Can be overriden with virtio_net_set_config_size.
3576      */
3577     n->config_size = sizeof(struct virtio_net_config);
3578     device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3579                                   "bootindex", "/ethernet-phy@0",
3580                                   DEVICE(n));
3581
3582     ebpf_rss_init(&n->ebpf_rss);
3583 }
3584
3585 static int virtio_net_pre_save(void *opaque)
3586 {
3587     VirtIONet *n = opaque;
3588
3589     /* At this point, backend must be stopped, otherwise
3590      * it might keep writing to memory. */
3591     assert(!n->vhost_started);
3592
3593     return 0;
3594 }
3595
3596 static bool primary_unplug_pending(void *opaque)
3597 {
3598     DeviceState *dev = opaque;
3599     DeviceState *primary;
3600     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3601     VirtIONet *n = VIRTIO_NET(vdev);
3602
3603     if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3604         return false;
3605     }
3606     primary = failover_find_primary_device(n);
3607     return primary ? primary->pending_deleted_event : false;
3608 }
3609
3610 static bool dev_unplug_pending(void *opaque)
3611 {
3612     DeviceState *dev = opaque;
3613     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3614
3615     return vdc->primary_unplug_pending(dev);
3616 }
3617
3618 static const VMStateDescription vmstate_virtio_net = {
3619     .name = "virtio-net",
3620     .minimum_version_id = VIRTIO_NET_VM_VERSION,
3621     .version_id = VIRTIO_NET_VM_VERSION,
3622     .fields = (VMStateField[]) {
3623         VMSTATE_VIRTIO_DEVICE,
3624         VMSTATE_END_OF_LIST()
3625     },
3626     .pre_save = virtio_net_pre_save,
3627     .dev_unplug_pending = dev_unplug_pending,
3628 };
3629
3630 static Property virtio_net_properties[] = {
3631     DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3632                     VIRTIO_NET_F_CSUM, true),
3633     DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3634                     VIRTIO_NET_F_GUEST_CSUM, true),
3635     DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3636     DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3637                     VIRTIO_NET_F_GUEST_TSO4, true),
3638     DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3639                     VIRTIO_NET_F_GUEST_TSO6, true),
3640     DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3641                     VIRTIO_NET_F_GUEST_ECN, true),
3642     DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3643                     VIRTIO_NET_F_GUEST_UFO, true),
3644     DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3645                     VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3646     DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3647                     VIRTIO_NET_F_HOST_TSO4, true),
3648     DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3649                     VIRTIO_NET_F_HOST_TSO6, true),
3650     DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3651                     VIRTIO_NET_F_HOST_ECN, true),
3652     DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3653                     VIRTIO_NET_F_HOST_UFO, true),
3654     DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3655                     VIRTIO_NET_F_MRG_RXBUF, true),
3656     DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3657                     VIRTIO_NET_F_STATUS, true),
3658     DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3659                     VIRTIO_NET_F_CTRL_VQ, true),
3660     DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3661                     VIRTIO_NET_F_CTRL_RX, true),
3662     DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3663                     VIRTIO_NET_F_CTRL_VLAN, true),
3664     DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3665                     VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3666     DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3667                     VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3668     DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3669                     VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3670     DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3671     DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3672                     VIRTIO_NET_F_RSS, false),
3673     DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3674                     VIRTIO_NET_F_HASH_REPORT, false),
3675     DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3676                     VIRTIO_NET_F_RSC_EXT, false),
3677     DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3678                        VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3679     DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3680     DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3681                        TX_TIMER_INTERVAL),
3682     DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3683     DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3684     DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3685                        VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3686     DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3687                        VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3688     DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3689     DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3690                      true),
3691     DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3692     DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3693     DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3694     DEFINE_PROP_END_OF_LIST(),
3695 };
3696
3697 static void virtio_net_class_init(ObjectClass *klass, void *data)
3698 {
3699     DeviceClass *dc = DEVICE_CLASS(klass);
3700     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3701
3702     device_class_set_props(dc, virtio_net_properties);
3703     dc->vmsd = &vmstate_virtio_net;
3704     set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3705     vdc->realize = virtio_net_device_realize;
3706     vdc->unrealize = virtio_net_device_unrealize;
3707     vdc->get_config = virtio_net_get_config;
3708     vdc->set_config = virtio_net_set_config;
3709     vdc->get_features = virtio_net_get_features;
3710     vdc->set_features = virtio_net_set_features;
3711     vdc->bad_features = virtio_net_bad_features;
3712     vdc->reset = virtio_net_reset;
3713     vdc->set_status = virtio_net_set_status;
3714     vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3715     vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3716     vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3717     vdc->post_load = virtio_net_post_load_virtio;
3718     vdc->vmsd = &vmstate_virtio_net_device;
3719     vdc->primary_unplug_pending = primary_unplug_pending;
3720 }
3721
3722 static const TypeInfo virtio_net_info = {
3723     .name = TYPE_VIRTIO_NET,
3724     .parent = TYPE_VIRTIO_DEVICE,
3725     .instance_size = sizeof(VirtIONet),
3726     .instance_init = virtio_net_instance_init,
3727     .class_init = virtio_net_class_init,
3728 };
3729
3730 static void virtio_register_types(void)
3731 {
3732     type_register_static(&virtio_net_info);
3733 }
3734
3735 type_init(virtio_register_types)