virtio-net: correctly copy vnet header when flushing TX
[qemu/kevin.git] / hw / net / virtio-net.c
blob5a79bc3a3a0211d03fae472b02f6fbd955f8bd69
1 /*
2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/log.h"
18 #include "qemu/main-loop.h"
19 #include "qemu/module.h"
20 #include "hw/virtio/virtio.h"
21 #include "net/net.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
24 #include "qemu/error-report.h"
25 #include "qemu/timer.h"
26 #include "qemu/option.h"
27 #include "qemu/option_int.h"
28 #include "qemu/config-file.h"
29 #include "qapi/qmp/qdict.h"
30 #include "hw/virtio/virtio-net.h"
31 #include "net/vhost_net.h"
32 #include "net/announce.h"
33 #include "hw/virtio/virtio-bus.h"
34 #include "qapi/error.h"
35 #include "qapi/qapi-events-net.h"
36 #include "hw/qdev-properties.h"
37 #include "qapi/qapi-types-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "hw/virtio/virtio-access.h"
40 #include "migration/misc.h"
41 #include "standard-headers/linux/ethtool.h"
42 #include "sysemu/sysemu.h"
43 #include "trace.h"
44 #include "monitor/qdev.h"
45 #include "hw/pci/pci_device.h"
46 #include "net_rx_pkt.h"
47 #include "hw/virtio/vhost.h"
48 #include "sysemu/qtest.h"
#define VIRTIO_NET_VM_VERSION    11

/* Queue sizes used to be hard-wired to this value. */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/*
 * For now only larger queue_pairs are allowed; with virtio-1 the guest
 * can downsize.
 */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload: the length field in the header is 16 bits wide. */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* Header length value found in an IP header that carries no options. */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/*
 * Interval of the timer that purges coalesced packets.  This value has a
 * large effect on performance and should be tuned carefully: '300000'
 * (300us) is the recommended value to pass the WHQL test, '50000' can gain
 * 2x netperf throughput with tso/gso/gro 'off'.
 */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* Union of every RSS hash type advertised in supported_hash_types. */
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
91 static const VirtIOFeature feature_sizes[] = {
92 {.flags = 1ULL << VIRTIO_NET_F_MAC,
93 .end = endof(struct virtio_net_config, mac)},
94 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
95 .end = endof(struct virtio_net_config, status)},
96 {.flags = 1ULL << VIRTIO_NET_F_MQ,
97 .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
98 {.flags = 1ULL << VIRTIO_NET_F_MTU,
99 .end = endof(struct virtio_net_config, mtu)},
100 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
101 .end = endof(struct virtio_net_config, duplex)},
102 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
103 .end = endof(struct virtio_net_config, supported_hash_types)},
107 static const VirtIOConfigSizeParams cfg_size_params = {
108 .min_size = endof(struct virtio_net_config, mac),
109 .max_size = sizeof(struct virtio_net_config),
110 .feature_sizes = feature_sizes
113 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
115 VirtIONet *n = qemu_get_nic_opaque(nc);
117 return &n->vqs[nc->queue_index];
/*
 * Convert a virtqueue index into a queue pair index: two consecutive
 * virtqueue indexes map onto the same pair.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_pair = 2;

    return queue_index / vqs_per_pair;
}
125 static void flush_or_purge_queued_packets(NetClientState *nc)
127 if (!nc->peer) {
128 return;
131 qemu_flush_or_purge_queued_packets(nc->peer, true);
132 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */
139 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
141 VirtIONet *n = VIRTIO_NET(vdev);
142 struct virtio_net_config netcfg;
143 NetClientState *nc = qemu_get_queue(n->nic);
144 static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };
146 int ret = 0;
147 memset(&netcfg, 0 , sizeof(struct virtio_net_config));
148 virtio_stw_p(vdev, &netcfg.status, n->status);
149 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
150 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
151 memcpy(netcfg.mac, n->mac, ETH_ALEN);
152 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
153 netcfg.duplex = n->net_conf.duplex;
154 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
155 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
156 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
157 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
158 virtio_stl_p(vdev, &netcfg.supported_hash_types,
159 VIRTIO_NET_RSS_SUPPORTED_HASHES);
160 memcpy(config, &netcfg, n->config_size);
163 * Is this VDPA? No peer means not VDPA: there's no way to
164 * disconnect/reconnect a VDPA peer.
166 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
167 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
168 n->config_size);
169 if (ret == -1) {
170 return;
174 * Some NIC/kernel combinations present 0 as the mac address. As that
175 * is not a legal address, try to proceed with the address from the
176 * QEMU command line in the hope that the address has been configured
177 * correctly elsewhere - just not reported by the device.
179 if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
180 info_report("Zero hardware mac address detected. Ignoring.");
181 memcpy(netcfg.mac, n->mac, ETH_ALEN);
184 netcfg.status |= virtio_tswap16(vdev,
185 n->status & VIRTIO_NET_S_ANNOUNCE);
186 memcpy(config, &netcfg, n->config_size);
190 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
192 VirtIONet *n = VIRTIO_NET(vdev);
193 struct virtio_net_config netcfg = {};
194 NetClientState *nc = qemu_get_queue(n->nic);
196 memcpy(&netcfg, config, n->config_size);
198 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
199 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
200 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
201 memcpy(n->mac, netcfg.mac, ETH_ALEN);
202 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
206 * Is this VDPA? No peer means not VDPA: there's no way to
207 * disconnect/reconnect a VDPA peer.
209 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
210 vhost_net_set_config(get_vhost_net(nc->peer),
211 (uint8_t *)&netcfg, 0, n->config_size,
212 VHOST_SET_CONFIG_TYPE_FRONTEND);
216 static bool virtio_net_started(VirtIONet *n, uint8_t status)
218 VirtIODevice *vdev = VIRTIO_DEVICE(n);
219 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
220 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
223 static void virtio_net_announce_notify(VirtIONet *net)
225 VirtIODevice *vdev = VIRTIO_DEVICE(net);
226 trace_virtio_net_announce_notify();
228 net->status |= VIRTIO_NET_S_ANNOUNCE;
229 virtio_notify_config(vdev);
232 static void virtio_net_announce_timer(void *opaque)
234 VirtIONet *n = opaque;
235 trace_virtio_net_announce_timer(n->announce_timer.round);
237 n->announce_timer.round--;
238 virtio_net_announce_notify(n);
241 static void virtio_net_announce(NetClientState *nc)
243 VirtIONet *n = qemu_get_nic_opaque(nc);
244 VirtIODevice *vdev = VIRTIO_DEVICE(n);
247 * Make sure the virtio migration announcement timer isn't running
248 * If it is, let it trigger announcement so that we do not cause
249 * confusion.
251 if (n->announce_timer.round) {
252 return;
255 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
256 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
257 virtio_net_announce_notify(n);
261 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
263 VirtIODevice *vdev = VIRTIO_DEVICE(n);
264 NetClientState *nc = qemu_get_queue(n->nic);
265 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
266 int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
267 n->max_ncs - n->max_queue_pairs : 0;
269 if (!get_vhost_net(nc->peer)) {
270 return;
273 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
274 !!n->vhost_started) {
275 return;
277 if (!n->vhost_started) {
278 int r, i;
280 if (n->needs_vnet_hdr_swap) {
281 error_report("backend does not support %s vnet headers; "
282 "falling back on userspace virtio",
283 virtio_is_big_endian(vdev) ? "BE" : "LE");
284 return;
287 /* Any packets outstanding? Purge them to avoid touching rings
288 * when vhost is running.
290 for (i = 0; i < queue_pairs; i++) {
291 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
293 /* Purge both directions: TX and RX. */
294 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
295 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
298 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
299 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
300 if (r < 0) {
301 error_report("%uBytes MTU not supported by the backend",
302 n->net_conf.mtu);
304 return;
308 n->vhost_started = 1;
309 r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
310 if (r < 0) {
311 error_report("unable to start vhost net: %d: "
312 "falling back on userspace virtio", -r);
313 n->vhost_started = 0;
315 } else {
316 vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
317 n->vhost_started = 0;
321 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
322 NetClientState *peer,
323 bool enable)
325 if (virtio_is_big_endian(vdev)) {
326 return qemu_set_vnet_be(peer, enable);
327 } else {
328 return qemu_set_vnet_le(peer, enable);
332 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
333 int queue_pairs, bool enable)
335 int i;
337 for (i = 0; i < queue_pairs; i++) {
338 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
339 enable) {
340 while (--i >= 0) {
341 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
344 return true;
348 return false;
351 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
353 VirtIODevice *vdev = VIRTIO_DEVICE(n);
354 int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
356 if (virtio_net_started(n, status)) {
357 /* Before using the device, we tell the network backend about the
358 * endianness to use when parsing vnet headers. If the backend
359 * can't do it, we fallback onto fixing the headers in the core
360 * virtio-net code.
362 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
363 queue_pairs, true);
364 } else if (virtio_net_started(n, vdev->status)) {
365 /* After using the device, we need to reset the network backend to
366 * the default (guest native endianness), otherwise the guest may
367 * lose network connectivity if it is rebooted into a different
368 * endianness.
370 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
374 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
376 unsigned int dropped = virtqueue_drop_all(vq);
377 if (dropped) {
378 virtio_notify(vdev, vq);
382 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
384 VirtIONet *n = VIRTIO_NET(vdev);
385 VirtIONetQueue *q;
386 int i;
387 uint8_t queue_status;
389 virtio_net_vnet_endian_status(n, status);
390 virtio_net_vhost_status(n, status);
392 for (i = 0; i < n->max_queue_pairs; i++) {
393 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
394 bool queue_started;
395 q = &n->vqs[i];
397 if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
398 queue_status = 0;
399 } else {
400 queue_status = status;
402 queue_started =
403 virtio_net_started(n, queue_status) && !n->vhost_started;
405 if (queue_started) {
406 qemu_flush_queued_packets(ncs);
409 if (!q->tx_waiting) {
410 continue;
413 if (queue_started) {
414 if (q->tx_timer) {
415 timer_mod(q->tx_timer,
416 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
417 } else {
418 qemu_bh_schedule(q->tx_bh);
420 } else {
421 if (q->tx_timer) {
422 timer_del(q->tx_timer);
423 } else {
424 qemu_bh_cancel(q->tx_bh);
426 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
427 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
428 vdev->vm_running) {
429 /* if tx is waiting we are likely have some packets in tx queue
430 * and disabled notification */
431 q->tx_waiting = 0;
432 virtio_queue_set_notification(q->tx_vq, 1);
433 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
439 static void virtio_net_set_link_status(NetClientState *nc)
441 VirtIONet *n = qemu_get_nic_opaque(nc);
442 VirtIODevice *vdev = VIRTIO_DEVICE(n);
443 uint16_t old_status = n->status;
445 if (nc->link_down)
446 n->status &= ~VIRTIO_NET_S_LINK_UP;
447 else
448 n->status |= VIRTIO_NET_S_LINK_UP;
450 if (n->status != old_status)
451 virtio_notify_config(vdev);
453 virtio_net_set_status(vdev, vdev->status);
456 static void rxfilter_notify(NetClientState *nc)
458 VirtIONet *n = qemu_get_nic_opaque(nc);
460 if (nc->rxfilter_notify_enabled) {
461 char *path = object_get_canonical_path(OBJECT(n->qdev));
462 qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
463 g_free(path);
465 /* disable event notification to avoid events flooding */
466 nc->rxfilter_notify_enabled = 0;
470 static intList *get_vlan_table(VirtIONet *n)
472 intList *list;
473 int i, j;
475 list = NULL;
476 for (i = 0; i < MAX_VLAN >> 5; i++) {
477 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
478 if (n->vlans[i] & (1U << j)) {
479 QAPI_LIST_PREPEND(list, (i << 5) + j);
484 return list;
487 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
489 VirtIONet *n = qemu_get_nic_opaque(nc);
490 VirtIODevice *vdev = VIRTIO_DEVICE(n);
491 RxFilterInfo *info;
492 strList *str_list;
493 int i;
495 info = g_malloc0(sizeof(*info));
496 info->name = g_strdup(nc->name);
497 info->promiscuous = n->promisc;
499 if (n->nouni) {
500 info->unicast = RX_STATE_NONE;
501 } else if (n->alluni) {
502 info->unicast = RX_STATE_ALL;
503 } else {
504 info->unicast = RX_STATE_NORMAL;
507 if (n->nomulti) {
508 info->multicast = RX_STATE_NONE;
509 } else if (n->allmulti) {
510 info->multicast = RX_STATE_ALL;
511 } else {
512 info->multicast = RX_STATE_NORMAL;
515 info->broadcast_allowed = n->nobcast;
516 info->multicast_overflow = n->mac_table.multi_overflow;
517 info->unicast_overflow = n->mac_table.uni_overflow;
519 info->main_mac = qemu_mac_strdup_printf(n->mac);
521 str_list = NULL;
522 for (i = 0; i < n->mac_table.first_multi; i++) {
523 QAPI_LIST_PREPEND(str_list,
524 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
526 info->unicast_table = str_list;
528 str_list = NULL;
529 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
530 QAPI_LIST_PREPEND(str_list,
531 qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
533 info->multicast_table = str_list;
534 info->vlan_table = get_vlan_table(n);
536 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
537 info->vlan = RX_STATE_ALL;
538 } else if (!info->vlan_table) {
539 info->vlan = RX_STATE_NONE;
540 } else {
541 info->vlan = RX_STATE_NORMAL;
544 /* enable event notification after query */
545 nc->rxfilter_notify_enabled = 1;
547 return info;
550 static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
552 VirtIONet *n = VIRTIO_NET(vdev);
553 NetClientState *nc;
555 /* validate queue_index and skip for cvq */
556 if (queue_index >= n->max_queue_pairs * 2) {
557 return;
560 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
562 if (!nc->peer) {
563 return;
566 if (get_vhost_net(nc->peer) &&
567 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
568 vhost_net_virtqueue_reset(vdev, nc, queue_index);
571 flush_or_purge_queued_packets(nc);
574 static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
576 VirtIONet *n = VIRTIO_NET(vdev);
577 NetClientState *nc;
578 int r;
580 /* validate queue_index and skip for cvq */
581 if (queue_index >= n->max_queue_pairs * 2) {
582 return;
585 nc = qemu_get_subqueue(n->nic, vq2q(queue_index));
587 if (!nc->peer || !vdev->vhost_started) {
588 return;
591 if (get_vhost_net(nc->peer) &&
592 nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
593 r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
594 if (r < 0) {
595 error_report("unable to restart vhost net virtqueue: %d, "
596 "when resetting the queue", queue_index);
601 static void virtio_net_reset(VirtIODevice *vdev)
603 VirtIONet *n = VIRTIO_NET(vdev);
604 int i;
606 /* Reset back to compatibility mode */
607 n->promisc = 1;
608 n->allmulti = 0;
609 n->alluni = 0;
610 n->nomulti = 0;
611 n->nouni = 0;
612 n->nobcast = 0;
613 /* multiqueue is disabled by default */
614 n->curr_queue_pairs = 1;
615 timer_del(n->announce_timer.tm);
616 n->announce_timer.round = 0;
617 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
619 /* Flush any MAC and VLAN filter table state */
620 n->mac_table.in_use = 0;
621 n->mac_table.first_multi = 0;
622 n->mac_table.multi_overflow = 0;
623 n->mac_table.uni_overflow = 0;
624 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
625 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
626 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
627 memset(n->vlans, 0, MAX_VLAN >> 3);
629 /* Flush any async TX */
630 for (i = 0; i < n->max_queue_pairs; i++) {
631 flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
635 static void peer_test_vnet_hdr(VirtIONet *n)
637 NetClientState *nc = qemu_get_queue(n->nic);
638 if (!nc->peer) {
639 return;
642 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
645 static int peer_has_vnet_hdr(VirtIONet *n)
647 return n->has_vnet_hdr;
650 static int peer_has_ufo(VirtIONet *n)
652 if (!peer_has_vnet_hdr(n))
653 return 0;
655 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
657 return n->has_ufo;
660 static int peer_has_uso(VirtIONet *n)
662 if (!peer_has_vnet_hdr(n)) {
663 return 0;
666 return qemu_has_uso(qemu_get_queue(n->nic)->peer);
669 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
670 int version_1, int hash_report)
672 int i;
673 NetClientState *nc;
675 n->mergeable_rx_bufs = mergeable_rx_bufs;
678 * Note: when extending the vnet header, please make sure to
679 * change the vnet header copying logic in virtio_net_flush_tx()
680 * as well.
682 if (version_1) {
683 n->guest_hdr_len = hash_report ?
684 sizeof(struct virtio_net_hdr_v1_hash) :
685 sizeof(struct virtio_net_hdr_mrg_rxbuf);
686 n->rss_data.populate_hash = !!hash_report;
687 } else {
688 n->guest_hdr_len = n->mergeable_rx_bufs ?
689 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
690 sizeof(struct virtio_net_hdr);
693 for (i = 0; i < n->max_queue_pairs; i++) {
694 nc = qemu_get_subqueue(n->nic, i);
696 if (peer_has_vnet_hdr(n) &&
697 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
698 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
699 n->host_hdr_len = n->guest_hdr_len;
704 static int virtio_net_max_tx_queue_size(VirtIONet *n)
706 NetClientState *peer = n->nic_conf.peers.ncs[0];
709 * Backends other than vhost-user or vhost-vdpa don't support max queue
710 * size.
712 if (!peer) {
713 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
716 switch(peer->info->type) {
717 case NET_CLIENT_DRIVER_VHOST_USER:
718 case NET_CLIENT_DRIVER_VHOST_VDPA:
719 return VIRTQUEUE_MAX_SIZE;
720 default:
721 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
725 static int peer_attach(VirtIONet *n, int index)
727 NetClientState *nc = qemu_get_subqueue(n->nic, index);
729 if (!nc->peer) {
730 return 0;
733 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
734 vhost_set_vring_enable(nc->peer, 1);
737 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
738 return 0;
741 if (n->max_queue_pairs == 1) {
742 return 0;
745 return tap_enable(nc->peer);
748 static int peer_detach(VirtIONet *n, int index)
750 NetClientState *nc = qemu_get_subqueue(n->nic, index);
752 if (!nc->peer) {
753 return 0;
756 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
757 vhost_set_vring_enable(nc->peer, 0);
760 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
761 return 0;
764 return tap_disable(nc->peer);
767 static void virtio_net_set_queue_pairs(VirtIONet *n)
769 int i;
770 int r;
772 if (n->nic->peer_deleted) {
773 return;
776 for (i = 0; i < n->max_queue_pairs; i++) {
777 if (i < n->curr_queue_pairs) {
778 r = peer_attach(n, i);
779 assert(!r);
780 } else {
781 r = peer_detach(n, i);
782 assert(!r);
787 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
789 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
790 Error **errp)
792 VirtIONet *n = VIRTIO_NET(vdev);
793 NetClientState *nc = qemu_get_queue(n->nic);
795 /* Firstly sync all virtio-net possible supported features */
796 features |= n->host_features;
798 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
800 if (!peer_has_vnet_hdr(n)) {
801 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
802 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
803 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
804 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
806 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
807 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
808 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
809 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
811 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
812 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
813 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
815 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
818 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
819 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
820 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
823 if (!peer_has_uso(n)) {
824 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
825 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
826 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
829 if (!get_vhost_net(nc->peer)) {
830 return features;
833 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
834 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
836 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
837 vdev->backend_features = features;
839 if (n->mtu_bypass_backend &&
840 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
841 features |= (1ULL << VIRTIO_NET_F_MTU);
845 * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
846 * enabled. This happens in the vDPA case.
848 * Make sure the feature set is not incoherent, as the driver could refuse
849 * to start.
851 * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
852 * helping guest to notify the new location with vDPA devices that does not
853 * support it.
855 if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
856 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
859 return features;
862 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
864 uint64_t features = 0;
866 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
867 * but also these: */
868 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
869 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
870 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
871 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
872 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
874 return features;
877 static void virtio_net_apply_guest_offloads(VirtIONet *n)
879 qemu_set_offload(qemu_get_queue(n->nic)->peer,
880 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
881 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
882 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
883 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
884 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
885 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
886 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
889 static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
891 static const uint64_t guest_offloads_mask =
892 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
893 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
894 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
895 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
896 (1ULL << VIRTIO_NET_F_GUEST_UFO) |
897 (1ULL << VIRTIO_NET_F_GUEST_USO4) |
898 (1ULL << VIRTIO_NET_F_GUEST_USO6);
900 return guest_offloads_mask & features;
903 uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
905 VirtIODevice *vdev = VIRTIO_DEVICE(n);
906 return virtio_net_guest_offloads_by_features(vdev->guest_features);
909 typedef struct {
910 VirtIONet *n;
911 DeviceState *dev;
912 } FailoverDevice;
915 * Set the failover primary device
917 * @opaque: FailoverId to setup
918 * @opts: opts for device we are handling
919 * @errp: returns an error if this function fails
921 static int failover_set_primary(DeviceState *dev, void *opaque)
923 FailoverDevice *fdev = opaque;
924 PCIDevice *pci_dev = (PCIDevice *)
925 object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);
927 if (!pci_dev) {
928 return 0;
931 if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
932 fdev->dev = dev;
933 return 1;
936 return 0;
940 * Find the primary device for this failover virtio-net
942 * @n: VirtIONet device
943 * @errp: returns an error if this function fails
945 static DeviceState *failover_find_primary_device(VirtIONet *n)
947 FailoverDevice fdev = {
948 .n = n,
951 qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
952 NULL, NULL, &fdev);
953 return fdev.dev;
956 static void failover_add_primary(VirtIONet *n, Error **errp)
958 Error *err = NULL;
959 DeviceState *dev = failover_find_primary_device(n);
961 if (dev) {
962 return;
965 if (!n->primary_opts) {
966 error_setg(errp, "Primary device not found");
967 error_append_hint(errp, "Virtio-net failover will not work. Make "
968 "sure primary device has parameter"
969 " failover_pair_id=%s\n", n->netclient_name);
970 return;
973 dev = qdev_device_add_from_qdict(n->primary_opts,
974 n->primary_opts_from_json,
975 &err);
976 if (err) {
977 qobject_unref(n->primary_opts);
978 n->primary_opts = NULL;
979 } else {
980 object_unref(OBJECT(dev));
982 error_propagate(errp, err);
985 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
987 VirtIONet *n = VIRTIO_NET(vdev);
988 Error *err = NULL;
989 int i;
991 if (n->mtu_bypass_backend &&
992 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
993 features &= ~(1ULL << VIRTIO_NET_F_MTU);
996 virtio_net_set_multiqueue(n,
997 virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
998 virtio_has_feature(features, VIRTIO_NET_F_MQ));
1000 virtio_net_set_mrg_rx_bufs(n,
1001 virtio_has_feature(features,
1002 VIRTIO_NET_F_MRG_RXBUF),
1003 virtio_has_feature(features,
1004 VIRTIO_F_VERSION_1),
1005 virtio_has_feature(features,
1006 VIRTIO_NET_F_HASH_REPORT));
1008 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
1009 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
1010 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
1011 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
1012 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
1014 if (n->has_vnet_hdr) {
1015 n->curr_guest_offloads =
1016 virtio_net_guest_offloads_by_features(features);
1017 virtio_net_apply_guest_offloads(n);
1020 for (i = 0; i < n->max_queue_pairs; i++) {
1021 NetClientState *nc = qemu_get_subqueue(n->nic, i);
1023 if (!get_vhost_net(nc->peer)) {
1024 continue;
1026 vhost_net_ack_features(get_vhost_net(nc->peer), features);
1029 * keep acked_features in NetVhostUserState up-to-date so it
1030 * can't miss any features configured by guest virtio driver.
1032 vhost_net_save_acked_features(nc->peer);
1035 if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
1036 memset(n->vlans, 0xff, MAX_VLAN >> 3);
1039 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
1040 qapi_event_send_failover_negotiated(n->netclient_name);
1041 qatomic_set(&n->failover_primary_hidden, false);
1042 failover_add_primary(n, &err);
1043 if (err) {
1044 if (!qtest_enabled()) {
1045 warn_report_err(err);
1046 } else {
1047 error_free(err);
1053 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
1054 struct iovec *iov, unsigned int iov_cnt)
1056 uint8_t on;
1057 size_t s;
1058 NetClientState *nc = qemu_get_queue(n->nic);
1060 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
1061 if (s != sizeof(on)) {
1062 return VIRTIO_NET_ERR;
1065 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
1066 n->promisc = on;
1067 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
1068 n->allmulti = on;
1069 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
1070 n->alluni = on;
1071 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
1072 n->nomulti = on;
1073 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
1074 n->nouni = on;
1075 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
1076 n->nobcast = on;
1077 } else {
1078 return VIRTIO_NET_ERR;
1081 rxfilter_notify(nc);
1083 return VIRTIO_NET_OK;
1086 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
1087 struct iovec *iov, unsigned int iov_cnt)
1089 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1090 uint64_t offloads;
1091 size_t s;
1093 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
1094 return VIRTIO_NET_ERR;
1097 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
1098 if (s != sizeof(offloads)) {
1099 return VIRTIO_NET_ERR;
1102 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
1103 uint64_t supported_offloads;
1105 offloads = virtio_ldq_p(vdev, &offloads);
1107 if (!n->has_vnet_hdr) {
1108 return VIRTIO_NET_ERR;
1111 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1112 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
1113 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
1114 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
1115 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1117 supported_offloads = virtio_net_supported_guest_offloads(n);
1118 if (offloads & ~supported_offloads) {
1119 return VIRTIO_NET_ERR;
1122 n->curr_guest_offloads = offloads;
1123 virtio_net_apply_guest_offloads(n);
1125 return VIRTIO_NET_OK;
1126 } else {
1127 return VIRTIO_NET_ERR;
1131 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1132 struct iovec *iov, unsigned int iov_cnt)
1134 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1135 struct virtio_net_ctrl_mac mac_data;
1136 size_t s;
1137 NetClientState *nc = qemu_get_queue(n->nic);
1139 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1140 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1141 return VIRTIO_NET_ERR;
1143 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1144 assert(s == sizeof(n->mac));
1145 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1146 rxfilter_notify(nc);
1148 return VIRTIO_NET_OK;
1151 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1152 return VIRTIO_NET_ERR;
1155 int in_use = 0;
1156 int first_multi = 0;
1157 uint8_t uni_overflow = 0;
1158 uint8_t multi_overflow = 0;
1159 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1161 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1162 sizeof(mac_data.entries));
1163 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1164 if (s != sizeof(mac_data.entries)) {
1165 goto error;
1167 iov_discard_front(&iov, &iov_cnt, s);
1169 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1170 goto error;
1173 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1174 s = iov_to_buf(iov, iov_cnt, 0, macs,
1175 mac_data.entries * ETH_ALEN);
1176 if (s != mac_data.entries * ETH_ALEN) {
1177 goto error;
1179 in_use += mac_data.entries;
1180 } else {
1181 uni_overflow = 1;
1184 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1186 first_multi = in_use;
1188 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1189 sizeof(mac_data.entries));
1190 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1191 if (s != sizeof(mac_data.entries)) {
1192 goto error;
1195 iov_discard_front(&iov, &iov_cnt, s);
1197 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1198 goto error;
1201 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1202 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1203 mac_data.entries * ETH_ALEN);
1204 if (s != mac_data.entries * ETH_ALEN) {
1205 goto error;
1207 in_use += mac_data.entries;
1208 } else {
1209 multi_overflow = 1;
1212 n->mac_table.in_use = in_use;
1213 n->mac_table.first_multi = first_multi;
1214 n->mac_table.uni_overflow = uni_overflow;
1215 n->mac_table.multi_overflow = multi_overflow;
1216 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1217 g_free(macs);
1218 rxfilter_notify(nc);
1220 return VIRTIO_NET_OK;
1222 error:
1223 g_free(macs);
1224 return VIRTIO_NET_ERR;
1227 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1228 struct iovec *iov, unsigned int iov_cnt)
1230 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1231 uint16_t vid;
1232 size_t s;
1233 NetClientState *nc = qemu_get_queue(n->nic);
1235 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1236 vid = virtio_lduw_p(vdev, &vid);
1237 if (s != sizeof(vid)) {
1238 return VIRTIO_NET_ERR;
1241 if (vid >= MAX_VLAN)
1242 return VIRTIO_NET_ERR;
1244 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1245 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1246 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1247 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1248 else
1249 return VIRTIO_NET_ERR;
1251 rxfilter_notify(nc);
1253 return VIRTIO_NET_OK;
1256 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1257 struct iovec *iov, unsigned int iov_cnt)
1259 trace_virtio_net_handle_announce(n->announce_timer.round);
1260 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1261 n->status & VIRTIO_NET_S_ANNOUNCE) {
1262 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1263 if (n->announce_timer.round) {
1264 qemu_announce_timer_step(&n->announce_timer);
1266 return VIRTIO_NET_OK;
1267 } else {
1268 return VIRTIO_NET_ERR;
1272 static void virtio_net_detach_epbf_rss(VirtIONet *n);
1274 static void virtio_net_disable_rss(VirtIONet *n)
1276 if (n->rss_data.enabled) {
1277 trace_virtio_net_rss_disable();
1279 n->rss_data.enabled = false;
1281 virtio_net_detach_epbf_rss(n);
1284 static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
1286 NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
1287 if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
1288 return false;
1291 return nc->info->set_steering_ebpf(nc, prog_fd);
1294 static void rss_data_to_rss_config(struct VirtioNetRssData *data,
1295 struct EBPFRSSConfig *config)
1297 config->redirect = data->redirect;
1298 config->populate_hash = data->populate_hash;
1299 config->hash_types = data->hash_types;
1300 config->indirections_len = data->indirections_len;
1301 config->default_queue = data->default_queue;
1304 static bool virtio_net_attach_epbf_rss(VirtIONet *n)
1306 struct EBPFRSSConfig config = {};
1308 if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
1309 return false;
1312 rss_data_to_rss_config(&n->rss_data, &config);
1314 if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
1315 n->rss_data.indirections_table, n->rss_data.key)) {
1316 return false;
1319 if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
1320 return false;
1323 return true;
1326 static void virtio_net_detach_epbf_rss(VirtIONet *n)
1328 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1331 static bool virtio_net_load_ebpf(VirtIONet *n)
1333 if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
1334 /* backend doesn't support steering ebpf */
1335 return false;
1338 return ebpf_rss_load(&n->ebpf_rss);
1341 static void virtio_net_unload_ebpf(VirtIONet *n)
1343 virtio_net_attach_ebpf_to_backend(n->nic, -1);
1344 ebpf_rss_unload(&n->ebpf_rss);
1347 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1348 struct iovec *iov,
1349 unsigned int iov_cnt,
1350 bool do_rss)
1352 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1353 struct virtio_net_rss_config cfg;
1354 size_t s, offset = 0, size_get;
1355 uint16_t queue_pairs, i;
1356 struct {
1357 uint16_t us;
1358 uint8_t b;
1359 } QEMU_PACKED temp;
1360 const char *err_msg = "";
1361 uint32_t err_value = 0;
1363 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1364 err_msg = "RSS is not negotiated";
1365 goto error;
1367 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1368 err_msg = "Hash report is not negotiated";
1369 goto error;
1371 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1372 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1373 if (s != size_get) {
1374 err_msg = "Short command buffer";
1375 err_value = (uint32_t)s;
1376 goto error;
1378 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1379 n->rss_data.indirections_len =
1380 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1381 n->rss_data.indirections_len++;
1382 if (!do_rss) {
1383 n->rss_data.indirections_len = 1;
1385 if (!is_power_of_2(n->rss_data.indirections_len)) {
1386 err_msg = "Invalid size of indirection table";
1387 err_value = n->rss_data.indirections_len;
1388 goto error;
1390 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1391 err_msg = "Too large indirection table";
1392 err_value = n->rss_data.indirections_len;
1393 goto error;
1395 n->rss_data.default_queue = do_rss ?
1396 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1397 if (n->rss_data.default_queue >= n->max_queue_pairs) {
1398 err_msg = "Invalid default queue";
1399 err_value = n->rss_data.default_queue;
1400 goto error;
1402 offset += size_get;
1403 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1404 g_free(n->rss_data.indirections_table);
1405 n->rss_data.indirections_table = g_malloc(size_get);
1406 if (!n->rss_data.indirections_table) {
1407 err_msg = "Can't allocate indirections table";
1408 err_value = n->rss_data.indirections_len;
1409 goto error;
1411 s = iov_to_buf(iov, iov_cnt, offset,
1412 n->rss_data.indirections_table, size_get);
1413 if (s != size_get) {
1414 err_msg = "Short indirection table buffer";
1415 err_value = (uint32_t)s;
1416 goto error;
1418 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1419 uint16_t val = n->rss_data.indirections_table[i];
1420 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1422 offset += size_get;
1423 size_get = sizeof(temp);
1424 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1425 if (s != size_get) {
1426 err_msg = "Can't get queue_pairs";
1427 err_value = (uint32_t)s;
1428 goto error;
1430 queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
1431 if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
1432 err_msg = "Invalid number of queue_pairs";
1433 err_value = queue_pairs;
1434 goto error;
1436 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1437 err_msg = "Invalid key size";
1438 err_value = temp.b;
1439 goto error;
1441 if (!temp.b && n->rss_data.hash_types) {
1442 err_msg = "No key provided";
1443 err_value = 0;
1444 goto error;
1446 if (!temp.b && !n->rss_data.hash_types) {
1447 virtio_net_disable_rss(n);
1448 return queue_pairs;
1450 offset += size_get;
1451 size_get = temp.b;
1452 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1453 if (s != size_get) {
1454 err_msg = "Can get key buffer";
1455 err_value = (uint32_t)s;
1456 goto error;
1458 n->rss_data.enabled = true;
1460 if (!n->rss_data.populate_hash) {
1461 if (!virtio_net_attach_epbf_rss(n)) {
1462 /* EBPF must be loaded for vhost */
1463 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
1464 warn_report("Can't load eBPF RSS for vhost");
1465 goto error;
1467 /* fallback to software RSS */
1468 warn_report("Can't load eBPF RSS - fallback to software RSS");
1469 n->rss_data.enabled_software_rss = true;
1471 } else {
1472 /* use software RSS for hash populating */
1473 /* and detach eBPF if was loaded before */
1474 virtio_net_detach_epbf_rss(n);
1475 n->rss_data.enabled_software_rss = true;
1478 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1479 n->rss_data.indirections_len,
1480 temp.b);
1481 return queue_pairs;
1482 error:
1483 trace_virtio_net_rss_error(err_msg, err_value);
1484 virtio_net_disable_rss(n);
1485 return 0;
1488 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1489 struct iovec *iov, unsigned int iov_cnt)
1491 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1492 uint16_t queue_pairs;
1493 NetClientState *nc = qemu_get_queue(n->nic);
1495 virtio_net_disable_rss(n);
1496 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1497 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
1498 return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1500 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1501 queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
1502 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1503 struct virtio_net_ctrl_mq mq;
1504 size_t s;
1505 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1506 return VIRTIO_NET_ERR;
1508 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1509 if (s != sizeof(mq)) {
1510 return VIRTIO_NET_ERR;
1512 queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1514 } else {
1515 return VIRTIO_NET_ERR;
1518 if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1519 queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1520 queue_pairs > n->max_queue_pairs ||
1521 !n->multiqueue) {
1522 return VIRTIO_NET_ERR;
1525 n->curr_queue_pairs = queue_pairs;
1526 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
1528 * Avoid updating the backend for a vdpa device: We're only interested
1529 * in updating the device model queues.
1531 return VIRTIO_NET_OK;
1533 /* stop the backend before changing the number of queue_pairs to avoid handling a
1534 * disabled queue */
1535 virtio_net_set_status(vdev, vdev->status);
1536 virtio_net_set_queue_pairs(n);
1538 return VIRTIO_NET_OK;
1541 size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
1542 const struct iovec *in_sg, unsigned in_num,
1543 const struct iovec *out_sg,
1544 unsigned out_num)
1546 VirtIONet *n = VIRTIO_NET(vdev);
1547 struct virtio_net_ctrl_hdr ctrl;
1548 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1549 size_t s;
1550 struct iovec *iov, *iov2;
1552 if (iov_size(in_sg, in_num) < sizeof(status) ||
1553 iov_size(out_sg, out_num) < sizeof(ctrl)) {
1554 virtio_error(vdev, "virtio-net ctrl missing headers");
1555 return 0;
1558 iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
1559 s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
1560 iov_discard_front(&iov, &out_num, sizeof(ctrl));
1561 if (s != sizeof(ctrl)) {
1562 status = VIRTIO_NET_ERR;
1563 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1564 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
1565 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1566 status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
1567 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1568 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
1569 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1570 status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
1571 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1572 status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
1573 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1574 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
1577 s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
1578 assert(s == sizeof(status));
1580 g_free(iov2);
1581 return sizeof(status);
1584 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1586 VirtQueueElement *elem;
1588 for (;;) {
1589 size_t written;
1590 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1591 if (!elem) {
1592 break;
1595 written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
1596 elem->out_sg, elem->out_num);
1597 if (written > 0) {
1598 virtqueue_push(vq, elem, written);
1599 virtio_notify(vdev, vq);
1600 g_free(elem);
1601 } else {
1602 virtqueue_detach_element(vq, elem, 0);
1603 g_free(elem);
1604 break;
1609 /* RX */
1611 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1613 VirtIONet *n = VIRTIO_NET(vdev);
1614 int queue_index = vq2q(virtio_get_queue_index(vq));
1616 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1619 static bool virtio_net_can_receive(NetClientState *nc)
1621 VirtIONet *n = qemu_get_nic_opaque(nc);
1622 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1623 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1625 if (!vdev->vm_running) {
1626 return false;
1629 if (nc->queue_index >= n->curr_queue_pairs) {
1630 return false;
1633 if (!virtio_queue_ready(q->rx_vq) ||
1634 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1635 return false;
1638 return true;
1641 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1643 VirtIONet *n = q->n;
1644 if (virtio_queue_empty(q->rx_vq) ||
1645 (n->mergeable_rx_bufs &&
1646 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1647 virtio_queue_set_notification(q->rx_vq, 1);
1649 /* To avoid a race condition where the guest has made some buffers
1650 * available after the above check but before notification was
1651 * enabled, check for available buffers again.
1653 if (virtio_queue_empty(q->rx_vq) ||
1654 (n->mergeable_rx_bufs &&
1655 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1656 return 0;
1660 virtio_queue_set_notification(q->rx_vq, 0);
1661 return 1;
1664 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1666 virtio_tswap16s(vdev, &hdr->hdr_len);
1667 virtio_tswap16s(vdev, &hdr->gso_size);
1668 virtio_tswap16s(vdev, &hdr->csum_start);
1669 virtio_tswap16s(vdev, &hdr->csum_offset);
1672 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1673 * it never finds out that the packets don't have valid checksums. This
1674 * causes dhclient to get upset. Fedora's carried a patch for ages to
1675 * fix this with Xen but it hasn't appeared in an upstream release of
1676 * dhclient yet.
1678 * To avoid breaking existing guests, we catch udp packets and add
1679 * checksums. This is terrible but it's better than hacking the guest
1680 * kernels.
1682 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1683 * we should provide a mechanism to disable it to avoid polluting the host
1684 * cache.
1686 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1687 uint8_t *buf, size_t size)
1689 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1690 (size > 27 && size < 1500) && /* normal sized MTU */
1691 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1692 (buf[23] == 17) && /* ip.protocol == UDP */
1693 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1694 net_checksum_calculate(buf, size, CSUM_UDP);
1695 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1699 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1700 const void *buf, size_t size)
1702 if (n->has_vnet_hdr) {
1703 /* FIXME this cast is evil */
1704 void *wbuf = (void *)buf;
1705 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1706 size - n->host_hdr_len);
1708 if (n->needs_vnet_hdr_swap) {
1709 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1711 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1712 } else {
1713 struct virtio_net_hdr hdr = {
1714 .flags = 0,
1715 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1717 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1721 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1723 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1724 static const uint8_t vlan[] = {0x81, 0x00};
1725 uint8_t *ptr = (uint8_t *)buf;
1726 int i;
1728 if (n->promisc)
1729 return 1;
1731 ptr += n->host_hdr_len;
1733 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1734 int vid = lduw_be_p(ptr + 14) & 0xfff;
1735 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1736 return 0;
1739 if (ptr[0] & 1) { // multicast
1740 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1741 return !n->nobcast;
1742 } else if (n->nomulti) {
1743 return 0;
1744 } else if (n->allmulti || n->mac_table.multi_overflow) {
1745 return 1;
1748 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1749 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1750 return 1;
1753 } else { // unicast
1754 if (n->nouni) {
1755 return 0;
1756 } else if (n->alluni || n->mac_table.uni_overflow) {
1757 return 1;
1758 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1759 return 1;
1762 for (i = 0; i < n->mac_table.first_multi; i++) {
1763 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1764 return 1;
1769 return 0;
1772 static uint8_t virtio_net_get_hash_type(bool hasip4,
1773 bool hasip6,
1774 EthL4HdrProto l4hdr_proto,
1775 uint32_t types)
1777 if (hasip4) {
1778 switch (l4hdr_proto) {
1779 case ETH_L4_HDR_PROTO_TCP:
1780 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
1781 return NetPktRssIpV4Tcp;
1783 break;
1785 case ETH_L4_HDR_PROTO_UDP:
1786 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
1787 return NetPktRssIpV4Udp;
1789 break;
1791 default:
1792 break;
1795 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1796 return NetPktRssIpV4;
1798 } else if (hasip6) {
1799 switch (l4hdr_proto) {
1800 case ETH_L4_HDR_PROTO_TCP:
1801 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
1802 return NetPktRssIpV6TcpEx;
1804 if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
1805 return NetPktRssIpV6Tcp;
1807 break;
1809 case ETH_L4_HDR_PROTO_UDP:
1810 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
1811 return NetPktRssIpV6UdpEx;
1813 if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
1814 return NetPktRssIpV6Udp;
1816 break;
1818 default:
1819 break;
1822 if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
1823 return NetPktRssIpV6Ex;
1825 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
1826 return NetPktRssIpV6;
1829 return 0xff;
1832 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1833 uint32_t hash)
1835 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1836 hdr->hash_value = hash;
1837 hdr->hash_report = report;
1840 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1841 size_t size)
1843 VirtIONet *n = qemu_get_nic_opaque(nc);
1844 unsigned int index = nc->queue_index, new_index = index;
1845 struct NetRxPkt *pkt = n->rx_pkt;
1846 uint8_t net_hash_type;
1847 uint32_t hash;
1848 bool hasip4, hasip6;
1849 EthL4HdrProto l4hdr_proto;
1850 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1851 VIRTIO_NET_HASH_REPORT_IPv4,
1852 VIRTIO_NET_HASH_REPORT_TCPv4,
1853 VIRTIO_NET_HASH_REPORT_TCPv6,
1854 VIRTIO_NET_HASH_REPORT_IPv6,
1855 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1856 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1857 VIRTIO_NET_HASH_REPORT_UDPv4,
1858 VIRTIO_NET_HASH_REPORT_UDPv6,
1859 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1861 struct iovec iov = {
1862 .iov_base = (void *)buf,
1863 .iov_len = size
1866 net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
1867 net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
1868 net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
1869 n->rss_data.hash_types);
1870 if (net_hash_type > NetPktRssIpV6UdpEx) {
1871 if (n->rss_data.populate_hash) {
1872 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1874 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1877 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1879 if (n->rss_data.populate_hash) {
1880 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1883 if (n->rss_data.redirect) {
1884 new_index = hash & (n->rss_data.indirections_len - 1);
1885 new_index = n->rss_data.indirections_table[new_index];
1888 return (index == new_index) ? -1 : new_index;
1891 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1892 size_t size, bool no_rss)
1894 VirtIONet *n = qemu_get_nic_opaque(nc);
1895 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1896 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1897 VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
1898 size_t lens[VIRTQUEUE_MAX_SIZE];
1899 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1900 struct virtio_net_hdr_mrg_rxbuf mhdr;
1901 unsigned mhdr_cnt = 0;
1902 size_t offset, i, guest_offset, j;
1903 ssize_t err;
1905 if (!virtio_net_can_receive(nc)) {
1906 return -1;
1909 if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
1910 int index = virtio_net_process_rss(nc, buf, size);
1911 if (index >= 0) {
1912 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1913 return virtio_net_receive_rcu(nc2, buf, size, true);
1917 /* hdr_len refers to the header we supply to the guest */
1918 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1919 return 0;
1922 if (!receive_filter(n, buf, size))
1923 return size;
1925 offset = i = 0;
1927 while (offset < size) {
1928 VirtQueueElement *elem;
1929 int len, total;
1930 const struct iovec *sg;
1932 total = 0;
1934 if (i == VIRTQUEUE_MAX_SIZE) {
1935 virtio_error(vdev, "virtio-net unexpected long buffer chain");
1936 err = size;
1937 goto err;
1940 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1941 if (!elem) {
1942 if (i) {
1943 virtio_error(vdev, "virtio-net unexpected empty queue: "
1944 "i %zd mergeable %d offset %zd, size %zd, "
1945 "guest hdr len %zd, host hdr len %zd "
1946 "guest features 0x%" PRIx64,
1947 i, n->mergeable_rx_bufs, offset, size,
1948 n->guest_hdr_len, n->host_hdr_len,
1949 vdev->guest_features);
1951 err = -1;
1952 goto err;
1955 if (elem->in_num < 1) {
1956 virtio_error(vdev,
1957 "virtio-net receive queue contains no in buffers");
1958 virtqueue_detach_element(q->rx_vq, elem, 0);
1959 g_free(elem);
1960 err = -1;
1961 goto err;
1964 sg = elem->in_sg;
1965 if (i == 0) {
1966 assert(offset == 0);
1967 if (n->mergeable_rx_bufs) {
1968 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1969 sg, elem->in_num,
1970 offsetof(typeof(mhdr), num_buffers),
1971 sizeof(mhdr.num_buffers));
1974 receive_header(n, sg, elem->in_num, buf, size);
1975 if (n->rss_data.populate_hash) {
1976 offset = sizeof(mhdr);
1977 iov_from_buf(sg, elem->in_num, offset,
1978 buf + offset, n->host_hdr_len - sizeof(mhdr));
1980 offset = n->host_hdr_len;
1981 total += n->guest_hdr_len;
1982 guest_offset = n->guest_hdr_len;
1983 } else {
1984 guest_offset = 0;
1987 /* copy in packet. ugh */
1988 len = iov_from_buf(sg, elem->in_num, guest_offset,
1989 buf + offset, size - offset);
1990 total += len;
1991 offset += len;
1992 /* If buffers can't be merged, at this point we
1993 * must have consumed the complete packet.
1994 * Otherwise, drop it. */
1995 if (!n->mergeable_rx_bufs && offset < size) {
1996 virtqueue_unpop(q->rx_vq, elem, total);
1997 g_free(elem);
1998 err = size;
1999 goto err;
2002 elems[i] = elem;
2003 lens[i] = total;
2004 i++;
2007 if (mhdr_cnt) {
2008 virtio_stw_p(vdev, &mhdr.num_buffers, i);
2009 iov_from_buf(mhdr_sg, mhdr_cnt,
2011 &mhdr.num_buffers, sizeof mhdr.num_buffers);
2014 for (j = 0; j < i; j++) {
2015 /* signal other side */
2016 virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
2017 g_free(elems[j]);
2020 virtqueue_flush(q->rx_vq, i);
2021 virtio_notify(vdev, q->rx_vq);
2023 return size;
2025 err:
2026 for (j = 0; j < i; j++) {
2027 virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
2028 g_free(elems[j]);
2031 return err;
2034 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
2035 size_t size)
2037 RCU_READ_LOCK_GUARD();
2039 return virtio_net_receive_rcu(nc, buf, size, false);
2042 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2043 const uint8_t *buf,
2044 VirtioNetRscUnit *unit)
2046 uint16_t ip_hdrlen;
2047 struct ip_header *ip;
2049 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2050 + sizeof(struct eth_header));
2051 unit->ip = (void *)ip;
2052 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2053 unit->ip_plen = &ip->ip_len;
2054 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2055 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2056 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2059 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2060 const uint8_t *buf,
2061 VirtioNetRscUnit *unit)
2063 struct ip6_header *ip6;
2065 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2066 + sizeof(struct eth_header));
2067 unit->ip = ip6;
2068 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2069 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2070 + sizeof(struct ip6_header));
2071 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2073 /* There is a difference between payload length in ipv4 and v6,
2074 ip header is excluded in ipv6 */
2075 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2078 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2079 VirtioNetRscSeg *seg)
2081 int ret;
2082 struct virtio_net_hdr_v1 *h;
2084 h = (struct virtio_net_hdr_v1 *)seg->buf;
2085 h->flags = 0;
2086 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2088 if (seg->is_coalesced) {
2089 h->rsc.segments = seg->packets;
2090 h->rsc.dup_acks = seg->dup_ack;
2091 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2092 if (chain->proto == ETH_P_IP) {
2093 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2094 } else {
2095 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2099 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2100 QTAILQ_REMOVE(&chain->buffers, seg, next);
2101 g_free(seg->buf);
2102 g_free(seg);
2104 return ret;
2107 static void virtio_net_rsc_purge(void *opq)
2109 VirtioNetRscSeg *seg, *rn;
2110 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2112 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2113 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2114 chain->stat.purge_failed++;
2115 continue;
2119 chain->stat.timer++;
2120 if (!QTAILQ_EMPTY(&chain->buffers)) {
2121 timer_mod(chain->drain_timer,
2122 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2126 static void virtio_net_rsc_cleanup(VirtIONet *n)
2128 VirtioNetRscChain *chain, *rn_chain;
2129 VirtioNetRscSeg *seg, *rn_seg;
2131 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2132 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2133 QTAILQ_REMOVE(&chain->buffers, seg, next);
2134 g_free(seg->buf);
2135 g_free(seg);
2138 timer_free(chain->drain_timer);
2139 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2140 g_free(chain);
2144 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2145 NetClientState *nc,
2146 const uint8_t *buf, size_t size)
2148 uint16_t hdr_len;
2149 VirtioNetRscSeg *seg;
2151 hdr_len = chain->n->guest_hdr_len;
2152 seg = g_new(VirtioNetRscSeg, 1);
2153 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2154 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2155 memcpy(seg->buf, buf, size);
2156 seg->size = size;
2157 seg->packets = 1;
2158 seg->dup_ack = 0;
2159 seg->is_coalesced = 0;
2160 seg->nc = nc;
2162 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2163 chain->stat.cache++;
2165 switch (chain->proto) {
2166 case ETH_P_IP:
2167 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2168 break;
2169 case ETH_P_IPV6:
2170 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2171 break;
2172 default:
2173 g_assert_not_reached();
2177 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2178 VirtioNetRscSeg *seg,
2179 const uint8_t *buf,
2180 struct tcp_header *n_tcp,
2181 struct tcp_header *o_tcp)
2183 uint32_t nack, oack;
2184 uint16_t nwin, owin;
2186 nack = htonl(n_tcp->th_ack);
2187 nwin = htons(n_tcp->th_win);
2188 oack = htonl(o_tcp->th_ack);
2189 owin = htons(o_tcp->th_win);
2191 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2192 chain->stat.ack_out_of_win++;
2193 return RSC_FINAL;
2194 } else if (nack == oack) {
2195 /* duplicated ack or window probe */
2196 if (nwin == owin) {
2197 /* duplicated ack, add dup ack count due to whql test up to 1 */
2198 chain->stat.dup_ack++;
2199 return RSC_FINAL;
2200 } else {
2201 /* Coalesce window update */
2202 o_tcp->th_win = n_tcp->th_win;
2203 chain->stat.win_update++;
2204 return RSC_COALESCE;
2206 } else {
2207 /* pure ack, go to 'C', finalize*/
2208 chain->stat.pure_ack++;
2209 return RSC_FINAL;
2213 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2214 VirtioNetRscSeg *seg,
2215 const uint8_t *buf,
2216 VirtioNetRscUnit *n_unit)
2218 void *data;
2219 uint16_t o_ip_len;
2220 uint32_t nseq, oseq;
2221 VirtioNetRscUnit *o_unit;
2223 o_unit = &seg->unit;
2224 o_ip_len = htons(*o_unit->ip_plen);
2225 nseq = htonl(n_unit->tcp->th_seq);
2226 oseq = htonl(o_unit->tcp->th_seq);
2228 /* out of order or retransmitted. */
2229 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2230 chain->stat.data_out_of_win++;
2231 return RSC_FINAL;
2234 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2235 if (nseq == oseq) {
2236 if ((o_unit->payload == 0) && n_unit->payload) {
2237 /* From no payload to payload, normal case, not a dup ack or etc */
2238 chain->stat.data_after_pure_ack++;
2239 goto coalesce;
2240 } else {
2241 return virtio_net_rsc_handle_ack(chain, seg, buf,
2242 n_unit->tcp, o_unit->tcp);
2244 } else if ((nseq - oseq) != o_unit->payload) {
2245 /* Not a consistent packet, out of order */
2246 chain->stat.data_out_of_order++;
2247 return RSC_FINAL;
2248 } else {
2249 coalesce:
2250 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2251 chain->stat.over_size++;
2252 return RSC_FINAL;
2255 /* Here comes the right data, the payload length in v4/v6 is different,
2256 so use the field value to update and record the new data len */
2257 o_unit->payload += n_unit->payload; /* update new data len */
2259 /* update field in ip header */
2260 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2262 /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
2263 for windows guest, while this may change the behavior for linux
2264 guest (only if it uses RSC feature). */
2265 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2267 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2268 o_unit->tcp->th_win = n_unit->tcp->th_win;
2270 memmove(seg->buf + seg->size, data, n_unit->payload);
2271 seg->size += n_unit->payload;
2272 seg->packets++;
2273 chain->stat.coalesced++;
2274 return RSC_COALESCE;
2278 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2279 VirtioNetRscSeg *seg,
2280 const uint8_t *buf, size_t size,
2281 VirtioNetRscUnit *unit)
2283 struct ip_header *ip1, *ip2;
2285 ip1 = (struct ip_header *)(unit->ip);
2286 ip2 = (struct ip_header *)(seg->unit.ip);
2287 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2288 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2289 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2290 chain->stat.no_match++;
2291 return RSC_NO_MATCH;
2294 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2297 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2298 VirtioNetRscSeg *seg,
2299 const uint8_t *buf, size_t size,
2300 VirtioNetRscUnit *unit)
2302 struct ip6_header *ip1, *ip2;
2304 ip1 = (struct ip6_header *)(unit->ip);
2305 ip2 = (struct ip6_header *)(seg->unit.ip);
2306 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2307 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2308 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2309 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2310 chain->stat.no_match++;
2311 return RSC_NO_MATCH;
2314 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2317 /* Packets with 'SYN' should bypass, other flag should be sent after drain
2318 * to prevent out of order */
2319 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2320 struct tcp_header *tcp)
2322 uint16_t tcp_hdr;
2323 uint16_t tcp_flag;
2325 tcp_flag = htons(tcp->th_offset_flags);
2326 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2327 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2328 if (tcp_flag & TH_SYN) {
2329 chain->stat.tcp_syn++;
2330 return RSC_BYPASS;
2333 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2334 chain->stat.tcp_ctrl_drain++;
2335 return RSC_FINAL;
2338 if (tcp_hdr > sizeof(struct tcp_header)) {
2339 chain->stat.tcp_all_opt++;
2340 return RSC_FINAL;
2343 return RSC_CANDIDATE;
2346 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2347 NetClientState *nc,
2348 const uint8_t *buf, size_t size,
2349 VirtioNetRscUnit *unit)
2351 int ret;
2352 VirtioNetRscSeg *seg, *nseg;
2354 if (QTAILQ_EMPTY(&chain->buffers)) {
2355 chain->stat.empty_cache++;
2356 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2357 timer_mod(chain->drain_timer,
2358 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2359 return size;
2362 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2363 if (chain->proto == ETH_P_IP) {
2364 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2365 } else {
2366 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2369 if (ret == RSC_FINAL) {
2370 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2371 /* Send failed */
2372 chain->stat.final_failed++;
2373 return 0;
2376 /* Send current packet */
2377 return virtio_net_do_receive(nc, buf, size);
2378 } else if (ret == RSC_NO_MATCH) {
2379 continue;
2380 } else {
2381 /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
2382 seg->is_coalesced = 1;
2383 return size;
2387 chain->stat.no_match_cache++;
2388 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2389 return size;
2392 /* Drain a connection data, this is to avoid out of order segments */
2393 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2394 NetClientState *nc,
2395 const uint8_t *buf, size_t size,
2396 uint16_t ip_start, uint16_t ip_size,
2397 uint16_t tcp_port)
2399 VirtioNetRscSeg *seg, *nseg;
2400 uint32_t ppair1, ppair2;
2402 ppair1 = *(uint32_t *)(buf + tcp_port);
2403 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2404 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2405 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2406 || (ppair1 != ppair2)) {
2407 continue;
2409 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2410 chain->stat.drain_failed++;
2413 break;
2416 return virtio_net_do_receive(nc, buf, size);
2419 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2420 struct ip_header *ip,
2421 const uint8_t *buf, size_t size)
2423 uint16_t ip_len;
2425 /* Not an ipv4 packet */
2426 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2427 chain->stat.ip_option++;
2428 return RSC_BYPASS;
2431 /* Don't handle packets with ip option */
2432 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2433 chain->stat.ip_option++;
2434 return RSC_BYPASS;
2437 if (ip->ip_p != IPPROTO_TCP) {
2438 chain->stat.bypass_not_tcp++;
2439 return RSC_BYPASS;
2442 /* Don't handle packets with ip fragment */
2443 if (!(htons(ip->ip_off) & IP_DF)) {
2444 chain->stat.ip_frag++;
2445 return RSC_BYPASS;
2448 /* Don't handle packets with ecn flag */
2449 if (IPTOS_ECN(ip->ip_tos)) {
2450 chain->stat.ip_ecn++;
2451 return RSC_BYPASS;
2454 ip_len = htons(ip->ip_len);
2455 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2456 || ip_len > (size - chain->n->guest_hdr_len -
2457 sizeof(struct eth_header))) {
2458 chain->stat.ip_hacked++;
2459 return RSC_BYPASS;
2462 return RSC_CANDIDATE;
2465 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2466 NetClientState *nc,
2467 const uint8_t *buf, size_t size)
2469 int32_t ret;
2470 uint16_t hdr_len;
2471 VirtioNetRscUnit unit;
2473 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2475 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2476 + sizeof(struct tcp_header))) {
2477 chain->stat.bypass_not_tcp++;
2478 return virtio_net_do_receive(nc, buf, size);
2481 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2482 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2483 != RSC_CANDIDATE) {
2484 return virtio_net_do_receive(nc, buf, size);
2487 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2488 if (ret == RSC_BYPASS) {
2489 return virtio_net_do_receive(nc, buf, size);
2490 } else if (ret == RSC_FINAL) {
2491 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2492 ((hdr_len + sizeof(struct eth_header)) + 12),
2493 VIRTIO_NET_IP4_ADDR_SIZE,
2494 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2497 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2500 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2501 struct ip6_header *ip6,
2502 const uint8_t *buf, size_t size)
2504 uint16_t ip_len;
2506 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2507 != IP_HEADER_VERSION_6) {
2508 return RSC_BYPASS;
2511 /* Both option and protocol is checked in this */
2512 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2513 chain->stat.bypass_not_tcp++;
2514 return RSC_BYPASS;
2517 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2518 if (ip_len < sizeof(struct tcp_header) ||
2519 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2520 - sizeof(struct ip6_header))) {
2521 chain->stat.ip_hacked++;
2522 return RSC_BYPASS;
2525 /* Don't handle packets with ecn flag */
2526 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2527 chain->stat.ip_ecn++;
2528 return RSC_BYPASS;
2531 return RSC_CANDIDATE;
2534 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2535 const uint8_t *buf, size_t size)
2537 int32_t ret;
2538 uint16_t hdr_len;
2539 VirtioNetRscChain *chain;
2540 VirtioNetRscUnit unit;
2542 chain = opq;
2543 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2545 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2546 + sizeof(tcp_header))) {
2547 return virtio_net_do_receive(nc, buf, size);
2550 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2551 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2552 unit.ip, buf, size)) {
2553 return virtio_net_do_receive(nc, buf, size);
2556 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2557 if (ret == RSC_BYPASS) {
2558 return virtio_net_do_receive(nc, buf, size);
2559 } else if (ret == RSC_FINAL) {
2560 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2561 ((hdr_len + sizeof(struct eth_header)) + 8),
2562 VIRTIO_NET_IP6_ADDR_SIZE,
2563 hdr_len + sizeof(struct eth_header)
2564 + sizeof(struct ip6_header));
2567 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2570 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2571 NetClientState *nc,
2572 uint16_t proto)
2574 VirtioNetRscChain *chain;
2576 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2577 return NULL;
2580 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2581 if (chain->proto == proto) {
2582 return chain;
2586 chain = g_malloc(sizeof(*chain));
2587 chain->n = n;
2588 chain->proto = proto;
2589 if (proto == (uint16_t)ETH_P_IP) {
2590 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2591 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2592 } else {
2593 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2594 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2596 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2597 virtio_net_rsc_purge, chain);
2598 memset(&chain->stat, 0, sizeof(chain->stat));
2600 QTAILQ_INIT(&chain->buffers);
2601 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2603 return chain;
2606 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2607 const uint8_t *buf,
2608 size_t size)
2610 uint16_t proto;
2611 VirtioNetRscChain *chain;
2612 struct eth_header *eth;
2613 VirtIONet *n;
2615 n = qemu_get_nic_opaque(nc);
2616 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2617 return virtio_net_do_receive(nc, buf, size);
2620 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2621 proto = htons(eth->h_proto);
2623 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2624 if (chain) {
2625 chain->stat.received++;
2626 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2627 return virtio_net_rsc_receive4(chain, nc, buf, size);
2628 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2629 return virtio_net_rsc_receive6(chain, nc, buf, size);
2632 return virtio_net_do_receive(nc, buf, size);
2635 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2636 size_t size)
2638 VirtIONet *n = qemu_get_nic_opaque(nc);
2639 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2640 return virtio_net_rsc_receive(nc, buf, size);
2641 } else {
2642 return virtio_net_do_receive(nc, buf, size);
2646 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2648 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2650 VirtIONet *n = qemu_get_nic_opaque(nc);
2651 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2652 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2653 int ret;
2655 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2656 virtio_notify(vdev, q->tx_vq);
2658 g_free(q->async_tx.elem);
2659 q->async_tx.elem = NULL;
2661 virtio_queue_set_notification(q->tx_vq, 1);
2662 ret = virtio_net_flush_tx(q);
2663 if (ret >= n->tx_burst) {
2665 * the flush has been stopped by tx_burst
2666 * we will not receive notification for the
2667 * remainining part, so re-schedule
2669 virtio_queue_set_notification(q->tx_vq, 0);
2670 if (q->tx_bh) {
2671 qemu_bh_schedule(q->tx_bh);
2672 } else {
2673 timer_mod(q->tx_timer,
2674 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2676 q->tx_waiting = 1;
2680 /* TX */
2681 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2683 VirtIONet *n = q->n;
2684 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2685 VirtQueueElement *elem;
2686 int32_t num_packets = 0;
2687 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2688 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2689 return num_packets;
2692 if (q->async_tx.elem) {
2693 virtio_queue_set_notification(q->tx_vq, 0);
2694 return num_packets;
2697 for (;;) {
2698 ssize_t ret;
2699 unsigned int out_num;
2700 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2701 struct virtio_net_hdr_v1_hash vhdr;
2703 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2704 if (!elem) {
2705 break;
2708 out_num = elem->out_num;
2709 out_sg = elem->out_sg;
2710 if (out_num < 1) {
2711 virtio_error(vdev, "virtio-net header not in first element");
2712 virtqueue_detach_element(q->tx_vq, elem, 0);
2713 g_free(elem);
2714 return -EINVAL;
2717 if (n->has_vnet_hdr) {
2718 if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) <
2719 n->guest_hdr_len) {
2720 virtio_error(vdev, "virtio-net header incorrect");
2721 virtqueue_detach_element(q->tx_vq, elem, 0);
2722 g_free(elem);
2723 return -EINVAL;
2725 if (n->needs_vnet_hdr_swap) {
2726 virtio_net_hdr_swap(vdev, (void *) &vhdr);
2727 sg2[0].iov_base = &vhdr;
2728 sg2[0].iov_len = n->guest_hdr_len;
2729 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2730 out_sg, out_num,
2731 n->guest_hdr_len, -1);
2732 if (out_num == VIRTQUEUE_MAX_SIZE) {
2733 goto drop;
2735 out_num += 1;
2736 out_sg = sg2;
2740 * If host wants to see the guest header as is, we can
2741 * pass it on unchanged. Otherwise, copy just the parts
2742 * that host is interested in.
2744 assert(n->host_hdr_len <= n->guest_hdr_len);
2745 if (n->host_hdr_len != n->guest_hdr_len) {
2746 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2747 out_sg, out_num,
2748 0, n->host_hdr_len);
2749 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2750 out_sg, out_num,
2751 n->guest_hdr_len, -1);
2752 out_num = sg_num;
2753 out_sg = sg;
2756 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2757 out_sg, out_num, virtio_net_tx_complete);
2758 if (ret == 0) {
2759 virtio_queue_set_notification(q->tx_vq, 0);
2760 q->async_tx.elem = elem;
2761 return -EBUSY;
2764 drop:
2765 virtqueue_push(q->tx_vq, elem, 0);
2766 virtio_notify(vdev, q->tx_vq);
2767 g_free(elem);
2769 if (++num_packets >= n->tx_burst) {
2770 break;
2773 return num_packets;
2776 static void virtio_net_tx_timer(void *opaque);
2778 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2780 VirtIONet *n = VIRTIO_NET(vdev);
2781 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2783 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2784 virtio_net_drop_tx_queue_data(vdev, vq);
2785 return;
2788 /* This happens when device was stopped but VCPU wasn't. */
2789 if (!vdev->vm_running) {
2790 q->tx_waiting = 1;
2791 return;
2794 if (q->tx_waiting) {
2795 /* We already have queued packets, immediately flush */
2796 timer_del(q->tx_timer);
2797 virtio_net_tx_timer(q);
2798 } else {
2799 /* re-arm timer to flush it (and more) on next tick */
2800 timer_mod(q->tx_timer,
2801 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2802 q->tx_waiting = 1;
2803 virtio_queue_set_notification(vq, 0);
2807 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2809 VirtIONet *n = VIRTIO_NET(vdev);
2810 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2812 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2813 virtio_net_drop_tx_queue_data(vdev, vq);
2814 return;
2817 if (unlikely(q->tx_waiting)) {
2818 return;
2820 q->tx_waiting = 1;
2821 /* This happens when device was stopped but VCPU wasn't. */
2822 if (!vdev->vm_running) {
2823 return;
2825 virtio_queue_set_notification(vq, 0);
2826 qemu_bh_schedule(q->tx_bh);
2829 static void virtio_net_tx_timer(void *opaque)
2831 VirtIONetQueue *q = opaque;
2832 VirtIONet *n = q->n;
2833 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2834 int ret;
2836 /* This happens when device was stopped but BH wasn't. */
2837 if (!vdev->vm_running) {
2838 /* Make sure tx waiting is set, so we'll run when restarted. */
2839 assert(q->tx_waiting);
2840 return;
2843 q->tx_waiting = 0;
2845 /* Just in case the driver is not ready on more */
2846 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2847 return;
2850 ret = virtio_net_flush_tx(q);
2851 if (ret == -EBUSY || ret == -EINVAL) {
2852 return;
2855 * If we flush a full burst of packets, assume there are
2856 * more coming and immediately rearm
2858 if (ret >= n->tx_burst) {
2859 q->tx_waiting = 1;
2860 timer_mod(q->tx_timer,
2861 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2862 return;
2865 * If less than a full burst, re-enable notification and flush
2866 * anything that may have come in while we weren't looking. If
2867 * we find something, assume the guest is still active and rearm
2869 virtio_queue_set_notification(q->tx_vq, 1);
2870 ret = virtio_net_flush_tx(q);
2871 if (ret > 0) {
2872 virtio_queue_set_notification(q->tx_vq, 0);
2873 q->tx_waiting = 1;
2874 timer_mod(q->tx_timer,
2875 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2879 static void virtio_net_tx_bh(void *opaque)
2881 VirtIONetQueue *q = opaque;
2882 VirtIONet *n = q->n;
2883 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2884 int32_t ret;
2886 /* This happens when device was stopped but BH wasn't. */
2887 if (!vdev->vm_running) {
2888 /* Make sure tx waiting is set, so we'll run when restarted. */
2889 assert(q->tx_waiting);
2890 return;
2893 q->tx_waiting = 0;
2895 /* Just in case the driver is not ready on more */
2896 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2897 return;
2900 ret = virtio_net_flush_tx(q);
2901 if (ret == -EBUSY || ret == -EINVAL) {
2902 return; /* Notification re-enable handled by tx_complete or device
2903 * broken */
2906 /* If we flush a full burst of packets, assume there are
2907 * more coming and immediately reschedule */
2908 if (ret >= n->tx_burst) {
2909 qemu_bh_schedule(q->tx_bh);
2910 q->tx_waiting = 1;
2911 return;
2914 /* If less than a full burst, re-enable notification and flush
2915 * anything that may have come in while we weren't looking. If
2916 * we find something, assume the guest is still active and reschedule */
2917 virtio_queue_set_notification(q->tx_vq, 1);
2918 ret = virtio_net_flush_tx(q);
2919 if (ret == -EINVAL) {
2920 return;
2921 } else if (ret > 0) {
2922 virtio_queue_set_notification(q->tx_vq, 0);
2923 qemu_bh_schedule(q->tx_bh);
2924 q->tx_waiting = 1;
2928 static void virtio_net_add_queue(VirtIONet *n, int index)
2930 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2932 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2933 virtio_net_handle_rx);
2935 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2936 n->vqs[index].tx_vq =
2937 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2938 virtio_net_handle_tx_timer);
2939 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2940 virtio_net_tx_timer,
2941 &n->vqs[index]);
2942 } else {
2943 n->vqs[index].tx_vq =
2944 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2945 virtio_net_handle_tx_bh);
2946 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2947 &DEVICE(vdev)->mem_reentrancy_guard);
2950 n->vqs[index].tx_waiting = 0;
2951 n->vqs[index].n = n;
2954 static void virtio_net_del_queue(VirtIONet *n, int index)
2956 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2957 VirtIONetQueue *q = &n->vqs[index];
2958 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2960 qemu_purge_queued_packets(nc);
2962 virtio_del_queue(vdev, index * 2);
2963 if (q->tx_timer) {
2964 timer_free(q->tx_timer);
2965 q->tx_timer = NULL;
2966 } else {
2967 qemu_bh_delete(q->tx_bh);
2968 q->tx_bh = NULL;
2970 q->tx_waiting = 0;
2971 virtio_del_queue(vdev, index * 2 + 1);
2974 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2976 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2977 int old_num_queues = virtio_get_num_queues(vdev);
2978 int new_num_queues = new_max_queue_pairs * 2 + 1;
2979 int i;
2981 assert(old_num_queues >= 3);
2982 assert(old_num_queues % 2 == 1);
2984 if (old_num_queues == new_num_queues) {
2985 return;
2989 * We always need to remove and add ctrl vq if
2990 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2991 * and then we only enter one of the following two loops.
2993 virtio_del_queue(vdev, old_num_queues - 1);
2995 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2996 /* new_num_queues < old_num_queues */
2997 virtio_net_del_queue(n, i / 2);
3000 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
3001 /* new_num_queues > old_num_queues */
3002 virtio_net_add_queue(n, i / 2);
3005 /* add ctrl_vq last */
3006 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3009 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3011 int max = multiqueue ? n->max_queue_pairs : 1;
3013 n->multiqueue = multiqueue;
3014 virtio_net_change_num_queue_pairs(n, max);
3016 virtio_net_set_queue_pairs(n);
3019 static int virtio_net_post_load_device(void *opaque, int version_id)
3021 VirtIONet *n = opaque;
3022 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3023 int i, link_down;
3025 trace_virtio_net_post_load_device();
3026 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3027 virtio_vdev_has_feature(vdev,
3028 VIRTIO_F_VERSION_1),
3029 virtio_vdev_has_feature(vdev,
3030 VIRTIO_NET_F_HASH_REPORT));
3032 /* MAC_TABLE_ENTRIES may be different from the saved image */
3033 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3034 n->mac_table.in_use = 0;
3037 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3038 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3042 * curr_guest_offloads will be later overwritten by the
3043 * virtio_set_features_nocheck call done from the virtio_load.
3044 * Here we make sure it is preserved and restored accordingly
3045 * in the virtio_net_post_load_virtio callback.
3047 n->saved_guest_offloads = n->curr_guest_offloads;
3049 virtio_net_set_queue_pairs(n);
3051 /* Find the first multicast entry in the saved MAC filter */
3052 for (i = 0; i < n->mac_table.in_use; i++) {
3053 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3054 break;
3057 n->mac_table.first_multi = i;
3059 /* nc.link_down can't be migrated, so infer link_down according
3060 * to link status bit in n->status */
3061 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3062 for (i = 0; i < n->max_queue_pairs; i++) {
3063 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3066 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3067 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3068 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3069 QEMU_CLOCK_VIRTUAL,
3070 virtio_net_announce_timer, n);
3071 if (n->announce_timer.round) {
3072 timer_mod(n->announce_timer.tm,
3073 qemu_clock_get_ms(n->announce_timer.type));
3074 } else {
3075 qemu_announce_timer_del(&n->announce_timer, false);
3079 if (n->rss_data.enabled) {
3080 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3081 if (!n->rss_data.populate_hash) {
3082 if (!virtio_net_attach_epbf_rss(n)) {
3083 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3084 warn_report("Can't post-load eBPF RSS for vhost");
3085 } else {
3086 warn_report("Can't post-load eBPF RSS - "
3087 "fallback to software RSS");
3088 n->rss_data.enabled_software_rss = true;
3093 trace_virtio_net_rss_enable(n->rss_data.hash_types,
3094 n->rss_data.indirections_len,
3095 sizeof(n->rss_data.key));
3096 } else {
3097 trace_virtio_net_rss_disable();
3099 return 0;
3102 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3104 VirtIONet *n = VIRTIO_NET(vdev);
3106 * The actual needed state is now in saved_guest_offloads,
3107 * see virtio_net_post_load_device for detail.
3108 * Restore it back and apply the desired offloads.
3110 n->curr_guest_offloads = n->saved_guest_offloads;
3111 if (peer_has_vnet_hdr(n)) {
3112 virtio_net_apply_guest_offloads(n);
3115 return 0;
3118 /* tx_waiting field of a VirtIONetQueue */
3119 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3120 .name = "virtio-net-queue-tx_waiting",
3121 .fields = (const VMStateField[]) {
3122 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3123 VMSTATE_END_OF_LIST()
3127 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3129 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3132 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3134 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3135 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3138 static bool mac_table_fits(void *opaque, int version_id)
3140 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
/* Field test: logical negation of mac_table_fits(). */
static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    if (mac_table_fits(opaque, version_id)) {
        return false;
    }
    return true;
}
3148 /* This temporary type is shared by all the WITH_TMP methods
3149 * although only some fields are used by each.
3151 struct VirtIONetMigTmp {
3152 VirtIONet *parent;
3153 VirtIONetQueue *vqs_1;
3154 uint16_t curr_queue_pairs_1;
3155 uint8_t has_ufo;
3156 uint32_t has_vnet_hdr;
3159 /* The 2nd and subsequent tx_waiting flags are loaded later than
3160 * the 1st entry in the queue_pairs and only if there's more than one
3161 * entry. We use the tmp mechanism to calculate a temporary
3162 * pointer and count and also validate the count.
3165 static int virtio_net_tx_waiting_pre_save(void *opaque)
3167 struct VirtIONetMigTmp *tmp = opaque;
3169 tmp->vqs_1 = tmp->parent->vqs + 1;
3170 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3171 if (tmp->parent->curr_queue_pairs == 0) {
3172 tmp->curr_queue_pairs_1 = 0;
3175 return 0;
3178 static int virtio_net_tx_waiting_pre_load(void *opaque)
3180 struct VirtIONetMigTmp *tmp = opaque;
3182 /* Reuse the pointer setup from save */
3183 virtio_net_tx_waiting_pre_save(opaque);
3185 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3186 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3187 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3189 return -EINVAL;
3192 return 0; /* all good */
3195 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3196 .name = "virtio-net-tx_waiting",
3197 .pre_load = virtio_net_tx_waiting_pre_load,
3198 .pre_save = virtio_net_tx_waiting_pre_save,
3199 .fields = (const VMStateField[]) {
3200 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3201 curr_queue_pairs_1,
3202 vmstate_virtio_net_queue_tx_waiting,
3203 struct VirtIONetQueue),
3204 VMSTATE_END_OF_LIST()
3208 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3209 * flag set we need to check that we have it
3211 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3213 struct VirtIONetMigTmp *tmp = opaque;
3215 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3216 error_report("virtio-net: saved image requires TUN_F_UFO support");
3217 return -EINVAL;
3220 return 0;
3223 static int virtio_net_ufo_pre_save(void *opaque)
3225 struct VirtIONetMigTmp *tmp = opaque;
3227 tmp->has_ufo = tmp->parent->has_ufo;
3229 return 0;
3232 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3233 .name = "virtio-net-ufo",
3234 .post_load = virtio_net_ufo_post_load,
3235 .pre_save = virtio_net_ufo_pre_save,
3236 .fields = (const VMStateField[]) {
3237 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3238 VMSTATE_END_OF_LIST()
3242 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3243 * flag set we need to check that we have it
3245 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3247 struct VirtIONetMigTmp *tmp = opaque;
3249 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3250 error_report("virtio-net: saved image requires vnet_hdr=on");
3251 return -EINVAL;
3254 return 0;
3257 static int virtio_net_vnet_pre_save(void *opaque)
3259 struct VirtIONetMigTmp *tmp = opaque;
3261 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3263 return 0;
3266 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3267 .name = "virtio-net-vnet",
3268 .post_load = virtio_net_vnet_post_load,
3269 .pre_save = virtio_net_vnet_pre_save,
3270 .fields = (const VMStateField[]) {
3271 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3272 VMSTATE_END_OF_LIST()
3276 static bool virtio_net_rss_needed(void *opaque)
3278 return VIRTIO_NET(opaque)->rss_data.enabled;
3281 static const VMStateDescription vmstate_virtio_net_rss = {
3282 .name = "virtio-net-device/rss",
3283 .version_id = 1,
3284 .minimum_version_id = 1,
3285 .needed = virtio_net_rss_needed,
3286 .fields = (const VMStateField[]) {
3287 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3288 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3289 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3290 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3291 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3292 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3293 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3294 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3295 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3296 rss_data.indirections_len, 0,
3297 vmstate_info_uint16, uint16_t),
3298 VMSTATE_END_OF_LIST()
3302 static const VMStateDescription vmstate_virtio_net_device = {
3303 .name = "virtio-net-device",
3304 .version_id = VIRTIO_NET_VM_VERSION,
3305 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3306 .post_load = virtio_net_post_load_device,
3307 .fields = (const VMStateField[]) {
3308 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3309 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3310 vmstate_virtio_net_queue_tx_waiting,
3311 VirtIONetQueue),
3312 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3313 VMSTATE_UINT16(status, VirtIONet),
3314 VMSTATE_UINT8(promisc, VirtIONet),
3315 VMSTATE_UINT8(allmulti, VirtIONet),
3316 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3318 /* Guarded pair: If it fits we load it, else we throw it away
3319 * - can happen if source has a larger MAC table.; post-load
3320 * sets flags in this case.
3322 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3323 0, mac_table_fits, mac_table.in_use,
3324 ETH_ALEN),
3325 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3326 mac_table.in_use, ETH_ALEN),
3328 /* Note: This is an array of uint32's that's always been saved as a
3329 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3330 * but based on the uint.
3332 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3333 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3334 vmstate_virtio_net_has_vnet),
3335 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3336 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3337 VMSTATE_UINT8(alluni, VirtIONet),
3338 VMSTATE_UINT8(nomulti, VirtIONet),
3339 VMSTATE_UINT8(nouni, VirtIONet),
3340 VMSTATE_UINT8(nobcast, VirtIONet),
3341 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3342 vmstate_virtio_net_has_ufo),
3343 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3344 vmstate_info_uint16_equal, uint16_t),
3345 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3346 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3347 vmstate_virtio_net_tx_waiting),
3348 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3349 has_ctrl_guest_offloads),
3350 VMSTATE_END_OF_LIST()
3352 .subsections = (const VMStateDescription * const []) {
3353 &vmstate_virtio_net_rss,
3354 NULL
3358 static NetClientInfo net_virtio_info = {
3359 .type = NET_CLIENT_DRIVER_NIC,
3360 .size = sizeof(NICState),
3361 .can_receive = virtio_net_can_receive,
3362 .receive = virtio_net_receive,
3363 .link_status_changed = virtio_net_set_link_status,
3364 .query_rx_filter = virtio_net_query_rxfilter,
3365 .announce = virtio_net_announce,
3368 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3370 VirtIONet *n = VIRTIO_NET(vdev);
3371 NetClientState *nc;
3372 assert(n->vhost_started);
3373 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3374 /* Must guard against invalid features and bogus queue index
3375 * from being set by malicious guest, or penetrated through
3376 * buggy migration stream.
3378 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3379 qemu_log_mask(LOG_GUEST_ERROR,
3380 "%s: bogus vq index ignored\n", __func__);
3381 return false;
3383 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3384 } else {
3385 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3388 * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3389 * as the macro of configure interrupt's IDX, If this driver does not
3390 * support, the function will return false
3393 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3394 return vhost_net_config_pending(get_vhost_net(nc->peer));
3396 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3399 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3400 bool mask)
3402 VirtIONet *n = VIRTIO_NET(vdev);
3403 NetClientState *nc;
3404 assert(n->vhost_started);
3405 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3406 /* Must guard against invalid features and bogus queue index
3407 * from being set by malicious guest, or penetrated through
3408 * buggy migration stream.
3410 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3411 qemu_log_mask(LOG_GUEST_ERROR,
3412 "%s: bogus vq index ignored\n", __func__);
3413 return;
3415 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3416 } else {
3417 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3420 *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
3421 * as the macro of configure interrupt's IDX, If this driver does not
3422 * support, the function will return
3425 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3426 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3427 return;
3429 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3432 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3434 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3436 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3439 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3440 const char *type)
3443 * The name can be NULL, the netclient name will be type.x.
3445 assert(type != NULL);
3447 g_free(n->netclient_name);
3448 g_free(n->netclient_type);
3449 n->netclient_name = g_strdup(name);
3450 n->netclient_type = g_strdup(type);
3453 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3455 HotplugHandler *hotplug_ctrl;
3456 PCIDevice *pci_dev;
3457 Error *err = NULL;
3459 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3460 if (hotplug_ctrl) {
3461 pci_dev = PCI_DEVICE(dev);
3462 pci_dev->partially_hotplugged = true;
3463 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3464 if (err) {
3465 error_report_err(err);
3466 return false;
3468 } else {
3469 return false;
3471 return true;
3474 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3475 Error **errp)
3477 Error *err = NULL;
3478 HotplugHandler *hotplug_ctrl;
3479 PCIDevice *pdev = PCI_DEVICE(dev);
3480 BusState *primary_bus;
3482 if (!pdev->partially_hotplugged) {
3483 return true;
3485 primary_bus = dev->parent_bus;
3486 if (!primary_bus) {
3487 error_setg(errp, "virtio_net: couldn't find primary bus");
3488 return false;
3490 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3491 qatomic_set(&n->failover_primary_hidden, false);
3492 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3493 if (hotplug_ctrl) {
3494 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3495 if (err) {
3496 goto out;
3498 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3500 pdev->partially_hotplugged = false;
3502 out:
3503 error_propagate(errp, err);
3504 return !err;
3507 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3509 bool should_be_hidden;
3510 Error *err = NULL;
3511 DeviceState *dev = failover_find_primary_device(n);
3513 if (!dev) {
3514 return;
3517 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3519 if (migration_in_setup(s) && !should_be_hidden) {
3520 if (failover_unplug_primary(n, dev)) {
3521 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3522 qapi_event_send_unplug_primary(dev->id);
3523 qatomic_set(&n->failover_primary_hidden, true);
3524 } else {
3525 warn_report("couldn't unplug primary device");
3527 } else if (migration_has_failed(s)) {
3528 /* We already unplugged the device let's plug it back */
3529 if (!failover_replug_primary(n, dev, &err)) {
3530 if (err) {
3531 error_report_err(err);
3537 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3539 MigrationState *s = data;
3540 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3541 virtio_net_handle_migration_primary(n, s);
3544 static bool failover_hide_primary_device(DeviceListener *listener,
3545 const QDict *device_opts,
3546 bool from_json,
3547 Error **errp)
3549 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3550 const char *standby_id;
3552 if (!device_opts) {
3553 return false;
3556 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3557 return false;
3560 if (!qdict_haskey(device_opts, "id")) {
3561 error_setg(errp, "Device with failover_pair_id needs to have id");
3562 return false;
3565 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3566 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3567 return false;
3571 * The hide helper can be called several times for a given device.
3572 * Check there is only one primary for a virtio-net device but
3573 * don't duplicate the qdict several times if it's called for the same
3574 * device.
3576 if (n->primary_opts) {
3577 const char *old, *new;
3578 /* devices with failover_pair_id always have an id */
3579 old = qdict_get_str(n->primary_opts, "id");
3580 new = qdict_get_str(device_opts, "id");
3581 if (strcmp(old, new) != 0) {
3582 error_setg(errp, "Cannot attach more than one primary device to "
3583 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3584 return false;
3586 } else {
3587 n->primary_opts = qdict_clone_shallow(device_opts);
3588 n->primary_opts_from_json = from_json;
3591 /* failover_primary_hidden is set during feature negotiation */
3592 return qatomic_read(&n->failover_primary_hidden);
3595 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3597 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3598 VirtIONet *n = VIRTIO_NET(dev);
3599 NetClientState *nc;
3600 int i;
3602 if (n->net_conf.mtu) {
3603 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3606 if (n->net_conf.duplex_str) {
3607 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3608 n->net_conf.duplex = DUPLEX_HALF;
3609 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3610 n->net_conf.duplex = DUPLEX_FULL;
3611 } else {
3612 error_setg(errp, "'duplex' must be 'half' or 'full'");
3613 return;
3615 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3616 } else {
3617 n->net_conf.duplex = DUPLEX_UNKNOWN;
3620 if (n->net_conf.speed < SPEED_UNKNOWN) {
3621 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3622 return;
3624 if (n->net_conf.speed >= 0) {
3625 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3628 if (n->failover) {
3629 n->primary_listener.hide_device = failover_hide_primary_device;
3630 qatomic_set(&n->failover_primary_hidden, true);
3631 device_listener_register(&n->primary_listener);
3632 migration_add_notifier(&n->migration_state,
3633 virtio_net_migration_state_notifier);
3634 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3637 virtio_net_set_config_size(n, n->host_features);
3638 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3641 * We set a lower limit on RX queue size to what it always was.
3642 * Guests that want a smaller ring can always resize it without
3643 * help from us (using virtio 1 and up).
3645 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3646 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3647 !is_power_of_2(n->net_conf.rx_queue_size)) {
3648 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3649 "must be a power of 2 between %d and %d.",
3650 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3651 VIRTQUEUE_MAX_SIZE);
3652 virtio_cleanup(vdev);
3653 return;
3656 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3657 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3658 !is_power_of_2(n->net_conf.tx_queue_size)) {
3659 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3660 "must be a power of 2 between %d and %d",
3661 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3662 virtio_net_max_tx_queue_size(n));
3663 virtio_cleanup(vdev);
3664 return;
3667 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3670 * Figure out the datapath queue pairs since the backend could
3671 * provide control queue via peers as well.
3673 if (n->nic_conf.peers.queues) {
3674 for (i = 0; i < n->max_ncs; i++) {
3675 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3676 ++n->max_queue_pairs;
3680 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3682 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3683 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3684 "must be a positive integer less than %d.",
3685 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3686 virtio_cleanup(vdev);
3687 return;
3689 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3690 n->curr_queue_pairs = 1;
3691 n->tx_timeout = n->net_conf.txtimer;
3693 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3694 && strcmp(n->net_conf.tx, "bh")) {
3695 warn_report("virtio-net: "
3696 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3697 n->net_conf.tx);
3698 error_printf("Defaulting to \"bh\"");
3701 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3702 n->net_conf.tx_queue_size);
3704 for (i = 0; i < n->max_queue_pairs; i++) {
3705 virtio_net_add_queue(n, i);
3708 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3709 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3710 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3711 n->status = VIRTIO_NET_S_LINK_UP;
3712 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3713 QEMU_CLOCK_VIRTUAL,
3714 virtio_net_announce_timer, n);
3715 n->announce_timer.round = 0;
3717 if (n->netclient_type) {
3719 * Happen when virtio_net_set_netclient_name has been called.
3721 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3722 n->netclient_type, n->netclient_name,
3723 &dev->mem_reentrancy_guard, n);
3724 } else {
3725 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3726 object_get_typename(OBJECT(dev)), dev->id,
3727 &dev->mem_reentrancy_guard, n);
3730 for (i = 0; i < n->max_queue_pairs; i++) {
3731 n->nic->ncs[i].do_not_pad = true;
3734 peer_test_vnet_hdr(n);
3735 if (peer_has_vnet_hdr(n)) {
3736 for (i = 0; i < n->max_queue_pairs; i++) {
3737 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3739 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3740 } else {
3741 n->host_hdr_len = 0;
3744 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3746 n->vqs[0].tx_waiting = 0;
3747 n->tx_burst = n->net_conf.txburst;
3748 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3749 n->promisc = 1; /* for compatibility */
3751 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3753 n->vlans = g_malloc0(MAX_VLAN >> 3);
3755 nc = qemu_get_queue(n->nic);
3756 nc->rxfilter_notify_enabled = 1;
3758 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3759 struct virtio_net_config netcfg = {};
3760 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3761 vhost_net_set_config(get_vhost_net(nc->peer),
3762 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3764 QTAILQ_INIT(&n->rsc_chains);
3765 n->qdev = dev;
3767 net_rx_pkt_init(&n->rx_pkt);
3769 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3770 virtio_net_load_ebpf(n);
3774 static void virtio_net_device_unrealize(DeviceState *dev)
3776 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3777 VirtIONet *n = VIRTIO_NET(dev);
3778 int i, max_queue_pairs;
3780 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3781 virtio_net_unload_ebpf(n);
3784 /* This will stop vhost backend if appropriate. */
3785 virtio_net_set_status(vdev, 0);
3787 g_free(n->netclient_name);
3788 n->netclient_name = NULL;
3789 g_free(n->netclient_type);
3790 n->netclient_type = NULL;
3792 g_free(n->mac_table.macs);
3793 g_free(n->vlans);
3795 if (n->failover) {
3796 qobject_unref(n->primary_opts);
3797 device_listener_unregister(&n->primary_listener);
3798 migration_remove_notifier(&n->migration_state);
3799 } else {
3800 assert(n->primary_opts == NULL);
3803 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3804 for (i = 0; i < max_queue_pairs; i++) {
3805 virtio_net_del_queue(n, i);
3807 /* delete also control vq */
3808 virtio_del_queue(vdev, max_queue_pairs * 2);
3809 qemu_announce_timer_del(&n->announce_timer, false);
3810 g_free(n->vqs);
3811 qemu_del_nic(n->nic);
3812 virtio_net_rsc_cleanup(n);
3813 g_free(n->rss_data.indirections_table);
3814 net_rx_pkt_uninit(n->rx_pkt);
3815 virtio_cleanup(vdev);
3818 static void virtio_net_instance_init(Object *obj)
3820 VirtIONet *n = VIRTIO_NET(obj);
3823 * The default config_size is sizeof(struct virtio_net_config).
3824 * Can be overridden with virtio_net_set_config_size.
3826 n->config_size = sizeof(struct virtio_net_config);
3827 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3828 "bootindex", "/ethernet-phy@0",
3829 DEVICE(n));
3831 ebpf_rss_init(&n->ebpf_rss);
3834 static int virtio_net_pre_save(void *opaque)
3836 VirtIONet *n = opaque;
3838 /* At this point, backend must be stopped, otherwise
3839 * it might keep writing to memory. */
3840 assert(!n->vhost_started);
3842 return 0;
3845 static bool primary_unplug_pending(void *opaque)
3847 DeviceState *dev = opaque;
3848 DeviceState *primary;
3849 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3850 VirtIONet *n = VIRTIO_NET(vdev);
3852 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3853 return false;
3855 primary = failover_find_primary_device(n);
3856 return primary ? primary->pending_deleted_event : false;
3859 static bool dev_unplug_pending(void *opaque)
3861 DeviceState *dev = opaque;
3862 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3864 return vdc->primary_unplug_pending(dev);
3867 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3869 VirtIONet *n = VIRTIO_NET(vdev);
3870 NetClientState *nc = qemu_get_queue(n->nic);
3871 struct vhost_net *net = get_vhost_net(nc->peer);
3872 return &net->dev;
3875 static const VMStateDescription vmstate_virtio_net = {
3876 .name = "virtio-net",
3877 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3878 .version_id = VIRTIO_NET_VM_VERSION,
3879 .fields = (const VMStateField[]) {
3880 VMSTATE_VIRTIO_DEVICE,
3881 VMSTATE_END_OF_LIST()
3883 .pre_save = virtio_net_pre_save,
3884 .dev_unplug_pending = dev_unplug_pending,
3887 static Property virtio_net_properties[] = {
3888 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3889 VIRTIO_NET_F_CSUM, true),
3890 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3891 VIRTIO_NET_F_GUEST_CSUM, true),
3892 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3893 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3894 VIRTIO_NET_F_GUEST_TSO4, true),
3895 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3896 VIRTIO_NET_F_GUEST_TSO6, true),
3897 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3898 VIRTIO_NET_F_GUEST_ECN, true),
3899 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3900 VIRTIO_NET_F_GUEST_UFO, true),
3901 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3902 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3903 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3904 VIRTIO_NET_F_HOST_TSO4, true),
3905 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3906 VIRTIO_NET_F_HOST_TSO6, true),
3907 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3908 VIRTIO_NET_F_HOST_ECN, true),
3909 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3910 VIRTIO_NET_F_HOST_UFO, true),
3911 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3912 VIRTIO_NET_F_MRG_RXBUF, true),
3913 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3914 VIRTIO_NET_F_STATUS, true),
3915 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3916 VIRTIO_NET_F_CTRL_VQ, true),
3917 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3918 VIRTIO_NET_F_CTRL_RX, true),
3919 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3920 VIRTIO_NET_F_CTRL_VLAN, true),
3921 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3922 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3923 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3924 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3925 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3926 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3927 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3928 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3929 VIRTIO_NET_F_RSS, false),
3930 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3931 VIRTIO_NET_F_HASH_REPORT, false),
3932 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3933 VIRTIO_NET_F_RSC_EXT, false),
3934 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3935 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3936 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3937 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3938 TX_TIMER_INTERVAL),
3939 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3940 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3941 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3942 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3943 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3944 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3945 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3946 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3947 true),
3948 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3949 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3950 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3951 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3952 VIRTIO_NET_F_GUEST_USO4, true),
3953 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
3954 VIRTIO_NET_F_GUEST_USO6, true),
3955 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
3956 VIRTIO_NET_F_HOST_USO, true),
3957 DEFINE_PROP_END_OF_LIST(),
3960 static void virtio_net_class_init(ObjectClass *klass, void *data)
3962 DeviceClass *dc = DEVICE_CLASS(klass);
3963 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3965 device_class_set_props(dc, virtio_net_properties);
3966 dc->vmsd = &vmstate_virtio_net;
3967 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3968 vdc->realize = virtio_net_device_realize;
3969 vdc->unrealize = virtio_net_device_unrealize;
3970 vdc->get_config = virtio_net_get_config;
3971 vdc->set_config = virtio_net_set_config;
3972 vdc->get_features = virtio_net_get_features;
3973 vdc->set_features = virtio_net_set_features;
3974 vdc->bad_features = virtio_net_bad_features;
3975 vdc->reset = virtio_net_reset;
3976 vdc->queue_reset = virtio_net_queue_reset;
3977 vdc->queue_enable = virtio_net_queue_enable;
3978 vdc->set_status = virtio_net_set_status;
3979 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3980 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3981 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3982 vdc->post_load = virtio_net_post_load_virtio;
3983 vdc->vmsd = &vmstate_virtio_net_device;
3984 vdc->primary_unplug_pending = primary_unplug_pending;
3985 vdc->get_vhost = virtio_net_get_vhost;
3986 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
3989 static const TypeInfo virtio_net_info = {
3990 .name = TYPE_VIRTIO_NET,
3991 .parent = TYPE_VIRTIO_DEVICE,
3992 .instance_size = sizeof(VirtIONet),
3993 .instance_init = virtio_net_instance_init,
3994 .class_init = virtio_net_class_init,
3997 static void virtio_register_types(void)
3999 type_register_static(&virtio_net_info);
4002 type_init(virtio_register_types)