virtio-net: do not reset vlan filtering at set_features
[qemu/ar7.git] / hw / net / virtio-net.c
blob 1c31374334cc1dc67213005b186c2f51ef3eb227
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
#include "sysemu/qtest.h"
#define VIRTIO_NET_VM_VERSION    11

#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
static const VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static const VirtIOConfigSizeParams cfg_size_params = {
    .min_size = endof(struct virtio_net_config, mac),
    .max_size = sizeof(struct virtio_net_config),
    .feature_sizes = feature_sizes
};
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}
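
/* Each queue pair occupies two virtqueue indices (RX at 2*i, TX at 2*i + 1,
 * with the control queue last), so halving a virtqueue index yields the
 * owning queue pair. */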
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

static void flush_or_purge_queued_packets(NetClientState *nc)
{
    if (!nc->peer) {
        return;
    }

    qemu_flush_or_purge_queued_packets(nc->peer, true);
    assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);
    static const MACAddr zero = { .a = { 0, 0, 0, 0, 0, 0 } };

    int ret = 0;
    memset(&netcfg, 0 , sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queue_pairs);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret == -1) {
            return;
        }

        /*
         * Some NIC/kernel combinations present 0 as the mac address.  As that
         * is not a legal address, try to proceed with the address from the
         * QEMU command line in the hope that the address has been configured
         * correctly elsewhere - just not reported by the device.
         */
        if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
            info_report("Zero hardware mac address detected. Ignoring.");
            memcpy(netcfg.mac, n->mac, ETH_ALEN);
        }

        netcfg.status |= virtio_tswap16(vdev,
                                        n->status & VIRTIO_NET_S_ANNOUNCE);
        memcpy(config, &netcfg, n->config_size);
    }
}
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_FRONTEND);
    }
}
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
              n->max_ncs - n->max_queue_pairs : 0;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queue_pairs; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queue_pairs, cvq);
        n->vhost_started = 0;
    }
}
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queue_pairs, bool enable)
{
    int i;

    for (i = 0; i < queue_pairs; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queue_pairs, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queue_pairs, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}
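
/* In set_status below, a queue beyond curr_queue_pairs (or beyond queue 0
 * when multiqueue is off) gets queue_status 0, i.e. it is treated as if
 * the driver were not DRIVER_OK, so its TX timer or bottom half is
 * stopped rather than scheduled. */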
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queue_pairs) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
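
/* n->vlans is a MAX_VLAN-bit filter bitmap, one uint32_t word per 32 VLAN
 * IDs; the walk below emits the ID of every bit that is set. */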
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                QAPI_LIST_PREPEND(list, (i << 5) + j);
            }
        }
    }

    return list;
}
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        QAPI_LIST_PREPEND(str_list,
                      qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN));
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        vhost_net_virtqueue_reset(vdev, nc, queue_index);
    }

    flush_or_purge_queued_packets(nc);
}

static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc;
    int r;

    /* validate queue_index and skip for cvq */
    if (queue_index >= n->max_queue_pairs * 2) {
        return;
    }

    nc = qemu_get_subqueue(n->nic, vq2q(queue_index));

    if (!nc->peer || !vdev->vhost_started) {
        return;
    }

    if (get_vhost_net(nc->peer) &&
        nc->peer->info->type == NET_CLIENT_DRIVER_TAP) {
        r = vhost_net_virtqueue_restart(vdev, nc, queue_index);
        if (r < 0) {
            error_report("unable to restart vhost net virtqueue: %d, "
                         "when resetting the queue", queue_index);
        }
    }
}
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queue_pairs = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queue_pairs; i++) {
        flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i));
    }
}
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static int peer_has_uso(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user or vhost-vdpa don't support max queue
     * size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    switch (peer->info->type) {
    case NET_CLIENT_DRIVER_VHOST_USER:
    case NET_CLIENT_DRIVER_VHOST_VDPA:
        return VIRTQUEUE_MAX_SIZE;
    default:
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }
}
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queue_pairs == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queue_pairs(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        if (i < n->curr_queue_pairs) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!peer_has_uso(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    }
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    /*
     * Since GUEST_ANNOUNCE is emulated, the feature bit could be set without
     * being enabled in the backend. This happens in the vDPA case.
     *
     * Make sure the feature set is not incoherent, as the driver could refuse
     * to start.
     *
     * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
     * helping the guest to notify the new location with vDPA devices that do
     * not support it.
     */
    if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
    }

    return features;
}
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO)  |
        (1ULL << VIRTIO_NET_F_GUEST_USO4) |
        (1ULL << VIRTIO_NET_F_GUEST_USO6);

    return guest_offloads_mask & features;
}

uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
typedef struct {
    VirtIONet *n;
    DeviceState *dev;
} FailoverDevice;

/**
 * Set the failover primary device
 *
 * @dev: the device being walked, a candidate primary
 * @opaque: FailoverDevice to fill in on a match
 */
static int failover_set_primary(DeviceState *dev, void *opaque)
{
    FailoverDevice *fdev = opaque;
    PCIDevice *pci_dev = (PCIDevice *)
        object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE);

    if (!pci_dev) {
        return 0;
    }

    if (!g_strcmp0(pci_dev->failover_pair_id, fdev->n->netclient_name)) {
        fdev->dev = dev;
        return 1;
    }

    return 0;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    FailoverDevice fdev = {
        .n = n,
    };

    qbus_walk_children(sysbus_get_default(), failover_set_primary, NULL,
                       NULL, NULL, &fdev);
    return fdev.dev;
}
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    if (!n->primary_opts) {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=%s\n", n->netclient_name);
        return;
    }

    dev = qdev_device_add_from_qdict(n->primary_opts,
                                     n->primary_opts_from_json,
                                     &err);
    if (err) {
        qobject_unref(n->primary_opts);
        n->primary_opts = NULL;
    } else {
        object_unref(OBJECT(dev));
    }
    error_propagate(errp, err);
}
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queue_pairs; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);

        /*
         * keep acked_features in NetVhostUserState up-to-date so it
         * can't miss any features configured by guest virtio driver.
         */
        vhost_net_save_acked_features(nc->peer);
    }
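
    /*
     * Without CTRL_VLAN the guest has no way to program the VLAN filter,
     * so open the filter up and accept every VLAN.  When CTRL_VLAN *is*
     * negotiated the table is deliberately left untouched here: per this
     * commit's subject, set_features must not reset vlan filtering (it can
     * run after virtio_load and would otherwise wipe a migrated table).
     */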
    if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            if (!qtest_enabled()) {
                warn_report_err(err);
            } else {
                error_free(err);
            }
        }
    }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
static void virtio_net_detach_epbf_rss(VirtIONet *n);

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;

    virtio_net_detach_epbf_rss(n);
}

static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd)
{
    NetClientState *nc = qemu_get_peer(qemu_get_queue(nic), 0);
    if (nc == NULL || nc->info->set_steering_ebpf == NULL) {
        return false;
    }

    return nc->info->set_steering_ebpf(nc, prog_fd);
}

static void rss_data_to_rss_config(struct VirtioNetRssData *data,
                                   struct EBPFRSSConfig *config)
{
    config->redirect = data->redirect;
    config->populate_hash = data->populate_hash;
    config->hash_types = data->hash_types;
    config->indirections_len = data->indirections_len;
    config->default_queue = data->default_queue;
}

static bool virtio_net_attach_epbf_rss(VirtIONet *n)
{
    struct EBPFRSSConfig config = {};

    if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
        return false;
    }

    rss_data_to_rss_config(&n->rss_data, &config);

    if (!ebpf_rss_set_all(&n->ebpf_rss, &config,
                          n->rss_data.indirections_table, n->rss_data.key)) {
        return false;
    }

    if (!virtio_net_attach_ebpf_to_backend(n->nic, n->ebpf_rss.program_fd)) {
        return false;
    }

    return true;
}

static void virtio_net_detach_epbf_rss(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
}

static bool virtio_net_load_ebpf(VirtIONet *n)
{
    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    return ebpf_rss_load(&n->ebpf_rss);
}

static void virtio_net_unload_ebpf(VirtIONet *n)
{
    virtio_net_attach_ebpf_to_backend(n->nic, -1);
    ebpf_rss_unload(&n->ebpf_rss);
}
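
/* The VIRTIO_NET_CTRL_MQ_{RSS,HASH}_CONFIG payload is parsed below in four
 * pieces: the fixed virtio_net_rss_config header up to the indirection
 * table, the variable-length indirection table itself, a packed
 * {max_tx_vq, hash_key_length} pair, and finally the hash key bytes. */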
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queue_pairs, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queue_pairs) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queue_pairs";
        err_value = (uint32_t)s;
        goto error;
    }
    queue_pairs = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queue_pairs;
    if (queue_pairs == 0 || queue_pairs > n->max_queue_pairs) {
        err_msg = "Invalid number of queue_pairs";
        err_value = queue_pairs;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queue_pairs;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;

    if (!n->rss_data.populate_hash) {
        if (!virtio_net_attach_epbf_rss(n)) {
            /* EBPF must be loaded for vhost */
            if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
                warn_report("Can't load eBPF RSS for vhost");
                goto error;
            }
            /* fallback to software RSS */
            warn_report("Can't load eBPF RSS - fallback to software RSS");
            n->rss_data.enabled_software_rss = true;
        }
    } else {
        /* use software RSS for hash populating */
        /* and detach eBPF if it was loaded before */
        virtio_net_detach_epbf_rss(n);
        n->rss_data.enabled_software_rss = true;
    }

    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queue_pairs;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queue_pairs;
    NetClientState *nc = qemu_get_queue(n->nic);

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queue_pairs ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queue_pairs = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
    } else {
        return VIRTIO_NET_ERR;
    }

    if (queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queue_pairs > n->max_queue_pairs ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queue_pairs = queue_pairs;
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        /*
         * Avoid updating the backend for a vdpa device: We're only interested
         * in updating the device model queues.
         */
        return VIRTIO_NET_OK;
    }
    /* stop the backend before changing the number of queue_pairs to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queue_pairs(n);

    return VIRTIO_NET_OK;
}
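
/* Note for the ctrl handler below: the out_sg vector is duplicated with
 * g_memdup2() because iov_discard_front() modifies the iovec array in
 * place while the caller still owns the original elements. */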
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
                                  const struct iovec *in_sg, unsigned in_num,
                                  const struct iovec *out_sg,
                                  unsigned out_num)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    size_t s;
    struct iovec *iov, *iov2;

    if (iov_size(in_sg, in_num) < sizeof(status) ||
        iov_size(out_sg, out_num) < sizeof(ctrl)) {
        virtio_error(vdev, "virtio-net ctrl missing headers");
        return 0;
    }

    iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
    s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
    iov_discard_front(&iov, &out_num, sizeof(ctrl));
    if (s != sizeof(ctrl)) {
        status = VIRTIO_NET_ERR;
    } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
        status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
        status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
        status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
        status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
        status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
    } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
        status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
    }

    s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
    assert(s == sizeof(status));

    g_free(iov2);
    return sizeof(status);
}
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        size_t written;
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
                                             elem->out_sg, elem->out_num);
        if (written > 0) {
            virtqueue_push(vq, elem, written);
            virtio_notify(vdev, vq);
            g_free(elem);
        } else {
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }
    }
}

/* RX */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queue_pairs) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size, CSUM_UDP);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
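
/* receive_filter() returns 1 to accept a frame and 0 to drop it:
 * promiscuous mode short-circuits everything, then the VLAN filter is
 * checked, and finally the multicast/unicast mode flags and the MAC
 * table decide. */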
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
static uint8_t virtio_net_get_hash_type(bool hasip4,
                                        bool hasip6,
                                        EthL4HdrProto l4hdr_proto,
                                        uint32_t types)
{
    if (hasip4) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
                return NetPktRssIpV4Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
                return NetPktRssIpV4Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (hasip6) {
        switch (l4hdr_proto) {
        case ETH_L4_HDR_PROTO_TCP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) {
                return NetPktRssIpV6TcpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
                return NetPktRssIpV6Tcp;
            }
            break;

        case ETH_L4_HDR_PROTO_UDP:
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) {
                return NetPktRssIpV6UdpEx;
            }
            if (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
                return NetPktRssIpV6Udp;
            }
            break;

        default:
            break;
        }

        if (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) {
            return NetPktRssIpV6Ex;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
            return NetPktRssIpV6;
        }
    }
    return 0xff;
}
static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
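
/* Software RSS: classify the packet, optionally write the hash into the
 * virtio_net_hdr_v1_hash header, and map the hash through the indirection
 * table.  Returns the target queue index, or -1 if the packet should stay
 * on the queue it arrived on. */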
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool hasip4, hasip6;
    EthL4HdrProto l4hdr_proto;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = size
    };

    net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
    net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
    size_t lens[VIRTQUEUE_MAX_SIZE];
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset, j;
    ssize_t err;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        if (i == VIRTQUEUE_MAX_SIZE) {
            virtio_error(vdev, "virtio-net unexpected long buffer chain");
            err = size;
            goto err;
        }

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            err = -1;
            goto err;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            err = -1;
            goto err;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            err = size;
            goto err;
        }

        elems[i] = elem;
        lens[i] = total;
        i++;
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    for (j = 0; j < i; j++) {
        /* signal other side */
        virtqueue_fill(q->rx_vq, elems[j], lens[j], j);
        g_free(elems[j]);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;

err:
    for (j = 0; j < i; j++) {
        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
        g_free(elems[j]);
    }

    return err;
}
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}
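
/* For RSC, the TCP header length is recovered from th_offset_flags: the
 * data offset lives in the top 4 bits (a count of 32-bit words), so
 * masking with 0xF000 and shifting right by 10 (i.e. >>12 then *4)
 * yields the header length in bytes. */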
2039 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
2040 const uint8_t *buf,
2041 VirtioNetRscUnit *unit)
2043 uint16_t ip_hdrlen;
2044 struct ip_header *ip;
2046 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
2047 + sizeof(struct eth_header));
2048 unit->ip = (void *)ip;
2049 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
2050 unit->ip_plen = &ip->ip_len;
2051 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
2052 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2053 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
2056 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
2057 const uint8_t *buf,
2058 VirtioNetRscUnit *unit)
2060 struct ip6_header *ip6;
2062 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
2063 + sizeof(struct eth_header));
2064 unit->ip = ip6;
2065 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2066 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
2067 + sizeof(struct ip6_header));
2068 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
2070 /* There is a difference between the payload length in IPv4 and IPv6:
2071 the IP header is excluded in IPv6 */
2072 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
2075 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
2076 VirtioNetRscSeg *seg)
2078 int ret;
2079 struct virtio_net_hdr_v1 *h;
2081 h = (struct virtio_net_hdr_v1 *)seg->buf;
2082 h->flags = 0;
2083 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
2085 if (seg->is_coalesced) {
2086 h->rsc.segments = seg->packets;
2087 h->rsc.dup_acks = seg->dup_ack;
2088 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
2089 if (chain->proto == ETH_P_IP) {
2090 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2091 } else {
2092 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2096 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
2097 QTAILQ_REMOVE(&chain->buffers, seg, next);
2098 g_free(seg->buf);
2099 g_free(seg);
2101 return ret;
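/*
 * Timer callback: flush every cached segment on the chain; if some
 * buffers survive a failed drain, re-arm the timer so they are retried
 * after another rsc_timeout interval.
 */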
2104 static void virtio_net_rsc_purge(void *opq)
2106 VirtioNetRscSeg *seg, *rn;
2107 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
2109 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
2110 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2111 chain->stat.purge_failed++;
2112 continue;
2116 chain->stat.timer++;
2117 if (!QTAILQ_EMPTY(&chain->buffers)) {
2118 timer_mod(chain->drain_timer,
2119 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2123 static void virtio_net_rsc_cleanup(VirtIONet *n)
2125 VirtioNetRscChain *chain, *rn_chain;
2126 VirtioNetRscSeg *seg, *rn_seg;
2128 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
2129 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
2130 QTAILQ_REMOVE(&chain->buffers, seg, next);
2131 g_free(seg->buf);
2132 g_free(seg);
2135 timer_free(chain->drain_timer);
2136 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
2137 g_free(chain);
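/*
 * Cache an incoming packet on the chain.  The segment buffer is sized
 * for the largest possible coalesced result (headers plus
 * VIRTIO_NET_MAX_TCP_PAYLOAD) so that later merges can append in place.
 */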
2141 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
2142 NetClientState *nc,
2143 const uint8_t *buf, size_t size)
2145 uint16_t hdr_len;
2146 VirtioNetRscSeg *seg;
2148 hdr_len = chain->n->guest_hdr_len;
2149 seg = g_new(VirtioNetRscSeg, 1);
2150 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
2151 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
2152 memcpy(seg->buf, buf, size);
2153 seg->size = size;
2154 seg->packets = 1;
2155 seg->dup_ack = 0;
2156 seg->is_coalesced = 0;
2157 seg->nc = nc;
2159 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
2160 chain->stat.cache++;
2162 switch (chain->proto) {
2163 case ETH_P_IP:
2164 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
2165 break;
2166 case ETH_P_IPV6:
2167 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
2168 break;
2169 default:
2170 g_assert_not_reached();
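/*
 * Pure-ACK handling: a duplicate ack or an out-of-window ack finalizes
 * the cached segment, while a window update is folded into it and
 * coalescing continues.
 */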
2174 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
2175 VirtioNetRscSeg *seg,
2176 const uint8_t *buf,
2177 struct tcp_header *n_tcp,
2178 struct tcp_header *o_tcp)
2180 uint32_t nack, oack;
2181 uint16_t nwin, owin;
2183 nack = htonl(n_tcp->th_ack);
2184 nwin = htons(n_tcp->th_win);
2185 oack = htonl(o_tcp->th_ack);
2186 owin = htons(o_tcp->th_win);
2188 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
2189 chain->stat.ack_out_of_win++;
2190 return RSC_FINAL;
2191 } else if (nack == oack) {
2192 /* duplicated ack or window probe */
2193 if (nwin == owin) {
2194 /* duplicated ack; bump the dup-ack count (the WHQL test allows up to 1) */
2195 chain->stat.dup_ack++;
2196 return RSC_FINAL;
2197 } else {
2198 /* Coalesce window update */
2199 o_tcp->th_win = n_tcp->th_win;
2200 chain->stat.win_update++;
2201 return RSC_COALESCE;
2203 } else {
2204 /* pure ack, go to 'C', finalize */
2205 chain->stat.pure_ack++;
2206 return RSC_FINAL;
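/*
 * Decide whether the new segment extends the cached one.  Equal
 * sequence numbers fall through to ACK handling; a sequence advance
 * that exactly matches the cached payload is contiguous data and gets
 * appended; anything else is treated as out of order and finalized.
 */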
2210 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
2211 VirtioNetRscSeg *seg,
2212 const uint8_t *buf,
2213 VirtioNetRscUnit *n_unit)
2215 void *data;
2216 uint16_t o_ip_len;
2217 uint32_t nseq, oseq;
2218 VirtioNetRscUnit *o_unit;
2220 o_unit = &seg->unit;
2221 o_ip_len = htons(*o_unit->ip_plen);
2222 nseq = htonl(n_unit->tcp->th_seq);
2223 oseq = htonl(o_unit->tcp->th_seq);
2225 /* out of order or retransmitted. */
2226 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
2227 chain->stat.data_out_of_win++;
2228 return RSC_FINAL;
2231 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
2232 if (nseq == oseq) {
2233 if ((o_unit->payload == 0) && n_unit->payload) {
2234 /* From no payload to payload: the normal case, not a dup ack etc. */
2235 chain->stat.data_after_pure_ack++;
2236 goto coalesce;
2237 } else {
2238 return virtio_net_rsc_handle_ack(chain, seg, buf,
2239 n_unit->tcp, o_unit->tcp);
2241 } else if ((nseq - oseq) != o_unit->payload) {
2242 /* Not a consistent packet, out of order */
2243 chain->stat.data_out_of_order++;
2244 return RSC_FINAL;
2245 } else {
2246 coalesce:
2247 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
2248 chain->stat.over_size++;
2249 return RSC_FINAL;
2252 /* The data is in order; the payload length field differs between v4/v6,
2253 so use the field value to update and record the new data length */
2254 o_unit->payload += n_unit->payload; /* update new data len */
2256 /* update field in ip header */
2257 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
2259 /* Carry the 'PUSH' flag over; the WHQL test guide says 'PUSH' can be
2260 coalesced for Windows guests, while this may change the behavior for
2261 Linux guests (only if they use the RSC feature). */
2262 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
2264 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
2265 o_unit->tcp->th_win = n_unit->tcp->th_win;
2267 memmove(seg->buf + seg->size, data, n_unit->payload);
2268 seg->size += n_unit->payload;
2269 seg->packets++;
2270 chain->stat.coalesced++;
2271 return RSC_COALESCE;
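/*
 * Flow matching: a packet belongs to a cached segment only if the
 * source/destination addresses and the TCP ports all match (the XOR
 * and memcmp tests below are plain equality checks).
 */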
2275 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2276 VirtioNetRscSeg *seg,
2277 const uint8_t *buf, size_t size,
2278 VirtioNetRscUnit *unit)
2280 struct ip_header *ip1, *ip2;
2282 ip1 = (struct ip_header *)(unit->ip);
2283 ip2 = (struct ip_header *)(seg->unit.ip);
2284 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2285 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2286 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2287 chain->stat.no_match++;
2288 return RSC_NO_MATCH;
2291 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2294 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2295 VirtioNetRscSeg *seg,
2296 const uint8_t *buf, size_t size,
2297 VirtioNetRscUnit *unit)
2299 struct ip6_header *ip1, *ip2;
2301 ip1 = (struct ip6_header *)(unit->ip);
2302 ip2 = (struct ip6_header *)(seg->unit.ip);
2303 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2304 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2305 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2306 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2307 chain->stat.no_match++;
2308 return RSC_NO_MATCH;
2311 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2314 /* Packets with 'SYN' should bypass; packets with other control flags
2315 * should be sent after drain to prevent out-of-order delivery */
2316 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2317 struct tcp_header *tcp)
2319 uint16_t tcp_hdr;
2320 uint16_t tcp_flag;
2322 tcp_flag = htons(tcp->th_offset_flags);
2323 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2324 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2325 if (tcp_flag & TH_SYN) {
2326 chain->stat.tcp_syn++;
2327 return RSC_BYPASS;
2330 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2331 chain->stat.tcp_ctrl_drain++;
2332 return RSC_FINAL;
2335 if (tcp_hdr > sizeof(struct tcp_header)) {
2336 chain->stat.tcp_all_opt++;
2337 return RSC_FINAL;
2340 return RSC_CANDIDATE;
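/*
 * Main coalescing entry point: an empty chain just caches the packet
 * and arms the drain timer; otherwise the packet is matched against
 * each cached segment and either merged, delivered, or cached as the
 * start of a new flow.
 */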
2343 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2344 NetClientState *nc,
2345 const uint8_t *buf, size_t size,
2346 VirtioNetRscUnit *unit)
2348 int ret;
2349 VirtioNetRscSeg *seg, *nseg;
2351 if (QTAILQ_EMPTY(&chain->buffers)) {
2352 chain->stat.empty_cache++;
2353 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2354 timer_mod(chain->drain_timer,
2355 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2356 return size;
2359 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2360 if (chain->proto == ETH_P_IP) {
2361 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2362 } else {
2363 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2366 if (ret == RSC_FINAL) {
2367 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2368 /* Send failed */
2369 chain->stat.final_failed++;
2370 return 0;
2373 /* Send current packet */
2374 return virtio_net_do_receive(nc, buf, size);
2375 } else if (ret == RSC_NO_MATCH) {
2376 continue;
2377 } else {
2378 /* Coalesced; set the flag so the IPv4 checksum gets recalculated */
2379 seg->is_coalesced = 1;
2380 return size;
2384 chain->stat.no_match_cache++;
2385 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2386 return size;
2389 /* Drain a connection's cached data to avoid out-of-order segments */
2390 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2391 NetClientState *nc,
2392 const uint8_t *buf, size_t size,
2393 uint16_t ip_start, uint16_t ip_size,
2394 uint16_t tcp_port)
2396 VirtioNetRscSeg *seg, *nseg;
2397 uint32_t ppair1, ppair2;
2399 ppair1 = *(uint32_t *)(buf + tcp_port);
2400 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2401 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2402 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2403 || (ppair1 != ppair2)) {
2404 continue;
2406 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2407 chain->stat.drain_failed++;
2410 break;
2413 return virtio_net_do_receive(nc, buf, size);
2416 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2417 struct ip_header *ip,
2418 const uint8_t *buf, size_t size)
2420 uint16_t ip_len;
2422 /* Not an ipv4 packet */
2423 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2424 chain->stat.ip_option++;
2425 return RSC_BYPASS;
2428 /* Don't handle packets with ip option */
2429 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2430 chain->stat.ip_option++;
2431 return RSC_BYPASS;
2434 if (ip->ip_p != IPPROTO_TCP) {
2435 chain->stat.bypass_not_tcp++;
2436 return RSC_BYPASS;
2439 /* Don't handle packets with ip fragment */
2440 if (!(htons(ip->ip_off) & IP_DF)) {
2441 chain->stat.ip_frag++;
2442 return RSC_BYPASS;
2445 /* Don't handle packets with ecn flag */
2446 if (IPTOS_ECN(ip->ip_tos)) {
2447 chain->stat.ip_ecn++;
2448 return RSC_BYPASS;
2451 ip_len = htons(ip->ip_len);
2452 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2453 || ip_len > (size - chain->n->guest_hdr_len -
2454 sizeof(struct eth_header))) {
2455 chain->stat.ip_hacked++;
2456 return RSC_BYPASS;
2459 return RSC_CANDIDATE;
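/*
 * The magic offset passed to virtio_net_rsc_drain_flow() below is the
 * position of the IPv4 source address: 12 bytes into the IP header
 * (past version/len, tos, total length, id, fragment offset, ttl,
 * protocol and checksum), so saddr+daddr form an 8-byte flow key.
 * The IPv6 path uses offset 8 for the same reason.
 */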
2462 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2463 NetClientState *nc,
2464 const uint8_t *buf, size_t size)
2466 int32_t ret;
2467 uint16_t hdr_len;
2468 VirtioNetRscUnit unit;
2470 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2472 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2473 + sizeof(struct tcp_header))) {
2474 chain->stat.bypass_not_tcp++;
2475 return virtio_net_do_receive(nc, buf, size);
2478 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2479 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2480 != RSC_CANDIDATE) {
2481 return virtio_net_do_receive(nc, buf, size);
2484 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2485 if (ret == RSC_BYPASS) {
2486 return virtio_net_do_receive(nc, buf, size);
2487 } else if (ret == RSC_FINAL) {
2488 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2489 ((hdr_len + sizeof(struct eth_header)) + 12),
2490 VIRTIO_NET_IP4_ADDR_SIZE,
2491 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2494 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2497 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2498 struct ip6_header *ip6,
2499 const uint8_t *buf, size_t size)
2501 uint16_t ip_len;
2503 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2504 != IP_HEADER_VERSION_6) {
2505 return RSC_BYPASS;
2508 /* Both options and the protocol are checked by this test */
2509 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2510 chain->stat.bypass_not_tcp++;
2511 return RSC_BYPASS;
2514 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2515 if (ip_len < sizeof(struct tcp_header) ||
2516 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2517 - sizeof(struct ip6_header))) {
2518 chain->stat.ip_hacked++;
2519 return RSC_BYPASS;
2522 /* Don't handle packets with ecn flag */
2523 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2524 chain->stat.ip_ecn++;
2525 return RSC_BYPASS;
2528 return RSC_CANDIDATE;
2531 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2532 const uint8_t *buf, size_t size)
2534 int32_t ret;
2535 uint16_t hdr_len;
2536 VirtioNetRscChain *chain;
2537 VirtioNetRscUnit unit;
2539 chain = opq;
2540 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2542 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2543 + sizeof(tcp_header))) {
2544 return virtio_net_do_receive(nc, buf, size);
2547 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2548 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2549 unit.ip, buf, size)) {
2550 return virtio_net_do_receive(nc, buf, size);
2553 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2554 if (ret == RSC_BYPASS) {
2555 return virtio_net_do_receive(nc, buf, size);
2556 } else if (ret == RSC_FINAL) {
2557 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2558 ((hdr_len + sizeof(struct eth_header)) + 8),
2559 VIRTIO_NET_IP6_ADDR_SIZE,
2560 hdr_len + sizeof(struct eth_header)
2561 + sizeof(struct ip6_header));
2564 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2567 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2568 NetClientState *nc,
2569 uint16_t proto)
2571 VirtioNetRscChain *chain;
2573 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2574 return NULL;
2577 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2578 if (chain->proto == proto) {
2579 return chain;
2583 chain = g_malloc(sizeof(*chain));
2584 chain->n = n;
2585 chain->proto = proto;
2586 if (proto == (uint16_t)ETH_P_IP) {
2587 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2588 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2589 } else {
2590 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2591 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2593 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2594 virtio_net_rsc_purge, chain);
2595 memset(&chain->stat, 0, sizeof(chain->stat));
2597 QTAILQ_INIT(&chain->buffers);
2598 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2600 return chain;
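/*
 * Dispatch by EtherType: packets are routed to the IPv4 or IPv6
 * coalescing path only when the corresponding guest RSC feature is
 * enabled, and fall back to the ordinary receive path otherwise.
 */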
2603 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2604 const uint8_t *buf,
2605 size_t size)
2607 uint16_t proto;
2608 VirtioNetRscChain *chain;
2609 struct eth_header *eth;
2610 VirtIONet *n;
2612 n = qemu_get_nic_opaque(nc);
2613 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2614 return virtio_net_do_receive(nc, buf, size);
2617 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2618 proto = htons(eth->h_proto);
2620 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2621 if (chain) {
2622 chain->stat.received++;
2623 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2624 return virtio_net_rsc_receive4(chain, nc, buf, size);
2625 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2626 return virtio_net_rsc_receive6(chain, nc, buf, size);
2629 return virtio_net_do_receive(nc, buf, size);
2632 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2633 size_t size)
2635 VirtIONet *n = qemu_get_nic_opaque(nc);
2636 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2637 return virtio_net_rsc_receive(nc, buf, size);
2638 } else {
2639 return virtio_net_do_receive(nc, buf, size);
2643 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
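/*
 * Completion callback for an asynchronous transmit: push the pending
 * element back to the guest, then resume flushing; if the flush again
 * stops at tx_burst, notifications stay disabled and the remainder is
 * rescheduled via the bottom half or timer.
 */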
2645 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2647 VirtIONet *n = qemu_get_nic_opaque(nc);
2648 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2649 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2650 int ret;
2652 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2653 virtio_notify(vdev, q->tx_vq);
2655 g_free(q->async_tx.elem);
2656 q->async_tx.elem = NULL;
2658 virtio_queue_set_notification(q->tx_vq, 1);
2659 ret = virtio_net_flush_tx(q);
2660 if (ret >= n->tx_burst) {
2662 * the flush has been stopped by tx_burst
2663 * we will not receive notification for the
2664 * remaining part, so re-schedule
2666 virtio_queue_set_notification(q->tx_vq, 0);
2667 if (q->tx_bh) {
2668 qemu_bh_schedule(q->tx_bh);
2669 } else {
2670 timer_mod(q->tx_timer,
2671 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2673 q->tx_waiting = 1;
2677 /* TX */
2678 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2680 VirtIONet *n = q->n;
2681 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2682 VirtQueueElement *elem;
2683 int32_t num_packets = 0;
2684 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2685 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2686 return num_packets;
2689 if (q->async_tx.elem) {
2690 virtio_queue_set_notification(q->tx_vq, 0);
2691 return num_packets;
2694 for (;;) {
2695 ssize_t ret;
2696 unsigned int out_num;
2697 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2698 struct virtio_net_hdr_mrg_rxbuf mhdr;
2700 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2701 if (!elem) {
2702 break;
2705 out_num = elem->out_num;
2706 out_sg = elem->out_sg;
2707 if (out_num < 1) {
2708 virtio_error(vdev, "virtio-net header not in first element");
2709 virtqueue_detach_element(q->tx_vq, elem, 0);
2710 g_free(elem);
2711 return -EINVAL;
2714 if (n->has_vnet_hdr) {
2715 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2716 n->guest_hdr_len) {
2717 virtio_error(vdev, "virtio-net header incorrect");
2718 virtqueue_detach_element(q->tx_vq, elem, 0);
2719 g_free(elem);
2720 return -EINVAL;
2722 if (n->needs_vnet_hdr_swap) {
2723 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2724 sg2[0].iov_base = &mhdr;
2725 sg2[0].iov_len = n->guest_hdr_len;
2726 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2727 out_sg, out_num,
2728 n->guest_hdr_len, -1);
2729 if (out_num == VIRTQUEUE_MAX_SIZE) {
2730 goto drop;
2732 out_num += 1;
2733 out_sg = sg2;
2737 * If host wants to see the guest header as is, we can
2738 * pass it on unchanged. Otherwise, copy just the parts
2739 * that host is interested in.
2741 assert(n->host_hdr_len <= n->guest_hdr_len);
2742 if (n->host_hdr_len != n->guest_hdr_len) {
2743 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2744 out_sg, out_num,
2745 0, n->host_hdr_len);
2746 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2747 out_sg, out_num,
2748 n->guest_hdr_len, -1);
2749 out_num = sg_num;
2750 out_sg = sg;
2753 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2754 out_sg, out_num, virtio_net_tx_complete);
2755 if (ret == 0) {
2756 virtio_queue_set_notification(q->tx_vq, 0);
2757 q->async_tx.elem = elem;
2758 return -EBUSY;
2761 drop:
2762 virtqueue_push(q->tx_vq, elem, 0);
2763 virtio_notify(vdev, q->tx_vq);
2764 g_free(elem);
2766 if (++num_packets >= n->tx_burst) {
2767 break;
2770 return num_packets;
2773 static void virtio_net_tx_timer(void *opaque);
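/*
 * Two TX mitigation schemes exist: a timer that batches packets for
 * tx_timeout ns, and a bottom half that flushes as soon as possible.
 * Which one a queue uses is chosen at queue setup via the "tx"
 * property.
 */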
2775 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2777 VirtIONet *n = VIRTIO_NET(vdev);
2778 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2780 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2781 virtio_net_drop_tx_queue_data(vdev, vq);
2782 return;
2785 /* This happens when device was stopped but VCPU wasn't. */
2786 if (!vdev->vm_running) {
2787 q->tx_waiting = 1;
2788 return;
2791 if (q->tx_waiting) {
2792 /* We already have queued packets, immediately flush */
2793 timer_del(q->tx_timer);
2794 virtio_net_tx_timer(q);
2795 } else {
2796 /* re-arm timer to flush it (and more) on next tick */
2797 timer_mod(q->tx_timer,
2798 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2799 q->tx_waiting = 1;
2800 virtio_queue_set_notification(vq, 0);
2804 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2806 VirtIONet *n = VIRTIO_NET(vdev);
2807 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2809 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2810 virtio_net_drop_tx_queue_data(vdev, vq);
2811 return;
2814 if (unlikely(q->tx_waiting)) {
2815 return;
2817 q->tx_waiting = 1;
2818 /* This happens when device was stopped but VCPU wasn't. */
2819 if (!vdev->vm_running) {
2820 return;
2822 virtio_queue_set_notification(vq, 0);
2823 qemu_bh_schedule(q->tx_bh);
2826 static void virtio_net_tx_timer(void *opaque)
2828 VirtIONetQueue *q = opaque;
2829 VirtIONet *n = q->n;
2830 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2831 int ret;
2833 /* This happens when device was stopped but BH wasn't. */
2834 if (!vdev->vm_running) {
2835 /* Make sure tx waiting is set, so we'll run when restarted. */
2836 assert(q->tx_waiting);
2837 return;
2840 q->tx_waiting = 0;
2842 /* Just in case the driver is not ready any more */
2843 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2844 return;
2847 ret = virtio_net_flush_tx(q);
2848 if (ret == -EBUSY || ret == -EINVAL) {
2849 return;
2852 * If we flush a full burst of packets, assume there are
2853 * more coming and immediately rearm
2855 if (ret >= n->tx_burst) {
2856 q->tx_waiting = 1;
2857 timer_mod(q->tx_timer,
2858 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2859 return;
2862 * If less than a full burst, re-enable notification and flush
2863 * anything that may have come in while we weren't looking. If
2864 * we find something, assume the guest is still active and rearm
2866 virtio_queue_set_notification(q->tx_vq, 1);
2867 ret = virtio_net_flush_tx(q);
2868 if (ret > 0) {
2869 virtio_queue_set_notification(q->tx_vq, 0);
2870 q->tx_waiting = 1;
2871 timer_mod(q->tx_timer,
2872 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2876 static void virtio_net_tx_bh(void *opaque)
2878 VirtIONetQueue *q = opaque;
2879 VirtIONet *n = q->n;
2880 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2881 int32_t ret;
2883 /* This happens when device was stopped but BH wasn't. */
2884 if (!vdev->vm_running) {
2885 /* Make sure tx waiting is set, so we'll run when restarted. */
2886 assert(q->tx_waiting);
2887 return;
2890 q->tx_waiting = 0;
2892 /* Just in case the driver is not ready any more */
2893 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2894 return;
2897 ret = virtio_net_flush_tx(q);
2898 if (ret == -EBUSY || ret == -EINVAL) {
2899 return; /* Notification re-enable handled by tx_complete or device
2900 * broken */
2903 /* If we flush a full burst of packets, assume there are
2904 * more coming and immediately reschedule */
2905 if (ret >= n->tx_burst) {
2906 qemu_bh_schedule(q->tx_bh);
2907 q->tx_waiting = 1;
2908 return;
2911 /* If less than a full burst, re-enable notification and flush
2912 * anything that may have come in while we weren't looking. If
2913 * we find something, assume the guest is still active and reschedule */
2914 virtio_queue_set_notification(q->tx_vq, 1);
2915 ret = virtio_net_flush_tx(q);
2916 if (ret == -EINVAL) {
2917 return;
2918 } else if (ret > 0) {
2919 virtio_queue_set_notification(q->tx_vq, 0);
2920 qemu_bh_schedule(q->tx_bh);
2921 q->tx_waiting = 1;
2925 static void virtio_net_add_queue(VirtIONet *n, int index)
2927 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2929 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2930 virtio_net_handle_rx);
2932 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2933 n->vqs[index].tx_vq =
2934 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2935 virtio_net_handle_tx_timer);
2936 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2937 virtio_net_tx_timer,
2938 &n->vqs[index]);
2939 } else {
2940 n->vqs[index].tx_vq =
2941 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2942 virtio_net_handle_tx_bh);
2943 n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index],
2944 &DEVICE(vdev)->mem_reentrancy_guard);
2947 n->vqs[index].tx_waiting = 0;
2948 n->vqs[index].n = n;
2951 static void virtio_net_del_queue(VirtIONet *n, int index)
2953 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2954 VirtIONetQueue *q = &n->vqs[index];
2955 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2957 qemu_purge_queued_packets(nc);
2959 virtio_del_queue(vdev, index * 2);
2960 if (q->tx_timer) {
2961 timer_free(q->tx_timer);
2962 q->tx_timer = NULL;
2963 } else {
2964 qemu_bh_delete(q->tx_bh);
2965 q->tx_bh = NULL;
2967 q->tx_waiting = 0;
2968 virtio_del_queue(vdev, index * 2 + 1);
2971 static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
2973 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2974 int old_num_queues = virtio_get_num_queues(vdev);
2975 int new_num_queues = new_max_queue_pairs * 2 + 1;
2976 int i;
2978 assert(old_num_queues >= 3);
2979 assert(old_num_queues % 2 == 1);
2981 if (old_num_queues == new_num_queues) {
2982 return;
2986 * We always need to remove and add ctrl vq if
2987 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2988 * and then we only enter one of the following two loops.
2990 virtio_del_queue(vdev, old_num_queues - 1);
2992 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2993 /* new_num_queues < old_num_queues */
2994 virtio_net_del_queue(n, i / 2);
2997 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2998 /* new_num_queues > old_num_queues */
2999 virtio_net_add_queue(n, i / 2);
3002 /* add ctrl_vq last */
3003 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3006 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
3008 int max = multiqueue ? n->max_queue_pairs : 1;
3010 n->multiqueue = multiqueue;
3011 virtio_net_change_num_queue_pairs(n, max);
3013 virtio_net_set_queue_pairs(n);
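/*
 * Post-load fixups: re-derive state that is intentionally not
 * migrated, such as the merged-rx-buffer layout, the first multicast
 * MAC entry, per-queue link state and the announce timer.
 */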
3016 static int virtio_net_post_load_device(void *opaque, int version_id)
3018 VirtIONet *n = opaque;
3019 VirtIODevice *vdev = VIRTIO_DEVICE(n);
3020 int i, link_down;
3022 trace_virtio_net_post_load_device();
3023 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
3024 virtio_vdev_has_feature(vdev,
3025 VIRTIO_F_VERSION_1),
3026 virtio_vdev_has_feature(vdev,
3027 VIRTIO_NET_F_HASH_REPORT));
3029 /* MAC_TABLE_ENTRIES may be different from the saved image */
3030 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
3031 n->mac_table.in_use = 0;
3034 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
3035 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
3039 * curr_guest_offloads will be later overwritten by the
3040 * virtio_set_features_nocheck call done from the virtio_load.
3041 * Here we make sure it is preserved and restored accordingly
3042 * in the virtio_net_post_load_virtio callback.
3044 n->saved_guest_offloads = n->curr_guest_offloads;
3046 virtio_net_set_queue_pairs(n);
3048 /* Find the first multicast entry in the saved MAC filter */
3049 for (i = 0; i < n->mac_table.in_use; i++) {
3050 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
3051 break;
3054 n->mac_table.first_multi = i;
3056 /* nc.link_down can't be migrated, so infer link_down from the
3057 * link status bit in n->status */
3058 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
3059 for (i = 0; i < n->max_queue_pairs; i++) {
3060 qemu_get_subqueue(n->nic, i)->link_down = link_down;
3063 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
3064 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3065 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3066 QEMU_CLOCK_VIRTUAL,
3067 virtio_net_announce_timer, n);
3068 if (n->announce_timer.round) {
3069 timer_mod(n->announce_timer.tm,
3070 qemu_clock_get_ms(n->announce_timer.type));
3071 } else {
3072 qemu_announce_timer_del(&n->announce_timer, false);
3076 if (n->rss_data.enabled) {
3077 n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
3078 if (!n->rss_data.populate_hash) {
3079 if (!virtio_net_attach_epbf_rss(n)) {
3080 if (get_vhost_net(qemu_get_queue(n->nic)->peer)) {
3081 warn_report("Can't post-load eBPF RSS for vhost");
3082 } else {
3083 warn_report("Can't post-load eBPF RSS - "
3084 "fallback to software RSS");
3085 n->rss_data.enabled_software_rss = true;
3090 trace_virtio_net_rss_enable(n->rss_data.hash_types,
3091 n->rss_data.indirections_len,
3092 sizeof(n->rss_data.key));
3093 } else {
3094 trace_virtio_net_rss_disable();
3096 return 0;
3099 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
3101 VirtIONet *n = VIRTIO_NET(vdev);
3103 * The actual needed state is now in saved_guest_offloads,
3104 * see virtio_net_post_load_device for detail.
3105 * Restore it back and apply the desired offloads.
3107 n->curr_guest_offloads = n->saved_guest_offloads;
3108 if (peer_has_vnet_hdr(n)) {
3109 virtio_net_apply_guest_offloads(n);
3112 return 0;
3115 /* tx_waiting field of a VirtIONetQueue */
3116 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
3117 .name = "virtio-net-queue-tx_waiting",
3118 .fields = (VMStateField[]) {
3119 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
3120 VMSTATE_END_OF_LIST()
3124 static bool max_queue_pairs_gt_1(void *opaque, int version_id)
3126 return VIRTIO_NET(opaque)->max_queue_pairs > 1;
3129 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
3131 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
3132 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3135 static bool mac_table_fits(void *opaque, int version_id)
3137 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
3140 static bool mac_table_doesnt_fit(void *opaque, int version_id)
3142 return !mac_table_fits(opaque, version_id);
3145 /* This temporary type is shared by all the WITH_TMP methods
3146 * although only some fields are used by each.
3148 struct VirtIONetMigTmp {
3149 VirtIONet *parent;
3150 VirtIONetQueue *vqs_1;
3151 uint16_t curr_queue_pairs_1;
3152 uint8_t has_ufo;
3153 uint32_t has_vnet_hdr;
3156 /* The 2nd and subsequent tx_waiting flags are loaded later than
3157 * the 1st entry in the queue_pairs and only if there's more than one
3158 * entry. We use the tmp mechanism to calculate a temporary
3159 * pointer and count and also validate the count.
3162 static int virtio_net_tx_waiting_pre_save(void *opaque)
3164 struct VirtIONetMigTmp *tmp = opaque;
3166 tmp->vqs_1 = tmp->parent->vqs + 1;
3167 tmp->curr_queue_pairs_1 = tmp->parent->curr_queue_pairs - 1;
3168 if (tmp->parent->curr_queue_pairs == 0) {
3169 tmp->curr_queue_pairs_1 = 0;
3172 return 0;
3175 static int virtio_net_tx_waiting_pre_load(void *opaque)
3177 struct VirtIONetMigTmp *tmp = opaque;
3179 /* Reuse the pointer setup from save */
3180 virtio_net_tx_waiting_pre_save(opaque);
3182 if (tmp->parent->curr_queue_pairs > tmp->parent->max_queue_pairs) {
3183 error_report("virtio-net: curr_queue_pairs %x > max_queue_pairs %x",
3184 tmp->parent->curr_queue_pairs, tmp->parent->max_queue_pairs);
3186 return -EINVAL;
3189 return 0; /* all good */
3192 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
3193 .name = "virtio-net-tx_waiting",
3194 .pre_load = virtio_net_tx_waiting_pre_load,
3195 .pre_save = virtio_net_tx_waiting_pre_save,
3196 .fields = (VMStateField[]) {
3197 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
3198 curr_queue_pairs_1,
3199 vmstate_virtio_net_queue_tx_waiting,
3200 struct VirtIONetQueue),
3201 VMSTATE_END_OF_LIST()
3205 /* the 'has_ufo' flag is just tested; if the incoming stream has the
3206 * flag set we need to check that we have it
3208 static int virtio_net_ufo_post_load(void *opaque, int version_id)
3210 struct VirtIONetMigTmp *tmp = opaque;
3212 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
3213 error_report("virtio-net: saved image requires TUN_F_UFO support");
3214 return -EINVAL;
3217 return 0;
3220 static int virtio_net_ufo_pre_save(void *opaque)
3222 struct VirtIONetMigTmp *tmp = opaque;
3224 tmp->has_ufo = tmp->parent->has_ufo;
3226 return 0;
3229 static const VMStateDescription vmstate_virtio_net_has_ufo = {
3230 .name = "virtio-net-ufo",
3231 .post_load = virtio_net_ufo_post_load,
3232 .pre_save = virtio_net_ufo_pre_save,
3233 .fields = (VMStateField[]) {
3234 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
3235 VMSTATE_END_OF_LIST()
3239 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
3240 * flag set we need to check that we have it
3242 static int virtio_net_vnet_post_load(void *opaque, int version_id)
3244 struct VirtIONetMigTmp *tmp = opaque;
3246 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
3247 error_report("virtio-net: saved image requires vnet_hdr=on");
3248 return -EINVAL;
3251 return 0;
3254 static int virtio_net_vnet_pre_save(void *opaque)
3256 struct VirtIONetMigTmp *tmp = opaque;
3258 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
3260 return 0;
3263 static const VMStateDescription vmstate_virtio_net_has_vnet = {
3264 .name = "virtio-net-vnet",
3265 .post_load = virtio_net_vnet_post_load,
3266 .pre_save = virtio_net_vnet_pre_save,
3267 .fields = (VMStateField[]) {
3268 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
3269 VMSTATE_END_OF_LIST()
3273 static bool virtio_net_rss_needed(void *opaque)
3275 return VIRTIO_NET(opaque)->rss_data.enabled;
3278 static const VMStateDescription vmstate_virtio_net_rss = {
3279 .name = "virtio-net-device/rss",
3280 .version_id = 1,
3281 .minimum_version_id = 1,
3282 .needed = virtio_net_rss_needed,
3283 .fields = (VMStateField[]) {
3284 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
3285 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
3286 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
3287 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
3288 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
3289 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
3290 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
3291 VIRTIO_NET_RSS_MAX_KEY_SIZE),
3292 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
3293 rss_data.indirections_len, 0,
3294 vmstate_info_uint16, uint16_t),
3295 VMSTATE_END_OF_LIST()
3299 static const VMStateDescription vmstate_virtio_net_device = {
3300 .name = "virtio-net-device",
3301 .version_id = VIRTIO_NET_VM_VERSION,
3302 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3303 .post_load = virtio_net_post_load_device,
3304 .fields = (VMStateField[]) {
3305 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
3306 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
3307 vmstate_virtio_net_queue_tx_waiting,
3308 VirtIONetQueue),
3309 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3310 VMSTATE_UINT16(status, VirtIONet),
3311 VMSTATE_UINT8(promisc, VirtIONet),
3312 VMSTATE_UINT8(allmulti, VirtIONet),
3313 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3315 /* Guarded pair: If it fits we load it, else we throw it away
3316 * - can happen if source has a larger MAC table; post-load
3317 * sets flags in this case.
3319 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3320 0, mac_table_fits, mac_table.in_use,
3321 ETH_ALEN),
3322 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3323 mac_table.in_use, ETH_ALEN),
3325 /* Note: This is an array of uint32's that's always been saved as a
3326 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3327 * but based on the uint.
3329 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3330 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3331 vmstate_virtio_net_has_vnet),
3332 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3333 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3334 VMSTATE_UINT8(alluni, VirtIONet),
3335 VMSTATE_UINT8(nomulti, VirtIONet),
3336 VMSTATE_UINT8(nouni, VirtIONet),
3337 VMSTATE_UINT8(nobcast, VirtIONet),
3338 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3339 vmstate_virtio_net_has_ufo),
3340 VMSTATE_SINGLE_TEST(max_queue_pairs, VirtIONet, max_queue_pairs_gt_1, 0,
3341 vmstate_info_uint16_equal, uint16_t),
3342 VMSTATE_UINT16_TEST(curr_queue_pairs, VirtIONet, max_queue_pairs_gt_1),
3343 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3344 vmstate_virtio_net_tx_waiting),
3345 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3346 has_ctrl_guest_offloads),
3347 VMSTATE_END_OF_LIST()
3349 .subsections = (const VMStateDescription * []) {
3350 &vmstate_virtio_net_rss,
3351 NULL
3355 static NetClientInfo net_virtio_info = {
3356 .type = NET_CLIENT_DRIVER_NIC,
3357 .size = sizeof(NICState),
3358 .can_receive = virtio_net_can_receive,
3359 .receive = virtio_net_receive,
3360 .link_status_changed = virtio_net_set_link_status,
3361 .query_rx_filter = virtio_net_query_rxfilter,
3362 .announce = virtio_net_announce,
3365 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3367 VirtIONet *n = VIRTIO_NET(vdev);
3368 NetClientState *nc;
3369 assert(n->vhost_started);
3370 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3371 /* Must guard against invalid features and a bogus queue index
3372 * being set by a malicious guest, or slipping in through a buggy
3373 * migration stream.
3375 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3376 qemu_log_mask(LOG_GUEST_ERROR,
3377 "%s: bogus vq index ignored\n", __func__);
3378 return false;
3380 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3381 } else {
3382 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3385 * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3386 * used as the configure interrupt's index. If the driver does not
3387 * support it, the function returns false
3390 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3391 return vhost_net_config_pending(get_vhost_net(nc->peer));
3393 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3396 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3397 bool mask)
3399 VirtIONet *n = VIRTIO_NET(vdev);
3400 NetClientState *nc;
3401 assert(n->vhost_started);
3402 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
3403 /* Must guard against invalid features and a bogus queue index
3404 * being set by a malicious guest, or slipping in through a buggy
3405 * migration stream.
3407 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
3408 qemu_log_mask(LOG_GUEST_ERROR,
3409 "%s: bogus vq index ignored\n", __func__);
3410 return;
3412 nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
3413 } else {
3414 nc = qemu_get_subqueue(n->nic, vq2q(idx));
3417 * Check for the configure interrupt: VIRTIO_CONFIG_IRQ_IDX (-1) is
3418 * used as the configure interrupt's index. If the driver does not
3419 * support it, the function returns
3422 if (idx == VIRTIO_CONFIG_IRQ_IDX) {
3423 vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
3424 return;
3426 vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
3429 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3431 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3433 n->config_size = virtio_get_config_size(&cfg_size_params, host_features);
3436 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3437 const char *type)
3440 * The name can be NULL; in that case the netclient name will be type.x.
3442 assert(type != NULL);
3444 g_free(n->netclient_name);
3445 g_free(n->netclient_type);
3446 n->netclient_name = g_strdup(name);
3447 n->netclient_type = g_strdup(type);
3450 static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
3452 HotplugHandler *hotplug_ctrl;
3453 PCIDevice *pci_dev;
3454 Error *err = NULL;
3456 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3457 if (hotplug_ctrl) {
3458 pci_dev = PCI_DEVICE(dev);
3459 pci_dev->partially_hotplugged = true;
3460 hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
3461 if (err) {
3462 error_report_err(err);
3463 return false;
3465 } else {
3466 return false;
3468 return true;
3471 static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
3472 Error **errp)
3474 Error *err = NULL;
3475 HotplugHandler *hotplug_ctrl;
3476 PCIDevice *pdev = PCI_DEVICE(dev);
3477 BusState *primary_bus;
3479 if (!pdev->partially_hotplugged) {
3480 return true;
3482 primary_bus = dev->parent_bus;
3483 if (!primary_bus) {
3484 error_setg(errp, "virtio_net: couldn't find primary bus");
3485 return false;
3487 qdev_set_parent_bus(dev, primary_bus, &error_abort);
3488 qatomic_set(&n->failover_primary_hidden, false);
3489 hotplug_ctrl = qdev_get_hotplug_handler(dev);
3490 if (hotplug_ctrl) {
3491 hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
3492 if (err) {
3493 goto out;
3495 hotplug_handler_plug(hotplug_ctrl, dev, &err);
3497 pdev->partially_hotplugged = false;
3499 out:
3500 error_propagate(errp, err);
3501 return !err;
3504 static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
3506 bool should_be_hidden;
3507 Error *err = NULL;
3508 DeviceState *dev = failover_find_primary_device(n);
3510 if (!dev) {
3511 return;
3514 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3516 if (migration_in_setup(s) && !should_be_hidden) {
3517 if (failover_unplug_primary(n, dev)) {
3518 vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
3519 qapi_event_send_unplug_primary(dev->id);
3520 qatomic_set(&n->failover_primary_hidden, true);
3521 } else {
3522 warn_report("couldn't unplug primary device");
3524 } else if (migration_has_failed(s)) {
3525 /* We already unplugged the device; let's plug it back */
3526 if (!failover_replug_primary(n, dev, &err)) {
3527 if (err) {
3528 error_report_err(err);
3534 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3536 MigrationState *s = data;
3537 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3538 virtio_net_handle_migration_primary(n, s);
3541 static bool failover_hide_primary_device(DeviceListener *listener,
3542 const QDict *device_opts,
3543 bool from_json,
3544 Error **errp)
3546 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3547 const char *standby_id;
3549 if (!device_opts) {
3550 return false;
3553 if (!qdict_haskey(device_opts, "failover_pair_id")) {
3554 return false;
3557 if (!qdict_haskey(device_opts, "id")) {
3558 error_setg(errp, "Device with failover_pair_id needs to have id");
3559 return false;
3562 standby_id = qdict_get_str(device_opts, "failover_pair_id");
3563 if (g_strcmp0(standby_id, n->netclient_name) != 0) {
3564 return false;
3568 * The hide helper can be called several times for a given device.
3569 * Check that there is only one primary for a virtio-net device, but
3570 * don't duplicate the qdict several times if it's called for the same
3571 * device.
3573 if (n->primary_opts) {
3574 const char *old, *new;
3575 /* devices with failover_pair_id always have an id */
3576 old = qdict_get_str(n->primary_opts, "id");
3577 new = qdict_get_str(device_opts, "id");
3578 if (strcmp(old, new) != 0) {
3579 error_setg(errp, "Cannot attach more than one primary device to "
3580 "'%s': '%s' and '%s'", n->netclient_name, old, new);
3581 return false;
3583 } else {
3584 n->primary_opts = qdict_clone_shallow(device_opts);
3585 n->primary_opts_from_json = from_json;
3588 /* failover_primary_hidden is set during feature negotiation */
3589 return qatomic_read(&n->failover_primary_hidden);
3592 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3594 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3595 VirtIONet *n = VIRTIO_NET(dev);
3596 NetClientState *nc;
3597 int i;
3599 if (n->net_conf.mtu) {
3600 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3603 if (n->net_conf.duplex_str) {
3604 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3605 n->net_conf.duplex = DUPLEX_HALF;
3606 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3607 n->net_conf.duplex = DUPLEX_FULL;
3608 } else {
3609 error_setg(errp, "'duplex' must be 'half' or 'full'");
3610 return;
3612 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3613 } else {
3614 n->net_conf.duplex = DUPLEX_UNKNOWN;
3617 if (n->net_conf.speed < SPEED_UNKNOWN) {
3618 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3619 return;
3621 if (n->net_conf.speed >= 0) {
3622 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3625 if (n->failover) {
3626 n->primary_listener.hide_device = failover_hide_primary_device;
3627 qatomic_set(&n->failover_primary_hidden, true);
3628 device_listener_register(&n->primary_listener);
3629 n->migration_state.notify = virtio_net_migration_state_notifier;
3630 add_migration_state_change_notifier(&n->migration_state);
3631 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3634 virtio_net_set_config_size(n, n->host_features);
3635 virtio_init(vdev, VIRTIO_ID_NET, n->config_size);
3638 * We set a lower limit on RX queue size to what it always was.
3639 * Guests that want a smaller ring can always resize it without
3640 * help from us (using virtio 1 and up).
3642 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3643 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3644 !is_power_of_2(n->net_conf.rx_queue_size)) {
3645 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3646 "must be a power of 2 between %d and %d.",
3647 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3648 VIRTQUEUE_MAX_SIZE);
3649 virtio_cleanup(vdev);
3650 return;
3653 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3654 n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
3655 !is_power_of_2(n->net_conf.tx_queue_size)) {
3656 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3657 "must be a power of 2 between %d and %d",
3658 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3659 virtio_net_max_tx_queue_size(n));
3660 virtio_cleanup(vdev);
3661 return;
3664 n->max_ncs = MAX(n->nic_conf.peers.queues, 1);
3667 * Figure out the number of datapath queue pairs, since the backend
3668 * could provide a control queue via peers as well.
3670 if (n->nic_conf.peers.queues) {
3671 for (i = 0; i < n->max_ncs; i++) {
3672 if (n->nic_conf.peers.ncs[i]->is_datapath) {
3673 ++n->max_queue_pairs;
3677 n->max_queue_pairs = MAX(n->max_queue_pairs, 1);
3679 if (n->max_queue_pairs * 2 + 1 > VIRTIO_QUEUE_MAX) {
3680 error_setg(errp, "Invalid number of queue pairs (= %" PRIu32 "), "
3681 "must be a positive integer less than %d.",
3682 n->max_queue_pairs, (VIRTIO_QUEUE_MAX - 1) / 2);
3683 virtio_cleanup(vdev);
3684 return;
3686 n->vqs = g_new0(VirtIONetQueue, n->max_queue_pairs);
3687 n->curr_queue_pairs = 1;
3688 n->tx_timeout = n->net_conf.txtimer;
3690 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3691 && strcmp(n->net_conf.tx, "bh")) {
3692 warn_report("virtio-net: "
3693 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3694 n->net_conf.tx);
3695 error_printf("Defaulting to \"bh\"");
3698 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3699 n->net_conf.tx_queue_size);
3701 for (i = 0; i < n->max_queue_pairs; i++) {
3702 virtio_net_add_queue(n, i);
3705 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3706 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3707 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3708 n->status = VIRTIO_NET_S_LINK_UP;
3709 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3710 QEMU_CLOCK_VIRTUAL,
3711 virtio_net_announce_timer, n);
3712 n->announce_timer.round = 0;
3714 if (n->netclient_type) {
3716 * This happens when virtio_net_set_netclient_name has been called.
3718 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3719 n->netclient_type, n->netclient_name, n);
3720 } else {
3721 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3722 object_get_typename(OBJECT(dev)), dev->id, n);
3725 for (i = 0; i < n->max_queue_pairs; i++) {
3726 n->nic->ncs[i].do_not_pad = true;
3729 peer_test_vnet_hdr(n);
3730 if (peer_has_vnet_hdr(n)) {
3731 for (i = 0; i < n->max_queue_pairs; i++) {
3732 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3734 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3735 } else {
3736 n->host_hdr_len = 0;
3739 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3741 n->vqs[0].tx_waiting = 0;
3742 n->tx_burst = n->net_conf.txburst;
3743 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3744 n->promisc = 1; /* for compatibility */
3746 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3748 n->vlans = g_malloc0(MAX_VLAN >> 3);
3750 nc = qemu_get_queue(n->nic);
3751 nc->rxfilter_notify_enabled = 1;
3753 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3754 struct virtio_net_config netcfg = {};
3755 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3756 vhost_net_set_config(get_vhost_net(nc->peer),
3757 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_FRONTEND);
3759 QTAILQ_INIT(&n->rsc_chains);
3760 n->qdev = dev;
3762 net_rx_pkt_init(&n->rx_pkt);
3764 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3765 virtio_net_load_ebpf(n);
3769 static void virtio_net_device_unrealize(DeviceState *dev)
3771 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3772 VirtIONet *n = VIRTIO_NET(dev);
3773 int i, max_queue_pairs;
3775 if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
3776 virtio_net_unload_ebpf(n);
3779 /* This will stop vhost backend if appropriate. */
3780 virtio_net_set_status(vdev, 0);
3782 g_free(n->netclient_name);
3783 n->netclient_name = NULL;
3784 g_free(n->netclient_type);
3785 n->netclient_type = NULL;
3787 g_free(n->mac_table.macs);
3788 g_free(n->vlans);
3790 if (n->failover) {
3791 qobject_unref(n->primary_opts);
3792 device_listener_unregister(&n->primary_listener);
3793 remove_migration_state_change_notifier(&n->migration_state);
3794 } else {
3795 assert(n->primary_opts == NULL);
3798 max_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
3799 for (i = 0; i < max_queue_pairs; i++) {
3800 virtio_net_del_queue(n, i);
3802 /* also delete the control vq */
3803 virtio_del_queue(vdev, max_queue_pairs * 2);
3804 qemu_announce_timer_del(&n->announce_timer, false);
3805 g_free(n->vqs);
3806 qemu_del_nic(n->nic);
3807 virtio_net_rsc_cleanup(n);
3808 g_free(n->rss_data.indirections_table);
3809 net_rx_pkt_uninit(n->rx_pkt);
3810 virtio_cleanup(vdev);
3813 static void virtio_net_instance_init(Object *obj)
3815 VirtIONet *n = VIRTIO_NET(obj);
3818 * The default config_size is sizeof(struct virtio_net_config).
3819 * Can be overridden with virtio_net_set_config_size.
3821 n->config_size = sizeof(struct virtio_net_config);
3822 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3823 "bootindex", "/ethernet-phy@0",
3824 DEVICE(n));
3826 ebpf_rss_init(&n->ebpf_rss);
3829 static int virtio_net_pre_save(void *opaque)
3831 VirtIONet *n = opaque;
3833 /* At this point, backend must be stopped, otherwise
3834 * it might keep writing to memory. */
3835 assert(!n->vhost_started);
3837 return 0;
3840 static bool primary_unplug_pending(void *opaque)
3842 DeviceState *dev = opaque;
3843 DeviceState *primary;
3844 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3845 VirtIONet *n = VIRTIO_NET(vdev);
3847 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3848 return false;
3850 primary = failover_find_primary_device(n);
3851 return primary ? primary->pending_deleted_event : false;
3854 static bool dev_unplug_pending(void *opaque)
3856 DeviceState *dev = opaque;
3857 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3859 return vdc->primary_unplug_pending(dev);
3862 static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev)
3864 VirtIONet *n = VIRTIO_NET(vdev);
3865 NetClientState *nc = qemu_get_queue(n->nic);
3866 struct vhost_net *net = get_vhost_net(nc->peer);
3867 return &net->dev;
3870 static const VMStateDescription vmstate_virtio_net = {
3871 .name = "virtio-net",
3872 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3873 .version_id = VIRTIO_NET_VM_VERSION,
3874 .fields = (VMStateField[]) {
3875 VMSTATE_VIRTIO_DEVICE,
3876 VMSTATE_END_OF_LIST()
3878 .pre_save = virtio_net_pre_save,
3879 .dev_unplug_pending = dev_unplug_pending,
3882 static Property virtio_net_properties[] = {
3883 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3884 VIRTIO_NET_F_CSUM, true),
3885 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3886 VIRTIO_NET_F_GUEST_CSUM, true),
3887 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3888 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3889 VIRTIO_NET_F_GUEST_TSO4, true),
3890 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3891 VIRTIO_NET_F_GUEST_TSO6, true),
3892 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3893 VIRTIO_NET_F_GUEST_ECN, true),
3894 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3895 VIRTIO_NET_F_GUEST_UFO, true),
3896 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3897 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3898 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3899 VIRTIO_NET_F_HOST_TSO4, true),
3900 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3901 VIRTIO_NET_F_HOST_TSO6, true),
3902 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3903 VIRTIO_NET_F_HOST_ECN, true),
3904 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3905 VIRTIO_NET_F_HOST_UFO, true),
3906 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3907 VIRTIO_NET_F_MRG_RXBUF, true),
3908 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3909 VIRTIO_NET_F_STATUS, true),
3910 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3911 VIRTIO_NET_F_CTRL_VQ, true),
3912 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3913 VIRTIO_NET_F_CTRL_RX, true),
3914 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3915 VIRTIO_NET_F_CTRL_VLAN, true),
3916 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3917 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3918 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3919 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3920 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3921 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3922 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3923 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3924 VIRTIO_NET_F_RSS, false),
3925 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3926 VIRTIO_NET_F_HASH_REPORT, false),
3927 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3928 VIRTIO_NET_F_RSC_EXT, false),
3929 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3930 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3931 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3932 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3933 TX_TIMER_INTERVAL),
3934 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3935 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3936 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3937 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3938 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3939 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3940 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3941 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3942 true),
3943 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3944 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3945 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3946 DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
3947 VIRTIO_NET_F_GUEST_USO4, true),
3948 DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
3949 VIRTIO_NET_F_GUEST_USO6, true),
3950 DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
3951 VIRTIO_NET_F_HOST_USO, true),
3952 DEFINE_PROP_END_OF_LIST(),
3955 static void virtio_net_class_init(ObjectClass *klass, void *data)
3957 DeviceClass *dc = DEVICE_CLASS(klass);
3958 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3960 device_class_set_props(dc, virtio_net_properties);
3961 dc->vmsd = &vmstate_virtio_net;
3962 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3963 vdc->realize = virtio_net_device_realize;
3964 vdc->unrealize = virtio_net_device_unrealize;
3965 vdc->get_config = virtio_net_get_config;
3966 vdc->set_config = virtio_net_set_config;
3967 vdc->get_features = virtio_net_get_features;
3968 vdc->set_features = virtio_net_set_features;
3969 vdc->bad_features = virtio_net_bad_features;
3970 vdc->reset = virtio_net_reset;
3971 vdc->queue_reset = virtio_net_queue_reset;
3972 vdc->queue_enable = virtio_net_queue_enable;
3973 vdc->set_status = virtio_net_set_status;
3974 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3975 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3976 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3977 vdc->post_load = virtio_net_post_load_virtio;
3978 vdc->vmsd = &vmstate_virtio_net_device;
3979 vdc->primary_unplug_pending = primary_unplug_pending;
3980 vdc->get_vhost = virtio_net_get_vhost;
3981 vdc->toggle_device_iotlb = vhost_toggle_device_iotlb;
3984 static const TypeInfo virtio_net_info = {
3985 .name = TYPE_VIRTIO_NET,
3986 .parent = TYPE_VIRTIO_DEVICE,
3987 .instance_size = sizeof(VirtIONet),
3988 .instance_init = virtio_net_instance_init,
3989 .class_init = virtio_net_class_init,
3992 static void virtio_register_types(void)
3994 type_register_static(&virtio_net_info);
3997 type_init(virtio_register_types)