failover: Remove memory leak
[qemu/kevin.git] hw/net/virtio-net.c
blob 70fa372c0834647773160684e492652f13a7a121
1 /*
2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include "qemu/osdep.h"
15 #include "qemu/atomic.h"
16 #include "qemu/iov.h"
17 #include "qemu/main-loop.h"
18 #include "qemu/module.h"
19 #include "hw/virtio/virtio.h"
20 #include "net/net.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
23 #include "qemu/error-report.h"
24 #include "qemu/timer.h"
25 #include "qemu/option.h"
26 #include "qemu/option_int.h"
27 #include "qemu/config-file.h"
28 #include "qapi/qmp/qdict.h"
29 #include "hw/virtio/virtio-net.h"
30 #include "net/vhost_net.h"
31 #include "net/announce.h"
32 #include "hw/virtio/virtio-bus.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-net.h"
35 #include "hw/qdev-properties.h"
36 #include "qapi/qapi-types-migration.h"
37 #include "qapi/qapi-events-migration.h"
38 #include "hw/virtio/virtio-access.h"
39 #include "migration/misc.h"
40 #include "standard-headers/linux/ethtool.h"
41 #include "sysemu/sysemu.h"
42 #include "trace.h"
43 #include "monitor/qdev.h"
44 #include "hw/pci/pci.h"
45 #include "net_rx_pkt.h"
46 #include "hw/virtio/vhost.h"
48 #define VIRTIO_NET_VM_VERSION 11
50 #define MAC_TABLE_ENTRIES 64
51 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
53 /* previously fixed value */
54 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
55 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
57 /* for now, only allow larger queues; with virtio-1, guest can downsize */
58 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
59 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
61 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
63 #define VIRTIO_NET_TCP_FLAG 0x3F
64 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
66 /* IPv4 max payload, 16 bits in the header */
67 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
68 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
70 /* header length value in ip header without option */
71 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
73 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
74 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
76 /* Purge coalesced packets timer interval. This value affects performance
77    a lot and should be tuned carefully; '300000' (300us) is the recommended
78    value to pass the WHQL test, while '50000' can gain 2x netperf throughput
79    with tso/gso/gro 'off'. */
80 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
82 #define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
83 VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
84 VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
85 VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
86 VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
87 VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
88 VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
89 VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
90 VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
92 static VirtIOFeature feature_sizes[] = {
93 {.flags = 1ULL << VIRTIO_NET_F_MAC,
94 .end = endof(struct virtio_net_config, mac)},
95 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
96 .end = endof(struct virtio_net_config, status)},
97 {.flags = 1ULL << VIRTIO_NET_F_MQ,
98 .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
99 {.flags = 1ULL << VIRTIO_NET_F_MTU,
100 .end = endof(struct virtio_net_config, mtu)},
101 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
102 .end = endof(struct virtio_net_config, duplex)},
103 {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
104 .end = endof(struct virtio_net_config, supported_hash_types)},
108 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
110 VirtIONet *n = qemu_get_nic_opaque(nc);
112 return &n->vqs[nc->queue_index];
115 static int vq2q(int queue_index)
117 return queue_index / 2;
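/*
 * Data virtqueues come in rx/tx pairs (vq 0 = rx0, vq 1 = tx0,
 * vq 2 = rx1, ...), with the control vq after all data queues, so
 * dividing a vq index by two yields its queue-pair index.
 */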
120 /* TODO
121 * - we could suppress RX interrupt if we were so inclined.
124 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
126 VirtIONet *n = VIRTIO_NET(vdev);
127 struct virtio_net_config netcfg;
128 NetClientState *nc = qemu_get_queue(n->nic);
130 int ret = 0;
131 memset(&netcfg, 0 , sizeof(struct virtio_net_config));
132 virtio_stw_p(vdev, &netcfg.status, n->status);
133 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
134 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
135 memcpy(netcfg.mac, n->mac, ETH_ALEN);
136 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
137 netcfg.duplex = n->net_conf.duplex;
138 netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
139 virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
140 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
141 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
142 virtio_stl_p(vdev, &netcfg.supported_hash_types,
143 VIRTIO_NET_RSS_SUPPORTED_HASHES);
144 memcpy(config, &netcfg, n->config_size);
147 * Is this VDPA? No peer means not VDPA: there's no way to
148 * disconnect/reconnect a VDPA peer.
150 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
151 ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
152 n->config_size);
153 if (ret != -1) {
154 memcpy(config, &netcfg, n->config_size);
159 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
161 VirtIONet *n = VIRTIO_NET(vdev);
162 struct virtio_net_config netcfg = {};
163 NetClientState *nc = qemu_get_queue(n->nic);
165 memcpy(&netcfg, config, n->config_size);
167 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
168 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
169 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
170 memcpy(n->mac, netcfg.mac, ETH_ALEN);
171 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
175 * Is this VDPA? No peer means not VDPA: there's no way to
176 * disconnect/reconnect a VDPA peer.
178 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
179 vhost_net_set_config(get_vhost_net(nc->peer),
180 (uint8_t *)&netcfg, 0, n->config_size,
181 VHOST_SET_CONFIG_TYPE_MASTER);
185 static bool virtio_net_started(VirtIONet *n, uint8_t status)
187 VirtIODevice *vdev = VIRTIO_DEVICE(n);
188 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
189 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
192 static void virtio_net_announce_notify(VirtIONet *net)
194 VirtIODevice *vdev = VIRTIO_DEVICE(net);
195 trace_virtio_net_announce_notify();
197 net->status |= VIRTIO_NET_S_ANNOUNCE;
198 virtio_notify_config(vdev);
201 static void virtio_net_announce_timer(void *opaque)
203 VirtIONet *n = opaque;
204 trace_virtio_net_announce_timer(n->announce_timer.round);
206 n->announce_timer.round--;
207 virtio_net_announce_notify(n);
210 static void virtio_net_announce(NetClientState *nc)
212 VirtIONet *n = qemu_get_nic_opaque(nc);
213 VirtIODevice *vdev = VIRTIO_DEVICE(n);
216 * Make sure the virtio migration announcement timer isn't running.
217 * If it is, let it trigger the announcement so that we do not cause
218 * confusion.
220 if (n->announce_timer.round) {
221 return;
224 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
225 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
226 virtio_net_announce_notify(n);
230 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
232 VirtIODevice *vdev = VIRTIO_DEVICE(n);
233 NetClientState *nc = qemu_get_queue(n->nic);
234 int queues = n->multiqueue ? n->max_queues : 1;
236 if (!get_vhost_net(nc->peer)) {
237 return;
240 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
241 !!n->vhost_started) {
242 return;
244 if (!n->vhost_started) {
245 int r, i;
247 if (n->needs_vnet_hdr_swap) {
248 error_report("backend does not support %s vnet headers; "
249 "falling back on userspace virtio",
250 virtio_is_big_endian(vdev) ? "BE" : "LE");
251 return;
254 /* Any packets outstanding? Purge them to avoid touching rings
255 * when vhost is running.
257 for (i = 0; i < queues; i++) {
258 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
260 /* Purge both directions: TX and RX. */
261 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
262 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
265 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
266 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
267 if (r < 0) {
268 error_report("%uBytes MTU not supported by the backend",
269 n->net_conf.mtu);
271 return;
275 n->vhost_started = 1;
276 r = vhost_net_start(vdev, n->nic->ncs, queues);
277 if (r < 0) {
278 error_report("unable to start vhost net: %d: "
279 "falling back on userspace virtio", -r);
280 n->vhost_started = 0;
282 } else {
283 vhost_net_stop(vdev, n->nic->ncs, queues);
284 n->vhost_started = 0;
288 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
289 NetClientState *peer,
290 bool enable)
292 if (virtio_is_big_endian(vdev)) {
293 return qemu_set_vnet_be(peer, enable);
294 } else {
295 return qemu_set_vnet_le(peer, enable);
299 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
300 int queues, bool enable)
302 int i;
304 for (i = 0; i < queues; i++) {
305 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
306 enable) {
307 while (--i >= 0) {
308 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
311 return true;
315 return false;
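/*
 * Returns true when the backend could not be switched, i.e. the device
 * must byte-swap vnet headers itself; on a partial failure while
 * enabling, the queues already switched are rolled back first.
 */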
318 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
320 VirtIODevice *vdev = VIRTIO_DEVICE(n);
321 int queues = n->multiqueue ? n->max_queues : 1;
323 if (virtio_net_started(n, status)) {
324 /* Before using the device, we tell the network backend about the
325 * endianness to use when parsing vnet headers. If the backend
326 * can't do it, we fall back to fixing the headers in the core
327 * virtio-net code.
329 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
330 queues, true);
331 } else if (virtio_net_started(n, vdev->status)) {
332 /* After using the device, we need to reset the network backend to
333 * the default (guest native endianness), otherwise the guest may
334 * lose network connectivity if it is rebooted into a different
335 * endianness.
337 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
341 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
343 unsigned int dropped = virtqueue_drop_all(vq);
344 if (dropped) {
345 virtio_notify(vdev, vq);
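/*
 * virtqueue_drop_all() consumes every outstanding tx buffer, so the
 * guest is notified afterwards to let it reclaim them.
 */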
349 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
351 VirtIONet *n = VIRTIO_NET(vdev);
352 VirtIONetQueue *q;
353 int i;
354 uint8_t queue_status;
356 virtio_net_vnet_endian_status(n, status);
357 virtio_net_vhost_status(n, status);
359 for (i = 0; i < n->max_queues; i++) {
360 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
361 bool queue_started;
362 q = &n->vqs[i];
364 if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
365 queue_status = 0;
366 } else {
367 queue_status = status;
369 queue_started =
370 virtio_net_started(n, queue_status) && !n->vhost_started;
372 if (queue_started) {
373 qemu_flush_queued_packets(ncs);
376 if (!q->tx_waiting) {
377 continue;
380 if (queue_started) {
381 if (q->tx_timer) {
382 timer_mod(q->tx_timer,
383 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
384 } else {
385 qemu_bh_schedule(q->tx_bh);
387 } else {
388 if (q->tx_timer) {
389 timer_del(q->tx_timer);
390 } else {
391 qemu_bh_cancel(q->tx_bh);
393 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
394 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
395 vdev->vm_running) {
396 /* if tx is waiting, we likely have some packets in the tx queue
397 * and have disabled notification */
398 q->tx_waiting = 0;
399 virtio_queue_set_notification(q->tx_vq, 1);
400 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
406 static void virtio_net_set_link_status(NetClientState *nc)
408 VirtIONet *n = qemu_get_nic_opaque(nc);
409 VirtIODevice *vdev = VIRTIO_DEVICE(n);
410 uint16_t old_status = n->status;
412 if (nc->link_down)
413 n->status &= ~VIRTIO_NET_S_LINK_UP;
414 else
415 n->status |= VIRTIO_NET_S_LINK_UP;
417 if (n->status != old_status)
418 virtio_notify_config(vdev);
420 virtio_net_set_status(vdev, vdev->status);
423 static void rxfilter_notify(NetClientState *nc)
425 VirtIONet *n = qemu_get_nic_opaque(nc);
427 if (nc->rxfilter_notify_enabled) {
428 char *path = object_get_canonical_path(OBJECT(n->qdev));
429 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
430 n->netclient_name, path);
431 g_free(path);
433 /* disable event notification to avoid event flooding */
434 nc->rxfilter_notify_enabled = 0;
438 static intList *get_vlan_table(VirtIONet *n)
440 intList *list, *entry;
441 int i, j;
443 list = NULL;
444 for (i = 0; i < MAX_VLAN >> 5; i++) {
445 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
446 if (n->vlans[i] & (1U << j)) {
447 entry = g_malloc0(sizeof(*entry));
448 entry->value = (i << 5) + j;
449 entry->next = list;
450 list = entry;
455 return list;
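/*
 * n->vlans is a bitmap of all 4096 VLAN ids held in 32-bit words;
 * a set bit j in word i corresponds to VLAN id (i << 5) + j, which
 * is what the loop above decodes into the returned list.
 */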
458 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
460 VirtIONet *n = qemu_get_nic_opaque(nc);
461 VirtIODevice *vdev = VIRTIO_DEVICE(n);
462 RxFilterInfo *info;
463 strList *str_list, *entry;
464 int i;
466 info = g_malloc0(sizeof(*info));
467 info->name = g_strdup(nc->name);
468 info->promiscuous = n->promisc;
470 if (n->nouni) {
471 info->unicast = RX_STATE_NONE;
472 } else if (n->alluni) {
473 info->unicast = RX_STATE_ALL;
474 } else {
475 info->unicast = RX_STATE_NORMAL;
478 if (n->nomulti) {
479 info->multicast = RX_STATE_NONE;
480 } else if (n->allmulti) {
481 info->multicast = RX_STATE_ALL;
482 } else {
483 info->multicast = RX_STATE_NORMAL;
486 info->broadcast_allowed = n->nobcast;
487 info->multicast_overflow = n->mac_table.multi_overflow;
488 info->unicast_overflow = n->mac_table.uni_overflow;
490 info->main_mac = qemu_mac_strdup_printf(n->mac);
492 str_list = NULL;
493 for (i = 0; i < n->mac_table.first_multi; i++) {
494 entry = g_malloc0(sizeof(*entry));
495 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
496 entry->next = str_list;
497 str_list = entry;
499 info->unicast_table = str_list;
501 str_list = NULL;
502 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
503 entry = g_malloc0(sizeof(*entry));
504 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
505 entry->next = str_list;
506 str_list = entry;
508 info->multicast_table = str_list;
509 info->vlan_table = get_vlan_table(n);
511 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
512 info->vlan = RX_STATE_ALL;
513 } else if (!info->vlan_table) {
514 info->vlan = RX_STATE_NONE;
515 } else {
516 info->vlan = RX_STATE_NORMAL;
519 /* enable event notification after query */
520 nc->rxfilter_notify_enabled = 1;
522 return info;
525 static void virtio_net_reset(VirtIODevice *vdev)
527 VirtIONet *n = VIRTIO_NET(vdev);
528 int i;
530 /* Reset back to compatibility mode */
531 n->promisc = 1;
532 n->allmulti = 0;
533 n->alluni = 0;
534 n->nomulti = 0;
535 n->nouni = 0;
536 n->nobcast = 0;
537 /* multiqueue is disabled by default */
538 n->curr_queues = 1;
539 timer_del(n->announce_timer.tm);
540 n->announce_timer.round = 0;
541 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
543 /* Flush any MAC and VLAN filter table state */
544 n->mac_table.in_use = 0;
545 n->mac_table.first_multi = 0;
546 n->mac_table.multi_overflow = 0;
547 n->mac_table.uni_overflow = 0;
548 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
549 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
550 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
551 memset(n->vlans, 0, MAX_VLAN >> 3);
553 /* Flush any async TX */
554 for (i = 0; i < n->max_queues; i++) {
555 NetClientState *nc = qemu_get_subqueue(n->nic, i);
557 if (nc->peer) {
558 qemu_flush_or_purge_queued_packets(nc->peer, true);
559 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
564 static void peer_test_vnet_hdr(VirtIONet *n)
566 NetClientState *nc = qemu_get_queue(n->nic);
567 if (!nc->peer) {
568 return;
571 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
574 static int peer_has_vnet_hdr(VirtIONet *n)
576 return n->has_vnet_hdr;
579 static int peer_has_ufo(VirtIONet *n)
581 if (!peer_has_vnet_hdr(n))
582 return 0;
584 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
586 return n->has_ufo;
589 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
590 int version_1, int hash_report)
592 int i;
593 NetClientState *nc;
595 n->mergeable_rx_bufs = mergeable_rx_bufs;
597 if (version_1) {
598 n->guest_hdr_len = hash_report ?
599 sizeof(struct virtio_net_hdr_v1_hash) :
600 sizeof(struct virtio_net_hdr_mrg_rxbuf);
601 n->rss_data.populate_hash = !!hash_report;
602 } else {
603 n->guest_hdr_len = n->mergeable_rx_bufs ?
604 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
605 sizeof(struct virtio_net_hdr);
608 for (i = 0; i < n->max_queues; i++) {
609 nc = qemu_get_subqueue(n->nic, i);
611 if (peer_has_vnet_hdr(n) &&
612 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
613 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
614 n->host_hdr_len = n->guest_hdr_len;
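/*
 * In short: virtio-1 devices always use the mergeable-rxbuf header
 * layout (extended with a hash field when hash reporting is on),
 * while legacy devices use the short header unless
 * VIRTIO_NET_F_MRG_RXBUF was negotiated.
 */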
619 static int virtio_net_max_tx_queue_size(VirtIONet *n)
621 NetClientState *peer = n->nic_conf.peers.ncs[0];
624 * Backends other than vhost-user don't support max queue size.
626 if (!peer) {
627 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
630 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
631 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
634 return VIRTQUEUE_MAX_SIZE;
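/*
 * Only vhost-user peers can handle a tx queue larger than the
 * default here; every other backend keeps the historical 256-entry
 * size.
 */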
637 static int peer_attach(VirtIONet *n, int index)
639 NetClientState *nc = qemu_get_subqueue(n->nic, index);
641 if (!nc->peer) {
642 return 0;
645 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
646 vhost_set_vring_enable(nc->peer, 1);
649 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
650 return 0;
653 if (n->max_queues == 1) {
654 return 0;
657 return tap_enable(nc->peer);
660 static int peer_detach(VirtIONet *n, int index)
662 NetClientState *nc = qemu_get_subqueue(n->nic, index);
664 if (!nc->peer) {
665 return 0;
668 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
669 vhost_set_vring_enable(nc->peer, 0);
672 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
673 return 0;
676 return tap_disable(nc->peer);
679 static void virtio_net_set_queues(VirtIONet *n)
681 int i;
682 int r;
684 if (n->nic->peer_deleted) {
685 return;
688 for (i = 0; i < n->max_queues; i++) {
689 if (i < n->curr_queues) {
690 r = peer_attach(n, i);
691 assert(!r);
692 } else {
693 r = peer_detach(n, i);
694 assert(!r);
699 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
701 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
702 Error **errp)
704 VirtIONet *n = VIRTIO_NET(vdev);
705 NetClientState *nc = qemu_get_queue(n->nic);
707 /* First, sync all possible virtio-net supported features */
708 features |= n->host_features;
710 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
712 if (!peer_has_vnet_hdr(n)) {
713 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
714 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
715 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
716 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
718 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
719 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
720 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
721 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
723 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
726 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
727 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
728 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
731 if (!get_vhost_net(nc->peer)) {
732 return features;
735 virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
736 virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
737 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
738 vdev->backend_features = features;
740 if (n->mtu_bypass_backend &&
741 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
742 features |= (1ULL << VIRTIO_NET_F_MTU);
745 return features;
748 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
750 uint64_t features = 0;
752 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
753 * but also these: */
754 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
755 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
756 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
757 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
758 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
760 return features;
763 static void virtio_net_apply_guest_offloads(VirtIONet *n)
765 qemu_set_offload(qemu_get_queue(n->nic)->peer,
766 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
767 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
768 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
769 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
770 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
773 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
775 static const uint64_t guest_offloads_mask =
776 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
777 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
778 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
779 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
780 (1ULL << VIRTIO_NET_F_GUEST_UFO);
782 return guest_offloads_mask & features;
785 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
787 VirtIODevice *vdev = VIRTIO_DEVICE(n);
788 return virtio_net_guest_offloads_by_features(vdev->guest_features);
791 static void failover_add_primary(VirtIONet *n, Error **errp)
793 Error *err = NULL;
794 QemuOpts *opts;
796 if (n->primary_dev) {
797 return;
800 opts = qemu_opts_find(qemu_find_opts("device"), n->primary_device_id);
801 if (opts) {
802 n->primary_dev = qdev_device_add(opts, &err);
803 if (err) {
804 qemu_opts_del(opts);
806 } else {
807 error_setg(errp, "Primary device not found");
808 error_append_hint(errp, "Virtio-net failover will not work. Make "
809 "sure primary device has parameter"
810 " failover_pair_id=<virtio-net-id>\n");
812 error_propagate(errp, err);
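/*
 * Note the qemu_opts_del() on the failure path: qdev_device_add()
 * did not consume the option group, so it must be freed here --
 * presumably the leak referred to by the commit title.
 */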
815 static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
817 VirtIONet *n = opaque;
818 int ret = 0;
819 const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
821 if (g_strcmp0(standby_id, n->netclient_name) == 0) {
822 n->primary_device_id = g_strdup(opts->id);
823 ret = 1;
826 return ret;
829 static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
831 DeviceState *dev = NULL;
832 Error *err = NULL;
834 if (qemu_opts_foreach(qemu_find_opts("device"),
835 is_my_primary, n, &err)) {
836 if (err) {
837 error_propagate(errp, err);
838 return NULL;
840 if (n->primary_device_id) {
841 dev = qdev_find_recursive(sysbus_get_default(),
842 n->primary_device_id);
843 } else {
844 error_setg(errp, "Primary device id not found");
845 return NULL;
848 return dev;
851 static DeviceState *virtio_connect_failover_devices(VirtIONet *n, Error **errp)
853 DeviceState *prim_dev = NULL;
854 Error *err = NULL;
856 prim_dev = virtio_net_find_primary(n, &err);
857 if (!prim_dev) {
858 error_propagate(errp, err);
861 return prim_dev;
864 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
866 VirtIONet *n = VIRTIO_NET(vdev);
867 Error *err = NULL;
868 int i;
870 if (n->mtu_bypass_backend &&
871 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
872 features &= ~(1ULL << VIRTIO_NET_F_MTU);
875 virtio_net_set_multiqueue(n,
876 virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
877 virtio_has_feature(features, VIRTIO_NET_F_MQ));
879 virtio_net_set_mrg_rx_bufs(n,
880 virtio_has_feature(features,
881 VIRTIO_NET_F_MRG_RXBUF),
882 virtio_has_feature(features,
883 VIRTIO_F_VERSION_1),
884 virtio_has_feature(features,
885 VIRTIO_NET_F_HASH_REPORT));
887 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
888 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
889 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
890 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
891 n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
893 if (n->has_vnet_hdr) {
894 n->curr_guest_offloads =
895 virtio_net_guest_offloads_by_features(features);
896 virtio_net_apply_guest_offloads(n);
899 for (i = 0; i < n->max_queues; i++) {
900 NetClientState *nc = qemu_get_subqueue(n->nic, i);
902 if (!get_vhost_net(nc->peer)) {
903 continue;
905 vhost_net_ack_features(get_vhost_net(nc->peer), features);
908 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
909 memset(n->vlans, 0, MAX_VLAN >> 3);
910 } else {
911 memset(n->vlans, 0xff, MAX_VLAN >> 3);
914 if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
915 qapi_event_send_failover_negotiated(n->netclient_name);
916 qatomic_set(&n->failover_primary_hidden, false);
917 failover_add_primary(n, &err);
918 if (err) {
919 n->primary_dev = virtio_connect_failover_devices(n, &err);
920 if (err) {
921 goto out_err;
923 failover_add_primary(n, &err);
924 if (err) {
925 goto out_err;
929 return;
931 out_err:
932 if (err) {
933 warn_report_err(err);
937 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
938 struct iovec *iov, unsigned int iov_cnt)
940 uint8_t on;
941 size_t s;
942 NetClientState *nc = qemu_get_queue(n->nic);
944 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
945 if (s != sizeof(on)) {
946 return VIRTIO_NET_ERR;
949 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
950 n->promisc = on;
951 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
952 n->allmulti = on;
953 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
954 n->alluni = on;
955 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
956 n->nomulti = on;
957 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
958 n->nouni = on;
959 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
960 n->nobcast = on;
961 } else {
962 return VIRTIO_NET_ERR;
965 rxfilter_notify(nc);
967 return VIRTIO_NET_OK;
970 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
971 struct iovec *iov, unsigned int iov_cnt)
973 VirtIODevice *vdev = VIRTIO_DEVICE(n);
974 uint64_t offloads;
975 size_t s;
977 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
978 return VIRTIO_NET_ERR;
981 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
982 if (s != sizeof(offloads)) {
983 return VIRTIO_NET_ERR;
986 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
987 uint64_t supported_offloads;
989 offloads = virtio_ldq_p(vdev, &offloads);
991 if (!n->has_vnet_hdr) {
992 return VIRTIO_NET_ERR;
995 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
996 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
997 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
998 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
999 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
1001 supported_offloads = virtio_net_supported_guest_offloads(n);
1002 if (offloads & ~supported_offloads) {
1003 return VIRTIO_NET_ERR;
1006 n->curr_guest_offloads = offloads;
1007 virtio_net_apply_guest_offloads(n);
1009 return VIRTIO_NET_OK;
1010 } else {
1011 return VIRTIO_NET_ERR;
1015 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
1016 struct iovec *iov, unsigned int iov_cnt)
1018 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1019 struct virtio_net_ctrl_mac mac_data;
1020 size_t s;
1021 NetClientState *nc = qemu_get_queue(n->nic);
1023 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
1024 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
1025 return VIRTIO_NET_ERR;
1027 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
1028 assert(s == sizeof(n->mac));
1029 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
1030 rxfilter_notify(nc);
1032 return VIRTIO_NET_OK;
1035 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
1036 return VIRTIO_NET_ERR;
1039 int in_use = 0;
1040 int first_multi = 0;
1041 uint8_t uni_overflow = 0;
1042 uint8_t multi_overflow = 0;
1043 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1045 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1046 sizeof(mac_data.entries));
1047 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1048 if (s != sizeof(mac_data.entries)) {
1049 goto error;
1051 iov_discard_front(&iov, &iov_cnt, s);
1053 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
1054 goto error;
1057 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
1058 s = iov_to_buf(iov, iov_cnt, 0, macs,
1059 mac_data.entries * ETH_ALEN);
1060 if (s != mac_data.entries * ETH_ALEN) {
1061 goto error;
1063 in_use += mac_data.entries;
1064 } else {
1065 uni_overflow = 1;
1068 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
1070 first_multi = in_use;
1072 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
1073 sizeof(mac_data.entries));
1074 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
1075 if (s != sizeof(mac_data.entries)) {
1076 goto error;
1079 iov_discard_front(&iov, &iov_cnt, s);
1081 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
1082 goto error;
1085 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
1086 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
1087 mac_data.entries * ETH_ALEN);
1088 if (s != mac_data.entries * ETH_ALEN) {
1089 goto error;
1091 in_use += mac_data.entries;
1092 } else {
1093 multi_overflow = 1;
1096 n->mac_table.in_use = in_use;
1097 n->mac_table.first_multi = first_multi;
1098 n->mac_table.uni_overflow = uni_overflow;
1099 n->mac_table.multi_overflow = multi_overflow;
1100 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
1101 g_free(macs);
1102 rxfilter_notify(nc);
1104 return VIRTIO_NET_OK;
1106 error:
1107 g_free(macs);
1108 return VIRTIO_NET_ERR;
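/*
 * The MAC table is one flat array: unicast entries occupy slots
 * [0, first_multi) and multicast entries [first_multi, in_use).
 * Overflow of either half only sets a flag; receive_filter() then
 * accepts all traffic of that class instead of consulting the table.
 */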
1111 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
1112 struct iovec *iov, unsigned int iov_cnt)
1114 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1115 uint16_t vid;
1116 size_t s;
1117 NetClientState *nc = qemu_get_queue(n->nic);
1119 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
1120 vid = virtio_lduw_p(vdev, &vid);
1121 if (s != sizeof(vid)) {
1122 return VIRTIO_NET_ERR;
1125 if (vid >= MAX_VLAN)
1126 return VIRTIO_NET_ERR;
1128 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
1129 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
1130 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
1131 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
1132 else
1133 return VIRTIO_NET_ERR;
1135 rxfilter_notify(nc);
1137 return VIRTIO_NET_OK;
1140 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
1141 struct iovec *iov, unsigned int iov_cnt)
1143 trace_virtio_net_handle_announce(n->announce_timer.round);
1144 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
1145 n->status & VIRTIO_NET_S_ANNOUNCE) {
1146 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
1147 if (n->announce_timer.round) {
1148 qemu_announce_timer_step(&n->announce_timer);
1150 return VIRTIO_NET_OK;
1151 } else {
1152 return VIRTIO_NET_ERR;
1156 static void virtio_net_disable_rss(VirtIONet *n)
1158 if (n->rss_data.enabled) {
1159 trace_virtio_net_rss_disable();
1161 n->rss_data.enabled = false;
1164 static uint16_t virtio_net_handle_rss(VirtIONet *n,
1165 struct iovec *iov,
1166 unsigned int iov_cnt,
1167 bool do_rss)
1169 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1170 struct virtio_net_rss_config cfg;
1171 size_t s, offset = 0, size_get;
1172 uint16_t queues, i;
1173 struct {
1174 uint16_t us;
1175 uint8_t b;
1176 } QEMU_PACKED temp;
1177 const char *err_msg = "";
1178 uint32_t err_value = 0;
1180 if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
1181 err_msg = "RSS is not negotiated";
1182 goto error;
1184 if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
1185 err_msg = "Hash report is not negotiated";
1186 goto error;
1188 size_get = offsetof(struct virtio_net_rss_config, indirection_table);
1189 s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
1190 if (s != size_get) {
1191 err_msg = "Short command buffer";
1192 err_value = (uint32_t)s;
1193 goto error;
1195 n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
1196 n->rss_data.indirections_len =
1197 virtio_lduw_p(vdev, &cfg.indirection_table_mask);
1198 n->rss_data.indirections_len++;
1199 if (!do_rss) {
1200 n->rss_data.indirections_len = 1;
1202 if (!is_power_of_2(n->rss_data.indirections_len)) {
1203 err_msg = "Invalid size of indirection table";
1204 err_value = n->rss_data.indirections_len;
1205 goto error;
1207 if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
1208 err_msg = "Too large indirection table";
1209 err_value = n->rss_data.indirections_len;
1210 goto error;
1212 n->rss_data.default_queue = do_rss ?
1213 virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
1214 if (n->rss_data.default_queue >= n->max_queues) {
1215 err_msg = "Invalid default queue";
1216 err_value = n->rss_data.default_queue;
1217 goto error;
1219 offset += size_get;
1220 size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
1221 g_free(n->rss_data.indirections_table);
1222 n->rss_data.indirections_table = g_malloc(size_get);
1223 if (!n->rss_data.indirections_table) {
1224 err_msg = "Can't allocate indirections table";
1225 err_value = n->rss_data.indirections_len;
1226 goto error;
1228 s = iov_to_buf(iov, iov_cnt, offset,
1229 n->rss_data.indirections_table, size_get);
1230 if (s != size_get) {
1231 err_msg = "Short indirection table buffer";
1232 err_value = (uint32_t)s;
1233 goto error;
1235 for (i = 0; i < n->rss_data.indirections_len; ++i) {
1236 uint16_t val = n->rss_data.indirections_table[i];
1237 n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
1239 offset += size_get;
1240 size_get = sizeof(temp);
1241 s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
1242 if (s != size_get) {
1243 err_msg = "Can't get queues";
1244 err_value = (uint32_t)s;
1245 goto error;
1247 queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
1248 if (queues == 0 || queues > n->max_queues) {
1249 err_msg = "Invalid number of queues";
1250 err_value = queues;
1251 goto error;
1253 if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
1254 err_msg = "Invalid key size";
1255 err_value = temp.b;
1256 goto error;
1258 if (!temp.b && n->rss_data.hash_types) {
1259 err_msg = "No key provided";
1260 err_value = 0;
1261 goto error;
1263 if (!temp.b && !n->rss_data.hash_types) {
1264 virtio_net_disable_rss(n);
1265 return queues;
1267 offset += size_get;
1268 size_get = temp.b;
1269 s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
1270 if (s != size_get) {
1271 err_msg = "Can get key buffer";
1272 err_value = (uint32_t)s;
1273 goto error;
1275 n->rss_data.enabled = true;
1276 trace_virtio_net_rss_enable(n->rss_data.hash_types,
1277 n->rss_data.indirections_len,
1278 temp.b);
1279 return queues;
1280 error:
1281 trace_virtio_net_rss_error(err_msg, err_value);
1282 virtio_net_disable_rss(n);
1283 return 0;
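/*
 * The command buffer is parsed in three chunks: the fixed head of
 * struct virtio_net_rss_config, then indirections_len 16-bit table
 * entries, then a packed pair of max_tx_vq and key length followed
 * by the key bytes themselves.
 */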
1286 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
1287 struct iovec *iov, unsigned int iov_cnt)
1289 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1290 uint16_t queues;
1292 virtio_net_disable_rss(n);
1293 if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
1294 queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
1295 return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
1297 if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
1298 queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
1299 } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
1300 struct virtio_net_ctrl_mq mq;
1301 size_t s;
1302 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
1303 return VIRTIO_NET_ERR;
1305 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
1306 if (s != sizeof(mq)) {
1307 return VIRTIO_NET_ERR;
1309 queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1311 } else {
1312 return VIRTIO_NET_ERR;
1315 if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1316 queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1317 queues > n->max_queues ||
1318 !n->multiqueue) {
1319 return VIRTIO_NET_ERR;
1322 n->curr_queues = queues;
1323 /* stop the backend before changing the number of queues to avoid handling a
1324 * disabled queue */
1325 virtio_net_set_status(vdev, vdev->status);
1326 virtio_net_set_queues(n);
1328 return VIRTIO_NET_OK;
1331 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1333 VirtIONet *n = VIRTIO_NET(vdev);
1334 struct virtio_net_ctrl_hdr ctrl;
1335 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1336 VirtQueueElement *elem;
1337 size_t s;
1338 struct iovec *iov, *iov2;
1339 unsigned int iov_cnt;
1341 for (;;) {
1342 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1343 if (!elem) {
1344 break;
1346 if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1347 iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1348 virtio_error(vdev, "virtio-net ctrl missing headers");
1349 virtqueue_detach_element(vq, elem, 0);
1350 g_free(elem);
1351 break;
1354 iov_cnt = elem->out_num;
1355 iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1356 s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1357 iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1358 if (s != sizeof(ctrl)) {
1359 status = VIRTIO_NET_ERR;
1360 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1361 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1362 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1363 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1364 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1365 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1366 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1367 status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1368 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1369 status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1370 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1371 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1374 s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1375 assert(s == sizeof(status));
1377 virtqueue_push(vq, elem, sizeof(status));
1378 virtio_notify(vdev, vq);
1379 g_free(iov2);
1380 g_free(elem);
1384 /* RX */
1386 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1388 VirtIONet *n = VIRTIO_NET(vdev);
1389 int queue_index = vq2q(virtio_get_queue_index(vq));
1391 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1394 static bool virtio_net_can_receive(NetClientState *nc)
1396 VirtIONet *n = qemu_get_nic_opaque(nc);
1397 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1398 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1400 if (!vdev->vm_running) {
1401 return false;
1404 if (nc->queue_index >= n->curr_queues) {
1405 return false;
1408 if (!virtio_queue_ready(q->rx_vq) ||
1409 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1410 return false;
1413 return true;
1416 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1418 VirtIONet *n = q->n;
1419 if (virtio_queue_empty(q->rx_vq) ||
1420 (n->mergeable_rx_bufs &&
1421 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1422 virtio_queue_set_notification(q->rx_vq, 1);
1424 /* To avoid a race condition where the guest has made some buffers
1425 * available after the above check but before notification was
1426 * enabled, check for available buffers again.
1428 if (virtio_queue_empty(q->rx_vq) ||
1429 (n->mergeable_rx_bufs &&
1430 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1431 return 0;
1435 virtio_queue_set_notification(q->rx_vq, 0);
1436 return 1;
1439 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1441 virtio_tswap16s(vdev, &hdr->hdr_len);
1442 virtio_tswap16s(vdev, &hdr->gso_size);
1443 virtio_tswap16s(vdev, &hdr->csum_start);
1444 virtio_tswap16s(vdev, &hdr->csum_offset);
1447 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1448 * it never finds out that the packets don't have valid checksums. This
1449 * causes dhclient to get upset. Fedora's carried a patch for ages to
1450 * fix this with Xen but it hasn't appeared in an upstream release of
1451 * dhclient yet.
1453 * To avoid breaking existing guests, we catch udp packets and add
1454 * checksums. This is terrible but it's better than hacking the guest
1455 * kernels.
1457 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1458 * we should provide a mechanism to disable it to avoid polluting the host
1459 * cache.
1461 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1462 uint8_t *buf, size_t size)
1464 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1465 (size > 27 && size < 1500) && /* normal sized MTU */
1466 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1467 (buf[23] == 17) && /* ip.protocol == UDP */
1468 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1469 net_checksum_calculate(buf, size);
1470 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
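/*
 * The magic offsets above assume an Ethernet frame carrying IPv4
 * with no IP options: bytes 12-13 are the ethertype, byte 23 the
 * IP protocol field (14 + 9), and bytes 34-35 the UDP source port
 * (14 + 20), i.e. 67/bootps.
 */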
1474 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1475 const void *buf, size_t size)
1477 if (n->has_vnet_hdr) {
1478 /* FIXME this cast is evil */
1479 void *wbuf = (void *)buf;
1480 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1481 size - n->host_hdr_len);
1483 if (n->needs_vnet_hdr_swap) {
1484 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1486 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1487 } else {
1488 struct virtio_net_hdr hdr = {
1489 .flags = 0,
1490 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1492 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1496 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1498 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1499 static const uint8_t vlan[] = {0x81, 0x00};
1500 uint8_t *ptr = (uint8_t *)buf;
1501 int i;
1503 if (n->promisc)
1504 return 1;
1506 ptr += n->host_hdr_len;
1508 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1509 int vid = lduw_be_p(ptr + 14) & 0xfff;
1510 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1511 return 0;
1514 if (ptr[0] & 1) { // multicast
1515 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1516 return !n->nobcast;
1517 } else if (n->nomulti) {
1518 return 0;
1519 } else if (n->allmulti || n->mac_table.multi_overflow) {
1520 return 1;
1523 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1524 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1525 return 1;
1528 } else { // unicast
1529 if (n->nouni) {
1530 return 0;
1531 } else if (n->alluni || n->mac_table.uni_overflow) {
1532 return 1;
1533 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1534 return 1;
1537 for (i = 0; i < n->mac_table.first_multi; i++) {
1538 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1539 return 1;
1544 return 0;
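/*
 * Filtering order: promiscuous mode accepts everything, then the
 * VLAN bitmap is consulted, and finally the multicast or unicast
 * branch checks the mode and overflow flags before scanning its
 * half of the MAC table.
 */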
1547 static uint8_t virtio_net_get_hash_type(bool isip4,
1548 bool isip6,
1549 bool isudp,
1550 bool istcp,
1551 uint32_t types)
1553 if (isip4) {
1554 if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
1555 return NetPktRssIpV4Tcp;
1557 if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
1558 return NetPktRssIpV4Udp;
1560 if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
1561 return NetPktRssIpV4;
1563 } else if (isip6) {
1564 uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
1565 VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
1567 if (istcp && (types & mask)) {
1568 return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
1569 NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
1571 mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
1572 if (isudp && (types & mask)) {
1573 return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
1574 NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
1576 mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
1577 if (types & mask) {
1578 return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
1579 NetPktRssIpV6Ex : NetPktRssIpV6;
1582 return 0xff;
1585 static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
1586 uint32_t hash)
1588 struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
1589 hdr->hash_value = hash;
1590 hdr->hash_report = report;
1593 static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
1594 size_t size)
1596 VirtIONet *n = qemu_get_nic_opaque(nc);
1597 unsigned int index = nc->queue_index, new_index = index;
1598 struct NetRxPkt *pkt = n->rx_pkt;
1599 uint8_t net_hash_type;
1600 uint32_t hash;
1601 bool isip4, isip6, isudp, istcp;
1602 static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
1603 VIRTIO_NET_HASH_REPORT_IPv4,
1604 VIRTIO_NET_HASH_REPORT_TCPv4,
1605 VIRTIO_NET_HASH_REPORT_TCPv6,
1606 VIRTIO_NET_HASH_REPORT_IPv6,
1607 VIRTIO_NET_HASH_REPORT_IPv6_EX,
1608 VIRTIO_NET_HASH_REPORT_TCPv6_EX,
1609 VIRTIO_NET_HASH_REPORT_UDPv4,
1610 VIRTIO_NET_HASH_REPORT_UDPv6,
1611 VIRTIO_NET_HASH_REPORT_UDPv6_EX
1614 net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
1615 size - n->host_hdr_len);
1616 net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
1617 if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
1618 istcp = isudp = false;
1620 if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
1621 istcp = isudp = false;
1623 net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
1624 n->rss_data.hash_types);
1625 if (net_hash_type > NetPktRssIpV6UdpEx) {
1626 if (n->rss_data.populate_hash) {
1627 virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
1629 return n->rss_data.redirect ? n->rss_data.default_queue : -1;
1632 hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);
1634 if (n->rss_data.populate_hash) {
1635 virtio_set_packet_hash(buf, reports[net_hash_type], hash);
1638 if (n->rss_data.redirect) {
1639 new_index = hash & (n->rss_data.indirections_len - 1);
1640 new_index = n->rss_data.indirections_table[new_index];
1643 return (index == new_index) ? -1 : new_index;
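/*
 * A negative return keeps the packet on the current queue; otherwise
 * the caller re-enters receive on the selected subqueue. Masking the
 * hash with (indirections_len - 1) is safe because the table length
 * was validated to be a power of two.
 */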
1646 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1647 size_t size, bool no_rss)
1649 VirtIONet *n = qemu_get_nic_opaque(nc);
1650 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1651 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1652 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1653 struct virtio_net_hdr_mrg_rxbuf mhdr;
1654 unsigned mhdr_cnt = 0;
1655 size_t offset, i, guest_offset;
1657 if (!virtio_net_can_receive(nc)) {
1658 return -1;
1661 if (!no_rss && n->rss_data.enabled) {
1662 int index = virtio_net_process_rss(nc, buf, size);
1663 if (index >= 0) {
1664 NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
1665 return virtio_net_receive_rcu(nc2, buf, size, true);
1669 /* hdr_len refers to the header we supply to the guest */
1670 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1671 return 0;
1674 if (!receive_filter(n, buf, size))
1675 return size;
1677 offset = i = 0;
1679 while (offset < size) {
1680 VirtQueueElement *elem;
1681 int len, total;
1682 const struct iovec *sg;
1684 total = 0;
1686 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1687 if (!elem) {
1688 if (i) {
1689 virtio_error(vdev, "virtio-net unexpected empty queue: "
1690 "i %zd mergeable %d offset %zd, size %zd, "
1691 "guest hdr len %zd, host hdr len %zd "
1692 "guest features 0x%" PRIx64,
1693 i, n->mergeable_rx_bufs, offset, size,
1694 n->guest_hdr_len, n->host_hdr_len,
1695 vdev->guest_features);
1697 return -1;
1700 if (elem->in_num < 1) {
1701 virtio_error(vdev,
1702 "virtio-net receive queue contains no in buffers");
1703 virtqueue_detach_element(q->rx_vq, elem, 0);
1704 g_free(elem);
1705 return -1;
1708 sg = elem->in_sg;
1709 if (i == 0) {
1710 assert(offset == 0);
1711 if (n->mergeable_rx_bufs) {
1712 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1713 sg, elem->in_num,
1714 offsetof(typeof(mhdr), num_buffers),
1715 sizeof(mhdr.num_buffers));
1718 receive_header(n, sg, elem->in_num, buf, size);
1719 if (n->rss_data.populate_hash) {
1720 offset = sizeof(mhdr);
1721 iov_from_buf(sg, elem->in_num, offset,
1722 buf + offset, n->host_hdr_len - sizeof(mhdr));
1724 offset = n->host_hdr_len;
1725 total += n->guest_hdr_len;
1726 guest_offset = n->guest_hdr_len;
1727 } else {
1728 guest_offset = 0;
1731 /* copy in packet. ugh */
1732 len = iov_from_buf(sg, elem->in_num, guest_offset,
1733 buf + offset, size - offset);
1734 total += len;
1735 offset += len;
1736 /* If buffers can't be merged, at this point we
1737 * must have consumed the complete packet.
1738 * Otherwise, drop it. */
1739 if (!n->mergeable_rx_bufs && offset < size) {
1740 virtqueue_unpop(q->rx_vq, elem, total);
1741 g_free(elem);
1742 return size;
1745 /* signal other side */
1746 virtqueue_fill(q->rx_vq, elem, total, i++);
1747 g_free(elem);
1750 if (mhdr_cnt) {
1751 virtio_stw_p(vdev, &mhdr.num_buffers, i);
1752 iov_from_buf(mhdr_sg, mhdr_cnt,
1754 &mhdr.num_buffers, sizeof mhdr.num_buffers);
1757 virtqueue_flush(q->rx_vq, i);
1758 virtio_notify(vdev, q->rx_vq);
1760 return size;
1763 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1764 size_t size)
1766 RCU_READ_LOCK_GUARD();
1768 return virtio_net_receive_rcu(nc, buf, size, false);
1771 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1772 const uint8_t *buf,
1773 VirtioNetRscUnit *unit)
1775 uint16_t ip_hdrlen;
1776 struct ip_header *ip;
1778 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1779 + sizeof(struct eth_header));
1780 unit->ip = (void *)ip;
1781 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1782 unit->ip_plen = &ip->ip_len;
1783 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1784 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1785 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
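/*
 * th_offset_flags keeps the TCP data offset in its top four bits,
 * counted in 32-bit words: (flags & 0xF000) >> 12 gives words and a
 * further << 2 converts to bytes, hence the combined >> 10.
 */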
1788 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1789 const uint8_t *buf,
1790 VirtioNetRscUnit *unit)
1792 struct ip6_header *ip6;
1794 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1795 + sizeof(struct eth_header));
1796 unit->ip = ip6;
1797 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1798 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
1799 + sizeof(struct ip6_header));
1800 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1802 /* There is a difference between the payload length in ipv4 and v6:
1803 the ip header is excluded in ipv6 */
1804 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1807 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1808 VirtioNetRscSeg *seg)
1810 int ret;
1811 struct virtio_net_hdr_v1 *h;
1813 h = (struct virtio_net_hdr_v1 *)seg->buf;
1814 h->flags = 0;
1815 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1817 if (seg->is_coalesced) {
1818 h->rsc.segments = seg->packets;
1819 h->rsc.dup_acks = seg->dup_ack;
1820 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1821 if (chain->proto == ETH_P_IP) {
1822 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1823 } else {
1824 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1828 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1829 QTAILQ_REMOVE(&chain->buffers, seg, next);
1830 g_free(seg->buf);
1831 g_free(seg);
1833 return ret;
1836 static void virtio_net_rsc_purge(void *opq)
1838 VirtioNetRscSeg *seg, *rn;
1839 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1841 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1842 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1843 chain->stat.purge_failed++;
1844 continue;
1848 chain->stat.timer++;
1849 if (!QTAILQ_EMPTY(&chain->buffers)) {
1850 timer_mod(chain->drain_timer,
1851 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1855 static void virtio_net_rsc_cleanup(VirtIONet *n)
1857 VirtioNetRscChain *chain, *rn_chain;
1858 VirtioNetRscSeg *seg, *rn_seg;
1860 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1861 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1862 QTAILQ_REMOVE(&chain->buffers, seg, next);
1863 g_free(seg->buf);
1864 g_free(seg);
1867 timer_del(chain->drain_timer);
1868 timer_free(chain->drain_timer);
1869 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1870 g_free(chain);
1874 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1875 NetClientState *nc,
1876 const uint8_t *buf, size_t size)
1878 uint16_t hdr_len;
1879 VirtioNetRscSeg *seg;
1881 hdr_len = chain->n->guest_hdr_len;
1882 seg = g_malloc(sizeof(VirtioNetRscSeg));
1883 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1884 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1885 memcpy(seg->buf, buf, size);
1886 seg->size = size;
1887 seg->packets = 1;
1888 seg->dup_ack = 0;
1889 seg->is_coalesced = 0;
1890 seg->nc = nc;
1892 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1893 chain->stat.cache++;
1895 switch (chain->proto) {
1896 case ETH_P_IP:
1897 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1898 break;
1899 case ETH_P_IPV6:
1900 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1901 break;
1902 default:
1903 g_assert_not_reached();
1907 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1908 VirtioNetRscSeg *seg,
1909 const uint8_t *buf,
1910 struct tcp_header *n_tcp,
1911 struct tcp_header *o_tcp)
1913 uint32_t nack, oack;
1914 uint16_t nwin, owin;
1916 nack = htonl(n_tcp->th_ack);
1917 nwin = htons(n_tcp->th_win);
1918 oack = htonl(o_tcp->th_ack);
1919 owin = htons(o_tcp->th_win);
1921 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1922 chain->stat.ack_out_of_win++;
1923 return RSC_FINAL;
1924 } else if (nack == oack) {
1925 /* duplicated ack or window probe */
1926 if (nwin == owin) {
1927 /* duplicated ack, count it as a dup ack (the whql test expects up to 1) */
1928 chain->stat.dup_ack++;
1929 return RSC_FINAL;
1930 } else {
1931 /* Coalesce window update */
1932 o_tcp->th_win = n_tcp->th_win;
1933 chain->stat.win_update++;
1934 return RSC_COALESCE;
1936 } else {
1937 /* pure ack, go to 'C', finalize*/
1938 chain->stat.pure_ack++;
1939 return RSC_FINAL;
1943 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1944 VirtioNetRscSeg *seg,
1945 const uint8_t *buf,
1946 VirtioNetRscUnit *n_unit)
1948 void *data;
1949 uint16_t o_ip_len;
1950 uint32_t nseq, oseq;
1951 VirtioNetRscUnit *o_unit;
1953 o_unit = &seg->unit;
1954 o_ip_len = htons(*o_unit->ip_plen);
1955 nseq = htonl(n_unit->tcp->th_seq);
1956 oseq = htonl(o_unit->tcp->th_seq);
1958 /* out of order or retransmitted. */
1959 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1960 chain->stat.data_out_of_win++;
1961 return RSC_FINAL;
1964 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1965 if (nseq == oseq) {
1966 if ((o_unit->payload == 0) && n_unit->payload) {
1967 /* From no payload to payload: the normal case, not a dup ack etc. */
1968 chain->stat.data_after_pure_ack++;
1969 goto coalesce;
1970 } else {
1971 return virtio_net_rsc_handle_ack(chain, seg, buf,
1972 n_unit->tcp, o_unit->tcp);
1974 } else if ((nseq - oseq) != o_unit->payload) {
1975 /* Not a consistent packet, out of order */
1976 chain->stat.data_out_of_order++;
1977 return RSC_FINAL;
1978 } else {
1979 coalesce:
1980 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1981 chain->stat.over_size++;
1982 return RSC_FINAL;
1985 /* Here comes the in-order data; the payload length field differs between
1986 v4/v6, so use the field value to update it and record the new data len */
1987 o_unit->payload += n_unit->payload; /* update new data len */
1989 /* update field in ip header */
1990 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1992 /* Carry the 'PUSH' flag over; the whql test guide says 'PUSH' can be
1993 coalesced for windows guests, while this may change the behavior for
1994 linux guests (only if they use the RSC feature). */
1995 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1997 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1998 o_unit->tcp->th_win = n_unit->tcp->th_win;
2000 memmove(seg->buf + seg->size, data, n_unit->payload);
2001 seg->size += n_unit->payload;
2002 seg->packets++;
2003 chain->stat.coalesced++;
2004 return RSC_COALESCE;
2008 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
2009 VirtioNetRscSeg *seg,
2010 const uint8_t *buf, size_t size,
2011 VirtioNetRscUnit *unit)
2013 struct ip_header *ip1, *ip2;
2015 ip1 = (struct ip_header *)(unit->ip);
2016 ip2 = (struct ip_header *)(seg->unit.ip);
2017 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
2018 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2019 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2020 chain->stat.no_match++;
2021 return RSC_NO_MATCH;
2024 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2027 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
2028 VirtioNetRscSeg *seg,
2029 const uint8_t *buf, size_t size,
2030 VirtioNetRscUnit *unit)
2032 struct ip6_header *ip1, *ip2;
2034 ip1 = (struct ip6_header *)(unit->ip);
2035 ip2 = (struct ip6_header *)(seg->unit.ip);
2036 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
2037 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
2038 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
2039 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
2040 chain->stat.no_match++;
2041 return RSC_NO_MATCH;
2044 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
2047 /* Packets with 'SYN' should bypass; packets with any other control flag
2048 * should only be sent after the chain is drained, to prevent reordering */
2049 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2050 struct tcp_header *tcp)
2052 uint16_t tcp_hdr;
2053 uint16_t tcp_flag;
2055 tcp_flag = htons(tcp->th_offset_flags);
2056 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2057 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2058 if (tcp_flag & TH_SYN) {
2059 chain->stat.tcp_syn++;
2060 return RSC_BYPASS;
2063 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2064 chain->stat.tcp_ctrl_drain++;
2065 return RSC_FINAL;
2068 if (tcp_hdr > sizeof(struct tcp_header)) {
2069 chain->stat.tcp_all_opt++;
2070 return RSC_FINAL;
2073 return RSC_CANDIDATE;
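/*
 * Try to coalesce 'buf' with the segments cached on this chain.  An empty
 * cache stores the packet and arms the drain timer (chain->n->rsc_timeout
 * ns on QEMU_CLOCK_HOST); otherwise each cached segment is tried in turn,
 * and a packet matching no cached flow is cached as a new segment.
 */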
2076 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2077 NetClientState *nc,
2078 const uint8_t *buf, size_t size,
2079 VirtioNetRscUnit *unit)
2081 int ret;
2082 VirtioNetRscSeg *seg, *nseg;
2084 if (QTAILQ_EMPTY(&chain->buffers)) {
2085 chain->stat.empty_cache++;
2086 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2087 timer_mod(chain->drain_timer,
2088 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2089 return size;
2092 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2093 if (chain->proto == ETH_P_IP) {
2094 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2095 } else {
2096 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2099 if (ret == RSC_FINAL) {
2100 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2101 /* Send failed */
2102 chain->stat.final_failed++;
2103 return 0;
2106 /* Send current packet */
2107 return virtio_net_do_receive(nc, buf, size);
2108 } else if (ret == RSC_NO_MATCH) {
2109 continue;
2110 } else {
2111 /* Coalesced; set the flag so the IPv4 checksum is recalculated on drain */
2112 seg->is_coalesced = 1;
2113 return size;
2117 chain->stat.no_match_cache++;
2118 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2119 return size;
2122 /* Drain the cached data of one connection, to avoid out of order segments */
2123 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2124 NetClientState *nc,
2125 const uint8_t *buf, size_t size,
2126 uint16_t ip_start, uint16_t ip_size,
2127 uint16_t tcp_port)
2129 VirtioNetRscSeg *seg, *nseg;
2130 uint32_t ppair1, ppair2;
2132 ppair1 = *(uint32_t *)(buf + tcp_port);
2133 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2134 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2135 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2136 || (ppair1 != ppair2)) {
2137 continue;
2139 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2140 chain->stat.drain_failed++;
2143 break;
2146 return virtio_net_do_receive(nc, buf, size);
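/*
 * Bypass anything the coalescing logic cannot handle safely: non-IPv4
 * packets (note these are counted under the ip_option stat as well),
 * headers with IP options, non-TCP payloads, fragmentable packets
 * (DF not set), ECN-marked packets, and implausible length fields.
 */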
2149 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2150 struct ip_header *ip,
2151 const uint8_t *buf, size_t size)
2153 uint16_t ip_len;
2155 /* Not an ipv4 packet */
2156 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2157 chain->stat.ip_option++;
2158 return RSC_BYPASS;
2161 /* Don't handle packets with ip option */
2162 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2163 chain->stat.ip_option++;
2164 return RSC_BYPASS;
2167 if (ip->ip_p != IPPROTO_TCP) {
2168 chain->stat.bypass_not_tcp++;
2169 return RSC_BYPASS;
2172 /* Don't handle packets with ip fragment */
2173 if (!(htons(ip->ip_off) & IP_DF)) {
2174 chain->stat.ip_frag++;
2175 return RSC_BYPASS;
2178 /* Don't handle packets with ecn flag */
2179 if (IPTOS_ECN(ip->ip_tos)) {
2180 chain->stat.ip_ecn++;
2181 return RSC_BYPASS;
2184 ip_len = htons(ip->ip_len);
2185 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2186 || ip_len > (size - chain->n->guest_hdr_len -
2187 sizeof(struct eth_header))) {
2188 chain->stat.ip_hacked++;
2189 return RSC_BYPASS;
2192 return RSC_CANDIDATE;
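/*
 * IPv4 receive path.  About the offsets passed to
 * virtio_net_rsc_drain_flow() below: 12 is the offset of the source
 * address within the IPv4 header (saddr + daddr together are
 * VIRTIO_NET_IP4_ADDR_SIZE = 8 bytes), and the TCP port pair sits in the
 * first 4 bytes of the TCP header.
 */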
2195 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2196 NetClientState *nc,
2197 const uint8_t *buf, size_t size)
2199 int32_t ret;
2200 uint16_t hdr_len;
2201 VirtioNetRscUnit unit;
2203 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2205 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2206 + sizeof(struct tcp_header))) {
2207 chain->stat.bypass_not_tcp++;
2208 return virtio_net_do_receive(nc, buf, size);
2211 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2212 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2213 != RSC_CANDIDATE) {
2214 return virtio_net_do_receive(nc, buf, size);
2217 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2218 if (ret == RSC_BYPASS) {
2219 return virtio_net_do_receive(nc, buf, size);
2220 } else if (ret == RSC_FINAL) {
2221 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2222 ((hdr_len + sizeof(struct eth_header)) + 12),
2223 VIRTIO_NET_IP4_ADDR_SIZE,
2224 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2227 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2230 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2231 struct ip6_header *ip6,
2232 const uint8_t *buf, size_t size)
2234 uint16_t ip_len;
2236 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2237 != IP_HEADER_VERSION_6) {
2238 return RSC_BYPASS;
2241 /* Both options and the protocol are checked here: any extension header changes the next header field */
2242 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2243 chain->stat.bypass_not_tcp++;
2244 return RSC_BYPASS;
2247 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2248 if (ip_len < sizeof(struct tcp_header) ||
2249 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2250 - sizeof(struct ip6_header))) {
2251 chain->stat.ip_hacked++;
2252 return RSC_BYPASS;
2255 /* Don't handle packets with ecn flag */
2256 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2257 chain->stat.ip_ecn++;
2258 return RSC_BYPASS;
2261 return RSC_CANDIDATE;
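/*
 * IPv6 receive path.  Here the source address starts 8 bytes into the
 * IPv6 header, and saddr + daddr span VIRTIO_NET_IP6_ADDR_SIZE (32)
 * bytes.
 */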
2264 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2265 const uint8_t *buf, size_t size)
2267 int32_t ret;
2268 uint16_t hdr_len;
2269 VirtioNetRscChain *chain;
2270 VirtioNetRscUnit unit;
2272 chain = (VirtioNetRscChain *)opq;
2273 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2275 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2276 + sizeof(struct tcp_header))) {
2277 return virtio_net_do_receive(nc, buf, size);
2280 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2281 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2282 unit.ip, buf, size)) {
2283 return virtio_net_do_receive(nc, buf, size);
2286 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2287 if (ret == RSC_BYPASS) {
2288 return virtio_net_do_receive(nc, buf, size);
2289 } else if (ret == RSC_FINAL) {
2290 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2291 ((hdr_len + sizeof(struct eth_header)) + 8),
2292 VIRTIO_NET_IP6_ADDR_SIZE,
2293 hdr_len + sizeof(struct eth_header)
2294 + sizeof(struct ip6_header));
2297 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
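/*
 * Look up (or lazily create) the coalescing chain for an EtherType; only
 * ETH_P_IP and ETH_P_IPV6 get a chain, each with its own drain timer and
 * segment list.
 */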
2300 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2301 NetClientState *nc,
2302 uint16_t proto)
2304 VirtioNetRscChain *chain;
2306 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2307 return NULL;
2310 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2311 if (chain->proto == proto) {
2312 return chain;
2316 chain = g_malloc(sizeof(*chain));
2317 chain->n = n;
2318 chain->proto = proto;
2319 if (proto == (uint16_t)ETH_P_IP) {
2320 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2321 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2322 } else {
2323 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2324 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2326 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2327 virtio_net_rsc_purge, chain);
2328 memset(&chain->stat, 0, sizeof(chain->stat));
2330 QTAILQ_INIT(&chain->buffers);
2331 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2333 return chain;
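/*
 * RSC entry point: dispatch on the EtherType and on whether rsc4/rsc6
 * was negotiated; everything else falls through to the plain
 * virtio_net_do_receive() path.
 */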
2336 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2337 const uint8_t *buf,
2338 size_t size)
2340 uint16_t proto;
2341 VirtioNetRscChain *chain;
2342 struct eth_header *eth;
2343 VirtIONet *n;
2345 n = qemu_get_nic_opaque(nc);
2346 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2347 return virtio_net_do_receive(nc, buf, size);
2350 eth = (struct eth_header *)(buf + n->guest_hdr_len);
2351 proto = htons(eth->h_proto);
2353 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2354 if (chain) {
2355 chain->stat.received++;
2356 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2357 return virtio_net_rsc_receive4(chain, nc, buf, size);
2358 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2359 return virtio_net_rsc_receive6(chain, nc, buf, size);
2362 return virtio_net_do_receive(nc, buf, size);
2365 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2366 size_t size)
2368 VirtIONet *n = qemu_get_nic_opaque(nc);
2369 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2370 return virtio_net_rsc_receive(nc, buf, size);
2371 } else {
2372 return virtio_net_do_receive(nc, buf, size);
2376 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
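/*
 * Completion callback for qemu_sendv_packet_async(): push the element
 * parked in q->async_tx back to the guest, re-enable TX notifications
 * (masked while the send was in flight) and resume flushing.
 */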
2378 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2380 VirtIONet *n = qemu_get_nic_opaque(nc);
2381 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2382 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2384 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2385 virtio_notify(vdev, q->tx_vq);
2387 g_free(q->async_tx.elem);
2388 q->async_tx.elem = NULL;
2390 virtio_queue_set_notification(q->tx_vq, 1);
2391 virtio_net_flush_tx(q);
2394 /* TX */
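/*
 * Flush up to n->tx_burst packets from the TX virtqueue.  Returns the
 * number of packets sent, -EINVAL on a malformed request, or -EBUSY when
 * the backend went asynchronous and the in-flight element was parked in
 * q->async_tx for virtio_net_tx_complete() to finish.
 */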
2395 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2397 VirtIONet *n = q->n;
2398 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2399 VirtQueueElement *elem;
2400 int32_t num_packets = 0;
2401 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2402 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2403 return num_packets;
2406 if (q->async_tx.elem) {
2407 virtio_queue_set_notification(q->tx_vq, 0);
2408 return num_packets;
2411 for (;;) {
2412 ssize_t ret;
2413 unsigned int out_num;
2414 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2415 struct virtio_net_hdr_mrg_rxbuf mhdr;
2417 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2418 if (!elem) {
2419 break;
2422 out_num = elem->out_num;
2423 out_sg = elem->out_sg;
2424 if (out_num < 1) {
2425 virtio_error(vdev, "virtio-net header not in first element");
2426 virtqueue_detach_element(q->tx_vq, elem, 0);
2427 g_free(elem);
2428 return -EINVAL;
2431 if (n->has_vnet_hdr) {
2432 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2433 n->guest_hdr_len) {
2434 virtio_error(vdev, "virtio-net header incorrect");
2435 virtqueue_detach_element(q->tx_vq, elem, 0);
2436 g_free(elem);
2437 return -EINVAL;
2439 if (n->needs_vnet_hdr_swap) {
2440 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2441 sg2[0].iov_base = &mhdr;
2442 sg2[0].iov_len = n->guest_hdr_len;
2443 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2444 out_sg, out_num,
2445 n->guest_hdr_len, -1);
2446 if (out_num == VIRTQUEUE_MAX_SIZE) {
2447 goto drop;
2449 out_num += 1;
2450 out_sg = sg2;
2454 /* If the host wants to see the guest header as is, we can
2455 * pass it on unchanged. Otherwise, copy just the parts
2456 * that the host is interested in. */
2458 assert(n->host_hdr_len <= n->guest_hdr_len);
2459 if (n->host_hdr_len != n->guest_hdr_len) {
2460 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2461 out_sg, out_num,
2462 0, n->host_hdr_len);
2463 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2464 out_sg, out_num,
2465 n->guest_hdr_len, -1);
2466 out_num = sg_num;
2467 out_sg = sg;
2470 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2471 out_sg, out_num, virtio_net_tx_complete);
2472 if (ret == 0) {
2473 virtio_queue_set_notification(q->tx_vq, 0);
2474 q->async_tx.elem = elem;
2475 return -EBUSY;
2478 drop:
2479 virtqueue_push(q->tx_vq, elem, 0);
2480 virtio_notify(vdev, q->tx_vq);
2481 g_free(elem);
2483 if (++num_packets >= n->tx_burst) {
2484 break;
2487 return num_packets;
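/*
 * Timer-based TX batching, roughly: a kick normally just arms tx_timer
 * (n->tx_timeout ns on QEMU_CLOCK_VIRTUAL) and masks further
 * notifications; the queue is flushed when the timer fires, or right away
 * if another kick arrives while the timer is still pending.
 */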
2490 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2492 VirtIONet *n = VIRTIO_NET(vdev);
2493 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2495 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2496 virtio_net_drop_tx_queue_data(vdev, vq);
2497 return;
2500 /* This happens when device was stopped but VCPU wasn't. */
2501 if (!vdev->vm_running) {
2502 q->tx_waiting = 1;
2503 return;
2506 if (q->tx_waiting) {
2507 virtio_queue_set_notification(vq, 1);
2508 timer_del(q->tx_timer);
2509 q->tx_waiting = 0;
2510 if (virtio_net_flush_tx(q) == -EINVAL) {
2511 return;
2513 } else {
2514 timer_mod(q->tx_timer,
2515 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2516 q->tx_waiting = 1;
2517 virtio_queue_set_notification(vq, 0);
2521 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2523 VirtIONet *n = VIRTIO_NET(vdev);
2524 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2526 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2527 virtio_net_drop_tx_queue_data(vdev, vq);
2528 return;
2531 if (unlikely(q->tx_waiting)) {
2532 return;
2534 q->tx_waiting = 1;
2535 /* This happens when device was stopped but VCPU wasn't. */
2536 if (!vdev->vm_running) {
2537 return;
2539 virtio_queue_set_notification(vq, 0);
2540 qemu_bh_schedule(q->tx_bh);
2543 static void virtio_net_tx_timer(void *opaque)
2545 VirtIONetQueue *q = opaque;
2546 VirtIONet *n = q->n;
2547 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2548 /* This happens when device was stopped but BH wasn't. */
2549 if (!vdev->vm_running) {
2550 /* Make sure tx waiting is set, so we'll run when restarted. */
2551 assert(q->tx_waiting);
2552 return;
2555 q->tx_waiting = 0;
2557 /* Just in case the driver is not ready any more */
2558 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2559 return;
2562 virtio_queue_set_notification(q->tx_vq, 1);
2563 virtio_net_flush_tx(q);
2566 static void virtio_net_tx_bh(void *opaque)
2568 VirtIONetQueue *q = opaque;
2569 VirtIONet *n = q->n;
2570 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2571 int32_t ret;
2573 /* This happens when device was stopped but BH wasn't. */
2574 if (!vdev->vm_running) {
2575 /* Make sure tx waiting is set, so we'll run when restarted. */
2576 assert(q->tx_waiting);
2577 return;
2580 q->tx_waiting = 0;
2582 /* Just in case the driver is not ready any more */
2583 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2584 return;
2587 ret = virtio_net_flush_tx(q);
2588 if (ret == -EBUSY || ret == -EINVAL) {
2589 return; /* Notification re-enable handled by tx_complete or device
2590 * broken */
2593 /* If we flush a full burst of packets, assume there are
2594 * more coming and immediately reschedule */
2595 if (ret >= n->tx_burst) {
2596 qemu_bh_schedule(q->tx_bh);
2597 q->tx_waiting = 1;
2598 return;
2601 /* If less than a full burst, re-enable notification and flush
2602 * anything that may have come in while we weren't looking. If
2603 * we find something, assume the guest is still active and reschedule */
2604 virtio_queue_set_notification(q->tx_vq, 1);
2605 ret = virtio_net_flush_tx(q);
2606 if (ret == -EINVAL) {
2607 return;
2608 } else if (ret > 0) {
2609 virtio_queue_set_notification(q->tx_vq, 0);
2610 qemu_bh_schedule(q->tx_bh);
2611 q->tx_waiting = 1;
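/*
 * Virtqueue layout is rx0, tx0, rx1, tx1, ..., ctrl: queue pair i uses
 * virtqueue indexes 2 * i (rx) and 2 * i + 1 (tx), the control queue
 * always comes last, and vq2q() maps a virtqueue index back to its pair.
 */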
2615 static void virtio_net_add_queue(VirtIONet *n, int index)
2617 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2619 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2620 virtio_net_handle_rx);
2622 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2623 n->vqs[index].tx_vq =
2624 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2625 virtio_net_handle_tx_timer);
2626 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2627 virtio_net_tx_timer,
2628 &n->vqs[index]);
2629 } else {
2630 n->vqs[index].tx_vq =
2631 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2632 virtio_net_handle_tx_bh);
2633 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2636 n->vqs[index].tx_waiting = 0;
2637 n->vqs[index].n = n;
2640 static void virtio_net_del_queue(VirtIONet *n, int index)
2642 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2643 VirtIONetQueue *q = &n->vqs[index];
2644 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2646 qemu_purge_queued_packets(nc);
2648 virtio_del_queue(vdev, index * 2);
2649 if (q->tx_timer) {
2650 timer_del(q->tx_timer);
2651 timer_free(q->tx_timer);
2652 q->tx_timer = NULL;
2653 } else {
2654 qemu_bh_delete(q->tx_bh);
2655 q->tx_bh = NULL;
2657 q->tx_waiting = 0;
2658 virtio_del_queue(vdev, index * 2 + 1);
2661 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2663 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2664 int old_num_queues = virtio_get_num_queues(vdev);
2665 int new_num_queues = new_max_queues * 2 + 1;
2666 int i;
2668 assert(old_num_queues >= 3);
2669 assert(old_num_queues % 2 == 1);
2671 if (old_num_queues == new_num_queues) {
2672 return;
2676 /* We always need to remove and add ctrl vq if
2677 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2678 * and then we only enter one of the following two loops. */
2680 virtio_del_queue(vdev, old_num_queues - 1);
2682 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2683 /* new_num_queues < old_num_queues */
2684 virtio_net_del_queue(n, i / 2);
2687 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2688 /* new_num_queues > old_num_queues */
2689 virtio_net_add_queue(n, i / 2);
2692 /* add ctrl_vq last */
2693 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2696 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2698 int max = multiqueue ? n->max_queues : 1;
2700 n->multiqueue = multiqueue;
2701 virtio_net_change_num_queues(n, max);
2703 virtio_net_set_queues(n);
2706 static int virtio_net_post_load_device(void *opaque, int version_id)
2708 VirtIONet *n = opaque;
2709 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2710 int i, link_down;
2712 trace_virtio_net_post_load_device();
2713 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2714 virtio_vdev_has_feature(vdev,
2715 VIRTIO_F_VERSION_1),
2716 virtio_vdev_has_feature(vdev,
2717 VIRTIO_NET_F_HASH_REPORT));
2719 /* MAC_TABLE_ENTRIES may be different from the saved image */
2720 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2721 n->mac_table.in_use = 0;
2724 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2725 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2729 /* curr_guest_offloads will be later overwritten by the
2730 * virtio_set_features_nocheck call done from virtio_load.
2731 * Here we make sure it is preserved and restored accordingly
2732 * in the virtio_net_post_load_virtio callback. */
2734 n->saved_guest_offloads = n->curr_guest_offloads;
2736 virtio_net_set_queues(n);
2738 /* Find the first multicast entry in the saved MAC filter */
2739 for (i = 0; i < n->mac_table.in_use; i++) {
2740 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2741 break;
2744 n->mac_table.first_multi = i;
2746 /* nc.link_down can't be migrated, so infer link_down from
2747 * the link status bit in n->status */
2748 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2749 for (i = 0; i < n->max_queues; i++) {
2750 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2753 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2754 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2755 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2756 QEMU_CLOCK_VIRTUAL,
2757 virtio_net_announce_timer, n);
2758 if (n->announce_timer.round) {
2759 timer_mod(n->announce_timer.tm,
2760 qemu_clock_get_ms(n->announce_timer.type));
2761 } else {
2762 qemu_announce_timer_del(&n->announce_timer, false);
2766 if (n->rss_data.enabled) {
2767 trace_virtio_net_rss_enable(n->rss_data.hash_types,
2768 n->rss_data.indirections_len,
2769 sizeof(n->rss_data.key));
2770 } else {
2771 trace_virtio_net_rss_disable();
2773 return 0;
2776 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2778 VirtIONet *n = VIRTIO_NET(vdev);
2780 /* The actual needed state is now in saved_guest_offloads,
2781 * see virtio_net_post_load_device for details.
2782 * Restore it and apply the desired offloads. */
2784 n->curr_guest_offloads = n->saved_guest_offloads;
2785 if (peer_has_vnet_hdr(n)) {
2786 virtio_net_apply_guest_offloads(n);
2789 return 0;
2792 /* tx_waiting field of a VirtIONetQueue */
2793 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2794 .name = "virtio-net-queue-tx_waiting",
2795 .fields = (VMStateField[]) {
2796 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2797 VMSTATE_END_OF_LIST()
2801 static bool max_queues_gt_1(void *opaque, int version_id)
2803 return VIRTIO_NET(opaque)->max_queues > 1;
2806 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2808 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2809 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2812 static bool mac_table_fits(void *opaque, int version_id)
2814 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2817 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2819 return !mac_table_fits(opaque, version_id);
2822 /* This temporary type is shared by all the WITH_TMP methods
2823 * although only some fields are used by each. */
2825 struct VirtIONetMigTmp {
2826 VirtIONet *parent;
2827 VirtIONetQueue *vqs_1;
2828 uint16_t curr_queues_1;
2829 uint8_t has_ufo;
2830 uint32_t has_vnet_hdr;
2833 /* The 2nd and subsequent tx_waiting flags are loaded later than
2834 * the 1st entry in the queues and only if there's more than one
2835 * entry. We use the tmp mechanism to compute a temporary
2836 * pointer and count, and also to validate the count. */
2839 static int virtio_net_tx_waiting_pre_save(void *opaque)
2841 struct VirtIONetMigTmp *tmp = opaque;
2843 tmp->vqs_1 = tmp->parent->vqs + 1;
2844 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2845 if (tmp->parent->curr_queues == 0) {
2846 tmp->curr_queues_1 = 0;
2849 return 0;
2852 static int virtio_net_tx_waiting_pre_load(void *opaque)
2854 struct VirtIONetMigTmp *tmp = opaque;
2856 /* Reuse the pointer setup from save */
2857 virtio_net_tx_waiting_pre_save(opaque);
2859 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2860 error_report("virtio-net: curr_queues %x > max_queues %x",
2861 tmp->parent->curr_queues, tmp->parent->max_queues);
2863 return -EINVAL;
2866 return 0; /* all good */
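/*
 * E.g. with curr_queues == 4: vqs_1 = &vqs[1] and curr_queues_1 == 3, so
 * the tx_waiting flags of queues 1..3 are saved here, while queue 0's
 * flag travels with the main device section.
 */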
2869 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2870 .name = "virtio-net-tx_waiting",
2871 .pre_load = virtio_net_tx_waiting_pre_load,
2872 .pre_save = virtio_net_tx_waiting_pre_save,
2873 .fields = (VMStateField[]) {
2874 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2875 curr_queues_1,
2876 vmstate_virtio_net_queue_tx_waiting,
2877 struct VirtIONetQueue),
2878 VMSTATE_END_OF_LIST()
2882 /* The 'has_ufo' flag is just tested; if the incoming stream has the
2883 * flag set we need to check that we have it */
2885 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2887 struct VirtIONetMigTmp *tmp = opaque;
2889 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2890 error_report("virtio-net: saved image requires TUN_F_UFO support");
2891 return -EINVAL;
2894 return 0;
2897 static int virtio_net_ufo_pre_save(void *opaque)
2899 struct VirtIONetMigTmp *tmp = opaque;
2901 tmp->has_ufo = tmp->parent->has_ufo;
2903 return 0;
2906 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2907 .name = "virtio-net-ufo",
2908 .post_load = virtio_net_ufo_post_load,
2909 .pre_save = virtio_net_ufo_pre_save,
2910 .fields = (VMStateField[]) {
2911 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2912 VMSTATE_END_OF_LIST()
2916 /* The 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2917 * flag set we need to check that we have it */
2919 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2921 struct VirtIONetMigTmp *tmp = opaque;
2923 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2924 error_report("virtio-net: saved image requires vnet_hdr=on");
2925 return -EINVAL;
2928 return 0;
2931 static int virtio_net_vnet_pre_save(void *opaque)
2933 struct VirtIONetMigTmp *tmp = opaque;
2935 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2937 return 0;
2940 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2941 .name = "virtio-net-vnet",
2942 .post_load = virtio_net_vnet_post_load,
2943 .pre_save = virtio_net_vnet_pre_save,
2944 .fields = (VMStateField[]) {
2945 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2946 VMSTATE_END_OF_LIST()
2950 static bool virtio_net_rss_needed(void *opaque)
2952 return VIRTIO_NET(opaque)->rss_data.enabled;
2955 static const VMStateDescription vmstate_virtio_net_rss = {
2956 .name = "virtio-net-device/rss",
2957 .version_id = 1,
2958 .minimum_version_id = 1,
2959 .needed = virtio_net_rss_needed,
2960 .fields = (VMStateField[]) {
2961 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2962 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2963 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2964 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2965 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2966 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
2967 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
2968 VIRTIO_NET_RSS_MAX_KEY_SIZE),
2969 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
2970 rss_data.indirections_len, 0,
2971 vmstate_info_uint16, uint16_t),
2972 VMSTATE_END_OF_LIST()
2976 static const VMStateDescription vmstate_virtio_net_device = {
2977 .name = "virtio-net-device",
2978 .version_id = VIRTIO_NET_VM_VERSION,
2979 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2980 .post_load = virtio_net_post_load_device,
2981 .fields = (VMStateField[]) {
2982 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2983 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2984 vmstate_virtio_net_queue_tx_waiting,
2985 VirtIONetQueue),
2986 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2987 VMSTATE_UINT16(status, VirtIONet),
2988 VMSTATE_UINT8(promisc, VirtIONet),
2989 VMSTATE_UINT8(allmulti, VirtIONet),
2990 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2992 /* Guarded pair: if it fits we load it, else we throw it away
2993 * - this can happen if the source has a larger MAC table; post-load
2994 * sets flags in this case. */
2996 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2997 0, mac_table_fits, mac_table.in_use,
2998 ETH_ALEN),
2999 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3000 mac_table.in_use, ETH_ALEN),
3002 /* Note: this is an array of uint32_t values that has always been saved
3003 * as a byte buffer, so mind the endianness; it is actually used as a
3004 * bitmap built on those uints. */
3006 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3007 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3008 vmstate_virtio_net_has_vnet),
3009 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3010 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3011 VMSTATE_UINT8(alluni, VirtIONet),
3012 VMSTATE_UINT8(nomulti, VirtIONet),
3013 VMSTATE_UINT8(nouni, VirtIONet),
3014 VMSTATE_UINT8(nobcast, VirtIONet),
3015 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3016 vmstate_virtio_net_has_ufo),
3017 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3018 vmstate_info_uint16_equal, uint16_t),
3019 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3020 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3021 vmstate_virtio_net_tx_waiting),
3022 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3023 has_ctrl_guest_offloads),
3024 VMSTATE_END_OF_LIST()
3026 .subsections = (const VMStateDescription * []) {
3027 &vmstate_virtio_net_rss,
3028 NULL
3032 static NetClientInfo net_virtio_info = {
3033 .type = NET_CLIENT_DRIVER_NIC,
3034 .size = sizeof(NICState),
3035 .can_receive = virtio_net_can_receive,
3036 .receive = virtio_net_receive,
3037 .link_status_changed = virtio_net_set_link_status,
3038 .query_rx_filter = virtio_net_query_rxfilter,
3039 .announce = virtio_net_announce,
3042 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3044 VirtIONet *n = VIRTIO_NET(vdev);
3045 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3046 assert(n->vhost_started);
3047 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3050 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3051 bool mask)
3053 VirtIONet *n = VIRTIO_NET(vdev);
3054 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3055 assert(n->vhost_started);
3056 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3057 vdev, idx, mask);
3060 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3062 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3064 n->config_size = virtio_feature_get_config_size(feature_sizes,
3065 host_features);
3068 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3069 const char *type)
3072 /* The name can be NULL; in that case the netclient name will be type.x. */
3074 assert(type != NULL);
3076 g_free(n->netclient_name);
3077 g_free(n->netclient_type);
3078 n->netclient_name = g_strdup(name);
3079 n->netclient_type = g_strdup(type);
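/*
 * Failover, in sketch: ask the hotplug controller to unplug the primary
 * device (typically a passthrough NIC) with partially_hotplugged set, so
 * the guest sees the unplug while QEMU keeps the device around for a
 * possible re-plug if migration fails.
 */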
3082 static bool failover_unplug_primary(VirtIONet *n)
3084 HotplugHandler *hotplug_ctrl;
3085 PCIDevice *pci_dev;
3086 Error *err = NULL;
3088 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3089 if (hotplug_ctrl) {
3090 pci_dev = PCI_DEVICE(n->primary_dev);
3091 pci_dev->partially_hotplugged = true;
3092 hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3093 if (err) {
3094 error_report_err(err);
3095 return false;
3097 } else {
3098 return false;
3100 return true;
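/*
 * Undo a partial unplug after a failed migration: reattach the primary
 * device to its bus, unhide it and run the pre_plug/plug handlers again.
 */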
3103 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3105 Error *err = NULL;
3106 HotplugHandler *hotplug_ctrl;
3107 PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3108 BusState *primary_bus;
3110 if (!pdev->partially_hotplugged) {
3111 return true;
3113 primary_bus = n->primary_dev->parent_bus;
3114 if (!primary_bus) {
3115 error_setg(errp, "virtio_net: couldn't find primary bus");
3116 return false;
3118 qdev_set_parent_bus(n->primary_dev, primary_bus, &error_abort);
3119 qatomic_set(&n->failover_primary_hidden, false);
3120 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3121 if (hotplug_ctrl) {
3122 hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3123 if (err) {
3124 goto out;
3126 hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3129 out:
3130 error_propagate(errp, err);
3131 return !err;
3134 static void virtio_net_handle_migration_primary(VirtIONet *n,
3135 MigrationState *s)
3137 bool should_be_hidden;
3138 Error *err = NULL;
3140 should_be_hidden = qatomic_read(&n->failover_primary_hidden);
3142 if (!n->primary_dev) {
3143 n->primary_dev = virtio_connect_failover_devices(n, &err);
3144 if (!n->primary_dev) {
3145 return;
3149 if (migration_in_setup(s) && !should_be_hidden) {
3150 if (failover_unplug_primary(n)) {
3151 vmstate_unregister(VMSTATE_IF(n->primary_dev),
3152 qdev_get_vmsd(n->primary_dev),
3153 n->primary_dev);
3154 qapi_event_send_unplug_primary(n->primary_device_id);
3155 qatomic_set(&n->failover_primary_hidden, true);
3156 } else {
3157 warn_report("couldn't unplug primary device");
3159 } else if (migration_has_failed(s)) {
3160 /* We already unplugged the device; let's plug it back */
3161 if (!failover_replug_primary(n, &err)) {
3162 if (err) {
3163 error_report_err(err);
3169 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3171 MigrationState *s = data;
3172 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3173 virtio_net_handle_migration_primary(n, s);
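/*
 * DeviceListener callback: returns 1 to hide the would-be primary
 * device, 0 to let it plug normally, and -1 when the device is not our
 * failover primary (no failover_pair_id match).
 */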
3176 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3177 QemuOpts *device_opts)
3179 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3180 bool match_found = false;
3181 bool hide = false;
3182 const char *standby_id;
3184 if (!device_opts) {
3185 return -1;
3187 standby_id = qemu_opt_get(device_opts, "failover_pair_id");
3188 if (g_strcmp0(standby_id, n->netclient_name) == 0) {
3189 match_found = true;
3190 } else {
3191 match_found = false;
3192 hide = false;
3193 goto out;
3196 /* failover_primary_hidden is set during feature negotiation */
3197 hide = qatomic_read(&n->failover_primary_hidden);
3198 g_free(n->primary_device_id);
3199 n->primary_device_id = g_strdup(device_opts->id);
3200 if (!n->primary_device_id) {
3201 warn_report("primary_device_id not set");
3204 out:
3205 if (match_found && hide) {
3206 return 1;
3207 } else if (match_found && !hide) {
3208 return 0;
3209 } else {
3210 return -1;
3214 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3216 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3217 VirtIONet *n = VIRTIO_NET(dev);
3218 NetClientState *nc;
3219 int i;
3221 if (n->net_conf.mtu) {
3222 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3225 if (n->net_conf.duplex_str) {
3226 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3227 n->net_conf.duplex = DUPLEX_HALF;
3228 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3229 n->net_conf.duplex = DUPLEX_FULL;
3230 } else {
3231 error_setg(errp, "'duplex' must be 'half' or 'full'");
3232 return;
3234 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3235 } else {
3236 n->net_conf.duplex = DUPLEX_UNKNOWN;
3239 if (n->net_conf.speed < SPEED_UNKNOWN) {
3240 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3241 return;
3243 if (n->net_conf.speed >= 0) {
3244 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3247 if (n->failover) {
3248 n->primary_listener.should_be_hidden =
3249 virtio_net_primary_should_be_hidden;
3250 qatomic_set(&n->failover_primary_hidden, true);
3251 device_listener_register(&n->primary_listener);
3252 n->migration_state.notify = virtio_net_migration_state_notifier;
3253 add_migration_state_change_notifier(&n->migration_state);
3254 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3257 virtio_net_set_config_size(n, n->host_features);
3258 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3261 /* We set a lower limit on RX queue size to what it always was.
3262 * Guests that want a smaller ring can always resize it without
3263 * help from us (using virtio 1 and up). */
3265 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3266 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3267 !is_power_of_2(n->net_conf.rx_queue_size)) {
3268 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3269 "must be a power of 2 between %d and %d.",
3270 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3271 VIRTQUEUE_MAX_SIZE);
3272 virtio_cleanup(vdev);
3273 return;
3276 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3277 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3278 !is_power_of_2(n->net_conf.tx_queue_size)) {
3279 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3280 "must be a power of 2 between %d and %d",
3281 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3282 VIRTQUEUE_MAX_SIZE);
3283 virtio_cleanup(vdev);
3284 return;
3287 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3288 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3289 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3290 "must be a positive integer less than %d.",
3291 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3292 virtio_cleanup(vdev);
3293 return;
3295 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3296 n->curr_queues = 1;
3297 n->tx_timeout = n->net_conf.txtimer;
3299 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3300 && strcmp(n->net_conf.tx, "bh")) {
3301 warn_report("virtio-net: "
3302 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3303 n->net_conf.tx);
3304 error_printf("Defaulting to \"bh\"");
3307 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3308 n->net_conf.tx_queue_size);
3310 for (i = 0; i < n->max_queues; i++) {
3311 virtio_net_add_queue(n, i);
3314 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3315 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3316 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3317 n->status = VIRTIO_NET_S_LINK_UP;
3318 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3319 QEMU_CLOCK_VIRTUAL,
3320 virtio_net_announce_timer, n);
3321 n->announce_timer.round = 0;
3323 if (n->netclient_type) {
3325 /* This happens when virtio_net_set_netclient_name has been called. */
3327 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3328 n->netclient_type, n->netclient_name, n);
3329 } else {
3330 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3331 object_get_typename(OBJECT(dev)), dev->id, n);
3334 peer_test_vnet_hdr(n);
3335 if (peer_has_vnet_hdr(n)) {
3336 for (i = 0; i < n->max_queues; i++) {
3337 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3339 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3340 } else {
3341 n->host_hdr_len = 0;
3344 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3346 n->vqs[0].tx_waiting = 0;
3347 n->tx_burst = n->net_conf.txburst;
3348 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3349 n->promisc = 1; /* for compatibility */
3351 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3353 n->vlans = g_malloc0(MAX_VLAN >> 3);
3355 nc = qemu_get_queue(n->nic);
3356 nc->rxfilter_notify_enabled = 1;
3358 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
3359 struct virtio_net_config netcfg = {};
3360 memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
3361 vhost_net_set_config(get_vhost_net(nc->peer),
3362 (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
3364 QTAILQ_INIT(&n->rsc_chains);
3365 n->qdev = dev;
3367 net_rx_pkt_init(&n->rx_pkt, false);
3370 static void virtio_net_device_unrealize(DeviceState *dev)
3372 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3373 VirtIONet *n = VIRTIO_NET(dev);
3374 int i, max_queues;
3376 /* This will stop vhost backend if appropriate. */
3377 virtio_net_set_status(vdev, 0);
3379 g_free(n->netclient_name);
3380 n->netclient_name = NULL;
3381 g_free(n->netclient_type);
3382 n->netclient_type = NULL;
3384 g_free(n->mac_table.macs);
3385 g_free(n->vlans);
3387 if (n->failover) {
3388 device_listener_unregister(&n->primary_listener);
3389 g_free(n->primary_device_id);
3392 max_queues = n->multiqueue ? n->max_queues : 1;
3393 for (i = 0; i < max_queues; i++) {
3394 virtio_net_del_queue(n, i);
3396 /* delete also control vq */
3397 virtio_del_queue(vdev, max_queues * 2);
3398 qemu_announce_timer_del(&n->announce_timer, false);
3399 g_free(n->vqs);
3400 qemu_del_nic(n->nic);
3401 virtio_net_rsc_cleanup(n);
3402 g_free(n->rss_data.indirections_table);
3403 net_rx_pkt_uninit(n->rx_pkt);
3404 virtio_cleanup(vdev);
3407 static void virtio_net_instance_init(Object *obj)
3409 VirtIONet *n = VIRTIO_NET(obj);
3412 /* The default config_size is sizeof(struct virtio_net_config).
3413 * It can be overridden with virtio_net_set_config_size. */
3415 n->config_size = sizeof(struct virtio_net_config);
3416 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3417 "bootindex", "/ethernet-phy@0",
3418 DEVICE(n));
3421 static int virtio_net_pre_save(void *opaque)
3423 VirtIONet *n = opaque;
3425 /* At this point, backend must be stopped, otherwise
3426 * it might keep writing to memory. */
3427 assert(!n->vhost_started);
3429 return 0;
3432 static bool primary_unplug_pending(void *opaque)
3434 DeviceState *dev = opaque;
3435 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3436 VirtIONet *n = VIRTIO_NET(vdev);
3438 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3439 return false;
3441 return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3444 static bool dev_unplug_pending(void *opaque)
3446 DeviceState *dev = opaque;
3447 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3449 return vdc->primary_unplug_pending(dev);
3452 static const VMStateDescription vmstate_virtio_net = {
3453 .name = "virtio-net",
3454 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3455 .version_id = VIRTIO_NET_VM_VERSION,
3456 .fields = (VMStateField[]) {
3457 VMSTATE_VIRTIO_DEVICE,
3458 VMSTATE_END_OF_LIST()
3460 .pre_save = virtio_net_pre_save,
3461 .dev_unplug_pending = dev_unplug_pending,
3464 static Property virtio_net_properties[] = {
3465 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3466 VIRTIO_NET_F_CSUM, true),
3467 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3468 VIRTIO_NET_F_GUEST_CSUM, true),
3469 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3470 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3471 VIRTIO_NET_F_GUEST_TSO4, true),
3472 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3473 VIRTIO_NET_F_GUEST_TSO6, true),
3474 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3475 VIRTIO_NET_F_GUEST_ECN, true),
3476 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3477 VIRTIO_NET_F_GUEST_UFO, true),
3478 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3479 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3480 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3481 VIRTIO_NET_F_HOST_TSO4, true),
3482 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3483 VIRTIO_NET_F_HOST_TSO6, true),
3484 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3485 VIRTIO_NET_F_HOST_ECN, true),
3486 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3487 VIRTIO_NET_F_HOST_UFO, true),
3488 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3489 VIRTIO_NET_F_MRG_RXBUF, true),
3490 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3491 VIRTIO_NET_F_STATUS, true),
3492 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3493 VIRTIO_NET_F_CTRL_VQ, true),
3494 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3495 VIRTIO_NET_F_CTRL_RX, true),
3496 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3497 VIRTIO_NET_F_CTRL_VLAN, true),
3498 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3499 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3500 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3501 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3502 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3503 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3504 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3505 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3506 VIRTIO_NET_F_RSS, false),
3507 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3508 VIRTIO_NET_F_HASH_REPORT, false),
3509 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3510 VIRTIO_NET_F_RSC_EXT, false),
3511 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3512 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3513 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3514 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3515 TX_TIMER_INTERVAL),
3516 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3517 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3518 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3519 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3520 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3521 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3522 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3523 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3524 true),
3525 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3526 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3527 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3528 DEFINE_PROP_END_OF_LIST(),
3531 static void virtio_net_class_init(ObjectClass *klass, void *data)
3533 DeviceClass *dc = DEVICE_CLASS(klass);
3534 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3536 device_class_set_props(dc, virtio_net_properties);
3537 dc->vmsd = &vmstate_virtio_net;
3538 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3539 vdc->realize = virtio_net_device_realize;
3540 vdc->unrealize = virtio_net_device_unrealize;
3541 vdc->get_config = virtio_net_get_config;
3542 vdc->set_config = virtio_net_set_config;
3543 vdc->get_features = virtio_net_get_features;
3544 vdc->set_features = virtio_net_set_features;
3545 vdc->bad_features = virtio_net_bad_features;
3546 vdc->reset = virtio_net_reset;
3547 vdc->set_status = virtio_net_set_status;
3548 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3549 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3550 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3551 vdc->post_load = virtio_net_post_load_virtio;
3552 vdc->vmsd = &vmstate_virtio_net_device;
3553 vdc->primary_unplug_pending = primary_unplug_pending;
3556 static const TypeInfo virtio_net_info = {
3557 .name = TYPE_VIRTIO_NET,
3558 .parent = TYPE_VIRTIO_DEVICE,
3559 .instance_size = sizeof(VirtIONet),
3560 .instance_init = virtio_net_instance_init,
3561 .class_init = virtio_net_class_init,
3564 static void virtio_register_types(void)
3566 type_register_static(&virtio_net_info);
3569 type_init(virtio_register_types)