/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
/* Purge coalesced packets timer interval. This value affects performance
   a lot and should be tuned carefully; '300000' (300us) is the recommended
   value to pass the WHQL test, '50000' can gain 2x netperf throughput with
   tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

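/*
 * Note on the mapping below: virtqueues come in RX/TX pairs, with
 * virtqueue 2*N the RX queue and 2*N+1 the TX queue of data queue pair
 * N (the control queue, when present, sits after all data queues), so
 * e.g. virtqueue index 5 belongs to queue pair 2.
 */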
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;
    NetClientState *nc = qemu_get_queue(n->nic);

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    NetClientState *nc = qemu_get_queue(n->nic);

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    /*
     * Is this VDPA? No peer means not VDPA: there's no way to
     * disconnect/reconnect a VDPA peer.
     */
    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer),
                             (uint8_t *)&netcfg, 0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

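/*
 * n->vlans is a bitmap of MAX_VLAN (4096) bits stored as 32-bit words:
 * VLAN id v lives in word v >> 5, bit v & 0x1f, so e.g. VLAN 100 is
 * bit 4 of word 3. The loop below walks that bitmap and rebuilds the
 * list of configured VLAN ids from the set bits.
 */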
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

typedef struct {
    VirtIONet *n;
    char *id;
} FailoverId;

/**
 * Set the id of the failover primary device
 *
 * @opaque: FailoverId to setup
 * @opts: opts for device we are handling
 * @errp: returns an error if this function fails
 */
static int failover_set_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    FailoverId *fid = opaque;
    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (g_strcmp0(standby_id, fid->n->netclient_name) == 0) {
        fid->id = g_strdup(opts->id);
        return 1;
    }

    return 0;
}

/**
 * Find the primary device id for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static char *failover_find_primary_device_id(VirtIONet *n)
{
    Error *err = NULL;
    FailoverId fid;

    fid.n = n;
    if (!qemu_opts_foreach(qemu_find_opts("device"),
                           failover_set_primary, &fid, &err)) {
        return NULL;
    }
    return fid.id;
}

/**
 * Find the primary device for this failover virtio-net
 *
 * @n: VirtIONet device
 */
static DeviceState *failover_find_primary_device(VirtIONet *n)
{
    char *id = failover_find_primary_device_id(n);

    if (!id) {
        return NULL;
    }

    return qdev_find_recursive(sysbus_get_default(), id);
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    QemuOpts *opts;
    char *id;
    DeviceState *dev = failover_find_primary_device(n);

    if (dev) {
        return;
    }

    id = failover_find_primary_device_id(n);
    if (!id) {
        return;
    }
    opts = qemu_opts_find(qemu_find_opts("device"), id);
    if (opts) {
        dev = qdev_device_add(opts, &err);
        if (err) {
            qemu_opts_del(opts);
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    error_propagate(errp, err);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
        !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        qatomic_set(&n->failover_primary_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            warn_report_err(err);
        }
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

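    /*
     * Per the virtio spec, VIRTIO_NET_CTRL_MAC_TABLE_SET carries two
     * virtio_net_ctrl_mac tables, unicast followed by multicast, each
     * a little-endian 32-bit entry count and then that many 6-byte MAC
     * addresses. Both tables are parsed below into a single macs[]
     * array, with first_multi marking where the multicast entries
     * begin.
     */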
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
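    /*
     * The guest supplies the indirection table length as a mask
     * (length - 1), so e.g. a 128-entry table arrives as 0x7f.
     * Incrementing recovers the length, and the power-of-two check
     * below verifies that the value really was a contiguous bit mask.
     */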
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
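    /*
     * The hard-coded offsets below assume an untagged Ethernet frame
     * with a 20-byte (option-less) IPv4 header: bytes 12-13 are the
     * EtherType, byte 23 is the IP protocol field (14 + 9), and bytes
     * 34-35 are the UDP source port (14 + 20). Source port 67 (bootps)
     * matches replies sent by a DHCP server.
     */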
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

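    /*
     * The least-significant bit of the first destination MAC octet is
     * the IEEE 802.3 I/G bit: 1 for group (multicast/broadcast)
     * addresses, 0 for individual (unicast) ones.
     */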
    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}

static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

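    /*
     * Queue redirection is a two-step lookup: the hash, masked by the
     * power-of-two table length, picks a slot in the indirection
     * table, and that slot holds the destination queue index. A return
     * value of -1 tells the caller to keep the packet on the current
     * queue.
     */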
    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

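    /*
     * With mergeable RX buffers, num_buffers in the first buffer's
     * header can only be filled in once the whole packet has been
     * scattered across descriptors, so it is patched in here through
     * the iovec snapshot taken before the copy loop.
     */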
    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}

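/*
 * Header length arithmetic used by the extraction helpers below: the
 * IPv4 IHL field counts 32-bit words, so "(ip_ver_len & 0xF) << 2"
 * yields the header size in bytes; likewise the TCP data offset sits
 * in the top 4 bits of th_offset_flags, so masking with 0xF000 and
 * shifting right by 10 (>> 12 to get the word count, << 2 to convert
 * words to bytes) gives the TCP header length in bytes.
 */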
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between payload length in ipv4 and v6,
       ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

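/*
 * The ack/sequence comparisons in the helpers below rely on unsigned
 * wraparound: "new - old" is computed modulo 2^32, so a single
 * ">= window" test rejects values that are either stale or too far
 * ahead of the cached segment.
 */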
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data, the payload length in v4/v6 is different,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced
           for windows guest, while this may change the behavior for linux
           guest (only if it uses RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; any other control flag should only be
 * sent after the chain is drained, to prevent out-of-order delivery. */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
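    /*
     * th_offset_flags packs the 4-bit data offset (in 32-bit words) into
     * the top nibble, so masking with VIRTIO_NET_TCP_HDR_LENGTH (0xF000)
     * and shifting right by 10 (>> 12 for words, << 2 for bytes) yields
     * the header length in bytes.  For instance 0x5010 (offset 5, ACK)
     * gives 20 == sizeof(struct tcp_header), while any header carrying
     * TCP options gives a larger value and is finalized below.
     */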
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send the current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced; set the flag so the ipv4 checksum is recalculated */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;
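
    /*
     * tcp_port is the offset of the TCP source port within the packet;
     * since th_sport and th_dport are adjacent 16-bit fields, a single
     * 32-bit load compares both ports of the flow at once.
     */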
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip options */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle ip fragments */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
                + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
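        /*
         * The offsets passed to virtio_net_rsc_drain_flow() locate the
         * flow key inside the packet: for IPv4 the source address sits
         * 12 bytes into the IP header, immediately followed by the
         * destination address (hence the 8-byte VIRTIO_NET_IP4_ADDR_SIZE
         * span), and the TCP ports start right after the fixed 20-byte IP
         * header.  The IPv6 path below uses offset 8 for the same reason:
         * saddr starts 8 bytes into the IPv6 header.
         */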
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and the protocol are covered by this one check */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with the ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
                + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

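/*
 * RSC entry point on the receive path: peek at the ethertype, look up (or
 * lazily create) the per-protocol chain, and hand the frame to the IPv4
 * or IPv6 coalescing logic only when the corresponding rscX_enabled flag
 * was negotiated; everything else falls through to the plain receive
 * path.
 */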
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

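/*
 * virtio_net_flush_tx() below returns the number of packets flushed (at
 * most n->tx_burst), -EBUSY when an asynchronous send is still in flight
 * (completion re-enables notification and flushes again, see
 * virtio_net_tx_complete() above), or -EINVAL when the device has been
 * marked broken via virtio_error().
 */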
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
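
            /*
             * For a legacy cross-endian guest the multi-byte fields of
             * the vnet header must be byte-swapped before the packet
             * reaches the backend: the header is copied out and swapped
             * into mhdr, then spliced back in front of the remaining
             * buffers via sg2.
             */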
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If the host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that the host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

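/*
 * Two TX mitigation strategies follow.  The "timer" variant batches guest
 * kicks behind a one-shot timer (n->tx_timeout ns), trading latency for
 * fewer flushes; the "bh" variant (the default) schedules a bottom half
 * that flushes as soon as the main loop gets around to it, and
 * reschedules itself while full bursts keep arriving.
 */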
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        /* Notification re-enable is handled by tx_complete, or the
         * device is broken */
        return;
    }

    /* If we flushed a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

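/*
 * Virtqueues are laid out as rx0, tx0, rx1, tx1, ..., with the control
 * queue always last, so queue pair i owns device queues 2*i and 2*i+1 and
 * a device with max_queues pairs exposes 2 * max_queues + 1 queues in
 * total (e.g. two pairs -> five queues).  Resizing therefore has to drop
 * the control queue, grow or shrink the pairs, and re-add it at the end.
 */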
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}

static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */
static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);
        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         *   sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    },
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, in which case the netclient name will be
     * of the form type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static bool failover_unplug_primary(VirtIONet *n, DeviceState *dev)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, DeviceState *dev,
                                    Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(dev);
    BusState *primary_bus;

    if (!pdev->partially_hotplugged) {
        return true;
    }
    primary_bus = dev->parent_bus;
    if (!primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(dev, primary_bus, &error_abort);
    qatomic_set(&n->failover_primary_hidden, false);
    hotplug_ctrl = qdev_get_hotplug_handler(dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, dev, &err);
    }

out:
    error_propagate(errp, err);
    return !err;
}

static void virtio_net_handle_migration_primary(VirtIONet *n, MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;
    DeviceState *dev = failover_find_primary_device(n);

    if (!dev) {
        return;
    }

    should_be_hidden = qatomic_read(&n->failover_primary_hidden);

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n, dev)) {
            vmstate_unregister(VMSTATE_IF(dev), qdev_get_vmsd(dev), dev);
            qapi_event_send_unplug_primary(dev->id);
            qatomic_set(&n->failover_primary_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device, so let's plug it back in. */
        if (!failover_replug_primary(n, dev, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}

static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

static bool failover_hide_primary_device(DeviceListener *listener,
                                         QemuOpts *device_opts)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    const char *standby_id;

    if (!device_opts) {
        return false;
    }
    standby_id = qemu_opt_get(device_opts, "failover_pair_id");
    if (g_strcmp0(standby_id, n->netclient_name) != 0) {
        return false;
    }

    /* failover_primary_hidden is set during feature negotiation */
    return qatomic_read(&n->failover_primary_hidden);
}

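/*
 * Failover lifecycle, as implemented above: the primary (passthrough)
 * device is kept hidden until the standby feature has been negotiated;
 * when a migration starts it is unplug-requested and hidden again, and if
 * the migration fails it is re-plugged via failover_replug_primary().
 */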
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.hide_device = failover_hide_primary_device;
        qatomic_set(&n->failover_primary_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on the RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
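
    /*
     * Each queue pair consumes two virtqueues plus one shared control
     * queue, so the bound above works out to (VIRTIO_QUEUE_MAX - 1) / 2
     * usable pairs; assuming the usual VIRTIO_QUEUE_MAX of 1024, that
     * allows up to 511 queue pairs.
     */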
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        struct virtio_net_config netcfg = {};
        memcpy(&netcfg.mac, &n->nic_conf.macaddr, ETH_ALEN);
        vhost_net_set_config(get_vhost_net(nc->peer),
            (uint8_t *)&netcfg, 0, ETH_ALEN, VHOST_SET_CONFIG_TYPE_MASTER);
    }
    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        device_listener_unregister(&n->primary_listener);
    }

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* also delete the control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    DeviceState *primary;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    primary = failover_find_primary_device(n);
    return primary ? primary->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

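/*
 * dev_unplug_pending is wired into vmstate_virtio_net below; the
 * migration core consults it so the device's state is not saved while the
 * failover primary's hot-unplug is still pending.
 */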
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)