/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
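
/*
 * A note on these masks: th_offset_flags packs the TCP data offset (top
 * 4 bits) and the flags (low 6 bits) into one big-endian 16-bit field.
 * Masking with 0xF000 and shifting right by 10 (rather than 12) converts
 * the offset from 32-bit words straight into bytes; e.g. an offset of 5
 * words gives (0x5000 >> 10) == 20 bytes, the size of a bare TCP header.
 */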

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   significantly and should be tuned carefully: '300000' (300us) is the
   recommended value to pass the WHQL test, while '50000' can gain 2x netperf
   throughput with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}
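
/*
 * Virtqueues come in rx/tx pairs, so dividing the virtqueue index by two
 * yields the queue pair index used to pick the matching NetClientState
 * subqueue.
 */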
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
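
/*
 * Start or stop the vhost backend so that it tracks the composite
 * "started" state: driver OK, link up and the VM running.  When vhost
 * cannot be used (e.g. the backend cannot parse vnet headers in the
 * guest's endianness), packet processing stays in the userspace virtio
 * implementation.
 */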
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}
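
/*
 * Propagate the requested vnet header endianness to every peer; on
 * failure, already-configured peers are rolled back.  The return value
 * is true when the backends could not be set up and virtio-net itself
 * must byte-swap vnet headers (see virtio_net_hdr_swap() below).
 */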
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
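
/*
 * n->vlans is a bitmap of MAX_VLAN bits stored in 32-bit words: VLAN id v
 * lives in word v >> 5 at bit v & 0x1f.  For example, id 70 is bit 6 of
 * word 2.  This walks the bitmap and turns it back into a list of ids for
 * the rx-filter query.
 */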
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
                                            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");
            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    if (err) {
        error_propagate(errp, err);
    }
}

static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    VirtIONet *n = opaque;
    int ret = 0;

    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
        n->primary_device_id = g_strdup(opts->id);
        ret = 1;
    }

    return ret;
}

static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}

static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        if (err) {
            error_propagate(errp, err);
        }
    }

    return prim_dev;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}
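
/*
 * Parse an RSS (do_rss) or hash-report configuration command.  The
 * payload follows the layout of struct virtio_net_rss_config and is
 * consumed field by field: hash_types and indirection_table_mask first,
 * then the variable-length indirection table, then max_tx_vq plus
 * hash_key_length (the packed 'temp' struct below), and finally the
 * hash key itself.
 */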
static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
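
/*
 * Control virtqueue elements carry a virtio_net_ctrl_hdr plus the command
 * payload in the device-readable 'out' buffers and expect a single
 * virtio_net_ctrl_ack status byte in the device-writable 'in' buffer;
 * each class handler above receives the payload iovec with the header
 * already stripped.
 */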
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
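
/*
 * The multi-byte vnet header fields are guest-endian.  When the backend
 * could not be switched to that endianness (needs_vnet_hdr_swap), the
 * 16-bit fields are byte-swapped here on the way through.
 */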
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
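
/*
 * The magic offsets below index into the raw Ethernet frame: bytes 12-13
 * are the ethertype (0x0800 for IPv4), byte 23 is the IP protocol field
 * (14-byte Ethernet header + 9), and bytes 34-35 are the UDP source port
 * (14 + 20-byte option-less IP header + 0), i.e. 67/bootps for a DHCP
 * server response.
 */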
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
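
/*
 * Map the parsed protocol flags to a NetPktRss* hash type, honouring the
 * hash types the guest enabled: L4 hashes (TCP, then UDP) are preferred
 * over the bare IP hash, and for IPv6 the *_EX variants win when
 * negotiated.  0xff means no usable hash type.
 */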
static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
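
/*
 * Classify one incoming packet against the current RSS state, optionally
 * writing the hash into its virtio_net_hdr_v1_hash header.  Returns the
 * queue index the packet should be redirected to, or -1 when it is
 * already on the right queue; the reports[] table translates NetPktRss*
 * values (used as the index) into VIRTIO_NET_HASH_REPORT_* codes.
 */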
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* The payload length differs between ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
                        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
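
/*
 * Decide what to do with a segment that carries no new payload: an ack
 * far outside the window or a pure/duplicate ack finalizes the chain
 * (RSC_FINAL), while a window update is folded into the cached segment
 * (RSC_COALESCE).
 */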
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, add dup ack count due to whql test up to 1 */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
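
/*
 * Merge a new in-order segment into the cached one.  With nseq/oseq the
 * new and old sequence numbers: nseq == oseq means an ack or a payload
 * following a pure ack, nseq - oseq == old payload length means the next
 * in-order chunk (appended below), anything else is out of order and
 * finalizes the chain.
 */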
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length in v4/v6 differs,
           so use the field value to update and record the new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Carry over the new segment's TCP flags; the WHQL test guide says
           'PUSH' can be coalesced for windows guests, while this may change
           the behavior for linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
/* Packets with 'SYN' should bypass RSC; packets with other control flags
 * should be sent only after the cached data is drained, to prevent
 * out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10; /* bytes */
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

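/*
 * Walk the chain's cached segments and try to merge the new packet
 * into one of them. An empty cache simply stores the packet and arms
 * the drain timer; RSC_FINAL flushes the matching segment and passes
 * the new packet through; RSC_NO_MATCH keeps searching; any other
 * result means the data was coalesced in place.
 */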
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced; set the flag so the IPv4 checksum is
               recalculated when the segment is drained */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's cached data; this avoids out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

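/*
 * IPv4 sanity checks: only plain, unfragmented, non-ECN TCP packets
 * with a 20-byte IP header and a consistent total length are
 * candidates for coalescing; everything else bypasses RSC untouched.
 */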
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

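/*
 * Per-packet IPv4 entry point: packets too short to carry a TCP
 * segment, packets failing the sanity checks, and control segments
 * are delivered (or drained) immediately; only clean data segments
 * reach virtio_net_rsc_do_coalesce().
 */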
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* Match on the saddr/daddr pair (offset 12 in the IPv4 header)
           and on the TCP port pair that follows the IP header */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both extension headers and the protocol are covered by this check:
       any non-TCP next header is bypassed */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        /* Match on the saddr/daddr pair (offset 8 in the IPv6 header)
           and on the TCP port pair that follows it */
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

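/*
 * Top-level RSC dispatch: look at the ethertype behind the virtio-net
 * header and hand the packet to the IPv4 or IPv6 path when the
 * corresponding chain exists and that protocol's RSC is enabled;
 * anything else takes the regular receive path.
 */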
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

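/*
 * Pull up to tx_burst elements off the TX virtqueue and hand them to
 * the backend. A return value of -EBUSY means the backend could not
 * take a packet and completion will arrive asynchronously through
 * virtio_net_tx_complete(); -EINVAL marks the device broken.
 */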
/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

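/*
 * Timer-based TX mitigation: the first kick arms tx_timer and disables
 * further queue notifications; if the guest kicks again before the
 * timer fires, the queue is flushed right away instead.
 */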
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

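/*
 * Bottom-half TX path: the handler only schedules q->tx_bh; the actual
 * flush happens in virtio_net_tx_bh() outside the virtqueue
 * notification, which batches kicks that arrive back to back.
 */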
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking. If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

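/*
 * Queue layout is rx0, tx0, rx1, tx1, ..., ctrl: every queue pair
 * occupies two slots and the control queue is always last, which is
 * why the ctrl vq has to be removed and re-added around any resize.
 */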
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_NET_F_HASH_REPORT));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    if (n->rss_data.enabled) {
        trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                    n->rss_data.indirections_len,
                                    sizeof(n->rss_data.key));
    } else {
        trace_virtio_net_rss_disable();
    }
    return 0;
}

static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry. We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name = "virtio-net-tx_waiting",
    .pre_load = virtio_net_tx_waiting_pre_load,
    .pre_save = virtio_net_tx_waiting_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static bool virtio_net_rss_needed(void *opaque)
{
    return VIRTIO_NET(opaque)->rss_data.enabled;
}

static const VMStateDescription vmstate_virtio_net_rss = {
    .name = "virtio-net-device/rss",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = virtio_net_rss_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(rss_data.enabled, VirtIONet),
        VMSTATE_BOOL(rss_data.redirect, VirtIONet),
        VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
        VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
        VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
        VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
        VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
                            VIRTIO_NET_RSS_MAX_KEY_SIZE),
        VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
                                    rss_data.indirections_len, 0,
                                    vmstate_info_uint16, uint16_t),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription * []) {
        &vmstate_virtio_net_rss,
        NULL
    }
};

static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, in which case the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

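/*
 * Failover support: when a migration starts, the primary (typically a
 * passthrough) device is hot-unplugged from the guest and the
 * virtio-net standby takes over; if the migration fails, the primary
 * is plugged back in. The helpers below implement the two directions.
 */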
static bool failover_unplug_primary(VirtIONet *n)
{
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pci_dev;
    Error *err = NULL;

    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        pci_dev = PCI_DEVICE(n->primary_dev);
        pci_dev->partially_hotplugged = true;
        hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            error_report_err(err);
            return false;
        }
    } else {
        return false;
    }
    return true;
}

static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;
    HotplugHandler *hotplug_ctrl;
    PCIDevice *pdev = PCI_DEVICE(n->primary_dev);

    if (!pdev->partially_hotplugged) {
        return true;
    }
    if (!n->primary_device_opts) {
        n->primary_device_opts = qemu_opts_from_qdict(
                                                qemu_find_opts("device"),
                                                n->primary_device_dict, errp);
        if (!n->primary_device_opts) {
            return false;
        }
    }
    n->primary_bus = n->primary_dev->parent_bus;
    if (!n->primary_bus) {
        error_setg(errp, "virtio_net: couldn't find primary bus");
        return false;
    }
    qdev_set_parent_bus(n->primary_dev, n->primary_bus);
    n->primary_should_be_hidden = false;
    qemu_opt_set_bool(n->primary_device_opts,
                      "partially_hotplugged", true, &err);
    if (err) {
        goto out;
    }
    hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
    if (hotplug_ctrl) {
        hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
        if (err) {
            goto out;
        }
        hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
    }

out:
    error_propagate(errp, err);
    return !err;
}

static void virtio_net_handle_migration_primary(VirtIONet *n,
                                                MigrationState *s)
{
    bool should_be_hidden;
    Error *err = NULL;

    should_be_hidden = atomic_read(&n->primary_should_be_hidden);

    if (!n->primary_dev) {
        n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
        if (!n->primary_dev) {
            return;
        }
    }

    if (migration_in_setup(s) && !should_be_hidden) {
        if (failover_unplug_primary(n)) {
            vmstate_unregister(VMSTATE_IF(n->primary_dev),
                               qdev_get_vmsd(n->primary_dev),
                               n->primary_dev);
            qapi_event_send_unplug_primary(n->primary_device_id);
            atomic_set(&n->primary_should_be_hidden, true);
        } else {
            warn_report("couldn't unplug primary device");
        }
    } else if (migration_has_failed(s)) {
        /* We already unplugged the device; let's plug it back in. */
        if (!failover_replug_primary(n, &err)) {
            if (err) {
                error_report_err(err);
            }
        }
    }
}

static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *s = data;
    VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
    virtio_net_handle_migration_primary(n, s);
}

static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
                                               QemuOpts *device_opts)
{
    VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
    bool match_found = false;
    bool hide = false;

    if (!device_opts) {
        return -1;
    }
    n->primary_device_dict = qemu_opts_to_qdict(device_opts,
                                                n->primary_device_dict);
    if (n->primary_device_dict) {
        g_free(n->standby_id);
        n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
                                                   "failover_pair_id"));
    }
    if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
        match_found = true;
    } else {
        match_found = false;
        hide = false;
        g_free(n->standby_id);
        n->primary_device_dict = NULL;
        goto out;
    }

    n->primary_device_opts = device_opts;

    /* primary_should_be_hidden is set during feature negotiation */
    hide = atomic_read(&n->primary_should_be_hidden);

    if (n->primary_device_dict) {
        g_free(n->primary_device_id);
        n->primary_device_id = g_strdup(qdict_get_try_str(
                                        n->primary_device_dict, "id"));
        if (!n->primary_device_id) {
            warn_report("primary_device_id not set");
        }
    }

out:
    if (match_found && hide) {
        return 1;
    } else if (match_found && !hide) {
        return 0;
    } else {
        return -1;
    }
}

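/*
 * realize(): validates the user-supplied configuration (duplex, speed,
 * queue sizes, queue count), sizes the config space from the enabled
 * feature bits, creates the rx/tx queue pairs plus the control queue,
 * and finally instantiates the NIC backend.
 */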
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    }
    if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    if (n->failover) {
        n->primary_listener.should_be_hidden =
            virtio_net_primary_should_be_hidden;
        atomic_set(&n->primary_should_be_hidden, true);
        device_listener_register(&n->primary_listener);
        n->migration_state.notify = virtio_net_migration_state_notifier;
        add_migration_state_change_notifier(&n->migration_state);
        n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;

    net_rx_pkt_init(&n->rx_pkt, false);
}

static void virtio_net_device_unrealize(DeviceState *dev)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    if (n->failover) {
        g_free(n->primary_device_id);
        g_free(n->standby_id);
        qobject_unref(n->primary_device_dict);
        n->primary_device_dict = NULL;
    }

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }
    /* also delete the control vq */
    virtio_del_queue(vdev, max_queues * 2);
    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    g_free(n->rss_data.indirections_table);
    net_rx_pkt_uninit(n->rx_pkt);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n));
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static bool primary_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(vdev);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
        return false;
    }
    return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
}

static bool dev_unplug_pending(void *opaque)
{
    DeviceState *dev = opaque;
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);

    return vdc->primary_unplug_pending(dev);
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
    .dev_unplug_pending = dev_unplug_pending,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
                      VIRTIO_NET_F_RSS, false),
    DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
                      VIRTIO_NET_F_HASH_REPORT, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    device_class_set_props(dc, virtio_net_properties);
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
    vdc->primary_unplug_pending = primary_unplug_pending;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)