/* hw/net/virtio-net.c */

/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval. This value affects the performance
   a lot, and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \
                                         VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
                                         VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)

static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT),
     .end = endof(struct virtio_net_config, supported_hash_types)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}
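
/* Virtqueues come in rx/tx pairs: vq index 2*N is the RX queue of queue
 * pair N and 2*N+1 its TX queue, so vq2q() maps a virtqueue index back
 * to its queue-pair index. */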
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    int ret = 0;
    memset(&netcfg, 0, sizeof(struct virtio_net_config));
    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE;
    virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length,
                 virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
                 VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
    virtio_stl_p(vdev, &netcfg.supported_hash_types,
                 VIRTIO_NET_RSS_SUPPORTED_HASHES);
    memcpy(config, &netcfg, n->config_size);

    NetClientState *nc = qemu_get_queue(n->nic);
    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                                   n->config_size);
        if (ret != -1) {
            memcpy(config, &netcfg, n->config_size);
        }
    }
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }

    NetClientState *nc = qemu_get_queue(n->nic);
    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
        vhost_net_set_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
                             0, n->config_size,
                             VHOST_SET_CONFIG_TYPE_MASTER);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
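
/* Bring the vhost backend in sync with the device status: it is started
 * only when the driver is ready, the link is up and the VM is running,
 * and stopped otherwise. If the backend cannot honour the negotiated
 * vnet header endianness or fails to start, packet processing falls
 * back to userspace virtio. */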
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
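
/* Propagate the device's vnet header endianness to a single peer:
 * big-endian for a big-endian (legacy cross-endian) device,
 * little-endian otherwise. */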
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the tx
                 * queue and disabled notification. */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}
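
/* Emit the QMP NIC_RX_FILTER_CHANGED event at most once: notification is
 * disabled here and re-enabled only when the management layer reads the
 * filter state in virtio_net_query_rxfilter(), so repeated guest updates
 * cannot flood QMP. */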
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        char *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
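
/* Build a list of the VLAN ids currently accepted by the filter, walking
 * the n->vlans bitmap (32 ids per word). */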
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
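
/* Recompute the vnet header layout the guest expects: virtio 1.0 always
 * uses the mergeable-buffers header (extended with a hash field when hash
 * reporting is negotiated), while legacy devices use the short header
 * unless VIRTIO_NET_F_MRG_RXBUF was negotiated. The host header length is
 * enlarged to match wherever the peer supports it. */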
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1, int hash_report)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = hash_report ?
            sizeof(struct virtio_net_hdr_v1_hash) :
            sizeof(struct virtio_net_hdr_mrg_rxbuf);
        n->rss_data.populate_hash = !!hash_report;
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First, sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
    virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
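
/* Failover support: hotplug the primary (passthrough) device that pairs
 * with this virtio-net standby device. The primary is located by its
 * failover_pair_id property, which must name this device's netclient. */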
static void failover_add_primary(VirtIONet *n, Error **errp)
{
    Error *err = NULL;

    if (n->primary_dev) {
        return;
    }

    n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
                                            n->primary_device_id);
    if (n->primary_device_opts) {
        n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
        if (err) {
            qemu_opts_del(n->primary_device_opts);
        }
        if (n->primary_dev) {
            n->primary_bus = n->primary_dev->parent_bus;
            if (err) {
                qdev_unplug(n->primary_dev, &err);
                qdev_set_id(n->primary_dev, "");
            }
        }
    } else {
        error_setg(errp, "Primary device not found");
        error_append_hint(errp, "Virtio-net failover will not work. Make "
                          "sure primary device has parameter"
                          " failover_pair_id=<virtio-net-id>\n");
    }
    if (err) {
        error_propagate(errp, err);
    }
}

static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
    VirtIONet *n = opaque;
    int ret = 0;

    const char *standby_id = qemu_opt_get(opts, "failover_pair_id");

    if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
        n->primary_device_id = g_strdup(opts->id);
        ret = 1;
    }

    return ret;
}

static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
    DeviceState *dev = NULL;
    Error *err = NULL;

    if (qemu_opts_foreach(qemu_find_opts("device"),
                          is_my_primary, n, &err)) {
        if (err) {
            error_propagate(errp, err);
            return NULL;
        }
        if (n->primary_device_id) {
            dev = qdev_find_recursive(sysbus_get_default(),
                                      n->primary_device_id);
        } else {
            error_setg(errp, "Primary device id not found");
            return NULL;
        }
    }
    return dev;
}

static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
                                                    DeviceState *dev,
                                                    Error **errp)
{
    DeviceState *prim_dev = NULL;
    Error *err = NULL;

    prim_dev = virtio_net_find_primary(n, &err);
    if (prim_dev) {
        n->primary_device_id = g_strdup(prim_dev->id);
        n->primary_device_opts = prim_dev->opts;
    } else {
        if (err) {
            error_propagate(errp, err);
        }
    }

    return prim_dev;
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    Error *err = NULL;
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1),
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_HASH_REPORT));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
    n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
        qapi_event_send_failover_negotiated(n->netclient_name);
        atomic_set(&n->primary_should_be_hidden, false);
        failover_add_primary(n, &err);
        if (err) {
            n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
            if (err) {
                goto out_err;
            }
            failover_add_primary(n, &err);
            if (err) {
                goto out_err;
            }
        }
    }
    return;

out_err:
    if (err) {
        warn_report_err(err);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static void virtio_net_disable_rss(VirtIONet *n)
{
    if (n->rss_data.enabled) {
        trace_virtio_net_rss_disable();
    }
    n->rss_data.enabled = false;
}

static uint16_t virtio_net_handle_rss(VirtIONet *n,
                                      struct iovec *iov,
                                      unsigned int iov_cnt,
                                      bool do_rss)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_rss_config cfg;
    size_t s, offset = 0, size_get;
    uint16_t queues, i;
    struct {
        uint16_t us;
        uint8_t b;
    } QEMU_PACKED temp;
    const char *err_msg = "";
    uint32_t err_value = 0;

    if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) {
        err_msg = "RSS is not negotiated";
        goto error;
    }
    if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) {
        err_msg = "Hash report is not negotiated";
        goto error;
    }
    size_get = offsetof(struct virtio_net_rss_config, indirection_table);
    s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get);
    if (s != size_get) {
        err_msg = "Short command buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
    n->rss_data.indirections_len =
        virtio_lduw_p(vdev, &cfg.indirection_table_mask);
    n->rss_data.indirections_len++;
    if (!do_rss) {
        n->rss_data.indirections_len = 1;
    }
    if (!is_power_of_2(n->rss_data.indirections_len)) {
        err_msg = "Invalid size of indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
        err_msg = "Too large indirection table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    n->rss_data.default_queue = do_rss ?
        virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0;
    if (n->rss_data.default_queue >= n->max_queues) {
        err_msg = "Invalid default queue";
        err_value = n->rss_data.default_queue;
        goto error;
    }
    offset += size_get;
    size_get = sizeof(uint16_t) * n->rss_data.indirections_len;
    g_free(n->rss_data.indirections_table);
    n->rss_data.indirections_table = g_malloc(size_get);
    if (!n->rss_data.indirections_table) {
        err_msg = "Can't allocate indirections table";
        err_value = n->rss_data.indirections_len;
        goto error;
    }
    s = iov_to_buf(iov, iov_cnt, offset,
                   n->rss_data.indirections_table, size_get);
    if (s != size_get) {
        err_msg = "Short indirection table buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    for (i = 0; i < n->rss_data.indirections_len; ++i) {
        uint16_t val = n->rss_data.indirections_table[i];
        n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val);
    }
    offset += size_get;
    size_get = sizeof(temp);
    s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get);
    if (s != size_get) {
        err_msg = "Can't get queues";
        err_value = (uint32_t)s;
        goto error;
    }
    queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues;
    if (queues == 0 || queues > n->max_queues) {
        err_msg = "Invalid number of queues";
        err_value = queues;
        goto error;
    }
    if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) {
        err_msg = "Invalid key size";
        err_value = temp.b;
        goto error;
    }
    if (!temp.b && n->rss_data.hash_types) {
        err_msg = "No key provided";
        err_value = 0;
        goto error;
    }
    if (!temp.b && !n->rss_data.hash_types) {
        virtio_net_disable_rss(n);
        return queues;
    }
    offset += size_get;
    size_get = temp.b;
    s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get);
    if (s != size_get) {
        err_msg = "Can't get key buffer";
        err_value = (uint32_t)s;
        goto error;
    }
    n->rss_data.enabled = true;
    trace_virtio_net_rss_enable(n->rss_data.hash_types,
                                n->rss_data.indirections_len,
                                temp.b);
    return queues;
error:
    trace_virtio_net_rss_error(err_msg, err_value);
    virtio_net_disable_rss(n);
    return 0;
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t queues;

    virtio_net_disable_rss(n);
    if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, false);
        return queues ? VIRTIO_NET_OK : VIRTIO_NET_ERR;
    }
    if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) {
        queues = virtio_net_handle_rss(n, iov, iov_cnt, true);
    } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        struct virtio_net_ctrl_mq mq;
        size_t s;
        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
        if (s != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
    } else {
        return VIRTIO_NET_ERR;
    }

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
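
/* Control virtqueue handler: pop each request, dispatch on the class in
 * the virtio_net_ctrl_hdr, and write a one-byte ack back into the
 * guest-supplied status buffer. */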
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static bool virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return false;
    }

    if (nc->queue_index >= n->curr_queues) {
        return false;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }

    return true;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
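
/* Apply the receive filter to an incoming packet: returns 1 if the packet
 * should be delivered to the guest and 0 if it should be dropped, checking
 * promiscuous mode, the VLAN table and the unicast/multicast MAC tables
 * in turn. */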
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static uint8_t virtio_net_get_hash_type(bool isip4,
                                        bool isip6,
                                        bool isudp,
                                        bool istcp,
                                        uint32_t types)
{
    if (isip4) {
        if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) {
            return NetPktRssIpV4Tcp;
        }
        if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) {
            return NetPktRssIpV4Udp;
        }
        if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
            return NetPktRssIpV4;
        }
    } else if (isip6) {
        uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6;

        if (istcp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ?
                NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
        if (isudp && (types & mask)) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ?
                NetPktRssIpV6UdpEx : NetPktRssIpV6Udp;
        }
        mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6;
        if (types & mask) {
            return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ?
                NetPktRssIpV6Ex : NetPktRssIpV6;
        }
    }
    return 0xff;
}

static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report,
                                   uint32_t hash)
{
    struct virtio_net_hdr_v1_hash *hdr = (void *)buf;
    hdr->hash_value = hash;
    hdr->hash_report = report;
}
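
/* Select the destination queue for a packet according to the negotiated
 * RSS settings, optionally storing the computed hash in the packet's vnet
 * header. Returns the new queue index, or -1 if the packet should stay on
 * the queue it arrived on. */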
static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    unsigned int index = nc->queue_index, new_index = index;
    struct NetRxPkt *pkt = n->rx_pkt;
    uint8_t net_hash_type;
    uint32_t hash;
    bool isip4, isip6, isudp, istcp;
    static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = {
        VIRTIO_NET_HASH_REPORT_IPv4,
        VIRTIO_NET_HASH_REPORT_TCPv4,
        VIRTIO_NET_HASH_REPORT_TCPv6,
        VIRTIO_NET_HASH_REPORT_IPv6,
        VIRTIO_NET_HASH_REPORT_IPv6_EX,
        VIRTIO_NET_HASH_REPORT_TCPv6_EX,
        VIRTIO_NET_HASH_REPORT_UDPv4,
        VIRTIO_NET_HASH_REPORT_UDPv6,
        VIRTIO_NET_HASH_REPORT_UDPv6_EX
    };

    net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
                             size - n->host_hdr_len);
    net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp);
    if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) {
        istcp = isudp = false;
    }
    net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp,
                                             n->rss_data.hash_types);
    if (net_hash_type > NetPktRssIpV6UdpEx) {
        if (n->rss_data.populate_hash) {
            virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0);
        }
        return n->rss_data.redirect ? n->rss_data.default_queue : -1;
    }

    hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key);

    if (n->rss_data.populate_hash) {
        virtio_set_packet_hash(buf, reports[net_hash_type], hash);
    }

    if (n->rss_data.redirect) {
        new_index = hash & (n->rss_data.indirections_len - 1);
        new_index = n->rss_data.indirections_table[new_index];
    }

    return (index == new_index) ? -1 : new_index;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size, bool no_rss)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    if (!no_rss && n->rss_data.enabled) {
        int index = virtio_net_process_rss(nc, buf, size);
        if (index >= 0) {
            NetClientState *nc2 = qemu_get_subqueue(n->nic, index);
            return virtio_net_receive_rcu(nc2, buf, size, true);
        }
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            if (n->rss_data.populate_hash) {
                offset = sizeof(mhdr);
                iov_from_buf(sg, elem->in_num, offset,
                             buf + offset, n->host_hdr_len - sizeof(mhdr));
            }
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    RCU_READ_LOCK_GUARD();

    return virtio_net_receive_rcu(nc, buf, size, false);
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* The payload length differs between ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr_v1 *h;

    h = (struct virtio_net_hdr_v1 *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        h->rsc.segments = seg->packets;
        h->rsc.dup_acks = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
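
/* Classify an ACK-only segment: out-of-window, duplicate and pure ACKs
 * finalize (drain) the cached segment, while a window update is folded
 * into it and coalesced. */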
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; bump the dup-ack count (the WHQL test
             * expects it, capped at 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length in v4/v6 is
           different, so use the field value to update and record the
           new data len */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Take over the 'PUSH' flag: the WHQL test guide says 'PUSH' can be
           coalesced for a windows guest, while this may change the behavior
           for a linux guest (only if it uses the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
2059 /* Packets with 'SYN' should bypass; packets with other control flags should
2060 * be sent only after the chain is drained, to prevent out-of-order delivery */
2061 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
2062 struct tcp_header *tcp)
2064 uint16_t tcp_hdr;
2065 uint16_t tcp_flag;
2067 tcp_flag = htons(tcp->th_offset_flags);
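/* Bits 12..15 of th_offset_flags carry the TCP data offset in 32-bit
 * words; masking with VIRTIO_NET_TCP_HDR_LENGTH and shifting right by 10
 * (>> 12 for words, << 2 for bytes) yields the header length in bytes. */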
2068 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
2069 tcp_flag &= VIRTIO_NET_TCP_FLAG;
2071 if (tcp_flag & TH_SYN) {
2072 chain->stat.tcp_syn++;
2073 return RSC_BYPASS;
2076 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
2077 chain->stat.tcp_ctrl_drain++;
2078 return RSC_FINAL;
2081 if (tcp_hdr > sizeof(struct tcp_header)) {
2082 chain->stat.tcp_all_opt++;
2083 return RSC_FINAL;
2086 return RSC_CANDIDATE;
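/* Try to coalesce the packet into one of the chain's cached segments;
 * cache it as a new segment when nothing matches, or drain a finished
 * segment and pass the packet through unmodified. */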
2089 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
2090 NetClientState *nc,
2091 const uint8_t *buf, size_t size,
2092 VirtioNetRscUnit *unit)
2094 int ret;
2095 VirtioNetRscSeg *seg, *nseg;
2097 if (QTAILQ_EMPTY(&chain->buffers)) {
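/* First packet cached on this chain: arm the drain timer (rsc_timeout
 * nanoseconds on QEMU_CLOCK_HOST) so the data cannot sit here forever. */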
2098 chain->stat.empty_cache++;
2099 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2100 timer_mod(chain->drain_timer,
2101 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
2102 return size;
2105 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2106 if (chain->proto == ETH_P_IP) {
2107 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
2108 } else {
2109 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
2112 if (ret == RSC_FINAL) {
2113 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2114 /* Send failed */
2115 chain->stat.final_failed++;
2116 return 0;
2119 /* Send current packet */
2120 return virtio_net_do_receive(nc, buf, size);
2121 } else if (ret == RSC_NO_MATCH) {
2122 continue;
2123 } else {
2124 /* Coalesced; set the flag so the IPv4 checksum is recalculated on drain */
2125 seg->is_coalesced = 1;
2126 return size;
2130 chain->stat.no_match_cache++;
2131 virtio_net_rsc_cache_buf(chain, nc, buf, size);
2132 return size;
2135 /* Drain a connection's buffered data, to avoid out-of-order segments */
2136 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
2137 NetClientState *nc,
2138 const uint8_t *buf, size_t size,
2139 uint16_t ip_start, uint16_t ip_size,
2140 uint16_t tcp_port)
2142 VirtioNetRscSeg *seg, *nseg;
2143 uint32_t ppair1, ppair2;
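/* The TCP source and destination ports are adjacent 16-bit fields, so
 * the whole pair can be matched with a single 32-bit comparison. */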
2145 ppair1 = *(uint32_t *)(buf + tcp_port);
2146 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
2147 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
2148 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
2149 || (ppair1 != ppair2)) {
2150 continue;
2152 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
2153 chain->stat.drain_failed++;
2156 break;
2159 return virtio_net_do_receive(nc, buf, size);
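/* Sanity-check an IPv4 candidate: only plain (no options), unfragmented,
 * non-ECN TCP with a credible total length may be coalesced. */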
2162 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
2163 struct ip_header *ip,
2164 const uint8_t *buf, size_t size)
2166 uint16_t ip_len;
2168 /* Not an ipv4 packet */
2169 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
2170 chain->stat.ip_option++;
2171 return RSC_BYPASS;
2174 /* Don't handle packets with ip option */
2175 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
2176 chain->stat.ip_option++;
2177 return RSC_BYPASS;
2180 if (ip->ip_p != IPPROTO_TCP) {
2181 chain->stat.bypass_not_tcp++;
2182 return RSC_BYPASS;
2185 /* Don't handle packets with ip fragment */
2186 if (!(htons(ip->ip_off) & IP_DF)) {
2187 chain->stat.ip_frag++;
2188 return RSC_BYPASS;
2191 /* Don't handle packets with ecn flag */
2192 if (IPTOS_ECN(ip->ip_tos)) {
2193 chain->stat.ip_ecn++;
2194 return RSC_BYPASS;
2197 ip_len = htons(ip->ip_len);
2198 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
2199 || ip_len > (size - chain->n->guest_hdr_len -
2200 sizeof(struct eth_header))) {
2201 chain->stat.ip_hacked++;
2202 return RSC_BYPASS;
2205 return RSC_CANDIDATE;
2208 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
2209 NetClientState *nc,
2210 const uint8_t *buf, size_t size)
2212 int32_t ret;
2213 uint16_t hdr_len;
2214 VirtioNetRscUnit unit;
2216 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2218 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
2219 + sizeof(struct tcp_header))) {
2220 chain->stat.bypass_not_tcp++;
2221 return virtio_net_do_receive(nc, buf, size);
2224 virtio_net_rsc_extract_unit4(chain, buf, &unit);
2225 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
2226 != RSC_CANDIDATE) {
2227 return virtio_net_do_receive(nc, buf, size);
2230 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2231 if (ret == RSC_BYPASS) {
2232 return virtio_net_do_receive(nc, buf, size);
2233 } else if (ret == RSC_FINAL) {
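/* ip_start lands 12 bytes into the IPv4 header, where the 8 bytes of
 * source + destination address (VIRTIO_NET_IP4_ADDR_SIZE) begin; the
 * last argument points at the TCP header, whose first 4 bytes are the
 * port pair. */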
2234 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2235 ((hdr_len + sizeof(struct eth_header)) + 12),
2236 VIRTIO_NET_IP4_ADDR_SIZE,
2237 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
2240 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
2243 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
2244 struct ip6_header *ip6,
2245 const uint8_t *buf, size_t size)
2247 uint16_t ip_len;
2249 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
2250 != IP_HEADER_VERSION_6) {
2251 return RSC_BYPASS;
2254 /* Both options and protocol are checked by this: any extension header changes ip6_un1_nxt */
2255 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
2256 chain->stat.bypass_not_tcp++;
2257 return RSC_BYPASS;
2260 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
2261 if (ip_len < sizeof(struct tcp_header) ||
2262 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
2263 - sizeof(struct ip6_header))) {
2264 chain->stat.ip_hacked++;
2265 return RSC_BYPASS;
2268 /* Don't handle packets with ecn flag */
2269 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
2270 chain->stat.ip_ecn++;
2271 return RSC_BYPASS;
2274 return RSC_CANDIDATE;
2277 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
2278 const uint8_t *buf, size_t size)
2280 int32_t ret;
2281 uint16_t hdr_len;
2282 VirtioNetRscChain *chain;
2283 VirtioNetRscUnit unit;
2285 chain = (VirtioNetRscChain *)opq;
2286 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
2288 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
2289 + sizeof(struct tcp_header))) {
2290 return virtio_net_do_receive(nc, buf, size);
2293 virtio_net_rsc_extract_unit6(chain, buf, &unit);
2294 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
2295 unit.ip, buf, size)) {
2296 return virtio_net_do_receive(nc, buf, size);
2299 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
2300 if (ret == RSC_BYPASS) {
2301 return virtio_net_do_receive(nc, buf, size);
2302 } else if (ret == RSC_FINAL) {
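/* The IPv6 source address starts 8 bytes into the header; source and
 * destination together make up the 32 bytes of VIRTIO_NET_IP6_ADDR_SIZE. */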
2303 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
2304 ((hdr_len + sizeof(struct eth_header)) + 8),
2305 VIRTIO_NET_IP6_ADDR_SIZE,
2306 hdr_len + sizeof(struct eth_header)
2307 + sizeof(struct ip6_header));
2310 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
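/* Look up the coalescing chain for 'proto', creating it on first use;
 * only ETH_P_IP and ETH_P_IPV6 get a chain. */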
2313 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
2314 NetClientState *nc,
2315 uint16_t proto)
2317 VirtioNetRscChain *chain;
2319 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
2320 return NULL;
2323 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
2324 if (chain->proto == proto) {
2325 return chain;
2329 chain = g_malloc(sizeof(*chain));
2330 chain->n = n;
2331 chain->proto = proto;
2332 if (proto == (uint16_t)ETH_P_IP) {
2333 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
2334 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2335 } else {
2336 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
2337 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2339 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
2340 virtio_net_rsc_purge, chain);
2341 memset(&chain->stat, 0, sizeof(chain->stat));
2343 QTAILQ_INIT(&chain->buffers);
2344 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
2346 return chain;
2349 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
2350 const uint8_t *buf,
2351 size_t size)
2353 uint16_t proto;
2354 VirtioNetRscChain *chain;
2355 struct eth_header *eth;
2356 VirtIONet *n;
2358 n = qemu_get_nic_opaque(nc);
2359 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
2360 return virtio_net_do_receive(nc, buf, size);
2363 eth = (struct eth_header *)(buf + n->guest_hdr_len);
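/* h_proto is big-endian on the wire; convert it to host order so it can
 * be compared with the ETH_P_* constants. */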
2364 proto = htons(eth->h_proto);
2366 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
2367 if (chain) {
2368 chain->stat.received++;
2369 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
2370 return virtio_net_rsc_receive4(chain, nc, buf, size);
2371 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
2372 return virtio_net_rsc_receive6(chain, nc, buf, size);
2375 return virtio_net_do_receive(nc, buf, size);
2378 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
2379 size_t size)
2381 VirtIONet *n = qemu_get_nic_opaque(nc);
2382 if ((n->rsc4_enabled || n->rsc6_enabled)) {
2383 return virtio_net_rsc_receive(nc, buf, size);
2384 } else {
2385 return virtio_net_do_receive(nc, buf, size);
2389 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
2391 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
2393 VirtIONet *n = qemu_get_nic_opaque(nc);
2394 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
2395 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2397 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
2398 virtio_notify(vdev, q->tx_vq);
2400 g_free(q->async_tx.elem);
2401 q->async_tx.elem = NULL;
2403 virtio_queue_set_notification(q->tx_vq, 1);
2404 virtio_net_flush_tx(q);
2407 /* TX */
2408 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
2410 VirtIONet *n = q->n;
2411 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2412 VirtQueueElement *elem;
2413 int32_t num_packets = 0;
2414 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
2415 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2416 return num_packets;
2419 if (q->async_tx.elem) {
2420 virtio_queue_set_notification(q->tx_vq, 0);
2421 return num_packets;
2424 for (;;) {
2425 ssize_t ret;
2426 unsigned int out_num;
2427 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
2428 struct virtio_net_hdr_mrg_rxbuf mhdr;
2430 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
2431 if (!elem) {
2432 break;
2435 out_num = elem->out_num;
2436 out_sg = elem->out_sg;
2437 if (out_num < 1) {
2438 virtio_error(vdev, "virtio-net header not in first element");
2439 virtqueue_detach_element(q->tx_vq, elem, 0);
2440 g_free(elem);
2441 return -EINVAL;
2444 if (n->has_vnet_hdr) {
2445 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2446 n->guest_hdr_len) {
2447 virtio_error(vdev, "virtio-net header incorrect");
2448 virtqueue_detach_element(q->tx_vq, elem, 0);
2449 g_free(elem);
2450 return -EINVAL;
2452 if (n->needs_vnet_hdr_swap) {
2453 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2454 sg2[0].iov_base = &mhdr;
2455 sg2[0].iov_len = n->guest_hdr_len;
2456 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2457 out_sg, out_num,
2458 n->guest_hdr_len, -1);
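/* If iov_copy() used every one of the VIRTQUEUE_MAX_SIZE slots available
 * in sg2[1..], the element may not have fit, so the packet is dropped. */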
2459 if (out_num == VIRTQUEUE_MAX_SIZE) {
2460 goto drop;
2462 out_num += 1;
2463 out_sg = sg2;
2467 * If host wants to see the guest header as is, we can
2468 * pass it on unchanged. Otherwise, copy just the parts
2469 * that host is interested in.
2471 assert(n->host_hdr_len <= n->guest_hdr_len);
2472 if (n->host_hdr_len != n->guest_hdr_len) {
2473 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2474 out_sg, out_num,
2475 0, n->host_hdr_len);
2476 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2477 out_sg, out_num,
2478 n->guest_hdr_len, -1);
2479 out_num = sg_num;
2480 out_sg = sg;
2483 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2484 out_sg, out_num, virtio_net_tx_complete);
2485 if (ret == 0) {
2486 virtio_queue_set_notification(q->tx_vq, 0);
2487 q->async_tx.elem = elem;
2488 return -EBUSY;
2491 drop:
2492 virtqueue_push(q->tx_vq, elem, 0);
2493 virtio_notify(vdev, q->tx_vq);
2494 g_free(elem);
2496 if (++num_packets >= n->tx_burst) {
2497 break;
2500 return num_packets;
2503 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2505 VirtIONet *n = VIRTIO_NET(vdev);
2506 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2508 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2509 virtio_net_drop_tx_queue_data(vdev, vq);
2510 return;
2513 /* This happens when device was stopped but VCPU wasn't. */
2514 if (!vdev->vm_running) {
2515 q->tx_waiting = 1;
2516 return;
2519 if (q->tx_waiting) {
2520 virtio_queue_set_notification(vq, 1);
2521 timer_del(q->tx_timer);
2522 q->tx_waiting = 0;
2523 if (virtio_net_flush_tx(q) == -EINVAL) {
2524 return;
2526 } else {
2527 timer_mod(q->tx_timer,
2528 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2529 q->tx_waiting = 1;
2530 virtio_queue_set_notification(vq, 0);
2534 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2536 VirtIONet *n = VIRTIO_NET(vdev);
2537 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2539 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2540 virtio_net_drop_tx_queue_data(vdev, vq);
2541 return;
2544 if (unlikely(q->tx_waiting)) {
2545 return;
2547 q->tx_waiting = 1;
2548 /* This happens when device was stopped but VCPU wasn't. */
2549 if (!vdev->vm_running) {
2550 return;
2552 virtio_queue_set_notification(vq, 0);
2553 qemu_bh_schedule(q->tx_bh);
2556 static void virtio_net_tx_timer(void *opaque)
2558 VirtIONetQueue *q = opaque;
2559 VirtIONet *n = q->n;
2560 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2561 /* This happens when the device was stopped but the timer wasn't. */
2562 if (!vdev->vm_running) {
2563 /* Make sure tx waiting is set, so we'll run when restarted. */
2564 assert(q->tx_waiting);
2565 return;
2568 q->tx_waiting = 0;
2570 /* Just in case the driver is not ready any more */
2571 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2572 return;
2575 virtio_queue_set_notification(q->tx_vq, 1);
2576 virtio_net_flush_tx(q);
2579 static void virtio_net_tx_bh(void *opaque)
2581 VirtIONetQueue *q = opaque;
2582 VirtIONet *n = q->n;
2583 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2584 int32_t ret;
2586 /* This happens when device was stopped but BH wasn't. */
2587 if (!vdev->vm_running) {
2588 /* Make sure tx waiting is set, so we'll run when restarted. */
2589 assert(q->tx_waiting);
2590 return;
2593 q->tx_waiting = 0;
2595 /* Just in case the driver is not ready any more */
2596 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2597 return;
2600 ret = virtio_net_flush_tx(q);
2601 if (ret == -EBUSY || ret == -EINVAL) {
2602 return; /* Notification re-enable handled by tx_complete or device
2603 * broken */
2606 /* If we flush a full burst of packets, assume there are
2607 * more coming and immediately reschedule */
2608 if (ret >= n->tx_burst) {
2609 qemu_bh_schedule(q->tx_bh);
2610 q->tx_waiting = 1;
2611 return;
2614 /* If less than a full burst, re-enable notification and flush
2615 * anything that may have come in while we weren't looking. If
2616 * we find something, assume the guest is still active and reschedule */
2617 virtio_queue_set_notification(q->tx_vq, 1);
2618 ret = virtio_net_flush_tx(q);
2619 if (ret == -EINVAL) {
2620 return;
2621 } else if (ret > 0) {
2622 virtio_queue_set_notification(q->tx_vq, 0);
2623 qemu_bh_schedule(q->tx_bh);
2624 q->tx_waiting = 1;
2628 static void virtio_net_add_queue(VirtIONet *n, int index)
2630 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2632 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2633 virtio_net_handle_rx);
2635 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2636 n->vqs[index].tx_vq =
2637 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2638 virtio_net_handle_tx_timer);
2639 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2640 virtio_net_tx_timer,
2641 &n->vqs[index]);
2642 } else {
2643 n->vqs[index].tx_vq =
2644 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2645 virtio_net_handle_tx_bh);
2646 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2649 n->vqs[index].tx_waiting = 0;
2650 n->vqs[index].n = n;
2653 static void virtio_net_del_queue(VirtIONet *n, int index)
2655 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2656 VirtIONetQueue *q = &n->vqs[index];
2657 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2659 qemu_purge_queued_packets(nc);
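/* Virtqueues are laid out rx0, tx0, rx1, tx1, ...; index * 2 is the RX
 * queue of this pair and index * 2 + 1 its TX queue. */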
2661 virtio_del_queue(vdev, index * 2);
2662 if (q->tx_timer) {
2663 timer_del(q->tx_timer);
2664 timer_free(q->tx_timer);
2665 q->tx_timer = NULL;
2666 } else {
2667 qemu_bh_delete(q->tx_bh);
2668 q->tx_bh = NULL;
2670 q->tx_waiting = 0;
2671 virtio_del_queue(vdev, index * 2 + 1);
2674 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2676 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2677 int old_num_queues = virtio_get_num_queues(vdev);
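/* Each queue pair contributes an RX and a TX virtqueue; the extra one is
 * the control virtqueue, which always sits last. */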
2678 int new_num_queues = new_max_queues * 2 + 1;
2679 int i;
2681 assert(old_num_queues >= 3);
2682 assert(old_num_queues % 2 == 1);
2684 if (old_num_queues == new_num_queues) {
2685 return;
2689 * We always need to remove and add ctrl vq if
2690 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2691 * and then we only enter one of the following two loops.
2693 virtio_del_queue(vdev, old_num_queues - 1);
2695 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2696 /* new_num_queues < old_num_queues */
2697 virtio_net_del_queue(n, i / 2);
2700 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2701 /* new_num_queues > old_num_queues */
2702 virtio_net_add_queue(n, i / 2);
2705 /* add ctrl_vq last */
2706 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2709 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2711 int max = multiqueue ? n->max_queues : 1;
2713 n->multiqueue = multiqueue;
2714 virtio_net_change_num_queues(n, max);
2716 virtio_net_set_queues(n);
2719 static int virtio_net_post_load_device(void *opaque, int version_id)
2721 VirtIONet *n = opaque;
2722 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2723 int i, link_down;
2725 trace_virtio_net_post_load_device();
2726 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2727 virtio_vdev_has_feature(vdev,
2728 VIRTIO_F_VERSION_1),
2729 virtio_vdev_has_feature(vdev,
2730 VIRTIO_NET_F_HASH_REPORT));
2732 /* MAC_TABLE_ENTRIES may be different from the saved image */
2733 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2734 n->mac_table.in_use = 0;
2737 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2738 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2742 * curr_guest_offloads will be later overwritten by the
2743 * virtio_set_features_nocheck call done from the virtio_load.
2744 * Here we make sure it is preserved and restored accordingly
2745 * in the virtio_net_post_load_virtio callback.
2747 n->saved_guest_offloads = n->curr_guest_offloads;
2749 virtio_net_set_queues(n);
2751 /* Find the first multicast entry in the saved MAC filter */
2752 for (i = 0; i < n->mac_table.in_use; i++) {
2753 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2754 break;
2757 n->mac_table.first_multi = i;
2759 /* nc.link_down can't be migrated, so infer link_down from the
2760 * link status bit in n->status */
2761 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2762 for (i = 0; i < n->max_queues; i++) {
2763 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2766 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2767 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2768 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
2769 QEMU_CLOCK_VIRTUAL,
2770 virtio_net_announce_timer, n);
2771 if (n->announce_timer.round) {
2772 timer_mod(n->announce_timer.tm,
2773 qemu_clock_get_ms(n->announce_timer.type));
2774 } else {
2775 qemu_announce_timer_del(&n->announce_timer, false);
2779 if (n->rss_data.enabled) {
2780 trace_virtio_net_rss_enable(n->rss_data.hash_types,
2781 n->rss_data.indirections_len,
2782 sizeof(n->rss_data.key));
2783 } else {
2784 trace_virtio_net_rss_disable();
2786 return 0;
2789 static int virtio_net_post_load_virtio(VirtIODevice *vdev)
2791 VirtIONet *n = VIRTIO_NET(vdev);
2793 * The actual needed state is now in saved_guest_offloads,
2794 * see virtio_net_post_load_device for detail.
2795 * Restore it back and apply the desired offloads.
2797 n->curr_guest_offloads = n->saved_guest_offloads;
2798 if (peer_has_vnet_hdr(n)) {
2799 virtio_net_apply_guest_offloads(n);
2802 return 0;
2805 /* tx_waiting field of a VirtIONetQueue */
2806 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2807 .name = "virtio-net-queue-tx_waiting",
2808 .fields = (VMStateField[]) {
2809 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2810 VMSTATE_END_OF_LIST()
2814 static bool max_queues_gt_1(void *opaque, int version_id)
2816 return VIRTIO_NET(opaque)->max_queues > 1;
2819 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2821 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2822 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2825 static bool mac_table_fits(void *opaque, int version_id)
2827 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2830 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2832 return !mac_table_fits(opaque, version_id);
2835 /* This temporary type is shared by all the WITH_TMP methods
2836 * although only some fields are used by each.
2838 struct VirtIONetMigTmp {
2839 VirtIONet *parent;
2840 VirtIONetQueue *vqs_1;
2841 uint16_t curr_queues_1;
2842 uint8_t has_ufo;
2843 uint32_t has_vnet_hdr;
2846 /* The 2nd and subsequent tx_waiting flags are loaded later than
2847 * the 1st entry in the queues and only if there's more than one
2848 * entry. We use the tmp mechanism to calculate a temporary
2849 * pointer and count, and also to validate the count.
2852 static int virtio_net_tx_waiting_pre_save(void *opaque)
2854 struct VirtIONetMigTmp *tmp = opaque;
2856 tmp->vqs_1 = tmp->parent->vqs + 1;
2857 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2858 if (tmp->parent->curr_queues == 0) {
2859 tmp->curr_queues_1 = 0;
2862 return 0;
2865 static int virtio_net_tx_waiting_pre_load(void *opaque)
2867 struct VirtIONetMigTmp *tmp = opaque;
2869 /* Reuse the pointer setup from save */
2870 virtio_net_tx_waiting_pre_save(opaque);
2872 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2873 error_report("virtio-net: curr_queues %x > max_queues %x",
2874 tmp->parent->curr_queues, tmp->parent->max_queues);
2876 return -EINVAL;
2879 return 0; /* all good */
2882 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2883 .name = "virtio-net-tx_waiting",
2884 .pre_load = virtio_net_tx_waiting_pre_load,
2885 .pre_save = virtio_net_tx_waiting_pre_save,
2886 .fields = (VMStateField[]) {
2887 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2888 curr_queues_1,
2889 vmstate_virtio_net_queue_tx_waiting,
2890 struct VirtIONetQueue),
2891 VMSTATE_END_OF_LIST()
2895 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2896 * flag set, we need to check that we have it
2898 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2900 struct VirtIONetMigTmp *tmp = opaque;
2902 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2903 error_report("virtio-net: saved image requires TUN_F_UFO support");
2904 return -EINVAL;
2907 return 0;
2910 static int virtio_net_ufo_pre_save(void *opaque)
2912 struct VirtIONetMigTmp *tmp = opaque;
2914 tmp->has_ufo = tmp->parent->has_ufo;
2916 return 0;
2919 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2920 .name = "virtio-net-ufo",
2921 .post_load = virtio_net_ufo_post_load,
2922 .pre_save = virtio_net_ufo_pre_save,
2923 .fields = (VMStateField[]) {
2924 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2925 VMSTATE_END_OF_LIST()
2929 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2930 * flag set, we need to check that we have it
2932 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2934 struct VirtIONetMigTmp *tmp = opaque;
2936 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2937 error_report("virtio-net: saved image requires vnet_hdr=on");
2938 return -EINVAL;
2941 return 0;
2944 static int virtio_net_vnet_pre_save(void *opaque)
2946 struct VirtIONetMigTmp *tmp = opaque;
2948 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2950 return 0;
2953 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2954 .name = "virtio-net-vnet",
2955 .post_load = virtio_net_vnet_post_load,
2956 .pre_save = virtio_net_vnet_pre_save,
2957 .fields = (VMStateField[]) {
2958 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2959 VMSTATE_END_OF_LIST()
2963 static bool virtio_net_rss_needed(void *opaque)
2965 return VIRTIO_NET(opaque)->rss_data.enabled;
2968 static const VMStateDescription vmstate_virtio_net_rss = {
2969 .name = "virtio-net-device/rss",
2970 .version_id = 1,
2971 .minimum_version_id = 1,
2972 .needed = virtio_net_rss_needed,
2973 .fields = (VMStateField[]) {
2974 VMSTATE_BOOL(rss_data.enabled, VirtIONet),
2975 VMSTATE_BOOL(rss_data.redirect, VirtIONet),
2976 VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
2977 VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
2978 VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
2979 VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
2980 VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
2981 VIRTIO_NET_RSS_MAX_KEY_SIZE),
2982 VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet,
2983 rss_data.indirections_len, 0,
2984 vmstate_info_uint16, uint16_t),
2985 VMSTATE_END_OF_LIST()
2989 static const VMStateDescription vmstate_virtio_net_device = {
2990 .name = "virtio-net-device",
2991 .version_id = VIRTIO_NET_VM_VERSION,
2992 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2993 .post_load = virtio_net_post_load_device,
2994 .fields = (VMStateField[]) {
2995 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2996 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2997 vmstate_virtio_net_queue_tx_waiting,
2998 VirtIONetQueue),
2999 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
3000 VMSTATE_UINT16(status, VirtIONet),
3001 VMSTATE_UINT8(promisc, VirtIONet),
3002 VMSTATE_UINT8(allmulti, VirtIONet),
3003 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
3005 /* Guarded pair: if it fits we load it, else we throw it away
3006 * - can happen if the source has a larger MAC table; post-load
3007 * sets flags in this case.
3009 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
3010 0, mac_table_fits, mac_table.in_use,
3011 ETH_ALEN),
3012 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
3013 mac_table.in_use, ETH_ALEN),
3015 /* Note: This is an array of uint32s that has always been saved as a
3016 * buffer; hold onto your endiannesses; it's actually used as a bitmap
3017 * but addressed in uint32-sized units.
3019 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
3020 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3021 vmstate_virtio_net_has_vnet),
3022 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
3023 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
3024 VMSTATE_UINT8(alluni, VirtIONet),
3025 VMSTATE_UINT8(nomulti, VirtIONet),
3026 VMSTATE_UINT8(nouni, VirtIONet),
3027 VMSTATE_UINT8(nobcast, VirtIONet),
3028 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3029 vmstate_virtio_net_has_ufo),
3030 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
3031 vmstate_info_uint16_equal, uint16_t),
3032 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
3033 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
3034 vmstate_virtio_net_tx_waiting),
3035 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
3036 has_ctrl_guest_offloads),
3037 VMSTATE_END_OF_LIST()
3039 .subsections = (const VMStateDescription * []) {
3040 &vmstate_virtio_net_rss,
3041 NULL
3045 static NetClientInfo net_virtio_info = {
3046 .type = NET_CLIENT_DRIVER_NIC,
3047 .size = sizeof(NICState),
3048 .can_receive = virtio_net_can_receive,
3049 .receive = virtio_net_receive,
3050 .link_status_changed = virtio_net_set_link_status,
3051 .query_rx_filter = virtio_net_query_rxfilter,
3052 .announce = virtio_net_announce,
3055 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
3057 VirtIONet *n = VIRTIO_NET(vdev);
3058 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3059 assert(n->vhost_started);
3060 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
3063 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
3064 bool mask)
3066 VirtIONet *n = VIRTIO_NET(vdev);
3067 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
3068 assert(n->vhost_started);
3069 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
3070 vdev, idx, mask);
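/* The MAC field is always exposed in the config space, so force the
 * feature bit on (in a local copy) before sizing from the feature list. */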
3073 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
3075 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
3077 n->config_size = virtio_feature_get_config_size(feature_sizes,
3078 host_features);
3081 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
3082 const char *type)
3085 * The name can be NULL; in that case the netclient name will be type.x.
3087 assert(type != NULL);
3089 g_free(n->netclient_name);
3090 g_free(n->netclient_type);
3091 n->netclient_name = g_strdup(name);
3092 n->netclient_type = g_strdup(type);
3095 static bool failover_unplug_primary(VirtIONet *n)
3097 HotplugHandler *hotplug_ctrl;
3098 PCIDevice *pci_dev;
3099 Error *err = NULL;
3101 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3102 if (hotplug_ctrl) {
3103 pci_dev = PCI_DEVICE(n->primary_dev);
3104 pci_dev->partially_hotplugged = true;
3105 hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
3106 if (err) {
3107 error_report_err(err);
3108 return false;
3110 } else {
3111 return false;
3113 return true;
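/* Plug the primary device back in; a no-op unless it was previously
 * partially hotplugged (i.e. unplugged for a migration that then failed). */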
3116 static bool failover_replug_primary(VirtIONet *n, Error **errp)
3118 Error *err = NULL;
3119 HotplugHandler *hotplug_ctrl;
3120 PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
3122 if (!pdev->partially_hotplugged) {
3123 return true;
3125 if (!n->primary_device_opts) {
3126 n->primary_device_opts = qemu_opts_from_qdict(
3127 qemu_find_opts("device"),
3128 n->primary_device_dict, errp);
3129 if (!n->primary_device_opts) {
3130 return false;
3133 n->primary_bus = n->primary_dev->parent_bus;
3134 if (!n->primary_bus) {
3135 error_setg(errp, "virtio_net: couldn't find primary bus");
3136 return false;
3138 qdev_set_parent_bus(n->primary_dev, n->primary_bus);
3139 n->primary_should_be_hidden = false;
3140 if (!qemu_opt_set_bool(n->primary_device_opts,
3141 "partially_hotplugged", true, &err)) {
3142 goto out;
3144 hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
3145 if (hotplug_ctrl) {
3146 hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, &err);
3147 if (err) {
3148 goto out;
3150 hotplug_handler_plug(hotplug_ctrl, n->primary_dev, &err);
3153 out:
3154 error_propagate(errp, err);
3155 return !err;
3158 static void virtio_net_handle_migration_primary(VirtIONet *n,
3159 MigrationState *s)
3161 bool should_be_hidden;
3162 Error *err = NULL;
3164 should_be_hidden = atomic_read(&n->primary_should_be_hidden);
3166 if (!n->primary_dev) {
3167 n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
3168 if (!n->primary_dev) {
3169 return;
3173 if (migration_in_setup(s) && !should_be_hidden) {
3174 if (failover_unplug_primary(n)) {
3175 vmstate_unregister(VMSTATE_IF(n->primary_dev),
3176 qdev_get_vmsd(n->primary_dev),
3177 n->primary_dev);
3178 qapi_event_send_unplug_primary(n->primary_device_id);
3179 atomic_set(&n->primary_should_be_hidden, true);
3180 } else {
3181 warn_report("couldn't unplug primary device");
3183 } else if (migration_has_failed(s)) {
3184 /* We already unplugged the device; let's plug it back */
3185 if (!failover_replug_primary(n, &err)) {
3186 if (err) {
3187 error_report_err(err);
3193 static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
3195 MigrationState *s = data;
3196 VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
3197 virtio_net_handle_migration_primary(n, s);
3200 static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
3201 QemuOpts *device_opts)
3203 VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
3204 bool match_found = false;
3205 bool hide = false;
3207 if (!device_opts) {
3208 return -1;
3210 n->primary_device_dict = qemu_opts_to_qdict(device_opts,
3211 n->primary_device_dict);
3212 if (n->primary_device_dict) {
3213 g_free(n->standby_id);
3214 n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
3215 "failover_pair_id"));
3217 if (g_strcmp0(n->standby_id, n->netclient_name) == 0) {
3218 match_found = true;
3219 } else {
3220 match_found = false;
3221 hide = false;
3222 g_free(n->standby_id);
3223 n->primary_device_dict = NULL;
3224 goto out;
3227 n->primary_device_opts = device_opts;
3229 /* primary_should_be_hidden is set during feature negotiation */
3230 hide = atomic_read(&n->primary_should_be_hidden);
3232 if (n->primary_device_dict) {
3233 g_free(n->primary_device_id);
3234 n->primary_device_id = g_strdup(qdict_get_try_str(
3235 n->primary_device_dict, "id"));
3236 if (!n->primary_device_id) {
3237 warn_report("primary_device_id not set");
3241 out:
3242 if (match_found && hide) {
3243 return 1;
3244 } else if (match_found && !hide) {
3245 return 0;
3246 } else {
3247 return -1;
3251 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
3253 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3254 VirtIONet *n = VIRTIO_NET(dev);
3255 NetClientState *nc;
3256 int i;
3258 if (n->net_conf.mtu) {
3259 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
3262 if (n->net_conf.duplex_str) {
3263 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
3264 n->net_conf.duplex = DUPLEX_HALF;
3265 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
3266 n->net_conf.duplex = DUPLEX_FULL;
3267 } else {
3268 error_setg(errp, "'duplex' must be 'half' or 'full'");
3269 return;
3271 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3272 } else {
3273 n->net_conf.duplex = DUPLEX_UNKNOWN;
3276 if (n->net_conf.speed < SPEED_UNKNOWN) {
3277 error_setg(errp, "'speed' must be between 0 and INT_MAX");
3278 return;
3280 if (n->net_conf.speed >= 0) {
3281 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
3284 if (n->failover) {
3285 n->primary_listener.should_be_hidden =
3286 virtio_net_primary_should_be_hidden;
3287 atomic_set(&n->primary_should_be_hidden, true);
3288 device_listener_register(&n->primary_listener);
3289 n->migration_state.notify = virtio_net_migration_state_notifier;
3290 add_migration_state_change_notifier(&n->migration_state);
3291 n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
3294 virtio_net_set_config_size(n, n->host_features);
3295 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
3298 * We set a lower limit on RX queue size to what it always was.
3299 * Guests that want a smaller ring can always resize it without
3300 * help from us (using virtio 1 and up).
3302 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
3303 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
3304 !is_power_of_2(n->net_conf.rx_queue_size)) {
3305 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
3306 "must be a power of 2 between %d and %d.",
3307 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
3308 VIRTQUEUE_MAX_SIZE);
3309 virtio_cleanup(vdev);
3310 return;
3313 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
3314 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
3315 !is_power_of_2(n->net_conf.tx_queue_size)) {
3316 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
3317 "must be a power of 2 between %d and %d",
3318 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
3319 VIRTQUEUE_MAX_SIZE);
3320 virtio_cleanup(vdev);
3321 return;
3324 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
3325 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
3326 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
3327 "must be a positive integer less than %d.",
3328 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
3329 virtio_cleanup(vdev);
3330 return;
3332 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
3333 n->curr_queues = 1;
3334 n->tx_timeout = n->net_conf.txtimer;
3336 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
3337 && strcmp(n->net_conf.tx, "bh")) {
3338 warn_report("virtio-net: "
3339 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
3340 n->net_conf.tx);
3341 error_printf("Defaulting to \"bh\"");
3344 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
3345 n->net_conf.tx_queue_size);
3347 for (i = 0; i < n->max_queues; i++) {
3348 virtio_net_add_queue(n, i);
3351 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
3352 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
3353 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
3354 n->status = VIRTIO_NET_S_LINK_UP;
3355 qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
3356 QEMU_CLOCK_VIRTUAL,
3357 virtio_net_announce_timer, n);
3358 n->announce_timer.round = 0;
3360 if (n->netclient_type) {
3362 * This happens when virtio_net_set_netclient_name has been called.
3364 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3365 n->netclient_type, n->netclient_name, n);
3366 } else {
3367 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
3368 object_get_typename(OBJECT(dev)), dev->id, n);
3371 peer_test_vnet_hdr(n);
3372 if (peer_has_vnet_hdr(n)) {
3373 for (i = 0; i < n->max_queues; i++) {
3374 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
3376 n->host_hdr_len = sizeof(struct virtio_net_hdr);
3377 } else {
3378 n->host_hdr_len = 0;
3381 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
3383 n->vqs[0].tx_waiting = 0;
3384 n->tx_burst = n->net_conf.txburst;
3385 virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
3386 n->promisc = 1; /* for compatibility */
3388 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
3390 n->vlans = g_malloc0(MAX_VLAN >> 3);
3392 nc = qemu_get_queue(n->nic);
3393 nc->rxfilter_notify_enabled = 1;
3395 QTAILQ_INIT(&n->rsc_chains);
3396 n->qdev = dev;
3398 net_rx_pkt_init(&n->rx_pkt, false);
3401 static void virtio_net_device_unrealize(DeviceState *dev)
3403 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3404 VirtIONet *n = VIRTIO_NET(dev);
3405 int i, max_queues;
3407 /* This will stop vhost backend if appropriate. */
3408 virtio_net_set_status(vdev, 0);
3410 g_free(n->netclient_name);
3411 n->netclient_name = NULL;
3412 g_free(n->netclient_type);
3413 n->netclient_type = NULL;
3415 g_free(n->mac_table.macs);
3416 g_free(n->vlans);
3418 if (n->failover) {
3419 g_free(n->primary_device_id);
3420 g_free(n->standby_id);
3421 qobject_unref(n->primary_device_dict);
3422 n->primary_device_dict = NULL;
3425 max_queues = n->multiqueue ? n->max_queues : 1;
3426 for (i = 0; i < max_queues; i++) {
3427 virtio_net_del_queue(n, i);
3429 /* delete also control vq */
3430 virtio_del_queue(vdev, max_queues * 2);
3431 qemu_announce_timer_del(&n->announce_timer, false);
3432 g_free(n->vqs);
3433 qemu_del_nic(n->nic);
3434 virtio_net_rsc_cleanup(n);
3435 g_free(n->rss_data.indirections_table);
3436 net_rx_pkt_uninit(n->rx_pkt);
3437 virtio_cleanup(vdev);
3440 static void virtio_net_instance_init(Object *obj)
3442 VirtIONet *n = VIRTIO_NET(obj);
3445 * The default config_size is sizeof(struct virtio_net_config).
3446 * It can be overridden with virtio_net_set_config_size.
3448 n->config_size = sizeof(struct virtio_net_config);
3449 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
3450 "bootindex", "/ethernet-phy@0",
3451 DEVICE(n));
3454 static int virtio_net_pre_save(void *opaque)
3456 VirtIONet *n = opaque;
3458 /* At this point, the backend must be stopped; otherwise
3459 * it might keep writing to memory. */
3460 assert(!n->vhost_started);
3462 return 0;
3465 static bool primary_unplug_pending(void *opaque)
3467 DeviceState *dev = opaque;
3468 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
3469 VirtIONet *n = VIRTIO_NET(vdev);
3471 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
3472 return false;
3474 return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
3477 static bool dev_unplug_pending(void *opaque)
3479 DeviceState *dev = opaque;
3480 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
3482 return vdc->primary_unplug_pending(dev);
3485 static const VMStateDescription vmstate_virtio_net = {
3486 .name = "virtio-net",
3487 .minimum_version_id = VIRTIO_NET_VM_VERSION,
3488 .version_id = VIRTIO_NET_VM_VERSION,
3489 .fields = (VMStateField[]) {
3490 VMSTATE_VIRTIO_DEVICE,
3491 VMSTATE_END_OF_LIST()
3493 .pre_save = virtio_net_pre_save,
3494 .dev_unplug_pending = dev_unplug_pending,
3497 static Property virtio_net_properties[] = {
3498 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
3499 VIRTIO_NET_F_CSUM, true),
3500 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
3501 VIRTIO_NET_F_GUEST_CSUM, true),
3502 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
3503 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
3504 VIRTIO_NET_F_GUEST_TSO4, true),
3505 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
3506 VIRTIO_NET_F_GUEST_TSO6, true),
3507 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
3508 VIRTIO_NET_F_GUEST_ECN, true),
3509 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
3510 VIRTIO_NET_F_GUEST_UFO, true),
3511 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
3512 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
3513 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
3514 VIRTIO_NET_F_HOST_TSO4, true),
3515 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
3516 VIRTIO_NET_F_HOST_TSO6, true),
3517 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
3518 VIRTIO_NET_F_HOST_ECN, true),
3519 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
3520 VIRTIO_NET_F_HOST_UFO, true),
3521 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
3522 VIRTIO_NET_F_MRG_RXBUF, true),
3523 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
3524 VIRTIO_NET_F_STATUS, true),
3525 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
3526 VIRTIO_NET_F_CTRL_VQ, true),
3527 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
3528 VIRTIO_NET_F_CTRL_RX, true),
3529 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
3530 VIRTIO_NET_F_CTRL_VLAN, true),
3531 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
3532 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
3533 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
3534 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
3535 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
3536 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
3537 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
3538 DEFINE_PROP_BIT64("rss", VirtIONet, host_features,
3539 VIRTIO_NET_F_RSS, false),
3540 DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
3541 VIRTIO_NET_F_HASH_REPORT, false),
3542 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
3543 VIRTIO_NET_F_RSC_EXT, false),
3544 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
3545 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
3546 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
3547 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
3548 TX_TIMER_INTERVAL),
3549 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
3550 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
3551 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
3552 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
3553 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
3554 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
3555 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
3556 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
3557 true),
3558 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
3559 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
3560 DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
3561 DEFINE_PROP_END_OF_LIST(),
3564 static void virtio_net_class_init(ObjectClass *klass, void *data)
3566 DeviceClass *dc = DEVICE_CLASS(klass);
3567 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
3569 device_class_set_props(dc, virtio_net_properties);
3570 dc->vmsd = &vmstate_virtio_net;
3571 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
3572 vdc->realize = virtio_net_device_realize;
3573 vdc->unrealize = virtio_net_device_unrealize;
3574 vdc->get_config = virtio_net_get_config;
3575 vdc->set_config = virtio_net_set_config;
3576 vdc->get_features = virtio_net_get_features;
3577 vdc->set_features = virtio_net_set_features;
3578 vdc->bad_features = virtio_net_bad_features;
3579 vdc->reset = virtio_net_reset;
3580 vdc->set_status = virtio_net_set_status;
3581 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
3582 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
3583 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
3584 vdc->post_load = virtio_net_post_load_virtio;
3585 vdc->vmsd = &vmstate_virtio_net_device;
3586 vdc->primary_unplug_pending = primary_unplug_pending;
3589 static const TypeInfo virtio_net_info = {
3590 .name = TYPE_VIRTIO_NET,
3591 .parent = TYPE_VIRTIO_DEVICE,
3592 .instance_size = sizeof(VirtIONet),
3593 .instance_init = virtio_net_instance_init,
3594 .class_init = virtio_net_class_init,
3597 static void virtio_register_types(void)
3599 type_register_static(&virtio_net_info);
3602 type_init(virtio_register_types)