virtio-net: support RSC v4/v6 tcp traffic for Windows HCK
hw/net/virtio-net.c (qemu/ar7.git)
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"

#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000

/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* header length value in ip header without option */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD

/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000

/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       38

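/* virtio_net_hdr has no dedicated fields for RSC statistics, so for a
 * coalesced packet the (otherwise unused) csum_start/csum_offset fields are
 * reused to report how many segments and duplicate ACKs were merged. */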
static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif

/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof_field(container, field))

typedef struct VirtIOFeature {
    uint64_t flags;
    size_t end;
} VirtIOFeature;

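/* Each entry gives the end offset of the config space region that is only
 * valid when the corresponding feature bit is offered; the largest matching
 * 'end' determines how much of struct virtio_net_config is exposed. */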
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};

static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

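/* Virtqueues are created in pairs: RX at index 2*i and TX at index 2*i + 1,
 * so both members of a pair map back to net subqueue i. */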
static int vq2q(int queue_index)
{
    return queue_index / 2;
}

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}

static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->announce_counter--;
    n->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}

static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back to fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}

static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* if tx is waiting we likely have some packets in the tx
                 * queue and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}

static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}

static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}

static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}

static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}

static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);

static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
            (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}

static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}

static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_counter) {
            timer_mod(n->announce_timer,
                      qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                      self_announce_delay(n->announce_counter));
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid
     * handling a disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}

static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
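        /* iov_discard_front() below modifies the iovec array, so work on a
         * copy; iov2 keeps the original pointer for the final g_free(). */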
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}

static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
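    /* The TCP data offset sits in the top 4 bits of th_offset_flags and is
     * counted in 32-bit words, so mask with 0xF000 and shift right by 10:
     * >> 12 extracts the field, << 2 converts words to bytes. */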
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}

static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}

static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}

static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}

static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}

static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack, count it (the WHQL test expects dup acks
               to be reported, up to 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}

static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
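    /* Unsigned subtraction wraps around, so this one comparison rejects both
     * segments that precede the cached one and segments too far ahead. */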
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack etc. */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* Here comes the right data; the payload length in v4/v6 differs,
           so use the field value to update and record the new data length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Take the 'PUSH' flag from the newest segment: the WHQL test guide
           says 'PUSH' can be coalesced for Windows guests, while this may
           change the behavior for Linux guests (only if they use the RSC
           feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}

static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

/* Packets with 'SYN' should bypass; others with control flags should be sent
 * after draining, to prevent them from being delivered out of order */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced: mark the flag so the ipv4 checksum gets
               recalculated */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}

/* Drain a connection's data; this avoids delivering out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

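    /* The TCP source and destination ports are adjacent 16-bit fields, so a
     * single 32-bit load at the port offset compares both at once. */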
    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}

static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked by this: anything other than
       plain TCP as the next header is bypassed */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}

static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(struct tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}

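/* One VirtioNetRscChain exists per protocol (IPv4 or IPv6); it owns the list
 * of in-flight segments being coalesced and the purge timer that eventually
 * flushes them to the guest. */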
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}

static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if (n->rsc4_enabled || n->rsc6_enabled) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}

static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

/* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
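                /* Substitute the byte-swapped local copy as the first
                 * element and chain the guest's remaining buffers after it. */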
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

2079 static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
2080 {
2081 VirtIONet *n = VIRTIO_NET(vdev);
2082 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2083
2084 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2085 virtio_net_drop_tx_queue_data(vdev, vq);
2086 return;
2087 }
2088
2089 /* This happens when device was stopped but VCPU wasn't. */
2090 if (!vdev->vm_running) {
2091 q->tx_waiting = 1;
2092 return;
2093 }
2094
2095 if (q->tx_waiting) {
2096 virtio_queue_set_notification(vq, 1);
2097 timer_del(q->tx_timer);
2098 q->tx_waiting = 0;
2099 if (virtio_net_flush_tx(q) == -EINVAL) {
2100 return;
2101 }
2102 } else {
2103 timer_mod(q->tx_timer,
2104 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
2105 q->tx_waiting = 1;
2106 virtio_queue_set_notification(vq, 0);
2107 }
2108 }
2109
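/*
 * Sketch of the tx=timer strategy above: the first kick only arms
 * q->tx_timer for n->tx_timeout ns and suppresses further queue
 * notifications; the next event (timer or kick) does the actual flush,
 * trading latency for batching. Illustrative command line (values are
 * examples, not recommendations):
 *
 *     -device virtio-net-pci,tx=timer,x-txtimer=150000
 *
 * tx=bh, handled below, defers to a bottom half instead of a timer.
 */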
2110 static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
2111 {
2112 VirtIONet *n = VIRTIO_NET(vdev);
2113 VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];
2114
2115 if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
2116 virtio_net_drop_tx_queue_data(vdev, vq);
2117 return;
2118 }
2119
2120 if (unlikely(q->tx_waiting)) {
2121 return;
2122 }
2123 q->tx_waiting = 1;
2124 /* This happens when device was stopped but VCPU wasn't. */
2125 if (!vdev->vm_running) {
2126 return;
2127 }
2128 virtio_queue_set_notification(vq, 0);
2129 qemu_bh_schedule(q->tx_bh);
2130 }
2131
2132 static void virtio_net_tx_timer(void *opaque)
2133 {
2134 VirtIONetQueue *q = opaque;
2135 VirtIONet *n = q->n;
2136 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2137 /* This happens when device was stopped but the timer wasn't. */
2138 if (!vdev->vm_running) {
2139 /* Make sure tx waiting is set, so we'll run when restarted. */
2140 assert(q->tx_waiting);
2141 return;
2142 }
2143
2144 q->tx_waiting = 0;
2145
2146 /* Just in case the driver is not ready any more */
2147 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
2148 return;
2149 }
2150
2151 virtio_queue_set_notification(q->tx_vq, 1);
2152 virtio_net_flush_tx(q);
2153 }
2154
2155 static void virtio_net_tx_bh(void *opaque)
2156 {
2157 VirtIONetQueue *q = opaque;
2158 VirtIONet *n = q->n;
2159 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2160 int32_t ret;
2161
2162 /* This happens when device was stopped but BH wasn't. */
2163 if (!vdev->vm_running) {
2164 /* Make sure tx waiting is set, so we'll run when restarted. */
2165 assert(q->tx_waiting);
2166 return;
2167 }
2168
2169 q->tx_waiting = 0;
2170
2171 /* Just in case the driver is not ready any more */
2172 if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
2173 return;
2174 }
2175
2176 ret = virtio_net_flush_tx(q);
2177 if (ret == -EBUSY || ret == -EINVAL) {
2178 return; /* Notification re-enable handled by tx_complete or device
2179 * broken */
2180 }
2181
2182 /* If we flush a full burst of packets, assume there are
2183 * more coming and immediately reschedule */
2184 if (ret >= n->tx_burst) {
2185 qemu_bh_schedule(q->tx_bh);
2186 q->tx_waiting = 1;
2187 return;
2188 }
2189
2190 /* If less than a full burst, re-enable notification and flush
2191 * anything that may have come in while we weren't looking. If
2192 * we find something, assume the guest is still active and reschedule */
2193 virtio_queue_set_notification(q->tx_vq, 1);
2194 ret = virtio_net_flush_tx(q);
2195 if (ret == -EINVAL) {
2196 return;
2197 } else if (ret > 0) {
2198 virtio_queue_set_notification(q->tx_vq, 0);
2199 qemu_bh_schedule(q->tx_bh);
2200 q->tx_waiting = 1;
2201 }
2202 }
2203
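/*
 * The flush-after-re-enable step above closes a classic notification
 * race; an illustrative interleaving it guards against:
 *
 *   1. flush_tx() drains the ring
 *   2. the guest adds a buffer, but its kick is still suppressed
 *   3. notification is re-enabled
 *
 * Without the second flush_tx(), the buffer from step 2 would sit
 * unsent until the guest happened to kick again.
 */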
2204 static void virtio_net_add_queue(VirtIONet *n, int index)
2205 {
2206 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2207
2208 n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
2209 virtio_net_handle_rx);
2210
2211 if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
2212 n->vqs[index].tx_vq =
2213 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2214 virtio_net_handle_tx_timer);
2215 n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
2216 virtio_net_tx_timer,
2217 &n->vqs[index]);
2218 } else {
2219 n->vqs[index].tx_vq =
2220 virtio_add_queue(vdev, n->net_conf.tx_queue_size,
2221 virtio_net_handle_tx_bh);
2222 n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
2223 }
2224
2225 n->vqs[index].tx_waiting = 0;
2226 n->vqs[index].n = n;
2227 }
2228
2229 static void virtio_net_del_queue(VirtIONet *n, int index)
2230 {
2231 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2232 VirtIONetQueue *q = &n->vqs[index];
2233 NetClientState *nc = qemu_get_subqueue(n->nic, index);
2234
2235 qemu_purge_queued_packets(nc);
2236
2237 virtio_del_queue(vdev, index * 2);
2238 if (q->tx_timer) {
2239 timer_del(q->tx_timer);
2240 timer_free(q->tx_timer);
2241 q->tx_timer = NULL;
2242 } else {
2243 qemu_bh_delete(q->tx_bh);
2244 q->tx_bh = NULL;
2245 }
2246 q->tx_waiting = 0;
2247 virtio_del_queue(vdev, index * 2 + 1);
2248 }
2249
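/*
 * Virtqueue slot layout assumed by the index arithmetic above: pair i
 * occupies slots 2*i (rx) and 2*i + 1 (tx), with the control queue in
 * the last slot, i.e. 2 * max_queues + 1 queues in total. E.g. with
 * two pairs:
 *
 *     slot:  0    1    2    3    4
 *            rx0  tx0  rx1  tx1  ctrl
 */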
2250 static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
2251 {
2252 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2253 int old_num_queues = virtio_get_num_queues(vdev);
2254 int new_num_queues = new_max_queues * 2 + 1;
2255 int i;
2256
2257 assert(old_num_queues >= 3);
2258 assert(old_num_queues % 2 == 1);
2259
2260 if (old_num_queues == new_num_queues) {
2261 return;
2262 }
2263
2264 /*
2265 * We always need to remove and add ctrl vq if
2266 * old_num_queues != new_num_queues. Remove ctrl_vq first,
2267 * and then we only enter one of the following two loops.
2268 */
2269 virtio_del_queue(vdev, old_num_queues - 1);
2270
2271 for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
2272 /* new_num_queues < old_num_queues */
2273 virtio_net_del_queue(n, i / 2);
2274 }
2275
2276 for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
2277 /* new_num_queues > old_num_queues */
2278 virtio_net_add_queue(n, i / 2);
2279 }
2280
2281 /* add ctrl_vq last */
2282 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2283 }
2284
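/*
 * Worked example for the resize above (illustrative numbers): growing
 * from one pair to two means old_num_queues = 3 and new_num_queues = 5.
 * The ctrl vq at slot 2 is deleted first, the grow loop runs once with
 * i = 2 and adds pair i / 2 = 1 (slots 2 and 3), and ctrl is re-created
 * at slot 4, preserving the layout sketched earlier.
 */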
2285 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
2286 {
2287 int max = multiqueue ? n->max_queues : 1;
2288
2289 n->multiqueue = multiqueue;
2290 virtio_net_change_num_queues(n, max);
2291
2292 virtio_net_set_queues(n);
2293 }
2294
2295 static int virtio_net_post_load_device(void *opaque, int version_id)
2296 {
2297 VirtIONet *n = opaque;
2298 VirtIODevice *vdev = VIRTIO_DEVICE(n);
2299 int i, link_down;
2300
2301 virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
2302 virtio_vdev_has_feature(vdev,
2303 VIRTIO_F_VERSION_1));
2304
2305 /* MAC_TABLE_ENTRIES may be different from the saved image */
2306 if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
2307 n->mac_table.in_use = 0;
2308 }
2309
2310 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
2311 n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
2312 }
2313
2314 if (peer_has_vnet_hdr(n)) {
2315 virtio_net_apply_guest_offloads(n);
2316 }
2317
2318 virtio_net_set_queues(n);
2319
2320 /* Find the first multicast entry in the saved MAC filter */
2321 for (i = 0; i < n->mac_table.in_use; i++) {
2322 if (n->mac_table.macs[i * ETH_ALEN] & 1) {
2323 break;
2324 }
2325 }
2326 n->mac_table.first_multi = i;
2327
2328 /* nc.link_down can't be migrated, so infer link_down according
2329 * to link status bit in n->status */
2330 link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
2331 for (i = 0; i < n->max_queues; i++) {
2332 qemu_get_subqueue(n->nic, i)->link_down = link_down;
2333 }
2334
2335 if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
2336 virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
2337 n->announce_counter = SELF_ANNOUNCE_ROUNDS;
2338 timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
2339 }
2340
2341 return 0;
2342 }
2343
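/*
 * Aside on the first_multi scan above: the low-order bit of the first
 * octet of an Ethernet address is the group (multicast) bit, so
 * macs[i * ETH_ALEN] & 1 finds the first multicast entry; unicast
 * entries are expected to sit before it in the table.
 */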
2344 /* tx_waiting field of a VirtIONetQueue */
2345 static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
2346 .name = "virtio-net-queue-tx_waiting",
2347 .fields = (VMStateField[]) {
2348 VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
2349 VMSTATE_END_OF_LIST()
2350 },
2351 };
2352
2353 static bool max_queues_gt_1(void *opaque, int version_id)
2354 {
2355 return VIRTIO_NET(opaque)->max_queues > 1;
2356 }
2357
2358 static bool has_ctrl_guest_offloads(void *opaque, int version_id)
2359 {
2360 return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
2361 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
2362 }
2363
2364 static bool mac_table_fits(void *opaque, int version_id)
2365 {
2366 return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
2367 }
2368
2369 static bool mac_table_doesnt_fit(void *opaque, int version_id)
2370 {
2371 return !mac_table_fits(opaque, version_id);
2372 }
2373
2374 /* This temporary type is shared by all the WITH_TMP methods
2375 * although only some fields are used by each.
2376 */
2377 struct VirtIONetMigTmp {
2378 VirtIONet *parent;
2379 VirtIONetQueue *vqs_1;
2380 uint16_t curr_queues_1;
2381 uint8_t has_ufo;
2382 uint32_t has_vnet_hdr;
2383 };
2384
2385 /* The 2nd and subsequent tx_waiting flags are loaded later than
2386 * the 1st entry in the queues and only if there's more than one
2387 * entry. We use the tmp mechanism to calculate a temporary
2388 * pointer and count and also validate the count.
2389 */
2390
2391 static int virtio_net_tx_waiting_pre_save(void *opaque)
2392 {
2393 struct VirtIONetMigTmp *tmp = opaque;
2394
2395 tmp->vqs_1 = tmp->parent->vqs + 1;
2396 tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
2397 if (tmp->parent->curr_queues == 0) {
2398 tmp->curr_queues_1 = 0;
2399 }
2400
2401 return 0;
2402 }
2403
2404 static int virtio_net_tx_waiting_pre_load(void *opaque)
2405 {
2406 struct VirtIONetMigTmp *tmp = opaque;
2407
2408 /* Reuse the pointer setup from save */
2409 virtio_net_tx_waiting_pre_save(opaque);
2410
2411 if (tmp->parent->curr_queues > tmp->parent->max_queues) {
2412 error_report("virtio-net: curr_queues %x > max_queues %x",
2413 tmp->parent->curr_queues, tmp->parent->max_queues);
2414
2415 return -EINVAL;
2416 }
2417
2418 return 0; /* all good */
2419 }
2420
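/*
 * Concrete case for the off-by-one scheme above (illustrative, not
 * part of the stream format documentation): with curr_queues == 4 the
 * main device section already migrates vqs[0].tx_waiting, so pre-save
 * exposes vqs_1 = &vqs[1] and curr_queues_1 = 3, i.e. only the three
 * remaining flags; a single-queue guest ends up with curr_queues_1 == 0
 * and the varray stays empty.
 */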
2421 static const VMStateDescription vmstate_virtio_net_tx_waiting = {
2422 .name = "virtio-net-tx_waiting",
2423 .pre_load = virtio_net_tx_waiting_pre_load,
2424 .pre_save = virtio_net_tx_waiting_pre_save,
2425 .fields = (VMStateField[]) {
2426 VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
2427 curr_queues_1,
2428 vmstate_virtio_net_queue_tx_waiting,
2429 struct VirtIONetQueue),
2430 VMSTATE_END_OF_LIST()
2431 },
2432 };
2433
2434 /* the 'has_ufo' flag is just tested; if the incoming stream has the
2435 * flag set we need to check that we have it
2436 */
2437 static int virtio_net_ufo_post_load(void *opaque, int version_id)
2438 {
2439 struct VirtIONetMigTmp *tmp = opaque;
2440
2441 if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
2442 error_report("virtio-net: saved image requires TUN_F_UFO support");
2443 return -EINVAL;
2444 }
2445
2446 return 0;
2447 }
2448
2449 static int virtio_net_ufo_pre_save(void *opaque)
2450 {
2451 struct VirtIONetMigTmp *tmp = opaque;
2452
2453 tmp->has_ufo = tmp->parent->has_ufo;
2454
2455 return 0;
2456 }
2457
2458 static const VMStateDescription vmstate_virtio_net_has_ufo = {
2459 .name = "virtio-net-ufo",
2460 .post_load = virtio_net_ufo_post_load,
2461 .pre_save = virtio_net_ufo_pre_save,
2462 .fields = (VMStateField[]) {
2463 VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
2464 VMSTATE_END_OF_LIST()
2465 },
2466 };
2467
2468 /* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
2469 * flag set we need to check that we have it
2470 */
2471 static int virtio_net_vnet_post_load(void *opaque, int version_id)
2472 {
2473 struct VirtIONetMigTmp *tmp = opaque;
2474
2475 if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
2476 error_report("virtio-net: saved image requires vnet_hdr=on");
2477 return -EINVAL;
2478 }
2479
2480 return 0;
2481 }
2482
2483 static int virtio_net_vnet_pre_save(void *opaque)
2484 {
2485 struct VirtIONetMigTmp *tmp = opaque;
2486
2487 tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;
2488
2489 return 0;
2490 }
2491
2492 static const VMStateDescription vmstate_virtio_net_has_vnet = {
2493 .name = "virtio-net-vnet",
2494 .post_load = virtio_net_vnet_post_load,
2495 .pre_save = virtio_net_vnet_pre_save,
2496 .fields = (VMStateField[]) {
2497 VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
2498 VMSTATE_END_OF_LIST()
2499 },
2500 };
2501
2502 static const VMStateDescription vmstate_virtio_net_device = {
2503 .name = "virtio-net-device",
2504 .version_id = VIRTIO_NET_VM_VERSION,
2505 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2506 .post_load = virtio_net_post_load_device,
2507 .fields = (VMStateField[]) {
2508 VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
2509 VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
2510 vmstate_virtio_net_queue_tx_waiting,
2511 VirtIONetQueue),
2512 VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
2513 VMSTATE_UINT16(status, VirtIONet),
2514 VMSTATE_UINT8(promisc, VirtIONet),
2515 VMSTATE_UINT8(allmulti, VirtIONet),
2516 VMSTATE_UINT32(mac_table.in_use, VirtIONet),
2518 /* Guarded pair: if it fits we load it, else we throw it away
2519 * - can happen if the source has a larger MAC table; post-load
2520 * sets flags in this case.
2521 */
2522 VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
2523 0, mac_table_fits, mac_table.in_use,
2524 ETH_ALEN),
2525 VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
2526 mac_table.in_use, ETH_ALEN),
2527
2528 /* Note: this is an array of uint32's that's always been saved as a
2529 * buffer; hold onto your endiannesses; it's actually used as a bitmap
2530 * but based on the uint.
2531 */
2532 VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
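/*
 * For orientation (consistent with the VLAN filter lookups elsewhere
 * in this file): the buffer holds MAX_VLAN bits, one per VLAN id,
 * addressed as 32-bit words, e.g. id 100 lives at bit (100 & 0x1f) of
 * vlans[100 >> 5]. That word-based use is why the note above warns
 * about endianness when the array crosses the wire as raw bytes.
 */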
2533 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2534 vmstate_virtio_net_has_vnet),
2535 VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
2536 VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
2537 VMSTATE_UINT8(alluni, VirtIONet),
2538 VMSTATE_UINT8(nomulti, VirtIONet),
2539 VMSTATE_UINT8(nouni, VirtIONet),
2540 VMSTATE_UINT8(nobcast, VirtIONet),
2541 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2542 vmstate_virtio_net_has_ufo),
2543 VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
2544 vmstate_info_uint16_equal, uint16_t),
2545 VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
2546 VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
2547 vmstate_virtio_net_tx_waiting),
2548 VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
2549 has_ctrl_guest_offloads),
2550 VMSTATE_END_OF_LIST()
2551 },
2552 };
2553
2554 static NetClientInfo net_virtio_info = {
2555 .type = NET_CLIENT_DRIVER_NIC,
2556 .size = sizeof(NICState),
2557 .can_receive = virtio_net_can_receive,
2558 .receive = virtio_net_receive,
2559 .link_status_changed = virtio_net_set_link_status,
2560 .query_rx_filter = virtio_net_query_rxfilter,
2561 };
2562
2563 static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
2564 {
2565 VirtIONet *n = VIRTIO_NET(vdev);
2566 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2567 assert(n->vhost_started);
2568 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
2569 }
2570
2571 static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
2572 bool mask)
2573 {
2574 VirtIONet *n = VIRTIO_NET(vdev);
2575 NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
2576 assert(n->vhost_started);
2577 vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
2578 vdev, idx, mask);
2579 }
2580
2581 static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
2582 {
2583 int i, config_size = 0;
2584 virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
2585
2586 for (i = 0; feature_sizes[i].flags != 0; i++) {
2587 if (host_features & feature_sizes[i].flags) {
2588 config_size = MAX(feature_sizes[i].end, config_size);
2589 }
2590 }
2591 n->config_size = config_size;
2592 }
2593
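/*
 * Example of the sizing rule above (illustrative): config_size becomes
 * the largest 'end' offset among the enabled entries of feature_sizes[],
 * so with only the always-added VIRTIO_NET_F_MAC the config space stops
 * right after the 6-byte mac field, and each further feature extends it
 * only as far as the last field that feature exposes.
 */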
2594 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
2595 const char *type)
2596 {
2597 /*
2598 * The name can be NULL; the netclient name will then be of the form type.x.
2599 */
2600 assert(type != NULL);
2601
2602 g_free(n->netclient_name);
2603 g_free(n->netclient_type);
2604 n->netclient_name = g_strdup(name);
2605 n->netclient_type = g_strdup(type);
2606 }
2607
2608 static void virtio_net_device_realize(DeviceState *dev, Error **errp)
2609 {
2610 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2611 VirtIONet *n = VIRTIO_NET(dev);
2612 NetClientState *nc;
2613 int i;
2614
2615 if (n->net_conf.mtu) {
2616 n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
2617 }
2618
2619 if (n->net_conf.duplex_str) {
2620 if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
2621 n->net_conf.duplex = DUPLEX_HALF;
2622 } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
2623 n->net_conf.duplex = DUPLEX_FULL;
2624 } else {
2625 error_setg(errp, "'duplex' must be 'half' or 'full'");
2626 }
2627 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2628 } else {
2629 n->net_conf.duplex = DUPLEX_UNKNOWN;
2630 }
2631
2632 if (n->net_conf.speed < SPEED_UNKNOWN) {
2633 error_setg(errp, "'speed' must be between 0 and INT_MAX");
2634 } else if (n->net_conf.speed >= 0) {
2635 n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
2636 }
2637
2638 virtio_net_set_config_size(n, n->host_features);
2639 virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
2640
2641 /*
2642 * We set a lower limit on RX queue size to what it always was.
2643 * Guests that want a smaller ring can always resize it without
2644 * help from us (using virtio 1 and up).
2645 */
2646 if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
2647 n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
2648 !is_power_of_2(n->net_conf.rx_queue_size)) {
2649 error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
2650 "must be a power of 2 between %d and %d.",
2651 n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
2652 VIRTQUEUE_MAX_SIZE);
2653 virtio_cleanup(vdev);
2654 return;
2655 }
2656
2657 if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
2658 n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
2659 !is_power_of_2(n->net_conf.tx_queue_size)) {
2660 error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
2661 "must be a power of 2 between %d and %d",
2662 n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
2663 VIRTQUEUE_MAX_SIZE);
2664 virtio_cleanup(vdev);
2665 return;
2666 }
2667
2668 n->max_queues = MAX(n->nic_conf.peers.queues, 1);
2669 if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
2670 error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
2671 "must be a positive integer less than %d.",
2672 n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
2673 virtio_cleanup(vdev);
2674 return;
2675 }
2676 n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
2677 n->curr_queues = 1;
2678 n->tx_timeout = n->net_conf.txtimer;
2679
2680 if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
2681 && strcmp(n->net_conf.tx, "bh")) {
2682 warn_report("virtio-net: "
2683 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
2684 n->net_conf.tx);
2685 error_printf("Defaulting to \"bh\"");
2686 }
2687
2688 n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
2689 n->net_conf.tx_queue_size);
2690
2691 for (i = 0; i < n->max_queues; i++) {
2692 virtio_net_add_queue(n, i);
2693 }
2694
2695 n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
2696 qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
2697 memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
2698 n->status = VIRTIO_NET_S_LINK_UP;
2699 n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
2700 virtio_net_announce_timer, n);
2701
2702 if (n->netclient_type) {
2703 /*
2704 * Happens when virtio_net_set_netclient_name has been called.
2705 */
2706 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2707 n->netclient_type, n->netclient_name, n);
2708 } else {
2709 n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
2710 object_get_typename(OBJECT(dev)), dev->id, n);
2711 }
2712
2713 peer_test_vnet_hdr(n);
2714 if (peer_has_vnet_hdr(n)) {
2715 for (i = 0; i < n->max_queues; i++) {
2716 qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
2717 }
2718 n->host_hdr_len = sizeof(struct virtio_net_hdr);
2719 } else {
2720 n->host_hdr_len = 0;
2721 }
2722
2723 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
2724
2725 n->vqs[0].tx_waiting = 0;
2726 n->tx_burst = n->net_conf.txburst;
2727 virtio_net_set_mrg_rx_bufs(n, 0, 0);
2728 n->promisc = 1; /* for compatibility */
2729
2730 n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
2731
2732 n->vlans = g_malloc0(MAX_VLAN >> 3);
2733
2734 nc = qemu_get_queue(n->nic);
2735 nc->rxfilter_notify_enabled = 1;
2736
2737 QTAILQ_INIT(&n->rsc_chains);
2738 n->qdev = dev;
2739 }
2740
2741 static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
2742 {
2743 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
2744 VirtIONet *n = VIRTIO_NET(dev);
2745 int i, max_queues;
2746
2747 /* This will stop vhost backend if appropriate. */
2748 virtio_net_set_status(vdev, 0);
2749
2750 g_free(n->netclient_name);
2751 n->netclient_name = NULL;
2752 g_free(n->netclient_type);
2753 n->netclient_type = NULL;
2754
2755 g_free(n->mac_table.macs);
2756 g_free(n->vlans);
2757
2758 max_queues = n->multiqueue ? n->max_queues : 1;
2759 for (i = 0; i < max_queues; i++) {
2760 virtio_net_del_queue(n, i);
2761 }
2762
2763 timer_del(n->announce_timer);
2764 timer_free(n->announce_timer);
2765 g_free(n->vqs);
2766 qemu_del_nic(n->nic);
2767 virtio_net_rsc_cleanup(n);
2768 virtio_cleanup(vdev);
2769 }
2770
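/*
 * Teardown note (an observation about ordering, not an API rule): the
 * status write stops any vhost backend first so nothing keeps writing
 * guest memory, per-pair timers/bottom halves go away via
 * virtio_net_del_queue(), and only then are the NIC, the RSC chains
 * and the virtio core released.
 */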
2771 static void virtio_net_instance_init(Object *obj)
2772 {
2773 VirtIONet *n = VIRTIO_NET(obj);
2774
2775 /*
2776 * The default config_size is sizeof(struct virtio_net_config).
2777 * Can be overridden with virtio_net_set_config_size.
2778 */
2779 n->config_size = sizeof(struct virtio_net_config);
2780 device_add_bootindex_property(obj, &n->nic_conf.bootindex,
2781 "bootindex", "/ethernet-phy@0",
2782 DEVICE(n), NULL);
2783 }
2784
2785 static int virtio_net_pre_save(void *opaque)
2786 {
2787 VirtIONet *n = opaque;
2788
2789 /* At this point, backend must be stopped, otherwise
2790 * it might keep writing to memory. */
2791 assert(!n->vhost_started);
2792
2793 return 0;
2794 }
2795
2796 static const VMStateDescription vmstate_virtio_net = {
2797 .name = "virtio-net",
2798 .minimum_version_id = VIRTIO_NET_VM_VERSION,
2799 .version_id = VIRTIO_NET_VM_VERSION,
2800 .fields = (VMStateField[]) {
2801 VMSTATE_VIRTIO_DEVICE,
2802 VMSTATE_END_OF_LIST()
2803 },
2804 .pre_save = virtio_net_pre_save,
2805 };
2806
2807 static Property virtio_net_properties[] = {
2808 DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
2809 VIRTIO_NET_F_CSUM, true),
2810 DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
2811 VIRTIO_NET_F_GUEST_CSUM, true),
2812 DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
2813 DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
2814 VIRTIO_NET_F_GUEST_TSO4, true),
2815 DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
2816 VIRTIO_NET_F_GUEST_TSO6, true),
2817 DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
2818 VIRTIO_NET_F_GUEST_ECN, true),
2819 DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
2820 VIRTIO_NET_F_GUEST_UFO, true),
2821 DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
2822 VIRTIO_NET_F_GUEST_ANNOUNCE, true),
2823 DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
2824 VIRTIO_NET_F_HOST_TSO4, true),
2825 DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
2826 VIRTIO_NET_F_HOST_TSO6, true),
2827 DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
2828 VIRTIO_NET_F_HOST_ECN, true),
2829 DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
2830 VIRTIO_NET_F_HOST_UFO, true),
2831 DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
2832 VIRTIO_NET_F_MRG_RXBUF, true),
2833 DEFINE_PROP_BIT64("status", VirtIONet, host_features,
2834 VIRTIO_NET_F_STATUS, true),
2835 DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
2836 VIRTIO_NET_F_CTRL_VQ, true),
2837 DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
2838 VIRTIO_NET_F_CTRL_RX, true),
2839 DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
2840 VIRTIO_NET_F_CTRL_VLAN, true),
2841 DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
2842 VIRTIO_NET_F_CTRL_RX_EXTRA, true),
2843 DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
2844 VIRTIO_NET_F_CTRL_MAC_ADDR, true),
2845 DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
2846 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
2847 DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
2848 DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
2849 VIRTIO_NET_F_RSC_EXT, false),
2850 DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
2851 VIRTIO_NET_RSC_DEFAULT_INTERVAL),
2852 DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
2853 DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
2854 TX_TIMER_INTERVAL),
2855 DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
2856 DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
2857 DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
2858 VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
2859 DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
2860 VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
2861 DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
2862 DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
2863 true),
2864 DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
2865 DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
2866 DEFINE_PROP_END_OF_LIST(),
2867 };
2868
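/*
 * Usage sketch for the RSC knobs above (illustrative values): the
 * coalescing paths earlier in this file stay inactive unless the
 * feature bit is offered, e.g.
 *
 *     -device virtio-net-pci,guest_rsc_ext=on,rsc_interval=300000
 *
 * offers VIRTIO_NET_F_RSC_EXT for Windows HCK-style receive segment
 * coalescing and sets the purge interval explicitly to its default.
 */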
2869 static void virtio_net_class_init(ObjectClass *klass, void *data)
2870 {
2871 DeviceClass *dc = DEVICE_CLASS(klass);
2872 VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
2873
2874 dc->props = virtio_net_properties;
2875 dc->vmsd = &vmstate_virtio_net;
2876 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
2877 vdc->realize = virtio_net_device_realize;
2878 vdc->unrealize = virtio_net_device_unrealize;
2879 vdc->get_config = virtio_net_get_config;
2880 vdc->set_config = virtio_net_set_config;
2881 vdc->get_features = virtio_net_get_features;
2882 vdc->set_features = virtio_net_set_features;
2883 vdc->bad_features = virtio_net_bad_features;
2884 vdc->reset = virtio_net_reset;
2885 vdc->set_status = virtio_net_set_status;
2886 vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
2887 vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
2888 vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
2889 vdc->vmsd = &vmstate_virtio_net_device;
2890 }
2891
2892 static const TypeInfo virtio_net_info = {
2893 .name = TYPE_VIRTIO_NET,
2894 .parent = TYPE_VIRTIO_DEVICE,
2895 .instance_size = sizeof(VirtIONet),
2896 .instance_init = virtio_net_instance_init,
2897 .class_init = virtio_net_class_init,
2898 };
2899
2900 static void virtio_register_types(void)
2901 {
2902 type_register_static(&virtio_net_info);
2903 }
2904
2905 type_init(virtio_register_types)