/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
#include "hw/virtio/virtio-bus.h"
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#define VIRTIO_NET_VM_VERSION    11

#define MAC_TABLE_ENTRIES    64
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */

/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256

/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE

#define VIRTIO_NET_IP4_ADDR_SIZE   8        /* ipv4 saddr + daddr */

#define VIRTIO_NET_TCP_FLAG         0x3F
#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
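/*
 * For illustration: th_offset_flags packs the TCP data offset in its top
 * four bits and the flags in the low bits, so masking with
 * VIRTIO_NET_TCP_HDR_LENGTH (0xF000) and shifting right by 10 yields the
 * header length in bytes directly (offset words * 4).  E.g. for 0x7018:
 * (0x7018 & 0xF000) >> 10 = 28 bytes, and 0x7018 & VIRTIO_NET_TCP_FLAG
 * = 0x18 = PSH|ACK.
 */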
/* IPv4 max payload, 16 bits in the header */
#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535

/* IP header length field value (in 32-bit words) for a header without options */
#define VIRTIO_NET_IP4_HEADER_LENGTH 5

#define VIRTIO_NET_IP6_ADDR_SIZE   32      /* ipv6 saddr + daddr */
#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
/* Purge coalesced packets timer interval.  This value affects performance
   a lot and should be tuned carefully: '300000' (300us) is the recommended
   value to pass the WHQL test, while '50000' can gain 2x netperf throughput
   with tso/gso/gro 'off'. */
#define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
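/*
 * Overview of the RSC (receive segment coalescing) path implemented below:
 * incoming TCP segments are looked up in a per-protocol chain
 * (virtio_net_rsc_lookup_chain), buffered and merged when their sequence
 * numbers line up (virtio_net_rsc_coalesce_data), and flushed to the guest
 * either when a segment can't be coalesced or when the purge timer above
 * fires (virtio_net_rsc_purge).
 */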
/* temporary until the standard header includes it */
#if !defined(VIRTIO_NET_HDR_F_RSC_INFO)

#define VIRTIO_NET_HDR_F_RSC_INFO  4 /* rsc_ext data in csum_ fields */
#define VIRTIO_NET_F_RSC_EXT       61

static inline __virtio16 *virtio_net_rsc_ext_num_packets(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_start;
}

static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
    struct virtio_net_hdr *hdr)
{
    return &hdr->csum_offset;
}

#endif
static VirtIOFeature feature_sizes[] = {
    {.flags = 1ULL << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1ULL << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1ULL << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {.flags = 1ULL << VIRTIO_NET_F_MTU,
     .end = endof(struct virtio_net_config, mtu)},
    {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
     .end = endof(struct virtio_net_config, duplex)},
    {}
};
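/*
 * For illustration (describes how this table is consumed elsewhere): each
 * negotiated feature extends the guest-visible config space up to the
 * 'end' offset of the field it governs, so e.g. negotiating
 * VIRTIO_NET_F_MQ exposes the struct up to and including
 * max_virtqueue_pairs.  The realize code (not shown in this section)
 * derives n->config_size from this table and the host features.
 */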
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}

static int vq2q(int queue_index)
{
    return queue_index / 2;
}
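/*
 * For illustration: virtqueues come in rx/tx pairs, so virtqueue index 2n
 * is the rx queue and 2n + 1 the tx queue of queue pair n; vq2q(5) == 2
 * maps the third pair's tx queue back to its queue index.
 */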
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
    netcfg.duplex = n->net_conf.duplex;
    memcpy(config, &netcfg, n->config_size);
}
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}
static void virtio_net_announce_notify(VirtIONet *net)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(net);
    trace_virtio_net_announce_notify();

    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(vdev);
}

static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;
    trace_virtio_net_announce_timer(n->announce_timer.round);

    n->announce_timer.round--;
    virtio_net_announce_notify(n);
}

static void virtio_net_announce(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /*
     * Make sure the virtio migration announcement timer isn't running.
     * If it is, let it trigger the announcement so that we do not cause
     * confusion.
     */
    if (n->announce_timer.round) {
        return;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        virtio_net_announce_notify(n);
    }
}
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0; i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
            r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
            if (r < 0) {
                error_report("%uBytes MTU not supported by the backend",
                             n->net_conf.mtu);

                return;
            }
        }

        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    if (virtio_is_big_endian(vdev)) {
        return qemu_set_vnet_be(peer, enable);
    } else {
        return qemu_set_vnet_le(peer, enable);
    }
}

static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int i;

    for (i = 0; i < queues; i++) {
        if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
            enable) {
            while (--i >= 0) {
                virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
            }

            return true;
        }
    }

    return false;
}
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fall back on fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    unsigned int dropped = virtqueue_drop_all(vq);
    if (dropped) {
        virtio_notify(vdev, vq);
    }
}
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;

    virtio_net_vnet_endian_status(n, status);
    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
        q = &n->vqs[i];

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        } else {
            queue_status = status;
        }
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }

        if (!q->tx_waiting) {
            continue;
        }

        if (queue_started) {
            if (q->tx_timer) {
                timer_mod(q->tx_timer,
                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
        } else {
            if (q->tx_timer) {
                timer_del(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
                vdev->vm_running) {
                /* If tx is waiting, we likely have some packets in the
                 * tx queue and notification disabled */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
        }
    }
}
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down)
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    else
        n->status |= VIRTIO_NET_S_LINK_UP;

    if (n->status != old_status)
        virtio_notify_config(vdev);

    virtio_net_set_status(vdev, vdev->status);
}
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}
static intList *get_vlan_table(VirtIONet *n)
{
    intList *list, *entry;
    int i, j;

    list = NULL;
    for (i = 0; i < MAX_VLAN >> 5; i++) {
        for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
            if (n->vlans[i] & (1U << j)) {
                entry = g_malloc0(sizeof(*entry));
                entry->value = (i << 5) + j;
                entry->next = list;
                list = entry;
            }
        }
    }

    return list;
}
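/*
 * For illustration: the VLAN filter is a bitmap of 4096 bits stored as
 * 128 32-bit words, so VLAN id 100 lives in word 100 >> 5 == 3 at bit
 * 100 & 0x1f == 4; the loop above simply walks that bitmap and rebuilds
 * the list of configured ids.
 */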
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer.tm);
    n->announce_timer.round = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Flush any async TX */
    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (nc->peer) {
            qemu_flush_or_purge_queued_packets(nc->peer, true);
            assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
        }
    }
}
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}

static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n))
        return 0;

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}
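/*
 * For illustration: with VIRTIO_F_VERSION_1 or mergeable rx buffers the
 * guest-visible header is the 12-byte virtio_net_hdr_mrg_rxbuf, otherwise
 * the 10-byte virtio_net_hdr; host_hdr_len only follows suit when the
 * backend can actually produce that header length, which is why the rx/tx
 * paths below translate between the two lengths.
 */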
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
    NetClientState *peer = n->nic_conf.peers.ncs[0];

    /*
     * Backends other than vhost-user don't support max queue size.
     */
    if (!peer) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
        return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
    }

    return VIRTQUEUE_MAX_SIZE;
}
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    if (n->max_queues == 1) {
        return 0;
    }

    return tap_enable(nc->peer);
}

static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* First sync all possible supported virtio-net features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }

    features = vhost_net_get_features(get_vhost_net(nc->peer), features);
    vdev->backend_features = features;

    if (n->mtu_bypass_backend &&
        (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
        features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    return features;
}
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    if (n->mtu_bypass_backend &&
            !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
        features &= ~(1ULL << VIRTIO_NET_F_MTU);
    }

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
    n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
        virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        offloads = virtio_ldq_p(vdev, &offloads);

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
        n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
            virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    first_multi = in_use;

    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }

    iov_discard_front(&iov, &iov_cnt, s);

    if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}
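/*
 * For illustration: the MAC_TABLE_SET payload carries two back-to-back
 * virtio_net_ctrl_mac tables, unicast first and then multicast, each a
 * 32-bit entry count followed by that many 6-byte addresses; the parsing
 * above consumes them in exactly that order, spilling into the
 * uni/multi_overflow flags when a table exceeds MAC_TABLE_ENTRIES.
 */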
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    vid = virtio_lduw_p(vdev, &vid);
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }

    if (vid >= MAX_VLAN)
        return VIRTIO_NET_ERR;

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    else
        return VIRTIO_NET_ERR;

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    trace_virtio_net_handle_announce(n->announce_timer.round);
    if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
        n->status & VIRTIO_NET_S_ANNOUNCE) {
        n->status &= ~VIRTIO_NET_S_ANNOUNCE;
        if (n->announce_timer.round) {
            qemu_announce_timer_step(&n->announce_timer);
        }
        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}
/* RX */

static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        uint8_t *buf, size_t size)
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        net_checksum_calculate(buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}
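/*
 * For illustration: with a 14-byte Ethernet header and a 20-byte IPv4
 * header, the byte offsets checked above are 12-13 (ethertype), 23
 * (IP protocol) and 34-35 (UDP source port), so the filter matches
 * exactly "IPv4/UDP from port 67", i.e. a DHCP server reply.
 */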
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc)
        return 1;

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
            return 0;
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}
static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
                                      size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size))
        return size;

    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zd mergeable %d offset %zd, size %zd, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}
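/*
 * For illustration: with mergeable rx buffers a large packet may span
 * several descriptor chains; the loop above fills one chain per iteration
 * and finally patches num_buffers in the first header (located via
 * mhdr_sg) with the number of chains actually used, so e.g. a 4000-byte
 * packet landing in 1500-byte buffers ends up with num_buffers == 3.
 */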
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
                                     size_t size)
{
    ssize_t r;

    rcu_read_lock();
    r = virtio_net_receive_rcu(nc, buf, size);
    rcu_read_unlock();
    return r;
}
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    uint16_t ip_hdrlen;
    struct ip_header *ip;

    ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
                              + sizeof(struct eth_header));
    unit->ip = (void *)ip;
    ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
    unit->ip_plen = &ip->ip_len;
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
    unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
}

static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
                                         const uint8_t *buf,
                                         VirtioNetRscUnit *unit)
{
    struct ip6_header *ip6;

    ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
                                + sizeof(struct eth_header));
    unit->ip = ip6;
    unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)
                                      + sizeof(struct ip6_header));
    unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;

    /* There is a difference between the payload length in ipv4 and v6:
       the ip header is excluded in ipv6 */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
}
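/*
 * For illustration: IPv4's ip_len covers the IP header plus payload while
 * IPv6's payload length field excludes the fixed 40-byte header, hence the
 * two different computations above.  E.g. a segment with ip_ver_len 0x45
 * (20-byte header), ip_len 60 and a 20-byte TCP header yields
 * payload = 60 - 20 - 20 = 20 bytes in the v4 case.
 */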
static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
                                       VirtioNetRscSeg *seg)
{
    int ret;
    struct virtio_net_hdr *h;

    h = (struct virtio_net_hdr *)seg->buf;
    h->flags = 0;
    h->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (seg->is_coalesced) {
        *virtio_net_rsc_ext_num_packets(h) = seg->packets;
        *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
        h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
        if (chain->proto == ETH_P_IP) {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
        } else {
            h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
        }
    }

    ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
    QTAILQ_REMOVE(&chain->buffers, seg, next);
    g_free(seg->buf);
    g_free(seg);

    return ret;
}
static void virtio_net_rsc_purge(void *opq)
{
    VirtioNetRscSeg *seg, *rn;
    VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.purge_failed++;
            continue;
        }
    }

    chain->stat.timer++;
    if (!QTAILQ_EMPTY(&chain->buffers)) {
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
    }
}
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
    VirtioNetRscChain *chain, *rn_chain;
    VirtioNetRscSeg *seg, *rn_seg;

    QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
        QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
            QTAILQ_REMOVE(&chain->buffers, seg, next);
            g_free(seg->buf);
            g_free(seg);
        }

        timer_del(chain->drain_timer);
        timer_free(chain->drain_timer);
        QTAILQ_REMOVE(&n->rsc_chains, chain, next);
        g_free(chain);
    }
}
static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
                                     NetClientState *nc,
                                     const uint8_t *buf, size_t size)
{
    uint16_t hdr_len;
    VirtioNetRscSeg *seg;

    hdr_len = chain->n->guest_hdr_len;
    seg = g_malloc(sizeof(VirtioNetRscSeg));
    seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
        + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
    memcpy(seg->buf, buf, size);
    seg->size = size;
    seg->packets = 1;
    seg->dup_ack = 0;
    seg->is_coalesced = 0;
    seg->nc = nc;

    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
    chain->stat.cache++;

    switch (chain->proto) {
    case ETH_P_IP:
        virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
        break;
    case ETH_P_IPV6:
        virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
        break;
    default:
        g_assert_not_reached();
    }
}
static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
                                         VirtioNetRscSeg *seg,
                                         const uint8_t *buf,
                                         struct tcp_header *n_tcp,
                                         struct tcp_header *o_tcp)
{
    uint32_t nack, oack;
    uint16_t nwin, owin;

    nack = htonl(n_tcp->th_ack);
    nwin = htons(n_tcp->th_win);
    oack = htonl(o_tcp->th_ack);
    owin = htons(o_tcp->th_win);

    if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.ack_out_of_win++;
        return RSC_FINAL;
    } else if (nack == oack) {
        /* duplicated ack or window probe */
        if (nwin == owin) {
            /* duplicated ack; bump the dup-ack count (the WHQL test
               allows coalescing up to 1) */
            chain->stat.dup_ack++;
            return RSC_FINAL;
        } else {
            /* Coalesce window update */
            o_tcp->th_win = n_tcp->th_win;
            chain->stat.win_update++;
            return RSC_COALESCE;
        }
    } else {
        /* pure ack, go to 'C', finalize */
        chain->stat.pure_ack++;
        return RSC_FINAL;
    }
}
static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
                                            VirtioNetRscSeg *seg,
                                            const uint8_t *buf,
                                            VirtioNetRscUnit *n_unit)
{
    void *data;
    uint16_t o_ip_len;
    uint32_t nseq, oseq;
    VirtioNetRscUnit *o_unit;

    o_unit = &seg->unit;
    o_ip_len = htons(*o_unit->ip_plen);
    nseq = htonl(n_unit->tcp->th_seq);
    oseq = htonl(o_unit->tcp->th_seq);

    /* out of order or retransmitted. */
    if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
        chain->stat.data_out_of_win++;
        return RSC_FINAL;
    }

    data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
    if (nseq == oseq) {
        if ((o_unit->payload == 0) && n_unit->payload) {
            /* From no payload to payload, normal case, not a dup ack or etc */
            chain->stat.data_after_pure_ack++;
            goto coalesce;
        } else {
            return virtio_net_rsc_handle_ack(chain, seg, buf,
                                             n_unit->tcp, o_unit->tcp);
        }
    } else if ((nseq - oseq) != o_unit->payload) {
        /* Not a consistent packet, out of order */
        chain->stat.data_out_of_order++;
        return RSC_FINAL;
    } else {
coalesce:
        if ((o_ip_len + n_unit->payload) > chain->max_payload) {
            chain->stat.over_size++;
            return RSC_FINAL;
        }

        /* The data fits.  The payload length field differs between v4 and
           v6, so use the field value to update and record the new data
           length */
        o_unit->payload += n_unit->payload; /* update new data len */

        /* update field in ip header */
        *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);

        /* Carry the 'PUSH' flag along: the WHQL test guide says 'PUSH' can
           be coalesced for Windows guests, while this may change the
           behavior for Linux guests (only if they use the RSC feature). */
        o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;

        o_unit->tcp->th_ack = n_unit->tcp->th_ack;
        o_unit->tcp->th_win = n_unit->tcp->th_win;

        memmove(seg->buf + seg->size, data, n_unit->payload);
        seg->size += n_unit->payload;
        seg->packets++;
        chain->stat.coalesced++;
        return RSC_COALESCE;
    }
}
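/*
 * For illustration: segments chain only when the new sequence number is
 * exactly the old sequence plus the bytes already buffered, e.g. with
 * oseq = 1000 and a buffered payload of 500, only nseq == 1500 coalesces;
 * anything else is flushed as out of order or handled as an ack.
 */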
static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip_header *ip1, *ip2;

    ip1 = (struct ip_header *)(unit->ip);
    ip2 = (struct ip_header *)(seg->unit.ip);
    if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}

static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
                                        VirtioNetRscSeg *seg,
                                        const uint8_t *buf, size_t size,
                                        VirtioNetRscUnit *unit)
{
    struct ip6_header *ip1, *ip2;

    ip1 = (struct ip6_header *)(unit->ip);
    ip2 = (struct ip6_header *)(seg->unit.ip);
    if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
        || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
        || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
        || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
        chain->stat.no_match++;
        return RSC_NO_MATCH;
    }

    return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
}
/* Packets with 'SYN' should bypass; other flags should be sent after drain
 * to prevent out-of-order delivery */
static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
                                         struct tcp_header *tcp)
{
    uint16_t tcp_hdr;
    uint16_t tcp_flag;

    tcp_flag = htons(tcp->th_offset_flags);
    tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
    tcp_flag &= VIRTIO_NET_TCP_FLAG;
    if (tcp_flag & TH_SYN) {
        chain->stat.tcp_syn++;
        return RSC_BYPASS;
    }

    if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
        chain->stat.tcp_ctrl_drain++;
        return RSC_FINAL;
    }

    if (tcp_hdr > sizeof(struct tcp_header)) {
        chain->stat.tcp_all_opt++;
        return RSC_FINAL;
    }

    return RSC_CANDIDATE;
}
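/*
 * For illustration: tcp_hdr above is already in bytes, so any value larger
 * than sizeof(struct tcp_header) (20) means TCP options are present and
 * the segment is flushed rather than coalesced.
 */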
static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
                                         NetClientState *nc,
                                         const uint8_t *buf, size_t size,
                                         VirtioNetRscUnit *unit)
{
    int ret;
    VirtioNetRscSeg *seg, *nseg;

    if (QTAILQ_EMPTY(&chain->buffers)) {
        chain->stat.empty_cache++;
        virtio_net_rsc_cache_buf(chain, nc, buf, size);
        timer_mod(chain->drain_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
        return size;
    }

    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        if (chain->proto == ETH_P_IP) {
            ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
        } else {
            ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
        }

        if (ret == RSC_FINAL) {
            if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
                /* Send failed */
                chain->stat.final_failed++;
                return 0;
            }

            /* Send current packet */
            return virtio_net_do_receive(nc, buf, size);
        } else if (ret == RSC_NO_MATCH) {
            continue;
        } else {
            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
            seg->is_coalesced = 1;
            return size;
        }
    }

    chain->stat.no_match_cache++;
    virtio_net_rsc_cache_buf(chain, nc, buf, size);
    return size;
}
/* Drain a connection's data; this is to avoid out-of-order segments */
static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
                                        NetClientState *nc,
                                        const uint8_t *buf, size_t size,
                                        uint16_t ip_start, uint16_t ip_size,
                                        uint16_t tcp_port)
{
    VirtioNetRscSeg *seg, *nseg;
    uint32_t ppair1, ppair2;

    ppair1 = *(uint32_t *)(buf + tcp_port);
    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
        ppair2 = *(uint32_t *)(seg->buf + tcp_port);
        if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
            || (ppair1 != ppair2)) {
            continue;
        }
        if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
            chain->stat.drain_failed++;
        }

        break;
    }

    return virtio_net_do_receive(nc, buf, size);
}
static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
                                            struct ip_header *ip,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    /* Not an ipv4 packet */
    if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip option */
    if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
        chain->stat.ip_option++;
        return RSC_BYPASS;
    }

    if (ip->ip_p != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ip fragment */
    if (!(htons(ip->ip_off) & IP_DF)) {
        chain->stat.ip_frag++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IPTOS_ECN(ip->ip_tos)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip->ip_len);
    if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
        || ip_len > (size - chain->n->guest_hdr_len -
                     sizeof(struct eth_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
                                      NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscUnit unit;

    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
        + sizeof(struct tcp_header))) {
        chain->stat.bypass_not_tcp++;
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit4(chain, buf, &unit);
    if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
        != RSC_CANDIDATE) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 12),
                VIRTIO_NET_IP4_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
                                            struct ip6_header *ip6,
                                            const uint8_t *buf, size_t size)
{
    uint16_t ip_len;

    if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
        != IP_HEADER_VERSION_6) {
        return RSC_BYPASS;
    }

    /* Both options and protocol are checked in this */
    if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
        chain->stat.bypass_not_tcp++;
        return RSC_BYPASS;
    }

    ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
    if (ip_len < sizeof(struct tcp_header) ||
        ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
                  - sizeof(struct ip6_header))) {
        chain->stat.ip_hacked++;
        return RSC_BYPASS;
    }

    /* Don't handle packets with ecn flag */
    if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
        chain->stat.ip_ecn++;
        return RSC_BYPASS;
    }

    return RSC_CANDIDATE;
}
static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
                                      const uint8_t *buf, size_t size)
{
    int32_t ret;
    uint16_t hdr_len;
    VirtioNetRscChain *chain;
    VirtioNetRscUnit unit;

    chain = (VirtioNetRscChain *)opq;
    hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;

    if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
        + sizeof(tcp_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    virtio_net_rsc_extract_unit6(chain, buf, &unit);
    if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
                                                      unit.ip, buf, size)) {
        return virtio_net_do_receive(nc, buf, size);
    }

    ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
    if (ret == RSC_BYPASS) {
        return virtio_net_do_receive(nc, buf, size);
    } else if (ret == RSC_FINAL) {
        return virtio_net_rsc_drain_flow(chain, nc, buf, size,
                ((hdr_len + sizeof(struct eth_header)) + 8),
                VIRTIO_NET_IP6_ADDR_SIZE,
                hdr_len + sizeof(struct eth_header)
                + sizeof(struct ip6_header));
    }

    return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
}
static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
                                                      NetClientState *nc,
                                                      uint16_t proto)
{
    VirtioNetRscChain *chain;

    if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
        return NULL;
    }

    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
        if (chain->proto == proto) {
            return chain;
        }
    }

    chain = g_malloc(sizeof(*chain));
    chain->n = n;
    chain->proto = proto;
    if (proto == (uint16_t)ETH_P_IP) {
        chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
    } else {
        chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
        chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
    }
    chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
                                      virtio_net_rsc_purge, chain);
    memset(&chain->stat, 0, sizeof(chain->stat));

    QTAILQ_INIT(&chain->buffers);
    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);

    return chain;
}
static ssize_t virtio_net_rsc_receive(NetClientState *nc,
                                      const uint8_t *buf,
                                      size_t size)
{
    uint16_t proto;
    VirtioNetRscChain *chain;
    struct eth_header *eth;
    VirtIONet *n;

    n = qemu_get_nic_opaque(nc);
    if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
        return virtio_net_do_receive(nc, buf, size);
    }

    eth = (struct eth_header *)(buf + n->guest_hdr_len);
    proto = htons(eth->h_proto);

    chain = virtio_net_rsc_lookup_chain(n, nc, proto);
    if (chain) {
        chain->stat.received++;
        if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
            return virtio_net_rsc_receive4(chain, nc, buf, size);
        } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
            return virtio_net_rsc_receive6(chain, nc, buf, size);
        }
    }
    return virtio_net_do_receive(nc, buf, size);
}

static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    if ((n->rsc4_enabled || n->rsc6_enabled)) {
        return virtio_net_rsc_receive(nc, buf, size);
    } else {
        return virtio_net_do_receive(nc, buf, size);
    }
}
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);

static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}
2004 /* TX */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}
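
/*
 * Timer-based tx kick handler: the first kick arms a timer for
 * tx_timeout ns and disables further notifications; the flush happens
 * when the timer fires, trading latency for fewer exits. A second
 * kick while the timer is pending flushes immediately.
 */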
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}
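
/*
 * Bottom-half tx kick handler (the default): disable further guest
 * notifications and defer the flush to a BH, so a burst of kicks is
 * collapsed into a single flush.
 */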
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but the timer wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready anymore */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}
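
/*
 * Allocate the rx/tx virtqueue pair for queue 'index'. The tx side is
 * serviced by a timer or by a bottom half, depending on the "tx"
 * property; "bh" is the default.
 */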
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}
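
/*
 * Virtqueues are laid out as [rx0, tx0, rx1, tx1, ..., ctrl]: a device
 * with N queue pairs has 2 * N + 1 virtqueues with the control vq
 * always last, which is why old_num_queues must be odd and >= 3.
 */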
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}
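
/*
 * Post-load fixups: re-derive everything that is not migrated
 * directly, e.g. per-subqueue link state, the first multicast entry
 * in the MAC table, and a possibly pending announce timer.
 */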
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    trace_virtio_net_post_load_device();
    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    /*
     * curr_guest_offloads will be later overwritten by the
     * virtio_set_features_nocheck call done from the virtio_load.
     * Here we make sure it is preserved and restored accordingly
     * in the virtio_net_post_load_virtio callback.
     */
    n->saved_guest_offloads = n->curr_guest_offloads;

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                                  QEMU_CLOCK_VIRTUAL,
                                  virtio_net_announce_timer, n);
        if (n->announce_timer.round) {
            timer_mod(n->announce_timer.tm,
                      qemu_clock_get_ms(n->announce_timer.type));
        } else {
            qemu_announce_timer_del(&n->announce_timer, false);
        }
    }

    return 0;
}

static int virtio_net_post_load_virtio(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * The actual needed state is now in saved_guest_offloads,
     * see virtio_net_post_load_device for detail.
     * Restore it back and apply the desired offloads.
     */
    n->curr_guest_offloads = n->saved_guest_offloads;
    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    return 0;
}
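
/*
 * Migration (vmstate) descriptions follow. Several fields are
 * migrated through VMSTATE_WITH_TMP: a struct VirtIONetMigTmp is
 * populated in pre_save/pre_load and lives only for the duration of
 * the save or load operation.
 */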
/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name      = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save  = virtio_net_ufo_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name      = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save  = virtio_net_vnet_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};
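
/*
 * Hooks through which the net core drives this device: packet
 * delivery, link status changes, rx-filter queries and self-announce.
 */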
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
    .announce = virtio_net_announce,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}
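
/*
 * The config space ends at the last field whose feature bit is
 * present; VIRTIO_NET_F_MAC is added unconditionally here, so the MAC
 * field is always exposed regardless of the negotiated features.
 */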
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
        && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);

    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    qemu_announce_timer_reset(&n->announce_timer, migrate_announce_params(),
                              QEMU_CLOCK_VIRTUAL,
                              virtio_net_announce_timer, n);
    n->announce_timer.round = 0;

    if (n->netclient_type) {
        /*
         * This happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    qemu_announce_timer_del(&n->announce_timer, false);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};
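
/*
 * User-visible device properties. Each DEFINE_PROP_BIT64 entry exposes
 * one feature bit as a -device option, e.g. (hypothetical command
 * line, "net0" being an assumed netdev id, not defined in this file):
 *   -device virtio-net-pci,netdev=net0,mq=on,rx_queue_size=1024
 */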
static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->post_load = virtio_net_post_load_virtio;
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)