virtio-net: make VirtIOFeature usable for other virtio devices
[qemu/ar7.git] / hw / net / virtio-net.c
blob 6e6b146022a71614fef3abf7b8abe9ebc758cd0d
1 /*
2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include "qemu/osdep.h"
15 #include "qemu/iov.h"
16 #include "hw/virtio/virtio.h"
17 #include "net/net.h"
18 #include "net/checksum.h"
19 #include "net/tap.h"
20 #include "qemu/error-report.h"
21 #include "qemu/timer.h"
22 #include "hw/virtio/virtio-net.h"
23 #include "net/vhost_net.h"
24 #include "hw/virtio/virtio-bus.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-events-net.h"
27 #include "hw/virtio/virtio-access.h"
28 #include "migration/misc.h"
29 #include "standard-headers/linux/ethtool.h"
31 #define VIRTIO_NET_VM_VERSION 11
33 #define MAC_TABLE_ENTRIES 64
34 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
36 /* previously fixed value */
37 #define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
38 #define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256
40 /* for now, only allow larger queues; with virtio-1, guest can downsize */
41 #define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE
42 #define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE
44 #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
46 #define VIRTIO_NET_TCP_FLAG 0x3F
47 #define VIRTIO_NET_TCP_HDR_LENGTH 0xF000
49 /* IPv4 max payload, 16 bits in the header */
50 #define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
51 #define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
53 /* header length value in an IP header without options */
54 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
56 #define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
57 #define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
59 /* Purge coalesced packets timer interval. This value affects performance
60 a lot and should be tuned carefully; '300000' (300us) is the recommended
61 value to pass the WHQL test, while '50000' can gain 2x netperf throughput
62 with tso/gso/gro 'off'. */
63 #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000
65 /* temporary until the standard header includes it */
66 #if !defined(VIRTIO_NET_HDR_F_RSC_INFO)
68 #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */
69 #define VIRTIO_NET_F_RSC_EXT 61
71 static inline __virtio16 *virtio_net_rsc_ext_num_packets(
72 struct virtio_net_hdr *hdr)
74 return &hdr->csum_start;
77 static inline __virtio16 *virtio_net_rsc_ext_num_dupacks(
78 struct virtio_net_hdr *hdr)
80 return &hdr->csum_offset;
83 #endif
85 static VirtIOFeature feature_sizes[] = {
86 {.flags = 1ULL << VIRTIO_NET_F_MAC,
87 .end = virtio_endof(struct virtio_net_config, mac)},
88 {.flags = 1ULL << VIRTIO_NET_F_STATUS,
89 .end = virtio_endof(struct virtio_net_config, status)},
90 {.flags = 1ULL << VIRTIO_NET_F_MQ,
91 .end = virtio_endof(struct virtio_net_config, max_virtqueue_pairs)},
92 {.flags = 1ULL << VIRTIO_NET_F_MTU,
93 .end = virtio_endof(struct virtio_net_config, mtu)},
94 {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX,
95 .end = virtio_endof(struct virtio_net_config, duplex)},
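/*
 * A minimal sketch (not part of this file) of how a device can derive
 * its config space size from the table above once VirtIOFeature is
 * shared by core virtio, per this patch's subject. It assumes the
 * array ends with an all-zero sentinel entry; the helper name is
 * illustrative, not necessarily the final core-virtio API.
 */
static size_t example_feature_config_size(const VirtIOFeature *sizes,
                                          uint64_t host_features)
{
    size_t config_size = 0;
    int i;

    /* take the largest 'end' offset among the enabled feature bits */
    for (i = 0; sizes[i].flags != 0; i++) {
        if (host_features & sizes[i].flags) {
            config_size = MAX(sizes[i].end, config_size);
        }
    }
    return config_size;
}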
99 static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
101 VirtIONet *n = qemu_get_nic_opaque(nc);
103 return &n->vqs[nc->queue_index];
106 static int vq2q(int queue_index)
108 return queue_index / 2;
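/*
 * Example: virtio-net interleaves virtqueues as rx0, tx0, rx1, tx1, ...,
 * so virtqueue indexes 0 and 1 both map to queue pair 0, indexes 2 and 3
 * to pair 1, and so on; vq2q(3) == 1.
 */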
111 /* TODO
112 * - we could suppress RX interrupt if we were so inclined.
115 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
117 VirtIONet *n = VIRTIO_NET(vdev);
118 struct virtio_net_config netcfg;
120 virtio_stw_p(vdev, &netcfg.status, n->status);
121 virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
122 virtio_stw_p(vdev, &netcfg.mtu, n->net_conf.mtu);
123 memcpy(netcfg.mac, n->mac, ETH_ALEN);
124 virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed);
125 netcfg.duplex = n->net_conf.duplex;
126 memcpy(config, &netcfg, n->config_size);
129 static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
131 VirtIONet *n = VIRTIO_NET(vdev);
132 struct virtio_net_config netcfg = {};
134 memcpy(&netcfg, config, n->config_size);
136 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
137 !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
138 memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
139 memcpy(n->mac, netcfg.mac, ETH_ALEN);
140 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
144 static bool virtio_net_started(VirtIONet *n, uint8_t status)
146 VirtIODevice *vdev = VIRTIO_DEVICE(n);
147 return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
148 (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
151 static void virtio_net_announce_timer(void *opaque)
153 VirtIONet *n = opaque;
154 VirtIODevice *vdev = VIRTIO_DEVICE(n);
156 n->announce_counter--;
157 n->status |= VIRTIO_NET_S_ANNOUNCE;
158 virtio_notify_config(vdev);
161 static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
163 VirtIODevice *vdev = VIRTIO_DEVICE(n);
164 NetClientState *nc = qemu_get_queue(n->nic);
165 int queues = n->multiqueue ? n->max_queues : 1;
167 if (!get_vhost_net(nc->peer)) {
168 return;
171 if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
172 !!n->vhost_started) {
173 return;
175 if (!n->vhost_started) {
176 int r, i;
178 if (n->needs_vnet_hdr_swap) {
179 error_report("backend does not support %s vnet headers; "
180 "falling back on userspace virtio",
181 virtio_is_big_endian(vdev) ? "BE" : "LE");
182 return;
185 /* Any packets outstanding? Purge them to avoid touching rings
186 * when vhost is running.
188 for (i = 0; i < queues; i++) {
189 NetClientState *qnc = qemu_get_subqueue(n->nic, i);
191 /* Purge both directions: TX and RX. */
192 qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
193 qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
196 if (virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MTU)) {
197 r = vhost_net_set_mtu(get_vhost_net(nc->peer), n->net_conf.mtu);
198 if (r < 0) {
199 error_report("%uBytes MTU not supported by the backend",
200 n->net_conf.mtu);
202 return;
206 n->vhost_started = 1;
207 r = vhost_net_start(vdev, n->nic->ncs, queues);
208 if (r < 0) {
209 error_report("unable to start vhost net: %d: "
210 "falling back on userspace virtio", -r);
211 n->vhost_started = 0;
213 } else {
214 vhost_net_stop(vdev, n->nic->ncs, queues);
215 n->vhost_started = 0;
219 static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
220 NetClientState *peer,
221 bool enable)
223 if (virtio_is_big_endian(vdev)) {
224 return qemu_set_vnet_be(peer, enable);
225 } else {
226 return qemu_set_vnet_le(peer, enable);
230 static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
231 int queues, bool enable)
233 int i;
235 for (i = 0; i < queues; i++) {
236 if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 &&
237 enable) {
238 while (--i >= 0) {
239 virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false);
242 return true;
246 return false;
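/*
 * Note the unwind above: if enabling fails for queue i, queues 0..i-1
 * are reverted so the backend is never left half-configured. Returning
 * true tells the caller that virtio-net itself must byte-swap vnet
 * headers (needs_vnet_hdr_swap).
 */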
249 static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
251 VirtIODevice *vdev = VIRTIO_DEVICE(n);
252 int queues = n->multiqueue ? n->max_queues : 1;
254 if (virtio_net_started(n, status)) {
255 /* Before using the device, we tell the network backend about the
256 * endianness to use when parsing vnet headers. If the backend
257 * can't do it, we fall back to fixing the headers in the core
258 * virtio-net code.
260 n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
261 queues, true);
262 } else if (virtio_net_started(n, vdev->status)) {
263 /* After using the device, we need to reset the network backend to
264 * the default (guest native endianness), otherwise the guest may
265 * lose network connectivity if it is rebooted into a different
266 * endianness.
268 virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
272 static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
274 unsigned int dropped = virtqueue_drop_all(vq);
275 if (dropped) {
276 virtio_notify(vdev, vq);
280 static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
282 VirtIONet *n = VIRTIO_NET(vdev);
283 VirtIONetQueue *q;
284 int i;
285 uint8_t queue_status;
287 virtio_net_vnet_endian_status(n, status);
288 virtio_net_vhost_status(n, status);
290 for (i = 0; i < n->max_queues; i++) {
291 NetClientState *ncs = qemu_get_subqueue(n->nic, i);
292 bool queue_started;
293 q = &n->vqs[i];
295 if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
296 queue_status = 0;
297 } else {
298 queue_status = status;
300 queue_started =
301 virtio_net_started(n, queue_status) && !n->vhost_started;
303 if (queue_started) {
304 qemu_flush_queued_packets(ncs);
307 if (!q->tx_waiting) {
308 continue;
311 if (queue_started) {
312 if (q->tx_timer) {
313 timer_mod(q->tx_timer,
314 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
315 } else {
316 qemu_bh_schedule(q->tx_bh);
318 } else {
319 if (q->tx_timer) {
320 timer_del(q->tx_timer);
321 } else {
322 qemu_bh_cancel(q->tx_bh);
324 if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
325 (queue_status & VIRTIO_CONFIG_S_DRIVER_OK) &&
326 vdev->vm_running) {
327 /* If tx is waiting, we likely have some packets in the tx queue
328 * and have disabled notification */
329 q->tx_waiting = 0;
330 virtio_queue_set_notification(q->tx_vq, 1);
331 virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
337 static void virtio_net_set_link_status(NetClientState *nc)
339 VirtIONet *n = qemu_get_nic_opaque(nc);
340 VirtIODevice *vdev = VIRTIO_DEVICE(n);
341 uint16_t old_status = n->status;
343 if (nc->link_down)
344 n->status &= ~VIRTIO_NET_S_LINK_UP;
345 else
346 n->status |= VIRTIO_NET_S_LINK_UP;
348 if (n->status != old_status)
349 virtio_notify_config(vdev);
351 virtio_net_set_status(vdev, vdev->status);
354 static void rxfilter_notify(NetClientState *nc)
356 VirtIONet *n = qemu_get_nic_opaque(nc);
358 if (nc->rxfilter_notify_enabled) {
359 gchar *path = object_get_canonical_path(OBJECT(n->qdev));
360 qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
361 n->netclient_name, path);
362 g_free(path);
364 /* disable event notification to avoid event flooding */
365 nc->rxfilter_notify_enabled = 0;
369 static intList *get_vlan_table(VirtIONet *n)
371 intList *list, *entry;
372 int i, j;
374 list = NULL;
375 for (i = 0; i < MAX_VLAN >> 5; i++) {
376 for (j = 0; n->vlans[i] && j <= 0x1f; j++) {
377 if (n->vlans[i] & (1U << j)) {
378 entry = g_malloc0(sizeof(*entry));
379 entry->value = (i << 5) + j;
380 entry->next = list;
381 list = entry;
386 return list;
389 static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
391 VirtIONet *n = qemu_get_nic_opaque(nc);
392 VirtIODevice *vdev = VIRTIO_DEVICE(n);
393 RxFilterInfo *info;
394 strList *str_list, *entry;
395 int i;
397 info = g_malloc0(sizeof(*info));
398 info->name = g_strdup(nc->name);
399 info->promiscuous = n->promisc;
401 if (n->nouni) {
402 info->unicast = RX_STATE_NONE;
403 } else if (n->alluni) {
404 info->unicast = RX_STATE_ALL;
405 } else {
406 info->unicast = RX_STATE_NORMAL;
409 if (n->nomulti) {
410 info->multicast = RX_STATE_NONE;
411 } else if (n->allmulti) {
412 info->multicast = RX_STATE_ALL;
413 } else {
414 info->multicast = RX_STATE_NORMAL;
417 info->broadcast_allowed = n->nobcast;
418 info->multicast_overflow = n->mac_table.multi_overflow;
419 info->unicast_overflow = n->mac_table.uni_overflow;
421 info->main_mac = qemu_mac_strdup_printf(n->mac);
423 str_list = NULL;
424 for (i = 0; i < n->mac_table.first_multi; i++) {
425 entry = g_malloc0(sizeof(*entry));
426 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
427 entry->next = str_list;
428 str_list = entry;
430 info->unicast_table = str_list;
432 str_list = NULL;
433 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
434 entry = g_malloc0(sizeof(*entry));
435 entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
436 entry->next = str_list;
437 str_list = entry;
439 info->multicast_table = str_list;
440 info->vlan_table = get_vlan_table(n);
442 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
443 info->vlan = RX_STATE_ALL;
444 } else if (!info->vlan_table) {
445 info->vlan = RX_STATE_NONE;
446 } else {
447 info->vlan = RX_STATE_NORMAL;
450 /* enable event notification after query */
451 nc->rxfilter_notify_enabled = 1;
453 return info;
456 static void virtio_net_reset(VirtIODevice *vdev)
458 VirtIONet *n = VIRTIO_NET(vdev);
459 int i;
461 /* Reset back to compatibility mode */
462 n->promisc = 1;
463 n->allmulti = 0;
464 n->alluni = 0;
465 n->nomulti = 0;
466 n->nouni = 0;
467 n->nobcast = 0;
468 /* multiqueue is disabled by default */
469 n->curr_queues = 1;
470 timer_del(n->announce_timer);
471 n->announce_counter = 0;
472 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
474 /* Flush any MAC and VLAN filter table state */
475 n->mac_table.in_use = 0;
476 n->mac_table.first_multi = 0;
477 n->mac_table.multi_overflow = 0;
478 n->mac_table.uni_overflow = 0;
479 memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
480 memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
481 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
482 memset(n->vlans, 0, MAX_VLAN >> 3);
484 /* Flush any async TX */
485 for (i = 0; i < n->max_queues; i++) {
486 NetClientState *nc = qemu_get_subqueue(n->nic, i);
488 if (nc->peer) {
489 qemu_flush_or_purge_queued_packets(nc->peer, true);
490 assert(!virtio_net_get_subqueue(nc)->async_tx.elem);
495 static void peer_test_vnet_hdr(VirtIONet *n)
497 NetClientState *nc = qemu_get_queue(n->nic);
498 if (!nc->peer) {
499 return;
502 n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
505 static int peer_has_vnet_hdr(VirtIONet *n)
507 return n->has_vnet_hdr;
510 static int peer_has_ufo(VirtIONet *n)
512 if (!peer_has_vnet_hdr(n))
513 return 0;
515 n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
517 return n->has_ufo;
520 static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
521 int version_1)
523 int i;
524 NetClientState *nc;
526 n->mergeable_rx_bufs = mergeable_rx_bufs;
528 if (version_1) {
529 n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
530 } else {
531 n->guest_hdr_len = n->mergeable_rx_bufs ?
532 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
533 sizeof(struct virtio_net_hdr);
536 for (i = 0; i < n->max_queues; i++) {
537 nc = qemu_get_subqueue(n->nic, i);
539 if (peer_has_vnet_hdr(n) &&
540 qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
541 qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
542 n->host_hdr_len = n->guest_hdr_len;
547 static int virtio_net_max_tx_queue_size(VirtIONet *n)
549 NetClientState *peer = n->nic_conf.peers.ncs[0];
552 * Backends other than vhost-user don't support max queue size.
554 if (!peer) {
555 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
558 if (peer->info->type != NET_CLIENT_DRIVER_VHOST_USER) {
559 return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
562 return VIRTQUEUE_MAX_SIZE;
565 static int peer_attach(VirtIONet *n, int index)
567 NetClientState *nc = qemu_get_subqueue(n->nic, index);
569 if (!nc->peer) {
570 return 0;
573 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
574 vhost_set_vring_enable(nc->peer, 1);
577 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
578 return 0;
581 if (n->max_queues == 1) {
582 return 0;
585 return tap_enable(nc->peer);
588 static int peer_detach(VirtIONet *n, int index)
590 NetClientState *nc = qemu_get_subqueue(n->nic, index);
592 if (!nc->peer) {
593 return 0;
596 if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
597 vhost_set_vring_enable(nc->peer, 0);
600 if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
601 return 0;
604 return tap_disable(nc->peer);
607 static void virtio_net_set_queues(VirtIONet *n)
609 int i;
610 int r;
612 if (n->nic->peer_deleted) {
613 return;
616 for (i = 0; i < n->max_queues; i++) {
617 if (i < n->curr_queues) {
618 r = peer_attach(n, i);
619 assert(!r);
620 } else {
621 r = peer_detach(n, i);
622 assert(!r);
627 static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
629 static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
630 Error **errp)
632 VirtIONet *n = VIRTIO_NET(vdev);
633 NetClientState *nc = qemu_get_queue(n->nic);
635 /* First, sync all features that virtio-net could possibly support */
636 features |= n->host_features;
638 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
640 if (!peer_has_vnet_hdr(n)) {
641 virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
642 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
643 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
644 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
646 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
647 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
648 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
649 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
652 if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
653 virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
654 virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
657 if (!get_vhost_net(nc->peer)) {
658 return features;
661 features = vhost_net_get_features(get_vhost_net(nc->peer), features);
662 vdev->backend_features = features;
664 if (n->mtu_bypass_backend &&
665 (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
666 features |= (1ULL << VIRTIO_NET_F_MTU);
669 return features;
672 static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
674 uint64_t features = 0;
676 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
677 * but also these: */
678 virtio_add_feature(&features, VIRTIO_NET_F_MAC);
679 virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
680 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
681 virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
682 virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);
684 return features;
687 static void virtio_net_apply_guest_offloads(VirtIONet *n)
689 qemu_set_offload(qemu_get_queue(n->nic)->peer,
690 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
691 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
692 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
693 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
694 !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
697 static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
699 static const uint64_t guest_offloads_mask =
700 (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
701 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
702 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
703 (1ULL << VIRTIO_NET_F_GUEST_ECN) |
704 (1ULL << VIRTIO_NET_F_GUEST_UFO);
706 return guest_offloads_mask & features;
709 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
711 VirtIODevice *vdev = VIRTIO_DEVICE(n);
712 return virtio_net_guest_offloads_by_features(vdev->guest_features);
715 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
717 VirtIONet *n = VIRTIO_NET(vdev);
718 int i;
720 if (n->mtu_bypass_backend &&
721 !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
722 features &= ~(1ULL << VIRTIO_NET_F_MTU);
725 virtio_net_set_multiqueue(n,
726 virtio_has_feature(features, VIRTIO_NET_F_MQ));
728 virtio_net_set_mrg_rx_bufs(n,
729 virtio_has_feature(features,
730 VIRTIO_NET_F_MRG_RXBUF),
731 virtio_has_feature(features,
732 VIRTIO_F_VERSION_1));
734 n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
735 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
736 n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
737 virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
739 if (n->has_vnet_hdr) {
740 n->curr_guest_offloads =
741 virtio_net_guest_offloads_by_features(features);
742 virtio_net_apply_guest_offloads(n);
745 for (i = 0; i < n->max_queues; i++) {
746 NetClientState *nc = qemu_get_subqueue(n->nic, i);
748 if (!get_vhost_net(nc->peer)) {
749 continue;
751 vhost_net_ack_features(get_vhost_net(nc->peer), features);
754 if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
755 memset(n->vlans, 0, MAX_VLAN >> 3);
756 } else {
757 memset(n->vlans, 0xff, MAX_VLAN >> 3);
761 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
762 struct iovec *iov, unsigned int iov_cnt)
764 uint8_t on;
765 size_t s;
766 NetClientState *nc = qemu_get_queue(n->nic);
768 s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
769 if (s != sizeof(on)) {
770 return VIRTIO_NET_ERR;
773 if (cmd == VIRTIO_NET_CTRL_RX_PROMISC) {
774 n->promisc = on;
775 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLMULTI) {
776 n->allmulti = on;
777 } else if (cmd == VIRTIO_NET_CTRL_RX_ALLUNI) {
778 n->alluni = on;
779 } else if (cmd == VIRTIO_NET_CTRL_RX_NOMULTI) {
780 n->nomulti = on;
781 } else if (cmd == VIRTIO_NET_CTRL_RX_NOUNI) {
782 n->nouni = on;
783 } else if (cmd == VIRTIO_NET_CTRL_RX_NOBCAST) {
784 n->nobcast = on;
785 } else {
786 return VIRTIO_NET_ERR;
789 rxfilter_notify(nc);
791 return VIRTIO_NET_OK;
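/*
 * For reference, a guest-built control message that lands here looks
 * like this (a sketch of the wire layout, built by the guest driver,
 * not by code in this file):
 *
 *     struct virtio_net_ctrl_hdr hdr = {
 *         .class = VIRTIO_NET_CTRL_RX,
 *         .cmd   = VIRTIO_NET_CTRL_RX_PROMISC,
 *     };
 *     uint8_t on = 1;              // out_sg[]: hdr, then this payload
 *     virtio_net_ctrl_ack status;  // in_sg[]: ack written by the device
 */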
794 static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
795 struct iovec *iov, unsigned int iov_cnt)
797 VirtIODevice *vdev = VIRTIO_DEVICE(n);
798 uint64_t offloads;
799 size_t s;
801 if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
802 return VIRTIO_NET_ERR;
805 s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
806 if (s != sizeof(offloads)) {
807 return VIRTIO_NET_ERR;
810 if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
811 uint64_t supported_offloads;
813 offloads = virtio_ldq_p(vdev, &offloads);
815 if (!n->has_vnet_hdr) {
816 return VIRTIO_NET_ERR;
819 n->rsc4_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
820 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO4);
821 n->rsc6_enabled = virtio_has_feature(offloads, VIRTIO_NET_F_RSC_EXT) &&
822 virtio_has_feature(offloads, VIRTIO_NET_F_GUEST_TSO6);
823 virtio_clear_feature(&offloads, VIRTIO_NET_F_RSC_EXT);
825 supported_offloads = virtio_net_supported_guest_offloads(n);
826 if (offloads & ~supported_offloads) {
827 return VIRTIO_NET_ERR;
830 n->curr_guest_offloads = offloads;
831 virtio_net_apply_guest_offloads(n);
833 return VIRTIO_NET_OK;
834 } else {
835 return VIRTIO_NET_ERR;
839 static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
840 struct iovec *iov, unsigned int iov_cnt)
842 VirtIODevice *vdev = VIRTIO_DEVICE(n);
843 struct virtio_net_ctrl_mac mac_data;
844 size_t s;
845 NetClientState *nc = qemu_get_queue(n->nic);
847 if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
848 if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
849 return VIRTIO_NET_ERR;
851 s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
852 assert(s == sizeof(n->mac));
853 qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
854 rxfilter_notify(nc);
856 return VIRTIO_NET_OK;
859 if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
860 return VIRTIO_NET_ERR;
863 int in_use = 0;
864 int first_multi = 0;
865 uint8_t uni_overflow = 0;
866 uint8_t multi_overflow = 0;
867 uint8_t *macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
869 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
870 sizeof(mac_data.entries));
871 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
872 if (s != sizeof(mac_data.entries)) {
873 goto error;
875 iov_discard_front(&iov, &iov_cnt, s);
877 if (mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
878 goto error;
881 if (mac_data.entries <= MAC_TABLE_ENTRIES) {
882 s = iov_to_buf(iov, iov_cnt, 0, macs,
883 mac_data.entries * ETH_ALEN);
884 if (s != mac_data.entries * ETH_ALEN) {
885 goto error;
887 in_use += mac_data.entries;
888 } else {
889 uni_overflow = 1;
892 iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);
894 first_multi = in_use;
896 s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
897 sizeof(mac_data.entries));
898 mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
899 if (s != sizeof(mac_data.entries)) {
900 goto error;
903 iov_discard_front(&iov, &iov_cnt, s);
905 if (mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
906 goto error;
909 if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
910 s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
911 mac_data.entries * ETH_ALEN);
912 if (s != mac_data.entries * ETH_ALEN) {
913 goto error;
915 in_use += mac_data.entries;
916 } else {
917 multi_overflow = 1;
920 n->mac_table.in_use = in_use;
921 n->mac_table.first_multi = first_multi;
922 n->mac_table.uni_overflow = uni_overflow;
923 n->mac_table.multi_overflow = multi_overflow;
924 memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
925 g_free(macs);
926 rxfilter_notify(nc);
928 return VIRTIO_NET_OK;
930 error:
931 g_free(macs);
932 return VIRTIO_NET_ERR;
935 static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
936 struct iovec *iov, unsigned int iov_cnt)
938 VirtIODevice *vdev = VIRTIO_DEVICE(n);
939 uint16_t vid;
940 size_t s;
941 NetClientState *nc = qemu_get_queue(n->nic);
943 s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
944 vid = virtio_lduw_p(vdev, &vid);
945 if (s != sizeof(vid)) {
946 return VIRTIO_NET_ERR;
949 if (vid >= MAX_VLAN)
950 return VIRTIO_NET_ERR;
952 if (cmd == VIRTIO_NET_CTRL_VLAN_ADD)
953 n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
954 else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL)
955 n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
956 else
957 return VIRTIO_NET_ERR;
959 rxfilter_notify(nc);
961 return VIRTIO_NET_OK;
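/*
 * Bitmap example for the ADD/DEL operations above: vid 100 lives in
 * word 100 >> 5 == 3, bit 100 & 0x1f == 4, i.e. n->vlans[3] & (1U << 4).
 */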
964 static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
965 struct iovec *iov, unsigned int iov_cnt)
967 if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK &&
968 n->status & VIRTIO_NET_S_ANNOUNCE) {
969 n->status &= ~VIRTIO_NET_S_ANNOUNCE;
970 if (n->announce_counter) {
971 timer_mod(n->announce_timer,
972 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
973 self_announce_delay(n->announce_counter));
975 return VIRTIO_NET_OK;
976 } else {
977 return VIRTIO_NET_ERR;
981 static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
982 struct iovec *iov, unsigned int iov_cnt)
984 VirtIODevice *vdev = VIRTIO_DEVICE(n);
985 struct virtio_net_ctrl_mq mq;
986 size_t s;
987 uint16_t queues;
989 s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
990 if (s != sizeof(mq)) {
991 return VIRTIO_NET_ERR;
994 if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
995 return VIRTIO_NET_ERR;
998 queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);
1000 if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1001 queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
1002 queues > n->max_queues ||
1003 !n->multiqueue) {
1004 return VIRTIO_NET_ERR;
1007 n->curr_queues = queues;
1008 /* stop the backend before changing the number of queues to avoid handling a
1009 * disabled queue */
1010 virtio_net_set_status(vdev, vdev->status);
1011 virtio_net_set_queues(n);
1013 return VIRTIO_NET_OK;
1016 static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
1018 VirtIONet *n = VIRTIO_NET(vdev);
1019 struct virtio_net_ctrl_hdr ctrl;
1020 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1021 VirtQueueElement *elem;
1022 size_t s;
1023 struct iovec *iov, *iov2;
1024 unsigned int iov_cnt;
1026 for (;;) {
1027 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
1028 if (!elem) {
1029 break;
1031 if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
1032 iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
1033 virtio_error(vdev, "virtio-net ctrl missing headers");
1034 virtqueue_detach_element(vq, elem, 0);
1035 g_free(elem);
1036 break;
1039 iov_cnt = elem->out_num;
1040 iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
1041 s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
1042 iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
1043 if (s != sizeof(ctrl)) {
1044 status = VIRTIO_NET_ERR;
1045 } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
1046 status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
1047 } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
1048 status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
1049 } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
1050 status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
1051 } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
1052 status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
1053 } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
1054 status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
1055 } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
1056 status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
1059 s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
1060 assert(s == sizeof(status));
1062 virtqueue_push(vq, elem, sizeof(status));
1063 virtio_notify(vdev, vq);
1064 g_free(iov2);
1065 g_free(elem);
1069 /* RX */
1071 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
1073 VirtIONet *n = VIRTIO_NET(vdev);
1074 int queue_index = vq2q(virtio_get_queue_index(vq));
1076 qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
1079 static int virtio_net_can_receive(NetClientState *nc)
1081 VirtIONet *n = qemu_get_nic_opaque(nc);
1082 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1083 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1085 if (!vdev->vm_running) {
1086 return 0;
1089 if (nc->queue_index >= n->curr_queues) {
1090 return 0;
1093 if (!virtio_queue_ready(q->rx_vq) ||
1094 !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1095 return 0;
1098 return 1;
1101 static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
1103 VirtIONet *n = q->n;
1104 if (virtio_queue_empty(q->rx_vq) ||
1105 (n->mergeable_rx_bufs &&
1106 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1107 virtio_queue_set_notification(q->rx_vq, 1);
1109 /* To avoid a race condition where the guest has made some buffers
1110 * available after the above check but before notification was
1111 * enabled, check for available buffers again.
1113 if (virtio_queue_empty(q->rx_vq) ||
1114 (n->mergeable_rx_bufs &&
1115 !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
1116 return 0;
1120 virtio_queue_set_notification(q->rx_vq, 0);
1121 return 1;
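/*
 * The double check above is the canonical virtio notification race
 * fix; the same pattern, sketched for any virtqueue consumer:
 *
 *     if (virtio_queue_empty(vq)) {
 *         virtio_queue_set_notification(vq, 1);   // ask to be kicked
 *         if (virtio_queue_empty(vq)) {           // re-check: buffers may
 *             return 0;                           // have raced in before
 *         }                                       // notification was on
 *     }
 *     virtio_queue_set_notification(vq, 0);       // back to polling
 */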
1124 static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
1126 virtio_tswap16s(vdev, &hdr->hdr_len);
1127 virtio_tswap16s(vdev, &hdr->gso_size);
1128 virtio_tswap16s(vdev, &hdr->csum_start);
1129 virtio_tswap16s(vdev, &hdr->csum_offset);
1132 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
1133 * it never finds out that the packets don't have valid checksums. This
1134 * causes dhclient to get upset. Fedora's carried a patch for ages to
1135 * fix this with Xen but it hasn't appeared in an upstream release of
1136 * dhclient yet.
1138 * To avoid breaking existing guests, we catch udp packets and add
1139 * checksums. This is terrible but it's better than hacking the guest
1140 * kernels.
1142 * N.B. if we introduce a zero-copy API, this operation is no longer free so
1143 * we should provide a mechanism to disable it to avoid polluting the host
1144 * cache.
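 *
 * The magic offsets below assume an untagged Ethernet frame with a
 * 20-byte IPv4 header: buf[12..13] is the ethertype, buf[23] the IPv4
 * protocol field, and buf[34..35] the UDP source port (67 == bootps,
 * i.e. a DHCP server reply).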
1146 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
1147 uint8_t *buf, size_t size)
1149 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
1150 (size > 27 && size < 1500) && /* normal sized MTU */
1151 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
1152 (buf[23] == 17) && /* ip.protocol == UDP */
1153 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
1154 net_checksum_calculate(buf, size);
1155 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
1159 static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
1160 const void *buf, size_t size)
1162 if (n->has_vnet_hdr) {
1163 /* FIXME this cast is evil */
1164 void *wbuf = (void *)buf;
1165 work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
1166 size - n->host_hdr_len);
1168 if (n->needs_vnet_hdr_swap) {
1169 virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
1171 iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
1172 } else {
1173 struct virtio_net_hdr hdr = {
1174 .flags = 0,
1175 .gso_type = VIRTIO_NET_HDR_GSO_NONE
1177 iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
1181 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
1183 static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
1184 static const uint8_t vlan[] = {0x81, 0x00};
1185 uint8_t *ptr = (uint8_t *)buf;
1186 int i;
1188 if (n->promisc)
1189 return 1;
1191 ptr += n->host_hdr_len;
1193 if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
1194 int vid = lduw_be_p(ptr + 14) & 0xfff;
1195 if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f))))
1196 return 0;
1199 if (ptr[0] & 1) { // multicast
1200 if (!memcmp(ptr, bcast, sizeof(bcast))) {
1201 return !n->nobcast;
1202 } else if (n->nomulti) {
1203 return 0;
1204 } else if (n->allmulti || n->mac_table.multi_overflow) {
1205 return 1;
1208 for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
1209 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1210 return 1;
1213 } else { // unicast
1214 if (n->nouni) {
1215 return 0;
1216 } else if (n->alluni || n->mac_table.uni_overflow) {
1217 return 1;
1218 } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
1219 return 1;
1222 for (i = 0; i < n->mac_table.first_multi; i++) {
1223 if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
1224 return 1;
1229 return 0;
1232 static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
1233 size_t size)
1235 VirtIONet *n = qemu_get_nic_opaque(nc);
1236 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1237 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1238 struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
1239 struct virtio_net_hdr_mrg_rxbuf mhdr;
1240 unsigned mhdr_cnt = 0;
1241 size_t offset, i, guest_offset;
1243 if (!virtio_net_can_receive(nc)) {
1244 return -1;
1247 /* hdr_len refers to the header we supply to the guest */
1248 if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
1249 return 0;
1252 if (!receive_filter(n, buf, size))
1253 return size;
1255 offset = i = 0;
1257 while (offset < size) {
1258 VirtQueueElement *elem;
1259 int len, total;
1260 const struct iovec *sg;
1262 total = 0;
1264 elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
1265 if (!elem) {
1266 if (i) {
1267 virtio_error(vdev, "virtio-net unexpected empty queue: "
1268 "i %zd mergeable %d offset %zd, size %zd, "
1269 "guest hdr len %zd, host hdr len %zd "
1270 "guest features 0x%" PRIx64,
1271 i, n->mergeable_rx_bufs, offset, size,
1272 n->guest_hdr_len, n->host_hdr_len,
1273 vdev->guest_features);
1275 return -1;
1278 if (elem->in_num < 1) {
1279 virtio_error(vdev,
1280 "virtio-net receive queue contains no in buffers");
1281 virtqueue_detach_element(q->rx_vq, elem, 0);
1282 g_free(elem);
1283 return -1;
1286 sg = elem->in_sg;
1287 if (i == 0) {
1288 assert(offset == 0);
1289 if (n->mergeable_rx_bufs) {
1290 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
1291 sg, elem->in_num,
1292 offsetof(typeof(mhdr), num_buffers),
1293 sizeof(mhdr.num_buffers));
1296 receive_header(n, sg, elem->in_num, buf, size);
1297 offset = n->host_hdr_len;
1298 total += n->guest_hdr_len;
1299 guest_offset = n->guest_hdr_len;
1300 } else {
1301 guest_offset = 0;
1304 /* copy in packet. ugh */
1305 len = iov_from_buf(sg, elem->in_num, guest_offset,
1306 buf + offset, size - offset);
1307 total += len;
1308 offset += len;
1309 /* If buffers can't be merged, at this point we
1310 * must have consumed the complete packet.
1311 * Otherwise, drop it. */
1312 if (!n->mergeable_rx_bufs && offset < size) {
1313 virtqueue_unpop(q->rx_vq, elem, total);
1314 g_free(elem);
1315 return size;
1318 /* signal other side */
1319 virtqueue_fill(q->rx_vq, elem, total, i++);
1320 g_free(elem);
1323 if (mhdr_cnt) {
1324 virtio_stw_p(vdev, &mhdr.num_buffers, i);
1325 iov_from_buf(mhdr_sg, mhdr_cnt,
1327 &mhdr.num_buffers, sizeof mhdr.num_buffers);
1330 virtqueue_flush(q->rx_vq, i);
1331 virtio_notify(vdev, q->rx_vq);
1333 return size;
1336 static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
1337 size_t size)
1339 ssize_t r;
1341 rcu_read_lock();
1342 r = virtio_net_receive_rcu(nc, buf, size);
1343 rcu_read_unlock();
1344 return r;
1347 static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
1348 const uint8_t *buf,
1349 VirtioNetRscUnit *unit)
1351 uint16_t ip_hdrlen;
1352 struct ip_header *ip;
1354 ip = (struct ip_header *)(buf + chain->n->guest_hdr_len
1355 + sizeof(struct eth_header));
1356 unit->ip = (void *)ip;
1357 ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
1358 unit->ip_plen = &ip->ip_len;
1359 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
1360 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1361 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
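/*
 * In both extract helpers, th_offset_flags keeps the TCP data offset in
 * its top 4 bits, counted in 32-bit words, so the header length in bytes
 * is ((flags & 0xF000) >> 12) * 4, which the single ">> 10" shift above
 * (and in the IPv6 variant below) computes in one step.
 */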
1364 static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain,
1365 const uint8_t *buf,
1366 VirtioNetRscUnit *unit)
1368 struct ip6_header *ip6;
1370 ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
1371 + sizeof(struct eth_header));
1372 unit->ip = ip6;
1373 unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1374 unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
1375 + sizeof(struct ip6_header));
1376 unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
1378 /* The payload length differs between IPv4 and IPv6:
1379 in IPv6 it excludes the IP header itself */
1380 unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
1383 static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain,
1384 VirtioNetRscSeg *seg)
1386 int ret;
1387 struct virtio_net_hdr *h;
1389 h = (struct virtio_net_hdr *)seg->buf;
1390 h->flags = 0;
1391 h->gso_type = VIRTIO_NET_HDR_GSO_NONE;
1393 if (seg->is_coalesced) {
1394 *virtio_net_rsc_ext_num_packets(h) = seg->packets;
1395 *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack;
1396 h->flags = VIRTIO_NET_HDR_F_RSC_INFO;
1397 if (chain->proto == ETH_P_IP) {
1398 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1399 } else {
1400 h->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1404 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
1405 QTAILQ_REMOVE(&chain->buffers, seg, next);
1406 g_free(seg->buf);
1407 g_free(seg);
1409 return ret;
1412 static void virtio_net_rsc_purge(void *opq)
1414 VirtioNetRscSeg *seg, *rn;
1415 VirtioNetRscChain *chain = (VirtioNetRscChain *)opq;
1417 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
1418 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1419 chain->stat.purge_failed++;
1420 continue;
1424 chain->stat.timer++;
1425 if (!QTAILQ_EMPTY(&chain->buffers)) {
1426 timer_mod(chain->drain_timer,
1427 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1431 static void virtio_net_rsc_cleanup(VirtIONet *n)
1433 VirtioNetRscChain *chain, *rn_chain;
1434 VirtioNetRscSeg *seg, *rn_seg;
1436 QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
1437 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
1438 QTAILQ_REMOVE(&chain->buffers, seg, next);
1439 g_free(seg->buf);
1440 g_free(seg);
1443 timer_del(chain->drain_timer);
1444 timer_free(chain->drain_timer);
1445 QTAILQ_REMOVE(&n->rsc_chains, chain, next);
1446 g_free(chain);
1450 static void virtio_net_rsc_cache_buf(VirtioNetRscChain *chain,
1451 NetClientState *nc,
1452 const uint8_t *buf, size_t size)
1454 uint16_t hdr_len;
1455 VirtioNetRscSeg *seg;
1457 hdr_len = chain->n->guest_hdr_len;
1458 seg = g_malloc(sizeof(VirtioNetRscSeg));
1459 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)
1460 + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
1461 memcpy(seg->buf, buf, size);
1462 seg->size = size;
1463 seg->packets = 1;
1464 seg->dup_ack = 0;
1465 seg->is_coalesced = 0;
1466 seg->nc = nc;
1468 QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
1469 chain->stat.cache++;
1471 switch (chain->proto) {
1472 case ETH_P_IP:
1473 virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
1474 break;
1475 case ETH_P_IPV6:
1476 virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
1477 break;
1478 default:
1479 g_assert_not_reached();
1483 static int32_t virtio_net_rsc_handle_ack(VirtioNetRscChain *chain,
1484 VirtioNetRscSeg *seg,
1485 const uint8_t *buf,
1486 struct tcp_header *n_tcp,
1487 struct tcp_header *o_tcp)
1489 uint32_t nack, oack;
1490 uint16_t nwin, owin;
1492 nack = htonl(n_tcp->th_ack);
1493 nwin = htons(n_tcp->th_win);
1494 oack = htonl(o_tcp->th_ack);
1495 owin = htons(o_tcp->th_win);
1497 if ((nack - oack) >= VIRTIO_NET_MAX_TCP_PAYLOAD) {
1498 chain->stat.ack_out_of_win++;
1499 return RSC_FINAL;
1500 } else if (nack == oack) {
1501 /* duplicated ack or window probe */
1502 if (nwin == owin) {
1503 /* duplicated ack; bump the dup-ack count (the WHQL test expects up to 1) */
1504 chain->stat.dup_ack++;
1505 return RSC_FINAL;
1506 } else {
1507 /* Coalesce window update */
1508 o_tcp->th_win = n_tcp->th_win;
1509 chain->stat.win_update++;
1510 return RSC_COALESCE;
1512 } else {
1513 /* pure ack, go to 'C': finalize */
1514 chain->stat.pure_ack++;
1515 return RSC_FINAL;
1519 static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain,
1520 VirtioNetRscSeg *seg,
1521 const uint8_t *buf,
1522 VirtioNetRscUnit *n_unit)
1524 void *data;
1525 uint16_t o_ip_len;
1526 uint32_t nseq, oseq;
1527 VirtioNetRscUnit *o_unit;
1529 o_unit = &seg->unit;
1530 o_ip_len = htons(*o_unit->ip_plen);
1531 nseq = htonl(n_unit->tcp->th_seq);
1532 oseq = htonl(o_unit->tcp->th_seq);
1534 /* out of order or retransmitted. */
1535 if ((nseq - oseq) > VIRTIO_NET_MAX_TCP_PAYLOAD) {
1536 chain->stat.data_out_of_win++;
1537 return RSC_FINAL;
1540 data = ((uint8_t *)n_unit->tcp) + n_unit->tcp_hdrlen;
1541 if (nseq == oseq) {
1542 if ((o_unit->payload == 0) && n_unit->payload) {
1543 /* From no payload to payload: the normal case, not a dup ack etc. */
1544 chain->stat.data_after_pure_ack++;
1545 goto coalesce;
1546 } else {
1547 return virtio_net_rsc_handle_ack(chain, seg, buf,
1548 n_unit->tcp, o_unit->tcp);
1550 } else if ((nseq - oseq) != o_unit->payload) {
1551 /* Not a consecutive packet; out of order */
1552 chain->stat.data_out_of_order++;
1553 return RSC_FINAL;
1554 } else {
1555 coalesce:
1556 if ((o_ip_len + n_unit->payload) > chain->max_payload) {
1557 chain->stat.over_size++;
1558 return RSC_FINAL;
1561 /* This is the expected data. The payload length field differs between
1562 v4 and v6, so use the field value to update and record the new data len */
1563 o_unit->payload += n_unit->payload; /* update new data len */
1565 /* update field in ip header */
1566 *o_unit->ip_plen = htons(o_ip_len + n_unit->payload);
1568 /* Carry the 'PUSH' flag over: the WHQL test guide says 'PUSH' can be
1569 coalesced for a Windows guest, while this may change the behavior of a
1570 Linux guest (only if it uses the RSC feature). */
1571 o_unit->tcp->th_offset_flags = n_unit->tcp->th_offset_flags;
1573 o_unit->tcp->th_ack = n_unit->tcp->th_ack;
1574 o_unit->tcp->th_win = n_unit->tcp->th_win;
1576 memmove(seg->buf + seg->size, data, n_unit->payload);
1577 seg->size += n_unit->payload;
1578 seg->packets++;
1579 chain->stat.coalesced++;
1580 return RSC_COALESCE;
1584 static int32_t virtio_net_rsc_coalesce4(VirtioNetRscChain *chain,
1585 VirtioNetRscSeg *seg,
1586 const uint8_t *buf, size_t size,
1587 VirtioNetRscUnit *unit)
1589 struct ip_header *ip1, *ip2;
1591 ip1 = (struct ip_header *)(unit->ip);
1592 ip2 = (struct ip_header *)(seg->unit.ip);
1593 if ((ip1->ip_src ^ ip2->ip_src) || (ip1->ip_dst ^ ip2->ip_dst)
1594 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1595 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1596 chain->stat.no_match++;
1597 return RSC_NO_MATCH;
1600 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1603 static int32_t virtio_net_rsc_coalesce6(VirtioNetRscChain *chain,
1604 VirtioNetRscSeg *seg,
1605 const uint8_t *buf, size_t size,
1606 VirtioNetRscUnit *unit)
1608 struct ip6_header *ip1, *ip2;
1610 ip1 = (struct ip6_header *)(unit->ip);
1611 ip2 = (struct ip6_header *)(seg->unit.ip);
1612 if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
1613 || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
1614 || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
1615 || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
1616 chain->stat.no_match++;
1617 return RSC_NO_MATCH;
1620 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
1623 /* Packets with 'SYN' should bypass; packets with other control flags
1624 * should be sent only after a drain, to prevent out-of-order delivery */
1625 static int virtio_net_rsc_tcp_ctrl_check(VirtioNetRscChain *chain,
1626 struct tcp_header *tcp)
1628 uint16_t tcp_hdr;
1629 uint16_t tcp_flag;
1631 tcp_flag = htons(tcp->th_offset_flags);
1632 tcp_hdr = (tcp_flag & VIRTIO_NET_TCP_HDR_LENGTH) >> 10;
1633 tcp_flag &= VIRTIO_NET_TCP_FLAG;
1634 tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
1635 if (tcp_flag & TH_SYN) {
1636 chain->stat.tcp_syn++;
1637 return RSC_BYPASS;
1640 if (tcp_flag & (TH_FIN | TH_URG | TH_RST | TH_ECE | TH_CWR)) {
1641 chain->stat.tcp_ctrl_drain++;
1642 return RSC_FINAL;
1645 if (tcp_hdr > sizeof(struct tcp_header)) {
1646 chain->stat.tcp_all_opt++;
1647 return RSC_FINAL;
1650 return RSC_CANDIDATE;
1653 static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
1654 NetClientState *nc,
1655 const uint8_t *buf, size_t size,
1656 VirtioNetRscUnit *unit)
1658 int ret;
1659 VirtioNetRscSeg *seg, *nseg;
1661 if (QTAILQ_EMPTY(&chain->buffers)) {
1662 chain->stat.empty_cache++;
1663 virtio_net_rsc_cache_buf(chain, nc, buf, size);
1664 timer_mod(chain->drain_timer,
1665 qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
1666 return size;
1669 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1670 if (chain->proto == ETH_P_IP) {
1671 ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
1672 } else {
1673 ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
1676 if (ret == RSC_FINAL) {
1677 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1678 /* Send failed */
1679 chain->stat.final_failed++;
1680 return 0;
1683 /* Send current packet */
1684 return virtio_net_do_receive(nc, buf, size);
1685 } else if (ret == RSC_NO_MATCH) {
1686 continue;
1687 } else {
1688 /* Coalesced; set the flag so the IPv4 checksum gets recalculated */
1689 seg->is_coalesced = 1;
1690 return size;
1694 chain->stat.no_match_cache++;
1695 virtio_net_rsc_cache_buf(chain, nc, buf, size);
1696 return size;
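/*
 * Summary of the outcomes above: RSC_FINAL drains the matching segment
 * and delivers the new packet immediately; RSC_NO_MATCH keeps scanning
 * the chain (and caches the packet if nothing matches); any other
 * return means the payload was merged into an existing segment, so the
 * packet is swallowed here and surfaces later as one coalesced frame.
 */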
1699 /* Drain a connection's data; this avoids out-of-order segments */
1700 static size_t virtio_net_rsc_drain_flow(VirtioNetRscChain *chain,
1701 NetClientState *nc,
1702 const uint8_t *buf, size_t size,
1703 uint16_t ip_start, uint16_t ip_size,
1704 uint16_t tcp_port)
1706 VirtioNetRscSeg *seg, *nseg;
1707 uint32_t ppair1, ppair2;
1709 ppair1 = *(uint32_t *)(buf + tcp_port);
1710 QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
1711 ppair2 = *(uint32_t *)(seg->buf + tcp_port);
1712 if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
1713 || (ppair1 != ppair2)) {
1714 continue;
1716 if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
1717 chain->stat.drain_failed++;
1720 break;
1723 return virtio_net_do_receive(nc, buf, size);
1726 static int32_t virtio_net_rsc_sanity_check4(VirtioNetRscChain *chain,
1727 struct ip_header *ip,
1728 const uint8_t *buf, size_t size)
1730 uint16_t ip_len;
1732 /* Not an ipv4 packet */
1733 if (((ip->ip_ver_len & 0xF0) >> 4) != IP_HEADER_VERSION_4) {
1734 chain->stat.ip_option++;
1735 return RSC_BYPASS;
1738 /* Don't handle packets with ip option */
1739 if ((ip->ip_ver_len & 0xF) != VIRTIO_NET_IP4_HEADER_LENGTH) {
1740 chain->stat.ip_option++;
1741 return RSC_BYPASS;
1744 if (ip->ip_p != IPPROTO_TCP) {
1745 chain->stat.bypass_not_tcp++;
1746 return RSC_BYPASS;
1749 /* Don't handle packets with ip fragment */
1750 if (!(htons(ip->ip_off) & IP_DF)) {
1751 chain->stat.ip_frag++;
1752 return RSC_BYPASS;
1755 /* Don't handle packets with ecn flag */
1756 if (IPTOS_ECN(ip->ip_tos)) {
1757 chain->stat.ip_ecn++;
1758 return RSC_BYPASS;
1761 ip_len = htons(ip->ip_len);
1762 if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
1763 || ip_len > (size - chain->n->guest_hdr_len -
1764 sizeof(struct eth_header))) {
1765 chain->stat.ip_hacked++;
1766 return RSC_BYPASS;
1769 return RSC_CANDIDATE;
1772 static size_t virtio_net_rsc_receive4(VirtioNetRscChain *chain,
1773 NetClientState *nc,
1774 const uint8_t *buf, size_t size)
1776 int32_t ret;
1777 uint16_t hdr_len;
1778 VirtioNetRscUnit unit;
1780 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1782 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header)
1783 + sizeof(struct tcp_header))) {
1784 chain->stat.bypass_not_tcp++;
1785 return virtio_net_do_receive(nc, buf, size);
1788 virtio_net_rsc_extract_unit4(chain, buf, &unit);
1789 if (virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)
1790 != RSC_CANDIDATE) {
1791 return virtio_net_do_receive(nc, buf, size);
1794 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1795 if (ret == RSC_BYPASS) {
1796 return virtio_net_do_receive(nc, buf, size);
1797 } else if (ret == RSC_FINAL) {
1798 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1799 ((hdr_len + sizeof(struct eth_header)) + 12),
1800 VIRTIO_NET_IP4_ADDR_SIZE,
1801 hdr_len + sizeof(struct eth_header) + sizeof(struct ip_header));
1804 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1807 static int32_t virtio_net_rsc_sanity_check6(VirtioNetRscChain *chain,
1808 struct ip6_header *ip6,
1809 const uint8_t *buf, size_t size)
1811 uint16_t ip_len;
1813 if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
1814 != IP_HEADER_VERSION_6) {
1815 return RSC_BYPASS;
1818 /* Both options and the protocol are checked here */
1819 if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
1820 chain->stat.bypass_not_tcp++;
1821 return RSC_BYPASS;
1824 ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
1825 if (ip_len < sizeof(struct tcp_header) ||
1826 ip_len > (size - chain->n->guest_hdr_len - sizeof(struct eth_header)
1827 - sizeof(struct ip6_header))) {
1828 chain->stat.ip_hacked++;
1829 return RSC_BYPASS;
1832 /* Don't handle packets with ecn flag */
1833 if (IP6_ECN(ip6->ip6_ctlun.ip6_un3.ip6_un3_ecn)) {
1834 chain->stat.ip_ecn++;
1835 return RSC_BYPASS;
1838 return RSC_CANDIDATE;
1841 static size_t virtio_net_rsc_receive6(void *opq, NetClientState *nc,
1842 const uint8_t *buf, size_t size)
1844 int32_t ret;
1845 uint16_t hdr_len;
1846 VirtioNetRscChain *chain;
1847 VirtioNetRscUnit unit;
1849 chain = (VirtioNetRscChain *)opq;
1850 hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
1852 if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header)
1853 + sizeof(struct tcp_header))) {
1854 return virtio_net_do_receive(nc, buf, size);
1857 virtio_net_rsc_extract_unit6(chain, buf, &unit);
1858 if (RSC_CANDIDATE != virtio_net_rsc_sanity_check6(chain,
1859 unit.ip, buf, size)) {
1860 return virtio_net_do_receive(nc, buf, size);
1863 ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
1864 if (ret == RSC_BYPASS) {
1865 return virtio_net_do_receive(nc, buf, size);
1866 } else if (ret == RSC_FINAL) {
1867 return virtio_net_rsc_drain_flow(chain, nc, buf, size,
1868 ((hdr_len + sizeof(struct eth_header)) + 8),
1869 VIRTIO_NET_IP6_ADDR_SIZE,
1870 hdr_len + sizeof(struct eth_header)
1871 + sizeof(struct ip6_header));
1874 return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
1877 static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n,
1878 NetClientState *nc,
1879 uint16_t proto)
1881 VirtioNetRscChain *chain;
1883 if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
1884 return NULL;
1887 QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
1888 if (chain->proto == proto) {
1889 return chain;
1893 chain = g_malloc(sizeof(*chain));
1894 chain->n = n;
1895 chain->proto = proto;
1896 if (proto == (uint16_t)ETH_P_IP) {
1897 chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
1898 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1899 } else {
1900 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
1901 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1903 chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
1904 virtio_net_rsc_purge, chain);
1905 memset(&chain->stat, 0, sizeof(chain->stat));
1907 QTAILQ_INIT(&chain->buffers);
1908 QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
1910 return chain;
1913 static ssize_t virtio_net_rsc_receive(NetClientState *nc,
1914 const uint8_t *buf,
1915 size_t size)
1917 uint16_t proto;
1918 VirtioNetRscChain *chain;
1919 struct eth_header *eth;
1920 VirtIONet *n;
1922 n = qemu_get_nic_opaque(nc);
1923 if (size < (n->host_hdr_len + sizeof(struct eth_header))) {
1924 return virtio_net_do_receive(nc, buf, size);
1927 eth = (struct eth_header *)(buf + n->guest_hdr_len);
1928 proto = htons(eth->h_proto);
1930 chain = virtio_net_rsc_lookup_chain(n, nc, proto);
1931 if (chain) {
1932 chain->stat.received++;
1933 if (proto == (uint16_t)ETH_P_IP && n->rsc4_enabled) {
1934 return virtio_net_rsc_receive4(chain, nc, buf, size);
1935 } else if (proto == (uint16_t)ETH_P_IPV6 && n->rsc6_enabled) {
1936 return virtio_net_rsc_receive6(chain, nc, buf, size);
1939 return virtio_net_do_receive(nc, buf, size);
1942 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf,
1943 size_t size)
1945 VirtIONet *n = qemu_get_nic_opaque(nc);
1946 if ((n->rsc4_enabled || n->rsc6_enabled)) {
1947 return virtio_net_rsc_receive(nc, buf, size);
1948 } else {
1949 return virtio_net_do_receive(nc, buf, size);
1953 static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1955 static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
1957 VirtIONet *n = qemu_get_nic_opaque(nc);
1958 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
1959 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1961 virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
1962 virtio_notify(vdev, q->tx_vq);
1964 g_free(q->async_tx.elem);
1965 q->async_tx.elem = NULL;
1967 virtio_queue_set_notification(q->tx_vq, 1);
1968 virtio_net_flush_tx(q);
1971 /* TX */
1972 static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
1974 VirtIONet *n = q->n;
1975 VirtIODevice *vdev = VIRTIO_DEVICE(n);
1976 VirtQueueElement *elem;
1977 int32_t num_packets = 0;
1978 int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
1979 if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
1980 return num_packets;
1983 if (q->async_tx.elem) {
1984 virtio_queue_set_notification(q->tx_vq, 0);
1985 return num_packets;
1988 for (;;) {
1989 ssize_t ret;
1990 unsigned int out_num;
1991 struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
1992 struct virtio_net_hdr_mrg_rxbuf mhdr;
1994 elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
1995 if (!elem) {
1996 break;
1999 out_num = elem->out_num;
2000 out_sg = elem->out_sg;
2001 if (out_num < 1) {
2002 virtio_error(vdev, "virtio-net header not in first element");
2003 virtqueue_detach_element(q->tx_vq, elem, 0);
2004 g_free(elem);
2005 return -EINVAL;
2008 if (n->has_vnet_hdr) {
2009 if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
2010 n->guest_hdr_len) {
2011 virtio_error(vdev, "virtio-net header incorrect");
2012 virtqueue_detach_element(q->tx_vq, elem, 0);
2013 g_free(elem);
2014 return -EINVAL;
2016 if (n->needs_vnet_hdr_swap) {
2017 virtio_net_hdr_swap(vdev, (void *) &mhdr);
2018 sg2[0].iov_base = &mhdr;
2019 sg2[0].iov_len = n->guest_hdr_len;
2020 out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
2021 out_sg, out_num,
2022 n->guest_hdr_len, -1);
2023 if (out_num == VIRTQUEUE_MAX_SIZE) {
2024 goto drop;
2026 out_num += 1;
2027 out_sg = sg2;
2031 * If host wants to see the guest header as is, we can
2032 * pass it on unchanged. Otherwise, copy just the parts
2033 * that host is interested in.
2035 assert(n->host_hdr_len <= n->guest_hdr_len);
2036 if (n->host_hdr_len != n->guest_hdr_len) {
2037 unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
2038 out_sg, out_num,
2039 0, n->host_hdr_len);
2040 sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
2041 out_sg, out_num,
2042 n->guest_hdr_len, -1);
2043 out_num = sg_num;
2044 out_sg = sg;
2047 ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
2048 out_sg, out_num, virtio_net_tx_complete);
2049 if (ret == 0) {
2050 virtio_queue_set_notification(q->tx_vq, 0);
2051 q->async_tx.elem = elem;
2052 return -EBUSY;
2055 drop:
2056 virtqueue_push(q->tx_vq, elem, 0);
2057 virtio_notify(vdev, q->tx_vq);
2058 g_free(elem);
2060 if (++num_packets >= n->tx_burst) {
2061 break;
2064 return num_packets;
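/*
 * A zero return from qemu_sendv_packet_async() above means the packet
 * was queued for asynchronous completion: the element is parked in
 * q->async_tx.elem and the ring stalls until virtio_net_tx_complete()
 * pushes it back and restarts the flush.
 */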
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

static void virtio_net_tx_timer(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more. */
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

static void virtio_net_tx_bh(void *opaque)
{
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }

    q->tx_waiting = 0;

    /* Just in case the driver is not ready any more. */
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    ret = virtio_net_flush_tx(q);
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
    virtio_queue_set_notification(q->tx_vq, 1);
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
    }
}

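/*
 * Virtqueue layout: queue 2*i is RX and 2*i+1 is TX of queue pair i,
 * with the control virtqueue added after all pairs (see vq2q() and
 * virtio_net_change_num_queues()).
 */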
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);

    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, n->net_conf.tx_queue_size,
                             virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int i;

    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (old_num_queues == new_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    for (i = new_num_queues - 1; i < old_num_queues - 1; i += 2) {
        /* new_num_queues < old_num_queues */
        virtio_net_del_queue(n, i / 2);
    }

    for (i = old_num_queues - 1; i < new_num_queues - 1; i += 2) {
        /* new_num_queues > old_num_queues */
        virtio_net_add_queue(n, i / 2);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

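/*
 * Re-derive runtime state that is not carried in the migration
 * stream, e.g. the per-subqueue link state and the index of the
 * first multicast entry in the MAC filter table.
 */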
static int virtio_net_post_load_device(void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, link_down;

    virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
        n->mac_table.in_use = 0;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

/* tx_waiting field of a VirtIONetQueue */
static const VMStateDescription vmstate_virtio_net_queue_tx_waiting = {
    .name = "virtio-net-queue-tx_waiting",
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(tx_waiting, VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

static bool max_queues_gt_1(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->max_queues > 1;
}

static bool has_ctrl_guest_offloads(void *opaque, int version_id)
{
    return virtio_vdev_has_feature(VIRTIO_DEVICE(opaque),
                                   VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
}

static bool mac_table_fits(void *opaque, int version_id)
{
    return VIRTIO_NET(opaque)->mac_table.in_use <= MAC_TABLE_ENTRIES;
}

static bool mac_table_doesnt_fit(void *opaque, int version_id)
{
    return !mac_table_fits(opaque, version_id);
}

/* This temporary type is shared by all the WITH_TMP methods
 * although only some fields are used by each.
 */
struct VirtIONetMigTmp {
    VirtIONet      *parent;
    VirtIONetQueue *vqs_1;
    uint16_t        curr_queues_1;
    uint8_t         has_ufo;
    uint32_t        has_vnet_hdr;
};

/* The 2nd and subsequent tx_waiting flags are loaded later than
 * the 1st entry in the queues and only if there's more than one
 * entry.  We use the tmp mechanism to calculate a temporary
 * pointer and count and also validate the count.
 */

static int virtio_net_tx_waiting_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->vqs_1 = tmp->parent->vqs + 1;
    tmp->curr_queues_1 = tmp->parent->curr_queues - 1;
    if (tmp->parent->curr_queues == 0) {
        tmp->curr_queues_1 = 0;
    }

    return 0;
}

static int virtio_net_tx_waiting_pre_load(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    /* Reuse the pointer setup from save */
    virtio_net_tx_waiting_pre_save(opaque);

    if (tmp->parent->curr_queues > tmp->parent->max_queues) {
        error_report("virtio-net: curr_queues %x > max_queues %x",
                     tmp->parent->curr_queues, tmp->parent->max_queues);

        return -EINVAL;
    }

    return 0; /* all good */
}

static const VMStateDescription vmstate_virtio_net_tx_waiting = {
    .name      = "virtio-net-tx_waiting",
    .pre_load  = virtio_net_tx_waiting_pre_load,
    .pre_save  = virtio_net_tx_waiting_pre_save,
    .fields    = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(vqs_1, struct VirtIONetMigTmp,
                                             curr_queues_1,
                                             vmstate_virtio_net_queue_tx_waiting,
                                             struct VirtIONetQueue),
        VMSTATE_END_OF_LIST()
    },
};

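/*
 * The VARRAY above saves curr_queues_1 VirtIONetQueue entries
 * starting at vqs_1, i.e. the tx_waiting flags of queue pairs
 * 1..curr_queues-1; pair 0 is saved unconditionally by
 * vmstate_virtio_net_device below.
 */
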
/* the 'has_ufo' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_ufo_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_ufo && !peer_has_ufo(tmp->parent)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_ufo_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_ufo = tmp->parent->has_ufo;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_ufo = {
    .name = "virtio-net-ufo",
    .post_load = virtio_net_ufo_post_load,
    .pre_save = virtio_net_ufo_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(has_ufo, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

/* the 'has_vnet_hdr' flag is just tested; if the incoming stream has the
 * flag set we need to check that we have it
 */
static int virtio_net_vnet_post_load(void *opaque, int version_id)
{
    struct VirtIONetMigTmp *tmp = opaque;

    if (tmp->has_vnet_hdr && !peer_has_vnet_hdr(tmp->parent)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -EINVAL;
    }

    return 0;
}

static int virtio_net_vnet_pre_save(void *opaque)
{
    struct VirtIONetMigTmp *tmp = opaque;

    tmp->has_vnet_hdr = tmp->parent->has_vnet_hdr;

    return 0;
}

static const VMStateDescription vmstate_virtio_net_has_vnet = {
    .name = "virtio-net-vnet",
    .post_load = virtio_net_vnet_post_load,
    .pre_save = virtio_net_vnet_pre_save,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(has_vnet_hdr, struct VirtIONetMigTmp),
        VMSTATE_END_OF_LIST()
    },
};

static const VMStateDescription vmstate_virtio_net_device = {
    .name = "virtio-net-device",
    .version_id = VIRTIO_NET_VM_VERSION,
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .post_load = virtio_net_post_load_device,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(mac, VirtIONet, ETH_ALEN),
        VMSTATE_STRUCT_POINTER(vqs, VirtIONet,
                               vmstate_virtio_net_queue_tx_waiting,
                               VirtIONetQueue),
        VMSTATE_UINT32(mergeable_rx_bufs, VirtIONet),
        VMSTATE_UINT16(status, VirtIONet),
        VMSTATE_UINT8(promisc, VirtIONet),
        VMSTATE_UINT8(allmulti, VirtIONet),
        VMSTATE_UINT32(mac_table.in_use, VirtIONet),

        /* Guarded pair: If it fits we load it, else we throw it away
         * - can happen if source has a larger MAC table; post-load
         * sets flags in this case.
         */
        VMSTATE_VBUFFER_MULTIPLY(mac_table.macs, VirtIONet,
                                 0, mac_table_fits, mac_table.in_use,
                                 ETH_ALEN),
        VMSTATE_UNUSED_VARRAY_UINT32(VirtIONet, mac_table_doesnt_fit, 0,
                                     mac_table.in_use, ETH_ALEN),

        /* Note: This is an array of uint32's that's always been saved as a
         * buffer; hold onto your endiannesses; it's actually used as a bitmap
         * but based on the uint.
         */
        VMSTATE_BUFFER_POINTER_UNSAFE(vlans, VirtIONet, 0, MAX_VLAN >> 3),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_vnet),
        VMSTATE_UINT8(mac_table.multi_overflow, VirtIONet),
        VMSTATE_UINT8(mac_table.uni_overflow, VirtIONet),
        VMSTATE_UINT8(alluni, VirtIONet),
        VMSTATE_UINT8(nomulti, VirtIONet),
        VMSTATE_UINT8(nouni, VirtIONet),
        VMSTATE_UINT8(nobcast, VirtIONet),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_has_ufo),
        VMSTATE_SINGLE_TEST(max_queues, VirtIONet, max_queues_gt_1, 0,
                            vmstate_info_uint16_equal, uint16_t),
        VMSTATE_UINT16_TEST(curr_queues, VirtIONet, max_queues_gt_1),
        VMSTATE_WITH_TMP(VirtIONet, struct VirtIONetMigTmp,
                         vmstate_virtio_net_tx_waiting),
        VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                            has_ctrl_guest_offloads),
        VMSTATE_END_OF_LIST()
    },
};

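/*
 * vmstate_virtio_net_device is installed as VirtioDeviceClass::vmsd
 * and is pulled in by the common virtio save/load code, while
 * vmstate_virtio_net below (DeviceClass::vmsd) provides the outer
 * section; see virtio_net_class_init().
 */
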
static NetClientInfo net_virtio_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
    .query_rx_filter = virtio_net_query_rxfilter,
};

static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);

    n->config_size = virtio_feature_get_config_size(feature_sizes,
                                                    host_features);
}

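/*
 * virtio_feature_get_config_size() is expected to scan feature_sizes[]
 * and return the largest .end offset whose feature bit is set in
 * host_features, so the exposed config space only covers negotiated
 * fields.  VIRTIO_NET_F_MAC is forced on first because the mac field
 * at the start of the config space must always be present.
 */
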
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    if (n->net_conf.mtu) {
        n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
    }

    if (n->net_conf.duplex_str) {
        if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
            n->net_conf.duplex = DUPLEX_HALF;
        } else if (strncmp(n->net_conf.duplex_str, "full", 5) == 0) {
            n->net_conf.duplex = DUPLEX_FULL;
        } else {
            error_setg(errp, "'duplex' must be 'half' or 'full'");
            return;
        }
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    } else {
        n->net_conf.duplex = DUPLEX_UNKNOWN;
    }

    if (n->net_conf.speed < SPEED_UNKNOWN) {
        error_setg(errp, "'speed' must be between 0 and INT_MAX");
        return;
    } else if (n->net_conf.speed >= 0) {
        n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
    }

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.rx_queue_size)) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        warn_report("virtio-net: "
                    "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                    n->net_conf.tx);
        error_printf("Defaulting to \"bh\"");
    }

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * Happens when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    QTAILQ_INIT(&n->rsc_chains);
    n->qdev = dev;
}

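/*
 * Tear down in roughly the reverse order of realize: stop the (vhost)
 * backend first so it cannot keep writing to guest memory, then free
 * the netclient names, filter tables, queues, timers and the NIC.
 */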
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_net_rsc_cleanup(n);
    virtio_cleanup(vdev);
}

static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static int virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    return 0;
}

static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

static Property virtio_net_properties[] = {
    DEFINE_PROP_BIT64("csum", VirtIONet, host_features,
                      VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT64("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT64("guest_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT64("guest_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT64("guest_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT64("guest_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT64("guest_announce", VirtIONet, host_features,
                      VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT64("host_tso4", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT64("host_tso6", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT64("host_ecn", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT64("host_ufo", VirtIONet, host_features,
                      VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT64("mrg_rxbuf", VirtIONet, host_features,
                      VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT64("status", VirtIONet, host_features,
                      VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT64("ctrl_vq", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT64("ctrl_rx", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT64("ctrl_vlan", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT64("ctrl_rx_extra", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT64("ctrl_mac_addr", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features,
                      VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
    DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
                      VIRTIO_NET_F_RSC_EXT, false),
    DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,
                       VIRTIO_NET_RSC_DEFAULT_INTERVAL),
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size,
                       VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE),
    DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0),
    DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend,
                     true),
    DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
    DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
    DEFINE_PROP_END_OF_LIST(),
};

static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
    vdc->vmsd = &vmstate_virtio_net_device;
}

static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)