2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
17 #include "net/checksum.h"
19 #include "qemu/error-report.h"
20 #include "qemu/timer.h"
21 #include "virtio-net.h"
22 #include "vhost_net.h"
24 #define VIRTIO_NET_VM_VERSION 11
26 #define MAC_TABLE_ENTRIES 64
27 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
29 typedef struct VirtIONet
32 uint8_t mac
[ETH_ALEN
];
43 uint32_t has_vnet_hdr
;
48 VirtQueueElement elem
;
51 int mergeable_rx_bufs
;
58 uint8_t vhost_started
;
62 uint8_t multi_overflow
;
71 * - we could suppress RX interrupt if we were so inclined.
74 static VirtIONet
*to_virtio_net(VirtIODevice
*vdev
)
76 return (VirtIONet
*)vdev
;
79 static void virtio_net_get_config(VirtIODevice
*vdev
, uint8_t *config
)
81 VirtIONet
*n
= to_virtio_net(vdev
);
82 struct virtio_net_config netcfg
;
84 stw_p(&netcfg
.status
, n
->status
);
85 memcpy(netcfg
.mac
, n
->mac
, ETH_ALEN
);
86 memcpy(config
, &netcfg
, sizeof(netcfg
));
89 static void virtio_net_set_config(VirtIODevice
*vdev
, const uint8_t *config
)
91 VirtIONet
*n
= to_virtio_net(vdev
);
92 struct virtio_net_config netcfg
;
94 memcpy(&netcfg
, config
, sizeof(netcfg
));
96 if (!(n
->vdev
.guest_features
>> VIRTIO_NET_F_CTRL_MAC_ADDR
& 1) &&
97 memcmp(netcfg
.mac
, n
->mac
, ETH_ALEN
)) {
98 memcpy(n
->mac
, netcfg
.mac
, ETH_ALEN
);
99 qemu_format_nic_info_str(&n
->nic
->nc
, n
->mac
);
103 static bool virtio_net_started(VirtIONet
*n
, uint8_t status
)
105 return (status
& VIRTIO_CONFIG_S_DRIVER_OK
) &&
106 (n
->status
& VIRTIO_NET_S_LINK_UP
) && n
->vdev
.vm_running
;
109 static void virtio_net_vhost_status(VirtIONet
*n
, uint8_t status
)
111 if (!n
->nic
->nc
.peer
) {
114 if (n
->nic
->nc
.peer
->info
->type
!= NET_CLIENT_OPTIONS_KIND_TAP
) {
118 if (!tap_get_vhost_net(n
->nic
->nc
.peer
)) {
121 if (!!n
->vhost_started
== virtio_net_started(n
, status
) &&
122 !n
->nic
->nc
.peer
->link_down
) {
125 if (!n
->vhost_started
) {
127 if (!vhost_net_query(tap_get_vhost_net(n
->nic
->nc
.peer
), &n
->vdev
)) {
130 n
->vhost_started
= 1;
131 r
= vhost_net_start(tap_get_vhost_net(n
->nic
->nc
.peer
), &n
->vdev
);
133 error_report("unable to start vhost net: %d: "
134 "falling back on userspace virtio", -r
);
135 n
->vhost_started
= 0;
138 vhost_net_stop(tap_get_vhost_net(n
->nic
->nc
.peer
), &n
->vdev
);
139 n
->vhost_started
= 0;
143 static void virtio_net_set_status(struct VirtIODevice
*vdev
, uint8_t status
)
145 VirtIONet
*n
= to_virtio_net(vdev
);
147 virtio_net_vhost_status(n
, status
);
149 if (!n
->tx_waiting
) {
153 if (virtio_net_started(n
, status
) && !n
->vhost_started
) {
155 qemu_mod_timer(n
->tx_timer
,
156 qemu_get_clock_ns(vm_clock
) + n
->tx_timeout
);
158 qemu_bh_schedule(n
->tx_bh
);
162 qemu_del_timer(n
->tx_timer
);
164 qemu_bh_cancel(n
->tx_bh
);
169 static void virtio_net_set_link_status(NetClientState
*nc
)
171 VirtIONet
*n
= DO_UPCAST(NICState
, nc
, nc
)->opaque
;
172 uint16_t old_status
= n
->status
;
175 n
->status
&= ~VIRTIO_NET_S_LINK_UP
;
177 n
->status
|= VIRTIO_NET_S_LINK_UP
;
179 if (n
->status
!= old_status
)
180 virtio_notify_config(&n
->vdev
);
182 virtio_net_set_status(&n
->vdev
, n
->vdev
.status
);
185 static void virtio_net_reset(VirtIODevice
*vdev
)
187 VirtIONet
*n
= to_virtio_net(vdev
);
189 /* Reset back to compatibility mode */
197 /* Flush any MAC and VLAN filter table state */
198 n
->mac_table
.in_use
= 0;
199 n
->mac_table
.first_multi
= 0;
200 n
->mac_table
.multi_overflow
= 0;
201 n
->mac_table
.uni_overflow
= 0;
202 memset(n
->mac_table
.macs
, 0, MAC_TABLE_ENTRIES
* ETH_ALEN
);
203 memcpy(&n
->mac
[0], &n
->nic
->conf
->macaddr
, sizeof(n
->mac
));
204 memset(n
->vlans
, 0, MAX_VLAN
>> 3);
207 static void peer_test_vnet_hdr(VirtIONet
*n
)
209 if (!n
->nic
->nc
.peer
)
212 if (n
->nic
->nc
.peer
->info
->type
!= NET_CLIENT_OPTIONS_KIND_TAP
)
215 n
->has_vnet_hdr
= tap_has_vnet_hdr(n
->nic
->nc
.peer
);
218 static int peer_has_vnet_hdr(VirtIONet
*n
)
220 return n
->has_vnet_hdr
;
223 static int peer_has_ufo(VirtIONet
*n
)
225 if (!peer_has_vnet_hdr(n
))
228 n
->has_ufo
= tap_has_ufo(n
->nic
->nc
.peer
);
233 static void virtio_net_set_mrg_rx_bufs(VirtIONet
*n
, int mergeable_rx_bufs
)
235 n
->mergeable_rx_bufs
= mergeable_rx_bufs
;
237 n
->guest_hdr_len
= n
->mergeable_rx_bufs
?
238 sizeof(struct virtio_net_hdr_mrg_rxbuf
) : sizeof(struct virtio_net_hdr
);
240 if (peer_has_vnet_hdr(n
) &&
241 tap_has_vnet_hdr_len(n
->nic
->nc
.peer
, n
->guest_hdr_len
)) {
242 tap_set_vnet_hdr_len(n
->nic
->nc
.peer
, n
->guest_hdr_len
);
243 n
->host_hdr_len
= n
->guest_hdr_len
;
247 static uint32_t virtio_net_get_features(VirtIODevice
*vdev
, uint32_t features
)
249 VirtIONet
*n
= to_virtio_net(vdev
);
251 features
|= (1 << VIRTIO_NET_F_MAC
);
253 if (!peer_has_vnet_hdr(n
)) {
254 features
&= ~(0x1 << VIRTIO_NET_F_CSUM
);
255 features
&= ~(0x1 << VIRTIO_NET_F_HOST_TSO4
);
256 features
&= ~(0x1 << VIRTIO_NET_F_HOST_TSO6
);
257 features
&= ~(0x1 << VIRTIO_NET_F_HOST_ECN
);
259 features
&= ~(0x1 << VIRTIO_NET_F_GUEST_CSUM
);
260 features
&= ~(0x1 << VIRTIO_NET_F_GUEST_TSO4
);
261 features
&= ~(0x1 << VIRTIO_NET_F_GUEST_TSO6
);
262 features
&= ~(0x1 << VIRTIO_NET_F_GUEST_ECN
);
265 if (!peer_has_vnet_hdr(n
) || !peer_has_ufo(n
)) {
266 features
&= ~(0x1 << VIRTIO_NET_F_GUEST_UFO
);
267 features
&= ~(0x1 << VIRTIO_NET_F_HOST_UFO
);
270 if (!n
->nic
->nc
.peer
||
271 n
->nic
->nc
.peer
->info
->type
!= NET_CLIENT_OPTIONS_KIND_TAP
) {
274 if (!tap_get_vhost_net(n
->nic
->nc
.peer
)) {
277 return vhost_net_get_features(tap_get_vhost_net(n
->nic
->nc
.peer
), features
);
280 static uint32_t virtio_net_bad_features(VirtIODevice
*vdev
)
282 uint32_t features
= 0;
284 /* Linux kernel 2.6.25. It understood MAC (as everyone must),
286 features
|= (1 << VIRTIO_NET_F_MAC
);
287 features
|= (1 << VIRTIO_NET_F_CSUM
);
288 features
|= (1 << VIRTIO_NET_F_HOST_TSO4
);
289 features
|= (1 << VIRTIO_NET_F_HOST_TSO6
);
290 features
|= (1 << VIRTIO_NET_F_HOST_ECN
);
295 static void virtio_net_set_features(VirtIODevice
*vdev
, uint32_t features
)
297 VirtIONet
*n
= to_virtio_net(vdev
);
299 virtio_net_set_mrg_rx_bufs(n
, !!(features
& (1 << VIRTIO_NET_F_MRG_RXBUF
)));
301 if (n
->has_vnet_hdr
) {
302 tap_set_offload(n
->nic
->nc
.peer
,
303 (features
>> VIRTIO_NET_F_GUEST_CSUM
) & 1,
304 (features
>> VIRTIO_NET_F_GUEST_TSO4
) & 1,
305 (features
>> VIRTIO_NET_F_GUEST_TSO6
) & 1,
306 (features
>> VIRTIO_NET_F_GUEST_ECN
) & 1,
307 (features
>> VIRTIO_NET_F_GUEST_UFO
) & 1);
309 if (!n
->nic
->nc
.peer
||
310 n
->nic
->nc
.peer
->info
->type
!= NET_CLIENT_OPTIONS_KIND_TAP
) {
313 if (!tap_get_vhost_net(n
->nic
->nc
.peer
)) {
316 vhost_net_ack_features(tap_get_vhost_net(n
->nic
->nc
.peer
), features
);
319 static int virtio_net_handle_rx_mode(VirtIONet
*n
, uint8_t cmd
,
320 struct iovec
*iov
, unsigned int iov_cnt
)
325 s
= iov_to_buf(iov
, iov_cnt
, 0, &on
, sizeof(on
));
326 if (s
!= sizeof(on
)) {
327 return VIRTIO_NET_ERR
;
330 if (cmd
== VIRTIO_NET_CTRL_RX_PROMISC
) {
332 } else if (cmd
== VIRTIO_NET_CTRL_RX_ALLMULTI
) {
334 } else if (cmd
== VIRTIO_NET_CTRL_RX_ALLUNI
) {
336 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOMULTI
) {
338 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOUNI
) {
340 } else if (cmd
== VIRTIO_NET_CTRL_RX_NOBCAST
) {
343 return VIRTIO_NET_ERR
;
346 return VIRTIO_NET_OK
;
349 static int virtio_net_handle_mac(VirtIONet
*n
, uint8_t cmd
,
350 struct iovec
*iov
, unsigned int iov_cnt
)
352 struct virtio_net_ctrl_mac mac_data
;
355 if (cmd
== VIRTIO_NET_CTRL_MAC_ADDR_SET
) {
356 if (iov_size(iov
, iov_cnt
) != sizeof(n
->mac
)) {
357 return VIRTIO_NET_ERR
;
359 s
= iov_to_buf(iov
, iov_cnt
, 0, &n
->mac
, sizeof(n
->mac
));
360 assert(s
== sizeof(n
->mac
));
361 qemu_format_nic_info_str(&n
->nic
->nc
, n
->mac
);
362 return VIRTIO_NET_OK
;
365 if (cmd
!= VIRTIO_NET_CTRL_MAC_TABLE_SET
) {
366 return VIRTIO_NET_ERR
;
369 n
->mac_table
.in_use
= 0;
370 n
->mac_table
.first_multi
= 0;
371 n
->mac_table
.uni_overflow
= 0;
372 n
->mac_table
.multi_overflow
= 0;
373 memset(n
->mac_table
.macs
, 0, MAC_TABLE_ENTRIES
* ETH_ALEN
);
375 s
= iov_to_buf(iov
, iov_cnt
, 0, &mac_data
.entries
,
376 sizeof(mac_data
.entries
));
377 mac_data
.entries
= ldl_p(&mac_data
.entries
);
378 if (s
!= sizeof(mac_data
.entries
)) {
379 return VIRTIO_NET_ERR
;
381 iov_discard_front(&iov
, &iov_cnt
, s
);
383 if (mac_data
.entries
* ETH_ALEN
> iov_size(iov
, iov_cnt
)) {
384 return VIRTIO_NET_ERR
;
387 if (mac_data
.entries
<= MAC_TABLE_ENTRIES
) {
388 s
= iov_to_buf(iov
, iov_cnt
, 0, n
->mac_table
.macs
,
389 mac_data
.entries
* ETH_ALEN
);
390 if (s
!= mac_data
.entries
* ETH_ALEN
) {
391 return VIRTIO_NET_ERR
;
393 n
->mac_table
.in_use
+= mac_data
.entries
;
395 n
->mac_table
.uni_overflow
= 1;
398 iov_discard_front(&iov
, &iov_cnt
, mac_data
.entries
* ETH_ALEN
);
400 n
->mac_table
.first_multi
= n
->mac_table
.in_use
;
402 s
= iov_to_buf(iov
, iov_cnt
, 0, &mac_data
.entries
,
403 sizeof(mac_data
.entries
));
404 mac_data
.entries
= ldl_p(&mac_data
.entries
);
405 if (s
!= sizeof(mac_data
.entries
)) {
406 return VIRTIO_NET_ERR
;
409 iov_discard_front(&iov
, &iov_cnt
, s
);
411 if (mac_data
.entries
* ETH_ALEN
!= iov_size(iov
, iov_cnt
)) {
412 return VIRTIO_NET_ERR
;
415 if (n
->mac_table
.in_use
+ mac_data
.entries
<= MAC_TABLE_ENTRIES
) {
416 s
= iov_to_buf(iov
, iov_cnt
, 0, n
->mac_table
.macs
,
417 mac_data
.entries
* ETH_ALEN
);
418 if (s
!= mac_data
.entries
* ETH_ALEN
) {
419 return VIRTIO_NET_ERR
;
421 n
->mac_table
.in_use
+= mac_data
.entries
;
423 n
->mac_table
.multi_overflow
= 1;
426 return VIRTIO_NET_OK
;
429 static int virtio_net_handle_vlan_table(VirtIONet
*n
, uint8_t cmd
,
430 struct iovec
*iov
, unsigned int iov_cnt
)
435 s
= iov_to_buf(iov
, iov_cnt
, 0, &vid
, sizeof(vid
));
437 if (s
!= sizeof(vid
)) {
438 return VIRTIO_NET_ERR
;
442 return VIRTIO_NET_ERR
;
444 if (cmd
== VIRTIO_NET_CTRL_VLAN_ADD
)
445 n
->vlans
[vid
>> 5] |= (1U << (vid
& 0x1f));
446 else if (cmd
== VIRTIO_NET_CTRL_VLAN_DEL
)
447 n
->vlans
[vid
>> 5] &= ~(1U << (vid
& 0x1f));
449 return VIRTIO_NET_ERR
;
451 return VIRTIO_NET_OK
;
454 static void virtio_net_handle_ctrl(VirtIODevice
*vdev
, VirtQueue
*vq
)
456 VirtIONet
*n
= to_virtio_net(vdev
);
457 struct virtio_net_ctrl_hdr ctrl
;
458 virtio_net_ctrl_ack status
= VIRTIO_NET_ERR
;
459 VirtQueueElement elem
;
462 unsigned int iov_cnt
;
464 while (virtqueue_pop(vq
, &elem
)) {
465 if (iov_size(elem
.in_sg
, elem
.in_num
) < sizeof(status
) ||
466 iov_size(elem
.out_sg
, elem
.out_num
) < sizeof(ctrl
)) {
467 error_report("virtio-net ctrl missing headers");
472 iov_cnt
= elem
.out_num
;
473 s
= iov_to_buf(iov
, iov_cnt
, 0, &ctrl
, sizeof(ctrl
));
474 iov_discard_front(&iov
, &iov_cnt
, sizeof(ctrl
));
475 if (s
!= sizeof(ctrl
)) {
476 status
= VIRTIO_NET_ERR
;
477 } else if (ctrl
.class == VIRTIO_NET_CTRL_RX
) {
478 status
= virtio_net_handle_rx_mode(n
, ctrl
.cmd
, iov
, iov_cnt
);
479 } else if (ctrl
.class == VIRTIO_NET_CTRL_MAC
) {
480 status
= virtio_net_handle_mac(n
, ctrl
.cmd
, iov
, iov_cnt
);
481 } else if (ctrl
.class == VIRTIO_NET_CTRL_VLAN
) {
482 status
= virtio_net_handle_vlan_table(n
, ctrl
.cmd
, iov
, iov_cnt
);
485 s
= iov_from_buf(elem
.in_sg
, elem
.in_num
, 0, &status
, sizeof(status
));
486 assert(s
== sizeof(status
));
488 virtqueue_push(vq
, &elem
, sizeof(status
));
489 virtio_notify(vdev
, vq
);
495 static void virtio_net_handle_rx(VirtIODevice
*vdev
, VirtQueue
*vq
)
497 VirtIONet
*n
= to_virtio_net(vdev
);
499 qemu_flush_queued_packets(&n
->nic
->nc
);
502 static int virtio_net_can_receive(NetClientState
*nc
)
504 VirtIONet
*n
= DO_UPCAST(NICState
, nc
, nc
)->opaque
;
505 if (!n
->vdev
.vm_running
) {
509 if (!virtio_queue_ready(n
->rx_vq
) ||
510 !(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
516 static int virtio_net_has_buffers(VirtIONet
*n
, int bufsize
)
518 if (virtio_queue_empty(n
->rx_vq
) ||
519 (n
->mergeable_rx_bufs
&&
520 !virtqueue_avail_bytes(n
->rx_vq
, bufsize
, 0))) {
521 virtio_queue_set_notification(n
->rx_vq
, 1);
523 /* To avoid a race condition where the guest has made some buffers
524 * available after the above check but before notification was
525 * enabled, check for available buffers again.
527 if (virtio_queue_empty(n
->rx_vq
) ||
528 (n
->mergeable_rx_bufs
&&
529 !virtqueue_avail_bytes(n
->rx_vq
, bufsize
, 0)))
533 virtio_queue_set_notification(n
->rx_vq
, 0);
537 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
538 * it never finds out that the packets don't have valid checksums. This
539 * causes dhclient to get upset. Fedora's carried a patch for ages to
540 * fix this with Xen but it hasn't appeared in an upstream release of
543 * To avoid breaking existing guests, we catch udp packets and add
544 * checksums. This is terrible but it's better than hacking the guest
547 * N.B. if we introduce a zero-copy API, this operation is no longer free so
548 * we should provide a mechanism to disable it to avoid polluting the host
551 static void work_around_broken_dhclient(struct virtio_net_hdr
*hdr
,
552 uint8_t *buf
, size_t size
)
554 if ((hdr
->flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) && /* missing csum */
555 (size
> 27 && size
< 1500) && /* normal sized MTU */
556 (buf
[12] == 0x08 && buf
[13] == 0x00) && /* ethertype == IPv4 */
557 (buf
[23] == 17) && /* ip.protocol == UDP */
558 (buf
[34] == 0 && buf
[35] == 67)) { /* udp.srcport == bootps */
559 net_checksum_calculate(buf
, size
);
560 hdr
->flags
&= ~VIRTIO_NET_HDR_F_NEEDS_CSUM
;
564 static void receive_header(VirtIONet
*n
, const struct iovec
*iov
, int iov_cnt
,
565 const void *buf
, size_t size
)
567 if (n
->has_vnet_hdr
) {
568 /* FIXME this cast is evil */
569 void *wbuf
= (void *)buf
;
570 work_around_broken_dhclient(wbuf
, wbuf
+ n
->host_hdr_len
,
571 size
- n
->host_hdr_len
);
572 iov_from_buf(iov
, iov_cnt
, 0, buf
, sizeof(struct virtio_net_hdr
));
574 struct virtio_net_hdr hdr
= {
576 .gso_type
= VIRTIO_NET_HDR_GSO_NONE
578 iov_from_buf(iov
, iov_cnt
, 0, &hdr
, sizeof hdr
);
582 static int receive_filter(VirtIONet
*n
, const uint8_t *buf
, int size
)
584 static const uint8_t bcast
[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
585 static const uint8_t vlan
[] = {0x81, 0x00};
586 uint8_t *ptr
= (uint8_t *)buf
;
592 ptr
+= n
->host_hdr_len
;
594 if (!memcmp(&ptr
[12], vlan
, sizeof(vlan
))) {
595 int vid
= be16_to_cpup((uint16_t *)(ptr
+ 14)) & 0xfff;
596 if (!(n
->vlans
[vid
>> 5] & (1U << (vid
& 0x1f))))
600 if (ptr
[0] & 1) { // multicast
601 if (!memcmp(ptr
, bcast
, sizeof(bcast
))) {
603 } else if (n
->nomulti
) {
605 } else if (n
->allmulti
|| n
->mac_table
.multi_overflow
) {
609 for (i
= n
->mac_table
.first_multi
; i
< n
->mac_table
.in_use
; i
++) {
610 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
617 } else if (n
->alluni
|| n
->mac_table
.uni_overflow
) {
619 } else if (!memcmp(ptr
, n
->mac
, ETH_ALEN
)) {
623 for (i
= 0; i
< n
->mac_table
.first_multi
; i
++) {
624 if (!memcmp(ptr
, &n
->mac_table
.macs
[i
* ETH_ALEN
], ETH_ALEN
)) {
633 static ssize_t
virtio_net_receive(NetClientState
*nc
, const uint8_t *buf
, size_t size
)
635 VirtIONet
*n
= DO_UPCAST(NICState
, nc
, nc
)->opaque
;
636 struct iovec mhdr_sg
[VIRTQUEUE_MAX_SIZE
];
637 struct virtio_net_hdr_mrg_rxbuf mhdr
;
638 unsigned mhdr_cnt
= 0;
639 size_t offset
, i
, guest_offset
;
641 if (!virtio_net_can_receive(&n
->nic
->nc
))
644 /* hdr_len refers to the header we supply to the guest */
645 if (!virtio_net_has_buffers(n
, size
+ n
->guest_hdr_len
- n
->host_hdr_len
))
648 if (!receive_filter(n
, buf
, size
))
653 while (offset
< size
) {
654 VirtQueueElement elem
;
656 const struct iovec
*sg
= elem
.in_sg
;
660 if (virtqueue_pop(n
->rx_vq
, &elem
) == 0) {
663 error_report("virtio-net unexpected empty queue: "
664 "i %zd mergeable %d offset %zd, size %zd, "
665 "guest hdr len %zd, host hdr len %zd guest features 0x%x",
666 i
, n
->mergeable_rx_bufs
, offset
, size
,
667 n
->guest_hdr_len
, n
->host_hdr_len
, n
->vdev
.guest_features
);
671 if (elem
.in_num
< 1) {
672 error_report("virtio-net receive queue contains no in buffers");
678 if (n
->mergeable_rx_bufs
) {
679 mhdr_cnt
= iov_copy(mhdr_sg
, ARRAY_SIZE(mhdr_sg
),
681 offsetof(typeof(mhdr
), num_buffers
),
682 sizeof(mhdr
.num_buffers
));
685 receive_header(n
, sg
, elem
.in_num
, buf
, size
);
686 offset
= n
->host_hdr_len
;
687 total
+= n
->guest_hdr_len
;
688 guest_offset
= n
->guest_hdr_len
;
693 /* copy in packet. ugh */
694 len
= iov_from_buf(sg
, elem
.in_num
, guest_offset
,
695 buf
+ offset
, size
- offset
);
698 /* If buffers can't be merged, at this point we
699 * must have consumed the complete packet.
700 * Otherwise, drop it. */
701 if (!n
->mergeable_rx_bufs
&& offset
< size
) {
703 error_report("virtio-net truncated non-mergeable packet: "
704 "i %zd mergeable %d offset %zd, size %zd, "
705 "guest hdr len %zd, host hdr len %zd",
706 i
, n
->mergeable_rx_bufs
,
707 offset
, size
, n
->guest_hdr_len
, n
->host_hdr_len
);
712 /* signal other side */
713 virtqueue_fill(n
->rx_vq
, &elem
, total
, i
++);
717 stw_p(&mhdr
.num_buffers
, i
);
718 iov_from_buf(mhdr_sg
, mhdr_cnt
,
720 &mhdr
.num_buffers
, sizeof mhdr
.num_buffers
);
723 virtqueue_flush(n
->rx_vq
, i
);
724 virtio_notify(&n
->vdev
, n
->rx_vq
);
729 static int32_t virtio_net_flush_tx(VirtIONet
*n
, VirtQueue
*vq
);
731 static void virtio_net_tx_complete(NetClientState
*nc
, ssize_t len
)
733 VirtIONet
*n
= DO_UPCAST(NICState
, nc
, nc
)->opaque
;
735 virtqueue_push(n
->tx_vq
, &n
->async_tx
.elem
, 0);
736 virtio_notify(&n
->vdev
, n
->tx_vq
);
738 n
->async_tx
.elem
.out_num
= n
->async_tx
.len
= 0;
740 virtio_queue_set_notification(n
->tx_vq
, 1);
741 virtio_net_flush_tx(n
, n
->tx_vq
);
745 static int32_t virtio_net_flush_tx(VirtIONet
*n
, VirtQueue
*vq
)
747 VirtQueueElement elem
;
748 int32_t num_packets
= 0;
749 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
)) {
753 assert(n
->vdev
.vm_running
);
755 if (n
->async_tx
.elem
.out_num
) {
756 virtio_queue_set_notification(n
->tx_vq
, 0);
760 while (virtqueue_pop(vq
, &elem
)) {
762 unsigned int out_num
= elem
.out_num
;
763 struct iovec
*out_sg
= &elem
.out_sg
[0];
764 struct iovec sg
[VIRTQUEUE_MAX_SIZE
];
767 error_report("virtio-net header not in first element");
772 * If host wants to see the guest header as is, we can
773 * pass it on unchanged. Otherwise, copy just the parts
774 * that host is interested in.
776 assert(n
->host_hdr_len
<= n
->guest_hdr_len
);
777 if (n
->host_hdr_len
!= n
->guest_hdr_len
) {
778 unsigned sg_num
= iov_copy(sg
, ARRAY_SIZE(sg
),
781 sg_num
+= iov_copy(sg
+ sg_num
, ARRAY_SIZE(sg
) - sg_num
,
783 n
->guest_hdr_len
, -1);
788 len
= n
->guest_hdr_len
;
790 ret
= qemu_sendv_packet_async(&n
->nic
->nc
, out_sg
, out_num
,
791 virtio_net_tx_complete
);
793 virtio_queue_set_notification(n
->tx_vq
, 0);
794 n
->async_tx
.elem
= elem
;
795 n
->async_tx
.len
= len
;
801 virtqueue_push(vq
, &elem
, 0);
802 virtio_notify(&n
->vdev
, vq
);
804 if (++num_packets
>= n
->tx_burst
) {
811 static void virtio_net_handle_tx_timer(VirtIODevice
*vdev
, VirtQueue
*vq
)
813 VirtIONet
*n
= to_virtio_net(vdev
);
815 /* This happens when device was stopped but VCPU wasn't. */
816 if (!n
->vdev
.vm_running
) {
822 virtio_queue_set_notification(vq
, 1);
823 qemu_del_timer(n
->tx_timer
);
825 virtio_net_flush_tx(n
, vq
);
827 qemu_mod_timer(n
->tx_timer
,
828 qemu_get_clock_ns(vm_clock
) + n
->tx_timeout
);
830 virtio_queue_set_notification(vq
, 0);
834 static void virtio_net_handle_tx_bh(VirtIODevice
*vdev
, VirtQueue
*vq
)
836 VirtIONet
*n
= to_virtio_net(vdev
);
838 if (unlikely(n
->tx_waiting
)) {
842 /* This happens when device was stopped but VCPU wasn't. */
843 if (!n
->vdev
.vm_running
) {
846 virtio_queue_set_notification(vq
, 0);
847 qemu_bh_schedule(n
->tx_bh
);
850 static void virtio_net_tx_timer(void *opaque
)
852 VirtIONet
*n
= opaque
;
853 assert(n
->vdev
.vm_running
);
857 /* Just in case the driver is not ready any more */
858 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
861 virtio_queue_set_notification(n
->tx_vq
, 1);
862 virtio_net_flush_tx(n
, n
->tx_vq
);
865 static void virtio_net_tx_bh(void *opaque
)
867 VirtIONet
*n
= opaque
;
870 assert(n
->vdev
.vm_running
);
874 /* Just in case the driver is not ready any more */
875 if (unlikely(!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
)))
878 ret
= virtio_net_flush_tx(n
, n
->tx_vq
);
880 return; /* Notification re-enable handled by tx_complete */
883 /* If we flush a full burst of packets, assume there are
884 * more coming and immediately reschedule */
885 if (ret
>= n
->tx_burst
) {
886 qemu_bh_schedule(n
->tx_bh
);
891 /* If less than a full burst, re-enable notification and flush
892 * anything that may have come in while we weren't looking. If
893 * we find something, assume the guest is still active and reschedule */
894 virtio_queue_set_notification(n
->tx_vq
, 1);
895 if (virtio_net_flush_tx(n
, n
->tx_vq
) > 0) {
896 virtio_queue_set_notification(n
->tx_vq
, 0);
897 qemu_bh_schedule(n
->tx_bh
);
902 static void virtio_net_save(QEMUFile
*f
, void *opaque
)
904 VirtIONet
*n
= opaque
;
906 /* At this point, backend must be stopped, otherwise
907 * it might keep writing to memory. */
908 assert(!n
->vhost_started
);
909 virtio_save(&n
->vdev
, f
);
911 qemu_put_buffer(f
, n
->mac
, ETH_ALEN
);
912 qemu_put_be32(f
, n
->tx_waiting
);
913 qemu_put_be32(f
, n
->mergeable_rx_bufs
);
914 qemu_put_be16(f
, n
->status
);
915 qemu_put_byte(f
, n
->promisc
);
916 qemu_put_byte(f
, n
->allmulti
);
917 qemu_put_be32(f
, n
->mac_table
.in_use
);
918 qemu_put_buffer(f
, n
->mac_table
.macs
, n
->mac_table
.in_use
* ETH_ALEN
);
919 qemu_put_buffer(f
, (uint8_t *)n
->vlans
, MAX_VLAN
>> 3);
920 qemu_put_be32(f
, n
->has_vnet_hdr
);
921 qemu_put_byte(f
, n
->mac_table
.multi_overflow
);
922 qemu_put_byte(f
, n
->mac_table
.uni_overflow
);
923 qemu_put_byte(f
, n
->alluni
);
924 qemu_put_byte(f
, n
->nomulti
);
925 qemu_put_byte(f
, n
->nouni
);
926 qemu_put_byte(f
, n
->nobcast
);
927 qemu_put_byte(f
, n
->has_ufo
);
930 static int virtio_net_load(QEMUFile
*f
, void *opaque
, int version_id
)
932 VirtIONet
*n
= opaque
;
936 if (version_id
< 2 || version_id
> VIRTIO_NET_VM_VERSION
)
939 ret
= virtio_load(&n
->vdev
, f
);
944 qemu_get_buffer(f
, n
->mac
, ETH_ALEN
);
945 n
->tx_waiting
= qemu_get_be32(f
);
947 virtio_net_set_mrg_rx_bufs(n
, qemu_get_be32(f
));
950 n
->status
= qemu_get_be16(f
);
952 if (version_id
>= 4) {
953 if (version_id
< 8) {
954 n
->promisc
= qemu_get_be32(f
);
955 n
->allmulti
= qemu_get_be32(f
);
957 n
->promisc
= qemu_get_byte(f
);
958 n
->allmulti
= qemu_get_byte(f
);
962 if (version_id
>= 5) {
963 n
->mac_table
.in_use
= qemu_get_be32(f
);
964 /* MAC_TABLE_ENTRIES may be different from the saved image */
965 if (n
->mac_table
.in_use
<= MAC_TABLE_ENTRIES
) {
966 qemu_get_buffer(f
, n
->mac_table
.macs
,
967 n
->mac_table
.in_use
* ETH_ALEN
);
968 } else if (n
->mac_table
.in_use
) {
969 uint8_t *buf
= g_malloc0(n
->mac_table
.in_use
);
970 qemu_get_buffer(f
, buf
, n
->mac_table
.in_use
* ETH_ALEN
);
972 n
->mac_table
.multi_overflow
= n
->mac_table
.uni_overflow
= 1;
973 n
->mac_table
.in_use
= 0;
978 qemu_get_buffer(f
, (uint8_t *)n
->vlans
, MAX_VLAN
>> 3);
980 if (version_id
>= 7) {
981 if (qemu_get_be32(f
) && !peer_has_vnet_hdr(n
)) {
982 error_report("virtio-net: saved image requires vnet_hdr=on");
986 if (n
->has_vnet_hdr
) {
987 tap_set_offload(n
->nic
->nc
.peer
,
988 (n
->vdev
.guest_features
>> VIRTIO_NET_F_GUEST_CSUM
) & 1,
989 (n
->vdev
.guest_features
>> VIRTIO_NET_F_GUEST_TSO4
) & 1,
990 (n
->vdev
.guest_features
>> VIRTIO_NET_F_GUEST_TSO6
) & 1,
991 (n
->vdev
.guest_features
>> VIRTIO_NET_F_GUEST_ECN
) & 1,
992 (n
->vdev
.guest_features
>> VIRTIO_NET_F_GUEST_UFO
) & 1);
996 if (version_id
>= 9) {
997 n
->mac_table
.multi_overflow
= qemu_get_byte(f
);
998 n
->mac_table
.uni_overflow
= qemu_get_byte(f
);
1001 if (version_id
>= 10) {
1002 n
->alluni
= qemu_get_byte(f
);
1003 n
->nomulti
= qemu_get_byte(f
);
1004 n
->nouni
= qemu_get_byte(f
);
1005 n
->nobcast
= qemu_get_byte(f
);
1008 if (version_id
>= 11) {
1009 if (qemu_get_byte(f
) && !peer_has_ufo(n
)) {
1010 error_report("virtio-net: saved image requires TUN_F_UFO support");
1015 /* Find the first multicast entry in the saved MAC filter */
1016 for (i
= 0; i
< n
->mac_table
.in_use
; i
++) {
1017 if (n
->mac_table
.macs
[i
* ETH_ALEN
] & 1) {
1021 n
->mac_table
.first_multi
= i
;
1023 /* nc.link_down can't be migrated, so infer link_down according
1024 * to link status bit in n->status */
1025 n
->nic
->nc
.link_down
= (n
->status
& VIRTIO_NET_S_LINK_UP
) == 0;
1030 static void virtio_net_cleanup(NetClientState
*nc
)
1032 VirtIONet
*n
= DO_UPCAST(NICState
, nc
, nc
)->opaque
;
1037 static NetClientInfo net_virtio_info
= {
1038 .type
= NET_CLIENT_OPTIONS_KIND_NIC
,
1039 .size
= sizeof(NICState
),
1040 .can_receive
= virtio_net_can_receive
,
1041 .receive
= virtio_net_receive
,
1042 .cleanup
= virtio_net_cleanup
,
1043 .link_status_changed
= virtio_net_set_link_status
,
1046 static bool virtio_net_guest_notifier_pending(VirtIODevice
*vdev
, int idx
)
1048 VirtIONet
*n
= to_virtio_net(vdev
);
1049 assert(n
->vhost_started
);
1050 return vhost_net_virtqueue_pending(tap_get_vhost_net(n
->nic
->nc
.peer
), idx
);
1053 static void virtio_net_guest_notifier_mask(VirtIODevice
*vdev
, int idx
,
1056 VirtIONet
*n
= to_virtio_net(vdev
);
1057 assert(n
->vhost_started
);
1058 vhost_net_virtqueue_mask(tap_get_vhost_net(n
->nic
->nc
.peer
),
1062 VirtIODevice
*virtio_net_init(DeviceState
*dev
, NICConf
*conf
,
1063 virtio_net_conf
*net
)
1067 n
= (VirtIONet
*)virtio_common_init("virtio-net", VIRTIO_ID_NET
,
1068 sizeof(struct virtio_net_config
),
1071 n
->vdev
.get_config
= virtio_net_get_config
;
1072 n
->vdev
.set_config
= virtio_net_set_config
;
1073 n
->vdev
.get_features
= virtio_net_get_features
;
1074 n
->vdev
.set_features
= virtio_net_set_features
;
1075 n
->vdev
.bad_features
= virtio_net_bad_features
;
1076 n
->vdev
.reset
= virtio_net_reset
;
1077 n
->vdev
.set_status
= virtio_net_set_status
;
1078 n
->vdev
.guest_notifier_mask
= virtio_net_guest_notifier_mask
;
1079 n
->vdev
.guest_notifier_pending
= virtio_net_guest_notifier_pending
;
1080 n
->rx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_rx
);
1082 if (net
->tx
&& strcmp(net
->tx
, "timer") && strcmp(net
->tx
, "bh")) {
1083 error_report("virtio-net: "
1084 "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1086 error_report("Defaulting to \"bh\"");
1089 if (net
->tx
&& !strcmp(net
->tx
, "timer")) {
1090 n
->tx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_tx_timer
);
1091 n
->tx_timer
= qemu_new_timer_ns(vm_clock
, virtio_net_tx_timer
, n
);
1092 n
->tx_timeout
= net
->txtimer
;
1094 n
->tx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_tx_bh
);
1095 n
->tx_bh
= qemu_bh_new(virtio_net_tx_bh
, n
);
1097 n
->ctrl_vq
= virtio_add_queue(&n
->vdev
, 64, virtio_net_handle_ctrl
);
1098 qemu_macaddr_default_if_unset(&conf
->macaddr
);
1099 memcpy(&n
->mac
[0], &conf
->macaddr
, sizeof(n
->mac
));
1100 n
->status
= VIRTIO_NET_S_LINK_UP
;
1102 n
->nic
= qemu_new_nic(&net_virtio_info
, conf
, object_get_typename(OBJECT(dev
)), dev
->id
, n
);
1103 peer_test_vnet_hdr(n
);
1104 if (peer_has_vnet_hdr(n
)) {
1105 tap_using_vnet_hdr(n
->nic
->nc
.peer
, 1);
1106 n
->host_hdr_len
= sizeof(struct virtio_net_hdr
);
1108 n
->host_hdr_len
= 0;
1111 qemu_format_nic_info_str(&n
->nic
->nc
, conf
->macaddr
.a
);
1114 n
->tx_burst
= net
->txburst
;
1115 virtio_net_set_mrg_rx_bufs(n
, 0);
1116 n
->promisc
= 1; /* for compatibility */
1118 n
->mac_table
.macs
= g_malloc0(MAC_TABLE_ENTRIES
* ETH_ALEN
);
1120 n
->vlans
= g_malloc0(MAX_VLAN
>> 3);
1123 register_savevm(dev
, "virtio-net", -1, VIRTIO_NET_VM_VERSION
,
1124 virtio_net_save
, virtio_net_load
, n
);
1126 add_boot_device_path(conf
->bootindex
, dev
, "/ethernet-phy@0");
1131 void virtio_net_exit(VirtIODevice
*vdev
)
1133 VirtIONet
*n
= DO_UPCAST(VirtIONet
, vdev
, vdev
);
1135 /* This will stop vhost backend if appropriate. */
1136 virtio_net_set_status(vdev
, 0);
1138 qemu_purge_queued_packets(&n
->nic
->nc
);
1140 unregister_savevm(n
->qdev
, "virtio-net", n
);
1142 g_free(n
->mac_table
.macs
);
1146 qemu_del_timer(n
->tx_timer
);
1147 qemu_free_timer(n
->tx_timer
);
1149 qemu_bh_delete(n
->tx_bh
);
1152 qemu_del_net_client(&n
->nic
->nc
);
1153 virtio_cleanup(&n
->vdev
);