2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
16 #include "qemu-timer.h"
17 #include "virtio-net.h"
/*
 * Per-device state for the virtio network device.
 *
 * NOTE(review): only the mergeable_rx_bufs member is visible in this
 * listing.  Other code in the file also reads or writes the members vdev,
 * mac, rx_vq, tx_vq, vc, tx_timer and tx_timer_active; their declarations
 * are not shown here.
 */
20 typedef struct VirtIONet
29 int mergeable_rx_bufs
;
33 * - we could suppress RX interrupt if we were so inclined.
36 static VirtIONet
*to_virtio_net(VirtIODevice
*vdev
)
38 return (VirtIONet
*)vdev
;
41 static void virtio_net_update_config(VirtIODevice
*vdev
, uint8_t *config
)
43 VirtIONet
*n
= to_virtio_net(vdev
);
44 struct virtio_net_config netcfg
;
46 memcpy(netcfg
.mac
, n
->mac
, 6);
47 memcpy(config
, &netcfg
, sizeof(netcfg
));
50 static uint32_t virtio_net_get_features(VirtIODevice
*vdev
)
52 VirtIONet
*n
= to_virtio_net(vdev
);
53 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
54 uint32_t features
= (1 << VIRTIO_NET_F_MAC
);
56 if (tap_has_vnet_hdr(host
)) {
57 tap_using_vnet_hdr(host
, 1);
58 features
|= (1 << VIRTIO_NET_F_CSUM
);
59 features
|= (1 << VIRTIO_NET_F_GUEST_CSUM
);
60 features
|= (1 << VIRTIO_NET_F_GUEST_TSO4
);
61 features
|= (1 << VIRTIO_NET_F_GUEST_TSO6
);
62 features
|= (1 << VIRTIO_NET_F_GUEST_ECN
);
63 features
|= (1 << VIRTIO_NET_F_HOST_TSO4
);
64 features
|= (1 << VIRTIO_NET_F_HOST_TSO6
);
65 features
|= (1 << VIRTIO_NET_F_HOST_ECN
);
66 features
|= (1 << VIRTIO_NET_F_MRG_RXBUF
);
67 /* Kernel can't actually handle UFO in software currently. */
73 static void virtio_net_set_features(VirtIODevice
*vdev
, uint32_t features
)
75 VirtIONet
*n
= to_virtio_net(vdev
);
76 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
78 n
->mergeable_rx_bufs
= !!(features
& (1 << VIRTIO_NET_F_MRG_RXBUF
));
80 if (!tap_has_vnet_hdr(host
) || !host
->set_offload
)
83 host
->set_offload(host
,
84 (features
>> VIRTIO_NET_F_GUEST_CSUM
) & 1,
85 (features
>> VIRTIO_NET_F_GUEST_TSO4
) & 1,
86 (features
>> VIRTIO_NET_F_GUEST_TSO6
) & 1,
87 (features
>> VIRTIO_NET_F_GUEST_ECN
) & 1);
/*
 * Handler for the RX virtqueue (passed to virtio_add_queue() for rx_vq in
 * virtio_net_init()).  Neither vdev nor vq is used in the visible lines; the
 * only visible action is a call to qemu_kvm_notify_work().
 *
 * NOTE(review): original line 96, immediately before the call, is missing
 * from this listing, so it is unclear whether the call is conditional.
 */
92 static void virtio_net_handle_rx(VirtIODevice
*vdev
, VirtQueue
*vq
)
94 /* We now have RX buffers, signal to the IO thread to break out of the
95 select to re-poll the tap file descriptor */
97 qemu_kvm_notify_work();
100 static int virtio_net_can_receive(void *opaque
)
102 VirtIONet
*n
= opaque
;
104 if (!virtio_queue_ready(n
->rx_vq
) ||
105 !(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
108 if (virtio_queue_empty(n
->rx_vq
) ||
109 (n
->mergeable_rx_bufs
&&
110 !virtqueue_avail_bytes(n
->rx_vq
, VIRTIO_NET_MAX_BUFSIZE
, 0))) {
111 virtio_queue_set_notification(n
->rx_vq
, 1);
115 virtio_queue_set_notification(n
->rx_vq
, 0);
119 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
120 * it never finds out that the packets don't have valid checksums. This
121 * causes dhclient to get upset. Fedora's carried a patch for ages to
122 * fix this with Xen but it hasn't appeared in an upstream release of
125 * To avoid breaking existing guests, we catch udp packets and add
126 * checksums. This is terrible but it's better than hacking the guest
129 * N.B. if we introduce a zero-copy API, this operation is no longer free so
130 * we should provide a mechanism to disable it to avoid polluting the host
133 static void work_around_broken_dhclient(struct virtio_net_hdr
*hdr
,
134 const uint8_t *buf
, size_t size
)
136 if ((hdr
->flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) && /* missing csum */
137 (size
> 27 && size
< 1500) && /* normal sized MTU */
138 (buf
[12] == 0x08 && buf
[13] == 0x00) && /* ethertype == IPv4 */
139 (buf
[23] == 17) && /* ip.protocol == UDP */
140 (buf
[34] == 0 && buf
[35] == 67)) { /* udp.srcport == bootps */
141 /* FIXME this cast is evil */
142 net_checksum_calculate((uint8_t *)buf
, size
);
143 hdr
->flags
&= ~VIRTIO_NET_HDR_F_NEEDS_CSUM
;
/*
 * Copy up to count bytes from buf into the scatter/gather list iov.
 *
 * iov:    array of destination segments, filled in order.
 * iovcnt: number of entries in iov.
 * buf:    source bytes.
 * count:  number of bytes available at buf.
 *
 * Returns the number of bytes copied.  This is less than count when the
 * segments run out first, and 0 when count or iovcnt is not positive.
 */
static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
{
    /* Byte pointer: arithmetic on void pointers is not standard C. */
    const uint8_t *src = buf;
    int copied = 0;
    int i;

    for (i = 0; i < iovcnt && copied < count; i++) {
        size_t room = iov[i].iov_len;
        size_t want = (size_t)(count - copied); /* positive inside loop */
        size_t chunk = room < want ? room : want;

        memcpy(iov[i].iov_base, src + copied, chunk);
        copied += (int)chunk; /* chunk <= want, so it fits in int */
    }

    return copied;
}
/*
 * Prepare the header at the front of a guest RX buffer.
 *
 * The header is accessed through iov[0].iov_base and its gso_type is set to
 * VIRTIO_NET_HDR_GSO_NONE.  When the VLAN's first client reports vnet header
 * support, sizeof(*hdr) bytes are copied from buf into the header and the
 * bytes after them are passed to work_around_broken_dhclient().  iov[0] is
 * then advanced by hdr_len (base moved forward, length reduced).
 *
 * NOTE(review): the declaration of offset, the return statement and several
 * other lines are missing from this listing.  The caller adds the return
 * value to its own offset into buf, so presumably it is the number of bytes
 * consumed from buf -- confirm.
 */
162 static int receive_header(VirtIONet
*n
, struct iovec
*iov
, int iovcnt
,
163 const void *buf
, int size
, int hdr_len
)
165 struct virtio_net_hdr
*hdr
= iov
[0].iov_base
;
169 hdr
->gso_type
= VIRTIO_NET_HDR_GSO_NONE
;
171 if (tap_has_vnet_hdr(n
->vc
->vlan
->first_client
)) {
172 memcpy(hdr
, buf
, sizeof(*hdr
));
173 offset
= sizeof(*hdr
);
174 work_around_broken_dhclient(hdr
, buf
+ offset
, size
- offset
);
177 /* We only ever receive a struct virtio_net_hdr from the tapfd,
178 * but we may be passing along a larger header to the guest.
180 iov
[0].iov_base
+= hdr_len
;
181 iov
[0].iov_len
-= hdr_len
;
/*
 * Receive callback registered with qemu_new_vlan_client(): deliver one
 * packet (buf, size) into guest buffers taken from the RX virtqueue.
 *
 * The header supplied to the guest is struct virtio_net_hdr_mrg_rxbuf when
 * mergeable RX buffers are enabled and struct virtio_net_hdr otherwise.
 * While packet bytes remain, an element is popped from rx_vq, its in_sg
 * entries are copied into the local sg array, receive_header() and
 * iov_fill() are called on that array, and the element is queued with
 * virtqueue_fill().  A further element is only taken when mergeable RX
 * buffers are enabled; otherwise "virtio-net truncating packet" is printed.
 * Finally mhdr->num_buffers is set to the element count, the queue is
 * flushed and the guest is notified.
 *
 * NOTE(review): many lines are missing from this listing, including the
 * declarations of len and total, whatever follows each fprintf(), and any
 * conditions guarding the receive_header() call and the mhdr->num_buffers
 * store (mhdr starts out NULL and is only assigned when mergeable RX
 * buffers are enabled).
 */
186 static void virtio_net_receive(void *opaque
, const uint8_t *buf
, int size
)
188 VirtIONet
*n
= opaque
;
189 struct virtio_net_hdr_mrg_rxbuf
*mhdr
= NULL
;
190 int hdr_len
, offset
, i
;
192 /* hdr_len refers to the header we supply to the guest */
193 hdr_len
= n
->mergeable_rx_bufs
?
194 sizeof(struct virtio_net_hdr_mrg_rxbuf
) : sizeof(struct virtio_net_hdr
);
198 while (offset
< size
) {
199 VirtQueueElement elem
;
201 struct iovec sg
[VIRTQUEUE_MAX_SIZE
];
205 if ((i
!= 0 && !n
->mergeable_rx_bufs
) ||
206 virtqueue_pop(n
->rx_vq
, &elem
) == 0) {
209 fprintf(stderr
, "virtio-net truncating packet\n");
213 if (elem
.in_num
< 1) {
214 fprintf(stderr
, "virtio-net receive queue contains no in buffers\n");
218 if (!n
->mergeable_rx_bufs
&& elem
.in_sg
[0].iov_len
!= hdr_len
) {
219 fprintf(stderr
, "virtio-net header not in first element\n");
223 memcpy(&sg
, &elem
.in_sg
[0], sizeof(sg
[0]) * elem
.in_num
);
226 if (n
->mergeable_rx_bufs
)
227 mhdr
= (struct virtio_net_hdr_mrg_rxbuf
*)sg
[0].iov_base
;
229 offset
+= receive_header(n
, sg
, elem
.in_num
,
230 buf
+ offset
, size
- offset
, hdr_len
);
234 /* copy in packet. ugh */
235 len
= iov_fill(sg
, elem
.in_num
,
236 buf
+ offset
, size
- offset
);
239 /* signal other side */
240 virtqueue_fill(n
->rx_vq
, &elem
, total
, i
++);
246 mhdr
->num_buffers
= i
;
248 virtqueue_flush(n
->rx_vq
, i
);
249 virtio_notify(&n
->vdev
, n
->rx_vq
);
/*
 * Drain the TX virtqueue vq of device n.
 *
 * A VIRTIO_CONFIG_S_DRIVER_OK status check precedes the loop.  For each
 * popped element the first out_sg entry must be exactly the guest header
 * (struct virtio_net_hdr_mrg_rxbuf with mergeable RX buffers, struct
 * virtio_net_hdr otherwise), or "virtio-net header not in first element" is
 * printed.  With mergeable RX buffers the first entry's length is reduced by
 * the size difference between the two header types, because the tap side
 * expects a plain struct virtio_net_hdr.  The packet is sent with
 * qemu_sendv_packet(), the element is returned with virtqueue_push() and
 * virtio_notify() is called.
 *
 * NOTE(review): many lines are missing from this listing, including the
 * declarations of len and hdr_len, the action taken after the status check
 * and after the fprintf(), and the branch that precedes "else if" (the
 * has_vnet_hdr value is presumably used there).  Whether virtio_notify()
 * runs per element or once after the loop cannot be determined from here.
 */
253 static void virtio_net_flush_tx(VirtIONet
*n
, VirtQueue
*vq
)
255 VirtQueueElement elem
;
256 int has_vnet_hdr
= tap_has_vnet_hdr(n
->vc
->vlan
->first_client
);
258 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
261 while (virtqueue_pop(vq
, &elem
)) {
263 unsigned int out_num
= elem
.out_num
;
264 struct iovec
*out_sg
= &elem
.out_sg
[0];
267 /* hdr_len refers to the header received from the guest */
268 hdr_len
= n
->mergeable_rx_bufs
?
269 sizeof(struct virtio_net_hdr_mrg_rxbuf
) :
270 sizeof(struct virtio_net_hdr
);
272 if (out_num
< 1 || out_sg
->iov_len
!= hdr_len
) {
273 fprintf(stderr
, "virtio-net header not in first element\n");
277 /* ignore the header if GSO is not supported */
282 } else if (n
->mergeable_rx_bufs
) {
283 /* tapfd expects a struct virtio_net_hdr */
284 hdr_len
-= sizeof(struct virtio_net_hdr
);
285 out_sg
->iov_len
-= hdr_len
;
289 len
+= qemu_sendv_packet(n
->vc
, out_sg
, out_num
);
291 virtqueue_push(vq
, &elem
, len
);
292 virtio_notify(&n
->vdev
, vq
);
296 static void virtio_net_handle_tx(VirtIODevice
*vdev
, VirtQueue
*vq
)
298 VirtIONet
*n
= to_virtio_net(vdev
);
300 if (n
->tx_timer_active
) {
301 virtio_queue_set_notification(vq
, 1);
302 qemu_del_timer(n
->tx_timer
);
303 n
->tx_timer_active
= 0;
304 virtio_net_flush_tx(n
, vq
);
306 qemu_mod_timer(n
->tx_timer
,
307 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
308 n
->tx_timer_active
= 1;
309 virtio_queue_set_notification(vq
, 0);
313 static void virtio_net_tx_timer(void *opaque
)
315 VirtIONet
*n
= opaque
;
317 n
->tx_timer_active
= 0;
319 /* Just in case the driver is not ready on more */
320 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
323 virtio_queue_set_notification(n
->tx_vq
, 1);
324 virtio_net_flush_tx(n
, n
->tx_vq
);
327 static void virtio_net_save(QEMUFile
*f
, void *opaque
)
329 VirtIONet
*n
= opaque
;
331 virtio_save(&n
->vdev
, f
);
333 qemu_put_buffer(f
, n
->mac
, 6);
334 qemu_put_be32(f
, n
->tx_timer_active
);
/*
 * loadvm handler: restore the state written by virtio_net_save().
 *
 * Reads back the generic virtio state, the 6-byte MAC address and the
 * tx_timer_active flag, in that order.  If the flag is set, the TX timer is
 * re-armed TX_TIMER_INTERVAL from the current vm_clock time.
 *
 * NOTE(review): no use of version_id and no return statement are visible in
 * this listing; original lines 340-343 and 352-356 are missing.
 */
337 static int virtio_net_load(QEMUFile
*f
, void *opaque
, int version_id
)
339 VirtIONet
*n
= opaque
;
344 virtio_load(&n
->vdev
, f
);
346 qemu_get_buffer(f
, n
->mac
, 6);
347 n
->tx_timer_active
= qemu_get_be32(f
);
349 if (n
->tx_timer_active
) {
350 qemu_mod_timer(n
->tx_timer
,
351 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
/*
 * Create a virtio-net device on PCI bus `bus` for the NIC described by nd.
 *
 * The device is allocated through virtio_init_pci() with sizeof(VirtIONet).
 * The function then installs the get_config/get_features/set_features
 * callbacks, adds the RX and TX queues (each with a size argument of 256),
 * copies the MAC address from nd, attaches a client to nd's VLAN with
 * virtio_net_receive/virtio_net_can_receive, creates the TX timer on
 * vm_clock, and clears tx_timer_active and mergeable_rx_bufs.  Save/load
 * handlers are registered as "virtio-net", version 1, with an instance id
 * taken from a static counter that is incremented per device.  Returns the
 * device cast to PCIDevice *.
 *
 * NOTE(review): the declaration of n, most of the virtio_init_pci()
 * arguments and any failure handling after that call are missing from this
 * listing; devfn is not used in the visible lines.
 */
357 PCIDevice
*virtio_net_init(PCIBus
*bus
, NICInfo
*nd
, int devfn
)
360 static int virtio_net_id
;
362 n
= (VirtIONet
*)virtio_init_pci(bus
, "virtio-net", 6900, 0x1000,
365 6, sizeof(VirtIONet
));
369 n
->vdev
.get_config
= virtio_net_update_config
;
370 n
->vdev
.get_features
= virtio_net_get_features
;
371 n
->vdev
.set_features
= virtio_net_set_features
;
372 n
->rx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_rx
);
373 n
->tx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_tx
);
374 memcpy(n
->mac
, nd
->macaddr
, 6);
375 n
->vc
= qemu_new_vlan_client(nd
->vlan
, virtio_net_receive
,
376 virtio_net_can_receive
, n
);
378 n
->tx_timer
= qemu_new_timer(vm_clock
, virtio_net_tx_timer
, n
);
379 n
->tx_timer_active
= 0;
380 n
->mergeable_rx_bufs
= 0;
382 register_savevm("virtio-net", virtio_net_id
++, 1,
383 virtio_net_save
, virtio_net_load
, n
);
385 return (PCIDevice
*)n
;