Optionally bounce the virtio buffers
[qemu-kvm/fedora.git] / hw / virtio-net.c
blob6493ff63792c3ccebd12806c8f6e71a11f74d7e8
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
14 #include "virtio.h"
15 #include "net.h"
16 #include "qemu-timer.h"
17 #include "virtio-net.h"
18 #include "qemu-kvm.h"
20 typedef struct VirtIONet
22 VirtIODevice vdev;
23 uint8_t mac[6];
24 VirtQueue *rx_vq;
25 VirtQueue *tx_vq;
26 VLANClientState *vc;
27 QEMUTimer *tx_timer;
28 int tx_timer_active;
29 int mergeable_rx_bufs;
30 } VirtIONet;
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */
36 static VirtIONet *to_virtio_net(VirtIODevice *vdev)
38 return (VirtIONet *)vdev;
41 static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
43 VirtIONet *n = to_virtio_net(vdev);
44 struct virtio_net_config netcfg;
46 memcpy(netcfg.mac, n->mac, 6);
47 memcpy(config, &netcfg, sizeof(netcfg));
50 static uint32_t virtio_net_get_features(VirtIODevice *vdev)
52 VirtIONet *n = to_virtio_net(vdev);
53 VLANClientState *host = n->vc->vlan->first_client;
54 uint32_t features = (1 << VIRTIO_NET_F_MAC);
56 if (tap_has_vnet_hdr(host)) {
57 tap_using_vnet_hdr(host, 1);
58 features |= (1 << VIRTIO_NET_F_CSUM);
59 features |= (1 << VIRTIO_NET_F_GUEST_CSUM);
60 features |= (1 << VIRTIO_NET_F_GUEST_TSO4);
61 features |= (1 << VIRTIO_NET_F_GUEST_TSO6);
62 features |= (1 << VIRTIO_NET_F_GUEST_ECN);
63 features |= (1 << VIRTIO_NET_F_HOST_TSO4);
64 features |= (1 << VIRTIO_NET_F_HOST_TSO6);
65 features |= (1 << VIRTIO_NET_F_HOST_ECN);
66 features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
67 /* Kernel can't actually handle UFO in software currently. */
70 return features;
73 static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
75 VirtIONet *n = to_virtio_net(vdev);
76 VLANClientState *host = n->vc->vlan->first_client;
78 n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
80 if (!tap_has_vnet_hdr(host) || !host->set_offload)
81 return;
83 host->set_offload(host,
84 (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
85 (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
86 (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
87 (features >> VIRTIO_NET_F_GUEST_ECN) & 1);
/* RX */
92 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
94 /* We now have RX buffers, signal to the IO thread to break out of the
95 select to re-poll the tap file descriptor */
96 if (kvm_enabled())
97 qemu_kvm_notify_work();
100 static int virtio_net_can_receive(void *opaque)
102 VirtIONet *n = opaque;
104 if (!virtio_queue_ready(n->rx_vq) ||
105 !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
106 return 0;
108 if (virtio_queue_empty(n->rx_vq) ||
109 (n->mergeable_rx_bufs &&
110 !virtqueue_avail_bytes(n->rx_vq, VIRTIO_NET_MAX_BUFSIZE, 0))) {
111 virtio_queue_set_notification(n->rx_vq, 1);
112 return 0;
115 virtio_queue_set_notification(n->rx_vq, 0);
116 return 1;
119 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
120 * it never finds out that the packets don't have valid checksums. This
121 * causes dhclient to get upset. Fedora's carried a patch for ages to
122 * fix this with Xen but it hasn't appeared in an upstream release of
123 * dhclient yet.
125 * To avoid breaking existing guests, we catch udp packets and add
126 * checksums. This is terrible but it's better than hacking the guest
127 * kernels.
129 * N.B. if we introduce a zero-copy API, this operation is no longer free so
130 * we should provide a mechanism to disable it to avoid polluting the host
131 * cache.
133 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
134 const uint8_t *buf, size_t size)
136 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
137 (size > 27 && size < 1500) && /* normal sized MTU */
138 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
139 (buf[23] == 17) && /* ip.protocol == UDP */
140 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
141 /* FIXME this cast is evil */
142 net_checksum_calculate((uint8_t *)buf, size);
143 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
/* Scatter up to `count` bytes from `buf` into the elements of `iov`.
 *
 * Elements are filled in order, each from its iov_base for at most its
 * iov_len bytes, until either `count` bytes have been copied or all
 * `iovcnt` elements have been used.
 *
 * Returns the number of bytes actually copied, which is less than `count`
 * when the scatter list is too small.  A non-positive `count` or `iovcnt`
 * copies nothing and returns 0.
 */
static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
{
    /* Byte pointer: arithmetic on `const void *` is not defined by ISO C. */
    const uint8_t *src = buf;
    int copied = 0;
    int i;

    for (i = 0; i < iovcnt && copied < count; i++) {
        size_t remaining = (size_t)(count - copied);
        size_t chunk = iov[i].iov_len < remaining ? iov[i].iov_len : remaining;

        memcpy(iov[i].iov_base, src + copied, chunk);
        copied += (int)chunk;
    }

    return copied;
}
162 static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
163 const void *buf, int size, int hdr_len)
165 struct virtio_net_hdr *hdr = iov[0].iov_base;
166 int offset;
168 hdr->flags = 0;
169 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
171 if (tap_has_vnet_hdr(n->vc->vlan->first_client)) {
172 memcpy(hdr, buf, sizeof(*hdr));
173 offset = sizeof(*hdr);
174 work_around_broken_dhclient(hdr, buf + offset, size - offset);
177 /* We only ever receive a struct virtio_net_hdr from the tapfd,
178 * but we may be passing along a larger header to the guest.
180 iov[0].iov_base += hdr_len;
181 iov[0].iov_len -= hdr_len;
183 return offset;
186 static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
188 VirtIONet *n = opaque;
189 struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
190 int hdr_len, offset, i;
192 /* hdr_len refers to the header we supply to the guest */
193 hdr_len = n->mergeable_rx_bufs ?
194 sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
196 offset = i = 0;
198 while (offset < size) {
199 VirtQueueElement elem;
200 int len, total;
201 struct iovec sg[VIRTQUEUE_MAX_SIZE];
203 len = total = 0;
205 if ((i != 0 && !n->mergeable_rx_bufs) ||
206 virtqueue_pop(n->rx_vq, &elem) == 0) {
207 if (i == 0)
208 return;
209 fprintf(stderr, "virtio-net truncating packet\n");
210 exit(1);
213 if (elem.in_num < 1) {
214 fprintf(stderr, "virtio-net receive queue contains no in buffers\n");
215 exit(1);
218 if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != hdr_len) {
219 fprintf(stderr, "virtio-net header not in first element\n");
220 exit(1);
223 memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);
225 if (i == 0) {
226 if (n->mergeable_rx_bufs)
227 mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
229 offset += receive_header(n, sg, elem.in_num,
230 buf + offset, size - offset, hdr_len);
231 total += hdr_len;
234 /* copy in packet. ugh */
235 len = iov_fill(sg, elem.in_num,
236 buf + offset, size - offset);
237 total += len;
239 /* signal other side */
240 virtqueue_fill(n->rx_vq, &elem, total, i++);
242 offset += len;
245 if (mhdr)
246 mhdr->num_buffers = i;
248 virtqueue_flush(n->rx_vq, i);
249 virtio_notify(&n->vdev, n->rx_vq);
/* TX */
253 static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
255 VirtQueueElement elem;
256 int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
258 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
259 return;
261 while (virtqueue_pop(vq, &elem)) {
262 ssize_t len = 0;
263 unsigned int out_num = elem.out_num;
264 struct iovec *out_sg = &elem.out_sg[0];
265 unsigned hdr_len;
267 /* hdr_len refers to the header received from the guest */
268 hdr_len = n->mergeable_rx_bufs ?
269 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
270 sizeof(struct virtio_net_hdr);
272 if (out_num < 1 || out_sg->iov_len != hdr_len) {
273 fprintf(stderr, "virtio-net header not in first element\n");
274 exit(1);
277 /* ignore the header if GSO is not supported */
278 if (!has_vnet_hdr) {
279 out_num--;
280 out_sg++;
281 len += hdr_len;
282 } else if (n->mergeable_rx_bufs) {
283 /* tapfd expects a struct virtio_net_hdr */
284 hdr_len -= sizeof(struct virtio_net_hdr);
285 out_sg->iov_len -= hdr_len;
286 len += hdr_len;
289 len += qemu_sendv_packet(n->vc, out_sg, out_num);
291 virtqueue_push(vq, &elem, len);
292 virtio_notify(&n->vdev, vq);
296 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
298 VirtIONet *n = to_virtio_net(vdev);
300 if (n->tx_timer_active) {
301 virtio_queue_set_notification(vq, 1);
302 qemu_del_timer(n->tx_timer);
303 n->tx_timer_active = 0;
304 virtio_net_flush_tx(n, vq);
305 } else {
306 qemu_mod_timer(n->tx_timer,
307 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
308 n->tx_timer_active = 1;
309 virtio_queue_set_notification(vq, 0);
313 static void virtio_net_tx_timer(void *opaque)
315 VirtIONet *n = opaque;
317 n->tx_timer_active = 0;
319 /* Just in case the driver is not ready on more */
320 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
321 return;
323 virtio_queue_set_notification(n->tx_vq, 1);
324 virtio_net_flush_tx(n, n->tx_vq);
327 static void virtio_net_save(QEMUFile *f, void *opaque)
329 VirtIONet *n = opaque;
331 virtio_save(&n->vdev, f);
333 qemu_put_buffer(f, n->mac, 6);
334 qemu_put_be32(f, n->tx_timer_active);
337 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
339 VirtIONet *n = opaque;
341 if (version_id != 1)
342 return -EINVAL;
344 virtio_load(&n->vdev, f);
346 qemu_get_buffer(f, n->mac, 6);
347 n->tx_timer_active = qemu_get_be32(f);
349 if (n->tx_timer_active) {
350 qemu_mod_timer(n->tx_timer,
351 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
354 return 0;
357 PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
359 VirtIONet *n;
360 static int virtio_net_id;
362 n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
363 0, VIRTIO_ID_NET,
364 0x02, 0x00, 0x00,
365 6, sizeof(VirtIONet));
366 if (!n)
367 return NULL;
369 n->vdev.get_config = virtio_net_update_config;
370 n->vdev.get_features = virtio_net_get_features;
371 n->vdev.set_features = virtio_net_set_features;
372 n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
373 n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
374 memcpy(n->mac, nd->macaddr, 6);
375 n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
376 virtio_net_can_receive, n);
378 n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
379 n->tx_timer_active = 0;
380 n->mergeable_rx_bufs = 0;
382 register_savevm("virtio-net", virtio_net_id++, 1,
383 virtio_net_save, virtio_net_load, n);
385 return (PCIDevice *)n;