Do not access virtio-rings directly
[qemu-kvm/fedora.git] / hw / virtio-net.c
blobb64fb0084de4e1da8470466e32d416b8c9de7836
1 /*
2 * Virtio Network Device
4 * Copyright IBM, Corp. 2007
6 * Authors:
7 * Anthony Liguori <aliguori@us.ibm.com>
9 * This work is licensed under the terms of the GNU GPL, version 2. See
10 * the COPYING file in the top-level directory.
14 #include "virtio.h"
15 #include "net.h"
16 #include "qemu-timer.h"
17 #include "virtio-net.h"
18 #include "qemu-kvm.h"
20 typedef struct VirtIONet
22 VirtIODevice vdev;
23 uint8_t mac[6];
24 VirtQueue *rx_vq;
25 VirtQueue *tx_vq;
26 VLANClientState *vc;
27 QEMUTimer *tx_timer;
28 int tx_timer_active;
29 int mergeable_rx_bufs;
30 } VirtIONet;
/* TODO
 * - we could suppress the RX interrupt if we were so inclined.
 */
36 static VirtIONet *to_virtio_net(VirtIODevice *vdev)
38 return (VirtIONet *)vdev;
41 static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
43 VirtIONet *n = to_virtio_net(vdev);
44 struct virtio_net_config netcfg;
46 memcpy(netcfg.mac, n->mac, 6);
47 memcpy(config, &netcfg, sizeof(netcfg));
50 static uint32_t virtio_net_get_features(VirtIODevice *vdev)
52 VirtIONet *n = to_virtio_net(vdev);
53 VLANClientState *host = n->vc->vlan->first_client;
54 uint32_t features = (1 << VIRTIO_NET_F_MAC);
56 if (tap_has_vnet_hdr(host)) {
57 tap_using_vnet_hdr(host, 1);
58 features |= (1 << VIRTIO_NET_F_CSUM);
59 features |= (1 << VIRTIO_NET_F_GUEST_CSUM);
60 features |= (1 << VIRTIO_NET_F_GUEST_TSO4);
61 features |= (1 << VIRTIO_NET_F_GUEST_TSO6);
62 features |= (1 << VIRTIO_NET_F_GUEST_ECN);
63 features |= (1 << VIRTIO_NET_F_HOST_TSO4);
64 features |= (1 << VIRTIO_NET_F_HOST_TSO6);
65 features |= (1 << VIRTIO_NET_F_HOST_ECN);
66 features |= (1 << VIRTIO_NET_F_MRG_RXBUF);
67 /* Kernel can't actually handle UFO in software currently. */
70 return features;
73 static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
75 VirtIONet *n = to_virtio_net(vdev);
76 VLANClientState *host = n->vc->vlan->first_client;
78 n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
80 if (!tap_has_vnet_hdr(host) || !host->set_offload)
81 return;
83 host->set_offload(host,
84 (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
85 (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
86 (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
87 (features >> VIRTIO_NET_F_GUEST_ECN) & 1);
90 /* RX */
92 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
94 /* We now have RX buffers, signal to the IO thread to break out of the
95 select to re-poll the tap file descriptor */
96 if (kvm_enabled())
97 qemu_kvm_notify_work();
100 static int virtio_net_can_receive(void *opaque)
102 VirtIONet *n = opaque;
104 if (!virtio_queue_ready(n->rx_vq) ||
105 !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
106 return 0;
108 if (virtio_queue_empty(n->rx_vq) ||
109 (n->mergeable_rx_bufs &&
110 !virtqueue_avail_bytes(n->rx_vq, VIRTIO_NET_MAX_BUFSIZE, 0))) {
111 virtio_queue_set_notification(n->rx_vq, 1);
112 return 0;
115 virtio_queue_set_notification(n->rx_vq, 0);
116 return 1;
119 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
120 * it never finds out that the packets don't have valid checksums. This
121 * causes dhclient to get upset. Fedora's carried a patch for ages to
122 * fix this with Xen but it hasn't appeared in an upstream release of
123 * dhclient yet.
125 * To avoid breaking existing guests, we catch udp packets and add
126 * checksums. This is terrible but it's better than hacking the guest
127 * kernels.
129 * N.B. if we introduce a zero-copy API, this operation is no longer free so
130 * we should provide a mechanism to disable it to avoid polluting the host
131 * cache.
133 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
134 const uint8_t *buf, size_t size)
136 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
137 (size > 27 && size < 1500) && /* normal sized MTU */
138 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
139 (buf[23] == 17) && /* ip.protocol == UDP */
140 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
141 /* FIXME this cast is evil */
142 net_checksum_calculate((uint8_t *)buf, size);
143 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
/*
 * Scatter the first @count bytes of @buf across the buffers described by
 * @iov, filling each buffer in order from its start.
 *
 * @iov:    array of destination buffers
 * @iovcnt: number of entries in @iov
 * @buf:    source data
 * @count:  number of bytes available at @buf
 *
 * Returns the number of bytes actually copied, which is less than @count
 * when the buffers are too small, and 0 when @count or @iovcnt is not
 * positive.
 */
static int iov_fill(struct iovec *iov, int iovcnt, const void *buf, int count)
{
    /* byte pointer: arithmetic on void * is not standard C */
    const uint8_t *src = buf;
    int copied = 0;
    int i;

    for (i = 0; i < iovcnt && copied < count; i++) {
        /* count - copied is positive here, so the cast cannot wrap */
        size_t chunk = (size_t)(count - copied);

        if (chunk > iov[i].iov_len) {
            chunk = iov[i].iov_len;
        }

        memcpy(iov[i].iov_base, src + copied, chunk);
        copied += (int)chunk;
    }

    return copied;
}
162 static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
163 const void *buf, int size, int hdr_len)
165 struct virtio_net_hdr *hdr = iov[0].iov_base;
166 int offset;
168 hdr->flags = 0;
169 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
171 if (tap_has_vnet_hdr(n->vc->vlan->first_client)) {
172 memcpy(hdr, buf, sizeof(*hdr));
173 offset = sizeof(*hdr);
174 work_around_broken_dhclient(hdr, buf + offset, size - offset);
177 /* We only ever receive a struct virtio_net_hdr from the tapfd,
178 * but we may be passing along a larger header to the guest.
180 iov[0].iov_base += hdr_len;
181 iov[0].iov_len -= hdr_len;
183 return offset;
186 static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
188 VirtIONet *n = opaque;
189 struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
190 int hdr_len, offset, i;
192 /* hdr_len refers to the header we supply to the guest */
193 hdr_len = n->mergeable_rx_bufs ?
194 sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
196 offset = i = 0;
198 while (offset < size) {
199 VirtQueueElement elem;
200 int len, total;
202 len = total = 0;
204 if ((i != 0 && !n->mergeable_rx_bufs) ||
205 virtqueue_pop(n->rx_vq, &elem) == 0) {
206 if (i == 0)
207 return;
208 fprintf(stderr, "virtio-net truncating packet\n");
209 exit(1);
212 if (elem.in_num < 1) {
213 fprintf(stderr, "virtio-net receive queue contains no in buffers\n");
214 exit(1);
217 if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != hdr_len) {
218 fprintf(stderr, "virtio-net header not in first element\n");
219 exit(1);
222 if (i == 0) {
223 if (n->mergeable_rx_bufs)
224 mhdr = (struct virtio_net_hdr_mrg_rxbuf *)elem.in_sg[0].iov_base;
226 offset += receive_header(n, &elem.in_sg[0], elem.in_num,
227 buf + offset, size - offset, hdr_len);
228 total += hdr_len;
231 /* copy in packet. ugh */
232 len = iov_fill(&elem.in_sg[0], elem.in_num,
233 buf + offset, size - offset);
234 total += len;
236 /* signal other side */
237 virtqueue_fill(n->rx_vq, &elem, total, i++);
239 offset += len;
242 if (mhdr)
243 mhdr->num_buffers = i;
245 virtqueue_flush(n->rx_vq, i);
246 virtio_notify(&n->vdev, n->rx_vq);
249 /* TX */
250 static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
252 VirtQueueElement elem;
253 int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
255 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
256 return;
258 while (virtqueue_pop(vq, &elem)) {
259 ssize_t len = 0;
260 unsigned int out_num = elem.out_num;
261 struct iovec *out_sg = &elem.out_sg[0];
262 unsigned hdr_len;
264 /* hdr_len refers to the header received from the guest */
265 hdr_len = n->mergeable_rx_bufs ?
266 sizeof(struct virtio_net_hdr_mrg_rxbuf) :
267 sizeof(struct virtio_net_hdr);
269 if (out_num < 1 || out_sg->iov_len != hdr_len) {
270 fprintf(stderr, "virtio-net header not in first element\n");
271 exit(1);
274 /* ignore the header if GSO is not supported */
275 if (!has_vnet_hdr) {
276 out_num--;
277 out_sg++;
278 len += hdr_len;
279 } else if (n->mergeable_rx_bufs) {
280 /* tapfd expects a struct virtio_net_hdr */
281 hdr_len -= sizeof(struct virtio_net_hdr);
282 out_sg->iov_len -= hdr_len;
283 len += hdr_len;
286 len += qemu_sendv_packet(n->vc, out_sg, out_num);
288 virtqueue_push(vq, &elem, len);
289 virtio_notify(&n->vdev, vq);
293 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
295 VirtIONet *n = to_virtio_net(vdev);
297 if (n->tx_timer_active) {
298 virtio_queue_set_notification(vq, 1);
299 qemu_del_timer(n->tx_timer);
300 n->tx_timer_active = 0;
301 virtio_net_flush_tx(n, vq);
302 } else {
303 qemu_mod_timer(n->tx_timer,
304 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
305 n->tx_timer_active = 1;
306 virtio_queue_set_notification(vq, 0);
310 static void virtio_net_tx_timer(void *opaque)
312 VirtIONet *n = opaque;
314 n->tx_timer_active = 0;
316 /* Just in case the driver is not ready on more */
317 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
318 return;
320 virtio_queue_set_notification(n->tx_vq, 1);
321 virtio_net_flush_tx(n, n->tx_vq);
324 static void virtio_net_save(QEMUFile *f, void *opaque)
326 VirtIONet *n = opaque;
328 virtio_save(&n->vdev, f);
330 qemu_put_buffer(f, n->mac, 6);
331 qemu_put_be32(f, n->tx_timer_active);
334 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
336 VirtIONet *n = opaque;
338 if (version_id != 1)
339 return -EINVAL;
341 virtio_load(&n->vdev, f);
343 qemu_get_buffer(f, n->mac, 6);
344 n->tx_timer_active = qemu_get_be32(f);
346 if (n->tx_timer_active) {
347 qemu_mod_timer(n->tx_timer,
348 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
351 return 0;
354 PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
356 VirtIONet *n;
357 static int virtio_net_id;
359 n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
360 0, VIRTIO_ID_NET,
361 0x02, 0x00, 0x00,
362 6, sizeof(VirtIONet));
363 if (!n)
364 return NULL;
366 n->vdev.get_config = virtio_net_update_config;
367 n->vdev.get_features = virtio_net_get_features;
368 n->vdev.set_features = virtio_net_set_features;
369 n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
370 n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
371 memcpy(n->mac, nd->macaddr, 6);
372 n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
373 virtio_net_can_receive, n);
375 n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
376 n->tx_timer_active = 0;
377 n->mergeable_rx_bufs = 0;
379 register_savevm("virtio-net", virtio_net_id++, 1,
380 virtio_net_save, virtio_net_load, n);
382 return (PCIDevice *)n;