Extend virtio config routines to support ballooning
[qemu-kvm/amd-iommu.git] / hw / virtio-net.c
blobbc2ede628e84c66b98843235b7fef31a4063f727
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
14 #include "virtio.h"
15 #include "net.h"
16 #include "qemu-timer.h"
17 #include "qemu-kvm.h"
19 /* from Linux's virtio_net.h */
21 /* The ID for virtio_net */
22 #define VIRTIO_ID_NET 1
24 /* The feature bitmap for virtio net */
25 #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
26 #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
27 #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
28 #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
29 #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
30 #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
31 #define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
32 #define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
33 #define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
34 #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
35 #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
36 #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
38 #define TX_TIMER_INTERVAL 150000 /* 150 us */
40 /* The config defining mac address (6 bytes) */
41 struct virtio_net_config
43 uint8_t mac[6];
44 } __attribute__((packed));
46 /* This is the first element of the scatter-gather list. If you don't
47 * specify GSO or CSUM features, you can simply ignore the header. */
48 struct virtio_net_hdr
50 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
51 uint8_t flags;
52 #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
53 #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
54 #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
55 #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
56 #define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
57 uint8_t gso_type;
58 uint16_t hdr_len;
59 uint16_t gso_size;
60 uint16_t csum_start;
61 uint16_t csum_offset;
64 typedef struct VirtIONet
66 VirtIODevice vdev;
67 uint8_t mac[6];
68 VirtQueue *rx_vq;
69 VirtQueue *tx_vq;
70 VLANClientState *vc;
71 QEMUTimer *tx_timer;
72 int tx_timer_active;
73 } VirtIONet;
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */
79 static VirtIONet *to_virtio_net(VirtIODevice *vdev)
81 return (VirtIONet *)vdev;
84 static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
86 VirtIONet *n = to_virtio_net(vdev);
87 struct virtio_net_config netcfg;
89 memcpy(netcfg.mac, n->mac, 6);
90 memcpy(config, &netcfg, sizeof(netcfg));
93 static uint32_t virtio_net_get_features(VirtIODevice *vdev)
95 VirtIONet *n = to_virtio_net(vdev);
96 VLANClientState *host = n->vc->vlan->first_client;
97 uint32_t features = (1 << VIRTIO_NET_F_MAC);
99 if (tap_has_vnet_hdr(host)) {
100 tap_using_vnet_hdr(host, 1);
101 features |= (1 << VIRTIO_NET_F_CSUM);
102 features |= (1 << VIRTIO_NET_F_GUEST_CSUM);
103 features |= (1 << VIRTIO_NET_F_GUEST_TSO4);
104 features |= (1 << VIRTIO_NET_F_GUEST_TSO6);
105 features |= (1 << VIRTIO_NET_F_GUEST_ECN);
106 features |= (1 << VIRTIO_NET_F_HOST_TSO4);
107 features |= (1 << VIRTIO_NET_F_HOST_TSO6);
108 features |= (1 << VIRTIO_NET_F_HOST_ECN);
109 /* Kernel can't actually handle UFO in software currently. */
112 return features;
115 static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
117 VirtIONet *n = to_virtio_net(vdev);
118 VLANClientState *host = n->vc->vlan->first_client;
120 if (!tap_has_vnet_hdr(host) || !host->set_offload)
121 return;
123 host->set_offload(host,
124 (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
125 (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
126 (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
127 (features >> VIRTIO_NET_F_GUEST_ECN) & 1);
130 /* RX */
132 static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
134 /* We now have RX buffers, signal to the IO thread to break out of the
135 select to re-poll the tap file descriptor */
136 if (kvm_enabled())
137 qemu_kvm_notify_work();
140 static int virtio_net_can_receive(void *opaque)
142 VirtIONet *n = opaque;
144 if (n->rx_vq->vring.avail == NULL ||
145 !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
146 return 0;
148 if (n->rx_vq->vring.avail->idx == n->rx_vq->last_avail_idx) {
149 n->rx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
150 return 0;
153 n->rx_vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
154 return 1;
157 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
158 * it never finds out that the packets don't have valid checksums. This
159 * causes dhclient to get upset. Fedora's carried a patch for ages to
160 * fix this with Xen but it hasn't appeared in an upstream release of
161 * dhclient yet.
163 * To avoid breaking existing guests, we catch udp packets and add
164 * checksums. This is terrible but it's better than hacking the guest
165 * kernels.
167 * N.B. if we introduce a zero-copy API, this operation is no longer free so
168 * we should provide a mechanism to disable it to avoid polluting the host
169 * cache.
171 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
172 const uint8_t *buf, size_t size)
174 if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
175 (size > 27 && size < 1500) && /* normal sized MTU */
176 (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
177 (buf[23] == 17) && /* ip.protocol == UDP */
178 (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
179 /* FIXME this cast is evil */
180 net_checksum_calculate((uint8_t *)buf, size);
181 hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
185 static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
187 VirtIONet *n = opaque;
188 VirtQueueElement elem;
189 struct virtio_net_hdr *hdr;
190 int offset, i;
191 int total;
193 if (virtqueue_pop(n->rx_vq, &elem) == 0)
194 return;
196 if (elem.in_num < 1 || elem.in_sg[0].iov_len != sizeof(*hdr)) {
197 fprintf(stderr, "virtio-net header not in first element\n");
198 exit(1);
201 hdr = (void *)elem.in_sg[0].iov_base;
202 hdr->flags = 0;
203 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
205 offset = 0;
206 total = sizeof(*hdr);
208 if (tap_has_vnet_hdr(n->vc->vlan->first_client)) {
209 memcpy(hdr, buf, sizeof(*hdr));
210 offset += total;
211 work_around_broken_dhclient(hdr, buf + offset, size - offset);
214 /* copy in packet. ugh */
215 i = 1;
216 while (offset < size && i < elem.in_num) {
217 int len = MIN(elem.in_sg[i].iov_len, size - offset);
218 memcpy(elem.in_sg[i].iov_base, buf + offset, len);
219 offset += len;
220 total += len;
221 i++;
224 /* signal other side */
225 virtqueue_push(n->rx_vq, &elem, total);
226 virtio_notify(&n->vdev, n->rx_vq);
229 /* TX */
230 static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
232 VirtQueueElement elem;
233 int has_vnet_hdr = tap_has_vnet_hdr(n->vc->vlan->first_client);
235 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
236 return;
238 while (virtqueue_pop(vq, &elem)) {
239 ssize_t len = 0;
240 unsigned int out_num = elem.out_num;
241 struct iovec *out_sg = &elem.out_sg[0];
243 if (out_num < 1 || out_sg->iov_len != sizeof(struct virtio_net_hdr)) {
244 fprintf(stderr, "virtio-net header not in first element\n");
245 exit(1);
248 /* ignore the header if GSO is not supported */
249 if (!has_vnet_hdr) {
250 out_num--;
251 out_sg++;
252 len += sizeof(struct virtio_net_hdr);
255 len += qemu_sendv_packet(n->vc, out_sg, out_num);
257 virtqueue_push(vq, &elem, len);
258 virtio_notify(&n->vdev, vq);
262 static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
264 VirtIONet *n = to_virtio_net(vdev);
266 if (n->tx_timer_active) {
267 vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
268 qemu_del_timer(n->tx_timer);
269 n->tx_timer_active = 0;
270 virtio_net_flush_tx(n, vq);
271 } else {
272 qemu_mod_timer(n->tx_timer,
273 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
274 n->tx_timer_active = 1;
275 vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
279 static void virtio_net_tx_timer(void *opaque)
281 VirtIONet *n = opaque;
283 n->tx_timer_active = 0;
285 /* Just in case the driver is not ready on more */
286 if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
287 return;
289 n->tx_vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
290 virtio_net_flush_tx(n, n->tx_vq);
293 static void virtio_net_save(QEMUFile *f, void *opaque)
295 VirtIONet *n = opaque;
297 virtio_save(&n->vdev, f);
299 qemu_put_buffer(f, n->mac, 6);
300 qemu_put_be32(f, n->tx_timer_active);
303 static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
305 VirtIONet *n = opaque;
307 if (version_id != 1)
308 return -EINVAL;
310 virtio_load(&n->vdev, f);
312 qemu_get_buffer(f, n->mac, 6);
313 n->tx_timer_active = qemu_get_be32(f);
315 if (n->tx_timer_active) {
316 qemu_mod_timer(n->tx_timer,
317 qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
320 return 0;
323 PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
325 VirtIONet *n;
326 static int virtio_net_id;
328 n = (VirtIONet *)virtio_init_pci(bus, "virtio-net", 6900, 0x1000,
329 0, VIRTIO_ID_NET,
330 0x02, 0x00, 0x00,
331 6, sizeof(VirtIONet));
332 if (!n)
333 return NULL;
335 n->vdev.get_config = virtio_net_update_config;
336 n->vdev.get_features = virtio_net_get_features;
337 n->vdev.set_features = virtio_net_set_features;
338 n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
339 n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
340 memcpy(n->mac, nd->macaddr, 6);
341 n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
342 virtio_net_can_receive, n);
344 n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
345 n->tx_timer_active = 0;
347 register_savevm("virtio-net", virtio_net_id++, 1,
348 virtio_net_save, virtio_net_load, n);
350 return (PCIDevice *)n;