/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
16 #include "qemu-timer.h"
/* from Linux's virtio_net.h */

/* The ID for virtio_net */
#define VIRTIO_ID_NET 1

/* The feature bitmap for virtio net.  Each value is a bit position; the
 * feature word is built and tested with (1 << VIRTIO_NET_F_xxx). */
#define VIRTIO_NET_F_CSUM       0  /* Host handles pkts w/ partial csum */
#define VIRTIO_NET_F_GUEST_CSUM 1  /* Guest handles pkts w/ partial csum */
#define VIRTIO_NET_F_MAC        5  /* Host has given MAC address. */
#define VIRTIO_NET_F_GSO        6  /* Host handles pkts w/ any GSO type */
#define VIRTIO_NET_F_GUEST_TSO4 7  /* Guest can handle TSOv4 in. */
#define VIRTIO_NET_F_GUEST_TSO6 8  /* Guest can handle TSOv6 in. */
#define VIRTIO_NET_F_GUEST_ECN  9  /* Guest can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_GUEST_UFO  10 /* Guest can handle UFO in. */
#define VIRTIO_NET_F_HOST_TSO4  11 /* Host can handle TSOv4 in. */
#define VIRTIO_NET_F_HOST_TSO6  12 /* Host can handle TSOv6 in. */
#define VIRTIO_NET_F_HOST_ECN   13 /* Host can handle TSO[6] w/ ECN in. */
#define VIRTIO_NET_F_HOST_UFO   14 /* Host can handle UFO in. */

/* Delay added to the current vm_clock reading when arming the TX timer. */
#define TX_TIMER_INTERVAL 150000 /* 150 us */
40 /* Guest-visible device configuration holding the MAC address (6 bytes).
 * The struct is declared packed.
 * NOTE(review): the member declaration is not visible in this listing;
 * virtio_net_update_config() fills a field named mac. */
41 struct virtio_net_config
44 } __attribute__((packed
));
46 /* This is the first element of the scatter-gather list. If you don't
 * specify GSO or CSUM features, you can simply ignore the header.
 *
 * The constants below are values for the header's flags field
 * (VIRTIO_NET_HDR_F_*) and its gso_type field (VIRTIO_NET_HDR_GSO_*).
 * NOTE(review): the struct declaration and its members are not visible in
 * this listing. */
50 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
52 #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
53 #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
54 #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
55 #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
56 #define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
/* Per-device state for one virtio-net NIC.  Fields referenced elsewhere in
 * this file: vdev, mac, rx_vq, tx_vq, vc, tx_timer, tx_timer_active.
 * NOTE(review): the struct body is not visible in this listing. */
64 typedef struct VirtIONet
76 * - we could suppress RX interrupt if we were so inclined.
79 static VirtIONet
*to_virtio_net(VirtIODevice
*vdev
)
81 return (VirtIONet
*)vdev
;
84 static void virtio_net_update_config(VirtIODevice
*vdev
, uint8_t *config
)
86 VirtIONet
*n
= to_virtio_net(vdev
);
87 struct virtio_net_config netcfg
;
89 memcpy(netcfg
.mac
, n
->mac
, 6);
90 memcpy(config
, &netcfg
, sizeof(netcfg
));
93 static uint32_t virtio_net_get_features(VirtIODevice
*vdev
)
95 VirtIONet
*n
= to_virtio_net(vdev
);
96 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
97 uint32_t features
= (1 << VIRTIO_NET_F_MAC
);
99 if (tap_has_vnet_hdr(host
)) {
100 tap_using_vnet_hdr(host
, 1);
101 features
|= (1 << VIRTIO_NET_F_CSUM
);
102 features
|= (1 << VIRTIO_NET_F_GUEST_CSUM
);
103 features
|= (1 << VIRTIO_NET_F_GUEST_TSO4
);
104 features
|= (1 << VIRTIO_NET_F_GUEST_TSO6
);
105 features
|= (1 << VIRTIO_NET_F_GUEST_ECN
);
106 features
|= (1 << VIRTIO_NET_F_HOST_TSO4
);
107 features
|= (1 << VIRTIO_NET_F_HOST_TSO6
);
108 features
|= (1 << VIRTIO_NET_F_HOST_ECN
);
109 /* Kernel can't actually handle UFO in software currently. */
115 static void virtio_net_set_features(VirtIODevice
*vdev
, uint32_t features
)
117 VirtIONet
*n
= to_virtio_net(vdev
);
118 VLANClientState
*host
= n
->vc
->vlan
->first_client
;
120 if (!tap_has_vnet_hdr(host
) || !host
->set_offload
)
123 host
->set_offload(host
,
124 (features
>> VIRTIO_NET_F_GUEST_CSUM
) & 1,
125 (features
>> VIRTIO_NET_F_GUEST_TSO4
) & 1,
126 (features
>> VIRTIO_NET_F_GUEST_TSO6
) & 1,
127 (features
>> VIRTIO_NET_F_GUEST_ECN
) & 1);
/*
 * RX virtqueue notification handler (installed for rx_vq through
 * virtio_add_queue() in virtio_net_init()).  The visible statements use
 * neither vdev nor vq; the handler only calls qemu_kvm_notify_work().
 * NOTE(review): this listing omits lines (braces, and possibly a guard
 * before the call) -- confirm against the original file.
 */
132 static void virtio_net_handle_rx(VirtIODevice
*vdev
, VirtQueue
*vq
)
134 /* We now have RX buffers, signal to the IO thread to break out of the
135 select to re-poll the tap file descriptor */
137 qemu_kvm_notify_work();
140 static int virtio_net_can_receive(void *opaque
)
142 VirtIONet
*n
= opaque
;
144 if (n
->rx_vq
->vring
.avail
== NULL
||
145 !(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
148 if (n
->rx_vq
->vring
.avail
->idx
== n
->rx_vq
->last_avail_idx
) {
149 n
->rx_vq
->vring
.used
->flags
&= ~VRING_USED_F_NO_NOTIFY
;
153 n
->rx_vq
->vring
.used
->flags
|= VRING_USED_F_NO_NOTIFY
;
157 /* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
158 * it never finds out that the packets don't have valid checksums. This
159 * causes dhclient to get upset. Fedora's carried a patch for ages to
160 * fix this with Xen but it hasn't appeared in an upstream release of
163 * To avoid breaking existing guests, we catch udp packets and add
164 * checksums. This is terrible but it's better than hacking the guest
167 * N.B. if we introduce a zero-copy API, this operation is no longer free so
168 * we should provide a mechanism to disable it to avoid polluting the host
171 static void work_around_broken_dhclient(struct virtio_net_hdr
*hdr
,
172 const uint8_t *buf
, size_t size
)
174 if ((hdr
->flags
& VIRTIO_NET_HDR_F_NEEDS_CSUM
) && /* missing csum */
175 (size
> 27 && size
< 1500) && /* normal sized MTU */
176 (buf
[12] == 0x08 && buf
[13] == 0x00) && /* ethertype == IPv4 */
177 (buf
[23] == 17) && /* ip.protocol == UDP */
178 (buf
[34] == 0 && buf
[35] == 67)) { /* udp.srcport == bootps */
179 /* FIXME this cast is evil */
180 net_checksum_calculate((uint8_t *)buf
, size
);
181 hdr
->flags
&= ~VIRTIO_NET_HDR_F_NEEDS_CSUM
;
/*
 * Packet-receive callback registered through qemu_new_vlan_client() in
 * virtio_net_init(); opaque is the VirtIONet, buf/size describe the
 * incoming data.
 *
 * Visible steps: pop one element from rx_vq; require that its first in_sg
 * entry is exactly sizeof(struct virtio_net_hdr), printing an error to
 * stderr otherwise; point hdr at that entry and set gso_type to
 * VIRTIO_NET_HDR_GSO_NONE; when the first VLAN client has a vnet header,
 * copy the header from the front of buf and run
 * work_around_broken_dhclient() on the data at buf + offset; copy the data
 * into the in_sg entries indexed by i; finally push the element with the
 * byte count in total and notify the guest.
 *
 * NOTE(review): this listing omits lines -- the declarations of offset, i
 * and total, the statements that update them, and whatever follows the two
 * failed checks are not visible.  Confirm against the original file.
 */
185 static void virtio_net_receive(void *opaque
, const uint8_t *buf
, int size
)
187 VirtIONet
*n
= opaque
;
188 VirtQueueElement elem
;
189 struct virtio_net_hdr
*hdr
;
193 if (virtqueue_pop(n
->rx_vq
, &elem
) == 0)
196 if (elem
.in_num
< 1 || elem
.in_sg
[0].iov_len
!= sizeof(*hdr
)) {
197 fprintf(stderr
, "virtio-net header not in first element\n");
201 hdr
= (void *)elem
.in_sg
[0].iov_base
;
203 hdr
->gso_type
= VIRTIO_NET_HDR_GSO_NONE
;
206 total
= sizeof(*hdr
);
208 if (tap_has_vnet_hdr(n
->vc
->vlan
->first_client
)) {
209 memcpy(hdr
, buf
, sizeof(*hdr
));
211 work_around_broken_dhclient(hdr
, buf
+ offset
, size
- offset
);
214 /* copy in packet. ugh */
216 while (offset
< size
&& i
< elem
.in_num
) {
217 int len
= MIN(elem
.in_sg
[i
].iov_len
, size
- offset
);
218 memcpy(elem
.in_sg
[i
].iov_base
, buf
+ offset
, len
);
224 /* signal other side */
225 virtqueue_push(n
->rx_vq
, &elem
, total
);
226 virtio_notify(&n
->vdev
, n
->rx_vq
);
/*
 * Drain the TX virtqueue vq of device n.
 *
 * Visible steps: record whether the first VLAN client has a vnet header;
 * test the device status for VIRTIO_CONFIG_S_DRIVER_OK; then, for each
 * element popped from vq, check that the first out_sg entry is exactly
 * sizeof(struct virtio_net_hdr) (an error is printed to stderr otherwise),
 * pass out_sg/out_num to qemu_sendv_packet() on n->vc, push the element
 * back with the accumulated len and notify the guest.
 *
 * NOTE(review): lines are missing from this listing -- the declaration of
 * len, the statement guarded by the DRIVER_OK test, what follows the error
 * message, and the code around the "ignore the header" comment that uses
 * has_vnet_hdr are not visible.  Confirm against the original file.
 */
230 static void virtio_net_flush_tx(VirtIONet
*n
, VirtQueue
*vq
)
232 VirtQueueElement elem
;
233 int has_vnet_hdr
= tap_has_vnet_hdr(n
->vc
->vlan
->first_client
);
235 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
238 while (virtqueue_pop(vq
, &elem
)) {
240 unsigned int out_num
= elem
.out_num
;
241 struct iovec
*out_sg
= &elem
.out_sg
[0];
243 if (out_num
< 1 || out_sg
->iov_len
!= sizeof(struct virtio_net_hdr
)) {
244 fprintf(stderr
, "virtio-net header not in first element\n");
248 /* ignore the header if GSO is not supported */
252 len
+= sizeof(struct virtio_net_hdr
);
255 len
+= qemu_sendv_packet(n
->vc
, out_sg
, out_num
);
257 virtqueue_push(vq
, &elem
, len
);
258 virtio_notify(&n
->vdev
, vq
);
262 static void virtio_net_handle_tx(VirtIODevice
*vdev
, VirtQueue
*vq
)
264 VirtIONet
*n
= to_virtio_net(vdev
);
266 if (n
->tx_timer_active
) {
267 vq
->vring
.used
->flags
&= ~VRING_USED_F_NO_NOTIFY
;
268 qemu_del_timer(n
->tx_timer
);
269 n
->tx_timer_active
= 0;
270 virtio_net_flush_tx(n
, vq
);
272 qemu_mod_timer(n
->tx_timer
,
273 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
274 n
->tx_timer_active
= 1;
275 vq
->vring
.used
->flags
|= VRING_USED_F_NO_NOTIFY
;
279 static void virtio_net_tx_timer(void *opaque
)
281 VirtIONet
*n
= opaque
;
283 n
->tx_timer_active
= 0;
285 /* Just in case the driver is not ready on more */
286 if (!(n
->vdev
.status
& VIRTIO_CONFIG_S_DRIVER_OK
))
289 n
->tx_vq
->vring
.used
->flags
&= ~VRING_USED_F_NO_NOTIFY
;
290 virtio_net_flush_tx(n
, n
->tx_vq
);
293 static void virtio_net_save(QEMUFile
*f
, void *opaque
)
295 VirtIONet
*n
= opaque
;
297 virtio_save(&n
->vdev
, f
);
299 qemu_put_buffer(f
, n
->mac
, 6);
300 qemu_put_be32(f
, n
->tx_timer_active
);
/*
 * loadvm handler: restore the device from f.
 *
 * Visible steps: restore the generic virtio state with virtio_load(), then
 * read the 6-byte MAC and the tx_timer_active flag, in the same order
 * virtio_net_save() wrote them.  If the flag is set, the TX timer is
 * re-armed TX_TIMER_INTERVAL past the current vm_clock reading.
 *
 * NOTE(review): this listing omits lines -- any use of version_id and the
 * return statement(s) are not visible.  Confirm against the original file.
 */
303 static int virtio_net_load(QEMUFile
*f
, void *opaque
, int version_id
)
305 VirtIONet
*n
= opaque
;
310 virtio_load(&n
->vdev
, f
);
312 qemu_get_buffer(f
, n
->mac
, 6);
313 n
->tx_timer_active
= qemu_get_be32(f
);
315 if (n
->tx_timer_active
) {
316 qemu_mod_timer(n
->tx_timer
,
317 qemu_get_clock(vm_clock
) + TX_TIMER_INTERVAL
);
/*
 * Create a virtio-net PCI device on bus for the NIC described by nd and
 * return it as a PCIDevice pointer.
 *
 * Visible steps: obtain the device from virtio_init_pci() (passing
 * sizeof(VirtIONet)); install the update_config/get_features/set_features
 * callbacks; add the RX and TX queues (256 is passed for each --
 * presumably the ring size); copy the MAC from nd->macaddr; attach to
 * nd->vlan with virtio_net_receive/virtio_net_can_receive; create the TX
 * timer on vm_clock with tx_timer_active cleared; register the save/load
 * handlers under "virtio-net", passing the post-incremented static counter
 * virtio_net_id and the value 1.
 *
 * NOTE(review): lines are missing from this listing -- the declaration of
 * n, some virtio_init_pci() arguments, any failure check on its result and
 * any use of devfn are not visible.  Confirm against the original file.
 */
323 PCIDevice
*virtio_net_init(PCIBus
*bus
, NICInfo
*nd
, int devfn
)
326 static int virtio_net_id
;
328 n
= (VirtIONet
*)virtio_init_pci(bus
, "virtio-net", 6900, 0x1000,
331 6, sizeof(VirtIONet
));
335 n
->vdev
.update_config
= virtio_net_update_config
;
336 n
->vdev
.get_features
= virtio_net_get_features
;
337 n
->vdev
.set_features
= virtio_net_set_features
;
338 n
->rx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_rx
);
339 n
->tx_vq
= virtio_add_queue(&n
->vdev
, 256, virtio_net_handle_tx
);
340 memcpy(n
->mac
, nd
->macaddr
, 6);
341 n
->vc
= qemu_new_vlan_client(nd
->vlan
, virtio_net_receive
,
342 virtio_net_can_receive
, n
);
344 n
->tx_timer
= qemu_new_timer(vm_clock
, virtio_net_tx_timer
, n
);
345 n
->tx_timer_active
= 0;
347 register_savevm("virtio-net", virtio_net_id
++, 1,
348 virtio_net_save
, virtio_net_load
, n
);
350 return (PCIDevice
*)n
;