virtio_net: skb_orphan() and nf_reset() in xmit path.
[linux-2.6/kvm.git] / drivers / net / virtio_net.c
blobdc4c68718976c91b7604aa987990d4757ee5d4ff
1 /* A simple network driver using virtio.
3 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 //#define DEBUG
20 #include <linux/netdevice.h>
21 #include <linux/etherdevice.h>
22 #include <linux/ethtool.h>
23 #include <linux/module.h>
24 #include <linux/virtio.h>
25 #include <linux/virtio_ids.h>
26 #include <linux/virtio_net.h>
27 #include <linux/scatterlist.h>
28 #include <linux/if_vlan.h>
30 static int napi_weight = 128;
31 module_param(napi_weight, int, 0444);
33 static int csum = 1, gso = 1;
34 module_param(csum, bool, 0444);
35 module_param(gso, bool, 0444);
37 /* FIXME: MTU in config. */
38 #define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
39 #define GOOD_COPY_LEN 128
41 #define VIRTNET_SEND_COMMAND_SG_MAX 2
43 struct virtnet_info
45 struct virtio_device *vdev;
46 struct virtqueue *rvq, *svq, *cvq;
47 struct net_device *dev;
48 struct napi_struct napi;
49 unsigned int status;
51 /* The skb we couldn't send because buffers were full. */
52 struct sk_buff *last_xmit_skb;
54 /* If we need to free in a timer, this is it. */
55 struct timer_list xmit_free_timer;
57 /* Number of input buffers, and max we've ever had. */
58 unsigned int num, max;
60 /* For cleaning up after transmission. */
61 struct tasklet_struct tasklet;
62 bool free_in_tasklet;
64 /* I like... big packets and I cannot lie! */
65 bool big_packets;
67 /* Host will merge rx buffers for big packets (shake it! shake it!) */
68 bool mergeable_rx_bufs;
70 /* Receive & send queues. */
71 struct sk_buff_head recv;
72 struct sk_buff_head send;
74 /* Work struct for refilling if we run low on memory. */
75 struct delayed_work refill;
77 /* Chain pages by the private ptr. */
78 struct page *pages;
81 static inline void *skb_vnet_hdr(struct sk_buff *skb)
83 return (struct virtio_net_hdr *)skb->cb;
86 static void give_a_page(struct virtnet_info *vi, struct page *page)
88 page->private = (unsigned long)vi->pages;
89 vi->pages = page;
92 static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
94 unsigned int i;
96 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
97 give_a_page(vi, skb_shinfo(skb)->frags[i].page);
98 skb_shinfo(skb)->nr_frags = 0;
99 skb->data_len = 0;
102 static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
104 struct page *p = vi->pages;
106 if (p)
107 vi->pages = (struct page *)p->private;
108 else
109 p = alloc_page(gfp_mask);
110 return p;
113 static void skb_xmit_done(struct virtqueue *svq)
115 struct virtnet_info *vi = svq->vdev->priv;
117 /* Suppress further interrupts. */
118 svq->vq_ops->disable_cb(svq);
120 /* We were probably waiting for more output buffers. */
121 netif_wake_queue(vi->dev);
123 /* Make sure we re-xmit last_xmit_skb: if there are no more packets
124 * queued, start_xmit won't be called. */
125 tasklet_schedule(&vi->tasklet);
/*
 * Process one buffer returned by the receive virtqueue and pass the
 * resulting packet to the network stack.
 *
 * @dev: the net_device the packet arrived on
 * @skb: the skb that was posted for this buffer
 * @len: number of bytes the other side reported for the buffer
 *
 * The skb is consumed on every path: it is either passed to
 * netif_receive_skb() or freed via the frame_err/drop labels.
 */
128 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
129 unsigned len)
131 struct virtnet_info *vi = netdev_priv(dev);
132 struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
133 int err;
134 int i;
/* Anything shorter than a virtio header plus an Ethernet header is bogus. */
136 if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
137 pr_debug("%s: short packet %i\n", dev->name, len);
138 dev->stats.rx_length_errors++;
139 goto drop;
/*
 * Mergeable buffers: the data (header included) sits in the page held in
 * frags[0]; further buffers belonging to the same packet are fetched below.
 */
142 if (vi->mergeable_rx_bufs) {
143 struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
144 unsigned int copy;
145 char *p = page_address(skb_shinfo(skb)->frags[0].page);
/* A single buffer never holds more than one page. */
147 if (len > PAGE_SIZE)
148 len = PAGE_SIZE;
149 len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
/* Copy the header out of the page into the skb's header area. */
151 memcpy(hdr, p, sizeof(*mhdr));
152 p += sizeof(*mhdr);
/* Copy as much payload as fits into the skb's linear tailroom. */
154 copy = len;
155 if (copy > skb_tailroom(skb))
156 copy = skb_tailroom(skb);
158 memcpy(skb_put(skb, copy), p, copy);
160 len -= copy;
/*
 * If everything was copied, the page is no longer needed; otherwise
 * frags[0] is adjusted to cover only the payload left in the page.
 */
162 if (!len) {
163 give_a_page(vi, skb_shinfo(skb)->frags[0].page);
164 skb_shinfo(skb)->nr_frags--;
165 } else {
166 skb_shinfo(skb)->frags[0].page_offset +=
167 sizeof(*mhdr) + copy;
168 skb_shinfo(skb)->frags[0].size = len;
169 skb->data_len += len;
170 skb->len += len;
/* Pull in the remaining buffers announced by num_buffers. */
173 while (--mhdr->num_buffers) {
174 struct sk_buff *nskb;
176 i = skb_shinfo(skb)->nr_frags;
177 if (i >= MAX_SKB_FRAGS) {
178 pr_debug("%s: packet too long %d\n", dev->name,
179 len);
180 dev->stats.rx_length_errors++;
181 goto drop;
184 nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
185 if (!nskb) {
186 pr_debug("%s: rx error: %d buffers missing\n",
187 dev->name, mhdr->num_buffers);
188 dev->stats.rx_length_errors++;
189 goto drop;
/* The extra buffer is no longer posted: take it off the recv list. */
192 __skb_unlink(nskb, &vi->recv);
193 vi->num--;
/* Steal the page from nskb, then free the now page-less nskb. */
195 skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
196 skb_shinfo(nskb)->nr_frags = 0;
197 kfree_skb(nskb);
199 if (len > PAGE_SIZE)
200 len = PAGE_SIZE;
202 skb_shinfo(skb)->frags[i].size = len;
203 skb_shinfo(skb)->nr_frags++;
204 skb->data_len += len;
205 skb->len += len;
/* Non-mergeable buffers: the header is separate, so just strip its length. */
207 } else {
208 len -= sizeof(struct virtio_net_hdr);
/* Packet fits in the linear area: return the fragment pages. */
210 if (len <= MAX_PACKET_LEN)
211 trim_pages(vi, skb);
213 err = pskb_trim(skb, len);
214 if (err) {
215 pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
216 len, err);
217 dev->stats.rx_dropped++;
218 goto drop;
/* Account for the paged data and update receive statistics. */
222 skb->truesize += skb->data_len;
223 dev->stats.rx_bytes += skb->len;
224 dev->stats.rx_packets++;
/* The header says the checksum still has to be completed. */
226 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
227 pr_debug("Needs csum!\n");
228 if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
229 goto frame_err;
232 skb->protocol = eth_type_trans(skb, dev);
233 pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
234 ntohs(skb->protocol), skb->len, skb->pkt_type);
/* Translate the virtio GSO type (ECN bit masked off) to the skb GSO type. */
236 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
237 pr_debug("GSO!\n");
238 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
239 case VIRTIO_NET_HDR_GSO_TCPV4:
240 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
241 break;
242 case VIRTIO_NET_HDR_GSO_UDP:
243 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
244 break;
245 case VIRTIO_NET_HDR_GSO_TCPV6:
246 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
247 break;
248 default:
249 if (net_ratelimit())
250 printk(KERN_WARNING "%s: bad gso type %u.\n",
251 dev->name, hdr->gso_type);
252 goto frame_err;
255 if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
256 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
/* A GSO packet with a zero segment size is rejected as a frame error. */
258 skb_shinfo(skb)->gso_size = hdr->gso_size;
259 if (skb_shinfo(skb)->gso_size == 0) {
260 if (net_ratelimit())
261 printk(KERN_WARNING "%s: zero gso size.\n",
262 dev->name);
263 goto frame_err;
266 /* Header must be checked, and gso_segs computed. */
267 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
268 skb_shinfo(skb)->gso_segs = 0;
271 netif_receive_skb(skb);
272 return;
/* frame_err counts a frame error and then falls through to drop. */
274 frame_err:
275 dev->stats.rx_frame_errors++;
276 drop:
277 dev_kfree_skb(skb);
280 static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
282 struct sk_buff *skb;
283 struct scatterlist sg[2+MAX_SKB_FRAGS];
284 int num, err, i;
285 bool oom = false;
287 sg_init_table(sg, 2+MAX_SKB_FRAGS);
288 for (;;) {
289 struct virtio_net_hdr *hdr;
291 skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
292 if (unlikely(!skb)) {
293 oom = true;
294 break;
297 skb_reserve(skb, NET_IP_ALIGN);
298 skb_put(skb, MAX_PACKET_LEN);
300 hdr = skb_vnet_hdr(skb);
301 sg_set_buf(sg, hdr, sizeof(*hdr));
303 if (vi->big_packets) {
304 for (i = 0; i < MAX_SKB_FRAGS; i++) {
305 skb_frag_t *f = &skb_shinfo(skb)->frags[i];
306 f->page = get_a_page(vi, gfp);
307 if (!f->page)
308 break;
310 f->page_offset = 0;
311 f->size = PAGE_SIZE;
313 skb->data_len += PAGE_SIZE;
314 skb->len += PAGE_SIZE;
316 skb_shinfo(skb)->nr_frags++;
320 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
321 skb_queue_head(&vi->recv, skb);
323 err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
324 if (err < 0) {
325 skb_unlink(skb, &vi->recv);
326 trim_pages(vi, skb);
327 kfree_skb(skb);
328 break;
330 vi->num++;
332 if (unlikely(vi->num > vi->max))
333 vi->max = vi->num;
334 vi->rvq->vq_ops->kick(vi->rvq);
335 return !oom;
338 /* Returns false if we couldn't fill entirely (OOM). */
339 static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
341 struct sk_buff *skb;
342 struct scatterlist sg[1];
343 int err;
344 bool oom = false;
346 if (!vi->mergeable_rx_bufs)
347 return try_fill_recv_maxbufs(vi, gfp);
349 for (;;) {
350 skb_frag_t *f;
352 skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
353 if (unlikely(!skb)) {
354 oom = true;
355 break;
358 skb_reserve(skb, NET_IP_ALIGN);
360 f = &skb_shinfo(skb)->frags[0];
361 f->page = get_a_page(vi, gfp);
362 if (!f->page) {
363 oom = true;
364 kfree_skb(skb);
365 break;
368 f->page_offset = 0;
369 f->size = PAGE_SIZE;
371 skb_shinfo(skb)->nr_frags++;
373 sg_init_one(sg, page_address(f->page), PAGE_SIZE);
374 skb_queue_head(&vi->recv, skb);
376 err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
377 if (err < 0) {
378 skb_unlink(skb, &vi->recv);
379 kfree_skb(skb);
380 break;
382 vi->num++;
384 if (unlikely(vi->num > vi->max))
385 vi->max = vi->num;
386 vi->rvq->vq_ops->kick(vi->rvq);
387 return !oom;
390 static void skb_recv_done(struct virtqueue *rvq)
392 struct virtnet_info *vi = rvq->vdev->priv;
393 /* Schedule NAPI, Suppress further interrupts if successful. */
394 if (napi_schedule_prep(&vi->napi)) {
395 rvq->vq_ops->disable_cb(rvq);
396 __napi_schedule(&vi->napi);
400 static void refill_work(struct work_struct *work)
402 struct virtnet_info *vi;
403 bool still_empty;
405 vi = container_of(work, struct virtnet_info, refill.work);
406 napi_disable(&vi->napi);
407 try_fill_recv(vi, GFP_KERNEL);
408 still_empty = (vi->num == 0);
409 napi_enable(&vi->napi);
411 /* In theory, this can happen: if we don't get any buffers in
412 * we will *never* try to fill again. */
413 if (still_empty)
414 schedule_delayed_work(&vi->refill, HZ/2);
417 static int virtnet_poll(struct napi_struct *napi, int budget)
419 struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
420 struct sk_buff *skb = NULL;
421 unsigned int len, received = 0;
423 again:
424 while (received < budget &&
425 (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
426 __skb_unlink(skb, &vi->recv);
427 receive_skb(vi->dev, skb, len);
428 vi->num--;
429 received++;
432 if (vi->num < vi->max / 2) {
433 if (!try_fill_recv(vi, GFP_ATOMIC))
434 schedule_delayed_work(&vi->refill, 0);
437 /* Out of packets? */
438 if (received < budget) {
439 napi_complete(napi);
440 if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
441 && napi_schedule_prep(napi)) {
442 vi->rvq->vq_ops->disable_cb(vi->rvq);
443 __napi_schedule(napi);
444 goto again;
448 return received;
451 static void free_old_xmit_skbs(struct virtnet_info *vi)
453 struct sk_buff *skb;
454 unsigned int len;
456 while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
457 pr_debug("Sent skb %p\n", skb);
458 __skb_unlink(skb, &vi->send);
459 vi->dev->stats.tx_bytes += skb->len;
460 vi->dev->stats.tx_packets++;
461 kfree_skb(skb);
465 /* If the virtio transport doesn't always notify us when all in-flight packets
466 * are consumed, we fall back to using this function on a timer to free them. */
467 static void xmit_free(unsigned long data)
469 struct virtnet_info *vi = (void *)data;
471 netif_tx_lock(vi->dev);
473 free_old_xmit_skbs(vi);
475 if (!skb_queue_empty(&vi->send))
476 mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
478 netif_tx_unlock(vi->dev);
481 static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
483 int num, err;
484 struct scatterlist sg[2+MAX_SKB_FRAGS];
485 struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb);
486 struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
487 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
489 sg_init_table(sg, 2+MAX_SKB_FRAGS);
491 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
493 if (skb->ip_summed == CHECKSUM_PARTIAL) {
494 hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
495 hdr->csum_start = skb->csum_start - skb_headroom(skb);
496 hdr->csum_offset = skb->csum_offset;
497 } else {
498 hdr->flags = 0;
499 hdr->csum_offset = hdr->csum_start = 0;
502 if (skb_is_gso(skb)) {
503 hdr->hdr_len = skb_headlen(skb);
504 hdr->gso_size = skb_shinfo(skb)->gso_size;
505 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
506 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
507 else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
508 hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
509 else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
510 hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
511 else
512 BUG();
513 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
514 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
515 } else {
516 hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
517 hdr->gso_size = hdr->hdr_len = 0;
520 mhdr->num_buffers = 0;
522 /* Encode metadata header at front. */
523 if (vi->mergeable_rx_bufs)
524 sg_set_buf(sg, mhdr, sizeof(*mhdr));
525 else
526 sg_set_buf(sg, hdr, sizeof(*hdr));
528 num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
530 err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
531 if (err >= 0 && !vi->free_in_tasklet) {
532 /* Don't wait up for transmitted skbs to be freed. */
533 skb_orphan(skb);
534 nf_reset(skb);
535 mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
538 return err;
541 static void xmit_tasklet(unsigned long data)
543 struct virtnet_info *vi = (void *)data;
545 netif_tx_lock_bh(vi->dev);
546 if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) >= 0) {
547 vi->svq->vq_ops->kick(vi->svq);
548 vi->last_xmit_skb = NULL;
550 if (vi->free_in_tasklet)
551 free_old_xmit_skbs(vi);
552 netif_tx_unlock_bh(vi->dev);
/*
 * ndo_start_xmit handler.
 *
 * Reaps completed skbs, retries any skb left over from a previous full
 * ring, then queues @skb.  When the ring is full the queue is stopped and
 * at most one skb is parked in vi->last_xmit_skb; a second one is dropped.
 * Every path returns NETDEV_TX_OK.
 */
555 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
557 struct virtnet_info *vi = netdev_priv(dev);
559 again:
560 /* Free up any pending old buffers before queueing new ones. */
561 free_old_xmit_skbs(vi);
563 /* If we have a buffer left over from last time, send it now. */
564 if (unlikely(vi->last_xmit_skb) &&
565 xmit_skb(vi, vi->last_xmit_skb) < 0)
566 goto stop_queue;
568 vi->last_xmit_skb = NULL;
570 /* Put new one in send queue and do transmit */
/* skb is NULL here when we came back via "again" after parking it. */
571 if (likely(skb)) {
572 __skb_queue_head(&vi->send, skb);
573 if (xmit_skb(vi, skb) < 0) {
/* Park the skb and clear the local so it is not dropped below. */
574 vi->last_xmit_skb = skb;
575 skb = NULL;
576 goto stop_queue;
579 done:
580 vi->svq->vq_ops->kick(vi->svq);
581 return NETDEV_TX_OK;
583 stop_queue:
584 pr_debug("%s: virtio not prepared to send\n", dev->name);
585 netif_stop_queue(dev);
587 /* Activate callback for using skbs: if this returns false it
588 * means some were used in the meantime. */
589 if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
590 vi->svq->vq_ops->disable_cb(vi->svq);
591 netif_start_queue(dev);
592 goto again;
/* skb is still set only when the retry of last_xmit_skb failed. */
594 if (skb) {
595 /* Drop this skb: we only queue one. */
596 vi->dev->stats.tx_dropped++;
597 kfree_skb(skb);
599 goto done;
602 static int virtnet_set_mac_address(struct net_device *dev, void *p)
604 struct virtnet_info *vi = netdev_priv(dev);
605 struct virtio_device *vdev = vi->vdev;
606 int ret;
608 ret = eth_mac_addr(dev, p);
609 if (ret)
610 return ret;
612 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
613 vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
614 dev->dev_addr, dev->addr_len);
616 return 0;
#ifdef CONFIG_NET_POLL_CONTROLLER
/* ndo_poll_controller handler: simply schedule the NAPI poll. */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *priv = netdev_priv(dev);

	napi_schedule(&priv->napi);
}
#endif
628 static int virtnet_open(struct net_device *dev)
630 struct virtnet_info *vi = netdev_priv(dev);
632 napi_enable(&vi->napi);
634 /* If all buffers were filled by other side before we napi_enabled, we
635 * won't get another interrupt, so process any outstanding packets
636 * now. virtnet_poll wants re-enable the queue, so we disable here.
637 * We synchronize against interrupts via NAPI_STATE_SCHED */
638 if (napi_schedule_prep(&vi->napi)) {
639 vi->rvq->vq_ops->disable_cb(vi->rvq);
640 __napi_schedule(&vi->napi);
642 return 0;
646 * Send command via the control virtqueue and check status. Commands
647 * supported by the hypervisor, as indicated by feature bits, should
648 * never fail unless improperly formated.
650 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
651 struct scatterlist *data, int out, int in)
653 struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
654 struct virtio_net_ctrl_hdr ctrl;
655 virtio_net_ctrl_ack status = ~0;
656 unsigned int tmp;
657 int i;
659 /* Caller should know better */
660 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
661 (out + in > VIRTNET_SEND_COMMAND_SG_MAX));
663 out++; /* Add header */
664 in++; /* Add return status */
666 ctrl.class = class;
667 ctrl.cmd = cmd;
669 sg_init_table(sg, out + in);
671 sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
672 for_each_sg(data, s, out + in - 2, i)
673 sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
674 sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
676 BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0);
678 vi->cvq->vq_ops->kick(vi->cvq);
681 * Spin for a response, the kick causes an ioport write, trapping
682 * into the hypervisor, so the request should be handled immediately.
684 while (!vi->cvq->vq_ops->get_buf(vi->cvq, &tmp))
685 cpu_relax();
687 return status == VIRTIO_NET_OK;
690 static int virtnet_close(struct net_device *dev)
692 struct virtnet_info *vi = netdev_priv(dev);
694 napi_disable(&vi->napi);
696 return 0;
699 static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
701 struct virtnet_info *vi = netdev_priv(dev);
702 struct virtio_device *vdev = vi->vdev;
704 if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
705 return -ENOSYS;
707 return ethtool_op_set_tx_hw_csum(dev, data);
710 static void virtnet_set_rx_mode(struct net_device *dev)
712 struct virtnet_info *vi = netdev_priv(dev);
713 struct scatterlist sg[2];
714 u8 promisc, allmulti;
715 struct virtio_net_ctrl_mac *mac_data;
716 struct dev_addr_list *addr;
717 struct netdev_hw_addr *ha;
718 void *buf;
719 int i;
721 /* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
722 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
723 return;
725 promisc = ((dev->flags & IFF_PROMISC) != 0);
726 allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
728 sg_init_one(sg, &promisc, sizeof(promisc));
730 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
731 VIRTIO_NET_CTRL_RX_PROMISC,
732 sg, 1, 0))
733 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
734 promisc ? "en" : "dis");
736 sg_init_one(sg, &allmulti, sizeof(allmulti));
738 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
739 VIRTIO_NET_CTRL_RX_ALLMULTI,
740 sg, 1, 0))
741 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
742 allmulti ? "en" : "dis");
744 /* MAC filter - use one buffer for both lists */
745 mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
746 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
747 if (!buf) {
748 dev_warn(&dev->dev, "No memory for MAC address buffer\n");
749 return;
752 sg_init_table(sg, 2);
754 /* Store the unicast list and count in the front of the buffer */
755 mac_data->entries = dev->uc.count;
756 i = 0;
757 list_for_each_entry(ha, &dev->uc.list, list)
758 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
760 sg_set_buf(&sg[0], mac_data,
761 sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
763 /* multicast list and count fill the end */
764 mac_data = (void *)&mac_data->macs[dev->uc.count][0];
766 mac_data->entries = dev->mc_count;
767 addr = dev->mc_list;
768 for (i = 0; i < dev->mc_count; i++, addr = addr->next)
769 memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);
771 sg_set_buf(&sg[1], mac_data,
772 sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN));
774 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
775 VIRTIO_NET_CTRL_MAC_TABLE_SET,
776 sg, 2, 0))
777 dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");
779 kfree(buf);
782 static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
784 struct virtnet_info *vi = netdev_priv(dev);
785 struct scatterlist sg;
787 sg_init_one(&sg, &vid, sizeof(vid));
789 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
790 VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
791 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
794 static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
796 struct virtnet_info *vi = netdev_priv(dev);
797 struct scatterlist sg;
799 sg_init_one(&sg, &vid, sizeof(vid));
801 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
802 VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
803 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
806 static const struct ethtool_ops virtnet_ethtool_ops = {
807 .set_tx_csum = virtnet_set_tx_csum,
808 .set_sg = ethtool_op_set_sg,
809 .set_tso = ethtool_op_set_tso,
810 .set_ufo = ethtool_op_set_ufo,
811 .get_link = ethtool_op_get_link,
814 #define MIN_MTU 68
815 #define MAX_MTU 65535
817 static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
819 if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
820 return -EINVAL;
821 dev->mtu = new_mtu;
822 return 0;
825 static const struct net_device_ops virtnet_netdev = {
826 .ndo_open = virtnet_open,
827 .ndo_stop = virtnet_close,
828 .ndo_start_xmit = start_xmit,
829 .ndo_validate_addr = eth_validate_addr,
830 .ndo_set_mac_address = virtnet_set_mac_address,
831 .ndo_set_rx_mode = virtnet_set_rx_mode,
832 .ndo_change_mtu = virtnet_change_mtu,
833 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
834 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
835 #ifdef CONFIG_NET_POLL_CONTROLLER
836 .ndo_poll_controller = virtnet_netpoll,
837 #endif
840 static void virtnet_update_status(struct virtnet_info *vi)
842 u16 v;
844 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
845 return;
847 vi->vdev->config->get(vi->vdev,
848 offsetof(struct virtio_net_config, status),
849 &v, sizeof(v));
851 /* Ignore unknown (future) status bits */
852 v &= VIRTIO_NET_S_LINK_UP;
854 if (vi->status == v)
855 return;
857 vi->status = v;
859 if (vi->status & VIRTIO_NET_S_LINK_UP) {
860 netif_carrier_on(vi->dev);
861 netif_wake_queue(vi->dev);
862 } else {
863 netif_carrier_off(vi->dev);
864 netif_stop_queue(vi->dev);
868 static void virtnet_config_changed(struct virtio_device *vdev)
870 struct virtnet_info *vi = vdev->priv;
872 virtnet_update_status(vi);
875 static int virtnet_probe(struct virtio_device *vdev)
877 int err;
878 struct net_device *dev;
879 struct virtnet_info *vi;
880 struct virtqueue *vqs[3];
881 vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
882 const char *names[] = { "input", "output", "control" };
883 int nvqs;
885 /* Allocate ourselves a network device with room for our info */
886 dev = alloc_etherdev(sizeof(struct virtnet_info));
887 if (!dev)
888 return -ENOMEM;
890 /* Set up network device as normal. */
891 dev->netdev_ops = &virtnet_netdev;
892 dev->features = NETIF_F_HIGHDMA;
893 SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
894 SET_NETDEV_DEV(dev, &vdev->dev);
896 /* Do we support "hardware" checksums? */
897 if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
898 /* This opens up the world of extra features. */
899 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
900 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
901 dev->features |= NETIF_F_TSO | NETIF_F_UFO
902 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
904 /* Individual feature bits: what can host handle? */
905 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
906 dev->features |= NETIF_F_TSO;
907 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
908 dev->features |= NETIF_F_TSO6;
909 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
910 dev->features |= NETIF_F_TSO_ECN;
911 if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
912 dev->features |= NETIF_F_UFO;
915 /* Configuration may specify what MAC to use. Otherwise random. */
916 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
917 vdev->config->get(vdev,
918 offsetof(struct virtio_net_config, mac),
919 dev->dev_addr, dev->addr_len);
920 } else
921 random_ether_addr(dev->dev_addr);
923 /* Set up our device-specific information */
924 vi = netdev_priv(dev);
925 netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
926 vi->dev = dev;
927 vi->vdev = vdev;
928 vdev->priv = vi;
929 vi->pages = NULL;
930 INIT_DELAYED_WORK(&vi->refill, refill_work);
932 /* If they give us a callback when all buffers are done, we don't need
933 * the timer. */
934 vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
936 /* If we can receive ANY GSO packets, we must allocate large ones. */
937 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
938 || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
939 || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
940 vi->big_packets = true;
942 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
943 vi->mergeable_rx_bufs = true;
945 /* We expect two virtqueues, receive then send,
946 * and optionally control. */
947 nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
949 err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
950 if (err)
951 goto free;
953 vi->rvq = vqs[0];
954 vi->svq = vqs[1];
956 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
957 vi->cvq = vqs[2];
959 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
960 dev->features |= NETIF_F_HW_VLAN_FILTER;
963 /* Initialize our empty receive and send queues. */
964 skb_queue_head_init(&vi->recv);
965 skb_queue_head_init(&vi->send);
967 tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);
969 if (!vi->free_in_tasklet)
970 setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);
972 err = register_netdev(dev);
973 if (err) {
974 pr_debug("virtio_net: registering device failed\n");
975 goto free_vqs;
978 /* Last of all, set up some receive buffers. */
979 try_fill_recv(vi, GFP_KERNEL);
981 /* If we didn't even get one input buffer, we're useless. */
982 if (vi->num == 0) {
983 err = -ENOMEM;
984 goto unregister;
987 vi->status = VIRTIO_NET_S_LINK_UP;
988 virtnet_update_status(vi);
989 netif_carrier_on(dev);
991 pr_debug("virtnet: registered device %s\n", dev->name);
992 return 0;
994 unregister:
995 unregister_netdev(dev);
996 cancel_delayed_work_sync(&vi->refill);
997 free_vqs:
998 vdev->config->del_vqs(vdev);
999 free:
1000 free_netdev(dev);
1001 return err;
1004 static void virtnet_remove(struct virtio_device *vdev)
1006 struct virtnet_info *vi = vdev->priv;
1007 struct sk_buff *skb;
1009 /* Stop all the virtqueues. */
1010 vdev->config->reset(vdev);
1012 if (!vi->free_in_tasklet)
1013 del_timer_sync(&vi->xmit_free_timer);
1015 /* Free our skbs in send and recv queues, if any. */
1016 while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
1017 kfree_skb(skb);
1018 vi->num--;
1020 __skb_queue_purge(&vi->send);
1022 BUG_ON(vi->num != 0);
1024 unregister_netdev(vi->dev);
1025 cancel_delayed_work_sync(&vi->refill);
1027 vdev->config->del_vqs(vi->vdev);
1029 while (vi->pages)
1030 __free_pages(get_a_page(vi, GFP_KERNEL), 0);
1032 free_netdev(vi->dev);
1035 static struct virtio_device_id id_table[] = {
1036 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
1037 { 0 },
1040 static unsigned int features[] = {
1041 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
1042 VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
1043 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
1044 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
1045 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
1046 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
1047 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1048 VIRTIO_F_NOTIFY_ON_EMPTY,
1051 static struct virtio_driver virtio_net = {
1052 .feature_table = features,
1053 .feature_table_size = ARRAY_SIZE(features),
1054 .driver.name = KBUILD_MODNAME,
1055 .driver.owner = THIS_MODULE,
1056 .id_table = id_table,
1057 .probe = virtnet_probe,
1058 .remove = __devexit_p(virtnet_remove),
1059 .config_changed = virtnet_config_changed,
1062 static int __init init(void)
1064 return register_virtio_driver(&virtio_net);
1067 static void __exit fini(void)
1069 unregister_virtio_driver(&virtio_net);
1071 module_init(init);
1072 module_exit(fini);
1074 MODULE_DEVICE_TABLE(virtio, id_table);
1075 MODULE_DESCRIPTION("Virtio network driver");
1076 MODULE_LICENSE("GPL");