/*
 * tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach
 * File: drivers/net/tun.c
 */
/*
 *  TUN - Universal TUN/TAP device driver.
 *  Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
 *
 *  Changes:
 *
 *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
 *    Add TUNSETLINK ioctl to set the link encapsulation
 *
 *  Mark Smith <markzzzsmith@yahoo.com.au>
 *    Use random_ether_addr() for tap MAC address.
 *
 *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
 *    Fixes in packet dropping, queue length setting and queue wakeup.
 *    Increased default tx queue length.
 *    Added ethtool API.
 *    Minor cleanups
 *
 *  Daniel Podlejski <underley@underley.eu.org>
 *    Modifications for 2.3.99-pre5 kernel.
 */
#define DRV_NAME	"tun"
#define DRV_VERSION	"1.6"
#define DRV_DESCRIPTION	"Universal TUN/TAP device driver"
#define DRV_COPYRIGHT	"(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/poll.h>
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/miscdevice.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/crc32.h>
#include <linux/nsproxy.h>
#include <linux/virtio_net.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include <asm/system.h>
#include <asm/uaccess.h>

/* Uncomment to enable debugging */
/* #define TUN_DEBUG 1 */

#ifdef TUN_DEBUG
static int debug;

/* NOTE: DBG expects a local variable named `tun` to be in scope at
 * every call site; DBG1 is gated on the module-wide `debug` level. */
#define DBG  if(tun->debug)printk
#define DBG1 if(debug==2)printk
#else
#define DBG( a... )
#define DBG1( a... )
#endif
/* Transmit-path MAC filter for TAP devices: the first FLT_EXACT_COUNT
 * addresses are matched exactly, the remainder via a 64-bit CRC hash. */
#define FLT_EXACT_COUNT 8
struct tap_filter {
	unsigned int    count;    /* Number of addrs. Zero means disabled */
	u32             mask[2];  /* Mask of the hashed addrs */
	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
};

/* Per-open-file state for /dev/net/tun; links the fd to its device. */
struct tun_file {
	struct tun_struct *tun;   /* Attached device, NULL when detached */
	struct net *net;          /* Namespace captured at open() time */
};

/* Per-device private state (lives in netdev_priv of the net_device). */
struct tun_struct {
	struct tun_file		*tfile;   /* Attached file, NULL when detached */
	unsigned int 		flags;    /* TUN_* flag bits */
	uid_t			owner;    /* -1 means "no owner restriction" */
	gid_t			group;    /* -1 means "no group restriction" */

	wait_queue_head_t	read_wait; /* Readers sleeping on empty readq */
	struct sk_buff_head	readq;     /* Packets queued for userspace */

	struct net_device	*dev;
	struct fasync_struct	*fasync;

	struct tap_filter       txflt;

#ifdef TUN_DEBUG
	int debug;
#endif
};
/*
 * Attach an open tun file to a device.  Caller must hold the RTNL lock;
 * the tx lock makes the tfile/tun pointer swap atomic with respect to
 * tun_net_xmit() running in BH context.
 *
 * Returns 0 on success, -EPERM if the caller fails the owner/group
 * check, -EINVAL if the file is already attached, -EBUSY if the device
 * already has an attached file.
 */
static int tun_attach(struct tun_struct *tun, struct file *file)
{
	struct tun_file *tfile = file->private_data;
	const struct cred *cred = current_cred();
	int err;

	ASSERT_RTNL();

	/* Check permissions */
	if (((tun->owner != -1 && cred->euid != tun->owner) ||
	     (tun->group != -1 && cred->egid != tun->group)) &&
	     !capable(CAP_NET_ADMIN))
		return -EPERM;

	netif_tx_lock_bh(tun->dev);

	err = -EINVAL;
	if (tfile->tun)
		goto out;

	err = -EBUSY;
	if (tun->tfile)
		goto out;

	err = 0;
	tfile->tun = tun;
	tun->tfile = tfile;

out:
	netif_tx_unlock_bh(tun->dev);
	return err;
}
/*
 * Sever the file<->device association (inverse of tun_attach) and throw
 * away any packets still queued for userspace.  The tx lock keeps the
 * pointer clearing atomic with respect to tun_net_xmit().
 */
static void __tun_detach(struct tun_struct *tun)
{
	struct tun_file *tfile = tun->tfile;

	/* Detach from net device */
	netif_tx_lock_bh(tun->dev);
	tfile->tun = NULL;
	tun->tfile = NULL;
	netif_tx_unlock_bh(tun->dev);

	/* Drop read queue */
	skb_queue_purge(&tun->readq);
}
/* Map a tun_file to its attached device (NULL if detached). */
static struct tun_struct *__tun_get(struct tun_file *tfile)
{
	return tfile->tun;
}

/* Convenience wrapper: look up the device behind an open file. */
static struct tun_struct *tun_get(struct file *file)
{
	return __tun_get(file->private_data);
}

/* Release a reference obtained via tun_get().  No refcounting yet, so
 * this is intentionally a no-op; kept so call sites stay balanced. */
static void tun_put(struct tun_struct *tun)
{
	/* Noop for now */
}
/* TAP filtering */

/* Record an address in the 64-bit hash mask: the top 6 bits of the
 * Ethernet CRC select one of 64 bucket bits across mask[0..1]. */
static void addr_hash_set(u32 *mask, const u8 *addr)
{
	int n = ether_crc(ETH_ALEN, addr) >> 26;
	mask[n >> 5] |= (1 << (n & 31));
}

/* Test whether an address's hash bucket bit is set in the mask. */
static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
{
	int n = ether_crc(ETH_ALEN, addr) >> 26;
	return mask[n >> 5] & (1 << (n & 31));
}
/*
 * Install a new TX MAC filter from the userspace-supplied tun_filter
 * (TUNSETTXFILTER).  The first FLT_EXACT_COUNT addresses become exact
 * matches; the rest are folded into the hash mask.  The filter is
 * briefly disabled (count = 0) around the update, so the barriers and
 * statement order here are load-bearing.
 *
 * Returns the number of exact filters installed, or a negative errno.
 */
static int update_filter(struct tap_filter *filter, void __user *arg)
{
	struct { u8 u[ETH_ALEN]; } *addr;
	struct tun_filter uf;
	int err, alen, n, nexact;

	if (copy_from_user(&uf, arg, sizeof(uf)))
		return -EFAULT;

	if (!uf.count) {
		/* Disabled */
		filter->count = 0;
		return 0;
	}

	alen = ETH_ALEN * uf.count;
	addr = kmalloc(alen, GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	if (copy_from_user(addr, arg + sizeof(uf), alen)) {
		err = -EFAULT;
		goto done;
	}

	/* The filter is updated without holding any locks. Which is
	 * perfectly safe. We disable it first and in the worst
	 * case we'll accept a few undesired packets. */
	filter->count = 0;
	wmb();

	/* Use first set of addresses as an exact filter */
	for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
		memcpy(filter->addr[n], addr[n].u, ETH_ALEN);

	nexact = n;

	/* The rest is hashed */
	memset(filter->mask, 0, sizeof(filter->mask));
	for (; n < uf.count; n++)
		addr_hash_set(filter->mask, addr[n].u);

	/* For ALLMULTI just set the mask to all ones.
	 * This overrides the mask populated above. */
	if ((uf.flags & TUN_FLT_ALLMULTI))
		memset(filter->mask, ~0, sizeof(filter->mask));

	/* Now enable the filter */
	wmb();
	filter->count = nexact;

	/* Return the number of exact filters */
	err = nexact;

done:
	kfree(addr);
	return err;
}
/* Returns: 0 - drop, !=0 - accept */
static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
{
	/* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
	 * at this point. */
	struct ethhdr *eh = (struct ethhdr *) skb->data;
	int i;

	/* Exact match */
	for (i = 0; i < filter->count; i++)
		if (!compare_ether_addr(eh->h_dest, filter->addr[i]))
			return 1;

	/* Inexact match (multicast only) */
	if (is_multicast_ether_addr(eh->h_dest))
		return addr_hash_test(filter->mask, eh->h_dest);

	/* Unicast destination that matched no exact entry: drop. */
	return 0;
}
269 * Checks whether the packet is accepted or not.
270 * Returns: 0 - drop, !=0 - accept
272 static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
274 if (!filter->count)
275 return 1;
277 return run_filter(filter, skb);
/* Network device part of the driver */

static const struct ethtool_ops tun_ethtool_ops;

/* Net device open: allow the stack to start handing us packets. */
static int tun_net_open(struct net_device *dev)
{
	netif_start_queue(dev);
	return 0;
}

/* Net device close: stop the transmit queue. */
static int tun_net_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
/* Net device start xmit.
 *
 * Runs in BH context under the device tx lock, which is what makes the
 * tun->tfile test atomic with respect to tun_attach()/__tun_detach().
 * Packets are queued on readq for the reader side; when the queue fills
 * in normal mode we stop the netdev queue and let the packet scheduler
 * back off, while in TUN_ONE_QUEUE mode we drop here ourselves. */
static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);

	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);

	/* Drop packet if interface is not attached */
	if (!tun->tfile)
		goto drop;

	/* Drop if the filter does not like it.
	 * This is a noop if the filter is disabled.
	 * Filter can be enabled only for the TAP devices. */
	if (!check_filter(&tun->txflt, skb))
		goto drop;

	if (skb_queue_len(&tun->readq) >= dev->tx_queue_len) {
		if (!(tun->flags & TUN_ONE_QUEUE)) {
			/* Normal queueing mode. */
			/* Packet scheduler handles dropping of further packets. */
			netif_stop_queue(dev);

			/* We won't see all dropped packets individually, so overrun
			 * error is more appropriate. */
			dev->stats.tx_fifo_errors++;
		} else {
			/* Single queue mode.
			 * Driver handles dropping of all packets itself. */
			goto drop;
		}
	}

	/* Enqueue packet */
	skb_queue_tail(&tun->readq, skb);
	dev->trans_start = jiffies;

	/* Notify and wake up reader process */
	if (tun->flags & TUN_FASYNC)
		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
	wake_up_interruptible(&tun->read_wait);
	return 0;

drop:
	dev->stats.tx_dropped++;
	kfree_skb(skb);
	return 0;
}
static void tun_net_mclist(struct net_device *dev)
{
	/*
	 * This callback is supposed to deal with mc filter in
	 * _rx_ path and has nothing to do with the _tx_ path.
	 * In rx path we always accept everything userspace gives us.
	 */
	return;
}
357 #define MIN_MTU 68
358 #define MAX_MTU 65535
360 static int
361 tun_net_change_mtu(struct net_device *dev, int new_mtu)
363 if (new_mtu < MIN_MTU || new_mtu + dev->hard_header_len > MAX_MTU)
364 return -EINVAL;
365 dev->mtu = new_mtu;
366 return 0;
/* Ops for point-to-point TUN devices: no L2, so no MAC helpers. */
static const struct net_device_ops tun_netdev_ops = {
	.ndo_open		= tun_net_open,
	.ndo_stop		= tun_net_close,
	.ndo_start_xmit		= tun_net_xmit,
	.ndo_change_mtu		= tun_net_change_mtu,
};

/* Ops for Ethernet TAP devices: adds multicast list and MAC address
 * handling on top of the TUN set. */
static const struct net_device_ops tap_netdev_ops = {
	.ndo_open		= tun_net_open,
	.ndo_stop		= tun_net_close,
	.ndo_start_xmit		= tun_net_xmit,
	.ndo_change_mtu		= tun_net_change_mtu,
	.ndo_set_multicast_list	= tun_net_mclist,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
};
/* Initialize net device.
 * Configures the netdev according to the device type chosen at
 * TUNSETIFF time: a headerless point-to-point interface for TUN, or a
 * full Ethernet interface with a random MAC for TAP. */
static void tun_net_init(struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);

	switch (tun->flags & TUN_TYPE_MASK) {
	case TUN_TUN_DEV:
		dev->netdev_ops = &tun_netdev_ops;

		/* Point-to-Point TUN Device */
		dev->hard_header_len = 0;
		dev->addr_len = 0;
		dev->mtu = 1500;

		/* Zero header length */
		dev->type = ARPHRD_NONE;
		dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
		break;

	case TUN_TAP_DEV:
		dev->netdev_ops = &tap_netdev_ops;
		/* Ethernet TAP Device */
		ether_setup(dev);

		random_ether_addr(dev->dev_addr);

		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
		break;
	}
}
/* Character device part */

/* Poll: the device is always writable; readable when readq is
 * non-empty.  POLLERR if the fd is not attached to a device. */
static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
{
	struct tun_struct *tun = tun_get(file);
	unsigned int mask = POLLOUT | POLLWRNORM;

	if (!tun)
		return POLLERR;

	DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);

	poll_wait(file, &tun->read_wait, wait);

	if (!skb_queue_empty(&tun->readq))
		mask |= POLLIN | POLLRDNORM;

	tun_put(tun);
	return mask;
}
/* prepad is the amount to reserve at front.  len is length after that.
 * linear is a hint as to how much to copy (usually headers).
 *
 * First tries a single linear skb; if that fails (or would need a
 * large contiguous allocation) falls back to a small linear head plus
 * up to MAX_SKB_FRAGS whole pages.  Returns NULL when len cannot be
 * satisfied or page allocation fails partway. */
static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
				     gfp_t gfp)
{
	struct sk_buff *skb;
	unsigned int i;

	skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
	if (skb) {
		skb_reserve(skb, prepad);
		skb_put(skb, len);
		return skb;
	}

	/* Under a page?  Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE)
		return NULL;

	/* Start with a normal skb, and add pages. */
	skb = alloc_skb(prepad + linear, gfp);
	if (!skb)
		return NULL;

	skb_reserve(skb, prepad);
	skb_put(skb, linear);

	len -= linear;

	for (i = 0; i < MAX_SKB_FRAGS; i++) {
		skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		f->page = alloc_page(gfp|__GFP_ZERO);
		if (!f->page)
			break;

		f->page_offset = 0;
		f->size = PAGE_SIZE;

		skb->data_len += PAGE_SIZE;
		skb->len += PAGE_SIZE;
		skb->truesize += PAGE_SIZE;
		skb_shinfo(skb)->nr_frags++;

		if (len < PAGE_SIZE) {
			len = 0;
			break;
		}
		len -= PAGE_SIZE;
	}

	/* Too large, or alloc fail? */
	if (unlikely(len)) {
		kfree_skb(skb);
		skb = NULL;
	}

	return skb;
}
/* Get packet from user space buffer.
 *
 * Parses the optional tun_pi header and optional virtio_net_hdr
 * (depending on TUN_NO_PI / TUN_VNET_HDR), copies the payload into a
 * freshly allocated skb, applies checksum/GSO metadata, and injects
 * the packet into the network stack with netif_rx_ni().
 *
 * Returns `count` (the full number of bytes consumed, headers
 * included) on success, or a negative errno.  Note the size_t
 * underflow idiom: (len -= sizeof(hdr)) > count detects a buffer too
 * short to hold the header. */
static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
{
	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
	struct sk_buff *skb;
	size_t len = count, align = 0;
	struct virtio_net_hdr gso = { 0 };

	if (!(tun->flags & TUN_NO_PI)) {
		if ((len -= sizeof(pi)) > count)
			return -EINVAL;

		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
			return -EFAULT;
	}

	if (tun->flags & TUN_VNET_HDR) {
		if ((len -= sizeof(gso)) > count)
			return -EINVAL;

		if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
			return -EFAULT;

		if (gso.hdr_len > len)
			return -EINVAL;
	}

	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
		align = NET_IP_ALIGN;
		if (unlikely(len < ETH_HLEN))
			return -EINVAL;
	}

	if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
		tun->dev->stats.rx_dropped++;
		return -ENOMEM;
	}

	if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
		tun->dev->stats.rx_dropped++;
		kfree_skb(skb);
		return -EFAULT;
	}

	if (gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		if (!skb_partial_csum_set(skb, gso.csum_start,
					  gso.csum_offset)) {
			tun->dev->stats.rx_frame_errors++;
			kfree_skb(skb);
			return -EINVAL;
		}
	} else if (tun->flags & TUN_NOCHECKSUM)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	switch (tun->flags & TUN_TYPE_MASK) {
	case TUN_TUN_DEV:
		if (tun->flags & TUN_NO_PI) {
			/* No pi header: sniff the IP version nibble to
			 * pick the protocol. */
			switch (skb->data[0] & 0xf0) {
			case 0x40:
				pi.proto = htons(ETH_P_IP);
				break;
			case 0x60:
				pi.proto = htons(ETH_P_IPV6);
				break;
			default:
				tun->dev->stats.rx_dropped++;
				kfree_skb(skb);
				return -EINVAL;
			}
		}

		skb_reset_mac_header(skb);
		skb->protocol = pi.proto;
		skb->dev = tun->dev;
		break;
	case TUN_TAP_DEV:
		skb->protocol = eth_type_trans(skb, tun->dev);
		break;
	}

	if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
		switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			tun->dev->stats.rx_frame_errors++;
			kfree_skb(skb);
			return -EINVAL;
		}

		if (gso.gso_type & VIRTIO_NET_HDR_GSO_ECN)
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

		skb_shinfo(skb)->gso_size = gso.gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
			tun->dev->stats.rx_frame_errors++;
			kfree_skb(skb);
			return -EINVAL;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_rx_ni(skb);

	tun->dev->stats.rx_packets++;
	tun->dev->stats.rx_bytes += len;

	return count;
}
/* write()/aio_write entry point: a write to the fd injects one packet
 * into the kernel via tun_get_user(). */
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
			      unsigned long count, loff_t pos)
{
	struct tun_struct *tun = tun_get(iocb->ki_filp);
	ssize_t result;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);

	result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));

	tun_put(tun);
	return result;
}
/* Put packet to the user space buffer.
 *
 * Mirrors tun_get_user(): emits the optional tun_pi header (with
 * TUN_PKT_STRIP set when the user buffer is too small for the whole
 * packet) and the optional virtio_net_hdr carrying GSO/checksum
 * metadata, followed by as much packet payload as fits.
 *
 * Returns the total number of bytes written to the iovec, or a
 * negative errno. */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
				       struct sk_buff *skb,
				       struct iovec *iv, int len)
{
	struct tun_pi pi = { 0, skb->protocol };
	ssize_t total = 0;

	if (!(tun->flags & TUN_NO_PI)) {
		if ((len -= sizeof(pi)) < 0)
			return -EINVAL;

		if (len < skb->len) {
			/* Packet will be striped */
			pi.flags |= TUN_PKT_STRIP;
		}

		if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
			return -EFAULT;
		total += sizeof(pi);
	}

	if (tun->flags & TUN_VNET_HDR) {
		struct virtio_net_hdr gso = { 0 }; /* no info leak */
		if ((len -= sizeof(gso)) < 0)
			return -EINVAL;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			gso.hdr_len = skb_headlen(skb);
			gso.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			gso.csum_start = skb->csum_start - skb_headroom(skb);
			gso.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		if (unlikely(memcpy_toiovec(iv, (void *)&gso, sizeof(gso))))
			return -EFAULT;
		total += sizeof(gso);
	}

	len = min_t(int, skb->len, len);

	skb_copy_datagram_iovec(skb, 0, iv, len);
	total += len;

	tun->dev->stats.tx_packets++;
	tun->dev->stats.tx_bytes += len;

	return total;
}
/* read()/aio_read entry point: hand one queued packet to userspace.
 * Blocks (interruptibly) until a packet arrives unless O_NONBLOCK is
 * set.  Dequeuing a packet also re-wakes the netdev queue in case
 * tun_net_xmit() had stopped it on overflow. */
static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
			    unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct tun_struct *tun = tun_get(file);
	DECLARE_WAITQUEUE(wait, current);
	struct sk_buff *skb;
	ssize_t len, ret = 0;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);

	len = iov_length(iv, count);
	if (len < 0) {
		ret = -EINVAL;
		goto out;
	}

	add_wait_queue(&tun->read_wait, &wait);
	while (len) {
		/* Must set the state before checking the queue to avoid
		 * missing a wakeup from tun_net_xmit(). */
		current->state = TASK_INTERRUPTIBLE;

		/* Read frames from the queue */
		if (!(skb=skb_dequeue(&tun->readq))) {
			if (file->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}

			/* Nothing to read, let's sleep */
			schedule();
			continue;
		}
		netif_wake_queue(tun->dev);

		ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
		kfree_skb(skb);
		break;
	}

	current->state = TASK_RUNNING;
	remove_wait_queue(&tun->read_wait, &wait);

out:
	tun_put(tun);
	return ret;
}
/* alloc_netdev() setup callback: initialize the queue and waitqueue,
 * default to unrestricted owner/group, and mark the device as local to
 * its network namespace (it cannot be moved between namespaces). */
static void tun_setup(struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);

	skb_queue_head_init(&tun->readq);
	init_waitqueue_head(&tun->read_wait);

	tun->owner = -1;
	tun->group = -1;

	dev->ethtool_ops = &tun_ethtool_ops;
	dev->destructor = free_netdev;
	dev->features |= NETIF_F_NETNS_LOCAL;
}
/*
 * TUNSETIFF worker (called under RTNL): attach to an existing tun/tap
 * device of the matching type, or create and register a new one.  On
 * success the chosen device name is copied back into ifr->ifr_name.
 *
 * The per-call flags (IFF_NO_PI, IFF_ONE_QUEUE, IFF_VNET_HDR) are
 * re-applied on every attach, including re-attach to a persistent
 * device.
 */
static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
{
	struct tun_struct *tun;
	struct net_device *dev;
	int err;

	dev = __dev_get_by_name(net, ifr->ifr_name);
	if (dev) {
		/* Device exists: the requested type must match what the
		 * device actually is (identified via its ops table). */
		if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
			tun = netdev_priv(dev);
		else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
			tun = netdev_priv(dev);
		else
			return -EINVAL;

		err = tun_attach(tun, file);
		if (err < 0)
			return err;
	}
	else {
		char *name;
		unsigned long flags = 0;

		err = -EINVAL;

		if (!capable(CAP_NET_ADMIN))
			return -EPERM;

		/* Set dev type */
		if (ifr->ifr_flags & IFF_TUN) {
			/* TUN device */
			flags |= TUN_TUN_DEV;
			name = "tun%d";
		} else if (ifr->ifr_flags & IFF_TAP) {
			/* TAP device */
			flags |= TUN_TAP_DEV;
			name = "tap%d";
		} else
			goto failed;

		if (*ifr->ifr_name)
			name = ifr->ifr_name;

		dev = alloc_netdev(sizeof(struct tun_struct), name,
				   tun_setup);
		if (!dev)
			return -ENOMEM;

		dev_net_set(dev, net);

		tun = netdev_priv(dev);
		tun->dev = dev;
		tun->flags = flags;
		tun->txflt.count = 0;

		tun_net_init(dev);

		if (strchr(dev->name, '%')) {
			err = dev_alloc_name(dev, dev->name);
			if (err < 0)
				goto err_free_dev;
		}

		err = register_netdevice(tun->dev);
		if (err < 0)
			goto err_free_dev;

		err = tun_attach(tun, file);
		if (err < 0)
			goto err_free_dev;
	}

	DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);

	if (ifr->ifr_flags & IFF_NO_PI)
		tun->flags |= TUN_NO_PI;
	else
		tun->flags &= ~TUN_NO_PI;

	if (ifr->ifr_flags & IFF_ONE_QUEUE)
		tun->flags |= TUN_ONE_QUEUE;
	else
		tun->flags &= ~TUN_ONE_QUEUE;

	if (ifr->ifr_flags & IFF_VNET_HDR)
		tun->flags |= TUN_VNET_HDR;
	else
		tun->flags &= ~TUN_VNET_HDR;

	/* Make sure persistent devices do not get stuck in
	 * xoff state.
	 */
	if (netif_running(tun->dev))
		netif_wake_queue(tun->dev);

	strcpy(ifr->ifr_name, tun->dev->name);
	return 0;

 err_free_dev:
	free_netdev(dev);
 failed:
	return err;
}
874 static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
876 struct tun_struct *tun = tun_get(file);
878 if (!tun)
879 return -EBADFD;
881 DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name);
883 strcpy(ifr->ifr_name, tun->dev->name);
885 ifr->ifr_flags = 0;
887 if (ifr->ifr_flags & TUN_TUN_DEV)
888 ifr->ifr_flags |= IFF_TUN;
889 else
890 ifr->ifr_flags |= IFF_TAP;
892 if (tun->flags & TUN_NO_PI)
893 ifr->ifr_flags |= IFF_NO_PI;
895 if (tun->flags & TUN_ONE_QUEUE)
896 ifr->ifr_flags |= IFF_ONE_QUEUE;
898 if (tun->flags & TUN_VNET_HDR)
899 ifr->ifr_flags |= IFF_VNET_HDR;
901 tun_put(tun);
902 return 0;
/* This is like a cut-down ethtool ops, except done via tun fd so no
 * privs required.
 *
 * Translates TUN_F_* offload requests into netdev feature bits; any
 * bit left over in arg is unrecognized and rejected with -EINVAL,
 * which doubles as a feature-probe mechanism for userspace. */
static int set_offload(struct net_device *dev, unsigned long arg)
{
	unsigned int old_features, features;

	old_features = dev->features;
	/* Unset features, set them as we chew on the arg. */
	features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
				    |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6));

	if (arg & TUN_F_CSUM) {
		features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
		arg &= ~TUN_F_CSUM;

		/* TSO is only meaningful when checksum offload is on. */
		if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
			if (arg & TUN_F_TSO_ECN) {
				features |= NETIF_F_TSO_ECN;
				arg &= ~TUN_F_TSO_ECN;
			}
			if (arg & TUN_F_TSO4)
				features |= NETIF_F_TSO;
			if (arg & TUN_F_TSO6)
				features |= NETIF_F_TSO6;
			arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
		}
	}

	/* This gives the user a way to test for new features in future by
	 * trying to set them. */
	if (arg)
		return -EINVAL;

	dev->features = features;
	if (old_features != dev->features)
		netdev_features_change(dev);

	return 0;
}
/*
 * Character device ioctl dispatcher.  TUNSETIFF and TUNGETFEATURES are
 * handled before requiring an attached device; everything else needs
 * the fd to already be attached (-EBADFD otherwise).  The 0x89 ioctl
 * type covers the SIOC* socket ioctls, which also pass an ifreq.
 */
static int tun_chr_ioctl(struct inode *inode, struct file *file,
			 unsigned int cmd, unsigned long arg)
{
	struct tun_file *tfile = file->private_data;
	struct tun_struct *tun;
	void __user* argp = (void __user*)arg;
	struct ifreq ifr;
	int ret;

	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
		if (copy_from_user(&ifr, argp, sizeof ifr))
			return -EFAULT;

	if (cmd == TUNGETFEATURES) {
		/* Currently this just means: "what IFF flags are valid?".
		 * This is needed because we never checked for invalid flags on
		 * TUNSETIFF. */
		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
				IFF_VNET_HDR,
				(unsigned int __user*)argp);
	}

	tun = __tun_get(tfile);
	if (cmd == TUNSETIFF && !tun) {
		int err;

		ifr.ifr_name[IFNAMSIZ-1] = '\0';

		rtnl_lock();
		err = tun_set_iff(tfile->net, file, &ifr);
		rtnl_unlock();

		if (err)
			return err;

		if (copy_to_user(argp, &ifr, sizeof(ifr)))
			return -EFAULT;
		return 0;
	}

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);

	ret = 0;
	switch (cmd) {
	case TUNGETIFF:
		ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
		if (ret)
			break;

		if (copy_to_user(argp, &ifr, sizeof(ifr)))
			ret = -EFAULT;
		break;

	case TUNSETNOCSUM:
		/* Disable/Enable checksum */
		if (arg)
			tun->flags |= TUN_NOCHECKSUM;
		else
			tun->flags &= ~TUN_NOCHECKSUM;

		DBG(KERN_INFO "%s: checksum %s\n",
		    tun->dev->name, arg ? "disabled" : "enabled");
		break;

	case TUNSETPERSIST:
		/* Disable/Enable persist mode */
		if (arg)
			tun->flags |= TUN_PERSIST;
		else
			tun->flags &= ~TUN_PERSIST;

		DBG(KERN_INFO "%s: persist %s\n",
		    tun->dev->name, arg ? "enabled" : "disabled");
		break;

	case TUNSETOWNER:
		/* Set owner of the device */
		tun->owner = (uid_t) arg;

		DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
		break;

	case TUNSETGROUP:
		/* Set group of the device */
		tun->group= (gid_t) arg;

		DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
		break;

	case TUNSETLINK:
		/* Only allow setting the type when the interface is down */
		rtnl_lock();
		if (tun->dev->flags & IFF_UP) {
			DBG(KERN_INFO "%s: Linktype set failed because interface is up\n",
				tun->dev->name);
			ret = -EBUSY;
		} else {
			tun->dev->type = (int) arg;
			DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type);
			ret = 0;
		}
		rtnl_unlock();
		break;

#ifdef TUN_DEBUG
	case TUNSETDEBUG:
		tun->debug = arg;
		break;
#endif
	case TUNSETOFFLOAD:
		rtnl_lock();
		ret = set_offload(tun->dev, arg);
		rtnl_unlock();
		break;

	case TUNSETTXFILTER:
		/* Can be set only for TAPs */
		ret = -EINVAL;
		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
			break;
		rtnl_lock();
		ret = update_filter(&tun->txflt, (void __user *)arg);
		rtnl_unlock();
		break;

	case SIOCGIFHWADDR:
		/* Get hw addres */
		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
		ifr.ifr_hwaddr.sa_family = tun->dev->type;
		if (copy_to_user(argp, &ifr, sizeof ifr))
			ret = -EFAULT;
		break;

	case SIOCSIFHWADDR:
		/* Set hw address */
		DBG(KERN_DEBUG "%s: set hw address: %pM\n",
			tun->dev->name, ifr.ifr_hwaddr.sa_data);

		rtnl_lock();
		ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
		rtnl_unlock();
		break;
	default:
		ret = -EINVAL;
		break;
	}

	tun_put(tun);
	return ret;
}
/* F_SETFL/FIOASYNC handler: arm or disarm SIGIO delivery for the fd.
 * Runs under the BKL (historical); the TUN_FASYNC flag gates the
 * kill_fasync() call in tun_net_xmit(). */
static int tun_chr_fasync(int fd, struct file *file, int on)
{
	struct tun_struct *tun = tun_get(file);
	int ret;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_fasync %d\n", tun->dev->name, on);

	lock_kernel();
	if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0)
		goto out;

	if (on) {
		ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
		if (ret)
			goto out;
		tun->flags |= TUN_FASYNC;
	} else
		tun->flags &= ~TUN_FASYNC;
	ret = 0;
out:
	unlock_kernel();
	tun_put(tun);
	return ret;
}
/* open() on /dev/net/tun: allocate the per-fd tun_file (detached until
 * TUNSETIFF) and pin the opener's network namespace. */
static int tun_chr_open(struct inode *inode, struct file * file)
{
	struct tun_file *tfile;
	cycle_kernel_lock();
	DBG1(KERN_INFO "tunX: tun_chr_open\n");

	tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
	if (!tfile)
		return -ENOMEM;
	tfile->tun = NULL;
	tfile->net = get_net(current->nsproxy->net_ns);
	file->private_data = tfile;
	return 0;
}
/* release() on the fd: detach from the device and, unless the device
 * is persistent, unregister it (free_netdev runs via dev->destructor).
 * Then drop the namespace reference and free the per-fd state. */
static int tun_chr_close(struct inode *inode, struct file *file)
{
	struct tun_file *tfile = file->private_data;
	struct tun_struct *tun = __tun_get(tfile);


	if (tun) {
		DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);

		rtnl_lock();
		__tun_detach(tun);

		/* If desireable, unregister the netdevice. */
		if (!(tun->flags & TUN_PERSIST))
			unregister_netdevice(tun->dev);

		rtnl_unlock();
	}

	put_net(tfile->net);
	kfree(tfile);

	return 0;
}
/* Character device entry points for /dev/net/tun. */
static const struct file_operations tun_fops = {
	.owner	= THIS_MODULE,
	.llseek = no_llseek,
	.read  = do_sync_read,
	.aio_read  = tun_chr_aio_read,
	.write = do_sync_write,
	.aio_write = tun_chr_aio_write,
	.poll	= tun_chr_poll,
	.ioctl	= tun_chr_ioctl,
	.open	= tun_chr_open,
	.release = tun_chr_close,
	.fasync = tun_chr_fasync
};

/* Misc device registration: /dev/net/tun with the fixed TUN minor. */
static struct miscdevice tun_miscdev = {
	.minor = TUN_MINOR,
	.name = "tun",
	.fops = &tun_fops,
};
/* ethtool interface */

/* Report fixed, nominal link settings; tun has no real PHY, so the
 * values (10 Mb full duplex, TP, no autoneg) are placeholders. */
static int tun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	cmd->supported		= 0;
	cmd->advertising	= 0;
	cmd->speed		= SPEED_10;
	cmd->duplex		= DUPLEX_FULL;
	cmd->port		= PORT_TP;
	cmd->phy_address	= 0;
	cmd->transceiver	= XCVR_INTERNAL;
	cmd->autoneg		= AUTONEG_DISABLE;
	cmd->maxtxpkt		= 0;
	cmd->maxrxpkt		= 0;
	return 0;
}
/* Fill in driver identification; bus_info doubles as the device type
 * ("tun" or "tap"). */
static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	struct tun_struct *tun = netdev_priv(dev);

	strcpy(info->driver, DRV_NAME);
	strcpy(info->version, DRV_VERSION);
	strcpy(info->fw_version, "N/A");

	switch (tun->flags & TUN_TYPE_MASK) {
	case TUN_TUN_DEV:
		strcpy(info->bus_info, "tun");
		break;
	case TUN_TAP_DEV:
		strcpy(info->bus_info, "tap");
		break;
	}
}
/* ethtool msglevel maps onto the per-device debug flag; unsupported
 * when the driver is built without TUN_DEBUG. */
static u32 tun_get_msglevel(struct net_device *dev)
{
#ifdef TUN_DEBUG
	struct tun_struct *tun = netdev_priv(dev);
	return tun->debug;
#else
	return -EOPNOTSUPP;
#endif
}

/* Set the per-device debug flag (no-op without TUN_DEBUG). */
static void tun_set_msglevel(struct net_device *dev, u32 value)
{
#ifdef TUN_DEBUG
	struct tun_struct *tun = netdev_priv(dev);
	tun->debug = value;
#endif
}
/* "Link" is up exactly when a file is attached to the device. */
static u32 tun_get_link(struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);
	return !!tun->tfile;
}

/* RX checksumming is reported as enabled unless TUN_NOCHECKSUM is set. */
static u32 tun_get_rx_csum(struct net_device *dev)
{
	struct tun_struct *tun = netdev_priv(dev);
	return (tun->flags & TUN_NOCHECKSUM) == 0;
}

/* Toggle the TUN_NOCHECKSUM flag (inverted sense vs. `data`). */
static int tun_set_rx_csum(struct net_device *dev, u32 data)
{
	struct tun_struct *tun = netdev_priv(dev);
	if (data)
		tun->flags &= ~TUN_NOCHECKSUM;
	else
		tun->flags |= TUN_NOCHECKSUM;
	return 0;
}
/* ethtool ops table; also used by tun_exit_net() to recognize tun
 * devices when tearing down a network namespace. */
static const struct ethtool_ops tun_ethtool_ops = {
	.get_settings	= tun_get_settings,
	.get_drvinfo	= tun_get_drvinfo,
	.get_msglevel	= tun_get_msglevel,
	.set_msglevel	= tun_set_msglevel,
	.get_link	= tun_get_link,
	.get_rx_csum	= tun_get_rx_csum,
	.set_rx_csum	= tun_set_rx_csum
};
/* Per-namespace init: nothing to set up. */
static int tun_init_net(struct net *net)
{
	return 0;
}

/* Per-namespace exit: unregister every tun/tap device left in the
 * namespace, identified by their shared ethtool_ops pointer. */
static void tun_exit_net(struct net *net)
{
	struct net_device *dev, *next;

	rtnl_lock();
	for_each_netdev_safe(net, dev, next) {
		if (dev->ethtool_ops != &tun_ethtool_ops)
			continue;
		DBG(KERN_INFO "%s cleaned up\n", dev->name);
		unregister_netdevice(dev);
	}
	rtnl_unlock();
}

static struct pernet_operations tun_net_ops = {
	.init = tun_init_net,
	.exit = tun_exit_net,
};
/* Module init: register the pernet hooks first, then the misc char
 * device; unwind the pernet registration if the latter fails. */
static int __init tun_init(void)
{
	int ret = 0;

	printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
	printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);

	ret = register_pernet_device(&tun_net_ops);
	if (ret) {
		printk(KERN_ERR "tun: Can't register pernet ops\n");
		goto err_pernet;
	}

	ret = misc_register(&tun_miscdev);
	if (ret) {
		printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
		goto err_misc;
	}
	return 0;

err_misc:
	unregister_pernet_device(&tun_net_ops);
err_pernet:
	return ret;
}
/* Module exit: tear down in reverse order of tun_init(). */
static void tun_cleanup(void)
{
	misc_deregister(&tun_miscdev);
	unregister_pernet_device(&tun_net_ops);
}

module_init(tun_init);
module_exit(tun_cleanup);
MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(TUN_MINOR);