2 * Copyright (C) 1999-2000 by Maksim Yevmenkin <m_evmenkin@yahoo.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * -------------------------------------------------------------------------
29 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
30 * Nottingham University 1987.
34 * $FreeBSD: src/sys/net/if_tap.c,v 1.3.2.3 2002/04/14 21:41:48 luigi Exp $
35 * $DragonFly: src/sys/net/tap/if_tap.c,v 1.37 2007/09/16 17:02:49 pavalos Exp $
36 * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $
41 #include <sys/param.h>
43 #include <sys/device.h>
44 #include <sys/filedesc.h>
45 #include <sys/filio.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/systm.h>
56 #include <sys/thread2.h>
57 #include <sys/ttycom.h>
59 #include <sys/vnode.h>
60 #include <sys/serialize.h>
63 #include <net/ethernet.h>
65 #include <net/ifq_var.h>
66 #include <net/if_arp.h>
67 #include <net/route.h>
69 #include <netinet/in.h>
71 #include "if_tapvar.h"
/*
 * Driver-wide constants: the character device name and major number used
 * in tap_ops, a debug print helper gated on the tapdebug flag, and the
 * minor-number bit that tapcreate() tests to select VMnet mode.
 *
 * NOTE(review): TAPDEBUG expands to a bare `if (tapdebug) if_printf`, so a
 * TAPDEBUG(...) statement placed directly before an `else` would capture
 * that `else`.
 */
75 #define CDEV_NAME "tap"
76 #define CDEV_MAJOR 149
77 #define TAPDEBUG if (tapdebug) if_printf
81 #define VMNET_DEV_MASK 0x00010000
/*
 * Forward declarations: the module event handler, the interface creation
 * helper, the ifnet callbacks and the character-device entry points that
 * are defined further down in this file.
 *
 * NOTE(review): the tapifioctl prototype is cut off after its third
 * parameter in this listing (it ends on a comma).
 */
84 static int tapmodevent (module_t
, int, void *);
87 static void tapcreate (cdev_t
);
89 /* network interface */
90 static void tapifstart (struct ifnet
*);
91 static int tapifioctl (struct ifnet
*, u_long
, caddr_t
,
93 static void tapifinit (void *);
95 /* character device */
96 static d_open_t tapopen
;
97 static d_close_t tapclose
;
98 static d_read_t tapread
;
99 static d_write_t tapwrite
;
100 static d_ioctl_t tapioctl
;
101 static d_poll_t tappoll
;
102 static d_kqfilter_t tapkqfilter
;
/*
 * Device operations table for the tap character device. It is registered
 * with dev_ops_add() in tapmodevent() and passed to make_dev() in
 * tapcreate().
 *
 * NOTE(review): only the header triple (name, major, 0) and the d_kqfilter
 * slot are visible in this listing; the other slots and the closing brace
 * are not shown.
 */
104 static struct dev_ops tap_ops
= {
105 { CDEV_NAME
, CDEV_MAJOR
, 0 },
112 .d_kqfilter
= tapkqfilter
/*
 * Module-wide state and module glue:
 *  - three file-scope counters/flags (see the per-line comments);
 *  - the M_TAP malloc type used for softc allocation in tapcreate();
 *  - an integer sysctl named if_tap_debug under the _debug node, created
 *    read/write (CTLFLAG_RW) and backed by tapdebug;
 *  - the module declaration, which names tapmodevent as its event handler.
 */
115 static int taprefcnt
= 0; /* module ref. counter */
116 static int taplastunit
= -1; /* max. open unit number */
117 static int tapdebug
= 0; /* debug flag */
119 MALLOC_DECLARE(M_TAP
);
120 MALLOC_DEFINE(M_TAP
, CDEV_NAME
, "Ethernet tunnel interface");
121 SYSCTL_INT(_debug
, OID_AUTO
, if_tap_debug
, CTLFLAG_RW
, &tapdebug
, 0, "");
122 DEV_MODULE(if_tap
, tapmodevent
, NULL
);
127 * module event handler
/*
 * tapmodevent - module event handler for if_tap.
 *
 * Visible behavior: tap_ops is registered with dev_ops_add() and, on the
 * teardown path, removed with dev_ops_remove(). Teardown then walks unit
 * numbers while unit <= taplastunit, searches the global ifnet list for an
 * interface whose driver name is TAP or VMNET and whose unit matches, and
 * destroys the device stored in that interface's softc.
 *
 * `attached` is a function-local static, so its value persists between
 * calls.
 *
 * NOTE(review): this listing omits lines (how `type` selects a path, the
 * return statements and most of the per-interface teardown); confirm
 * against the full file before relying on this summary.
 */
130 tapmodevent(module_t mod
, int type
, void *data
)
132 static int attached
= 0;
133 struct ifnet
*ifp
= NULL
;
141 dev_ops_add(&tap_ops
, 0, 0);
149 dev_ops_remove(&tap_ops
, 0, 0);
151 /* XXX: maintain tap ifs in a local list */
/* linear search of the global interface list for each unit number */
153 while (unit
<= taplastunit
) {
154 TAILQ_FOREACH(ifp
, &ifnet
, if_link
) {
155 if ((strcmp(ifp
->if_dname
, TAP
) == 0) ||
156 (strcmp(ifp
->if_dname
, VMNET
) == 0)) {
157 if (ifp
->if_dunit
== unit
)
163 struct tap_softc
*tp
= ifp
->if_softc
;
165 TAPDEBUG(ifp
, "detached. minor = %#x, " \
166 "taplastunit = %d\n",
167 minor(tp
->tap_dev
), taplastunit
);
170 destroy_dev(tp
->tap_dev
);
191 * to create interface
/*
 * tapcreate - allocate a softc and create the tap/vmnet interface for `dev`.
 *
 * Steps visible in this listing:
 *  - the softc is allocated from M_TAP with M_WAITOK | M_ZERO;
 *  - when the minor has VMNET_DEV_MASK set, the unit is the low 8 bits of
 *    lminor(dev) and TAP_VMNET is set in tap_flags;
 *  - make_dev() creates the node with UID_ROOT/GID_WHEEL, mode 0600 and a
 *    "%s%d" name, and si_drv1 of both the new cdev and `dev` is pointed at
 *    the softc;
 *  - a MAC address is built as 00:bd:<3 bytes copied from ticks>:<unit>;
 *  - the ifnet callbacks, MTU and flags are filled in, the send queue is
 *    prepared and the interface is attached with ether_ifattach();
 *  - TAP_INITED is set once the interface is attached.
 *
 * NOTE(review): lines are missing from this listing (declarations of
 * `name` and `unit`, the non-VMnet branch, the assignment of `ifp`).
 */
194 tapcreate(cdev_t dev
)
196 struct ifnet
*ifp
= NULL
;
197 struct tap_softc
*tp
= NULL
;
198 uint8_t ether_addr
[ETHER_ADDR_LEN
];
202 /* allocate driver storage and create device */
203 MALLOC(tp
, struct tap_softc
*, sizeof(*tp
), M_TAP
, M_WAITOK
| M_ZERO
);
205 /* select device: tap or vmnet */
206 if (minor(dev
) & VMNET_DEV_MASK
) {
208 unit
= lminor(dev
) & 0xff;
209 tp
->tap_flags
|= TAP_VMNET
;
216 tp
->tap_dev
= make_dev(&tap_ops
, minor(dev
), UID_ROOT
, GID_WHEEL
,
217 0600, "%s%d", name
, unit
);
218 tp
->tap_dev
->si_drv1
= dev
->si_drv1
= tp
;
219 reference_dev(tp
->tap_dev
); /* so we can destroy it later */
221 /* generate fake MAC address: 00 bd xx xx xx unit_no */
222 ether_addr
[0] = 0x00;
223 ether_addr
[1] = 0xbd;
/*
 * The three middle bytes are whatever the first three bytes of `ticks` are
 * in memory, so the result depends on byte order.
 * NOTE(review): `ðer_addr` below looks like a mis-decoded `&ether_addr`
 * (an "&eth" prefix rendered as a character entity) - confirm against the
 * real file.
 */
224 bcopy(&ticks
, ðer_addr
[2], 3);
/* the cast keeps only the low 8 bits of the unit number */
225 ether_addr
[5] = (u_char
)unit
;
227 /* fill the rest and attach interface */
231 if_initname(ifp
, name
, unit
);
232 if (unit
> taplastunit
)
235 ifp
->if_init
= tapifinit
;
236 ifp
->if_start
= tapifstart
;
237 ifp
->if_ioctl
= tapifioctl
;
238 ifp
->if_mtu
= ETHERMTU
;
239 ifp
->if_flags
= (IFF_BROADCAST
|IFF_SIMPLEX
|IFF_MULTICAST
);
240 ifq_set_maxlen(&ifp
->if_snd
, ifqmaxlen
);
241 ifq_set_ready(&ifp
->if_snd
);
243 ether_ifattach(ifp
, ether_addr
, NULL
);
245 tp
->tap_flags
|= TAP_INITED
;
247 TAPDEBUG(ifp
, "created. minor = %#x\n", minor(tp
->tap_dev
));
254 * to open tunnel. must be superuser
/*
 * tapopen - open entry point of the tap character device.
 *
 * Visible behavior: the result of suser_cred() on the caller's credentials
 * is checked first. The code then fires the ifnet_attach_event handlers and
 * sends an IFAN_ARRIVAL routing announcement, tests TAP_OPEN, copies the
 * interface MAC address (arpcom.ac_enaddr) into tp->ether_addr, records
 * curthread as the opener and sets TAP_OPEN.
 *
 * NOTE(review): lines are missing from this listing - how `tp` is
 * obtained, what guards the announcement, and what happens when
 * suser_cred() fails or TAP_OPEN is already set cannot be read from here.
 * `ifp` is taken from &tp->arpcom.ac_if here while other functions use
 * &tp->tap_if; presumably these name the same ifnet - confirm in
 * if_tapvar.h.
 */
257 tapopen(struct dev_open_args
*ap
)
259 cdev_t dev
= ap
->a_head
.a_dev
;
260 struct tap_softc
*tp
= NULL
;
261 struct ifnet
*ifp
= NULL
;
264 if ((error
= suser_cred(ap
->a_cred
, 0)) != 0)
272 ifp
= &tp
->arpcom
.ac_if
;
274 ifp
= &tp
->arpcom
.ac_if
;
276 EVENTHANDLER_INVOKE(ifnet_attach_event
, ifp
);
278 /* Announce the return of the interface. */
279 rt_ifannouncemsg(ifp
, IFAN_ARRIVAL
);
282 if (tp
->tap_flags
& TAP_OPEN
) {
/* remember the interface MAC as the initial "remote side" address */
287 bcopy(tp
->arpcom
.ac_enaddr
, tp
->ether_addr
, sizeof(tp
->ether_addr
));
289 tp
->tap_td
= curthread
;
290 tp
->tap_flags
|= TAP_OPEN
;
293 TAPDEBUG(ifp
, "opened. minor = %#x, refcnt = %d, taplastunit = %d\n",
294 minor(tp
->tap_dev
), taprefcnt
, taplastunit
);
304 * close the device - mark i/f down & delete routing info
/*
 * tapclose - close entry point of the tap character device.
 *
 * Visible behavior:
 *  - the interface send queue is purged while holding the if serializer;
 *  - only when the tap is not in VMnet mode and the interface is IFF_UP:
 *    ifnet_detach_event handlers run and an IFAN_DEPARTURE announcement is
 *    sent; then, under the serializer and only if IFF_RUNNING is set, every
 *    AF_INET address has its route deleted (rtinit with RTM_DELETE) and its
 *    dstaddr and netmask storage zeroed (the leading sizeof suggests
 *    ifa_addr is zeroed too - that call is not visible), after which
 *    IFF_RUNNING is cleared;
 *  - the SIGIO owner is dropped (funsetown), select waiters are woken and
 *    TAP_OPEN is cleared;
 *  - a diagnostic is printed when taprefcnt is found out of sync.
 *
 * NOTE(review): lines are missing from this listing, including the
 * reference-count update and the return statement.
 */
307 tapclose(struct dev_close_args
*ap
)
309 cdev_t dev
= ap
->a_head
.a_dev
;
310 struct tap_softc
*tp
= dev
->si_drv1
;
311 struct ifnet
*ifp
= &tp
->tap_if
;
313 /* junk all pending output */
316 lwkt_serialize_enter(ifp
->if_serializer
);
317 ifq_purge(&ifp
->if_snd
);
318 lwkt_serialize_exit(ifp
->if_serializer
);
321 * do not bring the interface down, and do not anything with
322 * interface, if we are in VMnet mode. just close the device.
325 if (((tp
->tap_flags
& TAP_VMNET
) == 0) && (ifp
->if_flags
& IFF_UP
)) {
326 EVENTHANDLER_INVOKE(ifnet_detach_event
, ifp
);
328 /* Announce the departure of the interface. */
329 rt_ifannouncemsg(ifp
, IFAN_DEPARTURE
);
332 lwkt_serialize_enter(ifp
->if_serializer
);
333 if (ifp
->if_flags
& IFF_RUNNING
) {
334 /* find internet addresses and delete routes */
335 struct ifaddr
*ifa
= NULL
;
337 TAILQ_FOREACH(ifa
, &ifp
->if_addrhead
, ifa_link
) {
338 if (ifa
->ifa_addr
->sa_family
== AF_INET
) {
339 rtinit(ifa
, (int)RTM_DELETE
, 0);
341 /* remove address from interface */
343 sizeof(*(ifa
->ifa_addr
)));
344 bzero(ifa
->ifa_dstaddr
,
345 sizeof(*(ifa
->ifa_dstaddr
)));
346 bzero(ifa
->ifa_netmask
,
347 sizeof(*(ifa
->ifa_netmask
)));
351 ifp
->if_flags
&= ~IFF_RUNNING
;
353 lwkt_serialize_exit(ifp
->if_serializer
);
356 funsetown(tp
->tap_sigio
);
357 selwakeup(&tp
->tap_rsel
);
359 tp
->tap_flags
&= ~TAP_OPEN
;
365 if_printf(ifp
, "minor = %#x, refcnt = %d is out of sync. "
366 "set refcnt to 0\n", minor(tp
->tap_dev
), taprefcnt
);
369 TAPDEBUG(ifp
, "closed. minor = %#x, refcnt = %d, taplastunit = %d\n",
370 minor(tp
->tap_dev
), taprefcnt
, taplastunit
);
380 * Network interface initialization function (called with if serializer held)
/*
 * Body of the interface init callback. Its signature line is not in this
 * listing; presumably this is tapifinit(void *xtp), the function installed
 * as ifp->if_init in tapcreate() - confirm.
 *
 * Visible behavior: the opaque argument is treated as the softc, the
 * interface is marked IFF_RUNNING and IFF_OACTIVE is cleared. The trailing
 * comment announces an attempt to start output; that call is not visible.
 */
387 struct tap_softc
*tp
= (struct tap_softc
*)xtp
;
388 struct ifnet
*ifp
= &tp
->tap_if
;
390 TAPDEBUG(ifp
, "initializing, minor = %#x\n", minor(tp
->tap_dev
));
392 ifp
->if_flags
|= IFF_RUNNING
;
393 ifp
->if_flags
&= ~IFF_OACTIVE
;
395 /* attempt to start output */
403 * Process an ioctl request on network interface (called with if serializer held)
/*
 * tapifioctl - ioctl handler installed as ifp->if_ioctl.
 *
 * Pieces visible in this listing:
 *  - a pass-through to ether_ioctl(), whose result is stored in `dummy`;
 *  - a flags path that, only for non-VMnet taps, checks for IFF_UP with
 *    IFF_RUNNING still clear;
 *  - a SIOCADDMULTI case;
 *  - a status path that appends "Opened by pid N" (or "Opened by td PTR"
 *    when the opening thread has no td_proc) to ifs->ascii, but only when
 *    an opener is recorded and the buffer is not already full.
 *
 * NOTE(review): the switch statement, most case labels and the return
 * statements are missing from this listing.
 */
409 tapifioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t data
, struct ucred
*cr
)
411 struct tap_softc
*tp
= (struct tap_softc
*)(ifp
->if_softc
);
412 struct ifstat
*ifs
= NULL
;
419 dummy
= ether_ioctl(ifp
, cmd
, data
);
423 if ((tp
->tap_flags
& TAP_VMNET
) == 0) {
425 * Only for non-vmnet tap(4)
427 if (ifp
->if_flags
& IFF_UP
) {
428 if ((ifp
->if_flags
& IFF_RUNNING
) == 0)
433 case SIOCADDMULTI
: /* XXX -- just like vmnet does */
438 ifs
= (struct ifstat
*)data
;
/* `dummy` is reused as the current text length so new text is appended */
439 dummy
= strlen(ifs
->ascii
);
440 if (tp
->tap_td
!= NULL
&& dummy
< sizeof(ifs
->ascii
)) {
441 if (tp
->tap_td
->td_proc
) {
442 ksnprintf(ifs
->ascii
+ dummy
,
443 sizeof(ifs
->ascii
) - dummy
,
444 "\tOpened by pid %d\n",
445 (int)tp
->tap_td
->td_proc
->p_pid
);
447 ksnprintf(ifs
->ascii
+ dummy
,
448 sizeof(ifs
->ascii
) - dummy
,
449 "\tOpened by td %p\n", tp
->tap_td
);
465 * Queue packets from higher level ready to put out (called with if serializer held)
/*
 * tapifstart - if_start callback installed in tapcreate().
 *
 * Visible behavior: when the tap is not in VMnet mode and TAP_READY is not
 * fully set, the pending send queue is purged. Otherwise IFF_OACTIVE is
 * set and, if the send queue is non-empty, readers are notified: the
 * TAP_RWAIT flag is cleared when set, kqueue listeners on tap_rsel get a
 * KNOTE, SIGIO is posted when TAP_ASYNC is set and an owner is registered,
 * and selwakeup() is called. if_opackets is incremented and IFF_OACTIVE is
 * cleared again. No packet is dequeued here.
 *
 * NOTE(review): lines are missing from this listing (for example whatever
 * follows the purge, and the wakeup paired with clearing TAP_RWAIT).
 */
471 tapifstart(struct ifnet
*ifp
)
473 struct tap_softc
*tp
= ifp
->if_softc
;
475 TAPDEBUG(ifp
, "starting, minor = %#x\n", minor(tp
->tap_dev
));
478 * do not junk pending output if we are in VMnet mode.
479 * XXX: can this do any harm because of queue overflow?
482 if (((tp
->tap_flags
& TAP_VMNET
) == 0) &&
483 ((tp
->tap_flags
& TAP_READY
) != TAP_READY
)) {
484 TAPDEBUG(ifp
, "not ready. minor = %#x, tap_flags = 0x%x\n",
485 minor(tp
->tap_dev
), tp
->tap_flags
);
487 ifq_purge(&ifp
->if_snd
);
491 ifp
->if_flags
|= IFF_OACTIVE
;
493 if (!ifq_is_empty(&ifp
->if_snd
)) {
494 if (tp
->tap_flags
& TAP_RWAIT
) {
495 tp
->tap_flags
&= ~TAP_RWAIT
;
498 KNOTE(&tp
->tap_rsel
.si_note
, 0);
500 if ((tp
->tap_flags
& TAP_ASYNC
) && (tp
->tap_sigio
!= NULL
)) {
502 pgsigio(tp
->tap_sigio
, SIGIO
, 0);
507 * selwakeup is not MPSAFE. tapifstart is.
510 selwakeup(&tp
->tap_rsel
);
512 ifp
->if_opackets
++; /* obytes are counted in ether_output */
515 ifp
->if_flags
&= ~IFF_OACTIVE
;
522 * The ops interface is now pretty minimal. Called via fileops with nothing held.
/*
 * tapioctl - ioctl entry point of the tap character device.
 *
 * The visible code takes the if serializer near the top and releases it
 * near the bottom. In between, the following operations are visible (most
 * of their case labels are not):
 *  - copy mtu/type/baudrate from a struct tapinfo into the ifnet, and the
 *    reverse;
 *  - set tapdebug from, or copy it to, an int at `data`;
 *  - set or clear TAP_ASYNC;
 *  - add up m_len over the mbuf chain returned by ifq_poll() into the int
 *    at `data`;
 *  - set/get the SIGIO owner with fsetown()/fgetown(), plus deprecated
 *    variants that negate the value;
 *  - SIOCGIFFLAGS copies if_flags out; VMIO_SIOCSIFFLAGS replaces the
 *    changeable flags while keeping the IFF_CANTCHANGE bits;
 *  - OSIOCGIFADDR / SIOCSIFADDR copy tp->ether_addr out of / in from
 *    `data`.
 *
 * NOTE(review): no range check on tapp->mtu is visible before it is stored
 * in if_mtu; confirm whether one exists on a line missing from this
 * listing.
 */
528 tapioctl(struct dev_ioctl_args
*ap
)
530 cdev_t dev
= ap
->a_head
.a_dev
;
531 caddr_t data
= ap
->a_data
;
532 struct tap_softc
*tp
= dev
->si_drv1
;
533 struct ifnet
*ifp
= &tp
->tap_if
;
534 struct tapinfo
*tapp
= NULL
;
539 lwkt_serialize_enter(ifp
->if_serializer
);
544 tapp
= (struct tapinfo
*)data
;
545 ifp
->if_mtu
= tapp
->mtu
;
546 ifp
->if_type
= tapp
->type
;
547 ifp
->if_baudrate
= tapp
->baudrate
;
551 tapp
= (struct tapinfo
*)data
;
552 tapp
->mtu
= ifp
->if_mtu
;
553 tapp
->type
= ifp
->if_type
;
554 tapp
->baudrate
= ifp
->if_baudrate
;
558 tapdebug
= *(int *)data
;
562 *(int *)data
= tapdebug
;
567 tp
->tap_flags
|= TAP_ASYNC
;
569 tp
->tap_flags
&= ~TAP_ASYNC
;
/* ifq_poll() result is only walked here; nothing is dequeued in this path */
574 if ((mb
= ifq_poll(&ifp
->if_snd
)) != NULL
) {
575 for(; mb
!= NULL
; mb
= mb
->m_next
)
576 *(int *)data
+= mb
->m_len
;
581 error
= fsetown(*(int *)data
, &tp
->tap_sigio
);
585 *(int *)data
= fgetown(tp
->tap_sigio
);
588 /* this is deprecated, FIOSETOWN should be used instead */
590 error
= fsetown(-(*(int *)data
), &tp
->tap_sigio
);
593 /* this is deprecated, FIOGETOWN should be used instead */
595 *(int *)data
= -fgetown(tp
->tap_sigio
);
598 /* VMware/VMnet port ioctl's */
600 case SIOCGIFFLAGS
: /* get ifnet flags */
601 bcopy(&ifp
->if_flags
, data
, sizeof(ifp
->if_flags
));
604 case VMIO_SIOCSIFFLAGS
: /* VMware/VMnet SIOCSIFFLAGS */
/* caller-supplied flags lose the IFF_CANTCHANGE bits; current ones are kept */
607 f
&= ~IFF_CANTCHANGE
;
609 ifp
->if_flags
= f
| (ifp
->if_flags
& IFF_CANTCHANGE
);
612 case OSIOCGIFADDR
: /* get MAC address of the remote side */
614 bcopy(tp
->ether_addr
, data
, sizeof(tp
->ether_addr
));
617 case SIOCSIFADDR
: /* set MAC address of the remote side */
618 bcopy(data
, tp
->ether_addr
, sizeof(tp
->ether_addr
));
625 lwkt_serialize_exit(ifp
->if_serializer
);
633 * The ops read interface - reads a packet at a time, or at
634 * least as much of a packet as can be read.
636 * Called from the fileops interface with nothing held.
/*
 * tapread - read entry point: hands one queued packet to user space.
 *
 * Visible behavior:
 *  - a check that TAP_READY is fully set (only the debug message of the
 *    failure path is visible);
 *  - TAP_RWAIT is cleared, then a do/while loop repeats until a packet is
 *    obtained: under the if serializer the send queue is dequeued; on the
 *    wait path a non-blocking caller (IO_NDELAY) gets EWOULDBLOCK, while a
 *    blocking caller sets TAP_RWAIT, arms tsleep_interlock(), drops the
 *    serializer and sleeps interruptibly (PCATCH) on the softc address;
 *  - the mbuf chain is then copied out with uiomove(), at most
 *    min(uio_resid, m_len) bytes per mbuf, stopping when the user buffer
 *    is full or an error occurs;
 *  - a "dropping mbuf" debug message covers data left over afterwards.
 *
 * NOTE(review): lines are missing from this listing (the test that selects
 * the wait path, mbuf freeing, error handling after tsleep, the return).
 */
641 tapread(struct dev_read_args
*ap
)
643 cdev_t dev
= ap
->a_head
.a_dev
;
644 struct uio
*uio
= ap
->a_uio
;
645 struct tap_softc
*tp
= dev
->si_drv1
;
646 struct ifnet
*ifp
= &tp
->tap_if
;
647 struct mbuf
*m0
= NULL
;
650 TAPDEBUG(ifp
, "reading, minor = %#x\n", minor(tp
->tap_dev
));
652 if ((tp
->tap_flags
& TAP_READY
) != TAP_READY
) {
653 TAPDEBUG(ifp
, "not ready. minor = %#x, tap_flags = 0x%x\n",
654 minor(tp
->tap_dev
), tp
->tap_flags
);
659 tp
->tap_flags
&= ~TAP_RWAIT
;
661 /* sleep until we get a packet */
663 lwkt_serialize_enter(ifp
->if_serializer
);
664 m0
= ifq_dequeue(&ifp
->if_snd
, NULL
);
666 if (ap
->a_ioflag
& IO_NDELAY
) {
667 lwkt_serialize_exit(ifp
->if_serializer
);
668 return (EWOULDBLOCK
);
670 tp
->tap_flags
|= TAP_RWAIT
;
/* interlock is armed before the serializer is dropped, then we sleep */
672 tsleep_interlock(tp
);
673 lwkt_serialize_exit(ifp
->if_serializer
);
674 error
= tsleep(tp
, PCATCH
, "taprd", 0);
679 lwkt_serialize_exit(ifp
->if_serializer
);
681 } while (m0
== NULL
);
685 /* xfer packet to user space */
686 while ((m0
!= NULL
) && (uio
->uio_resid
> 0) && (error
== 0)) {
687 len
= min(uio
->uio_resid
, m0
->m_len
);
691 error
= uiomove(mtod(m0
, caddr_t
), len
, uio
);
696 TAPDEBUG(ifp
, "dropping mbuf, minor = %#x\n",
707 * The ops write interface - an atomic write is a packet - or else!
709 * Called from the fileops interface with nothing held.
/*
 * tapwrite - write entry point: one write() becomes one input packet.
 *
 * Visible behavior:
 *  - a zero-length write is special-cased (its outcome is not visible);
 *  - a length below zero or above TAPMRU is rejected after a debug message;
 *  - the total length is remembered in `tlen`, a packet header mbuf is
 *    requested with MB_DONTWAIT, and user data is copied in with uiomove()
 *    one mbuf at a time, requesting further mbufs (MGET, MB_DONTWAIT)
 *    while data remains;
 *  - the packet header gets len = tlen and rcvif = ifp, and the chain is
 *    passed to ifp->if_input() with the if serializer held; if_ipackets is
 *    incremented inside the same serialized section.
 *
 * NOTE(review): lines are missing from this listing (allocation-failure
 * handling, chain linking via `mp`, error cleanup, return statements).
 */
714 tapwrite(struct dev_write_args
*ap
)
716 cdev_t dev
= ap
->a_head
.a_dev
;
717 struct uio
*uio
= ap
->a_uio
;
718 struct tap_softc
*tp
= dev
->si_drv1
;
719 struct ifnet
*ifp
= &tp
->tap_if
;
720 struct mbuf
*top
= NULL
, **mp
= NULL
, *m
= NULL
;
721 int error
= 0, tlen
, mlen
;
723 TAPDEBUG(ifp
, "writing, minor = %#x\n", minor(tp
->tap_dev
));
725 if (uio
->uio_resid
== 0)
728 if ((uio
->uio_resid
< 0) || (uio
->uio_resid
> TAPMRU
)) {
729 TAPDEBUG(ifp
, "invalid packet len = %d, minor = %#x\n",
730 uio
->uio_resid
, minor(tp
->tap_dev
));
734 tlen
= uio
->uio_resid
;
736 /* get a header mbuf */
737 MGETHDR(m
, MB_DONTWAIT
, MT_DATA
);
744 while ((error
== 0) && (uio
->uio_resid
> 0)) {
745 m
->m_len
= min(mlen
, uio
->uio_resid
);
746 error
= uiomove(mtod(m
, caddr_t
), m
->m_len
, uio
);
749 if (uio
->uio_resid
> 0) {
750 MGET(m
, MB_DONTWAIT
, MT_DATA
);
765 top
->m_pkthdr
.len
= tlen
;
766 top
->m_pkthdr
.rcvif
= ifp
;
769 * Ethernet bridge and bpf are handled in ether_input
771 * adjust mbuf and give packet to the ether_input
773 lwkt_serialize_enter(ifp
->if_serializer
);
774 ifp
->if_input(ifp
, top
);
775 ifp
->if_ipackets
++; /* ibytes are counted in ether_input */
776 lwkt_serialize_exit(ifp
->if_serializer
);
784 * The poll interface, this is only useful on reads really. The write
785 * detect always returns true, write never blocks anyway, it either
786 * accepts the packet or drops it
788 * Called from the fileops interface with nothing held.
/*
 * tappoll - poll entry point of the tap character device.
 *
 * Visible behavior: with the if serializer held, a request for POLLIN or
 * POLLRDNORM is answered from the send queue - if it is non-empty the
 * requested read bits are reported, otherwise the calling thread is
 * recorded on tap_rsel with selrecord(). After the serializer is released,
 * any requested POLLOUT/POLLWRNORM bits are always reported. The result is
 * written back into ap->a_events.
 *
 * NOTE(review): the declaration of `revents`, the `else` that separates
 * the two read branches and the return statement are missing from this
 * listing.
 */
793 tappoll(struct dev_poll_args
*ap
)
795 cdev_t dev
= ap
->a_head
.a_dev
;
796 struct tap_softc
*tp
= dev
->si_drv1
;
797 struct ifnet
*ifp
= &tp
->tap_if
;
800 TAPDEBUG(ifp
, "polling, minor = %#x\n", minor(tp
->tap_dev
));
802 lwkt_serialize_enter(ifp
->if_serializer
);
803 if (ap
->a_events
& (POLLIN
| POLLRDNORM
)) {
804 if (!ifq_is_empty(&ifp
->if_snd
)) {
806 "has data in queue. minor = %#x\n",
809 revents
|= (ap
->a_events
& (POLLIN
| POLLRDNORM
));
811 TAPDEBUG(ifp
, "waiting for data, minor = %#x\n",
815 selrecord(curthread
, &tp
->tap_rsel
);
819 lwkt_serialize_exit(ifp
->if_serializer
);
821 if (ap
->a_events
& (POLLOUT
| POLLWRNORM
))
822 revents
|= (ap
->a_events
& (POLLOUT
| POLLWRNORM
));
823 ap
->a_events
= revents
;
828 * tapkqfilter - called from the fileops interface with nothing held
/*
 * kqueue read-filter plumbing: prototypes for the two filter callbacks and
 * the filterops table that tapkqfilter() installs in kn->kn_fop.
 *
 * NOTE(review): the initializer is positional; presumably the slots are
 * (is-fd flag, attach, detach, event) - confirm against struct filterops
 * in sys/event.h.
 */
832 static int filt_tapread(struct knote
*kn
, long hint
);
833 static void filt_tapdetach(struct knote
*kn
);
834 static struct filterops tapread_filtops
=
835 { 1, NULL
, filt_tapdetach
, filt_tapread
};
/*
 * tapkqfilter - kqueue attach entry point of the tap character device.
 *
 * Visible behavior: dispatches on kn->kn_filter; on the one visible path
 * the knote list of tap_rsel is selected, the knote is given
 * tapread_filtops and the softc as its hook, and the knote is inserted at
 * the head of that list.
 *
 * NOTE(review): how `tp` is assigned, the case labels, the handling of
 * other filter types and the return statement are missing from this
 * listing.
 */
838 tapkqfilter(struct dev_kqfilter_args
*ap
)
840 cdev_t dev
= ap
->a_head
.a_dev
;
841 struct knote
*kn
= ap
->a_kn
;
842 struct tap_softc
*tp
;
851 switch(kn
->kn_filter
) {
853 list
= &tp
->tap_rsel
.si_note
;
854 kn
->kn_fop
= &tapread_filtops
;
855 kn
->kn_hook
= (void *)tp
;
865 SLIST_INSERT_HEAD(list
, kn
, kn_selnext
);
/*
 * filt_tapread - kqueue read filter: recovers the softc from kn->kn_hook
 * and tests whether the interface send queue is non-empty.
 *
 * NOTE(review): the values returned on each branch are missing from this
 * listing. `hint` is not used in the visible lines.
 */
872 filt_tapread(struct knote
*kn
, long hint
)
874 struct tap_softc
*tp
= (void *)kn
->kn_hook
;
875 struct ifnet
*ifp
= &tp
->tap_if
;
877 if (ifq_is_empty(&ifp
->if_snd
) == 0) {
/*
 * filt_tapdetach - kqueue detach callback: recovers the softc from
 * kn->kn_hook and unlinks the knote from the tap_rsel knote list, i.e. the
 * list tapkqfilter() inserted it into.
 */
885 filt_tapdetach(struct knote
*kn
)
887 struct tap_softc
*tp
= (void *)kn
->kn_hook
;
889 SLIST_REMOVE(&tp
->tap_rsel
.si_note
, kn
, knote
, kn_selnext
);