1 /* $FreeBSD: src/sys/net/if_stf.c,v 1.1.2.11 2003/01/23 21:06:44 sam Exp $ */
2 /* $KAME: if_stf.c,v 1.73 2001/12/03 11:08:30 keiichi Exp $ */
5 * Copyright (C) 2000 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * 6to4 interface, based on RFC3056.
36 * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
37 * There is no address mapping defined from IPv6 multicast address to IPv4
38 * address. Therefore, we do not have IFF_MULTICAST on the interface.
40 * Due to the lack of address mapping for link-local addresses, we cannot
41 * throw packets toward link-local addresses (fe80::x). Also, we cannot throw
42 * packets to link-local multicast addresses (ff02::x).
44 * Here are interesting symptoms due to the lack of link-local address:
46 * Unicast routing exchange:
47 * - RIPng: Impossible. Uses link-local multicast packet toward ff02::9,
48 * and link-local addresses as nexthop.
49 * - OSPFv6: Impossible. OSPFv6 assumes that there's a link-local address
50 * assigned to the link, and makes use of it. Also, HELLO packets use
51 * link-local multicast addresses (ff02::5 and ff02::6).
52 * - BGP4+: Maybe. You can only use global address as nexthop, and global
53 * address as TCP endpoint address.
55 * Multicast routing protocols:
56 * - PIM: Hello packet cannot be used to discover adjacent PIM routers.
57 * Adjacent PIM routers must be configured manually (is it really a spec-wise
58 * correct thing to do?).
61 * - Redirects cannot be used due to the lack of link-local address.
63 * stf interface does not have, and will not need, a link-local address.
64 * It seems to have no real benefit and does not help the above symptoms much.
65 * Even if we assign link-locals to the interface, we cannot really
66 * use link-local unicast/multicast on top of 6to4 cloud (since there's no
67 * encapsulation defined for link-local address), and the above analysis does
68 * not change. RFC3056 does not mandate the assignment of link-local address
71 * 6to4 interface has security issues. Refer to
72 * http://playground.iijlab.net/i-d/draft-itojun-ipv6-transition-abuse-00.txt
73 * for details. The code tries to filter out some malicious packets.
74 * Note that there is no way to be 100% secure.
78 #include "opt_inet6.h"
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/socket.h>
83 #include <sys/sockio.h>
85 #include <sys/errno.h>
86 #include <sys/protosw.h>
87 #include <sys/kernel.h>
88 #include <machine/cpu.h>
90 #include <sys/malloc.h>
93 #include <net/route.h>
94 #include <net/netisr.h>
95 #include <net/if_types.h>
96 #include <net/ifq_var.h>
97 #include <net/netisr2.h>
100 #include <netinet/in.h>
101 #include <netinet/in_systm.h>
102 #include <netinet/ip.h>
103 #include <netinet/ip_var.h>
104 #include <netinet/in_var.h>
106 #include <netinet/ip6.h>
107 #include <netinet6/ip6_var.h>
108 #include <netinet6/in6_var.h>
109 #include <netinet/ip_ecn.h>
111 #include <netinet/ip_encap.h>
113 #include <machine/stdarg.h>
115 #include <net/net_osdep.h>
119 #define IN6_IS_ADDR_6TO4(x) (ntohs((x)->s6_addr16[0]) == 0x2002)
120 #define GET_V4(x) ((struct in_addr *)(&(x)->s6_addr16[1]))
123 struct ifnet sc_if
; /* common area */
124 struct route
*route_pcpu
;
125 const struct encaptab
*encap_cookie
;
128 static struct stf_softc
*stf
;
130 static MALLOC_DEFINE(M_STF
, "stf", "6to4 Tunnel Interface");
131 static int ip_stf_ttl
= 40;
133 extern struct domain inetdomain
;
134 struct protosw in_stf_protosw
=
137 .pr_domain
= &inetdomain
,
138 .pr_protocol
= IPPROTO_IPV6
,
139 .pr_flags
= PR_ATOMIC
|PR_ADDR
,
141 .pr_input
= in_stf_input
,
142 .pr_output
= rip_output
,
144 .pr_ctloutput
= rip_ctloutput
,
146 .pr_usrreqs
= &rip_usrreqs
149 static int stfmodevent (module_t
, int, void *);
150 static int stf_encapcheck (const struct mbuf
*, int, int, void *);
151 static struct in6_ifaddr
*stf_getsrcifa6 (struct ifnet
*);
152 static int stf_output (struct ifnet
*, struct mbuf
*, struct sockaddr
*,
154 static int stf_checkaddr4 (struct stf_softc
*, struct in_addr
*,
156 static int stf_checkaddr6 (struct stf_softc
*, struct in6_addr
*,
158 static void stf_rtrequest (int, struct rtentry
*);
159 static int stf_ioctl (struct ifnet
*, u_long
, caddr_t
, struct ucred
*);
162 stfmodevent(module_t mod
, int type
, void *data
)
164 struct stf_softc
*sc
;
166 const struct encaptab
*p
;
170 stf
= kmalloc(sizeof(struct stf_softc
), M_STF
,
174 bzero(sc
, sizeof(*sc
));
175 if_initname(&(sc
->sc_if
), "stf", 0);
177 p
= encap_attach_func(AF_INET
, IPPROTO_IPV6
, stf_encapcheck
,
178 (void *)&in_stf_protosw
, sc
);
180 kprintf("%s: attach failed\n", if_name(&sc
->sc_if
));
183 sc
->encap_cookie
= p
;
184 sc
->route_pcpu
= kmalloc(netisr_ncpus
* sizeof(struct route
),
185 M_STF
, M_WAITOK
| M_ZERO
);
187 sc
->sc_if
.if_mtu
= IPV6_MMTU
;
188 sc
->sc_if
.if_flags
= 0;
189 sc
->sc_if
.if_ioctl
= stf_ioctl
;
190 sc
->sc_if
.if_output
= stf_output
;
191 sc
->sc_if
.if_type
= IFT_STF
;
193 /* turn off ingress filter */
194 sc
->sc_if
.if_flags
|= IFF_LINK2
;
196 ifq_set_maxlen(&sc
->sc_if
.if_snd
, IFQ_MAXLEN
);
197 if_attach(&sc
->sc_if
, NULL
);
198 bpfattach(&sc
->sc_if
, DLT_NULL
, sizeof(u_int
));
202 bpfdetach(&sc
->sc_if
);
203 if_detach(&sc
->sc_if
);
204 err
= encap_detach(sc
->encap_cookie
);
205 KASSERT(err
== 0, ("Unexpected error detaching encap_cookie"));
206 for (cpu
= 0; cpu
< netisr_ncpus
; ++cpu
) {
207 if (sc
->route_pcpu
[cpu
].ro_rt
!= NULL
) {
208 rtfree_async(sc
->route_pcpu
[cpu
].ro_rt
);
209 sc
->route_pcpu
[cpu
].ro_rt
= NULL
;
212 kfree(sc
->route_pcpu
, M_STF
);
220 static moduledata_t stf_mod
= {
226 DECLARE_MODULE(if_stf
, stf_mod
, SI_SUB_PSEUDO
, SI_ORDER_ANY
);
229 stf_encapcheck(const struct mbuf
*m
, int off
, int proto
, void *arg
)
232 struct in6_ifaddr
*ia6
;
233 struct stf_softc
*sc
;
236 sc
= (struct stf_softc
*)arg
;
240 if ((sc
->sc_if
.if_flags
& IFF_UP
) == 0)
243 /* IFF_LINK0 means "no decapsulation" */
244 if ((sc
->sc_if
.if_flags
& IFF_LINK0
) != 0)
247 if (proto
!= IPPROTO_IPV6
)
250 m_copydata(m
, 0, sizeof(ip
), (caddr_t
)&ip
);
255 ia6
= stf_getsrcifa6(&sc
->sc_if
);
260 * check if IPv4 dst matches the IPv4 address derived from the
261 * local 6to4 address.
262 * success on: dst = 10.1.1.1, ia6->ia_addr = 2002:0a01:0101:...
264 if (bcmp(GET_V4(&ia6
->ia_addr
.sin6_addr
), &ip
.ip_dst
,
265 sizeof(ip
.ip_dst
)) != 0)
269 * check if IPv4 src matches the IPv4 address derived from the
270 * local 6to4 address masked by prefixmask.
271 * success on: src = 10.1.1.1, ia6->ia_addr = 2002:0a00:.../24
272 * fail on: src = 10.1.1.1, ia6->ia_addr = 2002:0b00:.../24
274 bzero(&a
, sizeof(a
));
275 a
.s_addr
= GET_V4(&ia6
->ia_addr
.sin6_addr
)->s_addr
;
276 a
.s_addr
&= GET_V4(&ia6
->ia_prefixmask
.sin6_addr
)->s_addr
;
278 b
.s_addr
&= GET_V4(&ia6
->ia_prefixmask
.sin6_addr
)->s_addr
;
279 if (a
.s_addr
!= b
.s_addr
)
282 /* stf interface makes single side match only */
286 static struct in6_ifaddr
*
287 stf_getsrcifa6(struct ifnet
*ifp
)
289 struct ifaddr_container
*ifac
;
290 struct sockaddr_in6
*sin6
;
293 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
294 struct ifaddr
*ia
= ifac
->ifa
;
295 struct in_ifaddr_container
*iac
;
297 if (ia
->ifa_addr
== NULL
)
299 if (ia
->ifa_addr
->sa_family
!= AF_INET6
)
301 sin6
= (struct sockaddr_in6
*)ia
->ifa_addr
;
302 if (!IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
))
305 bcopy(GET_V4(&sin6
->sin6_addr
), &in
, sizeof(in
));
306 LIST_FOREACH(iac
, INADDR_HASH(in
.s_addr
), ia_hash
) {
307 if (iac
->ia
->ia_addr
.sin_addr
.s_addr
== in
.s_addr
)
313 return (struct in6_ifaddr
*)ia
;
320 stf_output_serialized(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
323 struct stf_softc
*sc
;
324 struct sockaddr_in6
*dst6
;
326 struct sockaddr_in
*dst4
;
330 struct in6_ifaddr
*ia6
;
332 static const uint32_t af
= AF_INET6
;
334 ASSERT_NETISR_NCPUS(mycpuid
);
336 sc
= (struct stf_softc
*)ifp
;
337 dst6
= (struct sockaddr_in6
*)dst
;
340 if ((ifp
->if_flags
& IFF_UP
) == 0) {
346 * If we don't have an ip4 address that matches my inner ip6 address,
347 * we shouldn't generate output. Without this check, we'll end up
348 * using wrong IPv4 source.
350 ia6
= stf_getsrcifa6(ifp
);
356 if (m
->m_len
< sizeof(*ip6
)) {
357 m
= m_pullup(m
, sizeof(*ip6
));
361 ip6
= mtod(m
, struct ip6_hdr
*);
362 tos
= (ntohl(ip6
->ip6_flow
) >> 20) & 0xff;
365 * Pickup the right outer dst addr from the list of candidates.
366 * ip6_dst has priority as it may be able to give us shorter IPv4 hops.
368 if (IN6_IS_ADDR_6TO4(&ip6
->ip6_dst
))
369 in4
= GET_V4(&ip6
->ip6_dst
);
370 else if (IN6_IS_ADDR_6TO4(&dst6
->sin6_addr
))
371 in4
= GET_V4(&dst6
->sin6_addr
);
380 bpf_ptap(ifp
->if_bpf
, m
, &af
, sizeof(af
));
384 M_PREPEND(m
, sizeof(struct ip
), M_NOWAIT
);
385 if (m
&& m
->m_len
< sizeof(struct ip
))
386 m
= m_pullup(m
, sizeof(struct ip
));
389 ip
= mtod(m
, struct ip
*);
391 bzero(ip
, sizeof(*ip
));
393 bcopy(GET_V4(&((struct sockaddr_in6
*)&ia6
->ia_addr
)->sin6_addr
),
394 &ip
->ip_src
, sizeof(ip
->ip_src
));
395 bcopy(in4
, &ip
->ip_dst
, sizeof(ip
->ip_dst
));
396 ip
->ip_p
= IPPROTO_IPV6
;
397 ip
->ip_ttl
= ip_stf_ttl
;
398 ip
->ip_len
= m
->m_pkthdr
.len
; /*host order*/
399 if (ifp
->if_flags
& IFF_LINK1
)
400 ip_ecn_ingress(ECN_ALLOWED
, &ip
->ip_tos
, &tos
);
402 ip_ecn_ingress(ECN_NOCARE
, &ip
->ip_tos
, &tos
);
404 ro
= &sc
->route_pcpu
[mycpuid
];
405 dst4
= (struct sockaddr_in
*)&ro
->ro_dst
;
406 if (dst4
->sin_family
!= AF_INET
||
407 bcmp(&dst4
->sin_addr
, &ip
->ip_dst
, sizeof(ip
->ip_dst
)) != 0) {
408 /* cached route doesn't match */
409 dst4
->sin_family
= AF_INET
;
410 dst4
->sin_len
= sizeof(struct sockaddr_in
);
411 bcopy(&ip
->ip_dst
, &dst4
->sin_addr
, sizeof(dst4
->sin_addr
));
417 if (ro
->ro_rt
!= NULL
&& (ro
->ro_rt
->rt_flags
& RTF_UP
) == 0) {
422 if (ro
->ro_rt
== NULL
) {
424 if (ro
->ro_rt
== NULL
) {
430 return ip_output(m
, NULL
, ro
, IP_DEBUGROUTE
, NULL
, NULL
);
434 stf_output(struct ifnet
*ifp
, struct mbuf
*m
, struct sockaddr
*dst
,
437 struct ifaltq_subque
*ifsq
= ifq_get_subq_default(&ifp
->if_snd
);
440 ifsq_serialize_hw(ifsq
);
441 error
= stf_output_serialized(ifp
, m
, dst
, rt
);
442 ifsq_deserialize_hw(ifsq
);
449 * inifp: incoming interface
452 stf_checkaddr4(struct stf_softc
*sc
, struct in_addr
*in
, struct ifnet
*inifp
)
454 struct in_ifaddr_container
*iac
;
457 * reject packets with the following address:
458 * 224.0.0.0/4 0.0.0.0/8 127.0.0.0/8 255.0.0.0/8
460 if (IN_MULTICAST(ntohl(in
->s_addr
)))
462 switch ((ntohl(in
->s_addr
) & 0xff000000) >> 24) {
463 case 0: case 127: case 255:
468 * reject packets with broadcast
470 TAILQ_FOREACH(iac
, &in_ifaddrheads
[mycpuid
], ia_link
) {
471 struct in_ifaddr
*ia4
= iac
->ia
;
473 if ((ia4
->ia_ifa
.ifa_ifp
->if_flags
& IFF_BROADCAST
) == 0)
475 if (in
->s_addr
== ia4
->ia_broadaddr
.sin_addr
.s_addr
)
480 * perform ingress filter
482 if (sc
&& (sc
->sc_if
.if_flags
& IFF_LINK2
) == 0 && inifp
) {
483 struct sockaddr_in sin
;
486 bzero(&sin
, sizeof(sin
));
487 sin
.sin_family
= AF_INET
;
488 sin
.sin_len
= sizeof(struct sockaddr_in
);
490 rt
= rtpurelookup((struct sockaddr
*)&sin
);
491 if (!rt
|| rt
->rt_ifp
!= inifp
) {
493 log(LOG_WARNING
, "%s: packet from 0x%x dropped "
494 "due to ingress filter\n", if_name(&sc
->sc_if
),
495 (u_int32_t
)ntohl(sin
.sin_addr
.s_addr
));
509 * inifp: incoming interface
512 stf_checkaddr6(struct stf_softc
*sc
, struct in6_addr
*in6
, struct ifnet
*inifp
)
515 * check 6to4 addresses
517 if (IN6_IS_ADDR_6TO4(in6
))
518 return stf_checkaddr4(sc
, GET_V4(in6
), inifp
);
521 * reject anything that looks suspicious. the test is implemented
522 * in ip6_input too, but we check here as well to
523 * (1) reject bad packets earlier, and
524 * (2) to be safe against future ip6_input change.
526 if (IN6_IS_ADDR_V4COMPAT(in6
) || IN6_IS_ADDR_V4MAPPED(in6
))
533 in_stf_input(struct mbuf
**mp
, int *offp
, int proto
)
536 struct stf_softc
*sc
;
542 static const uint32_t af
= AF_INET6
;
547 if (proto
!= IPPROTO_IPV6
) {
549 return(IPPROTO_DONE
);
552 ip
= mtod(m
, struct ip
*);
554 sc
= (struct stf_softc
*)encap_getarg(m
);
556 if (sc
== NULL
|| (sc
->sc_if
.if_flags
& IFF_UP
) == 0) {
558 return(IPPROTO_DONE
);
564 * perform sanity check against outer src/dst.
565 * for source, perform ingress filter as well.
567 if (stf_checkaddr4(sc
, &ip
->ip_dst
, NULL
) < 0 ||
568 stf_checkaddr4(sc
, &ip
->ip_src
, m
->m_pkthdr
.rcvif
) < 0) {
570 return(IPPROTO_DONE
);
576 if (m
->m_len
< sizeof(*ip6
)) {
577 m
= m_pullup(m
, sizeof(*ip6
));
579 return(IPPROTO_DONE
);
581 ip6
= mtod(m
, struct ip6_hdr
*);
584 * perform sanity check against inner src/dst.
585 * for source, perform ingress filter as well.
587 if (stf_checkaddr6(sc
, &ip6
->ip6_dst
, NULL
) < 0 ||
588 stf_checkaddr6(sc
, &ip6
->ip6_src
, m
->m_pkthdr
.rcvif
) < 0) {
590 return(IPPROTO_DONE
);
593 itos
= (ntohl(ip6
->ip6_flow
) >> 20) & 0xff;
594 if ((ifp
->if_flags
& IFF_LINK1
) != 0)
595 ip_ecn_egress(ECN_ALLOWED
, &otos
, &itos
);
597 ip_ecn_egress(ECN_NOCARE
, &otos
, &itos
);
598 ip6
->ip6_flow
&= ~htonl(0xff << 20);
599 ip6
->ip6_flow
|= htonl((u_int32_t
)itos
<< 20);
601 m
->m_pkthdr
.rcvif
= ifp
;
606 bpf_ptap(ifp
->if_bpf
, m
, &af
, sizeof(af
));
611 * Put the packet to the network layer input queue according to the
612 * specified address family.
613 * See net/if_gif.c for possible issues with packet processing
614 * reorder due to extra queueing.
616 IFNET_STAT_INC(ifp
, ipackets
, 1);
617 IFNET_STAT_INC(ifp
, ibytes
, m
->m_pkthdr
.len
);
618 netisr_queue(NETISR_IPV6
, m
);
619 return(IPPROTO_DONE
);
624 stf_rtrequest(int cmd
, struct rtentry
*rt
)
628 rt
->rt_rmx
.rmx_mtu
= IPV6_MMTU
;
632 stf_ioctl(struct ifnet
*ifp
, u_long cmd
, caddr_t data
, struct ucred
*cr
)
636 struct sockaddr_in6
*sin6
;
642 ifa
= (struct ifaddr
*)data
;
643 if (ifa
== NULL
|| ifa
->ifa_addr
->sa_family
!= AF_INET6
) {
644 error
= EAFNOSUPPORT
;
647 sin6
= (struct sockaddr_in6
*)ifa
->ifa_addr
;
648 if (IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
)) {
649 ifa
->ifa_rtrequest
= stf_rtrequest
;
650 ifp
->if_flags
|= IFF_UP
;
657 ifr
= (struct ifreq
*)data
;
658 if (ifr
&& ifr
->ifr_addr
.sa_family
== AF_INET6
)
661 error
= EAFNOSUPPORT
;