2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
9 * $Id: sit.c,v 1.42 2000/08/02 06:03:59 davem Exp $
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
17 #define __NO_VERSION__
18 #include <linux/config.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/types.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/sched.h>
25 #include <linux/net.h>
26 #include <linux/in6.h>
27 #include <linux/netdevice.h>
28 #include <linux/if_arp.h>
29 #include <linux/icmp.h>
30 #include <asm/uaccess.h>
31 #include <linux/init.h>
32 #include <linux/netfilter_ipv4.h>
38 #include <net/protocol.h>
39 #include <net/transp_v6.h>
40 #include <net/ip6_fib.h>
41 #include <net/ip6_route.h>
42 #include <net/ndisc.h>
43 #include <net/addrconf.h>
48 #include <net/inet_ecn.h>
51 This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
53 For comments look at net/ipv4/ip_gre.c --ANK
57 #define HASH(addr) ((addr^(addr>>4))&0xF)
59 static int ipip6_fb_tunnel_init(struct net_device
*dev
);
60 static int ipip6_tunnel_init(struct net_device
*dev
);
62 static struct net_device ipip6_fb_tunnel_dev
= {
63 "sit0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL
, ipip6_fb_tunnel_init
,
66 static struct ip_tunnel ipip6_fb_tunnel
= {
67 NULL
, &ipip6_fb_tunnel_dev
, {0, }, 0, 0, 0, 0, 0, 0, 0, {"sit0", }
70 static struct ip_tunnel
*tunnels_r_l
[HASH_SIZE
];
71 static struct ip_tunnel
*tunnels_r
[HASH_SIZE
];
72 static struct ip_tunnel
*tunnels_l
[HASH_SIZE
];
73 static struct ip_tunnel
*tunnels_wc
[1];
74 static struct ip_tunnel
**tunnels
[4] = { tunnels_wc
, tunnels_l
, tunnels_r
, tunnels_r_l
};
76 static rwlock_t ipip6_lock
= RW_LOCK_UNLOCKED
;
78 static struct ip_tunnel
* ipip6_tunnel_lookup(u32 remote
, u32 local
)
80 unsigned h0
= HASH(remote
);
81 unsigned h1
= HASH(local
);
84 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
85 if (local
== t
->parms
.iph
.saddr
&&
86 remote
== t
->parms
.iph
.daddr
&& (t
->dev
->flags
&IFF_UP
))
89 for (t
= tunnels_r
[h0
]; t
; t
= t
->next
) {
90 if (remote
== t
->parms
.iph
.daddr
&& (t
->dev
->flags
&IFF_UP
))
93 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
94 if (local
== t
->parms
.iph
.saddr
&& (t
->dev
->flags
&IFF_UP
))
97 if ((t
= tunnels_wc
[0]) != NULL
&& (t
->dev
->flags
&IFF_UP
))
102 static struct ip_tunnel
** ipip6_bucket(struct ip_tunnel
*t
)
104 u32 remote
= t
->parms
.iph
.daddr
;
105 u32 local
= t
->parms
.iph
.saddr
;
117 return &tunnels
[prio
][h
];
120 static void ipip6_tunnel_unlink(struct ip_tunnel
*t
)
122 struct ip_tunnel
**tp
;
124 for (tp
= ipip6_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
126 write_lock_bh(&ipip6_lock
);
128 write_unlock_bh(&ipip6_lock
);
134 static void ipip6_tunnel_link(struct ip_tunnel
*t
)
136 struct ip_tunnel
**tp
= ipip6_bucket(t
);
138 write_lock_bh(&ipip6_lock
);
140 write_unlock_bh(&ipip6_lock
);
144 struct ip_tunnel
* ipip6_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
146 u32 remote
= parms
->iph
.daddr
;
147 u32 local
= parms
->iph
.saddr
;
148 struct ip_tunnel
*t
, **tp
, *nt
;
149 struct net_device
*dev
;
161 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
162 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
)
169 dev
= kmalloc(sizeof(*dev
) + sizeof(*t
), GFP_KERNEL
);
174 memset(dev
, 0, sizeof(*dev
) + sizeof(*t
));
175 dev
->priv
= (void*)(dev
+1);
176 nt
= (struct ip_tunnel
*)dev
->priv
;
178 dev
->init
= ipip6_tunnel_init
;
180 memcpy(&nt
->parms
, parms
, sizeof(*parms
));
181 strcpy(dev
->name
, nt
->parms
.name
);
182 if (dev
->name
[0] == 0) {
184 for (i
=1; i
<100; i
++) {
185 sprintf(dev
->name
, "sit%d", i
);
186 if (__dev_get_by_name(dev
->name
) == NULL
)
191 memcpy(parms
->name
, dev
->name
, IFNAMSIZ
);
193 if (register_netdevice(dev
) < 0)
197 ipip6_tunnel_link(nt
);
198 /* Do not decrement MOD_USE_COUNT here. */
207 static void ipip6_tunnel_destructor(struct net_device
*dev
)
209 if (dev
!= &ipip6_fb_tunnel_dev
) {
214 static void ipip6_tunnel_uninit(struct net_device
*dev
)
216 if (dev
== &ipip6_fb_tunnel_dev
) {
217 write_lock_bh(&ipip6_lock
);
218 tunnels_wc
[0] = NULL
;
219 write_unlock_bh(&ipip6_lock
);
222 ipip6_tunnel_unlink((struct ip_tunnel
*)dev
->priv
);
228 void ipip6_err(struct sk_buff
*skb
, unsigned char *dp
, int len
)
230 #ifndef I_WISH_WORLD_WERE_PERFECT
232 /* It is not :-( All the routers (except for Linux) return only
233 8 bytes of packet payload. It means, that precise relaying of
234 ICMP in the real Internet is absolutely infeasible.
236 struct iphdr
*iph
= (struct iphdr
*)dp
;
237 int type
= skb
->h
.icmph
->type
;
238 int code
= skb
->h
.icmph
->code
;
241 if (len
< sizeof(struct iphdr
))
246 case ICMP_PARAMETERPROB
:
249 case ICMP_DEST_UNREACH
:
252 case ICMP_PORT_UNREACH
:
253 /* Impossible event. */
255 case ICMP_FRAG_NEEDED
:
256 /* Soft state for pmtu is maintained by IP core. */
259 /* All others are translated to HOST_UNREACH.
260 rfc2003 contains "deep thoughts" about NET_UNREACH,
261 I believe they are just ether pollution. --ANK
266 case ICMP_TIME_EXCEEDED
:
267 if (code
!= ICMP_EXC_TTL
)
272 read_lock(&ipip6_lock
);
273 t
= ipip6_tunnel_lookup(iph
->daddr
, iph
->saddr
);
274 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0)
276 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
279 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
283 t
->err_time
= jiffies
;
285 read_unlock(&ipip6_lock
);
288 struct iphdr
*iph
= (struct iphdr
*)dp
;
289 int hlen
= iph
->ihl
<<2;
290 struct ipv6hdr
*iph6
;
291 int type
= skb
->h
.icmph
->type
;
292 int code
= skb
->h
.icmph
->code
;
296 struct sk_buff
*skb2
;
297 struct rt6_info
*rt6i
;
299 if (len
< hlen
+ sizeof(struct ipv6hdr
))
301 iph6
= (struct ipv6hdr
*)(dp
+ hlen
);
306 case ICMP_PARAMETERPROB
:
307 if (skb
->h
.icmph
->un
.gateway
< hlen
)
310 /* So... This guy found something strange INSIDE encapsulated
311 packet. Well, he is fool, but what can we do ?
313 rel_type
= ICMPV6_PARAMPROB
;
314 rel_info
= skb
->h
.icmph
->un
.gateway
- hlen
;
317 case ICMP_DEST_UNREACH
:
320 case ICMP_PORT_UNREACH
:
321 /* Impossible event. */
323 case ICMP_FRAG_NEEDED
:
324 /* Too complicated case ... */
327 /* All others are translated to HOST_UNREACH.
328 rfc2003 contains "deep thoughts" about NET_UNREACH,
329 I believe, it is just ether pollution. --ANK
331 rel_type
= ICMPV6_DEST_UNREACH
;
332 rel_code
= ICMPV6_ADDR_UNREACH
;
336 case ICMP_TIME_EXCEEDED
:
337 if (code
!= ICMP_EXC_TTL
)
339 rel_type
= ICMPV6_TIME_EXCEED
;
340 rel_code
= ICMPV6_EXC_HOPLIMIT
;
344 /* Prepare fake skb to feed it to icmpv6_send */
345 skb2
= skb_clone(skb
, GFP_ATOMIC
);
348 dst_release(skb2
->dst
);
350 skb_pull(skb2
, skb
->data
- (u8
*)iph6
);
351 skb2
->nh
.raw
= skb2
->data
;
353 /* Try to guess incoming interface */
354 rt6i
= rt6_lookup(&iph6
->saddr
, NULL
, NULL
, 0);
355 if (rt6i
&& rt6i
->rt6i_dev
) {
356 skb2
->dev
= rt6i
->rt6i_dev
;
358 rt6i
= rt6_lookup(&iph6
->daddr
, &iph6
->saddr
, NULL
, 0);
360 if (rt6i
&& rt6i
->rt6i_dev
&& rt6i
->rt6i_dev
->type
== ARPHRD_SIT
) {
361 struct ip_tunnel
* t
= (struct ip_tunnel
*)rt6i
->rt6i_dev
->priv
;
362 if (rel_type
== ICMPV6_TIME_EXCEED
&& t
->parms
.iph
.ttl
) {
363 rel_type
= ICMPV6_DEST_UNREACH
;
364 rel_code
= ICMPV6_ADDR_UNREACH
;
366 icmpv6_send(skb2
, rel_type
, rel_code
, rel_info
, skb2
->dev
);
374 static inline void ipip6_ecn_decapsulate(struct iphdr
*iph
, struct sk_buff
*skb
)
376 if (INET_ECN_is_ce(iph
->tos
) &&
377 INET_ECN_is_not_ce(ip6_get_dsfield(skb
->nh
.ipv6h
)))
378 IP6_ECN_set_ce(skb
->nh
.ipv6h
);
381 int ipip6_rcv(struct sk_buff
*skb
, unsigned short len
)
384 struct ip_tunnel
*tunnel
;
388 read_lock(&ipip6_lock
);
389 if ((tunnel
= ipip6_tunnel_lookup(iph
->saddr
, iph
->daddr
)) != NULL
) {
390 skb
->mac
.raw
= skb
->nh
.raw
;
391 skb
->nh
.raw
= skb_pull(skb
, skb
->h
.raw
- skb
->data
);
392 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
393 skb
->protocol
= __constant_htons(ETH_P_IPV6
);
395 skb
->pkt_type
= PACKET_HOST
;
396 tunnel
->stat
.rx_packets
++;
397 tunnel
->stat
.rx_bytes
+= skb
->len
;
398 skb
->dev
= tunnel
->dev
;
399 dst_release(skb
->dst
);
401 #ifdef CONFIG_NETFILTER
402 nf_conntrack_put(skb
->nfct
);
405 ipip6_ecn_decapsulate(iph
, skb
);
407 read_unlock(&ipip6_lock
);
411 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PROT_UNREACH
, 0);
413 read_unlock(&ipip6_lock
);
417 /* Need this wrapper because NF_HOOK takes the function address */
418 static inline int do_ip_send(struct sk_buff
*skb
)
424 * This function assumes it is being called from dev_queue_xmit()
425 * and that skb is filled properly by that function.
428 static int ipip6_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
430 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
431 struct net_device_stats
*stats
= &tunnel
->stat
;
432 struct iphdr
*tiph
= &tunnel
->parms
.iph
;
433 struct ipv6hdr
*iph6
= skb
->nh
.ipv6h
;
434 u8 tos
= tunnel
->parms
.iph
.tos
;
435 struct rtable
*rt
; /* Route to the other host */
436 struct net_device
*tdev
; /* Device to other host */
437 struct iphdr
*iph
; /* Our new IP header */
438 int max_headroom
; /* The extra header space needed */
439 u32 dst
= tiph
->daddr
;
441 struct in6_addr
*addr6
;
444 if (tunnel
->recursion
++) {
445 tunnel
->stat
.collisions
++;
449 if (skb
->protocol
!= __constant_htons(ETH_P_IPV6
))
453 struct neighbour
*neigh
= NULL
;
456 neigh
= skb
->dst
->neighbour
;
459 printk(KERN_DEBUG
"sit: nexthop == NULL\n");
463 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
464 addr_type
= ipv6_addr_type(addr6
);
466 if (addr_type
== IPV6_ADDR_ANY
) {
467 addr6
= &skb
->nh
.ipv6h
->daddr
;
468 addr_type
= ipv6_addr_type(addr6
);
471 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
474 dst
= addr6
->s6_addr32
[3];
477 if (ip_route_output(&rt
, dst
, tiph
->saddr
, RT_TOS(tos
), tunnel
->parms
.link
)) {
478 tunnel
->stat
.tx_carrier_errors
++;
481 tdev
= rt
->u
.dst
.dev
;
485 tunnel
->stat
.collisions
++;
489 mtu
= rt
->u
.dst
.pmtu
- sizeof(struct iphdr
);
491 tunnel
->stat
.collisions
++;
495 if (mtu
< IPV6_MIN_MTU
)
497 if (skb
->dst
&& mtu
< skb
->dst
->pmtu
) {
498 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
499 if (mtu
< rt6
->u
.dst
.pmtu
) {
500 if (tunnel
->parms
.iph
.daddr
|| rt6
->rt6i_dst
.plen
== 128) {
501 rt6
->rt6i_flags
|= RTF_MODIFIED
;
502 rt6
->u
.dst
.pmtu
= mtu
;
506 if (skb
->len
> mtu
) {
507 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
512 if (tunnel
->err_count
> 0) {
513 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
515 dst_link_failure(skb
);
517 tunnel
->err_count
= 0;
520 skb
->h
.raw
= skb
->nh
.raw
;
523 * Okay, now see if we can stuff it in the buffer as-is.
525 max_headroom
= (((tdev
->hard_header_len
+15)&~15)+sizeof(struct iphdr
));
527 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
528 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
537 skb_set_owner_w(new_skb
, skb
->sk
);
542 skb
->nh
.raw
= skb_push(skb
, sizeof(struct iphdr
));
543 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
544 dst_release(skb
->dst
);
545 skb
->dst
= &rt
->u
.dst
;
548 * Push down and install the IPIP header.
553 iph
->ihl
= sizeof(struct iphdr
)>>2;
554 if (mtu
> IPV6_MIN_MTU
)
555 iph
->frag_off
= __constant_htons(IP_DF
);
559 iph
->protocol
= IPPROTO_IPV6
;
560 iph
->tos
= INET_ECN_encapsulate(tos
, ip6_get_dsfield(iph6
));
561 iph
->daddr
= rt
->rt_dst
;
562 iph
->saddr
= rt
->rt_src
;
564 if ((iph
->ttl
= tiph
->ttl
) == 0)
565 iph
->ttl
= iph6
->hop_limit
;
567 #ifdef CONFIG_NETFILTER
568 nf_conntrack_put(skb
->nfct
);
577 dst_link_failure(skb
);
586 ipip6_tunnel_ioctl (struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
589 struct ip_tunnel_parm p
;
597 if (dev
== &ipip6_fb_tunnel_dev
) {
598 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
602 t
= ipip6_tunnel_locate(&p
, 0);
605 t
= (struct ip_tunnel
*)dev
->priv
;
606 memcpy(&p
, &t
->parms
, sizeof(p
));
607 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
614 if (!capable(CAP_NET_ADMIN
))
618 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
622 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_IPV6
||
623 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&__constant_htons(~IP_DF
)))
626 p
.iph
.frag_off
|= __constant_htons(IP_DF
);
628 t
= ipip6_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
630 if (dev
!= &ipip6_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
&&
631 t
!= &ipip6_fb_tunnel
) {
638 if (((dev
->flags
&IFF_POINTOPOINT
) && !p
.iph
.daddr
) ||
639 (!(dev
->flags
&IFF_POINTOPOINT
) && p
.iph
.daddr
)) {
643 t
= (struct ip_tunnel
*)dev
->priv
;
644 ipip6_tunnel_unlink(t
);
645 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
646 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
647 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
648 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
649 ipip6_tunnel_link(t
);
650 netdev_state_change(dev
);
656 if (cmd
== SIOCCHGTUNNEL
) {
657 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
658 t
->parms
.iph
.tos
= p
.iph
.tos
;
660 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
663 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
668 if (!capable(CAP_NET_ADMIN
))
671 if (dev
== &ipip6_fb_tunnel_dev
) {
673 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
676 if ((t
= ipip6_tunnel_locate(&p
, 0)) == NULL
)
679 if (t
== &ipip6_fb_tunnel
)
682 err
= unregister_netdevice(dev
);
694 static struct net_device_stats
*ipip6_tunnel_get_stats(struct net_device
*dev
)
696 return &(((struct ip_tunnel
*)dev
->priv
)->stat
);
699 static int ipip6_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
701 if (new_mtu
< IPV6_MIN_MTU
|| new_mtu
> 0xFFF8 - sizeof(struct iphdr
))
707 static void ipip6_tunnel_init_gen(struct net_device
*dev
)
709 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
711 dev
->destructor
= ipip6_tunnel_destructor
;
712 dev
->uninit
= ipip6_tunnel_uninit
;
713 dev
->hard_start_xmit
= ipip6_tunnel_xmit
;
714 dev
->get_stats
= ipip6_tunnel_get_stats
;
715 dev
->do_ioctl
= ipip6_tunnel_ioctl
;
716 dev
->change_mtu
= ipip6_tunnel_change_mtu
;
718 dev_init_buffers(dev
);
720 dev
->type
= ARPHRD_SIT
;
721 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
);
722 dev
->mtu
= 1500 - sizeof(struct iphdr
);
723 dev
->flags
= IFF_NOARP
;
726 memcpy(dev
->dev_addr
, &t
->parms
.iph
.saddr
, 4);
727 memcpy(dev
->broadcast
, &t
->parms
.iph
.daddr
, 4);
730 static int ipip6_tunnel_init(struct net_device
*dev
)
732 struct net_device
*tdev
= NULL
;
733 struct ip_tunnel
*tunnel
;
736 tunnel
= (struct ip_tunnel
*)dev
->priv
;
737 iph
= &tunnel
->parms
.iph
;
739 ipip6_tunnel_init_gen(dev
);
743 if (!ip_route_output(&rt
, iph
->daddr
, iph
->saddr
, RT_TOS(iph
->tos
), tunnel
->parms
.link
)) {
744 tdev
= rt
->u
.dst
.dev
;
747 dev
->flags
|= IFF_POINTOPOINT
;
750 if (!tdev
&& tunnel
->parms
.link
)
751 tdev
= __dev_get_by_index(tunnel
->parms
.link
);
754 dev
->hard_header_len
= tdev
->hard_header_len
+ sizeof(struct iphdr
);
755 dev
->mtu
= tdev
->mtu
- sizeof(struct iphdr
);
756 if (dev
->mtu
< IPV6_MIN_MTU
)
757 dev
->mtu
= IPV6_MIN_MTU
;
759 dev
->iflink
= tunnel
->parms
.link
;
765 static int ipip6_fb_tunnel_open(struct net_device
*dev
)
771 static int ipip6_fb_tunnel_close(struct net_device
*dev
)
778 int __init
ipip6_fb_tunnel_init(struct net_device
*dev
)
782 ipip6_tunnel_init_gen(dev
);
784 dev
->open
= ipip6_fb_tunnel_open
;
785 dev
->stop
= ipip6_fb_tunnel_close
;
788 iph
= &ipip6_fb_tunnel
.parms
.iph
;
790 iph
->protocol
= IPPROTO_IPV6
;
795 tunnels_wc
[0] = &ipip6_fb_tunnel
;
799 static struct inet_protocol sit_protocol
= {
810 void sit_cleanup(void)
812 inet_del_protocol(&sit_protocol
);
813 unregister_netdevice(&ipip6_fb_tunnel_dev
);
817 int __init
sit_init(void)
819 printk(KERN_INFO
"IPv6 over IPv4 tunneling driver\n");
821 ipip6_fb_tunnel_dev
.priv
= (void*)&ipip6_fb_tunnel
;
822 strcpy(ipip6_fb_tunnel_dev
.name
, ipip6_fb_tunnel
.parms
.name
);
824 register_netdev(&ipip6_fb_tunnel_dev
);
826 register_netdevice(&ipip6_fb_tunnel_dev
);
828 inet_add_protocol(&sit_protocol
);