2 * IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
9 * $Id: sit.c,v 1.35 2000/01/06 00:42:08 davem Exp $
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version
14 * 2 of the License, or (at your option) any later version.
17 #define __NO_VERSION__
18 #include <linux/module.h>
19 #include <linux/errno.h>
20 #include <linux/types.h>
21 #include <linux/socket.h>
22 #include <linux/sockios.h>
23 #include <linux/sched.h>
24 #include <linux/net.h>
25 #include <linux/in6.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_arp.h>
28 #include <linux/icmp.h>
29 #include <asm/uaccess.h>
30 #include <linux/init.h>
36 #include <net/protocol.h>
37 #include <net/transp_v6.h>
38 #include <net/ip6_fib.h>
39 #include <net/ip6_route.h>
40 #include <net/ndisc.h>
41 #include <net/addrconf.h>
48 This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
50 For comments look at net/ipv4/ip_gre.c --ANK
54 #define HASH(addr) ((addr^(addr>>4))&0xF)
56 static int ipip6_fb_tunnel_init(struct net_device
*dev
);
57 static int ipip6_tunnel_init(struct net_device
*dev
);
59 static struct net_device ipip6_fb_tunnel_dev
= {
60 NULL
, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL
, ipip6_fb_tunnel_init
,
63 static struct ip_tunnel ipip6_fb_tunnel
= {
64 NULL
, &ipip6_fb_tunnel_dev
, {0, }, 0, 0, 0, 0, 0, 0, 0, {"sit0", }
67 static struct ip_tunnel
*tunnels_r_l
[HASH_SIZE
];
68 static struct ip_tunnel
*tunnels_r
[HASH_SIZE
];
69 static struct ip_tunnel
*tunnels_l
[HASH_SIZE
];
70 static struct ip_tunnel
*tunnels_wc
[1];
71 static struct ip_tunnel
**tunnels
[4] = { tunnels_wc
, tunnels_l
, tunnels_r
, tunnels_r_l
};
73 static rwlock_t ipip6_lock
= RW_LOCK_UNLOCKED
;
75 static struct ip_tunnel
* ipip6_tunnel_lookup(u32 remote
, u32 local
)
77 unsigned h0
= HASH(remote
);
78 unsigned h1
= HASH(local
);
81 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
82 if (local
== t
->parms
.iph
.saddr
&&
83 remote
== t
->parms
.iph
.daddr
&& (t
->dev
->flags
&IFF_UP
))
86 for (t
= tunnels_r
[h0
]; t
; t
= t
->next
) {
87 if (remote
== t
->parms
.iph
.daddr
&& (t
->dev
->flags
&IFF_UP
))
90 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
91 if (local
== t
->parms
.iph
.saddr
&& (t
->dev
->flags
&IFF_UP
))
94 if ((t
= tunnels_wc
[0]) != NULL
&& (t
->dev
->flags
&IFF_UP
))
99 static struct ip_tunnel
** ipip6_bucket(struct ip_tunnel
*t
)
101 u32 remote
= t
->parms
.iph
.daddr
;
102 u32 local
= t
->parms
.iph
.saddr
;
114 return &tunnels
[prio
][h
];
117 static void ipip6_tunnel_unlink(struct ip_tunnel
*t
)
119 struct ip_tunnel
**tp
;
121 for (tp
= ipip6_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
123 write_lock_bh(&ipip6_lock
);
125 write_unlock_bh(&ipip6_lock
);
131 static void ipip6_tunnel_link(struct ip_tunnel
*t
)
133 struct ip_tunnel
**tp
= ipip6_bucket(t
);
135 write_lock_bh(&ipip6_lock
);
137 write_unlock_bh(&ipip6_lock
);
141 struct ip_tunnel
* ipip6_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
143 u32 remote
= parms
->iph
.daddr
;
144 u32 local
= parms
->iph
.saddr
;
145 struct ip_tunnel
*t
, **tp
, *nt
;
146 struct net_device
*dev
;
158 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
159 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
)
166 dev
= kmalloc(sizeof(*dev
) + sizeof(*t
), GFP_KERNEL
);
171 memset(dev
, 0, sizeof(*dev
) + sizeof(*t
));
172 dev
->priv
= (void*)(dev
+1);
173 nt
= (struct ip_tunnel
*)dev
->priv
;
175 dev
->name
= nt
->parms
.name
;
176 dev
->init
= ipip6_tunnel_init
;
178 memcpy(&nt
->parms
, parms
, sizeof(*parms
));
179 if (dev
->name
[0] == 0) {
181 for (i
=1; i
<100; i
++) {
182 sprintf(dev
->name
, "sit%d", i
);
183 if (__dev_get_by_name(dev
->name
) == NULL
)
188 memcpy(parms
->name
, dev
->name
, IFNAMSIZ
);
190 if (register_netdevice(dev
) < 0)
194 ipip6_tunnel_link(nt
);
195 /* Do not decrement MOD_USE_COUNT here. */
204 static void ipip6_tunnel_destructor(struct net_device
*dev
)
206 if (dev
!= &ipip6_fb_tunnel_dev
) {
211 static void ipip6_tunnel_uninit(struct net_device
*dev
)
213 if (dev
== &ipip6_fb_tunnel_dev
) {
214 write_lock_bh(&ipip6_lock
);
215 tunnels_wc
[0] = NULL
;
216 write_unlock_bh(&ipip6_lock
);
219 ipip6_tunnel_unlink((struct ip_tunnel
*)dev
->priv
);
225 void ipip6_err(struct sk_buff
*skb
, unsigned char *dp
, int len
)
227 #ifndef I_WISH_WORLD_WERE_PERFECT
229 /* It is not :-( All the routers (except for Linux) return only
230 8 bytes of packet payload. It means, that precise relaying of
231 ICMP in the real Internet is absolutely infeasible.
233 struct iphdr
*iph
= (struct iphdr
*)dp
;
234 int type
= skb
->h
.icmph
->type
;
235 int code
= skb
->h
.icmph
->code
;
238 if (len
< sizeof(struct iphdr
))
243 case ICMP_PARAMETERPROB
:
246 case ICMP_DEST_UNREACH
:
249 case ICMP_PORT_UNREACH
:
250 /* Impossible event. */
252 case ICMP_FRAG_NEEDED
:
253 /* Soft state for pmtu is maintained by IP core. */
256 /* All others are translated to HOST_UNREACH.
257 rfc2003 contains "deep thoughts" about NET_UNREACH,
258 I believe they are just ether pollution. --ANK
263 case ICMP_TIME_EXCEEDED
:
264 if (code
!= ICMP_EXC_TTL
)
269 read_lock(&ipip6_lock
);
270 t
= ipip6_tunnel_lookup(iph
->daddr
, iph
->saddr
);
271 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0)
273 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
276 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
280 t
->err_time
= jiffies
;
282 read_unlock(&ipip6_lock
);
285 struct iphdr
*iph
= (struct iphdr
*)dp
;
286 int hlen
= iph
->ihl
<<2;
287 struct ipv6hdr
*iph6
;
288 int type
= skb
->h
.icmph
->type
;
289 int code
= skb
->h
.icmph
->code
;
293 struct sk_buff
*skb2
;
294 struct rt6_info
*rt6i
;
296 if (len
< hlen
+ sizeof(struct ipv6hdr
))
298 iph6
= (struct ipv6hdr
*)(dp
+ hlen
);
303 case ICMP_PARAMETERPROB
:
304 if (skb
->h
.icmph
->un
.gateway
< hlen
)
307 /* So... This guy found something strange INSIDE encapsulated
308 packet. Well, he is fool, but what can we do ?
310 rel_type
= ICMPV6_PARAMPROB
;
311 rel_info
= skb
->h
.icmph
->un
.gateway
- hlen
;
314 case ICMP_DEST_UNREACH
:
317 case ICMP_PORT_UNREACH
:
318 /* Impossible event. */
320 case ICMP_FRAG_NEEDED
:
321 /* Too complicated case ... */
324 /* All others are translated to HOST_UNREACH.
325 rfc2003 contains "deep thoughts" about NET_UNREACH,
326 I believe, it is just ether pollution. --ANK
328 rel_type
= ICMPV6_DEST_UNREACH
;
329 rel_code
= ICMPV6_ADDR_UNREACH
;
333 case ICMP_TIME_EXCEEDED
:
334 if (code
!= ICMP_EXC_TTL
)
336 rel_type
= ICMPV6_TIME_EXCEED
;
337 rel_code
= ICMPV6_EXC_HOPLIMIT
;
341 /* Prepare fake skb to feed it to icmpv6_send */
342 skb2
= skb_clone(skb
, GFP_ATOMIC
);
345 dst_release(skb2
->dst
);
347 skb_pull(skb2
, skb
->data
- (u8
*)iph6
);
348 skb2
->nh
.raw
= skb2
->data
;
350 /* Try to guess incoming interface */
351 rt6i
= rt6_lookup(&iph6
->saddr
, NULL
, NULL
, 0);
352 if (rt6i
&& rt6i
->rt6i_dev
) {
353 skb2
->dev
= rt6i
->rt6i_dev
;
355 rt6i
= rt6_lookup(&iph6
->daddr
, &iph6
->saddr
, NULL
, 0);
357 if (rt6i
&& rt6i
->rt6i_dev
&& rt6i
->rt6i_dev
->type
== ARPHRD_SIT
) {
358 struct ip_tunnel
* t
= (struct ip_tunnel
*)rt6i
->rt6i_dev
->priv
;
359 if (rel_type
== ICMPV6_TIME_EXCEED
&& t
->parms
.iph
.ttl
) {
360 rel_type
= ICMPV6_DEST_UNREACH
;
361 rel_code
= ICMPV6_ADDR_UNREACH
;
363 icmpv6_send(skb2
, rel_type
, rel_code
, rel_info
, skb2
->dev
);
371 int ipip6_rcv(struct sk_buff
*skb
, unsigned short len
)
374 struct ip_tunnel
*tunnel
;
378 read_lock(&ipip6_lock
);
379 if ((tunnel
= ipip6_tunnel_lookup(iph
->saddr
, iph
->daddr
)) != NULL
) {
380 skb
->mac
.raw
= skb
->nh
.raw
;
381 skb
->nh
.raw
= skb_pull(skb
, skb
->h
.raw
- skb
->data
);
382 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
383 skb
->protocol
= __constant_htons(ETH_P_IPV6
);
385 skb
->pkt_type
= PACKET_HOST
;
386 tunnel
->stat
.rx_packets
++;
387 tunnel
->stat
.rx_bytes
+= skb
->len
;
388 skb
->dev
= tunnel
->dev
;
389 dst_release(skb
->dst
);
392 read_unlock(&ipip6_lock
);
396 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PROT_UNREACH
, 0);
398 read_unlock(&ipip6_lock
);
403 * This function assumes it is being called from dev_queue_xmit()
404 * and that skb is filled properly by that function.
407 static int ipip6_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
409 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
410 struct net_device_stats
*stats
= &tunnel
->stat
;
411 struct iphdr
*tiph
= &tunnel
->parms
.iph
;
412 struct ipv6hdr
*iph6
= skb
->nh
.ipv6h
;
413 u8 tos
= tunnel
->parms
.iph
.tos
;
414 struct rtable
*rt
; /* Route to the other host */
415 struct net_device
*tdev
; /* Device to other host */
416 struct iphdr
*iph
; /* Our new IP header */
417 int max_headroom
; /* The extra header space needed */
418 u32 dst
= tiph
->daddr
;
420 struct in6_addr
*addr6
;
423 if (tunnel
->recursion
++) {
424 tunnel
->stat
.collisions
++;
428 if (skb
->protocol
!= __constant_htons(ETH_P_IPV6
))
432 struct neighbour
*neigh
= NULL
;
435 neigh
= skb
->dst
->neighbour
;
438 printk(KERN_DEBUG
"sit: nexthop == NULL\n");
442 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
443 addr_type
= ipv6_addr_type(addr6
);
445 if (addr_type
== IPV6_ADDR_ANY
) {
446 addr6
= &skb
->nh
.ipv6h
->daddr
;
447 addr_type
= ipv6_addr_type(addr6
);
450 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
453 dst
= addr6
->s6_addr32
[3];
456 if (ip_route_output(&rt
, dst
, tiph
->saddr
, RT_TOS(tos
), tunnel
->parms
.link
)) {
457 tunnel
->stat
.tx_carrier_errors
++;
460 tdev
= rt
->u
.dst
.dev
;
464 tunnel
->stat
.collisions
++;
468 mtu
= rt
->u
.dst
.pmtu
- sizeof(struct iphdr
);
470 tunnel
->stat
.collisions
++;
474 if (mtu
< IPV6_MIN_MTU
)
476 if (skb
->dst
&& mtu
< skb
->dst
->pmtu
) {
477 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
478 if (mtu
< rt6
->u
.dst
.pmtu
) {
479 if (tunnel
->parms
.iph
.daddr
|| rt6
->rt6i_dst
.plen
== 128) {
480 rt6
->rt6i_flags
|= RTF_MODIFIED
;
481 rt6
->u
.dst
.pmtu
= mtu
;
485 if (skb
->len
> mtu
) {
486 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
491 if (tunnel
->err_count
> 0) {
492 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
494 dst_link_failure(skb
);
496 tunnel
->err_count
= 0;
499 skb
->h
.raw
= skb
->nh
.raw
;
502 * Okay, now see if we can stuff it in the buffer as-is.
504 max_headroom
= (((tdev
->hard_header_len
+15)&~15)+sizeof(struct iphdr
));
506 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
507 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
516 skb_set_owner_w(new_skb
, skb
->sk
);
521 skb
->nh
.raw
= skb_push(skb
, sizeof(struct iphdr
));
522 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
523 dst_release(skb
->dst
);
524 skb
->dst
= &rt
->u
.dst
;
527 * Push down and install the IPIP header.
532 iph
->ihl
= sizeof(struct iphdr
)>>2;
533 if (mtu
> IPV6_MIN_MTU
)
534 iph
->frag_off
= __constant_htons(IP_DF
);
538 iph
->protocol
= IPPROTO_IPV6
;
540 iph
->daddr
= rt
->rt_dst
;
541 iph
->saddr
= rt
->rt_src
;
543 if ((iph
->ttl
= tiph
->ttl
) == 0)
544 iph
->ttl
= iph6
->hop_limit
;
546 iph
->tot_len
= htons(skb
->len
);
547 ip_select_ident(iph
, &rt
->u
.dst
);
550 stats
->tx_bytes
+= skb
->len
;
558 dst_link_failure(skb
);
567 ipip6_tunnel_ioctl (struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
570 struct ip_tunnel_parm p
;
578 if (dev
== &ipip6_fb_tunnel_dev
) {
579 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
583 t
= ipip6_tunnel_locate(&p
, 0);
586 t
= (struct ip_tunnel
*)dev
->priv
;
587 memcpy(&p
, &t
->parms
, sizeof(p
));
588 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
595 if (!capable(CAP_NET_ADMIN
))
599 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
603 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_IPV6
||
604 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&__constant_htons(~IP_DF
)))
607 p
.iph
.frag_off
|= __constant_htons(IP_DF
);
609 t
= ipip6_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
611 if (dev
!= &ipip6_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
&&
612 t
!= &ipip6_fb_tunnel
) {
619 if (((dev
->flags
&IFF_POINTOPOINT
) && !p
.iph
.daddr
) ||
620 (!(dev
->flags
&IFF_POINTOPOINT
) && p
.iph
.daddr
)) {
624 t
= (struct ip_tunnel
*)dev
->priv
;
625 ipip6_tunnel_unlink(t
);
626 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
627 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
628 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
629 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
630 ipip6_tunnel_link(t
);
631 netdev_state_change(dev
);
637 if (cmd
== SIOCCHGTUNNEL
) {
638 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
639 t
->parms
.iph
.tos
= p
.iph
.tos
;
641 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
644 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
649 if (!capable(CAP_NET_ADMIN
))
652 if (dev
== &ipip6_fb_tunnel_dev
) {
654 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
657 if ((t
= ipip6_tunnel_locate(&p
, 0)) == NULL
)
660 if (t
== &ipip6_fb_tunnel
)
663 err
= unregister_netdevice(dev
);
675 static struct net_device_stats
*ipip6_tunnel_get_stats(struct net_device
*dev
)
677 return &(((struct ip_tunnel
*)dev
->priv
)->stat
);
680 static int ipip6_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
682 if (new_mtu
< IPV6_MIN_MTU
|| new_mtu
> 0xFFF8 - sizeof(struct iphdr
))
688 static void ipip6_tunnel_init_gen(struct net_device
*dev
)
690 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
692 dev
->destructor
= ipip6_tunnel_destructor
;
693 dev
->uninit
= ipip6_tunnel_uninit
;
694 dev
->hard_start_xmit
= ipip6_tunnel_xmit
;
695 dev
->get_stats
= ipip6_tunnel_get_stats
;
696 dev
->do_ioctl
= ipip6_tunnel_ioctl
;
697 dev
->change_mtu
= ipip6_tunnel_change_mtu
;
699 dev_init_buffers(dev
);
701 dev
->type
= ARPHRD_SIT
;
702 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
);
703 dev
->mtu
= 1500 - sizeof(struct iphdr
);
704 dev
->flags
= IFF_NOARP
;
707 memcpy(dev
->dev_addr
, &t
->parms
.iph
.saddr
, 4);
708 memcpy(dev
->broadcast
, &t
->parms
.iph
.daddr
, 4);
711 static int ipip6_tunnel_init(struct net_device
*dev
)
713 struct net_device
*tdev
= NULL
;
714 struct ip_tunnel
*tunnel
;
717 tunnel
= (struct ip_tunnel
*)dev
->priv
;
718 iph
= &tunnel
->parms
.iph
;
720 ipip6_tunnel_init_gen(dev
);
724 if (!ip_route_output(&rt
, iph
->daddr
, iph
->saddr
, RT_TOS(iph
->tos
), tunnel
->parms
.link
)) {
725 tdev
= rt
->u
.dst
.dev
;
728 dev
->flags
|= IFF_POINTOPOINT
;
731 if (!tdev
&& tunnel
->parms
.link
)
732 tdev
= __dev_get_by_index(tunnel
->parms
.link
);
735 dev
->hard_header_len
= tdev
->hard_header_len
+ sizeof(struct iphdr
);
736 dev
->mtu
= tdev
->mtu
- sizeof(struct iphdr
);
737 if (dev
->mtu
< IPV6_MIN_MTU
)
738 dev
->mtu
= IPV6_MIN_MTU
;
740 dev
->iflink
= tunnel
->parms
.link
;
746 static int ipip6_fb_tunnel_open(struct net_device
*dev
)
752 static int ipip6_fb_tunnel_close(struct net_device
*dev
)
759 int __init
ipip6_fb_tunnel_init(struct net_device
*dev
)
763 ipip6_tunnel_init_gen(dev
);
765 dev
->open
= ipip6_fb_tunnel_open
;
766 dev
->stop
= ipip6_fb_tunnel_close
;
769 iph
= &ipip6_fb_tunnel
.parms
.iph
;
771 iph
->protocol
= IPPROTO_IPV6
;
776 tunnels_wc
[0] = &ipip6_fb_tunnel
;
780 static struct inet_protocol sit_protocol
= {
791 void sit_cleanup(void)
793 inet_del_protocol(&sit_protocol
);
794 unregister_netdevice(&ipip6_fb_tunnel_dev
);
798 int __init
sit_init(void)
800 printk(KERN_INFO
"IPv6 over IPv4 tunneling driver\n");
802 ipip6_fb_tunnel_dev
.priv
= (void*)&ipip6_fb_tunnel
;
803 ipip6_fb_tunnel_dev
.name
= ipip6_fb_tunnel
.parms
.name
;
805 register_netdev(&ipip6_fb_tunnel_dev
);
807 register_netdevice(&ipip6_fb_tunnel_dev
);
809 inet_add_protocol(&sit_protocol
);