2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/netfilter_ipv4.h>
35 #include <net/protocol.h>
38 #include <net/checksum.h>
39 #include <net/inet_ecn.h>
43 #include <net/ip6_fib.h>
44 #include <net/ip6_route.h>
51 1. The most important issue is detecting local dead loops.
52 They would cause complete host lockup in transmit, which
53 would be "resolved" by stack overflow or, if queueing is enabled,
54 with infinite looping in net_bh.
56 We cannot track such dead loops during route installation,
57 it is infeasible task. The most general solutions would be
58 to keep skb->encapsulation counter (sort of local ttl),
59 and silently drop packet when it expires. It is the best
60 solution, but it supposes maintaining a new variable in ALL
61 skb, even if no tunneling is used.
63 Current solution: t->recursion lock breaks dead loops. It looks
64 like dev->tbusy flag, but I preferred new variable, because
65 the semantics is different. One day, when hard_start_xmit
66 will be multithreaded we will have to use skb->encapsulation.
70 2. Networking dead loops would not kill routers, but would really
71 kill network. IP hop limit plays role of "t->recursion" in this case,
72 if we copy it from packet being encapsulated to upper header.
73 It is very good solution, but it introduces two problems:
75 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
76 do not work over tunnels.
77 - traceroute does not work. I planned to relay ICMP from tunnel,
78 so that this problem would be solved and traceroute output
79 would be even more informative. This idea appeared to be wrong:
80 only Linux complies to rfc1812 now (yes, guys, Linux is the only
81 true router now :-)), all routers (at least, in neighbourhood of mine)
82 return only 8 bytes of payload. It is the end.
84 Hence, if we want that OSPF worked or traceroute said something reasonable,
85 we should search for another solution.
87 One of them is to parse packet trying to detect inner encapsulation
88 made by our node. It is difficult or even impossible, especially,
89 taking into account fragmentation. To be short, it is not a solution at all.
91 Current solution: The solution was UNEXPECTEDLY SIMPLE.
92 We force DF flag on tunnels with preconfigured hop limit,
93 that is ALL. :-) Well, it does not remove the problem completely,
94 but exponential growth of network traffic is changed to linear
95 (branches, that exceed pmtu are pruned) and tunnel mtu
96 quickly degrades to a value <68, where looping stops.
97 Yes, it is not good if there exists a router in the loop,
98 which does not force DF, even when encapsulating packets have DF set.
99 But it is not our problem! Nobody could accuse us, we made
100 all that we could make. Even if it is your gated who injected
101 fatal route to network, even if it were you who configured
102 fatal static route: you are innocent. :-)
106 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
107 practically identical code. It would be good to glue them
108 together, but it is not very evident, how to make them modular.
109 sit is integral part of IPv6, ipip and gre are naturally modular.
110 We could extract common parts (hash table, ioctl etc)
111 to a separate module (ip_tunnel.c).
116 static int ipgre_tunnel_init(struct net_device
*dev
);
118 /* Fallback tunnel: no source, no destination, no key, no options */
120 static int ipgre_fb_tunnel_init(struct net_device
*dev
);
122 static struct net_device ipgre_fb_tunnel_dev
= {
123 "gre0", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL
, ipgre_fb_tunnel_init
,
126 static struct ip_tunnel ipgre_fb_tunnel
= {
127 NULL
, &ipgre_fb_tunnel_dev
, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
130 /* Tunnel hash table */
140 We require exact key match i.e. if a key is present in packet
141 it will match only tunnel with the same key; if it is not present,
142 it will match only keyless tunnel.
144 All keyless packets, if not matched to configured keyless tunnels,
145 will match fallback tunnel.
/* Bucket index for the tunnel hash table: fold the low byte of the
 * (network-order) address into a 4-bit value, giving 16 buckets.
 * The macro argument is fully parenthesized so that expression
 * arguments (e.g. HASH(a | b)) expand with the intended precedence. */
#define HASH(addr) (((addr) ^ ((addr) >> 4)) & 0xF)
151 static struct ip_tunnel
*tunnels
[4][HASH_SIZE
];
153 #define tunnels_r_l (tunnels[3])
154 #define tunnels_r (tunnels[2])
155 #define tunnels_l (tunnels[1])
156 #define tunnels_wc (tunnels[0])
158 static rwlock_t ipgre_lock
= RW_LOCK_UNLOCKED
;
160 /* Given src, dst and key, find the appropriate input tunnel. */
162 static struct ip_tunnel
* ipgre_tunnel_lookup(u32 remote
, u32 local
, u32 key
)
164 unsigned h0
= HASH(remote
);
165 unsigned h1
= HASH(key
);
168 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
169 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
170 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
174 for (t
= tunnels_r
[h0
^h1
]; t
; t
= t
->next
) {
175 if (remote
== t
->parms
.iph
.daddr
) {
176 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
180 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
181 if (local
== t
->parms
.iph
.saddr
||
182 (local
== t
->parms
.iph
.daddr
&& MULTICAST(local
))) {
183 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
187 for (t
= tunnels_wc
[h1
]; t
; t
= t
->next
) {
188 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
191 if (ipgre_fb_tunnel_dev
.flags
&IFF_UP
)
192 return &ipgre_fb_tunnel
;
196 static struct ip_tunnel
**ipgre_bucket(struct ip_tunnel
*t
)
198 u32 remote
= t
->parms
.iph
.daddr
;
199 u32 local
= t
->parms
.iph
.saddr
;
200 u32 key
= t
->parms
.i_key
;
201 unsigned h
= HASH(key
);
206 if (remote
&& !MULTICAST(remote
)) {
211 return &tunnels
[prio
][h
];
214 static void ipgre_tunnel_link(struct ip_tunnel
*t
)
216 struct ip_tunnel
**tp
= ipgre_bucket(t
);
219 write_lock_bh(&ipgre_lock
);
221 write_unlock_bh(&ipgre_lock
);
224 static void ipgre_tunnel_unlink(struct ip_tunnel
*t
)
226 struct ip_tunnel
**tp
;
228 for (tp
= ipgre_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
230 write_lock_bh(&ipgre_lock
);
232 write_unlock_bh(&ipgre_lock
);
238 static struct ip_tunnel
* ipgre_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
240 u32 remote
= parms
->iph
.daddr
;
241 u32 local
= parms
->iph
.saddr
;
242 u32 key
= parms
->i_key
;
243 struct ip_tunnel
*t
, **tp
, *nt
;
244 struct net_device
*dev
;
245 unsigned h
= HASH(key
);
250 if (remote
&& !MULTICAST(remote
)) {
254 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
255 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
256 if (key
== t
->parms
.i_key
)
264 dev
= kmalloc(sizeof(*dev
) + sizeof(*t
), GFP_KERNEL
);
269 memset(dev
, 0, sizeof(*dev
) + sizeof(*t
));
270 dev
->priv
= (void*)(dev
+1);
271 nt
= (struct ip_tunnel
*)dev
->priv
;
273 dev
->init
= ipgre_tunnel_init
;
274 dev
->features
|= NETIF_F_DYNALLOC
;
275 memcpy(&nt
->parms
, parms
, sizeof(*parms
));
276 strcpy(dev
->name
, nt
->parms
.name
);
277 if (dev
->name
[0] == 0) {
279 for (i
=1; i
<100; i
++) {
280 sprintf(dev
->name
, "gre%d", i
);
281 if (__dev_get_by_name(dev
->name
) == NULL
)
286 memcpy(parms
->name
, dev
->name
, IFNAMSIZ
);
288 if (register_netdevice(dev
) < 0)
292 ipgre_tunnel_link(nt
);
293 /* Do not decrement MOD_USE_COUNT here. */
302 static void ipgre_tunnel_destructor(struct net_device
*dev
)
304 if (dev
!= &ipgre_fb_tunnel_dev
) {
309 static void ipgre_tunnel_uninit(struct net_device
*dev
)
311 ipgre_tunnel_unlink((struct ip_tunnel
*)dev
->priv
);
316 void ipgre_err(struct sk_buff
*skb
, unsigned char *dp
, int len
)
318 #ifndef I_WISH_WORLD_WERE_PERFECT
320 /* It is not :-( All the routers (except for Linux) return only
321 8 bytes of packet payload. It means, that precise relaying of
322 ICMP in the real Internet is absolutely infeasible.
324 Moreover, Cisco "wise men" put GRE key to the third word
325 in GRE header. It makes impossible maintaining even soft state for keyed
326 GRE tunnels with enabled checksum. Tell them "thank you".
328 Well, I wonder, rfc1812 was written by Cisco employee,
329 what the hell these idiots break standards established
333 struct iphdr
*iph
= (struct iphdr
*)dp
;
334 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
335 int grehlen
= (iph
->ihl
<<2) + 4;
336 int type
= skb
->h
.icmph
->type
;
337 int code
= skb
->h
.icmph
->code
;
342 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
343 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
352 /* If only 8 bytes returned, keyed message will be dropped here */
358 case ICMP_PARAMETERPROB
:
361 case ICMP_DEST_UNREACH
:
364 case ICMP_PORT_UNREACH
:
365 /* Impossible event. */
367 case ICMP_FRAG_NEEDED
:
368 /* Soft state for pmtu is maintained by IP core. */
371 /* All others are translated to HOST_UNREACH.
372 rfc2003 contains "deep thoughts" about NET_UNREACH,
373 I believe they are just ether pollution. --ANK
378 case ICMP_TIME_EXCEEDED
:
379 if (code
!= ICMP_EXC_TTL
)
384 read_lock(&ipgre_lock
);
385 t
= ipgre_tunnel_lookup(iph
->daddr
, iph
->saddr
, (flags
&GRE_KEY
) ? *(((u32
*)p
) + (grehlen
>>2) - 1) : 0);
386 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0 || MULTICAST(t
->parms
.iph
.daddr
))
389 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
392 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
396 t
->err_time
= jiffies
;
398 read_unlock(&ipgre_lock
);
401 struct iphdr
*iph
= (struct iphdr
*)dp
;
403 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
404 int type
= skb
->h
.icmph
->type
;
405 int code
= skb
->h
.icmph
->code
;
410 int grehlen
= (iph
->ihl
<<2) + 4;
411 struct sk_buff
*skb2
;
414 if (p
[1] != __constant_htons(ETH_P_IP
))
418 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
419 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
428 if (len
< grehlen
+ sizeof(struct iphdr
))
430 eiph
= (struct iphdr
*)(dp
+ grehlen
);
435 case ICMP_PARAMETERPROB
:
436 if (skb
->h
.icmph
->un
.gateway
< (iph
->ihl
<<2))
439 /* So... This guy found something strange INSIDE encapsulated
440 packet. Well, he is fool, but what can we do ?
442 rel_type
= ICMP_PARAMETERPROB
;
443 rel_info
= skb
->h
.icmph
->un
.gateway
- grehlen
;
446 case ICMP_DEST_UNREACH
:
449 case ICMP_PORT_UNREACH
:
450 /* Impossible event. */
452 case ICMP_FRAG_NEEDED
:
453 /* And it is the only really necessary thing :-) */
454 rel_info
= ntohs(skb
->h
.icmph
->un
.frag
.mtu
);
455 if (rel_info
< grehlen
+68)
458 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
459 if (rel_info
> ntohs(eiph
->tot_len
))
463 /* All others are translated to HOST_UNREACH.
464 rfc2003 contains "deep thoughts" about NET_UNREACH,
465 I believe, it is just ether pollution. --ANK
467 rel_type
= ICMP_DEST_UNREACH
;
468 rel_code
= ICMP_HOST_UNREACH
;
472 case ICMP_TIME_EXCEEDED
:
473 if (code
!= ICMP_EXC_TTL
)
478 /* Prepare fake skb to feed it to icmp_send */
479 skb2
= skb_clone(skb
, GFP_ATOMIC
);
482 dst_release(skb2
->dst
);
484 skb_pull(skb2
, skb
->data
- (u8
*)eiph
);
485 skb2
->nh
.raw
= skb2
->data
;
487 /* Try to guess incoming interface */
488 if (ip_route_output(&rt
, eiph
->saddr
, 0, RT_TOS(eiph
->tos
), 0)) {
492 skb2
->dev
= rt
->u
.dst
.dev
;
494 /* route "incoming" packet */
495 if (rt
->rt_flags
&RTCF_LOCAL
) {
498 if (ip_route_output(&rt
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, 0) ||
499 rt
->u
.dst
.dev
->type
!= ARPHRD_IPGRE
) {
506 if (ip_route_input(skb2
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, skb2
->dev
) ||
507 skb2
->dst
->dev
->type
!= ARPHRD_IPGRE
) {
513 /* change mtu on this route */
514 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
515 if (rel_info
> skb2
->dst
->pmtu
) {
519 skb2
->dst
->pmtu
= rel_info
;
520 rel_info
= htonl(rel_info
);
521 } else if (type
== ICMP_TIME_EXCEEDED
) {
522 struct ip_tunnel
*t
= (struct ip_tunnel
*)skb2
->dev
->priv
;
523 if (t
->parms
.iph
.ttl
) {
524 rel_type
= ICMP_DEST_UNREACH
;
525 rel_code
= ICMP_HOST_UNREACH
;
529 icmp_send(skb2
, rel_type
, rel_code
, rel_info
);
534 static inline void ipgre_ecn_decapsulate(struct iphdr
*iph
, struct sk_buff
*skb
)
536 if (INET_ECN_is_ce(iph
->tos
)) {
537 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
538 if (INET_ECN_is_not_ce(skb
->nh
.iph
->tos
))
539 IP_ECN_set_ce(skb
->nh
.iph
);
540 } else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
541 if (INET_ECN_is_not_ce(ip6_get_dsfield(skb
->nh
.ipv6h
)))
542 IP6_ECN_set_ce(skb
->nh
.ipv6h
);
548 ipgre_ecn_encapsulate(u8 tos
, struct iphdr
*old_iph
, struct sk_buff
*skb
)
550 #ifdef CONFIG_INET_ECN
552 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
553 inner
= old_iph
->tos
;
554 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
))
555 inner
= ip6_get_dsfield((struct ipv6hdr
*)old_iph
);
556 return INET_ECN_encapsulate(tos
, inner
);
562 int ipgre_rcv(struct sk_buff
*skb
, unsigned short len
)
564 struct iphdr
*iph
= skb
->nh
.iph
;
566 u16 flags
= *(u16
*)h
;
570 struct ip_tunnel
*tunnel
;
573 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_ROUTING
|GRE_SEQ
|GRE_VERSION
)) {
574 /* - Version must be 0.
575 - We do not support routing headers.
577 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
580 if (flags
&GRE_CSUM
) {
581 csum
= ip_compute_csum(h
, len
);
585 key
= *(u32
*)(h
+ offset
);
589 seqno
= ntohl(*(u32
*)(h
+ offset
));
594 read_lock(&ipgre_lock
);
595 if ((tunnel
= ipgre_tunnel_lookup(iph
->saddr
, iph
->daddr
, key
)) != NULL
) {
596 skb
->mac
.raw
= skb
->nh
.raw
;
597 skb
->nh
.raw
= skb_pull(skb
, h
+ offset
- skb
->data
);
598 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
600 skb
->protocol
= *(u16
*)(h
+ 2);
601 skb
->pkt_type
= PACKET_HOST
;
602 #ifdef CONFIG_NET_IPGRE_BROADCAST
603 if (MULTICAST(iph
->daddr
)) {
604 /* Looped back packet, drop it! */
605 if (((struct rtable
*)skb
->dst
)->key
.iif
== 0)
607 tunnel
->stat
.multicast
++;
608 skb
->pkt_type
= PACKET_BROADCAST
;
612 if (((flags
&GRE_CSUM
) && csum
) ||
613 (!(flags
&GRE_CSUM
) && tunnel
->parms
.i_flags
&GRE_CSUM
)) {
614 tunnel
->stat
.rx_crc_errors
++;
615 tunnel
->stat
.rx_errors
++;
618 if (tunnel
->parms
.i_flags
&GRE_SEQ
) {
619 if (!(flags
&GRE_SEQ
) ||
620 (tunnel
->i_seqno
&& (s32
)(seqno
- tunnel
->i_seqno
) < 0)) {
621 tunnel
->stat
.rx_fifo_errors
++;
622 tunnel
->stat
.rx_errors
++;
625 tunnel
->i_seqno
= seqno
+ 1;
627 tunnel
->stat
.rx_packets
++;
628 tunnel
->stat
.rx_bytes
+= skb
->len
;
629 skb
->dev
= tunnel
->dev
;
630 dst_release(skb
->dst
);
632 #ifdef CONFIG_NETFILTER
633 nf_conntrack_put(skb
->nfct
);
635 #ifdef CONFIG_NETFILTER_DEBUG
639 ipgre_ecn_decapsulate(iph
, skb
);
641 read_unlock(&ipgre_lock
);
644 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PROT_UNREACH
, 0);
647 read_unlock(&ipgre_lock
);
653 /* Need this wrapper because NF_HOOK takes the function address */
654 static inline int do_ip_send(struct sk_buff
*skb
)
659 static int ipgre_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
661 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
662 struct net_device_stats
*stats
= &tunnel
->stat
;
663 struct iphdr
*old_iph
= skb
->nh
.iph
;
667 struct rtable
*rt
; /* Route to the other host */
668 struct net_device
*tdev
; /* Device to other host */
669 struct iphdr
*iph
; /* Our new IP header */
670 int max_headroom
; /* The extra header space needed */
675 if (tunnel
->recursion
++) {
676 tunnel
->stat
.collisions
++;
680 if (dev
->hard_header
) {
682 tiph
= (struct iphdr
*)skb
->data
;
684 gre_hlen
= tunnel
->hlen
;
685 tiph
= &tunnel
->parms
.iph
;
688 if ((dst
= tiph
->daddr
) == 0) {
691 if (skb
->dst
== NULL
) {
692 tunnel
->stat
.tx_fifo_errors
++;
696 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
697 rt
= (struct rtable
*)skb
->dst
;
698 if ((dst
= rt
->rt_gateway
) == 0)
702 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
703 struct in6_addr
*addr6
;
705 struct neighbour
*neigh
= skb
->dst
->neighbour
;
710 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
711 addr_type
= ipv6_addr_type(addr6
);
713 if (addr_type
== IPV6_ADDR_ANY
) {
714 addr6
= &skb
->nh
.ipv6h
->daddr
;
715 addr_type
= ipv6_addr_type(addr6
);
718 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
721 dst
= addr6
->s6_addr32
[3];
730 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
735 if (ip_route_output(&rt
, dst
, tiph
->saddr
, RT_TOS(tos
), tunnel
->parms
.link
)) {
736 tunnel
->stat
.tx_carrier_errors
++;
739 tdev
= rt
->u
.dst
.dev
;
743 tunnel
->stat
.collisions
++;
748 mtu
= rt
->u
.dst
.pmtu
- tunnel
->hlen
;
750 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
751 if (skb
->dst
&& mtu
< skb
->dst
->pmtu
&& mtu
>= 68)
752 skb
->dst
->pmtu
= mtu
;
754 df
|= (old_iph
->frag_off
&__constant_htons(IP_DF
));
756 if ((old_iph
->frag_off
&__constant_htons(IP_DF
)) &&
757 mtu
< ntohs(old_iph
->tot_len
)) {
758 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
, htonl(mtu
));
764 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
765 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
767 if (rt6
&& mtu
< rt6
->u
.dst
.pmtu
&& mtu
>= IPV6_MIN_MTU
) {
768 if ((tunnel
->parms
.iph
.daddr
&& !MULTICAST(tunnel
->parms
.iph
.daddr
)) ||
769 rt6
->rt6i_dst
.plen
== 128) {
770 rt6
->rt6i_flags
|= RTF_MODIFIED
;
771 skb
->dst
->pmtu
= mtu
;
775 if (mtu
>= IPV6_MIN_MTU
&& mtu
< skb
->len
- tunnel
->hlen
+ gre_hlen
) {
776 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
783 if (tunnel
->err_count
> 0) {
784 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
787 dst_link_failure(skb
);
789 tunnel
->err_count
= 0;
792 skb
->h
.raw
= skb
->nh
.raw
;
794 max_headroom
= ((tdev
->hard_header_len
+15)&~15)+ gre_hlen
;
796 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
797 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
806 skb_set_owner_w(new_skb
, skb
->sk
);
811 skb
->nh
.raw
= skb_push(skb
, gre_hlen
);
812 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
813 dst_release(skb
->dst
);
814 skb
->dst
= &rt
->u
.dst
;
817 * Push down and install the IPIP header.
822 iph
->ihl
= sizeof(struct iphdr
) >> 2;
824 iph
->protocol
= IPPROTO_GRE
;
825 iph
->tos
= ipgre_ecn_encapsulate(tos
, old_iph
, skb
);
826 iph
->daddr
= rt
->rt_dst
;
827 iph
->saddr
= rt
->rt_src
;
829 if ((iph
->ttl
= tiph
->ttl
) == 0) {
830 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
831 iph
->ttl
= old_iph
->ttl
;
833 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
))
834 iph
->ttl
= ((struct ipv6hdr
*)old_iph
)->hop_limit
;
837 iph
->ttl
= sysctl_ip_default_ttl
;
840 ((u16
*)(iph
+1))[0] = tunnel
->parms
.o_flags
;
841 ((u16
*)(iph
+1))[1] = skb
->protocol
;
843 if (tunnel
->parms
.o_flags
&(GRE_KEY
|GRE_CSUM
|GRE_SEQ
)) {
844 u32
*ptr
= (u32
*)(((u8
*)iph
) + tunnel
->hlen
- 4);
846 if (tunnel
->parms
.o_flags
&GRE_SEQ
) {
848 *ptr
= htonl(tunnel
->o_seqno
);
851 if (tunnel
->parms
.o_flags
&GRE_KEY
) {
852 *ptr
= tunnel
->parms
.o_key
;
855 if (tunnel
->parms
.o_flags
&GRE_CSUM
) {
857 *(__u16
*)ptr
= ip_compute_csum((void*)(iph
+1), skb
->len
- sizeof(struct iphdr
));
861 #ifdef CONFIG_NETFILTER
862 nf_conntrack_put(skb
->nfct
);
864 #ifdef CONFIG_NETFILTER_DEBUG
874 dst_link_failure(skb
);
884 ipgre_tunnel_ioctl (struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
887 struct ip_tunnel_parm p
;
895 if (dev
== &ipgre_fb_tunnel_dev
) {
896 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
900 t
= ipgre_tunnel_locate(&p
, 0);
903 t
= (struct ip_tunnel
*)dev
->priv
;
904 memcpy(&p
, &t
->parms
, sizeof(p
));
905 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
912 if (!capable(CAP_NET_ADMIN
))
916 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
920 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_GRE
||
921 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&__constant_htons(~IP_DF
)) ||
922 ((p
.i_flags
|p
.o_flags
)&(GRE_VERSION
|GRE_ROUTING
)))
925 p
.iph
.frag_off
|= __constant_htons(IP_DF
);
927 if (!(p
.i_flags
&GRE_KEY
))
929 if (!(p
.o_flags
&GRE_KEY
))
932 t
= ipgre_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
934 if (dev
!= &ipgre_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
&&
935 t
!= &ipgre_fb_tunnel
) {
944 t
= (struct ip_tunnel
*)dev
->priv
;
946 if (MULTICAST(p
.iph
.daddr
))
947 nflags
= IFF_BROADCAST
;
948 else if (p
.iph
.daddr
)
949 nflags
= IFF_POINTOPOINT
;
951 if ((dev
->flags
^nflags
)&(IFF_POINTOPOINT
|IFF_BROADCAST
)) {
955 ipgre_tunnel_unlink(t
);
956 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
957 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
958 t
->parms
.i_key
= p
.i_key
;
959 t
->parms
.o_key
= p
.o_key
;
960 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
961 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
962 ipgre_tunnel_link(t
);
963 netdev_state_change(dev
);
969 if (cmd
== SIOCCHGTUNNEL
) {
970 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
971 t
->parms
.iph
.tos
= p
.iph
.tos
;
972 t
->parms
.iph
.frag_off
= p
.iph
.frag_off
;
974 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
977 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
982 if (!capable(CAP_NET_ADMIN
))
985 if (dev
== &ipgre_fb_tunnel_dev
) {
987 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
990 if ((t
= ipgre_tunnel_locate(&p
, 0)) == NULL
)
993 if (t
== &ipgre_fb_tunnel
)
996 err
= unregister_netdevice(dev
);
1008 static struct net_device_stats
*ipgre_tunnel_get_stats(struct net_device
*dev
)
1010 return &(((struct ip_tunnel
*)dev
->priv
)->stat
);
1013 static int ipgre_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
1015 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
1016 if (new_mtu
< 68 || new_mtu
> 0xFFF8 - tunnel
->hlen
)
1022 #ifdef CONFIG_NET_IPGRE_BROADCAST
1023 /* Nice toy. Unfortunately, useless in real life :-)
1024 It allows to construct virtual multiprotocol broadcast "LAN"
1025 over the Internet, provided multicast routing is tuned.
1028 I have no idea was this bicycle invented before me,
1029 so that I had to set ARPHRD_IPGRE to a random value.
1030 I have an impression, that Cisco could make something similar,
1031 but this feature is apparently missing in IOS<=11.2(8).
1033 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1034 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1036 ping -t 255 224.66.66.66
1038 If nobody answers, mbone does not work.
1040 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1041 ip addr add 10.66.66.<somewhat>/24 dev Universe
1042 ifconfig Universe up
1043 ifconfig Universe add fe80::<Your_real_addr>/10
1044 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1047 ftp fec0:6666:6666::193.233.7.65
1052 static int ipgre_header(struct sk_buff
*skb
, struct net_device
*dev
, unsigned short type
,
1053 void *daddr
, void *saddr
, unsigned len
)
1055 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1056 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
, t
->hlen
);
1057 u16
*p
= (u16
*)(iph
+1);
1059 memcpy(iph
, &t
->parms
.iph
, sizeof(struct iphdr
));
1060 p
[0] = t
->parms
.o_flags
;
1064 * Set the source hardware address.
1068 memcpy(&iph
->saddr
, saddr
, 4);
1071 memcpy(&iph
->daddr
, daddr
, 4);
1074 if (iph
->daddr
&& !MULTICAST(iph
->daddr
))
1080 static int ipgre_open(struct net_device
*dev
)
1082 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1085 if (MULTICAST(t
->parms
.iph
.daddr
)) {
1087 if (ip_route_output(&rt
, t
->parms
.iph
.daddr
,
1088 t
->parms
.iph
.saddr
, RT_TOS(t
->parms
.iph
.tos
),
1091 return -EADDRNOTAVAIL
;
1093 dev
= rt
->u
.dst
.dev
;
1095 if (__in_dev_get(dev
) == NULL
) {
1097 return -EADDRNOTAVAIL
;
1099 t
->mlink
= dev
->ifindex
;
1100 ip_mc_inc_group(__in_dev_get(dev
), t
->parms
.iph
.daddr
);
1105 static int ipgre_close(struct net_device
*dev
)
1107 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1108 if (MULTICAST(t
->parms
.iph
.daddr
) && t
->mlink
) {
1109 struct in_device
*in_dev
= inetdev_by_index(t
->mlink
);
1111 ip_mc_dec_group(in_dev
, t
->parms
.iph
.daddr
);
1121 static void ipgre_tunnel_init_gen(struct net_device
*dev
)
1123 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1125 dev
->uninit
= ipgre_tunnel_uninit
;
1126 dev
->destructor
= ipgre_tunnel_destructor
;
1127 dev
->hard_start_xmit
= ipgre_tunnel_xmit
;
1128 dev
->get_stats
= ipgre_tunnel_get_stats
;
1129 dev
->do_ioctl
= ipgre_tunnel_ioctl
;
1130 dev
->change_mtu
= ipgre_tunnel_change_mtu
;
1132 dev_init_buffers(dev
);
1134 dev
->type
= ARPHRD_IPGRE
;
1135 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
) + 4;
1136 dev
->mtu
= 1500 - sizeof(struct iphdr
) - 4;
1137 dev
->flags
= IFF_NOARP
;
1140 memcpy(dev
->dev_addr
, &t
->parms
.iph
.saddr
, 4);
1141 memcpy(dev
->broadcast
, &t
->parms
.iph
.daddr
, 4);
1144 static int ipgre_tunnel_init(struct net_device
*dev
)
1146 struct net_device
*tdev
= NULL
;
1147 struct ip_tunnel
*tunnel
;
1149 int hlen
= LL_MAX_HEADER
;
1151 int addend
= sizeof(struct iphdr
) + 4;
1153 tunnel
= (struct ip_tunnel
*)dev
->priv
;
1154 iph
= &tunnel
->parms
.iph
;
1156 ipgre_tunnel_init_gen(dev
);
1158 /* Guess output device to choose reasonable mtu and hard_header_len */
1162 if (!ip_route_output(&rt
, iph
->daddr
, iph
->saddr
, RT_TOS(iph
->tos
), tunnel
->parms
.link
)) {
1163 tdev
= rt
->u
.dst
.dev
;
1167 dev
->flags
|= IFF_POINTOPOINT
;
1169 #ifdef CONFIG_NET_IPGRE_BROADCAST
1170 if (MULTICAST(iph
->daddr
)) {
1173 dev
->flags
= IFF_BROADCAST
;
1174 dev
->hard_header
= ipgre_header
;
1175 dev
->open
= ipgre_open
;
1176 dev
->stop
= ipgre_close
;
1181 if (!tdev
&& tunnel
->parms
.link
)
1182 tdev
= __dev_get_by_index(tunnel
->parms
.link
);
1185 hlen
= tdev
->hard_header_len
;
1188 dev
->iflink
= tunnel
->parms
.link
;
1190 /* Precalculate GRE options length */
1191 if (tunnel
->parms
.o_flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
)) {
1192 if (tunnel
->parms
.o_flags
&GRE_CSUM
)
1194 if (tunnel
->parms
.o_flags
&GRE_KEY
)
1196 if (tunnel
->parms
.o_flags
&GRE_SEQ
)
1199 dev
->hard_header_len
= hlen
+ addend
;
1200 dev
->mtu
= mtu
- addend
;
1201 tunnel
->hlen
= addend
;
1206 static int ipgre_fb_tunnel_open(struct net_device
*dev
)
1212 static int ipgre_fb_tunnel_close(struct net_device
*dev
)
1219 int __init
ipgre_fb_tunnel_init(struct net_device
*dev
)
1221 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
1224 ipgre_tunnel_init_gen(dev
);
1226 dev
->open
= ipgre_fb_tunnel_open
;
1227 dev
->stop
= ipgre_fb_tunnel_close
;
1230 iph
= &ipgre_fb_tunnel
.parms
.iph
;
1232 iph
->protocol
= IPPROTO_GRE
;
1234 tunnel
->hlen
= sizeof(struct iphdr
) + 4;
1237 tunnels_wc
[0] = &ipgre_fb_tunnel
;
1242 static struct inet_protocol ipgre_protocol
= {
1243 ipgre_rcv
, /* GRE handler */
1244 ipgre_err
, /* TUNNEL error control */
1246 IPPROTO_GRE
, /* protocol ID */
1254 * And now the modules code and kernel interface.
1258 int init_module(void)
1260 int __init
ipgre_init(void)
1263 printk(KERN_INFO
"GRE over IPv4 tunneling driver\n");
1265 ipgre_fb_tunnel_dev
.priv
= (void*)&ipgre_fb_tunnel
;
1267 register_netdev(&ipgre_fb_tunnel_dev
);
1270 register_netdevice(&ipgre_fb_tunnel_dev
);
1274 inet_add_protocol(&ipgre_protocol
);
1280 void cleanup_module(void)
1282 if ( inet_del_protocol(&ipgre_protocol
) < 0 )
1283 printk(KERN_INFO
"ipgre close: can't remove protocol\n");
1285 unregister_netdev(&ipgre_fb_tunnel_dev
);