2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/netfilter_ipv4.h>
31 #include <linux/if_ether.h>
32 #include <linux/if_bridge.h>
33 #include <linux/etherdevice.h>
34 #include <linux/llc.h>
39 #include <net/protocol.h>
42 #include <net/checksum.h>
43 #include <net/dsfield.h>
44 #include <net/inet_ecn.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: t->recursion lock breaks dead loops. It looks
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
/* Forward declarations for the net_device init/setup hooks installed below
   (plain GRE devices vs. Ethernet-over-GRE devices use different setup). */
122 static int ipgre_tunnel_init(struct net_device
*dev
);
123 static void ipgre_tunnel_setup(struct net_device
*dev
);
124 static void ipgre_ether_tunnel_setup(struct net_device
*dev
);
126 /* Fallback tunnel: no source, no destination, no key, no options */
128 static int ipgre_fb_tunnel_init(struct net_device
*dev
);
/* The always-present "gre0" fallback device; receives keyless packets that
   match no configured tunnel (see the lookup fall-through). */
130 static struct net_device
*ipgre_fb_tunnel_dev
;
132 /* Tunnel hash table */
142 We require exact key match i.e. if a key is present in packet
143 it will match only tunnel with the same key; if it is not present,
144 it will match only keyless tunnel.
146 All keyless packets, if not matched configured keyless tunnels
147 will match fallback tunnel.
/*
 * Fold an IPv4 address (or GRE key) into one of 16 hash buckets by
 * XOR-ing the low byte's two nibbles.  The argument is fully
 * parenthesized so the macro stays correct when invoked with a
 * compound expression (the original `addr^(addr>>4)` mis-parses for
 * arguments containing operators of lower precedence than `>>`).
 */
#define HASH(addr) (((addr)^((addr)>>4))&0xF)
/* Four tunnel hash tables, indexed by match priority; each entry heads a
   singly-linked chain of ip_tunnel (via ->next).  Per the lookup order:
   [3] = remote+local match, [2] = remote only, [1] = local only,
   [0] = wildcard (keyed lookup only). */
153 static struct ip_tunnel
*tunnels
[4][HASH_SIZE
];
155 #define tunnels_r_l (tunnels[3])
156 #define tunnels_r (tunnels[2])
157 #define tunnels_l (tunnels[1])
158 #define tunnels_wc (tunnels[0])
/* Guards the hash tables above: writers take write_lock_bh (link/unlink),
   readers take read_lock (rcv/err paths). */
160 static DEFINE_RWLOCK(ipgre_lock
);
162 /* Given src, dst and key, find appropriate for input tunnel. */
164 static struct ip_tunnel
* ipgre_tunnel_lookup(u32 remote
, u32 local
, u32 key
)
166 unsigned h0
= HASH(remote
);
167 unsigned h1
= HASH(key
);
170 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
171 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
172 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
176 for (t
= tunnels_r
[h0
^h1
]; t
; t
= t
->next
) {
177 if (remote
== t
->parms
.iph
.daddr
) {
178 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
182 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
183 if (local
== t
->parms
.iph
.saddr
||
184 (local
== t
->parms
.iph
.daddr
&& MULTICAST(local
))) {
185 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
189 for (t
= tunnels_wc
[h1
]; t
; t
= t
->next
) {
190 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
194 if (ipgre_fb_tunnel_dev
->flags
&IFF_UP
)
195 return netdev_priv(ipgre_fb_tunnel_dev
);
199 static struct ip_tunnel
**ipgre_bucket(struct ip_tunnel
*t
)
201 u32 remote
= t
->parms
.iph
.daddr
;
202 u32 local
= t
->parms
.iph
.saddr
;
203 u32 key
= t
->parms
.i_key
;
204 unsigned h
= HASH(key
);
209 if (remote
&& !MULTICAST(remote
)) {
214 return &tunnels
[prio
][h
];
217 static void ipgre_tunnel_link(struct ip_tunnel
*t
)
219 struct ip_tunnel
**tp
= ipgre_bucket(t
);
222 write_lock_bh(&ipgre_lock
);
224 write_unlock_bh(&ipgre_lock
);
227 static void ipgre_tunnel_unlink(struct ip_tunnel
*t
)
229 struct ip_tunnel
**tp
;
231 for (tp
= ipgre_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
233 write_lock_bh(&ipgre_lock
);
235 write_unlock_bh(&ipgre_lock
);
241 static struct ip_tunnel
* ipgre_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
243 u32 remote
= parms
->iph
.daddr
;
244 u32 local
= parms
->iph
.saddr
;
245 u32 key
= parms
->i_key
;
246 struct ip_tunnel
*t
, **tp
, *nt
;
247 struct net_device
*dev
;
248 unsigned h
= HASH(key
);
254 if (remote
&& !MULTICAST(remote
)) {
258 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
259 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
260 if (key
== t
->parms
.i_key
)
268 strlcpy(name
, parms
->name
, IFNAMSIZ
);
271 for (i
=1; i
<100; i
++) {
272 sprintf(name
, "gre%d", i
);
273 if (__dev_get_by_name(name
) == NULL
)
280 if (parms
->iph
.id
== htons(ETH_P_BRIDGE
))
281 dev
= alloc_netdev(sizeof(*t
), name
, ipgre_ether_tunnel_setup
);
283 dev
= alloc_netdev(sizeof(*t
), name
, ipgre_tunnel_setup
);
287 dev
->init
= ipgre_tunnel_init
;
288 nt
= netdev_priv(dev
);
291 if (register_netdevice(dev
) < 0) {
297 ipgre_tunnel_link(nt
);
304 static void ipgre_tunnel_uninit(struct net_device
*dev
)
306 ipgre_tunnel_unlink(netdev_priv(dev
));
311 static void ipgre_err(struct sk_buff
*skb
, u32 info
)
313 #ifndef I_WISH_WORLD_WERE_PERFECT
315 /* It is not :-( All the routers (except for Linux) return only
316 8 bytes of packet payload. It means, that precise relaying of
317 ICMP in the real Internet is absolutely infeasible.
319 Moreover, Cisco "wise men" put GRE key to the third word
320 in GRE header. It makes impossible maintaining even soft state for keyed
321 GRE tunnels with enabled checksum. Tell them "thank you".
323 Well, I wonder, rfc1812 was written by Cisco employee,
324 what the hell these idiots break standards established
328 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
329 u16
*p
= (u16
*)(skb
->data
+(iph
->ihl
<<2));
330 int grehlen
= (iph
->ihl
<<2) + 4;
331 int type
= skb
->h
.icmph
->type
;
332 int code
= skb
->h
.icmph
->code
;
337 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
338 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
347 /* If only 8 bytes returned, keyed message will be dropped here */
348 if (skb_headlen(skb
) < grehlen
)
353 case ICMP_PARAMETERPROB
:
356 case ICMP_DEST_UNREACH
:
359 case ICMP_PORT_UNREACH
:
360 /* Impossible event. */
362 case ICMP_FRAG_NEEDED
:
363 /* Soft state for pmtu is maintained by IP core. */
366 /* All others are translated to HOST_UNREACH.
367 rfc2003 contains "deep thoughts" about NET_UNREACH,
368 I believe they are just ether pollution. --ANK
373 case ICMP_TIME_EXCEEDED
:
374 if (code
!= ICMP_EXC_TTL
)
379 read_lock(&ipgre_lock
);
380 t
= ipgre_tunnel_lookup(iph
->daddr
, iph
->saddr
, (flags
&GRE_KEY
) ? *(((u32
*)p
) + (grehlen
>>2) - 1) : 0);
381 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0 || MULTICAST(t
->parms
.iph
.daddr
))
384 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
387 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
391 t
->err_time
= jiffies
;
393 read_unlock(&ipgre_lock
);
396 struct iphdr
*iph
= (struct iphdr
*)dp
;
398 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
399 int type
= skb
->h
.icmph
->type
;
400 int code
= skb
->h
.icmph
->code
;
406 int grehlen
= (iph
->ihl
<<2) + 4;
407 struct sk_buff
*skb2
;
411 if (p
[1] != htons(ETH_P_IP
))
415 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
416 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
425 if (len
< grehlen
+ sizeof(struct iphdr
))
427 eiph
= (struct iphdr
*)(dp
+ grehlen
);
432 case ICMP_PARAMETERPROB
:
433 n
= ntohl(skb
->h
.icmph
->un
.gateway
) >> 24;
434 if (n
< (iph
->ihl
<<2))
437 /* So... This guy found something strange INSIDE encapsulated
438 packet. Well, he is fool, but what can we do ?
440 rel_type
= ICMP_PARAMETERPROB
;
442 rel_info
= htonl(n
<< 24);
445 case ICMP_DEST_UNREACH
:
448 case ICMP_PORT_UNREACH
:
449 /* Impossible event. */
451 case ICMP_FRAG_NEEDED
:
452 /* And it is the only really necessary thing :-) */
453 n
= ntohs(skb
->h
.icmph
->un
.frag
.mtu
);
457 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
458 if (n
> ntohs(eiph
->tot_len
))
463 /* All others are translated to HOST_UNREACH.
464 rfc2003 contains "deep thoughts" about NET_UNREACH,
465 I believe, it is just ether pollution. --ANK
467 rel_type
= ICMP_DEST_UNREACH
;
468 rel_code
= ICMP_HOST_UNREACH
;
472 case ICMP_TIME_EXCEEDED
:
473 if (code
!= ICMP_EXC_TTL
)
478 /* Prepare fake skb to feed it to icmp_send */
479 skb2
= skb_clone(skb
, GFP_ATOMIC
);
482 dst_release(skb2
->dst
);
484 skb_pull(skb2
, skb
->data
- (u8
*)eiph
);
485 skb2
->nh
.raw
= skb2
->data
;
487 /* Try to guess incoming interface */
488 memset(&fl
, 0, sizeof(fl
));
489 fl
.fl4_dst
= eiph
->saddr
;
490 fl
.fl4_tos
= RT_TOS(eiph
->tos
);
491 fl
.proto
= IPPROTO_GRE
;
492 if (ip_route_output_key(&rt
, &fl
)) {
496 skb2
->dev
= rt
->u
.dst
.dev
;
498 /* route "incoming" packet */
499 if (rt
->rt_flags
&RTCF_LOCAL
) {
502 fl
.fl4_dst
= eiph
->daddr
;
503 fl
.fl4_src
= eiph
->saddr
;
504 fl
.fl4_tos
= eiph
->tos
;
505 if (ip_route_output_key(&rt
, &fl
) ||
506 rt
->u
.dst
.dev
->type
!= ARPHRD_IPGRE
) {
513 if (ip_route_input(skb2
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, skb2
->dev
) ||
514 skb2
->dst
->dev
->type
!= ARPHRD_IPGRE
) {
520 /* change mtu on this route */
521 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
522 if (n
> dst_mtu(skb2
->dst
)) {
526 skb2
->dst
->ops
->update_pmtu(skb2
->dst
, n
);
527 } else if (type
== ICMP_TIME_EXCEEDED
) {
528 struct ip_tunnel
*t
= netdev_priv(skb2
->dev
);
529 if (t
->parms
.iph
.ttl
) {
530 rel_type
= ICMP_DEST_UNREACH
;
531 rel_code
= ICMP_HOST_UNREACH
;
535 icmp_send(skb2
, rel_type
, rel_code
, rel_info
);
/* On decapsulation, propagate Congestion Experienced (CE) from the outer
   GRE/IP header's TOS into the inner IPv4 or IPv6 header, per the ECN
   tunneling rules. */
540 static inline void ipgre_ecn_decapsulate(struct iphdr
*iph
, struct sk_buff
*skb
)
542 if (INET_ECN_is_ce(iph
->tos
)) {
543 if (skb
->protocol
== htons(ETH_P_IP
)) {
544 IP_ECN_set_ce(skb
->nh
.iph
);
545 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
546 IP6_ECN_set_ce(skb
->nh
.ipv6h
);
552 ipgre_ecn_encapsulate(u8 tos
, struct iphdr
*old_iph
, struct sk_buff
*skb
)
555 if (skb
->protocol
== htons(ETH_P_IP
))
556 inner
= old_iph
->tos
;
557 else if (skb
->protocol
== htons(ETH_P_IPV6
))
558 inner
= ipv6_get_dsfield((struct ipv6hdr
*)old_iph
);
559 return INET_ECN_encapsulate(tos
, inner
);
562 static __be16
ipgre_type_trans(struct sk_buff
*skb
, struct net_device
*dev
)
564 if (skb
->protocol
== htons(ETH_P_BRIDGE
)) {
565 if (!pskb_may_pull(skb
, ETH_HLEN
))
567 return eth_type_trans(skb
, dev
);
569 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
570 else if (skb
->protocol
== htons(LLC_SAP_BSPAN
)) {
571 br_stp_rcv_raw(skb
, dev
);
579 static int ipgre_rcv(struct sk_buff
*skb
)
587 struct ip_tunnel
*tunnel
;
590 if (!pskb_may_pull(skb
, 16))
597 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_ROUTING
|GRE_SEQ
|GRE_VERSION
)) {
598 /* - Version must be 0.
599 - We do not support routing headers.
601 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
604 if (flags
&GRE_CSUM
) {
605 switch (skb
->ip_summed
) {
606 case CHECKSUM_COMPLETE
:
607 csum
= (u16
)csum_fold(skb
->csum
);
613 csum
= __skb_checksum_complete(skb
);
614 skb
->ip_summed
= CHECKSUM_COMPLETE
;
619 key
= *(u32
*)(h
+ offset
);
623 seqno
= ntohl(*(u32
*)(h
+ offset
));
628 read_lock(&ipgre_lock
);
629 if ((tunnel
= ipgre_tunnel_lookup(iph
->saddr
, iph
->daddr
, key
)) != NULL
) {
632 skb
->protocol
= *(u16
*)(h
+ 2);
633 /* WCCP version 1 and 2 protocol decoding.
634 * - Change protocol to IP
635 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
638 skb
->protocol
== htons(ETH_P_WCCP
)) {
639 skb
->protocol
= htons(ETH_P_IP
);
640 if ((*(h
+ offset
) & 0xF0) != 0x40)
644 skb
->mac
.raw
= skb
->nh
.raw
;
645 skb
->nh
.raw
= __pskb_pull(skb
, offset
);
646 skb_postpull_rcsum(skb
, skb
->h
.raw
, offset
);
647 skb
->pkt_type
= PACKET_HOST
;
648 #ifdef CONFIG_NET_IPGRE_BROADCAST
649 if (MULTICAST(iph
->daddr
)) {
650 /* Looped back packet, drop it! */
651 if (((struct rtable
*)skb
->dst
)->fl
.iif
== 0)
653 tunnel
->stat
.multicast
++;
654 skb
->pkt_type
= PACKET_BROADCAST
;
658 if (((flags
&GRE_CSUM
) && csum
) ||
659 (!(flags
&GRE_CSUM
) && tunnel
->parms
.i_flags
&GRE_CSUM
)) {
660 tunnel
->stat
.rx_crc_errors
++;
661 tunnel
->stat
.rx_errors
++;
664 if (tunnel
->parms
.i_flags
&GRE_SEQ
) {
665 if (!(flags
&GRE_SEQ
) ||
666 (tunnel
->i_seqno
&& (s32
)(seqno
- tunnel
->i_seqno
) < 0)) {
667 tunnel
->stat
.rx_fifo_errors
++;
668 tunnel
->stat
.rx_errors
++;
671 tunnel
->i_seqno
= seqno
+ 1;
673 if (tunnel
->dev
->type
== ARPHRD_ETHER
) {
674 skb
->protocol
= ipgre_type_trans(skb
, tunnel
->dev
);
675 if (!skb
->protocol
) {
676 tunnel
->stat
.rx_errors
++;
680 tunnel
->stat
.rx_packets
++;
681 tunnel
->stat
.rx_bytes
+= skb
->len
;
682 skb
->dev
= tunnel
->dev
;
683 dst_release(skb
->dst
);
686 ipgre_ecn_decapsulate(iph
, skb
);
688 read_unlock(&ipgre_lock
);
691 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PORT_UNREACH
, 0);
694 read_unlock(&ipgre_lock
);
700 static int ipgre_tunnel_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
702 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
703 struct net_device_stats
*stats
= &tunnel
->stat
;
704 struct iphdr
*old_iph
= skb
->nh
.iph
;
708 struct rtable
*rt
; /* Route to the other host */
709 struct net_device
*tdev
; /* Device to other host */
710 struct iphdr
*iph
; /* Our new IP header */
711 int max_headroom
; /* The extra header space needed */
717 if (tunnel
->recursion
++) {
718 tunnel
->stat
.collisions
++;
722 if (dev
->type
== ARPHRD_ETHER
) {
723 skb
->protocol
= htons(ETH_P_BRIDGE
);
724 gre_hlen
= tunnel
->hlen
- ETH_HLEN
;
725 push_hlen
= gre_hlen
;
726 tiph
= &tunnel
->parms
.iph
;
727 } else if (dev
->hard_header
) {
728 gre_hlen
= tunnel
->hlen
;
730 tiph
= (struct iphdr
*)skb
->data
;
732 gre_hlen
= tunnel
->hlen
;
733 push_hlen
= gre_hlen
;
734 tiph
= &tunnel
->parms
.iph
;
737 if ((dst
= tiph
->daddr
) == 0) {
740 if (skb
->dst
== NULL
) {
741 tunnel
->stat
.tx_fifo_errors
++;
745 if (skb
->protocol
== htons(ETH_P_IP
)) {
746 rt
= (struct rtable
*)skb
->dst
;
747 if ((dst
= rt
->rt_gateway
) == 0)
751 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
752 struct in6_addr
*addr6
;
754 struct neighbour
*neigh
= skb
->dst
->neighbour
;
759 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
760 addr_type
= ipv6_addr_type(addr6
);
762 if (addr_type
== IPV6_ADDR_ANY
) {
763 addr6
= &skb
->nh
.ipv6h
->daddr
;
764 addr_type
= ipv6_addr_type(addr6
);
767 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
770 dst
= addr6
->s6_addr32
[3];
779 if (skb
->protocol
== htons(ETH_P_IP
))
785 struct flowi fl
= { .oif
= tunnel
->parms
.link
,
788 .saddr
= tiph
->saddr
,
789 .tos
= RT_TOS(tos
) } },
790 .proto
= IPPROTO_GRE
};
791 if (ip_route_output_key(&rt
, &fl
)) {
792 tunnel
->stat
.tx_carrier_errors
++;
796 tdev
= rt
->u
.dst
.dev
;
800 tunnel
->stat
.collisions
++;
806 mtu
= dst_mtu(&rt
->u
.dst
) - tunnel
->hlen
;
808 mtu
= skb
->dst
? dst_mtu(skb
->dst
) : dev
->mtu
;
811 skb
->dst
->ops
->update_pmtu(skb
->dst
, mtu
);
813 if (skb
->protocol
== htons(ETH_P_IP
)) {
814 df
|= (old_iph
->frag_off
&htons(IP_DF
));
816 if ((old_iph
->frag_off
&htons(IP_DF
)) &&
817 mtu
< ntohs(old_iph
->tot_len
)) {
818 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
, htonl(mtu
));
824 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
825 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
827 if (rt6
&& mtu
< dst_mtu(skb
->dst
) && mtu
>= IPV6_MIN_MTU
) {
828 if ((tunnel
->parms
.iph
.daddr
&& !MULTICAST(tunnel
->parms
.iph
.daddr
)) ||
829 rt6
->rt6i_dst
.plen
== 128) {
830 rt6
->rt6i_flags
|= RTF_MODIFIED
;
831 skb
->dst
->metrics
[RTAX_MTU
-1] = mtu
;
835 if (mtu
>= IPV6_MIN_MTU
&&
836 mtu
< skb
->len
- tunnel
->hlen
+ push_hlen
) {
837 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
844 if (tunnel
->err_count
> 0) {
845 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
848 dst_link_failure(skb
);
850 tunnel
->err_count
= 0;
853 max_headroom
= LL_RESERVED_SPACE(tdev
) + push_hlen
;
855 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
856 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
865 skb_set_owner_w(new_skb
, skb
->sk
);
868 old_iph
= skb
->nh
.iph
;
871 skb
->h
.raw
= skb
->nh
.raw
;
872 skb
->nh
.raw
= skb_push(skb
, push_hlen
);
873 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
874 IPCB(skb
)->flags
&= ~(IPSKB_XFRM_TUNNEL_SIZE
| IPSKB_XFRM_TRANSFORMED
|
876 dst_release(skb
->dst
);
877 skb
->dst
= &rt
->u
.dst
;
880 * Push down and install the IPIP header.
885 iph
->ihl
= sizeof(struct iphdr
) >> 2;
887 iph
->protocol
= IPPROTO_GRE
;
888 iph
->tos
= ipgre_ecn_encapsulate(tos
, old_iph
, skb
);
889 iph
->daddr
= rt
->rt_dst
;
890 iph
->saddr
= rt
->rt_src
;
892 if ((iph
->ttl
= tiph
->ttl
) == 0) {
893 if (skb
->protocol
== htons(ETH_P_IP
))
894 iph
->ttl
= old_iph
->ttl
;
896 else if (skb
->protocol
== htons(ETH_P_IPV6
))
897 iph
->ttl
= ((struct ipv6hdr
*)old_iph
)->hop_limit
;
900 iph
->ttl
= dst_metric(&rt
->u
.dst
, RTAX_HOPLIMIT
);
903 ((u16
*)(iph
+1))[0] = tunnel
->parms
.o_flags
;
904 ((u16
*)(iph
+1))[1] = skb
->protocol
;
906 if (tunnel
->parms
.o_flags
&(GRE_KEY
|GRE_CSUM
|GRE_SEQ
)) {
907 u32
*ptr
= (u32
*)(((u8
*)iph
) + gre_hlen
- 4);
909 if (tunnel
->parms
.o_flags
&GRE_SEQ
) {
911 *ptr
= htonl(tunnel
->o_seqno
);
914 if (tunnel
->parms
.o_flags
&GRE_KEY
) {
915 *ptr
= tunnel
->parms
.o_key
;
918 if (tunnel
->parms
.o_flags
&GRE_CSUM
) {
920 *(__u16
*)ptr
= ip_compute_csum((void*)(iph
+1), skb
->len
- sizeof(struct iphdr
));
931 dst_link_failure(skb
);
941 ipgre_tunnel_ioctl (struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
944 struct ip_tunnel_parm p
;
950 if (dev
== ipgre_fb_tunnel_dev
) {
951 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
955 t
= ipgre_tunnel_locate(&p
, 0);
958 t
= netdev_priv(dev
);
959 memcpy(&p
, &t
->parms
, sizeof(p
));
960 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
967 if (!capable(CAP_NET_ADMIN
))
971 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
975 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_GRE
||
976 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&htons(~IP_DF
)) ||
977 ((p
.i_flags
|p
.o_flags
)&(GRE_VERSION
|GRE_ROUTING
)))
979 if (p
.iph
.id
!= 0 && p
.iph
.id
!= htons(ETH_P_BRIDGE
))
982 p
.iph
.frag_off
|= htons(IP_DF
);
984 if (!(p
.i_flags
&GRE_KEY
))
986 if (!(p
.o_flags
&GRE_KEY
))
989 t
= ipgre_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
991 if (dev
!= ipgre_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
) {
1000 t
= netdev_priv(dev
);
1002 if (t
->dev
->type
== ARPHRD_ETHER
)
1003 nflags
= IFF_BROADCAST
;
1004 else if (MULTICAST(p
.iph
.daddr
))
1005 nflags
= IFF_BROADCAST
;
1006 else if (p
.iph
.daddr
)
1007 nflags
= IFF_POINTOPOINT
;
1009 if ((dev
->flags
^nflags
)&(IFF_POINTOPOINT
|IFF_BROADCAST
)) {
1013 ipgre_tunnel_unlink(t
);
1014 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
1015 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
1016 t
->parms
.i_key
= p
.i_key
;
1017 t
->parms
.o_key
= p
.o_key
;
1018 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
1019 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
1020 ipgre_tunnel_link(t
);
1021 netdev_state_change(dev
);
1027 if (cmd
== SIOCCHGTUNNEL
) {
1028 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
1029 t
->parms
.iph
.tos
= p
.iph
.tos
;
1030 t
->parms
.iph
.frag_off
= p
.iph
.frag_off
;
1032 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
1035 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
1040 if (!capable(CAP_NET_ADMIN
))
1043 if (dev
== ipgre_fb_tunnel_dev
) {
1045 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
1048 if ((t
= ipgre_tunnel_locate(&p
, 0)) == NULL
)
1051 if (t
== netdev_priv(ipgre_fb_tunnel_dev
))
1055 err
= unregister_netdevice(dev
);
/* get_stats callback: return the statistics block embedded in the tunnel's
   device-private data (tunnel->stat). */
1066 static struct net_device_stats
*ipgre_tunnel_get_stats(struct net_device
*dev
)
1068 return &(((struct ip_tunnel
*)netdev_priv(dev
))->stat
);
/* change_mtu callback: validate the requested MTU.  Lower bound 68 is the
   minimum IPv4 MTU; the upper bound reserves room for the GRE+IP
   encapsulation overhead (tunnel->hlen).
   NOTE(review): the rejection return path is missing from this extract. */
1071 static int ipgre_tunnel_change_mtu(struct net_device
*dev
, int new_mtu
)
1073 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1074 if (new_mtu
< 68 || new_mtu
> 0xFFF8 - tunnel
->hlen
)
1080 #ifdef CONFIG_NET_IPGRE_BROADCAST
1081 /* Nice toy. Unfortunately, useless in real life :-)
1082 It allows to construct virtual multiprotocol broadcast "LAN"
1083 over the Internet, provided multicast routing is tuned.
1086 I have no idea was this bicycle invented before me,
1087 so that I had to set ARPHRD_IPGRE to a random value.
1088 I have an impression, that Cisco could make something similar,
1089 but this feature is apparently missing in IOS<=11.2(8).
1091 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1092 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1094 ping -t 255 224.66.66.66
1096 If nobody answers, mbone does not work.
1098 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1099 ip addr add 10.66.66.<somewhat>/24 dev Universe
1100 ifconfig Universe up
1101 ifconfig Universe add fe80::<Your_real_addr>/10
1102 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1105 ftp fec0:6666:6666::193.233.7.65
1110 static int ipgre_header(struct sk_buff
*skb
, struct net_device
*dev
, unsigned short type
,
1111 void *daddr
, void *saddr
, unsigned len
)
1113 struct ip_tunnel
*t
= netdev_priv(dev
);
1114 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
, t
->hlen
);
1115 u16
*p
= (u16
*)(iph
+1);
1117 memcpy(iph
, &t
->parms
.iph
, sizeof(struct iphdr
));
1118 p
[0] = t
->parms
.o_flags
;
1122 * Set the source hardware address.
1126 memcpy(&iph
->saddr
, saddr
, 4);
1129 memcpy(&iph
->daddr
, daddr
, 4);
1132 if (iph
->daddr
&& !MULTICAST(iph
->daddr
))
1138 static int ipgre_open(struct net_device
*dev
)
1140 struct ip_tunnel
*t
= netdev_priv(dev
);
1142 if (MULTICAST(t
->parms
.iph
.daddr
)) {
1143 struct flowi fl
= { .oif
= t
->parms
.link
,
1145 { .daddr
= t
->parms
.iph
.daddr
,
1146 .saddr
= t
->parms
.iph
.saddr
,
1147 .tos
= RT_TOS(t
->parms
.iph
.tos
) } },
1148 .proto
= IPPROTO_GRE
};
1150 if (ip_route_output_key(&rt
, &fl
))
1151 return -EADDRNOTAVAIL
;
1152 dev
= rt
->u
.dst
.dev
;
1154 if (__in_dev_get_rtnl(dev
) == NULL
)
1155 return -EADDRNOTAVAIL
;
1156 t
->mlink
= dev
->ifindex
;
1157 ip_mc_inc_group(__in_dev_get_rtnl(dev
), t
->parms
.iph
.daddr
);
1162 static int ipgre_close(struct net_device
*dev
)
1164 struct ip_tunnel
*t
= netdev_priv(dev
);
1165 if (MULTICAST(t
->parms
.iph
.daddr
) && t
->mlink
) {
1166 struct in_device
*in_dev
= inetdev_by_index(t
->mlink
);
1168 ip_mc_dec_group(in_dev
, t
->parms
.iph
.daddr
);
1177 static void ipgre_tunnel_setup(struct net_device
*dev
)
1179 SET_MODULE_OWNER(dev
);
1180 dev
->uninit
= ipgre_tunnel_uninit
;
1181 dev
->destructor
= free_netdev
;
1182 dev
->hard_start_xmit
= ipgre_tunnel_xmit
;
1183 dev
->get_stats
= ipgre_tunnel_get_stats
;
1184 dev
->do_ioctl
= ipgre_tunnel_ioctl
;
1185 dev
->change_mtu
= ipgre_tunnel_change_mtu
;
1187 dev
->type
= ARPHRD_IPGRE
;
1188 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
) + 4;
1189 dev
->mtu
= ETH_DATA_LEN
- sizeof(struct iphdr
) - 4;
1190 dev
->flags
= IFF_NOARP
;
1195 static void ipgre_ether_tunnel_setup(struct net_device
*dev
)
1199 SET_MODULE_OWNER(dev
);
1200 dev
->uninit
= ipgre_tunnel_uninit
;
1201 dev
->destructor
= free_netdev
;
1202 dev
->hard_start_xmit
= ipgre_tunnel_xmit
;
1203 dev
->get_stats
= ipgre_tunnel_get_stats
;
1204 dev
->do_ioctl
= ipgre_tunnel_ioctl
;
1207 static int ipgre_tunnel_init(struct net_device
*dev
)
1209 struct net_device
*tdev
= NULL
;
1210 struct ip_tunnel
*tunnel
;
1212 int hlen
= LL_MAX_HEADER
;
1213 int mtu
= ETH_DATA_LEN
;
1214 int addend
= sizeof(struct iphdr
) + 4;
1216 tunnel
= netdev_priv(dev
);
1217 iph
= &tunnel
->parms
.iph
;
1220 strcpy(tunnel
->parms
.name
, dev
->name
);
1222 if (dev
->type
== ARPHRD_ETHER
)
1223 random_ether_addr(dev
->dev_addr
);
1225 memcpy(dev
->dev_addr
, &tunnel
->parms
.iph
.saddr
, 4);
1226 memcpy(dev
->broadcast
, &tunnel
->parms
.iph
.daddr
, 4);
1229 if (dev
->type
== ARPHRD_ETHER
)
1230 dev
->flags
|= IFF_BROADCAST
;
1231 #ifdef CONFIG_NET_IPGRE_BROADCAST
1232 else if (MULTICAST(iph
->daddr
)) {
1235 dev
->flags
= IFF_BROADCAST
;
1236 dev
->hard_header
= ipgre_header
;
1237 dev
->open
= ipgre_open
;
1238 dev
->stop
= ipgre_close
;
1241 else if (iph
->daddr
)
1242 dev
->flags
|= IFF_POINTOPOINT
;
1244 /* Guess output device to choose reasonable mtu and hard_header_len */
1247 struct flowi fl
= { .oif
= tunnel
->parms
.link
,
1249 { .daddr
= iph
->daddr
,
1250 .saddr
= iph
->saddr
,
1251 .tos
= RT_TOS(iph
->tos
) } },
1252 .proto
= IPPROTO_GRE
};
1254 if (!ip_route_output_key(&rt
, &fl
)) {
1255 tdev
= rt
->u
.dst
.dev
;
1260 if (!tdev
&& tunnel
->parms
.link
)
1261 tdev
= __dev_get_by_index(tunnel
->parms
.link
);
1264 hlen
= tdev
->hard_header_len
;
1267 dev
->iflink
= tunnel
->parms
.link
;
1269 /* Precalculate GRE options length */
1270 if (tunnel
->parms
.o_flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
)) {
1271 if (tunnel
->parms
.o_flags
&GRE_CSUM
)
1273 if (tunnel
->parms
.o_flags
&GRE_KEY
)
1275 if (tunnel
->parms
.o_flags
&GRE_SEQ
)
1278 if (dev
->type
== ARPHRD_ETHER
)
1280 dev
->hard_header_len
= hlen
+ addend
;
1281 dev
->mtu
= mtu
- addend
;
1282 tunnel
->hlen
= addend
;
1286 static int __init
ipgre_fb_tunnel_init(struct net_device
*dev
)
1288 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
1289 struct iphdr
*iph
= &tunnel
->parms
.iph
;
1292 strcpy(tunnel
->parms
.name
, dev
->name
);
1295 iph
->protocol
= IPPROTO_GRE
;
1297 tunnel
->hlen
= sizeof(struct iphdr
) + 4;
1300 tunnels_wc
[0] = tunnel
;
1305 static struct net_protocol ipgre_protocol
= {
1306 .handler
= ipgre_rcv
,
1307 .err_handler
= ipgre_err
,
1312 * And now the modules code and kernel interface.
1315 static int __init
ipgre_init(void)
1319 printk(KERN_INFO
"GRE over IPv4 tunneling driver\n");
1321 if (inet_add_protocol(&ipgre_protocol
, IPPROTO_GRE
) < 0) {
1322 printk(KERN_INFO
"ipgre init: can't add protocol\n");
1326 ipgre_fb_tunnel_dev
= alloc_netdev(sizeof(struct ip_tunnel
), "gre0",
1327 ipgre_tunnel_setup
);
1328 if (!ipgre_fb_tunnel_dev
) {
1333 ipgre_fb_tunnel_dev
->init
= ipgre_fb_tunnel_init
;
1335 if ((err
= register_netdev(ipgre_fb_tunnel_dev
)))
1340 free_netdev(ipgre_fb_tunnel_dev
);
1342 inet_del_protocol(&ipgre_protocol
, IPPROTO_GRE
);
1346 static void __exit
ipgre_destroy_tunnels(void)
1350 for (prio
= 0; prio
< 4; prio
++) {
1352 for (h
= 0; h
< HASH_SIZE
; h
++) {
1353 struct ip_tunnel
*t
;
1354 while ((t
= tunnels
[prio
][h
]) != NULL
)
1355 unregister_netdevice(t
->dev
);
1360 static void __exit
ipgre_fini(void)
1362 if (inet_del_protocol(&ipgre_protocol
, IPPROTO_GRE
) < 0)
1363 printk(KERN_INFO
"ipgre close: can't remove protocol\n");
1366 ipgre_destroy_tunnels();
1370 module_init(ipgre_init
);
1371 module_exit(ipgre_fini
);
1372 MODULE_LICENSE("GPL");