2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
34 #include <net/protocol.h>
37 #include <net/checksum.h>
41 #include <net/ip6_fib.h>
42 #include <net/ip6_route.h>
49 1. The most important issue is detecting local dead loops.
50 They would cause complete host lockup in transmit, which
51 would be "resolved" by stack overflow or, if queueing is enabled,
52 with infinite looping in net_bh.
54 We cannot track such dead loops during route installation,
55 it is infeasible task. The most general solutions would be
56 to keep skb->encapsulation counter (sort of local ttl),
57 and silently drop packet when it expires. It is the best
58 solution, but it supposes maintaining a new variable in ALL
59 skb, even if no tunneling is used.
61 Current solution: t->recursion lock breaks dead loops. It looks
62 like dev->tbusy flag, but I preferred new variable, because
63 the semantics is different. One day, when hard_start_xmit
64 will be multithreaded we will have to use skb->encapsulation.
68 2. Networking dead loops would not kill routers, but would really
69 kill network. IP hop limit plays role of "t->recursion" in this case,
70 if we copy it from packet being encapsulated to upper header.
71 It is very good solution, but it introduces two problems:
73 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
74 do not work over tunnels.
75 - traceroute does not work. I planned to relay ICMP from tunnel,
76 so that this problem would be solved and traceroute output
77 would even more informative. This idea appeared to be wrong:
78 only Linux complies to rfc1812 now (yes, guys, Linux is the only
79 true router now :-)), all routers (at least, in neighbourhood of mine)
80 return only 8 bytes of payload. It is the end.
82 Hence, if we want that OSPF worked or traceroute said something reasonable,
83 we should search for another solution.
85 One of them is to parse packet trying to detect inner encapsulation
86 made by our node. It is difficult or even impossible, especially,
87 taking into account fragmentation. To be short, it is not a solution at all.
89 Current solution: The solution was UNEXPECTEDLY SIMPLE.
90 We force DF flag on tunnels with preconfigured hop limit,
91 that is ALL. :-) Well, it does not remove the problem completely,
92 but exponential growth of network traffic is changed to linear
93 (branches, that exceed pmtu are pruned) and tunnel mtu
94 fastly degrades to value <68, where looping stops.
95 Yes, it is not good if there exists a router in the loop,
96 which does not force DF, even when encapsulating packets have DF set.
97 But it is not our problem! Nobody could accuse us, we made
98 all that we could make. Even if it is your gated who injected
99 fatal route to network, even if it were you who configured
100 fatal static route: you are innocent. :-)
104 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
105 practically identical code. It would be good to glue them
106 together, but it is not very evident, how to make them modular.
107 sit is integral part of IPv6, ipip and gre are naturally modular.
108 We could extract common parts (hash table, ioctl etc)
109 to a separate module (ip_tunnel.c).
114 static int ipgre_tunnel_init(struct device
*dev
);
116 /* Fallback tunnel: no source, no destination, no key, no options */
118 static int ipgre_fb_tunnel_init(struct device
*dev
);
120 static struct device ipgre_fb_tunnel_dev
= {
121 NULL
, 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL
, ipgre_fb_tunnel_init
,
124 static struct ip_tunnel ipgre_fb_tunnel
= {
125 NULL
, &ipgre_fb_tunnel_dev
, {0, }, 0, 0, 0, 0, 0, 0, 0, {"gre0", }
128 /* Tunnel hash table */
138 We require exact key match i.e. if a key is present in packet
139 it will match only tunnel with the same key; if it is not present,
140 it will match only keyless tunnel.
142 All keyless packets, if not matching configured keyless tunnels,
143 will match the fallback tunnel.
147 #define HASH(addr) ((addr^(addr>>4))&0xF)
149 static struct ip_tunnel
*tunnels
[4][HASH_SIZE
];
151 #define tunnels_r_l (tunnels[3])
152 #define tunnels_r (tunnels[2])
153 #define tunnels_l (tunnels[1])
154 #define tunnels_wc (tunnels[0])
156 /* Given src, dst and key, find approriate for input tunnel. */
158 static struct ip_tunnel
* ipgre_tunnel_lookup(u32 remote
, u32 local
, u32 key
)
160 unsigned h0
= HASH(remote
);
161 unsigned h1
= HASH(key
);
164 for (t
= tunnels_r_l
[h0
^h1
]; t
; t
= t
->next
) {
165 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
166 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
170 for (t
= tunnels_r
[h0
^h1
]; t
; t
= t
->next
) {
171 if (remote
== t
->parms
.iph
.daddr
) {
172 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
176 for (t
= tunnels_l
[h1
]; t
; t
= t
->next
) {
177 if (local
== t
->parms
.iph
.saddr
||
178 (local
== t
->parms
.iph
.daddr
&& MULTICAST(local
))) {
179 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
183 for (t
= tunnels_wc
[h1
]; t
; t
= t
->next
) {
184 if (t
->parms
.i_key
== key
&& (t
->dev
->flags
&IFF_UP
))
187 if (ipgre_fb_tunnel_dev
.flags
&IFF_UP
)
188 return &ipgre_fb_tunnel
;
192 static struct ip_tunnel
**ipgre_bucket(struct ip_tunnel
*t
)
194 u32 remote
= t
->parms
.iph
.daddr
;
195 u32 local
= t
->parms
.iph
.saddr
;
196 u32 key
= t
->parms
.i_key
;
197 unsigned h
= HASH(key
);
202 if (remote
&& !MULTICAST(remote
)) {
207 return &tunnels
[prio
][h
];
210 static void ipgre_tunnel_link(struct ip_tunnel
*t
)
212 struct ip_tunnel
**tp
= ipgre_bucket(t
);
219 static void ipgre_tunnel_unlink(struct ip_tunnel
*t
)
221 struct ip_tunnel
**tp
;
223 for (tp
= ipgre_bucket(t
); *tp
; tp
= &(*tp
)->next
) {
232 static struct ip_tunnel
* ipgre_tunnel_locate(struct ip_tunnel_parm
*parms
, int create
)
234 u32 remote
= parms
->iph
.daddr
;
235 u32 local
= parms
->iph
.saddr
;
236 u32 key
= parms
->i_key
;
237 struct ip_tunnel
*t
, **tp
, *nt
;
239 unsigned h
= HASH(key
);
244 if (remote
&& !MULTICAST(remote
)) {
248 for (tp
= &tunnels
[prio
][h
]; (t
= *tp
) != NULL
; tp
= &t
->next
) {
249 if (local
== t
->parms
.iph
.saddr
&& remote
== t
->parms
.iph
.daddr
) {
250 if (key
== t
->parms
.i_key
)
258 dev
= kmalloc(sizeof(*dev
) + sizeof(*t
), GFP_KERNEL
);
263 memset(dev
, 0, sizeof(*dev
) + sizeof(*t
));
264 dev
->priv
= (void*)(dev
+1);
265 nt
= (struct ip_tunnel
*)dev
->priv
;
267 dev
->name
= nt
->parms
.name
;
268 dev
->init
= ipgre_tunnel_init
;
269 memcpy(&nt
->parms
, parms
, sizeof(*parms
));
270 if (dev
->name
[0] == 0) {
272 for (i
=1; i
<100; i
++) {
273 sprintf(dev
->name
, "gre%d", i
);
274 if (dev_get(dev
->name
) == NULL
)
279 memcpy(parms
->name
, dev
->name
, IFNAMSIZ
);
281 if (register_netdevice(dev
) < 0)
284 ipgre_tunnel_link(nt
);
285 /* Do not decrement MOD_USE_COUNT here. */
294 static void ipgre_tunnel_destroy(struct device
*dev
)
296 ipgre_tunnel_unlink((struct ip_tunnel
*)dev
->priv
);
298 if (dev
!= &ipgre_fb_tunnel_dev
) {
305 void ipgre_err(struct sk_buff
*skb
, unsigned char *dp
, int len
)
307 #ifndef I_WISH_WORLD_WERE_PERFECT
309 /* It is not :-( All the routers (except for Linux) return only
310 8 bytes of packet payload. It means, that precise relaying of
311 ICMP in the real Internet is absolutely infeasible.
313 Moreover, Cisco "wise men" put GRE key to the third word
314 in GRE header. It makes impossible maintaining even soft state for keyed
315 GRE tunnels with enabled checksum. Tell them "thank you".
317 Well, I wonder, rfc1812 was written by Cisco employee,
318 what the hell these idiots break standrads established
322 struct iphdr
*iph
= (struct iphdr
*)dp
;
323 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
324 int grehlen
= (iph
->ihl
<<2) + 4;
325 int type
= skb
->h
.icmph
->type
;
326 int code
= skb
->h
.icmph
->code
;
331 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
332 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
341 /* If only 8 bytes returned, keyed message will be dropped here */
347 case ICMP_PARAMETERPROB
:
350 case ICMP_DEST_UNREACH
:
353 case ICMP_PORT_UNREACH
:
354 /* Impossible event. */
356 case ICMP_FRAG_NEEDED
:
357 /* Soft state for pmtu is maintained by IP core. */
360 /* All others are translated to HOST_UNREACH.
361 rfc2003 contains "deep thoughts" about NET_UNREACH,
362 I believe they are just ether pollution. --ANK
367 case ICMP_TIME_EXCEEDED
:
368 if (code
!= ICMP_EXC_TTL
)
373 t
= ipgre_tunnel_lookup(iph
->daddr
, iph
->saddr
, (flags
&GRE_KEY
) ? *(((u32
*)p
) + (grehlen
>>2) - 1) : 0);
374 if (t
== NULL
|| t
->parms
.iph
.daddr
== 0 || MULTICAST(t
->parms
.iph
.daddr
))
377 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
380 if (jiffies
- t
->err_time
< IPTUNNEL_ERR_TIMEO
)
384 t
->err_time
= jiffies
;
387 struct iphdr
*iph
= (struct iphdr
*)dp
;
389 u16
*p
= (u16
*)(dp
+(iph
->ihl
<<2));
390 int type
= skb
->h
.icmph
->type
;
391 int code
= skb
->h
.icmph
->code
;
396 int grehlen
= (iph
->ihl
<<2) + 4;
397 struct sk_buff
*skb2
;
400 if (p
[1] != __constant_htons(ETH_P_IP
))
404 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
|GRE_ROUTING
|GRE_VERSION
)) {
405 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
414 if (len
< grehlen
+ sizeof(struct iphdr
))
416 eiph
= (struct iphdr
*)(dp
+ grehlen
);
421 case ICMP_PARAMETERPROB
:
422 if (skb
->h
.icmph
->un
.gateway
< (iph
->ihl
<<2))
425 /* So... This guy found something strange INSIDE encapsulated
426 packet. Well, he is fool, but what can we do ?
428 rel_type
= ICMP_PARAMETERPROB
;
429 rel_info
= skb
->h
.icmph
->un
.gateway
- grehlen
;
432 case ICMP_DEST_UNREACH
:
435 case ICMP_PORT_UNREACH
:
436 /* Impossible event. */
438 case ICMP_FRAG_NEEDED
:
439 /* And it is the only really necesary thing :-) */
440 rel_info
= ntohs(skb
->h
.icmph
->un
.frag
.mtu
);
441 if (rel_info
< grehlen
+68)
444 /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
445 if (rel_info
> ntohs(eiph
->tot_len
))
449 /* All others are translated to HOST_UNREACH.
450 rfc2003 contains "deep thoughts" about NET_UNREACH,
451 I believe, it is just ether pollution. --ANK
453 rel_type
= ICMP_DEST_UNREACH
;
454 rel_code
= ICMP_HOST_UNREACH
;
458 case ICMP_TIME_EXCEEDED
:
459 if (code
!= ICMP_EXC_TTL
)
464 /* Prepare fake skb to feed it to icmp_send */
465 skb2
= skb_clone(skb
, GFP_ATOMIC
);
468 dst_release(skb2
->dst
);
470 skb_pull(skb2
, skb
->data
- (u8
*)eiph
);
471 skb2
->nh
.raw
= skb2
->data
;
473 /* Try to guess incoming interface */
474 if (ip_route_output(&rt
, eiph
->saddr
, 0, RT_TOS(eiph
->tos
), 0)) {
478 skb2
->dev
= rt
->u
.dst
.dev
;
480 /* route "incoming" packet */
481 if (rt
->rt_flags
&RTCF_LOCAL
) {
484 if (ip_route_output(&rt
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, 0) ||
485 rt
->u
.dst
.dev
->type
!= ARPHRD_IPGRE
) {
492 if (ip_route_input(skb2
, eiph
->daddr
, eiph
->saddr
, eiph
->tos
, skb2
->dev
) ||
493 skb2
->dst
->dev
->type
!= ARPHRD_IPGRE
) {
499 /* change mtu on this route */
500 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
501 if (rel_info
> skb2
->dst
->pmtu
) {
505 skb2
->dst
->pmtu
= rel_info
;
506 rel_info
= htonl(rel_info
);
507 } else if (type
== ICMP_TIME_EXCEEDED
) {
508 struct ip_tunnel
*t
= (struct ip_tunnel
*)skb2
->dev
->priv
;
509 if (t
->parms
.iph
.ttl
) {
510 rel_type
= ICMP_DEST_UNREACH
;
511 rel_code
= ICMP_HOST_UNREACH
;
515 icmp_send(skb2
, rel_type
, rel_code
, rel_info
);
520 int ipgre_rcv(struct sk_buff
*skb
, unsigned short len
)
522 struct iphdr
*iph
= skb
->nh
.iph
;
524 u16 flags
= *(u16
*)h
;
528 struct ip_tunnel
*tunnel
;
531 if (flags
&(GRE_CSUM
|GRE_KEY
|GRE_ROUTING
|GRE_SEQ
|GRE_VERSION
)) {
532 /* - Version must be 0.
533 - We do not support routing headers.
535 if (flags
&(GRE_VERSION
|GRE_ROUTING
))
538 if (flags
&GRE_CSUM
) {
539 csum
= ip_compute_csum(h
, len
);
543 key
= *(u32
*)(h
+ offset
);
547 seqno
= ntohl(*(u32
*)(h
+ offset
));
552 if ((tunnel
= ipgre_tunnel_lookup(iph
->saddr
, iph
->daddr
, key
)) != NULL
) {
553 skb
->mac
.raw
= skb
->nh
.raw
;
554 skb
->nh
.raw
= skb_pull(skb
, h
+ offset
- skb
->data
);
555 memset(&(IPCB(skb
)->opt
), 0, sizeof(struct ip_options
));
557 skb
->protocol
= *(u16
*)(h
+ 2);
558 skb
->pkt_type
= PACKET_HOST
;
559 #ifdef CONFIG_NET_IPGRE_BROADCAST
560 if (MULTICAST(iph
->daddr
)) {
561 /* Looped back packet, drop it! */
562 if (((struct rtable
*)skb
->dst
)->key
.iif
== 0)
564 tunnel
->stat
.multicast
++;
565 skb
->pkt_type
= PACKET_BROADCAST
;
569 if (((flags
&GRE_CSUM
) && csum
) ||
570 (!(flags
&GRE_CSUM
) && tunnel
->parms
.i_flags
&GRE_CSUM
)) {
571 tunnel
->stat
.rx_crc_errors
++;
572 tunnel
->stat
.rx_errors
++;
575 if (tunnel
->parms
.i_flags
&GRE_SEQ
) {
576 if (!(flags
&GRE_SEQ
) ||
577 (tunnel
->i_seqno
&& (s32
)(seqno
- tunnel
->i_seqno
) < 0)) {
578 tunnel
->stat
.rx_fifo_errors
++;
579 tunnel
->stat
.rx_errors
++;
582 tunnel
->i_seqno
= seqno
+ 1;
584 tunnel
->stat
.rx_packets
++;
585 tunnel
->stat
.rx_bytes
+= skb
->len
;
586 skb
->dev
= tunnel
->dev
;
587 dst_release(skb
->dst
);
592 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PROT_UNREACH
, 0);
599 static int ipgre_tunnel_xmit(struct sk_buff
*skb
, struct device
*dev
)
601 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
602 struct net_device_stats
*stats
= &tunnel
->stat
;
603 struct iphdr
*old_iph
= skb
->nh
.iph
;
607 struct rtable
*rt
; /* Route to the other host */
608 struct device
*tdev
; /* Device to other host */
609 struct iphdr
*iph
; /* Our new IP header */
610 int max_headroom
; /* The extra header space needed */
615 if (tunnel
->recursion
++) {
616 tunnel
->stat
.collisions
++;
620 if (dev
->hard_header
) {
622 tiph
= (struct iphdr
*)skb
->data
;
624 gre_hlen
= tunnel
->hlen
;
625 tiph
= &tunnel
->parms
.iph
;
628 if ((dst
= tiph
->daddr
) == 0) {
631 if (skb
->dst
== NULL
) {
632 tunnel
->stat
.tx_fifo_errors
++;
636 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
637 rt
= (struct rtable
*)skb
->dst
;
638 if ((dst
= rt
->rt_gateway
) == 0)
642 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
643 struct in6_addr
*addr6
;
645 struct neighbour
*neigh
= skb
->dst
->neighbour
;
650 addr6
= (struct in6_addr
*)&neigh
->primary_key
;
651 addr_type
= ipv6_addr_type(addr6
);
653 if (addr_type
== IPV6_ADDR_ANY
) {
654 addr6
= &skb
->nh
.ipv6h
->daddr
;
655 addr_type
= ipv6_addr_type(addr6
);
658 if ((addr_type
& IPV6_ADDR_COMPATv4
) == 0)
661 dst
= addr6
->s6_addr32
[3];
670 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
675 if (ip_route_output(&rt
, dst
, tiph
->saddr
, RT_TOS(tos
), tunnel
->parms
.link
)) {
676 tunnel
->stat
.tx_carrier_errors
++;
679 tdev
= rt
->u
.dst
.dev
;
683 tunnel
->stat
.collisions
++;
688 mtu
= rt
->u
.dst
.pmtu
- tunnel
->hlen
;
690 if (skb
->protocol
== __constant_htons(ETH_P_IP
)) {
691 if (skb
->dst
&& mtu
< skb
->dst
->pmtu
&& mtu
>= 68)
692 skb
->dst
->pmtu
= mtu
;
694 df
|= (old_iph
->frag_off
&__constant_htons(IP_DF
));
696 if ((old_iph
->frag_off
&__constant_htons(IP_DF
)) &&
697 mtu
< ntohs(old_iph
->tot_len
)) {
698 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
, htonl(mtu
));
704 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
)) {
705 struct rt6_info
*rt6
= (struct rt6_info
*)skb
->dst
;
707 if (rt6
&& mtu
< rt6
->u
.dst
.pmtu
&& mtu
>= IPV6_MIN_MTU
) {
708 if ((tunnel
->parms
.iph
.daddr
&& !MULTICAST(tunnel
->parms
.iph
.daddr
)) ||
709 rt6
->rt6i_dst
.plen
== 128) {
710 rt6
->rt6i_flags
|= RTF_MODIFIED
;
711 skb
->dst
->pmtu
= mtu
;
715 if (mtu
>= IPV6_MIN_MTU
&& mtu
< skb
->len
- tunnel
->hlen
+ gre_hlen
) {
716 icmpv6_send(skb
, ICMPV6_PKT_TOOBIG
, 0, mtu
, dev
);
723 if (tunnel
->err_count
> 0) {
724 if (jiffies
- tunnel
->err_time
< IPTUNNEL_ERR_TIMEO
) {
727 dst_link_failure(skb
);
729 tunnel
->err_count
= 0;
732 skb
->h
.raw
= skb
->nh
.raw
;
734 max_headroom
= ((tdev
->hard_header_len
+15)&~15)+ gre_hlen
;
736 if (skb_headroom(skb
) < max_headroom
|| skb_cloned(skb
) || skb_shared(skb
)) {
737 struct sk_buff
*new_skb
= skb_realloc_headroom(skb
, max_headroom
);
746 skb_set_owner_w(new_skb
, skb
->sk
);
751 skb
->nh
.raw
= skb_push(skb
, gre_hlen
);
752 memset(&(IPCB(skb
)->opt
), 0, sizeof(IPCB(skb
)->opt
));
753 dst_release(skb
->dst
);
754 skb
->dst
= &rt
->u
.dst
;
757 * Push down and install the IPIP header.
762 iph
->ihl
= sizeof(struct iphdr
) >> 2;
764 iph
->protocol
= IPPROTO_GRE
;
766 iph
->daddr
= rt
->rt_dst
;
767 iph
->saddr
= rt
->rt_src
;
769 if ((iph
->ttl
= tiph
->ttl
) == 0) {
770 if (skb
->protocol
== __constant_htons(ETH_P_IP
))
771 iph
->ttl
= old_iph
->ttl
;
773 else if (skb
->protocol
== __constant_htons(ETH_P_IPV6
))
774 iph
->ttl
= ((struct ipv6hdr
*)old_iph
)->hop_limit
;
777 iph
->ttl
= ip_statistics
.IpDefaultTTL
;
780 ((u16
*)(iph
+1))[0] = tunnel
->parms
.o_flags
;
781 ((u16
*)(iph
+1))[1] = skb
->protocol
;
783 if (tunnel
->parms
.o_flags
&(GRE_KEY
|GRE_CSUM
|GRE_SEQ
)) {
784 u32
*ptr
= (u32
*)(((u8
*)iph
) + tunnel
->hlen
- 4);
786 if (tunnel
->parms
.o_flags
&GRE_SEQ
) {
788 *ptr
= htonl(tunnel
->o_seqno
);
791 if (tunnel
->parms
.o_flags
&GRE_KEY
) {
792 *ptr
= tunnel
->parms
.o_key
;
795 if (tunnel
->parms
.o_flags
&GRE_CSUM
) {
797 *(__u16
*)ptr
= ip_compute_csum((void*)(iph
+1), skb
->len
- sizeof(struct iphdr
));
801 iph
->tot_len
= htons(skb
->len
);
802 iph
->id
= htons(ip_id_count
++);
805 stats
->tx_bytes
+= skb
->len
;
812 dst_link_failure(skb
);
822 ipgre_tunnel_ioctl (struct device
*dev
, struct ifreq
*ifr
, int cmd
)
825 struct ip_tunnel_parm p
;
833 if (dev
== &ipgre_fb_tunnel_dev
) {
834 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
))) {
838 t
= ipgre_tunnel_locate(&p
, 0);
841 t
= (struct ip_tunnel
*)dev
->priv
;
842 memcpy(&p
, &t
->parms
, sizeof(p
));
843 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
850 if (!capable(CAP_NET_ADMIN
))
854 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
858 if (p
.iph
.version
!= 4 || p
.iph
.protocol
!= IPPROTO_GRE
||
859 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&__constant_htons(~IP_DF
)) ||
860 ((p
.i_flags
|p
.o_flags
)&(GRE_VERSION
|GRE_ROUTING
)))
863 p
.iph
.frag_off
|= __constant_htons(IP_DF
);
865 if (!(p
.i_flags
&GRE_KEY
))
867 if (!(p
.o_flags
&GRE_KEY
))
870 t
= ipgre_tunnel_locate(&p
, cmd
== SIOCADDTUNNEL
);
872 if (dev
!= &ipgre_fb_tunnel_dev
&& cmd
== SIOCCHGTUNNEL
&&
873 t
!= &ipgre_fb_tunnel
) {
882 t
= (struct ip_tunnel
*)dev
->priv
;
884 if (MULTICAST(p
.iph
.daddr
))
885 nflags
= IFF_BROADCAST
;
886 else if (p
.iph
.daddr
)
887 nflags
= IFF_POINTOPOINT
;
889 if ((dev
->flags
^nflags
)&(IFF_POINTOPOINT
|IFF_BROADCAST
)) {
894 ipgre_tunnel_unlink(t
);
895 t
->parms
.iph
.saddr
= p
.iph
.saddr
;
896 t
->parms
.iph
.daddr
= p
.iph
.daddr
;
897 t
->parms
.i_key
= p
.i_key
;
898 t
->parms
.o_key
= p
.o_key
;
899 memcpy(dev
->dev_addr
, &p
.iph
.saddr
, 4);
900 memcpy(dev
->broadcast
, &p
.iph
.daddr
, 4);
901 ipgre_tunnel_link(t
);
903 netdev_state_change(dev
);
909 if (cmd
== SIOCCHGTUNNEL
) {
910 t
->parms
.iph
.ttl
= p
.iph
.ttl
;
911 t
->parms
.iph
.tos
= p
.iph
.tos
;
912 t
->parms
.iph
.frag_off
= p
.iph
.frag_off
;
914 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &t
->parms
, sizeof(p
)))
917 err
= (cmd
== SIOCADDTUNNEL
? -ENOBUFS
: -ENOENT
);
922 if (!capable(CAP_NET_ADMIN
))
925 if (dev
== &ipgre_fb_tunnel_dev
) {
927 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
930 if ((t
= ipgre_tunnel_locate(&p
, 0)) == NULL
)
933 if (t
== &ipgre_fb_tunnel
)
936 err
= unregister_netdevice(dev
);
948 static struct net_device_stats
*ipgre_tunnel_get_stats(struct device
*dev
)
950 return &(((struct ip_tunnel
*)dev
->priv
)->stat
);
953 static int ipgre_tunnel_change_mtu(struct device
*dev
, int new_mtu
)
955 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
956 if (new_mtu
< 68 || new_mtu
> 0xFFF8 - tunnel
->hlen
)
962 #ifdef CONFIG_NET_IPGRE_BROADCAST
963 /* Nice toy. Unfortunately, useless in real life :-)
964 It allows to construct virtual multiprotocol broadcast "LAN"
965 over the Internet, provided multicast routing is tuned.
968 I have no idea was this bicycle invented before me,
969 so that I had to set ARPHRD_IPGRE to a random value.
970 I have an impression, that Cisco could make something similar,
971 but this feature is apparently missing in IOS<=11.2(8).
973 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
974 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
976 ping -t 255 224.66.66.66
978 If nobody answers, mbone does not work.
980 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
981 ip addr add 10.66.66.<somewhat>/24 dev Universe
983 ifconfig Universe add fe80::<Your_real_addr>/10
984 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
987 ftp fec0:6666:6666::193.233.7.65
992 static int ipgre_header(struct sk_buff
*skb
, struct device
*dev
, unsigned short type
,
993 void *daddr
, void *saddr
, unsigned len
)
995 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
996 struct iphdr
*iph
= (struct iphdr
*)skb_push(skb
, t
->hlen
);
997 u16
*p
= (u16
*)(iph
+1);
999 memcpy(iph
, &t
->parms
.iph
, sizeof(struct iphdr
));
1000 p
[0] = t
->parms
.o_flags
;
1004 * Set the source hardware address.
1008 memcpy(&iph
->saddr
, saddr
, 4);
1011 memcpy(&iph
->daddr
, daddr
, 4);
1014 if (iph
->daddr
&& !MULTICAST(iph
->daddr
))
1020 static int ipgre_open(struct device
*dev
)
1022 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1025 if (MULTICAST(t
->parms
.iph
.daddr
)) {
1027 if (ip_route_output(&rt
, t
->parms
.iph
.daddr
,
1028 t
->parms
.iph
.saddr
, RT_TOS(t
->parms
.iph
.tos
),
1031 return -EADDRNOTAVAIL
;
1033 dev
= rt
->u
.dst
.dev
;
1035 if (dev
->ip_ptr
== NULL
) {
1037 return -EADDRNOTAVAIL
;
1039 t
->mlink
= dev
->ifindex
;
1040 ip_mc_inc_group(dev
->ip_ptr
, t
->parms
.iph
.daddr
);
1045 static int ipgre_close(struct device
*dev
)
1047 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1048 if (MULTICAST(t
->parms
.iph
.daddr
) && t
->mlink
) {
1049 dev
= dev_get_by_index(t
->mlink
);
1050 if (dev
&& dev
->ip_ptr
)
1051 ip_mc_dec_group(dev
->ip_ptr
, t
->parms
.iph
.daddr
);
1059 static void ipgre_tunnel_init_gen(struct device
*dev
)
1061 struct ip_tunnel
*t
= (struct ip_tunnel
*)dev
->priv
;
1063 dev
->destructor
= ipgre_tunnel_destroy
;
1064 dev
->hard_start_xmit
= ipgre_tunnel_xmit
;
1065 dev
->get_stats
= ipgre_tunnel_get_stats
;
1066 dev
->do_ioctl
= ipgre_tunnel_ioctl
;
1067 dev
->change_mtu
= ipgre_tunnel_change_mtu
;
1069 dev_init_buffers(dev
);
1071 dev
->type
= ARPHRD_IPGRE
;
1072 dev
->hard_header_len
= LL_MAX_HEADER
+ sizeof(struct iphdr
) + 4;
1073 dev
->mtu
= 1500 - sizeof(struct iphdr
) - 4;
1074 dev
->flags
= IFF_NOARP
;
1077 memcpy(dev
->dev_addr
, &t
->parms
.iph
.saddr
, 4);
1078 memcpy(dev
->broadcast
, &t
->parms
.iph
.daddr
, 4);
1081 static int ipgre_tunnel_init(struct device
*dev
)
1083 struct device
*tdev
= NULL
;
1084 struct ip_tunnel
*tunnel
;
1086 int hlen
= LL_MAX_HEADER
;
1088 int addend
= sizeof(struct iphdr
) + 4;
1090 tunnel
= (struct ip_tunnel
*)dev
->priv
;
1091 iph
= &tunnel
->parms
.iph
;
1093 ipgre_tunnel_init_gen(dev
);
1095 /* Guess output device to choose reasonable mtu and hard_header_len */
1099 if (!ip_route_output(&rt
, iph
->daddr
, iph
->saddr
, RT_TOS(iph
->tos
), tunnel
->parms
.link
)) {
1100 tdev
= rt
->u
.dst
.dev
;
1104 dev
->flags
|= IFF_POINTOPOINT
;
1106 #ifdef CONFIG_NET_IPGRE_BROADCAST
1107 if (MULTICAST(iph
->daddr
)) {
1110 dev
->flags
= IFF_BROADCAST
;
1111 dev
->hard_header
= ipgre_header
;
1112 dev
->open
= ipgre_open
;
1113 dev
->stop
= ipgre_close
;
1118 if (!tdev
&& tunnel
->parms
.link
)
1119 tdev
= dev_get_by_index(tunnel
->parms
.link
);
1122 hlen
= tdev
->hard_header_len
;
1125 dev
->iflink
= tunnel
->parms
.link
;
1127 /* Precalculate GRE options length */
1128 if (tunnel
->parms
.o_flags
&(GRE_CSUM
|GRE_KEY
|GRE_SEQ
)) {
1129 if (tunnel
->parms
.o_flags
&GRE_CSUM
)
1131 if (tunnel
->parms
.o_flags
&GRE_KEY
)
1133 if (tunnel
->parms
.o_flags
&GRE_SEQ
)
1136 dev
->hard_header_len
= hlen
+ addend
;
1137 dev
->mtu
= mtu
- addend
;
1138 tunnel
->hlen
= addend
;
1143 static int ipgre_fb_tunnel_open(struct device
*dev
)
1149 static int ipgre_fb_tunnel_close(struct device
*dev
)
1156 __initfunc(int ipgre_fb_tunnel_init(struct device
*dev
))
1158 struct ip_tunnel
*tunnel
= (struct ip_tunnel
*)dev
->priv
;
1161 ipgre_tunnel_init_gen(dev
);
1163 dev
->open
= ipgre_fb_tunnel_open
;
1164 dev
->stop
= ipgre_fb_tunnel_close
;
1167 iph
= &ipgre_fb_tunnel
.parms
.iph
;
1169 iph
->protocol
= IPPROTO_GRE
;
1171 tunnel
->hlen
= sizeof(struct iphdr
) + 4;
1173 tunnels_wc
[0] = &ipgre_fb_tunnel
;
1178 static struct inet_protocol ipgre_protocol
= {
1179 ipgre_rcv
, /* GRE handler */
1180 ipgre_err
, /* TUNNEL error control */
1182 IPPROTO_GRE
, /* protocol ID */
1190 * And now the modules code and kernel interface.
1194 int init_module(void)
1196 __initfunc(int ipgre_init(void))
1199 printk(KERN_INFO
"GRE over IPv4 tunneling driver\n");
1201 ipgre_fb_tunnel_dev
.priv
= (void*)&ipgre_fb_tunnel
;
1202 ipgre_fb_tunnel_dev
.name
= ipgre_fb_tunnel
.parms
.name
;
1204 register_netdev(&ipgre_fb_tunnel_dev
);
1206 register_netdevice(&ipgre_fb_tunnel_dev
);
1209 inet_add_protocol(&ipgre_protocol
);
1215 void cleanup_module(void)
1217 if ( inet_del_protocol(&ipgre_protocol
) < 0 )
1218 printk(KERN_INFO
"ipgre close: can't remove protocol\n");
1220 unregister_netdev(&ipgre_fb_tunnel_dev
);