x86: align DirectMap in /proc/meminfo
[linux-2.6/mini2440.git] / net / ipv4 / ip_gre.c
blob85c487b8572b726a365e869a92e8ad00669d76b7
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
54 Problems & solutions
55 --------------------
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: t->recursion lock breaks dead loops. It looks
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
119 Alexey Kuznetsov.
/* rtnetlink ops; assigned to dev->rtnl_link_ops of every GRE device allocated in this file. */
122 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
/* Forward declarations of device callbacks that are referenced before their definitions. */
123 static int ipgre_tunnel_init(struct net_device *dev);
124 static void ipgre_tunnel_setup(struct net_device *dev);
125 static int ipgre_tunnel_bind_dev(struct net_device *dev);
127 /* Fallback tunnel: no source, no destination, no key, no options */
129 static int ipgre_fb_tunnel_init(struct net_device *dev);
/* Number of buckets in each of the per-namespace tunnel hash tables. */
131 #define HASH_SIZE 16
/* Id used with net_generic()/net_assign_generic() to reach this module's struct ipgre_net. */
133 static int ipgre_net_id;
134 struct ipgre_net {
135 struct ip_tunnel *tunnels[4][HASH_SIZE];
137 struct net_device *fb_tunnel_dev;
140 /* Tunnel hash table */
143 4 hash tables:
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
150 We require exact key match i.e. if a key is present in packet
151 it will match only tunnel with the same key; if it is not present,
152 it will match only keyless tunnel.
154 All keyless packets, if not matched by configured keyless tunnels
155 will match fallback tunnel.
/*
 * Fold a 32-bit value (an IPv4 address or a GRE key) into a bucket index
 * for the tunnel hash tables.
 *
 * The argument is fully parenthesized so that a compound expression such
 * as HASH(a | b) hashes the whole expression rather than only its first
 * operand.  The mask is derived from HASH_SIZE instead of being hard-coded;
 * HASH_SIZE must therefore remain a power of two.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
/*
 * Shorthand names for the four hash tables inside struct ipgre_net;
 * the index is the match priority (3 = most specific, 0 = wildcard).
 */
160 #define tunnels_r_l tunnels[3]
161 #define tunnels_r tunnels[2]
162 #define tunnels_l tunnels[1]
163 #define tunnels_wc tunnels[0]
/*
 * Guards the hash chains: taken for writing (with BHs disabled) when a
 * tunnel is linked or unlinked, and for reading around lookups in the
 * receive and ICMP error paths.
 */
165 static DEFINE_RWLOCK(ipgre_lock);
167 /* Given src, dst and key, find appropriate for input tunnel. */
169 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
176 struct ip_tunnel *t2 = NULL;
177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
193 if (remote == t->parms.iph.daddr) {
194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
204 if (local == t->parms.iph.saddr ||
205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
225 if (t2)
226 return t2;
228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
230 return NULL;
233 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
239 unsigned h = HASH(key);
240 int prio = 0;
242 if (local)
243 prio |= 1;
244 if (remote && !ipv4_is_multicast(remote)) {
245 prio |= 2;
246 h ^= HASH(remote);
249 return &ign->tunnels[prio][h];
252 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
255 return __ipgre_bucket(ign, &t->parms);
258 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
268 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
270 struct ip_tunnel **tp;
272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
282 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
299 return t;
302 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
305 struct ip_tunnel *t, *nt;
306 struct net_device *dev;
307 char name[IFNAMSIZ];
308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
316 else
317 sprintf(name, "gre%%d");
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
323 dev_net_set(dev, net);
325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
330 nt = netdev_priv(dev);
331 nt->parms = *parms;
332 dev->rtnl_link_ops = &ipgre_link_ops;
334 dev->mtu = ipgre_tunnel_bind_dev(dev);
336 if (register_netdevice(dev) < 0)
337 goto failed_free;
339 dev_hold(dev);
340 ipgre_tunnel_link(ign, nt);
341 return nt;
343 failed_free:
344 free_netdev(dev);
345 return NULL;
348 static void ipgre_tunnel_uninit(struct net_device *dev)
350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
354 dev_put(dev);
358 static void ipgre_err(struct sk_buff *skb, u32 info)
361 /* All the routers (except for Linux) return only
362 8 bytes of packet payload. It means, that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
365 Moreover, Cisco "wise men" put GRE key to the third word
366 in GRE header. It makes impossible maintaining even soft state for keyed
367 GRE tunnels with enabled checksum. Tell them "thank you".
369 Well, I wonder, rfc1812 was written by Cisco employee,
370 what the hell these idiots break standrads established
371 by themself???
374 struct iphdr *iph = (struct iphdr*)skb->data;
375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
376 int grehlen = (iph->ihl<<2) + 4;
377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
379 struct ip_tunnel *t;
380 __be16 flags;
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
393 /* If only 8 bytes returned, keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
416 break;
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
425 read_lock(&ipgre_lock);
426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
432 goto out;
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442 out:
443 read_unlock(&ipgre_lock);
444 return;
447 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
451 IP_ECN_set_ce(ip_hdr(skb));
452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
453 IP6_ECN_set_ce(ipv6_hdr(skb));
458 static inline u8
459 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
/*
 * Receive handler for GRE packets (installed as ipgre_protocol.handler).
 *
 * Parses the GRE flags and the optional checksum, key and sequence
 * fields, looks up the matching tunnel, strips the GRE header and feeds
 * the inner packet to netif_rx() on the tunnel device.  When no tunnel
 * matches, an ICMP port-unreachable is sent.  Always returns 0; on every
 * failure path the skb is freed.
 */
469 static int ipgre_rcv(struct sk_buff *skb)
471 struct iphdr *iph;
472 u8 *h;
473 __be16 flags;
474 __sum16 csum = 0;
475 __be32 key = 0;
476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
478 int offset = 4;
479 __be16 gre_proto;
480 unsigned int len;
/* 16 bytes = 4-byte base header plus up to three 4-byte optional fields read below. */
482 if (!pskb_may_pull(skb, 16))
483 goto drop_nolock;
485 iph = ip_hdr(skb);
486 h = skb->data;
487 flags = *(__be16*)h;
489 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
490 /* - Version must be 0.
491 - We do not support routing headers.
493 if (flags&(GRE_VERSION|GRE_ROUTING))
494 goto drop_nolock;
496 if (flags&GRE_CSUM) {
497 switch (skb->ip_summed) {
498 case CHECKSUM_COMPLETE:
499 csum = csum_fold(skb->csum);
500 if (!csum)
501 break;
502 /* fall through */
503 case CHECKSUM_NONE:
504 skb->csum = 0;
505 csum = __skb_checksum_complete(skb);
506 skb->ip_summed = CHECKSUM_COMPLETE;
508 offset += 4;
510 if (flags&GRE_KEY) {
511 key = *(__be32*)(h + offset);
512 offset += 4;
514 if (flags&GRE_SEQ) {
515 seqno = ntohl(*(__be32*)(h + offset));
516 offset += 4;
/* The protocol type is the second 16-bit word of the GRE header. */
520 gre_proto = *(__be16 *)(h + 2);
/* The tunnel is used under the read lock until netif_rx() or the drop label. */
522 read_lock(&ipgre_lock);
523 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
524 iph->saddr, iph->daddr, key,
525 gre_proto))) {
526 struct net_device_stats *stats = &tunnel->dev->stats;
528 secpath_reset(skb);
530 skb->protocol = gre_proto;
531 /* WCCP version 1 and 2 protocol decoding.
532 * - Change protocol to IP
533 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
535 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
536 skb->protocol = htons(ETH_P_IP);
537 if ((*(h + offset) & 0xF0) != 0x40)
538 offset += 4;
541 skb->mac_header = skb->network_header;
542 __pskb_pull(skb, offset);
543 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
544 skb->pkt_type = PACKET_HOST;
545 #ifdef CONFIG_NET_IPGRE_BROADCAST
546 if (ipv4_is_multicast(iph->daddr)) {
547 /* Looped back packet, drop it! */
548 if (skb->rtable->fl.iif == 0)
549 goto drop;
550 stats->multicast++;
551 skb->pkt_type = PACKET_BROADCAST;
553 #endif
/* Drop on a bad checksum, or when the tunnel requires a checksum and none was sent. */
555 if (((flags&GRE_CSUM) && csum) ||
556 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
557 stats->rx_crc_errors++;
558 stats->rx_errors++;
559 goto drop;
/* With sequencing enabled, drop packets lacking a sequence number or older than expected. */
561 if (tunnel->parms.i_flags&GRE_SEQ) {
562 if (!(flags&GRE_SEQ) ||
563 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
564 stats->rx_fifo_errors++;
565 stats->rx_errors++;
566 goto drop;
568 tunnel->i_seqno = seqno + 1;
/* Remember the length now; it is what gets accounted in rx_bytes below. */
571 len = skb->len;
573 /* Warning: All skb pointers will be invalidated! */
574 if (tunnel->dev->type == ARPHRD_ETHER) {
575 if (!pskb_may_pull(skb, ETH_HLEN)) {
576 stats->rx_length_errors++;
577 stats->rx_errors++;
578 goto drop;
581 iph = ip_hdr(skb);
582 skb->protocol = eth_type_trans(skb, tunnel->dev);
583 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
586 stats->rx_packets++;
587 stats->rx_bytes += len;
588 skb->dev = tunnel->dev;
589 dst_release(skb->dst);
590 skb->dst = NULL;
591 nf_reset(skb);
593 skb_reset_network_header(skb);
594 ipgre_ecn_decapsulate(iph, skb);
596 netif_rx(skb);
597 read_unlock(&ipgre_lock);
598 return(0);
/* Reached only when the lookup above found no tunnel for this packet. */
600 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
602 drop:
603 read_unlock(&ipgre_lock);
604 drop_nolock:
605 kfree_skb(skb);
606 return(0);
/*
 * Transmit handler (installed as dev->hard_start_xmit).
 *
 * Routes to the tunnel endpoint, performs path-MTU bookkeeping, makes
 * room for and fills in the outer IPv4 header plus GRE header (flags,
 * protocol and the optional checksum/key/sequence words), then sends the
 * packet with IPTUNNEL_XMIT().  Always returns 0: on error the skb is
 * freed and tx_errors (or tx_dropped on allocation failure) is counted.
 */
609 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
611 struct ip_tunnel *tunnel = netdev_priv(dev);
612 struct net_device_stats *stats = &tunnel->dev->stats;
613 struct iphdr *old_iph = ip_hdr(skb);
614 struct iphdr *tiph;
615 u8 tos;
616 __be16 df;
617 struct rtable *rt; /* Route to the other host */
618 struct net_device *tdev; /* Device to other host */
619 struct iphdr *iph; /* Our new IP header */
620 unsigned int max_headroom; /* The extra header space needed */
621 int gre_hlen;
622 __be32 dst;
623 int mtu;
/* Non-zero recursion means a transmit on this tunnel is already in progress: drop. */
625 if (tunnel->recursion++) {
626 stats->collisions++;
627 goto tx_error;
630 if (dev->type == ARPHRD_ETHER)
631 IPCB(skb)->flags = 0;
/*
 * With header_ops on an IPGRE device the header at skb->data is used as
 * the template and nothing more is pushed (presumably it was built by
 * ipgre_header() -- confirm); otherwise the tunnel's configured header is
 * the template and tunnel->hlen bytes are pushed later.
 */
633 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
634 gre_hlen = 0;
635 tiph = (struct iphdr*)skb->data;
636 } else {
637 gre_hlen = tunnel->hlen;
638 tiph = &tunnel->parms.iph;
/* No fixed destination: derive it from the route gateway (IPv4) or an IPv4-compatible IPv6 address. */
641 if ((dst = tiph->daddr) == 0) {
642 /* NBMA tunnel */
644 if (skb->dst == NULL) {
645 stats->tx_fifo_errors++;
646 goto tx_error;
649 if (skb->protocol == htons(ETH_P_IP)) {
650 rt = skb->rtable;
651 if ((dst = rt->rt_gateway) == 0)
652 goto tx_error_icmp;
654 #ifdef CONFIG_IPV6
655 else if (skb->protocol == htons(ETH_P_IPV6)) {
656 struct in6_addr *addr6;
657 int addr_type;
658 struct neighbour *neigh = skb->dst->neighbour;
660 if (neigh == NULL)
661 goto tx_error;
663 addr6 = (struct in6_addr*)&neigh->primary_key;
664 addr_type = ipv6_addr_type(addr6);
666 if (addr_type == IPV6_ADDR_ANY) {
667 addr6 = &ipv6_hdr(skb)->daddr;
668 addr_type = ipv6_addr_type(addr6);
671 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
672 goto tx_error_icmp;
674 dst = addr6->s6_addr32[3];
676 #endif
677 else
678 goto tx_error;
/* Low bit of the configured tos: take the TOS from the inner IPv4 header instead. */
681 tos = tiph->tos;
682 if (tos&1) {
683 if (skb->protocol == htons(ETH_P_IP))
684 tos = old_iph->tos;
685 tos &= ~1;
689 struct flowi fl = { .oif = tunnel->parms.link,
690 .nl_u = { .ip4_u =
691 { .daddr = dst,
692 .saddr = tiph->saddr,
693 .tos = RT_TOS(tos) } },
694 .proto = IPPROTO_GRE };
695 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
696 stats->tx_carrier_errors++;
697 goto tx_error;
700 tdev = rt->u.dst.dev;
/* The route points back at this tunnel device: local loop, drop. */
702 if (tdev == dev) {
703 ip_rt_put(rt);
704 stats->collisions++;
705 goto tx_error;
/* With DF set the usable MTU is the outer path MTU minus the tunnel overhead. */
708 df = tiph->frag_off;
709 if (df)
710 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
711 else
712 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
714 if (skb->dst)
715 skb->dst->ops->update_pmtu(skb->dst, mtu);
717 if (skb->protocol == htons(ETH_P_IP)) {
718 df |= (old_iph->frag_off&htons(IP_DF));
720 if ((old_iph->frag_off&htons(IP_DF)) &&
721 mtu < ntohs(old_iph->tot_len)) {
722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
723 ip_rt_put(rt);
724 goto tx_error;
727 #ifdef CONFIG_IPV6
728 else if (skb->protocol == htons(ETH_P_IPV6)) {
729 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
731 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
732 if ((tunnel->parms.iph.daddr &&
733 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
734 rt6->rt6i_dst.plen == 128) {
735 rt6->rt6i_flags |= RTF_MODIFIED;
736 skb->dst->metrics[RTAX_MTU-1] = mtu;
740 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
741 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
742 ip_rt_put(rt);
743 goto tx_error;
746 #endif
/* Consume soft error state recorded by ipgre_err(): report a link failure per recent error. */
748 if (tunnel->err_count > 0) {
749 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
750 tunnel->err_count--;
752 dst_link_failure(skb);
753 } else
754 tunnel->err_count = 0;
757 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
/* Reallocate when headroom is short or the skb data cannot be written in place. */
759 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
760 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
761 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
762 if (!new_skb) {
763 ip_rt_put(rt);
764 stats->tx_dropped++;
765 dev_kfree_skb(skb);
766 tunnel->recursion--;
767 return 0;
769 if (skb->sk)
770 skb_set_owner_w(new_skb, skb->sk);
771 dev_kfree_skb(skb);
772 skb = new_skb;
773 old_iph = ip_hdr(skb);
776 skb_reset_transport_header(skb);
777 skb_push(skb, gre_hlen);
778 skb_reset_network_header(skb);
779 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
780 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
781 IPSKB_REROUTED);
782 dst_release(skb->dst);
783 skb->dst = &rt->u.dst;
786 * Push down and install the IPIP header.
789 iph = ip_hdr(skb);
790 iph->version = 4;
791 iph->ihl = sizeof(struct iphdr) >> 2;
792 iph->frag_off = df;
793 iph->protocol = IPPROTO_GRE;
794 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
795 iph->daddr = rt->rt_dst;
796 iph->saddr = rt->rt_src;
/* A configured ttl of 0 means: inherit from the inner packet, else use the route's hop limit. */
798 if ((iph->ttl = tiph->ttl) == 0) {
799 if (skb->protocol == htons(ETH_P_IP))
800 iph->ttl = old_iph->ttl;
801 #ifdef CONFIG_IPV6
802 else if (skb->protocol == htons(ETH_P_IPV6))
803 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
804 #endif
805 else
806 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
809 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
810 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
811 htons(ETH_P_TEB) : skb->protocol;
/* Optional words are filled from the end of the header backwards: sequence, key, checksum. */
813 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
814 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
816 if (tunnel->parms.o_flags&GRE_SEQ) {
817 ++tunnel->o_seqno;
818 *ptr = htonl(tunnel->o_seqno);
819 ptr--;
821 if (tunnel->parms.o_flags&GRE_KEY) {
822 *ptr = tunnel->parms.o_key;
823 ptr--;
825 if (tunnel->parms.o_flags&GRE_CSUM) {
826 *ptr = 0;
827 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
831 nf_reset(skb);
833 IPTUNNEL_XMIT();
834 tunnel->recursion--;
835 return 0;
837 tx_error_icmp:
838 dst_link_failure(skb);
/* Common error exit: count the error, free the skb and release the recursion guard. */
840 tx_error:
841 stats->tx_errors++;
842 dev_kfree_skb(skb);
843 tunnel->recursion--;
844 return 0;
847 static int ipgre_tunnel_bind_dev(struct net_device *dev)
849 struct net_device *tdev = NULL;
850 struct ip_tunnel *tunnel;
851 struct iphdr *iph;
852 int hlen = LL_MAX_HEADER;
853 int mtu = ETH_DATA_LEN;
854 int addend = sizeof(struct iphdr) + 4;
856 tunnel = netdev_priv(dev);
857 iph = &tunnel->parms.iph;
859 /* Guess output device to choose reasonable mtu and needed_headroom */
861 if (iph->daddr) {
862 struct flowi fl = { .oif = tunnel->parms.link,
863 .nl_u = { .ip4_u =
864 { .daddr = iph->daddr,
865 .saddr = iph->saddr,
866 .tos = RT_TOS(iph->tos) } },
867 .proto = IPPROTO_GRE };
868 struct rtable *rt;
869 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
870 tdev = rt->u.dst.dev;
871 ip_rt_put(rt);
874 if (dev->type != ARPHRD_ETHER)
875 dev->flags |= IFF_POINTOPOINT;
878 if (!tdev && tunnel->parms.link)
879 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
881 if (tdev) {
882 hlen = tdev->hard_header_len + tdev->needed_headroom;
883 mtu = tdev->mtu;
885 dev->iflink = tunnel->parms.link;
887 /* Precalculate GRE options length */
888 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
889 if (tunnel->parms.o_flags&GRE_CSUM)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_KEY)
892 addend += 4;
893 if (tunnel->parms.o_flags&GRE_SEQ)
894 addend += 4;
896 dev->needed_headroom = addend + hlen;
897 mtu -= dev->hard_header_len - addend;
899 if (mtu < 68)
900 mtu = 68;
902 tunnel->hlen = addend;
904 return mtu;
/*
 * Tunnel configuration ioctls (installed as dev->do_ioctl).
 *
 * SIOCGETTUNNEL: copy a tunnel's parameters to user space.  On the
 *   fallback device the user-supplied parameters select the tunnel;
 *   otherwise, or when no tunnel matches, @dev's own are returned.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: need CAP_NET_ADMIN; validate the
 *   supplied parameters, then create a tunnel or update an existing one.
 * SIOCDELTUNNEL: needs CAP_NET_ADMIN; unregister @dev, or, on the
 *   fallback device, the tunnel selected by the user-supplied parameters
 *   (the fallback tunnel itself cannot be deleted: -EPERM).
 *
 * Returns 0 or a negative errno (-EFAULT, -EPERM, -EINVAL, -EEXIST,
 * -ENOBUFS, -ENOENT).
 *
 * NOTE(review): p.name is copied from user space and is not explicitly
 * NUL-terminated here before ipgre_tunnel_locate() hands it to strlcpy()
 * -- confirm whether termination is guaranteed elsewhere.
 */
907 static int
908 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
910 int err = 0;
911 struct ip_tunnel_parm p;
912 struct ip_tunnel *t;
913 struct net *net = dev_net(dev);
914 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
916 switch (cmd) {
917 case SIOCGETTUNNEL:
918 t = NULL;
919 if (dev == ign->fb_tunnel_dev) {
920 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
921 err = -EFAULT;
922 break;
924 t = ipgre_tunnel_locate(net, &p, 0);
926 if (t == NULL)
927 t = netdev_priv(dev);
928 memcpy(&p, &t->parms, sizeof(p));
929 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
930 err = -EFAULT;
931 break;
933 case SIOCADDTUNNEL:
934 case SIOCCHGTUNNEL:
935 err = -EPERM;
936 if (!capable(CAP_NET_ADMIN))
937 goto done;
939 err = -EFAULT;
940 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
941 goto done;
/* Only a plain IPv4/GRE template without IP options, GRE version or routing bits is accepted. */
943 err = -EINVAL;
944 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
945 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
946 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
947 goto done;
/* A fixed TTL forces the DF bit on the outer header. */
948 if (p.iph.ttl)
949 p.iph.frag_off |= htons(IP_DF);
/* Keys are only meaningful when the corresponding GRE_KEY flag is set. */
951 if (!(p.i_flags&GRE_KEY))
952 p.i_key = 0;
953 if (!(p.o_flags&GRE_KEY))
954 p.o_key = 0;
956 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
/* Changing a real (non-fallback) device: re-key it unless another tunnel already owns these parameters. */
958 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
959 if (t != NULL) {
960 if (t->dev != dev) {
961 err = -EEXIST;
962 break;
964 } else {
965 unsigned nflags=0;
967 t = netdev_priv(dev);
969 if (ipv4_is_multicast(p.iph.daddr))
970 nflags = IFF_BROADCAST;
971 else if (p.iph.daddr)
972 nflags = IFF_POINTOPOINT;
/* The change must not alter the device's point-to-point / broadcast nature. */
974 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
975 err = -EINVAL;
976 break;
/* Addresses and keys determine the hash bucket, so unlink before and relink after updating them. */
978 ipgre_tunnel_unlink(ign, t);
979 t->parms.iph.saddr = p.iph.saddr;
980 t->parms.iph.daddr = p.iph.daddr;
981 t->parms.i_key = p.i_key;
982 t->parms.o_key = p.o_key;
983 memcpy(dev->dev_addr, &p.iph.saddr, 4);
984 memcpy(dev->broadcast, &p.iph.daddr, 4);
985 ipgre_tunnel_link(ign, t);
986 netdev_state_change(dev);
990 if (t) {
991 err = 0;
992 if (cmd == SIOCCHGTUNNEL) {
993 t->parms.iph.ttl = p.iph.ttl;
994 t->parms.iph.tos = p.iph.tos;
995 t->parms.iph.frag_off = p.iph.frag_off;
996 if (t->parms.link != p.link) {
997 t->parms.link = p.link;
998 dev->mtu = ipgre_tunnel_bind_dev(dev);
999 netdev_state_change(dev);
1002 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1003 err = -EFAULT;
1004 } else
1005 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1006 break;
1008 case SIOCDELTUNNEL:
1009 err = -EPERM;
1010 if (!capable(CAP_NET_ADMIN))
1011 goto done;
1013 if (dev == ign->fb_tunnel_dev) {
1014 err = -EFAULT;
1015 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1016 goto done;
1017 err = -ENOENT;
1018 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1019 goto done;
1020 err = -EPERM;
1021 if (t == netdev_priv(ign->fb_tunnel_dev))
1022 goto done;
1023 dev = t->dev;
1025 unregister_netdevice(dev);
1026 err = 0;
1027 break;
1029 default:
1030 err = -EINVAL;
1033 done:
1034 return err;
1037 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1039 struct ip_tunnel *tunnel = netdev_priv(dev);
1040 if (new_mtu < 68 ||
1041 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1042 return -EINVAL;
1043 dev->mtu = new_mtu;
1044 return 0;
1047 /* Nice toy. Unfortunately, useless in real life :-)
1048 It allows to construct virtual multiprotocol broadcast "LAN"
1049 over the Internet, provided multicast routing is tuned.
1052 I have no idea was this bicycle invented before me,
1053 so that I had to set ARPHRD_IPGRE to a random value.
1054 I have an impression, that Cisco could make something similar,
1055 but this feature is apparently missing in IOS<=11.2(8).
1057 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1058 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1060 ping -t 255 224.66.66.66
1062 If nobody answers, mbone does not work.
1064 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1065 ip addr add 10.66.66.<somewhat>/24 dev Universe
1066 ifconfig Universe up
1067 ifconfig Universe add fe80::<Your_real_addr>/10
1068 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1069 ftp 10.66.66.66
1071 ftp fec0:6666:6666::193.233.7.65
1076 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1077 unsigned short type,
1078 const void *daddr, const void *saddr, unsigned len)
1080 struct ip_tunnel *t = netdev_priv(dev);
1081 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1082 __be16 *p = (__be16*)(iph+1);
1084 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1085 p[0] = t->parms.o_flags;
1086 p[1] = htons(type);
1089 * Set the source hardware address.
1092 if (saddr)
1093 memcpy(&iph->saddr, saddr, 4);
1095 if (daddr) {
1096 memcpy(&iph->daddr, daddr, 4);
1097 return t->hlen;
1099 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1100 return t->hlen;
1102 return -t->hlen;
1105 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1107 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1108 memcpy(haddr, &iph->saddr, 4);
1109 return 4;
1112 static const struct header_ops ipgre_header_ops = {
1113 .create = ipgre_header,
1114 .parse = ipgre_header_parse,
1117 #ifdef CONFIG_NET_IPGRE_BROADCAST
1118 static int ipgre_open(struct net_device *dev)
1120 struct ip_tunnel *t = netdev_priv(dev);
1122 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1123 struct flowi fl = { .oif = t->parms.link,
1124 .nl_u = { .ip4_u =
1125 { .daddr = t->parms.iph.daddr,
1126 .saddr = t->parms.iph.saddr,
1127 .tos = RT_TOS(t->parms.iph.tos) } },
1128 .proto = IPPROTO_GRE };
1129 struct rtable *rt;
1130 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1131 return -EADDRNOTAVAIL;
1132 dev = rt->u.dst.dev;
1133 ip_rt_put(rt);
1134 if (__in_dev_get_rtnl(dev) == NULL)
1135 return -EADDRNOTAVAIL;
1136 t->mlink = dev->ifindex;
1137 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1139 return 0;
1142 static int ipgre_close(struct net_device *dev)
1144 struct ip_tunnel *t = netdev_priv(dev);
1145 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1146 struct in_device *in_dev;
1147 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1148 if (in_dev) {
1149 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1150 in_dev_put(in_dev);
1153 return 0;
1156 #endif
1158 static void ipgre_tunnel_setup(struct net_device *dev)
1160 dev->init = ipgre_tunnel_init;
1161 dev->uninit = ipgre_tunnel_uninit;
1162 dev->destructor = free_netdev;
1163 dev->hard_start_xmit = ipgre_tunnel_xmit;
1164 dev->do_ioctl = ipgre_tunnel_ioctl;
1165 dev->change_mtu = ipgre_tunnel_change_mtu;
1167 dev->type = ARPHRD_IPGRE;
1168 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1169 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1170 dev->flags = IFF_NOARP;
1171 dev->iflink = 0;
1172 dev->addr_len = 4;
1173 dev->features |= NETIF_F_NETNS_LOCAL;
1176 static int ipgre_tunnel_init(struct net_device *dev)
1178 struct ip_tunnel *tunnel;
1179 struct iphdr *iph;
1181 tunnel = netdev_priv(dev);
1182 iph = &tunnel->parms.iph;
1184 tunnel->dev = dev;
1185 strcpy(tunnel->parms.name, dev->name);
1187 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1188 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1190 if (iph->daddr) {
1191 #ifdef CONFIG_NET_IPGRE_BROADCAST
1192 if (ipv4_is_multicast(iph->daddr)) {
1193 if (!iph->saddr)
1194 return -EINVAL;
1195 dev->flags = IFF_BROADCAST;
1196 dev->header_ops = &ipgre_header_ops;
1197 dev->open = ipgre_open;
1198 dev->stop = ipgre_close;
1200 #endif
1201 } else
1202 dev->header_ops = &ipgre_header_ops;
1204 return 0;
1207 static int ipgre_fb_tunnel_init(struct net_device *dev)
1209 struct ip_tunnel *tunnel = netdev_priv(dev);
1210 struct iphdr *iph = &tunnel->parms.iph;
1211 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1213 tunnel->dev = dev;
1214 strcpy(tunnel->parms.name, dev->name);
1216 iph->version = 4;
1217 iph->protocol = IPPROTO_GRE;
1218 iph->ihl = 5;
1219 tunnel->hlen = sizeof(struct iphdr) + 4;
1221 dev_hold(dev);
1222 ign->tunnels_wc[0] = tunnel;
1223 return 0;
1227 static struct net_protocol ipgre_protocol = {
1228 .handler = ipgre_rcv,
1229 .err_handler = ipgre_err,
1230 .netns_ok = 1,
1233 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1235 int prio;
1237 for (prio = 0; prio < 4; prio++) {
1238 int h;
1239 for (h = 0; h < HASH_SIZE; h++) {
1240 struct ip_tunnel *t;
1241 while ((t = ign->tunnels[prio][h]) != NULL)
1242 unregister_netdevice(t->dev);
1247 static int ipgre_init_net(struct net *net)
1249 int err;
1250 struct ipgre_net *ign;
1252 err = -ENOMEM;
1253 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1254 if (ign == NULL)
1255 goto err_alloc;
1257 err = net_assign_generic(net, ipgre_net_id, ign);
1258 if (err < 0)
1259 goto err_assign;
1261 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1262 ipgre_tunnel_setup);
1263 if (!ign->fb_tunnel_dev) {
1264 err = -ENOMEM;
1265 goto err_alloc_dev;
1268 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1269 dev_net_set(ign->fb_tunnel_dev, net);
1270 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1272 if ((err = register_netdev(ign->fb_tunnel_dev)))
1273 goto err_reg_dev;
1275 return 0;
1277 err_reg_dev:
1278 free_netdev(ign->fb_tunnel_dev);
1279 err_alloc_dev:
1280 /* nothing */
1281 err_assign:
1282 kfree(ign);
1283 err_alloc:
1284 return err;
1287 static void ipgre_exit_net(struct net *net)
1289 struct ipgre_net *ign;
1291 ign = net_generic(net, ipgre_net_id);
1292 rtnl_lock();
1293 ipgre_destroy_tunnels(ign);
1294 rtnl_unlock();
1295 kfree(ign);
1298 static struct pernet_operations ipgre_net_ops = {
1299 .init = ipgre_init_net,
1300 .exit = ipgre_exit_net,
1303 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1305 __be16 flags;
1307 if (!data)
1308 return 0;
1310 flags = 0;
1311 if (data[IFLA_GRE_IFLAGS])
1312 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1313 if (data[IFLA_GRE_OFLAGS])
1314 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1315 if (flags & (GRE_VERSION|GRE_ROUTING))
1316 return -EINVAL;
1318 return 0;
1321 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1323 __be32 daddr;
1325 if (tb[IFLA_ADDRESS]) {
1326 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1327 return -EINVAL;
1328 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1329 return -EADDRNOTAVAIL;
1332 if (!data)
1333 goto out;
1335 if (data[IFLA_GRE_REMOTE]) {
1336 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1337 if (!daddr)
1338 return -EINVAL;
1341 out:
1342 return ipgre_tunnel_validate(tb, data);
1345 static void ipgre_netlink_parms(struct nlattr *data[],
1346 struct ip_tunnel_parm *parms)
1348 memset(parms, 0, sizeof(*parms));
1350 parms->iph.protocol = IPPROTO_GRE;
1352 if (!data)
1353 return;
1355 if (data[IFLA_GRE_LINK])
1356 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1358 if (data[IFLA_GRE_IFLAGS])
1359 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1361 if (data[IFLA_GRE_OFLAGS])
1362 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1364 if (data[IFLA_GRE_IKEY])
1365 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1367 if (data[IFLA_GRE_OKEY])
1368 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1370 if (data[IFLA_GRE_LOCAL])
1371 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1373 if (data[IFLA_GRE_REMOTE])
1374 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1376 if (data[IFLA_GRE_TTL])
1377 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1379 if (data[IFLA_GRE_TOS])
1380 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1382 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1383 parms->iph.frag_off = htons(IP_DF);
1386 static int ipgre_tap_init(struct net_device *dev)
1388 struct ip_tunnel *tunnel;
1390 tunnel = netdev_priv(dev);
1392 tunnel->dev = dev;
1393 strcpy(tunnel->parms.name, dev->name);
1395 ipgre_tunnel_bind_dev(dev);
1397 return 0;
1400 static void ipgre_tap_setup(struct net_device *dev)
1403 ether_setup(dev);
1405 dev->init = ipgre_tap_init;
1406 dev->uninit = ipgre_tunnel_uninit;
1407 dev->destructor = free_netdev;
1408 dev->hard_start_xmit = ipgre_tunnel_xmit;
1409 dev->change_mtu = ipgre_tunnel_change_mtu;
1411 dev->iflink = 0;
1412 dev->features |= NETIF_F_NETNS_LOCAL;
1415 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1416 struct nlattr *data[])
1418 struct ip_tunnel *nt;
1419 struct net *net = dev_net(dev);
1420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1421 int mtu;
1422 int err;
1424 nt = netdev_priv(dev);
1425 ipgre_netlink_parms(data, &nt->parms);
1427 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1428 return -EEXIST;
1430 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1431 random_ether_addr(dev->dev_addr);
1433 mtu = ipgre_tunnel_bind_dev(dev);
1434 if (!tb[IFLA_MTU])
1435 dev->mtu = mtu;
1437 err = register_netdevice(dev);
1438 if (err)
1439 goto out;
1441 dev_hold(dev);
1442 ipgre_tunnel_link(ign, nt);
1444 out:
1445 return err;
1448 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1449 struct nlattr *data[])
1451 struct ip_tunnel *t, *nt;
1452 struct net *net = dev_net(dev);
1453 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1454 struct ip_tunnel_parm p;
1455 int mtu;
1457 if (dev == ign->fb_tunnel_dev)
1458 return -EINVAL;
1460 nt = netdev_priv(dev);
1461 ipgre_netlink_parms(data, &p);
1463 t = ipgre_tunnel_locate(net, &p, 0);
1465 if (t) {
1466 if (t->dev != dev)
1467 return -EEXIST;
1468 } else {
1469 unsigned nflags = 0;
1471 t = nt;
1473 if (ipv4_is_multicast(p.iph.daddr))
1474 nflags = IFF_BROADCAST;
1475 else if (p.iph.daddr)
1476 nflags = IFF_POINTOPOINT;
1478 if ((dev->flags ^ nflags) &
1479 (IFF_POINTOPOINT | IFF_BROADCAST))
1480 return -EINVAL;
1482 ipgre_tunnel_unlink(ign, t);
1483 t->parms.iph.saddr = p.iph.saddr;
1484 t->parms.iph.daddr = p.iph.daddr;
1485 t->parms.i_key = p.i_key;
1486 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1487 memcpy(dev->broadcast, &p.iph.daddr, 4);
1488 ipgre_tunnel_link(ign, t);
1489 netdev_state_change(dev);
1492 t->parms.o_key = p.o_key;
1493 t->parms.iph.ttl = p.iph.ttl;
1494 t->parms.iph.tos = p.iph.tos;
1495 t->parms.iph.frag_off = p.iph.frag_off;
1497 if (t->parms.link != p.link) {
1498 t->parms.link = p.link;
1499 mtu = ipgre_tunnel_bind_dev(dev);
1500 if (!tb[IFLA_MTU])
1501 dev->mtu = mtu;
1502 netdev_state_change(dev);
1505 return 0;
1508 static size_t ipgre_get_size(const struct net_device *dev)
1510 return
1511 /* IFLA_GRE_LINK */
1512 nla_total_size(4) +
1513 /* IFLA_GRE_IFLAGS */
1514 nla_total_size(2) +
1515 /* IFLA_GRE_OFLAGS */
1516 nla_total_size(2) +
1517 /* IFLA_GRE_IKEY */
1518 nla_total_size(4) +
1519 /* IFLA_GRE_OKEY */
1520 nla_total_size(4) +
1521 /* IFLA_GRE_LOCAL */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_REMOTE */
1524 nla_total_size(4) +
1525 /* IFLA_GRE_TTL */
1526 nla_total_size(1) +
1527 /* IFLA_GRE_TOS */
1528 nla_total_size(1) +
1529 /* IFLA_GRE_PMTUDISC */
1530 nla_total_size(1) +
1534 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1536 struct ip_tunnel *t = netdev_priv(dev);
1537 struct ip_tunnel_parm *p = &t->parms;
1539 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1540 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1541 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1542 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1543 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1544 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1545 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1546 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1547 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1548 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1550 return 0;
1552 nla_put_failure:
1553 return -EMSGSIZE;
1556 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1557 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1558 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1559 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1560 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1561 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1562 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1563 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1564 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1565 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1566 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1569 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1570 .kind = "gre",
1571 .maxtype = IFLA_GRE_MAX,
1572 .policy = ipgre_policy,
1573 .priv_size = sizeof(struct ip_tunnel),
1574 .setup = ipgre_tunnel_setup,
1575 .validate = ipgre_tunnel_validate,
1576 .newlink = ipgre_newlink,
1577 .changelink = ipgre_changelink,
1578 .get_size = ipgre_get_size,
1579 .fill_info = ipgre_fill_info,
1582 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1583 .kind = "gretap",
1584 .maxtype = IFLA_GRE_MAX,
1585 .policy = ipgre_policy,
1586 .priv_size = sizeof(struct ip_tunnel),
1587 .setup = ipgre_tap_setup,
1588 .validate = ipgre_tap_validate,
1589 .newlink = ipgre_newlink,
1590 .changelink = ipgre_changelink,
1591 .get_size = ipgre_get_size,
1592 .fill_info = ipgre_fill_info,
/*
 *	And now the module code and kernel interface.
 */
1599 static int __init ipgre_init(void)
1601 int err;
1603 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1605 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1606 printk(KERN_INFO "ipgre init: can't add protocol\n");
1607 return -EAGAIN;
1610 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1611 if (err < 0)
1612 goto gen_device_failed;
1614 err = rtnl_link_register(&ipgre_link_ops);
1615 if (err < 0)
1616 goto rtnl_link_failed;
1618 err = rtnl_link_register(&ipgre_tap_ops);
1619 if (err < 0)
1620 goto tap_ops_failed;
1622 out:
1623 return err;
1625 tap_ops_failed:
1626 rtnl_link_unregister(&ipgre_link_ops);
1627 rtnl_link_failed:
1628 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1629 gen_device_failed:
1630 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1631 goto out;
1634 static void __exit ipgre_fini(void)
1636 rtnl_link_unregister(&ipgre_tap_ops);
1637 rtnl_link_unregister(&ipgre_link_ops);
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1640 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1643 module_init(ipgre_init);
1644 module_exit(ipgre_fini);
1645 MODULE_LICENSE("GPL");
1646 MODULE_ALIAS_RTNL_LINK("gre");
1647 MODULE_ALIAS_RTNL_LINK("gretap");