Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ryusuke...
[linux-2.6/mini2440.git] / net / ipv4 / ip_gre.c
blob1433338526248bdac71566c94e01c1bcd731f190
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
54 Problems & solutions
55 --------------------
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: HARD_TX_LOCK lock breaks dead loops.
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
116 Alexey Kuznetsov.
/* Forward declarations for objects that are referenced (e.g. by
 * ipgre_tunnel_locate()) before their definitions appear. */
119 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
120 static int ipgre_tunnel_init(struct net_device *dev);
121 static void ipgre_tunnel_setup(struct net_device *dev);
122 static int ipgre_tunnel_bind_dev(struct net_device *dev);
124 /* Fallback tunnel: no source, no destination, no key, no options */
/* Number of buckets in each of the per-namespace tunnel hash tables. */
126 #define HASH_SIZE 16
/* Index passed to net_generic() to fetch this module's struct ipgre_net. */
128 static int ipgre_net_id;
/* Per-network-namespace state: four hash tables of tunnels (indexed by
 * which of local/remote are set) plus the fallback tunnel device. */
129 struct ipgre_net {
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
132 struct net_device *fb_tunnel_dev;
135 /* Tunnel hash table */
138 4 hash tables:
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
149 All keyless packets, if not matched by configured keyless tunnels
150 will match fallback tunnel.
/* Fold a 32-bit address or key into a 4-bit bucket index (0..15). */
153 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
/* Named aliases for the four tables; the index is a bitmask where
 * bit 1 = remote address set and bit 0 = local address set. */
155 #define tunnels_r_l tunnels[3]
156 #define tunnels_r tunnels[2]
157 #define tunnels_l tunnels[1]
158 #define tunnels_wc tunnels[0]
/* Taken for reading around ipgre_tunnel_lookup() in ipgre_rcv() and
 * ipgre_err(), and for writing when the hash chains are modified. */
160 static DEFINE_RWLOCK(ipgre_lock);
162 /* Given src, dst and key, find appropriate for input tunnel. */
164 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
165 __be32 remote, __be32 local,
166 __be32 key, __be16 gre_proto)
168 struct net *net = dev_net(dev);
169 int link = dev->ifindex;
170 unsigned h0 = HASH(remote);
171 unsigned h1 = HASH(key);
172 struct ip_tunnel *t, *cand = NULL;
173 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
174 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
175 ARPHRD_ETHER : ARPHRD_IPGRE;
176 int score, cand_score = 4;
178 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
179 if (local != t->parms.iph.saddr ||
180 remote != t->parms.iph.daddr ||
181 key != t->parms.i_key ||
182 !(t->dev->flags & IFF_UP))
183 continue;
185 if (t->dev->type != ARPHRD_IPGRE &&
186 t->dev->type != dev_type)
187 continue;
189 score = 0;
190 if (t->parms.link != link)
191 score |= 1;
192 if (t->dev->type != dev_type)
193 score |= 2;
194 if (score == 0)
195 return t;
197 if (score < cand_score) {
198 cand = t;
199 cand_score = score;
203 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
204 if (remote != t->parms.iph.daddr ||
205 key != t->parms.i_key ||
206 !(t->dev->flags & IFF_UP))
207 continue;
209 if (t->dev->type != ARPHRD_IPGRE &&
210 t->dev->type != dev_type)
211 continue;
213 score = 0;
214 if (t->parms.link != link)
215 score |= 1;
216 if (t->dev->type != dev_type)
217 score |= 2;
218 if (score == 0)
219 return t;
221 if (score < cand_score) {
222 cand = t;
223 cand_score = score;
227 for (t = ign->tunnels_l[h1]; t; t = t->next) {
228 if ((local != t->parms.iph.saddr &&
229 (local != t->parms.iph.daddr ||
230 !ipv4_is_multicast(local))) ||
231 key != t->parms.i_key ||
232 !(t->dev->flags & IFF_UP))
233 continue;
235 if (t->dev->type != ARPHRD_IPGRE &&
236 t->dev->type != dev_type)
237 continue;
239 score = 0;
240 if (t->parms.link != link)
241 score |= 1;
242 if (t->dev->type != dev_type)
243 score |= 2;
244 if (score == 0)
245 return t;
247 if (score < cand_score) {
248 cand = t;
249 cand_score = score;
253 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
254 if (t->parms.i_key != key ||
255 !(t->dev->flags & IFF_UP))
256 continue;
258 if (t->dev->type != ARPHRD_IPGRE &&
259 t->dev->type != dev_type)
260 continue;
262 score = 0;
263 if (t->parms.link != link)
264 score |= 1;
265 if (t->dev->type != dev_type)
266 score |= 2;
267 if (score == 0)
268 return t;
270 if (score < cand_score) {
271 cand = t;
272 cand_score = score;
276 if (cand != NULL)
277 return cand;
279 if (ign->fb_tunnel_dev->flags & IFF_UP)
280 return netdev_priv(ign->fb_tunnel_dev);
282 return NULL;
285 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
286 struct ip_tunnel_parm *parms)
288 __be32 remote = parms->iph.daddr;
289 __be32 local = parms->iph.saddr;
290 __be32 key = parms->i_key;
291 unsigned h = HASH(key);
292 int prio = 0;
294 if (local)
295 prio |= 1;
296 if (remote && !ipv4_is_multicast(remote)) {
297 prio |= 2;
298 h ^= HASH(remote);
301 return &ign->tunnels[prio][h];
304 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
305 struct ip_tunnel *t)
307 return __ipgre_bucket(ign, &t->parms);
310 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
312 struct ip_tunnel **tp = ipgre_bucket(ign, t);
314 t->next = *tp;
315 write_lock_bh(&ipgre_lock);
316 *tp = t;
317 write_unlock_bh(&ipgre_lock);
320 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
322 struct ip_tunnel **tp;
324 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
325 if (t == *tp) {
326 write_lock_bh(&ipgre_lock);
327 *tp = t->next;
328 write_unlock_bh(&ipgre_lock);
329 break;
334 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
335 struct ip_tunnel_parm *parms,
336 int type)
338 __be32 remote = parms->iph.daddr;
339 __be32 local = parms->iph.saddr;
340 __be32 key = parms->i_key;
341 int link = parms->link;
342 struct ip_tunnel *t, **tp;
343 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
345 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
346 if (local == t->parms.iph.saddr &&
347 remote == t->parms.iph.daddr &&
348 key == t->parms.i_key &&
349 link == t->parms.link &&
350 type == t->dev->type)
351 break;
353 return t;
356 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
357 struct ip_tunnel_parm *parms, int create)
359 struct ip_tunnel *t, *nt;
360 struct net_device *dev;
361 char name[IFNAMSIZ];
362 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
364 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
365 if (t || !create)
366 return t;
368 if (parms->name[0])
369 strlcpy(name, parms->name, IFNAMSIZ);
370 else
371 sprintf(name, "gre%%d");
373 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
374 if (!dev)
375 return NULL;
377 dev_net_set(dev, net);
379 if (strchr(name, '%')) {
380 if (dev_alloc_name(dev, name) < 0)
381 goto failed_free;
384 nt = netdev_priv(dev);
385 nt->parms = *parms;
386 dev->rtnl_link_ops = &ipgre_link_ops;
388 dev->mtu = ipgre_tunnel_bind_dev(dev);
390 if (register_netdevice(dev) < 0)
391 goto failed_free;
393 dev_hold(dev);
394 ipgre_tunnel_link(ign, nt);
395 return nt;
397 failed_free:
398 free_netdev(dev);
399 return NULL;
402 static void ipgre_tunnel_uninit(struct net_device *dev)
404 struct net *net = dev_net(dev);
405 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
407 ipgre_tunnel_unlink(ign, netdev_priv(dev));
408 dev_put(dev);
412 static void ipgre_err(struct sk_buff *skb, u32 info)
415 /* All the routers (except for Linux) return only
416 8 bytes of packet payload. It means, that precise relaying of
417 ICMP in the real Internet is absolutely infeasible.
419 Moreover, Cisco "wise men" put GRE key to the third word
420 in GRE header. It makes impossible maintaining even soft state for keyed
421 GRE tunnels with enabled checksum. Tell them "thank you".
423 Well, I wonder, rfc1812 was written by Cisco employee,
424 what the hell these idiots break standrads established
425 by themself???
428 struct iphdr *iph = (struct iphdr *)skb->data;
429 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
430 int grehlen = (iph->ihl<<2) + 4;
431 const int type = icmp_hdr(skb)->type;
432 const int code = icmp_hdr(skb)->code;
433 struct ip_tunnel *t;
434 __be16 flags;
436 flags = p[0];
437 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
438 if (flags&(GRE_VERSION|GRE_ROUTING))
439 return;
440 if (flags&GRE_KEY) {
441 grehlen += 4;
442 if (flags&GRE_CSUM)
443 grehlen += 4;
447 /* If only 8 bytes returned, keyed message will be dropped here */
448 if (skb_headlen(skb) < grehlen)
449 return;
451 switch (type) {
452 default:
453 case ICMP_PARAMETERPROB:
454 return;
456 case ICMP_DEST_UNREACH:
457 switch (code) {
458 case ICMP_SR_FAILED:
459 case ICMP_PORT_UNREACH:
460 /* Impossible event. */
461 return;
462 case ICMP_FRAG_NEEDED:
463 /* Soft state for pmtu is maintained by IP core. */
464 return;
465 default:
466 /* All others are translated to HOST_UNREACH.
467 rfc2003 contains "deep thoughts" about NET_UNREACH,
468 I believe they are just ether pollution. --ANK
470 break;
472 break;
473 case ICMP_TIME_EXCEEDED:
474 if (code != ICMP_EXC_TTL)
475 return;
476 break;
479 read_lock(&ipgre_lock);
480 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
481 flags & GRE_KEY ?
482 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
483 p[1]);
484 if (t == NULL || t->parms.iph.daddr == 0 ||
485 ipv4_is_multicast(t->parms.iph.daddr))
486 goto out;
488 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
489 goto out;
491 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
492 t->err_count++;
493 else
494 t->err_count = 1;
495 t->err_time = jiffies;
496 out:
497 read_unlock(&ipgre_lock);
498 return;
501 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
503 if (INET_ECN_is_ce(iph->tos)) {
504 if (skb->protocol == htons(ETH_P_IP)) {
505 IP_ECN_set_ce(ip_hdr(skb));
506 } else if (skb->protocol == htons(ETH_P_IPV6)) {
507 IP6_ECN_set_ce(ipv6_hdr(skb));
512 static inline u8
513 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
515 u8 inner = 0;
516 if (skb->protocol == htons(ETH_P_IP))
517 inner = old_iph->tos;
518 else if (skb->protocol == htons(ETH_P_IPV6))
519 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
520 return INET_ECN_encapsulate(tos, inner);
/*
 * GRE receive handler (installed as ipgre_protocol.handler).
 *
 * Parses the GRE flags word and the optional checksum, key and sequence
 * fields, looks up the receiving tunnel under ipgre_lock, enforces the
 * tunnel's checksum and sequence requirements, strips the GRE header and
 * passes the inner packet to netif_rx() on the tunnel device.
 * When no tunnel matches, an ICMP port-unreachable is sent and the skb
 * is freed.  Every path returns 0.
 */
523 static int ipgre_rcv(struct sk_buff *skb)
525 struct iphdr *iph;
526 u8 *h;
527 __be16 flags;
528 __sum16 csum = 0;
529 __be32 key = 0;
530 u32 seqno = 0;
531 struct ip_tunnel *tunnel;
532 int offset = 4;
533 __be16 gre_proto;
534 unsigned int len;
/* 16 bytes = 4-byte base GRE header plus at most three 4-byte optional
 * fields (checksum, key, sequence) parsed below. */
536 if (!pskb_may_pull(skb, 16))
537 goto drop_nolock;
539 iph = ip_hdr(skb);
540 h = skb->data;
541 flags = *(__be16*)h;
543 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
544 /* - Version must be 0.
545 - We do not support routing headers.
547 if (flags&(GRE_VERSION|GRE_ROUTING))
548 goto drop_nolock;
550 if (flags&GRE_CSUM) {
551 switch (skb->ip_summed) {
552 case CHECKSUM_COMPLETE:
553 csum = csum_fold(skb->csum);
554 if (!csum)
555 break;
556 /* fall through */
557 case CHECKSUM_NONE:
558 skb->csum = 0;
559 csum = __skb_checksum_complete(skb);
560 skb->ip_summed = CHECKSUM_COMPLETE;
562 offset += 4;
564 if (flags&GRE_KEY) {
565 key = *(__be32*)(h + offset);
566 offset += 4;
568 if (flags&GRE_SEQ) {
569 seqno = ntohl(*(__be32*)(h + offset));
570 offset += 4;
/* The protocol type is the second 16-bit word of the GRE header. */
574 gre_proto = *(__be16 *)(h + 2);
576 read_lock(&ipgre_lock);
577 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
578 iph->saddr, iph->daddr, key,
579 gre_proto))) {
580 struct net_device_stats *stats = &tunnel->dev->stats;
582 secpath_reset(skb);
584 skb->protocol = gre_proto;
585 /* WCCP version 1 and 2 protocol decoding.
586 * - Change protocol to IP
587 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
589 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
590 skb->protocol = htons(ETH_P_IP);
591 if ((*(h + offset) & 0xF0) != 0x40)
592 offset += 4;
595 skb->mac_header = skb->network_header;
596 __pskb_pull(skb, offset);
597 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
598 skb->pkt_type = PACKET_HOST;
599 #ifdef CONFIG_NET_IPGRE_BROADCAST
600 if (ipv4_is_multicast(iph->daddr)) {
601 /* Looped back packet, drop it! */
602 if (skb_rtable(skb)->fl.iif == 0)
603 goto drop;
604 stats->multicast++;
605 skb->pkt_type = PACKET_BROADCAST;
607 #endif
/* Drop on a bad checksum, or when the tunnel demands a checksum that
 * the packet does not carry. */
609 if (((flags&GRE_CSUM) && csum) ||
610 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
611 stats->rx_crc_errors++;
612 stats->rx_errors++;
613 goto drop;
/* With sequencing enabled: drop packets without a sequence number or
 * with one older than expected, then record the next expected value. */
615 if (tunnel->parms.i_flags&GRE_SEQ) {
616 if (!(flags&GRE_SEQ) ||
617 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
618 stats->rx_fifo_errors++;
619 stats->rx_errors++;
620 goto drop;
622 tunnel->i_seqno = seqno + 1;
/* Length is sampled before any Ethernet header is pulled, so rx_bytes
 * counts the full decapsulated payload. */
625 len = skb->len;
627 /* Warning: All skb pointers will be invalidated! */
628 if (tunnel->dev->type == ARPHRD_ETHER) {
629 if (!pskb_may_pull(skb, ETH_HLEN)) {
630 stats->rx_length_errors++;
631 stats->rx_errors++;
632 goto drop;
635 iph = ip_hdr(skb);
636 skb->protocol = eth_type_trans(skb, tunnel->dev);
637 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
640 stats->rx_packets++;
641 stats->rx_bytes += len;
642 skb->dev = tunnel->dev;
643 skb_dst_drop(skb);
644 nf_reset(skb);
646 skb_reset_network_header(skb);
647 ipgre_ecn_decapsulate(iph, skb);
649 netif_rx(skb);
650 read_unlock(&ipgre_lock);
651 return(0);
/* Reached only when the tunnel lookup found nothing. */
653 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
655 drop:
656 read_unlock(&ipgre_lock);
657 drop_nolock:
658 kfree_skb(skb);
659 return(0);
/*
 * ndo_start_xmit callback: encapsulate @skb in IP+GRE and send it.
 *
 * Chooses the outer destination (from the tunnel parameters, from a
 * header already built by ipgre_header(), or, for tunnels without a fixed
 * remote, from the inner route or neighbour), routes the outer packet,
 * applies path-MTU handling, ensures enough headroom, then builds the
 * outer IP header and the GRE header with optional sequence, key and
 * checksum fields.  The skb is always consumed and NETDEV_TX_OK is
 * returned on every path; failures are recorded in the device statistics.
 */
662 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
664 struct ip_tunnel *tunnel = netdev_priv(dev);
665 struct net_device_stats *stats = &tunnel->dev->stats;
666 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph;
668 u8 tos;
669 __be16 df;
670 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen;
675 __be32 dst;
676 int mtu;
678 if (dev->type == ARPHRD_ETHER)
679 IPCB(skb)->flags = 0;
/* When header_ops is set on an ARPHRD_IPGRE device the outer header is
 * already at skb->data, so it is used as the template and no extra GRE
 * header length is pushed later. */
681 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
682 gre_hlen = 0;
683 tiph = (struct iphdr *)skb->data;
684 } else {
685 gre_hlen = tunnel->hlen;
686 tiph = &tunnel->parms.iph;
689 if ((dst = tiph->daddr) == 0) {
690 /* NBMA tunnel */
692 if (skb_dst(skb) == NULL) {
693 stats->tx_fifo_errors++;
694 goto tx_error;
697 if (skb->protocol == htons(ETH_P_IP)) {
698 rt = skb_rtable(skb);
699 if ((dst = rt->rt_gateway) == 0)
700 goto tx_error_icmp;
702 #ifdef CONFIG_IPV6
703 else if (skb->protocol == htons(ETH_P_IPV6)) {
704 struct in6_addr *addr6;
705 int addr_type;
706 struct neighbour *neigh = skb_dst(skb)->neighbour;
708 if (neigh == NULL)
709 goto tx_error;
711 addr6 = (struct in6_addr *)&neigh->primary_key;
712 addr_type = ipv6_addr_type(addr6);
714 if (addr_type == IPV6_ADDR_ANY) {
715 addr6 = &ipv6_hdr(skb)->daddr;
716 addr_type = ipv6_addr_type(addr6);
719 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720 goto tx_error_icmp;
722 dst = addr6->s6_addr32[3];
724 #endif
725 else
726 goto tx_error;
/* A configured TOS of 1 means "inherit": copy the inner IPv4 TOS, or
 * use 0 for other inner protocols. */
729 tos = tiph->tos;
730 if (tos == 1) {
731 tos = 0;
732 if (skb->protocol == htons(ETH_P_IP))
733 tos = old_iph->tos;
737 struct flowi fl = { .oif = tunnel->parms.link,
738 .nl_u = { .ip4_u =
739 { .daddr = dst,
740 .saddr = tiph->saddr,
741 .tos = RT_TOS(tos) } },
742 .proto = IPPROTO_GRE };
743 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
744 stats->tx_carrier_errors++;
745 goto tx_error;
748 tdev = rt->u.dst.dev;
/* The outer route points back at this tunnel device: count a collision
 * and drop instead of recursing. */
750 if (tdev == dev) {
751 ip_rt_put(rt);
752 stats->collisions++;
753 goto tx_error;
756 df = tiph->frag_off;
757 if (df)
758 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
759 else
760 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
762 if (skb_dst(skb))
763 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
765 if (skb->protocol == htons(ETH_P_IP)) {
766 df |= (old_iph->frag_off&htons(IP_DF));
768 if ((old_iph->frag_off&htons(IP_DF)) &&
769 mtu < ntohs(old_iph->tot_len)) {
770 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
771 ip_rt_put(rt);
772 goto tx_error;
775 #ifdef CONFIG_IPV6
776 else if (skb->protocol == htons(ETH_P_IPV6)) {
777 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
779 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
780 if ((tunnel->parms.iph.daddr &&
781 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
782 rt6->rt6i_dst.plen == 128) {
783 rt6->rt6i_flags |= RTF_MODIFIED;
784 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
788 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
789 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
790 ip_rt_put(rt);
791 goto tx_error;
794 #endif
/* err_count is raised by ipgre_err(); while errors are recent, consume
 * one count per packet and signal link failure to the sender. */
796 if (tunnel->err_count > 0) {
797 if (time_before(jiffies,
798 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
799 tunnel->err_count--;
801 dst_link_failure(skb);
802 } else
803 tunnel->err_count = 0;
806 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
/* Copy the skb with more headroom when there is not enough, or when the
 * skb is shared or a non-writable clone. */
808 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
810 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811 if (!new_skb) {
812 ip_rt_put(rt);
813 stats->tx_dropped++;
814 dev_kfree_skb(skb);
815 return NETDEV_TX_OK;
817 if (skb->sk)
818 skb_set_owner_w(new_skb, skb->sk);
819 dev_kfree_skb(skb);
820 skb = new_skb;
821 old_iph = ip_hdr(skb);
824 skb_reset_transport_header(skb);
825 skb_push(skb, gre_hlen);
826 skb_reset_network_header(skb);
827 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
828 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
829 IPSKB_REROUTED);
830 skb_dst_drop(skb);
831 skb_dst_set(skb, &rt->u.dst);
834 * Push down and install the IPIP header.
837 iph = ip_hdr(skb);
838 iph->version = 4;
839 iph->ihl = sizeof(struct iphdr) >> 2;
840 iph->frag_off = df;
841 iph->protocol = IPPROTO_GRE;
842 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
843 iph->daddr = rt->rt_dst;
844 iph->saddr = rt->rt_src;
/* A configured TTL of 0 means "inherit" from the inner IPv4/IPv6 header,
 * falling back to the route's hop-limit metric. */
846 if ((iph->ttl = tiph->ttl) == 0) {
847 if (skb->protocol == htons(ETH_P_IP))
848 iph->ttl = old_iph->ttl;
849 #ifdef CONFIG_IPV6
850 else if (skb->protocol == htons(ETH_P_IPV6))
851 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
852 #endif
853 else
854 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
/* GRE base header: flags word, then protocol type. */
857 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
858 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
859 htons(ETH_P_TEB) : skb->protocol;
/* Optional fields are filled from the end of the header backwards:
 * sequence number, then key, then checksum. */
861 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
862 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
864 if (tunnel->parms.o_flags&GRE_SEQ) {
865 ++tunnel->o_seqno;
866 *ptr = htonl(tunnel->o_seqno);
867 ptr--;
869 if (tunnel->parms.o_flags&GRE_KEY) {
870 *ptr = tunnel->parms.o_key;
871 ptr--;
873 if (tunnel->parms.o_flags&GRE_CSUM) {
874 *ptr = 0;
875 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
879 nf_reset(skb);
881 IPTUNNEL_XMIT();
882 return NETDEV_TX_OK;
884 tx_error_icmp:
885 dst_link_failure(skb);
887 tx_error:
888 stats->tx_errors++;
889 dev_kfree_skb(skb);
890 return NETDEV_TX_OK;
893 static int ipgre_tunnel_bind_dev(struct net_device *dev)
895 struct net_device *tdev = NULL;
896 struct ip_tunnel *tunnel;
897 struct iphdr *iph;
898 int hlen = LL_MAX_HEADER;
899 int mtu = ETH_DATA_LEN;
900 int addend = sizeof(struct iphdr) + 4;
902 tunnel = netdev_priv(dev);
903 iph = &tunnel->parms.iph;
905 /* Guess output device to choose reasonable mtu and needed_headroom */
907 if (iph->daddr) {
908 struct flowi fl = { .oif = tunnel->parms.link,
909 .nl_u = { .ip4_u =
910 { .daddr = iph->daddr,
911 .saddr = iph->saddr,
912 .tos = RT_TOS(iph->tos) } },
913 .proto = IPPROTO_GRE };
914 struct rtable *rt;
915 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
916 tdev = rt->u.dst.dev;
917 ip_rt_put(rt);
920 if (dev->type != ARPHRD_ETHER)
921 dev->flags |= IFF_POINTOPOINT;
924 if (!tdev && tunnel->parms.link)
925 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
927 if (tdev) {
928 hlen = tdev->hard_header_len + tdev->needed_headroom;
929 mtu = tdev->mtu;
931 dev->iflink = tunnel->parms.link;
933 /* Precalculate GRE options length */
934 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935 if (tunnel->parms.o_flags&GRE_CSUM)
936 addend += 4;
937 if (tunnel->parms.o_flags&GRE_KEY)
938 addend += 4;
939 if (tunnel->parms.o_flags&GRE_SEQ)
940 addend += 4;
942 dev->needed_headroom = addend + hlen;
943 mtu -= dev->hard_header_len + addend;
945 if (mtu < 68)
946 mtu = 68;
948 tunnel->hlen = addend;
950 return mtu;
/*
 * ndo_do_ioctl callback implementing the tunnel management ioctls.
 *
 * SIOCGETTUNNEL: copy a tunnel's parameters to user space.  On the
 *   fallback device the tunnel is first looked up from the user-supplied
 *   parameters; otherwise (or if none is found) @dev's own are returned.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: require CAP_NET_ADMIN; validate the
 *   supplied parameters, then create a tunnel or update an existing one.
 * SIOCDELTUNNEL: requires CAP_NET_ADMIN; unregister @dev, or, when issued
 *   on the fallback device, the tunnel named by the user parameters (the
 *   fallback tunnel itself cannot be deleted).
 * Any other command yields -EINVAL.
 *
 * Returns 0 on success or a negative errno.
 */
953 static int
954 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
956 int err = 0;
957 struct ip_tunnel_parm p;
958 struct ip_tunnel *t;
959 struct net *net = dev_net(dev);
960 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
962 switch (cmd) {
963 case SIOCGETTUNNEL:
964 t = NULL;
965 if (dev == ign->fb_tunnel_dev) {
966 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967 err = -EFAULT;
968 break;
970 t = ipgre_tunnel_locate(net, &p, 0);
972 if (t == NULL)
973 t = netdev_priv(dev);
974 memcpy(&p, &t->parms, sizeof(p));
975 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976 err = -EFAULT;
977 break;
979 case SIOCADDTUNNEL:
980 case SIOCCHGTUNNEL:
981 err = -EPERM;
982 if (!capable(CAP_NET_ADMIN))
983 goto done;
985 err = -EFAULT;
986 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987 goto done;
/* Only a plain 20-byte IPv4/GRE template is accepted: no fragment bits
 * other than DF, and no GRE version or routing flags. */
989 err = -EINVAL;
990 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993 goto done;
/* A fixed TTL forces DF on the outer header. */
994 if (p.iph.ttl)
995 p.iph.frag_off |= htons(IP_DF);
/* Keys are only meaningful when the matching GRE_KEY flag is set. */
997 if (!(p.i_flags&GRE_KEY))
998 p.i_key = 0;
999 if (!(p.o_flags&GRE_KEY))
1000 p.o_key = 0;
1002 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1004 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1005 if (t != NULL) {
1006 if (t->dev != dev) {
1007 err = -EEXIST;
1008 break;
1010 } else {
1011 unsigned nflags = 0;
1013 t = netdev_priv(dev);
1015 if (ipv4_is_multicast(p.iph.daddr))
1016 nflags = IFF_BROADCAST;
1017 else if (p.iph.daddr)
1018 nflags = IFF_POINTOPOINT;
/* Changing addresses must not change the device between point-to-point
 * and broadcast mode. */
1020 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021 err = -EINVAL;
1022 break;
/* The hash bucket depends on addresses and key, so the tunnel is taken
 * off its chain, updated, and linked again. */
1024 ipgre_tunnel_unlink(ign, t);
1025 t->parms.iph.saddr = p.iph.saddr;
1026 t->parms.iph.daddr = p.iph.daddr;
1027 t->parms.i_key = p.i_key;
1028 t->parms.o_key = p.o_key;
1029 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030 memcpy(dev->broadcast, &p.iph.daddr, 4);
1031 ipgre_tunnel_link(ign, t);
1032 netdev_state_change(dev);
1036 if (t) {
1037 err = 0;
1038 if (cmd == SIOCCHGTUNNEL) {
1039 t->parms.iph.ttl = p.iph.ttl;
1040 t->parms.iph.tos = p.iph.tos;
1041 t->parms.iph.frag_off = p.iph.frag_off;
1042 if (t->parms.link != p.link) {
1043 t->parms.link = p.link;
1044 dev->mtu = ipgre_tunnel_bind_dev(dev);
1045 netdev_state_change(dev);
1048 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049 err = -EFAULT;
1050 } else
1051 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052 break;
1054 case SIOCDELTUNNEL:
1055 err = -EPERM;
1056 if (!capable(CAP_NET_ADMIN))
1057 goto done;
1059 if (dev == ign->fb_tunnel_dev) {
1060 err = -EFAULT;
1061 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062 goto done;
1063 err = -ENOENT;
1064 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1065 goto done;
1066 err = -EPERM;
1067 if (t == netdev_priv(ign->fb_tunnel_dev))
1068 goto done;
1069 dev = t->dev;
1071 unregister_netdevice(dev);
1072 err = 0;
1073 break;
1075 default:
1076 err = -EINVAL;
1079 done:
1080 return err;
1083 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1085 struct ip_tunnel *tunnel = netdev_priv(dev);
1086 if (new_mtu < 68 ||
1087 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1088 return -EINVAL;
1089 dev->mtu = new_mtu;
1090 return 0;
1093 /* Nice toy. Unfortunately, useless in real life :-)
1094 It allows to construct virtual multiprotocol broadcast "LAN"
1095 over the Internet, provided multicast routing is tuned.
1098 I have no idea was this bicycle invented before me,
1099 so that I had to set ARPHRD_IPGRE to a random value.
1100 I have an impression, that Cisco could make something similar,
1101 but this feature is apparently missing in IOS<=11.2(8).
1103 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1104 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1106 ping -t 255 224.66.66.66
1108 If nobody answers, mbone does not work.
1110 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1111 ip addr add 10.66.66.<somewhat>/24 dev Universe
1112 ifconfig Universe up
1113 ifconfig Universe add fe80::<Your_real_addr>/10
1114 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1115 ftp 10.66.66.66
1117 ftp fec0:6666:6666::193.233.7.65
1122 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123 unsigned short type,
1124 const void *daddr, const void *saddr, unsigned len)
1126 struct ip_tunnel *t = netdev_priv(dev);
1127 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1128 __be16 *p = (__be16*)(iph+1);
1130 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131 p[0] = t->parms.o_flags;
1132 p[1] = htons(type);
1135 * Set the source hardware address.
1138 if (saddr)
1139 memcpy(&iph->saddr, saddr, 4);
1141 if (daddr) {
1142 memcpy(&iph->daddr, daddr, 4);
1143 return t->hlen;
1145 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1146 return t->hlen;
1148 return -t->hlen;
1151 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1153 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1154 memcpy(haddr, &iph->saddr, 4);
1155 return 4;
/* Link-layer header callbacks; ipgre_tunnel_init() installs these on
 * tunnels with no remote address or with a multicast remote address. */
1158 static const struct header_ops ipgre_header_ops = {
1159 .create = ipgre_header,
1160 .parse = ipgre_header_parse,
1163 #ifdef CONFIG_NET_IPGRE_BROADCAST
1164 static int ipgre_open(struct net_device *dev)
1166 struct ip_tunnel *t = netdev_priv(dev);
1168 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1169 struct flowi fl = { .oif = t->parms.link,
1170 .nl_u = { .ip4_u =
1171 { .daddr = t->parms.iph.daddr,
1172 .saddr = t->parms.iph.saddr,
1173 .tos = RT_TOS(t->parms.iph.tos) } },
1174 .proto = IPPROTO_GRE };
1175 struct rtable *rt;
1176 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1177 return -EADDRNOTAVAIL;
1178 dev = rt->u.dst.dev;
1179 ip_rt_put(rt);
1180 if (__in_dev_get_rtnl(dev) == NULL)
1181 return -EADDRNOTAVAIL;
1182 t->mlink = dev->ifindex;
1183 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1185 return 0;
1188 static int ipgre_close(struct net_device *dev)
1190 struct ip_tunnel *t = netdev_priv(dev);
1192 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1193 struct in_device *in_dev;
1194 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1195 if (in_dev) {
1196 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197 in_dev_put(in_dev);
1200 return 0;
1203 #endif
/* Device operations for GRE tunnel devices, installed by
 * ipgre_tunnel_setup().  The open/stop hooks exist only when broadcast
 * (multicast) GRE support is configured. */
1205 static const struct net_device_ops ipgre_netdev_ops = {
1206 .ndo_init = ipgre_tunnel_init,
1207 .ndo_uninit = ipgre_tunnel_uninit,
1208 #ifdef CONFIG_NET_IPGRE_BROADCAST
1209 .ndo_open = ipgre_open,
1210 .ndo_stop = ipgre_close,
1211 #endif
1212 .ndo_start_xmit = ipgre_tunnel_xmit,
1213 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1214 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1217 static void ipgre_tunnel_setup(struct net_device *dev)
1219 dev->netdev_ops = &ipgre_netdev_ops;
1220 dev->destructor = free_netdev;
1222 dev->type = ARPHRD_IPGRE;
1223 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1224 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1225 dev->flags = IFF_NOARP;
1226 dev->iflink = 0;
1227 dev->addr_len = 4;
1228 dev->features |= NETIF_F_NETNS_LOCAL;
1229 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1232 static int ipgre_tunnel_init(struct net_device *dev)
1234 struct ip_tunnel *tunnel;
1235 struct iphdr *iph;
1237 tunnel = netdev_priv(dev);
1238 iph = &tunnel->parms.iph;
1240 tunnel->dev = dev;
1241 strcpy(tunnel->parms.name, dev->name);
1243 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1244 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1246 if (iph->daddr) {
1247 #ifdef CONFIG_NET_IPGRE_BROADCAST
1248 if (ipv4_is_multicast(iph->daddr)) {
1249 if (!iph->saddr)
1250 return -EINVAL;
1251 dev->flags = IFF_BROADCAST;
1252 dev->header_ops = &ipgre_header_ops;
1254 #endif
1255 } else
1256 dev->header_ops = &ipgre_header_ops;
1258 return 0;
1261 static void ipgre_fb_tunnel_init(struct net_device *dev)
1263 struct ip_tunnel *tunnel = netdev_priv(dev);
1264 struct iphdr *iph = &tunnel->parms.iph;
1265 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1267 tunnel->dev = dev;
1268 strcpy(tunnel->parms.name, dev->name);
1270 iph->version = 4;
1271 iph->protocol = IPPROTO_GRE;
1272 iph->ihl = 5;
1273 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 dev_hold(dev);
1276 ign->tunnels_wc[0] = tunnel;
1280 static const struct net_protocol ipgre_protocol = {
1281 .handler = ipgre_rcv,
1282 .err_handler = ipgre_err,
1283 .netns_ok = 1,
1286 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1288 int prio;
1290 for (prio = 0; prio < 4; prio++) {
1291 int h;
1292 for (h = 0; h < HASH_SIZE; h++) {
1293 struct ip_tunnel *t;
1294 while ((t = ign->tunnels[prio][h]) != NULL)
1295 unregister_netdevice(t->dev);
1300 static int ipgre_init_net(struct net *net)
1302 int err;
1303 struct ipgre_net *ign;
1305 err = -ENOMEM;
1306 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1307 if (ign == NULL)
1308 goto err_alloc;
1310 err = net_assign_generic(net, ipgre_net_id, ign);
1311 if (err < 0)
1312 goto err_assign;
1314 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315 ipgre_tunnel_setup);
1316 if (!ign->fb_tunnel_dev) {
1317 err = -ENOMEM;
1318 goto err_alloc_dev;
1320 dev_net_set(ign->fb_tunnel_dev, net);
1322 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1323 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1325 if ((err = register_netdev(ign->fb_tunnel_dev)))
1326 goto err_reg_dev;
1328 return 0;
1330 err_reg_dev:
1331 free_netdev(ign->fb_tunnel_dev);
1332 err_alloc_dev:
1333 /* nothing */
1334 err_assign:
1335 kfree(ign);
1336 err_alloc:
1337 return err;
1340 static void ipgre_exit_net(struct net *net)
1342 struct ipgre_net *ign;
1344 ign = net_generic(net, ipgre_net_id);
1345 rtnl_lock();
1346 ipgre_destroy_tunnels(ign);
1347 rtnl_unlock();
1348 kfree(ign);
1351 static struct pernet_operations ipgre_net_ops = {
1352 .init = ipgre_init_net,
1353 .exit = ipgre_exit_net,
1356 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1358 __be16 flags;
1360 if (!data)
1361 return 0;
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1371 return 0;
1374 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1376 __be32 daddr;
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1385 if (!data)
1386 goto out;
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1394 out:
1395 return ipgre_tunnel_validate(tb, data);
1398 static void ipgre_netlink_parms(struct nlattr *data[],
1399 struct ip_tunnel_parm *parms)
1401 memset(parms, 0, sizeof(*parms));
1403 parms->iph.protocol = IPPROTO_GRE;
1405 if (!data)
1406 return;
1408 if (data[IFLA_GRE_LINK])
1409 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1411 if (data[IFLA_GRE_IFLAGS])
1412 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1414 if (data[IFLA_GRE_OFLAGS])
1415 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1417 if (data[IFLA_GRE_IKEY])
1418 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1420 if (data[IFLA_GRE_OKEY])
1421 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1423 if (data[IFLA_GRE_LOCAL])
1424 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1426 if (data[IFLA_GRE_REMOTE])
1427 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1429 if (data[IFLA_GRE_TTL])
1430 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1432 if (data[IFLA_GRE_TOS])
1433 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1435 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436 parms->iph.frag_off = htons(IP_DF);
1439 static int ipgre_tap_init(struct net_device *dev)
1441 struct ip_tunnel *tunnel;
1443 tunnel = netdev_priv(dev);
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1448 ipgre_tunnel_bind_dev(dev);
1450 return 0;
1453 static const struct net_device_ops ipgre_tap_netdev_ops = {
1454 .ndo_init = ipgre_tap_init,
1455 .ndo_uninit = ipgre_tunnel_uninit,
1456 .ndo_start_xmit = ipgre_tunnel_xmit,
1457 .ndo_set_mac_address = eth_mac_addr,
1458 .ndo_validate_addr = eth_validate_addr,
1459 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1462 static void ipgre_tap_setup(struct net_device *dev)
1465 ether_setup(dev);
1467 dev->netdev_ops = &ipgre_tap_netdev_ops;
1468 dev->destructor = free_netdev;
1470 dev->iflink = 0;
1471 dev->features |= NETIF_F_NETNS_LOCAL;
1474 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1475 struct nlattr *data[])
1477 struct ip_tunnel *nt;
1478 struct net *net = dev_net(dev);
1479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480 int mtu;
1481 int err;
1483 nt = netdev_priv(dev);
1484 ipgre_netlink_parms(data, &nt->parms);
1486 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1487 return -EEXIST;
1489 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490 random_ether_addr(dev->dev_addr);
1492 mtu = ipgre_tunnel_bind_dev(dev);
1493 if (!tb[IFLA_MTU])
1494 dev->mtu = mtu;
1496 err = register_netdevice(dev);
1497 if (err)
1498 goto out;
1500 dev_hold(dev);
1501 ipgre_tunnel_link(ign, nt);
1503 out:
1504 return err;
1507 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508 struct nlattr *data[])
1510 struct ip_tunnel *t, *nt;
1511 struct net *net = dev_net(dev);
1512 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513 struct ip_tunnel_parm p;
1514 int mtu;
1516 if (dev == ign->fb_tunnel_dev)
1517 return -EINVAL;
1519 nt = netdev_priv(dev);
1520 ipgre_netlink_parms(data, &p);
1522 t = ipgre_tunnel_locate(net, &p, 0);
1524 if (t) {
1525 if (t->dev != dev)
1526 return -EEXIST;
1527 } else {
1528 t = nt;
1530 if (dev->type != ARPHRD_ETHER) {
1531 unsigned nflags = 0;
1533 if (ipv4_is_multicast(p.iph.daddr))
1534 nflags = IFF_BROADCAST;
1535 else if (p.iph.daddr)
1536 nflags = IFF_POINTOPOINT;
1538 if ((dev->flags ^ nflags) &
1539 (IFF_POINTOPOINT | IFF_BROADCAST))
1540 return -EINVAL;
1543 ipgre_tunnel_unlink(ign, t);
1544 t->parms.iph.saddr = p.iph.saddr;
1545 t->parms.iph.daddr = p.iph.daddr;
1546 t->parms.i_key = p.i_key;
1547 if (dev->type != ARPHRD_ETHER) {
1548 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549 memcpy(dev->broadcast, &p.iph.daddr, 4);
1551 ipgre_tunnel_link(ign, t);
1552 netdev_state_change(dev);
1555 t->parms.o_key = p.o_key;
1556 t->parms.iph.ttl = p.iph.ttl;
1557 t->parms.iph.tos = p.iph.tos;
1558 t->parms.iph.frag_off = p.iph.frag_off;
1560 if (t->parms.link != p.link) {
1561 t->parms.link = p.link;
1562 mtu = ipgre_tunnel_bind_dev(dev);
1563 if (!tb[IFLA_MTU])
1564 dev->mtu = mtu;
1565 netdev_state_change(dev);
1568 return 0;
1571 static size_t ipgre_get_size(const struct net_device *dev)
1573 return
1574 /* IFLA_GRE_LINK */
1575 nla_total_size(4) +
1576 /* IFLA_GRE_IFLAGS */
1577 nla_total_size(2) +
1578 /* IFLA_GRE_OFLAGS */
1579 nla_total_size(2) +
1580 /* IFLA_GRE_IKEY */
1581 nla_total_size(4) +
1582 /* IFLA_GRE_OKEY */
1583 nla_total_size(4) +
1584 /* IFLA_GRE_LOCAL */
1585 nla_total_size(4) +
1586 /* IFLA_GRE_REMOTE */
1587 nla_total_size(4) +
1588 /* IFLA_GRE_TTL */
1589 nla_total_size(1) +
1590 /* IFLA_GRE_TOS */
1591 nla_total_size(1) +
1592 /* IFLA_GRE_PMTUDISC */
1593 nla_total_size(1) +
1597 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1599 struct ip_tunnel *t = netdev_priv(dev);
1600 struct ip_tunnel_parm *p = &t->parms;
1602 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1605 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1607 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1609 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1613 return 0;
1615 nla_put_failure:
1616 return -EMSGSIZE;
1619 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1621 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1622 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1623 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1624 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1625 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1627 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1628 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1629 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1632 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633 .kind = "gre",
1634 .maxtype = IFLA_GRE_MAX,
1635 .policy = ipgre_policy,
1636 .priv_size = sizeof(struct ip_tunnel),
1637 .setup = ipgre_tunnel_setup,
1638 .validate = ipgre_tunnel_validate,
1639 .newlink = ipgre_newlink,
1640 .changelink = ipgre_changelink,
1641 .get_size = ipgre_get_size,
1642 .fill_info = ipgre_fill_info,
1645 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646 .kind = "gretap",
1647 .maxtype = IFLA_GRE_MAX,
1648 .policy = ipgre_policy,
1649 .priv_size = sizeof(struct ip_tunnel),
1650 .setup = ipgre_tap_setup,
1651 .validate = ipgre_tap_validate,
1652 .newlink = ipgre_newlink,
1653 .changelink = ipgre_changelink,
1654 .get_size = ipgre_get_size,
1655 .fill_info = ipgre_fill_info,
/* And now the module code and kernel interface. */
1662 static int __init ipgre_init(void)
1664 int err;
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1668 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1669 printk(KERN_INFO "ipgre init: can't add protocol\n");
1670 return -EAGAIN;
1673 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1674 if (err < 0)
1675 goto gen_device_failed;
1677 err = rtnl_link_register(&ipgre_link_ops);
1678 if (err < 0)
1679 goto rtnl_link_failed;
1681 err = rtnl_link_register(&ipgre_tap_ops);
1682 if (err < 0)
1683 goto tap_ops_failed;
1685 out:
1686 return err;
1688 tap_ops_failed:
1689 rtnl_link_unregister(&ipgre_link_ops);
1690 rtnl_link_failed:
1691 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1692 gen_device_failed:
1693 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1694 goto out;
1697 static void __exit ipgre_fini(void)
1699 rtnl_link_unregister(&ipgre_tap_ops);
1700 rtnl_link_unregister(&ipgre_link_ops);
1701 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1702 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1706 module_init(ipgre_init);
1707 module_exit(ipgre_fini);
1708 MODULE_LICENSE("GPL");
1709 MODULE_ALIAS_RTNL_LINK("gre");
1710 MODULE_ALIAS_RTNL_LINK("gretap");