net: don't allow CAP_NET_ADMIN to load non-netdev kernel modules
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv4 / ip_gre.c
blobaa70db1ccce54de81ebfc98635408ab1a58d3fa6
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
54 Problems & solutions
55 --------------------
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: HARD_TX_LOCK lock breaks dead loops.
73 2. Networking dead loops would not kill routers, but would really
74 kill network. IP hop limit plays role of "t->recursion" in this case,
75 if we copy it from packet being encapsulated to upper header.
76 It is very good solution, but it introduces two problems:
78 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
79 do not work over tunnels.
80 - traceroute does not work. I planned to relay ICMP from tunnel,
81 so that this problem would be solved and traceroute output
82 would even more informative. This idea appeared to be wrong:
83 only Linux complies to rfc1812 now (yes, guys, Linux is the only
84 true router now :-)), all routers (at least, in neighbourhood of mine)
85 return only 8 bytes of payload. It is the end.
87 Hence, if we want that OSPF worked or traceroute said something reasonable,
88 we should search for another solution.
90 One of them is to parse packet trying to detect inner encapsulation
91 made by our node. It is difficult or even impossible, especially,
92 taking into account fragmentation. To be short, it is not a solution at all.
94 Current solution: The solution was UNEXPECTEDLY SIMPLE.
95 We force DF flag on tunnels with preconfigured hop limit,
96 that is ALL. :-) Well, it does not remove the problem completely,
97 but exponential growth of network traffic is changed to linear
98 (branches, that exceed pmtu are pruned) and tunnel mtu
99 fastly degrades to value <68, where looping stops.
100 Yes, it is not good if there exists a router in the loop,
101 which does not force DF, even when encapsulating packets have DF set.
102 But it is not our problem! Nobody could accuse us, we made
103 all that we could make. Even if it is your gated who injected
104 fatal route to network, even if it were you who configured
105 fatal static route: you are innocent. :-)
109 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110 practically identical code. It would be good to glue them
111 together, but it is not very evident, how to make them modular.
112 sit is integral part of IPv6, ipip and gre are naturally modular.
113 We could extract common parts (hash table, ioctl etc)
114 to a separate module (ip_tunnel.c).
116 Alexey Kuznetsov.
119 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
120 static int ipgre_tunnel_init(struct net_device *dev);
121 static void ipgre_tunnel_setup(struct net_device *dev);
122 static int ipgre_tunnel_bind_dev(struct net_device *dev);
124 /* Fallback tunnel: no source, no destination, no key, no options */
126 #define HASH_SIZE 16
128 static int ipgre_net_id __read_mostly;
/* Per-network-namespace GRE state, fetched with net_generic(net, ipgre_net_id). */
129 struct ipgre_net {
/* Four hash tables of tunnels, indexed by [prio][HASH bucket]. */
130 struct ip_tunnel *tunnels[4][HASH_SIZE];
/* Fallback tunnel device, used by lookup when no configured tunnel matches. */
132 struct net_device *fb_tunnel_dev;
135 /* Tunnel hash table */
138 4 hash tables:
140 3: (remote,local)
141 2: (remote,*)
142 1: (*,local)
143 0: (*,*)
145 We require exact key match i.e. if a key is present in packet
146 it will match only tunnel with the same key; if it is not present,
147 it will match only keyless tunnel.
149 All keyless packets, if not matched to configured keyless tunnels
150 will match fallback tunnel.
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15).
 * The argument is fully parenthesized so that an expression such as
 * HASH(a ^ b) is hashed as a whole instead of being split by the
 * precedence of the cast and the shift.  Note that the argument is
 * still evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
155 #define tunnels_r_l tunnels[3]
156 #define tunnels_r tunnels[2]
157 #define tunnels_l tunnels[1]
158 #define tunnels_wc tunnels[0]
160 * Locking : hash tables are protected by RCU and a spinlock
162 static DEFINE_SPINLOCK(ipgre_lock);
164 #define for_each_ip_tunnel_rcu(start) \
165 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167 /* Given src, dst and key, find appropriate for input tunnel. */
169 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
173 struct net *net = dev_net(dev);
174 int link = dev->ifindex;
175 unsigned h0 = HASH(remote);
176 unsigned h1 = HASH(key);
177 struct ip_tunnel *t, *cand = NULL;
178 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
179 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
180 ARPHRD_ETHER : ARPHRD_IPGRE;
181 int score, cand_score = 4;
183 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
184 if (local != t->parms.iph.saddr ||
185 remote != t->parms.iph.daddr ||
186 key != t->parms.i_key ||
187 !(t->dev->flags & IFF_UP))
188 continue;
190 if (t->dev->type != ARPHRD_IPGRE &&
191 t->dev->type != dev_type)
192 continue;
194 score = 0;
195 if (t->parms.link != link)
196 score |= 1;
197 if (t->dev->type != dev_type)
198 score |= 2;
199 if (score == 0)
200 return t;
202 if (score < cand_score) {
203 cand = t;
204 cand_score = score;
208 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
209 if (remote != t->parms.iph.daddr ||
210 key != t->parms.i_key ||
211 !(t->dev->flags & IFF_UP))
212 continue;
214 if (t->dev->type != ARPHRD_IPGRE &&
215 t->dev->type != dev_type)
216 continue;
218 score = 0;
219 if (t->parms.link != link)
220 score |= 1;
221 if (t->dev->type != dev_type)
222 score |= 2;
223 if (score == 0)
224 return t;
226 if (score < cand_score) {
227 cand = t;
228 cand_score = score;
232 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 key != t->parms.i_key ||
237 !(t->dev->flags & IFF_UP))
238 continue;
240 if (t->dev->type != ARPHRD_IPGRE &&
241 t->dev->type != dev_type)
242 continue;
244 score = 0;
245 if (t->parms.link != link)
246 score |= 1;
247 if (t->dev->type != dev_type)
248 score |= 2;
249 if (score == 0)
250 return t;
252 if (score < cand_score) {
253 cand = t;
254 cand_score = score;
258 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
259 if (t->parms.i_key != key ||
260 !(t->dev->flags & IFF_UP))
261 continue;
263 if (t->dev->type != ARPHRD_IPGRE &&
264 t->dev->type != dev_type)
265 continue;
267 score = 0;
268 if (t->parms.link != link)
269 score |= 1;
270 if (t->dev->type != dev_type)
271 score |= 2;
272 if (score == 0)
273 return t;
275 if (score < cand_score) {
276 cand = t;
277 cand_score = score;
281 if (cand != NULL)
282 return cand;
284 dev = ign->fb_tunnel_dev;
285 if (dev->flags & IFF_UP)
286 return netdev_priv(dev);
288 return NULL;
291 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
292 struct ip_tunnel_parm *parms)
294 __be32 remote = parms->iph.daddr;
295 __be32 local = parms->iph.saddr;
296 __be32 key = parms->i_key;
297 unsigned h = HASH(key);
298 int prio = 0;
300 if (local)
301 prio |= 1;
302 if (remote && !ipv4_is_multicast(remote)) {
303 prio |= 2;
304 h ^= HASH(remote);
307 return &ign->tunnels[prio][h];
310 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
311 struct ip_tunnel *t)
313 return __ipgre_bucket(ign, &t->parms);
316 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318 struct ip_tunnel **tp = ipgre_bucket(ign, t);
320 spin_lock_bh(&ipgre_lock);
321 t->next = *tp;
322 rcu_assign_pointer(*tp, t);
323 spin_unlock_bh(&ipgre_lock);
326 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328 struct ip_tunnel **tp;
330 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
331 if (t == *tp) {
332 spin_lock_bh(&ipgre_lock);
333 *tp = t->next;
334 spin_unlock_bh(&ipgre_lock);
335 break;
340 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
341 struct ip_tunnel_parm *parms,
342 int type)
344 __be32 remote = parms->iph.daddr;
345 __be32 local = parms->iph.saddr;
346 __be32 key = parms->i_key;
347 int link = parms->link;
348 struct ip_tunnel *t, **tp;
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
352 if (local == t->parms.iph.saddr &&
353 remote == t->parms.iph.daddr &&
354 key == t->parms.i_key &&
355 link == t->parms.link &&
356 type == t->dev->type)
357 break;
359 return t;
362 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
363 struct ip_tunnel_parm *parms, int create)
365 struct ip_tunnel *t, *nt;
366 struct net_device *dev;
367 char name[IFNAMSIZ];
368 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
370 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
371 if (t || !create)
372 return t;
374 if (parms->name[0])
375 strlcpy(name, parms->name, IFNAMSIZ);
376 else
377 sprintf(name, "gre%%d");
379 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
380 if (!dev)
381 return NULL;
383 dev_net_set(dev, net);
385 if (strchr(name, '%')) {
386 if (dev_alloc_name(dev, name) < 0)
387 goto failed_free;
390 nt = netdev_priv(dev);
391 nt->parms = *parms;
392 dev->rtnl_link_ops = &ipgre_link_ops;
394 dev->mtu = ipgre_tunnel_bind_dev(dev);
396 if (register_netdevice(dev) < 0)
397 goto failed_free;
399 dev_hold(dev);
400 ipgre_tunnel_link(ign, nt);
401 return nt;
403 failed_free:
404 free_netdev(dev);
405 return NULL;
408 static void ipgre_tunnel_uninit(struct net_device *dev)
410 struct net *net = dev_net(dev);
411 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
413 ipgre_tunnel_unlink(ign, netdev_priv(dev));
414 dev_put(dev);
418 static void ipgre_err(struct sk_buff *skb, u32 info)
421 /* All the routers (except for Linux) return only
422 8 bytes of packet payload. It means, that precise relaying of
423 ICMP in the real Internet is absolutely infeasible.
425 Moreover, Cisco "wise men" put GRE key to the third word
426 in GRE header. It makes impossible maintaining even soft state for keyed
427 GRE tunnels with enabled checksum. Tell them "thank you".
429 Well, I wonder, rfc1812 was written by Cisco employee,
430 what the hell these idiots break standrads established
431 by themself???
434 struct iphdr *iph = (struct iphdr *)skb->data;
435 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
436 int grehlen = (iph->ihl<<2) + 4;
437 const int type = icmp_hdr(skb)->type;
438 const int code = icmp_hdr(skb)->code;
439 struct ip_tunnel *t;
440 __be16 flags;
442 flags = p[0];
443 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
444 if (flags&(GRE_VERSION|GRE_ROUTING))
445 return;
446 if (flags&GRE_KEY) {
447 grehlen += 4;
448 if (flags&GRE_CSUM)
449 grehlen += 4;
453 /* If only 8 bytes returned, keyed message will be dropped here */
454 if (skb_headlen(skb) < grehlen)
455 return;
457 switch (type) {
458 default:
459 case ICMP_PARAMETERPROB:
460 return;
462 case ICMP_DEST_UNREACH:
463 switch (code) {
464 case ICMP_SR_FAILED:
465 case ICMP_PORT_UNREACH:
466 /* Impossible event. */
467 return;
468 case ICMP_FRAG_NEEDED:
469 /* Soft state for pmtu is maintained by IP core. */
470 return;
471 default:
472 /* All others are translated to HOST_UNREACH.
473 rfc2003 contains "deep thoughts" about NET_UNREACH,
474 I believe they are just ether pollution. --ANK
476 break;
478 break;
479 case ICMP_TIME_EXCEEDED:
480 if (code != ICMP_EXC_TTL)
481 return;
482 break;
485 rcu_read_lock();
486 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
487 flags & GRE_KEY ?
488 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
489 p[1]);
490 if (t == NULL || t->parms.iph.daddr == 0 ||
491 ipv4_is_multicast(t->parms.iph.daddr))
492 goto out;
494 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
495 goto out;
497 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
498 t->err_count++;
499 else
500 t->err_count = 1;
501 t->err_time = jiffies;
502 out:
503 rcu_read_unlock();
504 return;
507 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
509 if (INET_ECN_is_ce(iph->tos)) {
510 if (skb->protocol == htons(ETH_P_IP)) {
511 IP_ECN_set_ce(ip_hdr(skb));
512 } else if (skb->protocol == htons(ETH_P_IPV6)) {
513 IP6_ECN_set_ce(ipv6_hdr(skb));
518 static inline u8
519 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
521 u8 inner = 0;
522 if (skb->protocol == htons(ETH_P_IP))
523 inner = old_iph->tos;
524 else if (skb->protocol == htons(ETH_P_IPV6))
525 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
526 return INET_ECN_encapsulate(tos, inner);
/*
 * Receive handler for GRE packets (installed as ipgre_protocol.handler).
 *
 * Parses the GRE header flags and optional checksum/key/sequence fields,
 * looks up the receiving tunnel, validates checksum and sequence number
 * against the tunnel's input flags, strips the GRE header and hands the
 * inner packet to netif_rx() on the tunnel device.
 *
 * Always returns 0: the skb is either passed to netif_rx() or freed.
 */
529 static int ipgre_rcv(struct sk_buff *skb)
531 struct iphdr *iph;
532 u8 *h;
533 __be16 flags;
534 __sum16 csum = 0;
535 __be32 key = 0;
536 u32 seqno = 0;
537 struct ip_tunnel *tunnel;
/* offset: GRE header length parsed so far; starts at the 4-byte base header */
538 int offset = 4;
539 __be16 gre_proto;
540 unsigned int len;
/* 16 = 4-byte base header plus up to three 4-byte optional fields read below */
542 if (!pskb_may_pull(skb, 16))
543 goto drop_nolock;
545 iph = ip_hdr(skb);
546 h = skb->data;
547 flags = *(__be16*)h;
549 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
550 /* - Version must be 0.
551 - We do not support routing headers.
553 if (flags&(GRE_VERSION|GRE_ROUTING))
554 goto drop_nolock;
/* csum ends up non-zero when the checksum does not verify; tested after tunnel lookup */
556 if (flags&GRE_CSUM) {
557 switch (skb->ip_summed) {
558 case CHECKSUM_COMPLETE:
559 csum = csum_fold(skb->csum);
560 if (!csum)
561 break;
562 /* fall through */
563 case CHECKSUM_NONE:
564 skb->csum = 0;
565 csum = __skb_checksum_complete(skb);
566 skb->ip_summed = CHECKSUM_COMPLETE;
568 offset += 4;
570 if (flags&GRE_KEY) {
571 key = *(__be32*)(h + offset);
572 offset += 4;
574 if (flags&GRE_SEQ) {
575 seqno = ntohl(*(__be32*)(h + offset));
576 offset += 4;
580 gre_proto = *(__be16 *)(h + 2);
582 rcu_read_lock();
583 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
584 iph->saddr, iph->daddr, key,
585 gre_proto))) {
586 struct net_device_stats *stats = &tunnel->dev->stats;
588 secpath_reset(skb);
590 skb->protocol = gre_proto;
591 /* WCCP version 1 and 2 protocol decoding.
592 * - Change protocol to IP
593 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
595 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
596 skb->protocol = htons(ETH_P_IP);
597 if ((*(h + offset) & 0xF0) != 0x40)
598 offset += 4;
/* Strip the GRE header: the outer IP header becomes the "mac" header */
601 skb->mac_header = skb->network_header;
602 __pskb_pull(skb, offset);
603 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
604 skb->pkt_type = PACKET_HOST;
605 #ifdef CONFIG_NET_IPGRE_BROADCAST
606 if (ipv4_is_multicast(iph->daddr)) {
607 /* Looped back packet, drop it! */
608 if (skb_rtable(skb)->fl.iif == 0)
609 goto drop;
610 stats->multicast++;
611 skb->pkt_type = PACKET_BROADCAST;
613 #endif
/* Drop on a bad checksum, or when the tunnel requires a checksum and none was sent */
615 if (((flags&GRE_CSUM) && csum) ||
616 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
617 stats->rx_crc_errors++;
618 stats->rx_errors++;
619 goto drop;
/* Sequence checking: drop packets without a sequence number or older than expected */
621 if (tunnel->parms.i_flags&GRE_SEQ) {
622 if (!(flags&GRE_SEQ) ||
623 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
624 stats->rx_fifo_errors++;
625 stats->rx_errors++;
626 goto drop;
628 tunnel->i_seqno = seqno + 1;
/* Record the length now; rx_bytes is accounted with this value below */
631 len = skb->len;
633 /* Warning: All skb pointers will be invalidated! */
634 if (tunnel->dev->type == ARPHRD_ETHER) {
635 if (!pskb_may_pull(skb, ETH_HLEN)) {
636 stats->rx_length_errors++;
637 stats->rx_errors++;
638 goto drop;
641 iph = ip_hdr(skb);
642 skb->protocol = eth_type_trans(skb, tunnel->dev);
643 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
646 stats->rx_packets++;
647 stats->rx_bytes += len;
648 skb->dev = tunnel->dev;
649 skb_dst_drop(skb);
650 nf_reset(skb);
652 skb_reset_network_header(skb);
653 ipgre_ecn_decapsulate(iph, skb);
655 netif_rx(skb);
656 rcu_read_unlock();
657 return(0);
/* No tunnel (not even the fallback) accepted the packet: report port unreachable */
659 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
661 drop:
662 rcu_read_unlock();
663 drop_nolock:
664 kfree_skb(skb);
665 return(0);
/*
 * Transmit routine (ndo_start_xmit): wrap @skb in an outer IP header plus
 * GRE header and send it towards the tunnel endpoint via IPTUNNEL_XMIT().
 *
 * Always returns NETDEV_TX_OK.  On the tx_error paths the skb is freed
 * and stats->tx_errors is incremented.
 */
668 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
670 struct ip_tunnel *tunnel = netdev_priv(dev);
671 struct net_device_stats *stats = &dev->stats;
672 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
673 struct iphdr *old_iph = ip_hdr(skb);
674 struct iphdr *tiph;
675 u8 tos;
676 __be16 df;
677 struct rtable *rt; /* Route to the other host */
678 struct net_device *tdev; /* Device to other host */
679 struct iphdr *iph; /* Our new IP header */
680 unsigned int max_headroom; /* The extra header space needed */
681 int gre_hlen;
682 __be32 dst;
683 int mtu;
685 if (dev->type == ARPHRD_ETHER)
686 IPCB(skb)->flags = 0;
/*
 * With header_ops on an ARPHRD_IPGRE device the outer IP header template is
 * taken from the front of the skb data; otherwise from the tunnel parameters.
 */
688 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
689 gre_hlen = 0;
690 tiph = (struct iphdr *)skb->data;
691 } else {
692 gre_hlen = tunnel->hlen;
693 tiph = &tunnel->parms.iph;
696 if ((dst = tiph->daddr) == 0) {
697 /* NBMA tunnel */
699 if (skb_dst(skb) == NULL) {
700 stats->tx_fifo_errors++;
701 goto tx_error;
/* IPv4 payload: use the route's gateway as the outer destination */
704 if (skb->protocol == htons(ETH_P_IP)) {
705 rt = skb_rtable(skb);
706 if ((dst = rt->rt_gateway) == 0)
707 goto tx_error_icmp;
709 #ifdef CONFIG_IPV6
/* IPv6 payload: derive the outer destination from an IPv4-compatible IPv6 address */
710 else if (skb->protocol == htons(ETH_P_IPV6)) {
711 struct in6_addr *addr6;
712 int addr_type;
713 struct neighbour *neigh = skb_dst(skb)->neighbour;
715 if (neigh == NULL)
716 goto tx_error;
718 addr6 = (struct in6_addr *)&neigh->primary_key;
719 addr_type = ipv6_addr_type(addr6);
721 if (addr_type == IPV6_ADDR_ANY) {
722 addr6 = &ipv6_hdr(skb)->daddr;
723 addr_type = ipv6_addr_type(addr6);
726 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
727 goto tx_error_icmp;
729 dst = addr6->s6_addr32[3];
731 #endif
732 else
733 goto tx_error;
/* A configured tos of 1 means: take the TOS from the inner IPv4 header (0 otherwise) */
736 tos = tiph->tos;
737 if (tos == 1) {
738 tos = 0;
739 if (skb->protocol == htons(ETH_P_IP))
740 tos = old_iph->tos;
744 struct flowi fl = { .oif = tunnel->parms.link,
745 .nl_u = { .ip4_u =
746 { .daddr = dst,
747 .saddr = tiph->saddr,
748 .tos = RT_TOS(tos) } },
749 .proto = IPPROTO_GRE };
750 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
751 stats->tx_carrier_errors++;
752 goto tx_error;
755 tdev = rt->u.dst.dev;
/* The route to the endpoint leads back to this tunnel device: refuse to send */
757 if (tdev == dev) {
758 ip_rt_put(rt);
759 stats->collisions++;
760 goto tx_error;
763 df = tiph->frag_off;
764 if (df)
765 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
766 else
767 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
769 if (skb_dst(skb))
770 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
772 if (skb->protocol == htons(ETH_P_IP)) {
773 df |= (old_iph->frag_off&htons(IP_DF));
/* Inner packet has DF set and does not fit: send "fragmentation needed" and drop */
775 if ((old_iph->frag_off&htons(IP_DF)) &&
776 mtu < ntohs(old_iph->tot_len)) {
777 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
778 ip_rt_put(rt);
779 goto tx_error;
782 #ifdef CONFIG_IPV6
783 else if (skb->protocol == htons(ETH_P_IPV6)) {
784 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
786 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
787 if ((tunnel->parms.iph.daddr &&
788 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
789 rt6->rt6i_dst.plen == 128) {
790 rt6->rt6i_flags |= RTF_MODIFIED;
791 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
795 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
796 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
797 ip_rt_put(rt);
798 goto tx_error;
801 #endif
/* Recent ICMP errors recorded by ipgre_err(): report link failure, one per packet */
803 if (tunnel->err_count > 0) {
804 if (time_before(jiffies,
805 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
806 tunnel->err_count--;
808 dst_link_failure(skb);
809 } else
810 tunnel->err_count = 0;
813 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
/* Reallocate when headroom is short or the skb cannot be written in place */
815 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
816 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
817 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
818 if (max_headroom > dev->needed_headroom)
819 dev->needed_headroom = max_headroom;
820 if (!new_skb) {
821 ip_rt_put(rt);
822 txq->tx_dropped++;
823 dev_kfree_skb(skb);
824 return NETDEV_TX_OK;
826 if (skb->sk)
827 skb_set_owner_w(new_skb, skb->sk);
828 dev_kfree_skb(skb);
829 skb = new_skb;
830 old_iph = ip_hdr(skb);
833 skb_reset_transport_header(skb);
834 skb_push(skb, gre_hlen);
835 skb_reset_network_header(skb);
836 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
837 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
838 IPSKB_REROUTED);
839 skb_dst_drop(skb);
840 skb_dst_set(skb, &rt->u.dst);
843 * Push down and install the IPIP header.
846 iph = ip_hdr(skb);
847 iph->version = 4;
848 iph->ihl = sizeof(struct iphdr) >> 2;
849 iph->frag_off = df;
850 iph->protocol = IPPROTO_GRE;
851 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
852 iph->daddr = rt->rt_dst;
853 iph->saddr = rt->rt_src;
/* A configured ttl of 0 means: inherit from the inner header, else use the route's hop limit */
855 if ((iph->ttl = tiph->ttl) == 0) {
856 if (skb->protocol == htons(ETH_P_IP))
857 iph->ttl = old_iph->ttl;
858 #ifdef CONFIG_IPV6
859 else if (skb->protocol == htons(ETH_P_IPV6))
860 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
861 #endif
862 else
863 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
/* Base GRE header: flags word followed by the payload protocol */
866 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
867 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
868 htons(ETH_P_TEB) : skb->protocol;
/* Optional fields are filled from the end of the header backwards: seq, key, checksum */
870 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
871 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
873 if (tunnel->parms.o_flags&GRE_SEQ) {
874 ++tunnel->o_seqno;
875 *ptr = htonl(tunnel->o_seqno);
876 ptr--;
878 if (tunnel->parms.o_flags&GRE_KEY) {
879 *ptr = tunnel->parms.o_key;
880 ptr--;
882 if (tunnel->parms.o_flags&GRE_CSUM) {
883 *ptr = 0;
884 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
888 nf_reset(skb);
890 IPTUNNEL_XMIT();
891 return NETDEV_TX_OK;
893 tx_error_icmp:
894 dst_link_failure(skb);
896 tx_error:
897 stats->tx_errors++;
898 dev_kfree_skb(skb);
899 return NETDEV_TX_OK;
902 static int ipgre_tunnel_bind_dev(struct net_device *dev)
904 struct net_device *tdev = NULL;
905 struct ip_tunnel *tunnel;
906 struct iphdr *iph;
907 int hlen = LL_MAX_HEADER;
908 int mtu = ETH_DATA_LEN;
909 int addend = sizeof(struct iphdr) + 4;
911 tunnel = netdev_priv(dev);
912 iph = &tunnel->parms.iph;
914 /* Guess output device to choose reasonable mtu and needed_headroom */
916 if (iph->daddr) {
917 struct flowi fl = { .oif = tunnel->parms.link,
918 .nl_u = { .ip4_u =
919 { .daddr = iph->daddr,
920 .saddr = iph->saddr,
921 .tos = RT_TOS(iph->tos) } },
922 .proto = IPPROTO_GRE };
923 struct rtable *rt;
924 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
925 tdev = rt->u.dst.dev;
926 ip_rt_put(rt);
929 if (dev->type != ARPHRD_ETHER)
930 dev->flags |= IFF_POINTOPOINT;
933 if (!tdev && tunnel->parms.link)
934 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
936 if (tdev) {
937 hlen = tdev->hard_header_len + tdev->needed_headroom;
938 mtu = tdev->mtu;
940 dev->iflink = tunnel->parms.link;
942 /* Precalculate GRE options length */
943 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
944 if (tunnel->parms.o_flags&GRE_CSUM)
945 addend += 4;
946 if (tunnel->parms.o_flags&GRE_KEY)
947 addend += 4;
948 if (tunnel->parms.o_flags&GRE_SEQ)
949 addend += 4;
951 dev->needed_headroom = addend + hlen;
952 mtu -= dev->hard_header_len + addend;
954 if (mtu < 68)
955 mtu = 68;
957 tunnel->hlen = addend;
959 return mtu;
/*
 * Tunnel configuration ioctl handler (ndo_do_ioctl).
 *
 * Handles SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL and SIOCDELTUNNEL;
 * the tunnel parameters are exchanged with user space through
 * ifr->ifr_ifru.ifru_data as a struct ip_tunnel_parm.  The add, change
 * and delete commands require CAP_NET_ADMIN.
 *
 * Returns 0 on success or a negative errno; unknown commands give -EINVAL.
 */
962 static int
963 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
965 int err = 0;
966 struct ip_tunnel_parm p;
967 struct ip_tunnel *t;
968 struct net *net = dev_net(dev);
969 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
971 switch (cmd) {
972 case SIOCGETTUNNEL:
973 t = NULL;
/* On the fallback device the caller's parameters select which tunnel to report */
974 if (dev == ign->fb_tunnel_dev) {
975 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
976 err = -EFAULT;
977 break;
979 t = ipgre_tunnel_locate(net, &p, 0);
/* Otherwise (or when nothing matched) report the parameters of dev itself */
981 if (t == NULL)
982 t = netdev_priv(dev);
983 memcpy(&p, &t->parms, sizeof(p));
984 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
985 err = -EFAULT;
986 break;
988 case SIOCADDTUNNEL:
989 case SIOCCHGTUNNEL:
990 err = -EPERM;
991 if (!capable(CAP_NET_ADMIN))
992 goto done;
994 err = -EFAULT;
995 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
996 goto done;
/* Sanity-check the header template: IPv4, GRE, no IP options, only DF in frag_off */
998 err = -EINVAL;
999 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1000 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1001 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1002 goto done;
/* A fixed ttl forces DF on the outer header */
1003 if (p.iph.ttl)
1004 p.iph.frag_off |= htons(IP_DF);
/* Keys are only meaningful when the corresponding GRE_KEY flag is set */
1006 if (!(p.i_flags&GRE_KEY))
1007 p.i_key = 0;
1008 if (!(p.o_flags&GRE_KEY))
1009 p.o_key = 0;
1011 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1013 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1014 if (t != NULL) {
/* The requested parameters already belong to a different device */
1015 if (t->dev != dev) {
1016 err = -EEXIST;
1017 break;
1019 } else {
1020 unsigned nflags = 0;
1022 t = netdev_priv(dev);
1024 if (ipv4_is_multicast(p.iph.daddr))
1025 nflags = IFF_BROADCAST;
1026 else if (p.iph.daddr)
1027 nflags = IFF_POINTOPOINT;
/* Reject changes that would alter the POINTOPOINT/BROADCAST flags of the device */
1029 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1030 err = -EINVAL;
1031 break;
/* Addresses and input key select the hash chain, so unlink before changing and relink after */
1033 ipgre_tunnel_unlink(ign, t);
1034 t->parms.iph.saddr = p.iph.saddr;
1035 t->parms.iph.daddr = p.iph.daddr;
1036 t->parms.i_key = p.i_key;
1037 t->parms.o_key = p.o_key;
1038 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1039 memcpy(dev->broadcast, &p.iph.daddr, 4);
1040 ipgre_tunnel_link(ign, t);
1041 netdev_state_change(dev);
1045 if (t) {
1046 err = 0;
1047 if (cmd == SIOCCHGTUNNEL) {
1048 t->parms.iph.ttl = p.iph.ttl;
1049 t->parms.iph.tos = p.iph.tos;
1050 t->parms.iph.frag_off = p.iph.frag_off;
/* A changed link requires recomputing the MTU via ipgre_tunnel_bind_dev() */
1051 if (t->parms.link != p.link) {
1052 t->parms.link = p.link;
1053 dev->mtu = ipgre_tunnel_bind_dev(dev);
1054 netdev_state_change(dev);
1057 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1058 err = -EFAULT;
1059 } else
1060 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1061 break;
1063 case SIOCDELTUNNEL:
1064 err = -EPERM;
1065 if (!capable(CAP_NET_ADMIN))
1066 goto done;
/* On the fallback device the caller's parameters select the tunnel to delete */
1068 if (dev == ign->fb_tunnel_dev) {
1069 err = -EFAULT;
1070 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1071 goto done;
1072 err = -ENOENT;
1073 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1074 goto done;
/* The fallback tunnel itself cannot be deleted this way */
1075 err = -EPERM;
1076 if (t == netdev_priv(ign->fb_tunnel_dev))
1077 goto done;
1078 dev = t->dev;
1080 unregister_netdevice(dev);
1081 err = 0;
1082 break;
1084 default:
1085 err = -EINVAL;
1088 done:
1089 return err;
1092 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1094 struct ip_tunnel *tunnel = netdev_priv(dev);
1095 if (new_mtu < 68 ||
1096 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1097 return -EINVAL;
1098 dev->mtu = new_mtu;
1099 return 0;
1102 /* Nice toy. Unfortunately, useless in real life :-)
1103 It allows to construct virtual multiprotocol broadcast "LAN"
1104 over the Internet, provided multicast routing is tuned.
1107 I have no idea was this bicycle invented before me,
1108 so that I had to set ARPHRD_IPGRE to a random value.
1109 I have an impression, that Cisco could make something similar,
1110 but this feature is apparently missing in IOS<=11.2(8).
1112 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1113 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1115 ping -t 255 224.66.66.66
1117 If nobody answers, mbone does not work.
1119 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1120 ip addr add 10.66.66.<somewhat>/24 dev Universe
1121 ifconfig Universe up
1122 ifconfig Universe add fe80::<Your_real_addr>/10
1123 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1124 ftp 10.66.66.66
1126 ftp fec0:6666:6666::193.233.7.65
1131 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1132 unsigned short type,
1133 const void *daddr, const void *saddr, unsigned len)
1135 struct ip_tunnel *t = netdev_priv(dev);
1136 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1137 __be16 *p = (__be16*)(iph+1);
1139 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1140 p[0] = t->parms.o_flags;
1141 p[1] = htons(type);
1144 * Set the source hardware address.
1147 if (saddr)
1148 memcpy(&iph->saddr, saddr, 4);
1150 if (daddr) {
1151 memcpy(&iph->daddr, daddr, 4);
1152 return t->hlen;
1154 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1155 return t->hlen;
1157 return -t->hlen;
1160 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1162 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1163 memcpy(haddr, &iph->saddr, 4);
1164 return 4;
/*
 * Link-layer header operations; ipgre_tunnel_init() installs them on
 * tunnels without a remote address and on multicast (broadcast) tunnels.
 */
1167 static const struct header_ops ipgre_header_ops = {
1168 .create = ipgre_header,
1169 .parse = ipgre_header_parse,
1172 #ifdef CONFIG_NET_IPGRE_BROADCAST
1173 static int ipgre_open(struct net_device *dev)
1175 struct ip_tunnel *t = netdev_priv(dev);
1177 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1178 struct flowi fl = { .oif = t->parms.link,
1179 .nl_u = { .ip4_u =
1180 { .daddr = t->parms.iph.daddr,
1181 .saddr = t->parms.iph.saddr,
1182 .tos = RT_TOS(t->parms.iph.tos) } },
1183 .proto = IPPROTO_GRE };
1184 struct rtable *rt;
1185 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1186 return -EADDRNOTAVAIL;
1187 dev = rt->u.dst.dev;
1188 ip_rt_put(rt);
1189 if (__in_dev_get_rtnl(dev) == NULL)
1190 return -EADDRNOTAVAIL;
1191 t->mlink = dev->ifindex;
1192 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1194 return 0;
1197 static int ipgre_close(struct net_device *dev)
1199 struct ip_tunnel *t = netdev_priv(dev);
1201 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1202 struct in_device *in_dev;
1203 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1204 if (in_dev) {
1205 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1206 in_dev_put(in_dev);
1209 return 0;
1212 #endif
/*
 * Net device operations for GRE tunnel devices, installed by
 * ipgre_tunnel_setup().  The open/stop hooks exist only with
 * CONFIG_NET_IPGRE_BROADCAST.
 */
1214 static const struct net_device_ops ipgre_netdev_ops = {
1215 .ndo_init = ipgre_tunnel_init,
1216 .ndo_uninit = ipgre_tunnel_uninit,
1217 #ifdef CONFIG_NET_IPGRE_BROADCAST
1218 .ndo_open = ipgre_open,
1219 .ndo_stop = ipgre_close,
1220 #endif
1221 .ndo_start_xmit = ipgre_tunnel_xmit,
1222 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1223 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1226 static void ipgre_tunnel_setup(struct net_device *dev)
1228 dev->netdev_ops = &ipgre_netdev_ops;
1229 dev->destructor = free_netdev;
1231 dev->type = ARPHRD_IPGRE;
1232 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1233 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1234 dev->flags = IFF_NOARP;
1235 dev->iflink = 0;
1236 dev->addr_len = 4;
1237 dev->features |= NETIF_F_NETNS_LOCAL;
1238 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1241 static int ipgre_tunnel_init(struct net_device *dev)
1243 struct ip_tunnel *tunnel;
1244 struct iphdr *iph;
1246 tunnel = netdev_priv(dev);
1247 iph = &tunnel->parms.iph;
1249 tunnel->dev = dev;
1250 strcpy(tunnel->parms.name, dev->name);
1252 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1253 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1255 if (iph->daddr) {
1256 #ifdef CONFIG_NET_IPGRE_BROADCAST
1257 if (ipv4_is_multicast(iph->daddr)) {
1258 if (!iph->saddr)
1259 return -EINVAL;
1260 dev->flags = IFF_BROADCAST;
1261 dev->header_ops = &ipgre_header_ops;
1263 #endif
1264 } else
1265 dev->header_ops = &ipgre_header_ops;
1267 return 0;
1270 static void ipgre_fb_tunnel_init(struct net_device *dev)
1272 struct ip_tunnel *tunnel = netdev_priv(dev);
1273 struct iphdr *iph = &tunnel->parms.iph;
1274 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1276 tunnel->dev = dev;
1277 strcpy(tunnel->parms.name, dev->name);
1279 iph->version = 4;
1280 iph->protocol = IPPROTO_GRE;
1281 iph->ihl = 5;
1282 tunnel->hlen = sizeof(struct iphdr) + 4;
1284 dev_hold(dev);
1285 ign->tunnels_wc[0] = tunnel;
1289 static const struct net_protocol ipgre_protocol = {
1290 .handler = ipgre_rcv,
1291 .err_handler = ipgre_err,
1292 .netns_ok = 1,
1295 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1297 int prio;
1299 for (prio = 0; prio < 4; prio++) {
1300 int h;
1301 for (h = 0; h < HASH_SIZE; h++) {
1302 struct ip_tunnel *t = ign->tunnels[prio][h];
1304 while (t != NULL) {
1305 unregister_netdevice_queue(t->dev, head);
1306 t = t->next;
1312 static int ipgre_init_net(struct net *net)
1314 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1315 int err;
1317 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1318 ipgre_tunnel_setup);
1319 if (!ign->fb_tunnel_dev) {
1320 err = -ENOMEM;
1321 goto err_alloc_dev;
1323 dev_net_set(ign->fb_tunnel_dev, net);
1325 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1326 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1328 if ((err = register_netdev(ign->fb_tunnel_dev)))
1329 goto err_reg_dev;
1331 return 0;
1333 err_reg_dev:
1334 free_netdev(ign->fb_tunnel_dev);
1335 err_alloc_dev:
1336 return err;
1339 static void ipgre_exit_net(struct net *net)
1341 struct ipgre_net *ign;
1342 LIST_HEAD(list);
1344 ign = net_generic(net, ipgre_net_id);
1345 rtnl_lock();
1346 ipgre_destroy_tunnels(ign, &list);
1347 unregister_netdevice_many(&list);
1348 rtnl_unlock();
1351 static struct pernet_operations ipgre_net_ops = {
1352 .init = ipgre_init_net,
1353 .exit = ipgre_exit_net,
1354 .id = &ipgre_net_id,
1355 .size = sizeof(struct ipgre_net),
1358 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1360 __be16 flags;
1362 if (!data)
1363 return 0;
1365 flags = 0;
1366 if (data[IFLA_GRE_IFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1368 if (data[IFLA_GRE_OFLAGS])
1369 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1370 if (flags & (GRE_VERSION|GRE_ROUTING))
1371 return -EINVAL;
1373 return 0;
1376 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1378 __be32 daddr;
1380 if (tb[IFLA_ADDRESS]) {
1381 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1382 return -EINVAL;
1383 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1384 return -EADDRNOTAVAIL;
1387 if (!data)
1388 goto out;
1390 if (data[IFLA_GRE_REMOTE]) {
1391 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1392 if (!daddr)
1393 return -EINVAL;
1396 out:
1397 return ipgre_tunnel_validate(tb, data);
1400 static void ipgre_netlink_parms(struct nlattr *data[],
1401 struct ip_tunnel_parm *parms)
1403 memset(parms, 0, sizeof(*parms));
1405 parms->iph.protocol = IPPROTO_GRE;
1407 if (!data)
1408 return;
1410 if (data[IFLA_GRE_LINK])
1411 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1413 if (data[IFLA_GRE_IFLAGS])
1414 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1416 if (data[IFLA_GRE_OFLAGS])
1417 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1419 if (data[IFLA_GRE_IKEY])
1420 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1422 if (data[IFLA_GRE_OKEY])
1423 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1425 if (data[IFLA_GRE_LOCAL])
1426 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1428 if (data[IFLA_GRE_REMOTE])
1429 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1431 if (data[IFLA_GRE_TTL])
1432 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1434 if (data[IFLA_GRE_TOS])
1435 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1437 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1438 parms->iph.frag_off = htons(IP_DF);
1441 static int ipgre_tap_init(struct net_device *dev)
1443 struct ip_tunnel *tunnel;
1445 tunnel = netdev_priv(dev);
1447 tunnel->dev = dev;
1448 strcpy(tunnel->parms.name, dev->name);
1450 ipgre_tunnel_bind_dev(dev);
1452 return 0;
1455 static const struct net_device_ops ipgre_tap_netdev_ops = {
1456 .ndo_init = ipgre_tap_init,
1457 .ndo_uninit = ipgre_tunnel_uninit,
1458 .ndo_start_xmit = ipgre_tunnel_xmit,
1459 .ndo_set_mac_address = eth_mac_addr,
1460 .ndo_validate_addr = eth_validate_addr,
1461 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1464 static void ipgre_tap_setup(struct net_device *dev)
1467 ether_setup(dev);
1469 dev->netdev_ops = &ipgre_tap_netdev_ops;
1470 dev->destructor = free_netdev;
1472 dev->iflink = 0;
1473 dev->features |= NETIF_F_NETNS_LOCAL;
1476 static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
1477 struct nlattr *data[])
1479 struct ip_tunnel *nt;
1480 struct net *net = dev_net(dev);
1481 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1482 int mtu;
1483 int err;
1485 nt = netdev_priv(dev);
1486 ipgre_netlink_parms(data, &nt->parms);
1488 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1489 return -EEXIST;
1491 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1492 random_ether_addr(dev->dev_addr);
1494 mtu = ipgre_tunnel_bind_dev(dev);
1495 if (!tb[IFLA_MTU])
1496 dev->mtu = mtu;
1498 err = register_netdevice(dev);
1499 if (err)
1500 goto out;
1502 dev_hold(dev);
1503 ipgre_tunnel_link(ign, nt);
1505 out:
1506 return err;
1509 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1510 struct nlattr *data[])
1512 struct ip_tunnel *t, *nt;
1513 struct net *net = dev_net(dev);
1514 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1515 struct ip_tunnel_parm p;
1516 int mtu;
1518 if (dev == ign->fb_tunnel_dev)
1519 return -EINVAL;
1521 nt = netdev_priv(dev);
1522 ipgre_netlink_parms(data, &p);
1524 t = ipgre_tunnel_locate(net, &p, 0);
1526 if (t) {
1527 if (t->dev != dev)
1528 return -EEXIST;
1529 } else {
1530 t = nt;
1532 if (dev->type != ARPHRD_ETHER) {
1533 unsigned nflags = 0;
1535 if (ipv4_is_multicast(p.iph.daddr))
1536 nflags = IFF_BROADCAST;
1537 else if (p.iph.daddr)
1538 nflags = IFF_POINTOPOINT;
1540 if ((dev->flags ^ nflags) &
1541 (IFF_POINTOPOINT | IFF_BROADCAST))
1542 return -EINVAL;
1545 ipgre_tunnel_unlink(ign, t);
1546 t->parms.iph.saddr = p.iph.saddr;
1547 t->parms.iph.daddr = p.iph.daddr;
1548 t->parms.i_key = p.i_key;
1549 if (dev->type != ARPHRD_ETHER) {
1550 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1551 memcpy(dev->broadcast, &p.iph.daddr, 4);
1553 ipgre_tunnel_link(ign, t);
1554 netdev_state_change(dev);
1557 t->parms.o_key = p.o_key;
1558 t->parms.iph.ttl = p.iph.ttl;
1559 t->parms.iph.tos = p.iph.tos;
1560 t->parms.iph.frag_off = p.iph.frag_off;
1562 if (t->parms.link != p.link) {
1563 t->parms.link = p.link;
1564 mtu = ipgre_tunnel_bind_dev(dev);
1565 if (!tb[IFLA_MTU])
1566 dev->mtu = mtu;
1567 netdev_state_change(dev);
1570 return 0;
1573 static size_t ipgre_get_size(const struct net_device *dev)
1575 return
1576 /* IFLA_GRE_LINK */
1577 nla_total_size(4) +
1578 /* IFLA_GRE_IFLAGS */
1579 nla_total_size(2) +
1580 /* IFLA_GRE_OFLAGS */
1581 nla_total_size(2) +
1582 /* IFLA_GRE_IKEY */
1583 nla_total_size(4) +
1584 /* IFLA_GRE_OKEY */
1585 nla_total_size(4) +
1586 /* IFLA_GRE_LOCAL */
1587 nla_total_size(4) +
1588 /* IFLA_GRE_REMOTE */
1589 nla_total_size(4) +
1590 /* IFLA_GRE_TTL */
1591 nla_total_size(1) +
1592 /* IFLA_GRE_TOS */
1593 nla_total_size(1) +
1594 /* IFLA_GRE_PMTUDISC */
1595 nla_total_size(1) +
1599 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1601 struct ip_tunnel *t = netdev_priv(dev);
1602 struct ip_tunnel_parm *p = &t->parms;
1604 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1605 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1606 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1607 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1608 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1609 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1610 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1611 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1612 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1613 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1615 return 0;
1617 nla_put_failure:
1618 return -EMSGSIZE;
1621 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1622 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1623 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1624 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1625 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1626 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1627 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1628 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1629 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1630 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1631 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1634 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1635 .kind = "gre",
1636 .maxtype = IFLA_GRE_MAX,
1637 .policy = ipgre_policy,
1638 .priv_size = sizeof(struct ip_tunnel),
1639 .setup = ipgre_tunnel_setup,
1640 .validate = ipgre_tunnel_validate,
1641 .newlink = ipgre_newlink,
1642 .changelink = ipgre_changelink,
1643 .get_size = ipgre_get_size,
1644 .fill_info = ipgre_fill_info,
1647 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1648 .kind = "gretap",
1649 .maxtype = IFLA_GRE_MAX,
1650 .policy = ipgre_policy,
1651 .priv_size = sizeof(struct ip_tunnel),
1652 .setup = ipgre_tap_setup,
1653 .validate = ipgre_tap_validate,
1654 .newlink = ipgre_newlink,
1655 .changelink = ipgre_changelink,
1656 .get_size = ipgre_get_size,
1657 .fill_info = ipgre_fill_info,
/*
 *	And now the module code and kernel interface.
 */
1664 static int __init ipgre_init(void)
1666 int err;
1668 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1670 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1671 printk(KERN_INFO "ipgre init: can't add protocol\n");
1672 return -EAGAIN;
1675 err = register_pernet_device(&ipgre_net_ops);
1676 if (err < 0)
1677 goto gen_device_failed;
1679 err = rtnl_link_register(&ipgre_link_ops);
1680 if (err < 0)
1681 goto rtnl_link_failed;
1683 err = rtnl_link_register(&ipgre_tap_ops);
1684 if (err < 0)
1685 goto tap_ops_failed;
1687 out:
1688 return err;
1690 tap_ops_failed:
1691 rtnl_link_unregister(&ipgre_link_ops);
1692 rtnl_link_failed:
1693 unregister_pernet_device(&ipgre_net_ops);
1694 gen_device_failed:
1695 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1696 goto out;
1699 static void __exit ipgre_fini(void)
1701 rtnl_link_unregister(&ipgre_tap_ops);
1702 rtnl_link_unregister(&ipgre_link_ops);
1703 unregister_pernet_device(&ipgre_net_ops);
1704 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1705 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1708 module_init(ipgre_init);
1709 module_exit(ipgre_fini);
1710 MODULE_LICENSE("GPL");
1711 MODULE_ALIAS_RTNL_LINK("gre");
1712 MODULE_ALIAS_RTNL_LINK("gretap");
1713 MODULE_ALIAS_NETDEV("gre0");