code style scripts/checkpatch.pl (linux-3.9-rc1) formatting
[linux-2.6.34.14-moxart.git] / net / ipv4 / ip_gre.c
blob6be6fe7c711454ae2644a3369ad5782024ed254d
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/slab.h>
18 #include <asm/uaccess.h>
19 #include <linux/skbuff.h>
20 #include <linux/netdevice.h>
21 #include <linux/in.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/if_arp.h>
25 #include <linux/mroute.h>
26 #include <linux/init.h>
27 #include <linux/in6.h>
28 #include <linux/inetdevice.h>
29 #include <linux/igmp.h>
30 #include <linux/netfilter_ipv4.h>
31 #include <linux/etherdevice.h>
32 #include <linux/if_ether.h>
34 #include <net/sock.h>
35 #include <net/ip.h>
36 #include <net/icmp.h>
37 #include <net/protocol.h>
38 #include <net/ipip.h>
39 #include <net/arp.h>
40 #include <net/checksum.h>
41 #include <net/dsfield.h>
42 #include <net/inet_ecn.h>
43 #include <net/xfrm.h>
44 #include <net/net_namespace.h>
45 #include <net/netns/generic.h>
46 #include <net/rtnetlink.h>
48 #ifdef CONFIG_IPV6
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #endif
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep skb->encapsulation counter (sort of local ttl),
   and silently drop packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skb, even if no tunneling is used.

   Current solution: HARD_TX_LOCK lock breaks dead loops.

   2. Networking dead loops would not kill routers, but would really
   kill network. IP hop limit plays role of "t->recursion" in this case,
   if we copy it from packet being encapsulated to upper header.
   It is very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from tunnel,
     so that this problem would be solved and traceroute output
     would even more informative. This idea appeared to be wrong:
     only Linux complies to rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want that OSPF worked or traceroute said something reasonable,
   we should search for another solution.

   One of them is to parse packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force DF flag on tunnels with preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches, that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop,
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we made
   all that we could make. Even if it is your gated who injected
   fatal route to network, even if it were you who configured
   fatal static route: you are innocent. :-)

   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident, how to make them modular.
   sit is integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
/* Forward declarations for the netlink ops and per-device callbacks. */
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

#define HASH_SIZE  16

/* Per-network-namespace state: four hash tables of tunnels plus the
 * catch-all fallback device ("gre0"). */
static int ipgre_net_id __read_mostly;
struct ipgre_net {
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	struct net_device *fb_tunnel_dev;
};

/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keysless packets, if not matched configured keyless tunnels
   will match fallback tunnel.
 */

/* Fold a 32-bit address (or key) down to a 4-bit bucket index. */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

/*
 * Locking : hash tables are protected by RCU and a spinlock
 */
static DEFINE_SPINLOCK(ipgre_lock);

/* Walk one hash chain under RCU; expects a local 'struct ip_tunnel *t'. */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
/* Given src, dst and key, find appropriate for input tunnel.
 *
 * Scans the four hash tables from most to least specific match
 * ((remote,local) -> (remote,*) -> (*,local) -> (*,*)).  Within each
 * table a tunnel matching on both link and device type wins outright
 * (score 0); otherwise the lowest-scoring candidate seen so far is
 * remembered.  Falls back to the namespace's fallback device if it is
 * up.  Caller must hold rcu_read_lock().
 */
static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
					      __be32 remote, __be32 local,
					      __be32 key, __be16 gre_proto)
{
	struct net *net = dev_net(dev);
	int link = dev->ifindex;
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(key);
	struct ip_tunnel *t, *cand = NULL;
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	/* ETH_P_TEB means transparent ethernet bridging (gretap device) */
	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
		       ARPHRD_ETHER : ARPHRD_IPGRE;
	int score, cand_score = 4;

	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		/* score: bit 0 = link mismatch, bit 1 = type mismatch */
		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
		if (remote != t->parms.iph.daddr ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
		/* a multicast local address also matches the daddr side */
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    key != t->parms.i_key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->dev->type != ARPHRD_IPGRE &&
		    t->dev->type != dev_type)
			continue;

		score = 0;
		if (t->parms.link != link)
			score |= 1;
		if (t->dev->type != dev_type)
			score |= 2;
		if (score == 0)
			return t;

		if (score < cand_score) {
			cand = t;
			cand_score = score;
		}
	}

	if (cand != NULL)
		return cand;

	/* Last resort: the namespace-wide fallback device, if it is up. */
	dev = ign->fb_tunnel_dev;
	if (dev->flags & IFF_UP)
		return netdev_priv(dev);

	return NULL;
}
292 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms)
295 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key;
298 unsigned h = HASH(key);
299 int prio = 0;
301 if (local)
302 prio |= 1;
303 if (remote && !ipv4_is_multicast(remote)) {
304 prio |= 2;
305 h ^= HASH(remote);
308 return &ign->tunnels[prio][h];
/* Convenience wrapper: hash-chain head for an existing tunnel. */
static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
					      struct ip_tunnel *t)
{
	return __ipgre_bucket(ign, &t->parms);
}
/* Insert a tunnel at the head of its hash chain.  The spinlock
 * serializes writers; rcu_assign_pointer() publishes the new node so
 * that concurrent RCU readers see t->next set before t is visible.
 */
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp = ipgre_bucket(ign, t);

	spin_lock_bh(&ipgre_lock);
	t->next = *tp;
	rcu_assign_pointer(*tp, t);
	spin_unlock_bh(&ipgre_lock);
}
/* Remove a tunnel from its hash chain.  Walking the chain needs no
 * lock (writers are serialized elsewhere); only the actual unlink is
 * done under the spinlock.
 */
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
	struct ip_tunnel **tp;

	for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
		if (t == *tp) {
			spin_lock_bh(&ipgre_lock);
			*tp = t->next;
			spin_unlock_bh(&ipgre_lock);
			break;
		}
	}
}
341 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
342 struct ip_tunnel_parm *parms,
343 int type)
345 __be32 remote = parms->iph.daddr;
346 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key;
348 int link = parms->link;
349 struct ip_tunnel *t, **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
353 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key &&
356 link == t->parms.link &&
357 type == t->dev->type)
358 break;
360 return t;
/* Look up a tunnel matching @parms; when none exists and @create is
 * set, allocate, register and link a new ipgre device for it.
 * Returns the (possibly new) tunnel, or NULL on lookup miss with
 * !create or on allocation/registration failure.
 */
static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
		struct ip_tunnel_parm *parms, int create)
{
	struct ip_tunnel *t, *nt;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
	if (t || !create)
		return t;

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		sprintf(name, "gre%%d");	/* "%d" template for dev_alloc_name */

	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &ipgre_link_ops;

	dev->mtu = ipgre_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* The hash table holds a reference; dropped in ipgre_tunnel_uninit(). */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
/* ndo_uninit: unhash the tunnel and drop the reference taken when it
 * was linked into the hash table. */
static void ipgre_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	ipgre_tunnel_unlink(ign, netdev_priv(dev));
	dev_put(dev);
}
/* ICMP error handler for the GRE protocol: record transient link
 * errors against the matching tunnel so the transmit path can relay
 * dst_link_failure() to the sender for a while.
 */
static void ipgre_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put GRE key to the third word
   in GRE header. It makes impossible maintaining even soft state for keyed
   GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder, rfc1812 was written by Cisco employee,
   what the hell these idiots break standards established
   by themselves???
 */

	struct iphdr *iph = (struct iphdr *)skb->data;
	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
	int grehlen = (iph->ihl<<2) + 4;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	__be16 flags;

	flags = p[0];
	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
		if (flags&(GRE_VERSION|GRE_ROUTING))
			return;
		/* account for the optional checksum and key words */
		if (flags&GRE_KEY) {
			grehlen += 4;
			if (flags&GRE_CSUM)
				grehlen += 4;
		}
	}

	/* If only 8 bytes returned, keyed message will be dropped here */
	if (skb_headlen(skb) < grehlen)
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	rcu_read_lock();
	/* the key, if present, is the last 32-bit word of the GRE header */
	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
				flags & GRE_KEY ?
				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
				p[1]);
	if (t == NULL || t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))
		goto out;

	/* ttl inherited from payload: TTL-exceeded is expected, ignore it */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	rcu_read_unlock();
	return;
}
508 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
510 if (INET_ECN_is_ce(iph->tos)) {
511 if (skb->protocol == htons(ETH_P_IP)) {
512 IP_ECN_set_ce(ip_hdr(skb));
513 } else if (skb->protocol == htons(ETH_P_IPV6)) {
514 IP6_ECN_set_ce(ipv6_hdr(skb));
519 static inline u8
520 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
522 u8 inner = 0;
523 if (skb->protocol == htons(ETH_P_IP))
524 inner = old_iph->tos;
525 else if (skb->protocol == htons(ETH_P_IPV6))
526 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
527 return INET_ECN_encapsulate(tos, inner);
/* GRE receive handler: parse the GRE header, find the owning tunnel,
 * verify checksum/sequence constraints, strip the encapsulation and
 * re-inject the inner packet.  Unmatched packets get an ICMP
 * port-unreachable (per RFC 1812 relaying) and are dropped.
 */
static int ipgre_rcv(struct sk_buff *skb)
{
	struct iphdr *iph;
	u8     *h;
	__be16    flags;
	__sum16   csum = 0;
	__be32 key = 0;
	u32    seqno = 0;
	struct ip_tunnel *tunnel;
	int    offset = 4;		/* base GRE header: flags + proto */
	__be16 gre_proto;
	unsigned int len;

	/* 16 = max GRE header we parse (4 base + csum + key + seq) */
	if (!pskb_may_pull(skb, 16))
		goto drop_nolock;

	iph = ip_hdr(skb);
	h = skb->data;
	flags = *(__be16*)h;

	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
		/* - Version must be 0.
		   - We do not support routing headers.
		 */
		if (flags&(GRE_VERSION|GRE_ROUTING))
			goto drop_nolock;

		if (flags&GRE_CSUM) {
			switch (skb->ip_summed) {
			case CHECKSUM_COMPLETE:
				csum = csum_fold(skb->csum);
				if (!csum)
					break;
				/* fall through: hw csum didn't verify, redo in sw */
			case CHECKSUM_NONE:
				skb->csum = 0;
				csum = __skb_checksum_complete(skb);
				skb->ip_summed = CHECKSUM_COMPLETE;
			}
			offset += 4;
		}
		if (flags&GRE_KEY) {
			key = *(__be32*)(h + offset);
			offset += 4;
		}
		if (flags&GRE_SEQ) {
			seqno = ntohl(*(__be32*)(h + offset));
			offset += 4;
		}
	}

	gre_proto = *(__be16 *)(h + 2);

	rcu_read_lock();
	if ((tunnel = ipgre_tunnel_lookup(skb->dev,
					  iph->saddr, iph->daddr, key,
					  gre_proto))) {
		struct net_device_stats *stats = &tunnel->dev->stats;

		secpath_reset(skb);

		skb->protocol = gre_proto;
		/* WCCP version 1 and 2 protocol decoding.
		 * - Change protocol to IP
		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
		 */
		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
			skb->protocol = htons(ETH_P_IP);
			/* 0x4. = an IPv4 version nibble; otherwise WCCPv2 redirect hdr */
			if ((*(h + offset) & 0xF0) != 0x40)
				offset += 4;
		}

		skb->mac_header = skb->network_header;
		__pskb_pull(skb, offset);
		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
		skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			/* Looped back packet, drop it! */
			if (skb_rtable(skb)->fl.iif == 0)
				goto drop;
			stats->multicast++;
			skb->pkt_type = PACKET_BROADCAST;
		}
#endif

		/* checksum must verify if present, and must be present
		 * when the tunnel is configured to require it */
		if (((flags&GRE_CSUM) && csum) ||
		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
			stats->rx_crc_errors++;
			stats->rx_errors++;
			goto drop;
		}
		if (tunnel->parms.i_flags&GRE_SEQ) {
			/* signed delta: reject stale/out-of-order sequence numbers */
			if (!(flags&GRE_SEQ) ||
			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
				stats->rx_fifo_errors++;
				stats->rx_errors++;
				goto drop;
			}
			tunnel->i_seqno = seqno + 1;
		}

		len = skb->len;

		/* Warning: All skb pointers will be invalidated! */
		if (tunnel->dev->type == ARPHRD_ETHER) {
			if (!pskb_may_pull(skb, ETH_HLEN)) {
				stats->rx_length_errors++;
				stats->rx_errors++;
				goto drop;
			}

			iph = ip_hdr(skb);
			skb->protocol = eth_type_trans(skb, tunnel->dev);
			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
		}

		stats->rx_packets++;
		stats->rx_bytes += len;
		skb->dev = tunnel->dev;
		skb_dst_drop(skb);
		nf_reset(skb);

		skb_reset_network_header(skb);
		ipgre_ecn_decapsulate(iph, skb);

		netif_rx(skb);
		rcu_read_unlock();
		return(0);
	}
	/* no tunnel matched: tell the sender (still under rcu_read_lock) */
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
	rcu_read_unlock();
drop_nolock:
	kfree_skb(skb);
	return(0);
}
/* Transmit path: route the outer packet, enforce PMTU for the inner
 * protocol, ensure headroom, then build and push the outer IP + GRE
 * headers and hand the result to the IP layer via IPTUNNEL_XMIT().
 * Always consumes the skb and returns NETDEV_TX_OK.
 */
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net_device_stats *stats = &dev->stats;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
	struct iphdr  *old_iph = ip_hdr(skb);
	struct iphdr  *tiph;
	u8     tos;
	__be16 df;
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    gre_hlen;
	__be32 dst;
	int    mtu;

	if (dev->type == ARPHRD_ETHER)
		IPCB(skb)->flags = 0;

	if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
		/* header already built by ipgre_header(); outer iph is in data */
		gre_hlen = 0;
		tiph = (struct iphdr *)skb->data;
	} else {
		gre_hlen = tunnel->hlen;
		tiph = &tunnel->parms.iph;
	}

	if ((dst = tiph->daddr) == 0) {
		/* NBMA tunnel: derive the destination from the inner packet */

		if (skb_dst(skb) == NULL) {
			stats->tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			if ((dst = rt->rt_gateway) == 0)
				goto tx_error_icmp;
		}
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			struct in6_addr *addr6;
			int addr_type;
			struct neighbour *neigh = skb_dst(skb)->neighbour;

			if (neigh == NULL)
				goto tx_error;

			addr6 = (struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* only v4-compatible v6 addresses carry a usable v4 dst */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				goto tx_error_icmp;

			dst = addr6->s6_addr32[3];
		}
#endif
		else
			goto tx_error;
	}

	tos = tiph->tos;
	if (tos == 1) {
		/* tos==1 means "inherit" from the inner IPv4 header */
		tos = 0;
		if (skb->protocol == htons(ETH_P_IP))
			tos = old_iph->tos;
	}

	{
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = dst,
						.saddr = tiph->saddr,
						.tos = RT_TOS(tos) } },
				    .proto = IPPROTO_GRE };
		if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
			stats->tx_carrier_errors++;
			goto tx_error;
		}
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		/* routed back to ourselves: local dead loop */
		ip_rt_put(rt);
		stats->collisions++;
		goto tx_error;
	}

	df = tiph->frag_off;
	if (df)
		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		df |= (old_iph->frag_off&htons(IP_DF));

		if ((old_iph->frag_off&htons(IP_DF)) &&
		    mtu < ntohs(old_iph->tot_len)) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#ifdef CONFIG_IPV6
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
			}
		}

		if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			ip_rt_put(rt);
			goto tx_error;
		}
	}
#endif

	/* relay recent ICMP errors to the sender for a limited count */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;

	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (max_headroom > dev->needed_headroom)
			dev->needed_headroom = max_headroom;
		if (!new_skb) {
			ip_rt_put(rt);
			txq->tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb_reset_transport_header(skb);
	skb_push(skb, gre_hlen);
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0) {
		/* ttl==0 means "inherit" from the inner packet */
		if (skb->protocol == htons(ETH_P_IP))
			iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
		else if (skb->protocol == htons(ETH_P_IPV6))
			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
#endif
		else
			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	}

	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
				   htons(ETH_P_TEB) : skb->protocol;

	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
		/* fill optional words back-to-front: seq, then key, then csum */
		__be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

		if (tunnel->parms.o_flags&GRE_SEQ) {
			++tunnel->o_seqno;
			*ptr = htonl(tunnel->o_seqno);
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_KEY) {
			*ptr = tunnel->parms.o_key;
			ptr--;
		}
		if (tunnel->parms.o_flags&GRE_CSUM) {
			*ptr = 0;
			*(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
		}
	}

	nf_reset(skb);

	IPTUNNEL_XMIT();
	return NETDEV_TX_OK;

tx_error_icmp:
	dst_link_failure(skb);

tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
/* Bind the tunnel to its underlying output device (if determinable)
 * and precompute header sizes.  Returns the MTU the tunnel device
 * should use (clamped to a minimum of 68, the IPv4 minimum).
 */
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int addend = sizeof(struct iphdr) + 4;	/* outer IP + base GRE */

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */

	if (iph->daddr) {
		struct flowi fl = { .oif = tunnel->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
			tdev = rt->u.dst.dev;
			ip_rt_put(rt);
		}

		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Precalculate GRE options length */
	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
		if (tunnel->parms.o_flags&GRE_CSUM)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_KEY)
			addend += 4;
		if (tunnel->parms.o_flags&GRE_SEQ)
			addend += 4;
	}
	dev->needed_headroom = addend + hlen;
	mtu -= dev->hard_header_len + addend;

	if (mtu < 68)
		mtu = 68;

	tunnel->hlen = addend;

	return mtu;
}
/* ioctl handler implementing SIOCGETTUNNEL / SIOCADDTUNNEL /
 * SIOCCHGTUNNEL / SIOCDELTUNNEL.  Add/change/delete require
 * CAP_NET_ADMIN; parameters are exchanged with userspace through
 * ifr->ifr_ifru.ifru_data as a struct ip_tunnel_parm.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		if (dev == ign->fb_tunnel_dev) {
			/* on the fallback device, look up by user-given parms */
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* fixed ttl forces DF (see "Problems & solutions" above) */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* cannot change the broadcast/p2p nature of a live dev */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* re-hash under the new addresses/keys */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			err = -EPERM;
			/* the fallback device itself may not be deleted */
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1093 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1095 struct ip_tunnel *tunnel = netdev_priv(dev);
1096 if (new_mtu < 68 ||
1097 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1098 return -EINVAL;
1099 dev->mtu = new_mtu;
1100 return 0;
/* Nice toy. Unfortunately, useless in real life :-)
   It allows to construct virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea was this bicycle invented before me,
   so that I had to set ARPHRD_IPGRE to a random value.
   I have an impression, that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ftp fec0:6666:6666::193.233.7.65
 */
/* header_ops .create: pre-build the outer IP + GRE header in front of
 * the payload (used by the broadcast-"LAN" mode above).  Returns the
 * header length on success, or its negation when the destination is
 * still unknown and must be resolved later.
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			unsigned short type,
			const void *daddr, const void *saddr, unsigned len)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
	__be16 *p = (__be16*)(iph+1);	/* GRE header follows the IP header */

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
	p[0]		= t->parms.o_flags;
	p[1]		= htons(type);

	/*
	 *	Set the source hardware address.
	 */

	if (saddr)
		memcpy(&iph->saddr, saddr, 4);
	if (daddr)
		memcpy(&iph->daddr, daddr, 4);
	if (iph->daddr)
		return t->hlen;

	return -t->hlen;
}
/* header_ops .parse: the "hardware address" of a GRE device is the
 * outer IPv4 source address (4 bytes). */
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
	struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
	memcpy(haddr, &iph->saddr, 4);
	return 4;
}
/* Link-layer header operations for broadcast/NBMA GRE devices. */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
/* ndo_open for multicast tunnels: resolve the output device for the
 * multicast destination and join the group on it.  Remembers the
 * joined ifindex in t->mlink so ipgre_close() can leave the group. */
static int ipgre_open(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {
		struct flowi fl = { .oif = t->parms.link,
				    .nl_u = { .ip4_u =
					      { .daddr = t->parms.iph.daddr,
						.saddr = t->parms.iph.saddr,
						.tos = RT_TOS(t->parms.iph.tos) } },
				    .proto = IPPROTO_GRE };
		struct rtable *rt;
		if (ip_route_output_key(dev_net(dev), &rt, &fl))
			return -EADDRNOTAVAIL;
		dev = rt->u.dst.dev;	/* from here on, the underlying device */
		ip_rt_put(rt);
		if (__in_dev_get_rtnl(dev) == NULL)
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
	}
	return 0;
}
/* ndo_stop: leave the multicast group joined in ipgre_open(). */
static int ipgre_close(struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;
		in_dev = inetdev_by_index(dev_net(dev), t->mlink);
		if (in_dev) {
			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
			in_dev_put(in_dev);
		}
	}
	return 0;
}

#endif
/* Device operations for plain (non-gretap) GRE tunnel devices. */
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};
/* alloc_netdev() setup callback: defaults for a GRE tunnel device.
 * MTU/headroom assume the minimal encapsulation (outer IP + 4-byte
 * GRE); ipgre_tunnel_bind_dev() refines them once parms are known. */
static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->destructor		= free_netdev;

	dev->type		= ARPHRD_IPGRE;
	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* "hw address" is the IPv4 saddr */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
/* ndo_init: finish per-device setup once parms are in place.  Devices
 * with a fixed destination get header_ops so headers can be built up
 * front; multicast destinations additionally require a source address
 * and become broadcast-capable. */
static int ipgre_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {
			if (!iph->saddr)
				return -EINVAL;
			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;
		}
#endif
	} else
		dev->header_ops = &ipgre_header_ops;

	return 0;
}
/*
 * ipgre_fb_tunnel_init - initialize the per-namespace fallback device "gre0".
 *
 * The fallback tunnel has wildcard (zero) endpoints and is linked into the
 * tunnels_wc hash slot so it catches otherwise-unmatched GRE traffic.
 */
static void ipgre_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version = 4;
	iph->protocol = IPPROTO_GRE;
	iph->ihl = 5;
	/* Outer IP header plus 4 bytes of basic GRE header. */
	tunnel->hlen = sizeof(struct iphdr) + 4;

	/* The hash table holds a reference to the device. */
	dev_hold(dev);
	ign->tunnels_wc[0] = tunnel;
}
/* IPPROTO_GRE receive/error hooks; netns_ok: safe in all namespaces. */
static const struct net_protocol ipgre_protocol = {
	.handler	= ipgre_rcv,
	.err_handler	= ipgre_err,
	.netns_ok	= 1,
};
1293 static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1295 int prio;
1297 for (prio = 0; prio < 4; prio++) {
1298 int h;
1299 for (h = 0; h < HASH_SIZE; h++) {
1300 struct ip_tunnel *t = ign->tunnels[prio][h];
1302 while (t != NULL) {
1303 unregister_netdevice_queue(t->dev, head);
1304 t = t->next;
1310 static int __net_init ipgre_init_net(struct net *net)
1312 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1313 int err;
1315 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1316 ipgre_tunnel_setup);
1317 if (!ign->fb_tunnel_dev) {
1318 err = -ENOMEM;
1319 goto err_alloc_dev;
1321 dev_net_set(ign->fb_tunnel_dev, net);
1323 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1324 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1326 if ((err = register_netdev(ign->fb_tunnel_dev)))
1327 goto err_reg_dev;
1329 return 0;
1331 err_reg_dev:
1332 free_netdev(ign->fb_tunnel_dev);
1333 err_alloc_dev:
1334 return err;
1337 static void __net_exit ipgre_exit_net(struct net *net)
1339 struct ipgre_net *ign;
1340 LIST_HEAD(list);
1342 ign = net_generic(net, ipgre_net_id);
1343 rtnl_lock();
1344 ipgre_destroy_tunnels(ign, &list);
1345 unregister_netdevice_many(&list);
1346 rtnl_unlock();
/* Per-network-namespace lifecycle hooks; .id/.size let the core allocate
 * a struct ipgre_net per namespace, retrievable via net_generic(). */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ipgre_net),
};
1356 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1358 __be16 flags;
1360 if (!data)
1361 return 0;
1363 flags = 0;
1364 if (data[IFLA_GRE_IFLAGS])
1365 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366 if (data[IFLA_GRE_OFLAGS])
1367 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (flags & (GRE_VERSION|GRE_ROUTING))
1369 return -EINVAL;
1371 return 0;
1374 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1376 __be32 daddr;
1378 if (tb[IFLA_ADDRESS]) {
1379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380 return -EINVAL;
1381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382 return -EADDRNOTAVAIL;
1385 if (!data)
1386 goto out;
1388 if (data[IFLA_GRE_REMOTE]) {
1389 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390 if (!daddr)
1391 return -EINVAL;
1394 out:
1395 return ipgre_tunnel_validate(tb, data);
/*
 * ipgre_netlink_parms - translate netlink attributes into tunnel parameters.
 *
 * Starts from zeroed parms (protocol forced to GRE) and copies over every
 * attribute present.  Note PMTU discovery defaults to ON: DF is set unless
 * IFLA_GRE_PMTUDISC is present and explicitly zero.
 */
static void ipgre_netlink_parms(struct nlattr *data[],
				struct ip_tunnel_parm *parms)
{
	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (!data)
		return;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
		parms->iph.frag_off = htons(IP_DF);
}
1439 static int ipgre_tap_init(struct net_device *dev)
1441 struct ip_tunnel *tunnel;
1443 tunnel = netdev_priv(dev);
1445 tunnel->dev = dev;
1446 strcpy(tunnel->parms.name, dev->name);
1448 ipgre_tunnel_bind_dev(dev);
1450 return 0;
/* Device operations for gretap devices: standard Ethernet address handling
 * on top of the shared GRE xmit/uninit paths. */
static const struct net_device_ops ipgre_tap_netdev_ops = {
	.ndo_init		= ipgre_tap_init,
	.ndo_uninit		= ipgre_tunnel_uninit,
	.ndo_start_xmit		= ipgre_tunnel_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ipgre_tunnel_change_mtu,
};
/*
 * ipgre_tap_setup - alloc_netdev() setup callback for gretap devices.
 *
 * Starts from a generic Ethernet device (ether_setup) and overrides the
 * ops and namespace locality.
 */
static void ipgre_tap_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops = &ipgre_tap_netdev_ops;
	dev->destructor = free_netdev;

	dev->iflink = 0;
	/* Tunnel devices must not be moved between network namespaces. */
	dev->features |= NETIF_F_NETNS_LOCAL;
}
/*
 * ipgre_newlink - rtnl_link "newlink" handler for gre/gretap devices.
 *
 * Parses attributes into the device's private parms, refuses duplicates
 * (same parms and device type), assigns a random MAC for gretap when none
 * was supplied, binds to the underlying device to compute the MTU, and
 * finally registers and hashes the tunnel.  Runs under RTNL.
 */
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
			 struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &nt->parms);

	if (ipgre_tunnel_find(net, &nt->parms, dev->type))
		return -EEXIST;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		random_ether_addr(dev->dev_addr);

	mtu = ipgre_tunnel_bind_dev(dev);
	/* Only apply the computed MTU if userspace did not request one. */
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* The tunnel hash holds a device reference. */
	dev_hold(dev);
	ipgre_tunnel_link(ign, nt);

out:
	return err;
}
/*
 * ipgre_changelink - rtnl_link "changelink" handler.
 *
 * The fallback device cannot be reconfigured.  If the new parameters match
 * an existing tunnel it must be this very device (-EEXIST otherwise); if
 * they match none, the endpoints/i_key change, which requires re-hashing:
 * the tunnel is unlinked, endpoints updated, and linked back.  The device's
 * point-to-point/broadcast mode must not change for non-Ethernet tunnels.
 * Parameters that do not affect hashing (o_key, ttl, tos, frag_off, link)
 * are updated in place afterwards.  Runs under RTNL.
 */
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ign->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	ipgre_netlink_parms(data, &p);

	t = ipgre_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned nflags = 0;

			if (ipv4_is_multicast(p.iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p.iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* Changing the link mode of a live device is not
			 * supported. */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}

		/* Endpoints/i_key participate in the hash: re-link. */
		ipgre_tunnel_unlink(ign, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		ipgre_tunnel_link(ign, t);
		netdev_state_change(dev);
	}

	t->parms.o_key = p.o_key;
	t->parms.iph.ttl = p.iph.ttl;
	t->parms.iph.tos = p.iph.tos;
	t->parms.iph.frag_off = p.iph.frag_off;

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = ipgre_tunnel_bind_dev(dev);
		/* Only apply the computed MTU if userspace did not set one. */
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
/*
 * ipgre_get_size - worst-case netlink attribute payload for ipgre_fill_info().
 */
static size_t ipgre_get_size(const struct net_device *dev)
{
	return
		/* IFLA_GRE_LINK */
		nla_total_size(4) +
		/* IFLA_GRE_IFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_OFLAGS */
		nla_total_size(2) +
		/* IFLA_GRE_IKEY */
		nla_total_size(4) +
		/* IFLA_GRE_OKEY */
		nla_total_size(4) +
		/* IFLA_GRE_LOCAL */
		nla_total_size(4) +
		/* IFLA_GRE_REMOTE */
		nla_total_size(4) +
		/* IFLA_GRE_TTL */
		nla_total_size(1) +
		/* IFLA_GRE_TOS */
		nla_total_size(1) +
		/* IFLA_GRE_PMTUDISC */
		nla_total_size(1) +
		0;
}
/*
 * ipgre_fill_info - dump tunnel parameters as netlink attributes.
 *
 * The NLA_PUT_* macros jump to the nla_put_failure label when the skb runs
 * out of tailroom, so the label is reachable even though no explicit goto
 * appears here.  PMTUDISC is reported as a boolean derived from the DF bit.
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
/* Netlink attribute policy shared by the gre and gretap link ops; address
 * attributes are validated by length against the iphdr fields they fill. */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
/* rtnl_link operations for "ip link add type gre" (layer-3 GRE). */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
/* rtnl_link operations for "ip link add type gretap" (Ethernet-over-GRE);
 * shares policy and netlink handlers with the plain gre variant, differing
 * only in setup/validate. */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1659 * And now the modules code and kernel interface.
1662 static int __init ipgre_init(void)
1664 int err;
1666 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1668 err = register_pernet_device(&ipgre_net_ops);
1669 if (err < 0)
1670 return err;
1672 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673 if (err < 0) {
1674 printk(KERN_INFO "ipgre init: can't add protocol\n");
1675 goto add_proto_failed;
1678 err = rtnl_link_register(&ipgre_link_ops);
1679 if (err < 0)
1680 goto rtnl_link_failed;
1682 err = rtnl_link_register(&ipgre_tap_ops);
1683 if (err < 0)
1684 goto tap_ops_failed;
1686 out:
1687 return err;
1689 tap_ops_failed:
1690 rtnl_link_unregister(&ipgre_link_ops);
1691 rtnl_link_failed:
1692 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1693 add_proto_failed:
1694 unregister_pernet_device(&ipgre_net_ops);
1695 goto out;
/*
 * ipgre_fini - module exit: tear down in reverse order of ipgre_init().
 */
static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
		printk(KERN_INFO "ipgre close: can't remove protocol\n");
	unregister_pernet_device(&ipgre_net_ops);
}
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
/* Autoload on "ip link add type gre|gretap". */
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
/* Autoload when the gre0 fallback device is requested by name. */
MODULE_ALIAS_NETDEV("gre0");