Fix Intel IOMMU write-buffer flushing
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv4 / ip_gre.c
blob0101521f366b74546cbb90f60e7e6faac597b430
1 /*
2 * Linux NET3: GRE over IP protocol decoder.
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
13 #include <linux/capability.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <asm/uaccess.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/tcp.h>
22 #include <linux/udp.h>
23 #include <linux/if_arp.h>
24 #include <linux/mroute.h>
25 #include <linux/init.h>
26 #include <linux/in6.h>
27 #include <linux/inetdevice.h>
28 #include <linux/igmp.h>
29 #include <linux/netfilter_ipv4.h>
30 #include <linux/etherdevice.h>
31 #include <linux/if_ether.h>
33 #include <net/sock.h>
34 #include <net/ip.h>
35 #include <net/icmp.h>
36 #include <net/protocol.h>
37 #include <net/ipip.h>
38 #include <net/arp.h>
39 #include <net/checksum.h>
40 #include <net/dsfield.h>
41 #include <net/inet_ecn.h>
42 #include <net/xfrm.h>
43 #include <net/net_namespace.h>
44 #include <net/netns/generic.h>
45 #include <net/rtnetlink.h>
47 #ifdef CONFIG_IPV6
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #endif
54 Problems & solutions
55 --------------------
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
69 Current solution: t->recursion lock breaks dead loops. It looks
70 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 quickly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
119 Alexey Kuznetsov.
/* Forward declarations for objects that are referenced before their definitions. */
122 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
123 static int ipgre_tunnel_init(struct net_device *dev);
124 static void ipgre_tunnel_setup(struct net_device *dev);
125 static int ipgre_tunnel_bind_dev(struct net_device *dev);
127 /* Fallback tunnel: no source, no destination, no key, no options */
/* Number of buckets in each tunnel hash table; HASH() masks with 0xF to stay in range. */
129 #define HASH_SIZE 16
/* Per-namespace slot id passed to net_generic() / net_assign_generic(). */
131 static int ipgre_net_id;
132 struct ipgre_net {
133 struct ip_tunnel *tunnels[4][HASH_SIZE];
135 struct net_device *fb_tunnel_dev;
138 /* Tunnel hash table */
141 4 hash tables:
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
148 We require exact key match i.e. if a key is present in packet
149 it will match only tunnel with the same key; if it is not present,
150 it will match only keyless tunnel.
152 All keyless packets, if not matched by configured keyless tunnels,
153 will match fallback tunnel.
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15) by
 * XOR-ing its two lowest nibbles.  The argument is parenthesized so that
 * an expression argument (e.g. HASH(a | b)) is hashed as a whole.
 */
#define HASH(addr) (((__force u32)(addr)^((__force u32)(addr)>>4))&0xF)
/* Named views of the four tables in struct ipgre_net, by which endpoints are set. */
158 #define tunnels_r_l tunnels[3]
159 #define tunnels_r tunnels[2]
160 #define tunnels_l tunnels[1]
161 #define tunnels_wc tunnels[0]
/* Taken for write when a chain head/link is updated and for read around lookups in the rcv/err paths. */
163 static DEFINE_RWLOCK(ipgre_lock);
165 /* Given src, dst and key, find appropriate for input tunnel. */
167 static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
174 struct ip_tunnel *t2 = NULL;
175 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE;
179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
182 if (t->dev->type == dev_type)
183 return t;
184 if (t->dev->type == ARPHRD_IPGRE && !t2)
185 t2 = t;
190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
191 if (remote == t->parms.iph.daddr) {
192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
193 if (t->dev->type == dev_type)
194 return t;
195 if (t->dev->type == ARPHRD_IPGRE && !t2)
196 t2 = t;
201 for (t = ign->tunnels_l[h1]; t; t = t->next) {
202 if (local == t->parms.iph.saddr ||
203 (local == t->parms.iph.daddr &&
204 ipv4_is_multicast(local))) {
205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
206 if (t->dev->type == dev_type)
207 return t;
208 if (t->dev->type == ARPHRD_IPGRE && !t2)
209 t2 = t;
214 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
216 if (t->dev->type == dev_type)
217 return t;
218 if (t->dev->type == ARPHRD_IPGRE && !t2)
219 t2 = t;
223 if (t2)
224 return t2;
226 if (ign->fb_tunnel_dev->flags&IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev);
228 return NULL;
231 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
232 struct ip_tunnel_parm *parms)
234 __be32 remote = parms->iph.daddr;
235 __be32 local = parms->iph.saddr;
236 __be32 key = parms->i_key;
237 unsigned h = HASH(key);
238 int prio = 0;
240 if (local)
241 prio |= 1;
242 if (remote && !ipv4_is_multicast(remote)) {
243 prio |= 2;
244 h ^= HASH(remote);
247 return &ign->tunnels[prio][h];
250 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
251 struct ip_tunnel *t)
253 return __ipgre_bucket(ign, &t->parms);
256 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
258 struct ip_tunnel **tp = ipgre_bucket(ign, t);
260 t->next = *tp;
261 write_lock_bh(&ipgre_lock);
262 *tp = t;
263 write_unlock_bh(&ipgre_lock);
266 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
268 struct ip_tunnel **tp;
270 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
271 if (t == *tp) {
272 write_lock_bh(&ipgre_lock);
273 *tp = t->next;
274 write_unlock_bh(&ipgre_lock);
275 break;
280 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
281 struct ip_tunnel_parm *parms,
282 int type)
284 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key;
287 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
290 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key &&
294 type == t->dev->type)
295 break;
297 return t;
300 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
301 struct ip_tunnel_parm *parms, int create)
303 struct ip_tunnel *t, *nt;
304 struct net_device *dev;
305 char name[IFNAMSIZ];
306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
308 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
309 if (t || !create)
310 return t;
312 if (parms->name[0])
313 strlcpy(name, parms->name, IFNAMSIZ);
314 else
315 sprintf(name, "gre%%d");
317 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
318 if (!dev)
319 return NULL;
321 dev_net_set(dev, net);
323 if (strchr(name, '%')) {
324 if (dev_alloc_name(dev, name) < 0)
325 goto failed_free;
328 nt = netdev_priv(dev);
329 nt->parms = *parms;
330 dev->rtnl_link_ops = &ipgre_link_ops;
332 dev->mtu = ipgre_tunnel_bind_dev(dev);
334 if (register_netdevice(dev) < 0)
335 goto failed_free;
337 dev_hold(dev);
338 ipgre_tunnel_link(ign, nt);
339 return nt;
341 failed_free:
342 free_netdev(dev);
343 return NULL;
346 static void ipgre_tunnel_uninit(struct net_device *dev)
348 struct net *net = dev_net(dev);
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 ipgre_tunnel_unlink(ign, netdev_priv(dev));
352 dev_put(dev);
356 static void ipgre_err(struct sk_buff *skb, u32 info)
359 /* All the routers (except for Linux) return only
360 8 bytes of packet payload. It means, that precise relaying of
361 ICMP in the real Internet is absolutely infeasible.
363 Moreover, Cisco "wise men" put GRE key to the third word
364 in GRE header. It makes impossible maintaining even soft state for keyed
365 GRE tunnels with enabled checksum. Tell them "thank you".
367 Well, I wonder, rfc1812 was written by Cisco employee,
368 what the hell these idiots break standrads established
369 by themself???
372 struct iphdr *iph = (struct iphdr *)skb->data;
373 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
374 int grehlen = (iph->ihl<<2) + 4;
375 const int type = icmp_hdr(skb)->type;
376 const int code = icmp_hdr(skb)->code;
377 struct ip_tunnel *t;
378 __be16 flags;
380 flags = p[0];
381 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
382 if (flags&(GRE_VERSION|GRE_ROUTING))
383 return;
384 if (flags&GRE_KEY) {
385 grehlen += 4;
386 if (flags&GRE_CSUM)
387 grehlen += 4;
391 /* If only 8 bytes returned, keyed message will be dropped here */
392 if (skb_headlen(skb) < grehlen)
393 return;
395 switch (type) {
396 default:
397 case ICMP_PARAMETERPROB:
398 return;
400 case ICMP_DEST_UNREACH:
401 switch (code) {
402 case ICMP_SR_FAILED:
403 case ICMP_PORT_UNREACH:
404 /* Impossible event. */
405 return;
406 case ICMP_FRAG_NEEDED:
407 /* Soft state for pmtu is maintained by IP core. */
408 return;
409 default:
410 /* All others are translated to HOST_UNREACH.
411 rfc2003 contains "deep thoughts" about NET_UNREACH,
412 I believe they are just ether pollution. --ANK
414 break;
416 break;
417 case ICMP_TIME_EXCEEDED:
418 if (code != ICMP_EXC_TTL)
419 return;
420 break;
423 read_lock(&ipgre_lock);
424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
425 flags & GRE_KEY ?
426 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427 p[1]);
428 if (t == NULL || t->parms.iph.daddr == 0 ||
429 ipv4_is_multicast(t->parms.iph.daddr))
430 goto out;
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out;
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
436 t->err_count++;
437 else
438 t->err_count = 1;
439 t->err_time = jiffies;
440 out:
441 read_unlock(&ipgre_lock);
442 return;
445 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
447 if (INET_ECN_is_ce(iph->tos)) {
448 if (skb->protocol == htons(ETH_P_IP)) {
449 IP_ECN_set_ce(ip_hdr(skb));
450 } else if (skb->protocol == htons(ETH_P_IPV6)) {
451 IP6_ECN_set_ce(ipv6_hdr(skb));
456 static inline u8
457 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
459 u8 inner = 0;
460 if (skb->protocol == htons(ETH_P_IP))
461 inner = old_iph->tos;
462 else if (skb->protocol == htons(ETH_P_IPV6))
463 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
464 return INET_ECN_encapsulate(tos, inner);
/*
 * GRE receive handler (installed as ipgre_protocol.handler).
 *
 * Parses the GRE flags and the optional checksum, key and sequence
 * words, looks the tunnel up with ipgre_tunnel_lookup() under the read
 * lock, strips the GRE header, applies the tunnel's checksum and
 * sequence policy (parms.i_flags), updates the device statistics and
 * passes the inner packet to netif_rx().  When no tunnel matches, an
 * ICMP port-unreachable is sent and the skb is freed.  Every path
 * visible here returns 0.
 */
467 static int ipgre_rcv(struct sk_buff *skb)
469 struct iphdr *iph;
470 u8 *h;
471 __be16 flags;
472 __sum16 csum = 0;
473 __be32 key = 0;
474 u32 seqno = 0;
475 struct ip_tunnel *tunnel;
476 int offset = 4;
477 __be16 gre_proto;
478 unsigned int len;
/* 16 bytes = the 4-byte base GRE header plus the three optional 4-byte words read below. */
480 if (!pskb_may_pull(skb, 16))
481 goto drop_nolock;
483 iph = ip_hdr(skb);
484 h = skb->data;
485 flags = *(__be16*)h;
487 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
488 /* - Version must be 0.
489 - We do not support routing headers.
491 if (flags&(GRE_VERSION|GRE_ROUTING))
492 goto drop_nolock;
494 if (flags&GRE_CSUM) {
495 switch (skb->ip_summed) {
496 case CHECKSUM_COMPLETE:
497 csum = csum_fold(skb->csum);
498 if (!csum)
499 break;
500 /* fall through */
501 case CHECKSUM_NONE:
502 skb->csum = 0;
503 csum = __skb_checksum_complete(skb);
504 skb->ip_summed = CHECKSUM_COMPLETE;
506 offset += 4;
508 if (flags&GRE_KEY) {
509 key = *(__be32*)(h + offset);
510 offset += 4;
512 if (flags&GRE_SEQ) {
513 seqno = ntohl(*(__be32*)(h + offset));
514 offset += 4;
/* The protocol type is the second 16-bit word of the GRE header. */
518 gre_proto = *(__be16 *)(h + 2);
520 read_lock(&ipgre_lock);
521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
522 iph->saddr, iph->daddr, key,
523 gre_proto))) {
524 struct net_device_stats *stats = &tunnel->dev->stats;
526 secpath_reset(skb);
528 skb->protocol = gre_proto;
529 /* WCCP version 1 and 2 protocol decoding.
530 * - Change protocol to IP
531 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
533 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
534 skb->protocol = htons(ETH_P_IP);
535 if ((*(h + offset) & 0xF0) != 0x40)
536 offset += 4;
539 skb->mac_header = skb->network_header;
540 __pskb_pull(skb, offset);
541 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
542 skb->pkt_type = PACKET_HOST;
543 #ifdef CONFIG_NET_IPGRE_BROADCAST
544 if (ipv4_is_multicast(iph->daddr)) {
545 /* Looped back packet, drop it! */
546 if (skb->rtable->fl.iif == 0)
547 goto drop;
548 stats->multicast++;
549 skb->pkt_type = PACKET_BROADCAST;
551 #endif
/* Checksum policy: drop when verification failed, or when the tunnel requires a checksum and the packet has none. */
553 if (((flags&GRE_CSUM) && csum) ||
554 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
555 stats->rx_crc_errors++;
556 stats->rx_errors++;
557 goto drop;
/* Sequence policy: when the tunnel requires sequencing, drop packets without a number or with one older than expected. */
559 if (tunnel->parms.i_flags&GRE_SEQ) {
560 if (!(flags&GRE_SEQ) ||
561 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
562 stats->rx_fifo_errors++;
563 stats->rx_errors++;
564 goto drop;
566 tunnel->i_seqno = seqno + 1;
/* Byte count for the statistics is taken before any Ethernet header is pulled. */
569 len = skb->len;
571 /* Warning: All skb pointers will be invalidated! */
572 if (tunnel->dev->type == ARPHRD_ETHER) {
573 if (!pskb_may_pull(skb, ETH_HLEN)) {
574 stats->rx_length_errors++;
575 stats->rx_errors++;
576 goto drop;
/* pskb_may_pull() may have moved the data, so the outer header pointer is re-read. */
579 iph = ip_hdr(skb);
580 skb->protocol = eth_type_trans(skb, tunnel->dev);
581 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
584 stats->rx_packets++;
585 stats->rx_bytes += len;
586 skb->dev = tunnel->dev;
587 dst_release(skb->dst);
588 skb->dst = NULL;
589 nf_reset(skb);
591 skb_reset_network_header(skb);
592 ipgre_ecn_decapsulate(iph, skb);
594 netif_rx(skb);
595 read_unlock(&ipgre_lock);
596 return(0);
/* Reached when the lookup found no tunnel: report port unreachable, then fall into the drop path. */
598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
600 drop:
601 read_unlock(&ipgre_lock);
602 drop_nolock:
603 kfree_skb(skb);
604 return(0);
/*
 * Transmit handler (ipgre_netdev_ops.ndo_start_xmit): wrap skb in an
 * outer IPv4 + GRE header and send it towards the tunnel endpoint.
 *
 * Steps, in order: re-entry guard on tunnel->recursion; choose the outer
 * header template and destination (for an NBMA tunnel, daddr == 0, the
 * destination comes from the inner route's gateway or from an
 * IPv4-compatible IPv6 neighbour address); route lookup; path-MTU
 * handling for IPv4/IPv6 payloads; headroom check with reallocation if
 * needed; build the outer IP header and the GRE flags/protocol words;
 * fill the optional sequence/key/checksum words; hand over to
 * IPTUNNEL_XMIT().  Every return visible here yields 0, and every error
 * path visible here frees the skb.
 */
607 static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
609 struct ip_tunnel *tunnel = netdev_priv(dev);
610 struct net_device_stats *stats = &tunnel->dev->stats;
611 struct iphdr *old_iph = ip_hdr(skb);
612 struct iphdr *tiph;
613 u8 tos;
614 __be16 df;
615 struct rtable *rt; /* Route to the other host */
616 struct net_device *tdev; /* Device to other host */
617 struct iphdr *iph; /* Our new IP header */
618 unsigned int max_headroom; /* The extra header space needed */
619 int gre_hlen;
620 __be32 dst;
621 int mtu;
/* Re-entry guard: a non-zero count means this tunnel is already inside xmit. */
623 if (tunnel->recursion++) {
624 stats->collisions++;
625 goto tx_error;
628 if (dev->type == ARPHRD_ETHER)
629 IPCB(skb)->flags = 0;
/* With header_ops on an IPGRE device the header at skb->data serves as the template and nothing more is pushed (gre_hlen = 0). */
631 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
632 gre_hlen = 0;
633 tiph = (struct iphdr *)skb->data;
634 } else {
635 gre_hlen = tunnel->hlen;
636 tiph = &tunnel->parms.iph;
639 if ((dst = tiph->daddr) == 0) {
640 /* NBMA tunnel */
642 if (skb->dst == NULL) {
643 stats->tx_fifo_errors++;
644 goto tx_error;
647 if (skb->protocol == htons(ETH_P_IP)) {
648 rt = skb->rtable;
649 if ((dst = rt->rt_gateway) == 0)
650 goto tx_error_icmp;
652 #ifdef CONFIG_IPV6
653 else if (skb->protocol == htons(ETH_P_IPV6)) {
654 struct in6_addr *addr6;
655 int addr_type;
656 struct neighbour *neigh = skb->dst->neighbour;
658 if (neigh == NULL)
659 goto tx_error;
661 addr6 = (struct in6_addr *)&neigh->primary_key;
662 addr_type = ipv6_addr_type(addr6);
664 if (addr_type == IPV6_ADDR_ANY) {
665 addr6 = &ipv6_hdr(skb)->daddr;
666 addr_type = ipv6_addr_type(addr6);
669 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
670 goto tx_error_icmp;
/* The IPv4 destination is the last 32 bits of the IPv4-compatible IPv6 address. */
672 dst = addr6->s6_addr32[3];
674 #endif
675 else
676 goto tx_error;
/* Bit 0 of the configured TOS means "inherit": take the inner IPv4 TOS when there is one, then clear the bit. */
679 tos = tiph->tos;
680 if (tos&1) {
681 if (skb->protocol == htons(ETH_P_IP))
682 tos = old_iph->tos;
683 tos &= ~1;
687 struct flowi fl = { .oif = tunnel->parms.link,
688 .nl_u = { .ip4_u =
689 { .daddr = dst,
690 .saddr = tiph->saddr,
691 .tos = RT_TOS(tos) } },
692 .proto = IPPROTO_GRE };
693 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
694 stats->tx_carrier_errors++;
695 goto tx_error;
698 tdev = rt->u.dst.dev;
/* The route leads back into this very device: count a collision and drop. */
700 if (tdev == dev) {
701 ip_rt_put(rt);
702 stats->collisions++;
703 goto tx_error;
/* With a non-zero frag_off template (DF) the MTU is derived from the outer route; otherwise from the inner route or the device. */
706 df = tiph->frag_off;
707 if (df)
708 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
709 else
710 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
712 if (skb->dst)
713 skb->dst->ops->update_pmtu(skb->dst, mtu);
715 if (skb->protocol == htons(ETH_P_IP)) {
716 df |= (old_iph->frag_off&htons(IP_DF));
718 if ((old_iph->frag_off&htons(IP_DF)) &&
719 mtu < ntohs(old_iph->tot_len)) {
720 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
721 ip_rt_put(rt);
722 goto tx_error;
725 #ifdef CONFIG_IPV6
726 else if (skb->protocol == htons(ETH_P_IPV6)) {
727 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
729 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
730 if ((tunnel->parms.iph.daddr &&
731 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
732 rt6->rt6i_dst.plen == 128) {
733 rt6->rt6i_flags |= RTF_MODIFIED;
734 skb->dst->metrics[RTAX_MTU-1] = mtu;
738 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
739 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
740 ip_rt_put(rt);
741 goto tx_error;
744 #endif
/* Soft error state recorded by ipgre_err(): while recent, each transmit consumes one count and signals a link failure. */
746 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
748 tunnel->err_count--;
750 dst_link_failure(skb);
751 } else
752 tunnel->err_count = 0;
/* Need room for the lower device's link-layer header plus our own headers, on an skb we may write to. */
755 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
757 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
758 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
759 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
760 if (!new_skb) {
761 ip_rt_put(rt);
762 stats->tx_dropped++;
763 dev_kfree_skb(skb);
764 tunnel->recursion--;
765 return 0;
767 if (skb->sk)
768 skb_set_owner_w(new_skb, skb->sk);
769 dev_kfree_skb(skb);
770 skb = new_skb;
/* The data moved to a new buffer, so the inner header pointer is re-read. */
771 old_iph = ip_hdr(skb);
774 skb_reset_transport_header(skb);
775 skb_push(skb, gre_hlen);
776 skb_reset_network_header(skb);
777 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
778 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
779 IPSKB_REROUTED);
780 dst_release(skb->dst);
781 skb->dst = &rt->u.dst;
784 * Push down and install the IPIP header.
787 iph = ip_hdr(skb);
788 iph->version = 4;
789 iph->ihl = sizeof(struct iphdr) >> 2;
790 iph->frag_off = df;
791 iph->protocol = IPPROTO_GRE;
792 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
793 iph->daddr = rt->rt_dst;
794 iph->saddr = rt->rt_src;
/* A template TTL of 0 means "inherit": inner TTL / hop limit, else the route's hop-limit metric. */
796 if ((iph->ttl = tiph->ttl) == 0) {
797 if (skb->protocol == htons(ETH_P_IP))
798 iph->ttl = old_iph->ttl;
799 #ifdef CONFIG_IPV6
800 else if (skb->protocol == htons(ETH_P_IPV6))
801 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
802 #endif
803 else
804 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
/* First GRE word: flags, then the payload protocol (ETH_P_TEB for Ethernet devices). */
807 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
808 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
809 htons(ETH_P_TEB) : skb->protocol;
/* Optional words are filled back to front, starting at the last word of the header: sequence, key, checksum. */
811 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
812 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
814 if (tunnel->parms.o_flags&GRE_SEQ) {
815 ++tunnel->o_seqno;
816 *ptr = htonl(tunnel->o_seqno);
817 ptr--;
819 if (tunnel->parms.o_flags&GRE_KEY) {
820 *ptr = tunnel->parms.o_key;
821 ptr--;
823 if (tunnel->parms.o_flags&GRE_CSUM) {
824 *ptr = 0;
825 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
829 nf_reset(skb);
/* NOTE(review): IPTUNNEL_XMIT() is a macro defined outside this file; presumably it sends skb using the locals set up above - confirm against its definition. */
831 IPTUNNEL_XMIT();
832 tunnel->recursion--;
833 return 0;
835 tx_error_icmp:
836 dst_link_failure(skb);
838 tx_error:
839 stats->tx_errors++;
840 dev_kfree_skb(skb);
841 tunnel->recursion--;
842 return 0;
845 static int ipgre_tunnel_bind_dev(struct net_device *dev)
847 struct net_device *tdev = NULL;
848 struct ip_tunnel *tunnel;
849 struct iphdr *iph;
850 int hlen = LL_MAX_HEADER;
851 int mtu = ETH_DATA_LEN;
852 int addend = sizeof(struct iphdr) + 4;
854 tunnel = netdev_priv(dev);
855 iph = &tunnel->parms.iph;
857 /* Guess output device to choose reasonable mtu and needed_headroom */
859 if (iph->daddr) {
860 struct flowi fl = { .oif = tunnel->parms.link,
861 .nl_u = { .ip4_u =
862 { .daddr = iph->daddr,
863 .saddr = iph->saddr,
864 .tos = RT_TOS(iph->tos) } },
865 .proto = IPPROTO_GRE };
866 struct rtable *rt;
867 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
868 tdev = rt->u.dst.dev;
869 ip_rt_put(rt);
872 if (dev->type != ARPHRD_ETHER)
873 dev->flags |= IFF_POINTOPOINT;
876 if (!tdev && tunnel->parms.link)
877 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
879 if (tdev) {
880 hlen = tdev->hard_header_len + tdev->needed_headroom;
881 mtu = tdev->mtu;
883 dev->iflink = tunnel->parms.link;
885 /* Precalculate GRE options length */
886 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
887 if (tunnel->parms.o_flags&GRE_CSUM)
888 addend += 4;
889 if (tunnel->parms.o_flags&GRE_KEY)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_SEQ)
892 addend += 4;
894 dev->needed_headroom = addend + hlen;
895 mtu -= dev->hard_header_len - addend;
897 if (mtu < 68)
898 mtu = 68;
900 tunnel->hlen = addend;
902 return mtu;
/*
 * Tunnel configuration ioctl (ipgre_netdev_ops.ndo_do_ioctl).
 *
 * SIOCGETTUNNEL: copy a tunnel's parameters to user space.  On the
 *   fallback device the tunnel is selected by the parameters passed in;
 *   otherwise (or when none matches) the device's own tunnel is used.
 * SIOCADDTUNNEL / SIOCCHGTUNNEL: need CAP_NET_ADMIN.  Validate the
 *   requested parameters, then create a tunnel (ADD) or re-key and
 *   re-hash the existing one (CHG).
 * SIOCDELTUNNEL: needs CAP_NET_ADMIN.  Unregister the device; on the
 *   fallback device the victim is selected by the parameters passed in,
 *   and the fallback tunnel itself cannot be deleted.
 *
 * Returns 0 on success or a negative errno.
 */
905 static int
906 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
908 int err = 0;
909 struct ip_tunnel_parm p;
910 struct ip_tunnel *t;
911 struct net *net = dev_net(dev);
912 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
914 switch (cmd) {
915 case SIOCGETTUNNEL:
916 t = NULL;
917 if (dev == ign->fb_tunnel_dev) {
918 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
919 err = -EFAULT;
920 break;
922 t = ipgre_tunnel_locate(net, &p, 0);
/* Nothing matched (or not the fallback device): report this device's own tunnel. */
924 if (t == NULL)
925 t = netdev_priv(dev);
926 memcpy(&p, &t->parms, sizeof(p));
927 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
928 err = -EFAULT;
929 break;
931 case SIOCADDTUNNEL:
932 case SIOCCHGTUNNEL:
933 err = -EPERM;
934 if (!capable(CAP_NET_ADMIN))
935 goto done;
937 err = -EFAULT;
938 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
939 goto done;
/* Accept only a plain 20-byte IPv4/GRE header template, no fragment bits except DF, and no GRE version/routing flags. */
941 err = -EINVAL;
942 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
943 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
944 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
945 goto done;
/* A fixed TTL forces DF on the outer header. */
946 if (p.iph.ttl)
947 p.iph.frag_off |= htons(IP_DF);
/* Keys are only meaningful when the matching GRE_KEY flag is set. */
949 if (!(p.i_flags&GRE_KEY))
950 p.i_key = 0;
951 if (!(p.o_flags&GRE_KEY))
952 p.o_key = 0;
954 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
956 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
957 if (t != NULL) {
/* Another device already uses the requested addresses/key. */
958 if (t->dev != dev) {
959 err = -EEXIST;
960 break;
962 } else {
963 unsigned nflags = 0;
965 t = netdev_priv(dev);
967 if (ipv4_is_multicast(p.iph.daddr))
968 nflags = IFF_BROADCAST;
969 else if (p.iph.daddr)
970 nflags = IFF_POINTOPOINT;
/* The change must not alter the device's point-to-point / broadcast nature. */
972 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
973 err = -EINVAL;
974 break;
/* The bucket depends on addresses and key, so unhash, update, then re-hash. */
976 ipgre_tunnel_unlink(ign, t);
977 t->parms.iph.saddr = p.iph.saddr;
978 t->parms.iph.daddr = p.iph.daddr;
979 t->parms.i_key = p.i_key;
980 t->parms.o_key = p.o_key;
981 memcpy(dev->dev_addr, &p.iph.saddr, 4);
982 memcpy(dev->broadcast, &p.iph.daddr, 4);
983 ipgre_tunnel_link(ign, t);
984 netdev_state_change(dev);
988 if (t) {
989 err = 0;
990 if (cmd == SIOCCHGTUNNEL) {
991 t->parms.iph.ttl = p.iph.ttl;
992 t->parms.iph.tos = p.iph.tos;
993 t->parms.iph.frag_off = p.iph.frag_off;
/* A new underlying link changes header sizes and MTU, so re-bind. */
994 if (t->parms.link != p.link) {
995 t->parms.link = p.link;
996 dev->mtu = ipgre_tunnel_bind_dev(dev);
997 netdev_state_change(dev);
1000 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1001 err = -EFAULT;
1002 } else
1003 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1004 break;
1006 case SIOCDELTUNNEL:
1007 err = -EPERM;
1008 if (!capable(CAP_NET_ADMIN))
1009 goto done;
1011 if (dev == ign->fb_tunnel_dev) {
1012 err = -EFAULT;
1013 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1014 goto done;
1015 err = -ENOENT;
1016 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1017 goto done;
/* The fallback tunnel itself may not be deleted. */
1018 err = -EPERM;
1019 if (t == netdev_priv(ign->fb_tunnel_dev))
1020 goto done;
1021 dev = t->dev;
1023 unregister_netdevice(dev);
1024 err = 0;
1025 break;
1027 default:
1028 err = -EINVAL;
1031 done:
1032 return err;
1035 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1037 struct ip_tunnel *tunnel = netdev_priv(dev);
1038 if (new_mtu < 68 ||
1039 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1040 return -EINVAL;
1041 dev->mtu = new_mtu;
1042 return 0;
1045 /* Nice toy. Unfortunately, useless in real life :-)
1046 It allows to construct virtual multiprotocol broadcast "LAN"
1047 over the Internet, provided multicast routing is tuned.
1050 I have no idea was this bicycle invented before me,
1051 so that I had to set ARPHRD_IPGRE to a random value.
1052 I have an impression, that Cisco could make something similar,
1053 but this feature is apparently missing in IOS<=11.2(8).
1055 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1056 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1058 ping -t 255 224.66.66.66
1060 If nobody answers, mbone does not work.
1062 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1063 ip addr add 10.66.66.<somewhat>/24 dev Universe
1064 ifconfig Universe up
1065 ifconfig Universe add fe80::<Your_real_addr>/10
1066 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1067 ftp 10.66.66.66
1069 ftp fec0:6666:6666::193.233.7.65
1074 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1075 unsigned short type,
1076 const void *daddr, const void *saddr, unsigned len)
1078 struct ip_tunnel *t = netdev_priv(dev);
1079 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1080 __be16 *p = (__be16*)(iph+1);
1082 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1083 p[0] = t->parms.o_flags;
1084 p[1] = htons(type);
1087 * Set the source hardware address.
1090 if (saddr)
1091 memcpy(&iph->saddr, saddr, 4);
1093 if (daddr) {
1094 memcpy(&iph->daddr, daddr, 4);
1095 return t->hlen;
1097 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1098 return t->hlen;
1100 return -t->hlen;
1103 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1105 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1106 memcpy(haddr, &iph->saddr, 4);
1107 return 4;
1110 static const struct header_ops ipgre_header_ops = {
1111 .create = ipgre_header,
1112 .parse = ipgre_header_parse,
1115 #ifdef CONFIG_NET_IPGRE_BROADCAST
1116 static int ipgre_open(struct net_device *dev)
1118 struct ip_tunnel *t = netdev_priv(dev);
1120 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1121 struct flowi fl = { .oif = t->parms.link,
1122 .nl_u = { .ip4_u =
1123 { .daddr = t->parms.iph.daddr,
1124 .saddr = t->parms.iph.saddr,
1125 .tos = RT_TOS(t->parms.iph.tos) } },
1126 .proto = IPPROTO_GRE };
1127 struct rtable *rt;
1128 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1129 return -EADDRNOTAVAIL;
1130 dev = rt->u.dst.dev;
1131 ip_rt_put(rt);
1132 if (__in_dev_get_rtnl(dev) == NULL)
1133 return -EADDRNOTAVAIL;
1134 t->mlink = dev->ifindex;
1135 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1137 return 0;
1140 static int ipgre_close(struct net_device *dev)
1142 struct ip_tunnel *t = netdev_priv(dev);
1144 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1145 struct in_device *in_dev;
1146 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1147 if (in_dev) {
1148 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1149 in_dev_put(in_dev);
1152 return 0;
1155 #endif
1157 static const struct net_device_ops ipgre_netdev_ops = {
1158 .ndo_init = ipgre_tunnel_init,
1159 .ndo_uninit = ipgre_tunnel_uninit,
1160 #ifdef CONFIG_NET_IPGRE_BROADCAST
1161 .ndo_open = ipgre_open,
1162 .ndo_stop = ipgre_close,
1163 #endif
1164 .ndo_start_xmit = ipgre_tunnel_xmit,
1165 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1166 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1169 static void ipgre_tunnel_setup(struct net_device *dev)
1171 dev->netdev_ops = &ipgre_netdev_ops;
1172 dev->destructor = free_netdev;
1174 dev->type = ARPHRD_IPGRE;
1175 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1176 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1177 dev->flags = IFF_NOARP;
1178 dev->iflink = 0;
1179 dev->addr_len = 4;
1180 dev->features |= NETIF_F_NETNS_LOCAL;
1183 static int ipgre_tunnel_init(struct net_device *dev)
1185 struct ip_tunnel *tunnel;
1186 struct iphdr *iph;
1188 tunnel = netdev_priv(dev);
1189 iph = &tunnel->parms.iph;
1191 tunnel->dev = dev;
1192 strcpy(tunnel->parms.name, dev->name);
1194 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1195 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1197 if (iph->daddr) {
1198 #ifdef CONFIG_NET_IPGRE_BROADCAST
1199 if (ipv4_is_multicast(iph->daddr)) {
1200 if (!iph->saddr)
1201 return -EINVAL;
1202 dev->flags = IFF_BROADCAST;
1203 dev->header_ops = &ipgre_header_ops;
1205 #endif
1206 } else
1207 dev->header_ops = &ipgre_header_ops;
1209 return 0;
1212 static void ipgre_fb_tunnel_init(struct net_device *dev)
1214 struct ip_tunnel *tunnel = netdev_priv(dev);
1215 struct iphdr *iph = &tunnel->parms.iph;
1216 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1218 tunnel->dev = dev;
1219 strcpy(tunnel->parms.name, dev->name);
1221 iph->version = 4;
1222 iph->protocol = IPPROTO_GRE;
1223 iph->ihl = 5;
1224 tunnel->hlen = sizeof(struct iphdr) + 4;
1226 dev_hold(dev);
1227 ign->tunnels_wc[0] = tunnel;
1231 static struct net_protocol ipgre_protocol = {
1232 .handler = ipgre_rcv,
1233 .err_handler = ipgre_err,
1234 .netns_ok = 1,
1237 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1239 int prio;
1241 for (prio = 0; prio < 4; prio++) {
1242 int h;
1243 for (h = 0; h < HASH_SIZE; h++) {
1244 struct ip_tunnel *t;
1245 while ((t = ign->tunnels[prio][h]) != NULL)
1246 unregister_netdevice(t->dev);
1251 static int ipgre_init_net(struct net *net)
1253 int err;
1254 struct ipgre_net *ign;
1256 err = -ENOMEM;
1257 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1258 if (ign == NULL)
1259 goto err_alloc;
1261 err = net_assign_generic(net, ipgre_net_id, ign);
1262 if (err < 0)
1263 goto err_assign;
1265 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1266 ipgre_tunnel_setup);
1267 if (!ign->fb_tunnel_dev) {
1268 err = -ENOMEM;
1269 goto err_alloc_dev;
1271 dev_net_set(ign->fb_tunnel_dev, net);
1273 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1274 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1276 if ((err = register_netdev(ign->fb_tunnel_dev)))
1277 goto err_reg_dev;
1279 return 0;
1281 err_reg_dev:
1282 free_netdev(ign->fb_tunnel_dev);
1283 err_alloc_dev:
1284 /* nothing */
1285 err_assign:
1286 kfree(ign);
1287 err_alloc:
1288 return err;
1291 static void ipgre_exit_net(struct net *net)
1293 struct ipgre_net *ign;
1295 ign = net_generic(net, ipgre_net_id);
1296 rtnl_lock();
1297 ipgre_destroy_tunnels(ign);
1298 rtnl_unlock();
1299 kfree(ign);
1302 static struct pernet_operations ipgre_net_ops = {
1303 .init = ipgre_init_net,
1304 .exit = ipgre_exit_net,
1307 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1309 __be16 flags;
1311 if (!data)
1312 return 0;
1314 flags = 0;
1315 if (data[IFLA_GRE_IFLAGS])
1316 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1317 if (data[IFLA_GRE_OFLAGS])
1318 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1319 if (flags & (GRE_VERSION|GRE_ROUTING))
1320 return -EINVAL;
1322 return 0;
1325 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1327 __be32 daddr;
1329 if (tb[IFLA_ADDRESS]) {
1330 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1331 return -EINVAL;
1332 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1333 return -EADDRNOTAVAIL;
1336 if (!data)
1337 goto out;
1339 if (data[IFLA_GRE_REMOTE]) {
1340 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1341 if (!daddr)
1342 return -EINVAL;
1345 out:
1346 return ipgre_tunnel_validate(tb, data);
1349 static void ipgre_netlink_parms(struct nlattr *data[],
1350 struct ip_tunnel_parm *parms)
1352 memset(parms, 0, sizeof(*parms));
1354 parms->iph.protocol = IPPROTO_GRE;
1356 if (!data)
1357 return;
1359 if (data[IFLA_GRE_LINK])
1360 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1362 if (data[IFLA_GRE_IFLAGS])
1363 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1365 if (data[IFLA_GRE_OFLAGS])
1366 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368 if (data[IFLA_GRE_IKEY])
1369 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1371 if (data[IFLA_GRE_OKEY])
1372 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1374 if (data[IFLA_GRE_LOCAL])
1375 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1377 if (data[IFLA_GRE_REMOTE])
1378 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1380 if (data[IFLA_GRE_TTL])
1381 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1383 if (data[IFLA_GRE_TOS])
1384 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1386 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1387 parms->iph.frag_off = htons(IP_DF);
1390 static int ipgre_tap_init(struct net_device *dev)
1392 struct ip_tunnel *tunnel;
1394 tunnel = netdev_priv(dev);
1396 tunnel->dev = dev;
1397 strcpy(tunnel->parms.name, dev->name);
1399 ipgre_tunnel_bind_dev(dev);
1401 return 0;
1404 static const struct net_device_ops ipgre_tap_netdev_ops = {
1405 .ndo_init = ipgre_tap_init,
1406 .ndo_uninit = ipgre_tunnel_uninit,
1407 .ndo_start_xmit = ipgre_tunnel_xmit,
1408 .ndo_set_mac_address = eth_mac_addr,
1409 .ndo_validate_addr = eth_validate_addr,
1410 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1413 static void ipgre_tap_setup(struct net_device *dev)
1416 ether_setup(dev);
1418 dev->netdev_ops = &ipgre_netdev_ops;
1419 dev->destructor = free_netdev;
1421 dev->iflink = 0;
1422 dev->features |= NETIF_F_NETNS_LOCAL;
1425 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1426 struct nlattr *data[])
1428 struct ip_tunnel *nt;
1429 struct net *net = dev_net(dev);
1430 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1431 int mtu;
1432 int err;
1434 nt = netdev_priv(dev);
1435 ipgre_netlink_parms(data, &nt->parms);
1437 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1438 return -EEXIST;
1440 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1441 random_ether_addr(dev->dev_addr);
1443 mtu = ipgre_tunnel_bind_dev(dev);
1444 if (!tb[IFLA_MTU])
1445 dev->mtu = mtu;
1447 err = register_netdevice(dev);
1448 if (err)
1449 goto out;
1451 dev_hold(dev);
1452 ipgre_tunnel_link(ign, nt);
1454 out:
1455 return err;
1458 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1459 struct nlattr *data[])
1461 struct ip_tunnel *t, *nt;
1462 struct net *net = dev_net(dev);
1463 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1464 struct ip_tunnel_parm p;
1465 int mtu;
1467 if (dev == ign->fb_tunnel_dev)
1468 return -EINVAL;
1470 nt = netdev_priv(dev);
1471 ipgre_netlink_parms(data, &p);
1473 t = ipgre_tunnel_locate(net, &p, 0);
1475 if (t) {
1476 if (t->dev != dev)
1477 return -EEXIST;
1478 } else {
1479 unsigned nflags = 0;
1481 t = nt;
1483 if (ipv4_is_multicast(p.iph.daddr))
1484 nflags = IFF_BROADCAST;
1485 else if (p.iph.daddr)
1486 nflags = IFF_POINTOPOINT;
1488 if ((dev->flags ^ nflags) &
1489 (IFF_POINTOPOINT | IFF_BROADCAST))
1490 return -EINVAL;
1492 ipgre_tunnel_unlink(ign, t);
1493 t->parms.iph.saddr = p.iph.saddr;
1494 t->parms.iph.daddr = p.iph.daddr;
1495 t->parms.i_key = p.i_key;
1496 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1497 memcpy(dev->broadcast, &p.iph.daddr, 4);
1498 ipgre_tunnel_link(ign, t);
1499 netdev_state_change(dev);
1502 t->parms.o_key = p.o_key;
1503 t->parms.iph.ttl = p.iph.ttl;
1504 t->parms.iph.tos = p.iph.tos;
1505 t->parms.iph.frag_off = p.iph.frag_off;
1507 if (t->parms.link != p.link) {
1508 t->parms.link = p.link;
1509 mtu = ipgre_tunnel_bind_dev(dev);
1510 if (!tb[IFLA_MTU])
1511 dev->mtu = mtu;
1512 netdev_state_change(dev);
1515 return 0;
1518 static size_t ipgre_get_size(const struct net_device *dev)
1520 return
1521 /* IFLA_GRE_LINK */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_IFLAGS */
1524 nla_total_size(2) +
1525 /* IFLA_GRE_OFLAGS */
1526 nla_total_size(2) +
1527 /* IFLA_GRE_IKEY */
1528 nla_total_size(4) +
1529 /* IFLA_GRE_OKEY */
1530 nla_total_size(4) +
1531 /* IFLA_GRE_LOCAL */
1532 nla_total_size(4) +
1533 /* IFLA_GRE_REMOTE */
1534 nla_total_size(4) +
1535 /* IFLA_GRE_TTL */
1536 nla_total_size(1) +
1537 /* IFLA_GRE_TOS */
1538 nla_total_size(1) +
1539 /* IFLA_GRE_PMTUDISC */
1540 nla_total_size(1) +
1544 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1546 struct ip_tunnel *t = netdev_priv(dev);
1547 struct ip_tunnel_parm *p = &t->parms;
1549 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1550 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1551 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1552 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1553 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1554 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1555 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1556 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1557 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1558 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1560 return 0;
1562 nla_put_failure:
1563 return -EMSGSIZE;
1566 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1567 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1568 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1569 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1570 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1571 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
1572 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1573 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1574 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1575 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1576 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1579 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1580 .kind = "gre",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tunnel_setup,
1585 .validate = ipgre_tunnel_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1592 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1593 .kind = "gretap",
1594 .maxtype = IFLA_GRE_MAX,
1595 .policy = ipgre_policy,
1596 .priv_size = sizeof(struct ip_tunnel),
1597 .setup = ipgre_tap_setup,
1598 .validate = ipgre_tap_validate,
1599 .newlink = ipgre_newlink,
1600 .changelink = ipgre_changelink,
1601 .get_size = ipgre_get_size,
1602 .fill_info = ipgre_fill_info,
/*
 *	And now the module code and kernel interface.
 */
1609 static int __init ipgre_init(void)
1611 int err;
1613 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1615 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1616 printk(KERN_INFO "ipgre init: can't add protocol\n");
1617 return -EAGAIN;
1620 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1621 if (err < 0)
1622 goto gen_device_failed;
1624 err = rtnl_link_register(&ipgre_link_ops);
1625 if (err < 0)
1626 goto rtnl_link_failed;
1628 err = rtnl_link_register(&ipgre_tap_ops);
1629 if (err < 0)
1630 goto tap_ops_failed;
1632 out:
1633 return err;
1635 tap_ops_failed:
1636 rtnl_link_unregister(&ipgre_link_ops);
1637 rtnl_link_failed:
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639 gen_device_failed:
1640 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1641 goto out;
1644 static void __exit ipgre_fini(void)
1646 rtnl_link_unregister(&ipgre_tap_ops);
1647 rtnl_link_unregister(&ipgre_link_ops);
1648 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1649 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1650 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1653 module_init(ipgre_init);
1654 module_exit(ipgre_fini);
1655 MODULE_LICENSE("GPL");
1656 MODULE_ALIAS_RTNL_LINK("gre");
1657 MODULE_ALIAS_RTNL_LINK("gretap");