gre: Add netlink interface
net/ipv4/ip_gre.c
/*
 *	Linux NET3:	GRE over IP protocol decoder.
 *
 *	Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ipip.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

#ifdef CONFIG_IPV6
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: the t->recursion lock breaks dead loops. It looks
   like the dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   becomes multithreaded, we will have to use skb->encapsulation.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion"
   in this case, if we copy it from the packet being encapsulated to
   the upper header. It is a very good solution, but it introduces
   two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)); all routers (at least, in the neighbourhood
     of mine) return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work, or traceroute to say something
   reasonable, we should search for another solution.

   One of them is to parse the packet, trying to detect inner
   encapsulation made by our node. It is difficult or even impossible,
   especially taking fragmentation into account. To be short, it is
   not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   quickly degrades to a value < 68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the packets being encapsulated
   have DF set. But it is not our problem! Nobody could accuse us,
   we made all that we could make. Even if it is your gated who
   injected the fatal route to the network, even if it was you who
   configured the fatal static route: you are innocent. :-)

   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, while ipip and gre are naturally
   modular. We could extract the common parts (hash table, ioctls etc.)
   into a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
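The t->recursion idea above is easiest to see in isolation. Below is a minimal
userspace sketch (struct tunnel and tunnel_xmit are illustrative names, not the
kernel's API) of how a per-tunnel re-entry counter turns an infinite transmit
loop into a counted drop, mirroring the tunnel->recursion++ check in
ipgre_tunnel_xmit() further down:

#include <stdio.h>

struct tunnel {
        int recursion;          /* non-zero while we are inside xmit */
};

static int tunnel_xmit(struct tunnel *t, int loops)
{
        if (t->recursion++) {   /* re-entered ourselves: a local dead loop */
                t->recursion--;
                printf("loop detected, dropping packet\n");
                return -1;
        }
        /* ...encapsulate; a bad route would hand the packet back to us: */
        if (loops > 0)
                tunnel_xmit(t, loops - 1);
        t->recursion--;
        return 0;
}

int main(void)
{
        struct tunnel t = { 0 };
        tunnel_xmit(&t, 3);     /* prints one drop: re-entry is cut off at once */
        return 0;
}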
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
static int ipgre_tunnel_bind_dev(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */

static int ipgre_fb_tunnel_init(struct net_device *dev);
#define HASH_SIZE  16

static int ipgre_net_id;
struct ipgre_net {
        struct ip_tunnel *tunnels[4][HASH_SIZE];

        struct net_device *fb_tunnel_dev;
};
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched to a configured keyless tunnel,
   will match the fallback tunnel.
 */

#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

#define tunnels_r_l     tunnels[3]
#define tunnels_r       tunnels[2]
#define tunnels_l       tunnels[1]
#define tunnels_wc      tunnels[0]
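As a worked example of the bucket math, here is a standalone sketch; note the
kernel's HASH() operates on the network-byte-order __be32, so which nibbles get
folded depends on byte order (the value below is written in host order purely
for illustration):

#include <stdio.h>
#include <stdint.h>

#define HASH(addr) ((((uint32_t)(addr)) ^ (((uint32_t)(addr)) >> 4)) & 0xF)

int main(void)
{
        uint32_t remote = 0x0a000001;   /* 10.0.0.1 */
        uint32_t key    = 42;

        /* a keyed tunnel with both endpoints set lands in
           tunnels[3][HASH(remote) ^ HASH(key)] */
        printf("HASH(remote)=%u HASH(key)=%u -> bucket %u of tunnels[3]\n",
               (unsigned)HASH(remote), (unsigned)HASH(key),
               (unsigned)(HASH(remote) ^ HASH(key)));
        return 0;
}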
static DEFINE_RWLOCK(ipgre_lock);

/* Given src, dst and key, find the appropriate tunnel for input. */

static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
                __be32 remote, __be32 local, __be32 key)
{
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(key);
        struct ip_tunnel *t;
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
                                return t;
                }
        }
        for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
                if (remote == t->parms.iph.daddr) {
                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
                                return t;
                }
        }
        for (t = ign->tunnels_l[h1]; t; t = t->next) {
                if (local == t->parms.iph.saddr ||
                    (local == t->parms.iph.daddr &&
                     ipv4_is_multicast(local))) {
                        if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
                                return t;
                }
        }
        for (t = ign->tunnels_wc[h1]; t; t = t->next) {
                if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
                        return t;
        }

        if (ign->fb_tunnel_dev->flags&IFF_UP)
                return netdev_priv(ign->fb_tunnel_dev);
        return NULL;
}
static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
                struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        unsigned h = HASH(key);
        int prio = 0;

        if (local)
                prio |= 1;
        if (remote && !ipv4_is_multicast(remote)) {
                prio |= 2;
                h ^= HASH(remote);
        }

        return &ign->tunnels[prio][h];
}
static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
                struct ip_tunnel *t)
{
        return __ipgre_bucket(ign, &t->parms);
}
static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel **tp = ipgre_bucket(ign, t);

        t->next = *tp;
        write_lock_bh(&ipgre_lock);
        *tp = t;
        write_unlock_bh(&ipgre_lock);
}
static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
{
        struct ip_tunnel **tp;

        for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
                        write_lock_bh(&ipgre_lock);
                        *tp = t->next;
                        write_unlock_bh(&ipgre_lock);
                        break;
                }
        }
}
static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
                struct ip_tunnel_parm *parms, int create)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        __be32 key = parms->i_key;
        struct ip_tunnel *t, **tp, *nt;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
                        if (key == t->parms.i_key)
                                return t;
                }
        }
        if (!create)
                return NULL;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                sprintf(name, "gre%%d");

        dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
        if (!dev)
                return NULL;

        dev_net_set(dev, net);

        if (strchr(name, '%')) {
                if (dev_alloc_name(dev, name) < 0)
                        goto failed_free;
        }

        nt = netdev_priv(dev);
        nt->parms = *parms;
        dev->rtnl_link_ops = &ipgre_link_ops;

        dev->mtu = ipgre_tunnel_bind_dev(dev);

        if (register_netdevice(dev) < 0)
                goto failed_free;

        dev_hold(dev);
        ipgre_tunnel_link(ign, nt);
        return nt;

failed_free:
        free_netdev(dev);
        return NULL;
}
static void ipgre_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        ipgre_tunnel_unlink(ign, netdev_priv(dev));
        dev_put(dev);
}
static void ipgre_err(struct sk_buff *skb, u32 info)
{
/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.

   Moreover, Cisco "wise men" put the GRE key in the third word
   of the GRE header. That makes it impossible to maintain even soft
   state for keyed GRE tunnels with enabled checksum. Tell them "thank you".

   Well, I wonder: rfc1812 was written by a Cisco employee;
   why the hell do these idiots break standards established
   by themselves???
 */

        struct iphdr *iph = (struct iphdr*)skb->data;
        __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
        int grehlen = (iph->ihl<<2) + 4;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        __be16 flags;

        flags = p[0];
        if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        return;
                if (flags&GRE_KEY) {
                        grehlen += 4;
                        if (flags&GRE_CSUM)
                                grehlen += 4;
                }
        }

        /* If only 8 bytes returned, keyed message will be dropped here */
        if (skb_headlen(skb) < grehlen)
                return;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return;
                case ICMP_FRAG_NEEDED:
                        /* Soft state for pmtu is maintained by IP core. */
                        return;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return;
                break;
        }

        read_lock(&ipgre_lock);
        t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
                                (flags&GRE_KEY) ?
                                *(((__be32*)p) + (grehlen>>2) - 1) : 0);
        if (t == NULL || t->parms.iph.daddr == 0 ||
            ipv4_is_multicast(t->parms.iph.daddr))
                goto out;

        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        read_unlock(&ipgre_lock);
        return;
}
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
{
        if (INET_ECN_is_ce(iph->tos)) {
                if (skb->protocol == htons(ETH_P_IP)) {
                        IP_ECN_set_ce(ip_hdr(skb));
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        IP6_ECN_set_ce(ipv6_hdr(skb));
                }
        }
}
static inline u8
ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
{
        u8 inner = 0;
        if (skb->protocol == htons(ETH_P_IP))
                inner = old_iph->tos;
        else if (skb->protocol == htons(ETH_P_IPV6))
                inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
        return INET_ECN_encapsulate(tos, inner);
}
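For readers without the headers at hand, here is a userspace sketch of the
semantics, assuming the contemporaneous definition of INET_ECN_encapsulate():
the tunnel keeps its own DSCP and copies the inner ECN codepoint, except that
an inner CE mark is encapsulated as ECT(0):

#include <stdio.h>

#define ECN_MASK  0x03
#define ECN_ECT_0 0x02
#define ECN_CE    0x03

static unsigned char ecn_encapsulate(unsigned char outer_tos,
                                     unsigned char inner_tos)
{
        unsigned char ecn = inner_tos & ECN_MASK;

        outer_tos &= ~ECN_MASK;         /* keep the tunnel's DSCP bits */
        outer_tos |= (ecn == ECN_CE) ? ECN_ECT_0 : ecn;
        return outer_tos;
}

int main(void)
{
        /* tunnel DSCP 0x20 carrying a CE-marked inner packet -> 0x22 */
        printf("outer tos = 0x%02x\n", ecn_encapsulate(0x20, ECN_CE));
        return 0;
}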
static int ipgre_rcv(struct sk_buff *skb)
{
        struct iphdr *iph;
        u8     *h;
        __be16 flags;
        __sum16 csum = 0;
        __be32 key = 0;
        u32    seqno = 0;
        struct ip_tunnel *tunnel;
        int    offset = 4;

        if (!pskb_may_pull(skb, 16))
                goto drop_nolock;

        iph = ip_hdr(skb);
        h = skb->data;
        flags = *(__be16*)h;

        if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
                /* - Version must be 0.
                   - We do not support routing headers.
                 */
                if (flags&(GRE_VERSION|GRE_ROUTING))
                        goto drop_nolock;

                if (flags&GRE_CSUM) {
                        switch (skb->ip_summed) {
                        case CHECKSUM_COMPLETE:
                                csum = csum_fold(skb->csum);
                                if (!csum)
                                        break;
                                /* fall through */
                        case CHECKSUM_NONE:
                                skb->csum = 0;
                                csum = __skb_checksum_complete(skb);
                                skb->ip_summed = CHECKSUM_COMPLETE;
                        }
                        offset += 4;
                }
                if (flags&GRE_KEY) {
                        key = *(__be32*)(h + offset);
                        offset += 4;
                }
                if (flags&GRE_SEQ) {
                        seqno = ntohl(*(__be32*)(h + offset));
                        offset += 4;
                }
        }

        read_lock(&ipgre_lock);
        if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
                                        iph->saddr, iph->daddr, key)) != NULL) {
                struct net_device_stats *stats = &tunnel->dev->stats;

                secpath_reset(skb);

                skb->protocol = *(__be16*)(h + 2);
                /* WCCP version 1 and 2 protocol decoding.
                 * - Change protocol to IP
                 * - When dealing with WCCPv2, skip the extra 4 bytes in the GRE header
                 */
                if (flags == 0 &&
                    skb->protocol == htons(ETH_P_WCCP)) {
                        skb->protocol = htons(ETH_P_IP);
                        if ((*(h + offset) & 0xF0) != 0x40)
                                offset += 4;
                }

                skb->mac_header = skb->network_header;
                __pskb_pull(skb, offset);
                skb_reset_network_header(skb);
                skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
                skb->pkt_type = PACKET_HOST;
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        /* Looped back packet, drop it! */
                        if (skb->rtable->fl.iif == 0)
                                goto drop;
                        stats->multicast++;
                        skb->pkt_type = PACKET_BROADCAST;
                }
#endif

                if (((flags&GRE_CSUM) && csum) ||
                    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
                        stats->rx_crc_errors++;
                        stats->rx_errors++;
                        goto drop;
                }
                if (tunnel->parms.i_flags&GRE_SEQ) {
                        if (!(flags&GRE_SEQ) ||
                            (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
                                stats->rx_fifo_errors++;
                                stats->rx_errors++;
                                goto drop;
                        }
                        tunnel->i_seqno = seqno + 1;
                }
                stats->rx_packets++;
                stats->rx_bytes += skb->len;
                skb->dev = tunnel->dev;
                dst_release(skb->dst);
                skb->dst = NULL;
                nf_reset(skb);
                ipgre_ecn_decapsulate(iph, skb);
                netif_rx(skb);
                read_unlock(&ipgre_lock);
                return 0;
        }
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

drop:
        read_unlock(&ipgre_lock);
drop_nolock:
        kfree_skb(skb);
        return 0;
}
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct net_device_stats *stats = &tunnel->dev->stats;
        struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *tiph;
        u8     tos;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        struct net_device *tdev;        /* Device to other host */
        struct iphdr  *iph;             /* Our new IP header */
        unsigned int max_headroom;      /* The extra header space needed */
        int    gre_hlen;
        __be32 dst;
        int    mtu;

        if (tunnel->recursion++) {
                stats->collisions++;
                goto tx_error;
        }

        if (dev->header_ops) {
                gre_hlen = 0;
                tiph = (struct iphdr*)skb->data;
        } else {
                gre_hlen = tunnel->hlen;
                tiph = &tunnel->parms.iph;
        }

        if ((dst = tiph->daddr) == 0) {
                /* NBMA tunnel */

                if (skb->dst == NULL) {
                        stats->tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb->rtable;
                        if ((dst = rt->rt_gateway) == 0)
                                goto tx_error_icmp;
                }
#ifdef CONFIG_IPV6
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        struct in6_addr *addr6;
                        int addr_type;
                        struct neighbour *neigh = skb->dst->neighbour;

                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (struct in6_addr*)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                goto tx_error_icmp;

                        dst = addr6->s6_addr32[3];
                }
#endif
                else
                        goto tx_error;
        }

        tos = tiph->tos;
        if (tos&1) {
                if (skb->protocol == htons(ETH_P_IP))
                        tos = old_iph->tos;
                tos &= ~1;
        }

        {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = dst,
                                                .saddr = tiph->saddr,
                                                .tos = RT_TOS(tos) } },
                                    .proto = IPPROTO_GRE };
                if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        stats->tx_carrier_errors++;
                        goto tx_error;
                }
        }
        tdev = rt->u.dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                stats->collisions++;
                goto tx_error;
        }

        df = tiph->frag_off;
        if (df)
                mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
        else
                mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

        if (skb->dst)
                skb->dst->ops->update_pmtu(skb->dst, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                df |= (old_iph->frag_off&htons(IP_DF));

                if ((old_iph->frag_off&htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }
#ifdef CONFIG_IPV6
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info*)skb->dst;

                if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                skb->dst->metrics[RTAX_MTU-1] = mtu;
                        }
                }

                if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }
#endif

        if (tunnel->err_count > 0) {
                if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
                        tunnel->err_count--;

                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        stats->tx_dropped++;
                        dev_kfree_skb(skb);
                        tunnel->recursion--;
                        return 0;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                old_iph = ip_hdr(skb);
        }

        skb->transport_header = skb->network_header;
        skb_push(skb, gre_hlen);
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;

        /*
         *      Push down and install the IPIP header.
         */

        iph = ip_hdr(skb);
        iph->version = 4;
        iph->ihl = sizeof(struct iphdr) >> 2;
        iph->frag_off = df;
        iph->protocol = IPPROTO_GRE;
        iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
        iph->daddr = rt->rt_dst;
        iph->saddr = rt->rt_src;

        if ((iph->ttl = tiph->ttl) == 0) {
                if (skb->protocol == htons(ETH_P_IP))
                        iph->ttl = old_iph->ttl;
#ifdef CONFIG_IPV6
                else if (skb->protocol == htons(ETH_P_IPV6))
                        iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
#endif
                else
                        iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
        }

        ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
        ((__be16*)(iph+1))[1] = skb->protocol;

        if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
                __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);

                if (tunnel->parms.o_flags&GRE_SEQ) {
                        ++tunnel->o_seqno;
                        *ptr = htonl(tunnel->o_seqno);
                        ptr--;
                }
                if (tunnel->parms.o_flags&GRE_KEY) {
                        *ptr = tunnel->parms.o_key;
                        ptr--;
                }
                if (tunnel->parms.o_flags&GRE_CSUM) {
                        *ptr = 0;
                        *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
                }
        }

        nf_reset(skb);

        IPTUNNEL_XMIT();
        tunnel->recursion--;
        return 0;

tx_error_icmp:
        dst_link_failure(skb);

tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
        tunnel->recursion--;
        return 0;
}
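The option loop above fills the GRE header backwards from its last word, which
yields the on-wire ordering flags, protocol, then optional checksum, key and
sequence words, in that order. The same header can be written front-to-back; a
standalone sketch (gre_build() is an illustrative helper, not a kernel API):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>

#define GRE_CSUM htons(0x8000)
#define GRE_KEY  htons(0x2000)
#define GRE_SEQ  htons(0x1000)

static size_t gre_build(uint8_t *buf, uint16_t flags, uint16_t proto,
                        uint32_t key_be, uint32_t seq)
{
        size_t off = 0;
        uint32_t word;

        memcpy(buf + off, &flags, 2); off += 2;
        memcpy(buf + off, &proto, 2); off += 2;
        if (flags & GRE_CSUM) {         /* checksum word comes first ... */
                word = 0;               /* ... and is computed last      */
                memcpy(buf + off, &word, 4); off += 4;
        }
        if (flags & GRE_KEY) {
                memcpy(buf + off, &key_be, 4); off += 4;
        }
        if (flags & GRE_SEQ) {
                word = htonl(seq);
                memcpy(buf + off, &word, 4); off += 4;
        }
        return off;
}

int main(void)
{
        uint8_t hdr[16];
        size_t len = gre_build(hdr, GRE_KEY | GRE_SEQ, htons(0x0800),
                               htonl(42), 1);
        printf("GRE header: %zu bytes (4 base + 4 key + 4 seq)\n", len);
        return 0;
}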
static int ipgre_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int addend = sizeof(struct iphdr) + 4;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */

        if (iph->daddr) {
                struct flowi fl = { .oif = tunnel->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .saddr = iph->saddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_GRE };
                struct rtable *rt;
                if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
                        tdev = rt->u.dst.dev;
                        ip_rt_put(rt);
                }

                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;

        /* Precalculate GRE options length */
        if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
                if (tunnel->parms.o_flags&GRE_CSUM)
                        addend += 4;
                if (tunnel->parms.o_flags&GRE_KEY)
                        addend += 4;
                if (tunnel->parms.o_flags&GRE_SEQ)
                        addend += 4;
        }
        dev->needed_headroom = addend + hlen;
        mtu -= dev->hard_header_len - addend;

        if (mtu < 68)
                mtu = 68;

        tunnel->hlen = addend;

        return mtu;
}
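As a worked example of the precalculation above: with ihl fixed at 5, the IP
header is 20 bytes and the base GRE header 4, so addend starts at 24; a tunnel
configured with GRE_CSUM and GRE_KEY gets addend = 24 + 4 + 4 = 32. Assuming an
Ethernet tdev (hard_header_len 14, needed_headroom 0), hlen is 14 and the
tunnel's needed_headroom becomes 32 + 14 = 46 bytes.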
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ign->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipgre_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
                    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                if (!(p.i_flags&GRE_KEY))
                        p.i_key = 0;
                if (!(p.o_flags&GRE_KEY))
                        p.o_key = 0;

                t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                unsigned nflags = 0;

                                t = netdev_priv(dev);

                                if (ipv4_is_multicast(p.iph.daddr))
                                        nflags = IFF_BROADCAST;
                                else if (p.iph.daddr)
                                        nflags = IFF_POINTOPOINT;

                                if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
                                        err = -EINVAL;
                                        break;
                                }
                                ipgre_tunnel_unlink(ign, t);
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                t->parms.i_key = p.i_key;
                                t->parms.o_key = p.o_key;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipgre_tunnel_link(ign, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        dev->mtu = ipgre_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ign->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t == netdev_priv(ign->fb_tunnel_dev))
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        if (new_mtu < 68 ||
            new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could do something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
                        unsigned short type,
                        const void *daddr, const void *saddr, unsigned len)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
        __be16 *p = (__be16*)(iph+1);

        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
        p[0] = t->parms.o_flags;
        p[1] = htons(type);

        /*
         *      Set the source hardware address.
         */

        if (saddr)
                memcpy(&iph->saddr, saddr, 4);

        if (daddr) {
                memcpy(&iph->daddr, daddr, 4);
                return t->hlen;
        }
        if (iph->daddr && !ipv4_is_multicast(iph->daddr))
                return t->hlen;

        return -t->hlen;
}
static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
        struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
        memcpy(haddr, &iph->saddr, 4);
        return 4;
}

static const struct header_ops ipgre_header_ops = {
        .create = ipgre_header,
        .parse  = ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);

        if (ipv4_is_multicast(t->parms.iph.daddr)) {
                struct flowi fl = { .oif = t->parms.link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = t->parms.iph.daddr,
                                                .saddr = t->parms.iph.saddr,
                                                .tos = RT_TOS(t->parms.iph.tos) } },
                                    .proto = IPPROTO_GRE };
                struct rtable *rt;
                if (ip_route_output_key(dev_net(dev), &rt, &fl))
                        return -EADDRNOTAVAIL;
                dev = rt->u.dst.dev;
                ip_rt_put(rt);
                if (__in_dev_get_rtnl(dev) == NULL)
                        return -EADDRNOTAVAIL;
                t->mlink = dev->ifindex;
                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
        }
        return 0;
}

static int ipgre_close(struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
                struct in_device *in_dev;
                in_dev = inetdev_by_index(dev_net(dev), t->mlink);
                if (in_dev) {
                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
                        in_dev_put(in_dev);
                }
        }
        return 0;
}

#endif
static void ipgre_tunnel_setup(struct net_device *dev)
{
        dev->init               = ipgre_tunnel_init;
        dev->uninit             = ipgre_tunnel_uninit;
        dev->destructor         = free_netdev;
        dev->hard_start_xmit    = ipgre_tunnel_xmit;
        dev->do_ioctl           = ipgre_tunnel_ioctl;
        dev->change_mtu         = ipgre_tunnel_change_mtu;

        dev->type               = ARPHRD_IPGRE;
        dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}
static int ipgre_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        if (iph->daddr) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
                if (ipv4_is_multicast(iph->daddr)) {
                        if (!iph->saddr)
                                return -EINVAL;
                        dev->flags = IFF_BROADCAST;
                        dev->header_ops = &ipgre_header_ops;
                        dev->open = ipgre_open;
                        dev->stop = ipgre_close;
                }
#endif
        } else
                dev->header_ops = &ipgre_header_ops;

        return 0;
}
static int ipgre_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        iph->version = 4;
        iph->protocol = IPPROTO_GRE;
        iph->ihl = 5;
        tunnel->hlen = sizeof(struct iphdr) + 4;

        dev_hold(dev);
        ign->tunnels_wc[0] = tunnel;
        return 0;
}
static struct net_protocol ipgre_protocol = {
        .handler        = ipgre_rcv,
        .err_handler    = ipgre_err,
        .netns_ok       = 1,
};
static void ipgre_destroy_tunnels(struct ipgre_net *ign)
{
        int prio;

        for (prio = 0; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;
                        while ((t = ign->tunnels[prio][h]) != NULL)
                                unregister_netdevice(t->dev);
                }
        }
}
static int ipgre_init_net(struct net *net)
{
        int err;
        struct ipgre_net *ign;

        err = -ENOMEM;
        ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
        if (ign == NULL)
                goto err_alloc;

        err = net_assign_generic(net, ipgre_net_id, ign);
        if (err < 0)
                goto err_assign;

        ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
                                          ipgre_tunnel_setup);
        if (!ign->fb_tunnel_dev) {
                err = -ENOMEM;
                goto err_alloc_dev;
        }

        ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
        dev_net_set(ign->fb_tunnel_dev, net);
        ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;

        if ((err = register_netdev(ign->fb_tunnel_dev)))
                goto err_reg_dev;

        return 0;

err_reg_dev:
        free_netdev(ign->fb_tunnel_dev);
err_alloc_dev:
        /* nothing */
err_assign:
        kfree(ign);
err_alloc:
        return err;
}
static void ipgre_exit_net(struct net *net)
{
        struct ipgre_net *ign;

        ign = net_generic(net, ipgre_net_id);
        rtnl_lock();
        ipgre_destroy_tunnels(ign);
        rtnl_unlock();
        kfree(ign);
}

static struct pernet_operations ipgre_net_ops = {
        .init = ipgre_init_net,
        .exit = ipgre_exit_net,
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
        __be16 flags;

        if (!data)
                return 0;

        flags = 0;
        if (data[IFLA_GRE_IFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
        if (data[IFLA_GRE_OFLAGS])
                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
        if (flags & (GRE_VERSION|GRE_ROUTING))
                return -EINVAL;

        return 0;
}
static void ipgre_netlink_parms(struct nlattr *data[],
                                struct ip_tunnel_parm *parms)
{
        memset(parms, 0, sizeof(*parms));       /* zero the block, not the pointer */

        parms->iph.protocol = IPPROTO_GRE;

        if (!data)
                return;

        if (data[IFLA_GRE_LINK])
                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

        if (data[IFLA_GRE_IFLAGS])
                parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);

        if (data[IFLA_GRE_OFLAGS])
                parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);

        if (data[IFLA_GRE_IKEY])
                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

        if (data[IFLA_GRE_OKEY])
                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

        if (data[IFLA_GRE_LOCAL])
                memcpy(&parms->iph.saddr, nla_data(data[IFLA_GRE_LOCAL]), 4);

        if (data[IFLA_GRE_REMOTE])
                memcpy(&parms->iph.daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);

        if (data[IFLA_GRE_TTL])
                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

        if (data[IFLA_GRE_TOS])
                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
                parms->iph.frag_off = htons(IP_DF);
}
static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
                         struct nlattr *data[])
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        int mtu;
        int err;

        nt = netdev_priv(dev);
        ipgre_netlink_parms(data, &nt->parms);

        if (ipgre_tunnel_locate(net, &nt->parms, 0))
                return -EEXIST;

        mtu = ipgre_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        err = register_netdevice(dev);
        if (err)
                goto out;

        dev_hold(dev);
        ipgre_tunnel_link(ign, nt);

out:
        return err;
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
                            struct nlattr *data[])
{
        struct ip_tunnel *t, *nt;
        struct net *net = dev_net(dev);
        struct ipgre_net *ign = net_generic(net, ipgre_net_id);
        struct ip_tunnel_parm p;
        int mtu;

        if (dev == ign->fb_tunnel_dev)
                return -EINVAL;

        nt = netdev_priv(dev);
        ipgre_netlink_parms(data, &p);

        t = ipgre_tunnel_locate(net, &p, 0);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                unsigned nflags = 0;

                t = nt;

                if (ipv4_is_multicast(p.iph.daddr))
                        nflags = IFF_BROADCAST;
                else if (p.iph.daddr)
                        nflags = IFF_POINTOPOINT;

                if ((dev->flags ^ nflags) &
                    (IFF_POINTOPOINT | IFF_BROADCAST))
                        return -EINVAL;

                ipgre_tunnel_unlink(ign, t);
                t->parms.iph.saddr = p.iph.saddr;
                t->parms.iph.daddr = p.iph.daddr;
                t->parms.i_key = p.i_key;
                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                memcpy(dev->broadcast, &p.iph.daddr, 4);
                ipgre_tunnel_link(ign, t);
                netdev_state_change(dev);
        }

        t->parms.o_key = p.o_key;
        t->parms.iph.ttl = p.iph.ttl;
        t->parms.iph.tos = p.iph.tos;
        t->parms.iph.frag_off = p.iph.frag_off;

        if (t->parms.link != p.link) {
                t->parms.link = p.link;
                mtu = ipgre_tunnel_bind_dev(dev);
                if (!tb[IFLA_MTU])
                        dev->mtu = mtu;
                netdev_state_change(dev);
        }

        return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
{
        return
                /* IFLA_GRE_LINK */
                nla_total_size(4) +
                /* IFLA_GRE_IFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_OFLAGS */
                nla_total_size(2) +
                /* IFLA_GRE_IKEY */
                nla_total_size(4) +
                /* IFLA_GRE_OKEY */
                nla_total_size(4) +
                /* IFLA_GRE_LOCAL */
                nla_total_size(4) +
                /* IFLA_GRE_REMOTE */
                nla_total_size(4) +
                /* IFLA_GRE_TTL */
                nla_total_size(1) +
                /* IFLA_GRE_TOS */
                nla_total_size(1) +
                /* IFLA_GRE_PMTUDISC */
                nla_total_size(1) +
                0;
}
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;

        NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
        NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
        NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
        NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
        NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
        NLA_PUT(skb, IFLA_GRE_LOCAL, 4, &p->iph.saddr);
        NLA_PUT(skb, IFLA_GRE_REMOTE, 4, &p->iph.daddr);
        NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
        NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
        NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

        return 0;

nla_put_failure:
        return -EMSGSIZE;
}
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
        [IFLA_GRE_LOCAL]        = { .len = 4 },
        [IFLA_GRE_REMOTE]       = { .len = 4 },
        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
        .kind           = "gre",
        .maxtype        = IFLA_GRE_MAX,
        .policy         = ipgre_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = ipgre_tunnel_setup,
        .validate       = ipgre_tunnel_validate,
        .newlink        = ipgre_newlink,
        .changelink     = ipgre_changelink,
        .get_size       = ipgre_get_size,
        .fill_info      = ipgre_fill_info,
};
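With an iproute2 new enough to drive this netlink interface, the ops above are
exercised through the `ip link` family rather than the SIOC*TUNNEL ioctls,
along the lines of (addresses and key purely illustrative):

   ip link add gre1 type gre local 10.0.0.1 remote 10.0.0.2 ttl 64 key 42
   ip link set gre1 up
   ip -d link show gre1

ipgre_newlink() handles the add, ipgre_changelink() handles `ip link change`,
and fill_info() is what backs the detailed `ip -d link show` output; the older
`ip tunnel` commands keep working through ipgre_tunnel_ioctl().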
/*
 *      And now the modules code and kernel interface.
 */

static int __init ipgre_init(void)
{
        int err;

        printk(KERN_INFO "GRE over IPv4 tunneling driver\n");

        if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
                printk(KERN_INFO "ipgre init: can't add protocol\n");
                return -EAGAIN;
        }

        err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
        if (err < 0)
                goto gen_device_failed;

        err = rtnl_link_register(&ipgre_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

out:
        return err;

rtnl_link_failed:
        unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
gen_device_failed:
        inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
        goto out;
}
static void __exit ipgre_fini(void)
{
        rtnl_link_unregister(&ipgre_link_ops);
        unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
        if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
                printk(KERN_INFO "ipgre close: can't remove protocol\n");
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS("rtnl-link-gre");