Merge tag 'regmap-v3.10-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie...
[linux-2.6.git] / net / ipv4 / ip_tunnel.c
blobbe2f8da0ae8ebc4b94a257dd50914db14212a451
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
72 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73 struct rtnl_link_stats64 *tot)
75 int i;
77 for_each_possible_cpu(i) {
78 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80 unsigned int start;
82 do {
83 start = u64_stats_fetch_begin_bh(&tstats->syncp);
84 rx_packets = tstats->rx_packets;
85 tx_packets = tstats->tx_packets;
86 rx_bytes = tstats->rx_bytes;
87 tx_bytes = tstats->tx_bytes;
88 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
90 tot->rx_packets += rx_packets;
91 tot->tx_packets += tx_packets;
92 tot->rx_bytes += rx_bytes;
93 tot->tx_bytes += tx_bytes;
96 tot->multicast = dev->stats.multicast;
98 tot->rx_crc_errors = dev->stats.rx_crc_errors;
99 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100 tot->rx_length_errors = dev->stats.rx_length_errors;
101 tot->rx_frame_errors = dev->stats.rx_frame_errors;
102 tot->rx_errors = dev->stats.rx_errors;
104 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106 tot->tx_dropped = dev->stats.tx_dropped;
107 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108 tot->tx_errors = dev->stats.tx_errors;
110 tot->collisions = dev->stats.collisions;
112 return tot;
114 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
129 /* Fallback tunnel: no source, no destination, no key, no options
131 Tunnel hash table:
132 We require exact key match i.e. if a key is present in packet
133 it will match only tunnel with the same key; if it is not present,
134 it will match only keyless tunnel.
136 All keysless packets, if not matched configured keyless tunnels
137 will match fallback tunnel.
138 Given src, dst and key, find appropriate for input tunnel.
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 !(t->dev->flags & IFF_UP))
170 continue;
172 if (!ip_tunnel_key_match(&t->parms, flags, key))
173 continue;
175 if (t->parms.link == link)
176 return t;
177 else if (!cand)
178 cand = t;
181 hash = ip_tunnel_hash(itn, key, 0);
182 head = &itn->tunnels[hash];
184 hlist_for_each_entry_rcu(t, head, hash_node) {
185 if ((local != t->parms.iph.saddr &&
186 (local != t->parms.iph.daddr ||
187 !ipv4_is_multicast(local))) ||
188 !(t->dev->flags & IFF_UP))
189 continue;
191 if (!ip_tunnel_key_match(&t->parms, flags, key))
192 continue;
194 if (t->parms.link == link)
195 return t;
196 else if (!cand)
197 cand = t;
200 if (flags & TUNNEL_NO_KEY)
201 goto skip_key_lookup;
203 hlist_for_each_entry_rcu(t, head, hash_node) {
204 if (t->parms.i_key != key ||
205 !(t->dev->flags & IFF_UP))
206 continue;
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
214 skip_key_lookup:
215 if (cand)
216 return cand;
218 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219 return netdev_priv(itn->fb_tunnel_dev);
222 return NULL;
224 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227 struct ip_tunnel_parm *parms)
229 unsigned int h;
230 __be32 remote;
232 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233 remote = parms->iph.daddr;
234 else
235 remote = 0;
237 h = ip_tunnel_hash(itn, parms->i_key, remote);
238 return &itn->tunnels[h];
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
243 struct hlist_head *head = ip_bucket(itn, &t->parms);
245 hlist_add_head_rcu(&t->hash_node, head);
248 static void ip_tunnel_del(struct ip_tunnel *t)
250 hlist_del_init_rcu(&t->hash_node);
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254 struct ip_tunnel_parm *parms,
255 int type)
257 __be32 remote = parms->iph.daddr;
258 __be32 local = parms->iph.saddr;
259 __be32 key = parms->i_key;
260 int link = parms->link;
261 struct ip_tunnel *t = NULL;
262 struct hlist_head *head = ip_bucket(itn, parms);
264 hlist_for_each_entry_rcu(t, head, hash_node) {
265 if (local == t->parms.iph.saddr &&
266 remote == t->parms.iph.daddr &&
267 key == t->parms.i_key &&
268 link == t->parms.link &&
269 type == t->dev->type)
270 break;
272 return t;
275 static struct net_device *__ip_tunnel_create(struct net *net,
276 const struct rtnl_link_ops *ops,
277 struct ip_tunnel_parm *parms)
279 int err;
280 struct ip_tunnel *tunnel;
281 struct net_device *dev;
282 char name[IFNAMSIZ];
284 if (parms->name[0])
285 strlcpy(name, parms->name, IFNAMSIZ);
286 else {
287 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288 err = -E2BIG;
289 goto failed;
291 strlcpy(name, ops->kind, IFNAMSIZ);
292 strncat(name, "%d", 2);
295 ASSERT_RTNL();
296 dev = alloc_netdev(ops->priv_size, name, ops->setup);
297 if (!dev) {
298 err = -ENOMEM;
299 goto failed;
301 dev_net_set(dev, net);
303 dev->rtnl_link_ops = ops;
305 tunnel = netdev_priv(dev);
306 tunnel->parms = *parms;
308 err = register_netdevice(dev);
309 if (err)
310 goto failed_free;
312 return dev;
314 failed_free:
315 free_netdev(dev);
316 failed:
317 return ERR_PTR(err);
320 static inline struct rtable *ip_route_output_tunnel(struct net *net,
321 struct flowi4 *fl4,
322 int proto,
323 __be32 daddr, __be32 saddr,
324 __be32 key, __u8 tos, int oif)
326 memset(fl4, 0, sizeof(*fl4));
327 fl4->flowi4_oif = oif;
328 fl4->daddr = daddr;
329 fl4->saddr = saddr;
330 fl4->flowi4_tos = tos;
331 fl4->flowi4_proto = proto;
332 fl4->fl4_gre_key = key;
333 return ip_route_output_key(net, fl4);
336 static int ip_tunnel_bind_dev(struct net_device *dev)
338 struct net_device *tdev = NULL;
339 struct ip_tunnel *tunnel = netdev_priv(dev);
340 const struct iphdr *iph;
341 int hlen = LL_MAX_HEADER;
342 int mtu = ETH_DATA_LEN;
343 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
345 iph = &tunnel->parms.iph;
347 /* Guess output device to choose reasonable mtu and needed_headroom */
348 if (iph->daddr) {
349 struct flowi4 fl4;
350 struct rtable *rt;
352 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
353 tunnel->parms.iph.protocol,
354 iph->daddr, iph->saddr,
355 tunnel->parms.o_key,
356 RT_TOS(iph->tos),
357 tunnel->parms.link);
358 if (!IS_ERR(rt)) {
359 tdev = rt->dst.dev;
360 ip_rt_put(rt);
362 if (dev->type != ARPHRD_ETHER)
363 dev->flags |= IFF_POINTOPOINT;
366 if (!tdev && tunnel->parms.link)
367 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
369 if (tdev) {
370 hlen = tdev->hard_header_len + tdev->needed_headroom;
371 mtu = tdev->mtu;
373 dev->iflink = tunnel->parms.link;
375 dev->needed_headroom = t_hlen + hlen;
376 mtu -= (dev->hard_header_len + t_hlen);
378 if (mtu < 68)
379 mtu = 68;
381 return mtu;
384 static struct ip_tunnel *ip_tunnel_create(struct net *net,
385 struct ip_tunnel_net *itn,
386 struct ip_tunnel_parm *parms)
388 struct ip_tunnel *nt, *fbt;
389 struct net_device *dev;
391 BUG_ON(!itn->fb_tunnel_dev);
392 fbt = netdev_priv(itn->fb_tunnel_dev);
393 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394 if (IS_ERR(dev))
395 return NULL;
397 dev->mtu = ip_tunnel_bind_dev(dev);
399 nt = netdev_priv(dev);
400 ip_tunnel_add(itn, nt);
401 return nt;
404 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
405 const struct tnl_ptk_info *tpi, bool log_ecn_error)
407 struct pcpu_tstats *tstats;
408 const struct iphdr *iph = ip_hdr(skb);
409 int err;
411 secpath_reset(skb);
413 skb->protocol = tpi->proto;
415 skb->mac_header = skb->network_header;
416 __pskb_pull(skb, tunnel->hlen);
417 skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418 #ifdef CONFIG_NET_IPGRE_BROADCAST
419 if (ipv4_is_multicast(iph->daddr)) {
420 /* Looped back packet, drop it! */
421 if (rt_is_output_route(skb_rtable(skb)))
422 goto drop;
423 tunnel->dev->stats.multicast++;
424 skb->pkt_type = PACKET_BROADCAST;
426 #endif
428 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430 tunnel->dev->stats.rx_crc_errors++;
431 tunnel->dev->stats.rx_errors++;
432 goto drop;
435 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436 if (!(tpi->flags&TUNNEL_SEQ) ||
437 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438 tunnel->dev->stats.rx_fifo_errors++;
439 tunnel->dev->stats.rx_errors++;
440 goto drop;
442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
445 /* Warning: All skb pointers will be invalidated! */
446 if (tunnel->dev->type == ARPHRD_ETHER) {
447 if (!pskb_may_pull(skb, ETH_HLEN)) {
448 tunnel->dev->stats.rx_length_errors++;
449 tunnel->dev->stats.rx_errors++;
450 goto drop;
453 iph = ip_hdr(skb);
454 skb->protocol = eth_type_trans(skb, tunnel->dev);
455 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
458 skb->pkt_type = PACKET_HOST;
459 __skb_tunnel_rx(skb, tunnel->dev);
461 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb);
463 if (unlikely(err)) {
464 if (log_ecn_error)
465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466 &iph->saddr, iph->tos);
467 if (err > 1) {
468 ++tunnel->dev->stats.rx_frame_errors;
469 ++tunnel->dev->stats.rx_errors;
470 goto drop;
474 tstats = this_cpu_ptr(tunnel->dev->tstats);
475 u64_stats_update_begin(&tstats->syncp);
476 tstats->rx_packets++;
477 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp);
480 gro_cells_receive(&tunnel->gro_cells, skb);
481 return 0;
483 drop:
484 kfree_skb(skb);
485 return 0;
487 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490 const struct iphdr *tnl_params)
492 struct ip_tunnel *tunnel = netdev_priv(dev);
493 const struct iphdr *inner_iph;
494 struct iphdr *iph;
495 struct flowi4 fl4;
496 u8 tos, ttl;
497 __be16 df;
498 struct rtable *rt; /* Route to the other host */
499 struct net_device *tdev; /* Device to other host */
500 unsigned int max_headroom; /* The extra header space needed */
501 __be32 dst;
502 int mtu;
504 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
506 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
507 dst = tnl_params->daddr;
508 if (dst == 0) {
509 /* NBMA tunnel */
511 if (skb_dst(skb) == NULL) {
512 dev->stats.tx_fifo_errors++;
513 goto tx_error;
516 if (skb->protocol == htons(ETH_P_IP)) {
517 rt = skb_rtable(skb);
518 dst = rt_nexthop(rt, inner_iph->daddr);
520 #if IS_ENABLED(CONFIG_IPV6)
521 else if (skb->protocol == htons(ETH_P_IPV6)) {
522 const struct in6_addr *addr6;
523 struct neighbour *neigh;
524 bool do_tx_error_icmp;
525 int addr_type;
527 neigh = dst_neigh_lookup(skb_dst(skb),
528 &ipv6_hdr(skb)->daddr);
529 if (neigh == NULL)
530 goto tx_error;
532 addr6 = (const struct in6_addr *)&neigh->primary_key;
533 addr_type = ipv6_addr_type(addr6);
535 if (addr_type == IPV6_ADDR_ANY) {
536 addr6 = &ipv6_hdr(skb)->daddr;
537 addr_type = ipv6_addr_type(addr6);
540 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
541 do_tx_error_icmp = true;
542 else {
543 do_tx_error_icmp = false;
544 dst = addr6->s6_addr32[3];
546 neigh_release(neigh);
547 if (do_tx_error_icmp)
548 goto tx_error_icmp;
550 #endif
551 else
552 goto tx_error;
555 tos = tnl_params->tos;
556 if (tos & 0x1) {
557 tos &= ~0x1;
558 if (skb->protocol == htons(ETH_P_IP))
559 tos = inner_iph->tos;
560 else if (skb->protocol == htons(ETH_P_IPV6))
561 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
564 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
565 tunnel->parms.iph.protocol,
566 dst, tnl_params->saddr,
567 tunnel->parms.o_key,
568 RT_TOS(tos),
569 tunnel->parms.link);
570 if (IS_ERR(rt)) {
571 dev->stats.tx_carrier_errors++;
572 goto tx_error;
574 tdev = rt->dst.dev;
576 if (tdev == dev) {
577 ip_rt_put(rt);
578 dev->stats.collisions++;
579 goto tx_error;
582 df = tnl_params->frag_off;
584 if (df)
585 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
586 - sizeof(struct iphdr);
587 else
588 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
590 if (skb_dst(skb))
591 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
593 if (skb->protocol == htons(ETH_P_IP)) {
594 df |= (inner_iph->frag_off&htons(IP_DF));
596 if (!skb_is_gso(skb) &&
597 (inner_iph->frag_off&htons(IP_DF)) &&
598 mtu < ntohs(inner_iph->tot_len)) {
599 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
600 ip_rt_put(rt);
601 goto tx_error;
604 #if IS_ENABLED(CONFIG_IPV6)
605 else if (skb->protocol == htons(ETH_P_IPV6)) {
606 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
608 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
609 mtu >= IPV6_MIN_MTU) {
610 if ((tunnel->parms.iph.daddr &&
611 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
612 rt6->rt6i_dst.plen == 128) {
613 rt6->rt6i_flags |= RTF_MODIFIED;
614 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
618 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
619 mtu < skb->len) {
620 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
621 ip_rt_put(rt);
622 goto tx_error;
625 #endif
627 if (tunnel->err_count > 0) {
628 if (time_before(jiffies,
629 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
630 tunnel->err_count--;
632 dst_link_failure(skb);
633 } else
634 tunnel->err_count = 0;
637 ttl = tnl_params->ttl;
638 if (ttl == 0) {
639 if (skb->protocol == htons(ETH_P_IP))
640 ttl = inner_iph->ttl;
641 #if IS_ENABLED(CONFIG_IPV6)
642 else if (skb->protocol == htons(ETH_P_IPV6))
643 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
644 #endif
645 else
646 ttl = ip4_dst_hoplimit(&rt->dst);
649 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
650 + rt->dst.header_len;
651 if (max_headroom > dev->needed_headroom) {
652 dev->needed_headroom = max_headroom;
653 if (skb_cow_head(skb, dev->needed_headroom)) {
654 dev->stats.tx_dropped++;
655 dev_kfree_skb(skb);
656 return;
660 skb_dst_drop(skb);
661 skb_dst_set(skb, &rt->dst);
663 /* Push down and install the IP header. */
664 skb_push(skb, sizeof(struct iphdr));
665 skb_reset_network_header(skb);
667 iph = ip_hdr(skb);
668 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
670 iph->version = 4;
671 iph->ihl = sizeof(struct iphdr) >> 2;
672 iph->frag_off = df;
673 iph->protocol = tnl_params->protocol;
674 iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
675 iph->daddr = fl4.daddr;
676 iph->saddr = fl4.saddr;
677 iph->ttl = ttl;
678 tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
680 iptunnel_xmit(skb, dev);
681 return;
683 #if IS_ENABLED(CONFIG_IPV6)
684 tx_error_icmp:
685 dst_link_failure(skb);
686 #endif
687 tx_error:
688 dev->stats.tx_errors++;
689 dev_kfree_skb(skb);
691 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
693 static void ip_tunnel_update(struct ip_tunnel_net *itn,
694 struct ip_tunnel *t,
695 struct net_device *dev,
696 struct ip_tunnel_parm *p,
697 bool set_mtu)
699 ip_tunnel_del(t);
700 t->parms.iph.saddr = p->iph.saddr;
701 t->parms.iph.daddr = p->iph.daddr;
702 t->parms.i_key = p->i_key;
703 t->parms.o_key = p->o_key;
704 if (dev->type != ARPHRD_ETHER) {
705 memcpy(dev->dev_addr, &p->iph.saddr, 4);
706 memcpy(dev->broadcast, &p->iph.daddr, 4);
708 ip_tunnel_add(itn, t);
710 t->parms.iph.ttl = p->iph.ttl;
711 t->parms.iph.tos = p->iph.tos;
712 t->parms.iph.frag_off = p->iph.frag_off;
714 if (t->parms.link != p->link) {
715 int mtu;
717 t->parms.link = p->link;
718 mtu = ip_tunnel_bind_dev(dev);
719 if (set_mtu)
720 dev->mtu = mtu;
722 netdev_state_change(dev);
725 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
727 int err = 0;
728 struct ip_tunnel *t;
729 struct net *net = dev_net(dev);
730 struct ip_tunnel *tunnel = netdev_priv(dev);
731 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
733 BUG_ON(!itn->fb_tunnel_dev);
734 switch (cmd) {
735 case SIOCGETTUNNEL:
736 t = NULL;
737 if (dev == itn->fb_tunnel_dev)
738 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
739 if (t == NULL)
740 t = netdev_priv(dev);
741 memcpy(p, &t->parms, sizeof(*p));
742 break;
744 case SIOCADDTUNNEL:
745 case SIOCCHGTUNNEL:
746 err = -EPERM;
747 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
748 goto done;
749 if (p->iph.ttl)
750 p->iph.frag_off |= htons(IP_DF);
751 if (!(p->i_flags&TUNNEL_KEY))
752 p->i_key = 0;
753 if (!(p->o_flags&TUNNEL_KEY))
754 p->o_key = 0;
756 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
758 if (!t && (cmd == SIOCADDTUNNEL))
759 t = ip_tunnel_create(net, itn, p);
761 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762 if (t != NULL) {
763 if (t->dev != dev) {
764 err = -EEXIST;
765 break;
767 } else {
768 unsigned int nflags = 0;
770 if (ipv4_is_multicast(p->iph.daddr))
771 nflags = IFF_BROADCAST;
772 else if (p->iph.daddr)
773 nflags = IFF_POINTOPOINT;
775 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776 err = -EINVAL;
777 break;
780 t = netdev_priv(dev);
784 if (t) {
785 err = 0;
786 ip_tunnel_update(itn, t, dev, p, true);
787 } else
788 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
789 break;
791 case SIOCDELTUNNEL:
792 err = -EPERM;
793 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794 goto done;
796 if (dev == itn->fb_tunnel_dev) {
797 err = -ENOENT;
798 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
799 if (t == NULL)
800 goto done;
801 err = -EPERM;
802 if (t == netdev_priv(itn->fb_tunnel_dev))
803 goto done;
804 dev = t->dev;
806 unregister_netdevice(dev);
807 err = 0;
808 break;
810 default:
811 err = -EINVAL;
814 done:
815 return err;
817 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
819 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
821 struct ip_tunnel *tunnel = netdev_priv(dev);
822 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
824 if (new_mtu < 68 ||
825 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826 return -EINVAL;
827 dev->mtu = new_mtu;
828 return 0;
830 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
832 static void ip_tunnel_dev_free(struct net_device *dev)
834 struct ip_tunnel *tunnel = netdev_priv(dev);
836 gro_cells_destroy(&tunnel->gro_cells);
837 free_percpu(dev->tstats);
838 free_netdev(dev);
841 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
843 struct net *net = dev_net(dev);
844 struct ip_tunnel *tunnel = netdev_priv(dev);
845 struct ip_tunnel_net *itn;
847 itn = net_generic(net, tunnel->ip_tnl_net_id);
849 if (itn->fb_tunnel_dev != dev) {
850 ip_tunnel_del(netdev_priv(dev));
851 unregister_netdevice_queue(dev, head);
854 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
856 int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857 struct rtnl_link_ops *ops, char *devname)
859 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860 struct ip_tunnel_parm parms;
862 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
863 if (!itn->tunnels)
864 return -ENOMEM;
866 if (!ops) {
867 itn->fb_tunnel_dev = NULL;
868 return 0;
870 memset(&parms, 0, sizeof(parms));
871 if (devname)
872 strlcpy(parms.name, devname, IFNAMSIZ);
874 rtnl_lock();
875 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876 rtnl_unlock();
877 if (IS_ERR(itn->fb_tunnel_dev)) {
878 kfree(itn->tunnels);
879 return PTR_ERR(itn->fb_tunnel_dev);
882 return 0;
884 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
886 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
888 int h;
890 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891 struct ip_tunnel *t;
892 struct hlist_node *n;
893 struct hlist_head *thead = &itn->tunnels[h];
895 hlist_for_each_entry_safe(t, n, thead, hash_node)
896 unregister_netdevice_queue(t->dev, head);
898 if (itn->fb_tunnel_dev)
899 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
902 void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn)
904 LIST_HEAD(list);
906 rtnl_lock();
907 ip_tunnel_destroy(itn, &list);
908 unregister_netdevice_many(&list);
909 rtnl_unlock();
910 kfree(itn->tunnels);
912 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
914 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
915 struct ip_tunnel_parm *p)
917 struct ip_tunnel *nt;
918 struct net *net = dev_net(dev);
919 struct ip_tunnel_net *itn;
920 int mtu;
921 int err;
923 nt = netdev_priv(dev);
924 itn = net_generic(net, nt->ip_tnl_net_id);
926 if (ip_tunnel_find(itn, p, dev->type))
927 return -EEXIST;
929 nt->parms = *p;
930 err = register_netdevice(dev);
931 if (err)
932 goto out;
934 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
935 eth_hw_addr_random(dev);
937 mtu = ip_tunnel_bind_dev(dev);
938 if (!tb[IFLA_MTU])
939 dev->mtu = mtu;
941 ip_tunnel_add(itn, nt);
943 out:
944 return err;
946 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
948 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949 struct ip_tunnel_parm *p)
951 struct ip_tunnel *t, *nt;
952 struct net *net = dev_net(dev);
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
956 if (dev == itn->fb_tunnel_dev)
957 return -EINVAL;
959 nt = netdev_priv(dev);
961 t = ip_tunnel_find(itn, p, dev->type);
963 if (t) {
964 if (t->dev != dev)
965 return -EEXIST;
966 } else {
967 t = nt;
969 if (dev->type != ARPHRD_ETHER) {
970 unsigned int nflags = 0;
972 if (ipv4_is_multicast(p->iph.daddr))
973 nflags = IFF_BROADCAST;
974 else if (p->iph.daddr)
975 nflags = IFF_POINTOPOINT;
977 if ((dev->flags ^ nflags) &
978 (IFF_POINTOPOINT | IFF_BROADCAST))
979 return -EINVAL;
983 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
984 return 0;
986 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
988 int ip_tunnel_init(struct net_device *dev)
990 struct ip_tunnel *tunnel = netdev_priv(dev);
991 struct iphdr *iph = &tunnel->parms.iph;
992 int err;
994 dev->destructor = ip_tunnel_dev_free;
995 dev->tstats = alloc_percpu(struct pcpu_tstats);
996 if (!dev->tstats)
997 return -ENOMEM;
999 err = gro_cells_init(&tunnel->gro_cells, dev);
1000 if (err) {
1001 free_percpu(dev->tstats);
1002 return err;
1005 tunnel->dev = dev;
1006 strcpy(tunnel->parms.name, dev->name);
1007 iph->version = 4;
1008 iph->ihl = 5;
1010 return 0;
1012 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1014 void ip_tunnel_uninit(struct net_device *dev)
1016 struct net *net = dev_net(dev);
1017 struct ip_tunnel *tunnel = netdev_priv(dev);
1018 struct ip_tunnel_net *itn;
1020 itn = net_generic(net, tunnel->ip_tnl_net_id);
1021 /* fb_tunnel_dev will be unregisted in net-exit call. */
1022 if (itn->fb_tunnel_dev != dev)
1023 ip_tunnel_del(netdev_priv(dev));
1025 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1027 /* Do least required initialization, rest of init is done in tunnel_init call */
1028 void ip_tunnel_setup(struct net_device *dev, int net_id)
1030 struct ip_tunnel *tunnel = netdev_priv(dev);
1031 tunnel->ip_tnl_net_id = net_id;
1033 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1035 MODULE_LICENSE("GPL");