/* net/ipv4/ip_tunnel.c -- common IPv4 tunnel infrastructure */
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst, __be32 saddr)
74 struct dst_entry *old_dst;
76 dst_clone(dst);
77 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78 dst_release(old_dst);
79 idst->saddr = saddr;
82 static noinline void tunnel_dst_set(struct ip_tunnel *t,
83 struct dst_entry *dst, __be32 saddr)
85 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
88 static void tunnel_dst_reset(struct ip_tunnel *t)
90 tunnel_dst_set(t, NULL, 0);
93 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
95 int i;
97 for_each_possible_cpu(i)
98 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
100 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
/* Fetch this CPU's cached route for tunnel @t, taking a reference.
 *
 * On success the cached rtable is returned and the cached source address
 * is stored in *saddr.  Returns NULL when there is no cached entry or it
 * has gone stale (per dst->ops->check() with @cookie); a stale entry is
 * purged from the cache before returning.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* The dst may be concurrently released; only keep it if we can
	 * still raise its refcount from a nonzero value.
	 */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Route went stale: reset the cache and drop our ref. */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127 __be16 flags, __be32 key)
129 if (p->i_flags & TUNNEL_KEY) {
130 if (flags & TUNNEL_KEY)
131 return key == p->i_key;
132 else
133 /* key expected, none present */
134 return false;
135 } else
136 return !(flags & TUNNEL_KEY);
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched to configured keyless tunnels
   will match the fallback tunnel.

   Given src, dst and key, find the appropriate tunnel for input.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (local, remote) address match. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;	/* link mismatch: remember as candidate */
	}

	/* Pass 2: remote matches, tunnel has a wildcard source. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 look in the bucket of wildcard-remote tunnels. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local matches the tunnel source, or local is the
	 * tunnel's multicast destination; remote is a wildcard.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match, both addresses wildcard. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Last resort: the per-netns fallback device, if it is up. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241 struct ip_tunnel_parm *parms)
243 unsigned int h;
244 __be32 remote;
245 __be32 i_key = parms->i_key;
247 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
248 remote = parms->iph.daddr;
249 else
250 remote = 0;
252 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
253 i_key = 0;
255 h = ip_tunnel_hash(i_key, remote);
256 return &itn->tunnels[h];
259 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
261 struct hlist_head *head = ip_bucket(itn, &t->parms);
263 hlist_add_head_rcu(&t->hash_node, head);
266 static void ip_tunnel_del(struct ip_tunnel *t)
268 hlist_del_init_rcu(&t->hash_node);
271 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
272 struct ip_tunnel_parm *parms,
273 int type)
275 __be32 remote = parms->iph.daddr;
276 __be32 local = parms->iph.saddr;
277 __be32 key = parms->i_key;
278 __be16 flags = parms->i_flags;
279 int link = parms->link;
280 struct ip_tunnel *t = NULL;
281 struct hlist_head *head = ip_bucket(itn, parms);
283 hlist_for_each_entry_rcu(t, head, hash_node) {
284 if (local == t->parms.iph.saddr &&
285 remote == t->parms.iph.daddr &&
286 link == t->parms.link &&
287 type == t->dev->type &&
288 ip_tunnel_key_match(&t->parms, flags, key))
289 break;
291 return t;
/* Allocate and register a tunnel net_device for @parms under @ops.
 *
 * When @parms carries no name, "<kind>%d" is used so the core picks a
 * free index.  Returns the registered device or an ERR_PTR; must be
 * called under RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind, "%d" and the terminator. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
340 static inline void init_tunnel_flow(struct flowi4 *fl4,
341 int proto,
342 __be32 daddr, __be32 saddr,
343 __be32 key, __u8 tos, int oif)
345 memset(fl4, 0, sizeof(*fl4));
346 fl4->flowi4_oif = oif;
347 fl4->daddr = daddr;
348 fl4->saddr = saddr;
349 fl4->flowi4_tos = tos;
350 fl4->flowi4_proto = proto;
351 fl4->fl4_gre_key = key;
/* Guess the underlying output device for tunnel @dev and derive an MTU.
 *
 * Returns the suggested MTU (clamped to 68, the historic IPv4 minimum)
 * and as side effects updates dev->needed_headroom, dev->iflink and,
 * for non-Ethernet tunnels with a fixed destination, IFF_POINTOPOINT.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Prime the per-cpu route cache while we hold it. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route: fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
402 static struct ip_tunnel *ip_tunnel_create(struct net *net,
403 struct ip_tunnel_net *itn,
404 struct ip_tunnel_parm *parms)
406 struct ip_tunnel *nt;
407 struct net_device *dev;
409 BUG_ON(!itn->fb_tunnel_dev);
410 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
411 if (IS_ERR(dev))
412 return ERR_CAST(dev);
414 dev->mtu = ip_tunnel_bind_dev(dev);
416 nt = netdev_priv(dev);
417 ip_tunnel_add(itn, nt);
418 return nt;
/* Common receive path for IPv4 tunnels.
 *
 * Validates the packet's csum/seq flags (@tpi) against the tunnel
 * configuration, decapsulates ECN, updates per-cpu stats and hands the
 * packet to GRO cells.  Consumes @skb in all cases; always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must match the tunnel's TUNNEL_CSUM setting. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* With TUNNEL_SEQ, drop packets without or behind the expected
	 * sequence number (signed wrap-safe comparison).
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		/* err > 1 means the packet must be dropped, not just logged. */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub packet state when it crosses a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
/* Path-MTU handling on the tunnel transmit path.
 *
 * Computes the effective MTU from the tunnel route @rt (honouring @df),
 * propagates it to the inner dst, and when the inner packet is too big
 * and must not fragment, emits the appropriate ICMP(v6) error and
 * returns -E2BIG.  Returns 0 when the packet may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Cache the reduced MTU on the inner route when the tunnel
		 * has a fixed unicast destination or the route is a host
		 * route (/128).
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
		    mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
/* Common transmit path for IPv4 tunnels.
 *
 * Resolves the outer destination (including NBMA tunnels, where it is
 * derived from the inner packet), finds/caches a route, enforces PMTU,
 * picks outer tos/ttl/df from @tnl_params or the inner header, and
 * finally encapsulates and sends via iptunnel_xmit().  Consumes @skb.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;			/* may use the per-cpu route cache */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing information.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 next hop in the last 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Per-packet destination: route cache is not usable. */
		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit tos from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back out of ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Report recent ICMP-signalled tunnel errors back to the sender
	 * for a limited time window.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
/* Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel is unhashed and re-hashed since its addresses (and thus
 * bucket) may change; when the link device changes the tunnel is
 * re-bound (optionally updating the MTU), and the per-cpu route cache
 * is flushed before userspace is notified.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose endpoints as dev addresses. */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
/* Common SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler for IPv4 tunnels.
 *
 * @p is the user-supplied parameter block (already copied to kernel
 * space); on SIOCGETTUNNEL it is filled with the current configuration.
 * ADD/CHG/DEL require CAP_NET_ADMIN in the tunnel's user namespace.
 * Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			/* Queried via the fallback device: look up by
			 * parameters, defaulting to the fallback itself.
			 */
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Ignore stray key values when no key is in use. */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Params must not collide with another dev. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Broadcast/p2p nature of a live device
				 * must not change.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
836 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
838 struct ip_tunnel *tunnel = netdev_priv(dev);
839 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
841 if (new_mtu < 68 ||
842 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
843 return -EINVAL;
844 dev->mtu = new_mtu;
845 return 0;
847 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
849 static void ip_tunnel_dev_free(struct net_device *dev)
851 struct ip_tunnel *tunnel = netdev_priv(dev);
853 gro_cells_destroy(&tunnel->gro_cells);
854 free_percpu(tunnel->dst_cache);
855 free_percpu(dev->tstats);
856 free_netdev(dev);
859 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
861 struct ip_tunnel *tunnel = netdev_priv(dev);
862 struct ip_tunnel_net *itn;
864 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
866 if (itn->fb_tunnel_dev != dev) {
867 ip_tunnel_del(netdev_priv(dev));
868 unregister_netdevice_queue(dev, head);
871 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
/* Per-netns init for one tunnel type.
 *
 * Initialises the hash table and, unless @ops is NULL, creates the
 * netns fallback device named @devname.  Returns 0 or a negative errno
 * from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Tunnel type without a fallback device (e.g. no netlink
		 * ops): nothing more to set up.
		 */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
/* Queue for unregistration: every device of type @ops in this netns,
 * plus every tunnel hashed here whose device lives in another netns.
 * Caller holds RTNL and later flushes @head in one batch.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
/* netns exit hook: unregister, in one batch, all tunnels of type @ops
 * tracked by @itn (including the fallback device).
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
/* rtnl newlink: register @dev as a tunnel configured from @p.
 *
 * Fails with -EEXIST when an equivalent tunnel already exists.  The MTU
 * derived from the bound link is only applied when the user did not
 * request one via IFLA_MTU.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->net = net;
	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/* rtnl changelink: apply parameters @p to tunnel device @dev.
 *
 * Rejects changes to the fallback device, parameter sets that collide
 * with a different existing tunnel (-EEXIST), and changes that would
 * flip a live device between broadcast and point-to-point (-EINVAL).
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* The broadcast/p2p nature of the device must not
			 * change underneath its users.
			 */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1017 int ip_tunnel_init(struct net_device *dev)
1019 struct ip_tunnel *tunnel = netdev_priv(dev);
1020 struct iphdr *iph = &tunnel->parms.iph;
1021 int err;
1023 dev->destructor = ip_tunnel_dev_free;
1024 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1025 if (!dev->tstats)
1026 return -ENOMEM;
1028 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1029 if (!tunnel->dst_cache) {
1030 free_percpu(dev->tstats);
1031 return -ENOMEM;
1034 err = gro_cells_init(&tunnel->gro_cells, dev);
1035 if (err) {
1036 free_percpu(tunnel->dst_cache);
1037 free_percpu(dev->tstats);
1038 return err;
1041 tunnel->dev = dev;
1042 tunnel->net = dev_net(dev);
1043 strcpy(tunnel->parms.name, dev->name);
1044 iph->version = 4;
1045 iph->ihl = 5;
1047 return 0;
1049 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1051 void ip_tunnel_uninit(struct net_device *dev)
1053 struct ip_tunnel *tunnel = netdev_priv(dev);
1054 struct net *net = tunnel->net;
1055 struct ip_tunnel_net *itn;
1057 itn = net_generic(net, tunnel->ip_tnl_net_id);
1058 /* fb_tunnel_dev will be unregisted in net-exit call. */
1059 if (itn->fb_tunnel_dev != dev)
1060 ip_tunnel_del(netdev_priv(dev));
1062 ip_tunnel_dst_reset_all(tunnel);
1064 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	/* Record which pernet subsystem this tunnel type belongs to. */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1074 MODULE_LICENSE("GPL");