net: reinstate rtnl in call_netdevice_notifiers()
[linux-2.6/cjktty.git] / net / ipv4 / devinet.c
blobadf273f8ad2eb28a658321c68967bb2e6ccf25a0
1 /*
2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
66 #include "fib_lookup.h"
68 static struct ipv4_devconf ipv4_devconf = {
69 .data = {
70 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78 .data = {
79 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/* Read attribute @attr from the per-namespace default devconf of @net.
 * The @net argument is parenthesised so that any pointer expression
 * may be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91 [IFA_LOCAL] = { .type = NLA_U32 },
92 [IFA_ADDRESS] = { .type = NLA_U32 },
93 [IFA_BROADCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 #define IN4_ADDR_HSIZE_SHIFT 8
98 #define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
100 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
101 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103 static u32 inet_addr_hash(struct net *net, __be32 addr)
105 u32 val = (__force u32) addr ^ net_hash_mix(net);
107 return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
110 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
112 u32 hash = inet_addr_hash(net, ifa->ifa_local);
114 spin_lock(&inet_addr_hash_lock);
115 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116 spin_unlock(&inet_addr_hash_lock);
119 static void inet_hash_remove(struct in_ifaddr *ifa)
121 spin_lock(&inet_addr_hash_lock);
122 hlist_del_init_rcu(&ifa->hash);
123 spin_unlock(&inet_addr_hash_lock);
127 * __ip_dev_find - find the first device with a given source address.
128 * @net: the net namespace
129 * @addr: the source address
130 * @devref: if true, take a reference on the found device
132 * If a caller uses devref=false, it should be protected by RCU, or RTNL
134 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
136 u32 hash = inet_addr_hash(net, addr);
137 struct net_device *result = NULL;
138 struct in_ifaddr *ifa;
139 struct hlist_node *node;
141 rcu_read_lock();
142 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143 if (ifa->ifa_local == addr) {
144 struct net_device *dev = ifa->ifa_dev->dev;
146 if (!net_eq(dev_net(dev), net))
147 continue;
148 result = dev;
149 break;
152 if (!result) {
153 struct flowi4 fl4 = { .daddr = addr };
154 struct fib_result res = { 0 };
155 struct fib_table *local;
157 /* Fallback to FIB local table so that communication
158 * over loopback subnets work.
160 local = fib_get_table(net, RT_TABLE_LOCAL);
161 if (local &&
162 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163 res.type == RTN_LOCAL)
164 result = FIB_RES_DEV(res);
166 if (result && devref)
167 dev_hold(result);
168 rcu_read_unlock();
169 return result;
171 EXPORT_SYMBOL(__ip_dev_find);
173 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
175 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
176 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
177 int destroy);
178 #ifdef CONFIG_SYSCTL
179 static void devinet_sysctl_register(struct in_device *idev);
180 static void devinet_sysctl_unregister(struct in_device *idev);
181 #else
182 static void devinet_sysctl_register(struct in_device *idev)
185 static void devinet_sysctl_unregister(struct in_device *idev)
188 #endif
190 /* Locks all the inet devices. */
192 static struct in_ifaddr *inet_alloc_ifa(void)
194 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 static void inet_rcu_free_ifa(struct rcu_head *head)
199 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200 if (ifa->ifa_dev)
201 in_dev_put(ifa->ifa_dev);
202 kfree(ifa);
205 static void inet_free_ifa(struct in_ifaddr *ifa)
207 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 void in_dev_finish_destroy(struct in_device *idev)
212 struct net_device *dev = idev->dev;
214 WARN_ON(idev->ifa_list);
215 WARN_ON(idev->mc_list);
216 #ifdef NET_REFCNT_DEBUG
217 pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218 #endif
219 dev_put(dev);
220 if (!idev->dead)
221 pr_err("Freeing alive in_device %p\n", idev);
222 else
223 kfree(idev);
225 EXPORT_SYMBOL(in_dev_finish_destroy);
227 static struct in_device *inetdev_init(struct net_device *dev)
229 struct in_device *in_dev;
231 ASSERT_RTNL();
233 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234 if (!in_dev)
235 goto out;
236 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237 sizeof(in_dev->cnf));
238 in_dev->cnf.sysctl = NULL;
239 in_dev->dev = dev;
240 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241 if (!in_dev->arp_parms)
242 goto out_kfree;
243 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244 dev_disable_lro(dev);
245 /* Reference in_dev->dev */
246 dev_hold(dev);
247 /* Account for reference dev->ip_ptr (below) */
248 in_dev_hold(in_dev);
250 devinet_sysctl_register(in_dev);
251 ip_mc_init_dev(in_dev);
252 if (dev->flags & IFF_UP)
253 ip_mc_up(in_dev);
255 /* we can receive as soon as ip_ptr is set -- do this last */
256 rcu_assign_pointer(dev->ip_ptr, in_dev);
257 out:
258 return in_dev;
259 out_kfree:
260 kfree(in_dev);
261 in_dev = NULL;
262 goto out;
265 static void in_dev_rcu_put(struct rcu_head *head)
267 struct in_device *idev = container_of(head, struct in_device, rcu_head);
268 in_dev_put(idev);
271 static void inetdev_destroy(struct in_device *in_dev)
273 struct in_ifaddr *ifa;
274 struct net_device *dev;
276 ASSERT_RTNL();
278 dev = in_dev->dev;
280 in_dev->dead = 1;
282 ip_mc_destroy_dev(in_dev);
284 while ((ifa = in_dev->ifa_list) != NULL) {
285 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286 inet_free_ifa(ifa);
289 RCU_INIT_POINTER(dev->ip_ptr, NULL);
291 devinet_sysctl_unregister(in_dev);
292 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293 arp_ifdown(dev);
295 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
300 rcu_read_lock();
301 for_primary_ifa(in_dev) {
302 if (inet_ifa_match(a, ifa)) {
303 if (!b || inet_ifa_match(b, ifa)) {
304 rcu_read_unlock();
305 return 1;
308 } endfor_ifa(in_dev);
309 rcu_read_unlock();
310 return 0;
313 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
314 int destroy, struct nlmsghdr *nlh, u32 pid)
316 struct in_ifaddr *promote = NULL;
317 struct in_ifaddr *ifa, *ifa1 = *ifap;
318 struct in_ifaddr *last_prim = in_dev->ifa_list;
319 struct in_ifaddr *prev_prom = NULL;
320 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
322 ASSERT_RTNL();
324 /* 1. Deleting primary ifaddr forces deletion all secondaries
325 * unless alias promotion is set
328 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
329 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
331 while ((ifa = *ifap1) != NULL) {
332 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
333 ifa1->ifa_scope <= ifa->ifa_scope)
334 last_prim = ifa;
336 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
337 ifa1->ifa_mask != ifa->ifa_mask ||
338 !inet_ifa_match(ifa1->ifa_address, ifa)) {
339 ifap1 = &ifa->ifa_next;
340 prev_prom = ifa;
341 continue;
344 if (!do_promote) {
345 inet_hash_remove(ifa);
346 *ifap1 = ifa->ifa_next;
348 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
349 blocking_notifier_call_chain(&inetaddr_chain,
350 NETDEV_DOWN, ifa);
351 inet_free_ifa(ifa);
352 } else {
353 promote = ifa;
354 break;
359 /* On promotion all secondaries from subnet are changing
360 * the primary IP, we must remove all their routes silently
361 * and later to add them back with new prefsrc. Do this
362 * while all addresses are on the device list.
364 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
365 if (ifa1->ifa_mask == ifa->ifa_mask &&
366 inet_ifa_match(ifa1->ifa_address, ifa))
367 fib_del_ifaddr(ifa, ifa1);
370 /* 2. Unlink it */
372 *ifap = ifa1->ifa_next;
373 inet_hash_remove(ifa1);
375 /* 3. Announce address deletion */
377 /* Send message first, then call notifier.
378 At first sight, FIB update triggered by notifier
379 will refer to already deleted ifaddr, that could confuse
380 netlink listeners. It is not true: look, gated sees
381 that route deleted and if it still thinks that ifaddr
382 is valid, it will try to restore deleted routes... Grr.
383 So that, this order is correct.
385 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
386 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
388 if (promote) {
389 struct in_ifaddr *next_sec = promote->ifa_next;
391 if (prev_prom) {
392 prev_prom->ifa_next = promote->ifa_next;
393 promote->ifa_next = last_prim->ifa_next;
394 last_prim->ifa_next = promote;
397 promote->ifa_flags &= ~IFA_F_SECONDARY;
398 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
399 blocking_notifier_call_chain(&inetaddr_chain,
400 NETDEV_UP, promote);
401 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
402 if (ifa1->ifa_mask != ifa->ifa_mask ||
403 !inet_ifa_match(ifa1->ifa_address, ifa))
404 continue;
405 fib_add_ifaddr(ifa);
409 if (destroy)
410 inet_free_ifa(ifa1);
413 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414 int destroy)
416 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420 u32 pid)
422 struct in_device *in_dev = ifa->ifa_dev;
423 struct in_ifaddr *ifa1, **ifap, **last_primary;
425 ASSERT_RTNL();
427 if (!ifa->ifa_local) {
428 inet_free_ifa(ifa);
429 return 0;
432 ifa->ifa_flags &= ~IFA_F_SECONDARY;
433 last_primary = &in_dev->ifa_list;
435 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436 ifap = &ifa1->ifa_next) {
437 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438 ifa->ifa_scope <= ifa1->ifa_scope)
439 last_primary = &ifa1->ifa_next;
440 if (ifa1->ifa_mask == ifa->ifa_mask &&
441 inet_ifa_match(ifa1->ifa_address, ifa)) {
442 if (ifa1->ifa_local == ifa->ifa_local) {
443 inet_free_ifa(ifa);
444 return -EEXIST;
446 if (ifa1->ifa_scope != ifa->ifa_scope) {
447 inet_free_ifa(ifa);
448 return -EINVAL;
450 ifa->ifa_flags |= IFA_F_SECONDARY;
454 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455 net_srandom(ifa->ifa_local);
456 ifap = last_primary;
459 ifa->ifa_next = *ifap;
460 *ifap = ifa;
462 inet_hash_insert(dev_net(in_dev->dev), ifa);
464 /* Send message first, then call notifier.
465 Notifier will trigger FIB update, so that
466 listeners of netlink will know about new ifaddr */
467 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
468 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
470 return 0;
473 static int inet_insert_ifa(struct in_ifaddr *ifa)
475 return __inet_insert_ifa(ifa, NULL, 0);
478 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
480 struct in_device *in_dev = __in_dev_get_rtnl(dev);
482 ASSERT_RTNL();
484 if (!in_dev) {
485 inet_free_ifa(ifa);
486 return -ENOBUFS;
488 ipv4_devconf_setall(in_dev);
489 if (ifa->ifa_dev != in_dev) {
490 WARN_ON(ifa->ifa_dev);
491 in_dev_hold(in_dev);
492 ifa->ifa_dev = in_dev;
494 if (ipv4_is_loopback(ifa->ifa_local))
495 ifa->ifa_scope = RT_SCOPE_HOST;
496 return inet_insert_ifa(ifa);
499 /* Caller must hold RCU or RTNL :
500 * We dont take a reference on found in_device
502 struct in_device *inetdev_by_index(struct net *net, int ifindex)
504 struct net_device *dev;
505 struct in_device *in_dev = NULL;
507 rcu_read_lock();
508 dev = dev_get_by_index_rcu(net, ifindex);
509 if (dev)
510 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511 rcu_read_unlock();
512 return in_dev;
514 EXPORT_SYMBOL(inetdev_by_index);
516 /* Called only from RTNL semaphored context. No locks. */
518 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519 __be32 mask)
521 ASSERT_RTNL();
523 for_primary_ifa(in_dev) {
524 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525 return ifa;
526 } endfor_ifa(in_dev);
527 return NULL;
530 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
532 struct net *net = sock_net(skb->sk);
533 struct nlattr *tb[IFA_MAX+1];
534 struct in_device *in_dev;
535 struct ifaddrmsg *ifm;
536 struct in_ifaddr *ifa, **ifap;
537 int err = -EINVAL;
539 ASSERT_RTNL();
541 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542 if (err < 0)
543 goto errout;
545 ifm = nlmsg_data(nlh);
546 in_dev = inetdev_by_index(net, ifm->ifa_index);
547 if (in_dev == NULL) {
548 err = -ENODEV;
549 goto errout;
552 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553 ifap = &ifa->ifa_next) {
554 if (tb[IFA_LOCAL] &&
555 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556 continue;
558 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559 continue;
561 if (tb[IFA_ADDRESS] &&
562 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564 continue;
566 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
567 return 0;
570 err = -EADDRNOTAVAIL;
571 errout:
572 return err;
575 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
577 struct nlattr *tb[IFA_MAX+1];
578 struct in_ifaddr *ifa;
579 struct ifaddrmsg *ifm;
580 struct net_device *dev;
581 struct in_device *in_dev;
582 int err;
584 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585 if (err < 0)
586 goto errout;
588 ifm = nlmsg_data(nlh);
589 err = -EINVAL;
590 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591 goto errout;
593 dev = __dev_get_by_index(net, ifm->ifa_index);
594 err = -ENODEV;
595 if (dev == NULL)
596 goto errout;
598 in_dev = __in_dev_get_rtnl(dev);
599 err = -ENOBUFS;
600 if (in_dev == NULL)
601 goto errout;
603 ifa = inet_alloc_ifa();
604 if (ifa == NULL)
606 * A potential indev allocation can be left alive, it stays
607 * assigned to its device and is destroy with it.
609 goto errout;
611 ipv4_devconf_setall(in_dev);
612 in_dev_hold(in_dev);
614 if (tb[IFA_ADDRESS] == NULL)
615 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
617 INIT_HLIST_NODE(&ifa->hash);
618 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620 ifa->ifa_flags = ifm->ifa_flags;
621 ifa->ifa_scope = ifm->ifa_scope;
622 ifa->ifa_dev = in_dev;
624 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
627 if (tb[IFA_BROADCAST])
628 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
630 if (tb[IFA_LABEL])
631 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632 else
633 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
635 return ifa;
637 errout:
638 return ERR_PTR(err);
641 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
643 struct net *net = sock_net(skb->sk);
644 struct in_ifaddr *ifa;
646 ASSERT_RTNL();
648 ifa = rtm_to_ifaddr(net, nlh);
649 if (IS_ERR(ifa))
650 return PTR_ERR(ifa);
652 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
656 * Determine a default network mask, based on the IP address.
659 static int inet_abc_len(__be32 addr)
661 int rc = -1; /* Something else, probably a multicast. */
663 if (ipv4_is_zeronet(addr))
664 rc = 0;
665 else {
666 __u32 haddr = ntohl(addr);
668 if (IN_CLASSA(haddr))
669 rc = 8;
670 else if (IN_CLASSB(haddr))
671 rc = 16;
672 else if (IN_CLASSC(haddr))
673 rc = 24;
676 return rc;
680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
682 struct ifreq ifr;
683 struct sockaddr_in sin_orig;
684 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685 struct in_device *in_dev;
686 struct in_ifaddr **ifap = NULL;
687 struct in_ifaddr *ifa = NULL;
688 struct net_device *dev;
689 char *colon;
690 int ret = -EFAULT;
691 int tryaddrmatch = 0;
694 * Fetch the caller's info block into kernel space
697 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698 goto out;
699 ifr.ifr_name[IFNAMSIZ - 1] = 0;
701 /* save original address for comparison */
702 memcpy(&sin_orig, sin, sizeof(*sin));
704 colon = strchr(ifr.ifr_name, ':');
705 if (colon)
706 *colon = 0;
708 dev_load(net, ifr.ifr_name);
710 switch (cmd) {
711 case SIOCGIFADDR: /* Get interface address */
712 case SIOCGIFBRDADDR: /* Get the broadcast address */
713 case SIOCGIFDSTADDR: /* Get the destination address */
714 case SIOCGIFNETMASK: /* Get the netmask for the interface */
715 /* Note that these ioctls will not sleep,
716 so that we do not impose a lock.
717 One day we will be forced to put shlock here (I mean SMP)
719 tryaddrmatch = (sin_orig.sin_family == AF_INET);
720 memset(sin, 0, sizeof(*sin));
721 sin->sin_family = AF_INET;
722 break;
724 case SIOCSIFFLAGS:
725 ret = -EACCES;
726 if (!capable(CAP_NET_ADMIN))
727 goto out;
728 break;
729 case SIOCSIFADDR: /* Set interface address (and family) */
730 case SIOCSIFBRDADDR: /* Set the broadcast address */
731 case SIOCSIFDSTADDR: /* Set the destination address */
732 case SIOCSIFNETMASK: /* Set the netmask for the interface */
733 ret = -EACCES;
734 if (!capable(CAP_NET_ADMIN))
735 goto out;
736 ret = -EINVAL;
737 if (sin->sin_family != AF_INET)
738 goto out;
739 break;
740 default:
741 ret = -EINVAL;
742 goto out;
745 rtnl_lock();
747 ret = -ENODEV;
748 dev = __dev_get_by_name(net, ifr.ifr_name);
749 if (!dev)
750 goto done;
752 if (colon)
753 *colon = ':';
755 in_dev = __in_dev_get_rtnl(dev);
756 if (in_dev) {
757 if (tryaddrmatch) {
758 /* Matthias Andree */
759 /* compare label and address (4.4BSD style) */
760 /* note: we only do this for a limited set of ioctls
761 and only if the original address family was AF_INET.
762 This is checked above. */
763 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764 ifap = &ifa->ifa_next) {
765 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766 sin_orig.sin_addr.s_addr ==
767 ifa->ifa_local) {
768 break; /* found */
772 /* we didn't get a match, maybe the application is
773 4.3BSD-style and passed in junk so we fall back to
774 comparing just the label */
775 if (!ifa) {
776 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777 ifap = &ifa->ifa_next)
778 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779 break;
783 ret = -EADDRNOTAVAIL;
784 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785 goto done;
787 switch (cmd) {
788 case SIOCGIFADDR: /* Get interface address */
789 sin->sin_addr.s_addr = ifa->ifa_local;
790 goto rarok;
792 case SIOCGIFBRDADDR: /* Get the broadcast address */
793 sin->sin_addr.s_addr = ifa->ifa_broadcast;
794 goto rarok;
796 case SIOCGIFDSTADDR: /* Get the destination address */
797 sin->sin_addr.s_addr = ifa->ifa_address;
798 goto rarok;
800 case SIOCGIFNETMASK: /* Get the netmask for the interface */
801 sin->sin_addr.s_addr = ifa->ifa_mask;
802 goto rarok;
804 case SIOCSIFFLAGS:
805 if (colon) {
806 ret = -EADDRNOTAVAIL;
807 if (!ifa)
808 break;
809 ret = 0;
810 if (!(ifr.ifr_flags & IFF_UP))
811 inet_del_ifa(in_dev, ifap, 1);
812 break;
814 ret = dev_change_flags(dev, ifr.ifr_flags);
815 break;
817 case SIOCSIFADDR: /* Set interface address (and family) */
818 ret = -EINVAL;
819 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820 break;
822 if (!ifa) {
823 ret = -ENOBUFS;
824 ifa = inet_alloc_ifa();
825 INIT_HLIST_NODE(&ifa->hash);
826 if (!ifa)
827 break;
828 if (colon)
829 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830 else
831 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832 } else {
833 ret = 0;
834 if (ifa->ifa_local == sin->sin_addr.s_addr)
835 break;
836 inet_del_ifa(in_dev, ifap, 0);
837 ifa->ifa_broadcast = 0;
838 ifa->ifa_scope = 0;
841 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
843 if (!(dev->flags & IFF_POINTOPOINT)) {
844 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846 if ((dev->flags & IFF_BROADCAST) &&
847 ifa->ifa_prefixlen < 31)
848 ifa->ifa_broadcast = ifa->ifa_address |
849 ~ifa->ifa_mask;
850 } else {
851 ifa->ifa_prefixlen = 32;
852 ifa->ifa_mask = inet_make_mask(32);
854 ret = inet_set_ifa(dev, ifa);
855 break;
857 case SIOCSIFBRDADDR: /* Set the broadcast address */
858 ret = 0;
859 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860 inet_del_ifa(in_dev, ifap, 0);
861 ifa->ifa_broadcast = sin->sin_addr.s_addr;
862 inet_insert_ifa(ifa);
864 break;
866 case SIOCSIFDSTADDR: /* Set the destination address */
867 ret = 0;
868 if (ifa->ifa_address == sin->sin_addr.s_addr)
869 break;
870 ret = -EINVAL;
871 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872 break;
873 ret = 0;
874 inet_del_ifa(in_dev, ifap, 0);
875 ifa->ifa_address = sin->sin_addr.s_addr;
876 inet_insert_ifa(ifa);
877 break;
879 case SIOCSIFNETMASK: /* Set the netmask for the interface */
882 * The mask we set must be legal.
884 ret = -EINVAL;
885 if (bad_mask(sin->sin_addr.s_addr, 0))
886 break;
887 ret = 0;
888 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889 __be32 old_mask = ifa->ifa_mask;
890 inet_del_ifa(in_dev, ifap, 0);
891 ifa->ifa_mask = sin->sin_addr.s_addr;
892 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
894 /* See if current broadcast address matches
895 * with current netmask, then recalculate
896 * the broadcast address. Otherwise it's a
897 * funny address, so don't touch it since
898 * the user seems to know what (s)he's doing...
900 if ((dev->flags & IFF_BROADCAST) &&
901 (ifa->ifa_prefixlen < 31) &&
902 (ifa->ifa_broadcast ==
903 (ifa->ifa_local|~old_mask))) {
904 ifa->ifa_broadcast = (ifa->ifa_local |
905 ~sin->sin_addr.s_addr);
907 inet_insert_ifa(ifa);
909 break;
911 done:
912 rtnl_unlock();
913 out:
914 return ret;
915 rarok:
916 rtnl_unlock();
917 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918 goto out;
921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
923 struct in_device *in_dev = __in_dev_get_rtnl(dev);
924 struct in_ifaddr *ifa;
925 struct ifreq ifr;
926 int done = 0;
928 if (!in_dev)
929 goto out;
931 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932 if (!buf) {
933 done += sizeof(ifr);
934 continue;
936 if (len < (int) sizeof(ifr))
937 break;
938 memset(&ifr, 0, sizeof(struct ifreq));
939 if (ifa->ifa_label)
940 strcpy(ifr.ifr_name, ifa->ifa_label);
941 else
942 strcpy(ifr.ifr_name, dev->name);
944 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946 ifa->ifa_local;
948 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949 done = -EFAULT;
950 break;
952 buf += sizeof(struct ifreq);
953 len -= sizeof(struct ifreq);
954 done += sizeof(struct ifreq);
956 out:
957 return done;
960 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
962 __be32 addr = 0;
963 struct in_device *in_dev;
964 struct net *net = dev_net(dev);
966 rcu_read_lock();
967 in_dev = __in_dev_get_rcu(dev);
968 if (!in_dev)
969 goto no_in_dev;
971 for_primary_ifa(in_dev) {
972 if (ifa->ifa_scope > scope)
973 continue;
974 if (!dst || inet_ifa_match(dst, ifa)) {
975 addr = ifa->ifa_local;
976 break;
978 if (!addr)
979 addr = ifa->ifa_local;
980 } endfor_ifa(in_dev);
982 if (addr)
983 goto out_unlock;
984 no_in_dev:
986 /* Not loopback addresses on loopback should be preferred
987 in this case. It is importnat that lo is the first interface
988 in dev_base list.
990 for_each_netdev_rcu(net, dev) {
991 in_dev = __in_dev_get_rcu(dev);
992 if (!in_dev)
993 continue;
995 for_primary_ifa(in_dev) {
996 if (ifa->ifa_scope != RT_SCOPE_LINK &&
997 ifa->ifa_scope <= scope) {
998 addr = ifa->ifa_local;
999 goto out_unlock;
1001 } endfor_ifa(in_dev);
1003 out_unlock:
1004 rcu_read_unlock();
1005 return addr;
1007 EXPORT_SYMBOL(inet_select_addr);
1009 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010 __be32 local, int scope)
1012 int same = 0;
1013 __be32 addr = 0;
1015 for_ifa(in_dev) {
1016 if (!addr &&
1017 (local == ifa->ifa_local || !local) &&
1018 ifa->ifa_scope <= scope) {
1019 addr = ifa->ifa_local;
1020 if (same)
1021 break;
1023 if (!same) {
1024 same = (!local || inet_ifa_match(local, ifa)) &&
1025 (!dst || inet_ifa_match(dst, ifa));
1026 if (same && addr) {
1027 if (local || !dst)
1028 break;
1029 /* Is the selected addr into dst subnet? */
1030 if (inet_ifa_match(addr, ifa))
1031 break;
1032 /* No, then can we use new local src? */
1033 if (ifa->ifa_scope <= scope) {
1034 addr = ifa->ifa_local;
1035 break;
1037 /* search for large dst subnet for addr */
1038 same = 0;
1041 } endfor_ifa(in_dev);
1043 return same ? addr : 0;
1047 * Confirm that local IP address exists using wildcards:
1048 * - in_dev: only on this interface, 0=any interface
1049 * - dst: only in the same subnet as dst, 0=any dst
1050 * - local: address, 0=autoselect the local address
1051 * - scope: maximum allowed scope value for the local address
1053 __be32 inet_confirm_addr(struct in_device *in_dev,
1054 __be32 dst, __be32 local, int scope)
1056 __be32 addr = 0;
1057 struct net_device *dev;
1058 struct net *net;
1060 if (scope != RT_SCOPE_LINK)
1061 return confirm_addr_indev(in_dev, dst, local, scope);
1063 net = dev_net(in_dev->dev);
1064 rcu_read_lock();
1065 for_each_netdev_rcu(net, dev) {
1066 in_dev = __in_dev_get_rcu(dev);
1067 if (in_dev) {
1068 addr = confirm_addr_indev(in_dev, dst, local, scope);
1069 if (addr)
1070 break;
1073 rcu_read_unlock();
1075 return addr;
1077 EXPORT_SYMBOL(inet_confirm_addr);
1080 * Device notifier
1083 int register_inetaddr_notifier(struct notifier_block *nb)
1085 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1087 EXPORT_SYMBOL(register_inetaddr_notifier);
1089 int unregister_inetaddr_notifier(struct notifier_block *nb)
1091 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1093 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1095 /* Rename ifa_labels for a device name change. Make some effort to preserve
1096 * existing alias numbering and to create unique labels if possible.
1098 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1100 struct in_ifaddr *ifa;
1101 int named = 0;
1103 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104 char old[IFNAMSIZ], *dot;
1106 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108 if (named++ == 0)
1109 goto skip;
1110 dot = strchr(old, ':');
1111 if (dot == NULL) {
1112 sprintf(old, ":%d", named);
1113 dot = old;
1115 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116 strcat(ifa->ifa_label, dot);
1117 else
1118 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119 skip:
1120 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* An MTU is usable for IPv4 only when it is at least 68 bytes. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1129 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130 struct in_device *in_dev)
1133 struct in_ifaddr *ifa;
1135 for (ifa = in_dev->ifa_list; ifa;
1136 ifa = ifa->ifa_next) {
1137 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138 ifa->ifa_local, dev,
1139 ifa->ifa_local, NULL,
1140 dev->dev_addr, NULL);
1144 /* Called only under RTNL semaphore */
1146 static int inetdev_event(struct notifier_block *this, unsigned long event,
1147 void *ptr)
1149 struct net_device *dev = ptr;
1150 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1152 ASSERT_RTNL();
1154 if (!in_dev) {
1155 if (event == NETDEV_REGISTER) {
1156 in_dev = inetdev_init(dev);
1157 if (!in_dev)
1158 return notifier_from_errno(-ENOMEM);
1159 if (dev->flags & IFF_LOOPBACK) {
1160 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1163 } else if (event == NETDEV_CHANGEMTU) {
1164 /* Re-enabling IP */
1165 if (inetdev_valid_mtu(dev->mtu))
1166 in_dev = inetdev_init(dev);
1168 goto out;
1171 switch (event) {
1172 case NETDEV_REGISTER:
1173 pr_debug("%s: bug\n", __func__);
1174 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175 break;
1176 case NETDEV_UP:
1177 if (!inetdev_valid_mtu(dev->mtu))
1178 break;
1179 if (dev->flags & IFF_LOOPBACK) {
1180 struct in_ifaddr *ifa = inet_alloc_ifa();
1182 if (ifa) {
1183 INIT_HLIST_NODE(&ifa->hash);
1184 ifa->ifa_local =
1185 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186 ifa->ifa_prefixlen = 8;
1187 ifa->ifa_mask = inet_make_mask(8);
1188 in_dev_hold(in_dev);
1189 ifa->ifa_dev = in_dev;
1190 ifa->ifa_scope = RT_SCOPE_HOST;
1191 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192 inet_insert_ifa(ifa);
1195 ip_mc_up(in_dev);
1196 /* fall through */
1197 case NETDEV_CHANGEADDR:
1198 if (!IN_DEV_ARP_NOTIFY(in_dev))
1199 break;
1200 /* fall through */
1201 case NETDEV_NOTIFY_PEERS:
1202 /* Send gratuitous ARP to notify of link change */
1203 inetdev_send_gratuitous_arp(dev, in_dev);
1204 break;
1205 case NETDEV_DOWN:
1206 ip_mc_down(in_dev);
1207 break;
1208 case NETDEV_PRE_TYPE_CHANGE:
1209 ip_mc_unmap(in_dev);
1210 break;
1211 case NETDEV_POST_TYPE_CHANGE:
1212 ip_mc_remap(in_dev);
1213 break;
1214 case NETDEV_CHANGEMTU:
1215 if (inetdev_valid_mtu(dev->mtu))
1216 break;
1217 /* disable IP when MTU is not enough */
1218 case NETDEV_UNREGISTER:
1219 inetdev_destroy(in_dev);
1220 break;
1221 case NETDEV_CHANGENAME:
1222 /* Do not notify about label change, this event is
1223 * not interesting to applications using netlink.
1225 inetdev_changename(dev, in_dev);
1227 devinet_sysctl_unregister(in_dev);
1228 devinet_sysctl_register(in_dev);
1229 break;
1231 out:
1232 return NOTIFY_DONE;
1235 static struct notifier_block ip_netdev_notifier = {
1236 .notifier_call = inetdev_event,
1239 static size_t inet_nlmsg_size(void)
1241 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242 + nla_total_size(4) /* IFA_ADDRESS */
1243 + nla_total_size(4) /* IFA_LOCAL */
1244 + nla_total_size(4) /* IFA_BROADCAST */
1245 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1248 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249 u32 pid, u32 seq, int event, unsigned int flags)
1251 struct ifaddrmsg *ifm;
1252 struct nlmsghdr *nlh;
1254 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1255 if (nlh == NULL)
1256 return -EMSGSIZE;
1258 ifm = nlmsg_data(nlh);
1259 ifm->ifa_family = AF_INET;
1260 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262 ifm->ifa_scope = ifa->ifa_scope;
1263 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1265 if ((ifa->ifa_address &&
1266 nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267 (ifa->ifa_local &&
1268 nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269 (ifa->ifa_broadcast &&
1270 nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271 (ifa->ifa_label[0] &&
1272 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273 goto nla_put_failure;
1275 return nlmsg_end(skb, nlh);
1277 nla_put_failure:
1278 nlmsg_cancel(skb, nlh);
1279 return -EMSGSIZE;
1282 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1284 struct net *net = sock_net(skb->sk);
1285 int h, s_h;
1286 int idx, s_idx;
1287 int ip_idx, s_ip_idx;
1288 struct net_device *dev;
1289 struct in_device *in_dev;
1290 struct in_ifaddr *ifa;
1291 struct hlist_head *head;
1292 struct hlist_node *node;
1294 s_h = cb->args[0];
1295 s_idx = idx = cb->args[1];
1296 s_ip_idx = ip_idx = cb->args[2];
1298 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299 idx = 0;
1300 head = &net->dev_index_head[h];
1301 rcu_read_lock();
1302 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303 if (idx < s_idx)
1304 goto cont;
1305 if (h > s_h || idx > s_idx)
1306 s_ip_idx = 0;
1307 in_dev = __in_dev_get_rcu(dev);
1308 if (!in_dev)
1309 goto cont;
1311 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312 ifa = ifa->ifa_next, ip_idx++) {
1313 if (ip_idx < s_ip_idx)
1314 continue;
1315 if (inet_fill_ifaddr(skb, ifa,
1316 NETLINK_CB(cb->skb).pid,
1317 cb->nlh->nlmsg_seq,
1318 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319 rcu_read_unlock();
1320 goto done;
1323 cont:
1324 idx++;
1326 rcu_read_unlock();
1329 done:
1330 cb->args[0] = h;
1331 cb->args[1] = idx;
1332 cb->args[2] = ip_idx;
1334 return skb->len;
1337 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338 u32 pid)
1340 struct sk_buff *skb;
1341 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342 int err = -ENOBUFS;
1343 struct net *net;
1345 net = dev_net(ifa->ifa_dev->dev);
1346 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347 if (skb == NULL)
1348 goto errout;
1350 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1351 if (err < 0) {
1352 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353 WARN_ON(err == -EMSGSIZE);
1354 kfree_skb(skb);
1355 goto errout;
1357 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358 return;
1359 errout:
1360 if (err < 0)
1361 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1364 static size_t inet_get_link_af_size(const struct net_device *dev)
1366 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1368 if (!in_dev)
1369 return 0;
1371 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1374 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1376 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377 struct nlattr *nla;
1378 int i;
1380 if (!in_dev)
1381 return -ENODATA;
1383 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384 if (nla == NULL)
1385 return -EMSGSIZE;
1387 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1390 return 0;
1393 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1394 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1397 static int inet_validate_link_af(const struct net_device *dev,
1398 const struct nlattr *nla)
1400 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401 int err, rem;
1403 if (dev && !__in_dev_get_rtnl(dev))
1404 return -EAFNOSUPPORT;
1406 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407 if (err < 0)
1408 return err;
1410 if (tb[IFLA_INET_CONF]) {
1411 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412 int cfgid = nla_type(a);
1414 if (nla_len(a) < 4)
1415 return -EINVAL;
1417 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418 return -EINVAL;
1422 return 0;
1425 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1427 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429 int rem;
1431 if (!in_dev)
1432 return -EAFNOSUPPORT;
1434 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435 BUG();
1437 if (tb[IFLA_INET_CONF]) {
1438 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1442 return 0;
1445 #ifdef CONFIG_SYSCTL
1447 static void devinet_copy_dflt_conf(struct net *net, int i)
1449 struct net_device *dev;
1451 rcu_read_lock();
1452 for_each_netdev_rcu(net, dev) {
1453 struct in_device *in_dev;
1455 in_dev = __in_dev_get_rcu(dev);
1456 if (in_dev && !test_bit(i, in_dev->cnf.state))
1457 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1459 rcu_read_unlock();
1462 /* called with RTNL locked */
1463 static void inet_forward_change(struct net *net)
1465 struct net_device *dev;
1466 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1468 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1471 for_each_netdev(net, dev) {
1472 struct in_device *in_dev;
1473 if (on)
1474 dev_disable_lro(dev);
1475 rcu_read_lock();
1476 in_dev = __in_dev_get_rcu(dev);
1477 if (in_dev)
1478 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479 rcu_read_unlock();
1483 static int devinet_conf_proc(ctl_table *ctl, int write,
1484 void __user *buffer,
1485 size_t *lenp, loff_t *ppos)
1487 int old_value = *(int *)ctl->data;
1488 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489 int new_value = *(int *)ctl->data;
1491 if (write) {
1492 struct ipv4_devconf *cnf = ctl->extra1;
1493 struct net *net = ctl->extra2;
1494 int i = (int *)ctl->data - cnf->data;
1496 set_bit(i, cnf->state);
1498 if (cnf == net->ipv4.devconf_dflt)
1499 devinet_copy_dflt_conf(net, i);
1500 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501 i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502 if ((new_value == 0) && (old_value != 0))
1503 rt_cache_flush(net, 0);
1506 return ret;
1509 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510 void __user *buffer,
1511 size_t *lenp, loff_t *ppos)
1513 int *valp = ctl->data;
1514 int val = *valp;
1515 loff_t pos = *ppos;
1516 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1518 if (write && *valp != val) {
1519 struct net *net = ctl->extra2;
1521 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522 if (!rtnl_trylock()) {
1523 /* Restore the original values before restarting */
1524 *valp = val;
1525 *ppos = pos;
1526 return restart_syscall();
1528 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529 inet_forward_change(net);
1530 } else if (*valp) {
1531 struct ipv4_devconf *cnf = ctl->extra1;
1532 struct in_device *idev =
1533 container_of(cnf, struct in_device, cnf);
1534 dev_disable_lro(idev->dev);
1536 rtnl_unlock();
1537 rt_cache_flush(net, 0);
1541 return ret;
1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545 void __user *buffer,
1546 size_t *lenp, loff_t *ppos)
1548 int *valp = ctl->data;
1549 int val = *valp;
1550 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551 struct net *net = ctl->extra2;
1553 if (write && *valp != val)
1554 rt_cache_flush(net, 0);
1556 return ret;
/*
 * Build one ctl_table initializer for devconf slot IPV4_DEVCONF_<attr>,
 * pointing at the global ipv4_devconf template.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Read-write entry (mode 0644) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry (mode 0444) handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry (mode 0644) with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1582 static struct devinet_sysctl_table {
1583 struct ctl_table_header *sysctl_header;
1584 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585 } devinet_sysctl = {
1586 .devinet_vars = {
1587 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1588 devinet_sysctl_forward),
1589 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1591 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1592 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1593 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1594 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1595 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1596 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1597 "accept_source_route"),
1598 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1599 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1600 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1601 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1602 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1603 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1604 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1605 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1606 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1607 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1608 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1609 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1610 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1612 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1613 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1614 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1615 "force_igmp_version"),
1616 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1617 "promote_secondaries"),
1618 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1619 "route_localnet"),
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624 struct ipv4_devconf *p)
1626 int i;
1627 struct devinet_sysctl_table *t;
1628 char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1630 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631 if (!t)
1632 goto out;
1634 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636 t->devinet_vars[i].extra1 = p;
1637 t->devinet_vars[i].extra2 = net;
1640 snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1642 t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643 if (!t->sysctl_header)
1644 goto free;
1646 p->sysctl = t;
1647 return 0;
1649 free:
1650 kfree(t);
1651 out:
1652 return -ENOBUFS;
1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1657 struct devinet_sysctl_table *t = cnf->sysctl;
1659 if (t == NULL)
1660 return;
1662 cnf->sysctl = NULL;
1663 unregister_net_sysctl_table(t->sysctl_header);
1664 kfree(t);
1667 static void devinet_sysctl_register(struct in_device *idev)
1669 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671 &idev->cnf);
1674 static void devinet_sysctl_unregister(struct in_device *idev)
1676 __devinet_sysctl_unregister(&idev->cnf);
1677 neigh_sysctl_unregister(idev->arp_parms);
1680 static struct ctl_table ctl_forward_entry[] = {
1682 .procname = "ip_forward",
1683 .data = &ipv4_devconf.data[
1684 IPV4_DEVCONF_FORWARDING - 1],
1685 .maxlen = sizeof(int),
1686 .mode = 0644,
1687 .proc_handler = devinet_sysctl_forward,
1688 .extra1 = &ipv4_devconf,
1689 .extra2 = &init_net,
1691 { },
1693 #endif
1695 static __net_init int devinet_init_net(struct net *net)
1697 int err;
1698 struct ipv4_devconf *all, *dflt;
1699 #ifdef CONFIG_SYSCTL
1700 struct ctl_table *tbl = ctl_forward_entry;
1701 struct ctl_table_header *forw_hdr;
1702 #endif
1704 err = -ENOMEM;
1705 all = &ipv4_devconf;
1706 dflt = &ipv4_devconf_dflt;
1708 if (!net_eq(net, &init_net)) {
1709 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710 if (all == NULL)
1711 goto err_alloc_all;
1713 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714 if (dflt == NULL)
1715 goto err_alloc_dflt;
1717 #ifdef CONFIG_SYSCTL
1718 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719 if (tbl == NULL)
1720 goto err_alloc_ctl;
1722 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723 tbl[0].extra1 = all;
1724 tbl[0].extra2 = net;
1725 #endif
1728 #ifdef CONFIG_SYSCTL
1729 err = __devinet_sysctl_register(net, "all", all);
1730 if (err < 0)
1731 goto err_reg_all;
1733 err = __devinet_sysctl_register(net, "default", dflt);
1734 if (err < 0)
1735 goto err_reg_dflt;
1737 err = -ENOMEM;
1738 forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739 if (forw_hdr == NULL)
1740 goto err_reg_ctl;
1741 net->ipv4.forw_hdr = forw_hdr;
1742 #endif
1744 net->ipv4.devconf_all = all;
1745 net->ipv4.devconf_dflt = dflt;
1746 return 0;
1748 #ifdef CONFIG_SYSCTL
1749 err_reg_ctl:
1750 __devinet_sysctl_unregister(dflt);
1751 err_reg_dflt:
1752 __devinet_sysctl_unregister(all);
1753 err_reg_all:
1754 if (tbl != ctl_forward_entry)
1755 kfree(tbl);
1756 err_alloc_ctl:
1757 #endif
1758 if (dflt != &ipv4_devconf_dflt)
1759 kfree(dflt);
1760 err_alloc_dflt:
1761 if (all != &ipv4_devconf)
1762 kfree(all);
1763 err_alloc_all:
1764 return err;
1767 static __net_exit void devinet_exit_net(struct net *net)
1769 #ifdef CONFIG_SYSCTL
1770 struct ctl_table *tbl;
1772 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1776 kfree(tbl);
1777 #endif
1778 kfree(net->ipv4.devconf_dflt);
1779 kfree(net->ipv4.devconf_all);
1782 static __net_initdata struct pernet_operations devinet_ops = {
1783 .init = devinet_init_net,
1784 .exit = devinet_exit_net,
1787 static struct rtnl_af_ops inet_af_ops = {
1788 .family = AF_INET,
1789 .fill_link_af = inet_fill_link_af,
1790 .get_link_af_size = inet_get_link_af_size,
1791 .validate_link_af = inet_validate_link_af,
1792 .set_link_af = inet_set_link_af,
1795 void __init devinet_init(void)
1797 int i;
1799 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1802 register_pernet_subsys(&devinet_ops);
1804 register_gifconf(PF_INET, inet_gifconf);
1805 register_netdevice_notifier(&ip_netdev_notifier);
1807 rtnl_af_register(&inet_af_ops);
1809 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);