ARM: imx6q: add ssi1_ipg clk_lookup
[linux-2.6/btrfs-unstable.git] / net / ipv4 / devinet.c
blob6e447ff94dfa5881876ca1a1a86f5f0f1df3b9ae
1 /*
2 * NET3 IP device support routines.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Derived from the IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
14 * Additional Authors:
15 * Alan Cox, <gw4pts@gw4pts.ampr.org>
16 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
18 * Changes:
19 * Alexey Kuznetsov: pa_* fields are replaced with ifaddr
20 * lists.
21 * Cyrus Durgin: updated for kmod
22 * Matthias Andree: in devinet_ioctl, compare label and
23 * address (4.4BSD alias style support),
24 * fall back to comparing just the label
25 * if no match found.
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
66 #include "fib_lookup.h"
68 static struct ipv4_devconf ipv4_devconf = {
69 .data = {
70 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
71 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
72 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
73 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
77 static struct ipv4_devconf ipv4_devconf_dflt = {
78 .data = {
79 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
80 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
81 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
82 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
83 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
/* Read devconf attribute @attr from the default configuration that @net
 * points to (net->ipv4.devconf_dflt).  @net is parenthesized so that any
 * pointer-valued expression can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
91 [IFA_LOCAL] = { .type = NLA_U32 },
92 [IFA_ADDRESS] = { .type = NLA_U32 },
93 [IFA_BROADCAST] = { .type = NLA_U32 },
94 [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
98 * value. So if you change this define, make appropriate changes to
99 * inet_addr_hash as well.
101 #define IN4_ADDR_HSIZE 256
102 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
103 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107 u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109 return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
110 (IN4_ADDR_HSIZE - 1));
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117 spin_lock(&inet_addr_hash_lock);
118 hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119 spin_unlock(&inet_addr_hash_lock);
122 static void inet_hash_remove(struct in_ifaddr *ifa)
124 spin_lock(&inet_addr_hash_lock);
125 hlist_del_init_rcu(&ifa->hash);
126 spin_unlock(&inet_addr_hash_lock);
130 * __ip_dev_find - find the first device with a given source address.
131 * @net: the net namespace
132 * @addr: the source address
133 * @devref: if true, take a reference on the found device
135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 unsigned int hash = inet_addr_hash(net, addr);
140 struct net_device *result = NULL;
141 struct in_ifaddr *ifa;
142 struct hlist_node *node;
144 rcu_read_lock();
145 hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146 struct net_device *dev = ifa->ifa_dev->dev;
148 if (!net_eq(dev_net(dev), net))
149 continue;
150 if (ifa->ifa_local == addr) {
151 result = dev;
152 break;
155 if (!result) {
156 struct flowi4 fl4 = { .daddr = addr };
157 struct fib_result res = { 0 };
158 struct fib_table *local;
160 /* Fallback to FIB local table so that communication
161 * over loopback subnets work.
163 local = fib_get_table(net, RT_TABLE_LOCAL);
164 if (local &&
165 !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166 res.type == RTN_LOCAL)
167 result = FIB_RES_DEV(res);
169 if (result && devref)
170 dev_hold(result);
171 rcu_read_unlock();
172 return result;
174 EXPORT_SYMBOL(__ip_dev_find);
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180 int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static inline void devinet_sysctl_register(struct in_device *idev)
188 static inline void devinet_sysctl_unregister(struct in_device *idev)
191 #endif
193 /* Locks all the inet devices. */
195 static struct in_ifaddr *inet_alloc_ifa(void)
197 return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 static void inet_rcu_free_ifa(struct rcu_head *head)
202 struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203 if (ifa->ifa_dev)
204 in_dev_put(ifa->ifa_dev);
205 kfree(ifa);
208 static inline void inet_free_ifa(struct in_ifaddr *ifa)
210 call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 void in_dev_finish_destroy(struct in_device *idev)
215 struct net_device *dev = idev->dev;
217 WARN_ON(idev->ifa_list);
218 WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220 printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
221 idev, dev ? dev->name : "NIL");
222 #endif
223 dev_put(dev);
224 if (!idev->dead)
225 pr_err("Freeing alive in_device %p\n", idev);
226 else
227 kfree(idev);
229 EXPORT_SYMBOL(in_dev_finish_destroy);
231 static struct in_device *inetdev_init(struct net_device *dev)
233 struct in_device *in_dev;
235 ASSERT_RTNL();
237 in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
238 if (!in_dev)
239 goto out;
240 memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
241 sizeof(in_dev->cnf));
242 in_dev->cnf.sysctl = NULL;
243 in_dev->dev = dev;
244 in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
245 if (!in_dev->arp_parms)
246 goto out_kfree;
247 if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
248 dev_disable_lro(dev);
249 /* Reference in_dev->dev */
250 dev_hold(dev);
251 /* Account for reference dev->ip_ptr (below) */
252 in_dev_hold(in_dev);
254 devinet_sysctl_register(in_dev);
255 ip_mc_init_dev(in_dev);
256 if (dev->flags & IFF_UP)
257 ip_mc_up(in_dev);
259 /* we can receive as soon as ip_ptr is set -- do this last */
260 rcu_assign_pointer(dev->ip_ptr, in_dev);
261 out:
262 return in_dev;
263 out_kfree:
264 kfree(in_dev);
265 in_dev = NULL;
266 goto out;
269 static void in_dev_rcu_put(struct rcu_head *head)
271 struct in_device *idev = container_of(head, struct in_device, rcu_head);
272 in_dev_put(idev);
275 static void inetdev_destroy(struct in_device *in_dev)
277 struct in_ifaddr *ifa;
278 struct net_device *dev;
280 ASSERT_RTNL();
282 dev = in_dev->dev;
284 in_dev->dead = 1;
286 ip_mc_destroy_dev(in_dev);
288 while ((ifa = in_dev->ifa_list) != NULL) {
289 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
290 inet_free_ifa(ifa);
293 RCU_INIT_POINTER(dev->ip_ptr, NULL);
295 devinet_sysctl_unregister(in_dev);
296 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
297 arp_ifdown(dev);
299 call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
302 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 rcu_read_lock();
305 for_primary_ifa(in_dev) {
306 if (inet_ifa_match(a, ifa)) {
307 if (!b || inet_ifa_match(b, ifa)) {
308 rcu_read_unlock();
309 return 1;
312 } endfor_ifa(in_dev);
313 rcu_read_unlock();
314 return 0;
317 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
318 int destroy, struct nlmsghdr *nlh, u32 pid)
320 struct in_ifaddr *promote = NULL;
321 struct in_ifaddr *ifa, *ifa1 = *ifap;
322 struct in_ifaddr *last_prim = in_dev->ifa_list;
323 struct in_ifaddr *prev_prom = NULL;
324 int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326 ASSERT_RTNL();
328 /* 1. Deleting primary ifaddr forces deletion all secondaries
329 * unless alias promotion is set
332 if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
333 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
335 while ((ifa = *ifap1) != NULL) {
336 if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
337 ifa1->ifa_scope <= ifa->ifa_scope)
338 last_prim = ifa;
340 if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
341 ifa1->ifa_mask != ifa->ifa_mask ||
342 !inet_ifa_match(ifa1->ifa_address, ifa)) {
343 ifap1 = &ifa->ifa_next;
344 prev_prom = ifa;
345 continue;
348 if (!do_promote) {
349 inet_hash_remove(ifa);
350 *ifap1 = ifa->ifa_next;
352 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
353 blocking_notifier_call_chain(&inetaddr_chain,
354 NETDEV_DOWN, ifa);
355 inet_free_ifa(ifa);
356 } else {
357 promote = ifa;
358 break;
363 /* On promotion all secondaries from subnet are changing
364 * the primary IP, we must remove all their routes silently
365 * and later to add them back with new prefsrc. Do this
366 * while all addresses are on the device list.
368 for (ifa = promote; ifa; ifa = ifa->ifa_next) {
369 if (ifa1->ifa_mask == ifa->ifa_mask &&
370 inet_ifa_match(ifa1->ifa_address, ifa))
371 fib_del_ifaddr(ifa, ifa1);
374 /* 2. Unlink it */
376 *ifap = ifa1->ifa_next;
377 inet_hash_remove(ifa1);
379 /* 3. Announce address deletion */
381 /* Send message first, then call notifier.
382 At first sight, FIB update triggered by notifier
383 will refer to already deleted ifaddr, that could confuse
384 netlink listeners. It is not true: look, gated sees
385 that route deleted and if it still thinks that ifaddr
386 is valid, it will try to restore deleted routes... Grr.
387 So that, this order is correct.
389 rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
390 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392 if (promote) {
393 struct in_ifaddr *next_sec = promote->ifa_next;
395 if (prev_prom) {
396 prev_prom->ifa_next = promote->ifa_next;
397 promote->ifa_next = last_prim->ifa_next;
398 last_prim->ifa_next = promote;
401 promote->ifa_flags &= ~IFA_F_SECONDARY;
402 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
403 blocking_notifier_call_chain(&inetaddr_chain,
404 NETDEV_UP, promote);
405 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
406 if (ifa1->ifa_mask != ifa->ifa_mask ||
407 !inet_ifa_match(ifa1->ifa_address, ifa))
408 continue;
409 fib_add_ifaddr(ifa);
413 if (destroy)
414 inet_free_ifa(ifa1);
417 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
418 int destroy)
420 __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
423 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
424 u32 pid)
426 struct in_device *in_dev = ifa->ifa_dev;
427 struct in_ifaddr *ifa1, **ifap, **last_primary;
429 ASSERT_RTNL();
431 if (!ifa->ifa_local) {
432 inet_free_ifa(ifa);
433 return 0;
436 ifa->ifa_flags &= ~IFA_F_SECONDARY;
437 last_primary = &in_dev->ifa_list;
439 for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
440 ifap = &ifa1->ifa_next) {
441 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
442 ifa->ifa_scope <= ifa1->ifa_scope)
443 last_primary = &ifa1->ifa_next;
444 if (ifa1->ifa_mask == ifa->ifa_mask &&
445 inet_ifa_match(ifa1->ifa_address, ifa)) {
446 if (ifa1->ifa_local == ifa->ifa_local) {
447 inet_free_ifa(ifa);
448 return -EEXIST;
450 if (ifa1->ifa_scope != ifa->ifa_scope) {
451 inet_free_ifa(ifa);
452 return -EINVAL;
454 ifa->ifa_flags |= IFA_F_SECONDARY;
458 if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
459 net_srandom(ifa->ifa_local);
460 ifap = last_primary;
463 ifa->ifa_next = *ifap;
464 *ifap = ifa;
466 inet_hash_insert(dev_net(in_dev->dev), ifa);
468 /* Send message first, then call notifier.
469 Notifier will trigger FIB update, so that
470 listeners of netlink will know about new ifaddr */
471 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
472 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
474 return 0;
477 static int inet_insert_ifa(struct in_ifaddr *ifa)
479 return __inet_insert_ifa(ifa, NULL, 0);
482 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484 struct in_device *in_dev = __in_dev_get_rtnl(dev);
486 ASSERT_RTNL();
488 if (!in_dev) {
489 inet_free_ifa(ifa);
490 return -ENOBUFS;
492 ipv4_devconf_setall(in_dev);
493 if (ifa->ifa_dev != in_dev) {
494 WARN_ON(ifa->ifa_dev);
495 in_dev_hold(in_dev);
496 ifa->ifa_dev = in_dev;
498 if (ipv4_is_loopback(ifa->ifa_local))
499 ifa->ifa_scope = RT_SCOPE_HOST;
500 return inet_insert_ifa(ifa);
503 /* Caller must hold RCU or RTNL :
504 * We dont take a reference on found in_device
506 struct in_device *inetdev_by_index(struct net *net, int ifindex)
508 struct net_device *dev;
509 struct in_device *in_dev = NULL;
511 rcu_read_lock();
512 dev = dev_get_by_index_rcu(net, ifindex);
513 if (dev)
514 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
515 rcu_read_unlock();
516 return in_dev;
518 EXPORT_SYMBOL(inetdev_by_index);
520 /* Called only from RTNL semaphored context. No locks. */
522 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
523 __be32 mask)
525 ASSERT_RTNL();
527 for_primary_ifa(in_dev) {
528 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
529 return ifa;
530 } endfor_ifa(in_dev);
531 return NULL;
534 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536 struct net *net = sock_net(skb->sk);
537 struct nlattr *tb[IFA_MAX+1];
538 struct in_device *in_dev;
539 struct ifaddrmsg *ifm;
540 struct in_ifaddr *ifa, **ifap;
541 int err = -EINVAL;
543 ASSERT_RTNL();
545 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
546 if (err < 0)
547 goto errout;
549 ifm = nlmsg_data(nlh);
550 in_dev = inetdev_by_index(net, ifm->ifa_index);
551 if (in_dev == NULL) {
552 err = -ENODEV;
553 goto errout;
556 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
557 ifap = &ifa->ifa_next) {
558 if (tb[IFA_LOCAL] &&
559 ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
560 continue;
562 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
563 continue;
565 if (tb[IFA_ADDRESS] &&
566 (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
567 !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
568 continue;
570 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
571 return 0;
574 err = -EADDRNOTAVAIL;
575 errout:
576 return err;
579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581 struct nlattr *tb[IFA_MAX+1];
582 struct in_ifaddr *ifa;
583 struct ifaddrmsg *ifm;
584 struct net_device *dev;
585 struct in_device *in_dev;
586 int err;
588 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
589 if (err < 0)
590 goto errout;
592 ifm = nlmsg_data(nlh);
593 err = -EINVAL;
594 if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
595 goto errout;
597 dev = __dev_get_by_index(net, ifm->ifa_index);
598 err = -ENODEV;
599 if (dev == NULL)
600 goto errout;
602 in_dev = __in_dev_get_rtnl(dev);
603 err = -ENOBUFS;
604 if (in_dev == NULL)
605 goto errout;
607 ifa = inet_alloc_ifa();
608 if (ifa == NULL)
610 * A potential indev allocation can be left alive, it stays
611 * assigned to its device and is destroy with it.
613 goto errout;
615 ipv4_devconf_setall(in_dev);
616 in_dev_hold(in_dev);
618 if (tb[IFA_ADDRESS] == NULL)
619 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621 INIT_HLIST_NODE(&ifa->hash);
622 ifa->ifa_prefixlen = ifm->ifa_prefixlen;
623 ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
624 ifa->ifa_flags = ifm->ifa_flags;
625 ifa->ifa_scope = ifm->ifa_scope;
626 ifa->ifa_dev = in_dev;
628 ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
629 ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631 if (tb[IFA_BROADCAST])
632 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634 if (tb[IFA_LABEL])
635 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
636 else
637 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639 return ifa;
641 errout:
642 return ERR_PTR(err);
645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647 struct net *net = sock_net(skb->sk);
648 struct in_ifaddr *ifa;
650 ASSERT_RTNL();
652 ifa = rtm_to_ifaddr(net, nlh);
653 if (IS_ERR(ifa))
654 return PTR_ERR(ifa);
656 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
660 * Determine a default network mask, based on the IP address.
663 static inline int inet_abc_len(__be32 addr)
665 int rc = -1; /* Something else, probably a multicast. */
667 if (ipv4_is_zeronet(addr))
668 rc = 0;
669 else {
670 __u32 haddr = ntohl(addr);
672 if (IN_CLASSA(haddr))
673 rc = 8;
674 else if (IN_CLASSB(haddr))
675 rc = 16;
676 else if (IN_CLASSC(haddr))
677 rc = 24;
680 return rc;
684 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
686 struct ifreq ifr;
687 struct sockaddr_in sin_orig;
688 struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
689 struct in_device *in_dev;
690 struct in_ifaddr **ifap = NULL;
691 struct in_ifaddr *ifa = NULL;
692 struct net_device *dev;
693 char *colon;
694 int ret = -EFAULT;
695 int tryaddrmatch = 0;
698 * Fetch the caller's info block into kernel space
701 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
702 goto out;
703 ifr.ifr_name[IFNAMSIZ - 1] = 0;
705 /* save original address for comparison */
706 memcpy(&sin_orig, sin, sizeof(*sin));
708 colon = strchr(ifr.ifr_name, ':');
709 if (colon)
710 *colon = 0;
712 dev_load(net, ifr.ifr_name);
714 switch (cmd) {
715 case SIOCGIFADDR: /* Get interface address */
716 case SIOCGIFBRDADDR: /* Get the broadcast address */
717 case SIOCGIFDSTADDR: /* Get the destination address */
718 case SIOCGIFNETMASK: /* Get the netmask for the interface */
719 /* Note that these ioctls will not sleep,
720 so that we do not impose a lock.
721 One day we will be forced to put shlock here (I mean SMP)
723 tryaddrmatch = (sin_orig.sin_family == AF_INET);
724 memset(sin, 0, sizeof(*sin));
725 sin->sin_family = AF_INET;
726 break;
728 case SIOCSIFFLAGS:
729 ret = -EACCES;
730 if (!capable(CAP_NET_ADMIN))
731 goto out;
732 break;
733 case SIOCSIFADDR: /* Set interface address (and family) */
734 case SIOCSIFBRDADDR: /* Set the broadcast address */
735 case SIOCSIFDSTADDR: /* Set the destination address */
736 case SIOCSIFNETMASK: /* Set the netmask for the interface */
737 ret = -EACCES;
738 if (!capable(CAP_NET_ADMIN))
739 goto out;
740 ret = -EINVAL;
741 if (sin->sin_family != AF_INET)
742 goto out;
743 break;
744 default:
745 ret = -EINVAL;
746 goto out;
749 rtnl_lock();
751 ret = -ENODEV;
752 dev = __dev_get_by_name(net, ifr.ifr_name);
753 if (!dev)
754 goto done;
756 if (colon)
757 *colon = ':';
759 in_dev = __in_dev_get_rtnl(dev);
760 if (in_dev) {
761 if (tryaddrmatch) {
762 /* Matthias Andree */
763 /* compare label and address (4.4BSD style) */
764 /* note: we only do this for a limited set of ioctls
765 and only if the original address family was AF_INET.
766 This is checked above. */
767 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
768 ifap = &ifa->ifa_next) {
769 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
770 sin_orig.sin_addr.s_addr ==
771 ifa->ifa_local) {
772 break; /* found */
776 /* we didn't get a match, maybe the application is
777 4.3BSD-style and passed in junk so we fall back to
778 comparing just the label */
779 if (!ifa) {
780 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
781 ifap = &ifa->ifa_next)
782 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
783 break;
787 ret = -EADDRNOTAVAIL;
788 if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
789 goto done;
791 switch (cmd) {
792 case SIOCGIFADDR: /* Get interface address */
793 sin->sin_addr.s_addr = ifa->ifa_local;
794 goto rarok;
796 case SIOCGIFBRDADDR: /* Get the broadcast address */
797 sin->sin_addr.s_addr = ifa->ifa_broadcast;
798 goto rarok;
800 case SIOCGIFDSTADDR: /* Get the destination address */
801 sin->sin_addr.s_addr = ifa->ifa_address;
802 goto rarok;
804 case SIOCGIFNETMASK: /* Get the netmask for the interface */
805 sin->sin_addr.s_addr = ifa->ifa_mask;
806 goto rarok;
808 case SIOCSIFFLAGS:
809 if (colon) {
810 ret = -EADDRNOTAVAIL;
811 if (!ifa)
812 break;
813 ret = 0;
814 if (!(ifr.ifr_flags & IFF_UP))
815 inet_del_ifa(in_dev, ifap, 1);
816 break;
818 ret = dev_change_flags(dev, ifr.ifr_flags);
819 break;
821 case SIOCSIFADDR: /* Set interface address (and family) */
822 ret = -EINVAL;
823 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
824 break;
826 if (!ifa) {
827 ret = -ENOBUFS;
828 ifa = inet_alloc_ifa();
829 INIT_HLIST_NODE(&ifa->hash);
830 if (!ifa)
831 break;
832 if (colon)
833 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
834 else
835 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
836 } else {
837 ret = 0;
838 if (ifa->ifa_local == sin->sin_addr.s_addr)
839 break;
840 inet_del_ifa(in_dev, ifap, 0);
841 ifa->ifa_broadcast = 0;
842 ifa->ifa_scope = 0;
845 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
847 if (!(dev->flags & IFF_POINTOPOINT)) {
848 ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
849 ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
850 if ((dev->flags & IFF_BROADCAST) &&
851 ifa->ifa_prefixlen < 31)
852 ifa->ifa_broadcast = ifa->ifa_address |
853 ~ifa->ifa_mask;
854 } else {
855 ifa->ifa_prefixlen = 32;
856 ifa->ifa_mask = inet_make_mask(32);
858 ret = inet_set_ifa(dev, ifa);
859 break;
861 case SIOCSIFBRDADDR: /* Set the broadcast address */
862 ret = 0;
863 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
864 inet_del_ifa(in_dev, ifap, 0);
865 ifa->ifa_broadcast = sin->sin_addr.s_addr;
866 inet_insert_ifa(ifa);
868 break;
870 case SIOCSIFDSTADDR: /* Set the destination address */
871 ret = 0;
872 if (ifa->ifa_address == sin->sin_addr.s_addr)
873 break;
874 ret = -EINVAL;
875 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
876 break;
877 ret = 0;
878 inet_del_ifa(in_dev, ifap, 0);
879 ifa->ifa_address = sin->sin_addr.s_addr;
880 inet_insert_ifa(ifa);
881 break;
883 case SIOCSIFNETMASK: /* Set the netmask for the interface */
886 * The mask we set must be legal.
888 ret = -EINVAL;
889 if (bad_mask(sin->sin_addr.s_addr, 0))
890 break;
891 ret = 0;
892 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
893 __be32 old_mask = ifa->ifa_mask;
894 inet_del_ifa(in_dev, ifap, 0);
895 ifa->ifa_mask = sin->sin_addr.s_addr;
896 ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
898 /* See if current broadcast address matches
899 * with current netmask, then recalculate
900 * the broadcast address. Otherwise it's a
901 * funny address, so don't touch it since
902 * the user seems to know what (s)he's doing...
904 if ((dev->flags & IFF_BROADCAST) &&
905 (ifa->ifa_prefixlen < 31) &&
906 (ifa->ifa_broadcast ==
907 (ifa->ifa_local|~old_mask))) {
908 ifa->ifa_broadcast = (ifa->ifa_local |
909 ~sin->sin_addr.s_addr);
911 inet_insert_ifa(ifa);
913 break;
915 done:
916 rtnl_unlock();
917 out:
918 return ret;
919 rarok:
920 rtnl_unlock();
921 ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
922 goto out;
925 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
927 struct in_device *in_dev = __in_dev_get_rtnl(dev);
928 struct in_ifaddr *ifa;
929 struct ifreq ifr;
930 int done = 0;
932 if (!in_dev)
933 goto out;
935 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
936 if (!buf) {
937 done += sizeof(ifr);
938 continue;
940 if (len < (int) sizeof(ifr))
941 break;
942 memset(&ifr, 0, sizeof(struct ifreq));
943 if (ifa->ifa_label)
944 strcpy(ifr.ifr_name, ifa->ifa_label);
945 else
946 strcpy(ifr.ifr_name, dev->name);
948 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
949 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
950 ifa->ifa_local;
952 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
953 done = -EFAULT;
954 break;
956 buf += sizeof(struct ifreq);
957 len -= sizeof(struct ifreq);
958 done += sizeof(struct ifreq);
960 out:
961 return done;
964 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
966 __be32 addr = 0;
967 struct in_device *in_dev;
968 struct net *net = dev_net(dev);
970 rcu_read_lock();
971 in_dev = __in_dev_get_rcu(dev);
972 if (!in_dev)
973 goto no_in_dev;
975 for_primary_ifa(in_dev) {
976 if (ifa->ifa_scope > scope)
977 continue;
978 if (!dst || inet_ifa_match(dst, ifa)) {
979 addr = ifa->ifa_local;
980 break;
982 if (!addr)
983 addr = ifa->ifa_local;
984 } endfor_ifa(in_dev);
986 if (addr)
987 goto out_unlock;
988 no_in_dev:
990 /* Not loopback addresses on loopback should be preferred
991 in this case. It is importnat that lo is the first interface
992 in dev_base list.
994 for_each_netdev_rcu(net, dev) {
995 in_dev = __in_dev_get_rcu(dev);
996 if (!in_dev)
997 continue;
999 for_primary_ifa(in_dev) {
1000 if (ifa->ifa_scope != RT_SCOPE_LINK &&
1001 ifa->ifa_scope <= scope) {
1002 addr = ifa->ifa_local;
1003 goto out_unlock;
1005 } endfor_ifa(in_dev);
1007 out_unlock:
1008 rcu_read_unlock();
1009 return addr;
1011 EXPORT_SYMBOL(inet_select_addr);
1013 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1014 __be32 local, int scope)
1016 int same = 0;
1017 __be32 addr = 0;
1019 for_ifa(in_dev) {
1020 if (!addr &&
1021 (local == ifa->ifa_local || !local) &&
1022 ifa->ifa_scope <= scope) {
1023 addr = ifa->ifa_local;
1024 if (same)
1025 break;
1027 if (!same) {
1028 same = (!local || inet_ifa_match(local, ifa)) &&
1029 (!dst || inet_ifa_match(dst, ifa));
1030 if (same && addr) {
1031 if (local || !dst)
1032 break;
1033 /* Is the selected addr into dst subnet? */
1034 if (inet_ifa_match(addr, ifa))
1035 break;
1036 /* No, then can we use new local src? */
1037 if (ifa->ifa_scope <= scope) {
1038 addr = ifa->ifa_local;
1039 break;
1041 /* search for large dst subnet for addr */
1042 same = 0;
1045 } endfor_ifa(in_dev);
1047 return same ? addr : 0;
1051 * Confirm that local IP address exists using wildcards:
1052 * - in_dev: only on this interface, 0=any interface
1053 * - dst: only in the same subnet as dst, 0=any dst
1054 * - local: address, 0=autoselect the local address
1055 * - scope: maximum allowed scope value for the local address
1057 __be32 inet_confirm_addr(struct in_device *in_dev,
1058 __be32 dst, __be32 local, int scope)
1060 __be32 addr = 0;
1061 struct net_device *dev;
1062 struct net *net;
1064 if (scope != RT_SCOPE_LINK)
1065 return confirm_addr_indev(in_dev, dst, local, scope);
1067 net = dev_net(in_dev->dev);
1068 rcu_read_lock();
1069 for_each_netdev_rcu(net, dev) {
1070 in_dev = __in_dev_get_rcu(dev);
1071 if (in_dev) {
1072 addr = confirm_addr_indev(in_dev, dst, local, scope);
1073 if (addr)
1074 break;
1077 rcu_read_unlock();
1079 return addr;
1081 EXPORT_SYMBOL(inet_confirm_addr);
1084 * Device notifier
1087 int register_inetaddr_notifier(struct notifier_block *nb)
1089 return blocking_notifier_chain_register(&inetaddr_chain, nb);
1091 EXPORT_SYMBOL(register_inetaddr_notifier);
1093 int unregister_inetaddr_notifier(struct notifier_block *nb)
1095 return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1097 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1099 /* Rename ifa_labels for a device name change. Make some effort to preserve
1100 * existing alias numbering and to create unique labels if possible.
1102 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1104 struct in_ifaddr *ifa;
1105 int named = 0;
1107 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108 char old[IFNAMSIZ], *dot;
1110 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112 if (named++ == 0)
1113 goto skip;
1114 dot = strchr(old, ':');
1115 if (dot == NULL) {
1116 sprintf(old, ":%d", named);
1117 dot = old;
1119 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120 strcat(ifa->ifa_label, dot);
1121 else
1122 strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123 skip:
1124 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
/* Smallest MTU accepted for an IPv4 interface: RFC 791 requires every
 * internet module to forward a 68-octet datagram without fragmentation.
 */
#define INETDEV_MIN_MTU	68

/* Return true when @mtu is large enough for IPv4 to run on the device. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	return mtu >= INETDEV_MIN_MTU;
}
1133 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134 struct in_device *in_dev)
1137 struct in_ifaddr *ifa;
1139 for (ifa = in_dev->ifa_list; ifa;
1140 ifa = ifa->ifa_next) {
1141 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142 ifa->ifa_local, dev,
1143 ifa->ifa_local, NULL,
1144 dev->dev_addr, NULL);
1148 /* Called only under RTNL semaphore */
1150 static int inetdev_event(struct notifier_block *this, unsigned long event,
1151 void *ptr)
1153 struct net_device *dev = ptr;
1154 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1156 ASSERT_RTNL();
1158 if (!in_dev) {
1159 if (event == NETDEV_REGISTER) {
1160 in_dev = inetdev_init(dev);
1161 if (!in_dev)
1162 return notifier_from_errno(-ENOMEM);
1163 if (dev->flags & IFF_LOOPBACK) {
1164 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1167 } else if (event == NETDEV_CHANGEMTU) {
1168 /* Re-enabling IP */
1169 if (inetdev_valid_mtu(dev->mtu))
1170 in_dev = inetdev_init(dev);
1172 goto out;
1175 switch (event) {
1176 case NETDEV_REGISTER:
1177 printk(KERN_DEBUG "inetdev_event: bug\n");
1178 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1179 break;
1180 case NETDEV_UP:
1181 if (!inetdev_valid_mtu(dev->mtu))
1182 break;
1183 if (dev->flags & IFF_LOOPBACK) {
1184 struct in_ifaddr *ifa = inet_alloc_ifa();
1186 if (ifa) {
1187 INIT_HLIST_NODE(&ifa->hash);
1188 ifa->ifa_local =
1189 ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190 ifa->ifa_prefixlen = 8;
1191 ifa->ifa_mask = inet_make_mask(8);
1192 in_dev_hold(in_dev);
1193 ifa->ifa_dev = in_dev;
1194 ifa->ifa_scope = RT_SCOPE_HOST;
1195 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196 inet_insert_ifa(ifa);
1199 ip_mc_up(in_dev);
1200 /* fall through */
1201 case NETDEV_CHANGEADDR:
1202 if (!IN_DEV_ARP_NOTIFY(in_dev))
1203 break;
1204 /* fall through */
1205 case NETDEV_NOTIFY_PEERS:
1206 /* Send gratuitous ARP to notify of link change */
1207 inetdev_send_gratuitous_arp(dev, in_dev);
1208 break;
1209 case NETDEV_DOWN:
1210 ip_mc_down(in_dev);
1211 break;
1212 case NETDEV_PRE_TYPE_CHANGE:
1213 ip_mc_unmap(in_dev);
1214 break;
1215 case NETDEV_POST_TYPE_CHANGE:
1216 ip_mc_remap(in_dev);
1217 break;
1218 case NETDEV_CHANGEMTU:
1219 if (inetdev_valid_mtu(dev->mtu))
1220 break;
1221 /* disable IP when MTU is not enough */
1222 case NETDEV_UNREGISTER:
1223 inetdev_destroy(in_dev);
1224 break;
1225 case NETDEV_CHANGENAME:
1226 /* Do not notify about label change, this event is
1227 * not interesting to applications using netlink.
1229 inetdev_changename(dev, in_dev);
1231 devinet_sysctl_unregister(in_dev);
1232 devinet_sysctl_register(in_dev);
1233 break;
1235 out:
1236 return NOTIFY_DONE;
1239 static struct notifier_block ip_netdev_notifier = {
1240 .notifier_call = inetdev_event,
1243 static inline size_t inet_nlmsg_size(void)
1245 return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246 + nla_total_size(4) /* IFA_ADDRESS */
1247 + nla_total_size(4) /* IFA_LOCAL */
1248 + nla_total_size(4) /* IFA_BROADCAST */
1249 + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1252 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253 u32 pid, u32 seq, int event, unsigned int flags)
1255 struct ifaddrmsg *ifm;
1256 struct nlmsghdr *nlh;
1258 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259 if (nlh == NULL)
1260 return -EMSGSIZE;
1262 ifm = nlmsg_data(nlh);
1263 ifm->ifa_family = AF_INET;
1264 ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265 ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266 ifm->ifa_scope = ifa->ifa_scope;
1267 ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1269 if (ifa->ifa_address)
1270 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1272 if (ifa->ifa_local)
1273 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1275 if (ifa->ifa_broadcast)
1276 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1278 if (ifa->ifa_label[0])
1279 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1281 return nlmsg_end(skb, nlh);
1283 nla_put_failure:
1284 nlmsg_cancel(skb, nlh);
1285 return -EMSGSIZE;
1288 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1290 struct net *net = sock_net(skb->sk);
1291 int h, s_h;
1292 int idx, s_idx;
1293 int ip_idx, s_ip_idx;
1294 struct net_device *dev;
1295 struct in_device *in_dev;
1296 struct in_ifaddr *ifa;
1297 struct hlist_head *head;
1298 struct hlist_node *node;
1300 s_h = cb->args[0];
1301 s_idx = idx = cb->args[1];
1302 s_ip_idx = ip_idx = cb->args[2];
1304 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1305 idx = 0;
1306 head = &net->dev_index_head[h];
1307 rcu_read_lock();
1308 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1309 if (idx < s_idx)
1310 goto cont;
1311 if (h > s_h || idx > s_idx)
1312 s_ip_idx = 0;
1313 in_dev = __in_dev_get_rcu(dev);
1314 if (!in_dev)
1315 goto cont;
1317 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1318 ifa = ifa->ifa_next, ip_idx++) {
1319 if (ip_idx < s_ip_idx)
1320 continue;
1321 if (inet_fill_ifaddr(skb, ifa,
1322 NETLINK_CB(cb->skb).pid,
1323 cb->nlh->nlmsg_seq,
1324 RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1325 rcu_read_unlock();
1326 goto done;
1329 cont:
1330 idx++;
1332 rcu_read_unlock();
1335 done:
1336 cb->args[0] = h;
1337 cb->args[1] = idx;
1338 cb->args[2] = ip_idx;
1340 return skb->len;
1343 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344 u32 pid)
1346 struct sk_buff *skb;
1347 u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348 int err = -ENOBUFS;
1349 struct net *net;
1351 net = dev_net(ifa->ifa_dev->dev);
1352 skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353 if (skb == NULL)
1354 goto errout;
1356 err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357 if (err < 0) {
1358 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359 WARN_ON(err == -EMSGSIZE);
1360 kfree_skb(skb);
1361 goto errout;
1363 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364 return;
1365 errout:
1366 if (err < 0)
1367 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1370 static size_t inet_get_link_af_size(const struct net_device *dev)
1372 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1374 if (!in_dev)
1375 return 0;
1377 return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1380 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1382 struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383 struct nlattr *nla;
1384 int i;
1386 if (!in_dev)
1387 return -ENODATA;
1389 nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390 if (nla == NULL)
1391 return -EMSGSIZE;
1393 for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1396 return 0;
1399 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1400 [IFLA_INET_CONF] = { .type = NLA_NESTED },
1403 static int inet_validate_link_af(const struct net_device *dev,
1404 const struct nlattr *nla)
1406 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407 int err, rem;
1409 if (dev && !__in_dev_get_rtnl(dev))
1410 return -EAFNOSUPPORT;
1412 err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413 if (err < 0)
1414 return err;
1416 if (tb[IFLA_INET_CONF]) {
1417 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418 int cfgid = nla_type(a);
1420 if (nla_len(a) < 4)
1421 return -EINVAL;
1423 if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424 return -EINVAL;
1428 return 0;
1431 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1433 struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434 struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435 int rem;
1437 if (!in_dev)
1438 return -EAFNOSUPPORT;
1440 if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441 BUG();
1443 if (tb[IFLA_INET_CONF]) {
1444 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445 ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1448 return 0;
1451 #ifdef CONFIG_SYSCTL
1453 static void devinet_copy_dflt_conf(struct net *net, int i)
1455 struct net_device *dev;
1457 rcu_read_lock();
1458 for_each_netdev_rcu(net, dev) {
1459 struct in_device *in_dev;
1461 in_dev = __in_dev_get_rcu(dev);
1462 if (in_dev && !test_bit(i, in_dev->cnf.state))
1463 in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1465 rcu_read_unlock();
1468 /* called with RTNL locked */
1469 static void inet_forward_change(struct net *net)
1471 struct net_device *dev;
1472 int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1474 IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475 IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1477 for_each_netdev(net, dev) {
1478 struct in_device *in_dev;
1479 if (on)
1480 dev_disable_lro(dev);
1481 rcu_read_lock();
1482 in_dev = __in_dev_get_rcu(dev);
1483 if (in_dev)
1484 IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485 rcu_read_unlock();
1489 static int devinet_conf_proc(ctl_table *ctl, int write,
1490 void __user *buffer,
1491 size_t *lenp, loff_t *ppos)
1493 int old_value = *(int *)ctl->data;
1494 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1495 int new_value = *(int *)ctl->data;
1497 if (write) {
1498 struct ipv4_devconf *cnf = ctl->extra1;
1499 struct net *net = ctl->extra2;
1500 int i = (int *)ctl->data - cnf->data;
1502 set_bit(i, cnf->state);
1504 if (cnf == net->ipv4.devconf_dflt)
1505 devinet_copy_dflt_conf(net, i);
1506 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1)
1507 if ((new_value == 0) && (old_value != 0))
1508 rt_cache_flush(net, 0);
1511 return ret;
1514 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1515 void __user *buffer,
1516 size_t *lenp, loff_t *ppos)
1518 int *valp = ctl->data;
1519 int val = *valp;
1520 loff_t pos = *ppos;
1521 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523 if (write && *valp != val) {
1524 struct net *net = ctl->extra2;
1526 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1527 if (!rtnl_trylock()) {
1528 /* Restore the original values before restarting */
1529 *valp = val;
1530 *ppos = pos;
1531 return restart_syscall();
1533 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1534 inet_forward_change(net);
1535 } else if (*valp) {
1536 struct ipv4_devconf *cnf = ctl->extra1;
1537 struct in_device *idev =
1538 container_of(cnf, struct in_device, cnf);
1539 dev_disable_lro(idev->dev);
1541 rtnl_unlock();
1542 rt_cache_flush(net, 0);
1546 return ret;
1549 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1550 void __user *buffer,
1551 size_t *lenp, loff_t *ppos)
1553 int *valp = ctl->data;
1554 int val = *valp;
1555 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1556 struct net *net = ctl->extra2;
1558 if (write && *valp != val)
1559 rt_cache_flush(net, 0);
1561 return ret;
/*
 * Build one ctl_table initializer for devconf attribute @attr.  The data
 * pointer refers to the matching slot of the global ipv4_devconf (slots
 * are indexed by IPV4_DEVCONF_<attr> - 1).
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1587 static struct devinet_sysctl_table {
1588 struct ctl_table_header *sysctl_header;
1589 struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1590 char *dev_name;
1591 } devinet_sysctl = {
1592 .devinet_vars = {
1593 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1594 devinet_sysctl_forward),
1595 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1597 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1598 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1599 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1600 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1601 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1602 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1603 "accept_source_route"),
1604 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1605 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1606 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1607 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1608 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1609 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1610 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1611 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1612 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1613 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1614 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1615 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1616 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1618 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1619 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1620 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1621 "force_igmp_version"),
1622 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1623 "promote_secondaries"),
1627 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1628 struct ipv4_devconf *p)
1630 int i;
1631 struct devinet_sysctl_table *t;
1633 #define DEVINET_CTL_PATH_DEV 3
1635 struct ctl_path devinet_ctl_path[] = {
1636 { .procname = "net", },
1637 { .procname = "ipv4", },
1638 { .procname = "conf", },
1639 { /* to be set */ },
1640 { },
1643 t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1644 if (!t)
1645 goto out;
1647 for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1648 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1649 t->devinet_vars[i].extra1 = p;
1650 t->devinet_vars[i].extra2 = net;
1654 * Make a copy of dev_name, because '.procname' is regarded as const
1655 * by sysctl and we wouldn't want anyone to change it under our feet
1656 * (see SIOCSIFNAME).
1658 t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1659 if (!t->dev_name)
1660 goto free;
1662 devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1664 t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1665 t->devinet_vars);
1666 if (!t->sysctl_header)
1667 goto free_procname;
1669 p->sysctl = t;
1670 return 0;
1672 free_procname:
1673 kfree(t->dev_name);
1674 free:
1675 kfree(t);
1676 out:
1677 return -ENOBUFS;
1680 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1682 struct devinet_sysctl_table *t = cnf->sysctl;
1684 if (t == NULL)
1685 return;
1687 cnf->sysctl = NULL;
1688 unregister_net_sysctl_table(t->sysctl_header);
1689 kfree(t->dev_name);
1690 kfree(t);
1693 static void devinet_sysctl_register(struct in_device *idev)
1695 neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1696 __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1697 &idev->cnf);
1700 static void devinet_sysctl_unregister(struct in_device *idev)
1702 __devinet_sysctl_unregister(&idev->cnf);
1703 neigh_sysctl_unregister(idev->arp_parms);
1706 static struct ctl_table ctl_forward_entry[] = {
1708 .procname = "ip_forward",
1709 .data = &ipv4_devconf.data[
1710 IPV4_DEVCONF_FORWARDING - 1],
1711 .maxlen = sizeof(int),
1712 .mode = 0644,
1713 .proc_handler = devinet_sysctl_forward,
1714 .extra1 = &ipv4_devconf,
1715 .extra2 = &init_net,
1717 { },
1720 static __net_initdata struct ctl_path net_ipv4_path[] = {
1721 { .procname = "net", },
1722 { .procname = "ipv4", },
1723 { },
1725 #endif
1727 static __net_init int devinet_init_net(struct net *net)
1729 int err;
1730 struct ipv4_devconf *all, *dflt;
1731 #ifdef CONFIG_SYSCTL
1732 struct ctl_table *tbl = ctl_forward_entry;
1733 struct ctl_table_header *forw_hdr;
1734 #endif
1736 err = -ENOMEM;
1737 all = &ipv4_devconf;
1738 dflt = &ipv4_devconf_dflt;
1740 if (!net_eq(net, &init_net)) {
1741 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1742 if (all == NULL)
1743 goto err_alloc_all;
1745 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1746 if (dflt == NULL)
1747 goto err_alloc_dflt;
1749 #ifdef CONFIG_SYSCTL
1750 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1751 if (tbl == NULL)
1752 goto err_alloc_ctl;
1754 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1755 tbl[0].extra1 = all;
1756 tbl[0].extra2 = net;
1757 #endif
1760 #ifdef CONFIG_SYSCTL
1761 err = __devinet_sysctl_register(net, "all", all);
1762 if (err < 0)
1763 goto err_reg_all;
1765 err = __devinet_sysctl_register(net, "default", dflt);
1766 if (err < 0)
1767 goto err_reg_dflt;
1769 err = -ENOMEM;
1770 forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1771 if (forw_hdr == NULL)
1772 goto err_reg_ctl;
1773 net->ipv4.forw_hdr = forw_hdr;
1774 #endif
1776 net->ipv4.devconf_all = all;
1777 net->ipv4.devconf_dflt = dflt;
1778 return 0;
1780 #ifdef CONFIG_SYSCTL
1781 err_reg_ctl:
1782 __devinet_sysctl_unregister(dflt);
1783 err_reg_dflt:
1784 __devinet_sysctl_unregister(all);
1785 err_reg_all:
1786 if (tbl != ctl_forward_entry)
1787 kfree(tbl);
1788 err_alloc_ctl:
1789 #endif
1790 if (dflt != &ipv4_devconf_dflt)
1791 kfree(dflt);
1792 err_alloc_dflt:
1793 if (all != &ipv4_devconf)
1794 kfree(all);
1795 err_alloc_all:
1796 return err;
1799 static __net_exit void devinet_exit_net(struct net *net)
1801 #ifdef CONFIG_SYSCTL
1802 struct ctl_table *tbl;
1804 tbl = net->ipv4.forw_hdr->ctl_table_arg;
1805 unregister_net_sysctl_table(net->ipv4.forw_hdr);
1806 __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1807 __devinet_sysctl_unregister(net->ipv4.devconf_all);
1808 kfree(tbl);
1809 #endif
1810 kfree(net->ipv4.devconf_dflt);
1811 kfree(net->ipv4.devconf_all);
1814 static __net_initdata struct pernet_operations devinet_ops = {
1815 .init = devinet_init_net,
1816 .exit = devinet_exit_net,
1819 static struct rtnl_af_ops inet_af_ops = {
1820 .family = AF_INET,
1821 .fill_link_af = inet_fill_link_af,
1822 .get_link_af_size = inet_get_link_af_size,
1823 .validate_link_af = inet_validate_link_af,
1824 .set_link_af = inet_set_link_af,
1827 void __init devinet_init(void)
1829 int i;
1831 for (i = 0; i < IN4_ADDR_HSIZE; i++)
1832 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1834 register_pernet_subsys(&devinet_ops);
1836 register_gifconf(PF_INET, inet_gifconf);
1837 register_netdevice_notifier(&ip_netdev_notifier);
1839 rtnl_af_register(&inet_af_ops);
1841 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1842 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1843 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);