net/ipv4/fib_frontend.c
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IPv4 Forwarding Information Base: FIB frontend.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>

#ifndef CONFIG_IP_MULTIPLE_TABLES
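/*
 * Without CONFIG_IP_MULTIPLE_TABLES only the LOCAL and MAIN tables exist;
 * create both here and hook them into the per-net table hash.
 */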
static int __net_init fib4_rules_init(struct net *net)
{
        struct fib_table *local_table, *main_table;

        local_table = fib_hash_table(RT_TABLE_LOCAL);
        if (local_table == NULL)
                return -ENOMEM;

        main_table = fib_hash_table(RT_TABLE_MAIN);
        if (main_table == NULL)
                goto fail;

        hlist_add_head_rcu(&local_table->tb_hlist,
                           &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
        hlist_add_head_rcu(&main_table->tb_hlist,
                           &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
        return 0;

fail:
        kfree(local_table);
        return -ENOMEM;
}
#else
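/*
 * Return the table with the given id, allocating it and adding it to the
 * per-net hash if it does not exist yet.  An id of 0 is treated as
 * RT_TABLE_MAIN.
 */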
struct fib_table *fib_new_table(struct net *net, u32 id)
{
        struct fib_table *tb;
        unsigned int h;

        if (id == 0)
                id = RT_TABLE_MAIN;
        tb = fib_get_table(net, id);
        if (tb)
                return tb;

        tb = fib_hash_table(id);
        if (!tb)
                return NULL;
        h = id & (FIB_TABLE_HASHSZ - 1);
        hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
        return tb;
}
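/*
 * Look up an already existing table by id under RCU; returns NULL if the
 * table has not been created.
 */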
struct fib_table *fib_get_table(struct net *net, u32 id)
{
        struct fib_table *tb;
        struct hlist_node *node;
        struct hlist_head *head;
        unsigned int h;

        if (id == 0)
                id = RT_TABLE_MAIN;
        h = id & (FIB_TABLE_HASHSZ - 1);

        rcu_read_lock();
        head = &net->ipv4.fib_table_hash[h];
        hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
                if (tb->tb_id == id) {
                        rcu_read_unlock();
                        return tb;
                }
        }
        rcu_read_unlock();
        return NULL;
}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
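/*
 * Pick a default route from the table referenced by the matched rule (or
 * MAIN when policy routing is not compiled in), but only when the current
 * result is a gateway route whose next hop has link scope.
 */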
void fib_select_default(struct net *net,
                        const struct flowi *flp, struct fib_result *res)
{
        struct fib_table *tb;
        int table = RT_TABLE_MAIN;
#ifdef CONFIG_IP_MULTIPLE_TABLES
        if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
                return;
        table = res->r->table;
#endif
        tb = fib_get_table(net, table);
        if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                fib_table_select_default(tb, flp, res);
}
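/*
 * Flush dead entries from every table in this namespace and, if anything
 * was removed, invalidate the routing cache.
 */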
static void fib_flush(struct net *net)
{
        int flushed = 0;
        struct fib_table *tb;
        struct hlist_node *node;
        struct hlist_head *head;
        unsigned int h;

        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
                head = &net->ipv4.fib_table_hash[h];
                hlist_for_each_entry(tb, node, head, tb_hlist)
                        flushed += fib_table_flush(tb);
        }

        if (flushed)
                rt_cache_flush(net, -1);
}
/*
 * Find the first device with a given source address.
 */

struct net_device *ip_dev_find(struct net *net, __be32 addr)
{
        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
        struct fib_result res;
        struct net_device *dev = NULL;
        struct fib_table *local_table;

#ifdef CONFIG_IP_MULTIPLE_TABLES
        res.r = NULL;
#endif

        local_table = fib_get_table(net, RT_TABLE_LOCAL);
        if (!local_table || fib_table_lookup(local_table, &fl, &res))
                return NULL;
        if (res.type != RTN_LOCAL)
                goto out;
        dev = FIB_RES_DEV(res);

        if (dev)
                dev_hold(dev);
out:
        fib_res_put(&res);
        return dev;
}
EXPORT_SYMBOL(ip_dev_find);
/*
 * Find address type as if only "dev" was present in the system. If
 * dev is NULL then all interfaces are taken into consideration.
 */
static inline unsigned __inet_dev_addr_type(struct net *net,
                                            const struct net_device *dev,
                                            __be32 addr)
{
        struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
        struct fib_result res;
        unsigned ret = RTN_BROADCAST;
        struct fib_table *local_table;

        if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
                return RTN_BROADCAST;
        if (ipv4_is_multicast(addr))
                return RTN_MULTICAST;

#ifdef CONFIG_IP_MULTIPLE_TABLES
        res.r = NULL;
#endif

        local_table = fib_get_table(net, RT_TABLE_LOCAL);
        if (local_table) {
                ret = RTN_UNICAST;
                if (!fib_table_lookup(local_table, &fl, &res)) {
                        if (!dev || dev == res.fi->fib_dev)
                                ret = res.type;
                        fib_res_put(&res);
                }
        }
        return ret;
}

unsigned int inet_addr_type(struct net *net, __be32 addr)
{
        return __inet_dev_addr_type(net, NULL, addr);
}
EXPORT_SYMBOL(inet_addr_type);

unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
                                __be32 addr)
{
        return __inet_dev_addr_type(net, dev, addr);
}
EXPORT_SYMBOL(inet_dev_addr_type);
/* Given (packet source, input interface) and optional (dst, oif, tos):
 * - (main) check, that source is valid i.e. not broadcast or our local
 *   address.
 * - figure out what "logical" interface this packet arrived
 *   and calculate "specific destination" address.
 * - check, that packet arrived from expected physical interface.
 */
int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
                        struct net_device *dev, __be32 *spec_dst,
                        u32 *itag, u32 mark)
{
        struct in_device *in_dev;
        struct flowi fl = { .nl_u = { .ip4_u =
                                      { .daddr = src,
                                        .saddr = dst,
                                        .tos = tos } },
                            .mark = mark,
                            .iif = oif };

        struct fib_result res;
        int no_addr, rpf, accept_local;
        int ret;
        struct net *net;

        no_addr = rpf = accept_local = 0;
        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (in_dev) {
                no_addr = in_dev->ifa_list == NULL;
                rpf = IN_DEV_RPFILTER(in_dev);
                accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
                if (mark && !IN_DEV_SRC_VMARK(in_dev))
                        fl.mark = 0;
        }
        rcu_read_unlock();

        if (in_dev == NULL)
                goto e_inval;

        net = dev_net(dev);
        if (fib_lookup(net, &fl, &res))
                goto last_resort;
        if (res.type != RTN_UNICAST) {
                if (res.type != RTN_LOCAL || !accept_local)
                        goto e_inval_res;
        }
        *spec_dst = FIB_RES_PREFSRC(res);
        fib_combine_itag(itag, &res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
        if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
#else
        if (FIB_RES_DEV(res) == dev)
#endif
        {
                ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
                fib_res_put(&res);
                return ret;
        }
        fib_res_put(&res);
        if (no_addr)
                goto last_resort;
        if (rpf == 1)
                goto e_rpf;
        fl.oif = dev->ifindex;

        ret = 0;
        if (fib_lookup(net, &fl, &res) == 0) {
                if (res.type == RTN_UNICAST) {
                        *spec_dst = FIB_RES_PREFSRC(res);
                        ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
                }
                fib_res_put(&res);
        }
        return ret;

last_resort:
        if (rpf)
                goto e_rpf;
        *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
        *itag = 0;
        return 0;

e_inval_res:
        fib_res_put(&res);
e_inval:
        return -EINVAL;
e_rpf:
        return -EXDEV;
}
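/*
 * Helpers for the ioctl path below: extract an IPv4 address from a
 * struct sockaddr, and append one metric (RTAX_*) attribute to the buffer
 * built by rtentry_to_fib_config().
 */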
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
        return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
}

static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
{
        struct nlattr *nla;

        nla = (struct nlattr *) ((char *) mx + len);
        nla->nla_type = type;
        nla->nla_len = nla_attr_size(4);
        *(u32 *) nla_data(nla) = value;

        return len + nla_total_size(4);
}
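/*
 * Convert a legacy struct rtentry (as passed in by the SIOCADDRT and
 * SIOCDELRT ioctls) into the fib_config form used by the FIB tables.
 */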
static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
                                 struct fib_config *cfg)
{
        __be32 addr;
        int plen;

        memset(cfg, 0, sizeof(*cfg));
        cfg->fc_nlinfo.nl_net = net;

        if (rt->rt_dst.sa_family != AF_INET)
                return -EAFNOSUPPORT;

        /*
         * Check mask for validity:
         * a) it must be contiguous.
         * b) destination must have all host bits clear.
         * c) if application forgot to set correct family (AF_INET),
         *    reject request unless it is absolutely clear i.e.
         *    both family and mask are zero.
         */
        plen = 32;
        addr = sk_extract_addr(&rt->rt_dst);
        if (!(rt->rt_flags & RTF_HOST)) {
                __be32 mask = sk_extract_addr(&rt->rt_genmask);

                if (rt->rt_genmask.sa_family != AF_INET) {
                        if (mask || rt->rt_genmask.sa_family)
                                return -EAFNOSUPPORT;
                }

                if (bad_mask(mask, addr))
                        return -EINVAL;

                plen = inet_mask_len(mask);
        }

        cfg->fc_dst_len = plen;
        cfg->fc_dst = addr;

        if (cmd != SIOCDELRT) {
                cfg->fc_nlflags = NLM_F_CREATE;
                cfg->fc_protocol = RTPROT_BOOT;
        }

        if (rt->rt_metric)
                cfg->fc_priority = rt->rt_metric - 1;

        if (rt->rt_flags & RTF_REJECT) {
                cfg->fc_scope = RT_SCOPE_HOST;
                cfg->fc_type = RTN_UNREACHABLE;
                return 0;
        }

        cfg->fc_scope = RT_SCOPE_NOWHERE;
        cfg->fc_type = RTN_UNICAST;

        if (rt->rt_dev) {
                char *colon;
                struct net_device *dev;
                char devname[IFNAMSIZ];

                if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
                        return -EFAULT;

                devname[IFNAMSIZ-1] = 0;
                colon = strchr(devname, ':');
                if (colon)
                        *colon = 0;
                dev = __dev_get_by_name(net, devname);
                if (!dev)
                        return -ENODEV;
                cfg->fc_oif = dev->ifindex;
                if (colon) {
                        struct in_ifaddr *ifa;
                        struct in_device *in_dev = __in_dev_get_rtnl(dev);
                        if (!in_dev)
                                return -ENODEV;
                        *colon = ':';
                        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
                                if (strcmp(ifa->ifa_label, devname) == 0)
                                        break;
                        if (ifa == NULL)
                                return -ENODEV;
                        cfg->fc_prefsrc = ifa->ifa_local;
                }
        }

        addr = sk_extract_addr(&rt->rt_gateway);
        if (rt->rt_gateway.sa_family == AF_INET && addr) {
                cfg->fc_gw = addr;
                if (rt->rt_flags & RTF_GATEWAY &&
                    inet_addr_type(net, addr) == RTN_UNICAST)
                        cfg->fc_scope = RT_SCOPE_UNIVERSE;
        }

        if (cmd == SIOCDELRT)
                return 0;

        if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
                return -EINVAL;

        if (cfg->fc_scope == RT_SCOPE_NOWHERE)
                cfg->fc_scope = RT_SCOPE_LINK;

        if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
                struct nlattr *mx;
                int len = 0;

                mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
                if (mx == NULL)
                        return -ENOMEM;

                if (rt->rt_flags & RTF_MTU)
                        len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);

                if (rt->rt_flags & RTF_WINDOW)
                        len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);

                if (rt->rt_flags & RTF_IRTT)
                        len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);

                cfg->fc_mx = mx;
                cfg->fc_mx_len = len;
        }

        return 0;
}
/*
 * Handle IP routing ioctl calls. These are used to manipulate the routing tables.
 */
int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
        struct fib_config cfg;
        struct rtentry rt;
        int err;

        switch (cmd) {
        case SIOCADDRT:         /* Add a route */
        case SIOCDELRT:         /* Delete a route */
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;

                if (copy_from_user(&rt, arg, sizeof(rt)))
                        return -EFAULT;

                rtnl_lock();
                err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
                if (err == 0) {
                        struct fib_table *tb;

                        if (cmd == SIOCDELRT) {
                                tb = fib_get_table(net, cfg.fc_table);
                                if (tb)
                                        err = fib_table_delete(tb, &cfg);
                                else
                                        err = -ESRCH;
                        } else {
                                tb = fib_new_table(net, cfg.fc_table);
                                if (tb)
                                        err = fib_table_insert(tb, &cfg);
                                else
                                        err = -ENOBUFS;
                        }

                        /* allocated by rtentry_to_fib_config() */
                        kfree(cfg.fc_mx);
                }
                rtnl_unlock();
                return err;
        }
        return -EINVAL;
}
const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
        [RTA_DST]               = { .type = NLA_U32 },
        [RTA_SRC]               = { .type = NLA_U32 },
        [RTA_IIF]               = { .type = NLA_U32 },
        [RTA_OIF]               = { .type = NLA_U32 },
        [RTA_GATEWAY]           = { .type = NLA_U32 },
        [RTA_PRIORITY]          = { .type = NLA_U32 },
        [RTA_PREFSRC]           = { .type = NLA_U32 },
        [RTA_METRICS]           = { .type = NLA_NESTED },
        [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
        [RTA_FLOW]              = { .type = NLA_U32 },
};
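/*
 * Convert an rtnetlink RTM_NEWROUTE/RTM_DELROUTE message into a fib_config,
 * validating its attributes against rtm_ipv4_policy.
 */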
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
                             struct nlmsghdr *nlh, struct fib_config *cfg)
{
        struct nlattr *attr;
        int err, remaining;
        struct rtmsg *rtm;

        err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
        if (err < 0)
                goto errout;

        memset(cfg, 0, sizeof(*cfg));

        rtm = nlmsg_data(nlh);
        cfg->fc_dst_len = rtm->rtm_dst_len;
        cfg->fc_tos = rtm->rtm_tos;
        cfg->fc_table = rtm->rtm_table;
        cfg->fc_protocol = rtm->rtm_protocol;
        cfg->fc_scope = rtm->rtm_scope;
        cfg->fc_type = rtm->rtm_type;
        cfg->fc_flags = rtm->rtm_flags;
        cfg->fc_nlflags = nlh->nlmsg_flags;

        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
        cfg->fc_nlinfo.nlh = nlh;
        cfg->fc_nlinfo.nl_net = net;

        if (cfg->fc_type > RTN_MAX) {
                err = -EINVAL;
                goto errout;
        }

        nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
                switch (nla_type(attr)) {
                case RTA_DST:
                        cfg->fc_dst = nla_get_be32(attr);
                        break;
                case RTA_OIF:
                        cfg->fc_oif = nla_get_u32(attr);
                        break;
                case RTA_GATEWAY:
                        cfg->fc_gw = nla_get_be32(attr);
                        break;
                case RTA_PRIORITY:
                        cfg->fc_priority = nla_get_u32(attr);
                        break;
                case RTA_PREFSRC:
                        cfg->fc_prefsrc = nla_get_be32(attr);
                        break;
                case RTA_METRICS:
                        cfg->fc_mx = nla_data(attr);
                        cfg->fc_mx_len = nla_len(attr);
                        break;
                case RTA_MULTIPATH:
                        cfg->fc_mp = nla_data(attr);
                        cfg->fc_mp_len = nla_len(attr);
                        break;
                case RTA_FLOW:
                        cfg->fc_flow = nla_get_u32(attr);
                        break;
                case RTA_TABLE:
                        cfg->fc_table = nla_get_u32(attr);
                        break;
                }
        }

        return 0;
errout:
        return err;
}
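/* rtnetlink handlers for RTM_DELROUTE and RTM_NEWROUTE. */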
static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct fib_config cfg;
        struct fib_table *tb;
        int err;

        err = rtm_to_fib_config(net, skb, nlh, &cfg);
        if (err < 0)
                goto errout;

        tb = fib_get_table(net, cfg.fc_table);
        if (tb == NULL) {
                err = -ESRCH;
                goto errout;
        }

        err = fib_table_delete(tb, &cfg);
errout:
        return err;
}

static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct fib_config cfg;
        struct fib_table *tb;
        int err;

        err = rtm_to_fib_config(net, skb, nlh, &cfg);
        if (err < 0)
                goto errout;

        tb = fib_new_table(net, cfg.fc_table);
        if (tb == NULL) {
                err = -ENOBUFS;
                goto errout;
        }

        err = fib_table_insert(tb, &cfg);
errout:
        return err;
}
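/*
 * Dump all FIB tables for an RTM_GETROUTE dump request, resuming from the
 * hash bucket and table position recorded in cb->args.
 */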
static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        unsigned int h, s_h;
        unsigned int e = 0, s_e;
        struct fib_table *tb;
        struct hlist_node *node;
        struct hlist_head *head;
        int dumped = 0;

        if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
            ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
                return ip_rt_dump(skb, cb);

        s_h = cb->args[0];
        s_e = cb->args[1];

        for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
                e = 0;
                head = &net->ipv4.fib_table_hash[h];
                hlist_for_each_entry(tb, node, head, tb_hlist) {
                        if (e < s_e)
                                goto next;
                        if (dumped)
                                memset(&cb->args[2], 0, sizeof(cb->args) -
                                                 2 * sizeof(cb->args[0]));
                        if (fib_table_dump(tb, skb, cb) < 0)
                                goto out;
                        dumped = 1;
next:
                        e++;
                }
        }
out:
        cb->args[1] = e;
        cb->args[0] = h;

        return skb->len;
}
/* Prepare and feed an intra-kernel routing request.
 * Really, it should be a netlink message, but :-( netlink may not be
 * configured, so we feed it directly to the fib engine. This is legal,
 * because all events occur only when netlink is already locked.
 */
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
        struct net *net = dev_net(ifa->ifa_dev->dev);
        struct fib_table *tb;
        struct fib_config cfg = {
                .fc_protocol = RTPROT_KERNEL,
                .fc_type = type,
                .fc_dst = dst,
                .fc_dst_len = dst_len,
                .fc_prefsrc = ifa->ifa_local,
                .fc_oif = ifa->ifa_dev->dev->ifindex,
                .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
                .fc_nlinfo = {
                        .nl_net = net,
                },
        };

        if (type == RTN_UNICAST)
                tb = fib_new_table(net, RT_TABLE_MAIN);
        else
                tb = fib_new_table(net, RT_TABLE_LOCAL);

        if (tb == NULL)
                return;

        cfg.fc_table = tb->tb_id;

        if (type != RTN_LOCAL)
                cfg.fc_scope = RT_SCOPE_LINK;
        else
                cfg.fc_scope = RT_SCOPE_HOST;

        if (cmd == RTM_NEWROUTE)
                fib_table_insert(tb, &cfg);
        else
                fib_table_delete(tb, &cfg);
}
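/*
 * Add the local, broadcast and network routes that correspond to a newly
 * configured interface address.
 */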
void fib_add_ifaddr(struct in_ifaddr *ifa)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct net_device *dev = in_dev->dev;
        struct in_ifaddr *prim = ifa;
        __be32 mask = ifa->ifa_mask;
        __be32 addr = ifa->ifa_local;
        __be32 prefix = ifa->ifa_address&mask;

        if (ifa->ifa_flags&IFA_F_SECONDARY) {
                prim = inet_ifa_byprefix(in_dev, prefix, mask);
                if (prim == NULL) {
                        printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
                        return;
                }
        }

        fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);

        if (!(dev->flags&IFF_UP))
                return;

        /* Add broadcast address, if it is explicitly assigned. */
        if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
                fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);

        if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
            (prefix != addr || ifa->ifa_prefixlen < 32)) {
                fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
                          RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);

                /* Add network-specific broadcasts, when it makes sense. */
                if (ifa->ifa_prefixlen < 31) {
                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
                        fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
                }
        }
}
static void fib_del_ifaddr(struct in_ifaddr *ifa)
{
        struct in_device *in_dev = ifa->ifa_dev;
        struct net_device *dev = in_dev->dev;
        struct in_ifaddr *ifa1;
        struct in_ifaddr *prim = ifa;
        __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
        __be32 any = ifa->ifa_address&ifa->ifa_mask;
#define LOCAL_OK        1
#define BRD_OK          2
#define BRD0_OK         4
#define BRD1_OK         8
        unsigned ok = 0;

        if (!(ifa->ifa_flags&IFA_F_SECONDARY))
                fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
                          RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
        else {
                prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
                if (prim == NULL) {
                        printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
                        return;
                }
        }

        /* Deletion is more complicated than add.
         * We must take care not to delete too much :-)
         *
         * Scan the address list to be sure that the addresses are really gone.
         */
        for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
                if (ifa->ifa_local == ifa1->ifa_local)
                        ok |= LOCAL_OK;
                if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
                        ok |= BRD_OK;
                if (brd == ifa1->ifa_broadcast)
                        ok |= BRD1_OK;
                if (any == ifa1->ifa_broadcast)
                        ok |= BRD0_OK;
        }

        if (!(ok&BRD_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
        if (!(ok&BRD1_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
        if (!(ok&BRD0_OK))
                fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
        if (!(ok&LOCAL_OK)) {
                fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);

                /* Check, that this local address finally disappeared. */
                if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
                        /* And the last, but not the least thing.
                         * We must flush stray FIB entries.
                         *
                         * First of all, we scan the fib_info list searching
                         * for stray nexthop entries, then ignite fib_flush.
                         */
                        if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
                                fib_flush(dev_net(dev));
                }
        }
#undef LOCAL_OK
#undef BRD_OK
#undef BRD0_OK
#undef BRD1_OK
}
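/*
 * Service a NETLINK_FIB_LOOKUP request: perform the lookup described by
 * *frn in table tb and write the result back into *frn.
 */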
static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
{
        struct fib_result res;
        struct flowi fl = { .mark = frn->fl_mark,
                            .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
                                                 .tos = frn->fl_tos,
                                                 .scope = frn->fl_scope } } };

#ifdef CONFIG_IP_MULTIPLE_TABLES
        res.r = NULL;
#endif

        frn->err = -ENOENT;
        if (tb) {
                local_bh_disable();

                frn->tb_id = tb->tb_id;
                frn->err = fib_table_lookup(tb, &fl, &res);

                if (!frn->err) {
                        frn->prefixlen = res.prefixlen;
                        frn->nh_sel = res.nh_sel;
                        frn->type = res.type;
                        frn->scope = res.scope;
                        fib_res_put(&res);
                }
                local_bh_enable();
        }
}
static void nl_fib_input(struct sk_buff *skb)
{
        struct net *net;
        struct fib_result_nl *frn;
        struct nlmsghdr *nlh;
        struct fib_table *tb;
        u32 pid;

        net = sock_net(skb->sk);
        nlh = nlmsg_hdr(skb);
        if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
            nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
                return;

        skb = skb_clone(skb, GFP_KERNEL);
        if (skb == NULL)
                return;
        nlh = nlmsg_hdr(skb);

        frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
        tb = fib_get_table(net, frn->tb_id_in);

        nl_fib_lookup(frn, tb);

        pid = NETLINK_CB(skb).pid;      /* pid of sending process */
        NETLINK_CB(skb).pid = 0;        /* from kernel */
        NETLINK_CB(skb).dst_group = 0;  /* unicast */
        netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
}
static int __net_init nl_fib_lookup_init(struct net *net)
{
        struct sock *sk;
        sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
                                   nl_fib_input, NULL, THIS_MODULE);
        if (sk == NULL)
                return -EAFNOSUPPORT;
        net->ipv4.fibnl = sk;
        return 0;
}

static void nl_fib_lookup_exit(struct net *net)
{
        netlink_kernel_release(net->ipv4.fibnl);
        net->ipv4.fibnl = NULL;
}
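/*
 * Called when a device goes down, is unregistered, or loses its last IPv4
 * address: drop its routes, flush the route cache and tear down ARP state.
 */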
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
        if (fib_sync_down_dev(dev, force))
                fib_flush(dev_net(dev));
        rt_cache_flush(dev_net(dev), delay);
        arp_ifdown(dev);
}
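/* Notifier: keep the FIB in sync with address additions and removals. */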
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
        struct net_device *dev = ifa->ifa_dev->dev;

        switch (event) {
        case NETDEV_UP:
                fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
                fib_sync_up(dev);
#endif
                rt_cache_flush(dev_net(dev), -1);
                break;
        case NETDEV_DOWN:
                fib_del_ifaddr(ifa);
                if (ifa->ifa_dev->ifa_list == NULL) {
                        /* Last address was deleted from this interface.
                         * Disable IP.
                         */
                        fib_disable_ip(dev, 1, 0);
                } else {
                        rt_cache_flush(dev_net(dev), -1);
                }
                break;
        }
        return NOTIFY_DONE;
}
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        if (event == NETDEV_UNREGISTER) {
                fib_disable_ip(dev, 2, -1);
                return NOTIFY_DONE;
        }

        if (!in_dev)
                return NOTIFY_DONE;

        switch (event) {
        case NETDEV_UP:
                for_ifa(in_dev) {
                        fib_add_ifaddr(ifa);
                } endfor_ifa(in_dev);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
                fib_sync_up(dev);
#endif
                rt_cache_flush(dev_net(dev), -1);
                break;
        case NETDEV_DOWN:
                fib_disable_ip(dev, 0, 0);
                break;
        case NETDEV_CHANGEMTU:
        case NETDEV_CHANGE:
                rt_cache_flush(dev_net(dev), 0);
                break;
        case NETDEV_UNREGISTER_BATCH:
                rt_cache_flush_batch();
                break;
        }
        return NOTIFY_DONE;
}
static struct notifier_block fib_inetaddr_notifier = {
        .notifier_call = fib_inetaddr_event,
};

static struct notifier_block fib_netdev_notifier = {
        .notifier_call = fib_netdev_event,
};
static int __net_init ip_fib_net_init(struct net *net)
{
        int err;
        unsigned int i;

        net->ipv4.fib_table_hash = kzalloc(
                        sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
        if (net->ipv4.fib_table_hash == NULL)
                return -ENOMEM;

        for (i = 0; i < FIB_TABLE_HASHSZ; i++)
                INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);

        err = fib4_rules_init(net);
        if (err < 0)
                goto fail;
        return 0;

fail:
        kfree(net->ipv4.fib_table_hash);
        return err;
}
static void ip_fib_net_exit(struct net *net)
{
        unsigned int i;

#ifdef CONFIG_IP_MULTIPLE_TABLES
        fib4_rules_exit(net);
#endif

        for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
                struct fib_table *tb;
                struct hlist_head *head;
                struct hlist_node *node, *tmp;

                head = &net->ipv4.fib_table_hash[i];
                hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
                        hlist_del(node);
                        fib_table_flush(tb);
                        kfree(tb);
                }
        }
        kfree(net->ipv4.fib_table_hash);
}
static int __net_init fib_net_init(struct net *net)
{
        int error;

        error = ip_fib_net_init(net);
        if (error < 0)
                goto out;
        error = nl_fib_lookup_init(net);
        if (error < 0)
                goto out_nlfl;
        error = fib_proc_init(net);
        if (error < 0)
                goto out_proc;
out:
        return error;

out_proc:
        nl_fib_lookup_exit(net);
out_nlfl:
        ip_fib_net_exit(net);
        goto out;
}

static void __net_exit fib_net_exit(struct net *net)
{
        fib_proc_exit(net);
        nl_fib_lookup_exit(net);
        ip_fib_net_exit(net);
}

static struct pernet_operations fib_net_ops = {
        .init = fib_net_init,
        .exit = fib_net_exit,
};
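/*
 * Register the rtnetlink handlers, per-namespace operations and notifiers,
 * then let the table backend initialise itself.
 */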
void __init ip_fib_init(void)
{
        rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
        rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
        rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);

        register_pernet_subsys(&fib_net_ops);
        register_netdevice_notifier(&fib_netdev_notifier);
        register_inetaddr_notifier(&fib_inetaddr_notifier);

        fib_hash_init();
}