ipv4: Fix reverse path filtering with multipath routing.
[linux-2.6/x86.git] / net / ipv4 / fib_frontend.c
blob7d02a9f999fabcebeb61800816d722e6f6c054ff
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/mm.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
28 #include <linux/in.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 #include <linux/slab.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/arp.h>
45 #include <net/ip_fib.h>
46 #include <net/rtnetlink.h>
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
50 static int __net_init fib4_rules_init(struct net *net)
52 struct fib_table *local_table, *main_table;
54 local_table = fib_hash_table(RT_TABLE_LOCAL);
55 if (local_table == NULL)
56 return -ENOMEM;
58 main_table = fib_hash_table(RT_TABLE_MAIN);
59 if (main_table == NULL)
60 goto fail;
62 hlist_add_head_rcu(&local_table->tb_hlist,
63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]);
64 hlist_add_head_rcu(&main_table->tb_hlist,
65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]);
66 return 0;
68 fail:
69 kfree(local_table);
70 return -ENOMEM;
72 #else
74 struct fib_table *fib_new_table(struct net *net, u32 id)
76 struct fib_table *tb;
77 unsigned int h;
79 if (id == 0)
80 id = RT_TABLE_MAIN;
81 tb = fib_get_table(net, id);
82 if (tb)
83 return tb;
85 tb = fib_hash_table(id);
86 if (!tb)
87 return NULL;
88 h = id & (FIB_TABLE_HASHSZ - 1);
89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
90 return tb;
93 struct fib_table *fib_get_table(struct net *net, u32 id)
95 struct fib_table *tb;
96 struct hlist_node *node;
97 struct hlist_head *head;
98 unsigned int h;
100 if (id == 0)
101 id = RT_TABLE_MAIN;
102 h = id & (FIB_TABLE_HASHSZ - 1);
104 rcu_read_lock();
105 head = &net->ipv4.fib_table_hash[h];
106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
107 if (tb->tb_id == id) {
108 rcu_read_unlock();
109 return tb;
112 rcu_read_unlock();
113 return NULL;
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
117 void fib_select_default(struct net *net,
118 const struct flowi *flp, struct fib_result *res)
120 struct fib_table *tb;
121 int table = RT_TABLE_MAIN;
122 #ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL)
124 return;
125 table = res->r->table;
126 #endif
127 tb = fib_get_table(net, table);
128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
129 fib_table_select_default(tb, flp, res);
132 static void fib_flush(struct net *net)
134 int flushed = 0;
135 struct fib_table *tb;
136 struct hlist_node *node;
137 struct hlist_head *head;
138 unsigned int h;
140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
141 head = &net->ipv4.fib_table_hash[h];
142 hlist_for_each_entry(tb, node, head, tb_hlist)
143 flushed += fib_table_flush(tb);
146 if (flushed)
147 rt_cache_flush(net, -1);
151 * Find the first device with a given source address.
154 struct net_device * ip_dev_find(struct net *net, __be32 addr)
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
157 struct fib_result res;
158 struct net_device *dev = NULL;
159 struct fib_table *local_table;
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
162 res.r = NULL;
163 #endif
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL;
168 if (res.type != RTN_LOCAL)
169 goto out;
170 dev = FIB_RES_DEV(res);
172 if (dev)
173 dev_hold(dev);
174 out:
175 fib_res_put(&res);
176 return dev;
178 EXPORT_SYMBOL(ip_dev_find);
181 * Find address type as if only "dev" was present in the system. If
182 * on_dev is NULL then all interfaces are taken into consideration.
184 static inline unsigned __inet_dev_addr_type(struct net *net,
185 const struct net_device *dev,
186 __be32 addr)
188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
189 struct fib_result res;
190 unsigned ret = RTN_BROADCAST;
191 struct fib_table *local_table;
193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
194 return RTN_BROADCAST;
195 if (ipv4_is_multicast(addr))
196 return RTN_MULTICAST;
198 #ifdef CONFIG_IP_MULTIPLE_TABLES
199 res.r = NULL;
200 #endif
202 local_table = fib_get_table(net, RT_TABLE_LOCAL);
203 if (local_table) {
204 ret = RTN_UNICAST;
205 if (!fib_table_lookup(local_table, &fl, &res)) {
206 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type;
208 fib_res_put(&res);
211 return ret;
214 unsigned int inet_addr_type(struct net *net, __be32 addr)
216 return __inet_dev_addr_type(net, NULL, addr);
218 EXPORT_SYMBOL(inet_addr_type);
220 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr)
223 return __inet_dev_addr_type(net, dev, addr);
225 EXPORT_SYMBOL(inet_dev_addr_type);
227 /* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
229 address.
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
235 int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
236 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark)
239 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u =
241 { .daddr = src,
242 .saddr = dst,
243 .tos = tos } },
244 .mark = mark,
245 .iif = oif };
247 struct fib_result res;
248 int no_addr, rpf, accept_local;
249 bool dev_match;
250 int ret;
251 struct net *net;
253 no_addr = rpf = accept_local = 0;
254 rcu_read_lock();
255 in_dev = __in_dev_get_rcu(dev);
256 if (in_dev) {
257 no_addr = in_dev->ifa_list == NULL;
258 rpf = IN_DEV_RPFILTER(in_dev);
259 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
260 if (mark && !IN_DEV_SRC_VMARK(in_dev))
261 fl.mark = 0;
263 rcu_read_unlock();
265 if (in_dev == NULL)
266 goto e_inval;
268 net = dev_net(dev);
269 if (fib_lookup(net, &fl, &res))
270 goto last_resort;
271 if (res.type != RTN_UNICAST) {
272 if (res.type != RTN_LOCAL || !accept_local)
273 goto e_inval_res;
275 *spec_dst = FIB_RES_PREFSRC(res);
276 fib_combine_itag(itag, &res);
277 dev_match = false;
279 #ifdef CONFIG_IP_ROUTE_MULTIPATH
280 for (ret = 0; ret < res.fi->fib_nhs; ret++) {
281 struct fib_nh *nh = &res.fi->fib_nh[ret];
283 if (nh->nh_dev == dev) {
284 dev_match = true;
285 break;
288 #else
289 if (FIB_RES_DEV(res) == dev)
290 dev_match = true;
291 #endif
292 if (dev_match) {
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 fib_res_put(&res);
295 return ret;
297 fib_res_put(&res);
298 if (no_addr)
299 goto last_resort;
300 if (rpf == 1)
301 goto e_rpf;
302 fl.oif = dev->ifindex;
304 ret = 0;
305 if (fib_lookup(net, &fl, &res) == 0) {
306 if (res.type == RTN_UNICAST) {
307 *spec_dst = FIB_RES_PREFSRC(res);
308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
310 fib_res_put(&res);
312 return ret;
314 last_resort:
315 if (rpf)
316 goto e_rpf;
317 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
318 *itag = 0;
319 return 0;
321 e_inval_res:
322 fib_res_put(&res);
323 e_inval:
324 return -EINVAL;
325 e_rpf:
326 return -EXDEV;
329 static inline __be32 sk_extract_addr(struct sockaddr *addr)
331 return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
334 static int put_rtax(struct nlattr *mx, int len, int type, u32 value)
336 struct nlattr *nla;
338 nla = (struct nlattr *) ((char *) mx + len);
339 nla->nla_type = type;
340 nla->nla_len = nla_attr_size(4);
341 *(u32 *) nla_data(nla) = value;
343 return len + nla_total_size(4);
346 static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
347 struct fib_config *cfg)
349 __be32 addr;
350 int plen;
352 memset(cfg, 0, sizeof(*cfg));
353 cfg->fc_nlinfo.nl_net = net;
355 if (rt->rt_dst.sa_family != AF_INET)
356 return -EAFNOSUPPORT;
359 * Check mask for validity:
360 * a) it must be contiguous.
361 * b) destination must have all host bits clear.
362 * c) if application forgot to set correct family (AF_INET),
363 * reject request unless it is absolutely clear i.e.
364 * both family and mask are zero.
366 plen = 32;
367 addr = sk_extract_addr(&rt->rt_dst);
368 if (!(rt->rt_flags & RTF_HOST)) {
369 __be32 mask = sk_extract_addr(&rt->rt_genmask);
371 if (rt->rt_genmask.sa_family != AF_INET) {
372 if (mask || rt->rt_genmask.sa_family)
373 return -EAFNOSUPPORT;
376 if (bad_mask(mask, addr))
377 return -EINVAL;
379 plen = inet_mask_len(mask);
382 cfg->fc_dst_len = plen;
383 cfg->fc_dst = addr;
385 if (cmd != SIOCDELRT) {
386 cfg->fc_nlflags = NLM_F_CREATE;
387 cfg->fc_protocol = RTPROT_BOOT;
390 if (rt->rt_metric)
391 cfg->fc_priority = rt->rt_metric - 1;
393 if (rt->rt_flags & RTF_REJECT) {
394 cfg->fc_scope = RT_SCOPE_HOST;
395 cfg->fc_type = RTN_UNREACHABLE;
396 return 0;
399 cfg->fc_scope = RT_SCOPE_NOWHERE;
400 cfg->fc_type = RTN_UNICAST;
402 if (rt->rt_dev) {
403 char *colon;
404 struct net_device *dev;
405 char devname[IFNAMSIZ];
407 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1))
408 return -EFAULT;
410 devname[IFNAMSIZ-1] = 0;
411 colon = strchr(devname, ':');
412 if (colon)
413 *colon = 0;
414 dev = __dev_get_by_name(net, devname);
415 if (!dev)
416 return -ENODEV;
417 cfg->fc_oif = dev->ifindex;
418 if (colon) {
419 struct in_ifaddr *ifa;
420 struct in_device *in_dev = __in_dev_get_rtnl(dev);
421 if (!in_dev)
422 return -ENODEV;
423 *colon = ':';
424 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
425 if (strcmp(ifa->ifa_label, devname) == 0)
426 break;
427 if (ifa == NULL)
428 return -ENODEV;
429 cfg->fc_prefsrc = ifa->ifa_local;
433 addr = sk_extract_addr(&rt->rt_gateway);
434 if (rt->rt_gateway.sa_family == AF_INET && addr) {
435 cfg->fc_gw = addr;
436 if (rt->rt_flags & RTF_GATEWAY &&
437 inet_addr_type(net, addr) == RTN_UNICAST)
438 cfg->fc_scope = RT_SCOPE_UNIVERSE;
441 if (cmd == SIOCDELRT)
442 return 0;
444 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw)
445 return -EINVAL;
447 if (cfg->fc_scope == RT_SCOPE_NOWHERE)
448 cfg->fc_scope = RT_SCOPE_LINK;
450 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) {
451 struct nlattr *mx;
452 int len = 0;
454 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL);
455 if (mx == NULL)
456 return -ENOMEM;
458 if (rt->rt_flags & RTF_MTU)
459 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
461 if (rt->rt_flags & RTF_WINDOW)
462 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
464 if (rt->rt_flags & RTF_IRTT)
465 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
467 cfg->fc_mx = mx;
468 cfg->fc_mx_len = len;
471 return 0;
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
478 int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
480 struct fib_config cfg;
481 struct rtentry rt;
482 int err;
484 switch (cmd) {
485 case SIOCADDRT: /* Add a route */
486 case SIOCDELRT: /* Delete a route */
487 if (!capable(CAP_NET_ADMIN))
488 return -EPERM;
490 if (copy_from_user(&rt, arg, sizeof(rt)))
491 return -EFAULT;
493 rtnl_lock();
494 err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
495 if (err == 0) {
496 struct fib_table *tb;
498 if (cmd == SIOCDELRT) {
499 tb = fib_get_table(net, cfg.fc_table);
500 if (tb)
501 err = fib_table_delete(tb, &cfg);
502 else
503 err = -ESRCH;
504 } else {
505 tb = fib_new_table(net, cfg.fc_table);
506 if (tb)
507 err = fib_table_insert(tb, &cfg);
508 else
509 err = -ENOBUFS;
512 /* allocated by rtentry_to_fib_config() */
513 kfree(cfg.fc_mx);
515 rtnl_unlock();
516 return err;
518 return -EINVAL;
521 const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
522 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 },
525 [RTA_OIF] = { .type = NLA_U32 },
526 [RTA_GATEWAY] = { .type = NLA_U32 },
527 [RTA_PRIORITY] = { .type = NLA_U32 },
528 [RTA_PREFSRC] = { .type = NLA_U32 },
529 [RTA_METRICS] = { .type = NLA_NESTED },
530 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
531 [RTA_FLOW] = { .type = NLA_U32 },
534 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg)
537 struct nlattr *attr;
538 int err, remaining;
539 struct rtmsg *rtm;
541 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy);
542 if (err < 0)
543 goto errout;
545 memset(cfg, 0, sizeof(*cfg));
547 rtm = nlmsg_data(nlh);
548 cfg->fc_dst_len = rtm->rtm_dst_len;
549 cfg->fc_tos = rtm->rtm_tos;
550 cfg->fc_table = rtm->rtm_table;
551 cfg->fc_protocol = rtm->rtm_protocol;
552 cfg->fc_scope = rtm->rtm_scope;
553 cfg->fc_type = rtm->rtm_type;
554 cfg->fc_flags = rtm->rtm_flags;
555 cfg->fc_nlflags = nlh->nlmsg_flags;
557 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
558 cfg->fc_nlinfo.nlh = nlh;
559 cfg->fc_nlinfo.nl_net = net;
561 if (cfg->fc_type > RTN_MAX) {
562 err = -EINVAL;
563 goto errout;
566 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) {
567 switch (nla_type(attr)) {
568 case RTA_DST:
569 cfg->fc_dst = nla_get_be32(attr);
570 break;
571 case RTA_OIF:
572 cfg->fc_oif = nla_get_u32(attr);
573 break;
574 case RTA_GATEWAY:
575 cfg->fc_gw = nla_get_be32(attr);
576 break;
577 case RTA_PRIORITY:
578 cfg->fc_priority = nla_get_u32(attr);
579 break;
580 case RTA_PREFSRC:
581 cfg->fc_prefsrc = nla_get_be32(attr);
582 break;
583 case RTA_METRICS:
584 cfg->fc_mx = nla_data(attr);
585 cfg->fc_mx_len = nla_len(attr);
586 break;
587 case RTA_MULTIPATH:
588 cfg->fc_mp = nla_data(attr);
589 cfg->fc_mp_len = nla_len(attr);
590 break;
591 case RTA_FLOW:
592 cfg->fc_flow = nla_get_u32(attr);
593 break;
594 case RTA_TABLE:
595 cfg->fc_table = nla_get_u32(attr);
596 break;
600 return 0;
601 errout:
602 return err;
605 static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
607 struct net *net = sock_net(skb->sk);
608 struct fib_config cfg;
609 struct fib_table *tb;
610 int err;
612 err = rtm_to_fib_config(net, skb, nlh, &cfg);
613 if (err < 0)
614 goto errout;
616 tb = fib_get_table(net, cfg.fc_table);
617 if (tb == NULL) {
618 err = -ESRCH;
619 goto errout;
622 err = fib_table_delete(tb, &cfg);
623 errout:
624 return err;
627 static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
629 struct net *net = sock_net(skb->sk);
630 struct fib_config cfg;
631 struct fib_table *tb;
632 int err;
634 err = rtm_to_fib_config(net, skb, nlh, &cfg);
635 if (err < 0)
636 goto errout;
638 tb = fib_new_table(net, cfg.fc_table);
639 if (tb == NULL) {
640 err = -ENOBUFS;
641 goto errout;
644 err = fib_table_insert(tb, &cfg);
645 errout:
646 return err;
649 static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
651 struct net *net = sock_net(skb->sk);
652 unsigned int h, s_h;
653 unsigned int e = 0, s_e;
654 struct fib_table *tb;
655 struct hlist_node *node;
656 struct hlist_head *head;
657 int dumped = 0;
659 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
660 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
661 return ip_rt_dump(skb, cb);
663 s_h = cb->args[0];
664 s_e = cb->args[1];
666 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
667 e = 0;
668 head = &net->ipv4.fib_table_hash[h];
669 hlist_for_each_entry(tb, node, head, tb_hlist) {
670 if (e < s_e)
671 goto next;
672 if (dumped)
673 memset(&cb->args[2], 0, sizeof(cb->args) -
674 2 * sizeof(cb->args[0]));
675 if (fib_table_dump(tb, skb, cb) < 0)
676 goto out;
677 dumped = 1;
678 next:
679 e++;
682 out:
683 cb->args[1] = e;
684 cb->args[0] = h;
686 return skb->len;
689 /* Prepare and feed intra-kernel routing request.
690 Really, it should be netlink message, but :-( netlink
691 can be not configured, so that we feed it directly
692 to fib engine. It is legal, because all events occur
693 only when netlink is already locked.
696 static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
698 struct net *net = dev_net(ifa->ifa_dev->dev);
699 struct fib_table *tb;
700 struct fib_config cfg = {
701 .fc_protocol = RTPROT_KERNEL,
702 .fc_type = type,
703 .fc_dst = dst,
704 .fc_dst_len = dst_len,
705 .fc_prefsrc = ifa->ifa_local,
706 .fc_oif = ifa->ifa_dev->dev->ifindex,
707 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
708 .fc_nlinfo = {
709 .nl_net = net,
713 if (type == RTN_UNICAST)
714 tb = fib_new_table(net, RT_TABLE_MAIN);
715 else
716 tb = fib_new_table(net, RT_TABLE_LOCAL);
718 if (tb == NULL)
719 return;
721 cfg.fc_table = tb->tb_id;
723 if (type != RTN_LOCAL)
724 cfg.fc_scope = RT_SCOPE_LINK;
725 else
726 cfg.fc_scope = RT_SCOPE_HOST;
728 if (cmd == RTM_NEWROUTE)
729 fib_table_insert(tb, &cfg);
730 else
731 fib_table_delete(tb, &cfg);
734 void fib_add_ifaddr(struct in_ifaddr *ifa)
736 struct in_device *in_dev = ifa->ifa_dev;
737 struct net_device *dev = in_dev->dev;
738 struct in_ifaddr *prim = ifa;
739 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask;
743 if (ifa->ifa_flags&IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) {
746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
747 return;
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
753 if (!(dev->flags&IFF_UP))
754 return;
756 /* Add broadcast address, if it is explicitly assigned. */
757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
761 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
765 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
773 static void fib_del_ifaddr(struct in_ifaddr *ifa)
775 struct in_device *in_dev = ifa->ifa_dev;
776 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa;
779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask;
781 #define LOCAL_OK 1
782 #define BRD_OK 2
783 #define BRD0_OK 4
784 #define BRD1_OK 8
785 unsigned ok = 0;
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
790 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) {
793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n");
794 return;
798 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-)
801 Scan address list to be sure that addresses are really gone.
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
805 if (ifa->ifa_local == ifa1->ifa_local)
806 ok |= LOCAL_OK;
807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
808 ok |= BRD_OK;
809 if (brd == ifa1->ifa_broadcast)
810 ok |= BRD1_OK;
811 if (any == ifa1->ifa_broadcast)
812 ok |= BRD0_OK;
815 if (!(ok&BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK))
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819 if (!(ok&BRD0_OK))
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821 if (!(ok&LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
824 /* Check, that this local address finally disappeared. */
825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
826 /* And the last, but not the least thing.
827 We must flush stray FIB entries.
829 First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush.
832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev));
836 #undef LOCAL_OK
837 #undef BRD_OK
838 #undef BRD0_OK
839 #undef BRD1_OK
842 static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
845 struct fib_result res;
846 struct flowi fl = { .mark = frn->fl_mark,
847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
848 .tos = frn->fl_tos,
849 .scope = frn->fl_scope } } };
851 #ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL;
853 #endif
855 frn->err = -ENOENT;
856 if (tb) {
857 local_bh_disable();
859 frn->tb_id = tb->tb_id;
860 frn->err = fib_table_lookup(tb, &fl, &res);
862 if (!frn->err) {
863 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel;
865 frn->type = res.type;
866 frn->scope = res.scope;
867 fib_res_put(&res);
869 local_bh_enable();
873 static void nl_fib_input(struct sk_buff *skb)
875 struct net *net;
876 struct fib_result_nl *frn;
877 struct nlmsghdr *nlh;
878 struct fib_table *tb;
879 u32 pid;
881 net = sock_net(skb->sk);
882 nlh = nlmsg_hdr(skb);
883 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
884 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn)))
885 return;
887 skb = skb_clone(skb, GFP_KERNEL);
888 if (skb == NULL)
889 return;
890 nlh = nlmsg_hdr(skb);
892 frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
893 tb = fib_get_table(net, frn->tb_id_in);
895 nl_fib_lookup(frn, tb);
897 pid = NETLINK_CB(skb).pid; /* pid of sending process */
898 NETLINK_CB(skb).pid = 0; /* from kernel */
899 NETLINK_CB(skb).dst_group = 0; /* unicast */
900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
903 static int __net_init nl_fib_lookup_init(struct net *net)
905 struct sock *sk;
906 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0,
907 nl_fib_input, NULL, THIS_MODULE);
908 if (sk == NULL)
909 return -EAFNOSUPPORT;
910 net->ipv4.fibnl = sk;
911 return 0;
914 static void nl_fib_lookup_exit(struct net *net)
916 netlink_kernel_release(net->ipv4.fibnl);
917 net->ipv4.fibnl = NULL;
/*
 * Take IPv4 routing down on @dev: sync the FIB state for the device
 * (@force is passed through to fib_sync_down_dev()), flush the tables if
 * that reported anything, flush the route cache with @delay, and notify
 * ARP that the interface went down.
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	struct net *net = dev_net(dev);

	if (fib_sync_down_dev(dev, force))
		fib_flush(net);

	rt_cache_flush(net, delay);
	arp_ifdown(dev);
}
928 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
930 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
931 struct net_device *dev = ifa->ifa_dev->dev;
933 switch (event) {
934 case NETDEV_UP:
935 fib_add_ifaddr(ifa);
936 #ifdef CONFIG_IP_ROUTE_MULTIPATH
937 fib_sync_up(dev);
938 #endif
939 rt_cache_flush(dev_net(dev), -1);
940 break;
941 case NETDEV_DOWN:
942 fib_del_ifaddr(ifa);
943 if (ifa->ifa_dev->ifa_list == NULL) {
944 /* Last address was deleted from this interface.
945 Disable IP.
947 fib_disable_ip(dev, 1, 0);
948 } else {
949 rt_cache_flush(dev_net(dev), -1);
951 break;
953 return NOTIFY_DONE;
956 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
958 struct net_device *dev = ptr;
959 struct in_device *in_dev = __in_dev_get_rtnl(dev);
961 if (event == NETDEV_UNREGISTER) {
962 fib_disable_ip(dev, 2, -1);
963 return NOTIFY_DONE;
966 if (!in_dev)
967 return NOTIFY_DONE;
969 switch (event) {
970 case NETDEV_UP:
971 for_ifa(in_dev) {
972 fib_add_ifaddr(ifa);
973 } endfor_ifa(in_dev);
974 #ifdef CONFIG_IP_ROUTE_MULTIPATH
975 fib_sync_up(dev);
976 #endif
977 rt_cache_flush(dev_net(dev), -1);
978 break;
979 case NETDEV_DOWN:
980 fib_disable_ip(dev, 0, 0);
981 break;
982 case NETDEV_CHANGEMTU:
983 case NETDEV_CHANGE:
984 rt_cache_flush(dev_net(dev), 0);
985 break;
986 case NETDEV_UNREGISTER_BATCH:
987 rt_cache_flush_batch();
988 break;
990 return NOTIFY_DONE;
993 static struct notifier_block fib_inetaddr_notifier = {
994 .notifier_call = fib_inetaddr_event,
997 static struct notifier_block fib_netdev_notifier = {
998 .notifier_call = fib_netdev_event,
1001 static int __net_init ip_fib_net_init(struct net *net)
1003 int err;
1004 unsigned int i;
1006 net->ipv4.fib_table_hash = kzalloc(
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM;
1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1014 err = fib4_rules_init(net);
1015 if (err < 0)
1016 goto fail;
1017 return 0;
1019 fail:
1020 kfree(net->ipv4.fib_table_hash);
1021 return err;
1024 static void ip_fib_net_exit(struct net *net)
1026 unsigned int i;
1028 #ifdef CONFIG_IP_MULTIPLE_TABLES
1029 fib4_rules_exit(net);
1030 #endif
1032 for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
1033 struct fib_table *tb;
1034 struct hlist_head *head;
1035 struct hlist_node *node, *tmp;
1037 head = &net->ipv4.fib_table_hash[i];
1038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
1039 hlist_del(node);
1040 fib_table_flush(tb);
1041 kfree(tb);
1044 kfree(net->ipv4.fib_table_hash);
1047 static int __net_init fib_net_init(struct net *net)
1049 int error;
1051 error = ip_fib_net_init(net);
1052 if (error < 0)
1053 goto out;
1054 error = nl_fib_lookup_init(net);
1055 if (error < 0)
1056 goto out_nlfl;
1057 error = fib_proc_init(net);
1058 if (error < 0)
1059 goto out_proc;
1060 out:
1061 return error;
1063 out_proc:
1064 nl_fib_lookup_exit(net);
1065 out_nlfl:
1066 ip_fib_net_exit(net);
1067 goto out;
1070 static void __net_exit fib_net_exit(struct net *net)
1072 fib_proc_exit(net);
1073 nl_fib_lookup_exit(net);
1074 ip_fib_net_exit(net);
1077 static struct pernet_operations fib_net_ops = {
1078 .init = fib_net_init,
1079 .exit = fib_net_exit,
1082 void __init ip_fib_init(void)
1084 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
1085 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
1086 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
1088 register_pernet_subsys(&fib_net_ops);
1089 register_netdevice_notifier(&fib_netdev_notifier);
1090 register_inetaddr_notifier(&fib_inetaddr_notifier);
1092 fib_hash_init();