2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 #include <linux/slab.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
45 #include <net/ip_fib.h>
46 #include <net/rtnetlink.h>
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
50 static int __net_init
fib4_rules_init(struct net
*net
)
52 struct fib_table
*local_table
, *main_table
;
54 local_table
= fib_hash_table(RT_TABLE_LOCAL
);
55 if (local_table
== NULL
)
58 main_table
= fib_hash_table(RT_TABLE_MAIN
);
59 if (main_table
== NULL
)
62 hlist_add_head_rcu(&local_table
->tb_hlist
,
63 &net
->ipv4
.fib_table_hash
[TABLE_LOCAL_INDEX
]);
64 hlist_add_head_rcu(&main_table
->tb_hlist
,
65 &net
->ipv4
.fib_table_hash
[TABLE_MAIN_INDEX
]);
74 struct fib_table
*fib_new_table(struct net
*net
, u32 id
)
81 tb
= fib_get_table(net
, id
);
85 tb
= fib_hash_table(id
);
88 h
= id
& (FIB_TABLE_HASHSZ
- 1);
89 hlist_add_head_rcu(&tb
->tb_hlist
, &net
->ipv4
.fib_table_hash
[h
]);
93 struct fib_table
*fib_get_table(struct net
*net
, u32 id
)
96 struct hlist_node
*node
;
97 struct hlist_head
*head
;
102 h
= id
& (FIB_TABLE_HASHSZ
- 1);
105 head
= &net
->ipv4
.fib_table_hash
[h
];
106 hlist_for_each_entry_rcu(tb
, node
, head
, tb_hlist
) {
107 if (tb
->tb_id
== id
) {
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
117 void fib_select_default(struct net
*net
,
118 const struct flowi
*flp
, struct fib_result
*res
)
120 struct fib_table
*tb
;
121 int table
= RT_TABLE_MAIN
;
122 #ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res
->r
== NULL
|| res
->r
->action
!= FR_ACT_TO_TBL
)
125 table
= res
->r
->table
;
127 tb
= fib_get_table(net
, table
);
128 if (FIB_RES_GW(*res
) && FIB_RES_NH(*res
).nh_scope
== RT_SCOPE_LINK
)
129 fib_table_select_default(tb
, flp
, res
);
132 static void fib_flush(struct net
*net
)
135 struct fib_table
*tb
;
136 struct hlist_node
*node
;
137 struct hlist_head
*head
;
140 for (h
= 0; h
< FIB_TABLE_HASHSZ
; h
++) {
141 head
= &net
->ipv4
.fib_table_hash
[h
];
142 hlist_for_each_entry(tb
, node
, head
, tb_hlist
)
143 flushed
+= fib_table_flush(tb
);
147 rt_cache_flush(net
, -1);
151 * Find the first device with a given source address.
154 struct net_device
* ip_dev_find(struct net
*net
, __be32 addr
)
156 struct flowi fl
= { .nl_u
= { .ip4_u
= { .daddr
= addr
} } };
157 struct fib_result res
;
158 struct net_device
*dev
= NULL
;
159 struct fib_table
*local_table
;
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
165 local_table
= fib_get_table(net
, RT_TABLE_LOCAL
);
166 if (!local_table
|| fib_table_lookup(local_table
, &fl
, &res
))
168 if (res
.type
!= RTN_LOCAL
)
170 dev
= FIB_RES_DEV(res
);
178 EXPORT_SYMBOL(ip_dev_find
);
181 * Find address type as if only "dev" was present in the system. If
182 * dev is NULL then all interfaces are taken into consideration.
184 static inline unsigned __inet_dev_addr_type(struct net
*net
,
185 const struct net_device
*dev
,
188 struct flowi fl
= { .nl_u
= { .ip4_u
= { .daddr
= addr
} } };
189 struct fib_result res
;
190 unsigned ret
= RTN_BROADCAST
;
191 struct fib_table
*local_table
;
193 if (ipv4_is_zeronet(addr
) || ipv4_is_lbcast(addr
))
194 return RTN_BROADCAST
;
195 if (ipv4_is_multicast(addr
))
196 return RTN_MULTICAST
;
198 #ifdef CONFIG_IP_MULTIPLE_TABLES
202 local_table
= fib_get_table(net
, RT_TABLE_LOCAL
);
205 if (!fib_table_lookup(local_table
, &fl
, &res
)) {
206 if (!dev
|| dev
== res
.fi
->fib_dev
)
214 unsigned int inet_addr_type(struct net
*net
, __be32 addr
)
216 return __inet_dev_addr_type(net
, NULL
, addr
);
218 EXPORT_SYMBOL(inet_addr_type
);
220 unsigned int inet_dev_addr_type(struct net
*net
, const struct net_device
*dev
,
223 return __inet_dev_addr_type(net
, dev
, addr
);
225 EXPORT_SYMBOL(inet_dev_addr_type
);
227 /* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
235 int fib_validate_source(__be32 src
, __be32 dst
, u8 tos
, int oif
,
236 struct net_device
*dev
, __be32
*spec_dst
,
239 struct in_device
*in_dev
;
240 struct flowi fl
= { .nl_u
= { .ip4_u
=
247 struct fib_result res
;
248 int no_addr
, rpf
, accept_local
;
253 no_addr
= rpf
= accept_local
= 0;
255 in_dev
= __in_dev_get_rcu(dev
);
257 no_addr
= in_dev
->ifa_list
== NULL
;
258 rpf
= IN_DEV_RPFILTER(in_dev
);
259 accept_local
= IN_DEV_ACCEPT_LOCAL(in_dev
);
260 if (mark
&& !IN_DEV_SRC_VMARK(in_dev
))
269 if (fib_lookup(net
, &fl
, &res
))
271 if (res
.type
!= RTN_UNICAST
) {
272 if (res
.type
!= RTN_LOCAL
|| !accept_local
)
275 *spec_dst
= FIB_RES_PREFSRC(res
);
276 fib_combine_itag(itag
, &res
);
279 #ifdef CONFIG_IP_ROUTE_MULTIPATH
280 for (ret
= 0; ret
< res
.fi
->fib_nhs
; ret
++) {
281 struct fib_nh
*nh
= &res
.fi
->fib_nh
[ret
];
283 if (nh
->nh_dev
== dev
) {
289 if (FIB_RES_DEV(res
) == dev
)
293 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
302 fl
.oif
= dev
->ifindex
;
305 if (fib_lookup(net
, &fl
, &res
) == 0) {
306 if (res
.type
== RTN_UNICAST
) {
307 *spec_dst
= FIB_RES_PREFSRC(res
);
308 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
317 *spec_dst
= inet_select_addr(dev
, 0, RT_SCOPE_UNIVERSE
);
329 static inline __be32
sk_extract_addr(struct sockaddr
*addr
)
331 return ((struct sockaddr_in
*) addr
)->sin_addr
.s_addr
;
334 static int put_rtax(struct nlattr
*mx
, int len
, int type
, u32 value
)
338 nla
= (struct nlattr
*) ((char *) mx
+ len
);
339 nla
->nla_type
= type
;
340 nla
->nla_len
= nla_attr_size(4);
341 *(u32
*) nla_data(nla
) = value
;
343 return len
+ nla_total_size(4);
346 static int rtentry_to_fib_config(struct net
*net
, int cmd
, struct rtentry
*rt
,
347 struct fib_config
*cfg
)
352 memset(cfg
, 0, sizeof(*cfg
));
353 cfg
->fc_nlinfo
.nl_net
= net
;
355 if (rt
->rt_dst
.sa_family
!= AF_INET
)
356 return -EAFNOSUPPORT
;
359 * Check mask for validity:
360 * a) it must be contiguous.
361 * b) destination must have all host bits clear.
362 * c) if application forgot to set correct family (AF_INET),
363 * reject request unless it is absolutely clear i.e.
364 * both family and mask are zero.
367 addr
= sk_extract_addr(&rt
->rt_dst
);
368 if (!(rt
->rt_flags
& RTF_HOST
)) {
369 __be32 mask
= sk_extract_addr(&rt
->rt_genmask
);
371 if (rt
->rt_genmask
.sa_family
!= AF_INET
) {
372 if (mask
|| rt
->rt_genmask
.sa_family
)
373 return -EAFNOSUPPORT
;
376 if (bad_mask(mask
, addr
))
379 plen
= inet_mask_len(mask
);
382 cfg
->fc_dst_len
= plen
;
385 if (cmd
!= SIOCDELRT
) {
386 cfg
->fc_nlflags
= NLM_F_CREATE
;
387 cfg
->fc_protocol
= RTPROT_BOOT
;
391 cfg
->fc_priority
= rt
->rt_metric
- 1;
393 if (rt
->rt_flags
& RTF_REJECT
) {
394 cfg
->fc_scope
= RT_SCOPE_HOST
;
395 cfg
->fc_type
= RTN_UNREACHABLE
;
399 cfg
->fc_scope
= RT_SCOPE_NOWHERE
;
400 cfg
->fc_type
= RTN_UNICAST
;
404 struct net_device
*dev
;
405 char devname
[IFNAMSIZ
];
407 if (copy_from_user(devname
, rt
->rt_dev
, IFNAMSIZ
-1))
410 devname
[IFNAMSIZ
-1] = 0;
411 colon
= strchr(devname
, ':');
414 dev
= __dev_get_by_name(net
, devname
);
417 cfg
->fc_oif
= dev
->ifindex
;
419 struct in_ifaddr
*ifa
;
420 struct in_device
*in_dev
= __in_dev_get_rtnl(dev
);
424 for (ifa
= in_dev
->ifa_list
; ifa
; ifa
= ifa
->ifa_next
)
425 if (strcmp(ifa
->ifa_label
, devname
) == 0)
429 cfg
->fc_prefsrc
= ifa
->ifa_local
;
433 addr
= sk_extract_addr(&rt
->rt_gateway
);
434 if (rt
->rt_gateway
.sa_family
== AF_INET
&& addr
) {
436 if (rt
->rt_flags
& RTF_GATEWAY
&&
437 inet_addr_type(net
, addr
) == RTN_UNICAST
)
438 cfg
->fc_scope
= RT_SCOPE_UNIVERSE
;
441 if (cmd
== SIOCDELRT
)
444 if (rt
->rt_flags
& RTF_GATEWAY
&& !cfg
->fc_gw
)
447 if (cfg
->fc_scope
== RT_SCOPE_NOWHERE
)
448 cfg
->fc_scope
= RT_SCOPE_LINK
;
450 if (rt
->rt_flags
& (RTF_MTU
| RTF_WINDOW
| RTF_IRTT
)) {
454 mx
= kzalloc(3 * nla_total_size(4), GFP_KERNEL
);
458 if (rt
->rt_flags
& RTF_MTU
)
459 len
= put_rtax(mx
, len
, RTAX_ADVMSS
, rt
->rt_mtu
- 40);
461 if (rt
->rt_flags
& RTF_WINDOW
)
462 len
= put_rtax(mx
, len
, RTAX_WINDOW
, rt
->rt_window
);
464 if (rt
->rt_flags
& RTF_IRTT
)
465 len
= put_rtax(mx
, len
, RTAX_RTT
, rt
->rt_irtt
<< 3);
468 cfg
->fc_mx_len
= len
;
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
478 int ip_rt_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
480 struct fib_config cfg
;
485 case SIOCADDRT
: /* Add a route */
486 case SIOCDELRT
: /* Delete a route */
487 if (!capable(CAP_NET_ADMIN
))
490 if (copy_from_user(&rt
, arg
, sizeof(rt
)))
494 err
= rtentry_to_fib_config(net
, cmd
, &rt
, &cfg
);
496 struct fib_table
*tb
;
498 if (cmd
== SIOCDELRT
) {
499 tb
= fib_get_table(net
, cfg
.fc_table
);
501 err
= fib_table_delete(tb
, &cfg
);
505 tb
= fib_new_table(net
, cfg
.fc_table
);
507 err
= fib_table_insert(tb
, &cfg
);
512 /* allocated by rtentry_to_fib_config() */
521 const struct nla_policy rtm_ipv4_policy
[RTA_MAX
+1] = {
522 [RTA_DST
] = { .type
= NLA_U32
},
523 [RTA_SRC
] = { .type
= NLA_U32
},
524 [RTA_IIF
] = { .type
= NLA_U32
},
525 [RTA_OIF
] = { .type
= NLA_U32
},
526 [RTA_GATEWAY
] = { .type
= NLA_U32
},
527 [RTA_PRIORITY
] = { .type
= NLA_U32
},
528 [RTA_PREFSRC
] = { .type
= NLA_U32
},
529 [RTA_METRICS
] = { .type
= NLA_NESTED
},
530 [RTA_MULTIPATH
] = { .len
= sizeof(struct rtnexthop
) },
531 [RTA_FLOW
] = { .type
= NLA_U32
},
534 static int rtm_to_fib_config(struct net
*net
, struct sk_buff
*skb
,
535 struct nlmsghdr
*nlh
, struct fib_config
*cfg
)
541 err
= nlmsg_validate(nlh
, sizeof(*rtm
), RTA_MAX
, rtm_ipv4_policy
);
545 memset(cfg
, 0, sizeof(*cfg
));
547 rtm
= nlmsg_data(nlh
);
548 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
549 cfg
->fc_tos
= rtm
->rtm_tos
;
550 cfg
->fc_table
= rtm
->rtm_table
;
551 cfg
->fc_protocol
= rtm
->rtm_protocol
;
552 cfg
->fc_scope
= rtm
->rtm_scope
;
553 cfg
->fc_type
= rtm
->rtm_type
;
554 cfg
->fc_flags
= rtm
->rtm_flags
;
555 cfg
->fc_nlflags
= nlh
->nlmsg_flags
;
557 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
558 cfg
->fc_nlinfo
.nlh
= nlh
;
559 cfg
->fc_nlinfo
.nl_net
= net
;
561 if (cfg
->fc_type
> RTN_MAX
) {
566 nlmsg_for_each_attr(attr
, nlh
, sizeof(struct rtmsg
), remaining
) {
567 switch (nla_type(attr
)) {
569 cfg
->fc_dst
= nla_get_be32(attr
);
572 cfg
->fc_oif
= nla_get_u32(attr
);
575 cfg
->fc_gw
= nla_get_be32(attr
);
578 cfg
->fc_priority
= nla_get_u32(attr
);
581 cfg
->fc_prefsrc
= nla_get_be32(attr
);
584 cfg
->fc_mx
= nla_data(attr
);
585 cfg
->fc_mx_len
= nla_len(attr
);
588 cfg
->fc_mp
= nla_data(attr
);
589 cfg
->fc_mp_len
= nla_len(attr
);
592 cfg
->fc_flow
= nla_get_u32(attr
);
595 cfg
->fc_table
= nla_get_u32(attr
);
605 static int inet_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
607 struct net
*net
= sock_net(skb
->sk
);
608 struct fib_config cfg
;
609 struct fib_table
*tb
;
612 err
= rtm_to_fib_config(net
, skb
, nlh
, &cfg
);
616 tb
= fib_get_table(net
, cfg
.fc_table
);
622 err
= fib_table_delete(tb
, &cfg
);
627 static int inet_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
629 struct net
*net
= sock_net(skb
->sk
);
630 struct fib_config cfg
;
631 struct fib_table
*tb
;
634 err
= rtm_to_fib_config(net
, skb
, nlh
, &cfg
);
638 tb
= fib_new_table(net
, cfg
.fc_table
);
644 err
= fib_table_insert(tb
, &cfg
);
649 static int inet_dump_fib(struct sk_buff
*skb
, struct netlink_callback
*cb
)
651 struct net
*net
= sock_net(skb
->sk
);
653 unsigned int e
= 0, s_e
;
654 struct fib_table
*tb
;
655 struct hlist_node
*node
;
656 struct hlist_head
*head
;
659 if (nlmsg_len(cb
->nlh
) >= sizeof(struct rtmsg
) &&
660 ((struct rtmsg
*) nlmsg_data(cb
->nlh
))->rtm_flags
& RTM_F_CLONED
)
661 return ip_rt_dump(skb
, cb
);
666 for (h
= s_h
; h
< FIB_TABLE_HASHSZ
; h
++, s_e
= 0) {
668 head
= &net
->ipv4
.fib_table_hash
[h
];
669 hlist_for_each_entry(tb
, node
, head
, tb_hlist
) {
673 memset(&cb
->args
[2], 0, sizeof(cb
->args
) -
674 2 * sizeof(cb
->args
[0]));
675 if (fib_table_dump(tb
, skb
, cb
) < 0)
689 /* Prepare and feed an intra-kernel routing request.
690 Really, it should be a netlink message, but :-( netlink
691 may not be configured, so we feed it directly
692 to the fib engine. This is legal because all events occur
693 only when netlink is already locked.
696 static void fib_magic(int cmd
, int type
, __be32 dst
, int dst_len
, struct in_ifaddr
*ifa
)
698 struct net
*net
= dev_net(ifa
->ifa_dev
->dev
);
699 struct fib_table
*tb
;
700 struct fib_config cfg
= {
701 .fc_protocol
= RTPROT_KERNEL
,
704 .fc_dst_len
= dst_len
,
705 .fc_prefsrc
= ifa
->ifa_local
,
706 .fc_oif
= ifa
->ifa_dev
->dev
->ifindex
,
707 .fc_nlflags
= NLM_F_CREATE
| NLM_F_APPEND
,
713 if (type
== RTN_UNICAST
)
714 tb
= fib_new_table(net
, RT_TABLE_MAIN
);
716 tb
= fib_new_table(net
, RT_TABLE_LOCAL
);
721 cfg
.fc_table
= tb
->tb_id
;
723 if (type
!= RTN_LOCAL
)
724 cfg
.fc_scope
= RT_SCOPE_LINK
;
726 cfg
.fc_scope
= RT_SCOPE_HOST
;
728 if (cmd
== RTM_NEWROUTE
)
729 fib_table_insert(tb
, &cfg
);
731 fib_table_delete(tb
, &cfg
);
734 void fib_add_ifaddr(struct in_ifaddr
*ifa
)
736 struct in_device
*in_dev
= ifa
->ifa_dev
;
737 struct net_device
*dev
= in_dev
->dev
;
738 struct in_ifaddr
*prim
= ifa
;
739 __be32 mask
= ifa
->ifa_mask
;
740 __be32 addr
= ifa
->ifa_local
;
741 __be32 prefix
= ifa
->ifa_address
&mask
;
743 if (ifa
->ifa_flags
&IFA_F_SECONDARY
) {
744 prim
= inet_ifa_byprefix(in_dev
, prefix
, mask
);
746 printk(KERN_WARNING
"fib_add_ifaddr: bug: prim == NULL\n");
751 fib_magic(RTM_NEWROUTE
, RTN_LOCAL
, addr
, 32, prim
);
753 if (!(dev
->flags
&IFF_UP
))
756 /* Add broadcast address, if it is explicitly assigned. */
757 if (ifa
->ifa_broadcast
&& ifa
->ifa_broadcast
!= htonl(0xFFFFFFFF))
758 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
760 if (!ipv4_is_zeronet(prefix
) && !(ifa
->ifa_flags
&IFA_F_SECONDARY
) &&
761 (prefix
!= addr
|| ifa
->ifa_prefixlen
< 32)) {
762 fib_magic(RTM_NEWROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
763 RTN_UNICAST
, prefix
, ifa
->ifa_prefixlen
, prim
);
765 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa
->ifa_prefixlen
< 31) {
767 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
, 32, prim
);
768 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
|~mask
, 32, prim
);
773 static void fib_del_ifaddr(struct in_ifaddr
*ifa
)
775 struct in_device
*in_dev
= ifa
->ifa_dev
;
776 struct net_device
*dev
= in_dev
->dev
;
777 struct in_ifaddr
*ifa1
;
778 struct in_ifaddr
*prim
= ifa
;
779 __be32 brd
= ifa
->ifa_address
|~ifa
->ifa_mask
;
780 __be32 any
= ifa
->ifa_address
&ifa
->ifa_mask
;
787 if (!(ifa
->ifa_flags
&IFA_F_SECONDARY
))
788 fib_magic(RTM_DELROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
789 RTN_UNICAST
, any
, ifa
->ifa_prefixlen
, prim
);
791 prim
= inet_ifa_byprefix(in_dev
, any
, ifa
->ifa_mask
);
793 printk(KERN_WARNING
"fib_del_ifaddr: bug: prim == NULL\n");
798 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-)
801 Scan address list to be sure that addresses are really gone.
804 for (ifa1
= in_dev
->ifa_list
; ifa1
; ifa1
= ifa1
->ifa_next
) {
805 if (ifa
->ifa_local
== ifa1
->ifa_local
)
807 if (ifa
->ifa_broadcast
== ifa1
->ifa_broadcast
)
809 if (brd
== ifa1
->ifa_broadcast
)
811 if (any
== ifa1
->ifa_broadcast
)
816 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
818 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, brd
, 32, prim
);
820 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, any
, 32, prim
);
821 if (!(ok
&LOCAL_OK
)) {
822 fib_magic(RTM_DELROUTE
, RTN_LOCAL
, ifa
->ifa_local
, 32, prim
);
824 /* Check, that this local address finally disappeared. */
825 if (inet_addr_type(dev_net(dev
), ifa
->ifa_local
) != RTN_LOCAL
) {
826 /* And the last, but not the least thing.
827 We must flush stray FIB entries.
829 First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush.
832 if (fib_sync_down_addr(dev_net(dev
), ifa
->ifa_local
))
833 fib_flush(dev_net(dev
));
842 static void nl_fib_lookup(struct fib_result_nl
*frn
, struct fib_table
*tb
)
845 struct fib_result res
;
846 struct flowi fl
= { .mark
= frn
->fl_mark
,
847 .nl_u
= { .ip4_u
= { .daddr
= frn
->fl_addr
,
849 .scope
= frn
->fl_scope
} } };
851 #ifdef CONFIG_IP_MULTIPLE_TABLES
859 frn
->tb_id
= tb
->tb_id
;
860 frn
->err
= fib_table_lookup(tb
, &fl
, &res
);
863 frn
->prefixlen
= res
.prefixlen
;
864 frn
->nh_sel
= res
.nh_sel
;
865 frn
->type
= res
.type
;
866 frn
->scope
= res
.scope
;
873 static void nl_fib_input(struct sk_buff
*skb
)
876 struct fib_result_nl
*frn
;
877 struct nlmsghdr
*nlh
;
878 struct fib_table
*tb
;
881 net
= sock_net(skb
->sk
);
882 nlh
= nlmsg_hdr(skb
);
883 if (skb
->len
< NLMSG_SPACE(0) || skb
->len
< nlh
->nlmsg_len
||
884 nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*frn
)))
887 skb
= skb_clone(skb
, GFP_KERNEL
);
890 nlh
= nlmsg_hdr(skb
);
892 frn
= (struct fib_result_nl
*) NLMSG_DATA(nlh
);
893 tb
= fib_get_table(net
, frn
->tb_id_in
);
895 nl_fib_lookup(frn
, tb
);
897 pid
= NETLINK_CB(skb
).pid
; /* pid of sending process */
898 NETLINK_CB(skb
).pid
= 0; /* from kernel */
899 NETLINK_CB(skb
).dst_group
= 0; /* unicast */
900 netlink_unicast(net
->ipv4
.fibnl
, skb
, pid
, MSG_DONTWAIT
);
903 static int __net_init
nl_fib_lookup_init(struct net
*net
)
906 sk
= netlink_kernel_create(net
, NETLINK_FIB_LOOKUP
, 0,
907 nl_fib_input
, NULL
, THIS_MODULE
);
909 return -EAFNOSUPPORT
;
910 net
->ipv4
.fibnl
= sk
;
914 static void nl_fib_lookup_exit(struct net
*net
)
916 netlink_kernel_release(net
->ipv4
.fibnl
);
917 net
->ipv4
.fibnl
= NULL
;
920 static void fib_disable_ip(struct net_device
*dev
, int force
, int delay
)
922 if (fib_sync_down_dev(dev
, force
))
923 fib_flush(dev_net(dev
));
924 rt_cache_flush(dev_net(dev
), delay
);
928 static int fib_inetaddr_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
930 struct in_ifaddr
*ifa
= (struct in_ifaddr
*)ptr
;
931 struct net_device
*dev
= ifa
->ifa_dev
->dev
;
936 #ifdef CONFIG_IP_ROUTE_MULTIPATH
939 rt_cache_flush(dev_net(dev
), -1);
943 if (ifa
->ifa_dev
->ifa_list
== NULL
) {
944 /* Last address was deleted from this interface.
947 fib_disable_ip(dev
, 1, 0);
949 rt_cache_flush(dev_net(dev
), -1);
956 static int fib_netdev_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
958 struct net_device
*dev
= ptr
;
959 struct in_device
*in_dev
= __in_dev_get_rtnl(dev
);
961 if (event
== NETDEV_UNREGISTER
) {
962 fib_disable_ip(dev
, 2, -1);
973 } endfor_ifa(in_dev
);
974 #ifdef CONFIG_IP_ROUTE_MULTIPATH
977 rt_cache_flush(dev_net(dev
), -1);
980 fib_disable_ip(dev
, 0, 0);
982 case NETDEV_CHANGEMTU
:
984 rt_cache_flush(dev_net(dev
), 0);
986 case NETDEV_UNREGISTER_BATCH
:
987 rt_cache_flush_batch();
993 static struct notifier_block fib_inetaddr_notifier
= {
994 .notifier_call
= fib_inetaddr_event
,
997 static struct notifier_block fib_netdev_notifier
= {
998 .notifier_call
= fib_netdev_event
,
1001 static int __net_init
ip_fib_net_init(struct net
*net
)
1006 net
->ipv4
.fib_table_hash
= kzalloc(
1007 sizeof(struct hlist_head
)*FIB_TABLE_HASHSZ
, GFP_KERNEL
);
1008 if (net
->ipv4
.fib_table_hash
== NULL
)
1011 for (i
= 0; i
< FIB_TABLE_HASHSZ
; i
++)
1012 INIT_HLIST_HEAD(&net
->ipv4
.fib_table_hash
[i
]);
1014 err
= fib4_rules_init(net
);
1020 kfree(net
->ipv4
.fib_table_hash
);
1024 static void ip_fib_net_exit(struct net
*net
)
1028 #ifdef CONFIG_IP_MULTIPLE_TABLES
1029 fib4_rules_exit(net
);
1032 for (i
= 0; i
< FIB_TABLE_HASHSZ
; i
++) {
1033 struct fib_table
*tb
;
1034 struct hlist_head
*head
;
1035 struct hlist_node
*node
, *tmp
;
1037 head
= &net
->ipv4
.fib_table_hash
[i
];
1038 hlist_for_each_entry_safe(tb
, node
, tmp
, head
, tb_hlist
) {
1040 fib_table_flush(tb
);
1044 kfree(net
->ipv4
.fib_table_hash
);
1047 static int __net_init
fib_net_init(struct net
*net
)
1051 error
= ip_fib_net_init(net
);
1054 error
= nl_fib_lookup_init(net
);
1057 error
= fib_proc_init(net
);
1064 nl_fib_lookup_exit(net
);
1066 ip_fib_net_exit(net
);
1070 static void __net_exit
fib_net_exit(struct net
*net
)
1073 nl_fib_lookup_exit(net
);
1074 ip_fib_net_exit(net
);
1077 static struct pernet_operations fib_net_ops
= {
1078 .init
= fib_net_init
,
1079 .exit
= fib_net_exit
,
1082 void __init
ip_fib_init(void)
1084 rtnl_register(PF_INET
, RTM_NEWROUTE
, inet_rtm_newroute
, NULL
);
1085 rtnl_register(PF_INET
, RTM_DELROUTE
, inet_rtm_delroute
, NULL
);
1086 rtnl_register(PF_INET
, RTM_GETROUTE
, NULL
, inet_dump_fib
);
1088 register_pernet_subsys(&fib_net_ops
);
1089 register_netdevice_notifier(&fib_netdev_notifier
);
1090 register_inetaddr_notifier(&fib_inetaddr_notifier
);