2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/module.h>
17 #include <asm/uaccess.h>
18 #include <asm/system.h>
19 #include <linux/bitops.h>
20 #include <linux/capability.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
24 #include <linux/string.h>
25 #include <linux/socket.h>
26 #include <linux/sockios.h>
27 #include <linux/errno.h>
29 #include <linux/inet.h>
30 #include <linux/inetdevice.h>
31 #include <linux/netdevice.h>
32 #include <linux/if_addr.h>
33 #include <linux/if_arp.h>
34 #include <linux/skbuff.h>
35 #include <linux/init.h>
36 #include <linux/list.h>
37 #include <linux/slab.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
45 #include <net/ip_fib.h>
46 #include <net/rtnetlink.h>
48 #ifndef CONFIG_IP_MULTIPLE_TABLES
50 static int __net_init
fib4_rules_init(struct net
*net
)
52 struct fib_table
*local_table
, *main_table
;
54 local_table
= fib_hash_table(RT_TABLE_LOCAL
);
55 if (local_table
== NULL
)
58 main_table
= fib_hash_table(RT_TABLE_MAIN
);
59 if (main_table
== NULL
)
62 hlist_add_head_rcu(&local_table
->tb_hlist
,
63 &net
->ipv4
.fib_table_hash
[TABLE_LOCAL_INDEX
]);
64 hlist_add_head_rcu(&main_table
->tb_hlist
,
65 &net
->ipv4
.fib_table_hash
[TABLE_MAIN_INDEX
]);
74 struct fib_table
*fib_new_table(struct net
*net
, u32 id
)
81 tb
= fib_get_table(net
, id
);
85 tb
= fib_hash_table(id
);
88 h
= id
& (FIB_TABLE_HASHSZ
- 1);
89 hlist_add_head_rcu(&tb
->tb_hlist
, &net
->ipv4
.fib_table_hash
[h
]);
93 struct fib_table
*fib_get_table(struct net
*net
, u32 id
)
96 struct hlist_node
*node
;
97 struct hlist_head
*head
;
102 h
= id
& (FIB_TABLE_HASHSZ
- 1);
105 head
= &net
->ipv4
.fib_table_hash
[h
];
106 hlist_for_each_entry_rcu(tb
, node
, head
, tb_hlist
) {
107 if (tb
->tb_id
== id
) {
115 #endif /* CONFIG_IP_MULTIPLE_TABLES */
117 void fib_select_default(struct net
*net
,
118 const struct flowi
*flp
, struct fib_result
*res
)
120 struct fib_table
*tb
;
121 int table
= RT_TABLE_MAIN
;
122 #ifdef CONFIG_IP_MULTIPLE_TABLES
123 if (res
->r
== NULL
|| res
->r
->action
!= FR_ACT_TO_TBL
)
125 table
= res
->r
->table
;
127 tb
= fib_get_table(net
, table
);
128 if (FIB_RES_GW(*res
) && FIB_RES_NH(*res
).nh_scope
== RT_SCOPE_LINK
)
129 fib_table_select_default(tb
, flp
, res
);
132 static void fib_flush(struct net
*net
)
135 struct fib_table
*tb
;
136 struct hlist_node
*node
;
137 struct hlist_head
*head
;
140 for (h
= 0; h
< FIB_TABLE_HASHSZ
; h
++) {
141 head
= &net
->ipv4
.fib_table_hash
[h
];
142 hlist_for_each_entry(tb
, node
, head
, tb_hlist
)
143 flushed
+= fib_table_flush(tb
);
147 rt_cache_flush(net
, -1);
151 * Find the first device with a given source address.
154 struct net_device
* ip_dev_find(struct net
*net
, __be32 addr
)
156 struct flowi fl
= { .nl_u
= { .ip4_u
= { .daddr
= addr
} } };
157 struct fib_result res
;
158 struct net_device
*dev
= NULL
;
159 struct fib_table
*local_table
;
161 #ifdef CONFIG_IP_MULTIPLE_TABLES
165 local_table
= fib_get_table(net
, RT_TABLE_LOCAL
);
166 if (!local_table
|| fib_table_lookup(local_table
, &fl
, &res
))
168 if (res
.type
!= RTN_LOCAL
)
170 dev
= FIB_RES_DEV(res
);
178 EXPORT_SYMBOL(ip_dev_find
);
181 * Find address type as if only "dev" was present in the system. If
182 * dev is NULL then all interfaces are taken into consideration.
184 static inline unsigned __inet_dev_addr_type(struct net
*net
,
185 const struct net_device
*dev
,
188 struct flowi fl
= { .nl_u
= { .ip4_u
= { .daddr
= addr
} } };
189 struct fib_result res
;
190 unsigned ret
= RTN_BROADCAST
;
191 struct fib_table
*local_table
;
193 if (ipv4_is_zeronet(addr
) || ipv4_is_lbcast(addr
))
194 return RTN_BROADCAST
;
195 if (ipv4_is_multicast(addr
))
196 return RTN_MULTICAST
;
198 #ifdef CONFIG_IP_MULTIPLE_TABLES
202 local_table
= fib_get_table(net
, RT_TABLE_LOCAL
);
205 if (!fib_table_lookup(local_table
, &fl
, &res
)) {
206 if (!dev
|| dev
== res
.fi
->fib_dev
)
214 unsigned int inet_addr_type(struct net
*net
, __be32 addr
)
216 return __inet_dev_addr_type(net
, NULL
, addr
);
218 EXPORT_SYMBOL(inet_addr_type
);
220 unsigned int inet_dev_addr_type(struct net
*net
, const struct net_device
*dev
,
223 return __inet_dev_addr_type(net
, dev
, addr
);
225 EXPORT_SYMBOL(inet_dev_addr_type
);
227 /* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local
230 - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface.
235 int fib_validate_source(__be32 src
, __be32 dst
, u8 tos
, int oif
,
236 struct net_device
*dev
, __be32
*spec_dst
,
239 struct in_device
*in_dev
;
240 struct flowi fl
= { .nl_u
= { .ip4_u
=
247 struct fib_result res
;
248 int no_addr
, rpf
, accept_local
;
252 no_addr
= rpf
= accept_local
= 0;
254 in_dev
= __in_dev_get_rcu(dev
);
256 no_addr
= in_dev
->ifa_list
== NULL
;
257 rpf
= IN_DEV_RPFILTER(in_dev
);
258 accept_local
= IN_DEV_ACCEPT_LOCAL(in_dev
);
259 if (mark
&& !IN_DEV_SRC_VMARK(in_dev
))
268 if (fib_lookup(net
, &fl
, &res
))
270 if (res
.type
!= RTN_UNICAST
) {
271 if (res
.type
!= RTN_LOCAL
|| !accept_local
)
274 *spec_dst
= FIB_RES_PREFSRC(res
);
275 fib_combine_itag(itag
, &res
);
276 #ifdef CONFIG_IP_ROUTE_MULTIPATH
277 if (FIB_RES_DEV(res
) == dev
|| res
.fi
->fib_nhs
> 1)
279 if (FIB_RES_DEV(res
) == dev
)
282 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
291 fl
.oif
= dev
->ifindex
;
294 if (fib_lookup(net
, &fl
, &res
) == 0) {
295 if (res
.type
== RTN_UNICAST
) {
296 *spec_dst
= FIB_RES_PREFSRC(res
);
297 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
306 *spec_dst
= inet_select_addr(dev
, 0, RT_SCOPE_UNIVERSE
);
/*
 * Return the IPv4 address stored in a generic socket address.
 *
 * @addr is reinterpreted as a struct sockaddr_in; no address-family
 * check is done here, so callers must validate sa_family themselves.
 */
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;

	return sin->sin_addr.s_addr;
}
323 static int put_rtax(struct nlattr
*mx
, int len
, int type
, u32 value
)
327 nla
= (struct nlattr
*) ((char *) mx
+ len
);
328 nla
->nla_type
= type
;
329 nla
->nla_len
= nla_attr_size(4);
330 *(u32
*) nla_data(nla
) = value
;
332 return len
+ nla_total_size(4);
335 static int rtentry_to_fib_config(struct net
*net
, int cmd
, struct rtentry
*rt
,
336 struct fib_config
*cfg
)
341 memset(cfg
, 0, sizeof(*cfg
));
342 cfg
->fc_nlinfo
.nl_net
= net
;
344 if (rt
->rt_dst
.sa_family
!= AF_INET
)
345 return -EAFNOSUPPORT
;
348 * Check mask for validity:
349 * a) it must be contiguous.
350 * b) destination must have all host bits clear.
351 * c) if application forgot to set correct family (AF_INET),
352 * reject request unless it is absolutely clear i.e.
353 * both family and mask are zero.
356 addr
= sk_extract_addr(&rt
->rt_dst
);
357 if (!(rt
->rt_flags
& RTF_HOST
)) {
358 __be32 mask
= sk_extract_addr(&rt
->rt_genmask
);
360 if (rt
->rt_genmask
.sa_family
!= AF_INET
) {
361 if (mask
|| rt
->rt_genmask
.sa_family
)
362 return -EAFNOSUPPORT
;
365 if (bad_mask(mask
, addr
))
368 plen
= inet_mask_len(mask
);
371 cfg
->fc_dst_len
= plen
;
374 if (cmd
!= SIOCDELRT
) {
375 cfg
->fc_nlflags
= NLM_F_CREATE
;
376 cfg
->fc_protocol
= RTPROT_BOOT
;
380 cfg
->fc_priority
= rt
->rt_metric
- 1;
382 if (rt
->rt_flags
& RTF_REJECT
) {
383 cfg
->fc_scope
= RT_SCOPE_HOST
;
384 cfg
->fc_type
= RTN_UNREACHABLE
;
388 cfg
->fc_scope
= RT_SCOPE_NOWHERE
;
389 cfg
->fc_type
= RTN_UNICAST
;
393 struct net_device
*dev
;
394 char devname
[IFNAMSIZ
];
396 if (copy_from_user(devname
, rt
->rt_dev
, IFNAMSIZ
-1))
399 devname
[IFNAMSIZ
-1] = 0;
400 colon
= strchr(devname
, ':');
403 dev
= __dev_get_by_name(net
, devname
);
406 cfg
->fc_oif
= dev
->ifindex
;
408 struct in_ifaddr
*ifa
;
409 struct in_device
*in_dev
= __in_dev_get_rtnl(dev
);
413 for (ifa
= in_dev
->ifa_list
; ifa
; ifa
= ifa
->ifa_next
)
414 if (strcmp(ifa
->ifa_label
, devname
) == 0)
418 cfg
->fc_prefsrc
= ifa
->ifa_local
;
422 addr
= sk_extract_addr(&rt
->rt_gateway
);
423 if (rt
->rt_gateway
.sa_family
== AF_INET
&& addr
) {
425 if (rt
->rt_flags
& RTF_GATEWAY
&&
426 inet_addr_type(net
, addr
) == RTN_UNICAST
)
427 cfg
->fc_scope
= RT_SCOPE_UNIVERSE
;
430 if (cmd
== SIOCDELRT
)
433 if (rt
->rt_flags
& RTF_GATEWAY
&& !cfg
->fc_gw
)
436 if (cfg
->fc_scope
== RT_SCOPE_NOWHERE
)
437 cfg
->fc_scope
= RT_SCOPE_LINK
;
439 if (rt
->rt_flags
& (RTF_MTU
| RTF_WINDOW
| RTF_IRTT
)) {
443 mx
= kzalloc(3 * nla_total_size(4), GFP_KERNEL
);
447 if (rt
->rt_flags
& RTF_MTU
)
448 len
= put_rtax(mx
, len
, RTAX_ADVMSS
, rt
->rt_mtu
- 40);
450 if (rt
->rt_flags
& RTF_WINDOW
)
451 len
= put_rtax(mx
, len
, RTAX_WINDOW
, rt
->rt_window
);
453 if (rt
->rt_flags
& RTF_IRTT
)
454 len
= put_rtax(mx
, len
, RTAX_RTT
, rt
->rt_irtt
<< 3);
457 cfg
->fc_mx_len
= len
;
464 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
467 int ip_rt_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
469 struct fib_config cfg
;
474 case SIOCADDRT
: /* Add a route */
475 case SIOCDELRT
: /* Delete a route */
476 if (!capable(CAP_NET_ADMIN
))
479 if (copy_from_user(&rt
, arg
, sizeof(rt
)))
483 err
= rtentry_to_fib_config(net
, cmd
, &rt
, &cfg
);
485 struct fib_table
*tb
;
487 if (cmd
== SIOCDELRT
) {
488 tb
= fib_get_table(net
, cfg
.fc_table
);
490 err
= fib_table_delete(tb
, &cfg
);
494 tb
= fib_new_table(net
, cfg
.fc_table
);
496 err
= fib_table_insert(tb
, &cfg
);
501 /* allocated by rtentry_to_fib_config() */
510 const struct nla_policy rtm_ipv4_policy
[RTA_MAX
+1] = {
511 [RTA_DST
] = { .type
= NLA_U32
},
512 [RTA_SRC
] = { .type
= NLA_U32
},
513 [RTA_IIF
] = { .type
= NLA_U32
},
514 [RTA_OIF
] = { .type
= NLA_U32
},
515 [RTA_GATEWAY
] = { .type
= NLA_U32
},
516 [RTA_PRIORITY
] = { .type
= NLA_U32
},
517 [RTA_PREFSRC
] = { .type
= NLA_U32
},
518 [RTA_METRICS
] = { .type
= NLA_NESTED
},
519 [RTA_MULTIPATH
] = { .len
= sizeof(struct rtnexthop
) },
520 [RTA_FLOW
] = { .type
= NLA_U32
},
523 static int rtm_to_fib_config(struct net
*net
, struct sk_buff
*skb
,
524 struct nlmsghdr
*nlh
, struct fib_config
*cfg
)
530 err
= nlmsg_validate(nlh
, sizeof(*rtm
), RTA_MAX
, rtm_ipv4_policy
);
534 memset(cfg
, 0, sizeof(*cfg
));
536 rtm
= nlmsg_data(nlh
);
537 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
538 cfg
->fc_tos
= rtm
->rtm_tos
;
539 cfg
->fc_table
= rtm
->rtm_table
;
540 cfg
->fc_protocol
= rtm
->rtm_protocol
;
541 cfg
->fc_scope
= rtm
->rtm_scope
;
542 cfg
->fc_type
= rtm
->rtm_type
;
543 cfg
->fc_flags
= rtm
->rtm_flags
;
544 cfg
->fc_nlflags
= nlh
->nlmsg_flags
;
546 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
547 cfg
->fc_nlinfo
.nlh
= nlh
;
548 cfg
->fc_nlinfo
.nl_net
= net
;
550 if (cfg
->fc_type
> RTN_MAX
) {
555 nlmsg_for_each_attr(attr
, nlh
, sizeof(struct rtmsg
), remaining
) {
556 switch (nla_type(attr
)) {
558 cfg
->fc_dst
= nla_get_be32(attr
);
561 cfg
->fc_oif
= nla_get_u32(attr
);
564 cfg
->fc_gw
= nla_get_be32(attr
);
567 cfg
->fc_priority
= nla_get_u32(attr
);
570 cfg
->fc_prefsrc
= nla_get_be32(attr
);
573 cfg
->fc_mx
= nla_data(attr
);
574 cfg
->fc_mx_len
= nla_len(attr
);
577 cfg
->fc_mp
= nla_data(attr
);
578 cfg
->fc_mp_len
= nla_len(attr
);
581 cfg
->fc_flow
= nla_get_u32(attr
);
584 cfg
->fc_table
= nla_get_u32(attr
);
594 static int inet_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
596 struct net
*net
= sock_net(skb
->sk
);
597 struct fib_config cfg
;
598 struct fib_table
*tb
;
601 err
= rtm_to_fib_config(net
, skb
, nlh
, &cfg
);
605 tb
= fib_get_table(net
, cfg
.fc_table
);
611 err
= fib_table_delete(tb
, &cfg
);
616 static int inet_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
618 struct net
*net
= sock_net(skb
->sk
);
619 struct fib_config cfg
;
620 struct fib_table
*tb
;
623 err
= rtm_to_fib_config(net
, skb
, nlh
, &cfg
);
627 tb
= fib_new_table(net
, cfg
.fc_table
);
633 err
= fib_table_insert(tb
, &cfg
);
638 static int inet_dump_fib(struct sk_buff
*skb
, struct netlink_callback
*cb
)
640 struct net
*net
= sock_net(skb
->sk
);
642 unsigned int e
= 0, s_e
;
643 struct fib_table
*tb
;
644 struct hlist_node
*node
;
645 struct hlist_head
*head
;
648 if (nlmsg_len(cb
->nlh
) >= sizeof(struct rtmsg
) &&
649 ((struct rtmsg
*) nlmsg_data(cb
->nlh
))->rtm_flags
& RTM_F_CLONED
)
650 return ip_rt_dump(skb
, cb
);
655 for (h
= s_h
; h
< FIB_TABLE_HASHSZ
; h
++, s_e
= 0) {
657 head
= &net
->ipv4
.fib_table_hash
[h
];
658 hlist_for_each_entry(tb
, node
, head
, tb_hlist
) {
662 memset(&cb
->args
[2], 0, sizeof(cb
->args
) -
663 2 * sizeof(cb
->args
[0]));
664 if (fib_table_dump(tb
, skb
, cb
) < 0)
678 /* Prepare and feed an intra-kernel routing request.
679 Really, it should be a netlink message, but :-( netlink
680 may not be configured, so we feed it directly
681 to the fib engine. This is legal, because all events occur
682 only when netlink is already locked.
685 static void fib_magic(int cmd
, int type
, __be32 dst
, int dst_len
, struct in_ifaddr
*ifa
)
687 struct net
*net
= dev_net(ifa
->ifa_dev
->dev
);
688 struct fib_table
*tb
;
689 struct fib_config cfg
= {
690 .fc_protocol
= RTPROT_KERNEL
,
693 .fc_dst_len
= dst_len
,
694 .fc_prefsrc
= ifa
->ifa_local
,
695 .fc_oif
= ifa
->ifa_dev
->dev
->ifindex
,
696 .fc_nlflags
= NLM_F_CREATE
| NLM_F_APPEND
,
702 if (type
== RTN_UNICAST
)
703 tb
= fib_new_table(net
, RT_TABLE_MAIN
);
705 tb
= fib_new_table(net
, RT_TABLE_LOCAL
);
710 cfg
.fc_table
= tb
->tb_id
;
712 if (type
!= RTN_LOCAL
)
713 cfg
.fc_scope
= RT_SCOPE_LINK
;
715 cfg
.fc_scope
= RT_SCOPE_HOST
;
717 if (cmd
== RTM_NEWROUTE
)
718 fib_table_insert(tb
, &cfg
);
720 fib_table_delete(tb
, &cfg
);
723 void fib_add_ifaddr(struct in_ifaddr
*ifa
)
725 struct in_device
*in_dev
= ifa
->ifa_dev
;
726 struct net_device
*dev
= in_dev
->dev
;
727 struct in_ifaddr
*prim
= ifa
;
728 __be32 mask
= ifa
->ifa_mask
;
729 __be32 addr
= ifa
->ifa_local
;
730 __be32 prefix
= ifa
->ifa_address
&mask
;
732 if (ifa
->ifa_flags
&IFA_F_SECONDARY
) {
733 prim
= inet_ifa_byprefix(in_dev
, prefix
, mask
);
735 printk(KERN_WARNING
"fib_add_ifaddr: bug: prim == NULL\n");
740 fib_magic(RTM_NEWROUTE
, RTN_LOCAL
, addr
, 32, prim
);
742 if (!(dev
->flags
&IFF_UP
))
745 /* Add broadcast address, if it is explicitly assigned. */
746 if (ifa
->ifa_broadcast
&& ifa
->ifa_broadcast
!= htonl(0xFFFFFFFF))
747 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
749 if (!ipv4_is_zeronet(prefix
) && !(ifa
->ifa_flags
&IFA_F_SECONDARY
) &&
750 (prefix
!= addr
|| ifa
->ifa_prefixlen
< 32)) {
751 fib_magic(RTM_NEWROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
752 RTN_UNICAST
, prefix
, ifa
->ifa_prefixlen
, prim
);
754 /* Add network-specific broadcasts, when it makes sense */
755 if (ifa
->ifa_prefixlen
< 31) {
756 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
, 32, prim
);
757 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
|~mask
, 32, prim
);
762 static void fib_del_ifaddr(struct in_ifaddr
*ifa
)
764 struct in_device
*in_dev
= ifa
->ifa_dev
;
765 struct net_device
*dev
= in_dev
->dev
;
766 struct in_ifaddr
*ifa1
;
767 struct in_ifaddr
*prim
= ifa
;
768 __be32 brd
= ifa
->ifa_address
|~ifa
->ifa_mask
;
769 __be32 any
= ifa
->ifa_address
&ifa
->ifa_mask
;
776 if (!(ifa
->ifa_flags
&IFA_F_SECONDARY
))
777 fib_magic(RTM_DELROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
778 RTN_UNICAST
, any
, ifa
->ifa_prefixlen
, prim
);
780 prim
= inet_ifa_byprefix(in_dev
, any
, ifa
->ifa_mask
);
782 printk(KERN_WARNING
"fib_del_ifaddr: bug: prim == NULL\n");
787 /* Deletion is more complicated than addition.
788 We should take care not to delete too much :-)
790 Scan address list to be sure that addresses are really gone.
793 for (ifa1
= in_dev
->ifa_list
; ifa1
; ifa1
= ifa1
->ifa_next
) {
794 if (ifa
->ifa_local
== ifa1
->ifa_local
)
796 if (ifa
->ifa_broadcast
== ifa1
->ifa_broadcast
)
798 if (brd
== ifa1
->ifa_broadcast
)
800 if (any
== ifa1
->ifa_broadcast
)
805 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
807 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, brd
, 32, prim
);
809 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, any
, 32, prim
);
810 if (!(ok
&LOCAL_OK
)) {
811 fib_magic(RTM_DELROUTE
, RTN_LOCAL
, ifa
->ifa_local
, 32, prim
);
813 /* Check, that this local address finally disappeared. */
814 if (inet_addr_type(dev_net(dev
), ifa
->ifa_local
) != RTN_LOCAL
) {
815 /* And last, but not least:
816 we must flush stray FIB entries.
818 First of all, we scan the fib_info list searching
819 for stray nexthop entries, then trigger fib_flush.
821 if (fib_sync_down_addr(dev_net(dev
), ifa
->ifa_local
))
822 fib_flush(dev_net(dev
));
831 static void nl_fib_lookup(struct fib_result_nl
*frn
, struct fib_table
*tb
)
834 struct fib_result res
;
835 struct flowi fl
= { .mark
= frn
->fl_mark
,
836 .nl_u
= { .ip4_u
= { .daddr
= frn
->fl_addr
,
838 .scope
= frn
->fl_scope
} } };
840 #ifdef CONFIG_IP_MULTIPLE_TABLES
848 frn
->tb_id
= tb
->tb_id
;
849 frn
->err
= fib_table_lookup(tb
, &fl
, &res
);
852 frn
->prefixlen
= res
.prefixlen
;
853 frn
->nh_sel
= res
.nh_sel
;
854 frn
->type
= res
.type
;
855 frn
->scope
= res
.scope
;
862 static void nl_fib_input(struct sk_buff
*skb
)
865 struct fib_result_nl
*frn
;
866 struct nlmsghdr
*nlh
;
867 struct fib_table
*tb
;
870 net
= sock_net(skb
->sk
);
871 nlh
= nlmsg_hdr(skb
);
872 if (skb
->len
< NLMSG_SPACE(0) || skb
->len
< nlh
->nlmsg_len
||
873 nlh
->nlmsg_len
< NLMSG_LENGTH(sizeof(*frn
)))
876 skb
= skb_clone(skb
, GFP_KERNEL
);
879 nlh
= nlmsg_hdr(skb
);
881 frn
= (struct fib_result_nl
*) NLMSG_DATA(nlh
);
882 tb
= fib_get_table(net
, frn
->tb_id_in
);
884 nl_fib_lookup(frn
, tb
);
886 pid
= NETLINK_CB(skb
).pid
; /* pid of sending process */
887 NETLINK_CB(skb
).pid
= 0; /* from kernel */
888 NETLINK_CB(skb
).dst_group
= 0; /* unicast */
889 netlink_unicast(net
->ipv4
.fibnl
, skb
, pid
, MSG_DONTWAIT
);
892 static int __net_init
nl_fib_lookup_init(struct net
*net
)
895 sk
= netlink_kernel_create(net
, NETLINK_FIB_LOOKUP
, 0,
896 nl_fib_input
, NULL
, THIS_MODULE
);
898 return -EAFNOSUPPORT
;
899 net
->ipv4
.fibnl
= sk
;
903 static void nl_fib_lookup_exit(struct net
*net
)
905 netlink_kernel_release(net
->ipv4
.fibnl
);
906 net
->ipv4
.fibnl
= NULL
;
/*
 * Take routes through @dev out of service.
 *
 * @force is handed unchanged to fib_sync_down_dev(); when that call
 * returns non-zero the namespace's FIB tables are flushed.  The route
 * cache of the namespace is always flushed afterwards, with @delay
 * passed through to rt_cache_flush().
 */
static void fib_disable_ip(struct net_device *dev, int force, int delay)
{
	int need_flush = fib_sync_down_dev(dev, force);

	if (need_flush)
		fib_flush(dev_net(dev));

	rt_cache_flush(dev_net(dev), delay);
}
917 static int fib_inetaddr_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
919 struct in_ifaddr
*ifa
= (struct in_ifaddr
*)ptr
;
920 struct net_device
*dev
= ifa
->ifa_dev
->dev
;
925 #ifdef CONFIG_IP_ROUTE_MULTIPATH
928 rt_cache_flush(dev_net(dev
), -1);
932 if (ifa
->ifa_dev
->ifa_list
== NULL
) {
933 /* Last address was deleted from this interface.
936 fib_disable_ip(dev
, 1, 0);
938 rt_cache_flush(dev_net(dev
), -1);
945 static int fib_netdev_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
947 struct net_device
*dev
= ptr
;
948 struct in_device
*in_dev
= __in_dev_get_rtnl(dev
);
950 if (event
== NETDEV_UNREGISTER
) {
951 fib_disable_ip(dev
, 2, -1);
962 } endfor_ifa(in_dev
);
963 #ifdef CONFIG_IP_ROUTE_MULTIPATH
966 rt_cache_flush(dev_net(dev
), -1);
969 fib_disable_ip(dev
, 0, 0);
971 case NETDEV_CHANGEMTU
:
973 rt_cache_flush(dev_net(dev
), 0);
975 case NETDEV_UNREGISTER_BATCH
:
976 rt_cache_flush_batch();
982 static struct notifier_block fib_inetaddr_notifier
= {
983 .notifier_call
= fib_inetaddr_event
,
986 static struct notifier_block fib_netdev_notifier
= {
987 .notifier_call
= fib_netdev_event
,
990 static int __net_init
ip_fib_net_init(struct net
*net
)
995 net
->ipv4
.fib_table_hash
= kzalloc(
996 sizeof(struct hlist_head
)*FIB_TABLE_HASHSZ
, GFP_KERNEL
);
997 if (net
->ipv4
.fib_table_hash
== NULL
)
1000 for (i
= 0; i
< FIB_TABLE_HASHSZ
; i
++)
1001 INIT_HLIST_HEAD(&net
->ipv4
.fib_table_hash
[i
]);
1003 err
= fib4_rules_init(net
);
1009 kfree(net
->ipv4
.fib_table_hash
);
1013 static void ip_fib_net_exit(struct net
*net
)
1017 #ifdef CONFIG_IP_MULTIPLE_TABLES
1018 fib4_rules_exit(net
);
1021 for (i
= 0; i
< FIB_TABLE_HASHSZ
; i
++) {
1022 struct fib_table
*tb
;
1023 struct hlist_head
*head
;
1024 struct hlist_node
*node
, *tmp
;
1026 head
= &net
->ipv4
.fib_table_hash
[i
];
1027 hlist_for_each_entry_safe(tb
, node
, tmp
, head
, tb_hlist
) {
1029 fib_table_flush(tb
);
1033 kfree(net
->ipv4
.fib_table_hash
);
1036 static int __net_init
fib_net_init(struct net
*net
)
1040 error
= ip_fib_net_init(net
);
1043 error
= nl_fib_lookup_init(net
);
1046 error
= fib_proc_init(net
);
1053 nl_fib_lookup_exit(net
);
1055 ip_fib_net_exit(net
);
1059 static void __net_exit
fib_net_exit(struct net
*net
)
1062 nl_fib_lookup_exit(net
);
1063 ip_fib_net_exit(net
);
1066 static struct pernet_operations fib_net_ops
= {
1067 .init
= fib_net_init
,
1068 .exit
= fib_net_exit
,
1071 void __init
ip_fib_init(void)
1073 rtnl_register(PF_INET
, RTM_NEWROUTE
, inet_rtm_newroute
, NULL
);
1074 rtnl_register(PF_INET
, RTM_DELROUTE
, inet_rtm_delroute
, NULL
);
1075 rtnl_register(PF_INET
, RTM_GETROUTE
, NULL
, inet_dump_fib
);
1077 register_pernet_subsys(&fib_net_ops
);
1078 register_netdevice_notifier(&fib_netdev_notifier
);
1079 register_inetaddr_notifier(&fib_inetaddr_notifier
);