2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: FIB frontend.
8 * Version: $Id: fib_frontend.c,v 1.21 1999/12/15 22:39:07 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
46 #include <net/ip_fib.h>
48 #define FFprint(a...) printk(KERN_DEBUG a)
50 #ifndef CONFIG_IP_MULTIPLE_TABLES
52 #define RT_TABLE_MIN RT_TABLE_MAIN
54 struct fib_table
*local_table
;
55 struct fib_table
*main_table
;
59 #define RT_TABLE_MIN 1
61 struct fib_table
*fib_tables
[RT_TABLE_MAX
+1];
63 struct fib_table
*__fib_new_table(int id
)
67 tb
= fib_hash_init(id
);
75 #endif /* CONFIG_IP_MULTIPLE_TABLES */
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
85 for (id
= RT_TABLE_MAX
; id
>0; id
--) {
86 if ((tb
= fib_get_table(id
))==NULL
)
88 flushed
+= tb
->tb_flush(tb
);
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed
+= main_table
->tb_flush(main_table
);
92 flushed
+= local_table
->tb_flush(local_table
);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
100 #ifdef CONFIG_PROC_FS
103 * Called from the PROCfs module. This outputs /proc/net/route.
105 * It always works in backward compatibility mode.
106 * The format of the file is not supposed to be changed.
110 fib_get_procinfo(char *buffer
, char **start
, off_t offset
, int length
)
112 int first
= offset
/128;
114 int count
= (length
+127)/128;
117 *start
= buffer
+ offset
%128;
120 sprintf(buffer
, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
126 if (main_table
&& count
> 0) {
127 int n
= main_table
->tb_get_info(main_table
, ptr
, first
, count
);
139 #endif /* CONFIG_PROC_FS */
142 * Find the first device with a given source address.
145 struct net_device
* ip_dev_find(u32 addr
)
148 struct fib_result res
;
149 struct net_device
*dev
= NULL
;
151 memset(&key
, 0, sizeof(key
));
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
157 if (!local_table
|| local_table
->tb_lookup(local_table
, &key
, &res
)) {
160 if (res
.type
!= RTN_LOCAL
)
162 dev
= FIB_RES_DEV(res
);
164 atomic_inc(&dev
->refcnt
);
171 unsigned inet_addr_type(u32 addr
)
174 struct fib_result res
;
175 unsigned ret
= RTN_BROADCAST
;
177 if (ZERONET(addr
) || BADCLASS(addr
))
178 return RTN_BROADCAST
;
180 return RTN_MULTICAST
;
182 memset(&key
, 0, sizeof(key
));
184 #ifdef CONFIG_IP_MULTIPLE_TABLES
190 if (local_table
->tb_lookup(local_table
, &key
, &res
) == 0) {
198 /* Given (packet source, input interface) and optional (dst, oif, tos):
199 - (main) check, that source is valid i.e. not broadcast or our local
201 - figure out what "logical" interface this packet arrived
202 and calculate "specific destination" address.
203 - check, that packet arrived from expected physical interface.
206 int fib_validate_source(u32 src
, u32 dst
, u8 tos
, int oif
,
207 struct net_device
*dev
, u32
*spec_dst
, u32
*itag
)
209 struct in_device
*in_dev
;
211 struct fib_result res
;
220 key
.scope
= RT_SCOPE_UNIVERSE
;
223 read_lock(&inetdev_lock
);
224 in_dev
= __in_dev_get(dev
);
226 no_addr
= in_dev
->ifa_list
== NULL
;
227 rpf
= IN_DEV_RPFILTER(in_dev
);
229 read_unlock(&inetdev_lock
);
234 if (fib_lookup(&key
, &res
))
236 if (res
.type
!= RTN_UNICAST
)
238 *spec_dst
= FIB_RES_PREFSRC(res
);
240 fib_combine_itag(itag
, &res
);
241 #ifdef CONFIG_IP_ROUTE_MULTIPATH
242 if (FIB_RES_DEV(res
) == dev
|| res
.fi
->fib_nhs
> 1)
244 if (FIB_RES_DEV(res
) == dev
)
247 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
256 key
.oif
= dev
->ifindex
;
259 if (fib_lookup(&key
, &res
) == 0) {
260 if (res
.type
== RTN_UNICAST
) {
261 *spec_dst
= FIB_RES_PREFSRC(res
);
262 ret
= FIB_RES_NH(res
).nh_scope
>= RT_SCOPE_HOST
;
271 *spec_dst
= inet_select_addr(dev
, 0, RT_SCOPE_UNIVERSE
);
281 #ifndef CONFIG_IP_NOSIOCRT
284 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
287 int ip_rt_ioctl(unsigned int cmd
, void *arg
)
298 case SIOCADDRT
: /* Add a route */
299 case SIOCDELRT
: /* Delete a route */
300 if (!capable(CAP_NET_ADMIN
))
302 if (copy_from_user(&r
, arg
, sizeof(struct rtentry
)))
305 err
= fib_convert_rtentry(cmd
, &req
.nlh
, &req
.rtm
, &rta
, &r
);
307 if (cmd
== SIOCDELRT
) {
308 struct fib_table
*tb
= fib_get_table(req
.rtm
.rtm_table
);
311 err
= tb
->tb_delete(tb
, &req
.rtm
, &rta
, &req
.nlh
, NULL
);
313 struct fib_table
*tb
= fib_new_table(req
.rtm
.rtm_table
);
316 err
= tb
->tb_insert(tb
, &req
.rtm
, &rta
, &req
.nlh
, NULL
);
329 int ip_rt_ioctl(unsigned int cmd
, void *arg
)
336 #ifdef CONFIG_RTNETLINK
338 static int inet_check_attr(struct rtmsg
*r
, struct rtattr
**rta
)
342 for (i
=1; i
<=RTA_MAX
; i
++) {
343 struct rtattr
*attr
= rta
[i
-1];
345 if (RTA_PAYLOAD(attr
) < 4)
347 if (i
!= RTA_MULTIPATH
&& i
!= RTA_METRICS
)
348 rta
[i
-1] = (struct rtattr
*)RTA_DATA(attr
);
354 int inet_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
356 struct fib_table
* tb
;
357 struct rtattr
**rta
= arg
;
358 struct rtmsg
*r
= NLMSG_DATA(nlh
);
360 if (inet_check_attr(r
, rta
))
363 tb
= fib_get_table(r
->rtm_table
);
365 return tb
->tb_delete(tb
, r
, (struct kern_rta
*)rta
, nlh
, &NETLINK_CB(skb
));
369 int inet_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
371 struct fib_table
* tb
;
372 struct rtattr
**rta
= arg
;
373 struct rtmsg
*r
= NLMSG_DATA(nlh
);
375 if (inet_check_attr(r
, rta
))
378 tb
= fib_new_table(r
->rtm_table
);
380 return tb
->tb_insert(tb
, r
, (struct kern_rta
*)rta
, nlh
, &NETLINK_CB(skb
));
384 int inet_dump_fib(struct sk_buff
*skb
, struct netlink_callback
*cb
)
388 struct fib_table
*tb
;
390 if (NLMSG_PAYLOAD(cb
->nlh
, 0) >= sizeof(struct rtmsg
) &&
391 ((struct rtmsg
*)NLMSG_DATA(cb
->nlh
))->rtm_flags
&RTM_F_CLONED
)
392 return ip_rt_dump(skb
, cb
);
396 s_t
= cb
->args
[0] = RT_TABLE_MIN
;
398 for (t
=s_t
; t
<=RT_TABLE_MAX
; t
++) {
399 if (t
< s_t
) continue;
401 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
402 if ((tb
= fib_get_table(t
))==NULL
)
404 if (tb
->tb_dump(tb
, skb
, cb
) < 0)
415 /* Prepare and feed intra-kernel routing request.
416 Really, it should be netlink message, but :-( netlink
417 can be not configured, so that we feed it directly
418 to fib engine. It is legal, because all events occur
419 only when netlink is already locked.
422 static void fib_magic(int cmd
, int type
, u32 dst
, int dst_len
, struct in_ifaddr
*ifa
)
424 struct fib_table
* tb
;
431 memset(&req
.rtm
, 0, sizeof(req
.rtm
));
432 memset(&rta
, 0, sizeof(rta
));
434 if (type
== RTN_UNICAST
)
435 tb
= fib_new_table(RT_TABLE_MAIN
);
437 tb
= fib_new_table(RT_TABLE_LOCAL
);
442 req
.nlh
.nlmsg_len
= sizeof(req
);
443 req
.nlh
.nlmsg_type
= cmd
;
444 req
.nlh
.nlmsg_flags
= NLM_F_REQUEST
|NLM_F_CREATE
|NLM_F_APPEND
;
445 req
.nlh
.nlmsg_pid
= 0;
446 req
.nlh
.nlmsg_seq
= 0;
448 req
.rtm
.rtm_dst_len
= dst_len
;
449 req
.rtm
.rtm_table
= tb
->tb_id
;
450 req
.rtm
.rtm_protocol
= RTPROT_KERNEL
;
451 req
.rtm
.rtm_scope
= (type
!= RTN_LOCAL
? RT_SCOPE_LINK
: RT_SCOPE_HOST
);
452 req
.rtm
.rtm_type
= type
;
455 rta
.rta_prefsrc
= &ifa
->ifa_local
;
456 rta
.rta_oif
= &ifa
->ifa_dev
->dev
->ifindex
;
458 if (cmd
== RTM_NEWROUTE
)
459 tb
->tb_insert(tb
, &req
.rtm
, &rta
, &req
.nlh
, NULL
);
461 tb
->tb_delete(tb
, &req
.rtm
, &rta
, &req
.nlh
, NULL
);
464 static void fib_add_ifaddr(struct in_ifaddr
*ifa
)
466 struct in_device
*in_dev
= ifa
->ifa_dev
;
467 struct net_device
*dev
= in_dev
->dev
;
468 struct in_ifaddr
*prim
= ifa
;
469 u32 mask
= ifa
->ifa_mask
;
470 u32 addr
= ifa
->ifa_local
;
471 u32 prefix
= ifa
->ifa_address
&mask
;
473 if (ifa
->ifa_flags
&IFA_F_SECONDARY
) {
474 prim
= inet_ifa_byprefix(in_dev
, prefix
, mask
);
476 printk(KERN_DEBUG
"fib_add_ifaddr: bug: prim == NULL\n");
481 fib_magic(RTM_NEWROUTE
, RTN_LOCAL
, addr
, 32, prim
);
483 if (!(dev
->flags
&IFF_UP
))
486 /* Add broadcast address, if it is explicitly assigned. */
487 if (ifa
->ifa_broadcast
&& ifa
->ifa_broadcast
!= 0xFFFFFFFF)
488 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
490 if (!ZERONET(prefix
) && !(ifa
->ifa_flags
&IFA_F_SECONDARY
) &&
491 (prefix
!= addr
|| ifa
->ifa_prefixlen
< 32)) {
492 fib_magic(RTM_NEWROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
493 RTN_UNICAST
, prefix
, ifa
->ifa_prefixlen
, prim
);
495 /* Add network specific broadcasts, when it takes a sense */
496 if (ifa
->ifa_prefixlen
< 31) {
497 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
, 32, prim
);
498 fib_magic(RTM_NEWROUTE
, RTN_BROADCAST
, prefix
|~mask
, 32, prim
);
503 static void fib_del_ifaddr(struct in_ifaddr
*ifa
)
505 struct in_device
*in_dev
= ifa
->ifa_dev
;
506 struct net_device
*dev
= in_dev
->dev
;
507 struct in_ifaddr
*ifa1
;
508 struct in_ifaddr
*prim
= ifa
;
509 u32 brd
= ifa
->ifa_address
|~ifa
->ifa_mask
;
510 u32 any
= ifa
->ifa_address
&ifa
->ifa_mask
;
517 if (!(ifa
->ifa_flags
&IFA_F_SECONDARY
))
518 fib_magic(RTM_DELROUTE
, dev
->flags
&IFF_LOOPBACK
? RTN_LOCAL
:
519 RTN_UNICAST
, any
, ifa
->ifa_prefixlen
, prim
);
521 prim
= inet_ifa_byprefix(in_dev
, any
, ifa
->ifa_mask
);
523 printk(KERN_DEBUG
"fib_del_ifaddr: bug: prim == NULL\n");
528 /* Deletion is more complicated than add.
529 We should take care of not to delete too much :-)
531 Scan address list to be sure that addresses are really gone.
534 for (ifa1
= in_dev
->ifa_list
; ifa1
; ifa1
= ifa1
->ifa_next
) {
535 if (ifa
->ifa_local
== ifa1
->ifa_local
)
537 if (ifa
->ifa_broadcast
== ifa1
->ifa_broadcast
)
539 if (brd
== ifa1
->ifa_broadcast
)
541 if (any
== ifa1
->ifa_broadcast
)
546 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, ifa
->ifa_broadcast
, 32, prim
);
548 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, brd
, 32, prim
);
550 fib_magic(RTM_DELROUTE
, RTN_BROADCAST
, any
, 32, prim
);
551 if (!(ok
&LOCAL_OK
)) {
552 fib_magic(RTM_DELROUTE
, RTN_LOCAL
, ifa
->ifa_local
, 32, prim
);
554 /* Check, that this local address finally disappeared. */
555 if (inet_addr_type(ifa
->ifa_local
) != RTN_LOCAL
) {
556 /* And the last, but not the least thing.
557 We must flush stray FIB entries.
559 First of all, we scan fib_info list searching
560 for stray nexthop entries, then ignite fib_flush.
562 if (fib_sync_down(ifa
->ifa_local
, NULL
, 0))
572 static void fib_disable_ip(struct net_device
*dev
, int force
)
574 if (fib_sync_down(0, dev
, force
))
580 static int fib_inetaddr_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
582 struct in_ifaddr
*ifa
= (struct in_ifaddr
*)ptr
;
590 if (ifa
->ifa_dev
&& ifa
->ifa_dev
->ifa_list
== NULL
) {
591 /* Last address was deleted from this interface.
594 fib_disable_ip(ifa
->ifa_dev
->dev
, 1);
604 static int fib_netdev_event(struct notifier_block
*this, unsigned long event
, void *ptr
)
606 struct net_device
*dev
= ptr
;
607 struct in_device
*in_dev
= __in_dev_get(dev
);
616 } endfor_ifa(in_dev
);
617 #ifdef CONFIG_IP_ROUTE_MULTIPATH
623 fib_disable_ip(dev
, 0);
625 case NETDEV_UNREGISTER
:
626 fib_disable_ip(dev
, 1);
628 case NETDEV_CHANGEMTU
:
636 struct notifier_block fib_inetaddr_notifier
= {
642 struct notifier_block fib_netdev_notifier
= {
648 void __init
ip_fib_init(void)
650 #ifdef CONFIG_PROC_FS
651 proc_net_create("route",0,fib_get_procinfo
);
652 #endif /* CONFIG_PROC_FS */
654 #ifndef CONFIG_IP_MULTIPLE_TABLES
655 local_table
= fib_hash_init(RT_TABLE_LOCAL
);
656 main_table
= fib_hash_init(RT_TABLE_MAIN
);
661 register_netdevice_notifier(&fib_netdev_notifier
);
662 register_inetaddr_notifier(&fib_inetaddr_notifier
);