2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.17 2000/08/19 23:22:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
46 #define FSprintk(a...)
48 static struct fib_info
*fib_info_list
;
49 static rwlock_t fib_info_lock
= RW_LOCK_UNLOCKED
;
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
55 #define endfor_fib_info() }
57 #ifdef CONFIG_IP_ROUTE_MULTIPATH
59 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
60 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
62 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
63 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
65 #else /* CONFIG_IP_ROUTE_MULTIPATH */
67 /* Hope, that gcc will optimize it to get rid of dummy loop */
69 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
70 for (nhsel=0; nhsel < 1; nhsel++)
72 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
73 for (nhsel=0; nhsel < 1; nhsel++)
75 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
77 #define endfor_nexthops(fi) }
84 } fib_props
[RTA_MAX
+1] = {
85 { 0, RT_SCOPE_NOWHERE
}, /* RTN_UNSPEC */
86 { 0, RT_SCOPE_UNIVERSE
}, /* RTN_UNICAST */
87 { 0, RT_SCOPE_HOST
}, /* RTN_LOCAL */
88 { 0, RT_SCOPE_LINK
}, /* RTN_BROADCAST */
89 { 0, RT_SCOPE_LINK
}, /* RTN_ANYCAST */
90 { 0, RT_SCOPE_UNIVERSE
}, /* RTN_MULTICAST */
91 { -EINVAL
, RT_SCOPE_UNIVERSE
}, /* RTN_BLACKHOLE */
92 { -EHOSTUNREACH
, RT_SCOPE_UNIVERSE
},/* RTN_UNREACHABLE */
93 { -EACCES
, RT_SCOPE_UNIVERSE
}, /* RTN_PROHIBIT */
94 { -EAGAIN
, RT_SCOPE_UNIVERSE
}, /* RTN_THROW */
95 #ifdef CONFIG_IP_ROUTE_NAT
96 { 0, RT_SCOPE_HOST
}, /* RTN_NAT */
98 { -EINVAL
, RT_SCOPE_NOWHERE
}, /* RTN_NAT */
100 { -EINVAL
, RT_SCOPE_NOWHERE
} /* RTN_XRESOLVE */
104 /* Release a nexthop info record */
106 void free_fib_info(struct fib_info
*fi
)
108 if (fi
->fib_dead
== 0) {
109 printk("Freeing alive fib_info %p\n", fi
);
112 change_nexthops(fi
) {
116 } endfor_nexthops(fi
);
121 void fib_release_info(struct fib_info
*fi
)
123 write_lock(&fib_info_lock
);
124 if (fi
&& --fi
->fib_treeref
== 0) {
126 fi
->fib_next
->fib_prev
= fi
->fib_prev
;
128 fi
->fib_prev
->fib_next
= fi
->fib_next
;
129 if (fi
== fib_info_list
)
130 fib_info_list
= fi
->fib_next
;
134 write_unlock(&fib_info_lock
);
137 extern __inline__
int nh_comp(const struct fib_info
*fi
, const struct fib_info
*ofi
)
139 const struct fib_nh
*onh
= ofi
->fib_nh
;
142 if (nh
->nh_oif
!= onh
->nh_oif
||
143 nh
->nh_gw
!= onh
->nh_gw
||
144 nh
->nh_scope
!= onh
->nh_scope
||
145 #ifdef CONFIG_IP_ROUTE_MULTIPATH
146 nh
->nh_weight
!= onh
->nh_weight
||
148 #ifdef CONFIG_NET_CLS_ROUTE
149 nh
->nh_tclassid
!= onh
->nh_tclassid
||
151 ((nh
->nh_flags
^onh
->nh_flags
)&~RTNH_F_DEAD
))
154 } endfor_nexthops(fi
);
158 extern __inline__
struct fib_info
* fib_find_info(const struct fib_info
*nfi
)
161 if (fi
->fib_nhs
!= nfi
->fib_nhs
)
163 if (nfi
->fib_protocol
== fi
->fib_protocol
&&
164 nfi
->fib_prefsrc
== fi
->fib_prefsrc
&&
165 nfi
->fib_priority
== fi
->fib_priority
&&
166 memcmp(nfi
->fib_metrics
, fi
->fib_metrics
, sizeof(fi
->fib_metrics
)) == 0 &&
167 ((nfi
->fib_flags
^fi
->fib_flags
)&~RTNH_F_DEAD
) == 0 &&
168 (nfi
->fib_nhs
== 0 || nh_comp(fi
, nfi
) == 0))
174 /* Check, that the gateway is already configured.
175 Used only by redirect accept routine.
178 int ip_fib_check_default(u32 gw
, struct net_device
*dev
)
180 read_lock(&fib_info_lock
);
182 if (fi
->fib_flags
& RTNH_F_DEAD
)
185 if (nh
->nh_dev
== dev
&& nh
->nh_gw
== gw
&&
186 !(nh
->nh_flags
&RTNH_F_DEAD
)) {
187 read_unlock(&fib_info_lock
);
190 } endfor_nexthops(fi
);
192 read_unlock(&fib_info_lock
);
196 #ifdef CONFIG_IP_ROUTE_MULTIPATH
198 static u32
fib_get_attr32(struct rtattr
*attr
, int attrlen
, int type
)
200 while (RTA_OK(attr
,attrlen
)) {
201 if (attr
->rta_type
== type
)
202 return *(u32
*)RTA_DATA(attr
);
203 attr
= RTA_NEXT(attr
, attrlen
);
209 fib_count_nexthops(struct rtattr
*rta
)
212 struct rtnexthop
*nhp
= RTA_DATA(rta
);
213 int nhlen
= RTA_PAYLOAD(rta
);
215 while (nhlen
>= (int)sizeof(struct rtnexthop
)) {
216 if ((nhlen
-= nhp
->rtnh_len
) < 0)
219 nhp
= RTNH_NEXT(nhp
);
225 fib_get_nhs(struct fib_info
*fi
, const struct rtattr
*rta
, const struct rtmsg
*r
)
227 struct rtnexthop
*nhp
= RTA_DATA(rta
);
228 int nhlen
= RTA_PAYLOAD(rta
);
230 change_nexthops(fi
) {
231 int attrlen
= nhlen
- sizeof(struct rtnexthop
);
232 if (attrlen
< 0 || (nhlen
-= nhp
->rtnh_len
) < 0)
234 nh
->nh_flags
= (r
->rtm_flags
&~0xFF) | nhp
->rtnh_flags
;
235 nh
->nh_oif
= nhp
->rtnh_ifindex
;
236 nh
->nh_weight
= nhp
->rtnh_hops
+ 1;
238 nh
->nh_gw
= fib_get_attr32(RTNH_DATA(nhp
), attrlen
, RTA_GATEWAY
);
239 #ifdef CONFIG_NET_CLS_ROUTE
240 nh
->nh_tclassid
= fib_get_attr32(RTNH_DATA(nhp
), attrlen
, RTA_FLOW
);
243 nhp
= RTNH_NEXT(nhp
);
244 } endfor_nexthops(fi
);
250 int fib_nh_match(struct rtmsg
*r
, struct nlmsghdr
*nlh
, struct kern_rta
*rta
,
253 #ifdef CONFIG_IP_ROUTE_MULTIPATH
254 struct rtnexthop
*nhp
;
258 if (rta
->rta_priority
&&
259 *rta
->rta_priority
!= fi
->fib_priority
)
262 if (rta
->rta_oif
|| rta
->rta_gw
) {
263 if ((!rta
->rta_oif
|| *rta
->rta_oif
== fi
->fib_nh
->nh_oif
) &&
264 (!rta
->rta_gw
|| memcmp(rta
->rta_gw
, &fi
->fib_nh
->nh_gw
, 4) == 0))
269 #ifdef CONFIG_IP_ROUTE_MULTIPATH
270 if (rta
->rta_mp
== NULL
)
272 nhp
= RTA_DATA(rta
->rta_mp
);
273 nhlen
= RTA_PAYLOAD(rta
->rta_mp
);
276 int attrlen
= nhlen
- sizeof(struct rtnexthop
);
279 if (attrlen
< 0 || (nhlen
-= nhp
->rtnh_len
) < 0)
281 if (nhp
->rtnh_ifindex
&& nhp
->rtnh_ifindex
!= nh
->nh_oif
)
284 gw
= fib_get_attr32(RTNH_DATA(nhp
), attrlen
, RTA_GATEWAY
);
285 if (gw
&& gw
!= nh
->nh_gw
)
287 #ifdef CONFIG_NET_CLS_ROUTE
288 gw
= fib_get_attr32(RTNH_DATA(nhp
), attrlen
, RTA_FLOW
);
289 if (gw
&& gw
!= nh
->nh_tclassid
)
293 nhp
= RTNH_NEXT(nhp
);
294 } endfor_nexthops(fi
);
304 Semantics of nexthop is very messy by historical reasons.
305 We have to take into account, that:
306 a) gateway can be actually local interface address,
307 so that gatewayed route is direct.
308 b) gateway must be on-link address, possibly
309 described not by an ifaddr, but also by a direct route.
310 c) If both gateway and interface are specified, they should not
312 d) If we use tunnel routes, gateway could be not on-link.
314 Attempt to reconcile all of these (alas, self-contradictory) conditions
315 results in pretty ugly and hairy code with obscure logic.
317 I choosed to generalized it instead, so that the size
318 of code does not increase practically, but it becomes
320 Every prefix is assigned a "scope" value: "host" is local address,
321 "link" is direct route,
322 [ ... "site" ... "interior" ... ]
323 and "universe" is true gateway route with global meaning.
325 Every prefix refers to a set of "nexthop"s (gw, oif),
326 where gw must have narrower scope. This recursion stops
327 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
328 which means that gw is forced to be on link.
330 Code is still hairy, but now it is apparently logically
331 consistent and very flexible. F.e. as by-product it allows
332 to co-exists in peace independent exterior and interior
335 Normally it looks as following.
337 {universe prefix} -> (gw, oif) [scope link]
339 |-> {link prefix} -> (gw, oif) [scope local]
341 |-> {local prefix} (terminal node)
344 static int fib_check_nh(const struct rtmsg
*r
, struct fib_info
*fi
, struct fib_nh
*nh
)
350 struct fib_result res
;
352 #ifdef CONFIG_IP_ROUTE_PERVASIVE
353 if (nh
->nh_flags
&RTNH_F_PERVASIVE
)
356 if (nh
->nh_flags
&RTNH_F_ONLINK
) {
357 struct net_device
*dev
;
359 if (r
->rtm_scope
>= RT_SCOPE_LINK
)
361 if (inet_addr_type(nh
->nh_gw
) != RTN_UNICAST
)
363 if ((dev
= __dev_get_by_index(nh
->nh_oif
)) == NULL
)
365 if (!(dev
->flags
&IFF_UP
))
368 atomic_inc(&dev
->refcnt
);
369 nh
->nh_scope
= RT_SCOPE_LINK
;
372 memset(&key
, 0, sizeof(key
));
374 key
.oif
= nh
->nh_oif
;
375 key
.scope
= r
->rtm_scope
+ 1;
377 /* It is not necessary, but requires a bit of thinking */
378 if (key
.scope
< RT_SCOPE_LINK
)
379 key
.scope
= RT_SCOPE_LINK
;
381 if ((err
= fib_lookup(&key
, &res
)) != 0)
383 nh
->nh_scope
= res
.scope
;
384 nh
->nh_oif
= FIB_RES_OIF(res
);
385 nh
->nh_dev
= FIB_RES_DEV(res
);
387 atomic_inc(&nh
->nh_dev
->refcnt
);
390 struct in_device
*in_dev
;
392 if (nh
->nh_flags
&(RTNH_F_PERVASIVE
|RTNH_F_ONLINK
))
395 in_dev
= inetdev_by_index(nh
->nh_oif
);
398 if (!(in_dev
->dev
->flags
&IFF_UP
)) {
402 nh
->nh_dev
= in_dev
->dev
;
403 atomic_inc(&nh
->nh_dev
->refcnt
);
404 nh
->nh_scope
= RT_SCOPE_HOST
;
411 fib_create_info(const struct rtmsg
*r
, struct kern_rta
*rta
,
412 const struct nlmsghdr
*nlh
, int *errp
)
415 struct fib_info
*fi
= NULL
;
416 struct fib_info
*ofi
;
417 #ifdef CONFIG_IP_ROUTE_MULTIPATH
423 /* Fast check to catch the most weird cases */
424 if (fib_props
[r
->rtm_type
].scope
> r
->rtm_scope
)
427 #ifdef CONFIG_IP_ROUTE_MULTIPATH
429 nhs
= fib_count_nexthops(rta
->rta_mp
);
435 fi
= kmalloc(sizeof(*fi
)+nhs
*sizeof(struct fib_nh
), GFP_KERNEL
);
440 memset(fi
, 0, sizeof(*fi
)+nhs
*sizeof(struct fib_nh
));
442 fi
->fib_protocol
= r
->rtm_protocol
;
444 fi
->fib_flags
= r
->rtm_flags
;
445 if (rta
->rta_priority
)
446 fi
->fib_priority
= *rta
->rta_priority
;
448 int attrlen
= RTA_PAYLOAD(rta
->rta_mx
);
449 struct rtattr
*attr
= RTA_DATA(rta
->rta_mx
);
451 while (RTA_OK(attr
, attrlen
)) {
452 unsigned flavor
= attr
->rta_type
;
454 if (flavor
> RTAX_MAX
)
456 fi
->fib_metrics
[flavor
-1] = *(unsigned*)RTA_DATA(attr
);
458 attr
= RTA_NEXT(attr
, attrlen
);
461 if (rta
->rta_prefsrc
)
462 memcpy(&fi
->fib_prefsrc
, rta
->rta_prefsrc
, 4);
465 #ifdef CONFIG_IP_ROUTE_MULTIPATH
466 if ((err
= fib_get_nhs(fi
, rta
->rta_mp
, r
)) != 0)
468 if (rta
->rta_oif
&& fi
->fib_nh
->nh_oif
!= *rta
->rta_oif
)
470 if (rta
->rta_gw
&& memcmp(&fi
->fib_nh
->nh_gw
, rta
->rta_gw
, 4))
472 #ifdef CONFIG_NET_CLS_ROUTE
473 if (rta
->rta_flow
&& memcmp(&fi
->fib_nh
->nh_tclassid
, rta
->rta_flow
, 4))
480 struct fib_nh
*nh
= fi
->fib_nh
;
482 nh
->nh_oif
= *rta
->rta_oif
;
484 memcpy(&nh
->nh_gw
, rta
->rta_gw
, 4);
485 #ifdef CONFIG_NET_CLS_ROUTE
487 memcpy(&nh
->nh_tclassid
, rta
->rta_flow
, 4);
489 nh
->nh_flags
= r
->rtm_flags
;
490 #ifdef CONFIG_IP_ROUTE_MULTIPATH
495 #ifdef CONFIG_IP_ROUTE_NAT
496 if (r
->rtm_type
== RTN_NAT
) {
497 if (rta
->rta_gw
== NULL
|| nhs
!= 1 || rta
->rta_oif
)
499 memcpy(&fi
->fib_nh
->nh_gw
, rta
->rta_gw
, 4);
504 if (fib_props
[r
->rtm_type
].error
) {
505 if (rta
->rta_gw
|| rta
->rta_oif
|| rta
->rta_mp
)
510 if (r
->rtm_scope
> RT_SCOPE_HOST
)
513 if (r
->rtm_scope
== RT_SCOPE_HOST
) {
514 struct fib_nh
*nh
= fi
->fib_nh
;
516 /* Local address is added. */
517 if (nhs
!= 1 || nh
->nh_gw
)
519 nh
->nh_scope
= RT_SCOPE_NOWHERE
;
520 nh
->nh_dev
= dev_get_by_index(fi
->fib_nh
->nh_oif
);
522 if (nh
->nh_dev
== NULL
)
525 change_nexthops(fi
) {
526 if ((err
= fib_check_nh(r
, fi
, nh
)) != 0)
528 } endfor_nexthops(fi
)
531 if (fi
->fib_prefsrc
) {
532 if (r
->rtm_type
!= RTN_LOCAL
|| rta
->rta_dst
== NULL
||
533 memcmp(&fi
->fib_prefsrc
, rta
->rta_dst
, 4))
534 if (inet_addr_type(fi
->fib_prefsrc
) != RTN_LOCAL
)
539 if ((ofi
= fib_find_info(fi
)) != NULL
) {
547 atomic_inc(&fi
->fib_clntref
);
548 write_lock(&fib_info_lock
);
549 fi
->fib_next
= fib_info_list
;
552 fib_info_list
->fib_prev
= fi
;
554 write_unlock(&fib_info_lock
);
570 fib_semantic_match(int type
, struct fib_info
*fi
, const struct rt_key
*key
, struct fib_result
*res
)
572 int err
= fib_props
[type
].error
;
575 if (fi
->fib_flags
&RTNH_F_DEAD
)
581 #ifdef CONFIG_IP_ROUTE_NAT
584 atomic_inc(&fi
->fib_clntref
);
593 if (nh
->nh_flags
&RTNH_F_DEAD
)
595 if (!key
->oif
|| key
->oif
== nh
->nh_oif
)
598 #ifdef CONFIG_IP_ROUTE_MULTIPATH
599 if (nhsel
< fi
->fib_nhs
) {
601 atomic_inc(&fi
->fib_clntref
);
606 atomic_inc(&fi
->fib_clntref
);
615 printk(KERN_DEBUG
"impossible 102\n");
622 /* Find appropriate source address to this destination */
624 u32
__fib_res_prefsrc(struct fib_result
*res
)
626 return inet_select_addr(FIB_RES_DEV(*res
), FIB_RES_GW(*res
), res
->scope
);
629 #ifdef CONFIG_RTNETLINK
632 fib_dump_info(struct sk_buff
*skb
, u32 pid
, u32 seq
, int event
,
633 u8 tb_id
, u8 type
, u8 scope
, void *dst
, int dst_len
, u8 tos
,
637 struct nlmsghdr
*nlh
;
638 unsigned char *b
= skb
->tail
;
640 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*rtm
));
641 rtm
= NLMSG_DATA(nlh
);
642 rtm
->rtm_family
= AF_INET
;
643 rtm
->rtm_dst_len
= dst_len
;
644 rtm
->rtm_src_len
= 0;
646 rtm
->rtm_table
= tb_id
;
647 rtm
->rtm_type
= type
;
648 rtm
->rtm_flags
= fi
->fib_flags
;
649 rtm
->rtm_scope
= scope
;
650 if (rtm
->rtm_dst_len
)
651 RTA_PUT(skb
, RTA_DST
, 4, dst
);
652 rtm
->rtm_protocol
= fi
->fib_protocol
;
653 if (fi
->fib_priority
)
654 RTA_PUT(skb
, RTA_PRIORITY
, 4, &fi
->fib_priority
);
655 #ifdef CONFIG_NET_CLS_ROUTE
656 if (fi
->fib_nh
[0].nh_tclassid
)
657 RTA_PUT(skb
, RTA_FLOW
, 4, &fi
->fib_nh
[0].nh_tclassid
);
659 if (rtnetlink_put_metrics(skb
, fi
->fib_metrics
) < 0)
662 RTA_PUT(skb
, RTA_PREFSRC
, 4, &fi
->fib_prefsrc
);
663 if (fi
->fib_nhs
== 1) {
664 if (fi
->fib_nh
->nh_gw
)
665 RTA_PUT(skb
, RTA_GATEWAY
, 4, &fi
->fib_nh
->nh_gw
);
666 if (fi
->fib_nh
->nh_oif
)
667 RTA_PUT(skb
, RTA_OIF
, sizeof(int), &fi
->fib_nh
->nh_oif
);
669 #ifdef CONFIG_IP_ROUTE_MULTIPATH
670 if (fi
->fib_nhs
> 1) {
671 struct rtnexthop
*nhp
;
672 struct rtattr
*mp_head
;
673 if (skb_tailroom(skb
) <= RTA_SPACE(0))
675 mp_head
= (struct rtattr
*)skb_put(skb
, RTA_SPACE(0));
678 if (skb_tailroom(skb
) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp
)) + 4))
680 nhp
= (struct rtnexthop
*)skb_put(skb
, RTA_ALIGN(sizeof(*nhp
)));
681 nhp
->rtnh_flags
= nh
->nh_flags
& 0xFF;
682 nhp
->rtnh_hops
= nh
->nh_weight
-1;
683 nhp
->rtnh_ifindex
= nh
->nh_oif
;
685 RTA_PUT(skb
, RTA_GATEWAY
, 4, &nh
->nh_gw
);
686 nhp
->rtnh_len
= skb
->tail
- (unsigned char*)nhp
;
687 } endfor_nexthops(fi
);
688 mp_head
->rta_type
= RTA_MULTIPATH
;
689 mp_head
->rta_len
= skb
->tail
- (u8
*)mp_head
;
692 nlh
->nlmsg_len
= skb
->tail
- b
;
697 skb_trim(skb
, b
- skb
->data
);
701 #endif /* CONFIG_RTNETLINK */
703 #ifndef CONFIG_IP_NOSIOCRT
706 fib_convert_rtentry(int cmd
, struct nlmsghdr
*nl
, struct rtmsg
*rtm
,
707 struct kern_rta
*rta
, struct rtentry
*r
)
712 memset(rtm
, 0, sizeof(*rtm
));
713 memset(rta
, 0, sizeof(*rta
));
715 if (r
->rt_dst
.sa_family
!= AF_INET
)
716 return -EAFNOSUPPORT
;
718 /* Check mask for validity:
719 a) it must be contiguous.
720 b) destination must have all host bits clear.
721 c) if application forgot to set correct family (AF_INET),
722 reject request unless it is absolutely clear i.e.
723 both family and mask are zero.
726 ptr
= &((struct sockaddr_in
*)&r
->rt_dst
)->sin_addr
.s_addr
;
727 if (!(r
->rt_flags
&RTF_HOST
)) {
728 u32 mask
= ((struct sockaddr_in
*)&r
->rt_genmask
)->sin_addr
.s_addr
;
729 if (r
->rt_genmask
.sa_family
!= AF_INET
) {
730 if (mask
|| r
->rt_genmask
.sa_family
)
731 return -EAFNOSUPPORT
;
733 if (bad_mask(mask
, *ptr
))
735 plen
= inet_mask_len(mask
);
738 nl
->nlmsg_flags
= NLM_F_REQUEST
;
741 nl
->nlmsg_len
= NLMSG_LENGTH(sizeof(*rtm
));
742 if (cmd
== SIOCDELRT
) {
743 nl
->nlmsg_type
= RTM_DELROUTE
;
746 nl
->nlmsg_type
= RTM_NEWROUTE
;
747 nl
->nlmsg_flags
= NLM_F_REQUEST
|NLM_F_CREATE
;
748 rtm
->rtm_protocol
= RTPROT_BOOT
;
751 rtm
->rtm_dst_len
= plen
;
755 *(u32
*)&r
->rt_pad3
= r
->rt_metric
- 1;
756 rta
->rta_priority
= (u32
*)&r
->rt_pad3
;
758 if (r
->rt_flags
&RTF_REJECT
) {
759 rtm
->rtm_scope
= RT_SCOPE_HOST
;
760 rtm
->rtm_type
= RTN_UNREACHABLE
;
763 rtm
->rtm_scope
= RT_SCOPE_NOWHERE
;
764 rtm
->rtm_type
= RTN_UNICAST
;
768 struct net_device
*dev
;
769 char devname
[IFNAMSIZ
];
771 if (copy_from_user(devname
, r
->rt_dev
, IFNAMSIZ
-1))
773 devname
[IFNAMSIZ
-1] = 0;
774 colon
= strchr(devname
, ':');
777 dev
= __dev_get_by_name(devname
);
780 rta
->rta_oif
= &dev
->ifindex
;
782 struct in_ifaddr
*ifa
;
783 struct in_device
*in_dev
= __in_dev_get(dev
);
787 for (ifa
= in_dev
->ifa_list
; ifa
; ifa
= ifa
->ifa_next
)
788 if (strcmp(ifa
->ifa_label
, devname
) == 0)
792 rta
->rta_prefsrc
= &ifa
->ifa_local
;
796 ptr
= &((struct sockaddr_in
*)&r
->rt_gateway
)->sin_addr
.s_addr
;
797 if (r
->rt_gateway
.sa_family
== AF_INET
&& *ptr
) {
799 if (r
->rt_flags
&RTF_GATEWAY
&& inet_addr_type(*ptr
) == RTN_UNICAST
)
800 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
803 if (cmd
== SIOCDELRT
)
806 if (r
->rt_flags
&RTF_GATEWAY
&& rta
->rta_gw
== NULL
)
809 if (rtm
->rtm_scope
== RT_SCOPE_NOWHERE
)
810 rtm
->rtm_scope
= RT_SCOPE_LINK
;
812 if (r
->rt_flags
&(RTF_MTU
|RTF_WINDOW
|RTF_IRTT
)) {
814 struct rtattr
*mx
= kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL
);
818 mx
->rta_type
= RTA_METRICS
;
819 mx
->rta_len
= RTA_LENGTH(0);
820 if (r
->rt_flags
&RTF_MTU
) {
821 rec
= (void*)((char*)mx
+ RTA_ALIGN(mx
->rta_len
));
822 rec
->rta_type
= RTAX_ADVMSS
;
823 rec
->rta_len
= RTA_LENGTH(4);
824 mx
->rta_len
+= RTA_LENGTH(4);
825 *(u32
*)RTA_DATA(rec
) = r
->rt_mtu
- 40;
827 if (r
->rt_flags
&RTF_WINDOW
) {
828 rec
= (void*)((char*)mx
+ RTA_ALIGN(mx
->rta_len
));
829 rec
->rta_type
= RTAX_WINDOW
;
830 rec
->rta_len
= RTA_LENGTH(4);
831 mx
->rta_len
+= RTA_LENGTH(4);
832 *(u32
*)RTA_DATA(rec
) = r
->rt_window
;
834 if (r
->rt_flags
&RTF_IRTT
) {
835 rec
= (void*)((char*)mx
+ RTA_ALIGN(mx
->rta_len
));
836 rec
->rta_type
= RTAX_RTT
;
837 rec
->rta_len
= RTA_LENGTH(4);
838 mx
->rta_len
+= RTA_LENGTH(4);
839 *(u32
*)RTA_DATA(rec
) = r
->rt_irtt
<<3;
849 - local address disappeared -> we must delete all the entries
851 - device went down -> we must shutdown all nexthops going via it.
854 int fib_sync_down(u32 local
, struct net_device
*dev
, int force
)
857 int scope
= RT_SCOPE_NOWHERE
;
863 if (local
&& fi
->fib_prefsrc
== local
) {
864 fi
->fib_flags
|= RTNH_F_DEAD
;
866 } else if (dev
&& fi
->fib_nhs
) {
869 change_nexthops(fi
) {
870 if (nh
->nh_flags
&RTNH_F_DEAD
)
872 else if (nh
->nh_dev
== dev
&&
873 nh
->nh_scope
!= scope
) {
874 nh
->nh_flags
|= RTNH_F_DEAD
;
875 #ifdef CONFIG_IP_ROUTE_MULTIPATH
876 fi
->fib_power
-= nh
->nh_power
;
881 } endfor_nexthops(fi
)
882 if (dead
== fi
->fib_nhs
) {
883 fi
->fib_flags
|= RTNH_F_DEAD
;
891 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 Dead device goes up. We wake up dead nexthops.
895 It takes sense only on multipath routes.
898 int fib_sync_up(struct net_device
*dev
)
902 if (!(dev
->flags
&IFF_UP
))
908 change_nexthops(fi
) {
909 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
913 if (nh
->nh_dev
== NULL
|| !(nh
->nh_dev
->flags
&IFF_UP
))
915 if (nh
->nh_dev
!= dev
|| __in_dev_get(dev
) == NULL
)
919 nh
->nh_flags
&= ~RTNH_F_DEAD
;
920 } endfor_nexthops(fi
)
923 fi
->fib_flags
&= ~RTNH_F_DEAD
;
931 The algorithm is suboptimal, but it provides really
932 fair weighted route distribution.
935 void fib_select_multipath(const struct rt_key
*key
, struct fib_result
*res
)
937 struct fib_info
*fi
= res
->fi
;
940 if (fi
->fib_power
<= 0) {
942 change_nexthops(fi
) {
943 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
944 power
+= nh
->nh_weight
;
945 nh
->nh_power
= nh
->nh_weight
;
947 } endfor_nexthops(fi
);
948 fi
->fib_power
= power
;
951 printk(KERN_CRIT
"impossible 777\n");
958 /* w should be random number [0..fi->fib_power-1],
959 it is pretty bad approximation.
962 w
= jiffies
% fi
->fib_power
;
964 change_nexthops(fi
) {
965 if (!(nh
->nh_flags
&RTNH_F_DEAD
) && nh
->nh_power
) {
966 if ((w
-= nh
->nh_power
) <= 0) {
973 } endfor_nexthops(fi
);
976 printk(KERN_CRIT
"impossible 888\n");
983 #ifdef CONFIG_PROC_FS
985 static unsigned fib_flag_trans(int type
, int dead
, u32 mask
, struct fib_info
*fi
)
987 static unsigned type2flags
[RTN_MAX
+1] = {
988 0, 0, 0, 0, 0, 0, 0, RTF_REJECT
, RTF_REJECT
, 0, 0, 0
990 unsigned flags
= type2flags
[type
];
992 if (fi
&& fi
->fib_nh
->nh_gw
)
993 flags
|= RTF_GATEWAY
;
994 if (mask
== 0xFFFFFFFF)
1001 void fib_node_get_info(int type
, int dead
, struct fib_info
*fi
, u32 prefix
, u32 mask
, char *buffer
)
1004 unsigned flags
= fib_flag_trans(type
, dead
, mask
, fi
);
1007 len
= sprintf(buffer
, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1008 fi
->fib_dev
? fi
->fib_dev
->name
: "*", prefix
,
1009 fi
->fib_nh
->nh_gw
, flags
, 0, 0, fi
->fib_priority
,
1010 mask
, fi
->fib_advmss
+40, fi
->fib_window
, fi
->fib_rtt
>>3);
1012 len
= sprintf(buffer
, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1017 memset(buffer
+len
, ' ', 127-len
);