2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * IPv4 Forwarding Information Base: semantics.
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 #include <net/netlink.h>
47 #include <net/nexthop.h>
49 #include "fib_lookup.h"
51 #define FSprintk(a...)
53 static DEFINE_SPINLOCK(fib_info_lock
);
54 static struct hlist_head
*fib_info_hash
;
55 static struct hlist_head
*fib_info_laddrhash
;
56 static unsigned int fib_hash_size
;
57 static unsigned int fib_info_cnt
;
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash
[DEVINDEX_HASHSIZE
];
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * two locals, `nhsel` (index) and `nh` (current nexthop), followed by a
 * `for` header.  The caller supplies the loop body and MUST close the scope
 * with endfor_nexthops(fi):
 *
 *	for_nexthops(fi) {
 *		... use nh / nhsel ...
 *	} endfor_nexthops(fi);
 *
 * for_nexthops() yields a const nexthop pointer, change_nexthops() a
 * writable one (it casts the const away from fi->fib_nh).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

/* Without multipath the loop body runs exactly once, on fi->fib_nh[0]. */
#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
92 } fib_props
[RTN_MAX
+ 1] = {
95 .scope
= RT_SCOPE_NOWHERE
,
99 .scope
= RT_SCOPE_UNIVERSE
,
103 .scope
= RT_SCOPE_HOST
,
107 .scope
= RT_SCOPE_LINK
,
108 }, /* RTN_BROADCAST */
111 .scope
= RT_SCOPE_LINK
,
115 .scope
= RT_SCOPE_UNIVERSE
,
116 }, /* RTN_MULTICAST */
119 .scope
= RT_SCOPE_UNIVERSE
,
120 }, /* RTN_BLACKHOLE */
122 .error
= -EHOSTUNREACH
,
123 .scope
= RT_SCOPE_UNIVERSE
,
124 }, /* RTN_UNREACHABLE */
127 .scope
= RT_SCOPE_UNIVERSE
,
128 }, /* RTN_PROHIBIT */
131 .scope
= RT_SCOPE_UNIVERSE
,
135 .scope
= RT_SCOPE_NOWHERE
,
139 .scope
= RT_SCOPE_NOWHERE
,
140 }, /* RTN_XRESOLVE */
144 /* Release a nexthop info record */
146 void free_fib_info(struct fib_info
*fi
)
148 if (fi
->fib_dead
== 0) {
149 printk("Freeing alive fib_info %p\n", fi
);
152 change_nexthops(fi
) {
156 } endfor_nexthops(fi
);
161 void fib_release_info(struct fib_info
*fi
)
163 spin_lock_bh(&fib_info_lock
);
164 if (fi
&& --fi
->fib_treeref
== 0) {
165 hlist_del(&fi
->fib_hash
);
167 hlist_del(&fi
->fib_lhash
);
168 change_nexthops(fi
) {
171 hlist_del(&nh
->nh_hash
);
172 } endfor_nexthops(fi
)
176 spin_unlock_bh(&fib_info_lock
);
179 static __inline__
int nh_comp(const struct fib_info
*fi
, const struct fib_info
*ofi
)
181 const struct fib_nh
*onh
= ofi
->fib_nh
;
184 if (nh
->nh_oif
!= onh
->nh_oif
||
185 nh
->nh_gw
!= onh
->nh_gw
||
186 nh
->nh_scope
!= onh
->nh_scope
||
187 #ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh
->nh_weight
!= onh
->nh_weight
||
190 #ifdef CONFIG_NET_CLS_ROUTE
191 nh
->nh_tclassid
!= onh
->nh_tclassid
||
193 ((nh
->nh_flags
^onh
->nh_flags
)&~RTNH_F_DEAD
))
196 } endfor_nexthops(fi
);
200 static inline unsigned int fib_info_hashfn(const struct fib_info
*fi
)
202 unsigned int mask
= (fib_hash_size
- 1);
203 unsigned int val
= fi
->fib_nhs
;
205 val
^= fi
->fib_protocol
;
206 val
^= (__force u32
)fi
->fib_prefsrc
;
207 val
^= fi
->fib_priority
;
209 return (val
^ (val
>> 7) ^ (val
>> 12)) & mask
;
212 static struct fib_info
*fib_find_info(const struct fib_info
*nfi
)
214 struct hlist_head
*head
;
215 struct hlist_node
*node
;
219 hash
= fib_info_hashfn(nfi
);
220 head
= &fib_info_hash
[hash
];
222 hlist_for_each_entry(fi
, node
, head
, fib_hash
) {
223 if (fi
->fib_nhs
!= nfi
->fib_nhs
)
225 if (nfi
->fib_protocol
== fi
->fib_protocol
&&
226 nfi
->fib_prefsrc
== fi
->fib_prefsrc
&&
227 nfi
->fib_priority
== fi
->fib_priority
&&
228 memcmp(nfi
->fib_metrics
, fi
->fib_metrics
,
229 sizeof(fi
->fib_metrics
)) == 0 &&
230 ((nfi
->fib_flags
^fi
->fib_flags
)&~RTNH_F_DEAD
) == 0 &&
231 (nfi
->fib_nhs
== 0 || nh_comp(fi
, nfi
) == 0))
238 static inline unsigned int fib_devindex_hashfn(unsigned int val
)
240 unsigned int mask
= DEVINDEX_HASHSIZE
- 1;
243 (val
>> DEVINDEX_HASHBITS
) ^
244 (val
>> (DEVINDEX_HASHBITS
* 2))) & mask
;
/* Check that the gateway is already configured.
   Used only by the redirect accept routine.
 */
251 int ip_fib_check_default(__be32 gw
, struct net_device
*dev
)
253 struct hlist_head
*head
;
254 struct hlist_node
*node
;
258 spin_lock(&fib_info_lock
);
260 hash
= fib_devindex_hashfn(dev
->ifindex
);
261 head
= &fib_info_devhash
[hash
];
262 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
263 if (nh
->nh_dev
== dev
&&
265 !(nh
->nh_flags
&RTNH_F_DEAD
)) {
266 spin_unlock(&fib_info_lock
);
271 spin_unlock(&fib_info_lock
);
276 static inline size_t fib_nlmsg_size(struct fib_info
*fi
)
278 size_t payload
= NLMSG_ALIGN(sizeof(struct rtmsg
))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
284 /* space for nested metrics */
285 payload
+= nla_total_size((RTAX_MAX
* nla_total_size(4)));
288 /* Also handles the special case fib_nhs == 1 */
290 /* each nexthop is packed in an attribute */
291 size_t nhsize
= nla_total_size(sizeof(struct rtnexthop
));
293 /* may contain flow and gateway attribute */
294 nhsize
+= 2 * nla_total_size(4);
296 /* all nexthops are packed in a nested attribute */
297 payload
+= nla_total_size(fi
->fib_nhs
* nhsize
);
303 void rtmsg_fib(int event
, __be32 key
, struct fib_alias
*fa
,
304 int dst_len
, u32 tb_id
, struct nl_info
*info
,
305 unsigned int nlm_flags
)
308 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
311 skb
= nlmsg_new(fib_nlmsg_size(fa
->fa_info
), GFP_KERNEL
);
315 err
= fib_dump_info(skb
, info
->pid
, seq
, event
, tb_id
,
316 fa
->fa_type
, fa
->fa_scope
, key
, dst_len
,
317 fa
->fa_tos
, fa
->fa_info
, nlm_flags
);
319 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
320 WARN_ON(err
== -EMSGSIZE
);
324 err
= rtnl_notify(skb
, info
->pid
, RTNLGRP_IPV4_ROUTE
,
325 info
->nlh
, GFP_KERNEL
);
328 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE
, err
);
/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
334 struct fib_alias
*fib_find_alias(struct list_head
*fah
, u8 tos
, u32 prio
)
337 struct fib_alias
*fa
;
338 list_for_each_entry(fa
, fah
, fa_list
) {
339 if (fa
->fa_tos
> tos
)
341 if (fa
->fa_info
->fib_priority
>= prio
||
349 int fib_detect_death(struct fib_info
*fi
, int order
,
350 struct fib_info
**last_resort
, int *last_idx
, int *dflt
)
353 int state
= NUD_NONE
;
355 n
= neigh_lookup(&arp_tbl
, &fi
->fib_nh
[0].nh_gw
, fi
->fib_dev
);
357 state
= n
->nud_state
;
360 if (state
==NUD_REACHABLE
)
362 if ((state
&NUD_VALID
) && order
!= *dflt
)
364 if ((state
&NUD_VALID
) ||
365 (*last_idx
<0 && order
> *dflt
)) {
372 #ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Count the rtnexthop entries in a multipath attribute payload.
 *
 * @rtnh:      first nexthop entry
 * @remaining: number of payload bytes available starting at @rtnh
 *
 * Steps through the entries with rtnh_ok()/rtnh_next().  Returns the number
 * of entries, or 0 when bytes are left over after the last well-formed
 * entry, since that means the configuration is malformed.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
387 static int fib_get_nhs(struct fib_info
*fi
, struct rtnexthop
*rtnh
,
388 int remaining
, struct fib_config
*cfg
)
390 change_nexthops(fi
) {
393 if (!rtnh_ok(rtnh
, remaining
))
396 nh
->nh_flags
= (cfg
->fc_flags
& ~0xFF) | rtnh
->rtnh_flags
;
397 nh
->nh_oif
= rtnh
->rtnh_ifindex
;
398 nh
->nh_weight
= rtnh
->rtnh_hops
+ 1;
400 attrlen
= rtnh_attrlen(rtnh
);
402 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
404 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
405 nh
->nh_gw
= nla
? nla_get_be32(nla
) : 0;
406 #ifdef CONFIG_NET_CLS_ROUTE
407 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
408 nh
->nh_tclassid
= nla
? nla_get_u32(nla
) : 0;
412 rtnh
= rtnh_next(rtnh
, &remaining
);
413 } endfor_nexthops(fi
);
420 int fib_nh_match(struct fib_config
*cfg
, struct fib_info
*fi
)
422 #ifdef CONFIG_IP_ROUTE_MULTIPATH
423 struct rtnexthop
*rtnh
;
427 if (cfg
->fc_priority
&& cfg
->fc_priority
!= fi
->fib_priority
)
430 if (cfg
->fc_oif
|| cfg
->fc_gw
) {
431 if ((!cfg
->fc_oif
|| cfg
->fc_oif
== fi
->fib_nh
->nh_oif
) &&
432 (!cfg
->fc_gw
|| cfg
->fc_gw
== fi
->fib_nh
->nh_gw
))
437 #ifdef CONFIG_IP_ROUTE_MULTIPATH
438 if (cfg
->fc_mp
== NULL
)
442 remaining
= cfg
->fc_mp_len
;
447 if (!rtnh_ok(rtnh
, remaining
))
450 if (rtnh
->rtnh_ifindex
&& rtnh
->rtnh_ifindex
!= nh
->nh_oif
)
453 attrlen
= rtnh_attrlen(rtnh
);
455 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
457 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
458 if (nla
&& nla_get_be32(nla
) != nh
->nh_gw
)
460 #ifdef CONFIG_NET_CLS_ROUTE
461 nla
= nla_find(attrs
, attrlen
, RTA_FLOW
);
462 if (nla
&& nla_get_u32(nla
) != nh
->nh_tclassid
)
467 rtnh
= rtnh_next(rtnh
, &remaining
);
468 } endfor_nexthops(fi
);
/*
   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) if both gateway and interface are specified, they should not
      contradict each other.
   d) if we use tunnel routes, the gateway may not be on-link.

   Attempting to reconcile all of these (alas, self-contradictory) conditions
   results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
518 static int fib_check_nh(struct fib_config
*cfg
, struct fib_info
*fi
,
524 struct fib_result res
;
526 #ifdef CONFIG_IP_ROUTE_PERVASIVE
527 if (nh
->nh_flags
&RTNH_F_PERVASIVE
)
530 if (nh
->nh_flags
&RTNH_F_ONLINK
) {
531 struct net_device
*dev
;
533 if (cfg
->fc_scope
>= RT_SCOPE_LINK
)
535 if (inet_addr_type(nh
->nh_gw
) != RTN_UNICAST
)
537 if ((dev
= __dev_get_by_index(nh
->nh_oif
)) == NULL
)
539 if (!(dev
->flags
&IFF_UP
))
543 nh
->nh_scope
= RT_SCOPE_LINK
;
551 .scope
= cfg
->fc_scope
+ 1,
557 /* It is not necessary, but requires a bit of thinking */
558 if (fl
.fl4_scope
< RT_SCOPE_LINK
)
559 fl
.fl4_scope
= RT_SCOPE_LINK
;
560 if ((err
= fib_lookup(&fl
, &res
)) != 0)
564 if (res
.type
!= RTN_UNICAST
&& res
.type
!= RTN_LOCAL
)
566 nh
->nh_scope
= res
.scope
;
567 nh
->nh_oif
= FIB_RES_OIF(res
);
568 if ((nh
->nh_dev
= FIB_RES_DEV(res
)) == NULL
)
570 dev_hold(nh
->nh_dev
);
572 if (!(nh
->nh_dev
->flags
& IFF_UP
))
579 struct in_device
*in_dev
;
581 if (nh
->nh_flags
&(RTNH_F_PERVASIVE
|RTNH_F_ONLINK
))
584 in_dev
= inetdev_by_index(nh
->nh_oif
);
587 if (!(in_dev
->dev
->flags
&IFF_UP
)) {
591 nh
->nh_dev
= in_dev
->dev
;
592 dev_hold(nh
->nh_dev
);
593 nh
->nh_scope
= RT_SCOPE_HOST
;
599 static inline unsigned int fib_laddr_hashfn(__be32 val
)
601 unsigned int mask
= (fib_hash_size
- 1);
603 return ((__force u32
)val
^ ((__force u32
)val
>> 7) ^ ((__force u32
)val
>> 14)) & mask
;
606 static struct hlist_head
*fib_hash_alloc(int bytes
)
608 if (bytes
<= PAGE_SIZE
)
609 return kmalloc(bytes
, GFP_KERNEL
);
611 return (struct hlist_head
*)
612 __get_free_pages(GFP_KERNEL
, get_order(bytes
));
615 static void fib_hash_free(struct hlist_head
*hash
, int bytes
)
620 if (bytes
<= PAGE_SIZE
)
623 free_pages((unsigned long) hash
, get_order(bytes
));
626 static void fib_hash_move(struct hlist_head
*new_info_hash
,
627 struct hlist_head
*new_laddrhash
,
628 unsigned int new_size
)
630 struct hlist_head
*old_info_hash
, *old_laddrhash
;
631 unsigned int old_size
= fib_hash_size
;
632 unsigned int i
, bytes
;
634 spin_lock_bh(&fib_info_lock
);
635 old_info_hash
= fib_info_hash
;
636 old_laddrhash
= fib_info_laddrhash
;
637 fib_hash_size
= new_size
;
639 for (i
= 0; i
< old_size
; i
++) {
640 struct hlist_head
*head
= &fib_info_hash
[i
];
641 struct hlist_node
*node
, *n
;
644 hlist_for_each_entry_safe(fi
, node
, n
, head
, fib_hash
) {
645 struct hlist_head
*dest
;
646 unsigned int new_hash
;
648 hlist_del(&fi
->fib_hash
);
650 new_hash
= fib_info_hashfn(fi
);
651 dest
= &new_info_hash
[new_hash
];
652 hlist_add_head(&fi
->fib_hash
, dest
);
655 fib_info_hash
= new_info_hash
;
657 for (i
= 0; i
< old_size
; i
++) {
658 struct hlist_head
*lhead
= &fib_info_laddrhash
[i
];
659 struct hlist_node
*node
, *n
;
662 hlist_for_each_entry_safe(fi
, node
, n
, lhead
, fib_lhash
) {
663 struct hlist_head
*ldest
;
664 unsigned int new_hash
;
666 hlist_del(&fi
->fib_lhash
);
668 new_hash
= fib_laddr_hashfn(fi
->fib_prefsrc
);
669 ldest
= &new_laddrhash
[new_hash
];
670 hlist_add_head(&fi
->fib_lhash
, ldest
);
673 fib_info_laddrhash
= new_laddrhash
;
675 spin_unlock_bh(&fib_info_lock
);
677 bytes
= old_size
* sizeof(struct hlist_head
*);
678 fib_hash_free(old_info_hash
, bytes
);
679 fib_hash_free(old_laddrhash
, bytes
);
682 struct fib_info
*fib_create_info(struct fib_config
*cfg
)
685 struct fib_info
*fi
= NULL
;
686 struct fib_info
*ofi
;
689 /* Fast check to catch the most weird cases */
690 if (fib_props
[cfg
->fc_type
].scope
> cfg
->fc_scope
)
693 #ifdef CONFIG_IP_ROUTE_MULTIPATH
695 nhs
= fib_count_nexthops(cfg
->fc_mp
, cfg
->fc_mp_len
);
700 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
701 if (cfg
->fc_mp_alg
) {
702 if (cfg
->fc_mp_alg
< IP_MP_ALG_NONE
||
703 cfg
->fc_mp_alg
> IP_MP_ALG_MAX
)
709 if (fib_info_cnt
>= fib_hash_size
) {
710 unsigned int new_size
= fib_hash_size
<< 1;
711 struct hlist_head
*new_info_hash
;
712 struct hlist_head
*new_laddrhash
;
717 bytes
= new_size
* sizeof(struct hlist_head
*);
718 new_info_hash
= fib_hash_alloc(bytes
);
719 new_laddrhash
= fib_hash_alloc(bytes
);
720 if (!new_info_hash
|| !new_laddrhash
) {
721 fib_hash_free(new_info_hash
, bytes
);
722 fib_hash_free(new_laddrhash
, bytes
);
724 memset(new_info_hash
, 0, bytes
);
725 memset(new_laddrhash
, 0, bytes
);
727 fib_hash_move(new_info_hash
, new_laddrhash
, new_size
);
734 fi
= kzalloc(sizeof(*fi
)+nhs
*sizeof(struct fib_nh
), GFP_KERNEL
);
739 fi
->fib_protocol
= cfg
->fc_protocol
;
740 fi
->fib_flags
= cfg
->fc_flags
;
741 fi
->fib_priority
= cfg
->fc_priority
;
742 fi
->fib_prefsrc
= cfg
->fc_prefsrc
;
745 change_nexthops(fi
) {
747 } endfor_nexthops(fi
)
753 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
754 int type
= nla
->nla_type
;
759 fi
->fib_metrics
[type
- 1] = nla_get_u32(nla
);
765 #ifdef CONFIG_IP_ROUTE_MULTIPATH
766 err
= fib_get_nhs(fi
, cfg
->fc_mp
, cfg
->fc_mp_len
, cfg
);
769 if (cfg
->fc_oif
&& fi
->fib_nh
->nh_oif
!= cfg
->fc_oif
)
771 if (cfg
->fc_gw
&& fi
->fib_nh
->nh_gw
!= cfg
->fc_gw
)
773 #ifdef CONFIG_NET_CLS_ROUTE
774 if (cfg
->fc_flow
&& fi
->fib_nh
->nh_tclassid
!= cfg
->fc_flow
)
781 struct fib_nh
*nh
= fi
->fib_nh
;
783 nh
->nh_oif
= cfg
->fc_oif
;
784 nh
->nh_gw
= cfg
->fc_gw
;
785 nh
->nh_flags
= cfg
->fc_flags
;
786 #ifdef CONFIG_NET_CLS_ROUTE
787 nh
->nh_tclassid
= cfg
->fc_flow
;
789 #ifdef CONFIG_IP_ROUTE_MULTIPATH
794 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
795 fi
->fib_mp_alg
= cfg
->fc_mp_alg
;
798 if (fib_props
[cfg
->fc_type
].error
) {
799 if (cfg
->fc_gw
|| cfg
->fc_oif
|| cfg
->fc_mp
)
804 if (cfg
->fc_scope
> RT_SCOPE_HOST
)
807 if (cfg
->fc_scope
== RT_SCOPE_HOST
) {
808 struct fib_nh
*nh
= fi
->fib_nh
;
810 /* Local address is added. */
811 if (nhs
!= 1 || nh
->nh_gw
)
813 nh
->nh_scope
= RT_SCOPE_NOWHERE
;
814 nh
->nh_dev
= dev_get_by_index(fi
->fib_nh
->nh_oif
);
816 if (nh
->nh_dev
== NULL
)
819 change_nexthops(fi
) {
820 if ((err
= fib_check_nh(cfg
, fi
, nh
)) != 0)
822 } endfor_nexthops(fi
)
825 if (fi
->fib_prefsrc
) {
826 if (cfg
->fc_type
!= RTN_LOCAL
|| !cfg
->fc_dst
||
827 fi
->fib_prefsrc
!= cfg
->fc_dst
)
828 if (inet_addr_type(fi
->fib_prefsrc
) != RTN_LOCAL
)
833 if ((ofi
= fib_find_info(fi
)) != NULL
) {
841 atomic_inc(&fi
->fib_clntref
);
842 spin_lock_bh(&fib_info_lock
);
843 hlist_add_head(&fi
->fib_hash
,
844 &fib_info_hash
[fib_info_hashfn(fi
)]);
845 if (fi
->fib_prefsrc
) {
846 struct hlist_head
*head
;
848 head
= &fib_info_laddrhash
[fib_laddr_hashfn(fi
->fib_prefsrc
)];
849 hlist_add_head(&fi
->fib_lhash
, head
);
851 change_nexthops(fi
) {
852 struct hlist_head
*head
;
857 hash
= fib_devindex_hashfn(nh
->nh_dev
->ifindex
);
858 head
= &fib_info_devhash
[hash
];
859 hlist_add_head(&nh
->nh_hash
, head
);
860 } endfor_nexthops(fi
)
861 spin_unlock_bh(&fib_info_lock
);
876 /* Note! fib_semantic_match intentionally uses RCU list functions. */
877 int fib_semantic_match(struct list_head
*head
, const struct flowi
*flp
,
878 struct fib_result
*res
, __be32 zone
, __be32 mask
,
881 struct fib_alias
*fa
;
884 list_for_each_entry_rcu(fa
, head
, fa_list
) {
888 fa
->fa_tos
!= flp
->fl4_tos
)
891 if (fa
->fa_scope
< flp
->fl4_scope
)
894 fa
->fa_state
|= FA_S_ACCESSED
;
896 err
= fib_props
[fa
->fa_type
].error
;
898 struct fib_info
*fi
= fa
->fa_info
;
900 if (fi
->fib_flags
& RTNH_F_DEAD
)
903 switch (fa
->fa_type
) {
910 if (nh
->nh_flags
&RTNH_F_DEAD
)
912 if (!flp
->oif
|| flp
->oif
== nh
->nh_oif
)
915 #ifdef CONFIG_IP_ROUTE_MULTIPATH
916 if (nhsel
< fi
->fib_nhs
) {
929 printk(KERN_DEBUG
"impossible 102\n");
938 res
->prefixlen
= prefixlen
;
939 res
->nh_sel
= nh_sel
;
940 res
->type
= fa
->fa_type
;
941 res
->scope
= fa
->fa_scope
;
942 res
->fi
= fa
->fa_info
;
943 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
945 res
->network
= zone
& inet_make_mask(prefixlen
);
947 atomic_inc(&res
->fi
->fib_clntref
);
951 /* Find appropriate source address to this destination */
953 __be32
__fib_res_prefsrc(struct fib_result
*res
)
955 return inet_select_addr(FIB_RES_DEV(*res
), FIB_RES_GW(*res
), res
->scope
);
958 int fib_dump_info(struct sk_buff
*skb
, u32 pid
, u32 seq
, int event
,
959 u32 tb_id
, u8 type
, u8 scope
, __be32 dst
, int dst_len
, u8 tos
,
960 struct fib_info
*fi
, unsigned int flags
)
962 struct nlmsghdr
*nlh
;
965 nlh
= nlmsg_put(skb
, pid
, seq
, event
, sizeof(*rtm
), flags
);
969 rtm
= nlmsg_data(nlh
);
970 rtm
->rtm_family
= AF_INET
;
971 rtm
->rtm_dst_len
= dst_len
;
972 rtm
->rtm_src_len
= 0;
974 rtm
->rtm_table
= tb_id
;
975 NLA_PUT_U32(skb
, RTA_TABLE
, tb_id
);
976 rtm
->rtm_type
= type
;
977 rtm
->rtm_flags
= fi
->fib_flags
;
978 rtm
->rtm_scope
= scope
;
979 rtm
->rtm_protocol
= fi
->fib_protocol
;
981 if (rtm
->rtm_dst_len
)
982 NLA_PUT_BE32(skb
, RTA_DST
, dst
);
984 if (fi
->fib_priority
)
985 NLA_PUT_U32(skb
, RTA_PRIORITY
, fi
->fib_priority
);
987 if (rtnetlink_put_metrics(skb
, fi
->fib_metrics
) < 0)
988 goto nla_put_failure
;
991 NLA_PUT_BE32(skb
, RTA_PREFSRC
, fi
->fib_prefsrc
);
993 if (fi
->fib_nhs
== 1) {
994 if (fi
->fib_nh
->nh_gw
)
995 NLA_PUT_BE32(skb
, RTA_GATEWAY
, fi
->fib_nh
->nh_gw
);
997 if (fi
->fib_nh
->nh_oif
)
998 NLA_PUT_U32(skb
, RTA_OIF
, fi
->fib_nh
->nh_oif
);
999 #ifdef CONFIG_NET_CLS_ROUTE
1000 if (fi
->fib_nh
[0].nh_tclassid
)
1001 NLA_PUT_U32(skb
, RTA_FLOW
, fi
->fib_nh
[0].nh_tclassid
);
1004 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1005 if (fi
->fib_nhs
> 1) {
1006 struct rtnexthop
*rtnh
;
1009 mp
= nla_nest_start(skb
, RTA_MULTIPATH
);
1011 goto nla_put_failure
;
1014 rtnh
= nla_reserve_nohdr(skb
, sizeof(*rtnh
));
1016 goto nla_put_failure
;
1018 rtnh
->rtnh_flags
= nh
->nh_flags
& 0xFF;
1019 rtnh
->rtnh_hops
= nh
->nh_weight
- 1;
1020 rtnh
->rtnh_ifindex
= nh
->nh_oif
;
1023 NLA_PUT_BE32(skb
, RTA_GATEWAY
, nh
->nh_gw
);
1024 #ifdef CONFIG_NET_CLS_ROUTE
1025 if (nh
->nh_tclassid
)
1026 NLA_PUT_U32(skb
, RTA_FLOW
, nh
->nh_tclassid
);
1028 /* length of rtnetlink header + attributes */
1029 rtnh
->rtnh_len
= nlmsg_get_pos(skb
) - (void *) rtnh
;
1030 } endfor_nexthops(fi
);
1032 nla_nest_end(skb
, mp
);
1035 return nlmsg_end(skb
, nlh
);
1038 nlmsg_cancel(skb
, nlh
);
1044 - local address disappeared -> we must delete all the entries
1046 - device went down -> we must shutdown all nexthops going via it.
1049 int fib_sync_down(__be32 local
, struct net_device
*dev
, int force
)
1052 int scope
= RT_SCOPE_NOWHERE
;
1057 if (local
&& fib_info_laddrhash
) {
1058 unsigned int hash
= fib_laddr_hashfn(local
);
1059 struct hlist_head
*head
= &fib_info_laddrhash
[hash
];
1060 struct hlist_node
*node
;
1061 struct fib_info
*fi
;
1063 hlist_for_each_entry(fi
, node
, head
, fib_lhash
) {
1064 if (fi
->fib_prefsrc
== local
) {
1065 fi
->fib_flags
|= RTNH_F_DEAD
;
1072 struct fib_info
*prev_fi
= NULL
;
1073 unsigned int hash
= fib_devindex_hashfn(dev
->ifindex
);
1074 struct hlist_head
*head
= &fib_info_devhash
[hash
];
1075 struct hlist_node
*node
;
1078 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1079 struct fib_info
*fi
= nh
->nh_parent
;
1082 BUG_ON(!fi
->fib_nhs
);
1083 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1087 change_nexthops(fi
) {
1088 if (nh
->nh_flags
&RTNH_F_DEAD
)
1090 else if (nh
->nh_dev
== dev
&&
1091 nh
->nh_scope
!= scope
) {
1092 nh
->nh_flags
|= RTNH_F_DEAD
;
1093 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1094 spin_lock_bh(&fib_multipath_lock
);
1095 fi
->fib_power
-= nh
->nh_power
;
1097 spin_unlock_bh(&fib_multipath_lock
);
1101 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1102 if (force
> 1 && nh
->nh_dev
== dev
) {
1107 } endfor_nexthops(fi
)
1108 if (dead
== fi
->fib_nhs
) {
1109 fi
->fib_flags
|= RTNH_F_DEAD
;
1118 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1121 Dead device goes up. We wake up dead nexthops.
1122 It takes sense only on multipath routes.
1125 int fib_sync_up(struct net_device
*dev
)
1127 struct fib_info
*prev_fi
;
1129 struct hlist_head
*head
;
1130 struct hlist_node
*node
;
1134 if (!(dev
->flags
&IFF_UP
))
1138 hash
= fib_devindex_hashfn(dev
->ifindex
);
1139 head
= &fib_info_devhash
[hash
];
1142 hlist_for_each_entry(nh
, node
, head
, nh_hash
) {
1143 struct fib_info
*fi
= nh
->nh_parent
;
1146 BUG_ON(!fi
->fib_nhs
);
1147 if (nh
->nh_dev
!= dev
|| fi
== prev_fi
)
1152 change_nexthops(fi
) {
1153 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
1157 if (nh
->nh_dev
== NULL
|| !(nh
->nh_dev
->flags
&IFF_UP
))
1159 if (nh
->nh_dev
!= dev
|| !__in_dev_get_rtnl(dev
))
1162 spin_lock_bh(&fib_multipath_lock
);
1164 nh
->nh_flags
&= ~RTNH_F_DEAD
;
1165 spin_unlock_bh(&fib_multipath_lock
);
1166 } endfor_nexthops(fi
)
1169 fi
->fib_flags
&= ~RTNH_F_DEAD
;
1178 The algorithm is suboptimal, but it provides really
1179 fair weighted route distribution.
1182 void fib_select_multipath(const struct flowi
*flp
, struct fib_result
*res
)
1184 struct fib_info
*fi
= res
->fi
;
1187 spin_lock_bh(&fib_multipath_lock
);
1188 if (fi
->fib_power
<= 0) {
1190 change_nexthops(fi
) {
1191 if (!(nh
->nh_flags
&RTNH_F_DEAD
)) {
1192 power
+= nh
->nh_weight
;
1193 nh
->nh_power
= nh
->nh_weight
;
1195 } endfor_nexthops(fi
);
1196 fi
->fib_power
= power
;
1198 spin_unlock_bh(&fib_multipath_lock
);
1199 /* Race condition: route has just become dead. */
1206 /* w should be random number [0..fi->fib_power-1],
1207 it is pretty bad approximation.
1210 w
= jiffies
% fi
->fib_power
;
1212 change_nexthops(fi
) {
1213 if (!(nh
->nh_flags
&RTNH_F_DEAD
) && nh
->nh_power
) {
1214 if ((w
-= nh
->nh_power
) <= 0) {
1217 res
->nh_sel
= nhsel
;
1218 spin_unlock_bh(&fib_multipath_lock
);
1222 } endfor_nexthops(fi
);
1224 /* Race condition: route has just become dead. */
1226 spin_unlock_bh(&fib_multipath_lock
);