/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>

#include "fib_lookup.h"

static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) { int nhsel; const struct fib_nh *nh; \
	for (nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope that gcc will optimize this to get rid of the dummy loop */

#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh; \
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
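
/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to use the iterator macros above. for_nexthops()/change_nexthops()
 * open a block and declare nhsel plus nh/nexthop_nh; endfor_nexthops()
 * closes it. The hypothetical helper below just counts dead nexthops.
 */
#if 0	/* example only */
static int example_count_dead_nexthops(const struct fib_info *fi)
{
	int dead = 0;

	for_nexthops(fi) {
		if (nh->nh_flags & RTNH_F_DEAD)
			dead++;
	} endfor_nexthops(fi);

	return dead;
}
#endif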

static const struct
{
	int	error;
	u8	scope;
} fib_props[RTN_MAX + 1] = {
	{
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_UNSPEC */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNICAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},	/* RTN_LOCAL */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_BROADCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},	/* RTN_ANYCAST */
	{
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_MULTICAST */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_BLACKHOLE */
	{
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_UNREACHABLE */
	{
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_PROHIBIT */
	{
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},	/* RTN_THROW */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_NAT */
	{
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},	/* RTN_XRESOLVE */
};

/* Release a nexthop info record */

void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
		return;
	}
	change_nexthops(fi) {
		if (nexthop_nh->nh_dev)
			dev_put(nexthop_nh->nh_dev);
		nexthop_nh->nh_dev = NULL;
	} endfor_nexthops(fi);
	fib_info_cnt--;
	release_net(fi->fib_net);
	kfree(fi);
}

void fib_release_info(struct fib_info *fi)
{
	spin_lock_bh(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			if (!nexthop_nh->nh_dev)
				continue;
			hlist_del(&nexthop_nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	spin_unlock_bh(&fib_info_lock);
}

static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->nh_oif != onh->nh_oif ||
		    nh->nh_gw  != onh->nh_gw ||
		    nh->nh_scope != onh->nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->nh_weight != onh->nh_weight ||
#endif
#ifdef CONFIG_NET_CLS_ROUTE
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
			return -1;
		onh++;
	} endfor_nexthops(fi);
	return 0;
}

static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}
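
/*
 * Worked example (illustrative): DEVINDEX_HASHBITS is 8, so the mask is
 * 0xff and the function folds an ifindex byte-wise. For val = 0x1234:
 * (0x1234 ^ 0x12 ^ 0x00) & 0xff = 0x26, i.e. bucket 38.
 */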

static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= fi->fib_protocol;
	val ^= (__force u32)fi->fib_prefsrc;
	val ^= fi->fib_priority;
	for_nexthops(fi) {
		val ^= fib_devindex_hashfn(nh->nh_oif);
	} endfor_nexthops(fi)

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}

static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, node, head, fib_hash) {
		if (!net_eq(fi->fib_net, nfi->fib_net))
			continue;
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(fi->fib_metrics)) == 0 &&
		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}

/* Check that the gateway is already configured.
 * Used only by the redirect accept routine.
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	unsigned int hash;

	spin_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, node, head, nh_hash) {
		if (nh->nh_dev == dev &&
		    nh->nh_gw == gw &&
		    !(nh->nh_flags & RTNH_F_DEAD)) {
			spin_unlock(&fib_info_lock);
			return 0;
		}
	}

	spin_unlock(&fib_info_lock);

	return -1;
}

static inline size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4); /* RTA_PREFSRC */

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->fib_nhs) {
		/* Also handles the special case fib_nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size(fi->fib_nhs * nhsize);
	}

	return payload;
}
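
/*
 * Worked example (illustrative, assuming the usual netlink layout where
 * NLA_HDRLEN is 4 and nla_total_size(4) is 8): for a route with two
 * nexthops, each rtnexthop slot costs nla_total_size(8) = 12 bytes plus
 * 2 * 8 = 16 bytes for the optional RTA_GATEWAY/RTA_FLOW attributes,
 * i.e. 28 bytes per nexthop; the nested RTA_MULTIPATH wrapper then adds
 * nla_total_size(2 * 28) = 60 bytes on top of the fixed header estimate.
 */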

void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
			    fa->fa_type, fa->fa_scope, key, dst_len,
			    fa->fa_tos, fa->fa_info, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
		    info->nlh, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}

/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
{
	if (fah) {
		struct fib_alias *fa;
		list_for_each_entry(fa, fah, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}

int fib_detect_death(struct fib_info *fi, int order,
		     struct fib_info **last_resort, int *last_idx, int dflt)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state == NUD_REACHABLE)
		return 0;
	if ((state & NUD_VALID) && order != dflt)
		return 0;
	if ((state & NUD_VALID) ||
	    (*last_idx < 0 && order > dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
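
/*
 * For reference (layout assumed from <linux/rtnetlink.h>): the RTA_MULTIPATH
 * payload counted above is a packed sequence of
 *
 *	struct rtnexthop { len, flags, hops, ifindex }
 *	[ optional per-nexthop attributes: RTA_GATEWAY, RTA_FLOW ]
 *
 * repeated once per nexthop; rtnh_ok()/rtnh_next() walk that stream much
 * like nla_ok()/nla_next() walk an ordinary attribute stream.
 */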

static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg)
{
	change_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		nexthop_nh->nh_flags =
			(cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
		nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	return 0;
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *rtnh;
	int remaining;
#endif

	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
		return 1;

	if (cfg->fc_oif || cfg->fc_gw) {
		if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
		    (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
			return 0;
		return 1;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp == NULL)
		return 0;

	rtnh = cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	for_nexthops(fi) {
		int attrlen;

		if (!rtnh_ok(rtnh, remaining))
			return -EINVAL;

		if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
			return 1;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla && nla_get_be32(nla) != nh->nh_gw)
				return 1;
#ifdef CONFIG_NET_CLS_ROUTE
			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla && nla_get_u32(nla) != nh->nh_tclassid)
				return 1;
#endif
		}

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);
#endif

	return 0;
}

/*
   The semantics of nexthops are very messy for historical reasons.
   We have to take into account that:

   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct.
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr but by a direct route.
   c) if both gateway and interface are specified, they must not
      contradict each other.
   d) if we use tunnel routes, the gateway could be off-link.

   Attempting to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the amount of code barely
   grows but the result becomes much more general.

   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where the gw must have narrower scope. This recursion stops
   when the gw has LOCAL scope or when the "nexthop" is declared ONLINK,
   which means that the gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes to coexist
   in peace.

   Normally it looks like this:

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */
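
/*
 * Concrete example (illustrative, iproute2 syntax): with a link-scope
 * route "ip route add 192.168.1.0/24 dev eth0 scope link" installed,
 * "ip route add 10.0.0.0/8 via 192.168.1.1" resolves its gateway through
 * the link-scope prefix, which in turn terminates at the local address
 * on eth0 -- exactly the {universe} -> {link} -> {local} chain above.
 * Declaring the nexthop "onlink" skips the recursion and forces the
 * gateway to be treated as on-link.
 */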

static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
			struct fib_nh *nh)
{
	int err;
	struct net *net;

	net = cfg->fc_nlinfo.nl_net;
	if (nh->nh_gw) {
		struct fib_result res;

		if (nh->nh_flags & RTNH_F_ONLINK) {
			struct net_device *dev;

			if (cfg->fc_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags & IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		{
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						.daddr = nh->nh_gw,
						.scope = cfg->fc_scope + 1,
					},
				},
				.oif = nh->nh_oif,
			};

			/* It is not strictly necessary, but requires a bit of thinking */
			if (fl.fl4_scope < RT_SCOPE_LINK)
				fl.fl4_scope = RT_SCOPE_LINK;
			if ((err = fib_lookup(net, &fl, &res)) != 0)
				return err;
		}
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		fib_res_put(&res);
		return err;
	} else {
		struct in_device *in_dev;

		if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(net, nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags & IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}

static inline unsigned int fib_laddr_hashfn(__be32 val)
{
	unsigned int mask = (fib_hash_size - 1);

	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
}

static struct hlist_head *fib_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kzalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
}

static void fib_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}
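
/*
 * Sizing note (illustrative, assuming 4 KiB pages): get_order() rounds up
 * to the nearest power-of-two number of pages, so a 5000-byte table is
 * order 1, i.e. two zeroed pages, while anything up to PAGE_SIZE stays on
 * the kzalloc() path above.
 */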

static void fib_hash_move(struct hlist_head *new_info_hash,
			  struct hlist_head *new_laddrhash,
			  unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_hash_size;
	unsigned int i, bytes;

	spin_lock_bh(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			hlist_del(&fi->fib_hash);

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *node, *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			hlist_del(&fi->fib_lhash);

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	spin_unlock_bh(&fib_info_lock);

	bytes = old_size * sizeof(struct hlist_head *);
	fib_hash_free(old_info_hash, bytes);
	fib_hash_free(old_laddrhash, bytes);
}

struct fib_info *fib_create_info(struct fib_config *cfg)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	/* Fast check to catch the weirdest cases */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	err = -ENOBUFS;
	if (fib_info_cnt >= fib_hash_size) {
		unsigned int new_size = fib_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
		unsigned int bytes;

		if (!new_size)
			new_size = 1;
		bytes = new_size * sizeof(struct hlist_head *);
		new_info_hash = fib_hash_alloc(bytes);
		new_laddrhash = fib_hash_alloc(bytes);
		if (!new_info_hash || !new_laddrhash) {
			fib_hash_free(new_info_hash, bytes);
			fib_hash_free(new_laddrhash, bytes);
		} else
			fib_hash_move(new_info_hash, new_laddrhash, new_size);

		if (!fib_hash_size)
			goto failure;
	}

	fi = kzalloc(sizeof(*fi) + nhs * sizeof(struct fib_nh), GFP_KERNEL);
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;

	fi->fib_net = hold_net(net);
	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;

	fi->fib_nhs = nhs;
	change_nexthops(fi) {
		nexthop_nh->nh_parent = fi;
	} endfor_nexthops(fi)

	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	if (cfg->fc_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
		if (err != 0)
			goto failure;
		if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
			goto err_inval;
		if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
			goto err_inval;
#endif
#else
		goto err_inval;
#endif
	} else {
		struct fib_nh *nh = fi->fib_nh;

		nh->nh_oif = cfg->fc_oif;
		nh->nh_gw = cfg->fc_gw;
		nh->nh_flags = cfg->fc_flags;
#ifdef CONFIG_NET_CLS_ROUTE
		nh->nh_tclassid = cfg->fc_flow;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

	if (fib_props[cfg->fc_type].error) {
		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
			goto err_inval;
		goto link_it;
	}

	if (cfg->fc_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (cfg->fc_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		change_nexthops(fi) {
			if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
		    fi->fib_prefsrc != cfg->fc_dst)
			if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	spin_lock_bh(&fib_info_lock);
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
		hlist_add_head(&fi->fib_lhash, head);
	}
	change_nexthops(fi) {
		struct hlist_head *head;
		unsigned int hash;

		if (!nexthop_nh->nh_dev)
			continue;
		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
		head = &fib_info_devhash[hash];
		hlist_add_head(&nexthop_nh->nh_hash, head);
	} endfor_nexthops(fi)
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}

/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
		       struct fib_result *res, int prefixlen)
{
	struct fib_alias *fa;
	int nh_sel = 0;

	list_for_each_entry_rcu(fa, head, fa_list) {
		int err;

		if (fa->fa_tos &&
		    fa->fa_tos != flp->fl4_tos)
			continue;

		if (fa->fa_scope < flp->fl4_scope)
			continue;

		fa->fa_state |= FA_S_ACCESSED;

		err = fib_props[fa->fa_type].error;
		if (err == 0) {
			struct fib_info *fi = fa->fa_info;

			if (fi->fib_flags & RTNH_F_DEAD)
				continue;

			switch (fa->fa_type) {
			case RTN_UNICAST:
			case RTN_LOCAL:
			case RTN_BROADCAST:
			case RTN_ANYCAST:
			case RTN_MULTICAST:
				for_nexthops(fi) {
					if (nh->nh_flags & RTNH_F_DEAD)
						continue;
					if (!flp->oif || flp->oif == nh->nh_oif)
						break;
				}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				if (nhsel < fi->fib_nhs) {
					nh_sel = nhsel;
					goto out_fill_res;
				}
#else
				if (nhsel < 1)
					goto out_fill_res;
#endif
				endfor_nexthops(fi);
				continue;

			default:
				printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
				       fa->fa_type);
				return -EINVAL;
			}
		}
		return err;
	}
	return 1;

out_fill_res:
	res->prefixlen = prefixlen;
	res->nh_sel = nh_sel;
	res->type = fa->fa_type;
	res->scope = fa->fa_scope;
	res->fi = fa->fa_info;
	atomic_inc(&res->fi->fib_clntref);
	return 0;
}

/* Find an appropriate source address for this destination */

__be32 __fib_res_prefsrc(struct fib_result *res)
{
	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
}

int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
		  u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
		  struct fib_info *fi, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	if (tb_id < 256)
		rtm->rtm_table = tb_id;
	else
		rtm->rtm_table = RT_TABLE_COMPAT;
	NLA_PUT_U32(skb, RTA_TABLE, tb_id);
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	rtm->rtm_protocol = fi->fib_protocol;

	if (rtm->rtm_dst_len)
		NLA_PUT_BE32(skb, RTA_DST, dst);

	if (fi->fib_priority)
		NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);

	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc)
		NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);

	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);

		if (fi->fib_nh->nh_oif)
			NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
#ifdef CONFIG_NET_CLS_ROUTE
		if (fi->fib_nh[0].nh_tclassid)
			NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
#endif
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fi->fib_nhs > 1) {
		struct rtnexthop *rtnh;
		struct nlattr *mp;

		mp = nla_nest_start(skb, RTA_MULTIPATH);
		if (mp == NULL)
			goto nla_put_failure;

		for_nexthops(fi) {
			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (rtnh == NULL)
				goto nla_put_failure;

			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
			rtnh->rtnh_hops = nh->nh_weight - 1;
			rtnh->rtnh_ifindex = nh->nh_oif;

			if (nh->nh_gw)
				NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
#ifdef CONFIG_NET_CLS_ROUTE
			if (nh->nh_tclassid)
				NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
#endif
			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
		} endfor_nexthops(fi);

		nla_nest_end(skb, mp);
	}
#endif
	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
   Update FIB if:
   - local address disappeared -> we must delete all the entries
     referring to it.
   - device went down -> we must shutdown all nexthops going via it.
 */
int fib_sync_down_addr(struct net *net, __be32 local)
{
	int ret = 0;
	unsigned int hash = fib_laddr_hashfn(local);
	struct hlist_head *head = &fib_info_laddrhash[hash];
	struct hlist_node *node;
	struct fib_info *fi;

	if (fib_info_laddrhash == NULL || local == 0)
		return 0;

	hlist_for_each_entry(fi, node, head, fib_lhash) {
		if (!net_eq(fi->fib_net, net))
			continue;
		if (fi->fib_prefsrc == local) {
			fi->fib_flags |= RTNH_F_DEAD;
			ret++;
		}
	}
	return ret;
}

int fib_sync_down_dev(struct net_device *dev, int force)
{
	int ret = 0;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_info *prev_fi = NULL;
	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
	struct hlist_head *head = &fib_info_devhash[hash];
	struct hlist_node *node;
	struct fib_nh *nh;

	if (force)
		scope = -1;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->nh_dev == dev &&
				 nexthop_nh->nh_scope != scope) {
				nexthop_nh->nh_flags |= RTNH_F_DEAD;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
				spin_lock_bh(&fib_multipath_lock);
				fi->fib_power -= nexthop_nh->nh_power;
				nexthop_nh->nh_power = 0;
				spin_unlock_bh(&fib_multipath_lock);
#endif
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			if (force > 1 && nexthop_nh->nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		if (dead == fi->fib_nhs) {
			fi->fib_flags |= RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
   A dead device goes up. We wake up dead nexthops.
   This makes sense only for multipath routes.
 */
int fib_sync_up(struct net_device *dev)
{
	struct fib_info *prev_fi;
	unsigned int hash;
	struct hlist_head *head;
	struct hlist_node *node;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags & IFF_UP))
		return 0;

	prev_fi = NULL;
	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	ret = 0;

	hlist_for_each_entry(nh, node, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->nh_dev != dev || fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
				alive++;
				continue;
			}
			if (nexthop_nh->nh_dev == NULL ||
			    !(nexthop_nh->nh_dev->flags & IFF_UP))
				continue;
			if (nexthop_nh->nh_dev != dev ||
			    !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			spin_lock_bh(&fib_multipath_lock);
			nexthop_nh->nh_power = 0;
			nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
			spin_unlock_bh(&fib_multipath_lock);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~RTNH_F_DEAD;
			ret++;
		}
	}

	return ret;
}

/*
   The algorithm is suboptimal, but it provides really
   fair weighted route distribution.
 */
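
/*
 * Worked example: two live nexthops with nh_weight 2 and 1 start a round
 * with fib_power = 3 and nh_power = {2, 1}. Each call below decrements
 * the chosen nexthop's nh_power and fib_power, so over one full round of
 * three selections the first nexthop is picked twice and the second once,
 * matching the 2:1 weights before the counters are refilled.
 */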

void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
	struct fib_info *fi = res->fi;
	int w;

	spin_lock_bh(&fib_multipath_lock);
	if (fi->fib_power <= 0) {
		int power = 0;
		change_nexthops(fi) {
			if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
				power += nexthop_nh->nh_weight;
				nexthop_nh->nh_power = nexthop_nh->nh_weight;
			}
		} endfor_nexthops(fi);
		fi->fib_power = power;
		if (power <= 0) {
			spin_unlock_bh(&fib_multipath_lock);
			/* Race condition: route has just become dead. */
			res->nh_sel = 0;
			return;
		}
	}

	/* w should be a random number in [0..fi->fib_power-1];
	   jiffies is a pretty bad approximation.
	 */

	w = jiffies % fi->fib_power;

	change_nexthops(fi) {
		if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
		    nexthop_nh->nh_power) {
			if ((w -= nexthop_nh->nh_power) <= 0) {
				nexthop_nh->nh_power--;
				fi->fib_power--;
				res->nh_sel = nhsel;
				spin_unlock_bh(&fib_multipath_lock);
				return;
			}
		}
	} endfor_nexthops(fi);

	/* Race condition: route has just become dead. */
	res->nh_sel = 0;
	spin_unlock_bh(&fib_multipath_lock);
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */