2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
51 #include <linux/rtnetlink.h>
53 #include <asm/uaccess.h>
56 #include <linux/sysctl.h>
59 /* Set to 3 to get tracing. */
63 #define RDBG(x) printk x
64 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
67 #define RT6_TRACE(x...) do { ; } while (0)
71 static int ip6_rt_max_size
= 4096;
72 static int ip6_rt_gc_min_interval
= 5*HZ
;
73 static int ip6_rt_gc_timeout
= 60*HZ
;
74 int ip6_rt_gc_interval
= 30*HZ
;
75 static int ip6_rt_gc_elasticity
= 9;
76 static int ip6_rt_mtu_expires
= 10*60*HZ
;
77 static int ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
79 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
80 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
81 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
82 static int ip6_dst_gc(void);
84 static int ip6_pkt_discard(struct sk_buff
*skb
);
85 static void ip6_link_failure(struct sk_buff
*skb
);
86 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
88 static struct dst_ops ip6_dst_ops
= {
90 .protocol
= __constant_htons(ETH_P_IPV6
),
93 .check
= ip6_dst_check
,
94 .negative_advice
= ip6_negative_advice
,
95 .link_failure
= ip6_link_failure
,
96 .update_pmtu
= ip6_rt_update_pmtu
,
97 .entry_size
= sizeof(struct rt6_info
),
100 struct rt6_info ip6_null_entry
= {
103 .__refcnt
= ATOMIC_INIT(1),
105 .dev
= &loopback_dev
,
107 .error
= -ENETUNREACH
,
108 .input
= ip6_pkt_discard
,
109 .output
= ip6_pkt_discard
,
111 .path
= (struct dst_entry
*)&ip6_null_entry
,
114 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
115 .rt6i_metric
= ~(u32
) 0,
116 .rt6i_hoplimit
= 255,
117 .rt6i_ref
= ATOMIC_INIT(1),
120 struct fib6_node ip6_routing_table
= {
121 NULL
, NULL
, NULL
, NULL
,
123 0, RTN_ROOT
|RTN_TL_ROOT
|RTN_RTINFO
, 0
126 /* Protects all the ip6 fib */
128 rwlock_t rt6_lock
= RW_LOCK_UNLOCKED
;
132 * Route lookup. Any rt6_lock is implied.
135 static __inline__
struct rt6_info
*rt6_device_match(struct rt6_info
*rt
,
139 struct rt6_info
*local
= NULL
;
140 struct rt6_info
*sprt
;
143 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
144 struct net_device
*dev
= sprt
->rt6i_dev
;
145 if (dev
->ifindex
== oif
)
147 if (dev
->flags
&IFF_LOOPBACK
)
155 return &ip6_null_entry
;
161 * pointer to the last default router chosen. BH is disabled locally.
163 static struct rt6_info
*rt6_dflt_pointer
= NULL
;
164 static spinlock_t rt6_dflt_lock
= SPIN_LOCK_UNLOCKED
;
166 /* Default Router Selection (RFC 2461 6.3.6) */
167 static struct rt6_info
*rt6_best_dflt(struct rt6_info
*rt
, int oif
)
169 struct rt6_info
*match
= NULL
;
170 struct rt6_info
*sprt
;
173 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
174 struct neighbour
*neigh
;
179 sprt
->rt6i_dev
->ifindex
== oif
))
182 if (sprt
== rt6_dflt_pointer
)
185 if ((neigh
= sprt
->rt6i_nexthop
) != NULL
) {
186 read_lock_bh(&neigh
->lock
);
187 switch (neigh
->nud_state
) {
205 read_unlock_bh(&neigh
->lock
);
208 read_unlock_bh(&neigh
->lock
);
213 if (m
> mpri
|| m
>= 12) {
217 /* we choose the latest default router if it
218 * is in a (probably) reachable state.
219 * If the route changed, we should do pmtu
220 * discovery. --yoshfuji
227 spin_lock(&rt6_dflt_lock
);
230 * No default routers are known to be reachable.
233 if (rt6_dflt_pointer
) {
234 for (sprt
= rt6_dflt_pointer
->u
.next
;
235 sprt
; sprt
= sprt
->u
.next
) {
236 if (sprt
->u
.dst
.obsolete
<= 0 &&
237 sprt
->u
.dst
.error
== 0) {
243 !match
&& sprt
&& sprt
!= rt6_dflt_pointer
;
244 sprt
= sprt
->u
.next
) {
245 if (sprt
->u
.dst
.obsolete
<= 0 &&
246 sprt
->u
.dst
.error
== 0) {
255 if (rt6_dflt_pointer
!= match
)
256 RT6_TRACE("changed default router: %p->%p\n",
257 rt6_dflt_pointer
, match
);
258 rt6_dflt_pointer
= match
;
260 spin_unlock(&rt6_dflt_lock
);
264 * Last Resort: if no default routers found,
265 * use addrconf default route.
266 * We don't record this route.
268 for (sprt
= ip6_routing_table
.leaf
;
269 sprt
; sprt
= sprt
->u
.next
) {
270 if ((sprt
->rt6i_flags
& RTF_DEFAULT
) &&
273 sprt
->rt6i_dev
->ifindex
== oif
))) {
279 /* no default route. give up. */
280 match
= &ip6_null_entry
;
287 struct rt6_info
*rt6_lookup(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
290 struct fib6_node
*fn
;
293 read_lock_bh(&rt6_lock
);
294 fn
= fib6_lookup(&ip6_routing_table
, daddr
, saddr
);
295 rt
= rt6_device_match(fn
->leaf
, oif
, strict
);
296 dst_hold(&rt
->u
.dst
);
298 read_unlock_bh(&rt6_lock
);
300 rt
->u
.dst
.lastuse
= jiffies
;
301 if (rt
->u
.dst
.error
== 0)
303 dst_release(&rt
->u
.dst
);
307 /* rt6_ins is called with FREE rt6_lock.
308 It takes new route entry, the addition fails by any reason the
309 route is freed. In any case, if caller does not hold it, it may
313 static int rt6_ins(struct rt6_info
*rt
)
317 write_lock_bh(&rt6_lock
);
318 err
= fib6_add(&ip6_routing_table
, rt
);
319 write_unlock_bh(&rt6_lock
);
324 /* No rt6_lock! If COW failed, the function returns a dead route entry
325 with dst->error set to the errno value.
328 static struct rt6_info
*rt6_cow(struct rt6_info
*ort
, struct in6_addr
*daddr
,
329 struct in6_addr
*saddr
)
338 rt
= ip6_rt_copy(ort
);
341 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
343 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
344 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
346 rt
->rt6i_dst
.plen
= 128;
347 rt
->rt6i_flags
|= RTF_CACHE
;
348 rt
->u
.dst
.flags
|= DST_HOST
;
350 #ifdef CONFIG_IPV6_SUBTREES
351 if (rt
->rt6i_src
.plen
&& saddr
) {
352 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
353 rt
->rt6i_src
.plen
= 128;
357 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
359 dst_clone(&rt
->u
.dst
);
365 rt
->u
.dst
.error
= err
;
369 dst_clone(&ip6_null_entry
.u
.dst
);
370 return &ip6_null_entry
;
373 #define BACKTRACK() \
374 if (rt == &ip6_null_entry && strict) { \
375 while ((fn = fn->parent) != NULL) { \
376 if (fn->fn_flags & RTN_ROOT) { \
377 dst_clone(&rt->u.dst); \
380 if (fn->fn_flags & RTN_RTINFO) \
386 void ip6_route_input(struct sk_buff
*skb
)
388 struct fib6_node
*fn
;
393 strict
= ipv6_addr_type(&skb
->nh
.ipv6h
->daddr
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
396 read_lock_bh(&rt6_lock
);
398 fn
= fib6_lookup(&ip6_routing_table
, &skb
->nh
.ipv6h
->daddr
,
399 &skb
->nh
.ipv6h
->saddr
);
404 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
405 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, strict
);
407 dst_clone(&rt
->u
.dst
);
411 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, 0);
414 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
415 read_unlock_bh(&rt6_lock
);
417 rt
= rt6_cow(rt
, &skb
->nh
.ipv6h
->daddr
,
418 &skb
->nh
.ipv6h
->saddr
);
420 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
422 /* Race condition! In the gap, when rt6_lock was
423 released someone could insert this route. Relookup.
427 dst_clone(&rt
->u
.dst
);
430 read_unlock_bh(&rt6_lock
);
432 rt
->u
.dst
.lastuse
= jiffies
;
434 skb
->dst
= (struct dst_entry
*) rt
;
437 struct dst_entry
* ip6_route_output(struct sock
*sk
, struct flowi
*fl
)
439 struct fib6_node
*fn
;
444 strict
= ipv6_addr_type(fl
->nl_u
.ip6_u
.daddr
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
447 read_lock_bh(&rt6_lock
);
449 fn
= fib6_lookup(&ip6_routing_table
, fl
->nl_u
.ip6_u
.daddr
,
450 fl
->nl_u
.ip6_u
.saddr
);
455 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
456 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
458 dst_clone(&rt
->u
.dst
);
461 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
462 if (rt
->rt6i_metric
>= IP6_RT_PRIO_ADDRCONF
)
463 rt
= rt6_best_dflt(rt
, fl
->oif
);
465 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
469 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
470 read_unlock_bh(&rt6_lock
);
472 rt
= rt6_cow(rt
, fl
->nl_u
.ip6_u
.daddr
,
473 fl
->nl_u
.ip6_u
.saddr
);
475 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
478 /* Race condition! In the gap, when rt6_lock was
479 released someone could insert this route. Relookup.
483 dst_clone(&rt
->u
.dst
);
486 read_unlock_bh(&rt6_lock
);
488 rt
->u
.dst
.lastuse
= jiffies
;
495 * Destination cache support functions
498 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
502 rt
= (struct rt6_info
*) dst
;
504 if (rt
&& rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
))
511 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
513 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
516 if (rt
->rt6i_flags
& RTF_CACHE
)
524 static void ip6_link_failure(struct sk_buff
*skb
)
528 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0, skb
->dev
);
530 rt
= (struct rt6_info
*) skb
->dst
;
532 if (rt
->rt6i_flags
&RTF_CACHE
) {
533 dst_set_expires(&rt
->u
.dst
, 0);
534 rt
->rt6i_flags
|= RTF_EXPIRES
;
535 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
536 rt
->rt6i_node
->fn_sernum
= -1;
540 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
542 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
544 if (mtu
< dst_pmtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
545 rt6
->rt6i_flags
|= RTF_MODIFIED
;
546 dst
->metrics
[RTAX_MTU
-1] = mtu
;
550 static int ip6_dst_gc()
552 static unsigned expire
= 30*HZ
;
553 static unsigned long last_gc
;
554 unsigned long now
= jiffies
;
556 if ((long)(now
- last_gc
) < ip6_rt_gc_min_interval
&&
557 atomic_read(&ip6_dst_ops
.entries
) <= ip6_rt_max_size
)
563 if (atomic_read(&ip6_dst_ops
.entries
) < ip6_dst_ops
.gc_thresh
)
564 expire
= ip6_rt_gc_timeout
>>1;
567 expire
-= expire
>>ip6_rt_gc_elasticity
;
568 return (atomic_read(&ip6_dst_ops
.entries
) > ip6_rt_max_size
);
571 /* Clean host part of a prefix. Not necessary in radix tree,
572 but results in cleaner routing tables.
574 Remove it only when all the things will work!
577 static void ipv6_wash_prefix(struct in6_addr
*pfx
, int plen
)
580 int o
= (plen
+ 7)>>3;
583 memset(pfx
->s6_addr
+ o
, 0, 16 - o
);
585 pfx
->s6_addr
[plen
>>3] &= (0xFF<<(8-b
));
588 static int ipv6_get_mtu(struct net_device
*dev
)
590 int mtu
= IPV6_MIN_MTU
;
591 struct inet6_dev
*idev
;
593 idev
= in6_dev_get(dev
);
595 mtu
= idev
->cnf
.mtu6
;
601 static int ipv6_get_hoplimit(struct net_device
*dev
)
603 int hoplimit
= ipv6_devconf
.hop_limit
;
604 struct inet6_dev
*idev
;
606 idev
= in6_dev_get(dev
);
608 hoplimit
= idev
->cnf
.hop_limit
;
618 int ip6_route_add(struct in6_rtmsg
*rtmsg
)
622 struct net_device
*dev
= NULL
;
625 if (rtmsg
->rtmsg_dst_len
> 128 || rtmsg
->rtmsg_src_len
> 128)
627 #ifndef CONFIG_IPV6_SUBTREES
628 if (rtmsg
->rtmsg_src_len
)
631 if (rtmsg
->rtmsg_metric
== 0)
632 rtmsg
->rtmsg_metric
= IP6_RT_PRIO_USER
;
634 rt
= dst_alloc(&ip6_dst_ops
);
639 rt
->u
.dst
.obsolete
= -1;
640 rt
->rt6i_expires
= rtmsg
->rtmsg_info
;
642 addr_type
= ipv6_addr_type(&rtmsg
->rtmsg_dst
);
644 if (addr_type
& IPV6_ADDR_MULTICAST
)
645 rt
->u
.dst
.input
= ip6_mc_input
;
647 rt
->u
.dst
.input
= ip6_forward
;
649 rt
->u
.dst
.output
= ip6_output
;
651 if (rtmsg
->rtmsg_ifindex
) {
652 dev
= dev_get_by_index(rtmsg
->rtmsg_ifindex
);
658 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, &rtmsg
->rtmsg_dst
);
659 rt
->rt6i_dst
.plen
= rtmsg
->rtmsg_dst_len
;
660 if (rt
->rt6i_dst
.plen
== 128)
661 rt
->u
.dst
.flags
= DST_HOST
;
662 ipv6_wash_prefix(&rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
664 #ifdef CONFIG_IPV6_SUBTREES
665 ipv6_addr_copy(&rt
->rt6i_src
.addr
, &rtmsg
->rtmsg_src
);
666 rt
->rt6i_src
.plen
= rtmsg
->rtmsg_src_len
;
667 ipv6_wash_prefix(&rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
670 rt
->rt6i_metric
= rtmsg
->rtmsg_metric
;
672 /* We cannot add true routes via loopback here,
673 they would result in kernel looping; promote them to reject routes
675 if ((rtmsg
->rtmsg_flags
&RTF_REJECT
) ||
676 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
))) {
681 rt
->u
.dst
.output
= ip6_pkt_discard
;
682 rt
->u
.dst
.input
= ip6_pkt_discard
;
683 rt
->u
.dst
.error
= -ENETUNREACH
;
684 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
688 if (rtmsg
->rtmsg_flags
& RTF_GATEWAY
) {
689 struct in6_addr
*gw_addr
;
692 gw_addr
= &rtmsg
->rtmsg_gateway
;
693 ipv6_addr_copy(&rt
->rt6i_gateway
, &rtmsg
->rtmsg_gateway
);
694 gwa_type
= ipv6_addr_type(gw_addr
);
696 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
697 struct rt6_info
*grt
;
699 /* IPv6 strictly forbids using non-link-local
700 addresses as the nexthop address.
701 Otherwise, the router will not be able to send redirects.
702 It is very good, but in some (rare!) circumstances
703 (SIT, PtP, NBMA NOARP links) it is handy to allow
704 some exceptions. --ANK
707 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
710 grt
= rt6_lookup(gw_addr
, NULL
, rtmsg
->rtmsg_ifindex
, 1);
716 if (dev
!= grt
->rt6i_dev
) {
717 dst_release(&grt
->u
.dst
);
724 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
726 dst_release(&grt
->u
.dst
);
732 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
740 if (rtmsg
->rtmsg_flags
& (RTF_GATEWAY
|RTF_NONEXTHOP
)) {
741 rt
->rt6i_nexthop
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
742 if (IS_ERR(rt
->rt6i_nexthop
)) {
743 err
= PTR_ERR(rt
->rt6i_nexthop
);
744 rt
->rt6i_nexthop
= NULL
;
749 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
))
750 rt
->rt6i_hoplimit
= IPV6_DEFAULT_MCASTHOPS
;
752 rt
->rt6i_hoplimit
= ipv6_get_hoplimit(dev
);
753 rt
->rt6i_flags
= rtmsg
->rtmsg_flags
;
756 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(dev
);
757 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = max_t(unsigned int, dst_pmtu(&rt
->u
.dst
) - 60, ip6_rt_min_advmss
);
758 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
759 MSS is 65535 - tcp_header_size. 65535 is also valid and
760 means: "any MSS, rely only on pmtu discovery"
762 if (dst_metric(&rt
->u
.dst
, RTAX_ADVMSS
) > 65535-20)
763 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = 65535;
770 dst_free((struct dst_entry
*) rt
);
774 int ip6_del_rt(struct rt6_info
*rt
)
778 write_lock_bh(&rt6_lock
);
780 spin_lock_bh(&rt6_dflt_lock
);
781 rt6_dflt_pointer
= NULL
;
782 spin_unlock_bh(&rt6_dflt_lock
);
784 dst_release(&rt
->u
.dst
);
787 write_unlock_bh(&rt6_lock
);
792 static int ip6_route_del(struct in6_rtmsg
*rtmsg
)
794 struct fib6_node
*fn
;
798 read_lock_bh(&rt6_lock
);
800 fn
= fib6_locate(&ip6_routing_table
,
801 &rtmsg
->rtmsg_dst
, rtmsg
->rtmsg_dst_len
,
802 &rtmsg
->rtmsg_src
, rtmsg
->rtmsg_src_len
);
805 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.next
) {
806 if (rtmsg
->rtmsg_ifindex
&&
807 (rt
->rt6i_dev
== NULL
||
808 rt
->rt6i_dev
->ifindex
!= rtmsg
->rtmsg_ifindex
))
810 if (rtmsg
->rtmsg_flags
&RTF_GATEWAY
&&
811 ipv6_addr_cmp(&rtmsg
->rtmsg_gateway
, &rt
->rt6i_gateway
))
813 if (rtmsg
->rtmsg_metric
&&
814 rtmsg
->rtmsg_metric
!= rt
->rt6i_metric
)
816 dst_clone(&rt
->u
.dst
);
817 read_unlock_bh(&rt6_lock
);
819 return ip6_del_rt(rt
);
822 read_unlock_bh(&rt6_lock
);
830 void rt6_redirect(struct in6_addr
*dest
, struct in6_addr
*saddr
,
831 struct neighbour
*neigh
, int on_link
)
833 struct rt6_info
*rt
, *nrt
;
835 /* Locate old route to this destination. */
836 rt
= rt6_lookup(dest
, NULL
, neigh
->dev
->ifindex
, 1);
841 if (neigh
->dev
!= rt
->rt6i_dev
)
844 /* Redirect received -> path was valid.
845 Look, redirects are sent only in response to data packets,
846 so that this nexthop apparently is reachable. --ANK
848 dst_confirm(&rt
->u
.dst
);
850 /* Duplicate redirect: silently ignore. */
851 if (neigh
== rt
->u
.dst
.neighbour
)
854 /* Current route is on-link; redirect is always invalid.
856 Seems, previous statement is not true. It could
857 be node, which looks for us as on-link (f.e. proxy ndisc)
858 But then router serving it might decide, that we should
859 know truth 8)8) --ANK (980726).
861 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
865 * RFC 1970 specifies that redirects should only be
866 * accepted if they come from the nexthop to the target.
867 * Due to the way default routers are chosen, this notion
868 * is a bit fuzzy and one might need to check all default
872 if (ipv6_addr_cmp(saddr
, &rt
->rt6i_gateway
)) {
873 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
874 struct rt6_info
*rt1
;
876 read_lock(&rt6_lock
);
877 for (rt1
= ip6_routing_table
.leaf
; rt1
; rt1
= rt1
->u
.next
) {
878 if (!ipv6_addr_cmp(saddr
, &rt1
->rt6i_gateway
)) {
879 dst_clone(&rt1
->u
.dst
);
880 dst_release(&rt
->u
.dst
);
881 read_unlock(&rt6_lock
);
886 read_unlock(&rt6_lock
);
889 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
890 "for redirect target\n");
897 * We have finally decided to accept it.
900 nrt
= ip6_rt_copy(rt
);
904 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
906 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
908 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
909 nrt
->rt6i_dst
.plen
= 128;
910 nrt
->u
.dst
.flags
|= DST_HOST
;
912 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
913 nrt
->rt6i_nexthop
= neigh_clone(neigh
);
914 /* Reset pmtu, it may be better */
915 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(neigh
->dev
);
916 nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = max_t(unsigned int, dst_pmtu(&nrt
->u
.dst
) - 60, ip6_rt_min_advmss
);
917 if (nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] > 65535-20)
918 nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = 65535;
919 nrt
->rt6i_hoplimit
= ipv6_get_hoplimit(neigh
->dev
);
924 if (rt
->rt6i_flags
&RTF_CACHE
) {
930 dst_release(&rt
->u
.dst
);
935 * Handle ICMP "packet too big" messages
936 * i.e. Path MTU discovery
939 void rt6_pmtu_discovery(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
940 struct net_device
*dev
, u32 pmtu
)
942 struct rt6_info
*rt
, *nrt
;
944 if (pmtu
< IPV6_MIN_MTU
) {
946 printk(KERN_DEBUG
"rt6_pmtu_discovery: invalid MTU value %d\n",
948 /* According to RFC1981, the PMTU is set to the IPv6 minimum
949 link MTU if the node receives a Packet Too Big message
950 reporting next-hop MTU that is less than the IPv6 minimum MTU.
955 rt
= rt6_lookup(daddr
, saddr
, dev
->ifindex
, 0);
960 if (pmtu
>= dst_pmtu(&rt
->u
.dst
))
963 /* New mtu received -> path was valid.
964 They are sent only in response to data packets,
965 so that this nexthop apparently is reachable. --ANK
967 dst_confirm(&rt
->u
.dst
);
969 /* Host route. If it is static, it would be better
970 not to override it, but add new one, so that
971 when cache entry will expire old pmtu
972 would return automatically.
974 if (rt
->rt6i_flags
& RTF_CACHE
) {
975 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
976 dst_set_expires(&rt
->u
.dst
, ip6_rt_mtu_expires
);
977 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
982 Two cases are possible:
983 1. It is connected route. Action: COW
984 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
986 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
987 nrt
= rt6_cow(rt
, daddr
, saddr
);
988 if (!nrt
->u
.dst
.error
) {
989 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
990 /* According to RFC 1981, detection of a PMTU increase shouldn't
991 happen within 5 mins; the recommended timer is 10 mins.
992 Here this route's expiration time is set to ip6_rt_mtu_expires,
993 which is 10 mins. After 10 mins the decreased pmtu expires
994 and detection of a PMTU increase happens automatically.
996 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
997 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
998 dst_release(&nrt
->u
.dst
);
1001 nrt
= ip6_rt_copy(rt
);
1004 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, daddr
);
1005 nrt
->rt6i_dst
.plen
= 128;
1006 nrt
->u
.dst
.flags
|= DST_HOST
;
1007 nrt
->rt6i_nexthop
= neigh_clone(rt
->rt6i_nexthop
);
1008 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1009 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_CACHE
|RTF_EXPIRES
;
1010 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1015 dst_release(&rt
->u
.dst
);
1019 * Misc support functions
1022 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1024 struct rt6_info
*rt
;
1026 rt
= dst_alloc(&ip6_dst_ops
);
1029 rt
->u
.dst
.input
= ort
->u
.dst
.input
;
1030 rt
->u
.dst
.output
= ort
->u
.dst
.output
;
1032 memcpy(rt
->u
.dst
.metrics
, ort
->u
.dst
.metrics
, RTAX_MAX
*sizeof(u32
));
1033 rt
->u
.dst
.dev
= ort
->u
.dst
.dev
;
1035 dev_hold(rt
->u
.dst
.dev
);
1036 rt
->u
.dst
.lastuse
= jiffies
;
1037 rt
->rt6i_hoplimit
= ort
->rt6i_hoplimit
;
1038 rt
->rt6i_expires
= 0;
1040 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1041 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1042 rt
->rt6i_metric
= 0;
1044 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1045 #ifdef CONFIG_IPV6_SUBTREES
1046 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1052 struct rt6_info
*rt6_get_dflt_router(struct in6_addr
*addr
, struct net_device
*dev
)
1054 struct rt6_info
*rt
;
1055 struct fib6_node
*fn
;
1057 fn
= &ip6_routing_table
;
1059 write_lock_bh(&rt6_lock
);
1060 for (rt
= fn
->leaf
; rt
; rt
=rt
->u
.next
) {
1061 if (dev
== rt
->rt6i_dev
&&
1062 ipv6_addr_cmp(&rt
->rt6i_gateway
, addr
) == 0)
1066 dst_clone(&rt
->u
.dst
);
1067 write_unlock_bh(&rt6_lock
);
1071 struct rt6_info
*rt6_add_dflt_router(struct in6_addr
*gwaddr
,
1072 struct net_device
*dev
)
1074 struct in6_rtmsg rtmsg
;
1076 memset(&rtmsg
, 0, sizeof(struct in6_rtmsg
));
1077 rtmsg
.rtmsg_type
= RTMSG_NEWROUTE
;
1078 ipv6_addr_copy(&rtmsg
.rtmsg_gateway
, gwaddr
);
1079 rtmsg
.rtmsg_metric
= 1024;
1080 rtmsg
.rtmsg_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
| RTF_UP
;
1082 rtmsg
.rtmsg_ifindex
= dev
->ifindex
;
1084 ip6_route_add(&rtmsg
);
1085 return rt6_get_dflt_router(gwaddr
, dev
);
1088 void rt6_purge_dflt_routers(int last_resort
)
1090 struct rt6_info
*rt
;
1094 flags
= RTF_ALLONLINK
;
1096 flags
= RTF_DEFAULT
| RTF_ADDRCONF
;
1099 read_lock_bh(&rt6_lock
);
1100 for (rt
= ip6_routing_table
.leaf
; rt
; rt
= rt
->u
.next
) {
1101 if (rt
->rt6i_flags
& flags
) {
1102 dst_hold(&rt
->u
.dst
);
1104 spin_lock_bh(&rt6_dflt_lock
);
1105 rt6_dflt_pointer
= NULL
;
1106 spin_unlock_bh(&rt6_dflt_lock
);
1108 read_unlock_bh(&rt6_lock
);
1115 read_unlock_bh(&rt6_lock
);
1118 int ipv6_route_ioctl(unsigned int cmd
, void *arg
)
1120 struct in6_rtmsg rtmsg
;
1124 case SIOCADDRT
: /* Add a route */
1125 case SIOCDELRT
: /* Delete a route */
1126 if (!capable(CAP_NET_ADMIN
))
1128 err
= copy_from_user(&rtmsg
, arg
,
1129 sizeof(struct in6_rtmsg
));
1136 err
= ip6_route_add(&rtmsg
);
1139 err
= ip6_route_del(&rtmsg
);
1153 * Drop the packet on the floor
1156 int ip6_pkt_discard(struct sk_buff
*skb
)
1158 IP6_INC_STATS(Ip6OutNoRoutes
);
1159 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0, skb
->dev
);
1168 int ip6_rt_addr_add(struct in6_addr
*addr
, struct net_device
*dev
)
1170 struct rt6_info
*rt
;
1172 rt
= dst_alloc(&ip6_dst_ops
);
1176 rt
->u
.dst
.flags
= DST_HOST
;
1177 rt
->u
.dst
.input
= ip6_input
;
1178 rt
->u
.dst
.output
= ip6_output
;
1179 rt
->rt6i_dev
= dev_get_by_name("lo");
1180 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
1181 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = max_t(unsigned int, dst_pmtu(&rt
->u
.dst
) - 60, ip6_rt_min_advmss
);
1182 if (rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] > 65535-20)
1183 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = 65535;
1184 rt
->rt6i_hoplimit
= ipv6_get_hoplimit(rt
->rt6i_dev
);
1185 rt
->u
.dst
.obsolete
= -1;
1187 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
1188 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
1189 if (rt
->rt6i_nexthop
== NULL
) {
1190 dst_free((struct dst_entry
*) rt
);
1194 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1195 rt
->rt6i_dst
.plen
= 128;
1201 /* Delete address. Warning: you should check that this address
1202 disappeared before calling this function.
1205 int ip6_rt_addr_del(struct in6_addr
*addr
, struct net_device
*dev
)
1207 struct rt6_info
*rt
;
1210 rt
= rt6_lookup(addr
, NULL
, loopback_dev
.ifindex
, 1);
1212 if (rt
->rt6i_dst
.plen
== 128)
1213 err
= ip6_del_rt(rt
);
1215 dst_release(&rt
->u
.dst
);
1221 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
1223 if (((void*)rt
->rt6i_dev
== arg
|| arg
== NULL
) &&
1224 rt
!= &ip6_null_entry
) {
1225 RT6_TRACE("deleted by ifdown %p\n", rt
);
1231 void rt6_ifdown(struct net_device
*dev
)
1233 write_lock_bh(&rt6_lock
);
1234 fib6_clean_tree(&ip6_routing_table
, fib6_ifdown
, 0, dev
);
1235 write_unlock_bh(&rt6_lock
);
1238 struct rt6_mtu_change_arg
1240 struct net_device
*dev
;
1244 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
1246 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
1247 struct inet6_dev
*idev
;
1249 /* In IPv6 pmtu discovery is not optional,
1250 so that RTAX_MTU lock cannot disable it.
1251 We still use this lock to block changes
1252 caused by addrconf/ndisc.
1255 idev
= __in6_dev_get(arg
->dev
);
1259 /* For administrative MTU increase, there is no way to discover
1260 IPv6 PMTU increase, so PMTU increase should be updated here.
1261 Since RFC 1981 doesn't include administrative MTU increase
1262 update PMTU increase is a MUST. (i.e. jumbo frame)
1265 If new MTU is less than route PMTU, this new MTU will be the
1266 lowest MTU in the path, update the route PMTU to reflect PMTU
1267 decreases; if new MTU is greater than route PMTU, and the
1268 old MTU is the lowest MTU in the path, update the route PMTU
1269 to reflect the increase. In this case if the other nodes' MTU
1270 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1273 if (rt
->rt6i_dev
== arg
->dev
&&
1274 !dst_metric_locked(&rt
->u
.dst
, RTAX_MTU
) &&
1275 (dst_pmtu(&rt
->u
.dst
) > arg
->mtu
||
1276 (dst_pmtu(&rt
->u
.dst
) < arg
->mtu
&&
1277 dst_pmtu(&rt
->u
.dst
) == idev
->cnf
.mtu6
)))
1278 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = arg
->mtu
;
1279 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = max_t(unsigned int, arg
->mtu
- 60, ip6_rt_min_advmss
);
1280 if (rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] > 65535-20)
1281 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = 65535;
1285 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
1287 struct rt6_mtu_change_arg arg
;
1291 read_lock_bh(&rt6_lock
);
1292 fib6_clean_tree(&ip6_routing_table
, rt6_mtu_change_route
, 0, &arg
);
1293 read_unlock_bh(&rt6_lock
);
1296 static int inet6_rtm_to_rtmsg(struct rtmsg
*r
, struct rtattr
**rta
,
1297 struct in6_rtmsg
*rtmsg
)
1299 memset(rtmsg
, 0, sizeof(*rtmsg
));
1301 rtmsg
->rtmsg_dst_len
= r
->rtm_dst_len
;
1302 rtmsg
->rtmsg_src_len
= r
->rtm_src_len
;
1303 rtmsg
->rtmsg_flags
= RTF_UP
;
1304 if (r
->rtm_type
== RTN_UNREACHABLE
)
1305 rtmsg
->rtmsg_flags
|= RTF_REJECT
;
1307 if (rta
[RTA_GATEWAY
-1]) {
1308 if (rta
[RTA_GATEWAY
-1]->rta_len
!= RTA_LENGTH(16))
1310 memcpy(&rtmsg
->rtmsg_gateway
, RTA_DATA(rta
[RTA_GATEWAY
-1]), 16);
1311 rtmsg
->rtmsg_flags
|= RTF_GATEWAY
;
1313 if (rta
[RTA_DST
-1]) {
1314 if (RTA_PAYLOAD(rta
[RTA_DST
-1]) < ((r
->rtm_dst_len
+7)>>3))
1316 memcpy(&rtmsg
->rtmsg_dst
, RTA_DATA(rta
[RTA_DST
-1]), ((r
->rtm_dst_len
+7)>>3));
1318 if (rta
[RTA_SRC
-1]) {
1319 if (RTA_PAYLOAD(rta
[RTA_SRC
-1]) < ((r
->rtm_src_len
+7)>>3))
1321 memcpy(&rtmsg
->rtmsg_src
, RTA_DATA(rta
[RTA_SRC
-1]), ((r
->rtm_src_len
+7)>>3));
1323 if (rta
[RTA_OIF
-1]) {
1324 if (rta
[RTA_OIF
-1]->rta_len
!= RTA_LENGTH(sizeof(int)))
1326 memcpy(&rtmsg
->rtmsg_ifindex
, RTA_DATA(rta
[RTA_OIF
-1]), sizeof(int));
1328 if (rta
[RTA_PRIORITY
-1]) {
1329 if (rta
[RTA_PRIORITY
-1]->rta_len
!= RTA_LENGTH(4))
1331 memcpy(&rtmsg
->rtmsg_metric
, RTA_DATA(rta
[RTA_PRIORITY
-1]), 4);
1336 int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1338 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1339 struct in6_rtmsg rtmsg
;
1341 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1343 return ip6_route_del(&rtmsg
);
1346 int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1348 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1349 struct in6_rtmsg rtmsg
;
1351 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1353 return ip6_route_add(&rtmsg
);
1356 struct rt6_rtnl_dump_arg
1358 struct sk_buff
*skb
;
1359 struct netlink_callback
*cb
;
1362 static int rt6_fill_node(struct sk_buff
*skb
, struct rt6_info
*rt
,
1363 struct in6_addr
*dst
,
1364 struct in6_addr
*src
,
1366 int type
, u32 pid
, u32 seq
)
1369 struct nlmsghdr
*nlh
;
1370 unsigned char *b
= skb
->tail
;
1371 struct rta_cacheinfo ci
;
1373 nlh
= NLMSG_PUT(skb
, pid
, seq
, type
, sizeof(*rtm
));
1374 rtm
= NLMSG_DATA(nlh
);
1375 rtm
->rtm_family
= AF_INET6
;
1376 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
1377 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
1379 rtm
->rtm_table
= RT_TABLE_MAIN
;
1380 if (rt
->rt6i_flags
&RTF_REJECT
)
1381 rtm
->rtm_type
= RTN_UNREACHABLE
;
1382 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
1383 rtm
->rtm_type
= RTN_LOCAL
;
1385 rtm
->rtm_type
= RTN_UNICAST
;
1387 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
1388 rtm
->rtm_protocol
= RTPROT_BOOT
;
1389 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
1390 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
1391 else if (rt
->rt6i_flags
&(RTF_ADDRCONF
|RTF_ALLONLINK
))
1392 rtm
->rtm_protocol
= RTPROT_KERNEL
;
1393 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
1394 rtm
->rtm_protocol
= RTPROT_RA
;
1396 if (rt
->rt6i_flags
&RTF_CACHE
)
1397 rtm
->rtm_flags
|= RTM_F_CLONED
;
1400 RTA_PUT(skb
, RTA_DST
, 16, dst
);
1401 rtm
->rtm_dst_len
= 128;
1402 } else if (rtm
->rtm_dst_len
)
1403 RTA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
1404 #ifdef CONFIG_IPV6_SUBTREES
1406 RTA_PUT(skb
, RTA_SRC
, 16, src
);
1407 rtm
->rtm_src_len
= 128;
1408 } else if (rtm
->rtm_src_len
)
1409 RTA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
1412 RTA_PUT(skb
, RTA_IIF
, 4, &iif
);
1414 struct in6_addr saddr_buf
;
1415 if (ipv6_get_saddr(&rt
->u
.dst
, dst
, &saddr_buf
) == 0)
1416 RTA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
1418 if (rtnetlink_put_metrics(skb
, rt
->u
.dst
.metrics
) < 0)
1419 goto rtattr_failure
;
1420 if (rt
->u
.dst
.neighbour
)
1421 RTA_PUT(skb
, RTA_GATEWAY
, 16, &rt
->u
.dst
.neighbour
->primary_key
);
1423 RTA_PUT(skb
, RTA_OIF
, sizeof(int), &rt
->rt6i_dev
->ifindex
);
1424 RTA_PUT(skb
, RTA_PRIORITY
, 4, &rt
->rt6i_metric
);
1425 ci
.rta_lastuse
= jiffies
- rt
->u
.dst
.lastuse
;
1426 if (rt
->rt6i_expires
)
1427 ci
.rta_expires
= rt
->rt6i_expires
- jiffies
;
1430 ci
.rta_used
= rt
->u
.dst
.__use
;
1431 ci
.rta_clntref
= atomic_read(&rt
->u
.dst
.__refcnt
);
1432 ci
.rta_error
= rt
->u
.dst
.error
;
1436 RTA_PUT(skb
, RTA_CACHEINFO
, sizeof(ci
), &ci
);
1437 nlh
->nlmsg_len
= skb
->tail
- b
;
1442 skb_trim(skb
, b
- skb
->data
);
1446 static int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
1448 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
1450 return rt6_fill_node(arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
1451 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
);
1454 static int fib6_dump_node(struct fib6_walker_t
*w
)
1457 struct rt6_info
*rt
;
1459 for (rt
= w
->leaf
; rt
; rt
= rt
->u
.next
) {
1460 res
= rt6_dump_route(rt
, w
->args
);
1462 /* Frame is full, suspend walking */
/*
 *	Tear down the dump state kept in the netlink callback.
 *
 *	cb->args[0] holds the walker pointer and cb->args[1] holds the
 *	"done" hook that inet6_dump_fib saved before installing its own.
 *	The walker is unlinked and cb->done is restored from args[1].
 *
 *	NOTE(review): any NULL checks around these statements, and the
 *	release of the walker memory, are not visible in this excerpt.
 */
1472 static void fib6_dump_end(struct netlink_callback
*cb
)
1474 struct fib6_walker_t
*w
= (void*)cb
->args
[0];
1478 fib6_walker_unlink(w
);
/* Put back the hook that was stashed in args[1]. */
1482 cb
->done
= (void*)cb
->args
[1];
/*
 *	"done" hook that inet6_dump_fib installs into cb->done.
 *
 *	Returns the result of calling cb->done(cb).
 *
 *	NOTE(review): the lines between the signature and the return are
 *	not visible here.  Presumably fib6_dump_end(cb) runs first so that
 *	cb->done again points at the original hook; otherwise this call
 *	would re-enter this function - confirm.
 */
1487 static int fib6_dump_done(struct netlink_callback
*cb
)
1490 return cb
->done(cb
);
/*
 *	Netlink dump entry point for the IPv6 routing table.
 *
 *	@skb:	buffer the dump records are written into.
 *	@cb:	netlink callback state.  cb->args[0] carries the walker
 *		pointer between invocations, cb->args[1] carries the
 *		caller's original cb->done hook.
 *
 *	A walker is allocated with kmalloc(GFP_ATOMIC), zeroed, rooted at
 *	ip6_routing_table and given fib6_dump_node as its per-node
 *	function.  The tree is walked under read_lock_bh(&rt6_lock).
 *	The value computed at the end is res when res is negative and
 *	skb->len otherwise.
 *
 *	NOTE(review): several lines are missing from this excerpt - the
 *	test that separates the first call from a continuation, the check
 *	of the kmalloc() result, the call made inside the first locked
 *	section, the setup of "arg" and the final return.  Confirm them
 *	against the complete source.
 */
1493 int inet6_dump_fib(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1495 struct rt6_rtnl_dump_arg arg
;
1496 struct fib6_walker_t
*w
;
/* Pick up the walker left behind by an earlier invocation, if any. */
1502 w
= (void*)cb
->args
[0];
1506 * 1. hook callback destructor.
/* Save the current hook in args[1] before replacing it. */
1508 cb
->args
[1] = (long)cb
->done
;
1509 cb
->done
= fib6_dump_done
;
1512 * 2. allocate and initialize walker.
1514 w
= kmalloc(sizeof(*w
), GFP_ATOMIC
);
1517 RT6_TRACE("dump<%p", w
);
1518 memset(w
, 0, sizeof(*w
));
1519 w
->root
= &ip6_routing_table
;
1520 w
->func
= fib6_dump_node
;
/* Remember the walker so that a later invocation can find it. */
1522 cb
->args
[0] = (long)w
;
1523 read_lock_bh(&rt6_lock
);
1525 read_unlock_bh(&rt6_lock
);
/* Resume a walk using the stored walker state. */
1528 read_lock_bh(&rt6_lock
);
1529 res
= fib6_walk_continue(w
);
1530 read_unlock_bh(&rt6_lock
);
1533 if (res
<= 0 && skb
->len
== 0)
1534 RT6_TRACE("%p>dump end\n", w
);
1536 res
= res
< 0 ? res
: skb
->len
;
1537 /* res < 0 is an error. (really, impossible)
1538 res == 0 means that dump is complete, but skb still can contain data.
1539 res > 0 dump is not complete, but frame is full.
1541 /* Destroy walker, if dump of this table is complete. */
/*
 *	Answer a single route query received over rtnetlink.
 *
 *	@in_skb: request skb; its NETLINK_CB pid identifies the requester.
 *	@nlh:	 request header; its nlmsg_seq is echoed in the reply.
 *	@arg:	 attribute table, used as struct rtattr ** and indexed by
 *		 attribute type minus one.
 *
 *	Visible steps:
 *	  - a reply skb of NLMSG_GOODSIZE is allocated with GFP_KERNEL,
 *	    mac.raw is set to the data pointer and MAX_HEADER plus
 *	    sizeof(struct ipv6hdr) bytes of headroom are reserved;
 *	  - the flow "fl" starts with NULL addresses and ICMP type/code 0,
 *	    then takes saddr from RTA_SRC, daddr from RTA_DST and oif from
 *	    RTA_OIF; iif is copied from RTA_IIF and looked up with
 *	    __dev_get_by_index();
 *	  - the route is resolved with ip6_route_output(NULL, &fl) and
 *	    attached to the reply as skb->dst;
 *	  - rt6_fill_node() writes an RTM_NEWROUTE record and the reply is
 *	    sent with netlink_unicast(..., MSG_DONTWAIT).
 *
 *	NOTE(review): the checks that each attribute is present, the
 *	allocation-failure and device-lookup error paths, the use made of
 *	"dev", and the return statement are not visible in this excerpt -
 *	confirm against the complete source.
 */
1547 int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
1549 struct rtattr
**rta
= arg
;
1552 struct sk_buff
*skb
;
1554 struct rt6_info
*rt
;
1556 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1560 /* Reserve room for dummy headers, this skb can pass
1561 through good chunk of routing engine.
1563 skb
->mac
.raw
= skb
->data
;
1564 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
1567 fl
.nl_u
.ip6_u
.daddr
= NULL
;
1568 fl
.nl_u
.ip6_u
.saddr
= NULL
;
1569 fl
.uli_u
.icmpt
.type
= 0;
1570 fl
.uli_u
.icmpt
.code
= 0;
1572 fl
.nl_u
.ip6_u
.saddr
= (struct in6_addr
*)RTA_DATA(rta
[RTA_SRC
-1]);
1574 fl
.nl_u
.ip6_u
.daddr
= (struct in6_addr
*)RTA_DATA(rta
[RTA_DST
-1]);
1577 memcpy(&iif
, RTA_DATA(rta
[RTA_IIF
-1]), sizeof(int));
1580 struct net_device
*dev
;
1581 dev
= __dev_get_by_index(iif
);
1588 memcpy(&fl
.oif
, RTA_DATA(rta
[RTA_OIF
-1]), sizeof(int));
1590 rt
= (struct rt6_info
*)ip6_route_output(NULL
, &fl
);
1592 skb
->dst
= &rt
->u
.dst
;
1594 NETLINK_CB(skb
).dst_pid
= NETLINK_CB(in_skb
).pid
;
1595 err
= rt6_fill_node(skb
, rt
,
1596 fl
.nl_u
.ip6_u
.daddr
,
1597 fl
.nl_u
.ip6_u
.saddr
,
1599 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
, nlh
->nlmsg_seq
);
1603 err
= netlink_unicast(rtnl
, skb
, NETLINK_CB(in_skb
).pid
, MSG_DONTWAIT
);
/*
 *	Broadcast a routing event to the RTMGRP_IPV6_ROUTE netlink group.
 *
 *	@event:	message type handed to rt6_fill_node().
 *	@rt:	route the event refers to.
 *
 *	The message buffer is sized as NLMSG_SPACE(sizeof(struct rtmsg) +
 *	256) and allocated with gfp_any().  ENOBUFS or EINVAL is reported
 *	to the group through netlink_set_err() on the two failure paths
 *	that are visible; on success the skb is handed to
 *	netlink_broadcast().
 *
 *	NOTE(review): the conditions guarding the ENOBUFS path and the
 *	clean-up on the EINVAL path are not visible in this excerpt.
 */
1609 void inet6_rt_notify(int event
, struct rt6_info
*rt
)
1611 struct sk_buff
*skb
;
1612 int size
= NLMSG_SPACE(sizeof(struct rtmsg
)+256);
1614 skb
= alloc_skb(size
, gfp_any());
1616 netlink_set_err(rtnl
, 0, RTMGRP_IPV6_ROUTE
, ENOBUFS
);
/* No explicit addresses, iif 0, pid 0 and seq 0 for an unsolicited event. */
1619 if (rt6_fill_node(skb
, rt
, NULL
, NULL
, 0, event
, 0, 0) < 0) {
1621 netlink_set_err(rtnl
, 0, RTMGRP_IPV6_ROUTE
, EINVAL
);
1624 NETLINK_CB(skb
).dst_groups
= RTMGRP_IPV6_ROUTE
;
1625 netlink_broadcast(rtnl
, skb
, 0, RTMGRP_IPV6_ROUTE
, gfp_any());
1632 #ifdef CONFIG_PROC_FS
/*
 *	Nominal size in bytes of one formatted route record (the terms add
 *	up to 150).  rt6_info_route and rt6_proc_info divide and reduce the
 *	read offset by this value.
 *
 *	NOTE(review): the record ends with a "%8s" conversion, which pads
 *	to eight characters but does not truncate, so a longer device name
 *	would produce a record larger than this constant - confirm whether
 *	that can occur.
 */
1634 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 *	Format one route as a text record for the "ipv6_route" proc file.
 *
 *	@rt:	route to print.
 *	@p_arg:	opaque cookie; used as a struct rt6_proc_arg holding the
 *		output buffer, the running length, the requested offset
 *		and length, and a skip counter.
 *
 *	Record layout as written below: destination address as 32 hex
 *	digits, a " %02x " field, the source address and a second
 *	" %02x " field (a block of zeros is written instead on the other
 *	preprocessor branch), the next-hop key as 32 hex digits (zeros
 *	when there is no next hop), then metric, reference count, use
 *	count and flags as "%08x" and the device name (empty string when
 *	rt6i_dev is NULL).
 *
 *	NOTE(review): the bodies of the skip and length checks, the
 *	per-byte updates of arg->len inside the hex loops, the arguments
 *	of the " %02x " conversions (presumably the prefix lengths) and
 *	the return value are not visible in this excerpt - confirm.
 */
1645 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
1647 struct rt6_proc_arg
*arg
= (struct rt6_proc_arg
*) p_arg
;
/* Records that lie wholly before the requested offset. */
1650 if (arg
->skip
< arg
->offset
/ RT6_INFO_LEN
) {
/* Output already reached the requested length. */
1655 if (arg
->len
>= arg
->length
)
/* Destination address, one byte at a time as two hex digits. */
1658 for (i
=0; i
<16; i
++) {
1659 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1660 rt
->rt6i_dst
.addr
.s6_addr
[i
]);
1663 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
, " %02x ",
1666 #ifdef CONFIG_IPV6_SUBTREES
/* Source address, same format as the destination. */
1667 for (i
=0; i
<16; i
++) {
1668 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1669 rt
->rt6i_src
.addr
.s6_addr
[i
]);
1672 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
, " %02x ",
/* Placeholder of the same width: all-zero address and "00". */
1675 sprintf(arg
->buffer
+ arg
->len
,
1676 "00000000000000000000000000000000 00 ");
/* Next hop taken from the neighbour entry's primary key. */
1680 if (rt
->rt6i_nexthop
) {
1681 for (i
=0; i
<16; i
++) {
1682 sprintf(arg
->buffer
+ arg
->len
, "%02x",
1683 rt
->rt6i_nexthop
->primary_key
[i
]);
/* No next hop: print an all-zero address instead. */
1687 sprintf(arg
->buffer
+ arg
->len
,
1688 "00000000000000000000000000000000");
/* Trailing numeric fields and the device name close the record. */
1691 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
1692 " %08x %08x %08x %08x %8s\n",
1693 rt
->rt6i_metric
, atomic_read(&rt
->u
.dst
.__refcnt
),
1694 rt
->u
.dst
.__use
, rt
->rt6i_flags
,
1695 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
/*
 *	Read handler for the "ipv6_route" proc file.
 *
 *	@buffer: output buffer.
 *	@start:	 out-parameter adjusted to where the caller's data begins.
 *	@offset: position in the virtual file requested by the reader.
 *	@length: number of bytes requested.
 *
 *	Every route in ip6_routing_table is passed to rt6_info_route via
 *	fib6_clean_tree() while rt6_lock is read-held.  Afterwards *start
 *	is advanced, and arg.len reduced, by the part of the offset that
 *	falls inside a record (offset % RT6_INFO_LEN).
 *
 *	NOTE(review): initialisation of the remaining "arg" fields, the
 *	initial value of *start, the clamping performed when arg.len
 *	exceeds length, and the return value are not visible here.
 */
1699 static int rt6_proc_info(char *buffer
, char **start
, off_t offset
, int length
)
1701 struct rt6_proc_arg arg
;
1702 arg
.buffer
= buffer
;
1703 arg
.offset
= offset
;
1704 arg
.length
= length
;
1708 read_lock_bh(&rt6_lock
);
1709 fib6_clean_tree(&ip6_routing_table
, rt6_info_route
, 0, &arg
);
1710 read_unlock_bh(&rt6_lock
);
/* Skip the leading part of a record that lies before the offset. */
1714 *start
+= offset
% RT6_INFO_LEN
;
1716 arg
.len
-= offset
% RT6_INFO_LEN
;
1718 if (arg
.len
> length
)
1726 extern struct rt6_statistics rt6_stats
;
/*
 *	Read handler for the "rt6_stats" proc file.
 *
 *	Prints one line of six hexadecimal fields: fib_nodes,
 *	fib_route_nodes, fib_rt_alloc, fib_rt_entries and fib_rt_cache
 *	from rt6_stats, followed by the current ip6_dst_ops.entries count.
 *	*start is set to buffer + offset.
 *
 *	NOTE(review): the handling of offset and length relative to the
 *	formatted length, and the return value, are not visible here.
 */
1728 static int rt6_proc_stats(char *buffer
, char **start
, off_t offset
, int length
)
1732 len
= sprintf(buffer
, "%04x %04x %04x %04x %04x %04x\n",
1733 rt6_stats
.fib_nodes
, rt6_stats
.fib_route_nodes
,
1734 rt6_stats
.fib_rt_alloc
, rt6_stats
.fib_rt_entries
,
1735 rt6_stats
.fib_rt_cache
,
1736 atomic_read(&ip6_dst_ops
.entries
));
1745 *start
= buffer
+ offset
;
1749 #endif /* CONFIG_PROC_FS */
1751 #ifdef CONFIG_SYSCTL
/* Value storage for the "flush" sysctl entry in ipv6_route_table. */
1753 static int flush_delay
;
/*
 *	Handler for the "flush" sysctl entry.
 *
 *	The request is parsed with proc_dointvec() and fib6_run_gc() is
 *	then called with flush_delay converted to unsigned long.
 *
 *	NOTE(review): the result of proc_dointvec() is not examined by the
 *	visible code.  The guard around these statements, the action taken
 *	when flush_delay is negative and the return values are missing
 *	from this excerpt - confirm against the complete source.
 */
1756 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
, struct file
* filp
,
1757 void *buffer
, size_t *lenp
)
1760 proc_dointvec(ctl
, write
, filp
, buffer
, lenp
);
1761 if (flush_delay
< 0)
1763 fib6_run_gc((unsigned long)flush_delay
);
1769 ctl_table ipv6_route_table
[] = {
1770 {NET_IPV6_ROUTE_FLUSH
, "flush",
1771 &flush_delay
, sizeof(int), 0644, NULL
,
1772 &ipv6_sysctl_rtcache_flush
},
1773 {NET_IPV6_ROUTE_GC_THRESH
, "gc_thresh",
1774 &ip6_dst_ops
.gc_thresh
, sizeof(int), 0644, NULL
,
1776 {NET_IPV6_ROUTE_MAX_SIZE
, "max_size",
1777 &ip6_rt_max_size
, sizeof(int), 0644, NULL
,
1779 {NET_IPV6_ROUTE_GC_MIN_INTERVAL
, "gc_min_interval",
1780 &ip6_rt_gc_min_interval
, sizeof(int), 0644, NULL
,
1781 &proc_dointvec_jiffies
, &sysctl_jiffies
},
1782 {NET_IPV6_ROUTE_GC_TIMEOUT
, "gc_timeout",
1783 &ip6_rt_gc_timeout
, sizeof(int), 0644, NULL
,
1784 &proc_dointvec_jiffies
, &sysctl_jiffies
},
1785 {NET_IPV6_ROUTE_GC_INTERVAL
, "gc_interval",
1786 &ip6_rt_gc_interval
, sizeof(int), 0644, NULL
,
1787 &proc_dointvec_jiffies
, &sysctl_jiffies
},
1788 {NET_IPV6_ROUTE_GC_ELASTICITY
, "gc_elasticity",
1789 &ip6_rt_gc_elasticity
, sizeof(int), 0644, NULL
,
1790 &proc_dointvec_jiffies
, &sysctl_jiffies
},
1791 {NET_IPV6_ROUTE_MTU_EXPIRES
, "mtu_expires",
1792 &ip6_rt_mtu_expires
, sizeof(int), 0644, NULL
,
1793 &proc_dointvec_jiffies
, &sysctl_jiffies
},
1794 {NET_IPV6_ROUTE_MIN_ADVMSS
, "min_adv_mss",
1795 &ip6_rt_min_advmss
, sizeof(int), 0644, NULL
,
1796 &proc_dointvec_jiffies
, &sysctl_jiffies
},
/*
 *	Boot-time initialisation of IPv6 routing.
 *
 *	Creates the "ip6_dst_cache" slab cache, sized for struct rt6_info
 *	and requested with SLAB_HWCACHE_ALIGN, and stores it in
 *	ip6_dst_ops.kmem_cachep.  When CONFIG_PROC_FS is set, registers
 *	the "ipv6_route" and "rt6_stats" proc entries with rt6_proc_info
 *	and rt6_proc_stats as their read handlers.
 *
 *	NOTE(review): the remaining kmem_cache_create() arguments and any
 *	further initialisation steps are not visible in this excerpt.
 */
1803 void __init
ip6_route_init(void)
1805 ip6_dst_ops
.kmem_cachep
= kmem_cache_create("ip6_dst_cache",
1806 sizeof(struct rt6_info
),
1807 0, SLAB_HWCACHE_ALIGN
,
1810 #ifdef CONFIG_PROC_FS
1811 proc_net_create("ipv6_route", 0, rt6_proc_info
);
1812 proc_net_create("rt6_stats", 0, rt6_proc_stats
);
1817 void ip6_route_cleanup(void)
1819 #ifdef CONFIG_PROC_FS
1820 proc_net_remove("ipv6_route");
1821 proc_net_remove("rt6_stats");