2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
56 #include <asm/uaccess.h>
59 #include <linux/sysctl.h>
62 /* Set to 3 to get tracing. */
66 #define RDBG(x) printk x
67 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #define RT6_TRACE(x...) do { ; } while (0)
74 static int ip6_rt_max_size
= 4096;
75 static int ip6_rt_gc_min_interval
= HZ
/ 2;
76 static int ip6_rt_gc_timeout
= 60*HZ
;
77 int ip6_rt_gc_interval
= 30*HZ
;
78 static int ip6_rt_gc_elasticity
= 9;
79 static int ip6_rt_mtu_expires
= 10*60*HZ
;
80 static int ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
82 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
83 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
84 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
85 static int ip6_dst_gc(void);
87 static int ip6_pkt_discard(struct sk_buff
*skb
);
88 static void ip6_link_failure(struct sk_buff
*skb
);
89 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
91 static struct dst_ops ip6_dst_ops
= {
93 .protocol
= __constant_htons(ETH_P_IPV6
),
96 .check
= ip6_dst_check
,
97 .negative_advice
= ip6_negative_advice
,
98 .link_failure
= ip6_link_failure
,
99 .update_pmtu
= ip6_rt_update_pmtu
,
100 .entry_size
= sizeof(struct rt6_info
),
103 struct rt6_info ip6_null_entry
= {
106 .__refcnt
= ATOMIC_INIT(1),
108 .dev
= &loopback_dev
,
110 .error
= -ENETUNREACH
,
111 .metrics
= { [RTAX_HOPLIMIT
- 1] = 255, },
112 .input
= ip6_pkt_discard
,
113 .output
= ip6_pkt_discard
,
115 .path
= (struct dst_entry
*)&ip6_null_entry
,
118 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
119 .rt6i_metric
= ~(u32
) 0,
120 .rt6i_ref
= ATOMIC_INIT(1),
123 struct fib6_node ip6_routing_table
= {
124 .leaf
= &ip6_null_entry
,
125 .fn_flags
= RTN_ROOT
| RTN_TL_ROOT
| RTN_RTINFO
,
128 /* Protects all the ip6 fib */
130 rwlock_t rt6_lock
= RW_LOCK_UNLOCKED
;
133 /* allocate dst with ip6_dst_ops */
134 static __inline__
struct rt6_info
*ip6_dst_alloc(void)
136 return dst_alloc(&ip6_dst_ops
);
140 * Route lookup. Any rt6_lock is implied.
143 static __inline__
struct rt6_info
*rt6_device_match(struct rt6_info
*rt
,
147 struct rt6_info
*local
= NULL
;
148 struct rt6_info
*sprt
;
151 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
152 struct net_device
*dev
= sprt
->rt6i_dev
;
153 if (dev
->ifindex
== oif
)
155 if (dev
->flags
&IFF_LOOPBACK
)
163 return &ip6_null_entry
;
169 * pointer to the last default router chosen. BH is disabled locally.
171 static struct rt6_info
*rt6_dflt_pointer
;
172 static spinlock_t rt6_dflt_lock
= SPIN_LOCK_UNLOCKED
;
174 /* Default Router Selection (RFC 2461 6.3.6) */
175 static struct rt6_info
*rt6_best_dflt(struct rt6_info
*rt
, int oif
)
177 struct rt6_info
*match
= NULL
;
178 struct rt6_info
*sprt
;
181 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.next
) {
182 struct neighbour
*neigh
;
187 sprt
->rt6i_dev
->ifindex
== oif
))
190 if (sprt
== rt6_dflt_pointer
)
193 if ((neigh
= sprt
->rt6i_nexthop
) != NULL
) {
194 read_lock_bh(&neigh
->lock
);
195 switch (neigh
->nud_state
) {
213 read_unlock_bh(&neigh
->lock
);
216 read_unlock_bh(&neigh
->lock
);
221 if (m
> mpri
|| m
>= 12) {
225 /* we choose the lastest default router if it
226 * is in (probably) reachable state.
227 * If route changed, we should do pmtu
228 * discovery. --yoshfuji
235 spin_lock(&rt6_dflt_lock
);
238 * No default routers are known to be reachable.
241 if (rt6_dflt_pointer
) {
242 for (sprt
= rt6_dflt_pointer
->u
.next
;
243 sprt
; sprt
= sprt
->u
.next
) {
244 if (sprt
->u
.dst
.obsolete
<= 0 &&
245 sprt
->u
.dst
.error
== 0) {
252 sprt
= sprt
->u
.next
) {
253 if (sprt
->u
.dst
.obsolete
<= 0 &&
254 sprt
->u
.dst
.error
== 0) {
258 if (sprt
== rt6_dflt_pointer
)
265 if (rt6_dflt_pointer
!= match
)
266 RT6_TRACE("changed default router: %p->%p\n",
267 rt6_dflt_pointer
, match
);
268 rt6_dflt_pointer
= match
;
270 spin_unlock(&rt6_dflt_lock
);
274 * Last Resort: if no default routers found,
275 * use addrconf default route.
276 * We don't record this route.
278 for (sprt
= ip6_routing_table
.leaf
;
279 sprt
; sprt
= sprt
->u
.next
) {
280 if ((sprt
->rt6i_flags
& RTF_DEFAULT
) &&
283 sprt
->rt6i_dev
->ifindex
== oif
))) {
289 /* no default route. give up. */
290 match
= &ip6_null_entry
;
297 struct rt6_info
*rt6_lookup(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
300 struct fib6_node
*fn
;
303 read_lock_bh(&rt6_lock
);
304 fn
= fib6_lookup(&ip6_routing_table
, daddr
, saddr
);
305 rt
= rt6_device_match(fn
->leaf
, oif
, strict
);
306 dst_hold(&rt
->u
.dst
);
308 read_unlock_bh(&rt6_lock
);
310 rt
->u
.dst
.lastuse
= jiffies
;
311 if (rt
->u
.dst
.error
== 0)
313 dst_release(&rt
->u
.dst
);
317 /* rt6_ins is called with FREE rt6_lock.
318 It takes new route entry, the addition fails by any reason the
319 route is freed. In any case, if caller does not hold it, it may
323 static int rt6_ins(struct rt6_info
*rt
, struct nlmsghdr
*nlh
, void *_rtattr
)
327 write_lock_bh(&rt6_lock
);
328 err
= fib6_add(&ip6_routing_table
, rt
, nlh
, _rtattr
);
329 write_unlock_bh(&rt6_lock
);
334 /* No rt6_lock! If COW failed, the function returns dead route entry
335 with dst->error set to errno value.
338 static struct rt6_info
*rt6_cow(struct rt6_info
*ort
, struct in6_addr
*daddr
,
339 struct in6_addr
*saddr
)
348 rt
= ip6_rt_copy(ort
);
351 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
353 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
354 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
356 rt
->rt6i_dst
.plen
= 128;
357 rt
->rt6i_flags
|= RTF_CACHE
;
358 rt
->u
.dst
.flags
|= DST_HOST
;
360 #ifdef CONFIG_IPV6_SUBTREES
361 if (rt
->rt6i_src
.plen
&& saddr
) {
362 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
363 rt
->rt6i_src
.plen
= 128;
367 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
369 dst_hold(&rt
->u
.dst
);
371 err
= rt6_ins(rt
, NULL
, NULL
);
375 rt
->u
.dst
.error
= err
;
379 dst_hold(&ip6_null_entry
.u
.dst
);
380 return &ip6_null_entry
;
383 #define BACKTRACK() \
384 if (rt == &ip6_null_entry && strict) { \
385 while ((fn = fn->parent) != NULL) { \
386 if (fn->fn_flags & RTN_ROOT) { \
387 dst_hold(&rt->u.dst); \
390 if (fn->fn_flags & RTN_RTINFO) \
396 void ip6_route_input(struct sk_buff
*skb
)
398 struct fib6_node
*fn
;
403 strict
= ipv6_addr_type(&skb
->nh
.ipv6h
->daddr
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
406 read_lock_bh(&rt6_lock
);
408 fn
= fib6_lookup(&ip6_routing_table
, &skb
->nh
.ipv6h
->daddr
,
409 &skb
->nh
.ipv6h
->saddr
);
414 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
415 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, strict
);
417 dst_hold(&rt
->u
.dst
);
421 rt
= rt6_device_match(rt
, skb
->dev
->ifindex
, 0);
424 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
425 read_unlock_bh(&rt6_lock
);
427 rt
= rt6_cow(rt
, &skb
->nh
.ipv6h
->daddr
,
428 &skb
->nh
.ipv6h
->saddr
);
430 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
432 /* Race condition! In the gap, when rt6_lock was
433 released someone could insert this route. Relookup.
437 dst_hold(&rt
->u
.dst
);
440 read_unlock_bh(&rt6_lock
);
442 rt
->u
.dst
.lastuse
= jiffies
;
444 skb
->dst
= (struct dst_entry
*) rt
;
447 struct dst_entry
* ip6_route_output(struct sock
*sk
, struct flowi
*fl
)
449 struct fib6_node
*fn
;
454 strict
= ipv6_addr_type(&fl
->fl6_dst
) & (IPV6_ADDR_MULTICAST
|IPV6_ADDR_LINKLOCAL
);
457 read_lock_bh(&rt6_lock
);
459 fn
= fib6_lookup(&ip6_routing_table
, &fl
->fl6_dst
, &fl
->fl6_src
);
464 if ((rt
->rt6i_flags
& RTF_CACHE
)) {
465 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
467 dst_hold(&rt
->u
.dst
);
470 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
471 if (rt
->rt6i_metric
>= IP6_RT_PRIO_ADDRCONF
)
472 rt
= rt6_best_dflt(rt
, fl
->oif
);
474 rt
= rt6_device_match(rt
, fl
->oif
, strict
);
478 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
479 read_unlock_bh(&rt6_lock
);
481 rt
= rt6_cow(rt
, &fl
->fl6_dst
, &fl
->fl6_src
);
483 if (rt
->u
.dst
.error
!= -EEXIST
|| --attempts
<= 0)
486 /* Race condition! In the gap, when rt6_lock was
487 released someone could insert this route. Relookup.
491 dst_hold(&rt
->u
.dst
);
494 read_unlock_bh(&rt6_lock
);
496 rt
->u
.dst
.lastuse
= jiffies
;
503 * Destination cache support functions
506 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
510 rt
= (struct rt6_info
*) dst
;
512 if (rt
&& rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
))
519 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
521 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
524 if (rt
->rt6i_flags
& RTF_CACHE
)
525 ip6_del_rt(rt
, NULL
, NULL
);
532 static void ip6_link_failure(struct sk_buff
*skb
)
536 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0, skb
->dev
);
538 rt
= (struct rt6_info
*) skb
->dst
;
540 if (rt
->rt6i_flags
&RTF_CACHE
) {
541 dst_set_expires(&rt
->u
.dst
, 0);
542 rt
->rt6i_flags
|= RTF_EXPIRES
;
543 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
544 rt
->rt6i_node
->fn_sernum
= -1;
548 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
550 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
552 if (mtu
< dst_pmtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
553 rt6
->rt6i_flags
|= RTF_MODIFIED
;
554 dst
->metrics
[RTAX_MTU
-1] = mtu
;
558 /* Protected by rt6_lock. */
559 static struct dst_entry
*ndisc_dst_gc_list
;
561 struct dst_entry
*ndisc_dst_alloc(struct net_device
*dev
,
562 struct neighbour
*neigh
,
563 int (*output
)(struct sk_buff
*))
565 struct rt6_info
*rt
= ip6_dst_alloc();
567 if (unlikely(rt
== NULL
))
576 rt
->rt6i_nexthop
= neigh
;
577 rt
->rt6i_expires
= 0;
578 rt
->rt6i_flags
= RTF_LOCAL
;
580 atomic_set(&rt
->u
.dst
.__refcnt
, 1);
581 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = 255;
582 rt
->u
.dst
.output
= output
;
584 write_lock_bh(&rt6_lock
);
585 rt
->u
.dst
.next
= ndisc_dst_gc_list
;
586 ndisc_dst_gc_list
= &rt
->u
.dst
;
587 write_unlock_bh(&rt6_lock
);
589 fib6_force_start_gc();
592 return (struct dst_entry
*)rt
;
595 int ndisc_dst_gc(int *more
)
597 struct dst_entry
*dst
, *next
, **pprev
;
601 pprev
= &ndisc_dst_gc_list
;
603 while ((dst
= *pprev
) != NULL
) {
604 if (!atomic_read(&dst
->__refcnt
)) {
617 static int ip6_dst_gc(void)
619 static unsigned expire
= 30*HZ
;
620 static unsigned long last_gc
;
621 unsigned long now
= jiffies
;
623 if (time_after(last_gc
+ ip6_rt_gc_min_interval
, now
) &&
624 atomic_read(&ip6_dst_ops
.entries
) <= ip6_rt_max_size
)
630 if (atomic_read(&ip6_dst_ops
.entries
) < ip6_dst_ops
.gc_thresh
)
631 expire
= ip6_rt_gc_timeout
>>1;
634 expire
-= expire
>>ip6_rt_gc_elasticity
;
635 return (atomic_read(&ip6_dst_ops
.entries
) > ip6_rt_max_size
);
638 /* Clean host part of a prefix. Not necessary in radix tree,
639 but results in cleaner routing tables.
641 Remove it only when all the things will work!
644 static int ipv6_get_mtu(struct net_device
*dev
)
646 int mtu
= IPV6_MIN_MTU
;
647 struct inet6_dev
*idev
;
649 idev
= in6_dev_get(dev
);
651 mtu
= idev
->cnf
.mtu6
;
657 static inline unsigned int ipv6_advmss(unsigned int mtu
)
659 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
661 if (mtu
< ip6_rt_min_advmss
)
662 mtu
= ip6_rt_min_advmss
;
665 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
666 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
667 * IPV6_MAXPLEN is also valid and means: "any MSS,
668 * rely only on pmtu discovery"
670 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
675 static int ipv6_get_hoplimit(struct net_device
*dev
)
677 int hoplimit
= ipv6_devconf
.hop_limit
;
678 struct inet6_dev
*idev
;
680 idev
= in6_dev_get(dev
);
682 hoplimit
= idev
->cnf
.hop_limit
;
692 int ip6_route_add(struct in6_rtmsg
*rtmsg
, struct nlmsghdr
*nlh
, void *_rtattr
)
698 struct net_device
*dev
= NULL
;
701 rta
= (struct rtattr
**) _rtattr
;
703 if (rtmsg
->rtmsg_dst_len
> 128 || rtmsg
->rtmsg_src_len
> 128)
705 #ifndef CONFIG_IPV6_SUBTREES
706 if (rtmsg
->rtmsg_src_len
)
709 if (rtmsg
->rtmsg_metric
== 0)
710 rtmsg
->rtmsg_metric
= IP6_RT_PRIO_USER
;
712 rt
= ip6_dst_alloc();
717 rt
->u
.dst
.obsolete
= -1;
718 rt
->rt6i_expires
= rtmsg
->rtmsg_info
;
719 if (nlh
&& (r
= NLMSG_DATA(nlh
))) {
720 rt
->rt6i_protocol
= r
->rtm_protocol
;
722 rt
->rt6i_protocol
= RTPROT_BOOT
;
725 addr_type
= ipv6_addr_type(&rtmsg
->rtmsg_dst
);
727 if (addr_type
& IPV6_ADDR_MULTICAST
)
728 rt
->u
.dst
.input
= ip6_mc_input
;
730 rt
->u
.dst
.input
= ip6_forward
;
732 rt
->u
.dst
.output
= ip6_output
;
734 if (rtmsg
->rtmsg_ifindex
) {
735 dev
= dev_get_by_index(rtmsg
->rtmsg_ifindex
);
741 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
,
742 &rtmsg
->rtmsg_dst
, rtmsg
->rtmsg_dst_len
);
743 rt
->rt6i_dst
.plen
= rtmsg
->rtmsg_dst_len
;
744 if (rt
->rt6i_dst
.plen
== 128)
745 rt
->u
.dst
.flags
= DST_HOST
;
747 #ifdef CONFIG_IPV6_SUBTREES
748 ipv6_addr_prefix(&rt
->rt6i_src
.addr
,
749 &rtmsg
->rtmsg_src
, rtmsg
->rtmsg_src_len
);
750 rt
->rt6i_src
.plen
= rtmsg
->rtmsg_src_len
;
753 rt
->rt6i_metric
= rtmsg
->rtmsg_metric
;
755 /* We cannot add true routes via loopback here,
756 they would result in kernel looping; promote them to reject routes
758 if ((rtmsg
->rtmsg_flags
&RTF_REJECT
) ||
759 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
))) {
764 rt
->u
.dst
.output
= ip6_pkt_discard
;
765 rt
->u
.dst
.input
= ip6_pkt_discard
;
766 rt
->u
.dst
.error
= -ENETUNREACH
;
767 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
771 if (rtmsg
->rtmsg_flags
& RTF_GATEWAY
) {
772 struct in6_addr
*gw_addr
;
775 gw_addr
= &rtmsg
->rtmsg_gateway
;
776 ipv6_addr_copy(&rt
->rt6i_gateway
, &rtmsg
->rtmsg_gateway
);
777 gwa_type
= ipv6_addr_type(gw_addr
);
779 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
780 struct rt6_info
*grt
;
782 /* IPv6 strictly inhibits using not link-local
783 addresses as nexthop address.
784 Otherwise, router will not able to send redirects.
785 It is very good, but in some (rare!) curcumstances
786 (SIT, PtP, NBMA NOARP links) it is handy to allow
787 some exceptions. --ANK
790 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
793 grt
= rt6_lookup(gw_addr
, NULL
, rtmsg
->rtmsg_ifindex
, 1);
799 if (dev
!= grt
->rt6i_dev
) {
800 dst_release(&grt
->u
.dst
);
807 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
809 dst_release(&grt
->u
.dst
);
815 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
823 if (rtmsg
->rtmsg_flags
& (RTF_GATEWAY
|RTF_NONEXTHOP
)) {
824 rt
->rt6i_nexthop
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
825 if (IS_ERR(rt
->rt6i_nexthop
)) {
826 err
= PTR_ERR(rt
->rt6i_nexthop
);
827 rt
->rt6i_nexthop
= NULL
;
832 rt
->rt6i_flags
= rtmsg
->rtmsg_flags
;
835 if (rta
&& rta
[RTA_METRICS
-1]) {
836 int attrlen
= RTA_PAYLOAD(rta
[RTA_METRICS
-1]);
837 struct rtattr
*attr
= RTA_DATA(rta
[RTA_METRICS
-1]);
839 while (RTA_OK(attr
, attrlen
)) {
840 unsigned flavor
= attr
->rta_type
;
842 if (flavor
> RTAX_MAX
) {
846 rt
->u
.dst
.metrics
[flavor
-1] =
847 *(u32
*)RTA_DATA(attr
);
849 attr
= RTA_NEXT(attr
, attrlen
);
853 if (rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] == 0) {
854 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
))
855 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] =
856 IPV6_DEFAULT_MCASTHOPS
;
858 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] =
859 ipv6_get_hoplimit(dev
);
862 if (!rt
->u
.dst
.metrics
[RTAX_MTU
-1])
863 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(dev
);
864 if (!rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1])
865 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_pmtu(&rt
->u
.dst
));
867 return rt6_ins(rt
, nlh
, _rtattr
);
872 dst_free((struct dst_entry
*) rt
);
876 int ip6_del_rt(struct rt6_info
*rt
, struct nlmsghdr
*nlh
, void *_rtattr
)
880 write_lock_bh(&rt6_lock
);
882 spin_lock_bh(&rt6_dflt_lock
);
883 rt6_dflt_pointer
= NULL
;
884 spin_unlock_bh(&rt6_dflt_lock
);
886 dst_release(&rt
->u
.dst
);
888 err
= fib6_del(rt
, nlh
, _rtattr
);
889 write_unlock_bh(&rt6_lock
);
894 static int ip6_route_del(struct in6_rtmsg
*rtmsg
, struct nlmsghdr
*nlh
, void *_rtattr
)
896 struct fib6_node
*fn
;
900 read_lock_bh(&rt6_lock
);
902 fn
= fib6_locate(&ip6_routing_table
,
903 &rtmsg
->rtmsg_dst
, rtmsg
->rtmsg_dst_len
,
904 &rtmsg
->rtmsg_src
, rtmsg
->rtmsg_src_len
);
907 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.next
) {
908 if (rtmsg
->rtmsg_ifindex
&&
909 (rt
->rt6i_dev
== NULL
||
910 rt
->rt6i_dev
->ifindex
!= rtmsg
->rtmsg_ifindex
))
912 if (rtmsg
->rtmsg_flags
&RTF_GATEWAY
&&
913 ipv6_addr_cmp(&rtmsg
->rtmsg_gateway
, &rt
->rt6i_gateway
))
915 if (rtmsg
->rtmsg_metric
&&
916 rtmsg
->rtmsg_metric
!= rt
->rt6i_metric
)
918 dst_hold(&rt
->u
.dst
);
919 read_unlock_bh(&rt6_lock
);
921 return ip6_del_rt(rt
, nlh
, _rtattr
);
924 read_unlock_bh(&rt6_lock
);
932 void rt6_redirect(struct in6_addr
*dest
, struct in6_addr
*saddr
,
933 struct neighbour
*neigh
, int on_link
)
935 struct rt6_info
*rt
, *nrt
;
937 /* Locate old route to this destination. */
938 rt
= rt6_lookup(dest
, NULL
, neigh
->dev
->ifindex
, 1);
943 if (neigh
->dev
!= rt
->rt6i_dev
)
946 /* Redirect received -> path was valid.
947 Look, redirects are sent only in response to data packets,
948 so that this nexthop apparently is reachable. --ANK
950 dst_confirm(&rt
->u
.dst
);
952 /* Duplicate redirect: silently ignore. */
953 if (neigh
== rt
->u
.dst
.neighbour
)
956 /* Current route is on-link; redirect is always invalid.
958 Seems, previous statement is not true. It could
959 be node, which looks for us as on-link (f.e. proxy ndisc)
960 But then router serving it might decide, that we should
961 know truth 8)8) --ANK (980726).
963 if (!(rt
->rt6i_flags
&RTF_GATEWAY
))
967 * RFC 2461 specifies that redirects should only be
968 * accepted if they come from the nexthop to the target.
969 * Due to the way default routers are chosen, this notion
970 * is a bit fuzzy and one might need to check all default
974 if (ipv6_addr_cmp(saddr
, &rt
->rt6i_gateway
)) {
975 if (rt
->rt6i_flags
& RTF_DEFAULT
) {
976 struct rt6_info
*rt1
;
978 read_lock(&rt6_lock
);
979 for (rt1
= ip6_routing_table
.leaf
; rt1
; rt1
= rt1
->u
.next
) {
980 if (!ipv6_addr_cmp(saddr
, &rt1
->rt6i_gateway
)) {
981 dst_hold(&rt1
->u
.dst
);
982 dst_release(&rt
->u
.dst
);
983 read_unlock(&rt6_lock
);
988 read_unlock(&rt6_lock
);
991 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
992 "for redirect target\n");
999 * We have finally decided to accept it.
1002 nrt
= ip6_rt_copy(rt
);
1006 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1008 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1010 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
1011 nrt
->rt6i_dst
.plen
= 128;
1012 nrt
->u
.dst
.flags
|= DST_HOST
;
1014 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1015 nrt
->rt6i_nexthop
= neigh_clone(neigh
);
1016 /* Reset pmtu, it may be better */
1017 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(neigh
->dev
);
1018 nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_pmtu(&nrt
->u
.dst
));
1020 if (rt6_ins(nrt
, NULL
, NULL
))
1023 if (rt
->rt6i_flags
&RTF_CACHE
) {
1024 ip6_del_rt(rt
, NULL
, NULL
);
1029 dst_release(&rt
->u
.dst
);
1034 * Handle ICMP "packet too big" messages
1035 * i.e. Path MTU discovery
1038 void rt6_pmtu_discovery(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1039 struct net_device
*dev
, u32 pmtu
)
1041 struct rt6_info
*rt
, *nrt
;
1043 if (pmtu
< IPV6_MIN_MTU
) {
1044 if (net_ratelimit())
1045 printk(KERN_DEBUG
"rt6_pmtu_discovery: invalid MTU value %d\n",
1047 /* According to RFC1981, the PMTU is set to the IPv6 minimum
1048 link MTU if the node receives a Packet Too Big message
1049 reporting next-hop MTU that is less than the IPv6 minimum MTU.
1051 pmtu
= IPV6_MIN_MTU
;
1054 rt
= rt6_lookup(daddr
, saddr
, dev
->ifindex
, 0);
1059 if (pmtu
>= dst_pmtu(&rt
->u
.dst
))
1062 /* New mtu received -> path was valid.
1063 They are sent only in response to data packets,
1064 so that this nexthop apparently is reachable. --ANK
1066 dst_confirm(&rt
->u
.dst
);
1068 /* Host route. If it is static, it would be better
1069 not to override it, but add new one, so that
1070 when cache entry will expire old pmtu
1071 would return automatically.
1073 if (rt
->rt6i_flags
& RTF_CACHE
) {
1074 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1075 dst_set_expires(&rt
->u
.dst
, ip6_rt_mtu_expires
);
1076 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1081 Two cases are possible:
1082 1. It is connected route. Action: COW
1083 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1085 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
)) {
1086 nrt
= rt6_cow(rt
, daddr
, saddr
);
1087 if (!nrt
->u
.dst
.error
) {
1088 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1089 /* According to RFC 1981, detecting PMTU increase shouldn't be
1090 happened within 5 mins, the recommended timer is 10 mins.
1091 Here this route expiration time is set to ip6_rt_mtu_expires
1092 which is 10 mins. After 10 mins the decreased pmtu is expired
1093 and detecting PMTU increase will be automatically happened.
1095 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1096 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1097 dst_release(&nrt
->u
.dst
);
1100 nrt
= ip6_rt_copy(rt
);
1103 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, daddr
);
1104 nrt
->rt6i_dst
.plen
= 128;
1105 nrt
->u
.dst
.flags
|= DST_HOST
;
1106 nrt
->rt6i_nexthop
= neigh_clone(rt
->rt6i_nexthop
);
1107 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1108 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_CACHE
|RTF_EXPIRES
;
1109 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1110 rt6_ins(nrt
, NULL
, NULL
);
1114 dst_release(&rt
->u
.dst
);
1118 * Misc support functions
1121 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1123 struct rt6_info
*rt
= ip6_dst_alloc();
1126 rt
->u
.dst
.input
= ort
->u
.dst
.input
;
1127 rt
->u
.dst
.output
= ort
->u
.dst
.output
;
1129 memcpy(rt
->u
.dst
.metrics
, ort
->u
.dst
.metrics
, RTAX_MAX
*sizeof(u32
));
1130 rt
->u
.dst
.dev
= ort
->u
.dst
.dev
;
1132 dev_hold(rt
->u
.dst
.dev
);
1133 rt
->u
.dst
.lastuse
= jiffies
;
1134 rt
->rt6i_expires
= 0;
1136 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1137 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1138 rt
->rt6i_metric
= 0;
1140 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1141 #ifdef CONFIG_IPV6_SUBTREES
1142 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1148 struct rt6_info
*rt6_get_dflt_router(struct in6_addr
*addr
, struct net_device
*dev
)
1150 struct rt6_info
*rt
;
1151 struct fib6_node
*fn
;
1153 fn
= &ip6_routing_table
;
1155 write_lock_bh(&rt6_lock
);
1156 for (rt
= fn
->leaf
; rt
; rt
=rt
->u
.next
) {
1157 if (dev
== rt
->rt6i_dev
&&
1158 ipv6_addr_cmp(&rt
->rt6i_gateway
, addr
) == 0)
1162 dst_hold(&rt
->u
.dst
);
1163 write_unlock_bh(&rt6_lock
);
1167 struct rt6_info
*rt6_add_dflt_router(struct in6_addr
*gwaddr
,
1168 struct net_device
*dev
)
1170 struct in6_rtmsg rtmsg
;
1172 memset(&rtmsg
, 0, sizeof(struct in6_rtmsg
));
1173 rtmsg
.rtmsg_type
= RTMSG_NEWROUTE
;
1174 ipv6_addr_copy(&rtmsg
.rtmsg_gateway
, gwaddr
);
1175 rtmsg
.rtmsg_metric
= 1024;
1176 rtmsg
.rtmsg_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
| RTF_UP
;
1178 rtmsg
.rtmsg_ifindex
= dev
->ifindex
;
1180 ip6_route_add(&rtmsg
, NULL
, NULL
);
1181 return rt6_get_dflt_router(gwaddr
, dev
);
1184 void rt6_purge_dflt_routers(int last_resort
)
1186 struct rt6_info
*rt
;
1190 flags
= RTF_ALLONLINK
;
1192 flags
= RTF_DEFAULT
| RTF_ADDRCONF
;
1195 read_lock_bh(&rt6_lock
);
1196 for (rt
= ip6_routing_table
.leaf
; rt
; rt
= rt
->u
.next
) {
1197 if (rt
->rt6i_flags
& flags
) {
1198 dst_hold(&rt
->u
.dst
);
1200 spin_lock_bh(&rt6_dflt_lock
);
1201 rt6_dflt_pointer
= NULL
;
1202 spin_unlock_bh(&rt6_dflt_lock
);
1204 read_unlock_bh(&rt6_lock
);
1206 ip6_del_rt(rt
, NULL
, NULL
);
1211 read_unlock_bh(&rt6_lock
);
1214 int ipv6_route_ioctl(unsigned int cmd
, void *arg
)
1216 struct in6_rtmsg rtmsg
;
1220 case SIOCADDRT
: /* Add a route */
1221 case SIOCDELRT
: /* Delete a route */
1222 if (!capable(CAP_NET_ADMIN
))
1224 err
= copy_from_user(&rtmsg
, arg
,
1225 sizeof(struct in6_rtmsg
));
1232 err
= ip6_route_add(&rtmsg
, NULL
, NULL
);
1235 err
= ip6_route_del(&rtmsg
, NULL
, NULL
);
1249 * Drop the packet on the floor
1252 int ip6_pkt_discard(struct sk_buff
*skb
)
1254 IP6_INC_STATS(Ip6OutNoRoutes
);
1255 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_NOROUTE
, 0, skb
->dev
);
1264 int ip6_rt_addr_add(struct in6_addr
*addr
, struct net_device
*dev
, int anycast
)
1266 struct rt6_info
*rt
= ip6_dst_alloc();
1271 dev_hold(&loopback_dev
);
1273 rt
->u
.dst
.flags
= DST_HOST
;
1274 rt
->u
.dst
.input
= ip6_input
;
1275 rt
->u
.dst
.output
= ip6_output
;
1276 rt
->rt6i_dev
= &loopback_dev
;
1277 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
1278 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_pmtu(&rt
->u
.dst
));
1279 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = ipv6_get_hoplimit(rt
->rt6i_dev
);
1280 rt
->u
.dst
.obsolete
= -1;
1282 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
1284 rt
->rt6i_flags
|= RTF_LOCAL
;
1285 rt
->rt6i_nexthop
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
1286 if (rt
->rt6i_nexthop
== NULL
) {
1287 dst_free((struct dst_entry
*) rt
);
1291 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1292 rt
->rt6i_dst
.plen
= 128;
1293 rt6_ins(rt
, NULL
, NULL
);
1298 /* Delete address. Warning: you should check that this address
1299 disappeared before calling this function.
1302 int ip6_rt_addr_del(struct in6_addr
*addr
, struct net_device
*dev
)
1304 struct rt6_info
*rt
;
1307 rt
= rt6_lookup(addr
, NULL
, loopback_dev
.ifindex
, 1);
1309 if (rt
->rt6i_dst
.plen
== 128)
1310 err
= ip6_del_rt(rt
, NULL
, NULL
);
1312 dst_release(&rt
->u
.dst
);
1318 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
1320 if (((void*)rt
->rt6i_dev
== arg
|| arg
== NULL
) &&
1321 rt
!= &ip6_null_entry
) {
1322 RT6_TRACE("deleted by ifdown %p\n", rt
);
1328 void rt6_ifdown(struct net_device
*dev
)
1330 write_lock_bh(&rt6_lock
);
1331 fib6_clean_tree(&ip6_routing_table
, fib6_ifdown
, 0, dev
);
1332 write_unlock_bh(&rt6_lock
);
1335 struct rt6_mtu_change_arg
1337 struct net_device
*dev
;
1341 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
1343 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
1344 struct inet6_dev
*idev
;
1346 /* In IPv6 pmtu discovery is not optional,
1347 so that RTAX_MTU lock cannot disable it.
1348 We still use this lock to block changes
1349 caused by addrconf/ndisc.
1352 idev
= __in6_dev_get(arg
->dev
);
1356 /* For administrative MTU increase, there is no way to discover
1357 IPv6 PMTU increase, so PMTU increase should be updated here.
1358 Since RFC 1981 doesn't include administrative MTU increase
1359 update PMTU increase is a MUST. (i.e. jumbo frame)
1362 If new MTU is less than route PMTU, this new MTU will be the
1363 lowest MTU in the path, update the route PMTU to refect PMTU
1364 decreases; if new MTU is greater than route PMTU, and the
1365 old MTU is the lowest MTU in the path, update the route PMTU
1366 to refect the increase. In this case if the other nodes' MTU
1367 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1370 if (rt
->rt6i_dev
== arg
->dev
&&
1371 !dst_metric_locked(&rt
->u
.dst
, RTAX_MTU
) &&
1372 (dst_pmtu(&rt
->u
.dst
) > arg
->mtu
||
1373 (dst_pmtu(&rt
->u
.dst
) < arg
->mtu
&&
1374 dst_pmtu(&rt
->u
.dst
) == idev
->cnf
.mtu6
)))
1375 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = arg
->mtu
;
1376 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(arg
->mtu
);
1380 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
1382 struct rt6_mtu_change_arg arg
;
1386 read_lock_bh(&rt6_lock
);
1387 fib6_clean_tree(&ip6_routing_table
, rt6_mtu_change_route
, 0, &arg
);
1388 read_unlock_bh(&rt6_lock
);
1391 static int inet6_rtm_to_rtmsg(struct rtmsg
*r
, struct rtattr
**rta
,
1392 struct in6_rtmsg
*rtmsg
)
1394 memset(rtmsg
, 0, sizeof(*rtmsg
));
1396 rtmsg
->rtmsg_dst_len
= r
->rtm_dst_len
;
1397 rtmsg
->rtmsg_src_len
= r
->rtm_src_len
;
1398 rtmsg
->rtmsg_flags
= RTF_UP
;
1399 if (r
->rtm_type
== RTN_UNREACHABLE
)
1400 rtmsg
->rtmsg_flags
|= RTF_REJECT
;
1402 if (rta
[RTA_GATEWAY
-1]) {
1403 if (rta
[RTA_GATEWAY
-1]->rta_len
!= RTA_LENGTH(16))
1405 memcpy(&rtmsg
->rtmsg_gateway
, RTA_DATA(rta
[RTA_GATEWAY
-1]), 16);
1406 rtmsg
->rtmsg_flags
|= RTF_GATEWAY
;
1408 if (rta
[RTA_DST
-1]) {
1409 if (RTA_PAYLOAD(rta
[RTA_DST
-1]) < ((r
->rtm_dst_len
+7)>>3))
1411 memcpy(&rtmsg
->rtmsg_dst
, RTA_DATA(rta
[RTA_DST
-1]), ((r
->rtm_dst_len
+7)>>3));
1413 if (rta
[RTA_SRC
-1]) {
1414 if (RTA_PAYLOAD(rta
[RTA_SRC
-1]) < ((r
->rtm_src_len
+7)>>3))
1416 memcpy(&rtmsg
->rtmsg_src
, RTA_DATA(rta
[RTA_SRC
-1]), ((r
->rtm_src_len
+7)>>3));
1418 if (rta
[RTA_OIF
-1]) {
1419 if (rta
[RTA_OIF
-1]->rta_len
!= RTA_LENGTH(sizeof(int)))
1421 memcpy(&rtmsg
->rtmsg_ifindex
, RTA_DATA(rta
[RTA_OIF
-1]), sizeof(int));
1423 if (rta
[RTA_PRIORITY
-1]) {
1424 if (rta
[RTA_PRIORITY
-1]->rta_len
!= RTA_LENGTH(4))
1426 memcpy(&rtmsg
->rtmsg_metric
, RTA_DATA(rta
[RTA_PRIORITY
-1]), 4);
1431 int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1433 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1434 struct in6_rtmsg rtmsg
;
1436 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1438 return ip6_route_del(&rtmsg
, nlh
, arg
);
1441 int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
1443 struct rtmsg
*r
= NLMSG_DATA(nlh
);
1444 struct in6_rtmsg rtmsg
;
1446 if (inet6_rtm_to_rtmsg(r
, arg
, &rtmsg
))
1448 return ip6_route_add(&rtmsg
, nlh
, arg
);
1451 struct rt6_rtnl_dump_arg
1453 struct sk_buff
*skb
;
1454 struct netlink_callback
*cb
;
/*
 * Serialize one rt6_info into an RTM_* netlink message appended to @skb.
 *
 * @dst/@src: when non-NULL, report these exact addresses (host routes,
 *            /128) instead of the route's own prefix — used by
 *            inet6_rtm_getroute() for a specific lookup result.
 * @iif:      input interface index to report (0 = none).
 * @prefix:   when non-zero the caller wants prefix routes only; other
 *            routes are skipped with a "success" result.
 *
 * Returns skb->len on success, 1 when the route was deliberately
 * skipped, -1 when the skb ran out of room (partial output is trimmed).
 *
 * NOTE(review): this excerpt is missing several lines of the canonical
 * source (rtm_tos/rtm_flags zeroing, ci zero fields, labels, returns);
 * they are restored here from the historical tree — verify against it.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif,
			 int type, u32 pid, u32 seq,
			 struct nlmsghdr *in_nlh, int prefix)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;	/* rollback point on failure */
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	/* Inherit the requesting pid from the triggering message if unset. */
	if (!pid && in_nlh) {
		pid = in_nlh->nlmsg_pid;
	}

	/* NLMSG_PUT jumps to nlmsg_failure if the skb has no room. */
	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	/* Classify the route type for userspace. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	/* Origin of the route: redirect, kernel autoconf, or RA default. */
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & (RTF_ADDRCONF | RTF_ALLONLINK))
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* Destination: exact address requested, else route prefix. */
	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		/* No input interface: report preferred source address. */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache bookkeeping reported to userspace (ip -6 route show cache). */
	ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
	if (rt->rt6i_expires)
		ci.rta_expires = rt->rt6i_expires - jiffies;
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Out of room: discard the partially written message. */
	skb_trim(skb, b - skb->data);
	return -1;
}
/*
 * Per-route callback for the fib6 walker during a netlink dump.
 * Extracts the RTM_F_PREFIX preference from the original request and
 * forwards to rt6_fill_node().  Returns rt6_fill_node()'s result:
 * negative means "skb full, suspend the walk".
 */
static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	struct rtmsg *rtm;
	int prefix;

	rtm = NLMSG_DATA(arg->cb->nlh);
	if (rtm)
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	else
		prefix = 0;

	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     NULL, prefix);
}
/*
 * Walker callback: dump every route hanging off the current fib6 leaf.
 *
 * Returns 0 when the whole leaf was emitted, or 1 to suspend the walk
 * with w->leaf pointing at the first route that did not fit — the next
 * fib6_walk_continue() resumes exactly there.
 */
static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->u.next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;	/* remember where to resume */
			return 1;
		}
		BUG_TRAP(res!=0);
	}
	w->leaf = NULL;		/* leaf fully dumped */
	return 0;
}
/*
 * Tear down the dump state attached to a netlink callback: unlink and
 * free the walker stashed in cb->args[0] and restore the original done
 * handler saved in cb->args[1] by inet6_dump_fib().
 */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);	/* detach from the global walker list */
		kfree(w);
	}
	/* Restore the destructor we hooked in inet6_dump_fib(). */
	cb->done = (void*)cb->args[1];
	cb->args[1] = 0;
}
/*
 * Netlink callback destructor installed by inet6_dump_fib(): release
 * the walker, then chain to the callback's original done handler
 * (which fib6_dump_end() just restored into cb->done).
 */
static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done(cb);
}
/*
 * Netlink RTM_GETROUTE dump entry point for the IPv6 FIB.
 *
 * A dump may span several invocations (one per skb the requester
 * drains).  State is kept in cb->args: [0] holds the fib6 walker,
 * [1] the caller's original done handler, replaced by fib6_dump_done
 * so the walker is always reclaimed.
 *
 * Returns skb->len (>0: more to come, 0: complete) or a negative errno.
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Resumed dump: refresh the arg pointer (stack-local). */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
/*
 * Netlink RTM_GETROUTE handler for a single route lookup.
 *
 * Builds a flow key from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes,
 * resolves it through ip6_route_output(), serializes the result with
 * rt6_fill_node() and unicasts the answer back to the requester.
 * Returns 0 on success or a negative errno; cleanup is goto-chained.
 *
 * NOTE(review): error-path lines (skb NULL check, -ENODEV on bad iif,
 * out/out_free labels) are restored from the canonical tree — verify.
 */
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		/* Validate the requested input interface exists. */
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	/* Hand the route reference to the skb; freed with it. */
	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, nlh, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}
/*
 * Broadcast a route change (@event: RTM_NEWROUTE/RTM_DELROUTE) to the
 * RTMGRP_IPV6_ROUTE multicast group.  On allocation or serialization
 * failure the listeners are notified of the loss via netlink_set_err()
 * rather than silently dropping the event.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
{
	struct sk_buff *skb;
	/* rtmsg plus generous attribute headroom. */
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, gfp_any());
	if (!skb) {
		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
		return;
	}
	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0) < 0) {
		kfree_skb(skb);
		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
		return;
	}
	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
}
1753 #ifdef CONFIG_PROC_FS
1755 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * fib6_clean_tree() callback that formats one route as a fixed-width
 * line (RT6_INFO_LEN bytes) of /proc/net/ipv6_route output:
 * dst-addr/plen, src-addr/plen, next-hop, metric, refcnt, use, flags,
 * device name.  Fixed width lets rt6_proc_info() do offset arithmetic.
 *
 * NOTE(review): the arg->skip/arg->len bookkeeping lines were dropped
 * from this excerpt and are restored from the canonical tree — verify.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	/* Skip whole lines that precede the requested read offset. */
	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	/* Buffer already full; ignore the rest of the tree. */
	if (arg->len >= arg->length)
		return 0;

	/* Destination prefix as 32 hex digits plus its length. */
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	/* Without subtrees the source prefix is always all-zero. */
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	/* Next hop, or zeros for directly connected routes. */
	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
/*
 * Old-style /proc read handler for /proc/net/ipv6_route.
 *
 * Walks the routing tree under rt6_lock, letting rt6_info_route()
 * append fixed-width lines, then adjusts *start and the returned
 * length so the caller sees exactly the [offset, offset+length) slice.
 * Relies on every line being exactly RT6_INFO_LEN bytes.
 */
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	/* Point *start at the sub-line position within the first line. */
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	/* Clamp to the caller's buffer; never return negative. */
	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}
1847 extern struct rt6_statistics rt6_stats
;
/*
 * seq_file show handler for /proc/net/rt6_stats: one line of seven
 * hex counters — fib nodes, route nodes, allocations, entries, cache
 * entries, live dst entries, and discarded routes.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		      rt6_stats.fib_rt_cache,
		      atomic_read(&ip6_dst_ops.entries),
		      rt6_stats.fib_discarded_routes);

	return 0;
}
1861 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
1863 return single_open(file
, rt6_stats_seq_show
, NULL
);
/* File operations for /proc/net/rt6_stats (single-shot seq_file).
 * NOTE(review): the .read = seq_read line is restored from the
 * canonical tree — it is not visible in this excerpt. */
static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
1873 #endif /* CONFIG_PROC_FS */
1875 #ifdef CONFIG_SYSCTL
1877 static int flush_delay
;
/*
 * Handler for the net.ipv6.route.flush sysctl: writing a value runs
 * the fib6 garbage collector immediately.  A negative written value is
 * clamped to 0 (flush everything now).  Reads are rejected — the knob
 * is write-only.
 */
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
			      void *buffer, size_t *lenp)
{
	if (write) {
		/* Parse the written integer into flush_delay. */
		proc_dointvec(ctl, write, filp, buffer, lenp);
		if (flush_delay < 0)
			flush_delay = 0;
		fib6_run_gc((unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}
/*
 * sysctl table for net.ipv6.route.*.  Interval/timeout knobs store
 * jiffies and use proc_dointvec_jiffies + sysctl_jiffies so userspace
 * reads/writes seconds.  Terminated by a zero ctl_name sentinel.
 *
 * NOTE(review): the .mode fields and the sentinel entry are restored
 * from the canonical tree (0200 write-only for "flush", 0644 for the
 * rest) — they are not visible in this excerpt; verify.
 */
ctl_table ipv6_route_table[] = {
	{
		/* Write-only trigger: flush the route cache now. */
		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
		.procname	=	"flush",
		.data		=	&flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	&ipv6_sysctl_rtcache_flush
	},
	{
		/* GC starts when dst entries exceed this threshold. */
		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		/* Hard cap on the number of cached routes. */
		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
		.procname	=	"max_size",
		.data		=	&ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	=	"gc_min_interval",
		.data		=	&ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	=	"gc_timeout",
		.data		=	&ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	=	"gc_interval",
		.data		=	&ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	=	"gc_elasticity",
		.data		=	&ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		/* How long a learned PMTU overrides the route MTU. */
		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	=	"mtu_expires",
		.data		=	&ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{
		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	=	"min_adv_mss",
		.data		=	&ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	&proc_dointvec_jiffies,
		.strategy	=	&sysctl_jiffies,
	},
	{ .ctl_name = 0 }
};
/*
 * Boot-time initialization of the IPv6 routing subsystem: create the
 * rt6_info slab cache and register the /proc entries.
 *
 * NOTE(review): lines between the cache creation and the proc setup
 * (e.g. a panic on allocation failure and fib6_init()) are not visible
 * in this excerpt — verify against the canonical tree.
 */
void __init ip6_route_init(void)
{
	struct proc_dir_entry *p;

	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						     sizeof(struct rt6_info),
						     0, SLAB_HWCACHE_ALIGN,
						     NULL, NULL);
	fib6_init();
#ifdef	CONFIG_PROC_FS
	proc_net_create("ipv6_route", 0, rt6_proc_info);
	p = create_proc_entry("rt6_stats", S_IRUGO, proc_net);
	if (p)
		p->proc_fops = &rt6_stats_seq_fops;
#endif
}
/*
 * Module/teardown counterpart of ip6_route_init(): remove the /proc
 * entries and destroy the rt6_info slab cache.
 *
 * NOTE(review): intermediate teardown calls (e.g. fib6 GC cleanup)
 * are not visible in this excerpt — verify against the canonical tree.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}