/*
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 */
#include <linux/capability.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/netlink.h>
#include <linux/if_arp.h>

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

#define CLONE_OFFLINK_ROUTE 0

#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2
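/*
 * RT6_SELECT_F_IFACE restricts rt6_select() to routes whose device matches
 * the requested outgoing interface; RT6_SELECT_F_REACHABLE prefers routers
 * whose neighbour entry is in a valid NUD state (see rt6_check_neigh()),
 * following the default router selection rules of RFC 2461 6.3.6.
 */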
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
int ip6_rt_gc_interval = 30*HZ;
static int ip6_rt_gc_elasticity = 9;
static int ip6_rt_mtu_expires = 10*60*HZ;
static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
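/*
 * These defaults are exported as sysctls through ipv6_route_table below
 * (e.g. net.ipv6.route.max_size, gc_timeout, mtu_expires); values held
 * in jiffies are converted by proc_dointvec_jiffies.
 */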
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		ip6_dst_gc(void);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static struct dst_ops ip6_dst_ops = {
	.family			= AF_INET6,
	.protocol		= __constant_htons(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.check			= ip6_dst_check,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.entry_size		= sizeof(struct rt6_info),
};
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry *)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
struct fib6_node ip6_routing_table = {
	.leaf		= &ip6_null_entry,
	.fn_flags	= RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
};

/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);
/* allocate dst with ip6_dst_ops */
static __inline__ struct rt6_info *ip6_dst_alloc(void)
{
	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES &&
		time_after(jiffies, rt->rt6i_expires));
}
/*
 *	Route lookup. Any rt6_lock is implied.
 */

static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
						    int oif,
						    int strict)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (oif) {
		for (sprt = rt; sprt; sprt = sprt->u.next) {
			struct net_device *dev = sprt->rt6i_dev;
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (strict && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		}

		if (local)
			return local;

		if (strict)
			return &ip6_null_entry;
	}
	return rt;
}
/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static int inline rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}
static int inline rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m = 0;
	if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 1;
		read_unlock_bh(&neigh->lock);
	}
	return m;
}
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_SELECT_F_IFACE))
		return -1;
	if (rt6_check_neigh(rt))
		m |= 4;
	else if (strict & RT6_SELECT_F_REACHABLE)
		return -1;
	return m;
}
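/*
 * The resulting score: bit 2 (m & 4) means the nexthop neighbour is in a
 * valid NUD state, while the low bits come from rt6_check_dev() (2 when the
 * route is on the requested interface, 1 for its loopback companion).  A
 * negative score rejects the route outright under the given strictness flags.
 */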
static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
				   int strict)
{
	struct rt6_info *match = NULL, *last = NULL;
	struct rt6_info *rt, *rt0 = *head;
	u32 metric;
	int mpri = 0;

	RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
		  __FUNCTION__, head, head ? *head : NULL, oif);

	for (rt = rt0, metric = rt0->rt6i_metric;
	     rt && rt->rt6i_metric == metric;
	     rt = rt->u.next) {
		int m;

		if (rt6_check_expired(rt))
			continue;

		last = rt;

		m = rt6_score_route(rt, oif, strict);
		if (m < 0)
			continue;

		if (m > mpri) {
			match = rt;
			mpri = m;
		}
	}

	if (!match &&
	    (strict & RT6_SELECT_F_REACHABLE) &&
	    last && last != rt0) {
		/* no entries matched; do round-robin */
		*head = rt0->u.next;
		rt0->u.next = last->u.next;
		last->u.next = rt0;
	}

	RT6_TRACE("%s() => %p, score=%d\n",
		  __FUNCTION__, match, mpri);

	return (match ? match : &ip6_null_entry);
}
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	rt = rt6_device_match(fn->leaf, oif, strict);
	dst_hold(&rt->u.dst);
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);
	return NULL;
}
/* ip6_ins_rt is called with FREE rt6_lock.
   It takes the new route entry; if the addition fails for any reason the
   route is freed.  In any case, if the caller does not hold a reference,
   the route may be destroyed.
 */

int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);
	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
	write_unlock_bh(&rt6_lock);

	return err;
}
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	}

	return rt;
}
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort);
	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		if (rt->rt6i_flags & RTF_REJECT)
			rt->u.dst.error = ort->u.dst.error;
		rt->u.dst.flags |= DST_HOST;
		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
	}
	return rt;
}
#define BACKTRACK() \
if (rt == &ip6_null_entry && strict) { \
	while ((fn = fn->parent) != NULL) { \
		if (fn->fn_flags & RTN_ROOT) { \
			dst_hold(&rt->u.dst); \
			goto out; \
		} \
		if (fn->fn_flags & RTN_RTINFO) \
			goto restart; \
	} \
}
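/*
 * BACKTRACK() is expanded inside ip6_route_input() and ip6_route_output():
 * when the longest-prefix match came back as ip6_null_entry under strict
 * matching, walk back up the fib6 tree to the nearest ancestor that still
 * carries route information (RTN_RTINFO) and retry from there, giving up
 * once the root is reached.
 */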
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;

	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
		BACKTRACK();
		goto out;
	}

	rt = rt6_device_match(rt, skb->dev->ifindex, strict);
	BACKTRACK();

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict;
	int attempts = 3;
	int err;

	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
		goto out;
	}
	if (rt->rt6i_flags & RTF_DEFAULT) {
		rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
		if (rt == &ip6_null_entry)
			rt = rt6_select(&fn->leaf, fl->oif, strict);
	} else {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
	}

	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		goto out2;
#endif
	}

	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt, NULL, NULL, NULL);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when rt6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	dst_hold(&rt->u.dst);
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}
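/*
 * Note the two-pass default router selection above: the first rt6_select()
 * pass asks only for routers that are (probably) reachable; only when that
 * yields nothing do we retry without RT6_SELECT_F_REACHABLE and accept any
 * default router on the interface.
 */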
/*
 *	Destination cache support functions
 */

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
		return dst;

	return NULL;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			ip6_del_rt(rt, NULL, NULL, NULL);
		else
			dst_release(dst);
	}
	return NULL;
}
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

	rt = (struct rt6_info *) skb->dst;
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_set_expires(&rt->u.dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		}
		dst->metrics[RTAX_MTU-1] = mtu;
	}
}
/* Protected by rt6_lock.  */
static struct dst_entry *ndisc_dst_gc_list;
static int ipv6_get_mtu(struct net_device *dev);

static inline unsigned int ipv6_advmss(unsigned int mtu)
{
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < ip6_rt_min_advmss)
		mtu = ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
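/*
 * Worked example: on a 1500-byte MTU link the advertised MSS becomes
 * 1500 - 40 (ipv6hdr) - 20 (tcphdr) = 1440 bytes.  The lower bound is
 * ip6_rt_min_advmss (IPV6_MIN_MTU - 60 = 1220); anything above
 * IPV6_MAXPLEN - sizeof(struct tcphdr) is clamped to IPV6_MAXPLEN.
 */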
struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
				  struct neighbour *neigh,
				  struct in6_addr *addr,
				  int (*output)(struct sk_buff *))
{
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);

	if (unlikely(idev == NULL))
		return NULL;

	rt = ip6_dst_alloc();
	if (unlikely(rt == NULL)) {
		in6_dev_put(idev);
		goto out;
	}

	dev_hold(dev);
	if (neigh)
		neigh_hold(neigh);
	else
		neigh = ndisc_get_neigh(dev, addr);

	rt->rt6i_dev	  = dev;
	rt->rt6i_idev	  = idev;
	rt->rt6i_nexthop  = neigh;
	atomic_set(&rt->u.dst.__refcnt, 1);
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.output  = output;

#if 0	/* there's no chance to use these for ndisc */
	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
				? DST_HOST
				: 0;
	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
#endif

	write_lock_bh(&rt6_lock);
	rt->u.dst.next = ndisc_dst_gc_list;
	ndisc_dst_gc_list = &rt->u.dst;
	write_unlock_bh(&rt6_lock);

	fib6_force_start_gc();

out:
	return (struct dst_entry *)rt;
}
int ndisc_dst_gc(int *more)
{
	struct dst_entry *dst, *next, **pprev;
	int freed;

	next = NULL;
	freed = 0;

	pprev = &ndisc_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
			freed++;
		} else {
			pprev = &dst->next;
			(*more)++;
		}
	}

	return freed;
}
static int ip6_dst_gc(void)
{
	static unsigned expire = 30*HZ;
	static unsigned long last_gc;
	unsigned long now = jiffies;

	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
		goto out;

	expire++;
	fib6_run_gc(expire);
	last_gc = now;
	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
		expire = ip6_rt_gc_timeout>>1;

out:
	expire -= expire>>ip6_rt_gc_elasticity;
	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
}
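/*
 * The effective GC interval adapts: each run trims "expire" by a
 * 1/2^gc_elasticity fraction, and a run that brings the table back below
 * gc_thresh resets it to half of gc_timeout, so sustained pressure on the
 * cache makes collection progressively more aggressive.
 */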
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */
static int ipv6_get_mtu(struct net_device *dev)
{
	int mtu = IPV6_MIN_MTU;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		mtu = idev->cnf.mtu6;
		in6_dev_put(idev);
	}
	return mtu;
}
int ipv6_get_hoplimit(struct net_device *dev)
{
	int hoplimit = ipv6_devconf.hop_limit;
	struct inet6_dev *idev;

	idev = in6_dev_get(dev);
	if (idev) {
		hoplimit = idev->cnf.hop_limit;
		in6_dev_put(idev);
	}
	return hoplimit;
}
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
		void *_rtattr, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rtattr **rta;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	int addr_type;

	rta = (struct rtattr **) _rtattr;

	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) && !(addr_type & IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != &loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = &loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type & IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	if (rta && rta[RTA_METRICS-1]) {
		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}
				rt->u.dst.metrics[flavor-1] =
					*(u32 *)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}

	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	return ip6_ins_rt(rt, nlh, _rtattr, req);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free((struct dst_entry *) rt);
	return err;
}
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	err = fib6_del(rt, nlh, _rtattr, req);
	dst_release(&rt->u.dst);

	write_unlock_bh(&rt6_lock);

	return err;
}
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, _rtattr, req);
		}
	}
	read_unlock_bh(&rt6_lock);

	return err;
}
/*
 *	Handle redirects
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt;

	/* Locate old route to this destination. */
	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

	if (rt == NULL)
		return;

	if (neigh->dev != rt->rt6i_dev)
		goto out;

	/*
	 * Current route is on-link; redirect is always invalid.
	 *
	 * Seems, previous statement is not true. It could
	 * be node, which looks for us as on-link (f.e. proxy ndisc)
	 * But then router serving it might decide, that we should
	 * know truth 8)8) --ANK (980726).
	 */
	if (!(rt->rt6i_flags & RTF_GATEWAY))
		goto out;

	/*
	 *	RFC 2461 specifies that redirects should only be
	 *	accepted if they come from the nexthop to the target.
	 *	Due to the way default routers are chosen, this notion
	 *	is a bit fuzzy and one might need to check all default
	 *	routers.
	 */
	if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
		if (rt->rt6i_flags & RTF_DEFAULT) {
			struct rt6_info *rt1;

			read_lock(&rt6_lock);
			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
				if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
					dst_hold(&rt1->u.dst);
					dst_release(&rt->u.dst);
					read_unlock(&rt6_lock);
					rt = rt1;
					goto source_ok;
				}
			}
			read_unlock(&rt6_lock);
		}
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

source_ok:

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr *)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt, NULL, NULL, NULL))
		goto out;

	if (rt->rt6i_flags & RTF_CACHE) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		return;
	}

out:
	dst_release(&rt->u.dst);
	return;
}
/*
 *	Handle ICMP "packet too big" messages,
 *	i.e. Path MTU discovery
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC 2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receives a Too Big message reporting a PMTU
		 * below the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

		/* According to RFC 1981, a PMTU increase should not be
		 * detected again within 5 minutes; the recommended timer
		 * is 10 minutes.  This route therefore expires after
		 * ip6_rt_mtu_expires (10 minutes), after which the reduced
		 * pmtu is dropped and a larger PMTU can be rediscovered.
		 */
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt, NULL, NULL, NULL);
	}
out:
	dst_release(&rt->u.dst);
}
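/*
 * Per RFC 2460, a reported PMTU below IPV6_MIN_MTU (1280) does not shrink
 * the route MTU any further; instead the route keeps MTU 1280 and gets
 * RTAX_FEATURE_ALLFRAG set, so every packet on this path carries a
 * fragment header.
 */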
/*
 *	Misc support functions
 */

static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt) {
		rt->u.dst.input = ort->u.dst.input;
		rt->u.dst.output = ort->u.dst.output;

		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
		rt->u.dst.dev = ort->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->u.dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}
	return rt;
}
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_node *fn;

	fn = &ip6_routing_table;

	write_lock_bh(&rt6_lock);
	for (rt = fn->leaf; rt; rt = rt->u.next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	write_unlock_bh(&rt6_lock);
	return rt;
}
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev)
{
	struct in6_rtmsg rtmsg;

	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
	rtmsg.rtmsg_metric = 1024;
	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;

	rtmsg.rtmsg_ifindex = dev->ifindex;

	ip6_route_add(&rtmsg, NULL, NULL, NULL);
	return rt6_get_dflt_router(gwaddr, dev);
}
void rt6_purge_dflt_routers(void)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			dst_hold(&rt->u.dst);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}
int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
{
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
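/*
 * Illustrative sketch (not part of this file): from userspace these ioctls
 * are reached through an AF_INET6 socket, roughly as the route(8) utility
 * does.  Assuming a filled struct in6_rtmsg from <linux/ipv6_route.h>:
 *
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	struct in6_rtmsg rt = { .rtmsg_dst_len = 64,
 *				.rtmsg_flags = RTF_UP,
 *				.rtmsg_metric = 1 };
 *	// rt.rtmsg_dst filled with the destination prefix
 *	rt.rtmsg_ifindex = if_nametoindex("eth0");
 *	if (ioctl(fd, SIOCADDRT, &rt) < 0)
 *		perror("SIOCADDRT");
 */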
/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb->dst->dev;
	return ip6_pkt_discard(skb);
}
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct rt6_info *rt = ip6_dst_alloc();

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	dev_hold(&loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = &loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (rt->rt6i_nexthop == NULL) {
		dst_free((struct dst_entry *) rt);
		return ERR_PTR(-ENOMEM);
	}

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;

	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	if (((void *)rt->rt6i_dev == arg || arg == NULL) &&
	    rt != &ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For an administrative MTU increase there is no way to discover
	   an IPv6 PMTU increase, so the PMTU must be updated here.
	   Since RFC 1981 doesn't cover administrative MTU increases,
	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will be
	   the lowest MTU in the path; update the route PMTU to reflect the
	   decrease.  If the new MTU is greater than the route PMTU and the
	   old MTU was the lowest MTU in the path, update the route PMTU to
	   reflect the increase.  In that case, if the other nodes' MTU is
	   now the lowest in the path, a Packet Too Big message will trigger
	   PMTU discovery again.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
	    (dst_mtu(&rt->u.dst) > arg->mtu ||
	     (dst_mtu(&rt->u.dst) < arg->mtu &&
	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
	}
	return 0;
}
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg;

	arg.dev = dev;
	arg.mtu = mtu;
	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
	read_unlock_bh(&rt6_lock);
}
static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
			      struct in6_rtmsg *rtmsg)
{
	memset(rtmsg, 0, sizeof(*rtmsg));

	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
	rtmsg->rtmsg_src_len = r->rtm_src_len;
	rtmsg->rtmsg_flags = RTF_UP;
	if (r->rtm_type == RTN_UNREACHABLE)
		rtmsg->rtmsg_flags |= RTF_REJECT;

	if (rta[RTA_GATEWAY-1]) {
		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
		rtmsg->rtmsg_flags |= RTF_GATEWAY;
	}
	if (rta[RTA_DST-1]) {
		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
	}
	if (rta[RTA_SRC-1]) {
		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
	}
	if (rta[RTA_OIF-1]) {
		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
	}
	if (rta[RTA_PRIORITY-1]) {
		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
			return -EINVAL;
		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
	}
	return 0;
}
int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct rtmsg *r = NLMSG_DATA(nlh);
	struct in6_rtmsg rtmsg;

	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
		return -EINVAL;
	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}

struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		RTA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
	if (rt->rt6i_expires)
		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
			     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
			     prefix, NLM_F_MULTI);
}

static int fib6_dump_node(struct fib6_walker_t *w)
{
	int res;
	struct rt6_info *rt;

	for (rt = w->leaf; rt; rt = rt->u.next) {
		res = rt6_dump_route(rt, w->args);
		if (res < 0) {
			/* Frame is full, suspend walking */
			w->leaf = rt;
			return 1;
		}
	}
	w->leaf = NULL;
	return 0;
}

static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void *)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	cb->done = (void *)cb->args[1];
	cb->args[1] = 0;
}

static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done ? cb->done(cb) : 0;
}

int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void *)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}

int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	memset(&fl, 0, sizeof(fl));
	if (rta[RTA_SRC-1])
		ipv6_addr_copy(&fl.fl6_src,
			       (struct in6_addr *)RTA_DATA(rta[RTA_SRC-1]));
	if (rta[RTA_DST-1])
		ipv6_addr_copy(&fl.fl6_dst,
			       (struct in6_addr *)RTA_DATA(rta[RTA_DST-1]));

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info *)ip6_route_output(NULL, &fl);

	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    &fl.fl6_dst, &fl.fl6_src,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
		     struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
	u32 pid = current->pid;
	u32 seq = 0;

	if (req)
		pid = req->pid;
	if (nlh)
		seq = nlh->nlmsg_seq;

	skb = alloc_skb(size, gfp_any());
	if (!skb) {
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
		return;
	}
	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
		kfree_skb(skb);
		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
		return;
	}
	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

/* Each /proc/net/ipv6_route line carries: destination address/plen,
 * source address/plen, nexthop, metric, refcnt, use count, flags and
 * the device name.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	if (arg->len >= arg->length)
		return 0;

	for (i = 0; i < 16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i = 0; i < 16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i = 0; i < 16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}

static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}

static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
		   rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
		   rt6_stats.fib_rt_cache,
		   atomic_read(&ip6_dst_ops.entries),
		   rt6_stats.fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt6_stats_seq_show, NULL);
}

static struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static int flush_delay;

static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file *filp,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	if (write) {
		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
		return 0;
	} else
		return -EINVAL;
}

ctl_table ipv6_route_table[] = {
	{
		.ctl_name	= NET_IPV6_ROUTE_FLUSH,
		.procname	= "flush",
		.data		= &flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= &ipv6_sysctl_rtcache_flush
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_THRESH,
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MAX_SIZE,
		.procname	= "max_size",
		.data		= &ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL,
		.procname	= "gc_min_interval",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_TIMEOUT,
		.procname	= "gc_timeout",
		.data		= &ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_INTERVAL,
		.procname	= "gc_interval",
		.data		= &ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_ELASTICITY,
		.procname	= "gc_elasticity",
		.data		= &ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MTU_EXPIRES,
		.procname	= "mtu_expires",
		.data		= &ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_MIN_ADVMSS,
		.procname	= "min_adv_mss",
		.data		= &ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
		.strategy	= &sysctl_jiffies,
	},
	{
		.ctl_name	= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
		.procname	= "gc_min_interval_ms",
		.data		= &ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_ms_jiffies,
		.strategy	= &sysctl_ms_jiffies,
	},
	{ .ctl_name = 0 }
};

#endif	/* CONFIG_SYSCTL */

void __init ip6_route_init(void)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *p;
#endif
	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
						    sizeof(struct rt6_info),
						    0, SLAB_HWCACHE_ALIGN,
						    NULL, NULL);
	if (!ip6_dst_ops.kmem_cachep)
		panic("cannot create ip6_dst_cache");

	fib6_init();
#ifdef CONFIG_PROC_FS
	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	if (p)
		p->owner = THIS_MODULE;

	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
}

void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
}