2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/mroute6.h>
40 #include <linux/init.h>
41 #include <linux/if_arp.h>
44 #include <linux/proc_fs.h>
45 #include <linux/seq_file.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
61 #include <asm/uaccess.h>
64 #include <linux/sysctl.h>
67 /* Set to 3 to get tracing. */
71 #define RDBG(x) printk x
72 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
75 #define RT6_TRACE(x...) do { ; } while (0)
78 #define CLONE_OFFLINK_ROUTE 0
80 static int ip6_rt_max_size
= 4096;
81 static int ip6_rt_gc_min_interval
= HZ
/ 2;
82 static int ip6_rt_gc_timeout
= 60*HZ
;
83 int ip6_rt_gc_interval
= 30*HZ
;
84 static int ip6_rt_gc_elasticity
= 9;
85 static int ip6_rt_mtu_expires
= 10*60*HZ
;
86 static int ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
88 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
89 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
90 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
91 static void ip6_dst_destroy(struct dst_entry
*);
92 static void ip6_dst_ifdown(struct dst_entry
*,
93 struct net_device
*dev
, int how
);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff
*skb
);
97 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
98 static void ip6_link_failure(struct sk_buff
*skb
);
99 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info
*rt6_add_route_info(struct in6_addr
*prefix
, int prefixlen
,
103 struct in6_addr
*gwaddr
, int ifindex
,
105 static struct rt6_info
*rt6_get_route_info(struct in6_addr
*prefix
, int prefixlen
,
106 struct in6_addr
*gwaddr
, int ifindex
);
109 static struct dst_ops ip6_dst_ops
= {
111 .protocol
= __constant_htons(ETH_P_IPV6
),
114 .check
= ip6_dst_check
,
115 .destroy
= ip6_dst_destroy
,
116 .ifdown
= ip6_dst_ifdown
,
117 .negative_advice
= ip6_negative_advice
,
118 .link_failure
= ip6_link_failure
,
119 .update_pmtu
= ip6_rt_update_pmtu
,
120 .entry_size
= sizeof(struct rt6_info
),
123 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
127 static struct dst_ops ip6_dst_blackhole_ops
= {
129 .protocol
= __constant_htons(ETH_P_IPV6
),
130 .destroy
= ip6_dst_destroy
,
131 .check
= ip6_dst_check
,
132 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
133 .entry_size
= sizeof(struct rt6_info
),
136 struct rt6_info ip6_null_entry
= {
139 .__refcnt
= ATOMIC_INIT(1),
141 .dev
= &loopback_dev
,
143 .error
= -ENETUNREACH
,
144 .metrics
= { [RTAX_HOPLIMIT
- 1] = 255, },
145 .input
= ip6_pkt_discard
,
146 .output
= ip6_pkt_discard_out
,
148 .path
= (struct dst_entry
*)&ip6_null_entry
,
151 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
152 .rt6i_protocol
= RTPROT_KERNEL
,
153 .rt6i_metric
= ~(u32
) 0,
154 .rt6i_ref
= ATOMIC_INIT(1),
157 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
159 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
160 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
161 static int ip6_pkt_blk_hole(struct sk_buff
*skb
);
163 struct rt6_info ip6_prohibit_entry
= {
166 .__refcnt
= ATOMIC_INIT(1),
168 .dev
= &loopback_dev
,
171 .metrics
= { [RTAX_HOPLIMIT
- 1] = 255, },
172 .input
= ip6_pkt_prohibit
,
173 .output
= ip6_pkt_prohibit_out
,
175 .path
= (struct dst_entry
*)&ip6_prohibit_entry
,
178 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
179 .rt6i_protocol
= RTPROT_KERNEL
,
180 .rt6i_metric
= ~(u32
) 0,
181 .rt6i_ref
= ATOMIC_INIT(1),
184 struct rt6_info ip6_blk_hole_entry
= {
187 .__refcnt
= ATOMIC_INIT(1),
189 .dev
= &loopback_dev
,
192 .metrics
= { [RTAX_HOPLIMIT
- 1] = 255, },
193 .input
= ip6_pkt_blk_hole
,
194 .output
= ip6_pkt_blk_hole
,
196 .path
= (struct dst_entry
*)&ip6_blk_hole_entry
,
199 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
200 .rt6i_protocol
= RTPROT_KERNEL
,
201 .rt6i_metric
= ~(u32
) 0,
202 .rt6i_ref
= ATOMIC_INIT(1),
207 /* allocate dst with ip6_dst_ops */
208 static __inline__
struct rt6_info
*ip6_dst_alloc(void)
210 return (struct rt6_info
*)dst_alloc(&ip6_dst_ops
);
213 static void ip6_dst_destroy(struct dst_entry
*dst
)
215 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
216 struct inet6_dev
*idev
= rt
->rt6i_idev
;
219 rt
->rt6i_idev
= NULL
;
224 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
227 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
228 struct inet6_dev
*idev
= rt
->rt6i_idev
;
230 if (dev
!= &loopback_dev
&& idev
!= NULL
&& idev
->dev
== dev
) {
231 struct inet6_dev
*loopback_idev
= in6_dev_get(&loopback_dev
);
232 if (loopback_idev
!= NULL
) {
233 rt
->rt6i_idev
= loopback_idev
;
239 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
241 return (rt
->rt6i_flags
& RTF_EXPIRES
&&
242 time_after(jiffies
, rt
->rt6i_expires
));
245 static inline int rt6_need_strict(struct in6_addr
*daddr
)
247 return (ipv6_addr_type(daddr
) &
248 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
));
252 * Route lookup. Any table->tb6_lock is implied.
255 static __inline__
struct rt6_info
*rt6_device_match(struct rt6_info
*rt
,
259 struct rt6_info
*local
= NULL
;
260 struct rt6_info
*sprt
;
263 for (sprt
= rt
; sprt
; sprt
= sprt
->u
.dst
.rt6_next
) {
264 struct net_device
*dev
= sprt
->rt6i_dev
;
265 if (dev
->ifindex
== oif
)
267 if (dev
->flags
& IFF_LOOPBACK
) {
268 if (sprt
->rt6i_idev
== NULL
||
269 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
272 if (local
&& (!oif
||
273 local
->rt6i_idev
->dev
->ifindex
== oif
))
284 return &ip6_null_entry
;
289 #ifdef CONFIG_IPV6_ROUTER_PREF
290 static void rt6_probe(struct rt6_info
*rt
)
292 struct neighbour
*neigh
= rt
? rt
->rt6i_nexthop
: NULL
;
294 * Okay, this does not seem to be appropriate
295 * for now, however, we need to check if it
296 * is really so; aka Router Reachability Probing.
298 * Router Reachability Probe MUST be rate-limited
299 * to no more than one per minute.
301 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
303 read_lock_bh(&neigh
->lock
);
304 if (!(neigh
->nud_state
& NUD_VALID
) &&
305 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
306 struct in6_addr mcaddr
;
307 struct in6_addr
*target
;
309 neigh
->updated
= jiffies
;
310 read_unlock_bh(&neigh
->lock
);
312 target
= (struct in6_addr
*)&neigh
->primary_key
;
313 addrconf_addr_solict_mult(target
, &mcaddr
);
314 ndisc_send_ns(rt
->rt6i_dev
, NULL
, target
, &mcaddr
, NULL
);
316 read_unlock_bh(&neigh
->lock
);
319 static inline void rt6_probe(struct rt6_info
*rt
)
326 * Default Router Selection (RFC 2461 6.3.6)
328 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
330 struct net_device
*dev
= rt
->rt6i_dev
;
331 if (!oif
|| dev
->ifindex
== oif
)
333 if ((dev
->flags
& IFF_LOOPBACK
) &&
334 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
339 static inline int rt6_check_neigh(struct rt6_info
*rt
)
341 struct neighbour
*neigh
= rt
->rt6i_nexthop
;
343 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
344 !(rt
->rt6i_flags
& RTF_GATEWAY
))
347 read_lock_bh(&neigh
->lock
);
348 if (neigh
->nud_state
& NUD_VALID
)
350 #ifdef CONFIG_IPV6_ROUTER_PREF
351 else if (neigh
->nud_state
& NUD_FAILED
)
356 read_unlock_bh(&neigh
->lock
);
362 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
367 m
= rt6_check_dev(rt
, oif
);
368 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
370 #ifdef CONFIG_IPV6_ROUTER_PREF
371 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
373 n
= rt6_check_neigh(rt
);
374 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
379 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
380 int *mpri
, struct rt6_info
*match
)
384 if (rt6_check_expired(rt
))
387 m
= rt6_score_route(rt
, oif
, strict
);
392 if (strict
& RT6_LOOKUP_F_REACHABLE
)
396 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
404 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
405 struct rt6_info
*rr_head
,
406 u32 metric
, int oif
, int strict
)
408 struct rt6_info
*rt
, *match
;
412 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
413 rt
= rt
->u
.dst
.rt6_next
)
414 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
415 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
416 rt
= rt
->u
.dst
.rt6_next
)
417 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
422 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
424 struct rt6_info
*match
, *rt0
;
426 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
427 __FUNCTION__
, fn
->leaf
, oif
);
431 fn
->rr_ptr
= rt0
= fn
->leaf
;
433 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
436 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
437 struct rt6_info
*next
= rt0
->u
.dst
.rt6_next
;
439 /* no entries matched; do round-robin */
440 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
447 RT6_TRACE("%s() => %p\n",
448 __FUNCTION__
, match
);
450 return (match
? match
: &ip6_null_entry
);
453 #ifdef CONFIG_IPV6_ROUTE_INFO
454 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
455 struct in6_addr
*gwaddr
)
457 struct route_info
*rinfo
= (struct route_info
*) opt
;
458 struct in6_addr prefix_buf
, *prefix
;
463 if (len
< sizeof(struct route_info
)) {
467 /* Sanity check for prefix_len and length */
468 if (rinfo
->length
> 3) {
470 } else if (rinfo
->prefix_len
> 128) {
472 } else if (rinfo
->prefix_len
> 64) {
473 if (rinfo
->length
< 2) {
476 } else if (rinfo
->prefix_len
> 0) {
477 if (rinfo
->length
< 1) {
482 pref
= rinfo
->route_pref
;
483 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
484 pref
= ICMPV6_ROUTER_PREF_MEDIUM
;
486 lifetime
= ntohl(rinfo
->lifetime
);
487 if (lifetime
== 0xffffffff) {
489 } else if (lifetime
> 0x7fffffff/HZ
) {
490 /* Avoid arithmetic overflow */
491 lifetime
= 0x7fffffff/HZ
- 1;
494 if (rinfo
->length
== 3)
495 prefix
= (struct in6_addr
*)rinfo
->prefix
;
497 /* this function is safe */
498 ipv6_addr_prefix(&prefix_buf
,
499 (struct in6_addr
*)rinfo
->prefix
,
501 prefix
= &prefix_buf
;
504 rt
= rt6_get_route_info(prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
);
506 if (rt
&& !lifetime
) {
512 rt
= rt6_add_route_info(prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
515 rt
->rt6i_flags
= RTF_ROUTEINFO
|
516 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
519 if (lifetime
== 0xffffffff) {
520 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
522 rt
->rt6i_expires
= jiffies
+ HZ
* lifetime
;
523 rt
->rt6i_flags
|= RTF_EXPIRES
;
525 dst_release(&rt
->u
.dst
);
531 #define BACKTRACK(saddr) \
533 if (rt == &ip6_null_entry) { \
534 struct fib6_node *pn; \
536 if (fn->fn_flags & RTN_TL_ROOT) \
539 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
540 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
543 if (fn->fn_flags & RTN_RTINFO) \
549 static struct rt6_info
*ip6_pol_route_lookup(struct fib6_table
*table
,
550 struct flowi
*fl
, int flags
)
552 struct fib6_node
*fn
;
555 read_lock_bh(&table
->tb6_lock
);
556 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
559 rt
= rt6_device_match(rt
, fl
->oif
, flags
);
560 BACKTRACK(&fl
->fl6_src
);
562 dst_hold(&rt
->u
.dst
);
563 read_unlock_bh(&table
->tb6_lock
);
565 rt
->u
.dst
.lastuse
= jiffies
;
572 struct rt6_info
*rt6_lookup(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
583 struct dst_entry
*dst
;
584 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
587 memcpy(&fl
.fl6_src
, saddr
, sizeof(*saddr
));
588 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
591 dst
= fib6_rule_lookup(&fl
, flags
, ip6_pol_route_lookup
);
593 return (struct rt6_info
*) dst
;
600 EXPORT_SYMBOL(rt6_lookup
);
602 /* ip6_ins_rt is called with FREE table->tb6_lock.
603 It takes new route entry, the addition fails by any reason the
604 route is freed. In any case, if caller does not hold it, it may
608 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
611 struct fib6_table
*table
;
613 table
= rt
->rt6i_table
;
614 write_lock_bh(&table
->tb6_lock
);
615 err
= fib6_add(&table
->tb6_root
, rt
, info
);
616 write_unlock_bh(&table
->tb6_lock
);
621 int ip6_ins_rt(struct rt6_info
*rt
)
623 return __ip6_ins_rt(rt
, NULL
);
626 static struct rt6_info
*rt6_alloc_cow(struct rt6_info
*ort
, struct in6_addr
*daddr
,
627 struct in6_addr
*saddr
)
635 rt
= ip6_rt_copy(ort
);
638 struct neighbour
*neigh
;
639 int attempts
= !in_softirq();
641 if (!(rt
->rt6i_flags
&RTF_GATEWAY
)) {
642 if (rt
->rt6i_dst
.plen
!= 128 &&
643 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, daddr
))
644 rt
->rt6i_flags
|= RTF_ANYCAST
;
645 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
648 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
649 rt
->rt6i_dst
.plen
= 128;
650 rt
->rt6i_flags
|= RTF_CACHE
;
651 rt
->u
.dst
.flags
|= DST_HOST
;
653 #ifdef CONFIG_IPV6_SUBTREES
654 if (rt
->rt6i_src
.plen
&& saddr
) {
655 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
656 rt
->rt6i_src
.plen
= 128;
661 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
663 int saved_rt_min_interval
= ip6_rt_gc_min_interval
;
664 int saved_rt_elasticity
= ip6_rt_gc_elasticity
;
666 if (attempts
-- > 0) {
667 ip6_rt_gc_elasticity
= 1;
668 ip6_rt_gc_min_interval
= 0;
672 ip6_rt_gc_elasticity
= saved_rt_elasticity
;
673 ip6_rt_gc_min_interval
= saved_rt_min_interval
;
679 "Neighbour table overflow.\n");
680 dst_free(&rt
->u
.dst
);
683 rt
->rt6i_nexthop
= neigh
;
690 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
, struct in6_addr
*daddr
)
692 struct rt6_info
*rt
= ip6_rt_copy(ort
);
694 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
695 rt
->rt6i_dst
.plen
= 128;
696 rt
->rt6i_flags
|= RTF_CACHE
;
697 rt
->u
.dst
.flags
|= DST_HOST
;
698 rt
->rt6i_nexthop
= neigh_clone(ort
->rt6i_nexthop
);
703 static struct rt6_info
*ip6_pol_route_input(struct fib6_table
*table
,
704 struct flowi
*fl
, int flags
)
706 struct fib6_node
*fn
;
707 struct rt6_info
*rt
, *nrt
;
711 int reachable
= ipv6_devconf
.forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
713 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
716 read_lock_bh(&table
->tb6_lock
);
719 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
722 rt
= rt6_select(fn
, fl
->iif
, strict
| reachable
);
723 BACKTRACK(&fl
->fl6_src
);
724 if (rt
== &ip6_null_entry
||
725 rt
->rt6i_flags
& RTF_CACHE
)
728 dst_hold(&rt
->u
.dst
);
729 read_unlock_bh(&table
->tb6_lock
);
731 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
732 nrt
= rt6_alloc_cow(rt
, &fl
->fl6_dst
, &fl
->fl6_src
);
734 #if CLONE_OFFLINK_ROUTE
735 nrt
= rt6_alloc_clone(rt
, &fl
->fl6_dst
);
741 dst_release(&rt
->u
.dst
);
742 rt
= nrt
? : &ip6_null_entry
;
744 dst_hold(&rt
->u
.dst
);
746 err
= ip6_ins_rt(nrt
);
755 * Race condition! In the gap, when table->tb6_lock was
756 * released someone could insert this route. Relookup.
758 dst_release(&rt
->u
.dst
);
766 dst_hold(&rt
->u
.dst
);
767 read_unlock_bh(&table
->tb6_lock
);
769 rt
->u
.dst
.lastuse
= jiffies
;
775 void ip6_route_input(struct sk_buff
*skb
)
777 struct ipv6hdr
*iph
= ipv6_hdr(skb
);
778 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
780 .iif
= skb
->dev
->ifindex
,
785 .flowlabel
= (* (__be32
*) iph
)&IPV6_FLOWINFO_MASK
,
789 .proto
= iph
->nexthdr
,
792 if (rt6_need_strict(&iph
->daddr
))
793 flags
|= RT6_LOOKUP_F_IFACE
;
795 skb
->dst
= fib6_rule_lookup(&fl
, flags
, ip6_pol_route_input
);
798 static struct rt6_info
*ip6_pol_route_output(struct fib6_table
*table
,
799 struct flowi
*fl
, int flags
)
801 struct fib6_node
*fn
;
802 struct rt6_info
*rt
, *nrt
;
806 int reachable
= ipv6_devconf
.forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
808 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
811 read_lock_bh(&table
->tb6_lock
);
814 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
817 rt
= rt6_select(fn
, fl
->oif
, strict
| reachable
);
818 BACKTRACK(&fl
->fl6_src
);
819 if (rt
== &ip6_null_entry
||
820 rt
->rt6i_flags
& RTF_CACHE
)
823 dst_hold(&rt
->u
.dst
);
824 read_unlock_bh(&table
->tb6_lock
);
826 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
827 nrt
= rt6_alloc_cow(rt
, &fl
->fl6_dst
, &fl
->fl6_src
);
829 #if CLONE_OFFLINK_ROUTE
830 nrt
= rt6_alloc_clone(rt
, &fl
->fl6_dst
);
836 dst_release(&rt
->u
.dst
);
837 rt
= nrt
? : &ip6_null_entry
;
839 dst_hold(&rt
->u
.dst
);
841 err
= ip6_ins_rt(nrt
);
850 * Race condition! In the gap, when table->tb6_lock was
851 * released someone could insert this route. Relookup.
853 dst_release(&rt
->u
.dst
);
861 dst_hold(&rt
->u
.dst
);
862 read_unlock_bh(&table
->tb6_lock
);
864 rt
->u
.dst
.lastuse
= jiffies
;
869 struct dst_entry
* ip6_route_output(struct sock
*sk
, struct flowi
*fl
)
873 if (rt6_need_strict(&fl
->fl6_dst
))
874 flags
|= RT6_LOOKUP_F_IFACE
;
876 if (!ipv6_addr_any(&fl
->fl6_src
))
877 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
879 return fib6_rule_lookup(fl
, flags
, ip6_pol_route_output
);
882 EXPORT_SYMBOL(ip6_route_output
);
884 static int ip6_blackhole_output(struct sk_buff
*skb
)
890 int ip6_dst_blackhole(struct sock
*sk
, struct dst_entry
**dstp
, struct flowi
*fl
)
892 struct rt6_info
*ort
= (struct rt6_info
*) *dstp
;
893 struct rt6_info
*rt
= (struct rt6_info
*)
894 dst_alloc(&ip6_dst_blackhole_ops
);
895 struct dst_entry
*new = NULL
;
900 atomic_set(&new->__refcnt
, 1);
902 new->input
= ip6_blackhole_output
;
903 new->output
= ip6_blackhole_output
;
905 memcpy(new->metrics
, ort
->u
.dst
.metrics
, RTAX_MAX
*sizeof(u32
));
906 new->dev
= ort
->u
.dst
.dev
;
909 rt
->rt6i_idev
= ort
->rt6i_idev
;
911 in6_dev_hold(rt
->rt6i_idev
);
912 rt
->rt6i_expires
= 0;
914 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
915 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
918 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
919 #ifdef CONFIG_IPV6_SUBTREES
920 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
928 return (new ? 0 : -ENOMEM
);
930 EXPORT_SYMBOL_GPL(ip6_dst_blackhole
);
933 * Destination cache support functions
936 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
940 rt
= (struct rt6_info
*) dst
;
942 if (rt
&& rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
))
948 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
950 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
953 if (rt
->rt6i_flags
& RTF_CACHE
)
961 static void ip6_link_failure(struct sk_buff
*skb
)
965 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0, skb
->dev
);
967 rt
= (struct rt6_info
*) skb
->dst
;
969 if (rt
->rt6i_flags
&RTF_CACHE
) {
970 dst_set_expires(&rt
->u
.dst
, 0);
971 rt
->rt6i_flags
|= RTF_EXPIRES
;
972 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
973 rt
->rt6i_node
->fn_sernum
= -1;
977 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
979 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
981 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
982 rt6
->rt6i_flags
|= RTF_MODIFIED
;
983 if (mtu
< IPV6_MIN_MTU
) {
985 dst
->metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
987 dst
->metrics
[RTAX_MTU
-1] = mtu
;
988 call_netevent_notifiers(NETEVENT_PMTU_UPDATE
, dst
);
992 static int ipv6_get_mtu(struct net_device
*dev
);
994 static inline unsigned int ipv6_advmss(unsigned int mtu
)
996 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
998 if (mtu
< ip6_rt_min_advmss
)
999 mtu
= ip6_rt_min_advmss
;
1002 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1003 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1004 * IPV6_MAXPLEN is also valid and means: "any MSS,
1005 * rely only on pmtu discovery"
1007 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1012 static struct dst_entry
*ndisc_dst_gc_list
;
1013 static DEFINE_SPINLOCK(ndisc_lock
);
1015 struct dst_entry
*ndisc_dst_alloc(struct net_device
*dev
,
1016 struct neighbour
*neigh
,
1017 struct in6_addr
*addr
,
1018 int (*output
)(struct sk_buff
*))
1020 struct rt6_info
*rt
;
1021 struct inet6_dev
*idev
= in6_dev_get(dev
);
1023 if (unlikely(idev
== NULL
))
1026 rt
= ip6_dst_alloc();
1027 if (unlikely(rt
== NULL
)) {
1036 neigh
= ndisc_get_neigh(dev
, addr
);
1042 rt
->rt6i_idev
= idev
;
1043 rt
->rt6i_nexthop
= neigh
;
1044 atomic_set(&rt
->u
.dst
.__refcnt
, 1);
1045 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = 255;
1046 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
1047 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
1048 rt
->u
.dst
.output
= output
;
1050 #if 0 /* there's no chance to use these for ndisc */
1051 rt
->u
.dst
.flags
= ipv6_addr_type(addr
) & IPV6_ADDR_UNICAST
1054 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1055 rt
->rt6i_dst
.plen
= 128;
1058 spin_lock_bh(&ndisc_lock
);
1059 rt
->u
.dst
.next
= ndisc_dst_gc_list
;
1060 ndisc_dst_gc_list
= &rt
->u
.dst
;
1061 spin_unlock_bh(&ndisc_lock
);
1063 fib6_force_start_gc();
1069 int ndisc_dst_gc(int *more
)
1071 struct dst_entry
*dst
, *next
, **pprev
;
1077 spin_lock_bh(&ndisc_lock
);
1078 pprev
= &ndisc_dst_gc_list
;
1080 while ((dst
= *pprev
) != NULL
) {
1081 if (!atomic_read(&dst
->__refcnt
)) {
1091 spin_unlock_bh(&ndisc_lock
);
1096 static int ip6_dst_gc(void)
1098 static unsigned expire
= 30*HZ
;
1099 static unsigned long last_gc
;
1100 unsigned long now
= jiffies
;
1102 if (time_after(last_gc
+ ip6_rt_gc_min_interval
, now
) &&
1103 atomic_read(&ip6_dst_ops
.entries
) <= ip6_rt_max_size
)
1107 fib6_run_gc(expire
);
1109 if (atomic_read(&ip6_dst_ops
.entries
) < ip6_dst_ops
.gc_thresh
)
1110 expire
= ip6_rt_gc_timeout
>>1;
1113 expire
-= expire
>>ip6_rt_gc_elasticity
;
1114 return (atomic_read(&ip6_dst_ops
.entries
) > ip6_rt_max_size
);
1117 /* Clean host part of a prefix. Not necessary in radix tree,
1118 but results in cleaner routing tables.
1120 Remove it only when all the things will work!
1123 static int ipv6_get_mtu(struct net_device
*dev
)
1125 int mtu
= IPV6_MIN_MTU
;
1126 struct inet6_dev
*idev
;
1128 idev
= in6_dev_get(dev
);
1130 mtu
= idev
->cnf
.mtu6
;
1136 int ipv6_get_hoplimit(struct net_device
*dev
)
1138 int hoplimit
= ipv6_devconf
.hop_limit
;
1139 struct inet6_dev
*idev
;
1141 idev
= in6_dev_get(dev
);
1143 hoplimit
= idev
->cnf
.hop_limit
;
1153 int ip6_route_add(struct fib6_config
*cfg
)
1156 struct rt6_info
*rt
= NULL
;
1157 struct net_device
*dev
= NULL
;
1158 struct inet6_dev
*idev
= NULL
;
1159 struct fib6_table
*table
;
1162 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1164 #ifndef CONFIG_IPV6_SUBTREES
1165 if (cfg
->fc_src_len
)
1168 if (cfg
->fc_ifindex
) {
1170 dev
= dev_get_by_index(cfg
->fc_ifindex
);
1173 idev
= in6_dev_get(dev
);
1178 if (cfg
->fc_metric
== 0)
1179 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1181 table
= fib6_new_table(cfg
->fc_table
);
1182 if (table
== NULL
) {
1187 rt
= ip6_dst_alloc();
1194 rt
->u
.dst
.obsolete
= -1;
1195 rt
->rt6i_expires
= (cfg
->fc_flags
& RTF_EXPIRES
) ?
1196 jiffies
+ clock_t_to_jiffies(cfg
->fc_expires
) :
1199 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1200 cfg
->fc_protocol
= RTPROT_BOOT
;
1201 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1203 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1205 if (addr_type
& IPV6_ADDR_MULTICAST
)
1206 rt
->u
.dst
.input
= ip6_mc_input
;
1208 rt
->u
.dst
.input
= ip6_forward
;
1210 rt
->u
.dst
.output
= ip6_output
;
1212 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1213 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1214 if (rt
->rt6i_dst
.plen
== 128)
1215 rt
->u
.dst
.flags
= DST_HOST
;
1217 #ifdef CONFIG_IPV6_SUBTREES
1218 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1219 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1222 rt
->rt6i_metric
= cfg
->fc_metric
;
1224 /* We cannot add true routes via loopback here,
1225 they would result in kernel looping; promote them to reject routes
1227 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1228 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
))) {
1229 /* hold loopback dev/idev if we haven't done so. */
1230 if (dev
!= &loopback_dev
) {
1235 dev
= &loopback_dev
;
1237 idev
= in6_dev_get(dev
);
1243 rt
->u
.dst
.output
= ip6_pkt_discard_out
;
1244 rt
->u
.dst
.input
= ip6_pkt_discard
;
1245 rt
->u
.dst
.error
= -ENETUNREACH
;
1246 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1250 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1251 struct in6_addr
*gw_addr
;
1254 gw_addr
= &cfg
->fc_gateway
;
1255 ipv6_addr_copy(&rt
->rt6i_gateway
, gw_addr
);
1256 gwa_type
= ipv6_addr_type(gw_addr
);
1258 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1259 struct rt6_info
*grt
;
1261 /* IPv6 strictly inhibits using not link-local
1262 addresses as nexthop address.
1263 Otherwise, router will not able to send redirects.
1264 It is very good, but in some (rare!) circumstances
1265 (SIT, PtP, NBMA NOARP links) it is handy to allow
1266 some exceptions. --ANK
1269 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
1272 grt
= rt6_lookup(gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1274 err
= -EHOSTUNREACH
;
1278 if (dev
!= grt
->rt6i_dev
) {
1279 dst_release(&grt
->u
.dst
);
1283 dev
= grt
->rt6i_dev
;
1284 idev
= grt
->rt6i_idev
;
1286 in6_dev_hold(grt
->rt6i_idev
);
1288 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
1290 dst_release(&grt
->u
.dst
);
1296 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
1304 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1305 rt
->rt6i_nexthop
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
1306 if (IS_ERR(rt
->rt6i_nexthop
)) {
1307 err
= PTR_ERR(rt
->rt6i_nexthop
);
1308 rt
->rt6i_nexthop
= NULL
;
1313 rt
->rt6i_flags
= cfg
->fc_flags
;
1320 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1321 int type
= nla_type(nla
);
1324 if (type
> RTAX_MAX
) {
1329 rt
->u
.dst
.metrics
[type
- 1] = nla_get_u32(nla
);
1334 if (rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] == 0)
1335 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = -1;
1336 if (!rt
->u
.dst
.metrics
[RTAX_MTU
-1])
1337 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(dev
);
1338 if (!rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1])
1339 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
1340 rt
->u
.dst
.dev
= dev
;
1341 rt
->rt6i_idev
= idev
;
1342 rt
->rt6i_table
= table
;
1343 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1351 dst_free(&rt
->u
.dst
);
1355 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1358 struct fib6_table
*table
;
1360 if (rt
== &ip6_null_entry
)
1363 table
= rt
->rt6i_table
;
1364 write_lock_bh(&table
->tb6_lock
);
1366 err
= fib6_del(rt
, info
);
1367 dst_release(&rt
->u
.dst
);
1369 write_unlock_bh(&table
->tb6_lock
);
1374 int ip6_del_rt(struct rt6_info
*rt
)
1376 return __ip6_del_rt(rt
, NULL
);
1379 static int ip6_route_del(struct fib6_config
*cfg
)
1381 struct fib6_table
*table
;
1382 struct fib6_node
*fn
;
1383 struct rt6_info
*rt
;
1386 table
= fib6_get_table(cfg
->fc_table
);
1390 read_lock_bh(&table
->tb6_lock
);
1392 fn
= fib6_locate(&table
->tb6_root
,
1393 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1394 &cfg
->fc_src
, cfg
->fc_src_len
);
1397 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.dst
.rt6_next
) {
1398 if (cfg
->fc_ifindex
&&
1399 (rt
->rt6i_dev
== NULL
||
1400 rt
->rt6i_dev
->ifindex
!= cfg
->fc_ifindex
))
1402 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1403 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1405 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1407 dst_hold(&rt
->u
.dst
);
1408 read_unlock_bh(&table
->tb6_lock
);
1410 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1413 read_unlock_bh(&table
->tb6_lock
);
1421 struct ip6rd_flowi
{
1423 struct in6_addr gateway
;
1426 static struct rt6_info
*__ip6_route_redirect(struct fib6_table
*table
,
1430 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl
;
1431 struct rt6_info
*rt
;
1432 struct fib6_node
*fn
;
1435 * Get the "current" route for this destination and
1436 * check if the redirect has come from approriate router.
1438 * RFC 2461 specifies that redirects should only be
1439 * accepted if they come from the nexthop to the target.
1440 * Due to the way the routes are chosen, this notion
1441 * is a bit fuzzy and one might need to check all possible
1445 read_lock_bh(&table
->tb6_lock
);
1446 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
1448 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.dst
.rt6_next
) {
1450 * Current route is on-link; redirect is always invalid.
1452 * Seems, previous statement is not true. It could
1453 * be node, which looks for us as on-link (f.e. proxy ndisc)
1454 * But then router serving it might decide, that we should
1455 * know truth 8)8) --ANK (980726).
1457 if (rt6_check_expired(rt
))
1459 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1461 if (fl
->oif
!= rt
->rt6i_dev
->ifindex
)
1463 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1469 rt
= &ip6_null_entry
;
1470 BACKTRACK(&fl
->fl6_src
);
1472 dst_hold(&rt
->u
.dst
);
1474 read_unlock_bh(&table
->tb6_lock
);
1479 static struct rt6_info
*ip6_route_redirect(struct in6_addr
*dest
,
1480 struct in6_addr
*src
,
1481 struct in6_addr
*gateway
,
1482 struct net_device
*dev
)
1484 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1485 struct ip6rd_flowi rdfl
= {
1487 .oif
= dev
->ifindex
,
1495 .gateway
= *gateway
,
1498 if (rt6_need_strict(dest
))
1499 flags
|= RT6_LOOKUP_F_IFACE
;
1501 return (struct rt6_info
*)fib6_rule_lookup((struct flowi
*)&rdfl
, flags
, __ip6_route_redirect
);
1504 void rt6_redirect(struct in6_addr
*dest
, struct in6_addr
*src
,
1505 struct in6_addr
*saddr
,
1506 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1508 struct rt6_info
*rt
, *nrt
= NULL
;
1509 struct netevent_redirect netevent
;
1511 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1513 if (rt
== &ip6_null_entry
) {
1514 if (net_ratelimit())
1515 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1516 "for redirect target\n");
1521 * We have finally decided to accept it.
1524 neigh_update(neigh
, lladdr
, NUD_STALE
,
1525 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1526 NEIGH_UPDATE_F_OVERRIDE
|
1527 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1528 NEIGH_UPDATE_F_ISROUTER
))
1532 * Redirect received -> path was valid.
1533 * Look, redirects are sent only in response to data packets,
1534 * so that this nexthop apparently is reachable. --ANK
1536 dst_confirm(&rt
->u
.dst
);
1538 /* Duplicate redirect: silently ignore. */
1539 if (neigh
== rt
->u
.dst
.neighbour
)
1542 nrt
= ip6_rt_copy(rt
);
1546 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1548 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1550 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
1551 nrt
->rt6i_dst
.plen
= 128;
1552 nrt
->u
.dst
.flags
|= DST_HOST
;
1554 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1555 nrt
->rt6i_nexthop
= neigh_clone(neigh
);
1556 /* Reset pmtu, it may be better */
1557 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(neigh
->dev
);
1558 nrt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&nrt
->u
.dst
));
1560 if (ip6_ins_rt(nrt
))
1563 netevent
.old
= &rt
->u
.dst
;
1564 netevent
.new = &nrt
->u
.dst
;
1565 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1567 if (rt
->rt6i_flags
&RTF_CACHE
) {
1573 dst_release(&rt
->u
.dst
);
1578 * Handle ICMP "packet too big" messages
1579 * i.e. Path MTU discovery
1582 static void rt6_do_pmtu_disc(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1583 u32 pmtu
, int ifindex
)
1585 struct rt6_info
*rt
, *nrt
;
1588 rt
= rt6_lookup(daddr
, saddr
, ifindex
, 0);
1592 if (pmtu
>= dst_mtu(&rt
->u
.dst
))
1595 if (pmtu
< IPV6_MIN_MTU
) {
1597 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1598 * MTU (1280) and a fragment header should always be included
1599 * after a node receiving Too Big message reporting PMTU is
1600 * less than the IPv6 Minimum Link MTU.
1602 pmtu
= IPV6_MIN_MTU
;
1606 /* New mtu received -> path was valid.
1607 They are sent only in response to data packets,
1608 so that this nexthop apparently is reachable. --ANK
1610 dst_confirm(&rt
->u
.dst
);
1612 /* Host route. If it is static, it would be better
1613 not to override it, but add new one, so that
1614 when cache entry will expire old pmtu
1615 would return automatically.
1617 if (rt
->rt6i_flags
& RTF_CACHE
) {
1618 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1620 rt
->u
.dst
.metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
1621 dst_set_expires(&rt
->u
.dst
, ip6_rt_mtu_expires
);
1622 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1627 Two cases are possible:
1628 1. It is connected route. Action: COW
1629 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1631 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1632 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1634 nrt
= rt6_alloc_clone(rt
, daddr
);
1637 nrt
->u
.dst
.metrics
[RTAX_MTU
-1] = pmtu
;
1639 nrt
->u
.dst
.metrics
[RTAX_FEATURES
-1] |= RTAX_FEATURE_ALLFRAG
;
1641 /* According to RFC 1981, detecting PMTU increase shouldn't be
1642 * happened within 5 mins, the recommended timer is 10 mins.
1643 * Here this route expiration time is set to ip6_rt_mtu_expires
1644 * which is 10 mins. After 10 mins the decreased pmtu is expired
1645 * and detecting PMTU increase will be automatically happened.
1647 dst_set_expires(&nrt
->u
.dst
, ip6_rt_mtu_expires
);
1648 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1653 dst_release(&rt
->u
.dst
);
1656 void rt6_pmtu_discovery(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1657 struct net_device
*dev
, u32 pmtu
)
1660 * RFC 1981 states that a node "MUST reduce the size of the packets it
1661 * is sending along the path" that caused the Packet Too Big message.
1662 * Since it's not possible in the general case to determine which
1663 * interface was used to send the original packet, we update the MTU
1664 * on the interface that will be used to send future packets. We also
1665 * update the MTU on the interface that received the Packet Too Big in
1666 * case the original packet was forced out that interface with
1667 * SO_BINDTODEVICE or similar. This is the next best thing to the
1668 * correct behaviour, which would be to update the MTU on all
1671 rt6_do_pmtu_disc(daddr
, saddr
, pmtu
, 0);
1672 rt6_do_pmtu_disc(daddr
, saddr
, pmtu
, dev
->ifindex
);
1676 * Misc support functions
1679 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1681 struct rt6_info
*rt
= ip6_dst_alloc();
1684 rt
->u
.dst
.input
= ort
->u
.dst
.input
;
1685 rt
->u
.dst
.output
= ort
->u
.dst
.output
;
1687 memcpy(rt
->u
.dst
.metrics
, ort
->u
.dst
.metrics
, RTAX_MAX
*sizeof(u32
));
1688 rt
->u
.dst
.error
= ort
->u
.dst
.error
;
1689 rt
->u
.dst
.dev
= ort
->u
.dst
.dev
;
1691 dev_hold(rt
->u
.dst
.dev
);
1692 rt
->rt6i_idev
= ort
->rt6i_idev
;
1694 in6_dev_hold(rt
->rt6i_idev
);
1695 rt
->u
.dst
.lastuse
= jiffies
;
1696 rt
->rt6i_expires
= 0;
1698 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1699 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1700 rt
->rt6i_metric
= 0;
1702 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1703 #ifdef CONFIG_IPV6_SUBTREES
1704 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1706 rt
->rt6i_table
= ort
->rt6i_table
;
1711 #ifdef CONFIG_IPV6_ROUTE_INFO
1712 static struct rt6_info
*rt6_get_route_info(struct in6_addr
*prefix
, int prefixlen
,
1713 struct in6_addr
*gwaddr
, int ifindex
)
1715 struct fib6_node
*fn
;
1716 struct rt6_info
*rt
= NULL
;
1717 struct fib6_table
*table
;
1719 table
= fib6_get_table(RT6_TABLE_INFO
);
1723 write_lock_bh(&table
->tb6_lock
);
1724 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1728 for (rt
= fn
->leaf
; rt
; rt
= rt
->u
.dst
.rt6_next
) {
1729 if (rt
->rt6i_dev
->ifindex
!= ifindex
)
1731 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1733 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1735 dst_hold(&rt
->u
.dst
);
1739 write_unlock_bh(&table
->tb6_lock
);
1743 static struct rt6_info
*rt6_add_route_info(struct in6_addr
*prefix
, int prefixlen
,
1744 struct in6_addr
*gwaddr
, int ifindex
,
1747 struct fib6_config cfg
= {
1748 .fc_table
= RT6_TABLE_INFO
,
1750 .fc_ifindex
= ifindex
,
1751 .fc_dst_len
= prefixlen
,
1752 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1753 RTF_UP
| RTF_PREF(pref
),
1756 ipv6_addr_copy(&cfg
.fc_dst
, prefix
);
1757 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1759 /* We should treat it as a default route if prefix length is 0. */
1761 cfg
.fc_flags
|= RTF_DEFAULT
;
1763 ip6_route_add(&cfg
);
1765 return rt6_get_route_info(prefix
, prefixlen
, gwaddr
, ifindex
);
1769 struct rt6_info
*rt6_get_dflt_router(struct in6_addr
*addr
, struct net_device
*dev
)
1771 struct rt6_info
*rt
;
1772 struct fib6_table
*table
;
1774 table
= fib6_get_table(RT6_TABLE_DFLT
);
1778 write_lock_bh(&table
->tb6_lock
);
1779 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->u
.dst
.rt6_next
) {
1780 if (dev
== rt
->rt6i_dev
&&
1781 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1782 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1786 dst_hold(&rt
->u
.dst
);
1787 write_unlock_bh(&table
->tb6_lock
);
1791 struct rt6_info
*rt6_add_dflt_router(struct in6_addr
*gwaddr
,
1792 struct net_device
*dev
,
1795 struct fib6_config cfg
= {
1796 .fc_table
= RT6_TABLE_DFLT
,
1798 .fc_ifindex
= dev
->ifindex
,
1799 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1800 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1803 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1805 ip6_route_add(&cfg
);
1807 return rt6_get_dflt_router(gwaddr
, dev
);
1810 void rt6_purge_dflt_routers(void)
1812 struct rt6_info
*rt
;
1813 struct fib6_table
*table
;
1815 /* NOTE: Keep consistent with rt6_get_dflt_router */
1816 table
= fib6_get_table(RT6_TABLE_DFLT
);
1821 read_lock_bh(&table
->tb6_lock
);
1822 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->u
.dst
.rt6_next
) {
1823 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1824 dst_hold(&rt
->u
.dst
);
1825 read_unlock_bh(&table
->tb6_lock
);
1830 read_unlock_bh(&table
->tb6_lock
);
1833 static void rtmsg_to_fib6_config(struct in6_rtmsg
*rtmsg
,
1834 struct fib6_config
*cfg
)
1836 memset(cfg
, 0, sizeof(*cfg
));
1838 cfg
->fc_table
= RT6_TABLE_MAIN
;
1839 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1840 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1841 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1842 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1843 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1844 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1846 ipv6_addr_copy(&cfg
->fc_dst
, &rtmsg
->rtmsg_dst
);
1847 ipv6_addr_copy(&cfg
->fc_src
, &rtmsg
->rtmsg_src
);
1848 ipv6_addr_copy(&cfg
->fc_gateway
, &rtmsg
->rtmsg_gateway
);
1851 int ipv6_route_ioctl(unsigned int cmd
, void __user
*arg
)
1853 struct fib6_config cfg
;
1854 struct in6_rtmsg rtmsg
;
1858 case SIOCADDRT
: /* Add a route */
1859 case SIOCDELRT
: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN
))
1862 err
= copy_from_user(&rtmsg
, arg
,
1863 sizeof(struct in6_rtmsg
));
1867 rtmsg_to_fib6_config(&rtmsg
, &cfg
);
1872 err
= ip6_route_add(&cfg
);
1875 err
= ip6_route_del(&cfg
);
1889 * Drop the packet on the floor
1892 static inline int ip6_pkt_drop(struct sk_buff
*skb
, int code
,
1893 int ipstats_mib_noroutes
)
1896 switch (ipstats_mib_noroutes
) {
1897 case IPSTATS_MIB_INNOROUTES
:
1898 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
1899 if (type
== IPV6_ADDR_ANY
|| type
== IPV6_ADDR_RESERVED
) {
1900 IP6_INC_STATS(ip6_dst_idev(skb
->dst
), IPSTATS_MIB_INADDRERRORS
);
1904 case IPSTATS_MIB_OUTNOROUTES
:
1905 IP6_INC_STATS(ip6_dst_idev(skb
->dst
), ipstats_mib_noroutes
);
1908 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0, skb
->dev
);
1913 static int ip6_pkt_discard(struct sk_buff
*skb
)
1915 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
1918 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
1920 skb
->dev
= skb
->dst
->dev
;
1921 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
1924 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1926 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
1928 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
1931 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
1933 skb
->dev
= skb
->dst
->dev
;
1934 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
1937 static int ip6_pkt_blk_hole(struct sk_buff
*skb
)
1946 * Allocate a dst for local (unicast / anycast) address.
1949 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
1950 const struct in6_addr
*addr
,
1953 struct rt6_info
*rt
= ip6_dst_alloc();
1954 struct neighbour
*neigh
;
1957 return ERR_PTR(-ENOMEM
);
1959 dev_hold(&loopback_dev
);
1962 rt
->u
.dst
.flags
= DST_HOST
;
1963 rt
->u
.dst
.input
= ip6_input
;
1964 rt
->u
.dst
.output
= ip6_output
;
1965 rt
->rt6i_dev
= &loopback_dev
;
1966 rt
->rt6i_idev
= idev
;
1967 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = ipv6_get_mtu(rt
->rt6i_dev
);
1968 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(dst_mtu(&rt
->u
.dst
));
1969 rt
->u
.dst
.metrics
[RTAX_HOPLIMIT
-1] = -1;
1970 rt
->u
.dst
.obsolete
= -1;
1972 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
1974 rt
->rt6i_flags
|= RTF_ANYCAST
;
1976 rt
->rt6i_flags
|= RTF_LOCAL
;
1977 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
1978 if (IS_ERR(neigh
)) {
1979 dst_free(&rt
->u
.dst
);
1981 /* We are casting this because that is the return
1982 * value type. But an errno encoded pointer is the
1983 * same regardless of the underlying pointer type,
1984 * and that's what we are returning. So this is OK.
1986 return (struct rt6_info
*) neigh
;
1988 rt
->rt6i_nexthop
= neigh
;
1990 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1991 rt
->rt6i_dst
.plen
= 128;
1992 rt
->rt6i_table
= fib6_get_table(RT6_TABLE_LOCAL
);
1994 atomic_set(&rt
->u
.dst
.__refcnt
, 1);
1999 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2001 if (((void*)rt
->rt6i_dev
== arg
|| arg
== NULL
) &&
2002 rt
!= &ip6_null_entry
) {
2003 RT6_TRACE("deleted by ifdown %p\n", rt
);
2009 void rt6_ifdown(struct net_device
*dev
)
2011 fib6_clean_all(fib6_ifdown
, 0, dev
);
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new value.
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2020 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2022 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2023 struct inet6_dev
*idev
;
2025 /* In IPv6 pmtu discovery is not optional,
2026 so that RTAX_MTU lock cannot disable it.
2027 We still use this lock to block changes
2028 caused by addrconf/ndisc.
2031 idev
= __in6_dev_get(arg
->dev
);
2035 /* For administrative MTU increase, there is no way to discover
2036 IPv6 PMTU increase, so PMTU increase should be updated here.
2037 Since RFC 1981 doesn't include administrative MTU increase
2038 update PMTU increase is a MUST. (i.e. jumbo frame)
2041 If new MTU is less than route PMTU, this new MTU will be the
2042 lowest MTU in the path, update the route PMTU to reflect PMTU
2043 decreases; if new MTU is greater than route PMTU, and the
2044 old MTU is the lowest MTU in the path, update the route PMTU
2045 to reflect the increase. In this case if the other nodes' MTU
2046 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2049 if (rt
->rt6i_dev
== arg
->dev
&&
2050 !dst_metric_locked(&rt
->u
.dst
, RTAX_MTU
) &&
2051 (dst_mtu(&rt
->u
.dst
) >= arg
->mtu
||
2052 (dst_mtu(&rt
->u
.dst
) < arg
->mtu
&&
2053 dst_mtu(&rt
->u
.dst
) == idev
->cnf
.mtu6
))) {
2054 rt
->u
.dst
.metrics
[RTAX_MTU
-1] = arg
->mtu
;
2055 rt
->u
.dst
.metrics
[RTAX_ADVMSS
-1] = ipv6_advmss(arg
->mtu
);
2060 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
2062 struct rt6_mtu_change_arg arg
= {
2067 fib6_clean_all(rt6_mtu_change_route
, 0, &arg
);
2070 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2071 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2072 [RTA_OIF
] = { .type
= NLA_U32
},
2073 [RTA_IIF
] = { .type
= NLA_U32
},
2074 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2075 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2078 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2079 struct fib6_config
*cfg
)
2082 struct nlattr
*tb
[RTA_MAX
+1];
2085 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2090 rtm
= nlmsg_data(nlh
);
2091 memset(cfg
, 0, sizeof(*cfg
));
2093 cfg
->fc_table
= rtm
->rtm_table
;
2094 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2095 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2096 cfg
->fc_flags
= RTF_UP
;
2097 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2099 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2100 cfg
->fc_flags
|= RTF_REJECT
;
2102 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2103 cfg
->fc_nlinfo
.nlh
= nlh
;
2105 if (tb
[RTA_GATEWAY
]) {
2106 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2107 cfg
->fc_flags
|= RTF_GATEWAY
;
2111 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2113 if (nla_len(tb
[RTA_DST
]) < plen
)
2116 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2120 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2122 if (nla_len(tb
[RTA_SRC
]) < plen
)
2125 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2129 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2131 if (tb
[RTA_PRIORITY
])
2132 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2134 if (tb
[RTA_METRICS
]) {
2135 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2136 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2140 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2147 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2149 struct fib6_config cfg
;
2152 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2156 return ip6_route_del(&cfg
);
2159 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2161 struct fib6_config cfg
;
2164 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2168 return ip6_route_add(&cfg
);
2171 static inline size_t rt6_nlmsg_size(void)
2173 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2174 + nla_total_size(16) /* RTA_SRC */
2175 + nla_total_size(16) /* RTA_DST */
2176 + nla_total_size(16) /* RTA_GATEWAY */
2177 + nla_total_size(16) /* RTA_PREFSRC */
2178 + nla_total_size(4) /* RTA_TABLE */
2179 + nla_total_size(4) /* RTA_IIF */
2180 + nla_total_size(4) /* RTA_OIF */
2181 + nla_total_size(4) /* RTA_PRIORITY */
2182 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2183 + nla_total_size(sizeof(struct rta_cacheinfo
));
2186 static int rt6_fill_node(struct sk_buff
*skb
, struct rt6_info
*rt
,
2187 struct in6_addr
*dst
, struct in6_addr
*src
,
2188 int iif
, int type
, u32 pid
, u32 seq
,
2189 int prefix
, int nowait
, unsigned int flags
)
2192 struct nlmsghdr
*nlh
;
2196 if (prefix
) { /* user wants prefix routes only */
2197 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2198 /* success since this is not a prefix route */
2203 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2207 rtm
= nlmsg_data(nlh
);
2208 rtm
->rtm_family
= AF_INET6
;
2209 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2210 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2213 table
= rt
->rt6i_table
->tb6_id
;
2215 table
= RT6_TABLE_UNSPEC
;
2216 rtm
->rtm_table
= table
;
2217 NLA_PUT_U32(skb
, RTA_TABLE
, table
);
2218 if (rt
->rt6i_flags
&RTF_REJECT
)
2219 rtm
->rtm_type
= RTN_UNREACHABLE
;
2220 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
2221 rtm
->rtm_type
= RTN_LOCAL
;
2223 rtm
->rtm_type
= RTN_UNICAST
;
2225 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2226 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2227 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
2228 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2229 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2230 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2231 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
2232 rtm
->rtm_protocol
= RTPROT_RA
;
2234 if (rt
->rt6i_flags
&RTF_CACHE
)
2235 rtm
->rtm_flags
|= RTM_F_CLONED
;
2238 NLA_PUT(skb
, RTA_DST
, 16, dst
);
2239 rtm
->rtm_dst_len
= 128;
2240 } else if (rtm
->rtm_dst_len
)
2241 NLA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
2242 #ifdef CONFIG_IPV6_SUBTREES
2244 NLA_PUT(skb
, RTA_SRC
, 16, src
);
2245 rtm
->rtm_src_len
= 128;
2246 } else if (rtm
->rtm_src_len
)
2247 NLA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
2250 #ifdef CONFIG_IPV6_MROUTE
2251 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2252 int err
= ip6mr_get_route(skb
, rtm
, nowait
);
2257 goto nla_put_failure
;
2259 if (err
== -EMSGSIZE
)
2260 goto nla_put_failure
;
2265 NLA_PUT_U32(skb
, RTA_IIF
, iif
);
2267 struct in6_addr saddr_buf
;
2268 if (ipv6_get_saddr(&rt
->u
.dst
, dst
, &saddr_buf
) == 0)
2269 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2272 if (rtnetlink_put_metrics(skb
, rt
->u
.dst
.metrics
) < 0)
2273 goto nla_put_failure
;
2275 if (rt
->u
.dst
.neighbour
)
2276 NLA_PUT(skb
, RTA_GATEWAY
, 16, &rt
->u
.dst
.neighbour
->primary_key
);
2279 NLA_PUT_U32(skb
, RTA_OIF
, rt
->rt6i_dev
->ifindex
);
2281 NLA_PUT_U32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
);
2283 expires
= rt
->rt6i_expires
? rt
->rt6i_expires
- jiffies
: 0;
2284 if (rtnl_put_cacheinfo(skb
, &rt
->u
.dst
, 0, 0, 0,
2285 expires
, rt
->u
.dst
.error
) < 0)
2286 goto nla_put_failure
;
2288 return nlmsg_end(skb
, nlh
);
2291 nlmsg_cancel(skb
, nlh
);
2295 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2297 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2300 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2301 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2302 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2306 return rt6_fill_node(arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2307 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2308 prefix
, 0, NLM_F_MULTI
);
2311 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2313 struct nlattr
*tb
[RTA_MAX
+1];
2314 struct rt6_info
*rt
;
2315 struct sk_buff
*skb
;
2320 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2325 memset(&fl
, 0, sizeof(fl
));
2328 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2331 ipv6_addr_copy(&fl
.fl6_src
, nla_data(tb
[RTA_SRC
]));
2335 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2338 ipv6_addr_copy(&fl
.fl6_dst
, nla_data(tb
[RTA_DST
]));
2342 iif
= nla_get_u32(tb
[RTA_IIF
]);
2345 fl
.oif
= nla_get_u32(tb
[RTA_OIF
]);
2348 struct net_device
*dev
;
2349 dev
= __dev_get_by_index(iif
);
2356 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2362 /* Reserve room for dummy headers, this skb can pass
2363 through good chunk of routing engine.
2365 skb_reset_mac_header(skb
);
2366 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2368 rt
= (struct rt6_info
*) ip6_route_output(NULL
, &fl
);
2369 skb
->dst
= &rt
->u
.dst
;
2371 err
= rt6_fill_node(skb
, rt
, &fl
.fl6_dst
, &fl
.fl6_src
, iif
,
2372 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2373 nlh
->nlmsg_seq
, 0, 0, 0);
2379 err
= rtnl_unicast(skb
, NETLINK_CB(in_skb
).pid
);
2384 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2386 struct sk_buff
*skb
;
2387 u32 pid
= 0, seq
= 0;
2388 struct nlmsghdr
*nlh
= NULL
;
2395 seq
= nlh
->nlmsg_seq
;
2398 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2402 err
= rt6_fill_node(skb
, rt
, NULL
, NULL
, 0, event
, pid
, seq
, 0, 0, 0);
2404 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2405 WARN_ON(err
== -EMSGSIZE
);
2409 err
= rtnl_notify(skb
, pid
, RTNLGRP_IPV6_ROUTE
, nlh
, gfp_any());
2412 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE
, err
);
2419 #ifdef CONFIG_PROC_FS
2421 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2432 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2434 struct rt6_proc_arg
*arg
= (struct rt6_proc_arg
*) p_arg
;
2436 if (arg
->skip
< arg
->offset
/ RT6_INFO_LEN
) {
2441 if (arg
->len
>= arg
->length
)
2444 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2445 NIP6_SEQFMT
" %02x ",
2446 NIP6(rt
->rt6i_dst
.addr
),
2449 #ifdef CONFIG_IPV6_SUBTREES
2450 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2451 NIP6_SEQFMT
" %02x ",
2452 NIP6(rt
->rt6i_src
.addr
),
2455 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2456 "00000000000000000000000000000000 00 ");
2459 if (rt
->rt6i_nexthop
) {
2460 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2462 NIP6(*((struct in6_addr
*)rt
->rt6i_nexthop
->primary_key
)));
2464 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2465 "00000000000000000000000000000000");
2467 arg
->len
+= sprintf(arg
->buffer
+ arg
->len
,
2468 " %08x %08x %08x %08x %8s\n",
2469 rt
->rt6i_metric
, atomic_read(&rt
->u
.dst
.__refcnt
),
2470 rt
->u
.dst
.__use
, rt
->rt6i_flags
,
2471 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
2475 static int rt6_proc_info(char *buffer
, char **start
, off_t offset
, int length
)
2477 struct rt6_proc_arg arg
= {
2483 fib6_clean_all(rt6_info_route
, 0, &arg
);
2487 *start
+= offset
% RT6_INFO_LEN
;
2489 arg
.len
-= offset
% RT6_INFO_LEN
;
2491 if (arg
.len
> length
)
2499 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2501 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2502 rt6_stats
.fib_nodes
, rt6_stats
.fib_route_nodes
,
2503 rt6_stats
.fib_rt_alloc
, rt6_stats
.fib_rt_entries
,
2504 rt6_stats
.fib_rt_cache
,
2505 atomic_read(&ip6_dst_ops
.entries
),
2506 rt6_stats
.fib_discarded_routes
);
2511 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2513 return single_open(file
, rt6_stats_seq_show
, NULL
);
2516 static const struct file_operations rt6_stats_seq_fops
= {
2517 .owner
= THIS_MODULE
,
2518 .open
= rt6_stats_seq_open
,
2520 .llseek
= seq_lseek
,
2521 .release
= single_release
,
2523 #endif /* CONFIG_PROC_FS */
2525 #ifdef CONFIG_SYSCTL
2527 static int flush_delay
;
2530 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
, struct file
* filp
,
2531 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2534 proc_dointvec(ctl
, write
, filp
, buffer
, lenp
, ppos
);
2535 fib6_run_gc(flush_delay
<= 0 ? ~0UL : (unsigned long)flush_delay
);
2541 ctl_table ipv6_route_table
[] = {
2543 .ctl_name
= NET_IPV6_ROUTE_FLUSH
,
2544 .procname
= "flush",
2545 .data
= &flush_delay
,
2546 .maxlen
= sizeof(int),
2548 .proc_handler
= &ipv6_sysctl_rtcache_flush
2551 .ctl_name
= NET_IPV6_ROUTE_GC_THRESH
,
2552 .procname
= "gc_thresh",
2553 .data
= &ip6_dst_ops
.gc_thresh
,
2554 .maxlen
= sizeof(int),
2556 .proc_handler
= &proc_dointvec
,
2559 .ctl_name
= NET_IPV6_ROUTE_MAX_SIZE
,
2560 .procname
= "max_size",
2561 .data
= &ip6_rt_max_size
,
2562 .maxlen
= sizeof(int),
2564 .proc_handler
= &proc_dointvec
,
2567 .ctl_name
= NET_IPV6_ROUTE_GC_MIN_INTERVAL
,
2568 .procname
= "gc_min_interval",
2569 .data
= &ip6_rt_gc_min_interval
,
2570 .maxlen
= sizeof(int),
2572 .proc_handler
= &proc_dointvec_jiffies
,
2573 .strategy
= &sysctl_jiffies
,
2576 .ctl_name
= NET_IPV6_ROUTE_GC_TIMEOUT
,
2577 .procname
= "gc_timeout",
2578 .data
= &ip6_rt_gc_timeout
,
2579 .maxlen
= sizeof(int),
2581 .proc_handler
= &proc_dointvec_jiffies
,
2582 .strategy
= &sysctl_jiffies
,
2585 .ctl_name
= NET_IPV6_ROUTE_GC_INTERVAL
,
2586 .procname
= "gc_interval",
2587 .data
= &ip6_rt_gc_interval
,
2588 .maxlen
= sizeof(int),
2590 .proc_handler
= &proc_dointvec_jiffies
,
2591 .strategy
= &sysctl_jiffies
,
2594 .ctl_name
= NET_IPV6_ROUTE_GC_ELASTICITY
,
2595 .procname
= "gc_elasticity",
2596 .data
= &ip6_rt_gc_elasticity
,
2597 .maxlen
= sizeof(int),
2599 .proc_handler
= &proc_dointvec_jiffies
,
2600 .strategy
= &sysctl_jiffies
,
2603 .ctl_name
= NET_IPV6_ROUTE_MTU_EXPIRES
,
2604 .procname
= "mtu_expires",
2605 .data
= &ip6_rt_mtu_expires
,
2606 .maxlen
= sizeof(int),
2608 .proc_handler
= &proc_dointvec_jiffies
,
2609 .strategy
= &sysctl_jiffies
,
2612 .ctl_name
= NET_IPV6_ROUTE_MIN_ADVMSS
,
2613 .procname
= "min_adv_mss",
2614 .data
= &ip6_rt_min_advmss
,
2615 .maxlen
= sizeof(int),
2617 .proc_handler
= &proc_dointvec_jiffies
,
2618 .strategy
= &sysctl_jiffies
,
2621 .ctl_name
= NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS
,
2622 .procname
= "gc_min_interval_ms",
2623 .data
= &ip6_rt_gc_min_interval
,
2624 .maxlen
= sizeof(int),
2626 .proc_handler
= &proc_dointvec_ms_jiffies
,
2627 .strategy
= &sysctl_ms_jiffies
,
2634 void __init
ip6_route_init(void)
2636 #ifdef CONFIG_PROC_FS
2637 struct proc_dir_entry
*p
;
2639 ip6_dst_ops
.kmem_cachep
=
2640 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
2641 SLAB_HWCACHE_ALIGN
|SLAB_PANIC
, NULL
, NULL
);
2642 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops
.kmem_cachep
;
2645 #ifdef CONFIG_PROC_FS
2646 p
= proc_net_create("ipv6_route", 0, rt6_proc_info
);
2648 p
->owner
= THIS_MODULE
;
2650 proc_net_fops_create("rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
2655 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2659 __rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
);
2660 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
);
2661 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
);
2664 void ip6_route_cleanup(void)
2666 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2667 fib6_rules_cleanup();
2669 #ifdef CONFIG_PROC_FS
2670 proc_net_remove("ipv6_route");
2671 proc_net_remove("rt6_stats");
2678 kmem_cache_destroy(ip6_dst_ops
.kmem_cachep
);