2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 static struct rt6_info
*ip6_rt_copy(const struct rt6_info
*ort
,
76 const struct in6_addr
*dest
);
77 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
78 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
79 static unsigned int ip6_default_mtu(const struct dst_entry
*dst
);
80 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
81 static void ip6_dst_destroy(struct dst_entry
*);
82 static void ip6_dst_ifdown(struct dst_entry
*,
83 struct net_device
*dev
, int how
);
84 static int ip6_dst_gc(struct dst_ops
*ops
);
86 static int ip6_pkt_discard(struct sk_buff
*skb
);
87 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
88 static void ip6_link_failure(struct sk_buff
*skb
);
89 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
91 #ifdef CONFIG_IPV6_ROUTE_INFO
92 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
93 const struct in6_addr
*prefix
, int prefixlen
,
94 const struct in6_addr
*gwaddr
, int ifindex
,
96 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
97 const struct in6_addr
*prefix
, int prefixlen
,
98 const struct in6_addr
*gwaddr
, int ifindex
);
101 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
103 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
104 struct inet_peer
*peer
;
107 if (!(rt
->dst
.flags
& DST_HOST
))
111 rt6_bind_peer(rt
, 1);
113 peer
= rt
->rt6i_peer
;
115 u32
*old_p
= __DST_METRICS_PTR(old
);
116 unsigned long prev
, new;
119 if (inet_metrics_new(peer
))
120 memcpy(p
, old_p
, sizeof(u32
) * RTAX_MAX
);
122 new = (unsigned long) p
;
123 prev
= cmpxchg(&dst
->_metrics
, old
, new);
126 p
= __DST_METRICS_PTR(prev
);
127 if (prev
& DST_METRICS_READ_ONLY
)
134 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
, const void *daddr
)
136 return __neigh_lookup_errno(&nd_tbl
, daddr
, dst
->dev
);
139 static struct dst_ops ip6_dst_ops_template
= {
141 .protocol
= cpu_to_be16(ETH_P_IPV6
),
144 .check
= ip6_dst_check
,
145 .default_advmss
= ip6_default_advmss
,
146 .default_mtu
= ip6_default_mtu
,
147 .cow_metrics
= ipv6_cow_metrics
,
148 .destroy
= ip6_dst_destroy
,
149 .ifdown
= ip6_dst_ifdown
,
150 .negative_advice
= ip6_negative_advice
,
151 .link_failure
= ip6_link_failure
,
152 .update_pmtu
= ip6_rt_update_pmtu
,
153 .local_out
= __ip6_local_out
,
154 .neigh_lookup
= ip6_neigh_lookup
,
157 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry
*dst
)
162 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
166 static u32
*ip6_rt_blackhole_cow_metrics(struct dst_entry
*dst
,
172 static struct dst_ops ip6_dst_blackhole_ops
= {
174 .protocol
= cpu_to_be16(ETH_P_IPV6
),
175 .destroy
= ip6_dst_destroy
,
176 .check
= ip6_dst_check
,
177 .default_mtu
= ip6_blackhole_default_mtu
,
178 .default_advmss
= ip6_default_advmss
,
179 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
180 .cow_metrics
= ip6_rt_blackhole_cow_metrics
,
181 .neigh_lookup
= ip6_neigh_lookup
,
184 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
185 [RTAX_HOPLIMIT
- 1] = 255,
188 static struct rt6_info ip6_null_entry_template
= {
190 .__refcnt
= ATOMIC_INIT(1),
193 .error
= -ENETUNREACH
,
194 .input
= ip6_pkt_discard
,
195 .output
= ip6_pkt_discard_out
,
197 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
198 .rt6i_protocol
= RTPROT_KERNEL
,
199 .rt6i_metric
= ~(u32
) 0,
200 .rt6i_ref
= ATOMIC_INIT(1),
203 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
205 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
206 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
208 static struct rt6_info ip6_prohibit_entry_template
= {
210 .__refcnt
= ATOMIC_INIT(1),
214 .input
= ip6_pkt_prohibit
,
215 .output
= ip6_pkt_prohibit_out
,
217 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
218 .rt6i_protocol
= RTPROT_KERNEL
,
219 .rt6i_metric
= ~(u32
) 0,
220 .rt6i_ref
= ATOMIC_INIT(1),
223 static struct rt6_info ip6_blk_hole_entry_template
= {
225 .__refcnt
= ATOMIC_INIT(1),
229 .input
= dst_discard
,
230 .output
= dst_discard
,
232 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
233 .rt6i_protocol
= RTPROT_KERNEL
,
234 .rt6i_metric
= ~(u32
) 0,
235 .rt6i_ref
= ATOMIC_INIT(1),
240 /* allocate dst with ip6_dst_ops */
241 static inline struct rt6_info
*ip6_dst_alloc(struct dst_ops
*ops
,
242 struct net_device
*dev
,
245 struct rt6_info
*rt
= dst_alloc(ops
, dev
, 0, 0, flags
);
248 memset(&rt
->rt6i_table
, 0,
249 sizeof(*rt
) - sizeof(struct dst_entry
));
254 static void ip6_dst_destroy(struct dst_entry
*dst
)
256 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
257 struct inet6_dev
*idev
= rt
->rt6i_idev
;
258 struct inet_peer
*peer
= rt
->rt6i_peer
;
260 if (!(rt
->dst
.flags
& DST_HOST
))
261 dst_destroy_metrics_generic(dst
);
264 rt
->rt6i_idev
= NULL
;
268 rt
->rt6i_peer
= NULL
;
273 static atomic_t __rt6_peer_genid
= ATOMIC_INIT(0);
275 static u32
rt6_peer_genid(void)
277 return atomic_read(&__rt6_peer_genid
);
280 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
282 struct inet_peer
*peer
;
284 peer
= inet_getpeer_v6(&rt
->rt6i_dst
.addr
, create
);
285 if (peer
&& cmpxchg(&rt
->rt6i_peer
, NULL
, peer
) != NULL
)
288 rt
->rt6i_peer_genid
= rt6_peer_genid();
291 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
294 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
295 struct inet6_dev
*idev
= rt
->rt6i_idev
;
296 struct net_device
*loopback_dev
=
297 dev_net(dev
)->loopback_dev
;
299 if (dev
!= loopback_dev
&& idev
!= NULL
&& idev
->dev
== dev
) {
300 struct inet6_dev
*loopback_idev
=
301 in6_dev_get(loopback_dev
);
302 if (loopback_idev
!= NULL
) {
303 rt
->rt6i_idev
= loopback_idev
;
309 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
311 return (rt
->rt6i_flags
& RTF_EXPIRES
) &&
312 time_after(jiffies
, rt
->rt6i_expires
);
315 static inline int rt6_need_strict(const struct in6_addr
*daddr
)
317 return ipv6_addr_type(daddr
) &
318 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
322 * Route lookup. Any table->tb6_lock is implied.
325 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
327 const struct in6_addr
*saddr
,
331 struct rt6_info
*local
= NULL
;
332 struct rt6_info
*sprt
;
334 if (!oif
&& ipv6_addr_any(saddr
))
337 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
338 struct net_device
*dev
= sprt
->rt6i_dev
;
341 if (dev
->ifindex
== oif
)
343 if (dev
->flags
& IFF_LOOPBACK
) {
344 if (sprt
->rt6i_idev
== NULL
||
345 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
346 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
348 if (local
&& (!oif
||
349 local
->rt6i_idev
->dev
->ifindex
== oif
))
355 if (ipv6_chk_addr(net
, saddr
, dev
,
356 flags
& RT6_LOOKUP_F_IFACE
))
365 if (flags
& RT6_LOOKUP_F_IFACE
)
366 return net
->ipv6
.ip6_null_entry
;
372 #ifdef CONFIG_IPV6_ROUTER_PREF
373 static void rt6_probe(struct rt6_info
*rt
)
375 struct neighbour
*neigh
;
377 * Okay, this does not seem to be appropriate
378 * for now, however, we need to check if it
379 * is really so; aka Router Reachability Probing.
381 * Router Reachability Probe MUST be rate-limited
382 * to no more than one per minute.
385 neigh
= rt
? dst_get_neighbour(&rt
->dst
) : NULL
;
386 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
388 read_lock_bh(&neigh
->lock
);
389 if (!(neigh
->nud_state
& NUD_VALID
) &&
390 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
391 struct in6_addr mcaddr
;
392 struct in6_addr
*target
;
394 neigh
->updated
= jiffies
;
395 read_unlock_bh(&neigh
->lock
);
397 target
= (struct in6_addr
*)&neigh
->primary_key
;
398 addrconf_addr_solict_mult(target
, &mcaddr
);
399 ndisc_send_ns(rt
->rt6i_dev
, NULL
, target
, &mcaddr
, NULL
);
401 read_unlock_bh(&neigh
->lock
);
407 static inline void rt6_probe(struct rt6_info
*rt
)
413 * Default Router Selection (RFC 2461 6.3.6)
415 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
417 struct net_device
*dev
= rt
->rt6i_dev
;
418 if (!oif
|| dev
->ifindex
== oif
)
420 if ((dev
->flags
& IFF_LOOPBACK
) &&
421 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
426 static inline int rt6_check_neigh(struct rt6_info
*rt
)
428 struct neighbour
*neigh
;
432 neigh
= dst_get_neighbour(&rt
->dst
);
433 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
434 !(rt
->rt6i_flags
& RTF_GATEWAY
))
437 read_lock_bh(&neigh
->lock
);
438 if (neigh
->nud_state
& NUD_VALID
)
440 #ifdef CONFIG_IPV6_ROUTER_PREF
441 else if (neigh
->nud_state
& NUD_FAILED
)
446 read_unlock_bh(&neigh
->lock
);
453 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
458 m
= rt6_check_dev(rt
, oif
);
459 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
461 #ifdef CONFIG_IPV6_ROUTER_PREF
462 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
464 n
= rt6_check_neigh(rt
);
465 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
470 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
471 int *mpri
, struct rt6_info
*match
)
475 if (rt6_check_expired(rt
))
478 m
= rt6_score_route(rt
, oif
, strict
);
483 if (strict
& RT6_LOOKUP_F_REACHABLE
)
487 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
495 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
496 struct rt6_info
*rr_head
,
497 u32 metric
, int oif
, int strict
)
499 struct rt6_info
*rt
, *match
;
503 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
504 rt
= rt
->dst
.rt6_next
)
505 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
506 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
507 rt
= rt
->dst
.rt6_next
)
508 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
513 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
515 struct rt6_info
*match
, *rt0
;
518 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
519 __func__
, fn
->leaf
, oif
);
523 fn
->rr_ptr
= rt0
= fn
->leaf
;
525 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
528 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
529 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
531 /* no entries matched; do round-robin */
532 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
539 RT6_TRACE("%s() => %p\n",
542 net
= dev_net(rt0
->rt6i_dev
);
543 return match
? match
: net
->ipv6
.ip6_null_entry
;
546 #ifdef CONFIG_IPV6_ROUTE_INFO
547 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
548 const struct in6_addr
*gwaddr
)
550 struct net
*net
= dev_net(dev
);
551 struct route_info
*rinfo
= (struct route_info
*) opt
;
552 struct in6_addr prefix_buf
, *prefix
;
554 unsigned long lifetime
;
557 if (len
< sizeof(struct route_info
)) {
561 /* Sanity check for prefix_len and length */
562 if (rinfo
->length
> 3) {
564 } else if (rinfo
->prefix_len
> 128) {
566 } else if (rinfo
->prefix_len
> 64) {
567 if (rinfo
->length
< 2) {
570 } else if (rinfo
->prefix_len
> 0) {
571 if (rinfo
->length
< 1) {
576 pref
= rinfo
->route_pref
;
577 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
580 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
582 if (rinfo
->length
== 3)
583 prefix
= (struct in6_addr
*)rinfo
->prefix
;
585 /* this function is safe */
586 ipv6_addr_prefix(&prefix_buf
,
587 (struct in6_addr
*)rinfo
->prefix
,
589 prefix
= &prefix_buf
;
592 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
595 if (rt
&& !lifetime
) {
601 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
604 rt
->rt6i_flags
= RTF_ROUTEINFO
|
605 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
608 if (!addrconf_finite_timeout(lifetime
)) {
609 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
611 rt
->rt6i_expires
= jiffies
+ HZ
* lifetime
;
612 rt
->rt6i_flags
|= RTF_EXPIRES
;
614 dst_release(&rt
->dst
);
620 #define BACKTRACK(__net, saddr) \
622 if (rt == __net->ipv6.ip6_null_entry) { \
623 struct fib6_node *pn; \
625 if (fn->fn_flags & RTN_TL_ROOT) \
628 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
629 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
632 if (fn->fn_flags & RTN_RTINFO) \
638 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
639 struct fib6_table
*table
,
640 struct flowi6
*fl6
, int flags
)
642 struct fib6_node
*fn
;
645 read_lock_bh(&table
->tb6_lock
);
646 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
649 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
650 BACKTRACK(net
, &fl6
->saddr
);
652 dst_use(&rt
->dst
, jiffies
);
653 read_unlock_bh(&table
->tb6_lock
);
658 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
659 const struct in6_addr
*saddr
, int oif
, int strict
)
661 struct flowi6 fl6
= {
665 struct dst_entry
*dst
;
666 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
669 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
670 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
673 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
675 return (struct rt6_info
*) dst
;
682 EXPORT_SYMBOL(rt6_lookup
);
684 /* ip6_ins_rt is called with FREE table->tb6_lock.
685 It takes new route entry, the addition fails by any reason the
686 route is freed. In any case, if caller does not hold it, it may
690 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
693 struct fib6_table
*table
;
695 table
= rt
->rt6i_table
;
696 write_lock_bh(&table
->tb6_lock
);
697 err
= fib6_add(&table
->tb6_root
, rt
, info
);
698 write_unlock_bh(&table
->tb6_lock
);
703 int ip6_ins_rt(struct rt6_info
*rt
)
705 struct nl_info info
= {
706 .nl_net
= dev_net(rt
->rt6i_dev
),
708 return __ip6_ins_rt(rt
, &info
);
711 static struct rt6_info
*rt6_alloc_cow(const struct rt6_info
*ort
,
712 const struct in6_addr
*daddr
,
713 const struct in6_addr
*saddr
)
721 rt
= ip6_rt_copy(ort
, daddr
);
724 struct neighbour
*neigh
;
725 int attempts
= !in_softirq();
727 if (!(rt
->rt6i_flags
&RTF_GATEWAY
)) {
728 if (rt
->rt6i_dst
.plen
!= 128 &&
729 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
730 rt
->rt6i_flags
|= RTF_ANYCAST
;
731 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
734 rt
->rt6i_flags
|= RTF_CACHE
;
736 #ifdef CONFIG_IPV6_SUBTREES
737 if (rt
->rt6i_src
.plen
&& saddr
) {
738 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
739 rt
->rt6i_src
.plen
= 128;
744 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
746 struct net
*net
= dev_net(rt
->rt6i_dev
);
747 int saved_rt_min_interval
=
748 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
749 int saved_rt_elasticity
=
750 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
752 if (attempts
-- > 0) {
753 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
754 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
756 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
758 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
760 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
761 saved_rt_min_interval
;
767 "ipv6: Neighbour table overflow.\n");
771 dst_set_neighbour(&rt
->dst
, neigh
);
778 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
,
779 const struct in6_addr
*daddr
)
781 struct rt6_info
*rt
= ip6_rt_copy(ort
, daddr
);
784 rt
->rt6i_flags
|= RTF_CACHE
;
785 dst_set_neighbour(&rt
->dst
, neigh_clone(dst_get_neighbour_raw(&ort
->dst
)));
790 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
791 struct flowi6
*fl6
, int flags
)
793 struct fib6_node
*fn
;
794 struct rt6_info
*rt
, *nrt
;
798 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
800 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
803 read_lock_bh(&table
->tb6_lock
);
806 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
809 rt
= rt6_select(fn
, oif
, strict
| reachable
);
811 BACKTRACK(net
, &fl6
->saddr
);
812 if (rt
== net
->ipv6
.ip6_null_entry
||
813 rt
->rt6i_flags
& RTF_CACHE
)
817 read_unlock_bh(&table
->tb6_lock
);
819 if (!dst_get_neighbour_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
820 nrt
= rt6_alloc_cow(rt
, &fl6
->daddr
, &fl6
->saddr
);
821 else if (!(rt
->dst
.flags
& DST_HOST
))
822 nrt
= rt6_alloc_clone(rt
, &fl6
->daddr
);
826 dst_release(&rt
->dst
);
827 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
831 err
= ip6_ins_rt(nrt
);
840 * Race condition! In the gap, when table->tb6_lock was
841 * released someone could insert this route. Relookup.
843 dst_release(&rt
->dst
);
852 read_unlock_bh(&table
->tb6_lock
);
854 rt
->dst
.lastuse
= jiffies
;
860 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
861 struct flowi6
*fl6
, int flags
)
863 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
866 void ip6_route_input(struct sk_buff
*skb
)
868 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
869 struct net
*net
= dev_net(skb
->dev
);
870 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
871 struct flowi6 fl6
= {
872 .flowi6_iif
= skb
->dev
->ifindex
,
875 .flowlabel
= (* (__be32
*) iph
)&IPV6_FLOWINFO_MASK
,
876 .flowi6_mark
= skb
->mark
,
877 .flowi6_proto
= iph
->nexthdr
,
880 if (rt6_need_strict(&iph
->daddr
) && skb
->dev
->type
!= ARPHRD_PIMREG
)
881 flags
|= RT6_LOOKUP_F_IFACE
;
883 skb_dst_set(skb
, fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_input
));
886 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
887 struct flowi6
*fl6
, int flags
)
889 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
892 struct dst_entry
* ip6_route_output(struct net
*net
, const struct sock
*sk
,
897 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
))
898 flags
|= RT6_LOOKUP_F_IFACE
;
900 if (!ipv6_addr_any(&fl6
->saddr
))
901 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
903 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
905 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
908 EXPORT_SYMBOL(ip6_route_output
);
910 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
912 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
913 struct dst_entry
*new = NULL
;
915 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, 0, 0);
917 memset(&rt
->rt6i_table
, 0, sizeof(*rt
) - sizeof(struct dst_entry
));
922 new->input
= dst_discard
;
923 new->output
= dst_discard
;
925 if (dst_metrics_read_only(&ort
->dst
))
926 new->_metrics
= ort
->dst
._metrics
;
928 dst_copy_metrics(new, &ort
->dst
);
929 rt
->rt6i_idev
= ort
->rt6i_idev
;
931 in6_dev_hold(rt
->rt6i_idev
);
932 rt
->rt6i_expires
= 0;
934 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
935 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
938 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
939 #ifdef CONFIG_IPV6_SUBTREES
940 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
946 dst_release(dst_orig
);
947 return new ? new : ERR_PTR(-ENOMEM
);
951 * Destination cache support functions
954 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
958 rt
= (struct rt6_info
*) dst
;
960 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
)) {
961 if (rt
->rt6i_peer_genid
!= rt6_peer_genid()) {
963 rt6_bind_peer(rt
, 0);
964 rt
->rt6i_peer_genid
= rt6_peer_genid();
971 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
973 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
976 if (rt
->rt6i_flags
& RTF_CACHE
) {
977 if (rt6_check_expired(rt
)) {
989 static void ip6_link_failure(struct sk_buff
*skb
)
993 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
995 rt
= (struct rt6_info
*) skb_dst(skb
);
997 if (rt
->rt6i_flags
&RTF_CACHE
) {
998 dst_set_expires(&rt
->dst
, 0);
999 rt
->rt6i_flags
|= RTF_EXPIRES
;
1000 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
1001 rt
->rt6i_node
->fn_sernum
= -1;
1005 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
1007 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1009 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
1010 rt6
->rt6i_flags
|= RTF_MODIFIED
;
1011 if (mtu
< IPV6_MIN_MTU
) {
1012 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
1014 features
|= RTAX_FEATURE_ALLFRAG
;
1015 dst_metric_set(dst
, RTAX_FEATURES
, features
);
1017 dst_metric_set(dst
, RTAX_MTU
, mtu
);
1021 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1023 struct net_device
*dev
= dst
->dev
;
1024 unsigned int mtu
= dst_mtu(dst
);
1025 struct net
*net
= dev_net(dev
);
1027 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1029 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1030 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1033 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1034 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1035 * IPV6_MAXPLEN is also valid and means: "any MSS,
1036 * rely only on pmtu discovery"
1038 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1043 static unsigned int ip6_default_mtu(const struct dst_entry
*dst
)
1045 unsigned int mtu
= IPV6_MIN_MTU
;
1046 struct inet6_dev
*idev
;
1049 idev
= __in6_dev_get(dst
->dev
);
1051 mtu
= idev
->cnf
.mtu6
;
1057 static struct dst_entry
*icmp6_dst_gc_list
;
1058 static DEFINE_SPINLOCK(icmp6_dst_lock
);
1060 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1061 struct neighbour
*neigh
,
1062 const struct in6_addr
*addr
)
1064 struct rt6_info
*rt
;
1065 struct inet6_dev
*idev
= in6_dev_get(dev
);
1066 struct net
*net
= dev_net(dev
);
1068 if (unlikely(idev
== NULL
))
1071 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
, 0);
1072 if (unlikely(rt
== NULL
)) {
1080 neigh
= ndisc_get_neigh(dev
, addr
);
1085 rt
->dst
.flags
|= DST_HOST
;
1086 rt
->dst
.output
= ip6_output
;
1087 dst_set_neighbour(&rt
->dst
, neigh
);
1088 atomic_set(&rt
->dst
.__refcnt
, 1);
1089 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1090 rt
->rt6i_dst
.plen
= 128;
1091 rt
->rt6i_idev
= idev
;
1092 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
1094 spin_lock_bh(&icmp6_dst_lock
);
1095 rt
->dst
.next
= icmp6_dst_gc_list
;
1096 icmp6_dst_gc_list
= &rt
->dst
;
1097 spin_unlock_bh(&icmp6_dst_lock
);
1099 fib6_force_start_gc(net
);
1105 int icmp6_dst_gc(void)
1107 struct dst_entry
*dst
, **pprev
;
1110 spin_lock_bh(&icmp6_dst_lock
);
1111 pprev
= &icmp6_dst_gc_list
;
1113 while ((dst
= *pprev
) != NULL
) {
1114 if (!atomic_read(&dst
->__refcnt
)) {
1123 spin_unlock_bh(&icmp6_dst_lock
);
1128 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1131 struct dst_entry
*dst
, **pprev
;
1133 spin_lock_bh(&icmp6_dst_lock
);
1134 pprev
= &icmp6_dst_gc_list
;
1135 while ((dst
= *pprev
) != NULL
) {
1136 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1137 if (func(rt
, arg
)) {
1144 spin_unlock_bh(&icmp6_dst_lock
);
1147 static int ip6_dst_gc(struct dst_ops
*ops
)
1149 unsigned long now
= jiffies
;
1150 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1151 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1152 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1153 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1154 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1155 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1158 entries
= dst_entries_get_fast(ops
);
1159 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1160 entries
<= rt_max_size
)
1163 net
->ipv6
.ip6_rt_gc_expire
++;
1164 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1165 net
->ipv6
.ip6_rt_last_gc
= now
;
1166 entries
= dst_entries_get_slow(ops
);
1167 if (entries
< ops
->gc_thresh
)
1168 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1170 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1171 return entries
> rt_max_size
;
1174 /* Clean host part of a prefix. Not necessary in radix tree,
1175 but results in cleaner routing tables.
1177 Remove it only when all the things will work!
1180 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1182 int hoplimit
= dst_metric_raw(dst
, RTAX_HOPLIMIT
);
1183 if (hoplimit
== 0) {
1184 struct net_device
*dev
= dst
->dev
;
1185 struct inet6_dev
*idev
;
1188 idev
= __in6_dev_get(dev
);
1190 hoplimit
= idev
->cnf
.hop_limit
;
1192 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1197 EXPORT_SYMBOL(ip6_dst_hoplimit
);
1203 int ip6_route_add(struct fib6_config
*cfg
)
1206 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1207 struct rt6_info
*rt
= NULL
;
1208 struct net_device
*dev
= NULL
;
1209 struct inet6_dev
*idev
= NULL
;
1210 struct fib6_table
*table
;
1213 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1215 #ifndef CONFIG_IPV6_SUBTREES
1216 if (cfg
->fc_src_len
)
1219 if (cfg
->fc_ifindex
) {
1221 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1224 idev
= in6_dev_get(dev
);
1229 if (cfg
->fc_metric
== 0)
1230 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1232 table
= fib6_new_table(net
, cfg
->fc_table
);
1233 if (table
== NULL
) {
1238 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, NULL
, DST_NOCOUNT
);
1245 rt
->dst
.obsolete
= -1;
1246 rt
->rt6i_expires
= (cfg
->fc_flags
& RTF_EXPIRES
) ?
1247 jiffies
+ clock_t_to_jiffies(cfg
->fc_expires
) :
1250 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1251 cfg
->fc_protocol
= RTPROT_BOOT
;
1252 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1254 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1256 if (addr_type
& IPV6_ADDR_MULTICAST
)
1257 rt
->dst
.input
= ip6_mc_input
;
1258 else if (cfg
->fc_flags
& RTF_LOCAL
)
1259 rt
->dst
.input
= ip6_input
;
1261 rt
->dst
.input
= ip6_forward
;
1263 rt
->dst
.output
= ip6_output
;
1265 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1266 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1267 if (rt
->rt6i_dst
.plen
== 128)
1268 rt
->dst
.flags
|= DST_HOST
;
1270 if (!(rt
->dst
.flags
& DST_HOST
) && cfg
->fc_mx
) {
1271 u32
*metrics
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1276 dst_init_metrics(&rt
->dst
, metrics
, 0);
1278 #ifdef CONFIG_IPV6_SUBTREES
1279 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1280 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1283 rt
->rt6i_metric
= cfg
->fc_metric
;
1285 /* We cannot add true routes via loopback here,
1286 they would result in kernel looping; promote them to reject routes
1288 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1289 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
)
1290 && !(cfg
->fc_flags
&RTF_LOCAL
))) {
1291 /* hold loopback dev/idev if we haven't done so. */
1292 if (dev
!= net
->loopback_dev
) {
1297 dev
= net
->loopback_dev
;
1299 idev
= in6_dev_get(dev
);
1305 rt
->dst
.output
= ip6_pkt_discard_out
;
1306 rt
->dst
.input
= ip6_pkt_discard
;
1307 rt
->dst
.error
= -ENETUNREACH
;
1308 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1312 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1313 const struct in6_addr
*gw_addr
;
1316 gw_addr
= &cfg
->fc_gateway
;
1317 ipv6_addr_copy(&rt
->rt6i_gateway
, gw_addr
);
1318 gwa_type
= ipv6_addr_type(gw_addr
);
1320 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1321 struct rt6_info
*grt
;
1323 /* IPv6 strictly inhibits using not link-local
1324 addresses as nexthop address.
1325 Otherwise, router will not able to send redirects.
1326 It is very good, but in some (rare!) circumstances
1327 (SIT, PtP, NBMA NOARP links) it is handy to allow
1328 some exceptions. --ANK
1331 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
1334 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1336 err
= -EHOSTUNREACH
;
1340 if (dev
!= grt
->rt6i_dev
) {
1341 dst_release(&grt
->dst
);
1345 dev
= grt
->rt6i_dev
;
1346 idev
= grt
->rt6i_idev
;
1348 in6_dev_hold(grt
->rt6i_idev
);
1350 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
1352 dst_release(&grt
->dst
);
1358 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
1366 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
1367 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
1371 ipv6_addr_copy(&rt
->rt6i_prefsrc
.addr
, &cfg
->fc_prefsrc
);
1372 rt
->rt6i_prefsrc
.plen
= 128;
1374 rt
->rt6i_prefsrc
.plen
= 0;
1376 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1377 struct neighbour
*n
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
1382 dst_set_neighbour(&rt
->dst
, n
);
1385 rt
->rt6i_flags
= cfg
->fc_flags
;
1392 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1393 int type
= nla_type(nla
);
1396 if (type
> RTAX_MAX
) {
1401 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1407 rt
->rt6i_idev
= idev
;
1408 rt
->rt6i_table
= table
;
1410 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1412 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1424 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1427 struct fib6_table
*table
;
1428 struct net
*net
= dev_net(rt
->rt6i_dev
);
1430 if (rt
== net
->ipv6
.ip6_null_entry
)
1433 table
= rt
->rt6i_table
;
1434 write_lock_bh(&table
->tb6_lock
);
1436 err
= fib6_del(rt
, info
);
1437 dst_release(&rt
->dst
);
1439 write_unlock_bh(&table
->tb6_lock
);
1444 int ip6_del_rt(struct rt6_info
*rt
)
1446 struct nl_info info
= {
1447 .nl_net
= dev_net(rt
->rt6i_dev
),
1449 return __ip6_del_rt(rt
, &info
);
1452 static int ip6_route_del(struct fib6_config
*cfg
)
1454 struct fib6_table
*table
;
1455 struct fib6_node
*fn
;
1456 struct rt6_info
*rt
;
1459 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1463 read_lock_bh(&table
->tb6_lock
);
1465 fn
= fib6_locate(&table
->tb6_root
,
1466 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1467 &cfg
->fc_src
, cfg
->fc_src_len
);
1470 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1471 if (cfg
->fc_ifindex
&&
1472 (rt
->rt6i_dev
== NULL
||
1473 rt
->rt6i_dev
->ifindex
!= cfg
->fc_ifindex
))
1475 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1476 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1478 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1481 read_unlock_bh(&table
->tb6_lock
);
1483 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1486 read_unlock_bh(&table
->tb6_lock
);
1494 struct ip6rd_flowi
{
1496 struct in6_addr gateway
;
1499 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1500 struct fib6_table
*table
,
1504 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1505 struct rt6_info
*rt
;
1506 struct fib6_node
*fn
;
1509 * Get the "current" route for this destination and
1510 * check if the redirect has come from approriate router.
1512 * RFC 2461 specifies that redirects should only be
1513 * accepted if they come from the nexthop to the target.
1514 * Due to the way the routes are chosen, this notion
1515 * is a bit fuzzy and one might need to check all possible
1519 read_lock_bh(&table
->tb6_lock
);
1520 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1522 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1524 * Current route is on-link; redirect is always invalid.
1526 * Seems, previous statement is not true. It could
1527 * be node, which looks for us as on-link (f.e. proxy ndisc)
1528 * But then router serving it might decide, that we should
1529 * know truth 8)8) --ANK (980726).
1531 if (rt6_check_expired(rt
))
1533 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1535 if (fl6
->flowi6_oif
!= rt
->rt6i_dev
->ifindex
)
1537 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1543 rt
= net
->ipv6
.ip6_null_entry
;
1544 BACKTRACK(net
, &fl6
->saddr
);
1548 read_unlock_bh(&table
->tb6_lock
);
1553 static struct rt6_info
*ip6_route_redirect(const struct in6_addr
*dest
,
1554 const struct in6_addr
*src
,
1555 const struct in6_addr
*gateway
,
1556 struct net_device
*dev
)
1558 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1559 struct net
*net
= dev_net(dev
);
1560 struct ip6rd_flowi rdfl
= {
1562 .flowi6_oif
= dev
->ifindex
,
1568 ipv6_addr_copy(&rdfl
.gateway
, gateway
);
1570 if (rt6_need_strict(dest
))
1571 flags
|= RT6_LOOKUP_F_IFACE
;
1573 return (struct rt6_info
*)fib6_rule_lookup(net
, &rdfl
.fl6
,
1574 flags
, __ip6_route_redirect
);
1577 void rt6_redirect(const struct in6_addr
*dest
, const struct in6_addr
*src
,
1578 const struct in6_addr
*saddr
,
1579 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1581 struct rt6_info
*rt
, *nrt
= NULL
;
1582 struct netevent_redirect netevent
;
1583 struct net
*net
= dev_net(neigh
->dev
);
1585 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1587 if (rt
== net
->ipv6
.ip6_null_entry
) {
1588 if (net_ratelimit())
1589 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1590 "for redirect target\n");
1595 * We have finally decided to accept it.
1598 neigh_update(neigh
, lladdr
, NUD_STALE
,
1599 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1600 NEIGH_UPDATE_F_OVERRIDE
|
1601 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1602 NEIGH_UPDATE_F_ISROUTER
))
1606 * Redirect received -> path was valid.
1607 * Look, redirects are sent only in response to data packets,
1608 * so that this nexthop apparently is reachable. --ANK
1610 dst_confirm(&rt
->dst
);
1612 /* Duplicate redirect: silently ignore. */
1613 if (neigh
== dst_get_neighbour_raw(&rt
->dst
))
1616 nrt
= ip6_rt_copy(rt
, dest
);
1620 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1622 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1624 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1625 dst_set_neighbour(&nrt
->dst
, neigh_clone(neigh
));
1627 if (ip6_ins_rt(nrt
))
1630 netevent
.old
= &rt
->dst
;
1631 netevent
.new = &nrt
->dst
;
1632 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1634 if (rt
->rt6i_flags
&RTF_CACHE
) {
1640 dst_release(&rt
->dst
);
1644 * Handle ICMP "packet too big" messages
1645 * i.e. Path MTU discovery
1648 static void rt6_do_pmtu_disc(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1649 struct net
*net
, u32 pmtu
, int ifindex
)
1651 struct rt6_info
*rt
, *nrt
;
1654 rt
= rt6_lookup(net
, daddr
, saddr
, ifindex
, 0);
1658 if (rt6_check_expired(rt
)) {
1663 if (pmtu
>= dst_mtu(&rt
->dst
))
1666 if (pmtu
< IPV6_MIN_MTU
) {
1668 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1669 * MTU (1280) and a fragment header should always be included
1670 * after a node receiving Too Big message reporting PMTU is
1671 * less than the IPv6 Minimum Link MTU.
1673 pmtu
= IPV6_MIN_MTU
;
1677 /* New mtu received -> path was valid.
1678 They are sent only in response to data packets,
1679 so that this nexthop apparently is reachable. --ANK
1681 dst_confirm(&rt
->dst
);
1683 /* Host route. If it is static, it would be better
1684 not to override it, but add new one, so that
1685 when cache entry will expire old pmtu
1686 would return automatically.
1688 if (rt
->rt6i_flags
& RTF_CACHE
) {
1689 dst_metric_set(&rt
->dst
, RTAX_MTU
, pmtu
);
1691 u32 features
= dst_metric(&rt
->dst
, RTAX_FEATURES
);
1692 features
|= RTAX_FEATURE_ALLFRAG
;
1693 dst_metric_set(&rt
->dst
, RTAX_FEATURES
, features
);
1695 dst_set_expires(&rt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1696 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1701 Two cases are possible:
1702 1. It is connected route. Action: COW
1703 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1705 if (!dst_get_neighbour_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1706 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1708 nrt
= rt6_alloc_clone(rt
, daddr
);
1711 dst_metric_set(&nrt
->dst
, RTAX_MTU
, pmtu
);
1713 u32 features
= dst_metric(&nrt
->dst
, RTAX_FEATURES
);
1714 features
|= RTAX_FEATURE_ALLFRAG
;
1715 dst_metric_set(&nrt
->dst
, RTAX_FEATURES
, features
);
1718 /* According to RFC 1981, detecting PMTU increase shouldn't be
1719 * happened within 5 mins, the recommended timer is 10 mins.
1720 * Here this route expiration time is set to ip6_rt_mtu_expires
1721 * which is 10 mins. After 10 mins the decreased pmtu is expired
1722 * and detecting PMTU increase will be automatically happened.
1724 dst_set_expires(&nrt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1725 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1730 dst_release(&rt
->dst
);
1733 void rt6_pmtu_discovery(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1734 struct net_device
*dev
, u32 pmtu
)
1736 struct net
*net
= dev_net(dev
);
1739 * RFC 1981 states that a node "MUST reduce the size of the packets it
1740 * is sending along the path" that caused the Packet Too Big message.
1741 * Since it's not possible in the general case to determine which
1742 * interface was used to send the original packet, we update the MTU
1743 * on the interface that will be used to send future packets. We also
1744 * update the MTU on the interface that received the Packet Too Big in
1745 * case the original packet was forced out that interface with
1746 * SO_BINDTODEVICE or similar. This is the next best thing to the
1747 * correct behaviour, which would be to update the MTU on all
1750 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, 0);
1751 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, dev
->ifindex
);
1755 * Misc support functions
1758 static struct rt6_info
*ip6_rt_copy(const struct rt6_info
*ort
,
1759 const struct in6_addr
*dest
)
1761 struct net
*net
= dev_net(ort
->rt6i_dev
);
1762 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
1766 rt
->dst
.input
= ort
->dst
.input
;
1767 rt
->dst
.output
= ort
->dst
.output
;
1768 rt
->dst
.flags
|= DST_HOST
;
1770 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, dest
);
1771 rt
->rt6i_dst
.plen
= 128;
1772 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1773 rt
->dst
.error
= ort
->dst
.error
;
1774 rt
->rt6i_idev
= ort
->rt6i_idev
;
1776 in6_dev_hold(rt
->rt6i_idev
);
1777 rt
->dst
.lastuse
= jiffies
;
1778 rt
->rt6i_expires
= 0;
1780 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1781 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1782 rt
->rt6i_metric
= 0;
1784 #ifdef CONFIG_IPV6_SUBTREES
1785 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1787 memcpy(&rt
->rt6i_prefsrc
, &ort
->rt6i_prefsrc
, sizeof(struct rt6key
));
1788 rt
->rt6i_table
= ort
->rt6i_table
;
1793 #ifdef CONFIG_IPV6_ROUTE_INFO
1794 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
1795 const struct in6_addr
*prefix
, int prefixlen
,
1796 const struct in6_addr
*gwaddr
, int ifindex
)
1798 struct fib6_node
*fn
;
1799 struct rt6_info
*rt
= NULL
;
1800 struct fib6_table
*table
;
1802 table
= fib6_get_table(net
, RT6_TABLE_INFO
);
1806 write_lock_bh(&table
->tb6_lock
);
1807 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1811 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1812 if (rt
->rt6i_dev
->ifindex
!= ifindex
)
1814 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1816 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1822 write_unlock_bh(&table
->tb6_lock
);
1826 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
1827 const struct in6_addr
*prefix
, int prefixlen
,
1828 const struct in6_addr
*gwaddr
, int ifindex
,
1831 struct fib6_config cfg
= {
1832 .fc_table
= RT6_TABLE_INFO
,
1833 .fc_metric
= IP6_RT_PRIO_USER
,
1834 .fc_ifindex
= ifindex
,
1835 .fc_dst_len
= prefixlen
,
1836 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1837 RTF_UP
| RTF_PREF(pref
),
1839 .fc_nlinfo
.nlh
= NULL
,
1840 .fc_nlinfo
.nl_net
= net
,
1843 ipv6_addr_copy(&cfg
.fc_dst
, prefix
);
1844 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1846 /* We should treat it as a default route if prefix length is 0. */
1848 cfg
.fc_flags
|= RTF_DEFAULT
;
1850 ip6_route_add(&cfg
);
1852 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, ifindex
);
1856 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
1858 struct rt6_info
*rt
;
1859 struct fib6_table
*table
;
1861 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1865 write_lock_bh(&table
->tb6_lock
);
1866 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1867 if (dev
== rt
->rt6i_dev
&&
1868 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1869 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1874 write_unlock_bh(&table
->tb6_lock
);
1878 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
1879 struct net_device
*dev
,
1882 struct fib6_config cfg
= {
1883 .fc_table
= RT6_TABLE_DFLT
,
1884 .fc_metric
= IP6_RT_PRIO_USER
,
1885 .fc_ifindex
= dev
->ifindex
,
1886 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1887 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1889 .fc_nlinfo
.nlh
= NULL
,
1890 .fc_nlinfo
.nl_net
= dev_net(dev
),
1893 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1895 ip6_route_add(&cfg
);
1897 return rt6_get_dflt_router(gwaddr
, dev
);
1900 void rt6_purge_dflt_routers(struct net
*net
)
1902 struct rt6_info
*rt
;
1903 struct fib6_table
*table
;
1905 /* NOTE: Keep consistent with rt6_get_dflt_router */
1906 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1911 read_lock_bh(&table
->tb6_lock
);
1912 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1913 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1915 read_unlock_bh(&table
->tb6_lock
);
1920 read_unlock_bh(&table
->tb6_lock
);
1923 static void rtmsg_to_fib6_config(struct net
*net
,
1924 struct in6_rtmsg
*rtmsg
,
1925 struct fib6_config
*cfg
)
1927 memset(cfg
, 0, sizeof(*cfg
));
1929 cfg
->fc_table
= RT6_TABLE_MAIN
;
1930 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1931 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1932 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1933 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1934 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1935 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1937 cfg
->fc_nlinfo
.nl_net
= net
;
1939 ipv6_addr_copy(&cfg
->fc_dst
, &rtmsg
->rtmsg_dst
);
1940 ipv6_addr_copy(&cfg
->fc_src
, &rtmsg
->rtmsg_src
);
1941 ipv6_addr_copy(&cfg
->fc_gateway
, &rtmsg
->rtmsg_gateway
);
1944 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
1946 struct fib6_config cfg
;
1947 struct in6_rtmsg rtmsg
;
1951 case SIOCADDRT
: /* Add a route */
1952 case SIOCDELRT
: /* Delete a route */
1953 if (!capable(CAP_NET_ADMIN
))
1955 err
= copy_from_user(&rtmsg
, arg
,
1956 sizeof(struct in6_rtmsg
));
1960 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
1965 err
= ip6_route_add(&cfg
);
1968 err
= ip6_route_del(&cfg
);
1982 * Drop the packet on the floor
1985 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
1988 struct dst_entry
*dst
= skb_dst(skb
);
1989 switch (ipstats_mib_noroutes
) {
1990 case IPSTATS_MIB_INNOROUTES
:
1991 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
1992 if (type
== IPV6_ADDR_ANY
) {
1993 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
1994 IPSTATS_MIB_INADDRERRORS
);
1998 case IPSTATS_MIB_OUTNOROUTES
:
1999 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2000 ipstats_mib_noroutes
);
2003 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
2008 static int ip6_pkt_discard(struct sk_buff
*skb
)
2010 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
2013 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
2015 skb
->dev
= skb_dst(skb
)->dev
;
2016 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2019 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2021 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2023 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
2026 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
2028 skb
->dev
= skb_dst(skb
)->dev
;
2029 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2035 * Allocate a dst for local (unicast / anycast) address.
2038 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2039 const struct in6_addr
*addr
,
2042 struct net
*net
= dev_net(idev
->dev
);
2043 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
2044 net
->loopback_dev
, 0);
2045 struct neighbour
*neigh
;
2048 if (net_ratelimit())
2049 pr_warning("IPv6: Maximum number of routes reached,"
2050 " consider increasing route/max_size.\n");
2051 return ERR_PTR(-ENOMEM
);
2056 rt
->dst
.flags
|= DST_HOST
;
2057 rt
->dst
.input
= ip6_input
;
2058 rt
->dst
.output
= ip6_output
;
2059 rt
->rt6i_idev
= idev
;
2060 rt
->dst
.obsolete
= -1;
2062 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2064 rt
->rt6i_flags
|= RTF_ANYCAST
;
2066 rt
->rt6i_flags
|= RTF_LOCAL
;
2067 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
2068 if (IS_ERR(neigh
)) {
2071 return ERR_CAST(neigh
);
2073 dst_set_neighbour(&rt
->dst
, neigh
);
2075 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
2076 rt
->rt6i_dst
.plen
= 128;
2077 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2079 atomic_set(&rt
->dst
.__refcnt
, 1);
2084 int ip6_route_get_saddr(struct net
*net
,
2085 struct rt6_info
*rt
,
2086 const struct in6_addr
*daddr
,
2088 struct in6_addr
*saddr
)
2090 struct inet6_dev
*idev
= ip6_dst_idev((struct dst_entry
*)rt
);
2092 if (rt
->rt6i_prefsrc
.plen
)
2093 ipv6_addr_copy(saddr
, &rt
->rt6i_prefsrc
.addr
);
2095 err
= ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2096 daddr
, prefs
, saddr
);
2100 /* remove deleted ip from prefsrc entries */
/* Callback argument for the fib6_remove_prefsrc() tree walk: the device
 * being torn down (NULL matches any) and the address to purge from
 * rt6i_prefsrc entries.
 * NOTE(review): the 'net' member (original line 2103, read by
 * fib6_remove_prefsrc) was elided by the extraction -- confirm. */
2101 struct arg_dev_net_ip
{
2102 struct net_device
*dev
;
2104 struct in6_addr
*addr
;
2107 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2109 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2110 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2111 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2113 if (((void *)rt
->rt6i_dev
== dev
|| dev
== NULL
) &&
2114 rt
!= net
->ipv6
.ip6_null_entry
&&
2115 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2116 /* remove prefsrc entry */
2117 rt
->rt6i_prefsrc
.plen
= 0;
2122 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2124 struct net
*net
= dev_net(ifp
->idev
->dev
);
2125 struct arg_dev_net_ip adni
= {
2126 .dev
= ifp
->idev
->dev
,
2130 fib6_clean_all(net
, fib6_remove_prefsrc
, 0, &adni
);
/* Callback argument for the fib6_ifdown() walk: device going away
 * (NULL matches all devices).
 * NOTE(review): the 'net' member dereferenced in fib6_ifdown() was
 * elided by the extraction -- confirm against the full source. */
2133 struct arg_dev_net
{
2134 struct net_device
*dev
;
2138 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2140 const struct arg_dev_net
*adn
= arg
;
2141 const struct net_device
*dev
= adn
->dev
;
2143 if ((rt
->rt6i_dev
== dev
|| dev
== NULL
) &&
2144 rt
!= adn
->net
->ipv6
.ip6_null_entry
) {
2145 RT6_TRACE("deleted by ifdown %p\n", rt
);
2151 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2153 struct arg_dev_net adn
= {
2158 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2159 icmp6_clean_all(fib6_ifdown
, &adn
);
/* Callback argument for the rt6_mtu_change() walk: the device whose
 * MTU changed plus (per rt6_mtu_change_route()'s use of arg->mtu,
 * elided here by the extraction) the new MTU value. */
2162 struct rt6_mtu_change_arg
2164 struct net_device
*dev
;
2168 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2170 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2171 struct inet6_dev
*idev
;
2173 /* In IPv6 pmtu discovery is not optional,
2174 so that RTAX_MTU lock cannot disable it.
2175 We still use this lock to block changes
2176 caused by addrconf/ndisc.
2179 idev
= __in6_dev_get(arg
->dev
);
2183 /* For administrative MTU increase, there is no way to discover
2184 IPv6 PMTU increase, so PMTU increase should be updated here.
2185 Since RFC 1981 doesn't include administrative MTU increase
2186 update PMTU increase is a MUST. (i.e. jumbo frame)
2189 If new MTU is less than route PMTU, this new MTU will be the
2190 lowest MTU in the path, update the route PMTU to reflect PMTU
2191 decreases; if new MTU is greater than route PMTU, and the
2192 old MTU is the lowest MTU in the path, update the route PMTU
2193 to reflect the increase. In this case if the other nodes' MTU
2194 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2197 if (rt
->rt6i_dev
== arg
->dev
&&
2198 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2199 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2200 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2201 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2202 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2207 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
2209 struct rt6_mtu_change_arg arg
= {
2214 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
2217 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2218 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2219 [RTA_OIF
] = { .type
= NLA_U32
},
2220 [RTA_IIF
] = { .type
= NLA_U32
},
2221 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2222 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2225 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2226 struct fib6_config
*cfg
)
2229 struct nlattr
*tb
[RTA_MAX
+1];
2232 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2237 rtm
= nlmsg_data(nlh
);
2238 memset(cfg
, 0, sizeof(*cfg
));
2240 cfg
->fc_table
= rtm
->rtm_table
;
2241 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2242 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2243 cfg
->fc_flags
= RTF_UP
;
2244 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2246 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2247 cfg
->fc_flags
|= RTF_REJECT
;
2249 if (rtm
->rtm_type
== RTN_LOCAL
)
2250 cfg
->fc_flags
|= RTF_LOCAL
;
2252 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2253 cfg
->fc_nlinfo
.nlh
= nlh
;
2254 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2256 if (tb
[RTA_GATEWAY
]) {
2257 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2258 cfg
->fc_flags
|= RTF_GATEWAY
;
2262 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2264 if (nla_len(tb
[RTA_DST
]) < plen
)
2267 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2271 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2273 if (nla_len(tb
[RTA_SRC
]) < plen
)
2276 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2279 if (tb
[RTA_PREFSRC
])
2280 nla_memcpy(&cfg
->fc_prefsrc
, tb
[RTA_PREFSRC
], 16);
2283 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2285 if (tb
[RTA_PRIORITY
])
2286 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2288 if (tb
[RTA_METRICS
]) {
2289 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2290 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2294 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2301 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2303 struct fib6_config cfg
;
2306 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2310 return ip6_route_del(&cfg
);
2313 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2315 struct fib6_config cfg
;
2318 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2322 return ip6_route_add(&cfg
);
2325 static inline size_t rt6_nlmsg_size(void)
2327 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2328 + nla_total_size(16) /* RTA_SRC */
2329 + nla_total_size(16) /* RTA_DST */
2330 + nla_total_size(16) /* RTA_GATEWAY */
2331 + nla_total_size(16) /* RTA_PREFSRC */
2332 + nla_total_size(4) /* RTA_TABLE */
2333 + nla_total_size(4) /* RTA_IIF */
2334 + nla_total_size(4) /* RTA_OIF */
2335 + nla_total_size(4) /* RTA_PRIORITY */
2336 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2337 + nla_total_size(sizeof(struct rta_cacheinfo
));
2340 static int rt6_fill_node(struct net
*net
,
2341 struct sk_buff
*skb
, struct rt6_info
*rt
,
2342 struct in6_addr
*dst
, struct in6_addr
*src
,
2343 int iif
, int type
, u32 pid
, u32 seq
,
2344 int prefix
, int nowait
, unsigned int flags
)
2347 struct nlmsghdr
*nlh
;
2350 struct neighbour
*n
;
2352 if (prefix
) { /* user wants prefix routes only */
2353 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2354 /* success since this is not a prefix route */
2359 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2363 rtm
= nlmsg_data(nlh
);
2364 rtm
->rtm_family
= AF_INET6
;
2365 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2366 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2369 table
= rt
->rt6i_table
->tb6_id
;
2371 table
= RT6_TABLE_UNSPEC
;
2372 rtm
->rtm_table
= table
;
2373 NLA_PUT_U32(skb
, RTA_TABLE
, table
);
2374 if (rt
->rt6i_flags
&RTF_REJECT
)
2375 rtm
->rtm_type
= RTN_UNREACHABLE
;
2376 else if (rt
->rt6i_flags
&RTF_LOCAL
)
2377 rtm
->rtm_type
= RTN_LOCAL
;
2378 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
2379 rtm
->rtm_type
= RTN_LOCAL
;
2381 rtm
->rtm_type
= RTN_UNICAST
;
2383 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2384 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2385 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
2386 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2387 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2388 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2389 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
2390 rtm
->rtm_protocol
= RTPROT_RA
;
2392 if (rt
->rt6i_flags
&RTF_CACHE
)
2393 rtm
->rtm_flags
|= RTM_F_CLONED
;
2396 NLA_PUT(skb
, RTA_DST
, 16, dst
);
2397 rtm
->rtm_dst_len
= 128;
2398 } else if (rtm
->rtm_dst_len
)
2399 NLA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
2400 #ifdef CONFIG_IPV6_SUBTREES
2402 NLA_PUT(skb
, RTA_SRC
, 16, src
);
2403 rtm
->rtm_src_len
= 128;
2404 } else if (rtm
->rtm_src_len
)
2405 NLA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
2408 #ifdef CONFIG_IPV6_MROUTE
2409 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2410 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2415 goto nla_put_failure
;
2417 if (err
== -EMSGSIZE
)
2418 goto nla_put_failure
;
2423 NLA_PUT_U32(skb
, RTA_IIF
, iif
);
2425 struct in6_addr saddr_buf
;
2426 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0)
2427 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2430 if (rt
->rt6i_prefsrc
.plen
) {
2431 struct in6_addr saddr_buf
;
2432 ipv6_addr_copy(&saddr_buf
, &rt
->rt6i_prefsrc
.addr
);
2433 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2436 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2437 goto nla_put_failure
;
2440 n
= dst_get_neighbour(&rt
->dst
);
2442 NLA_PUT(skb
, RTA_GATEWAY
, 16, &n
->primary_key
);
2446 NLA_PUT_U32(skb
, RTA_OIF
, rt
->rt6i_dev
->ifindex
);
2448 NLA_PUT_U32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
);
2450 if (!(rt
->rt6i_flags
& RTF_EXPIRES
))
2452 else if (rt
->rt6i_expires
- jiffies
< INT_MAX
)
2453 expires
= rt
->rt6i_expires
- jiffies
;
2457 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, 0, 0,
2458 expires
, rt
->dst
.error
) < 0)
2459 goto nla_put_failure
;
2461 return nlmsg_end(skb
, nlh
);
2464 nlmsg_cancel(skb
, nlh
);
2468 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2470 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2473 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2474 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2475 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2479 return rt6_fill_node(arg
->net
,
2480 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2481 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2482 prefix
, 0, NLM_F_MULTI
);
2485 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2487 struct net
*net
= sock_net(in_skb
->sk
);
2488 struct nlattr
*tb
[RTA_MAX
+1];
2489 struct rt6_info
*rt
;
2490 struct sk_buff
*skb
;
2495 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2500 memset(&fl6
, 0, sizeof(fl6
));
2503 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2506 ipv6_addr_copy(&fl6
.saddr
, nla_data(tb
[RTA_SRC
]));
2510 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2513 ipv6_addr_copy(&fl6
.daddr
, nla_data(tb
[RTA_DST
]));
2517 iif
= nla_get_u32(tb
[RTA_IIF
]);
2520 fl6
.flowi6_oif
= nla_get_u32(tb
[RTA_OIF
]);
2523 struct net_device
*dev
;
2524 dev
= __dev_get_by_index(net
, iif
);
2531 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2537 /* Reserve room for dummy headers, this skb can pass
2538 through good chunk of routing engine.
2540 skb_reset_mac_header(skb
);
2541 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2543 rt
= (struct rt6_info
*) ip6_route_output(net
, NULL
, &fl6
);
2544 skb_dst_set(skb
, &rt
->dst
);
2546 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
2547 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2548 nlh
->nlmsg_seq
, 0, 0, 0);
2554 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).pid
);
2559 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2561 struct sk_buff
*skb
;
2562 struct net
*net
= info
->nl_net
;
2567 seq
= info
->nlh
!= NULL
? info
->nlh
->nlmsg_seq
: 0;
2569 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2573 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2574 event
, info
->pid
, seq
, 0, 0, 0);
2576 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2577 WARN_ON(err
== -EMSGSIZE
);
2581 rtnl_notify(skb
, net
, info
->pid
, RTNLGRP_IPV6_ROUTE
,
2582 info
->nlh
, gfp_any());
2586 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
2589 static int ip6_route_dev_notify(struct notifier_block
*this,
2590 unsigned long event
, void *data
)
2592 struct net_device
*dev
= (struct net_device
*)data
;
2593 struct net
*net
= dev_net(dev
);
2595 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2596 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2597 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2598 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2599 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2600 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2601 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2602 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2613 #ifdef CONFIG_PROC_FS
2624 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2626 struct seq_file
*m
= p_arg
;
2627 struct neighbour
*n
;
2629 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2631 #ifdef CONFIG_IPV6_SUBTREES
2632 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2634 seq_puts(m
, "00000000000000000000000000000000 00 ");
2637 n
= dst_get_neighbour(&rt
->dst
);
2639 seq_printf(m
, "%pi6", n
->primary_key
);
2641 seq_puts(m
, "00000000000000000000000000000000");
2644 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2645 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2646 rt
->dst
.__use
, rt
->rt6i_flags
,
2647 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
2651 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2653 struct net
*net
= (struct net
*)m
->private;
2654 fib6_clean_all(net
, rt6_info_route
, 0, m
);
2658 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2660 return single_open_net(inode
, file
, ipv6_route_show
);
2663 static const struct file_operations ipv6_route_proc_fops
= {
2664 .owner
= THIS_MODULE
,
2665 .open
= ipv6_route_open
,
2667 .llseek
= seq_lseek
,
2668 .release
= single_release_net
,
2671 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2673 struct net
*net
= (struct net
*)seq
->private;
2674 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2675 net
->ipv6
.rt6_stats
->fib_nodes
,
2676 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2677 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2678 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2679 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2680 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2681 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
2686 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2688 return single_open_net(inode
, file
, rt6_stats_seq_show
);
2691 static const struct file_operations rt6_stats_seq_fops
= {
2692 .owner
= THIS_MODULE
,
2693 .open
= rt6_stats_seq_open
,
2695 .llseek
= seq_lseek
,
2696 .release
= single_release_net
,
2698 #endif /* CONFIG_PROC_FS */
2700 #ifdef CONFIG_SYSCTL
2703 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2704 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2711 net
= (struct net
*)ctl
->extra1
;
2712 delay
= net
->ipv6
.sysctl
.flush_delay
;
2713 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2714 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
2718 ctl_table ipv6_route_table_template
[] = {
2720 .procname
= "flush",
2721 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2722 .maxlen
= sizeof(int),
2724 .proc_handler
= ipv6_sysctl_rtcache_flush
2727 .procname
= "gc_thresh",
2728 .data
= &ip6_dst_ops_template
.gc_thresh
,
2729 .maxlen
= sizeof(int),
2731 .proc_handler
= proc_dointvec
,
2734 .procname
= "max_size",
2735 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2736 .maxlen
= sizeof(int),
2738 .proc_handler
= proc_dointvec
,
2741 .procname
= "gc_min_interval",
2742 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2743 .maxlen
= sizeof(int),
2745 .proc_handler
= proc_dointvec_jiffies
,
2748 .procname
= "gc_timeout",
2749 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2750 .maxlen
= sizeof(int),
2752 .proc_handler
= proc_dointvec_jiffies
,
2755 .procname
= "gc_interval",
2756 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2757 .maxlen
= sizeof(int),
2759 .proc_handler
= proc_dointvec_jiffies
,
2762 .procname
= "gc_elasticity",
2763 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2764 .maxlen
= sizeof(int),
2766 .proc_handler
= proc_dointvec
,
2769 .procname
= "mtu_expires",
2770 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2771 .maxlen
= sizeof(int),
2773 .proc_handler
= proc_dointvec_jiffies
,
2776 .procname
= "min_adv_mss",
2777 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2778 .maxlen
= sizeof(int),
2780 .proc_handler
= proc_dointvec
,
2783 .procname
= "gc_min_interval_ms",
2784 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2785 .maxlen
= sizeof(int),
2787 .proc_handler
= proc_dointvec_ms_jiffies
,
2792 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2794 struct ctl_table
*table
;
2796 table
= kmemdup(ipv6_route_table_template
,
2797 sizeof(ipv6_route_table_template
),
2801 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2802 table
[0].extra1
= net
;
2803 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
2804 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
2805 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2806 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
2807 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
2808 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
2809 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
2810 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
2811 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2818 static int __net_init
ip6_route_net_init(struct net
*net
)
2822 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2823 sizeof(net
->ipv6
.ip6_dst_ops
));
2825 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2826 goto out_ip6_dst_ops
;
2828 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2829 sizeof(*net
->ipv6
.ip6_null_entry
),
2831 if (!net
->ipv6
.ip6_null_entry
)
2832 goto out_ip6_dst_entries
;
2833 net
->ipv6
.ip6_null_entry
->dst
.path
=
2834 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2835 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2836 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
2837 ip6_template_metrics
, true);
2839 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2840 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2841 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2843 if (!net
->ipv6
.ip6_prohibit_entry
)
2844 goto out_ip6_null_entry
;
2845 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2846 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2847 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2848 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
2849 ip6_template_metrics
, true);
2851 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2852 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2854 if (!net
->ipv6
.ip6_blk_hole_entry
)
2855 goto out_ip6_prohibit_entry
;
2856 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2857 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2858 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2859 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
2860 ip6_template_metrics
, true);
2863 net
->ipv6
.sysctl
.flush_delay
= 0;
2864 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2865 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2866 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2867 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2868 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2869 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
2870 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2872 #ifdef CONFIG_PROC_FS
2873 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2874 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
2876 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
2882 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2883 out_ip6_prohibit_entry
:
2884 kfree(net
->ipv6
.ip6_prohibit_entry
);
2886 kfree(net
->ipv6
.ip6_null_entry
);
2888 out_ip6_dst_entries
:
2889 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2894 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2896 #ifdef CONFIG_PROC_FS
2897 proc_net_remove(net
, "ipv6_route");
2898 proc_net_remove(net
, "rt6_stats");
2900 kfree(net
->ipv6
.ip6_null_entry
);
2901 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 kfree(net
->ipv6
.ip6_prohibit_entry
);
2903 kfree(net
->ipv6
.ip6_blk_hole_entry
);
2905 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2908 static struct pernet_operations ip6_route_net_ops
= {
2909 .init
= ip6_route_net_init
,
2910 .exit
= ip6_route_net_exit
,
2913 static struct notifier_block ip6_route_dev_notifier
= {
2914 .notifier_call
= ip6_route_dev_notify
,
2918 int __init
ip6_route_init(void)
2923 ip6_dst_ops_template
.kmem_cachep
=
2924 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
2925 SLAB_HWCACHE_ALIGN
, NULL
);
2926 if (!ip6_dst_ops_template
.kmem_cachep
)
2929 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
2931 goto out_kmem_cache
;
2933 ret
= register_pernet_subsys(&ip6_route_net_ops
);
2935 goto out_dst_entries
;
2937 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
2939 /* Registering of the loopback is done before this portion of code,
2940 * the loopback reference in rt6_info will not be taken, do it
2941 * manually for init_net */
2942 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
2943 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2944 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2945 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
2946 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2947 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
2948 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2952 goto out_register_subsys
;
2958 ret
= fib6_rules_init();
2963 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
2964 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
2965 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
2966 goto fib6_rules_init
;
2968 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
2970 goto fib6_rules_init
;
2976 fib6_rules_cleanup();
2981 out_register_subsys
:
2982 unregister_pernet_subsys(&ip6_route_net_ops
);
2984 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2986 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
2990 void ip6_route_cleanup(void)
2992 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
2993 fib6_rules_cleanup();
2996 unregister_pernet_subsys(&ip6_route_net_ops
);
2997 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2998 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);