2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
76 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
77 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
78 static unsigned int ip6_default_mtu(const struct dst_entry
*dst
);
79 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
80 static void ip6_dst_destroy(struct dst_entry
*);
81 static void ip6_dst_ifdown(struct dst_entry
*,
82 struct net_device
*dev
, int how
);
83 static int ip6_dst_gc(struct dst_ops
*ops
);
85 static int ip6_pkt_discard(struct sk_buff
*skb
);
86 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
87 static void ip6_link_failure(struct sk_buff
*skb
);
88 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
92 const struct in6_addr
*prefix
, int prefixlen
,
93 const struct in6_addr
*gwaddr
, int ifindex
,
95 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
96 const struct in6_addr
*prefix
, int prefixlen
,
97 const struct in6_addr
*gwaddr
, int ifindex
);
100 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
102 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
103 struct inet_peer
*peer
;
107 rt6_bind_peer(rt
, 1);
109 peer
= rt
->rt6i_peer
;
111 u32
*old_p
= __DST_METRICS_PTR(old
);
112 unsigned long prev
, new;
115 if (inet_metrics_new(peer
))
116 memcpy(p
, old_p
, sizeof(u32
) * RTAX_MAX
);
118 new = (unsigned long) p
;
119 prev
= cmpxchg(&dst
->_metrics
, old
, new);
122 p
= __DST_METRICS_PTR(prev
);
123 if (prev
& DST_METRICS_READ_ONLY
)
130 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
, const void *daddr
)
132 return __neigh_lookup_errno(&nd_tbl
, daddr
, dst
->dev
);
135 static struct dst_ops ip6_dst_ops_template
= {
137 .protocol
= cpu_to_be16(ETH_P_IPV6
),
140 .check
= ip6_dst_check
,
141 .default_advmss
= ip6_default_advmss
,
142 .default_mtu
= ip6_default_mtu
,
143 .cow_metrics
= ipv6_cow_metrics
,
144 .destroy
= ip6_dst_destroy
,
145 .ifdown
= ip6_dst_ifdown
,
146 .negative_advice
= ip6_negative_advice
,
147 .link_failure
= ip6_link_failure
,
148 .update_pmtu
= ip6_rt_update_pmtu
,
149 .local_out
= __ip6_local_out
,
150 .neigh_lookup
= ip6_neigh_lookup
,
153 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry
*dst
)
158 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
162 static u32
*ip6_rt_blackhole_cow_metrics(struct dst_entry
*dst
,
168 static struct dst_ops ip6_dst_blackhole_ops
= {
170 .protocol
= cpu_to_be16(ETH_P_IPV6
),
171 .destroy
= ip6_dst_destroy
,
172 .check
= ip6_dst_check
,
173 .default_mtu
= ip6_blackhole_default_mtu
,
174 .default_advmss
= ip6_default_advmss
,
175 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
176 .cow_metrics
= ip6_rt_blackhole_cow_metrics
,
177 .neigh_lookup
= ip6_neigh_lookup
,
180 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
181 [RTAX_HOPLIMIT
- 1] = 255,
184 static struct rt6_info ip6_null_entry_template
= {
186 .__refcnt
= ATOMIC_INIT(1),
189 .error
= -ENETUNREACH
,
190 .input
= ip6_pkt_discard
,
191 .output
= ip6_pkt_discard_out
,
193 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
194 .rt6i_protocol
= RTPROT_KERNEL
,
195 .rt6i_metric
= ~(u32
) 0,
196 .rt6i_ref
= ATOMIC_INIT(1),
199 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
201 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
202 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
204 static struct rt6_info ip6_prohibit_entry_template
= {
206 .__refcnt
= ATOMIC_INIT(1),
210 .input
= ip6_pkt_prohibit
,
211 .output
= ip6_pkt_prohibit_out
,
213 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
214 .rt6i_protocol
= RTPROT_KERNEL
,
215 .rt6i_metric
= ~(u32
) 0,
216 .rt6i_ref
= ATOMIC_INIT(1),
219 static struct rt6_info ip6_blk_hole_entry_template
= {
221 .__refcnt
= ATOMIC_INIT(1),
225 .input
= dst_discard
,
226 .output
= dst_discard
,
228 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
229 .rt6i_protocol
= RTPROT_KERNEL
,
230 .rt6i_metric
= ~(u32
) 0,
231 .rt6i_ref
= ATOMIC_INIT(1),
236 /* allocate dst with ip6_dst_ops */
237 static inline struct rt6_info
*ip6_dst_alloc(struct dst_ops
*ops
,
238 struct net_device
*dev
,
241 struct rt6_info
*rt
= dst_alloc(ops
, dev
, 0, 0, flags
);
243 memset(&rt
->rt6i_table
, 0, sizeof(*rt
) - sizeof(struct dst_entry
));
248 static void ip6_dst_destroy(struct dst_entry
*dst
)
250 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
251 struct inet6_dev
*idev
= rt
->rt6i_idev
;
252 struct inet_peer
*peer
= rt
->rt6i_peer
;
255 rt
->rt6i_idev
= NULL
;
259 rt
->rt6i_peer
= NULL
;
264 static atomic_t __rt6_peer_genid
= ATOMIC_INIT(0);
266 static u32
rt6_peer_genid(void)
268 return atomic_read(&__rt6_peer_genid
);
271 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
273 struct inet_peer
*peer
;
275 peer
= inet_getpeer_v6(&rt
->rt6i_dst
.addr
, create
);
276 if (peer
&& cmpxchg(&rt
->rt6i_peer
, NULL
, peer
) != NULL
)
279 rt
->rt6i_peer_genid
= rt6_peer_genid();
282 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
285 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
286 struct inet6_dev
*idev
= rt
->rt6i_idev
;
287 struct net_device
*loopback_dev
=
288 dev_net(dev
)->loopback_dev
;
290 if (dev
!= loopback_dev
&& idev
!= NULL
&& idev
->dev
== dev
) {
291 struct inet6_dev
*loopback_idev
=
292 in6_dev_get(loopback_dev
);
293 if (loopback_idev
!= NULL
) {
294 rt
->rt6i_idev
= loopback_idev
;
300 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
302 return (rt
->rt6i_flags
& RTF_EXPIRES
) &&
303 time_after(jiffies
, rt
->rt6i_expires
);
306 static inline int rt6_need_strict(const struct in6_addr
*daddr
)
308 return ipv6_addr_type(daddr
) &
309 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
313 * Route lookup. Any table->tb6_lock is implied.
316 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
318 const struct in6_addr
*saddr
,
322 struct rt6_info
*local
= NULL
;
323 struct rt6_info
*sprt
;
325 if (!oif
&& ipv6_addr_any(saddr
))
328 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
329 struct net_device
*dev
= sprt
->rt6i_dev
;
332 if (dev
->ifindex
== oif
)
334 if (dev
->flags
& IFF_LOOPBACK
) {
335 if (sprt
->rt6i_idev
== NULL
||
336 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
337 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
339 if (local
&& (!oif
||
340 local
->rt6i_idev
->dev
->ifindex
== oif
))
346 if (ipv6_chk_addr(net
, saddr
, dev
,
347 flags
& RT6_LOOKUP_F_IFACE
))
356 if (flags
& RT6_LOOKUP_F_IFACE
)
357 return net
->ipv6
.ip6_null_entry
;
363 #ifdef CONFIG_IPV6_ROUTER_PREF
364 static void rt6_probe(struct rt6_info
*rt
)
366 struct neighbour
*neigh
= rt
? dst_get_neighbour(&rt
->dst
) : NULL
;
368 * Okay, this does not seem to be appropriate
369 * for now, however, we need to check if it
370 * is really so; aka Router Reachability Probing.
372 * Router Reachability Probe MUST be rate-limited
373 * to no more than one per minute.
375 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
377 read_lock_bh(&neigh
->lock
);
378 if (!(neigh
->nud_state
& NUD_VALID
) &&
379 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
380 struct in6_addr mcaddr
;
381 struct in6_addr
*target
;
383 neigh
->updated
= jiffies
;
384 read_unlock_bh(&neigh
->lock
);
386 target
= (struct in6_addr
*)&neigh
->primary_key
;
387 addrconf_addr_solict_mult(target
, &mcaddr
);
388 ndisc_send_ns(rt
->rt6i_dev
, NULL
, target
, &mcaddr
, NULL
);
390 read_unlock_bh(&neigh
->lock
);
393 static inline void rt6_probe(struct rt6_info
*rt
)
399 * Default Router Selection (RFC 2461 6.3.6)
401 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
403 struct net_device
*dev
= rt
->rt6i_dev
;
404 if (!oif
|| dev
->ifindex
== oif
)
406 if ((dev
->flags
& IFF_LOOPBACK
) &&
407 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
412 static inline int rt6_check_neigh(struct rt6_info
*rt
)
414 struct neighbour
*neigh
= dst_get_neighbour(&rt
->dst
);
416 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
417 !(rt
->rt6i_flags
& RTF_GATEWAY
))
420 read_lock_bh(&neigh
->lock
);
421 if (neigh
->nud_state
& NUD_VALID
)
423 #ifdef CONFIG_IPV6_ROUTER_PREF
424 else if (neigh
->nud_state
& NUD_FAILED
)
429 read_unlock_bh(&neigh
->lock
);
435 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
440 m
= rt6_check_dev(rt
, oif
);
441 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
443 #ifdef CONFIG_IPV6_ROUTER_PREF
444 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
446 n
= rt6_check_neigh(rt
);
447 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
452 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
453 int *mpri
, struct rt6_info
*match
)
457 if (rt6_check_expired(rt
))
460 m
= rt6_score_route(rt
, oif
, strict
);
465 if (strict
& RT6_LOOKUP_F_REACHABLE
)
469 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
477 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
478 struct rt6_info
*rr_head
,
479 u32 metric
, int oif
, int strict
)
481 struct rt6_info
*rt
, *match
;
485 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
486 rt
= rt
->dst
.rt6_next
)
487 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
488 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
489 rt
= rt
->dst
.rt6_next
)
490 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
495 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
497 struct rt6_info
*match
, *rt0
;
500 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
501 __func__
, fn
->leaf
, oif
);
505 fn
->rr_ptr
= rt0
= fn
->leaf
;
507 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
510 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
511 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
513 /* no entries matched; do round-robin */
514 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
521 RT6_TRACE("%s() => %p\n",
524 net
= dev_net(rt0
->rt6i_dev
);
525 return match
? match
: net
->ipv6
.ip6_null_entry
;
528 #ifdef CONFIG_IPV6_ROUTE_INFO
529 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
530 const struct in6_addr
*gwaddr
)
532 struct net
*net
= dev_net(dev
);
533 struct route_info
*rinfo
= (struct route_info
*) opt
;
534 struct in6_addr prefix_buf
, *prefix
;
536 unsigned long lifetime
;
539 if (len
< sizeof(struct route_info
)) {
543 /* Sanity check for prefix_len and length */
544 if (rinfo
->length
> 3) {
546 } else if (rinfo
->prefix_len
> 128) {
548 } else if (rinfo
->prefix_len
> 64) {
549 if (rinfo
->length
< 2) {
552 } else if (rinfo
->prefix_len
> 0) {
553 if (rinfo
->length
< 1) {
558 pref
= rinfo
->route_pref
;
559 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
562 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
564 if (rinfo
->length
== 3)
565 prefix
= (struct in6_addr
*)rinfo
->prefix
;
567 /* this function is safe */
568 ipv6_addr_prefix(&prefix_buf
,
569 (struct in6_addr
*)rinfo
->prefix
,
571 prefix
= &prefix_buf
;
574 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
577 if (rt
&& !lifetime
) {
583 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
586 rt
->rt6i_flags
= RTF_ROUTEINFO
|
587 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
590 if (!addrconf_finite_timeout(lifetime
)) {
591 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
593 rt
->rt6i_expires
= jiffies
+ HZ
* lifetime
;
594 rt
->rt6i_flags
|= RTF_EXPIRES
;
596 dst_release(&rt
->dst
);
602 #define BACKTRACK(__net, saddr) \
604 if (rt == __net->ipv6.ip6_null_entry) { \
605 struct fib6_node *pn; \
607 if (fn->fn_flags & RTN_TL_ROOT) \
610 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
611 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
614 if (fn->fn_flags & RTN_RTINFO) \
620 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
621 struct fib6_table
*table
,
622 struct flowi6
*fl6
, int flags
)
624 struct fib6_node
*fn
;
627 read_lock_bh(&table
->tb6_lock
);
628 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
631 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
632 BACKTRACK(net
, &fl6
->saddr
);
634 dst_use(&rt
->dst
, jiffies
);
635 read_unlock_bh(&table
->tb6_lock
);
640 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
641 const struct in6_addr
*saddr
, int oif
, int strict
)
643 struct flowi6 fl6
= {
647 struct dst_entry
*dst
;
648 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
651 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
652 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
655 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
657 return (struct rt6_info
*) dst
;
664 EXPORT_SYMBOL(rt6_lookup
);
666 /* ip6_ins_rt is called with FREE table->tb6_lock.
667 It takes a new route entry; if the addition fails for any reason,
668 the route is freed. In any case, if the caller does not hold it, it may
672 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
675 struct fib6_table
*table
;
677 table
= rt
->rt6i_table
;
678 write_lock_bh(&table
->tb6_lock
);
679 err
= fib6_add(&table
->tb6_root
, rt
, info
);
680 write_unlock_bh(&table
->tb6_lock
);
685 int ip6_ins_rt(struct rt6_info
*rt
)
687 struct nl_info info
= {
688 .nl_net
= dev_net(rt
->rt6i_dev
),
690 return __ip6_ins_rt(rt
, &info
);
693 static struct rt6_info
*rt6_alloc_cow(struct rt6_info
*ort
, const struct in6_addr
*daddr
,
694 const struct in6_addr
*saddr
)
702 rt
= ip6_rt_copy(ort
);
705 struct neighbour
*neigh
;
706 int attempts
= !in_softirq();
708 if (!(rt
->rt6i_flags
&RTF_GATEWAY
)) {
709 if (rt
->rt6i_dst
.plen
!= 128 &&
710 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, daddr
))
711 rt
->rt6i_flags
|= RTF_ANYCAST
;
712 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
715 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
716 rt
->rt6i_dst
.plen
= 128;
717 rt
->rt6i_flags
|= RTF_CACHE
;
718 rt
->dst
.flags
|= DST_HOST
;
720 #ifdef CONFIG_IPV6_SUBTREES
721 if (rt
->rt6i_src
.plen
&& saddr
) {
722 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
723 rt
->rt6i_src
.plen
= 128;
728 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
730 struct net
*net
= dev_net(rt
->rt6i_dev
);
731 int saved_rt_min_interval
=
732 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
733 int saved_rt_elasticity
=
734 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
736 if (attempts
-- > 0) {
737 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
738 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
740 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
742 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
744 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
745 saved_rt_min_interval
;
751 "ipv6: Neighbour table overflow.\n");
755 dst_set_neighbour(&rt
->dst
, neigh
);
762 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
, const struct in6_addr
*daddr
)
764 struct rt6_info
*rt
= ip6_rt_copy(ort
);
766 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
767 rt
->rt6i_dst
.plen
= 128;
768 rt
->rt6i_flags
|= RTF_CACHE
;
769 rt
->dst
.flags
|= DST_HOST
;
770 dst_set_neighbour(&rt
->dst
, neigh_clone(dst_get_neighbour(&ort
->dst
)));
775 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
776 struct flowi6
*fl6
, int flags
)
778 struct fib6_node
*fn
;
779 struct rt6_info
*rt
, *nrt
;
783 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
785 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
788 read_lock_bh(&table
->tb6_lock
);
791 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
794 rt
= rt6_select(fn
, oif
, strict
| reachable
);
796 BACKTRACK(net
, &fl6
->saddr
);
797 if (rt
== net
->ipv6
.ip6_null_entry
||
798 rt
->rt6i_flags
& RTF_CACHE
)
802 read_unlock_bh(&table
->tb6_lock
);
804 if (!dst_get_neighbour(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
805 nrt
= rt6_alloc_cow(rt
, &fl6
->daddr
, &fl6
->saddr
);
806 else if (!(rt
->dst
.flags
& DST_HOST
))
807 nrt
= rt6_alloc_clone(rt
, &fl6
->daddr
);
811 dst_release(&rt
->dst
);
812 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
816 err
= ip6_ins_rt(nrt
);
825 * Race condition! In the gap, when table->tb6_lock was
826 * released someone could insert this route. Relookup.
828 dst_release(&rt
->dst
);
837 read_unlock_bh(&table
->tb6_lock
);
839 rt
->dst
.lastuse
= jiffies
;
845 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
846 struct flowi6
*fl6
, int flags
)
848 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
851 void ip6_route_input(struct sk_buff
*skb
)
853 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
854 struct net
*net
= dev_net(skb
->dev
);
855 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
856 struct flowi6 fl6
= {
857 .flowi6_iif
= skb
->dev
->ifindex
,
860 .flowlabel
= (* (__be32
*) iph
)&IPV6_FLOWINFO_MASK
,
861 .flowi6_mark
= skb
->mark
,
862 .flowi6_proto
= iph
->nexthdr
,
865 if (rt6_need_strict(&iph
->daddr
) && skb
->dev
->type
!= ARPHRD_PIMREG
)
866 flags
|= RT6_LOOKUP_F_IFACE
;
868 skb_dst_set(skb
, fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_input
));
871 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
872 struct flowi6
*fl6
, int flags
)
874 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
877 struct dst_entry
* ip6_route_output(struct net
*net
, const struct sock
*sk
,
882 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
))
883 flags
|= RT6_LOOKUP_F_IFACE
;
885 if (!ipv6_addr_any(&fl6
->saddr
))
886 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
888 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
890 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
893 EXPORT_SYMBOL(ip6_route_output
);
895 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
897 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
898 struct dst_entry
*new = NULL
;
900 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, 0, 0);
902 memset(&rt
->rt6i_table
, 0, sizeof(*rt
) - sizeof(struct dst_entry
));
907 new->input
= dst_discard
;
908 new->output
= dst_discard
;
910 dst_copy_metrics(new, &ort
->dst
);
911 rt
->rt6i_idev
= ort
->rt6i_idev
;
913 in6_dev_hold(rt
->rt6i_idev
);
914 rt
->rt6i_expires
= 0;
916 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
917 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
920 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
921 #ifdef CONFIG_IPV6_SUBTREES
922 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
928 dst_release(dst_orig
);
929 return new ? new : ERR_PTR(-ENOMEM
);
933 * Destination cache support functions
936 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
940 rt
= (struct rt6_info
*) dst
;
942 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
)) {
943 if (rt
->rt6i_peer_genid
!= rt6_peer_genid()) {
945 rt6_bind_peer(rt
, 0);
946 rt
->rt6i_peer_genid
= rt6_peer_genid();
953 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
955 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
958 if (rt
->rt6i_flags
& RTF_CACHE
) {
959 if (rt6_check_expired(rt
)) {
971 static void ip6_link_failure(struct sk_buff
*skb
)
975 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
977 rt
= (struct rt6_info
*) skb_dst(skb
);
979 if (rt
->rt6i_flags
&RTF_CACHE
) {
980 dst_set_expires(&rt
->dst
, 0);
981 rt
->rt6i_flags
|= RTF_EXPIRES
;
982 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
983 rt
->rt6i_node
->fn_sernum
= -1;
987 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
989 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
991 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
992 rt6
->rt6i_flags
|= RTF_MODIFIED
;
993 if (mtu
< IPV6_MIN_MTU
) {
994 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
996 features
|= RTAX_FEATURE_ALLFRAG
;
997 dst_metric_set(dst
, RTAX_FEATURES
, features
);
999 dst_metric_set(dst
, RTAX_MTU
, mtu
);
1003 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1005 struct net_device
*dev
= dst
->dev
;
1006 unsigned int mtu
= dst_mtu(dst
);
1007 struct net
*net
= dev_net(dev
);
1009 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1011 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1012 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1015 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1016 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1017 * IPV6_MAXPLEN is also valid and means: "any MSS,
1018 * rely only on pmtu discovery"
1020 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1025 static unsigned int ip6_default_mtu(const struct dst_entry
*dst
)
1027 unsigned int mtu
= IPV6_MIN_MTU
;
1028 struct inet6_dev
*idev
;
1031 idev
= __in6_dev_get(dst
->dev
);
1033 mtu
= idev
->cnf
.mtu6
;
1039 static struct dst_entry
*icmp6_dst_gc_list
;
1040 static DEFINE_SPINLOCK(icmp6_dst_lock
);
1042 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1043 struct neighbour
*neigh
,
1044 const struct in6_addr
*addr
)
1046 struct rt6_info
*rt
;
1047 struct inet6_dev
*idev
= in6_dev_get(dev
);
1048 struct net
*net
= dev_net(dev
);
1050 if (unlikely(idev
== NULL
))
1053 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
, 0);
1054 if (unlikely(rt
== NULL
)) {
1062 neigh
= ndisc_get_neigh(dev
, addr
);
1067 rt
->rt6i_idev
= idev
;
1068 dst_set_neighbour(&rt
->dst
, neigh
);
1069 atomic_set(&rt
->dst
.__refcnt
, 1);
1070 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
1071 rt
->dst
.output
= ip6_output
;
1073 spin_lock_bh(&icmp6_dst_lock
);
1074 rt
->dst
.next
= icmp6_dst_gc_list
;
1075 icmp6_dst_gc_list
= &rt
->dst
;
1076 spin_unlock_bh(&icmp6_dst_lock
);
1078 fib6_force_start_gc(net
);
1084 int icmp6_dst_gc(void)
1086 struct dst_entry
*dst
, **pprev
;
1089 spin_lock_bh(&icmp6_dst_lock
);
1090 pprev
= &icmp6_dst_gc_list
;
1092 while ((dst
= *pprev
) != NULL
) {
1093 if (!atomic_read(&dst
->__refcnt
)) {
1102 spin_unlock_bh(&icmp6_dst_lock
);
1107 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1110 struct dst_entry
*dst
, **pprev
;
1112 spin_lock_bh(&icmp6_dst_lock
);
1113 pprev
= &icmp6_dst_gc_list
;
1114 while ((dst
= *pprev
) != NULL
) {
1115 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1116 if (func(rt
, arg
)) {
1123 spin_unlock_bh(&icmp6_dst_lock
);
1126 static int ip6_dst_gc(struct dst_ops
*ops
)
1128 unsigned long now
= jiffies
;
1129 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1130 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1131 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1132 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1133 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1134 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1137 entries
= dst_entries_get_fast(ops
);
1138 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1139 entries
<= rt_max_size
)
1142 net
->ipv6
.ip6_rt_gc_expire
++;
1143 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1144 net
->ipv6
.ip6_rt_last_gc
= now
;
1145 entries
= dst_entries_get_slow(ops
);
1146 if (entries
< ops
->gc_thresh
)
1147 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1149 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1150 return entries
> rt_max_size
;
1153 /* Clean host part of a prefix. Not necessary in radix tree,
1154 but results in cleaner routing tables.
1156 Remove it only when all the things will work!
1159 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1161 int hoplimit
= dst_metric_raw(dst
, RTAX_HOPLIMIT
);
1162 if (hoplimit
== 0) {
1163 struct net_device
*dev
= dst
->dev
;
1164 struct inet6_dev
*idev
;
1167 idev
= __in6_dev_get(dev
);
1169 hoplimit
= idev
->cnf
.hop_limit
;
1171 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1176 EXPORT_SYMBOL(ip6_dst_hoplimit
);
1182 int ip6_route_add(struct fib6_config
*cfg
)
1185 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1186 struct rt6_info
*rt
= NULL
;
1187 struct net_device
*dev
= NULL
;
1188 struct inet6_dev
*idev
= NULL
;
1189 struct fib6_table
*table
;
1192 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1194 #ifndef CONFIG_IPV6_SUBTREES
1195 if (cfg
->fc_src_len
)
1198 if (cfg
->fc_ifindex
) {
1200 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1203 idev
= in6_dev_get(dev
);
1208 if (cfg
->fc_metric
== 0)
1209 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1211 table
= fib6_new_table(net
, cfg
->fc_table
);
1212 if (table
== NULL
) {
1217 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, NULL
, DST_NOCOUNT
);
1224 rt
->dst
.obsolete
= -1;
1225 rt
->rt6i_expires
= (cfg
->fc_flags
& RTF_EXPIRES
) ?
1226 jiffies
+ clock_t_to_jiffies(cfg
->fc_expires
) :
1229 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1230 cfg
->fc_protocol
= RTPROT_BOOT
;
1231 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1233 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1235 if (addr_type
& IPV6_ADDR_MULTICAST
)
1236 rt
->dst
.input
= ip6_mc_input
;
1237 else if (cfg
->fc_flags
& RTF_LOCAL
)
1238 rt
->dst
.input
= ip6_input
;
1240 rt
->dst
.input
= ip6_forward
;
1242 rt
->dst
.output
= ip6_output
;
1244 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1245 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1246 if (rt
->rt6i_dst
.plen
== 128)
1247 rt
->dst
.flags
|= DST_HOST
;
1249 #ifdef CONFIG_IPV6_SUBTREES
1250 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1251 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1254 rt
->rt6i_metric
= cfg
->fc_metric
;
1256 /* We cannot add true routes via loopback here,
1257 they would result in kernel looping; promote them to reject routes
1259 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1260 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
)
1261 && !(cfg
->fc_flags
&RTF_LOCAL
))) {
1262 /* hold loopback dev/idev if we haven't done so. */
1263 if (dev
!= net
->loopback_dev
) {
1268 dev
= net
->loopback_dev
;
1270 idev
= in6_dev_get(dev
);
1276 rt
->dst
.output
= ip6_pkt_discard_out
;
1277 rt
->dst
.input
= ip6_pkt_discard
;
1278 rt
->dst
.error
= -ENETUNREACH
;
1279 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1283 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1284 const struct in6_addr
*gw_addr
;
1287 gw_addr
= &cfg
->fc_gateway
;
1288 ipv6_addr_copy(&rt
->rt6i_gateway
, gw_addr
);
1289 gwa_type
= ipv6_addr_type(gw_addr
);
1291 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1292 struct rt6_info
*grt
;
1294 /* IPv6 strictly inhibits using non-link-local
1295 addresses as the nexthop address.
1296 Otherwise, the router will not be able to send redirects.
1297 It is very good, but in some (rare!) circumstances
1298 (SIT, PtP, NBMA NOARP links) it is handy to allow
1299 some exceptions. --ANK
1302 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
1305 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1307 err
= -EHOSTUNREACH
;
1311 if (dev
!= grt
->rt6i_dev
) {
1312 dst_release(&grt
->dst
);
1316 dev
= grt
->rt6i_dev
;
1317 idev
= grt
->rt6i_idev
;
1319 in6_dev_hold(grt
->rt6i_idev
);
1321 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
1323 dst_release(&grt
->dst
);
1329 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
1337 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
1338 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
1342 ipv6_addr_copy(&rt
->rt6i_prefsrc
.addr
, &cfg
->fc_prefsrc
);
1343 rt
->rt6i_prefsrc
.plen
= 128;
1345 rt
->rt6i_prefsrc
.plen
= 0;
1347 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1348 struct neighbour
*n
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
1353 dst_set_neighbour(&rt
->dst
, n
);
1356 rt
->rt6i_flags
= cfg
->fc_flags
;
1363 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1364 int type
= nla_type(nla
);
1367 if (type
> RTAX_MAX
) {
1372 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1378 rt
->rt6i_idev
= idev
;
1379 rt
->rt6i_table
= table
;
1381 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1383 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1395 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1398 struct fib6_table
*table
;
1399 struct net
*net
= dev_net(rt
->rt6i_dev
);
1401 if (rt
== net
->ipv6
.ip6_null_entry
)
1404 table
= rt
->rt6i_table
;
1405 write_lock_bh(&table
->tb6_lock
);
1407 err
= fib6_del(rt
, info
);
1408 dst_release(&rt
->dst
);
1410 write_unlock_bh(&table
->tb6_lock
);
1415 int ip6_del_rt(struct rt6_info
*rt
)
1417 struct nl_info info
= {
1418 .nl_net
= dev_net(rt
->rt6i_dev
),
1420 return __ip6_del_rt(rt
, &info
);
1423 static int ip6_route_del(struct fib6_config
*cfg
)
1425 struct fib6_table
*table
;
1426 struct fib6_node
*fn
;
1427 struct rt6_info
*rt
;
1430 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1434 read_lock_bh(&table
->tb6_lock
);
1436 fn
= fib6_locate(&table
->tb6_root
,
1437 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1438 &cfg
->fc_src
, cfg
->fc_src_len
);
1441 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1442 if (cfg
->fc_ifindex
&&
1443 (rt
->rt6i_dev
== NULL
||
1444 rt
->rt6i_dev
->ifindex
!= cfg
->fc_ifindex
))
1446 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1447 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1449 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1452 read_unlock_bh(&table
->tb6_lock
);
1454 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1457 read_unlock_bh(&table
->tb6_lock
);
1465 struct ip6rd_flowi
{
1467 struct in6_addr gateway
;
1470 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1471 struct fib6_table
*table
,
1475 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1476 struct rt6_info
*rt
;
1477 struct fib6_node
*fn
;
1480 * Get the "current" route for this destination and
1481 * check if the redirect has come from an appropriate router.
1483 * RFC 2461 specifies that redirects should only be
1484 * accepted if they come from the nexthop to the target.
1485 * Due to the way the routes are chosen, this notion
1486 * is a bit fuzzy and one might need to check all possible
1490 read_lock_bh(&table
->tb6_lock
);
1491 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1493 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1495 * Current route is on-link; redirect is always invalid.
1497 * Seems, previous statement is not true. It could
1498 * be node, which looks for us as on-link (f.e. proxy ndisc)
1499 * But then router serving it might decide, that we should
1500 * know truth 8)8) --ANK (980726).
1502 if (rt6_check_expired(rt
))
1504 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1506 if (fl6
->flowi6_oif
!= rt
->rt6i_dev
->ifindex
)
1508 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1514 rt
= net
->ipv6
.ip6_null_entry
;
1515 BACKTRACK(net
, &fl6
->saddr
);
1519 read_unlock_bh(&table
->tb6_lock
);
1524 static struct rt6_info
*ip6_route_redirect(const struct in6_addr
*dest
,
1525 const struct in6_addr
*src
,
1526 const struct in6_addr
*gateway
,
1527 struct net_device
*dev
)
1529 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1530 struct net
*net
= dev_net(dev
);
1531 struct ip6rd_flowi rdfl
= {
1533 .flowi6_oif
= dev
->ifindex
,
1539 ipv6_addr_copy(&rdfl
.gateway
, gateway
);
1541 if (rt6_need_strict(dest
))
1542 flags
|= RT6_LOOKUP_F_IFACE
;
1544 return (struct rt6_info
*)fib6_rule_lookup(net
, &rdfl
.fl6
,
1545 flags
, __ip6_route_redirect
);
1548 void rt6_redirect(const struct in6_addr
*dest
, const struct in6_addr
*src
,
1549 const struct in6_addr
*saddr
,
1550 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1552 struct rt6_info
*rt
, *nrt
= NULL
;
1553 struct netevent_redirect netevent
;
1554 struct net
*net
= dev_net(neigh
->dev
);
1556 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1558 if (rt
== net
->ipv6
.ip6_null_entry
) {
1559 if (net_ratelimit())
1560 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1561 "for redirect target\n");
1566 * We have finally decided to accept it.
1569 neigh_update(neigh
, lladdr
, NUD_STALE
,
1570 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1571 NEIGH_UPDATE_F_OVERRIDE
|
1572 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1573 NEIGH_UPDATE_F_ISROUTER
))
1577 * Redirect received -> path was valid.
1578 * Look, redirects are sent only in response to data packets,
1579 * so that this nexthop apparently is reachable. --ANK
1581 dst_confirm(&rt
->dst
);
1583 /* Duplicate redirect: silently ignore. */
1584 if (neigh
== dst_get_neighbour(&rt
->dst
))
1587 nrt
= ip6_rt_copy(rt
);
1591 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1593 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1595 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
1596 nrt
->rt6i_dst
.plen
= 128;
1597 nrt
->dst
.flags
|= DST_HOST
;
1599 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1600 dst_set_neighbour(&nrt
->dst
, neigh_clone(neigh
));
1602 if (ip6_ins_rt(nrt
))
1605 netevent
.old
= &rt
->dst
;
1606 netevent
.new = &nrt
->dst
;
1607 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1609 if (rt
->rt6i_flags
&RTF_CACHE
) {
1615 dst_release(&rt
->dst
);
1619 * Handle ICMP "packet too big" messages
1620 * i.e. Path MTU discovery
1623 static void rt6_do_pmtu_disc(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1624 struct net
*net
, u32 pmtu
, int ifindex
)
1626 struct rt6_info
*rt
, *nrt
;
1629 rt
= rt6_lookup(net
, daddr
, saddr
, ifindex
, 0);
1633 if (rt6_check_expired(rt
)) {
1638 if (pmtu
>= dst_mtu(&rt
->dst
))
1641 if (pmtu
< IPV6_MIN_MTU
) {
1643 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1644 * MTU (1280) and a fragment header should always be included
1645 * after a node receiving Too Big message reporting PMTU is
1646 * less than the IPv6 Minimum Link MTU.
1648 pmtu
= IPV6_MIN_MTU
;
1652 /* New mtu received -> path was valid.
1653 They are sent only in response to data packets,
1654 so that this nexthop apparently is reachable. --ANK
1656 dst_confirm(&rt
->dst
);
1658 /* Host route. If it is static, it would be better
1659 not to override it, but add new one, so that
1660 when cache entry will expire old pmtu
1661 would return automatically.
1663 if (rt
->rt6i_flags
& RTF_CACHE
) {
1664 dst_metric_set(&rt
->dst
, RTAX_MTU
, pmtu
);
1666 u32 features
= dst_metric(&rt
->dst
, RTAX_FEATURES
);
1667 features
|= RTAX_FEATURE_ALLFRAG
;
1668 dst_metric_set(&rt
->dst
, RTAX_FEATURES
, features
);
1670 dst_set_expires(&rt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1671 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1676 Two cases are possible:
1677 1. It is connected route. Action: COW
1678 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1680 if (!dst_get_neighbour(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1681 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1683 nrt
= rt6_alloc_clone(rt
, daddr
);
1686 dst_metric_set(&nrt
->dst
, RTAX_MTU
, pmtu
);
1688 u32 features
= dst_metric(&nrt
->dst
, RTAX_FEATURES
);
1689 features
|= RTAX_FEATURE_ALLFRAG
;
1690 dst_metric_set(&nrt
->dst
, RTAX_FEATURES
, features
);
1693 /* According to RFC 1981, detecting PMTU increase shouldn't be
1694 * happened within 5 mins, the recommended timer is 10 mins.
1695 * Here this route expiration time is set to ip6_rt_mtu_expires
1696 * which is 10 mins. After 10 mins the decreased pmtu is expired
1697 * and detecting PMTU increase will be automatically happened.
1699 dst_set_expires(&nrt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1700 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1705 dst_release(&rt
->dst
);
1708 void rt6_pmtu_discovery(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1709 struct net_device
*dev
, u32 pmtu
)
1711 struct net
*net
= dev_net(dev
);
1714 * RFC 1981 states that a node "MUST reduce the size of the packets it
1715 * is sending along the path" that caused the Packet Too Big message.
1716 * Since it's not possible in the general case to determine which
1717 * interface was used to send the original packet, we update the MTU
1718 * on the interface that will be used to send future packets. We also
1719 * update the MTU on the interface that received the Packet Too Big in
1720 * case the original packet was forced out that interface with
1721 * SO_BINDTODEVICE or similar. This is the next best thing to the
1722 * correct behaviour, which would be to update the MTU on all
1725 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, 0);
1726 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, dev
->ifindex
);
1730 * Misc support functions
1733 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1735 struct net
*net
= dev_net(ort
->rt6i_dev
);
1736 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
1740 rt
->dst
.input
= ort
->dst
.input
;
1741 rt
->dst
.output
= ort
->dst
.output
;
1743 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1744 rt
->dst
.error
= ort
->dst
.error
;
1745 rt
->rt6i_idev
= ort
->rt6i_idev
;
1747 in6_dev_hold(rt
->rt6i_idev
);
1748 rt
->dst
.lastuse
= jiffies
;
1749 rt
->rt6i_expires
= 0;
1751 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1752 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1753 rt
->rt6i_metric
= 0;
1755 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1756 #ifdef CONFIG_IPV6_SUBTREES
1757 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1759 memcpy(&rt
->rt6i_prefsrc
, &ort
->rt6i_prefsrc
, sizeof(struct rt6key
));
1760 rt
->rt6i_table
= ort
->rt6i_table
;
1765 #ifdef CONFIG_IPV6_ROUTE_INFO
1766 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
1767 const struct in6_addr
*prefix
, int prefixlen
,
1768 const struct in6_addr
*gwaddr
, int ifindex
)
1770 struct fib6_node
*fn
;
1771 struct rt6_info
*rt
= NULL
;
1772 struct fib6_table
*table
;
1774 table
= fib6_get_table(net
, RT6_TABLE_INFO
);
1778 write_lock_bh(&table
->tb6_lock
);
1779 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1783 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1784 if (rt
->rt6i_dev
->ifindex
!= ifindex
)
1786 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1788 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1794 write_unlock_bh(&table
->tb6_lock
);
1798 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
1799 const struct in6_addr
*prefix
, int prefixlen
,
1800 const struct in6_addr
*gwaddr
, int ifindex
,
1803 struct fib6_config cfg
= {
1804 .fc_table
= RT6_TABLE_INFO
,
1805 .fc_metric
= IP6_RT_PRIO_USER
,
1806 .fc_ifindex
= ifindex
,
1807 .fc_dst_len
= prefixlen
,
1808 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1809 RTF_UP
| RTF_PREF(pref
),
1811 .fc_nlinfo
.nlh
= NULL
,
1812 .fc_nlinfo
.nl_net
= net
,
1815 ipv6_addr_copy(&cfg
.fc_dst
, prefix
);
1816 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1818 /* We should treat it as a default route if prefix length is 0. */
1820 cfg
.fc_flags
|= RTF_DEFAULT
;
1822 ip6_route_add(&cfg
);
1824 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, ifindex
);
1828 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
1830 struct rt6_info
*rt
;
1831 struct fib6_table
*table
;
1833 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1837 write_lock_bh(&table
->tb6_lock
);
1838 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1839 if (dev
== rt
->rt6i_dev
&&
1840 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1841 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1846 write_unlock_bh(&table
->tb6_lock
);
1850 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
1851 struct net_device
*dev
,
1854 struct fib6_config cfg
= {
1855 .fc_table
= RT6_TABLE_DFLT
,
1856 .fc_metric
= IP6_RT_PRIO_USER
,
1857 .fc_ifindex
= dev
->ifindex
,
1858 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1859 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1861 .fc_nlinfo
.nlh
= NULL
,
1862 .fc_nlinfo
.nl_net
= dev_net(dev
),
1865 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1867 ip6_route_add(&cfg
);
1869 return rt6_get_dflt_router(gwaddr
, dev
);
1872 void rt6_purge_dflt_routers(struct net
*net
)
1874 struct rt6_info
*rt
;
1875 struct fib6_table
*table
;
1877 /* NOTE: Keep consistent with rt6_get_dflt_router */
1878 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1883 read_lock_bh(&table
->tb6_lock
);
1884 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1885 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1887 read_unlock_bh(&table
->tb6_lock
);
1892 read_unlock_bh(&table
->tb6_lock
);
1895 static void rtmsg_to_fib6_config(struct net
*net
,
1896 struct in6_rtmsg
*rtmsg
,
1897 struct fib6_config
*cfg
)
1899 memset(cfg
, 0, sizeof(*cfg
));
1901 cfg
->fc_table
= RT6_TABLE_MAIN
;
1902 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1903 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1904 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1905 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1906 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1907 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1909 cfg
->fc_nlinfo
.nl_net
= net
;
1911 ipv6_addr_copy(&cfg
->fc_dst
, &rtmsg
->rtmsg_dst
);
1912 ipv6_addr_copy(&cfg
->fc_src
, &rtmsg
->rtmsg_src
);
1913 ipv6_addr_copy(&cfg
->fc_gateway
, &rtmsg
->rtmsg_gateway
);
1916 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
1918 struct fib6_config cfg
;
1919 struct in6_rtmsg rtmsg
;
1923 case SIOCADDRT
: /* Add a route */
1924 case SIOCDELRT
: /* Delete a route */
1925 if (!capable(CAP_NET_ADMIN
))
1927 err
= copy_from_user(&rtmsg
, arg
,
1928 sizeof(struct in6_rtmsg
));
1932 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
1937 err
= ip6_route_add(&cfg
);
1940 err
= ip6_route_del(&cfg
);
1954 * Drop the packet on the floor
1957 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
1960 struct dst_entry
*dst
= skb_dst(skb
);
1961 switch (ipstats_mib_noroutes
) {
1962 case IPSTATS_MIB_INNOROUTES
:
1963 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
1964 if (type
== IPV6_ADDR_ANY
) {
1965 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
1966 IPSTATS_MIB_INADDRERRORS
);
1970 case IPSTATS_MIB_OUTNOROUTES
:
1971 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
1972 ipstats_mib_noroutes
);
1975 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
1980 static int ip6_pkt_discard(struct sk_buff
*skb
)
1982 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
1985 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
1987 skb
->dev
= skb_dst(skb
)->dev
;
1988 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
1991 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1993 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
1995 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
1998 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
2000 skb
->dev
= skb_dst(skb
)->dev
;
2001 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2007 * Allocate a dst for local (unicast / anycast) address.
2010 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2011 const struct in6_addr
*addr
,
2014 struct net
*net
= dev_net(idev
->dev
);
2015 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
2016 net
->loopback_dev
, 0);
2017 struct neighbour
*neigh
;
2020 if (net_ratelimit())
2021 pr_warning("IPv6: Maximum number of routes reached,"
2022 " consider increasing route/max_size.\n");
2023 return ERR_PTR(-ENOMEM
);
2028 rt
->dst
.flags
|= DST_HOST
;
2029 rt
->dst
.input
= ip6_input
;
2030 rt
->dst
.output
= ip6_output
;
2031 rt
->rt6i_idev
= idev
;
2032 rt
->dst
.obsolete
= -1;
2034 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2036 rt
->rt6i_flags
|= RTF_ANYCAST
;
2038 rt
->rt6i_flags
|= RTF_LOCAL
;
2039 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
2040 if (IS_ERR(neigh
)) {
2043 return ERR_CAST(neigh
);
2045 dst_set_neighbour(&rt
->dst
, neigh
);
2047 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
2048 rt
->rt6i_dst
.plen
= 128;
2049 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2051 atomic_set(&rt
->dst
.__refcnt
, 1);
2056 int ip6_route_get_saddr(struct net
*net
,
2057 struct rt6_info
*rt
,
2058 const struct in6_addr
*daddr
,
2060 struct in6_addr
*saddr
)
2062 struct inet6_dev
*idev
= ip6_dst_idev((struct dst_entry
*)rt
);
2064 if (rt
->rt6i_prefsrc
.plen
)
2065 ipv6_addr_copy(saddr
, &rt
->rt6i_prefsrc
.addr
);
2067 err
= ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2068 daddr
, prefs
, saddr
);
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2079 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2081 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2082 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2083 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2085 if (((void *)rt
->rt6i_dev
== dev
|| dev
== NULL
) &&
2086 rt
!= net
->ipv6
.ip6_null_entry
&&
2087 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2088 /* remove prefsrc entry */
2089 rt
->rt6i_prefsrc
.plen
= 0;
2094 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2096 struct net
*net
= dev_net(ifp
->idev
->dev
);
2097 struct arg_dev_net_ip adni
= {
2098 .dev
= ifp
->idev
->dev
,
2102 fib6_clean_all(net
, fib6_remove_prefsrc
, 0, &adni
);
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2110 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2112 const struct arg_dev_net
*adn
= arg
;
2113 const struct net_device
*dev
= adn
->dev
;
2115 if ((rt
->rt6i_dev
== dev
|| dev
== NULL
) &&
2116 rt
!= adn
->net
->ipv6
.ip6_null_entry
) {
2117 RT6_TRACE("deleted by ifdown %p\n", rt
);
2123 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2125 struct arg_dev_net adn
= {
2130 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2131 icmp6_clean_all(fib6_ifdown
, &adn
);
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned mtu;
};
2140 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2142 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2143 struct inet6_dev
*idev
;
2145 /* In IPv6 pmtu discovery is not optional,
2146 so that RTAX_MTU lock cannot disable it.
2147 We still use this lock to block changes
2148 caused by addrconf/ndisc.
2151 idev
= __in6_dev_get(arg
->dev
);
2155 /* For administrative MTU increase, there is no way to discover
2156 IPv6 PMTU increase, so PMTU increase should be updated here.
2157 Since RFC 1981 doesn't include administrative MTU increase
2158 update PMTU increase is a MUST. (i.e. jumbo frame)
2161 If new MTU is less than route PMTU, this new MTU will be the
2162 lowest MTU in the path, update the route PMTU to reflect PMTU
2163 decreases; if new MTU is greater than route PMTU, and the
2164 old MTU is the lowest MTU in the path, update the route PMTU
2165 to reflect the increase. In this case if the other nodes' MTU
2166 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2169 if (rt
->rt6i_dev
== arg
->dev
&&
2170 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2171 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2172 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2173 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2174 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2179 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
2181 struct rt6_mtu_change_arg arg
= {
2186 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
2189 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2190 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2191 [RTA_OIF
] = { .type
= NLA_U32
},
2192 [RTA_IIF
] = { .type
= NLA_U32
},
2193 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2194 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2197 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2198 struct fib6_config
*cfg
)
2201 struct nlattr
*tb
[RTA_MAX
+1];
2204 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2209 rtm
= nlmsg_data(nlh
);
2210 memset(cfg
, 0, sizeof(*cfg
));
2212 cfg
->fc_table
= rtm
->rtm_table
;
2213 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2214 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2215 cfg
->fc_flags
= RTF_UP
;
2216 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2218 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2219 cfg
->fc_flags
|= RTF_REJECT
;
2221 if (rtm
->rtm_type
== RTN_LOCAL
)
2222 cfg
->fc_flags
|= RTF_LOCAL
;
2224 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2225 cfg
->fc_nlinfo
.nlh
= nlh
;
2226 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2228 if (tb
[RTA_GATEWAY
]) {
2229 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2230 cfg
->fc_flags
|= RTF_GATEWAY
;
2234 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2236 if (nla_len(tb
[RTA_DST
]) < plen
)
2239 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2243 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2245 if (nla_len(tb
[RTA_SRC
]) < plen
)
2248 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2251 if (tb
[RTA_PREFSRC
])
2252 nla_memcpy(&cfg
->fc_prefsrc
, tb
[RTA_PREFSRC
], 16);
2255 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2257 if (tb
[RTA_PRIORITY
])
2258 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2260 if (tb
[RTA_METRICS
]) {
2261 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2262 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2266 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2273 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2275 struct fib6_config cfg
;
2278 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2282 return ip6_route_del(&cfg
);
2285 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2287 struct fib6_config cfg
;
2290 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2294 return ip6_route_add(&cfg
);
2297 static inline size_t rt6_nlmsg_size(void)
2299 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2300 + nla_total_size(16) /* RTA_SRC */
2301 + nla_total_size(16) /* RTA_DST */
2302 + nla_total_size(16) /* RTA_GATEWAY */
2303 + nla_total_size(16) /* RTA_PREFSRC */
2304 + nla_total_size(4) /* RTA_TABLE */
2305 + nla_total_size(4) /* RTA_IIF */
2306 + nla_total_size(4) /* RTA_OIF */
2307 + nla_total_size(4) /* RTA_PRIORITY */
2308 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2309 + nla_total_size(sizeof(struct rta_cacheinfo
));
2312 static int rt6_fill_node(struct net
*net
,
2313 struct sk_buff
*skb
, struct rt6_info
*rt
,
2314 struct in6_addr
*dst
, struct in6_addr
*src
,
2315 int iif
, int type
, u32 pid
, u32 seq
,
2316 int prefix
, int nowait
, unsigned int flags
)
2319 struct nlmsghdr
*nlh
;
2323 if (prefix
) { /* user wants prefix routes only */
2324 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2325 /* success since this is not a prefix route */
2330 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2334 rtm
= nlmsg_data(nlh
);
2335 rtm
->rtm_family
= AF_INET6
;
2336 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2337 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2340 table
= rt
->rt6i_table
->tb6_id
;
2342 table
= RT6_TABLE_UNSPEC
;
2343 rtm
->rtm_table
= table
;
2344 NLA_PUT_U32(skb
, RTA_TABLE
, table
);
2345 if (rt
->rt6i_flags
&RTF_REJECT
)
2346 rtm
->rtm_type
= RTN_UNREACHABLE
;
2347 else if (rt
->rt6i_flags
&RTF_LOCAL
)
2348 rtm
->rtm_type
= RTN_LOCAL
;
2349 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
2350 rtm
->rtm_type
= RTN_LOCAL
;
2352 rtm
->rtm_type
= RTN_UNICAST
;
2354 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2355 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2356 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
2357 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2358 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2359 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2360 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
2361 rtm
->rtm_protocol
= RTPROT_RA
;
2363 if (rt
->rt6i_flags
&RTF_CACHE
)
2364 rtm
->rtm_flags
|= RTM_F_CLONED
;
2367 NLA_PUT(skb
, RTA_DST
, 16, dst
);
2368 rtm
->rtm_dst_len
= 128;
2369 } else if (rtm
->rtm_dst_len
)
2370 NLA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
2371 #ifdef CONFIG_IPV6_SUBTREES
2373 NLA_PUT(skb
, RTA_SRC
, 16, src
);
2374 rtm
->rtm_src_len
= 128;
2375 } else if (rtm
->rtm_src_len
)
2376 NLA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
2379 #ifdef CONFIG_IPV6_MROUTE
2380 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2381 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2386 goto nla_put_failure
;
2388 if (err
== -EMSGSIZE
)
2389 goto nla_put_failure
;
2394 NLA_PUT_U32(skb
, RTA_IIF
, iif
);
2396 struct in6_addr saddr_buf
;
2397 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0)
2398 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2401 if (rt
->rt6i_prefsrc
.plen
) {
2402 struct in6_addr saddr_buf
;
2403 ipv6_addr_copy(&saddr_buf
, &rt
->rt6i_prefsrc
.addr
);
2404 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2407 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2408 goto nla_put_failure
;
2410 if (dst_get_neighbour(&rt
->dst
))
2411 NLA_PUT(skb
, RTA_GATEWAY
, 16, &dst_get_neighbour(&rt
->dst
)->primary_key
);
2414 NLA_PUT_U32(skb
, RTA_OIF
, rt
->rt6i_dev
->ifindex
);
2416 NLA_PUT_U32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
);
2418 if (!(rt
->rt6i_flags
& RTF_EXPIRES
))
2420 else if (rt
->rt6i_expires
- jiffies
< INT_MAX
)
2421 expires
= rt
->rt6i_expires
- jiffies
;
2425 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, 0, 0,
2426 expires
, rt
->dst
.error
) < 0)
2427 goto nla_put_failure
;
2429 return nlmsg_end(skb
, nlh
);
2432 nlmsg_cancel(skb
, nlh
);
2436 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2438 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2441 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2442 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2443 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2447 return rt6_fill_node(arg
->net
,
2448 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2449 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2450 prefix
, 0, NLM_F_MULTI
);
2453 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2455 struct net
*net
= sock_net(in_skb
->sk
);
2456 struct nlattr
*tb
[RTA_MAX
+1];
2457 struct rt6_info
*rt
;
2458 struct sk_buff
*skb
;
2463 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2468 memset(&fl6
, 0, sizeof(fl6
));
2471 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2474 ipv6_addr_copy(&fl6
.saddr
, nla_data(tb
[RTA_SRC
]));
2478 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2481 ipv6_addr_copy(&fl6
.daddr
, nla_data(tb
[RTA_DST
]));
2485 iif
= nla_get_u32(tb
[RTA_IIF
]);
2488 fl6
.flowi6_oif
= nla_get_u32(tb
[RTA_OIF
]);
2491 struct net_device
*dev
;
2492 dev
= __dev_get_by_index(net
, iif
);
2499 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2505 /* Reserve room for dummy headers, this skb can pass
2506 through good chunk of routing engine.
2508 skb_reset_mac_header(skb
);
2509 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2511 rt
= (struct rt6_info
*) ip6_route_output(net
, NULL
, &fl6
);
2512 skb_dst_set(skb
, &rt
->dst
);
2514 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
2515 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2516 nlh
->nlmsg_seq
, 0, 0, 0);
2522 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).pid
);
2527 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2529 struct sk_buff
*skb
;
2530 struct net
*net
= info
->nl_net
;
2535 seq
= info
->nlh
!= NULL
? info
->nlh
->nlmsg_seq
: 0;
2537 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2541 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2542 event
, info
->pid
, seq
, 0, 0, 0);
2544 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2545 WARN_ON(err
== -EMSGSIZE
);
2549 rtnl_notify(skb
, net
, info
->pid
, RTNLGRP_IPV6_ROUTE
,
2550 info
->nlh
, gfp_any());
2554 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
2557 static int ip6_route_dev_notify(struct notifier_block
*this,
2558 unsigned long event
, void *data
)
2560 struct net_device
*dev
= (struct net_device
*)data
;
2561 struct net
*net
= dev_net(dev
);
2563 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2564 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2565 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2566 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2567 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2568 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2569 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2570 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2581 #ifdef CONFIG_PROC_FS
2592 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2594 struct seq_file
*m
= p_arg
;
2595 struct neighbour
*n
;
2597 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2599 #ifdef CONFIG_IPV6_SUBTREES
2600 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2602 seq_puts(m
, "00000000000000000000000000000000 00 ");
2604 n
= dst_get_neighbour(&rt
->dst
);
2606 seq_printf(m
, "%pi6", n
->primary_key
);
2608 seq_puts(m
, "00000000000000000000000000000000");
2610 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2611 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2612 rt
->dst
.__use
, rt
->rt6i_flags
,
2613 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
2617 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2619 struct net
*net
= (struct net
*)m
->private;
2620 fib6_clean_all(net
, rt6_info_route
, 0, m
);
2624 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2626 return single_open_net(inode
, file
, ipv6_route_show
);
2629 static const struct file_operations ipv6_route_proc_fops
= {
2630 .owner
= THIS_MODULE
,
2631 .open
= ipv6_route_open
,
2633 .llseek
= seq_lseek
,
2634 .release
= single_release_net
,
2637 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2639 struct net
*net
= (struct net
*)seq
->private;
2640 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2641 net
->ipv6
.rt6_stats
->fib_nodes
,
2642 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2643 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2644 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2645 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2646 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2647 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
2652 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2654 return single_open_net(inode
, file
, rt6_stats_seq_show
);
2657 static const struct file_operations rt6_stats_seq_fops
= {
2658 .owner
= THIS_MODULE
,
2659 .open
= rt6_stats_seq_open
,
2661 .llseek
= seq_lseek
,
2662 .release
= single_release_net
,
2664 #endif /* CONFIG_PROC_FS */
2666 #ifdef CONFIG_SYSCTL
2669 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2670 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2677 net
= (struct net
*)ctl
->extra1
;
2678 delay
= net
->ipv6
.sysctl
.flush_delay
;
2679 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2680 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
2684 ctl_table ipv6_route_table_template
[] = {
2686 .procname
= "flush",
2687 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2688 .maxlen
= sizeof(int),
2690 .proc_handler
= ipv6_sysctl_rtcache_flush
2693 .procname
= "gc_thresh",
2694 .data
= &ip6_dst_ops_template
.gc_thresh
,
2695 .maxlen
= sizeof(int),
2697 .proc_handler
= proc_dointvec
,
2700 .procname
= "max_size",
2701 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2702 .maxlen
= sizeof(int),
2704 .proc_handler
= proc_dointvec
,
2707 .procname
= "gc_min_interval",
2708 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2709 .maxlen
= sizeof(int),
2711 .proc_handler
= proc_dointvec_jiffies
,
2714 .procname
= "gc_timeout",
2715 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2716 .maxlen
= sizeof(int),
2718 .proc_handler
= proc_dointvec_jiffies
,
2721 .procname
= "gc_interval",
2722 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2723 .maxlen
= sizeof(int),
2725 .proc_handler
= proc_dointvec_jiffies
,
2728 .procname
= "gc_elasticity",
2729 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2730 .maxlen
= sizeof(int),
2732 .proc_handler
= proc_dointvec
,
2735 .procname
= "mtu_expires",
2736 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2737 .maxlen
= sizeof(int),
2739 .proc_handler
= proc_dointvec_jiffies
,
2742 .procname
= "min_adv_mss",
2743 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2744 .maxlen
= sizeof(int),
2746 .proc_handler
= proc_dointvec
,
2749 .procname
= "gc_min_interval_ms",
2750 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2751 .maxlen
= sizeof(int),
2753 .proc_handler
= proc_dointvec_ms_jiffies
,
2758 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2760 struct ctl_table
*table
;
2762 table
= kmemdup(ipv6_route_table_template
,
2763 sizeof(ipv6_route_table_template
),
2767 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2768 table
[0].extra1
= net
;
2769 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
2770 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
2771 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2772 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
2773 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
2774 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
2775 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
2776 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
2777 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2784 static int __net_init
ip6_route_net_init(struct net
*net
)
2788 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2789 sizeof(net
->ipv6
.ip6_dst_ops
));
2791 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2792 goto out_ip6_dst_ops
;
2794 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2795 sizeof(*net
->ipv6
.ip6_null_entry
),
2797 if (!net
->ipv6
.ip6_null_entry
)
2798 goto out_ip6_dst_entries
;
2799 net
->ipv6
.ip6_null_entry
->dst
.path
=
2800 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2801 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2802 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
2803 ip6_template_metrics
, true);
2805 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2806 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2807 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2809 if (!net
->ipv6
.ip6_prohibit_entry
)
2810 goto out_ip6_null_entry
;
2811 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2812 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2813 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2814 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
2815 ip6_template_metrics
, true);
2817 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2818 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2820 if (!net
->ipv6
.ip6_blk_hole_entry
)
2821 goto out_ip6_prohibit_entry
;
2822 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2823 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2824 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2825 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
2826 ip6_template_metrics
, true);
2829 net
->ipv6
.sysctl
.flush_delay
= 0;
2830 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2831 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2832 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2833 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2834 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2835 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
2836 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2838 #ifdef CONFIG_PROC_FS
2839 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2840 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
2842 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
2848 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2849 out_ip6_prohibit_entry
:
2850 kfree(net
->ipv6
.ip6_prohibit_entry
);
2852 kfree(net
->ipv6
.ip6_null_entry
);
2854 out_ip6_dst_entries
:
2855 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2860 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2862 #ifdef CONFIG_PROC_FS
2863 proc_net_remove(net
, "ipv6_route");
2864 proc_net_remove(net
, "rt6_stats");
2866 kfree(net
->ipv6
.ip6_null_entry
);
2867 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2868 kfree(net
->ipv6
.ip6_prohibit_entry
);
2869 kfree(net
->ipv6
.ip6_blk_hole_entry
);
2871 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2874 static struct pernet_operations ip6_route_net_ops
= {
2875 .init
= ip6_route_net_init
,
2876 .exit
= ip6_route_net_exit
,
2879 static struct notifier_block ip6_route_dev_notifier
= {
2880 .notifier_call
= ip6_route_dev_notify
,
2884 int __init
ip6_route_init(void)
2889 ip6_dst_ops_template
.kmem_cachep
=
2890 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
2891 SLAB_HWCACHE_ALIGN
, NULL
);
2892 if (!ip6_dst_ops_template
.kmem_cachep
)
2895 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
2897 goto out_kmem_cache
;
2899 ret
= register_pernet_subsys(&ip6_route_net_ops
);
2901 goto out_dst_entries
;
2903 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
2905 /* Registering of the loopback is done before this portion of code,
2906 * the loopback reference in rt6_info will not be taken, do it
2907 * manually for init_net */
2908 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
2909 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2910 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2911 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
2912 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2913 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
2914 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2918 goto out_register_subsys
;
2924 ret
= fib6_rules_init();
2929 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
2930 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
2931 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
2932 goto fib6_rules_init
;
2934 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
2936 goto fib6_rules_init
;
2942 fib6_rules_cleanup();
2947 out_register_subsys
:
2948 unregister_pernet_subsys(&ip6_route_net_ops
);
2950 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2952 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
2956 void ip6_route_cleanup(void)
2958 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
2959 fib6_rules_cleanup();
2962 unregister_pernet_subsys(&ip6_route_net_ops
);
2963 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2964 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);