2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
59 #include <asm/uaccess.h>
62 #include <linux/sysctl.h>
65 /* Set to 3 to get tracing. */
69 #define RDBG(x) printk x
70 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
73 #define RT6_TRACE(x...) do { ; } while (0)
76 static struct rt6_info
*ip6_rt_copy(const struct rt6_info
*ort
,
77 const struct in6_addr
*dest
);
78 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
79 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
80 static unsigned int ip6_mtu(const struct dst_entry
*dst
);
81 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
82 static void ip6_dst_destroy(struct dst_entry
*);
83 static void ip6_dst_ifdown(struct dst_entry
*,
84 struct net_device
*dev
, int how
);
85 static int ip6_dst_gc(struct dst_ops
*ops
);
87 static int ip6_pkt_discard(struct sk_buff
*skb
);
88 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
89 static void ip6_link_failure(struct sk_buff
*skb
);
90 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
92 #ifdef CONFIG_IPV6_ROUTE_INFO
93 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
94 const struct in6_addr
*prefix
, int prefixlen
,
95 const struct in6_addr
*gwaddr
, int ifindex
,
97 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
98 const struct in6_addr
*prefix
, int prefixlen
,
99 const struct in6_addr
*gwaddr
, int ifindex
);
102 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
104 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
105 struct inet_peer
*peer
;
108 if (!(rt
->dst
.flags
& DST_HOST
))
112 rt6_bind_peer(rt
, 1);
114 peer
= rt
->rt6i_peer
;
116 u32
*old_p
= __DST_METRICS_PTR(old
);
117 unsigned long prev
, new;
120 if (inet_metrics_new(peer
))
121 memcpy(p
, old_p
, sizeof(u32
) * RTAX_MAX
);
123 new = (unsigned long) p
;
124 prev
= cmpxchg(&dst
->_metrics
, old
, new);
127 p
= __DST_METRICS_PTR(prev
);
128 if (prev
& DST_METRICS_READ_ONLY
)
135 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
, const void *daddr
)
137 return __neigh_lookup_errno(&nd_tbl
, daddr
, dst
->dev
);
140 static struct dst_ops ip6_dst_ops_template
= {
142 .protocol
= cpu_to_be16(ETH_P_IPV6
),
145 .check
= ip6_dst_check
,
146 .default_advmss
= ip6_default_advmss
,
148 .cow_metrics
= ipv6_cow_metrics
,
149 .destroy
= ip6_dst_destroy
,
150 .ifdown
= ip6_dst_ifdown
,
151 .negative_advice
= ip6_negative_advice
,
152 .link_failure
= ip6_link_failure
,
153 .update_pmtu
= ip6_rt_update_pmtu
,
154 .local_out
= __ip6_local_out
,
155 .neigh_lookup
= ip6_neigh_lookup
,
158 static unsigned int ip6_blackhole_mtu(const struct dst_entry
*dst
)
160 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
162 return mtu
? : dst
->dev
->mtu
;
165 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
169 static u32
*ip6_rt_blackhole_cow_metrics(struct dst_entry
*dst
,
175 static struct dst_ops ip6_dst_blackhole_ops
= {
177 .protocol
= cpu_to_be16(ETH_P_IPV6
),
178 .destroy
= ip6_dst_destroy
,
179 .check
= ip6_dst_check
,
180 .mtu
= ip6_blackhole_mtu
,
181 .default_advmss
= ip6_default_advmss
,
182 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
183 .cow_metrics
= ip6_rt_blackhole_cow_metrics
,
184 .neigh_lookup
= ip6_neigh_lookup
,
187 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
188 [RTAX_HOPLIMIT
- 1] = 255,
191 static struct rt6_info ip6_null_entry_template
= {
193 .__refcnt
= ATOMIC_INIT(1),
196 .error
= -ENETUNREACH
,
197 .input
= ip6_pkt_discard
,
198 .output
= ip6_pkt_discard_out
,
200 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
201 .rt6i_protocol
= RTPROT_KERNEL
,
202 .rt6i_metric
= ~(u32
) 0,
203 .rt6i_ref
= ATOMIC_INIT(1),
206 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
208 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
209 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
211 static struct rt6_info ip6_prohibit_entry_template
= {
213 .__refcnt
= ATOMIC_INIT(1),
217 .input
= ip6_pkt_prohibit
,
218 .output
= ip6_pkt_prohibit_out
,
220 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
221 .rt6i_protocol
= RTPROT_KERNEL
,
222 .rt6i_metric
= ~(u32
) 0,
223 .rt6i_ref
= ATOMIC_INIT(1),
226 static struct rt6_info ip6_blk_hole_entry_template
= {
228 .__refcnt
= ATOMIC_INIT(1),
232 .input
= dst_discard
,
233 .output
= dst_discard
,
235 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
236 .rt6i_protocol
= RTPROT_KERNEL
,
237 .rt6i_metric
= ~(u32
) 0,
238 .rt6i_ref
= ATOMIC_INIT(1),
243 /* allocate dst with ip6_dst_ops */
244 static inline struct rt6_info
*ip6_dst_alloc(struct dst_ops
*ops
,
245 struct net_device
*dev
,
248 struct rt6_info
*rt
= dst_alloc(ops
, dev
, 0, 0, flags
);
251 memset(&rt
->rt6i_table
, 0,
252 sizeof(*rt
) - sizeof(struct dst_entry
));
257 static void ip6_dst_destroy(struct dst_entry
*dst
)
259 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
260 struct inet6_dev
*idev
= rt
->rt6i_idev
;
261 struct inet_peer
*peer
= rt
->rt6i_peer
;
263 if (!(rt
->dst
.flags
& DST_HOST
))
264 dst_destroy_metrics_generic(dst
);
267 rt
->rt6i_idev
= NULL
;
271 rt
->rt6i_peer
= NULL
;
276 static atomic_t __rt6_peer_genid
= ATOMIC_INIT(0);
278 static u32
rt6_peer_genid(void)
280 return atomic_read(&__rt6_peer_genid
);
283 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
285 struct inet_peer
*peer
;
287 peer
= inet_getpeer_v6(&rt
->rt6i_dst
.addr
, create
);
288 if (peer
&& cmpxchg(&rt
->rt6i_peer
, NULL
, peer
) != NULL
)
291 rt
->rt6i_peer_genid
= rt6_peer_genid();
294 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
297 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
298 struct inet6_dev
*idev
= rt
->rt6i_idev
;
299 struct net_device
*loopback_dev
=
300 dev_net(dev
)->loopback_dev
;
302 if (dev
!= loopback_dev
&& idev
&& idev
->dev
== dev
) {
303 struct inet6_dev
*loopback_idev
=
304 in6_dev_get(loopback_dev
);
306 rt
->rt6i_idev
= loopback_idev
;
312 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
314 return (rt
->rt6i_flags
& RTF_EXPIRES
) &&
315 time_after(jiffies
, rt
->rt6i_expires
);
318 static inline int rt6_need_strict(const struct in6_addr
*daddr
)
320 return ipv6_addr_type(daddr
) &
321 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
325 * Route lookup. Any table->tb6_lock is implied.
328 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
330 const struct in6_addr
*saddr
,
334 struct rt6_info
*local
= NULL
;
335 struct rt6_info
*sprt
;
337 if (!oif
&& ipv6_addr_any(saddr
))
340 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
341 struct net_device
*dev
= sprt
->rt6i_dev
;
344 if (dev
->ifindex
== oif
)
346 if (dev
->flags
& IFF_LOOPBACK
) {
347 if (!sprt
->rt6i_idev
||
348 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
349 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
351 if (local
&& (!oif
||
352 local
->rt6i_idev
->dev
->ifindex
== oif
))
358 if (ipv6_chk_addr(net
, saddr
, dev
,
359 flags
& RT6_LOOKUP_F_IFACE
))
368 if (flags
& RT6_LOOKUP_F_IFACE
)
369 return net
->ipv6
.ip6_null_entry
;
375 #ifdef CONFIG_IPV6_ROUTER_PREF
376 static void rt6_probe(struct rt6_info
*rt
)
378 struct neighbour
*neigh
;
380 * Okay, this does not seem to be appropriate
381 * for now, however, we need to check if it
382 * is really so; aka Router Reachability Probing.
384 * Router Reachability Probe MUST be rate-limited
385 * to no more than one per minute.
388 neigh
= rt
? dst_get_neighbour_noref(&rt
->dst
) : NULL
;
389 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
391 read_lock_bh(&neigh
->lock
);
392 if (!(neigh
->nud_state
& NUD_VALID
) &&
393 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
394 struct in6_addr mcaddr
;
395 struct in6_addr
*target
;
397 neigh
->updated
= jiffies
;
398 read_unlock_bh(&neigh
->lock
);
400 target
= (struct in6_addr
*)&neigh
->primary_key
;
401 addrconf_addr_solict_mult(target
, &mcaddr
);
402 ndisc_send_ns(rt
->rt6i_dev
, NULL
, target
, &mcaddr
, NULL
);
404 read_unlock_bh(&neigh
->lock
);
410 static inline void rt6_probe(struct rt6_info
*rt
)
416 * Default Router Selection (RFC 2461 6.3.6)
418 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
420 struct net_device
*dev
= rt
->rt6i_dev
;
421 if (!oif
|| dev
->ifindex
== oif
)
423 if ((dev
->flags
& IFF_LOOPBACK
) &&
424 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
429 static inline int rt6_check_neigh(struct rt6_info
*rt
)
431 struct neighbour
*neigh
;
435 neigh
= dst_get_neighbour_noref(&rt
->dst
);
436 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
437 !(rt
->rt6i_flags
& RTF_GATEWAY
))
440 read_lock_bh(&neigh
->lock
);
441 if (neigh
->nud_state
& NUD_VALID
)
443 #ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh
->nud_state
& NUD_FAILED
)
449 read_unlock_bh(&neigh
->lock
);
456 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
461 m
= rt6_check_dev(rt
, oif
);
462 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
464 #ifdef CONFIG_IPV6_ROUTER_PREF
465 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
467 n
= rt6_check_neigh(rt
);
468 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
473 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
474 int *mpri
, struct rt6_info
*match
)
478 if (rt6_check_expired(rt
))
481 m
= rt6_score_route(rt
, oif
, strict
);
486 if (strict
& RT6_LOOKUP_F_REACHABLE
)
490 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
498 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
499 struct rt6_info
*rr_head
,
500 u32 metric
, int oif
, int strict
)
502 struct rt6_info
*rt
, *match
;
506 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
507 rt
= rt
->dst
.rt6_next
)
508 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
509 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
510 rt
= rt
->dst
.rt6_next
)
511 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
516 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
518 struct rt6_info
*match
, *rt0
;
521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
522 __func__
, fn
->leaf
, oif
);
526 fn
->rr_ptr
= rt0
= fn
->leaf
;
528 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
531 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
532 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
534 /* no entries matched; do round-robin */
535 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
542 RT6_TRACE("%s() => %p\n",
545 net
= dev_net(rt0
->rt6i_dev
);
546 return match
? match
: net
->ipv6
.ip6_null_entry
;
549 #ifdef CONFIG_IPV6_ROUTE_INFO
550 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
551 const struct in6_addr
*gwaddr
)
553 struct net
*net
= dev_net(dev
);
554 struct route_info
*rinfo
= (struct route_info
*) opt
;
555 struct in6_addr prefix_buf
, *prefix
;
557 unsigned long lifetime
;
560 if (len
< sizeof(struct route_info
)) {
564 /* Sanity check for prefix_len and length */
565 if (rinfo
->length
> 3) {
567 } else if (rinfo
->prefix_len
> 128) {
569 } else if (rinfo
->prefix_len
> 64) {
570 if (rinfo
->length
< 2) {
573 } else if (rinfo
->prefix_len
> 0) {
574 if (rinfo
->length
< 1) {
579 pref
= rinfo
->route_pref
;
580 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
583 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
585 if (rinfo
->length
== 3)
586 prefix
= (struct in6_addr
*)rinfo
->prefix
;
588 /* this function is safe */
589 ipv6_addr_prefix(&prefix_buf
,
590 (struct in6_addr
*)rinfo
->prefix
,
592 prefix
= &prefix_buf
;
595 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
598 if (rt
&& !lifetime
) {
604 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
607 rt
->rt6i_flags
= RTF_ROUTEINFO
|
608 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
611 if (!addrconf_finite_timeout(lifetime
)) {
612 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
614 rt
->rt6i_expires
= jiffies
+ HZ
* lifetime
;
615 rt
->rt6i_flags
|= RTF_EXPIRES
;
617 dst_release(&rt
->dst
);
623 #define BACKTRACK(__net, saddr) \
625 if (rt == __net->ipv6.ip6_null_entry) { \
626 struct fib6_node *pn; \
628 if (fn->fn_flags & RTN_TL_ROOT) \
631 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
632 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
635 if (fn->fn_flags & RTN_RTINFO) \
641 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
642 struct fib6_table
*table
,
643 struct flowi6
*fl6
, int flags
)
645 struct fib6_node
*fn
;
648 read_lock_bh(&table
->tb6_lock
);
649 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
652 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
653 BACKTRACK(net
, &fl6
->saddr
);
655 dst_use(&rt
->dst
, jiffies
);
656 read_unlock_bh(&table
->tb6_lock
);
661 struct dst_entry
* ip6_route_lookup(struct net
*net
, struct flowi6
*fl6
,
664 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_lookup
);
666 EXPORT_SYMBOL_GPL(ip6_route_lookup
);
668 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
669 const struct in6_addr
*saddr
, int oif
, int strict
)
671 struct flowi6 fl6
= {
675 struct dst_entry
*dst
;
676 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
679 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
680 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
683 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
685 return (struct rt6_info
*) dst
;
692 EXPORT_SYMBOL(rt6_lookup
);
694 /* ip6_ins_rt is called with FREE table->tb6_lock.
695 It takes new route entry, the addition fails by any reason the
696 route is freed. In any case, if caller does not hold it, it may
700 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
703 struct fib6_table
*table
;
705 table
= rt
->rt6i_table
;
706 write_lock_bh(&table
->tb6_lock
);
707 err
= fib6_add(&table
->tb6_root
, rt
, info
);
708 write_unlock_bh(&table
->tb6_lock
);
713 int ip6_ins_rt(struct rt6_info
*rt
)
715 struct nl_info info
= {
716 .nl_net
= dev_net(rt
->rt6i_dev
),
718 return __ip6_ins_rt(rt
, &info
);
721 static struct rt6_info
*rt6_alloc_cow(const struct rt6_info
*ort
,
722 const struct in6_addr
*daddr
,
723 const struct in6_addr
*saddr
)
731 rt
= ip6_rt_copy(ort
, daddr
);
734 struct neighbour
*neigh
;
735 int attempts
= !in_softirq();
737 if (!(rt
->rt6i_flags
& RTF_GATEWAY
)) {
738 if (ort
->rt6i_dst
.plen
!= 128 &&
739 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
740 rt
->rt6i_flags
|= RTF_ANYCAST
;
741 rt
->rt6i_gateway
= *daddr
;
744 rt
->rt6i_flags
|= RTF_CACHE
;
746 #ifdef CONFIG_IPV6_SUBTREES
747 if (rt
->rt6i_src
.plen
&& saddr
) {
748 rt
->rt6i_src
.addr
= *saddr
;
749 rt
->rt6i_src
.plen
= 128;
754 neigh
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
,
757 struct net
*net
= dev_net(rt
->rt6i_dev
);
758 int saved_rt_min_interval
=
759 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
760 int saved_rt_elasticity
=
761 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
763 if (attempts
-- > 0) {
764 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
765 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
767 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
769 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
771 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
772 saved_rt_min_interval
;
778 "ipv6: Neighbour table overflow.\n");
782 dst_set_neighbour(&rt
->dst
, neigh
);
789 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
,
790 const struct in6_addr
*daddr
)
792 struct rt6_info
*rt
= ip6_rt_copy(ort
, daddr
);
795 rt
->rt6i_flags
|= RTF_CACHE
;
796 dst_set_neighbour(&rt
->dst
, neigh_clone(dst_get_neighbour_noref_raw(&ort
->dst
)));
801 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
802 struct flowi6
*fl6
, int flags
)
804 struct fib6_node
*fn
;
805 struct rt6_info
*rt
, *nrt
;
809 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
811 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
814 read_lock_bh(&table
->tb6_lock
);
817 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
820 rt
= rt6_select(fn
, oif
, strict
| reachable
);
822 BACKTRACK(net
, &fl6
->saddr
);
823 if (rt
== net
->ipv6
.ip6_null_entry
||
824 rt
->rt6i_flags
& RTF_CACHE
)
828 read_unlock_bh(&table
->tb6_lock
);
830 if (!dst_get_neighbour_noref_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
831 nrt
= rt6_alloc_cow(rt
, &fl6
->daddr
, &fl6
->saddr
);
832 else if (!(rt
->dst
.flags
& DST_HOST
))
833 nrt
= rt6_alloc_clone(rt
, &fl6
->daddr
);
837 dst_release(&rt
->dst
);
838 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
842 err
= ip6_ins_rt(nrt
);
851 * Race condition! In the gap, when table->tb6_lock was
852 * released someone could insert this route. Relookup.
854 dst_release(&rt
->dst
);
863 read_unlock_bh(&table
->tb6_lock
);
865 rt
->dst
.lastuse
= jiffies
;
871 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
872 struct flowi6
*fl6
, int flags
)
874 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
877 void ip6_route_input(struct sk_buff
*skb
)
879 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
880 struct net
*net
= dev_net(skb
->dev
);
881 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
882 struct flowi6 fl6
= {
883 .flowi6_iif
= skb
->dev
->ifindex
,
886 .flowlabel
= (* (__be32
*) iph
) & IPV6_FLOWINFO_MASK
,
887 .flowi6_mark
= skb
->mark
,
888 .flowi6_proto
= iph
->nexthdr
,
891 if (rt6_need_strict(&iph
->daddr
) && skb
->dev
->type
!= ARPHRD_PIMREG
)
892 flags
|= RT6_LOOKUP_F_IFACE
;
894 skb_dst_set(skb
, fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_input
));
897 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
898 struct flowi6
*fl6
, int flags
)
900 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
903 struct dst_entry
* ip6_route_output(struct net
*net
, const struct sock
*sk
,
908 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
))
909 flags
|= RT6_LOOKUP_F_IFACE
;
911 if (!ipv6_addr_any(&fl6
->saddr
))
912 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
914 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
916 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
919 EXPORT_SYMBOL(ip6_route_output
);
921 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
923 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
924 struct dst_entry
*new = NULL
;
926 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, 0, 0);
928 memset(&rt
->rt6i_table
, 0, sizeof(*rt
) - sizeof(struct dst_entry
));
933 new->input
= dst_discard
;
934 new->output
= dst_discard
;
936 if (dst_metrics_read_only(&ort
->dst
))
937 new->_metrics
= ort
->dst
._metrics
;
939 dst_copy_metrics(new, &ort
->dst
);
940 rt
->rt6i_idev
= ort
->rt6i_idev
;
942 in6_dev_hold(rt
->rt6i_idev
);
943 rt
->rt6i_expires
= 0;
945 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
946 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
949 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
950 #ifdef CONFIG_IPV6_SUBTREES
951 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
957 dst_release(dst_orig
);
958 return new ? new : ERR_PTR(-ENOMEM
);
962 * Destination cache support functions
965 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
969 rt
= (struct rt6_info
*) dst
;
971 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
)) {
972 if (rt
->rt6i_peer_genid
!= rt6_peer_genid()) {
974 rt6_bind_peer(rt
, 0);
975 rt
->rt6i_peer_genid
= rt6_peer_genid();
982 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
984 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
987 if (rt
->rt6i_flags
& RTF_CACHE
) {
988 if (rt6_check_expired(rt
)) {
1000 static void ip6_link_failure(struct sk_buff
*skb
)
1002 struct rt6_info
*rt
;
1004 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
1006 rt
= (struct rt6_info
*) skb_dst(skb
);
1008 if (rt
->rt6i_flags
& RTF_CACHE
) {
1009 dst_set_expires(&rt
->dst
, 0);
1010 rt
->rt6i_flags
|= RTF_EXPIRES
;
1011 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
1012 rt
->rt6i_node
->fn_sernum
= -1;
1016 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
1018 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1020 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
1021 rt6
->rt6i_flags
|= RTF_MODIFIED
;
1022 if (mtu
< IPV6_MIN_MTU
) {
1023 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
1025 features
|= RTAX_FEATURE_ALLFRAG
;
1026 dst_metric_set(dst
, RTAX_FEATURES
, features
);
1028 dst_metric_set(dst
, RTAX_MTU
, mtu
);
1032 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1034 struct net_device
*dev
= dst
->dev
;
1035 unsigned int mtu
= dst_mtu(dst
);
1036 struct net
*net
= dev_net(dev
);
1038 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1040 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1041 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1044 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1045 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1046 * IPV6_MAXPLEN is also valid and means: "any MSS,
1047 * rely only on pmtu discovery"
1049 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1054 static unsigned int ip6_mtu(const struct dst_entry
*dst
)
1056 struct inet6_dev
*idev
;
1057 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
1065 idev
= __in6_dev_get(dst
->dev
);
1067 mtu
= idev
->cnf
.mtu6
;
1073 static struct dst_entry
*icmp6_dst_gc_list
;
1074 static DEFINE_SPINLOCK(icmp6_dst_lock
);
1076 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1077 struct neighbour
*neigh
,
1080 struct dst_entry
*dst
;
1081 struct rt6_info
*rt
;
1082 struct inet6_dev
*idev
= in6_dev_get(dev
);
1083 struct net
*net
= dev_net(dev
);
1085 if (unlikely(!idev
))
1088 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
, 0);
1089 if (unlikely(!rt
)) {
1091 dst
= ERR_PTR(-ENOMEM
);
1098 neigh
= __neigh_lookup_errno(&nd_tbl
, &fl6
->daddr
, dev
);
1099 if (IS_ERR(neigh
)) {
1101 return ERR_CAST(neigh
);
1105 rt
->dst
.flags
|= DST_HOST
;
1106 rt
->dst
.output
= ip6_output
;
1107 dst_set_neighbour(&rt
->dst
, neigh
);
1108 atomic_set(&rt
->dst
.__refcnt
, 1);
1109 rt
->rt6i_dst
.addr
= fl6
->daddr
;
1110 rt
->rt6i_dst
.plen
= 128;
1111 rt
->rt6i_idev
= idev
;
1112 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
1114 spin_lock_bh(&icmp6_dst_lock
);
1115 rt
->dst
.next
= icmp6_dst_gc_list
;
1116 icmp6_dst_gc_list
= &rt
->dst
;
1117 spin_unlock_bh(&icmp6_dst_lock
);
1119 fib6_force_start_gc(net
);
1121 dst
= xfrm_lookup(net
, &rt
->dst
, flowi6_to_flowi(fl6
), NULL
, 0);
1127 int icmp6_dst_gc(void)
1129 struct dst_entry
*dst
, **pprev
;
1132 spin_lock_bh(&icmp6_dst_lock
);
1133 pprev
= &icmp6_dst_gc_list
;
1135 while ((dst
= *pprev
) != NULL
) {
1136 if (!atomic_read(&dst
->__refcnt
)) {
1145 spin_unlock_bh(&icmp6_dst_lock
);
1150 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1153 struct dst_entry
*dst
, **pprev
;
1155 spin_lock_bh(&icmp6_dst_lock
);
1156 pprev
= &icmp6_dst_gc_list
;
1157 while ((dst
= *pprev
) != NULL
) {
1158 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1159 if (func(rt
, arg
)) {
1166 spin_unlock_bh(&icmp6_dst_lock
);
1169 static int ip6_dst_gc(struct dst_ops
*ops
)
1171 unsigned long now
= jiffies
;
1172 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1173 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1174 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1175 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1176 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1177 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1180 entries
= dst_entries_get_fast(ops
);
1181 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1182 entries
<= rt_max_size
)
1185 net
->ipv6
.ip6_rt_gc_expire
++;
1186 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1187 net
->ipv6
.ip6_rt_last_gc
= now
;
1188 entries
= dst_entries_get_slow(ops
);
1189 if (entries
< ops
->gc_thresh
)
1190 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1192 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1193 return entries
> rt_max_size
;
1196 /* Clean host part of a prefix. Not necessary in radix tree,
1197 but results in cleaner routing tables.
1199 Remove it only when all the things will work!
1202 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1204 int hoplimit
= dst_metric_raw(dst
, RTAX_HOPLIMIT
);
1205 if (hoplimit
== 0) {
1206 struct net_device
*dev
= dst
->dev
;
1207 struct inet6_dev
*idev
;
1210 idev
= __in6_dev_get(dev
);
1212 hoplimit
= idev
->cnf
.hop_limit
;
1214 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1219 EXPORT_SYMBOL(ip6_dst_hoplimit
);
1225 int ip6_route_add(struct fib6_config
*cfg
)
1228 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1229 struct rt6_info
*rt
= NULL
;
1230 struct net_device
*dev
= NULL
;
1231 struct inet6_dev
*idev
= NULL
;
1232 struct fib6_table
*table
;
1235 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1237 #ifndef CONFIG_IPV6_SUBTREES
1238 if (cfg
->fc_src_len
)
1241 if (cfg
->fc_ifindex
) {
1243 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1246 idev
= in6_dev_get(dev
);
1251 if (cfg
->fc_metric
== 0)
1252 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1255 if (cfg
->fc_nlinfo
.nlh
&&
1256 !(cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_CREATE
)) {
1257 table
= fib6_get_table(net
, cfg
->fc_table
);
1259 printk(KERN_WARNING
"IPv6: NLM_F_CREATE should be specified when creating new route\n");
1260 table
= fib6_new_table(net
, cfg
->fc_table
);
1263 table
= fib6_new_table(net
, cfg
->fc_table
);
1269 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
, NULL
, DST_NOCOUNT
);
1276 rt
->dst
.obsolete
= -1;
1277 rt
->rt6i_expires
= (cfg
->fc_flags
& RTF_EXPIRES
) ?
1278 jiffies
+ clock_t_to_jiffies(cfg
->fc_expires
) :
1281 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1282 cfg
->fc_protocol
= RTPROT_BOOT
;
1283 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1285 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1287 if (addr_type
& IPV6_ADDR_MULTICAST
)
1288 rt
->dst
.input
= ip6_mc_input
;
1289 else if (cfg
->fc_flags
& RTF_LOCAL
)
1290 rt
->dst
.input
= ip6_input
;
1292 rt
->dst
.input
= ip6_forward
;
1294 rt
->dst
.output
= ip6_output
;
1296 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1297 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1298 if (rt
->rt6i_dst
.plen
== 128)
1299 rt
->dst
.flags
|= DST_HOST
;
1301 if (!(rt
->dst
.flags
& DST_HOST
) && cfg
->fc_mx
) {
1302 u32
*metrics
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1307 dst_init_metrics(&rt
->dst
, metrics
, 0);
1309 #ifdef CONFIG_IPV6_SUBTREES
1310 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1311 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1314 rt
->rt6i_metric
= cfg
->fc_metric
;
1316 /* We cannot add true routes via loopback here,
1317 they would result in kernel looping; promote them to reject routes
1319 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1320 (dev
&& (dev
->flags
& IFF_LOOPBACK
) &&
1321 !(addr_type
& IPV6_ADDR_LOOPBACK
) &&
1322 !(cfg
->fc_flags
& RTF_LOCAL
))) {
1323 /* hold loopback dev/idev if we haven't done so. */
1324 if (dev
!= net
->loopback_dev
) {
1329 dev
= net
->loopback_dev
;
1331 idev
= in6_dev_get(dev
);
1337 rt
->dst
.output
= ip6_pkt_discard_out
;
1338 rt
->dst
.input
= ip6_pkt_discard
;
1339 rt
->dst
.error
= -ENETUNREACH
;
1340 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1344 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1345 const struct in6_addr
*gw_addr
;
1348 gw_addr
= &cfg
->fc_gateway
;
1349 rt
->rt6i_gateway
= *gw_addr
;
1350 gwa_type
= ipv6_addr_type(gw_addr
);
1352 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1353 struct rt6_info
*grt
;
1355 /* IPv6 strictly inhibits using not link-local
1356 addresses as nexthop address.
1357 Otherwise, router will not able to send redirects.
1358 It is very good, but in some (rare!) circumstances
1359 (SIT, PtP, NBMA NOARP links) it is handy to allow
1360 some exceptions. --ANK
1363 if (!(gwa_type
& IPV6_ADDR_UNICAST
))
1366 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1368 err
= -EHOSTUNREACH
;
1372 if (dev
!= grt
->rt6i_dev
) {
1373 dst_release(&grt
->dst
);
1377 dev
= grt
->rt6i_dev
;
1378 idev
= grt
->rt6i_idev
;
1380 in6_dev_hold(grt
->rt6i_idev
);
1382 if (!(grt
->rt6i_flags
& RTF_GATEWAY
))
1384 dst_release(&grt
->dst
);
1390 if (!dev
|| (dev
->flags
& IFF_LOOPBACK
))
1398 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
1399 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
1403 rt
->rt6i_prefsrc
.addr
= cfg
->fc_prefsrc
;
1404 rt
->rt6i_prefsrc
.plen
= 128;
1406 rt
->rt6i_prefsrc
.plen
= 0;
1408 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1409 struct neighbour
*n
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
1414 dst_set_neighbour(&rt
->dst
, n
);
1417 rt
->rt6i_flags
= cfg
->fc_flags
;
1424 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1425 int type
= nla_type(nla
);
1428 if (type
> RTAX_MAX
) {
1433 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1439 rt
->rt6i_idev
= idev
;
1440 rt
->rt6i_table
= table
;
1442 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1444 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1456 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1459 struct fib6_table
*table
;
1460 struct net
*net
= dev_net(rt
->rt6i_dev
);
1462 if (rt
== net
->ipv6
.ip6_null_entry
)
1465 table
= rt
->rt6i_table
;
1466 write_lock_bh(&table
->tb6_lock
);
1468 err
= fib6_del(rt
, info
);
1469 dst_release(&rt
->dst
);
1471 write_unlock_bh(&table
->tb6_lock
);
1476 int ip6_del_rt(struct rt6_info
*rt
)
1478 struct nl_info info
= {
1479 .nl_net
= dev_net(rt
->rt6i_dev
),
1481 return __ip6_del_rt(rt
, &info
);
1484 static int ip6_route_del(struct fib6_config
*cfg
)
1486 struct fib6_table
*table
;
1487 struct fib6_node
*fn
;
1488 struct rt6_info
*rt
;
1491 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1495 read_lock_bh(&table
->tb6_lock
);
1497 fn
= fib6_locate(&table
->tb6_root
,
1498 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1499 &cfg
->fc_src
, cfg
->fc_src_len
);
1502 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1503 if (cfg
->fc_ifindex
&&
1505 rt
->rt6i_dev
->ifindex
!= cfg
->fc_ifindex
))
1507 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1508 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1510 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1513 read_unlock_bh(&table
->tb6_lock
);
1515 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1518 read_unlock_bh(&table
->tb6_lock
);
1526 struct ip6rd_flowi
{
1528 struct in6_addr gateway
;
1531 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1532 struct fib6_table
*table
,
1536 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1537 struct rt6_info
*rt
;
1538 struct fib6_node
*fn
;
1541 * Get the "current" route for this destination and
1542 * check if the redirect has come from approriate router.
1544 * RFC 2461 specifies that redirects should only be
1545 * accepted if they come from the nexthop to the target.
1546 * Due to the way the routes are chosen, this notion
1547 * is a bit fuzzy and one might need to check all possible
1551 read_lock_bh(&table
->tb6_lock
);
1552 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1554 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1556 * Current route is on-link; redirect is always invalid.
1558 * Seems, previous statement is not true. It could
1559 * be node, which looks for us as on-link (f.e. proxy ndisc)
1560 * But then router serving it might decide, that we should
1561 * know truth 8)8) --ANK (980726).
1563 if (rt6_check_expired(rt
))
1565 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1567 if (fl6
->flowi6_oif
!= rt
->rt6i_dev
->ifindex
)
1569 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1575 rt
= net
->ipv6
.ip6_null_entry
;
1576 BACKTRACK(net
, &fl6
->saddr
);
1580 read_unlock_bh(&table
->tb6_lock
);
1585 static struct rt6_info
*ip6_route_redirect(const struct in6_addr
*dest
,
1586 const struct in6_addr
*src
,
1587 const struct in6_addr
*gateway
,
1588 struct net_device
*dev
)
1590 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1591 struct net
*net
= dev_net(dev
);
1592 struct ip6rd_flowi rdfl
= {
1594 .flowi6_oif
= dev
->ifindex
,
1600 rdfl
.gateway
= *gateway
;
1602 if (rt6_need_strict(dest
))
1603 flags
|= RT6_LOOKUP_F_IFACE
;
1605 return (struct rt6_info
*)fib6_rule_lookup(net
, &rdfl
.fl6
,
1606 flags
, __ip6_route_redirect
);
1609 void rt6_redirect(const struct in6_addr
*dest
, const struct in6_addr
*src
,
1610 const struct in6_addr
*saddr
,
1611 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1613 struct rt6_info
*rt
, *nrt
= NULL
;
1614 struct netevent_redirect netevent
;
1615 struct net
*net
= dev_net(neigh
->dev
);
1617 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1619 if (rt
== net
->ipv6
.ip6_null_entry
) {
1620 if (net_ratelimit())
1621 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1622 "for redirect target\n");
1627 * We have finally decided to accept it.
1630 neigh_update(neigh
, lladdr
, NUD_STALE
,
1631 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1632 NEIGH_UPDATE_F_OVERRIDE
|
1633 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1634 NEIGH_UPDATE_F_ISROUTER
))
1638 * Redirect received -> path was valid.
1639 * Look, redirects are sent only in response to data packets,
1640 * so that this nexthop apparently is reachable. --ANK
1642 dst_confirm(&rt
->dst
);
1644 /* Duplicate redirect: silently ignore. */
1645 if (neigh
== dst_get_neighbour_noref_raw(&rt
->dst
))
1648 nrt
= ip6_rt_copy(rt
, dest
);
1652 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1654 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1656 nrt
->rt6i_gateway
= *(struct in6_addr
*)neigh
->primary_key
;
1657 dst_set_neighbour(&nrt
->dst
, neigh_clone(neigh
));
1659 if (ip6_ins_rt(nrt
))
1662 netevent
.old
= &rt
->dst
;
1663 netevent
.new = &nrt
->dst
;
1664 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1666 if (rt
->rt6i_flags
& RTF_CACHE
) {
1672 dst_release(&rt
->dst
);
1676 * Handle ICMP "packet too big" messages
1677 * i.e. Path MTU discovery
1680 static void rt6_do_pmtu_disc(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1681 struct net
*net
, u32 pmtu
, int ifindex
)
1683 struct rt6_info
*rt
, *nrt
;
1686 rt
= rt6_lookup(net
, daddr
, saddr
, ifindex
, 0);
1690 if (rt6_check_expired(rt
)) {
1695 if (pmtu
>= dst_mtu(&rt
->dst
))
1698 if (pmtu
< IPV6_MIN_MTU
) {
1700 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1701 * MTU (1280) and a fragment header should always be included
1702 * after a node receiving Too Big message reporting PMTU is
1703 * less than the IPv6 Minimum Link MTU.
1705 pmtu
= IPV6_MIN_MTU
;
1709 /* New mtu received -> path was valid.
1710 They are sent only in response to data packets,
1711 so that this nexthop apparently is reachable. --ANK
1713 dst_confirm(&rt
->dst
);
1715 /* Host route. If it is static, it would be better
1716 not to override it, but add new one, so that
1717 when cache entry will expire old pmtu
1718 would return automatically.
1720 if (rt
->rt6i_flags
& RTF_CACHE
) {
1721 dst_metric_set(&rt
->dst
, RTAX_MTU
, pmtu
);
1723 u32 features
= dst_metric(&rt
->dst
, RTAX_FEATURES
);
1724 features
|= RTAX_FEATURE_ALLFRAG
;
1725 dst_metric_set(&rt
->dst
, RTAX_FEATURES
, features
);
1727 dst_set_expires(&rt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1728 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1733 Two cases are possible:
1734 1. It is connected route. Action: COW
1735 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1737 if (!dst_get_neighbour_noref_raw(&rt
->dst
) && !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1738 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1740 nrt
= rt6_alloc_clone(rt
, daddr
);
1743 dst_metric_set(&nrt
->dst
, RTAX_MTU
, pmtu
);
1745 u32 features
= dst_metric(&nrt
->dst
, RTAX_FEATURES
);
1746 features
|= RTAX_FEATURE_ALLFRAG
;
1747 dst_metric_set(&nrt
->dst
, RTAX_FEATURES
, features
);
1750 /* According to RFC 1981, detecting PMTU increase shouldn't be
1751 * happened within 5 mins, the recommended timer is 10 mins.
1752 * Here this route expiration time is set to ip6_rt_mtu_expires
1753 * which is 10 mins. After 10 mins the decreased pmtu is expired
1754 * and detecting PMTU increase will be automatically happened.
1756 dst_set_expires(&nrt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1757 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1762 dst_release(&rt
->dst
);
1765 void rt6_pmtu_discovery(const struct in6_addr
*daddr
, const struct in6_addr
*saddr
,
1766 struct net_device
*dev
, u32 pmtu
)
1768 struct net
*net
= dev_net(dev
);
1771 * RFC 1981 states that a node "MUST reduce the size of the packets it
1772 * is sending along the path" that caused the Packet Too Big message.
1773 * Since it's not possible in the general case to determine which
1774 * interface was used to send the original packet, we update the MTU
1775 * on the interface that will be used to send future packets. We also
1776 * update the MTU on the interface that received the Packet Too Big in
1777 * case the original packet was forced out that interface with
1778 * SO_BINDTODEVICE or similar. This is the next best thing to the
1779 * correct behaviour, which would be to update the MTU on all
1782 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, 0);
1783 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, dev
->ifindex
);
1787 * Misc support functions
1790 static struct rt6_info
*ip6_rt_copy(const struct rt6_info
*ort
,
1791 const struct in6_addr
*dest
)
1793 struct net
*net
= dev_net(ort
->rt6i_dev
);
1794 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
1798 rt
->dst
.input
= ort
->dst
.input
;
1799 rt
->dst
.output
= ort
->dst
.output
;
1800 rt
->dst
.flags
|= DST_HOST
;
1802 rt
->rt6i_dst
.addr
= *dest
;
1803 rt
->rt6i_dst
.plen
= 128;
1804 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1805 rt
->dst
.error
= ort
->dst
.error
;
1806 rt
->rt6i_idev
= ort
->rt6i_idev
;
1808 in6_dev_hold(rt
->rt6i_idev
);
1809 rt
->dst
.lastuse
= jiffies
;
1810 rt
->rt6i_expires
= 0;
1812 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1813 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1814 rt
->rt6i_metric
= 0;
1816 #ifdef CONFIG_IPV6_SUBTREES
1817 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1819 memcpy(&rt
->rt6i_prefsrc
, &ort
->rt6i_prefsrc
, sizeof(struct rt6key
));
1820 rt
->rt6i_table
= ort
->rt6i_table
;
1825 #ifdef CONFIG_IPV6_ROUTE_INFO
1826 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
1827 const struct in6_addr
*prefix
, int prefixlen
,
1828 const struct in6_addr
*gwaddr
, int ifindex
)
1830 struct fib6_node
*fn
;
1831 struct rt6_info
*rt
= NULL
;
1832 struct fib6_table
*table
;
1834 table
= fib6_get_table(net
, RT6_TABLE_INFO
);
1838 write_lock_bh(&table
->tb6_lock
);
1839 fn
= fib6_locate(&table
->tb6_root
, prefix
,prefixlen
, NULL
, 0);
1843 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1844 if (rt
->rt6i_dev
->ifindex
!= ifindex
)
1846 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
1848 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
1854 write_unlock_bh(&table
->tb6_lock
);
1858 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
1859 const struct in6_addr
*prefix
, int prefixlen
,
1860 const struct in6_addr
*gwaddr
, int ifindex
,
1863 struct fib6_config cfg
= {
1864 .fc_table
= RT6_TABLE_INFO
,
1865 .fc_metric
= IP6_RT_PRIO_USER
,
1866 .fc_ifindex
= ifindex
,
1867 .fc_dst_len
= prefixlen
,
1868 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
1869 RTF_UP
| RTF_PREF(pref
),
1871 .fc_nlinfo
.nlh
= NULL
,
1872 .fc_nlinfo
.nl_net
= net
,
1875 cfg
.fc_dst
= *prefix
;
1876 cfg
.fc_gateway
= *gwaddr
;
1878 /* We should treat it as a default route if prefix length is 0. */
1880 cfg
.fc_flags
|= RTF_DEFAULT
;
1882 ip6_route_add(&cfg
);
1884 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, ifindex
);
1888 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
1890 struct rt6_info
*rt
;
1891 struct fib6_table
*table
;
1893 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1897 write_lock_bh(&table
->tb6_lock
);
1898 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1899 if (dev
== rt
->rt6i_dev
&&
1900 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1901 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1906 write_unlock_bh(&table
->tb6_lock
);
1910 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
1911 struct net_device
*dev
,
1914 struct fib6_config cfg
= {
1915 .fc_table
= RT6_TABLE_DFLT
,
1916 .fc_metric
= IP6_RT_PRIO_USER
,
1917 .fc_ifindex
= dev
->ifindex
,
1918 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1919 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1921 .fc_nlinfo
.nlh
= NULL
,
1922 .fc_nlinfo
.nl_net
= dev_net(dev
),
1925 cfg
.fc_gateway
= *gwaddr
;
1927 ip6_route_add(&cfg
);
1929 return rt6_get_dflt_router(gwaddr
, dev
);
1932 void rt6_purge_dflt_routers(struct net
*net
)
1934 struct rt6_info
*rt
;
1935 struct fib6_table
*table
;
1937 /* NOTE: Keep consistent with rt6_get_dflt_router */
1938 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1943 read_lock_bh(&table
->tb6_lock
);
1944 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1945 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1947 read_unlock_bh(&table
->tb6_lock
);
1952 read_unlock_bh(&table
->tb6_lock
);
1955 static void rtmsg_to_fib6_config(struct net
*net
,
1956 struct in6_rtmsg
*rtmsg
,
1957 struct fib6_config
*cfg
)
1959 memset(cfg
, 0, sizeof(*cfg
));
1961 cfg
->fc_table
= RT6_TABLE_MAIN
;
1962 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1963 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1964 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1965 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1966 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1967 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1969 cfg
->fc_nlinfo
.nl_net
= net
;
1971 cfg
->fc_dst
= rtmsg
->rtmsg_dst
;
1972 cfg
->fc_src
= rtmsg
->rtmsg_src
;
1973 cfg
->fc_gateway
= rtmsg
->rtmsg_gateway
;
1976 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
1978 struct fib6_config cfg
;
1979 struct in6_rtmsg rtmsg
;
1983 case SIOCADDRT
: /* Add a route */
1984 case SIOCDELRT
: /* Delete a route */
1985 if (!capable(CAP_NET_ADMIN
))
1987 err
= copy_from_user(&rtmsg
, arg
,
1988 sizeof(struct in6_rtmsg
));
1992 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
1997 err
= ip6_route_add(&cfg
);
2000 err
= ip6_route_del(&cfg
);
2014 * Drop the packet on the floor
2017 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
2020 struct dst_entry
*dst
= skb_dst(skb
);
2021 switch (ipstats_mib_noroutes
) {
2022 case IPSTATS_MIB_INNOROUTES
:
2023 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
2024 if (type
== IPV6_ADDR_ANY
) {
2025 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2026 IPSTATS_MIB_INADDRERRORS
);
2030 case IPSTATS_MIB_OUTNOROUTES
:
2031 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2032 ipstats_mib_noroutes
);
2035 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
2040 static int ip6_pkt_discard(struct sk_buff
*skb
)
2042 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
2045 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
2047 skb
->dev
= skb_dst(skb
)->dev
;
2048 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2053 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2055 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
2058 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
)
2060 skb
->dev
= skb_dst(skb
)->dev
;
2061 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2067 * Allocate a dst for local (unicast / anycast) address.
2070 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2071 const struct in6_addr
*addr
,
2074 struct net
*net
= dev_net(idev
->dev
);
2075 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
,
2076 net
->loopback_dev
, 0);
2077 struct neighbour
*neigh
;
2080 if (net_ratelimit())
2081 pr_warning("IPv6: Maximum number of routes reached,"
2082 " consider increasing route/max_size.\n");
2083 return ERR_PTR(-ENOMEM
);
2088 rt
->dst
.flags
|= DST_HOST
;
2089 rt
->dst
.input
= ip6_input
;
2090 rt
->dst
.output
= ip6_output
;
2091 rt
->rt6i_idev
= idev
;
2092 rt
->dst
.obsolete
= -1;
2094 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2096 rt
->rt6i_flags
|= RTF_ANYCAST
;
2098 rt
->rt6i_flags
|= RTF_LOCAL
;
2099 neigh
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, rt
->rt6i_dev
);
2100 if (IS_ERR(neigh
)) {
2103 return ERR_CAST(neigh
);
2105 dst_set_neighbour(&rt
->dst
, neigh
);
2107 rt
->rt6i_dst
.addr
= *addr
;
2108 rt
->rt6i_dst
.plen
= 128;
2109 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2111 atomic_set(&rt
->dst
.__refcnt
, 1);
2116 int ip6_route_get_saddr(struct net
*net
,
2117 struct rt6_info
*rt
,
2118 const struct in6_addr
*daddr
,
2120 struct in6_addr
*saddr
)
2122 struct inet6_dev
*idev
= ip6_dst_idev((struct dst_entry
*)rt
);
2124 if (rt
->rt6i_prefsrc
.plen
)
2125 *saddr
= rt
->rt6i_prefsrc
.addr
;
2127 err
= ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2128 daddr
, prefs
, saddr
);
2132 /* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(): the device/netns/address whose
 * prefsrc entries should be cleared.
 */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2139 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2141 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2142 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2143 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2145 if (((void *)rt
->rt6i_dev
== dev
|| !dev
) &&
2146 rt
!= net
->ipv6
.ip6_null_entry
&&
2147 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2148 /* remove prefsrc entry */
2149 rt
->rt6i_prefsrc
.plen
= 0;
2154 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2156 struct net
*net
= dev_net(ifp
->idev
->dev
);
2157 struct arg_dev_net_ip adni
= {
2158 .dev
= ifp
->idev
->dev
,
2162 fib6_clean_all(net
, fib6_remove_prefsrc
, 0, &adni
);
/* Walker argument for fib6_ifdown(): device going down and its netns. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2170 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2172 const struct arg_dev_net
*adn
= arg
;
2173 const struct net_device
*dev
= adn
->dev
;
2175 if ((rt
->rt6i_dev
== dev
|| !dev
) &&
2176 rt
!= adn
->net
->ipv6
.ip6_null_entry
) {
2177 RT6_TRACE("deleted by ifdown %p\n", rt
);
2183 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2185 struct arg_dev_net adn
= {
2190 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2191 icmp6_clean_all(fib6_ifdown
, &adn
);
/* Walker argument for rt6_mtu_change_route(): the device whose MTU changed
 * and its new value.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned mtu;
};
2200 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2202 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2203 struct inet6_dev
*idev
;
2205 /* In IPv6 pmtu discovery is not optional,
2206 so that RTAX_MTU lock cannot disable it.
2207 We still use this lock to block changes
2208 caused by addrconf/ndisc.
2211 idev
= __in6_dev_get(arg
->dev
);
2215 /* For administrative MTU increase, there is no way to discover
2216 IPv6 PMTU increase, so PMTU increase should be updated here.
2217 Since RFC 1981 doesn't include administrative MTU increase
2218 update PMTU increase is a MUST. (i.e. jumbo frame)
2221 If new MTU is less than route PMTU, this new MTU will be the
2222 lowest MTU in the path, update the route PMTU to reflect PMTU
2223 decreases; if new MTU is greater than route PMTU, and the
2224 old MTU is the lowest MTU in the path, update the route PMTU
2225 to reflect the increase. In this case if the other nodes' MTU
2226 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2229 if (rt
->rt6i_dev
== arg
->dev
&&
2230 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2231 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2232 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2233 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2234 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2239 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
2241 struct rt6_mtu_change_arg arg
= {
2246 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
2249 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2250 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2251 [RTA_OIF
] = { .type
= NLA_U32
},
2252 [RTA_IIF
] = { .type
= NLA_U32
},
2253 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2254 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2257 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2258 struct fib6_config
*cfg
)
2261 struct nlattr
*tb
[RTA_MAX
+1];
2264 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2269 rtm
= nlmsg_data(nlh
);
2270 memset(cfg
, 0, sizeof(*cfg
));
2272 cfg
->fc_table
= rtm
->rtm_table
;
2273 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2274 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2275 cfg
->fc_flags
= RTF_UP
;
2276 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2278 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2279 cfg
->fc_flags
|= RTF_REJECT
;
2281 if (rtm
->rtm_type
== RTN_LOCAL
)
2282 cfg
->fc_flags
|= RTF_LOCAL
;
2284 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2285 cfg
->fc_nlinfo
.nlh
= nlh
;
2286 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2288 if (tb
[RTA_GATEWAY
]) {
2289 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2290 cfg
->fc_flags
|= RTF_GATEWAY
;
2294 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2296 if (nla_len(tb
[RTA_DST
]) < plen
)
2299 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2303 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2305 if (nla_len(tb
[RTA_SRC
]) < plen
)
2308 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2311 if (tb
[RTA_PREFSRC
])
2312 nla_memcpy(&cfg
->fc_prefsrc
, tb
[RTA_PREFSRC
], 16);
2315 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2317 if (tb
[RTA_PRIORITY
])
2318 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2320 if (tb
[RTA_METRICS
]) {
2321 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2322 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2326 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2333 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2335 struct fib6_config cfg
;
2338 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2342 return ip6_route_del(&cfg
);
2345 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2347 struct fib6_config cfg
;
2350 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2354 return ip6_route_add(&cfg
);
2357 static inline size_t rt6_nlmsg_size(void)
2359 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2360 + nla_total_size(16) /* RTA_SRC */
2361 + nla_total_size(16) /* RTA_DST */
2362 + nla_total_size(16) /* RTA_GATEWAY */
2363 + nla_total_size(16) /* RTA_PREFSRC */
2364 + nla_total_size(4) /* RTA_TABLE */
2365 + nla_total_size(4) /* RTA_IIF */
2366 + nla_total_size(4) /* RTA_OIF */
2367 + nla_total_size(4) /* RTA_PRIORITY */
2368 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2369 + nla_total_size(sizeof(struct rta_cacheinfo
));
2372 static int rt6_fill_node(struct net
*net
,
2373 struct sk_buff
*skb
, struct rt6_info
*rt
,
2374 struct in6_addr
*dst
, struct in6_addr
*src
,
2375 int iif
, int type
, u32 pid
, u32 seq
,
2376 int prefix
, int nowait
, unsigned int flags
)
2379 struct nlmsghdr
*nlh
;
2382 struct neighbour
*n
;
2384 if (prefix
) { /* user wants prefix routes only */
2385 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2386 /* success since this is not a prefix route */
2391 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2395 rtm
= nlmsg_data(nlh
);
2396 rtm
->rtm_family
= AF_INET6
;
2397 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2398 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2401 table
= rt
->rt6i_table
->tb6_id
;
2403 table
= RT6_TABLE_UNSPEC
;
2404 rtm
->rtm_table
= table
;
2405 NLA_PUT_U32(skb
, RTA_TABLE
, table
);
2406 if (rt
->rt6i_flags
& RTF_REJECT
)
2407 rtm
->rtm_type
= RTN_UNREACHABLE
;
2408 else if (rt
->rt6i_flags
& RTF_LOCAL
)
2409 rtm
->rtm_type
= RTN_LOCAL
;
2410 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
& IFF_LOOPBACK
))
2411 rtm
->rtm_type
= RTN_LOCAL
;
2413 rtm
->rtm_type
= RTN_UNICAST
;
2415 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2416 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2417 if (rt
->rt6i_flags
& RTF_DYNAMIC
)
2418 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2419 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2420 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2421 else if (rt
->rt6i_flags
& RTF_DEFAULT
)
2422 rtm
->rtm_protocol
= RTPROT_RA
;
2424 if (rt
->rt6i_flags
& RTF_CACHE
)
2425 rtm
->rtm_flags
|= RTM_F_CLONED
;
2428 NLA_PUT(skb
, RTA_DST
, 16, dst
);
2429 rtm
->rtm_dst_len
= 128;
2430 } else if (rtm
->rtm_dst_len
)
2431 NLA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
2432 #ifdef CONFIG_IPV6_SUBTREES
2434 NLA_PUT(skb
, RTA_SRC
, 16, src
);
2435 rtm
->rtm_src_len
= 128;
2436 } else if (rtm
->rtm_src_len
)
2437 NLA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
2440 #ifdef CONFIG_IPV6_MROUTE
2441 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2442 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2447 goto nla_put_failure
;
2449 if (err
== -EMSGSIZE
)
2450 goto nla_put_failure
;
2455 NLA_PUT_U32(skb
, RTA_IIF
, iif
);
2457 struct in6_addr saddr_buf
;
2458 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0)
2459 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2462 if (rt
->rt6i_prefsrc
.plen
) {
2463 struct in6_addr saddr_buf
;
2464 saddr_buf
= rt
->rt6i_prefsrc
.addr
;
2465 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2468 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2469 goto nla_put_failure
;
2472 n
= dst_get_neighbour_noref(&rt
->dst
);
2474 NLA_PUT(skb
, RTA_GATEWAY
, 16, &n
->primary_key
);
2478 NLA_PUT_U32(skb
, RTA_OIF
, rt
->rt6i_dev
->ifindex
);
2480 NLA_PUT_U32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
);
2482 if (!(rt
->rt6i_flags
& RTF_EXPIRES
))
2484 else if (rt
->rt6i_expires
- jiffies
< INT_MAX
)
2485 expires
= rt
->rt6i_expires
- jiffies
;
2489 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, 0, 0,
2490 expires
, rt
->dst
.error
) < 0)
2491 goto nla_put_failure
;
2493 return nlmsg_end(skb
, nlh
);
2496 nlmsg_cancel(skb
, nlh
);
2500 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2502 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2505 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2506 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2507 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2511 return rt6_fill_node(arg
->net
,
2512 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2513 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2514 prefix
, 0, NLM_F_MULTI
);
2517 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2519 struct net
*net
= sock_net(in_skb
->sk
);
2520 struct nlattr
*tb
[RTA_MAX
+1];
2521 struct rt6_info
*rt
;
2522 struct sk_buff
*skb
;
2527 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2532 memset(&fl6
, 0, sizeof(fl6
));
2535 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2538 fl6
.saddr
= *(struct in6_addr
*)nla_data(tb
[RTA_SRC
]);
2542 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2545 fl6
.daddr
= *(struct in6_addr
*)nla_data(tb
[RTA_DST
]);
2549 iif
= nla_get_u32(tb
[RTA_IIF
]);
2552 fl6
.flowi6_oif
= nla_get_u32(tb
[RTA_OIF
]);
2555 struct net_device
*dev
;
2556 dev
= __dev_get_by_index(net
, iif
);
2563 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2569 /* Reserve room for dummy headers, this skb can pass
2570 through good chunk of routing engine.
2572 skb_reset_mac_header(skb
);
2573 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2575 rt
= (struct rt6_info
*) ip6_route_output(net
, NULL
, &fl6
);
2576 skb_dst_set(skb
, &rt
->dst
);
2578 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
2579 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2580 nlh
->nlmsg_seq
, 0, 0, 0);
2586 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).pid
);
2591 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2593 struct sk_buff
*skb
;
2594 struct net
*net
= info
->nl_net
;
2599 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
2601 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2605 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2606 event
, info
->pid
, seq
, 0, 0, 0);
2608 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2609 WARN_ON(err
== -EMSGSIZE
);
2613 rtnl_notify(skb
, net
, info
->pid
, RTNLGRP_IPV6_ROUTE
,
2614 info
->nlh
, gfp_any());
2618 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
2621 static int ip6_route_dev_notify(struct notifier_block
*this,
2622 unsigned long event
, void *data
)
2624 struct net_device
*dev
= (struct net_device
*)data
;
2625 struct net
*net
= dev_net(dev
);
2627 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2628 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2629 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2630 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2631 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2632 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2633 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2634 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2645 #ifdef CONFIG_PROC_FS
2656 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2658 struct seq_file
*m
= p_arg
;
2659 struct neighbour
*n
;
2661 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2663 #ifdef CONFIG_IPV6_SUBTREES
2664 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2666 seq_puts(m
, "00000000000000000000000000000000 00 ");
2669 n
= dst_get_neighbour_noref(&rt
->dst
);
2671 seq_printf(m
, "%pi6", n
->primary_key
);
2673 seq_puts(m
, "00000000000000000000000000000000");
2676 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2677 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2678 rt
->dst
.__use
, rt
->rt6i_flags
,
2679 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
2683 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2685 struct net
*net
= (struct net
*)m
->private;
2686 fib6_clean_all(net
, rt6_info_route
, 0, m
);
2690 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2692 return single_open_net(inode
, file
, ipv6_route_show
);
2695 static const struct file_operations ipv6_route_proc_fops
= {
2696 .owner
= THIS_MODULE
,
2697 .open
= ipv6_route_open
,
2699 .llseek
= seq_lseek
,
2700 .release
= single_release_net
,
2703 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2705 struct net
*net
= (struct net
*)seq
->private;
2706 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2707 net
->ipv6
.rt6_stats
->fib_nodes
,
2708 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2709 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2710 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2711 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2712 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2713 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
2718 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2720 return single_open_net(inode
, file
, rt6_stats_seq_show
);
2723 static const struct file_operations rt6_stats_seq_fops
= {
2724 .owner
= THIS_MODULE
,
2725 .open
= rt6_stats_seq_open
,
2727 .llseek
= seq_lseek
,
2728 .release
= single_release_net
,
2730 #endif /* CONFIG_PROC_FS */
2732 #ifdef CONFIG_SYSCTL
2735 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2736 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2743 net
= (struct net
*)ctl
->extra1
;
2744 delay
= net
->ipv6
.sysctl
.flush_delay
;
2745 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2746 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
2750 ctl_table ipv6_route_table_template
[] = {
2752 .procname
= "flush",
2753 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2754 .maxlen
= sizeof(int),
2756 .proc_handler
= ipv6_sysctl_rtcache_flush
2759 .procname
= "gc_thresh",
2760 .data
= &ip6_dst_ops_template
.gc_thresh
,
2761 .maxlen
= sizeof(int),
2763 .proc_handler
= proc_dointvec
,
2766 .procname
= "max_size",
2767 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2768 .maxlen
= sizeof(int),
2770 .proc_handler
= proc_dointvec
,
2773 .procname
= "gc_min_interval",
2774 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2775 .maxlen
= sizeof(int),
2777 .proc_handler
= proc_dointvec_jiffies
,
2780 .procname
= "gc_timeout",
2781 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2782 .maxlen
= sizeof(int),
2784 .proc_handler
= proc_dointvec_jiffies
,
2787 .procname
= "gc_interval",
2788 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2789 .maxlen
= sizeof(int),
2791 .proc_handler
= proc_dointvec_jiffies
,
2794 .procname
= "gc_elasticity",
2795 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2796 .maxlen
= sizeof(int),
2798 .proc_handler
= proc_dointvec
,
2801 .procname
= "mtu_expires",
2802 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2803 .maxlen
= sizeof(int),
2805 .proc_handler
= proc_dointvec_jiffies
,
2808 .procname
= "min_adv_mss",
2809 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2810 .maxlen
= sizeof(int),
2812 .proc_handler
= proc_dointvec
,
2815 .procname
= "gc_min_interval_ms",
2816 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2817 .maxlen
= sizeof(int),
2819 .proc_handler
= proc_dointvec_ms_jiffies
,
2824 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2826 struct ctl_table
*table
;
2828 table
= kmemdup(ipv6_route_table_template
,
2829 sizeof(ipv6_route_table_template
),
2833 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2834 table
[0].extra1
= net
;
2835 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
2836 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
2837 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2838 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
2839 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
2840 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
2841 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
2842 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
2843 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2850 static int __net_init
ip6_route_net_init(struct net
*net
)
2854 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2855 sizeof(net
->ipv6
.ip6_dst_ops
));
2857 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2858 goto out_ip6_dst_ops
;
2860 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2861 sizeof(*net
->ipv6
.ip6_null_entry
),
2863 if (!net
->ipv6
.ip6_null_entry
)
2864 goto out_ip6_dst_entries
;
2865 net
->ipv6
.ip6_null_entry
->dst
.path
=
2866 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2867 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2868 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
2869 ip6_template_metrics
, true);
2871 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2872 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2873 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2875 if (!net
->ipv6
.ip6_prohibit_entry
)
2876 goto out_ip6_null_entry
;
2877 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2878 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2879 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2880 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
2881 ip6_template_metrics
, true);
2883 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2884 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2886 if (!net
->ipv6
.ip6_blk_hole_entry
)
2887 goto out_ip6_prohibit_entry
;
2888 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2889 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2890 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2891 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
2892 ip6_template_metrics
, true);
2895 net
->ipv6
.sysctl
.flush_delay
= 0;
2896 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2897 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2898 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2899 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2900 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2901 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
2902 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2904 #ifdef CONFIG_PROC_FS
2905 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2906 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
2908 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
2914 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2915 out_ip6_prohibit_entry
:
2916 kfree(net
->ipv6
.ip6_prohibit_entry
);
2918 kfree(net
->ipv6
.ip6_null_entry
);
2920 out_ip6_dst_entries
:
2921 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2926 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2928 #ifdef CONFIG_PROC_FS
2929 proc_net_remove(net
, "ipv6_route");
2930 proc_net_remove(net
, "rt6_stats");
2932 kfree(net
->ipv6
.ip6_null_entry
);
2933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2934 kfree(net
->ipv6
.ip6_prohibit_entry
);
2935 kfree(net
->ipv6
.ip6_blk_hole_entry
);
2937 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2940 static struct pernet_operations ip6_route_net_ops
= {
2941 .init
= ip6_route_net_init
,
2942 .exit
= ip6_route_net_exit
,
2945 static struct notifier_block ip6_route_dev_notifier
= {
2946 .notifier_call
= ip6_route_dev_notify
,
2950 int __init
ip6_route_init(void)
2955 ip6_dst_ops_template
.kmem_cachep
=
2956 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
2957 SLAB_HWCACHE_ALIGN
, NULL
);
2958 if (!ip6_dst_ops_template
.kmem_cachep
)
2961 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
2963 goto out_kmem_cache
;
2965 ret
= register_pernet_subsys(&ip6_route_net_ops
);
2967 goto out_dst_entries
;
2969 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
2971 /* Registering of the loopback is done before this portion of code,
2972 * the loopback reference in rt6_info will not be taken, do it
2973 * manually for init_net */
2974 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
2975 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2976 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2977 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
2978 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2979 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
2980 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2984 goto out_register_subsys
;
2990 ret
= fib6_rules_init();
2995 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
2996 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
2997 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
2998 goto fib6_rules_init
;
3000 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
3002 goto fib6_rules_init
;
3008 fib6_rules_cleanup();
3013 out_register_subsys
:
3014 unregister_pernet_subsys(&ip6_route_net_ops
);
3016 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3018 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
3022 void ip6_route_cleanup(void)
3024 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
3025 fib6_rules_cleanup();
3028 unregister_pernet_subsys(&ip6_route_net_ops
);
3029 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3030 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);