net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex);
98 #endif
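/*
 * Copy-on-write for dst metrics: the shared, read-only template metrics
 * are copied into the route's inet_peer the first time they are written.
 * dst->_metrics is switched over with cmpxchg(); if another CPU won the
 * race and the current pointer is still read-only, NULL is returned so
 * the caller does not modify the shared template.
 */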
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
127 return p;
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
147 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
149 return 0;
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
156 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
159 return NULL;
162 static struct dst_ops ip6_dst_blackhole_ops = {
163 .family = AF_INET6,
164 .protocol = cpu_to_be16(ETH_P_IPV6),
165 .destroy = ip6_dst_destroy,
166 .check = ip6_dst_check,
167 .default_mtu = ip6_blackhole_default_mtu,
168 .default_advmss = ip6_default_advmss,
169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
173 static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
177 static struct rt6_info ip6_null_entry_template = {
178 .dst = {
179 .__refcnt = ATOMIC_INIT(1),
180 .__use = 1,
181 .obsolete = -1,
182 .error = -ENETUNREACH,
183 .input = ip6_pkt_discard,
184 .output = ip6_pkt_discard_out,
186 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
187 .rt6i_protocol = RTPROT_KERNEL,
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
192 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
194 static int ip6_pkt_prohibit(struct sk_buff *skb);
195 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
197 static struct rt6_info ip6_prohibit_entry_template = {
198 .dst = {
199 .__refcnt = ATOMIC_INIT(1),
200 .__use = 1,
201 .obsolete = -1,
202 .error = -EACCES,
203 .input = ip6_pkt_prohibit,
204 .output = ip6_pkt_prohibit_out,
206 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
207 .rt6i_protocol = RTPROT_KERNEL,
208 .rt6i_metric = ~(u32) 0,
209 .rt6i_ref = ATOMIC_INIT(1),
212 static struct rt6_info ip6_blk_hole_entry_template = {
213 .dst = {
214 .__refcnt = ATOMIC_INIT(1),
215 .__use = 1,
216 .obsolete = -1,
217 .error = -EINVAL,
218 .input = dst_discard,
219 .output = dst_discard,
221 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
222 .rt6i_protocol = RTPROT_KERNEL,
223 .rt6i_metric = ~(u32) 0,
224 .rt6i_ref = ATOMIC_INIT(1),
227 #endif
229 /* allocate dst with ip6_dst_ops */
230 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
232 return (struct rt6_info *)dst_alloc(ops, 0);
235 static void ip6_dst_destroy(struct dst_entry *dst)
237 struct rt6_info *rt = (struct rt6_info *)dst;
238 struct inet6_dev *idev = rt->rt6i_idev;
239 struct inet_peer *peer = rt->rt6i_peer;
241 if (idev != NULL) {
242 rt->rt6i_idev = NULL;
243 in6_dev_put(idev);
245 if (peer) {
246 rt->rt6i_peer = NULL;
247 inet_putpeer(peer);
251 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
253 static u32 rt6_peer_genid(void)
255 return atomic_read(&__rt6_peer_genid);
258 void rt6_bind_peer(struct rt6_info *rt, int create)
260 struct inet_peer *peer;
262 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
263 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
264 inet_putpeer(peer);
265 else
266 rt->rt6i_peer_genid = rt6_peer_genid();
269 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
270 int how)
272 struct rt6_info *rt = (struct rt6_info *)dst;
273 struct inet6_dev *idev = rt->rt6i_idev;
274 struct net_device *loopback_dev =
275 dev_net(dev)->loopback_dev;
277 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
278 struct inet6_dev *loopback_idev =
279 in6_dev_get(loopback_dev);
280 if (loopback_idev != NULL) {
281 rt->rt6i_idev = loopback_idev;
282 in6_dev_put(idev);
287 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
289 return (rt->rt6i_flags & RTF_EXPIRES) &&
290 time_after(jiffies, rt->rt6i_expires);
293 static inline int rt6_need_strict(const struct in6_addr *daddr)
295 return ipv6_addr_type(daddr) &
296 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
300 * Route lookup. Any table->tb6_lock is implied.
303 static inline struct rt6_info *rt6_device_match(struct net *net,
304 struct rt6_info *rt,
305 const struct in6_addr *saddr,
306 int oif,
307 int flags)
309 struct rt6_info *local = NULL;
310 struct rt6_info *sprt;
312 if (!oif && ipv6_addr_any(saddr))
313 goto out;
315 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
316 struct net_device *dev = sprt->rt6i_dev;
318 if (oif) {
319 if (dev->ifindex == oif)
320 return sprt;
321 if (dev->flags & IFF_LOOPBACK) {
322 if (sprt->rt6i_idev == NULL ||
323 sprt->rt6i_idev->dev->ifindex != oif) {
324 if (flags & RT6_LOOKUP_F_IFACE && oif)
325 continue;
326 if (local && (!oif ||
327 local->rt6i_idev->dev->ifindex == oif))
328 continue;
330 local = sprt;
332 } else {
333 if (ipv6_chk_addr(net, saddr, dev,
334 flags & RT6_LOOKUP_F_IFACE))
335 return sprt;
339 if (oif) {
340 if (local)
341 return local;
343 if (flags & RT6_LOOKUP_F_IFACE)
344 return net->ipv6.ip6_null_entry;
346 out:
347 return rt;
350 #ifdef CONFIG_IPV6_ROUTER_PREF
351 static void rt6_probe(struct rt6_info *rt)
353 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
355 * Okay, this does not seem to be appropriate
356 * for now, however, we need to check if it
357 * is really so; aka Router Reachability Probing.
359 * Router Reachability Probe MUST be rate-limited
360 * to no more than one per minute.
362 if (!neigh || (neigh->nud_state & NUD_VALID))
363 return;
364 read_lock_bh(&neigh->lock);
365 if (!(neigh->nud_state & NUD_VALID) &&
366 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
367 struct in6_addr mcaddr;
368 struct in6_addr *target;
370 neigh->updated = jiffies;
371 read_unlock_bh(&neigh->lock);
373 target = (struct in6_addr *)&neigh->primary_key;
374 addrconf_addr_solict_mult(target, &mcaddr);
375 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
376 } else
377 read_unlock_bh(&neigh->lock);
379 #else
380 static inline void rt6_probe(struct rt6_info *rt)
383 #endif
386 * Default Router Selection (RFC 2461 6.3.6)
388 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
390 struct net_device *dev = rt->rt6i_dev;
391 if (!oif || dev->ifindex == oif)
392 return 2;
393 if ((dev->flags & IFF_LOOPBACK) &&
394 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
395 return 1;
396 return 0;
399 static inline int rt6_check_neigh(struct rt6_info *rt)
401 struct neighbour *neigh = rt->rt6i_nexthop;
402 int m;
403 if (rt->rt6i_flags & RTF_NONEXTHOP ||
404 !(rt->rt6i_flags & RTF_GATEWAY))
405 m = 1;
406 else if (neigh) {
407 read_lock_bh(&neigh->lock);
408 if (neigh->nud_state & NUD_VALID)
409 m = 2;
410 #ifdef CONFIG_IPV6_ROUTER_PREF
411 else if (neigh->nud_state & NUD_FAILED)
412 m = 0;
413 #endif
414 else
415 m = 1;
416 read_unlock_bh(&neigh->lock);
417 } else
418 m = 0;
419 return m;
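/*
 * Score a route for default router selection: rt6_check_dev() scores the
 * outgoing device match (2 = exact oif match, 1 = loopback backed by oif,
 * 0 = mismatch) and rt6_check_neigh() scores next-hop reachability
 * (2 = NUD_VALID, 1 = unknown state or no next hop needed, 0 = failed or
 * no neighbour).  rt6_score_route() combines both, mixing in the decoded
 * router preference bits under CONFIG_IPV6_ROUTER_PREF, and returns -1
 * when a strict requirement (RT6_LOOKUP_F_IFACE or RT6_LOOKUP_F_REACHABLE)
 * is not met.
 */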
422 static int rt6_score_route(struct rt6_info *rt, int oif,
423 int strict)
425 int m, n;
427 m = rt6_check_dev(rt, oif);
428 if (!m && (strict & RT6_LOOKUP_F_IFACE))
429 return -1;
430 #ifdef CONFIG_IPV6_ROUTER_PREF
431 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
432 #endif
433 n = rt6_check_neigh(rt);
434 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
435 return -1;
436 return m;
439 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
440 int *mpri, struct rt6_info *match)
442 int m;
444 if (rt6_check_expired(rt))
445 goto out;
447 m = rt6_score_route(rt, oif, strict);
448 if (m < 0)
449 goto out;
451 if (m > *mpri) {
452 if (strict & RT6_LOOKUP_F_REACHABLE)
453 rt6_probe(match);
454 *mpri = m;
455 match = rt;
456 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
457 rt6_probe(rt);
460 out:
461 return match;
464 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
465 struct rt6_info *rr_head,
466 u32 metric, int oif, int strict)
468 struct rt6_info *rt, *match;
469 int mpri = -1;
471 match = NULL;
472 for (rt = rr_head; rt && rt->rt6i_metric == metric;
473 rt = rt->dst.rt6_next)
474 match = find_match(rt, oif, strict, &mpri, match);
475 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
476 rt = rt->dst.rt6_next)
477 match = find_match(rt, oif, strict, &mpri, match);
479 return match;
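/*
 * rt6_select() implements the default router selection described in the
 * changelog above: starting at fn->rr_ptr, score every route with the same
 * metric and return the best (probably reachable) one.  If nothing matched
 * under RT6_LOOKUP_F_REACHABLE, advance rr_ptr so the next lookup
 * round-robins to the following router of equal metric.
 */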
482 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
484 struct rt6_info *match, *rt0;
485 struct net *net;
487 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
488 __func__, fn->leaf, oif);
490 rt0 = fn->rr_ptr;
491 if (!rt0)
492 fn->rr_ptr = rt0 = fn->leaf;
494 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
496 if (!match &&
497 (strict & RT6_LOOKUP_F_REACHABLE)) {
498 struct rt6_info *next = rt0->dst.rt6_next;
500 /* no entries matched; do round-robin */
501 if (!next || next->rt6i_metric != rt0->rt6i_metric)
502 next = fn->leaf;
504 if (next != rt0)
505 fn->rr_ptr = next;
508 RT6_TRACE("%s() => %p\n",
509 __func__, match);
511 net = dev_net(rt0->rt6i_dev);
512 return match ? match : net->ipv6.ip6_null_entry;
515 #ifdef CONFIG_IPV6_ROUTE_INFO
516 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
517 const struct in6_addr *gwaddr)
519 struct net *net = dev_net(dev);
520 struct route_info *rinfo = (struct route_info *) opt;
521 struct in6_addr prefix_buf, *prefix;
522 unsigned int pref;
523 unsigned long lifetime;
524 struct rt6_info *rt;
526 if (len < sizeof(struct route_info)) {
527 return -EINVAL;
530 /* Sanity check for prefix_len and length */
531 if (rinfo->length > 3) {
532 return -EINVAL;
533 } else if (rinfo->prefix_len > 128) {
534 return -EINVAL;
535 } else if (rinfo->prefix_len > 64) {
536 if (rinfo->length < 2) {
537 return -EINVAL;
539 } else if (rinfo->prefix_len > 0) {
540 if (rinfo->length < 1) {
541 return -EINVAL;
545 pref = rinfo->route_pref;
546 if (pref == ICMPV6_ROUTER_PREF_INVALID)
547 return -EINVAL;
549 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
551 if (rinfo->length == 3)
552 prefix = (struct in6_addr *)rinfo->prefix;
553 else {
554 /* this function is safe */
555 ipv6_addr_prefix(&prefix_buf,
556 (struct in6_addr *)rinfo->prefix,
557 rinfo->prefix_len);
558 prefix = &prefix_buf;
561 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
562 dev->ifindex);
564 if (rt && !lifetime) {
565 ip6_del_rt(rt);
566 rt = NULL;
569 if (!rt && lifetime)
570 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
571 pref);
572 else if (rt)
573 rt->rt6i_flags = RTF_ROUTEINFO |
574 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
576 if (rt) {
577 if (!addrconf_finite_timeout(lifetime)) {
578 rt->rt6i_flags &= ~RTF_EXPIRES;
579 } else {
580 rt->rt6i_expires = jiffies + HZ * lifetime;
581 rt->rt6i_flags |= RTF_EXPIRES;
583 dst_release(&rt->dst);
585 return 0;
587 #endif
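/*
 * BACKTRACK: if the lookup resolved to the null entry, walk back up the
 * fib6 tree.  At each step, descend into the parent's source-address
 * subtree (FIB6_SUBTREE) when one exists, otherwise move to the parent
 * itself; restart the route selection at the first node carrying route
 * info (RTN_RTINFO), or give up at the tree root (RTN_TL_ROOT).
 */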
589 #define BACKTRACK(__net, saddr) \
590 do { \
591 if (rt == __net->ipv6.ip6_null_entry) { \
592 struct fib6_node *pn; \
593 while (1) { \
594 if (fn->fn_flags & RTN_TL_ROOT) \
595 goto out; \
596 pn = fn->parent; \
597 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
598 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
599 else \
600 fn = pn; \
601 if (fn->fn_flags & RTN_RTINFO) \
602 goto restart; \
605 } while(0)
607 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
608 struct fib6_table *table,
609 struct flowi6 *fl6, int flags)
611 struct fib6_node *fn;
612 struct rt6_info *rt;
614 read_lock_bh(&table->tb6_lock);
615 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
616 restart:
617 rt = fn->leaf;
618 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
619 BACKTRACK(net, &fl6->saddr);
620 out:
621 dst_use(&rt->dst, jiffies);
622 read_unlock_bh(&table->tb6_lock);
623 return rt;
627 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
628 const struct in6_addr *saddr, int oif, int strict)
630 struct flowi6 fl6 = {
631 .flowi6_oif = oif,
632 .daddr = *daddr,
634 struct dst_entry *dst;
635 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
637 if (saddr) {
638 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
639 flags |= RT6_LOOKUP_F_HAS_SADDR;
642 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
643 if (dst->error == 0)
644 return (struct rt6_info *) dst;
646 dst_release(dst);
648 return NULL;
651 EXPORT_SYMBOL(rt6_lookup);
 653 /* ip6_ins_rt is called with a FREE table->tb6_lock.
 654 It takes a new route entry; if the addition fails for any reason
 655 the route is freed. In any case, if the caller does not hold a
 656 reference to it, it may be destroyed.
659 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
661 int err;
662 struct fib6_table *table;
664 table = rt->rt6i_table;
665 write_lock_bh(&table->tb6_lock);
666 err = fib6_add(&table->tb6_root, rt, info);
667 write_unlock_bh(&table->tb6_lock);
669 return err;
672 int ip6_ins_rt(struct rt6_info *rt)
674 struct nl_info info = {
675 .nl_net = dev_net(rt->rt6i_dev),
677 return __ip6_ins_rt(rt, &info);
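/*
 * rt6_alloc_cow(): clone 'ort' into a host (/128) RTF_CACHE entry for
 * this destination (and source under CONFIG_IPV6_SUBTREES) and resolve
 * its next-hop neighbour.  On neighbour table overflow the GC sysctls
 * are temporarily relaxed, a GC pass is forced and the allocation is
 * retried once (only when not called from softirq context).
 */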
680 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
681 const struct in6_addr *saddr)
683 struct rt6_info *rt;
686 * Clone the route.
689 rt = ip6_rt_copy(ort);
691 if (rt) {
692 struct neighbour *neigh;
693 int attempts = !in_softirq();
695 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
696 if (rt->rt6i_dst.plen != 128 &&
697 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
698 rt->rt6i_flags |= RTF_ANYCAST;
699 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
702 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
703 rt->rt6i_dst.plen = 128;
704 rt->rt6i_flags |= RTF_CACHE;
705 rt->dst.flags |= DST_HOST;
707 #ifdef CONFIG_IPV6_SUBTREES
708 if (rt->rt6i_src.plen && saddr) {
709 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
710 rt->rt6i_src.plen = 128;
712 #endif
714 retry:
715 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
716 if (IS_ERR(neigh)) {
717 struct net *net = dev_net(rt->rt6i_dev);
718 int saved_rt_min_interval =
719 net->ipv6.sysctl.ip6_rt_gc_min_interval;
720 int saved_rt_elasticity =
721 net->ipv6.sysctl.ip6_rt_gc_elasticity;
723 if (attempts-- > 0) {
724 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
725 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
727 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
729 net->ipv6.sysctl.ip6_rt_gc_elasticity =
730 saved_rt_elasticity;
731 net->ipv6.sysctl.ip6_rt_gc_min_interval =
732 saved_rt_min_interval;
733 goto retry;
736 if (net_ratelimit())
737 printk(KERN_WARNING
738 "ipv6: Neighbour table overflow.\n");
739 dst_free(&rt->dst);
740 return NULL;
742 rt->rt6i_nexthop = neigh;
746 return rt;
749 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
751 struct rt6_info *rt = ip6_rt_copy(ort);
752 if (rt) {
753 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
754 rt->rt6i_dst.plen = 128;
755 rt->rt6i_flags |= RTF_CACHE;
756 rt->dst.flags |= DST_HOST;
757 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
759 return rt;
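/*
 * ip6_pol_route(): core policy routing lookup.  Select the best route
 * (preferring probably-reachable routers on the first pass), and unless
 * it is already an RTF_CACHE entry, create a per-destination copy
 * (rt6_alloc_cow() when a next hop must be resolved, rt6_alloc_clone()
 * otherwise), insert it, and relook up if a concurrent insertion raced
 * with us.  A second pass without RT6_LOOKUP_F_REACHABLE is the fallback.
 */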
762 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
763 struct flowi6 *fl6, int flags)
765 struct fib6_node *fn;
766 struct rt6_info *rt, *nrt;
767 int strict = 0;
768 int attempts = 3;
769 int err;
770 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
772 strict |= flags & RT6_LOOKUP_F_IFACE;
774 relookup:
775 read_lock_bh(&table->tb6_lock);
777 restart_2:
778 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
780 restart:
781 rt = rt6_select(fn, oif, strict | reachable);
783 BACKTRACK(net, &fl6->saddr);
784 if (rt == net->ipv6.ip6_null_entry ||
785 rt->rt6i_flags & RTF_CACHE)
786 goto out;
788 dst_hold(&rt->dst);
789 read_unlock_bh(&table->tb6_lock);
791 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
792 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
793 else if (!(rt->dst.flags & DST_HOST))
794 nrt = rt6_alloc_clone(rt, &fl6->daddr);
795 else
796 goto out2;
798 dst_release(&rt->dst);
799 rt = nrt ? : net->ipv6.ip6_null_entry;
801 dst_hold(&rt->dst);
802 if (nrt) {
803 err = ip6_ins_rt(nrt);
804 if (!err)
805 goto out2;
808 if (--attempts <= 0)
809 goto out2;
812 * Race condition! In the gap, when table->tb6_lock was
813 * released someone could insert this route. Relookup.
815 dst_release(&rt->dst);
816 goto relookup;
818 out:
819 if (reachable) {
820 reachable = 0;
821 goto restart_2;
823 dst_hold(&rt->dst);
824 read_unlock_bh(&table->tb6_lock);
825 out2:
826 rt->dst.lastuse = jiffies;
827 rt->dst.__use++;
829 return rt;
832 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
833 struct flowi6 *fl6, int flags)
835 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
838 void ip6_route_input(struct sk_buff *skb)
840 const struct ipv6hdr *iph = ipv6_hdr(skb);
841 struct net *net = dev_net(skb->dev);
842 int flags = RT6_LOOKUP_F_HAS_SADDR;
843 struct flowi6 fl6 = {
844 .flowi6_iif = skb->dev->ifindex,
845 .daddr = iph->daddr,
846 .saddr = iph->saddr,
847 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
848 .flowi6_mark = skb->mark,
849 .flowi6_proto = iph->nexthdr,
852 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
853 flags |= RT6_LOOKUP_F_IFACE;
855 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
858 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
859 struct flowi6 *fl6, int flags)
861 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
864 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
865 struct flowi6 *fl6)
867 int flags = 0;
869 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
870 flags |= RT6_LOOKUP_F_IFACE;
872 if (!ipv6_addr_any(&fl6->saddr))
873 flags |= RT6_LOOKUP_F_HAS_SADDR;
874 else if (sk)
875 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
877 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
880 EXPORT_SYMBOL(ip6_route_output);
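/*
 * ip6_blackhole_route(): build a stand-in dst based on 'dst_orig' that
 * silently discards traffic (both input and output are dst_discard) while
 * copying the device, metrics and addressing of the original route.
 */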
882 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
884 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
885 struct rt6_info *ort = (struct rt6_info *) dst_orig;
886 struct dst_entry *new = NULL;
888 if (rt) {
889 new = &rt->dst;
891 new->__use = 1;
892 new->input = dst_discard;
893 new->output = dst_discard;
895 dst_copy_metrics(new, &ort->dst);
896 new->dev = ort->dst.dev;
897 if (new->dev)
898 dev_hold(new->dev);
899 rt->rt6i_idev = ort->rt6i_idev;
900 if (rt->rt6i_idev)
901 in6_dev_hold(rt->rt6i_idev);
902 rt->rt6i_expires = 0;
904 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
905 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
906 rt->rt6i_metric = 0;
908 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
909 #ifdef CONFIG_IPV6_SUBTREES
910 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
911 #endif
913 dst_free(new);
916 dst_release(dst_orig);
917 return new ? new : ERR_PTR(-ENOMEM);
921 * Destination cache support functions
924 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
926 struct rt6_info *rt;
928 rt = (struct rt6_info *) dst;
930 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
931 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
932 if (!rt->rt6i_peer)
933 rt6_bind_peer(rt, 0);
934 rt->rt6i_peer_genid = rt6_peer_genid();
936 return dst;
938 return NULL;
941 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
943 struct rt6_info *rt = (struct rt6_info *) dst;
945 if (rt) {
946 if (rt->rt6i_flags & RTF_CACHE) {
947 if (rt6_check_expired(rt)) {
948 ip6_del_rt(rt);
949 dst = NULL;
951 } else {
952 dst_release(dst);
953 dst = NULL;
956 return dst;
959 static void ip6_link_failure(struct sk_buff *skb)
961 struct rt6_info *rt;
963 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
965 rt = (struct rt6_info *) skb_dst(skb);
966 if (rt) {
967 if (rt->rt6i_flags&RTF_CACHE) {
968 dst_set_expires(&rt->dst, 0);
969 rt->rt6i_flags |= RTF_EXPIRES;
970 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
971 rt->rt6i_node->fn_sernum = -1;
975 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
977 struct rt6_info *rt6 = (struct rt6_info*)dst;
979 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
980 rt6->rt6i_flags |= RTF_MODIFIED;
981 if (mtu < IPV6_MIN_MTU) {
982 u32 features = dst_metric(dst, RTAX_FEATURES);
983 mtu = IPV6_MIN_MTU;
984 features |= RTAX_FEATURE_ALLFRAG;
985 dst_metric_set(dst, RTAX_FEATURES, features);
987 dst_metric_set(dst, RTAX_MTU, mtu);
991 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
993 struct net_device *dev = dst->dev;
994 unsigned int mtu = dst_mtu(dst);
995 struct net *net = dev_net(dev);
997 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
999 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1000 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1003 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1004 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1005 * IPV6_MAXPLEN is also valid and means: "any MSS,
1006 * rely only on pmtu discovery"
1008 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1009 mtu = IPV6_MAXPLEN;
1010 return mtu;
1013 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1015 unsigned int mtu = IPV6_MIN_MTU;
1016 struct inet6_dev *idev;
1018 rcu_read_lock();
1019 idev = __in6_dev_get(dst->dev);
1020 if (idev)
1021 mtu = idev->cnf.mtu6;
1022 rcu_read_unlock();
1024 return mtu;
1027 static struct dst_entry *icmp6_dst_gc_list;
1028 static DEFINE_SPINLOCK(icmp6_dst_lock);
1030 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1031 struct neighbour *neigh,
1032 const struct in6_addr *addr)
1034 struct rt6_info *rt;
1035 struct inet6_dev *idev = in6_dev_get(dev);
1036 struct net *net = dev_net(dev);
1038 if (unlikely(idev == NULL))
1039 return NULL;
1041 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1042 if (unlikely(rt == NULL)) {
1043 in6_dev_put(idev);
1044 goto out;
1047 dev_hold(dev);
1048 if (neigh)
1049 neigh_hold(neigh);
1050 else {
1051 neigh = ndisc_get_neigh(dev, addr);
1052 if (IS_ERR(neigh))
1053 neigh = NULL;
1056 rt->rt6i_dev = dev;
1057 rt->rt6i_idev = idev;
1058 rt->rt6i_nexthop = neigh;
1059 atomic_set(&rt->dst.__refcnt, 1);
1060 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1061 rt->dst.output = ip6_output;
1063 #if 0 /* there's no chance to use these for ndisc */
1064 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1065 ? DST_HOST
1066 : 0;
1067 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1068 rt->rt6i_dst.plen = 128;
1069 #endif
1071 spin_lock_bh(&icmp6_dst_lock);
1072 rt->dst.next = icmp6_dst_gc_list;
1073 icmp6_dst_gc_list = &rt->dst;
1074 spin_unlock_bh(&icmp6_dst_lock);
1076 fib6_force_start_gc(net);
1078 out:
1079 return &rt->dst;
1082 int icmp6_dst_gc(void)
1084 struct dst_entry *dst, **pprev;
1085 int more = 0;
1087 spin_lock_bh(&icmp6_dst_lock);
1088 pprev = &icmp6_dst_gc_list;
1090 while ((dst = *pprev) != NULL) {
1091 if (!atomic_read(&dst->__refcnt)) {
1092 *pprev = dst->next;
1093 dst_free(dst);
1094 } else {
1095 pprev = &dst->next;
1096 ++more;
1100 spin_unlock_bh(&icmp6_dst_lock);
1102 return more;
1105 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1106 void *arg)
1108 struct dst_entry *dst, **pprev;
1110 spin_lock_bh(&icmp6_dst_lock);
1111 pprev = &icmp6_dst_gc_list;
1112 while ((dst = *pprev) != NULL) {
1113 struct rt6_info *rt = (struct rt6_info *) dst;
1114 if (func(rt, arg)) {
1115 *pprev = dst->next;
1116 dst_free(dst);
1117 } else {
1118 pprev = &dst->next;
1121 spin_unlock_bh(&icmp6_dst_lock);
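/*
 * ip6_dst_gc(): routing cache garbage collection.  Skip the pass if we ran
 * recently and the entry count is within ip6_rt_max_size; otherwise run
 * fib6_run_gc() with an adaptive expiry that grows while under pressure
 * and decays according to ip6_rt_gc_elasticity afterwards.
 */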
1124 static int ip6_dst_gc(struct dst_ops *ops)
1126 unsigned long now = jiffies;
1127 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1128 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1129 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1130 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1131 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1132 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1133 int entries;
1135 entries = dst_entries_get_fast(ops);
1136 if (time_after(rt_last_gc + rt_min_interval, now) &&
1137 entries <= rt_max_size)
1138 goto out;
1140 net->ipv6.ip6_rt_gc_expire++;
1141 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1142 net->ipv6.ip6_rt_last_gc = now;
1143 entries = dst_entries_get_slow(ops);
1144 if (entries < ops->gc_thresh)
1145 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1146 out:
1147 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1148 return entries > rt_max_size;
1151 /* Clean host part of a prefix. Not necessary in radix tree,
1152 but results in cleaner routing tables.
1154 Remove it only when all the things will work!
1157 int ip6_dst_hoplimit(struct dst_entry *dst)
1159 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1160 if (hoplimit == 0) {
1161 struct net_device *dev = dst->dev;
1162 struct inet6_dev *idev;
1164 rcu_read_lock();
1165 idev = __in6_dev_get(dev);
1166 if (idev)
1167 hoplimit = idev->cnf.hop_limit;
1168 else
1169 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1170 rcu_read_unlock();
1172 return hoplimit;
1174 EXPORT_SYMBOL(ip6_dst_hoplimit);
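/*
 * ip6_route_add(): install a route described by a fib6_config (built from
 * rtnetlink or ioctl input).  Validates prefix lengths, resolves the output
 * device and gateway, promotes unusable loopback routes to reject routes,
 * applies any RTAX_* metrics and finally inserts the entry with
 * __ip6_ins_rt().
 */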
1180 int ip6_route_add(struct fib6_config *cfg)
1182 int err;
1183 struct net *net = cfg->fc_nlinfo.nl_net;
1184 struct rt6_info *rt = NULL;
1185 struct net_device *dev = NULL;
1186 struct inet6_dev *idev = NULL;
1187 struct fib6_table *table;
1188 int addr_type;
1190 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1191 return -EINVAL;
1192 #ifndef CONFIG_IPV6_SUBTREES
1193 if (cfg->fc_src_len)
1194 return -EINVAL;
1195 #endif
1196 if (cfg->fc_ifindex) {
1197 err = -ENODEV;
1198 dev = dev_get_by_index(net, cfg->fc_ifindex);
1199 if (!dev)
1200 goto out;
1201 idev = in6_dev_get(dev);
1202 if (!idev)
1203 goto out;
1206 if (cfg->fc_metric == 0)
1207 cfg->fc_metric = IP6_RT_PRIO_USER;
1209 table = fib6_new_table(net, cfg->fc_table);
1210 if (table == NULL) {
1211 err = -ENOBUFS;
1212 goto out;
1215 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1217 if (rt == NULL) {
1218 err = -ENOMEM;
1219 goto out;
1222 rt->dst.obsolete = -1;
1223 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1224 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1227 if (cfg->fc_protocol == RTPROT_UNSPEC)
1228 cfg->fc_protocol = RTPROT_BOOT;
1229 rt->rt6i_protocol = cfg->fc_protocol;
1231 addr_type = ipv6_addr_type(&cfg->fc_dst);
1233 if (addr_type & IPV6_ADDR_MULTICAST)
1234 rt->dst.input = ip6_mc_input;
1235 else if (cfg->fc_flags & RTF_LOCAL)
1236 rt->dst.input = ip6_input;
1237 else
1238 rt->dst.input = ip6_forward;
1240 rt->dst.output = ip6_output;
1242 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1243 rt->rt6i_dst.plen = cfg->fc_dst_len;
1244 if (rt->rt6i_dst.plen == 128)
1245 rt->dst.flags = DST_HOST;
1247 #ifdef CONFIG_IPV6_SUBTREES
1248 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1249 rt->rt6i_src.plen = cfg->fc_src_len;
1250 #endif
1252 rt->rt6i_metric = cfg->fc_metric;
1254 /* We cannot add true routes via loopback here,
1255 they would result in kernel looping; promote them to reject routes
1257 if ((cfg->fc_flags & RTF_REJECT) ||
1258 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1259 && !(cfg->fc_flags&RTF_LOCAL))) {
1260 /* hold loopback dev/idev if we haven't done so. */
1261 if (dev != net->loopback_dev) {
1262 if (dev) {
1263 dev_put(dev);
1264 in6_dev_put(idev);
1266 dev = net->loopback_dev;
1267 dev_hold(dev);
1268 idev = in6_dev_get(dev);
1269 if (!idev) {
1270 err = -ENODEV;
1271 goto out;
1274 rt->dst.output = ip6_pkt_discard_out;
1275 rt->dst.input = ip6_pkt_discard;
1276 rt->dst.error = -ENETUNREACH;
1277 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1278 goto install_route;
1281 if (cfg->fc_flags & RTF_GATEWAY) {
1282 const struct in6_addr *gw_addr;
1283 int gwa_type;
1285 gw_addr = &cfg->fc_gateway;
1286 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1287 gwa_type = ipv6_addr_type(gw_addr);
1289 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1290 struct rt6_info *grt;
 1292 /* IPv6 strictly prohibits using non-link-local
 1293 addresses as a nexthop address.
 1294 Otherwise, the router will not be able to send redirects.
 1295 That is very good, but in some (rare!) circumstances
 1296 (SIT, PtP, NBMA NOARP links) it is handy to allow
 1297 some exceptions. --ANK
1299 err = -EINVAL;
1300 if (!(gwa_type&IPV6_ADDR_UNICAST))
1301 goto out;
1303 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1305 err = -EHOSTUNREACH;
1306 if (grt == NULL)
1307 goto out;
1308 if (dev) {
1309 if (dev != grt->rt6i_dev) {
1310 dst_release(&grt->dst);
1311 goto out;
1313 } else {
1314 dev = grt->rt6i_dev;
1315 idev = grt->rt6i_idev;
1316 dev_hold(dev);
1317 in6_dev_hold(grt->rt6i_idev);
1319 if (!(grt->rt6i_flags&RTF_GATEWAY))
1320 err = 0;
1321 dst_release(&grt->dst);
1323 if (err)
1324 goto out;
1326 err = -EINVAL;
1327 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1328 goto out;
1331 err = -ENODEV;
1332 if (dev == NULL)
1333 goto out;
1335 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1336 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1337 err = -EINVAL;
1338 goto out;
1340 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1341 rt->rt6i_prefsrc.plen = 128;
1342 } else
1343 rt->rt6i_prefsrc.plen = 0;
1345 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1346 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1347 if (IS_ERR(rt->rt6i_nexthop)) {
1348 err = PTR_ERR(rt->rt6i_nexthop);
1349 rt->rt6i_nexthop = NULL;
1350 goto out;
1354 rt->rt6i_flags = cfg->fc_flags;
1356 install_route:
1357 if (cfg->fc_mx) {
1358 struct nlattr *nla;
1359 int remaining;
1361 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1362 int type = nla_type(nla);
1364 if (type) {
1365 if (type > RTAX_MAX) {
1366 err = -EINVAL;
1367 goto out;
1370 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1375 rt->dst.dev = dev;
1376 rt->rt6i_idev = idev;
1377 rt->rt6i_table = table;
1379 cfg->fc_nlinfo.nl_net = dev_net(dev);
1381 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1383 out:
1384 if (dev)
1385 dev_put(dev);
1386 if (idev)
1387 in6_dev_put(idev);
1388 if (rt)
1389 dst_free(&rt->dst);
1390 return err;
1393 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1395 int err;
1396 struct fib6_table *table;
1397 struct net *net = dev_net(rt->rt6i_dev);
1399 if (rt == net->ipv6.ip6_null_entry)
1400 return -ENOENT;
1402 table = rt->rt6i_table;
1403 write_lock_bh(&table->tb6_lock);
1405 err = fib6_del(rt, info);
1406 dst_release(&rt->dst);
1408 write_unlock_bh(&table->tb6_lock);
1410 return err;
1413 int ip6_del_rt(struct rt6_info *rt)
1415 struct nl_info info = {
1416 .nl_net = dev_net(rt->rt6i_dev),
1418 return __ip6_del_rt(rt, &info);
1421 static int ip6_route_del(struct fib6_config *cfg)
1423 struct fib6_table *table;
1424 struct fib6_node *fn;
1425 struct rt6_info *rt;
1426 int err = -ESRCH;
1428 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1429 if (table == NULL)
1430 return err;
1432 read_lock_bh(&table->tb6_lock);
1434 fn = fib6_locate(&table->tb6_root,
1435 &cfg->fc_dst, cfg->fc_dst_len,
1436 &cfg->fc_src, cfg->fc_src_len);
1438 if (fn) {
1439 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1440 if (cfg->fc_ifindex &&
1441 (rt->rt6i_dev == NULL ||
1442 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1443 continue;
1444 if (cfg->fc_flags & RTF_GATEWAY &&
1445 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1446 continue;
1447 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1448 continue;
1449 dst_hold(&rt->dst);
1450 read_unlock_bh(&table->tb6_lock);
1452 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1455 read_unlock_bh(&table->tb6_lock);
1457 return err;
1461 * Handle redirects
1463 struct ip6rd_flowi {
1464 struct flowi6 fl6;
1465 struct in6_addr gateway;
1468 static struct rt6_info *__ip6_route_redirect(struct net *net,
1469 struct fib6_table *table,
1470 struct flowi6 *fl6,
1471 int flags)
1473 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1474 struct rt6_info *rt;
1475 struct fib6_node *fn;
1478 * Get the "current" route for this destination and
 1479 * check if the redirect has come from an appropriate router.
1481 * RFC 2461 specifies that redirects should only be
1482 * accepted if they come from the nexthop to the target.
1483 * Due to the way the routes are chosen, this notion
1484 * is a bit fuzzy and one might need to check all possible
1485 * routes.
1488 read_lock_bh(&table->tb6_lock);
1489 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1490 restart:
1491 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 1493 * Current route is on-link; redirect is always invalid.
 1495 * It seems the previous statement is not true. It could
 1496 * be a node which sees us as on-link (e.g. proxy ndisc).
 1497 * But then the router serving it might decide that we should
 1498 * know the truth 8)8) --ANK (980726).
1500 if (rt6_check_expired(rt))
1501 continue;
1502 if (!(rt->rt6i_flags & RTF_GATEWAY))
1503 continue;
1504 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1505 continue;
1506 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1507 continue;
1508 break;
1511 if (!rt)
1512 rt = net->ipv6.ip6_null_entry;
1513 BACKTRACK(net, &fl6->saddr);
1514 out:
1515 dst_hold(&rt->dst);
1517 read_unlock_bh(&table->tb6_lock);
1519 return rt;
1522 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1523 const struct in6_addr *src,
1524 const struct in6_addr *gateway,
1525 struct net_device *dev)
1527 int flags = RT6_LOOKUP_F_HAS_SADDR;
1528 struct net *net = dev_net(dev);
1529 struct ip6rd_flowi rdfl = {
1530 .fl6 = {
1531 .flowi6_oif = dev->ifindex,
1532 .daddr = *dest,
1533 .saddr = *src,
1537 ipv6_addr_copy(&rdfl.gateway, gateway);
1539 if (rt6_need_strict(dest))
1540 flags |= RT6_LOOKUP_F_IFACE;
1542 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1543 flags, __ip6_route_redirect);
1546 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1547 const struct in6_addr *saddr,
1548 struct neighbour *neigh, u8 *lladdr, int on_link)
1550 struct rt6_info *rt, *nrt = NULL;
1551 struct netevent_redirect netevent;
1552 struct net *net = dev_net(neigh->dev);
1554 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1556 if (rt == net->ipv6.ip6_null_entry) {
1557 if (net_ratelimit())
1558 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1559 "for redirect target\n");
1560 goto out;
1564 * We have finally decided to accept it.
1567 neigh_update(neigh, lladdr, NUD_STALE,
1568 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1569 NEIGH_UPDATE_F_OVERRIDE|
1570 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1571 NEIGH_UPDATE_F_ISROUTER))
1575 * Redirect received -> path was valid.
1576 * Look, redirects are sent only in response to data packets,
1577 * so that this nexthop apparently is reachable. --ANK
1579 dst_confirm(&rt->dst);
1581 /* Duplicate redirect: silently ignore. */
1582 if (neigh == rt->dst.neighbour)
1583 goto out;
1585 nrt = ip6_rt_copy(rt);
1586 if (nrt == NULL)
1587 goto out;
1589 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1590 if (on_link)
1591 nrt->rt6i_flags &= ~RTF_GATEWAY;
1593 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1594 nrt->rt6i_dst.plen = 128;
1595 nrt->dst.flags |= DST_HOST;
1597 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1598 nrt->rt6i_nexthop = neigh_clone(neigh);
1600 if (ip6_ins_rt(nrt))
1601 goto out;
1603 netevent.old = &rt->dst;
1604 netevent.new = &nrt->dst;
1605 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1607 if (rt->rt6i_flags&RTF_CACHE) {
1608 ip6_del_rt(rt);
1609 return;
1612 out:
1613 dst_release(&rt->dst);
1617 * Handle ICMP "packet too big" messages
1618 * i.e. Path MTU discovery
1621 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1622 struct net *net, u32 pmtu, int ifindex)
1624 struct rt6_info *rt, *nrt;
1625 int allfrag = 0;
1626 again:
1627 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1628 if (rt == NULL)
1629 return;
1631 if (rt6_check_expired(rt)) {
1632 ip6_del_rt(rt);
1633 goto again;
1636 if (pmtu >= dst_mtu(&rt->dst))
1637 goto out;
1639 if (pmtu < IPV6_MIN_MTU) {
 1641 * According to RFC 2460, PMTU is set to the IPv6 Minimum Link
 1642 * MTU (1280) and a fragment header should always be included
 1643 * after a node receives a Packet Too Big message reporting a PMTU
 1644 * less than the IPv6 Minimum Link MTU.
1646 pmtu = IPV6_MIN_MTU;
1647 allfrag = 1;
1650 /* New mtu received -> path was valid.
1651 They are sent only in response to data packets,
1652 so that this nexthop apparently is reachable. --ANK
1654 dst_confirm(&rt->dst);
 1656 /* Host route. If it is static, it would be better
 1657 not to override it but to add a new one, so that
 1658 when the cache entry expires the old pmtu
 1659 is restored automatically.
1661 if (rt->rt6i_flags & RTF_CACHE) {
1662 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1663 if (allfrag) {
1664 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1665 features |= RTAX_FEATURE_ALLFRAG;
1666 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1668 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1669 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1670 goto out;
 1673 /* Network route.
 1674 Two cases are possible:
 1675 1. It is a connected route. Action: COW it.
 1676 2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1678 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1679 nrt = rt6_alloc_cow(rt, daddr, saddr);
1680 else
1681 nrt = rt6_alloc_clone(rt, daddr);
1683 if (nrt) {
1684 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1685 if (allfrag) {
1686 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1687 features |= RTAX_FEATURE_ALLFRAG;
1688 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
 1691 /* According to RFC 1981, a PMTU increase shouldn't be detected
 1692 * within 5 minutes; the recommended timer is 10 minutes.
 1693 * Here the route expiration time is set to ip6_rt_mtu_expires,
 1694 * which is 10 minutes. After 10 minutes the decreased pmtu expires
 1695 * and PMTU increase detection happens automatically.
1697 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1698 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1700 ip6_ins_rt(nrt);
1702 out:
1703 dst_release(&rt->dst);
1706 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1707 struct net_device *dev, u32 pmtu)
1709 struct net *net = dev_net(dev);
1712 * RFC 1981 states that a node "MUST reduce the size of the packets it
1713 * is sending along the path" that caused the Packet Too Big message.
1714 * Since it's not possible in the general case to determine which
1715 * interface was used to send the original packet, we update the MTU
1716 * on the interface that will be used to send future packets. We also
1717 * update the MTU on the interface that received the Packet Too Big in
1718 * case the original packet was forced out that interface with
1719 * SO_BINDTODEVICE or similar. This is the next best thing to the
1720 * correct behaviour, which would be to update the MTU on all
1721 * interfaces.
1723 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1724 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1728 * Misc support functions
1731 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1733 struct net *net = dev_net(ort->rt6i_dev);
1734 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1736 if (rt) {
1737 rt->dst.input = ort->dst.input;
1738 rt->dst.output = ort->dst.output;
1740 dst_copy_metrics(&rt->dst, &ort->dst);
1741 rt->dst.error = ort->dst.error;
1742 rt->dst.dev = ort->dst.dev;
1743 if (rt->dst.dev)
1744 dev_hold(rt->dst.dev);
1745 rt->rt6i_idev = ort->rt6i_idev;
1746 if (rt->rt6i_idev)
1747 in6_dev_hold(rt->rt6i_idev);
1748 rt->dst.lastuse = jiffies;
1749 rt->rt6i_expires = 0;
1751 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1752 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1753 rt->rt6i_metric = 0;
1755 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1756 #ifdef CONFIG_IPV6_SUBTREES
1757 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1758 #endif
1759 rt->rt6i_table = ort->rt6i_table;
1761 return rt;
1764 #ifdef CONFIG_IPV6_ROUTE_INFO
1765 static struct rt6_info *rt6_get_route_info(struct net *net,
1766 const struct in6_addr *prefix, int prefixlen,
1767 const struct in6_addr *gwaddr, int ifindex)
1769 struct fib6_node *fn;
1770 struct rt6_info *rt = NULL;
1771 struct fib6_table *table;
1773 table = fib6_get_table(net, RT6_TABLE_INFO);
1774 if (table == NULL)
1775 return NULL;
1777 write_lock_bh(&table->tb6_lock);
1778 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1779 if (!fn)
1780 goto out;
1782 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1783 if (rt->rt6i_dev->ifindex != ifindex)
1784 continue;
1785 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1786 continue;
1787 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1788 continue;
1789 dst_hold(&rt->dst);
1790 break;
1792 out:
1793 write_unlock_bh(&table->tb6_lock);
1794 return rt;
1797 static struct rt6_info *rt6_add_route_info(struct net *net,
1798 const struct in6_addr *prefix, int prefixlen,
1799 const struct in6_addr *gwaddr, int ifindex,
1800 unsigned pref)
1802 struct fib6_config cfg = {
1803 .fc_table = RT6_TABLE_INFO,
1804 .fc_metric = IP6_RT_PRIO_USER,
1805 .fc_ifindex = ifindex,
1806 .fc_dst_len = prefixlen,
1807 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1808 RTF_UP | RTF_PREF(pref),
1809 .fc_nlinfo.pid = 0,
1810 .fc_nlinfo.nlh = NULL,
1811 .fc_nlinfo.nl_net = net,
1814 ipv6_addr_copy(&cfg.fc_dst, prefix);
1815 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1817 /* We should treat it as a default route if prefix length is 0. */
1818 if (!prefixlen)
1819 cfg.fc_flags |= RTF_DEFAULT;
1821 ip6_route_add(&cfg);
1823 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1825 #endif
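/*
 * Default router management: rt6_get_dflt_router() and rt6_add_dflt_router()
 * maintain RTF_ADDRCONF|RTF_DEFAULT entries in RT6_TABLE_DFLT on behalf of
 * router advertisements, and rt6_purge_dflt_routers() removes them all.
 */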
1827 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1829 struct rt6_info *rt;
1830 struct fib6_table *table;
1832 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1833 if (table == NULL)
1834 return NULL;
1836 write_lock_bh(&table->tb6_lock);
1837 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1838 if (dev == rt->rt6i_dev &&
1839 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1840 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1841 break;
1843 if (rt)
1844 dst_hold(&rt->dst);
1845 write_unlock_bh(&table->tb6_lock);
1846 return rt;
1849 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1850 struct net_device *dev,
1851 unsigned int pref)
1853 struct fib6_config cfg = {
1854 .fc_table = RT6_TABLE_DFLT,
1855 .fc_metric = IP6_RT_PRIO_USER,
1856 .fc_ifindex = dev->ifindex,
1857 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1858 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1859 .fc_nlinfo.pid = 0,
1860 .fc_nlinfo.nlh = NULL,
1861 .fc_nlinfo.nl_net = dev_net(dev),
1864 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1866 ip6_route_add(&cfg);
1868 return rt6_get_dflt_router(gwaddr, dev);
1871 void rt6_purge_dflt_routers(struct net *net)
1873 struct rt6_info *rt;
1874 struct fib6_table *table;
1876 /* NOTE: Keep consistent with rt6_get_dflt_router */
1877 table = fib6_get_table(net, RT6_TABLE_DFLT);
1878 if (table == NULL)
1879 return;
1881 restart:
1882 read_lock_bh(&table->tb6_lock);
1883 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1884 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1885 dst_hold(&rt->dst);
1886 read_unlock_bh(&table->tb6_lock);
1887 ip6_del_rt(rt);
1888 goto restart;
1891 read_unlock_bh(&table->tb6_lock);
1894 static void rtmsg_to_fib6_config(struct net *net,
1895 struct in6_rtmsg *rtmsg,
1896 struct fib6_config *cfg)
1898 memset(cfg, 0, sizeof(*cfg));
1900 cfg->fc_table = RT6_TABLE_MAIN;
1901 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1902 cfg->fc_metric = rtmsg->rtmsg_metric;
1903 cfg->fc_expires = rtmsg->rtmsg_info;
1904 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1905 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1906 cfg->fc_flags = rtmsg->rtmsg_flags;
1908 cfg->fc_nlinfo.nl_net = net;
1910 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1911 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1912 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1915 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1917 struct fib6_config cfg;
1918 struct in6_rtmsg rtmsg;
1919 int err;
1921 switch(cmd) {
1922 case SIOCADDRT: /* Add a route */
1923 case SIOCDELRT: /* Delete a route */
1924 if (!capable(CAP_NET_ADMIN))
1925 return -EPERM;
1926 err = copy_from_user(&rtmsg, arg,
1927 sizeof(struct in6_rtmsg));
1928 if (err)
1929 return -EFAULT;
1931 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1933 rtnl_lock();
1934 switch (cmd) {
1935 case SIOCADDRT:
1936 err = ip6_route_add(&cfg);
1937 break;
1938 case SIOCDELRT:
1939 err = ip6_route_del(&cfg);
1940 break;
1941 default:
1942 err = -EINVAL;
1944 rtnl_unlock();
1946 return err;
1949 return -EINVAL;
1953 * Drop the packet on the floor
1956 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1958 int type;
1959 struct dst_entry *dst = skb_dst(skb);
1960 switch (ipstats_mib_noroutes) {
1961 case IPSTATS_MIB_INNOROUTES:
1962 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1963 if (type == IPV6_ADDR_ANY) {
1964 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1965 IPSTATS_MIB_INADDRERRORS);
1966 break;
1968 /* FALLTHROUGH */
1969 case IPSTATS_MIB_OUTNOROUTES:
1970 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1971 ipstats_mib_noroutes);
1972 break;
1974 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1975 kfree_skb(skb);
1976 return 0;
1979 static int ip6_pkt_discard(struct sk_buff *skb)
1981 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1984 static int ip6_pkt_discard_out(struct sk_buff *skb)
1986 skb->dev = skb_dst(skb)->dev;
1987 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1990 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1992 static int ip6_pkt_prohibit(struct sk_buff *skb)
1994 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1997 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1999 skb->dev = skb_dst(skb)->dev;
2000 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2003 #endif
2006 * Allocate a dst for local (unicast / anycast) address.
2009 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2010 const struct in6_addr *addr,
2011 int anycast)
2013 struct net *net = dev_net(idev->dev);
2014 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
2015 struct neighbour *neigh;
2017 if (rt == NULL) {
2018 if (net_ratelimit())
2019 pr_warning("IPv6: Maximum number of routes reached,"
2020 " consider increasing route/max_size.\n");
2021 return ERR_PTR(-ENOMEM);
2024 dev_hold(net->loopback_dev);
2025 in6_dev_hold(idev);
2027 rt->dst.flags = DST_HOST;
2028 rt->dst.input = ip6_input;
2029 rt->dst.output = ip6_output;
2030 rt->rt6i_dev = net->loopback_dev;
2031 rt->rt6i_idev = idev;
2032 rt->dst.obsolete = -1;
2034 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2035 if (anycast)
2036 rt->rt6i_flags |= RTF_ANYCAST;
2037 else
2038 rt->rt6i_flags |= RTF_LOCAL;
2039 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2040 if (IS_ERR(neigh)) {
2041 dst_free(&rt->dst);
2043 return ERR_CAST(neigh);
2045 rt->rt6i_nexthop = neigh;
2047 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2048 rt->rt6i_dst.plen = 128;
2049 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2051 atomic_set(&rt->dst.__refcnt, 1);
2053 return rt;
2056 int ip6_route_get_saddr(struct net *net,
2057 struct rt6_info *rt,
2058 const struct in6_addr *daddr,
2059 unsigned int prefs,
2060 struct in6_addr *saddr)
2062 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2063 int err = 0;
2064 if (rt->rt6i_prefsrc.plen)
2065 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2066 else
2067 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2068 daddr, prefs, saddr);
2069 return err;
2072 /* remove deleted ip from prefsrc entries */
2073 struct arg_dev_net_ip {
2074 struct net_device *dev;
2075 struct net *net;
2076 struct in6_addr *addr;
2079 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2081 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2082 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2083 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2085 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2086 rt != net->ipv6.ip6_null_entry &&
2087 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2088 /* remove prefsrc entry */
2089 rt->rt6i_prefsrc.plen = 0;
2091 return 0;
2094 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2096 struct net *net = dev_net(ifp->idev->dev);
2097 struct arg_dev_net_ip adni = {
2098 .dev = ifp->idev->dev,
2099 .net = net,
2100 .addr = &ifp->addr,
2102 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2105 struct arg_dev_net {
2106 struct net_device *dev;
2107 struct net *net;
2110 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2112 const struct arg_dev_net *adn = arg;
2113 const struct net_device *dev = adn->dev;
2115 if ((rt->rt6i_dev == dev || dev == NULL) &&
2116 rt != adn->net->ipv6.ip6_null_entry) {
2117 RT6_TRACE("deleted by ifdown %p\n", rt);
2118 return -1;
2120 return 0;
2123 void rt6_ifdown(struct net *net, struct net_device *dev)
2125 struct arg_dev_net adn = {
2126 .dev = dev,
2127 .net = net,
2130 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2131 icmp6_clean_all(fib6_ifdown, &adn);
2134 struct rt6_mtu_change_arg
2136 struct net_device *dev;
2137 unsigned mtu;
2140 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2142 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2143 struct inet6_dev *idev;
2145 /* In IPv6 pmtu discovery is not optional,
 2146 so the RTAX_MTU lock cannot disable it.
2147 We still use this lock to block changes
2148 caused by addrconf/ndisc.
2151 idev = __in6_dev_get(arg->dev);
2152 if (idev == NULL)
2153 return 0;
 2155 /* For an administrative MTU increase, there is no way to discover
 2156 an IPv6 PMTU increase, so the PMTU must be updated here.
 2157 Since RFC 1981 doesn't cover updates after an administrative
 2158 MTU increase, doing so here is a MUST (e.g. for jumbo frames).
 2161 If the new MTU is less than the route PMTU, the new MTU will be the
 2162 lowest MTU in the path; update the route PMTU to reflect the
 2163 decrease. If the new MTU is greater than the route PMTU, and the
 2164 old MTU was the lowest MTU in the path, update the route PMTU
 2165 to reflect the increase. In this case, if another node's MTU is now
 2166 the lowest in the path, a Packet Too Big message will trigger
 2167 PMTU discovery.
2169 if (rt->rt6i_dev == arg->dev &&
2170 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2171 (dst_mtu(&rt->dst) >= arg->mtu ||
2172 (dst_mtu(&rt->dst) < arg->mtu &&
2173 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2174 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2176 return 0;
2179 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2181 struct rt6_mtu_change_arg arg = {
2182 .dev = dev,
2183 .mtu = mtu,
2186 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2189 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2190 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2191 [RTA_OIF] = { .type = NLA_U32 },
2192 [RTA_IIF] = { .type = NLA_U32 },
2193 [RTA_PRIORITY] = { .type = NLA_U32 },
2194 [RTA_METRICS] = { .type = NLA_NESTED },
2197 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2198 struct fib6_config *cfg)
2200 struct rtmsg *rtm;
2201 struct nlattr *tb[RTA_MAX+1];
2202 int err;
2204 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2205 if (err < 0)
2206 goto errout;
2208 err = -EINVAL;
2209 rtm = nlmsg_data(nlh);
2210 memset(cfg, 0, sizeof(*cfg));
2212 cfg->fc_table = rtm->rtm_table;
2213 cfg->fc_dst_len = rtm->rtm_dst_len;
2214 cfg->fc_src_len = rtm->rtm_src_len;
2215 cfg->fc_flags = RTF_UP;
2216 cfg->fc_protocol = rtm->rtm_protocol;
2218 if (rtm->rtm_type == RTN_UNREACHABLE)
2219 cfg->fc_flags |= RTF_REJECT;
2221 if (rtm->rtm_type == RTN_LOCAL)
2222 cfg->fc_flags |= RTF_LOCAL;
2224 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2225 cfg->fc_nlinfo.nlh = nlh;
2226 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2228 if (tb[RTA_GATEWAY]) {
2229 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2230 cfg->fc_flags |= RTF_GATEWAY;
2233 if (tb[RTA_DST]) {
2234 int plen = (rtm->rtm_dst_len + 7) >> 3;
2236 if (nla_len(tb[RTA_DST]) < plen)
2237 goto errout;
2239 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2242 if (tb[RTA_SRC]) {
2243 int plen = (rtm->rtm_src_len + 7) >> 3;
2245 if (nla_len(tb[RTA_SRC]) < plen)
2246 goto errout;
2248 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2251 if (tb[RTA_PREFSRC])
2252 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2254 if (tb[RTA_OIF])
2255 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2257 if (tb[RTA_PRIORITY])
2258 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2260 if (tb[RTA_METRICS]) {
2261 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2262 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2265 if (tb[RTA_TABLE])
2266 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2268 err = 0;
2269 errout:
2270 return err;
2273 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2275 struct fib6_config cfg;
2276 int err;
2278 err = rtm_to_fib6_config(skb, nlh, &cfg);
2279 if (err < 0)
2280 return err;
2282 return ip6_route_del(&cfg);
2285 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2287 struct fib6_config cfg;
2288 int err;
2290 err = rtm_to_fib6_config(skb, nlh, &cfg);
2291 if (err < 0)
2292 return err;
2294 return ip6_route_add(&cfg);
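/* Worst-case payload of a single route notification; used to size the
 * skb allocated in inet6_rt_notify().
 */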
2297 static inline size_t rt6_nlmsg_size(void)
2299 return NLMSG_ALIGN(sizeof(struct rtmsg))
2300 + nla_total_size(16) /* RTA_SRC */
2301 + nla_total_size(16) /* RTA_DST */
2302 + nla_total_size(16) /* RTA_GATEWAY */
2303 + nla_total_size(16) /* RTA_PREFSRC */
2304 + nla_total_size(4) /* RTA_TABLE */
2305 + nla_total_size(4) /* RTA_IIF */
2306 + nla_total_size(4) /* RTA_OIF */
2307 + nla_total_size(4) /* RTA_PRIORITY */
2308 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2309 + nla_total_size(sizeof(struct rta_cacheinfo));
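/* Fill one RTM_* message describing @rt into @skb. Returns the skb
 * length on success, 1 when the route is skipped because only prefix
 * routes were requested, or -EMSGSIZE if the message does not fit.
 */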
2312 static int rt6_fill_node(struct net *net,
2313 struct sk_buff *skb, struct rt6_info *rt,
2314 struct in6_addr *dst, struct in6_addr *src,
2315 int iif, int type, u32 pid, u32 seq,
2316 int prefix, int nowait, unsigned int flags)
2318 struct rtmsg *rtm;
2319 struct nlmsghdr *nlh;
2320 long expires;
2321 u32 table;
2323 if (prefix) { /* user wants prefix routes only */
2324 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2325 /* success since this is not a prefix route */
2326 return 1;
2330 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2331 if (nlh == NULL)
2332 return -EMSGSIZE;
2334 rtm = nlmsg_data(nlh);
2335 rtm->rtm_family = AF_INET6;
2336 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2337 rtm->rtm_src_len = rt->rt6i_src.plen;
2338 rtm->rtm_tos = 0;
2339 if (rt->rt6i_table)
2340 table = rt->rt6i_table->tb6_id;
2341 else
2342 table = RT6_TABLE_UNSPEC;
2343 rtm->rtm_table = table;
2344 NLA_PUT_U32(skb, RTA_TABLE, table);
2345 if (rt->rt6i_flags&RTF_REJECT)
2346 rtm->rtm_type = RTN_UNREACHABLE;
2347 else if (rt->rt6i_flags&RTF_LOCAL)
2348 rtm->rtm_type = RTN_LOCAL;
2349 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2350 rtm->rtm_type = RTN_LOCAL;
2351 else
2352 rtm->rtm_type = RTN_UNICAST;
2353 rtm->rtm_flags = 0;
2354 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2355 rtm->rtm_protocol = rt->rt6i_protocol;
2356 if (rt->rt6i_flags&RTF_DYNAMIC)
2357 rtm->rtm_protocol = RTPROT_REDIRECT;
2358 else if (rt->rt6i_flags & RTF_ADDRCONF)
2359 rtm->rtm_protocol = RTPROT_KERNEL;
2360 else if (rt->rt6i_flags&RTF_DEFAULT)
2361 rtm->rtm_protocol = RTPROT_RA;
2363 if (rt->rt6i_flags&RTF_CACHE)
2364 rtm->rtm_flags |= RTM_F_CLONED;
2366 if (dst) {
2367 NLA_PUT(skb, RTA_DST, 16, dst);
2368 rtm->rtm_dst_len = 128;
2369 } else if (rtm->rtm_dst_len)
2370 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2371 #ifdef CONFIG_IPV6_SUBTREES
2372 if (src) {
2373 NLA_PUT(skb, RTA_SRC, 16, src);
2374 rtm->rtm_src_len = 128;
2375 } else if (rtm->rtm_src_len)
2376 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2377 #endif
2378 if (iif) {
2379 #ifdef CONFIG_IPV6_MROUTE
2380 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2381 int err = ip6mr_get_route(net, skb, rtm, nowait);
2382 if (err <= 0) {
2383 if (!nowait) {
2384 if (err == 0)
2385 return 0;
2386 goto nla_put_failure;
2387 } else {
2388 if (err == -EMSGSIZE)
2389 goto nla_put_failure;
2392 } else
2393 #endif
2394 NLA_PUT_U32(skb, RTA_IIF, iif);
2395 } else if (dst) {
2396 struct in6_addr saddr_buf;
2397 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2398 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2401 if (rt->rt6i_prefsrc.plen) {
2402 struct in6_addr saddr_buf;
2403 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2404 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2407 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2408 goto nla_put_failure;
2410 if (rt->dst.neighbour)
2411 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2413 if (rt->dst.dev)
2414 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2416 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2418 if (!(rt->rt6i_flags & RTF_EXPIRES))
2419 expires = 0;
2420 else if (rt->rt6i_expires - jiffies < INT_MAX)
2421 expires = rt->rt6i_expires - jiffies;
2422 else
2423 expires = INT_MAX;
2425 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2426 expires, rt->dst.error) < 0)
2427 goto nla_put_failure;
2429 return nlmsg_end(skb, nlh);
2431 nla_put_failure:
2432 nlmsg_cancel(skb, nlh);
2433 return -EMSGSIZE;
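/* fib6 dump callback used by RTM_GETROUTE dumps: emit one route per
 * invocation, honouring the RTM_F_PREFIX filter from the request.
 */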
2436 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2438 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2439 int prefix;
2441 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2442 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2443 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2444 } else
2445 prefix = 0;
2447 return rt6_fill_node(arg->net,
2448 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2449 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2450 prefix, 0, NLM_F_MULTI);
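/* RTM_GETROUTE for a single destination: build a flow from the request
 * attributes, resolve it and unicast the resulting route back to the
 * requester.
 */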
2453 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2455 struct net *net = sock_net(in_skb->sk);
2456 struct nlattr *tb[RTA_MAX+1];
2457 struct rt6_info *rt;
2458 struct sk_buff *skb;
2459 struct rtmsg *rtm;
2460 struct flowi6 fl6;
2461 int err, iif = 0;
2463 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2464 if (err < 0)
2465 goto errout;
2467 err = -EINVAL;
2468 memset(&fl6, 0, sizeof(fl6));
2470 if (tb[RTA_SRC]) {
2471 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2472 goto errout;
2474 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2477 if (tb[RTA_DST]) {
2478 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2479 goto errout;
2481 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2484 if (tb[RTA_IIF])
2485 iif = nla_get_u32(tb[RTA_IIF]);
2487 if (tb[RTA_OIF])
2488 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2490 if (iif) {
2491 struct net_device *dev;
2492 dev = __dev_get_by_index(net, iif);
2493 if (!dev) {
2494 err = -ENODEV;
2495 goto errout;
2499 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2500 if (skb == NULL) {
2501 err = -ENOBUFS;
2502 goto errout;
2505 /* Reserve room for dummy headers; this skb can pass
2506 through a good chunk of the routing engine.
2508 skb_reset_mac_header(skb);
2509 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2511 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2512 skb_dst_set(skb, &rt->dst);
2514 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2515 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2516 nlh->nlmsg_seq, 0, 0, 0);
2517 if (err < 0) {
2518 kfree_skb(skb);
2519 goto errout;
2522 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2523 errout:
2524 return err;
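/* Broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE) to
 * RTNLGRP_IPV6_ROUTE listeners.
 */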
2527 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2529 struct sk_buff *skb;
2530 struct net *net = info->nl_net;
2531 u32 seq;
2532 int err;
2534 err = -ENOBUFS;
2535 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2537 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2538 if (skb == NULL)
2539 goto errout;
2541 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2542 event, info->pid, seq, 0, 0, 0);
2543 if (err < 0) {
2544 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2545 WARN_ON(err == -EMSGSIZE);
2546 kfree_skb(skb);
2547 goto errout;
2549 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2550 info->nlh, gfp_any());
2551 return;
2552 errout:
2553 if (err < 0)
2554 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
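/* Netdevice notifier: once a namespace's loopback device is registered,
 * attach it to the special null/prohibit/blackhole routes.
 */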
2557 static int ip6_route_dev_notify(struct notifier_block *this,
2558 unsigned long event, void *data)
2560 struct net_device *dev = (struct net_device *)data;
2561 struct net *net = dev_net(dev);
2563 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2564 net->ipv6.ip6_null_entry->dst.dev = dev;
2565 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2566 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2567 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2568 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2569 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2570 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2571 #endif
2574 return NOTIFY_OK;
2578 * /proc
2581 #ifdef CONFIG_PROC_FS
2583 struct rt6_proc_arg
2585 char *buffer;
2586 int offset;
2587 int length;
2588 int skip;
2589 int len;
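/* Print one /proc/net/ipv6_route line for @rt. */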
2592 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2594 struct seq_file *m = p_arg;
2596 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2598 #ifdef CONFIG_IPV6_SUBTREES
2599 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2600 #else
2601 seq_puts(m, "00000000000000000000000000000000 00 ");
2602 #endif
2604 if (rt->rt6i_nexthop) {
2605 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2606 } else {
2607 seq_puts(m, "00000000000000000000000000000000");
2609 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2610 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2611 rt->dst.__use, rt->rt6i_flags,
2612 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2613 return 0;
2616 static int ipv6_route_show(struct seq_file *m, void *v)
2618 struct net *net = (struct net *)m->private;
2619 fib6_clean_all(net, rt6_info_route, 0, m);
2620 return 0;
2623 static int ipv6_route_open(struct inode *inode, struct file *file)
2625 return single_open_net(inode, file, ipv6_route_show);
2628 static const struct file_operations ipv6_route_proc_fops = {
2629 .owner = THIS_MODULE,
2630 .open = ipv6_route_open,
2631 .read = seq_read,
2632 .llseek = seq_lseek,
2633 .release = single_release_net,
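/* /proc/net/rt6_stats: per-namespace FIB node and route-entry counters. */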
2636 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2638 struct net *net = (struct net *)seq->private;
2639 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2640 net->ipv6.rt6_stats->fib_nodes,
2641 net->ipv6.rt6_stats->fib_route_nodes,
2642 net->ipv6.rt6_stats->fib_rt_alloc,
2643 net->ipv6.rt6_stats->fib_rt_entries,
2644 net->ipv6.rt6_stats->fib_rt_cache,
2645 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2646 net->ipv6.rt6_stats->fib_discarded_routes);
2648 return 0;
2651 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2653 return single_open_net(inode, file, rt6_stats_seq_show);
2656 static const struct file_operations rt6_stats_seq_fops = {
2657 .owner = THIS_MODULE,
2658 .open = rt6_stats_seq_open,
2659 .read = seq_read,
2660 .llseek = seq_lseek,
2661 .release = single_release_net,
2663 #endif /* CONFIG_PROC_FS */
2665 #ifdef CONFIG_SYSCTL
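/* The "flush" sysctl is write-only: writing to it kicks off a garbage
 * collection pass over the IPv6 routing tables.
 */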
2667 static
2668 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2669 void __user *buffer, size_t *lenp, loff_t *ppos)
2671 struct net *net;
2672 int delay;
2673 if (!write)
2674 return -EINVAL;
2676 net = (struct net *)ctl->extra1;
2677 delay = net->ipv6.sysctl.flush_delay;
2678 proc_dointvec(ctl, write, buffer, lenp, ppos);
2679 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2680 return 0;
2683 ctl_table ipv6_route_table_template[] = {
2685 .procname = "flush",
2686 .data = &init_net.ipv6.sysctl.flush_delay,
2687 .maxlen = sizeof(int),
2688 .mode = 0200,
2689 .proc_handler = ipv6_sysctl_rtcache_flush
2692 .procname = "gc_thresh",
2693 .data = &ip6_dst_ops_template.gc_thresh,
2694 .maxlen = sizeof(int),
2695 .mode = 0644,
2696 .proc_handler = proc_dointvec,
2699 .procname = "max_size",
2700 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2701 .maxlen = sizeof(int),
2702 .mode = 0644,
2703 .proc_handler = proc_dointvec,
2706 .procname = "gc_min_interval",
2707 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2708 .maxlen = sizeof(int),
2709 .mode = 0644,
2710 .proc_handler = proc_dointvec_jiffies,
2713 .procname = "gc_timeout",
2714 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2715 .maxlen = sizeof(int),
2716 .mode = 0644,
2717 .proc_handler = proc_dointvec_jiffies,
2720 .procname = "gc_interval",
2721 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2722 .maxlen = sizeof(int),
2723 .mode = 0644,
2724 .proc_handler = proc_dointvec_jiffies,
2727 .procname = "gc_elasticity",
2728 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2729 .maxlen = sizeof(int),
2730 .mode = 0644,
2731 .proc_handler = proc_dointvec,
2734 .procname = "mtu_expires",
2735 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2736 .maxlen = sizeof(int),
2737 .mode = 0644,
2738 .proc_handler = proc_dointvec_jiffies,
2741 .procname = "min_adv_mss",
2742 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2743 .maxlen = sizeof(int),
2744 .mode = 0644,
2745 .proc_handler = proc_dointvec,
2748 .procname = "gc_min_interval_ms",
2749 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2750 .maxlen = sizeof(int),
2751 .mode = 0644,
2752 .proc_handler = proc_dointvec_ms_jiffies,
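/* Clone the sysctl template for a namespace and point each entry's
 * .data at that namespace's own counters and limits.
 */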
2757 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2759 struct ctl_table *table;
2761 table = kmemdup(ipv6_route_table_template,
2762 sizeof(ipv6_route_table_template),
2763 GFP_KERNEL);
2765 if (table) {
2766 table[0].data = &net->ipv6.sysctl.flush_delay;
2767 table[0].extra1 = net;
2768 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2769 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2770 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2771 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2772 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2773 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2774 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2775 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2776 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2779 return table;
2781 #endif
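/* Per-namespace setup: copy dst_ops, allocate the null (and, with
 * multiple tables, prohibit/blackhole) template routes, and install the
 * sysctl defaults and /proc files.
 */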
2783 static int __net_init ip6_route_net_init(struct net *net)
2785 int ret = -ENOMEM;
2787 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2788 sizeof(net->ipv6.ip6_dst_ops));
2790 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2791 goto out_ip6_dst_ops;
2793 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2794 sizeof(*net->ipv6.ip6_null_entry),
2795 GFP_KERNEL);
2796 if (!net->ipv6.ip6_null_entry)
2797 goto out_ip6_dst_entries;
2798 net->ipv6.ip6_null_entry->dst.path =
2799 (struct dst_entry *)net->ipv6.ip6_null_entry;
2800 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2801 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2802 ip6_template_metrics, true);
2804 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2805 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2806 sizeof(*net->ipv6.ip6_prohibit_entry),
2807 GFP_KERNEL);
2808 if (!net->ipv6.ip6_prohibit_entry)
2809 goto out_ip6_null_entry;
2810 net->ipv6.ip6_prohibit_entry->dst.path =
2811 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2812 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2813 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2814 ip6_template_metrics, true);
2816 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2817 sizeof(*net->ipv6.ip6_blk_hole_entry),
2818 GFP_KERNEL);
2819 if (!net->ipv6.ip6_blk_hole_entry)
2820 goto out_ip6_prohibit_entry;
2821 net->ipv6.ip6_blk_hole_entry->dst.path =
2822 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2823 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2824 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2825 ip6_template_metrics, true);
2826 #endif
2828 net->ipv6.sysctl.flush_delay = 0;
2829 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2830 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2831 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2832 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2833 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2834 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2835 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2837 #ifdef CONFIG_PROC_FS
2838 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2839 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2840 #endif
2841 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2843 ret = 0;
2844 out:
2845 return ret;
2847 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2848 out_ip6_prohibit_entry:
2849 kfree(net->ipv6.ip6_prohibit_entry);
2850 out_ip6_null_entry:
2851 kfree(net->ipv6.ip6_null_entry);
2852 #endif
2853 out_ip6_dst_entries:
2854 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2855 out_ip6_dst_ops:
2856 goto out;
2859 static void __net_exit ip6_route_net_exit(struct net *net)
2861 #ifdef CONFIG_PROC_FS
2862 proc_net_remove(net, "ipv6_route");
2863 proc_net_remove(net, "rt6_stats");
2864 #endif
2865 kfree(net->ipv6.ip6_null_entry);
2866 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2867 kfree(net->ipv6.ip6_prohibit_entry);
2868 kfree(net->ipv6.ip6_blk_hole_entry);
2869 #endif
2870 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2873 static struct pernet_operations ip6_route_net_ops = {
2874 .init = ip6_route_net_init,
2875 .exit = ip6_route_net_exit,
2878 static struct notifier_block ip6_route_dev_notifier = {
2879 .notifier_call = ip6_route_dev_notify,
2880 .priority = 0,
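/* Subsystem init: create the rt6_info slab cache, register the pernet
 * operations, fib6, xfrm6 and policy rules, the rtnetlink handlers and
 * the device notifier. Errors unwind in reverse order below.
 */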
2883 int __init ip6_route_init(void)
2885 int ret;
2887 ret = -ENOMEM;
2888 ip6_dst_ops_template.kmem_cachep =
2889 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2890 SLAB_HWCACHE_ALIGN, NULL);
2891 if (!ip6_dst_ops_template.kmem_cachep)
2892 goto out;
2894 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2895 if (ret)
2896 goto out_kmem_cache;
2898 ret = register_pernet_subsys(&ip6_route_net_ops);
2899 if (ret)
2900 goto out_dst_entries;
2902 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2904 /* The loopback device is registered before this code runs, so the
2905 * loopback reference in rt6_info is not taken automatically; take it
2906 * manually for init_net. */
2907 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2908 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2909 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2911 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2912 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2913 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2914 #endif
2915 ret = fib6_init();
2916 if (ret)
2917 goto out_register_subsys;
2919 ret = xfrm6_init();
2920 if (ret)
2921 goto out_fib6_init;
2923 ret = fib6_rules_init();
2924 if (ret)
2925 goto xfrm6_init;
2927 ret = -ENOBUFS;
2928 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2929 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2930 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2931 goto fib6_rules_init;
2933 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2934 if (ret)
2935 goto fib6_rules_init;
2937 out:
2938 return ret;
2940 fib6_rules_init:
2941 fib6_rules_cleanup();
2942 xfrm6_init:
2943 xfrm6_fini();
2944 out_fib6_init:
2945 fib6_gc_cleanup();
2946 out_register_subsys:
2947 unregister_pernet_subsys(&ip6_route_net_ops);
2948 out_dst_entries:
2949 dst_entries_destroy(&ip6_dst_blackhole_ops);
2950 out_kmem_cache:
2951 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2952 goto out;
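/* Tear down everything set up by ip6_route_init(), in reverse order. */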
2955 void ip6_route_cleanup(void)
2957 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2958 fib6_rules_cleanup();
2959 xfrm6_fini();
2960 fib6_gc_cleanup();
2961 unregister_pernet_subsys(&ip6_route_net_ops);
2962 dst_entries_destroy(&ip6_dst_blackhole_ops);
2963 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);