sysctl: ipv6: use correct net in ipv6_sysctl_rtcache_flush
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / route.c
blob904312e25a3c072cd4ae00436f4cde2dbff04732
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
100 static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .default_advmss = ip6_default_advmss,
107 .default_mtu = ip6_default_mtu,
108 .destroy = ip6_dst_destroy,
109 .ifdown = ip6_dst_ifdown,
110 .negative_advice = ip6_negative_advice,
111 .link_failure = ip6_link_failure,
112 .update_pmtu = ip6_rt_update_pmtu,
113 .local_out = __ip6_local_out,
/* default_mtu callback for blackhole dsts: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
121 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
125 static struct dst_ops ip6_dst_blackhole_ops = {
126 .family = AF_INET6,
127 .protocol = cpu_to_be16(ETH_P_IPV6),
128 .destroy = ip6_dst_destroy,
129 .check = ip6_dst_check,
130 .default_mtu = ip6_blackhole_default_mtu,
131 .default_advmss = ip6_default_advmss,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
135 static struct rt6_info ip6_null_entry_template = {
136 .dst = {
137 .__refcnt = ATOMIC_INIT(1),
138 .__use = 1,
139 .obsolete = -1,
140 .error = -ENETUNREACH,
141 .input = ip6_pkt_discard,
142 .output = ip6_pkt_discard_out,
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_protocol = RTPROT_KERNEL,
146 .rt6i_metric = ~(u32) 0,
147 .rt6i_ref = ATOMIC_INIT(1),
150 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
152 static int ip6_pkt_prohibit(struct sk_buff *skb);
153 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
155 static struct rt6_info ip6_prohibit_entry_template = {
156 .dst = {
157 .__refcnt = ATOMIC_INIT(1),
158 .__use = 1,
159 .obsolete = -1,
160 .error = -EACCES,
161 .input = ip6_pkt_prohibit,
162 .output = ip6_pkt_prohibit_out,
164 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
165 .rt6i_protocol = RTPROT_KERNEL,
166 .rt6i_metric = ~(u32) 0,
167 .rt6i_ref = ATOMIC_INIT(1),
170 static struct rt6_info ip6_blk_hole_entry_template = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -EINVAL,
176 .input = dst_discard,
177 .output = dst_discard,
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_protocol = RTPROT_KERNEL,
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
185 #endif
/*
 * Allocate a dst from @ops and view it as an rt6_info.
 * Returns whatever dst_alloc() returned, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
193 static void ip6_dst_destroy(struct dst_entry *dst)
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197 struct inet_peer *peer = rt->rt6i_peer;
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
203 if (peer) {
204 rt->rt6i_peer = NULL;
205 inet_putpeer(peer);
209 void rt6_bind_peer(struct rt6_info *rt, int create)
211 struct inet_peer *peer;
213 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
214 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
215 inet_putpeer(peer);
218 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 int how)
221 struct rt6_info *rt = (struct rt6_info *)dst;
222 struct inet6_dev *idev = rt->rt6i_idev;
223 struct net_device *loopback_dev =
224 dev_net(dev)->loopback_dev;
226 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
227 struct inet6_dev *loopback_idev =
228 in6_dev_get(loopback_dev);
229 if (loopback_idev != NULL) {
230 rt->rt6i_idev = loopback_idev;
231 in6_dev_put(idev);
236 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
238 return (rt->rt6i_flags & RTF_EXPIRES) &&
239 time_after(jiffies, rt->rt6i_expires);
242 static inline int rt6_need_strict(struct in6_addr *daddr)
244 return ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
249 * Route lookup. Any table->tb6_lock is implied.
252 static inline struct rt6_info *rt6_device_match(struct net *net,
253 struct rt6_info *rt,
254 struct in6_addr *saddr,
255 int oif,
256 int flags)
258 struct rt6_info *local = NULL;
259 struct rt6_info *sprt;
261 if (!oif && ipv6_addr_any(saddr))
262 goto out;
264 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
265 struct net_device *dev = sprt->rt6i_dev;
267 if (oif) {
268 if (dev->ifindex == oif)
269 return sprt;
270 if (dev->flags & IFF_LOOPBACK) {
271 if (sprt->rt6i_idev == NULL ||
272 sprt->rt6i_idev->dev->ifindex != oif) {
273 if (flags & RT6_LOOKUP_F_IFACE && oif)
274 continue;
275 if (local && (!oif ||
276 local->rt6i_idev->dev->ifindex == oif))
277 continue;
279 local = sprt;
281 } else {
282 if (ipv6_chk_addr(net, saddr, dev,
283 flags & RT6_LOOKUP_F_IFACE))
284 return sprt;
288 if (oif) {
289 if (local)
290 return local;
292 if (flags & RT6_LOOKUP_F_IFACE)
293 return net->ipv6.ip6_null_entry;
295 out:
296 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the route's next hop when its
 * neighbour entry is not NUD_VALID and the last update is older than the
 * interface's rtr_probe_interval.  @rt may be NULL (nothing is done).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	/* Re-check under the lock, then apply the rate limit. */
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
335 * Default Router Selection (RFC 2461 6.3.6)
337 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
339 struct net_device *dev = rt->rt6i_dev;
340 if (!oif || dev->ifindex == oif)
341 return 2;
342 if ((dev->flags & IFF_LOOPBACK) &&
343 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
344 return 1;
345 return 0;
348 static inline int rt6_check_neigh(struct rt6_info *rt)
350 struct neighbour *neigh = rt->rt6i_nexthop;
351 int m;
352 if (rt->rt6i_flags & RTF_NONEXTHOP ||
353 !(rt->rt6i_flags & RTF_GATEWAY))
354 m = 1;
355 else if (neigh) {
356 read_lock_bh(&neigh->lock);
357 if (neigh->nud_state & NUD_VALID)
358 m = 2;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360 else if (neigh->nud_state & NUD_FAILED)
361 m = 0;
362 #endif
363 else
364 m = 1;
365 read_unlock_bh(&neigh->lock);
366 } else
367 m = 0;
368 return m;
371 static int rt6_score_route(struct rt6_info *rt, int oif,
372 int strict)
374 int m, n;
376 m = rt6_check_dev(rt, oif);
377 if (!m && (strict & RT6_LOOKUP_F_IFACE))
378 return -1;
379 #ifdef CONFIG_IPV6_ROUTER_PREF
380 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
381 #endif
382 n = rt6_check_neigh(rt);
383 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
384 return -1;
385 return m;
388 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
389 int *mpri, struct rt6_info *match)
391 int m;
393 if (rt6_check_expired(rt))
394 goto out;
396 m = rt6_score_route(rt, oif, strict);
397 if (m < 0)
398 goto out;
400 if (m > *mpri) {
401 if (strict & RT6_LOOKUP_F_REACHABLE)
402 rt6_probe(match);
403 *mpri = m;
404 match = rt;
405 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
406 rt6_probe(rt);
409 out:
410 return match;
413 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
414 struct rt6_info *rr_head,
415 u32 metric, int oif, int strict)
417 struct rt6_info *rt, *match;
418 int mpri = -1;
420 match = NULL;
421 for (rt = rr_head; rt && rt->rt6i_metric == metric;
422 rt = rt->dst.rt6_next)
423 match = find_match(rt, oif, strict, &mpri, match);
424 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
425 rt = rt->dst.rt6_next)
426 match = find_match(rt, oif, strict, &mpri, match);
428 return match;
431 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
433 struct rt6_info *match, *rt0;
434 struct net *net;
436 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
437 __func__, fn->leaf, oif);
439 rt0 = fn->rr_ptr;
440 if (!rt0)
441 fn->rr_ptr = rt0 = fn->leaf;
443 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
445 if (!match &&
446 (strict & RT6_LOOKUP_F_REACHABLE)) {
447 struct rt6_info *next = rt0->dst.rt6_next;
449 /* no entries matched; do round-robin */
450 if (!next || next->rt6i_metric != rt0->rt6i_metric)
451 next = fn->leaf;
453 if (next != rt0)
454 fn->rr_ptr = next;
457 RT6_TRACE("%s() => %p\n",
458 __func__, match);
460 net = dev_net(rt0->rt6i_dev);
461 return match ? match : net->ipv6.ip6_null_entry;
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt:    start of the option, interpreted as struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: gateway address to use for the route.
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * created, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL when the option is malformed or carries
 * ICMPV6_ROUTER_PREF_INVALID.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* the option carries the prefix field in full */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
/*
 * BACKTRACK - climb the fib6 tree when the current node gave no route.
 *
 * Not hygienic: it uses the caller's locals 'rt' and 'fn' and jumps to
 * the caller's labels 'out' and 'restart'.  When 'rt' is the namespace's
 * null entry it moves 'fn' to its parent (or into the parent's source
 * subtree when there is one that is not 'fn' itself) until it reaches a
 * node with RTN_RTINFO (goto restart) or the top-level root (goto out).
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
556 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
557 struct fib6_table *table,
558 struct flowi *fl, int flags)
560 struct fib6_node *fn;
561 struct rt6_info *rt;
563 read_lock_bh(&table->tb6_lock);
564 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
565 restart:
566 rt = fn->leaf;
567 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
568 BACKTRACK(net, &fl->fl6_src);
569 out:
570 dst_use(&rt->dst, jiffies);
571 read_unlock_bh(&table->tb6_lock);
572 return rt;
576 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
577 const struct in6_addr *saddr, int oif, int strict)
579 struct flowi fl = {
580 .oif = oif,
581 .fl6_dst = *daddr,
583 struct dst_entry *dst;
584 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
586 if (saddr) {
587 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
588 flags |= RT6_LOOKUP_F_HAS_SADDR;
591 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
592 if (dst->error == 0)
593 return (struct rt6_info *) dst;
595 dst_release(dst);
597 return NULL;
600 EXPORT_SYMBOL(rt6_lookup);
602 /* ip6_ins_rt is called with FREE table->tb6_lock.
603 It takes new route entry, the addition fails by any reason the
604 route is freed. In any case, if caller does not hold it, it may
605 be destroyed.
608 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
610 int err;
611 struct fib6_table *table;
613 table = rt->rt6i_table;
614 write_lock_bh(&table->tb6_lock);
615 err = fib6_add(&table->tb6_root, rt, info);
616 write_unlock_bh(&table->tb6_lock);
618 return err;
621 int ip6_ins_rt(struct rt6_info *rt)
623 struct nl_info info = {
624 .nl_net = dev_net(rt->rt6i_dev),
626 return __ip6_ins_rt(rt, &info);
629 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
630 struct in6_addr *saddr)
632 struct rt6_info *rt;
635 * Clone the route.
638 rt = ip6_rt_copy(ort);
640 if (rt) {
641 struct neighbour *neigh;
642 int attempts = !in_softirq();
644 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
645 if (rt->rt6i_dst.plen != 128 &&
646 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
647 rt->rt6i_flags |= RTF_ANYCAST;
648 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
651 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
652 rt->rt6i_dst.plen = 128;
653 rt->rt6i_flags |= RTF_CACHE;
654 rt->dst.flags |= DST_HOST;
656 #ifdef CONFIG_IPV6_SUBTREES
657 if (rt->rt6i_src.plen && saddr) {
658 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
659 rt->rt6i_src.plen = 128;
661 #endif
663 retry:
664 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
665 if (IS_ERR(neigh)) {
666 struct net *net = dev_net(rt->rt6i_dev);
667 int saved_rt_min_interval =
668 net->ipv6.sysctl.ip6_rt_gc_min_interval;
669 int saved_rt_elasticity =
670 net->ipv6.sysctl.ip6_rt_gc_elasticity;
672 if (attempts-- > 0) {
673 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
674 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
676 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
678 net->ipv6.sysctl.ip6_rt_gc_elasticity =
679 saved_rt_elasticity;
680 net->ipv6.sysctl.ip6_rt_gc_min_interval =
681 saved_rt_min_interval;
682 goto retry;
685 if (net_ratelimit())
686 printk(KERN_WARNING
687 "ipv6: Neighbour table overflow.\n");
688 dst_free(&rt->dst);
689 return NULL;
691 rt->rt6i_nexthop = neigh;
695 return rt;
698 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
700 struct rt6_info *rt = ip6_rt_copy(ort);
701 if (rt) {
702 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
703 rt->rt6i_dst.plen = 128;
704 rt->rt6i_flags |= RTF_CACHE;
705 rt->dst.flags |= DST_HOST;
706 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
708 return rt;
711 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
712 struct flowi *fl, int flags)
714 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt;
716 int strict = 0;
717 int attempts = 3;
718 int err;
719 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
721 strict |= flags & RT6_LOOKUP_F_IFACE;
723 relookup:
724 read_lock_bh(&table->tb6_lock);
726 restart_2:
727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
729 restart:
730 rt = rt6_select(fn, oif, strict | reachable);
732 BACKTRACK(net, &fl->fl6_src);
733 if (rt == net->ipv6.ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE)
735 goto out;
737 dst_hold(&rt->dst);
738 read_unlock_bh(&table->tb6_lock);
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else
743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
745 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry;
748 dst_hold(&rt->dst);
749 if (nrt) {
750 err = ip6_ins_rt(nrt);
751 if (!err)
752 goto out2;
755 if (--attempts <= 0)
756 goto out2;
759 * Race condition! In the gap, when table->tb6_lock was
760 * released someone could insert this route. Relookup.
762 dst_release(&rt->dst);
763 goto relookup;
765 out:
766 if (reachable) {
767 reachable = 0;
768 goto restart_2;
770 dst_hold(&rt->dst);
771 read_unlock_bh(&table->tb6_lock);
772 out2:
773 rt->dst.lastuse = jiffies;
774 rt->dst.__use++;
776 return rt;
779 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
780 struct flowi *fl, int flags)
782 return ip6_pol_route(net, table, fl->iif, fl, flags);
785 void ip6_route_input(struct sk_buff *skb)
787 struct ipv6hdr *iph = ipv6_hdr(skb);
788 struct net *net = dev_net(skb->dev);
789 int flags = RT6_LOOKUP_F_HAS_SADDR;
790 struct flowi fl = {
791 .iif = skb->dev->ifindex,
792 .fl6_dst = iph->daddr,
793 .fl6_src = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
795 .mark = skb->mark,
796 .proto = iph->nexthdr,
799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
800 flags |= RT6_LOOKUP_F_IFACE;
802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
805 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
806 struct flowi *fl, int flags)
808 return ip6_pol_route(net, table, fl->oif, fl, flags);
811 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
814 int flags = 0;
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
817 flags |= RT6_LOOKUP_F_IFACE;
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
827 EXPORT_SYMBOL(ip6_route_output);
829 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
831 struct rt6_info *ort = (struct rt6_info *) *dstp;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL;
836 if (rt) {
837 new = &rt->dst;
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1;
841 new->input = dst_discard;
842 new->output = dst_discard;
844 dst_copy_metrics(new, &ort->dst);
845 new->dev = ort->dst.dev;
846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev);
851 rt->rt6i_expires = 0;
853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
855 rt->rt6i_metric = 0;
857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
858 #ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860 #endif
862 dst_free(new);
865 dst_release(*dstp);
866 *dstp = new;
867 return new ? 0 : -ENOMEM;
869 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
872 * Destination cache support functions
875 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
877 struct rt6_info *rt;
879 rt = (struct rt6_info *) dst;
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
882 return dst;
884 return NULL;
887 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
889 struct rt6_info *rt = (struct rt6_info *) dst;
891 if (rt) {
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
897 } else {
898 dst_release(dst);
899 dst = NULL;
902 return dst;
905 static void ip6_link_failure(struct sk_buff *skb)
907 struct rt6_info *rt;
909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
911 rt = (struct rt6_info *) skb_dst(skb);
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
914 dst_set_expires(&rt->dst, 0);
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
921 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
928 u32 features = dst_metric(dst, RTAX_FEATURES);
929 mtu = IPV6_MIN_MTU;
930 features |= RTAX_FEATURE_ALLFRAG;
931 dst_metric_set(dst, RTAX_FEATURES, features);
933 dst_metric_set(dst, RTAX_MTU, mtu);
934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
938 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
940 struct net_device *dev = dst->dev;
941 unsigned int mtu = dst_mtu(dst);
942 struct net *net = dev_net(dev);
944 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
946 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
947 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
950 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
951 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
952 * IPV6_MAXPLEN is also valid and means: "any MSS,
953 * rely only on pmtu discovery"
955 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
956 mtu = IPV6_MAXPLEN;
957 return mtu;
960 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
962 unsigned int mtu = IPV6_MIN_MTU;
963 struct inet6_dev *idev;
965 rcu_read_lock();
966 idev = __in6_dev_get(dst->dev);
967 if (idev)
968 mtu = idev->cnf.mtu6;
969 rcu_read_unlock();
971 return mtu;
/*
 * Singly linked list (through dst->next) of the dsts created by
 * icmp6_dst_alloc(); icmp6_dst_lock protects the list.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
977 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
978 struct neighbour *neigh,
979 const struct in6_addr *addr)
981 struct rt6_info *rt;
982 struct inet6_dev *idev = in6_dev_get(dev);
983 struct net *net = dev_net(dev);
985 if (unlikely(idev == NULL))
986 return NULL;
988 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
989 if (unlikely(rt == NULL)) {
990 in6_dev_put(idev);
991 goto out;
994 dev_hold(dev);
995 if (neigh)
996 neigh_hold(neigh);
997 else {
998 neigh = ndisc_get_neigh(dev, addr);
999 if (IS_ERR(neigh))
1000 neigh = NULL;
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh;
1006 atomic_set(&rt->dst.__refcnt, 1);
1007 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1008 rt->dst.output = ip6_output;
1010 #if 0 /* there's no chance to use these for ndisc */
1011 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1012 ? DST_HOST
1013 : 0;
1014 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015 rt->rt6i_dst.plen = 128;
1016 #endif
1018 spin_lock_bh(&icmp6_dst_lock);
1019 rt->dst.next = icmp6_dst_gc_list;
1020 icmp6_dst_gc_list = &rt->dst;
1021 spin_unlock_bh(&icmp6_dst_lock);
1023 fib6_force_start_gc(net);
1025 out:
1026 return &rt->dst;
1029 int icmp6_dst_gc(void)
1031 struct dst_entry *dst, *next, **pprev;
1032 int more = 0;
1034 next = NULL;
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
1039 while ((dst = *pprev) != NULL) {
1040 if (!atomic_read(&dst->__refcnt)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
1043 } else {
1044 pprev = &dst->next;
1045 ++more;
1049 spin_unlock_bh(&icmp6_dst_lock);
1051 return more;
1054 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1055 void *arg)
1057 struct dst_entry *dst, **pprev;
1059 spin_lock_bh(&icmp6_dst_lock);
1060 pprev = &icmp6_dst_gc_list;
1061 while ((dst = *pprev) != NULL) {
1062 struct rt6_info *rt = (struct rt6_info *) dst;
1063 if (func(rt, arg)) {
1064 *pprev = dst->next;
1065 dst_free(dst);
1066 } else {
1067 pprev = &dst->next;
1070 spin_unlock_bh(&icmp6_dst_lock);
1073 static int ip6_dst_gc(struct dst_ops *ops)
1075 unsigned long now = jiffies;
1076 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1077 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1078 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1079 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1080 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1081 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1082 int entries;
1084 entries = dst_entries_get_fast(ops);
1085 if (time_after(rt_last_gc + rt_min_interval, now) &&
1086 entries <= rt_max_size)
1087 goto out;
1089 net->ipv6.ip6_rt_gc_expire++;
1090 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1091 net->ipv6.ip6_rt_last_gc = now;
1092 entries = dst_entries_get_slow(ops);
1093 if (entries < ops->gc_thresh)
1094 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1095 out:
1096 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1097 return entries > rt_max_size;
1100 /* Clean host part of a prefix. Not necessary in radix tree,
1101 but results in cleaner routing tables.
1103 Remove it only when all the things will work!
1106 int ip6_dst_hoplimit(struct dst_entry *dst)
1108 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1109 if (hoplimit == 0) {
1110 struct net_device *dev = dst->dev;
1111 struct inet6_dev *idev;
1113 rcu_read_lock();
1114 idev = __in6_dev_get(dev);
1115 if (idev)
1116 hoplimit = idev->cnf.hop_limit;
1117 else
1118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1119 rcu_read_unlock();
1121 return hoplimit;
1123 EXPORT_SYMBOL(ip6_dst_hoplimit);
1129 int ip6_route_add(struct fib6_config *cfg)
1131 int err;
1132 struct net *net = cfg->fc_nlinfo.nl_net;
1133 struct rt6_info *rt = NULL;
1134 struct net_device *dev = NULL;
1135 struct inet6_dev *idev = NULL;
1136 struct fib6_table *table;
1137 int addr_type;
1139 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1140 return -EINVAL;
1141 #ifndef CONFIG_IPV6_SUBTREES
1142 if (cfg->fc_src_len)
1143 return -EINVAL;
1144 #endif
1145 if (cfg->fc_ifindex) {
1146 err = -ENODEV;
1147 dev = dev_get_by_index(net, cfg->fc_ifindex);
1148 if (!dev)
1149 goto out;
1150 idev = in6_dev_get(dev);
1151 if (!idev)
1152 goto out;
1155 if (cfg->fc_metric == 0)
1156 cfg->fc_metric = IP6_RT_PRIO_USER;
1158 table = fib6_new_table(net, cfg->fc_table);
1159 if (table == NULL) {
1160 err = -ENOBUFS;
1161 goto out;
1164 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1166 if (rt == NULL) {
1167 err = -ENOMEM;
1168 goto out;
1171 rt->dst.obsolete = -1;
1172 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1173 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1176 if (cfg->fc_protocol == RTPROT_UNSPEC)
1177 cfg->fc_protocol = RTPROT_BOOT;
1178 rt->rt6i_protocol = cfg->fc_protocol;
1180 addr_type = ipv6_addr_type(&cfg->fc_dst);
1182 if (addr_type & IPV6_ADDR_MULTICAST)
1183 rt->dst.input = ip6_mc_input;
1184 else if (cfg->fc_flags & RTF_LOCAL)
1185 rt->dst.input = ip6_input;
1186 else
1187 rt->dst.input = ip6_forward;
1189 rt->dst.output = ip6_output;
1191 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1192 rt->rt6i_dst.plen = cfg->fc_dst_len;
1193 if (rt->rt6i_dst.plen == 128)
1194 rt->dst.flags = DST_HOST;
1196 #ifdef CONFIG_IPV6_SUBTREES
1197 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1198 rt->rt6i_src.plen = cfg->fc_src_len;
1199 #endif
1201 rt->rt6i_metric = cfg->fc_metric;
1203 /* We cannot add true routes via loopback here,
1204 they would result in kernel looping; promote them to reject routes
1206 if ((cfg->fc_flags & RTF_REJECT) ||
1207 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1208 && !(cfg->fc_flags&RTF_LOCAL))) {
1209 /* hold loopback dev/idev if we haven't done so. */
1210 if (dev != net->loopback_dev) {
1211 if (dev) {
1212 dev_put(dev);
1213 in6_dev_put(idev);
1215 dev = net->loopback_dev;
1216 dev_hold(dev);
1217 idev = in6_dev_get(dev);
1218 if (!idev) {
1219 err = -ENODEV;
1220 goto out;
1223 rt->dst.output = ip6_pkt_discard_out;
1224 rt->dst.input = ip6_pkt_discard;
1225 rt->dst.error = -ENETUNREACH;
1226 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1227 goto install_route;
1230 if (cfg->fc_flags & RTF_GATEWAY) {
1231 struct in6_addr *gw_addr;
1232 int gwa_type;
1234 gw_addr = &cfg->fc_gateway;
1235 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1236 gwa_type = ipv6_addr_type(gw_addr);
1238 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1239 struct rt6_info *grt;
1241 /* IPv6 strictly inhibits using not link-local
1242 addresses as nexthop address.
1243 Otherwise, router will not able to send redirects.
1244 It is very good, but in some (rare!) circumstances
1245 (SIT, PtP, NBMA NOARP links) it is handy to allow
1246 some exceptions. --ANK
1248 err = -EINVAL;
1249 if (!(gwa_type&IPV6_ADDR_UNICAST))
1250 goto out;
1252 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1254 err = -EHOSTUNREACH;
1255 if (grt == NULL)
1256 goto out;
1257 if (dev) {
1258 if (dev != grt->rt6i_dev) {
1259 dst_release(&grt->dst);
1260 goto out;
1262 } else {
1263 dev = grt->rt6i_dev;
1264 idev = grt->rt6i_idev;
1265 dev_hold(dev);
1266 in6_dev_hold(grt->rt6i_idev);
1268 if (!(grt->rt6i_flags&RTF_GATEWAY))
1269 err = 0;
1270 dst_release(&grt->dst);
1272 if (err)
1273 goto out;
1275 err = -EINVAL;
1276 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1277 goto out;
1280 err = -ENODEV;
1281 if (dev == NULL)
1282 goto out;
1284 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1285 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1286 if (IS_ERR(rt->rt6i_nexthop)) {
1287 err = PTR_ERR(rt->rt6i_nexthop);
1288 rt->rt6i_nexthop = NULL;
1289 goto out;
1293 rt->rt6i_flags = cfg->fc_flags;
1295 install_route:
1296 if (cfg->fc_mx) {
1297 struct nlattr *nla;
1298 int remaining;
1300 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1301 int type = nla_type(nla);
1303 if (type) {
1304 if (type > RTAX_MAX) {
1305 err = -EINVAL;
1306 goto out;
1309 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1314 rt->dst.dev = dev;
1315 rt->rt6i_idev = idev;
1316 rt->rt6i_table = table;
1318 cfg->fc_nlinfo.nl_net = dev_net(dev);
1320 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1322 out:
1323 if (dev)
1324 dev_put(dev);
1325 if (idev)
1326 in6_dev_put(idev);
1327 if (rt)
1328 dst_free(&rt->dst);
1329 return err;
1332 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1334 int err;
1335 struct fib6_table *table;
1336 struct net *net = dev_net(rt->rt6i_dev);
1338 if (rt == net->ipv6.ip6_null_entry)
1339 return -ENOENT;
1341 table = rt->rt6i_table;
1342 write_lock_bh(&table->tb6_lock);
1344 err = fib6_del(rt, info);
1345 dst_release(&rt->dst);
1347 write_unlock_bh(&table->tb6_lock);
1349 return err;
1352 int ip6_del_rt(struct rt6_info *rt)
1354 struct nl_info info = {
1355 .nl_net = dev_net(rt->rt6i_dev),
1357 return __ip6_del_rt(rt, &info);
1360 static int ip6_route_del(struct fib6_config *cfg)
1362 struct fib6_table *table;
1363 struct fib6_node *fn;
1364 struct rt6_info *rt;
1365 int err = -ESRCH;
1367 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1368 if (table == NULL)
1369 return err;
1371 read_lock_bh(&table->tb6_lock);
1373 fn = fib6_locate(&table->tb6_root,
1374 &cfg->fc_dst, cfg->fc_dst_len,
1375 &cfg->fc_src, cfg->fc_src_len);
1377 if (fn) {
1378 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1379 if (cfg->fc_ifindex &&
1380 (rt->rt6i_dev == NULL ||
1381 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1382 continue;
1383 if (cfg->fc_flags & RTF_GATEWAY &&
1384 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1385 continue;
1386 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1387 continue;
1388 dst_hold(&rt->dst);
1389 read_unlock_bh(&table->tb6_lock);
1391 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1394 read_unlock_bh(&table->tb6_lock);
1396 return err;
/*
 *	Handle redirects
 */
1402 struct ip6rd_flowi {
1403 struct flowi fl;
1404 struct in6_addr gateway;
1407 static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table,
1409 struct flowi *fl,
1410 int flags)
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1413 struct rt6_info *rt;
1414 struct fib6_node *fn;
1417 * Get the "current" route for this destination and
1418 * check if the redirect has come from approriate router.
1420 * RFC 2461 specifies that redirects should only be
1421 * accepted if they come from the nexthop to the target.
1422 * Due to the way the routes are chosen, this notion
1423 * is a bit fuzzy and one might need to check all possible
1424 * routes.
1427 read_lock_bh(&table->tb6_lock);
1428 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1429 restart:
1430 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1432 * Current route is on-link; redirect is always invalid.
1434 * Seems, previous statement is not true. It could
1435 * be node, which looks for us as on-link (f.e. proxy ndisc)
1436 * But then router serving it might decide, that we should
1437 * know truth 8)8) --ANK (980726).
1439 if (rt6_check_expired(rt))
1440 continue;
1441 if (!(rt->rt6i_flags & RTF_GATEWAY))
1442 continue;
1443 if (fl->oif != rt->rt6i_dev->ifindex)
1444 continue;
1445 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1446 continue;
1447 break;
1450 if (!rt)
1451 rt = net->ipv6.ip6_null_entry;
1452 BACKTRACK(net, &fl->fl6_src);
1453 out:
1454 dst_hold(&rt->dst);
1456 read_unlock_bh(&table->tb6_lock);
1458 return rt;
1461 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1462 struct in6_addr *src,
1463 struct in6_addr *gateway,
1464 struct net_device *dev)
1466 int flags = RT6_LOOKUP_F_HAS_SADDR;
1467 struct net *net = dev_net(dev);
1468 struct ip6rd_flowi rdfl = {
1469 .fl = {
1470 .oif = dev->ifindex,
1471 .fl6_dst = *dest,
1472 .fl6_src = *src,
1476 ipv6_addr_copy(&rdfl.gateway, gateway);
1478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1482 flags, __ip6_route_redirect);
1485 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
1491 struct net *net = dev_net(neigh->dev);
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1495 if (rt == net->ipv6.ip6_null_entry) {
1496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
1499 goto out;
1503 * We have finally decided to accept it.
1506 neigh_update(neigh, lladdr, NUD_STALE,
1507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so that this nexthop apparently is reachable. --ANK
1518 dst_confirm(&rt->dst);
1520 /* Duplicate redirect: silently ignore. */
1521 if (neigh == rt->dst.neighbour)
1522 goto out;
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
1534 nrt->dst.flags |= DST_HOST;
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
1539 if (ip6_ins_rt(nrt))
1540 goto out;
1542 netevent.old = &rt->dst;
1543 netevent.new = &nrt->dst;
1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1546 if (rt->rt6i_flags&RTF_CACHE) {
1547 ip6_del_rt(rt);
1548 return;
1551 out:
1552 dst_release(&rt->dst);
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */
1560 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1561 struct net *net, u32 pmtu, int ifindex)
1563 struct rt6_info *rt, *nrt;
1564 int allfrag = 0;
1565 again:
1566 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1567 if (rt == NULL)
1568 return;
1570 if (rt6_check_expired(rt)) {
1571 ip6_del_rt(rt);
1572 goto again;
1575 if (pmtu >= dst_mtu(&rt->dst))
1576 goto out;
1578 if (pmtu < IPV6_MIN_MTU) {
1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1593 dst_confirm(&rt->dst);
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1600 if (rt->rt6i_flags & RTF_CACHE) {
1601 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1602 if (allfrag) {
1603 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1604 features |= RTAX_FEATURE_ALLFRAG;
1605 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1607 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1608 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1609 goto out;
1612 /* Network route.
1613 Two cases are possible:
1614 1. It is connected route. Action: COW
1615 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1617 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1618 nrt = rt6_alloc_cow(rt, daddr, saddr);
1619 else
1620 nrt = rt6_alloc_clone(rt, daddr);
1622 if (nrt) {
1623 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1624 if (allfrag) {
1625 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1626 features |= RTAX_FEATURE_ALLFRAG;
1627 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1630 /* According to RFC 1981, detecting PMTU increase shouldn't be
1631 * happened within 5 mins, the recommended timer is 10 mins.
1632 * Here this route expiration time is set to ip6_rt_mtu_expires
1633 * which is 10 mins. After 10 mins the decreased pmtu is expired
1634 * and detecting PMTU increase will be automatically happened.
1636 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1637 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1639 ip6_ins_rt(nrt);
1641 out:
1642 dst_release(&rt->dst);
1645 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1646 struct net_device *dev, u32 pmtu)
1648 struct net *net = dev_net(dev);
1651 * RFC 1981 states that a node "MUST reduce the size of the packets it
1652 * is sending along the path" that caused the Packet Too Big message.
1653 * Since it's not possible in the general case to determine which
1654 * interface was used to send the original packet, we update the MTU
1655 * on the interface that will be used to send future packets. We also
1656 * update the MTU on the interface that received the Packet Too Big in
1657 * case the original packet was forced out that interface with
1658 * SO_BINDTODEVICE or similar. This is the next best thing to the
1659 * correct behaviour, which would be to update the MTU on all
1660 * interfaces.
1662 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1663 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
/*
 *	Misc support functions
 */
1670 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1672 struct net *net = dev_net(ort->rt6i_dev);
1673 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1675 if (rt) {
1676 rt->dst.input = ort->dst.input;
1677 rt->dst.output = ort->dst.output;
1679 dst_copy_metrics(&rt->dst, &ort->dst);
1680 rt->dst.error = ort->dst.error;
1681 rt->dst.dev = ort->dst.dev;
1682 if (rt->dst.dev)
1683 dev_hold(rt->dst.dev);
1684 rt->rt6i_idev = ort->rt6i_idev;
1685 if (rt->rt6i_idev)
1686 in6_dev_hold(rt->rt6i_idev);
1687 rt->dst.lastuse = jiffies;
1688 rt->rt6i_expires = 0;
1690 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1691 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1692 rt->rt6i_metric = 0;
1694 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1695 #ifdef CONFIG_IPV6_SUBTREES
1696 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1697 #endif
1698 rt->rt6i_table = ort->rt6i_table;
1700 return rt;
1703 #ifdef CONFIG_IPV6_ROUTE_INFO
1704 static struct rt6_info *rt6_get_route_info(struct net *net,
1705 struct in6_addr *prefix, int prefixlen,
1706 struct in6_addr *gwaddr, int ifindex)
1708 struct fib6_node *fn;
1709 struct rt6_info *rt = NULL;
1710 struct fib6_table *table;
1712 table = fib6_get_table(net, RT6_TABLE_INFO);
1713 if (table == NULL)
1714 return NULL;
1716 write_lock_bh(&table->tb6_lock);
1717 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1718 if (!fn)
1719 goto out;
1721 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1722 if (rt->rt6i_dev->ifindex != ifindex)
1723 continue;
1724 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1725 continue;
1726 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1727 continue;
1728 dst_hold(&rt->dst);
1729 break;
1731 out:
1732 write_unlock_bh(&table->tb6_lock);
1733 return rt;
1736 static struct rt6_info *rt6_add_route_info(struct net *net,
1737 struct in6_addr *prefix, int prefixlen,
1738 struct in6_addr *gwaddr, int ifindex,
1739 unsigned pref)
1741 struct fib6_config cfg = {
1742 .fc_table = RT6_TABLE_INFO,
1743 .fc_metric = IP6_RT_PRIO_USER,
1744 .fc_ifindex = ifindex,
1745 .fc_dst_len = prefixlen,
1746 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1747 RTF_UP | RTF_PREF(pref),
1748 .fc_nlinfo.pid = 0,
1749 .fc_nlinfo.nlh = NULL,
1750 .fc_nlinfo.nl_net = net,
1753 ipv6_addr_copy(&cfg.fc_dst, prefix);
1754 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1756 /* We should treat it as a default route if prefix length is 0. */
1757 if (!prefixlen)
1758 cfg.fc_flags |= RTF_DEFAULT;
1760 ip6_route_add(&cfg);
1762 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1764 #endif
1766 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1768 struct rt6_info *rt;
1769 struct fib6_table *table;
1771 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1772 if (table == NULL)
1773 return NULL;
1775 write_lock_bh(&table->tb6_lock);
1776 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1777 if (dev == rt->rt6i_dev &&
1778 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1779 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1780 break;
1782 if (rt)
1783 dst_hold(&rt->dst);
1784 write_unlock_bh(&table->tb6_lock);
1785 return rt;
1788 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1789 struct net_device *dev,
1790 unsigned int pref)
1792 struct fib6_config cfg = {
1793 .fc_table = RT6_TABLE_DFLT,
1794 .fc_metric = IP6_RT_PRIO_USER,
1795 .fc_ifindex = dev->ifindex,
1796 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1797 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1798 .fc_nlinfo.pid = 0,
1799 .fc_nlinfo.nlh = NULL,
1800 .fc_nlinfo.nl_net = dev_net(dev),
1803 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1805 ip6_route_add(&cfg);
1807 return rt6_get_dflt_router(gwaddr, dev);
1810 void rt6_purge_dflt_routers(struct net *net)
1812 struct rt6_info *rt;
1813 struct fib6_table *table;
1815 /* NOTE: Keep consistent with rt6_get_dflt_router */
1816 table = fib6_get_table(net, RT6_TABLE_DFLT);
1817 if (table == NULL)
1818 return;
1820 restart:
1821 read_lock_bh(&table->tb6_lock);
1822 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1823 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1824 dst_hold(&rt->dst);
1825 read_unlock_bh(&table->tb6_lock);
1826 ip6_del_rt(rt);
1827 goto restart;
1830 read_unlock_bh(&table->tb6_lock);
1833 static void rtmsg_to_fib6_config(struct net *net,
1834 struct in6_rtmsg *rtmsg,
1835 struct fib6_config *cfg)
1837 memset(cfg, 0, sizeof(*cfg));
1839 cfg->fc_table = RT6_TABLE_MAIN;
1840 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1841 cfg->fc_metric = rtmsg->rtmsg_metric;
1842 cfg->fc_expires = rtmsg->rtmsg_info;
1843 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1844 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1845 cfg->fc_flags = rtmsg->rtmsg_flags;
1847 cfg->fc_nlinfo.nl_net = net;
1849 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1850 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1851 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1854 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1856 struct fib6_config cfg;
1857 struct in6_rtmsg rtmsg;
1858 int err;
1860 switch(cmd) {
1861 case SIOCADDRT: /* Add a route */
1862 case SIOCDELRT: /* Delete a route */
1863 if (!capable(CAP_NET_ADMIN))
1864 return -EPERM;
1865 err = copy_from_user(&rtmsg, arg,
1866 sizeof(struct in6_rtmsg));
1867 if (err)
1868 return -EFAULT;
1870 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1872 rtnl_lock();
1873 switch (cmd) {
1874 case SIOCADDRT:
1875 err = ip6_route_add(&cfg);
1876 break;
1877 case SIOCDELRT:
1878 err = ip6_route_del(&cfg);
1879 break;
1880 default:
1881 err = -EINVAL;
1883 rtnl_unlock();
1885 return err;
1888 return -EINVAL;
/*
 *	Drop the packet on the floor
 */
1895 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1897 int type;
1898 struct dst_entry *dst = skb_dst(skb);
1899 switch (ipstats_mib_noroutes) {
1900 case IPSTATS_MIB_INNOROUTES:
1901 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1902 if (type == IPV6_ADDR_ANY) {
1903 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1904 IPSTATS_MIB_INADDRERRORS);
1905 break;
1907 /* FALLTHROUGH */
1908 case IPSTATS_MIB_OUTNOROUTES:
1909 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1910 ipstats_mib_noroutes);
1911 break;
1913 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1914 kfree_skb(skb);
1915 return 0;
1918 static int ip6_pkt_discard(struct sk_buff *skb)
1920 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1923 static int ip6_pkt_discard_out(struct sk_buff *skb)
1925 skb->dev = skb_dst(skb)->dev;
1926 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1929 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1931 static int ip6_pkt_prohibit(struct sk_buff *skb)
1933 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1936 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1938 skb->dev = skb_dst(skb)->dev;
1939 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1942 #endif
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */
1948 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1949 const struct in6_addr *addr,
1950 int anycast)
1952 struct net *net = dev_net(idev->dev);
1953 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1954 struct neighbour *neigh;
1956 if (rt == NULL) {
1957 if (net_ratelimit())
1958 pr_warning("IPv6: Maximum number of routes reached,"
1959 " consider increasing route/max_size.\n");
1960 return ERR_PTR(-ENOMEM);
1963 dev_hold(net->loopback_dev);
1964 in6_dev_hold(idev);
1966 rt->dst.flags = DST_HOST;
1967 rt->dst.input = ip6_input;
1968 rt->dst.output = ip6_output;
1969 rt->rt6i_dev = net->loopback_dev;
1970 rt->rt6i_idev = idev;
1971 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1972 rt->dst.obsolete = -1;
1974 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1975 if (anycast)
1976 rt->rt6i_flags |= RTF_ANYCAST;
1977 else
1978 rt->rt6i_flags |= RTF_LOCAL;
1979 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1980 if (IS_ERR(neigh)) {
1981 dst_free(&rt->dst);
1983 /* We are casting this because that is the return
1984 * value type. But an errno encoded pointer is the
1985 * same regardless of the underlying pointer type,
1986 * and that's what we are returning. So this is OK.
1988 return (struct rt6_info *) neigh;
1990 rt->rt6i_nexthop = neigh;
1992 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1993 rt->rt6i_dst.plen = 128;
1994 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1996 atomic_set(&rt->dst.__refcnt, 1);
1998 return rt;
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose null entry must be kept */
};
2006 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2008 const struct arg_dev_net *adn = arg;
2009 const struct net_device *dev = adn->dev;
2011 if ((rt->rt6i_dev == dev || dev == NULL) &&
2012 rt != adn->net->ipv6.ip6_null_entry) {
2013 RT6_TRACE("deleted by ifdown %p\n", rt);
2014 return -1;
2016 return 0;
2019 void rt6_ifdown(struct net *net, struct net_device *dev)
2021 struct arg_dev_net adn = {
2022 .dev = dev,
2023 .net = net,
2026 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2027 icmp6_clean_all(fib6_ifdown, &adn);
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2036 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2038 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2039 struct inet6_dev *idev;
2041 /* In IPv6 pmtu discovery is not optional,
2042 so that RTAX_MTU lock cannot disable it.
2043 We still use this lock to block changes
2044 caused by addrconf/ndisc.
2047 idev = __in6_dev_get(arg->dev);
2048 if (idev == NULL)
2049 return 0;
2051 /* For administrative MTU increase, there is no way to discover
2052 IPv6 PMTU increase, so PMTU increase should be updated here.
2053 Since RFC 1981 doesn't include administrative MTU increase
2054 update PMTU increase is a MUST. (i.e. jumbo frame)
2057 If new MTU is less than route PMTU, this new MTU will be the
2058 lowest MTU in the path, update the route PMTU to reflect PMTU
2059 decreases; if new MTU is greater than route PMTU, and the
2060 old MTU is the lowest MTU in the path, update the route PMTU
2061 to reflect the increase. In this case if the other nodes' MTU
2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2063 PMTU discouvery.
2065 if (rt->rt6i_dev == arg->dev &&
2066 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2067 (dst_mtu(&rt->dst) >= arg->mtu ||
2068 (dst_mtu(&rt->dst) < arg->mtu &&
2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2070 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2072 return 0;
2075 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2077 struct rt6_mtu_change_arg arg = {
2078 .dev = dev,
2079 .mtu = mtu,
2082 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2085 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2086 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2087 [RTA_OIF] = { .type = NLA_U32 },
2088 [RTA_IIF] = { .type = NLA_U32 },
2089 [RTA_PRIORITY] = { .type = NLA_U32 },
2090 [RTA_METRICS] = { .type = NLA_NESTED },
2093 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2094 struct fib6_config *cfg)
2096 struct rtmsg *rtm;
2097 struct nlattr *tb[RTA_MAX+1];
2098 int err;
2100 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2101 if (err < 0)
2102 goto errout;
2104 err = -EINVAL;
2105 rtm = nlmsg_data(nlh);
2106 memset(cfg, 0, sizeof(*cfg));
2108 cfg->fc_table = rtm->rtm_table;
2109 cfg->fc_dst_len = rtm->rtm_dst_len;
2110 cfg->fc_src_len = rtm->rtm_src_len;
2111 cfg->fc_flags = RTF_UP;
2112 cfg->fc_protocol = rtm->rtm_protocol;
2114 if (rtm->rtm_type == RTN_UNREACHABLE)
2115 cfg->fc_flags |= RTF_REJECT;
2117 if (rtm->rtm_type == RTN_LOCAL)
2118 cfg->fc_flags |= RTF_LOCAL;
2120 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2121 cfg->fc_nlinfo.nlh = nlh;
2122 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2124 if (tb[RTA_GATEWAY]) {
2125 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2126 cfg->fc_flags |= RTF_GATEWAY;
2129 if (tb[RTA_DST]) {
2130 int plen = (rtm->rtm_dst_len + 7) >> 3;
2132 if (nla_len(tb[RTA_DST]) < plen)
2133 goto errout;
2135 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2138 if (tb[RTA_SRC]) {
2139 int plen = (rtm->rtm_src_len + 7) >> 3;
2141 if (nla_len(tb[RTA_SRC]) < plen)
2142 goto errout;
2144 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2147 if (tb[RTA_OIF])
2148 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2150 if (tb[RTA_PRIORITY])
2151 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2153 if (tb[RTA_METRICS]) {
2154 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2155 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2158 if (tb[RTA_TABLE])
2159 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2161 err = 0;
2162 errout:
2163 return err;
2166 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2168 struct fib6_config cfg;
2169 int err;
2171 err = rtm_to_fib6_config(skb, nlh, &cfg);
2172 if (err < 0)
2173 return err;
2175 return ip6_route_del(&cfg);
2178 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2180 struct fib6_config cfg;
2181 int err;
2183 err = rtm_to_fib6_config(skb, nlh, &cfg);
2184 if (err < 0)
2185 return err;
2187 return ip6_route_add(&cfg);
2190 static inline size_t rt6_nlmsg_size(void)
2192 return NLMSG_ALIGN(sizeof(struct rtmsg))
2193 + nla_total_size(16) /* RTA_SRC */
2194 + nla_total_size(16) /* RTA_DST */
2195 + nla_total_size(16) /* RTA_GATEWAY */
2196 + nla_total_size(16) /* RTA_PREFSRC */
2197 + nla_total_size(4) /* RTA_TABLE */
2198 + nla_total_size(4) /* RTA_IIF */
2199 + nla_total_size(4) /* RTA_OIF */
2200 + nla_total_size(4) /* RTA_PRIORITY */
2201 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2202 + nla_total_size(sizeof(struct rta_cacheinfo));
2205 static int rt6_fill_node(struct net *net,
2206 struct sk_buff *skb, struct rt6_info *rt,
2207 struct in6_addr *dst, struct in6_addr *src,
2208 int iif, int type, u32 pid, u32 seq,
2209 int prefix, int nowait, unsigned int flags)
2211 struct rtmsg *rtm;
2212 struct nlmsghdr *nlh;
2213 long expires;
2214 u32 table;
2216 if (prefix) { /* user wants prefix routes only */
2217 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2218 /* success since this is not a prefix route */
2219 return 1;
2223 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2224 if (nlh == NULL)
2225 return -EMSGSIZE;
2227 rtm = nlmsg_data(nlh);
2228 rtm->rtm_family = AF_INET6;
2229 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2230 rtm->rtm_src_len = rt->rt6i_src.plen;
2231 rtm->rtm_tos = 0;
2232 if (rt->rt6i_table)
2233 table = rt->rt6i_table->tb6_id;
2234 else
2235 table = RT6_TABLE_UNSPEC;
2236 rtm->rtm_table = table;
2237 NLA_PUT_U32(skb, RTA_TABLE, table);
2238 if (rt->rt6i_flags&RTF_REJECT)
2239 rtm->rtm_type = RTN_UNREACHABLE;
2240 else if (rt->rt6i_flags&RTF_LOCAL)
2241 rtm->rtm_type = RTN_LOCAL;
2242 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2243 rtm->rtm_type = RTN_LOCAL;
2244 else
2245 rtm->rtm_type = RTN_UNICAST;
2246 rtm->rtm_flags = 0;
2247 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2248 rtm->rtm_protocol = rt->rt6i_protocol;
2249 if (rt->rt6i_flags&RTF_DYNAMIC)
2250 rtm->rtm_protocol = RTPROT_REDIRECT;
2251 else if (rt->rt6i_flags & RTF_ADDRCONF)
2252 rtm->rtm_protocol = RTPROT_KERNEL;
2253 else if (rt->rt6i_flags&RTF_DEFAULT)
2254 rtm->rtm_protocol = RTPROT_RA;
2256 if (rt->rt6i_flags&RTF_CACHE)
2257 rtm->rtm_flags |= RTM_F_CLONED;
2259 if (dst) {
2260 NLA_PUT(skb, RTA_DST, 16, dst);
2261 rtm->rtm_dst_len = 128;
2262 } else if (rtm->rtm_dst_len)
2263 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2264 #ifdef CONFIG_IPV6_SUBTREES
2265 if (src) {
2266 NLA_PUT(skb, RTA_SRC, 16, src);
2267 rtm->rtm_src_len = 128;
2268 } else if (rtm->rtm_src_len)
2269 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2270 #endif
2271 if (iif) {
2272 #ifdef CONFIG_IPV6_MROUTE
2273 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2274 int err = ip6mr_get_route(net, skb, rtm, nowait);
2275 if (err <= 0) {
2276 if (!nowait) {
2277 if (err == 0)
2278 return 0;
2279 goto nla_put_failure;
2280 } else {
2281 if (err == -EMSGSIZE)
2282 goto nla_put_failure;
2285 } else
2286 #endif
2287 NLA_PUT_U32(skb, RTA_IIF, iif);
2288 } else if (dst) {
2289 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2290 struct in6_addr saddr_buf;
2291 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2292 dst, 0, &saddr_buf) == 0)
2293 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2296 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2297 goto nla_put_failure;
2299 if (rt->dst.neighbour)
2300 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2302 if (rt->dst.dev)
2303 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2305 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2307 if (!(rt->rt6i_flags & RTF_EXPIRES))
2308 expires = 0;
2309 else if (rt->rt6i_expires - jiffies < INT_MAX)
2310 expires = rt->rt6i_expires - jiffies;
2311 else
2312 expires = INT_MAX;
2314 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2315 expires, rt->dst.error) < 0)
2316 goto nla_put_failure;
2318 return nlmsg_end(skb, nlh);
2320 nla_put_failure:
2321 nlmsg_cancel(skb, nlh);
2322 return -EMSGSIZE;
2325 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2327 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2328 int prefix;
2330 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2331 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2332 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2333 } else
2334 prefix = 0;
2336 return rt6_fill_node(arg->net,
2337 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2338 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2339 prefix, 0, NLM_F_MULTI);
2342 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2344 struct net *net = sock_net(in_skb->sk);
2345 struct nlattr *tb[RTA_MAX+1];
2346 struct rt6_info *rt;
2347 struct sk_buff *skb;
2348 struct rtmsg *rtm;
2349 struct flowi fl;
2350 int err, iif = 0;
2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2353 if (err < 0)
2354 goto errout;
2356 err = -EINVAL;
2357 memset(&fl, 0, sizeof(fl));
2359 if (tb[RTA_SRC]) {
2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2361 goto errout;
2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2366 if (tb[RTA_DST]) {
2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2368 goto errout;
2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2373 if (tb[RTA_IIF])
2374 iif = nla_get_u32(tb[RTA_IIF]);
2376 if (tb[RTA_OIF])
2377 fl.oif = nla_get_u32(tb[RTA_OIF]);
2379 if (iif) {
2380 struct net_device *dev;
2381 dev = __dev_get_by_index(net, iif);
2382 if (!dev) {
2383 err = -ENODEV;
2384 goto errout;
2388 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2389 if (skb == NULL) {
2390 err = -ENOBUFS;
2391 goto errout;
2394 /* Reserve room for dummy headers, this skb can pass
2395 through good chunk of routing engine.
2397 skb_reset_mac_header(skb);
2398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2401 skb_dst_set(skb, &rt->dst);
2403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2405 nlh->nlmsg_seq, 0, 0, 0);
2406 if (err < 0) {
2407 kfree_skb(skb);
2408 goto errout;
2411 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2412 errout:
2413 return err;
2416 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2418 struct sk_buff *skb;
2419 struct net *net = info->nl_net;
2420 u32 seq;
2421 int err;
2423 err = -ENOBUFS;
2424 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2426 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2427 if (skb == NULL)
2428 goto errout;
2430 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2431 event, info->pid, seq, 0, 0, 0);
2432 if (err < 0) {
2433 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2434 WARN_ON(err == -EMSGSIZE);
2435 kfree_skb(skb);
2436 goto errout;
2438 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2439 info->nlh, gfp_any());
2440 return;
2441 errout:
2442 if (err < 0)
2443 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2446 static int ip6_route_dev_notify(struct notifier_block *this,
2447 unsigned long event, void *data)
2449 struct net_device *dev = (struct net_device *)data;
2450 struct net *net = dev_net(dev);
2452 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2453 net->ipv6.ip6_null_entry->dst.dev = dev;
2454 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2455 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2456 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2457 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2458 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2459 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2460 #endif
2463 return NOTIFY_OK;
/*
 *	/proc
 */
2470 #ifdef CONFIG_PROC_FS
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2481 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2483 struct seq_file *m = p_arg;
2485 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2487 #ifdef CONFIG_IPV6_SUBTREES
2488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2489 #else
2490 seq_puts(m, "00000000000000000000000000000000 00 ");
2491 #endif
2493 if (rt->rt6i_nexthop) {
2494 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2495 } else {
2496 seq_puts(m, "00000000000000000000000000000000");
2498 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2499 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2500 rt->dst.__use, rt->rt6i_flags,
2501 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2502 return 0;
2505 static int ipv6_route_show(struct seq_file *m, void *v)
2507 struct net *net = (struct net *)m->private;
2508 fib6_clean_all(net, rt6_info_route, 0, m);
2509 return 0;
2512 static int ipv6_route_open(struct inode *inode, struct file *file)
2514 return single_open_net(inode, file, ipv6_route_show);
2517 static const struct file_operations ipv6_route_proc_fops = {
2518 .owner = THIS_MODULE,
2519 .open = ipv6_route_open,
2520 .read = seq_read,
2521 .llseek = seq_lseek,
2522 .release = single_release_net,
2525 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2527 struct net *net = (struct net *)seq->private;
2528 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2529 net->ipv6.rt6_stats->fib_nodes,
2530 net->ipv6.rt6_stats->fib_route_nodes,
2531 net->ipv6.rt6_stats->fib_rt_alloc,
2532 net->ipv6.rt6_stats->fib_rt_entries,
2533 net->ipv6.rt6_stats->fib_rt_cache,
2534 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2535 net->ipv6.rt6_stats->fib_discarded_routes);
2537 return 0;
2540 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2542 return single_open_net(inode, file, rt6_stats_seq_show);
2545 static const struct file_operations rt6_stats_seq_fops = {
2546 .owner = THIS_MODULE,
2547 .open = rt6_stats_seq_open,
2548 .read = seq_read,
2549 .llseek = seq_lseek,
2550 .release = single_release_net,
2552 #endif /* CONFIG_PROC_FS */
2554 #ifdef CONFIG_SYSCTL
2556 static
2557 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2558 void __user *buffer, size_t *lenp, loff_t *ppos)
2560 struct net *net;
2561 int delay;
2562 if (!write)
2563 return -EINVAL;
2565 net = (struct net *)ctl->extra1;
2566 delay = net->ipv6.sysctl.flush_delay;
2567 proc_dointvec(ctl, write, buffer, lenp, ppos);
2568 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2569 return 0;
2572 ctl_table ipv6_route_table_template[] = {
2574 .procname = "flush",
2575 .data = &init_net.ipv6.sysctl.flush_delay,
2576 .maxlen = sizeof(int),
2577 .mode = 0200,
2578 .proc_handler = ipv6_sysctl_rtcache_flush
2581 .procname = "gc_thresh",
2582 .data = &ip6_dst_ops_template.gc_thresh,
2583 .maxlen = sizeof(int),
2584 .mode = 0644,
2585 .proc_handler = proc_dointvec,
2588 .procname = "max_size",
2589 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2590 .maxlen = sizeof(int),
2591 .mode = 0644,
2592 .proc_handler = proc_dointvec,
2595 .procname = "gc_min_interval",
2596 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2597 .maxlen = sizeof(int),
2598 .mode = 0644,
2599 .proc_handler = proc_dointvec_jiffies,
2602 .procname = "gc_timeout",
2603 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2604 .maxlen = sizeof(int),
2605 .mode = 0644,
2606 .proc_handler = proc_dointvec_jiffies,
2609 .procname = "gc_interval",
2610 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2611 .maxlen = sizeof(int),
2612 .mode = 0644,
2613 .proc_handler = proc_dointvec_jiffies,
2616 .procname = "gc_elasticity",
2617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2618 .maxlen = sizeof(int),
2619 .mode = 0644,
2620 .proc_handler = proc_dointvec,
2623 .procname = "mtu_expires",
2624 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2625 .maxlen = sizeof(int),
2626 .mode = 0644,
2627 .proc_handler = proc_dointvec_jiffies,
2630 .procname = "min_adv_mss",
2631 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2632 .maxlen = sizeof(int),
2633 .mode = 0644,
2634 .proc_handler = proc_dointvec,
2637 .procname = "gc_min_interval_ms",
2638 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2639 .maxlen = sizeof(int),
2640 .mode = 0644,
2641 .proc_handler = proc_dointvec_ms_jiffies,
2646 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2648 struct ctl_table *table;
2650 table = kmemdup(ipv6_route_table_template,
2651 sizeof(ipv6_route_table_template),
2652 GFP_KERNEL);
2654 if (table) {
2655 table[0].data = &net->ipv6.sysctl.flush_delay;
2656 table[0].extra1 = net;
2657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2668 return table;
2670 #endif
2672 static int __net_init ip6_route_net_init(struct net *net)
2674 int ret = -ENOMEM;
2676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
2679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
2686 goto out_ip6_dst_entries;
2687 net->ipv6.ip6_null_entry->dst.path =
2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2690 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
2692 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2693 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2694 sizeof(*net->ipv6.ip6_prohibit_entry),
2695 GFP_KERNEL);
2696 if (!net->ipv6.ip6_prohibit_entry)
2697 goto out_ip6_null_entry;
2698 net->ipv6.ip6_prohibit_entry->dst.path =
2699 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2700 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2701 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
2703 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2704 sizeof(*net->ipv6.ip6_blk_hole_entry),
2705 GFP_KERNEL);
2706 if (!net->ipv6.ip6_blk_hole_entry)
2707 goto out_ip6_prohibit_entry;
2708 net->ipv6.ip6_blk_hole_entry->dst.path =
2709 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2710 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2711 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
2712 #endif
2714 net->ipv6.sysctl.flush_delay = 0;
2715 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2716 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2717 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2718 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2719 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2720 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2721 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2723 #ifdef CONFIG_PROC_FS
2724 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2725 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2726 #endif
2727 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2729 ret = 0;
2730 out:
2731 return ret;
2733 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734 out_ip6_prohibit_entry:
2735 kfree(net->ipv6.ip6_prohibit_entry);
2736 out_ip6_null_entry:
2737 kfree(net->ipv6.ip6_null_entry);
2738 #endif
2739 out_ip6_dst_entries:
2740 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2741 out_ip6_dst_ops:
2742 goto out;
2745 static void __net_exit ip6_route_net_exit(struct net *net)
2747 #ifdef CONFIG_PROC_FS
2748 proc_net_remove(net, "ipv6_route");
2749 proc_net_remove(net, "rt6_stats");
2750 #endif
2751 kfree(net->ipv6.ip6_null_entry);
2752 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2753 kfree(net->ipv6.ip6_prohibit_entry);
2754 kfree(net->ipv6.ip6_blk_hole_entry);
2755 #endif
2756 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2759 static struct pernet_operations ip6_route_net_ops = {
2760 .init = ip6_route_net_init,
2761 .exit = ip6_route_net_exit,
2764 static struct notifier_block ip6_route_dev_notifier = {
2765 .notifier_call = ip6_route_dev_notify,
2766 .priority = 0,
2769 int __init ip6_route_init(void)
2771 int ret;
2773 ret = -ENOMEM;
2774 ip6_dst_ops_template.kmem_cachep =
2775 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2776 SLAB_HWCACHE_ALIGN, NULL);
2777 if (!ip6_dst_ops_template.kmem_cachep)
2778 goto out;
2780 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2781 if (ret)
2782 goto out_kmem_cache;
2784 ret = register_pernet_subsys(&ip6_route_net_ops);
2785 if (ret)
2786 goto out_dst_entries;
2788 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2790 /* Registering of the loopback is done before this portion of code,
2791 * the loopback reference in rt6_info will not be taken, do it
2792 * manually for init_net */
2793 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2794 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2796 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2797 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2798 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2799 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2800 #endif
2801 ret = fib6_init();
2802 if (ret)
2803 goto out_register_subsys;
2805 ret = xfrm6_init();
2806 if (ret)
2807 goto out_fib6_init;
2809 ret = fib6_rules_init();
2810 if (ret)
2811 goto xfrm6_init;
2813 ret = -ENOBUFS;
2814 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2815 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2816 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2817 goto fib6_rules_init;
2819 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2820 if (ret)
2821 goto fib6_rules_init;
2823 out:
2824 return ret;
2826 fib6_rules_init:
2827 fib6_rules_cleanup();
2828 xfrm6_init:
2829 xfrm6_fini();
2830 out_fib6_init:
2831 fib6_gc_cleanup();
2832 out_register_subsys:
2833 unregister_pernet_subsys(&ip6_route_net_ops);
2834 out_dst_entries:
2835 dst_entries_destroy(&ip6_dst_blackhole_ops);
2836 out_kmem_cache:
2837 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2838 goto out;
2841 void ip6_route_cleanup(void)
2843 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2844 fib6_rules_cleanup();
2845 xfrm6_fini();
2846 fib6_gc_cleanup();
2847 unregister_pernet_subsys(&ip6_route_net_ops);
2848 dst_entries_destroy(&ip6_dst_blackhole_ops);
2849 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);