GUI: Fix Tomato RAF theme for all builds. Compilation typo.
[tomato.git] / release / src-rt-6.x.4708 / linux / linux-2.6.36 / net / ipv6 / route.c
blobba23246066e8a9c1f5b38db493fbf277bdf25757
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* Verbose debug helpers: compiled in only at debug level >= 3. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Debug disabled: both macros expand to nothing (no side effects). */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* When non-zero, ip6_pol_route() also caches clones of off-link routes
 * via rt6_alloc_clone(); disabled (0) in this build. */
#define CLONE_OFFLINK_ROUTE 0
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
/* Template dst_ops for normal IPv6 routes; copied per-netns at init.
 * Wires the dst cache callbacks to the local ip6_* implementations. */
static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,	/* GC target for cached entries */
	.check			= ip6_dst_check,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.local_out		= __ip6_local_out,
	.entries		= ATOMIC_INIT(0),
};
/* PMTU updates are meaningless on a blackhole dst: intentionally empty. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}

/* dst_ops for blackhole copies made by ip6_dst_blackhole(): no GC,
 * no ifdown/link-failure handling, PMTU updates ignored. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.protocol		= cpu_to_be16(ETH_P_IPV6),
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.entries		= ATOMIC_INIT(0),
};
/* Template for the per-netns "null" route: returned when no route
 * matches.  Input/output both discard and the error is -ENETUNREACH. */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route used by policy routing rules:
 * packets are rejected with an administratively-prohibited error. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the "blackhole" rule action: packets are silently
 * dropped (dst_discard) and callers see -EINVAL. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	/* dst_entry is the first member of rt6_info, so the cast is valid. */
	return (struct rt6_info *)dst_alloc(ops);
}
189 static void ip6_dst_destroy(struct dst_entry *dst)
191 struct rt6_info *rt = (struct rt6_info *)dst;
192 struct inet6_dev *idev = rt->rt6i_idev;
194 if (idev != NULL) {
195 rt->rt6i_idev = NULL;
196 in6_dev_put(idev);
/* dst_ops->ifdown: the route's device is going away.  Re-point the
 * route's idev at the netns loopback device so the stale reference is
 * released (swap only on success of in6_dev_get). */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			/* Take the loopback ref first, then drop ours. */
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}
218 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
224 static inline int rt6_need_strict(struct in6_addr *daddr)
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Walk the sibling list starting at @rt and pick the entry that best
 * matches the requested output interface (@oif) and/or source address.
 * Falls back to a loopback "local" candidate, the null entry (when
 * RT6_LOOKUP_F_IFACE demands a device match), or @rt itself. */
static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* Nothing to constrain on: return the head entry unchanged. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;	/* exact device match */
			if (dev->flags & IFF_LOOPBACK) {
				/* Loopback route: remember it as a fallback
				 * unless a better local candidate exists. */
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			/* No oif: match on source address ownership. */
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict interface match requested but none found. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Kick off a neighbour solicitation towards the route's next hop when
 * its reachability state is unknown (Router Reachability Probing). */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 */

	/*
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	/* Re-check state under the lock; rate-limit on neigh->updated. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		/* Send the NS to the solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
/* Probing compiled out without router-preference support. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
317 * Default Router Selection (RFC 2461 6.3.6)
319 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
321 struct net_device *dev = rt->rt6i_dev;
322 if (!oif || dev->ifindex == oif)
323 return 2;
324 if ((dev->flags & IFF_LOOPBACK) &&
325 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
326 return 1;
327 return 0;
/* Neighbour-reachability score for route selection:
 * 2 = next hop known reachable, 1 = no next hop needed or state
 * unknown, 0 = no neighbour entry (or NUD_FAILED with router pref). */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;	/* directly connected: nothing to verify */
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}
/* Combined score for default-router selection: device match in the low
 * bits, decoded router preference above them.  Returns -1 when a
 * strict requirement (interface or reachability) is not met. */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	/* Fold RFC 4191 preference into the score above the device bits. */
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}
/* Compare @rt against the current best @match (score in *mpri) and
 * return the better of the two.  Probes the losing route's next hop
 * when reachability is required. */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		/* New winner: probe the previous best before replacing it. */
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}
/* Scan all routes of the given @metric in round-robin order: first
 * from @rr_head to the end of the equal-metric run, then from the
 * node's leaf back up to @rr_head.  Returns the best-scoring route. */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}
/* Select the best route at fib6 node @fn, honouring the per-node
 * round-robin pointer (fn->rr_ptr).  When nothing matches under the
 * reachability requirement, advance the round-robin pointer so the
 * next lookup tries a different router.  Never returns NULL: falls
 * back to the netns null entry. */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return (match ? match : net->ipv6.ip6_null_entry);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (RFC 4191) received in a Router
 * Advertisement on @dev from @gwaddr: validate it, then add, update or
 * delete the corresponding RTF_ROUTEINFO route.  Returns 0 on success
 * or -EINVAL for a malformed option. */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 2 units */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime means: withdraw the route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* Existing route: refresh the preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
/* When the lookup landed on the null entry, climb the fib6 tree
 * towards the root, descending into source-address subtrees where
 * present, until a node carrying route info (RTN_RTINFO) is found —
 * then jump back to the caller's "restart" label.  Bails to "out" at
 * the tree root.  Expects fn, rt and the two labels in caller scope. */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
/* Simple (non-caching) policy lookup in one table: find the node for
 * the flow's destination, pick the device-matching route, backtrack on
 * the null entry, and return the result with its use count bumped. */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);	/* may jump back to restart */
out:
	dst_use(&rt->dst, jiffies);	/* take ref + update lastuse */
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
/* Public route lookup by destination/source/oif.  Returns a referenced
 * rt6_info on success or NULL on error; @strict forces an interface
 * match.  Caller must release the returned route with dst_release(). */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi fl = {
		.oif = oif,
		.nl_u = {
			.ip6_u = {
				.daddr = *daddr,
			},
		},
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	/* Error routes (null/prohibit/...) are not returned to callers. */
	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);
588 /* ip6_ins_rt is called with FREE table->tb6_lock.
589 It takes new route entry, the addition fails by any reason the
590 route is freed. In any case, if caller does not hold it, it may
591 be destroyed.
594 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
596 int err;
597 struct fib6_table *table;
599 table = rt->rt6i_table;
600 write_lock_bh(&table->tb6_lock);
601 err = fib6_add(&table->tb6_root, rt, info);
602 write_unlock_bh(&table->tb6_lock);
604 return err;
607 int ip6_ins_rt(struct rt6_info *rt)
609 struct nl_info info = {
610 .nl_net = dev_net(rt->rt6i_dev),
612 return __ip6_ins_rt(rt, &info);
/* Clone @ort into a host (/128) RTF_CACHE route for @daddr and bind a
 * neighbour entry to it.  On neighbour-table overflow the routing
 * cache is force-GCed once (with temporarily relaxed sysctl limits)
 * and the neighbour lookup retried.  Returns NULL on failure. */
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
				      struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		struct neighbour *neigh;
		/* Only retry GC when not in softirq context. */
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
			/* On-link destination: it becomes its own gateway;
			 * mark anycast when cloning a wider prefix. */
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				/* Force an aggressive GC pass, then restore
				 * the tunables and retry the lookup. */
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		rt->rt6i_nexthop = neigh;

	}

	return rt;
}
684 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
686 struct rt6_info *rt = ip6_rt_copy(ort);
687 if (rt) {
688 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
689 rt->rt6i_dst.plen = 128;
690 rt->rt6i_flags |= RTF_CACHE;
691 rt->dst.flags |= DST_HOST;
692 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
694 return rt;
/* Core policy-routing lookup with route cloning.  Finds the best route
 * in @table for flow @fl; non-cached routes without a bound neighbour
 * are COW-cloned into /128 cache entries and inserted.  Handles the
 * insert race by relooking up, and retries once without the
 * reachability requirement when nothing reachable matched.  Always
 * returns a referenced route (possibly the null entry). */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;	/* bound on insert-race relookups */
	int err;
	/* Routers don't require next-hop reachability for forwarding. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl->fl6_src);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Route needs a clone; drop the lock while allocating. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		goto out2;
#endif
	}

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		/* Nothing reachable matched: retry without the requirement. */
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
/* fib-rules callback for the input path: route on the incoming iif. */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi *fl, int flags)
{
	return ip6_pol_route(net, table, fl->iif, fl, flags);
}
/* Route an incoming packet: build a flow key from the IPv6 header and
 * attach the looked-up dst to the skb. */
void ip6_route_input(struct sk_buff *skb)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi fl = {
		.iif = skb->dev->ifindex,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = iph->saddr,
				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
			},
		},
		.mark = skb->mark,
		.proto = iph->nexthdr,
	};

	/* Vendor tweak: PIM register devices are exempt from strict
	 * interface matching. */
	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
}
/* fib-rules callback for the output path: route on the outgoing oif. */
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	return ip6_pol_route(net, table, fl->oif, fl, flags);
}
/* Route an outgoing flow for @sk.  Strict interface matching applies
 * when the socket is bound to a device or the destination demands it;
 * without a source address, the socket's address-selection preferences
 * are folded into the lookup flags. */
struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
				    struct flowi *fl)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl->fl6_src))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);
/* Replace *dstp with a "blackhole" copy of itself: same addressing and
 * metrics, but input/output discard packets.  Used to keep a socket's
 * dst usable while the real route is being torn down.  The original
 * *dstp reference is always released; returns 0 or -ENOMEM. */
int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
{
	struct rt6_info *ort = (struct rt6_info *) *dstp;
	struct rt6_info *rt = (struct rt6_info *)
		dst_alloc(&ip6_dst_blackhole_ops);
	struct dst_entry *new = NULL;

	if (rt) {
		new = &rt->dst;

		atomic_set(&new->__refcnt, 1);
		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		/* Copy metrics, device and idev refs from the original. */
		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* Hand the entry to the dst GC; our __refcnt keeps it alive. */
		dst_free(new);
	}

	dst_release(*dstp);
	*dstp = new;
	return (new ? 0 : -ENOMEM);
}

EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
867 * Destination cache support functions
870 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
872 struct rt6_info *rt;
874 rt = (struct rt6_info *) dst;
876 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
877 return dst;
879 return NULL;
/* dst_ops->negative_advice: drop expired cache entries from the table,
 * otherwise just release the caller's reference.  Returns the dst to
 * keep (NULL when the caller should forget it). */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);	/* consumes the reference */
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
/* dst_ops->link_failure: report unreachability to the sender and make
 * the offending route age out (cache entry) or force relookup by
 * invalidating its node's serial number (default route). */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->dst, 0);	/* expire immediately */
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}
/* dst_ops->update_pmtu: lower a host route's cached MTU.  MTUs below
 * IPV6_MIN_MTU are clamped and the route switched to all-fragment
 * mode (RTAX_FEATURE_ALLFRAG) instead. */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	/* Only shrink, and only on /128 (host) routes. */
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		}
		dst->metrics[RTAX_MTU-1] = mtu;
		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
	}
}
931 static int ipv6_get_mtu(struct net_device *dev);
933 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
935 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
937 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
938 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
941 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
942 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
943 * IPV6_MAXPLEN is also valid and means: "any MSS,
944 * rely only on pmtu discovery"
946 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
947 mtu = IPV6_MAXPLEN;
948 return mtu;
951 static struct dst_entry *icmp6_dst_gc_list;
952 static DEFINE_SPINLOCK(icmp6_dst_lock);
954 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
955 struct neighbour *neigh,
956 const struct in6_addr *addr)
958 struct rt6_info *rt;
959 struct inet6_dev *idev = in6_dev_get(dev);
960 struct net *net = dev_net(dev);
962 if (unlikely(idev == NULL))
963 return NULL;
965 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
966 if (unlikely(rt == NULL)) {
967 in6_dev_put(idev);
968 goto out;
971 dev_hold(dev);
972 if (neigh)
973 neigh_hold(neigh);
974 else {
975 neigh = ndisc_get_neigh(dev, addr);
976 if (IS_ERR(neigh))
977 neigh = NULL;
980 rt->rt6i_dev = dev;
981 rt->rt6i_idev = idev;
982 rt->rt6i_nexthop = neigh;
983 atomic_set(&rt->dst.__refcnt, 1);
984 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
985 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
986 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
987 rt->dst.output = ip6_output;
990 spin_lock_bh(&icmp6_dst_lock);
991 rt->dst.next = icmp6_dst_gc_list;
992 icmp6_dst_gc_list = &rt->dst;
993 spin_unlock_bh(&icmp6_dst_lock);
995 fib6_force_start_gc(net);
997 out:
998 return &rt->dst;
1001 int icmp6_dst_gc(void)
1003 struct dst_entry *dst, *next, **pprev;
1004 int more = 0;
1006 next = NULL;
1008 spin_lock_bh(&icmp6_dst_lock);
1009 pprev = &icmp6_dst_gc_list;
1011 while ((dst = *pprev) != NULL) {
1012 if (!atomic_read(&dst->__refcnt)) {
1013 *pprev = dst->next;
1014 dst_free(dst);
1015 } else {
1016 pprev = &dst->next;
1017 ++more;
1021 spin_unlock_bh(&icmp6_dst_lock);
1023 return more;
/* Walk icmp6_dst_gc_list and free every entry for which @func returns
 * non-zero.  @arg is passed through to @func. */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			/* Unlink before freeing. */
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
/* dst_ops->gc for IPv6 routes: run fib6 GC with an adaptive expiry
 * (ip6_rt_gc_expire grows while under pressure, decays by the
 * elasticity shift otherwise).  Returns non-zero while the entry count
 * still exceeds the configured maximum. */
static int ip6_dst_gc(struct dst_ops *ops)
{
	unsigned long now = jiffies;
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;

	/* Rate-limit full GC passes unless we are over the size cap. */
	if (time_after(rt_last_gc + rt_min_interval, now) &&
	    atomic_read(&ops->entries) <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
	net->ipv6.ip6_rt_last_gc = now;
	if (atomic_read(&ops->entries) < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	/* Exponential decay of the expiry aggressiveness. */
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return (atomic_read(&ops->entries) > rt_max_size);
}
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

/* Device MTU as seen by IPv6 (idev->cnf.mtu6), falling back to
 * IPV6_MIN_MTU when the device has no inet6_dev. */
static int ipv6_get_mtu(struct net_device *dev)
{
	int mtu = IPV6_MIN_MTU;
	struct inet6_dev *idev;

	rcu_read_lock();
	idev = __in6_dev_get(dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();
	return mtu;
}
/* Hop limit for packets sent via @dst: the route's RTAX_HOPLIMIT
 * metric when set (>= 0), otherwise the device's (or the netns-wide)
 * configured default. */
int ip6_dst_hoplimit(struct dst_entry *dst)
{
	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
	if (hoplimit < 0) {
		struct net_device *dev = dst->dev;
		struct inet6_dev *idev;

		rcu_read_lock();
		idev = __in6_dev_get(dev);
		if (idev)
			hoplimit = idev->cnf.hop_limit;
		else
			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
		rcu_read_unlock();
	}
	return hoplimit;
}
/* Add an IPv6 route described by @cfg (from netlink or ioctl).
 * Validates prefix lengths, resolves the output device and gateway,
 * promotes true loopback routes to reject routes, applies netlink
 * metrics, and inserts the result into the selected FIB table.
 * Returns 0 or a negative errno; on failure all acquired device/idev
 * references and the allocated route are released via the out: path. */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-routing prefixes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(net, cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->dst.obsolete = -1;
	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
				0;

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway itself must already be reachable. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* Inherit device/idev from the gateway route. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* Apply netlink-supplied metrics (RTA_METRICS nest). */
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				rt->dst.metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	/* Fill in defaults for metrics not explicitly provided. */
	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!dst_mtu(&rt->dst))
		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}
1316 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1318 int err;
1319 struct fib6_table *table;
1320 struct net *net = dev_net(rt->rt6i_dev);
1322 if (rt == net->ipv6.ip6_null_entry)
1323 return -ENOENT;
1325 table = rt->rt6i_table;
1326 write_lock_bh(&table->tb6_lock);
1328 err = fib6_del(rt, info);
1329 dst_release(&rt->dst);
1331 write_unlock_bh(&table->tb6_lock);
1333 return err;
1336 int ip6_del_rt(struct rt6_info *rt)
1338 struct nl_info info = {
1339 .nl_net = dev_net(rt->rt6i_dev),
1341 return __ip6_del_rt(rt, &info);
1344 static int ip6_route_del(struct fib6_config *cfg)
1346 struct fib6_table *table;
1347 struct fib6_node *fn;
1348 struct rt6_info *rt;
1349 int err = -ESRCH;
1351 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1352 if (table == NULL)
1353 return err;
1355 read_lock_bh(&table->tb6_lock);
1357 fn = fib6_locate(&table->tb6_root,
1358 &cfg->fc_dst, cfg->fc_dst_len,
1359 &cfg->fc_src, cfg->fc_src_len);
1361 if (fn) {
1362 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1363 if (cfg->fc_ifindex &&
1364 (rt->rt6i_dev == NULL ||
1365 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1366 continue;
1367 if (cfg->fc_flags & RTF_GATEWAY &&
1368 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1369 continue;
1370 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1371 continue;
1372 dst_hold(&rt->dst);
1373 read_unlock_bh(&table->tb6_lock);
1375 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1378 read_unlock_bh(&table->tb6_lock);
1380 return err;
1384 * Handle redirects
1386 struct ip6rd_flowi {
1387 struct flowi fl;
1388 struct in6_addr gateway;
1391 static struct rt6_info *__ip6_route_redirect(struct net *net,
1392 struct fib6_table *table,
1393 struct flowi *fl,
1394 int flags)
1396 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1397 struct rt6_info *rt;
1398 struct fib6_node *fn;
1401 * Get the "current" route for this destination and
1402 * check if the redirect has come from approriate router.
1404 * RFC 2461 specifies that redirects should only be
1405 * accepted if they come from the nexthop to the target.
1406 * Due to the way the routes are chosen, this notion
1407 * is a bit fuzzy and one might need to check all possible
1408 * routes.
1411 read_lock_bh(&table->tb6_lock);
1412 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1413 restart:
1414 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1416 * Current route is on-link; redirect is always invalid.
1418 * Seems, previous statement is not true. It could
1419 * be node, which looks for us as on-link (f.e. proxy ndisc)
1420 * But then router serving it might decide, that we should
1421 * know truth 8)8) --ANK (980726).
1423 if (rt6_check_expired(rt))
1424 continue;
1425 if (!(rt->rt6i_flags & RTF_GATEWAY))
1426 continue;
1427 if (fl->oif != rt->rt6i_dev->ifindex)
1428 continue;
1429 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1430 continue;
1431 break;
1434 if (!rt)
1435 rt = net->ipv6.ip6_null_entry;
1436 BACKTRACK(net, &fl->fl6_src);
1437 out:
1438 dst_hold(&rt->dst);
1440 read_unlock_bh(&table->tb6_lock);
1442 return rt;
1445 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1446 struct in6_addr *src,
1447 struct in6_addr *gateway,
1448 struct net_device *dev)
1450 int flags = RT6_LOOKUP_F_HAS_SADDR;
1451 struct net *net = dev_net(dev);
1452 struct ip6rd_flowi rdfl = {
1453 .fl = {
1454 .oif = dev->ifindex,
1455 .nl_u = {
1456 .ip6_u = {
1457 .daddr = *dest,
1458 .saddr = *src,
1464 ipv6_addr_copy(&rdfl.gateway, gateway);
1466 if (rt6_need_strict(dest))
1467 flags |= RT6_LOOKUP_F_IFACE;
1469 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1470 flags, __ip6_route_redirect);
1473 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1474 struct in6_addr *saddr,
1475 struct neighbour *neigh, u8 *lladdr, int on_link)
1477 struct rt6_info *rt, *nrt = NULL;
1478 struct netevent_redirect netevent;
1479 struct net *net = dev_net(neigh->dev);
1481 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1483 if (rt == net->ipv6.ip6_null_entry) {
1484 if (net_ratelimit())
1485 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1486 "for redirect target\n");
1487 goto out;
1491 * We have finally decided to accept it.
1494 neigh_update(neigh, lladdr, NUD_STALE,
1495 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1496 NEIGH_UPDATE_F_OVERRIDE|
1497 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1498 NEIGH_UPDATE_F_ISROUTER))
1502 * Redirect received -> path was valid.
1503 * Look, redirects are sent only in response to data packets,
1504 * so that this nexthop apparently is reachable. --ANK
1506 dst_confirm(&rt->dst);
1508 /* Duplicate redirect: silently ignore. */
1509 if (neigh == rt->dst.neighbour)
1510 goto out;
1512 nrt = ip6_rt_copy(rt);
1513 if (nrt == NULL)
1514 goto out;
1516 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1517 if (on_link)
1518 nrt->rt6i_flags &= ~RTF_GATEWAY;
1520 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1521 nrt->rt6i_dst.plen = 128;
1522 nrt->dst.flags |= DST_HOST;
1524 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1525 nrt->rt6i_nexthop = neigh_clone(neigh);
1526 /* Reset pmtu, it may be better */
1527 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1528 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1529 dst_mtu(&nrt->dst));
1531 if (ip6_ins_rt(nrt))
1532 goto out;
1534 netevent.old = &rt->dst;
1535 netevent.new = &nrt->dst;
1536 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1538 if (rt->rt6i_flags&RTF_CACHE) {
1539 ip6_del_rt(rt);
1540 return;
1543 out:
1544 dst_release(&rt->dst);
1548 * Handle ICMP "packet too big" messages
1549 * i.e. Path MTU discovery
1552 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1553 struct net *net, u32 pmtu, int ifindex)
1555 struct rt6_info *rt, *nrt;
1556 int allfrag = 0;
1558 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1559 if (rt == NULL)
1560 return;
1562 if (pmtu >= dst_mtu(&rt->dst))
1563 goto out;
1565 if (pmtu < IPV6_MIN_MTU) {
1567 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1568 * MTU (1280) and a fragment header should always be included
1569 * after a node receiving Too Big message reporting PMTU is
1570 * less than the IPv6 Minimum Link MTU.
1572 pmtu = IPV6_MIN_MTU;
1573 allfrag = 1;
1576 /* New mtu received -> path was valid.
1577 They are sent only in response to data packets,
1578 so that this nexthop apparently is reachable. --ANK
1580 dst_confirm(&rt->dst);
1582 /* Host route. If it is static, it would be better
1583 not to override it, but add new one, so that
1584 when cache entry will expire old pmtu
1585 would return automatically.
1587 if (rt->rt6i_flags & RTF_CACHE) {
1588 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1589 if (allfrag)
1590 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1591 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1592 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1593 goto out;
1596 /* Network route.
1597 Two cases are possible:
1598 1. It is connected route. Action: COW
1599 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1601 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1602 nrt = rt6_alloc_cow(rt, daddr, saddr);
1603 else
1604 nrt = rt6_alloc_clone(rt, daddr);
1606 if (nrt) {
1607 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
1608 if (allfrag)
1609 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1611 /* According to RFC 1981, detecting PMTU increase shouldn't be
1612 * happened within 5 mins, the recommended timer is 10 mins.
1613 * Here this route expiration time is set to ip6_rt_mtu_expires
1614 * which is 10 mins. After 10 mins the decreased pmtu is expired
1615 * and detecting PMTU increase will be automatically happened.
1617 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1618 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1620 ip6_ins_rt(nrt);
1622 out:
1623 dst_release(&rt->dst);
1626 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1627 struct net_device *dev, u32 pmtu)
1629 struct net *net = dev_net(dev);
1632 * RFC 1981 states that a node "MUST reduce the size of the packets it
1633 * is sending along the path" that caused the Packet Too Big message.
1634 * Since it's not possible in the general case to determine which
1635 * interface was used to send the original packet, we update the MTU
1636 * on the interface that will be used to send future packets. We also
1637 * update the MTU on the interface that received the Packet Too Big in
1638 * case the original packet was forced out that interface with
1639 * SO_BINDTODEVICE or similar. This is the next best thing to the
1640 * correct behaviour, which would be to update the MTU on all
1641 * interfaces.
1643 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1644 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1648 * Misc support functions
1651 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1653 struct net *net = dev_net(ort->rt6i_dev);
1654 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1656 if (rt) {
1657 rt->dst.input = ort->dst.input;
1658 rt->dst.output = ort->dst.output;
1660 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1661 rt->dst.error = ort->dst.error;
1662 rt->dst.dev = ort->dst.dev;
1663 if (rt->dst.dev)
1664 dev_hold(rt->dst.dev);
1665 rt->rt6i_idev = ort->rt6i_idev;
1666 if (rt->rt6i_idev)
1667 in6_dev_hold(rt->rt6i_idev);
1668 rt->dst.lastuse = jiffies;
1669 rt->rt6i_expires = 0;
1671 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1672 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1673 rt->rt6i_metric = 0;
1675 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1676 #ifdef CONFIG_IPV6_SUBTREES
1677 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1678 #endif
1679 rt->rt6i_table = ort->rt6i_table;
1681 return rt;
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA Route Information route in RT6_TABLE_INFO that
 * matches prefix, interface and gateway; returns it held, or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install an RA Route Information route and return the (held) new
 * entry via rt6_get_route_info().
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
1747 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1749 struct rt6_info *rt;
1750 struct fib6_table *table;
1752 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1753 if (table == NULL)
1754 return NULL;
1756 write_lock_bh(&table->tb6_lock);
1757 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1758 if (dev == rt->rt6i_dev &&
1759 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1760 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1761 break;
1763 if (rt)
1764 dst_hold(&rt->dst);
1765 write_unlock_bh(&table->tb6_lock);
1766 return rt;
1769 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1770 struct net_device *dev,
1771 unsigned int pref)
1773 struct fib6_config cfg = {
1774 .fc_table = RT6_TABLE_DFLT,
1775 .fc_metric = IP6_RT_PRIO_USER,
1776 .fc_ifindex = dev->ifindex,
1777 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1778 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1779 .fc_nlinfo.pid = 0,
1780 .fc_nlinfo.nlh = NULL,
1781 .fc_nlinfo.nl_net = dev_net(dev),
1784 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1786 ip6_route_add(&cfg);
1788 return rt6_get_dflt_router(gwaddr, dev);
1791 void rt6_purge_dflt_routers(struct net *net)
1793 struct rt6_info *rt;
1794 struct fib6_table *table;
1796 /* NOTE: Keep consistent with rt6_get_dflt_router */
1797 table = fib6_get_table(net, RT6_TABLE_DFLT);
1798 if (table == NULL)
1799 return;
1801 restart:
1802 read_lock_bh(&table->tb6_lock);
1803 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1804 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1805 dst_hold(&rt->dst);
1806 read_unlock_bh(&table->tb6_lock);
1807 ip6_del_rt(rt);
1808 goto restart;
1811 read_unlock_bh(&table->tb6_lock);
1814 static void rtmsg_to_fib6_config(struct net *net,
1815 struct in6_rtmsg *rtmsg,
1816 struct fib6_config *cfg)
1818 memset(cfg, 0, sizeof(*cfg));
1820 cfg->fc_table = RT6_TABLE_MAIN;
1821 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1822 cfg->fc_metric = rtmsg->rtmsg_metric;
1823 cfg->fc_expires = rtmsg->rtmsg_info;
1824 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1825 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1826 cfg->fc_flags = rtmsg->rtmsg_flags;
1828 cfg->fc_nlinfo.nl_net = net;
1830 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1831 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1832 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1835 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1837 struct fib6_config cfg;
1838 struct in6_rtmsg rtmsg;
1839 int err;
1841 switch(cmd) {
1842 case SIOCADDRT: /* Add a route */
1843 case SIOCDELRT: /* Delete a route */
1844 if (!capable(CAP_NET_ADMIN))
1845 return -EPERM;
1846 err = copy_from_user(&rtmsg, arg,
1847 sizeof(struct in6_rtmsg));
1848 if (err)
1849 return -EFAULT;
1851 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1853 rtnl_lock();
1854 switch (cmd) {
1855 case SIOCADDRT:
1856 err = ip6_route_add(&cfg);
1857 break;
1858 case SIOCDELRT:
1859 err = ip6_route_del(&cfg);
1860 break;
1861 default:
1862 err = -EINVAL;
1864 rtnl_unlock();
1866 return err;
1869 return -EINVAL;
1873 * Drop the packet on the floor
1876 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1878 int type;
1879 struct dst_entry *dst = skb_dst(skb);
1880 switch (ipstats_mib_noroutes) {
1881 case IPSTATS_MIB_INNOROUTES:
1882 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1883 if (type == IPV6_ADDR_ANY) {
1884 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1885 IPSTATS_MIB_INADDRERRORS);
1886 break;
1888 /* FALLTHROUGH */
1889 case IPSTATS_MIB_OUTNOROUTES:
1890 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1891 ipstats_mib_noroutes);
1892 break;
1894 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1895 kfree_skb(skb);
1896 return 0;
1899 static int ip6_pkt_discard(struct sk_buff *skb)
1901 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1904 static int ip6_pkt_discard_out(struct sk_buff *skb)
1906 skb->dev = skb_dst(skb)->dev;
1907 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* dst input handler for the prohibit route: drop administratively. */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

/* dst output handler for the prohibit route: drop administratively. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif
1926 * Allocate a dst for local (unicast / anycast) address.
1929 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1930 const struct in6_addr *addr,
1931 int anycast)
1933 struct net *net = dev_net(idev->dev);
1934 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1935 struct neighbour *neigh;
1937 if (rt == NULL)
1938 return ERR_PTR(-ENOMEM);
1940 dev_hold(net->loopback_dev);
1941 in6_dev_hold(idev);
1943 rt->dst.flags = DST_HOST;
1944 rt->dst.input = ip6_input;
1945 rt->dst.output = ip6_output;
1946 rt->rt6i_dev = net->loopback_dev;
1947 rt->rt6i_idev = idev;
1948 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1949 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1950 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1951 rt->dst.obsolete = -1;
1953 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1954 if (anycast)
1955 rt->rt6i_flags |= RTF_ANYCAST;
1956 else
1957 rt->rt6i_flags |= RTF_LOCAL;
1958 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1959 if (IS_ERR(neigh)) {
1960 dst_free(&rt->dst);
1962 /* We are casting this because that is the return
1963 * value type. But an errno encoded pointer is the
1964 * same regardless of the underlying pointer type,
1965 * and that's what we are returning. So this is OK.
1967 return (struct rt6_info *) neigh;
1969 rt->rt6i_nexthop = neigh;
1971 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1972 rt->rt6i_dst.plen = 128;
1973 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1975 atomic_set(&rt->dst.__refcnt, 1);
1977 return rt;
1980 struct arg_dev_net {
1981 struct net_device *dev;
1982 struct net *net;
1985 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1987 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1988 struct net *net = ((struct arg_dev_net *)arg)->net;
1990 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1991 rt != net->ipv6.ip6_null_entry) {
1992 RT6_TRACE("deleted by ifdown %p\n", rt);
1993 return -1;
1995 return 0;
1998 void rt6_ifdown(struct net *net, struct net_device *dev)
2000 struct arg_dev_net adn = {
2001 .dev = dev,
2002 .net = net,
2005 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2006 icmp6_clean_all(fib6_ifdown, &adn);
2009 struct rt6_mtu_change_arg
2011 struct net_device *dev;
2012 unsigned mtu;
2015 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2017 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2018 struct inet6_dev *idev;
2019 struct net *net = dev_net(arg->dev);
2021 /* In IPv6 pmtu discovery is not optional,
2022 so that RTAX_MTU lock cannot disable it.
2023 We still use this lock to block changes
2024 caused by addrconf/ndisc.
2027 idev = __in6_dev_get(arg->dev);
2028 if (idev == NULL)
2029 return 0;
2031 /* For administrative MTU increase, there is no way to discover
2032 IPv6 PMTU increase, so PMTU increase should be updated here.
2033 Since RFC 1981 doesn't include administrative MTU increase
2034 update PMTU increase is a MUST. (i.e. jumbo frame)
2037 If new MTU is less than route PMTU, this new MTU will be the
2038 lowest MTU in the path, update the route PMTU to reflect PMTU
2039 decreases; if new MTU is greater than route PMTU, and the
2040 old MTU is the lowest MTU in the path, update the route PMTU
2041 to reflect the increase. In this case if the other nodes' MTU
2042 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2043 PMTU discouvery.
2045 if (rt->rt6i_dev == arg->dev &&
2046 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2047 (dst_mtu(&rt->dst) >= arg->mtu ||
2048 (dst_mtu(&rt->dst) < arg->mtu &&
2049 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2050 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2051 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2053 return 0;
2056 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2058 struct rt6_mtu_change_arg arg = {
2059 .dev = dev,
2060 .mtu = mtu,
2063 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2066 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2067 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2068 [RTA_OIF] = { .type = NLA_U32 },
2069 [RTA_IIF] = { .type = NLA_U32 },
2070 [RTA_PRIORITY] = { .type = NLA_U32 },
2071 [RTA_METRICS] = { .type = NLA_NESTED },
2074 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2075 struct fib6_config *cfg)
2077 struct rtmsg *rtm;
2078 struct nlattr *tb[RTA_MAX+1];
2079 int err;
2081 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2082 if (err < 0)
2083 goto errout;
2085 err = -EINVAL;
2086 rtm = nlmsg_data(nlh);
2087 memset(cfg, 0, sizeof(*cfg));
2089 cfg->fc_table = rtm->rtm_table;
2090 cfg->fc_dst_len = rtm->rtm_dst_len;
2091 cfg->fc_src_len = rtm->rtm_src_len;
2092 cfg->fc_flags = RTF_UP;
2093 cfg->fc_protocol = rtm->rtm_protocol;
2095 if (rtm->rtm_type == RTN_UNREACHABLE)
2096 cfg->fc_flags |= RTF_REJECT;
2098 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2099 cfg->fc_nlinfo.nlh = nlh;
2100 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2102 if (tb[RTA_GATEWAY]) {
2103 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2104 cfg->fc_flags |= RTF_GATEWAY;
2107 if (tb[RTA_DST]) {
2108 int plen = (rtm->rtm_dst_len + 7) >> 3;
2110 if (nla_len(tb[RTA_DST]) < plen)
2111 goto errout;
2113 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2116 if (tb[RTA_SRC]) {
2117 int plen = (rtm->rtm_src_len + 7) >> 3;
2119 if (nla_len(tb[RTA_SRC]) < plen)
2120 goto errout;
2122 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2125 if (tb[RTA_OIF])
2126 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2128 if (tb[RTA_PRIORITY])
2129 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2131 if (tb[RTA_METRICS]) {
2132 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2133 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2136 if (tb[RTA_TABLE])
2137 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2139 err = 0;
2140 errout:
2141 return err;
2144 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2146 struct fib6_config cfg;
2147 int err;
2149 err = rtm_to_fib6_config(skb, nlh, &cfg);
2150 if (err < 0)
2151 return err;
2153 return ip6_route_del(&cfg);
2156 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2158 struct fib6_config cfg;
2159 int err;
2161 err = rtm_to_fib6_config(skb, nlh, &cfg);
2162 if (err < 0)
2163 return err;
2165 return ip6_route_add(&cfg);
2168 static inline size_t rt6_nlmsg_size(void)
2170 return NLMSG_ALIGN(sizeof(struct rtmsg))
2171 + nla_total_size(16) /* RTA_SRC */
2172 + nla_total_size(16) /* RTA_DST */
2173 + nla_total_size(16) /* RTA_GATEWAY */
2174 + nla_total_size(16) /* RTA_PREFSRC */
2175 + nla_total_size(4) /* RTA_TABLE */
2176 + nla_total_size(4) /* RTA_IIF */
2177 + nla_total_size(4) /* RTA_OIF */
2178 + nla_total_size(4) /* RTA_PRIORITY */
2179 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2180 + nla_total_size(sizeof(struct rta_cacheinfo));
2183 static int rt6_fill_node(struct net *net,
2184 struct sk_buff *skb, struct rt6_info *rt,
2185 struct in6_addr *dst, struct in6_addr *src,
2186 int iif, int type, u32 pid, u32 seq,
2187 int prefix, int nowait, unsigned int flags)
2189 struct rtmsg *rtm;
2190 struct nlmsghdr *nlh;
2191 long expires;
2192 u32 table;
2194 if (prefix) { /* user wants prefix routes only */
2195 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2196 /* success since this is not a prefix route */
2197 return 1;
2201 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2202 if (nlh == NULL)
2203 return -EMSGSIZE;
2205 rtm = nlmsg_data(nlh);
2206 rtm->rtm_family = AF_INET6;
2207 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2208 rtm->rtm_src_len = rt->rt6i_src.plen;
2209 rtm->rtm_tos = 0;
2210 if (rt->rt6i_table)
2211 table = rt->rt6i_table->tb6_id;
2212 else
2213 table = RT6_TABLE_UNSPEC;
2214 rtm->rtm_table = table;
2215 NLA_PUT_U32(skb, RTA_TABLE, table);
2216 if (rt->rt6i_flags&RTF_REJECT)
2217 rtm->rtm_type = RTN_UNREACHABLE;
2218 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2219 rtm->rtm_type = RTN_LOCAL;
2220 else
2221 rtm->rtm_type = RTN_UNICAST;
2222 rtm->rtm_flags = 0;
2223 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2224 rtm->rtm_protocol = rt->rt6i_protocol;
2225 if (rt->rt6i_flags&RTF_DYNAMIC)
2226 rtm->rtm_protocol = RTPROT_REDIRECT;
2227 else if (rt->rt6i_flags & RTF_ADDRCONF)
2228 rtm->rtm_protocol = RTPROT_KERNEL;
2229 else if (rt->rt6i_flags&RTF_DEFAULT)
2230 rtm->rtm_protocol = RTPROT_RA;
2232 if (rt->rt6i_flags&RTF_CACHE)
2233 rtm->rtm_flags |= RTM_F_CLONED;
2235 if (dst) {
2236 NLA_PUT(skb, RTA_DST, 16, dst);
2237 rtm->rtm_dst_len = 128;
2238 } else if (rtm->rtm_dst_len)
2239 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2240 #ifdef CONFIG_IPV6_SUBTREES
2241 if (src) {
2242 NLA_PUT(skb, RTA_SRC, 16, src);
2243 rtm->rtm_src_len = 128;
2244 } else if (rtm->rtm_src_len)
2245 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2246 #endif
2247 if (iif) {
2248 #ifdef CONFIG_IPV6_MROUTE
2249 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2250 int err = ip6mr_get_route(net, skb, rtm, nowait);
2251 if (err <= 0) {
2252 if (!nowait) {
2253 if (err == 0)
2254 return 0;
2255 goto nla_put_failure;
2256 } else {
2257 if (err == -EMSGSIZE)
2258 goto nla_put_failure;
2261 } else
2262 #endif
2263 NLA_PUT_U32(skb, RTA_IIF, iif);
2264 } else if (dst) {
2265 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2266 struct in6_addr saddr_buf;
2267 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2268 dst, 0, &saddr_buf) == 0)
2269 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2272 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2273 goto nla_put_failure;
2275 if (rt->dst.neighbour)
2276 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2278 if (rt->dst.dev)
2279 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2281 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2283 if (!(rt->rt6i_flags & RTF_EXPIRES))
2284 expires = 0;
2285 else if (rt->rt6i_expires - jiffies < INT_MAX)
2286 expires = rt->rt6i_expires - jiffies;
2287 else
2288 expires = INT_MAX;
2290 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2291 expires, rt->dst.error) < 0)
2292 goto nla_put_failure;
2294 return nlmsg_end(skb, nlh);
2296 nla_put_failure:
2297 nlmsg_cancel(skb, nlh);
2298 return -EMSGSIZE;
2301 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2303 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2304 int prefix;
2306 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2307 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2308 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2309 } else
2310 prefix = 0;
2312 return rt6_fill_node(arg->net,
2313 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2314 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2315 prefix, 0, NLM_F_MULTI);
2318 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2320 struct net *net = sock_net(in_skb->sk);
2321 struct nlattr *tb[RTA_MAX+1];
2322 struct rt6_info *rt;
2323 struct sk_buff *skb;
2324 struct rtmsg *rtm;
2325 struct flowi fl;
2326 int err, iif = 0;
2328 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2329 if (err < 0)
2330 goto errout;
2332 err = -EINVAL;
2333 memset(&fl, 0, sizeof(fl));
2335 if (tb[RTA_SRC]) {
2336 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2337 goto errout;
2339 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2342 if (tb[RTA_DST]) {
2343 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2344 goto errout;
2346 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2349 if (tb[RTA_IIF])
2350 iif = nla_get_u32(tb[RTA_IIF]);
2352 if (tb[RTA_OIF])
2353 fl.oif = nla_get_u32(tb[RTA_OIF]);
2355 if (iif) {
2356 struct net_device *dev;
2357 dev = __dev_get_by_index(net, iif);
2358 if (!dev) {
2359 err = -ENODEV;
2360 goto errout;
2364 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2365 if (skb == NULL) {
2366 err = -ENOBUFS;
2367 goto errout;
2370 /* Reserve room for dummy headers, this skb can pass
2371 through good chunk of routing engine.
2373 skb_reset_mac_header(skb);
2374 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2376 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2377 skb_dst_set(skb, &rt->dst);
2379 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2380 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2381 nlh->nlmsg_seq, 0, 0, 0);
2382 if (err < 0) {
2383 kfree_skb(skb);
2384 goto errout;
2387 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2388 errout:
2389 return err;
2392 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2394 struct sk_buff *skb;
2395 struct net *net = info->nl_net;
2396 u32 seq;
2397 int err;
2399 err = -ENOBUFS;
2400 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2402 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2403 if (skb == NULL)
2404 goto errout;
2406 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2407 event, info->pid, seq, 0, 0, 0);
2408 if (err < 0) {
2409 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2410 WARN_ON(err == -EMSGSIZE);
2411 kfree_skb(skb);
2412 goto errout;
2414 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2415 info->nlh, gfp_any());
2416 return;
2417 errout:
2418 if (err < 0)
2419 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2422 static int ip6_route_dev_notify(struct notifier_block *this,
2423 unsigned long event, void *data)
2425 struct net_device *dev = (struct net_device *)data;
2426 struct net *net = dev_net(dev);
2428 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2429 net->ipv6.ip6_null_entry->dst.dev = dev;
2430 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2431 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2432 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2433 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2434 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2435 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2436 #endif
2439 return NOTIFY_OK;
2443 * /proc
2446 #ifdef CONFIG_PROC_FS
2448 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2450 struct rt6_proc_arg
2452 char *buffer;
2453 int offset;
2454 int length;
2455 int skip;
2456 int len;
2459 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2461 struct seq_file *m = p_arg;
2463 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2465 #ifdef CONFIG_IPV6_SUBTREES
2466 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2467 #else
2468 seq_puts(m, "00000000000000000000000000000000 00 ");
2469 #endif
2471 if (rt->rt6i_nexthop) {
2472 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2473 } else {
2474 seq_puts(m, "00000000000000000000000000000000");
2476 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2477 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2478 rt->dst.__use, rt->rt6i_flags,
2479 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2480 return 0;
2483 static int ipv6_route_show(struct seq_file *m, void *v)
2485 struct net *net = (struct net *)m->private;
2486 fib6_clean_all(net, rt6_info_route, 0, m);
2487 return 0;
2490 static int ipv6_route_open(struct inode *inode, struct file *file)
2492 return single_open_net(inode, file, ipv6_route_show);
2495 static const struct file_operations ipv6_route_proc_fops = {
2496 .owner = THIS_MODULE,
2497 .open = ipv6_route_open,
2498 .read = seq_read,
2499 .llseek = seq_lseek,
2500 .release = single_release_net,
/*
 * seq_file show handler for /proc/net/rt6_stats: one line of seven
 * hex counters (fib nodes, route nodes, allocs, entries, cached
 * entries, live dst entries, discarded routes).  Format is
 * user-visible ABI — keep field order and width stable.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   atomic_read(&net->ipv6.ip6_dst_ops.entries),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
/* open() for /proc/net/rt6_stats; netns becomes seq_file private data. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
/* File operations for /proc/net/rt6_stats (read-only seq_file). */
static const struct file_operations rt6_stats_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= rt6_stats_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};
2530 #endif /* CONFIG_PROC_FS */
2532 #ifdef CONFIG_SYSCTL
2534 static
2535 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2536 void __user *buffer, size_t *lenp, loff_t *ppos)
2538 struct net *net = current->nsproxy->net_ns;
2539 int delay = net->ipv6.sysctl.flush_delay;
2540 if (write) {
2541 proc_dointvec(ctl, write, buffer, lenp, ppos);
2542 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2543 return 0;
2544 } else
2545 return -EINVAL;
/*
 * Template for the per-namespace /proc/sys/net/ipv6/route/ table.
 * The .data pointers reference init_net here; ipv6_route_sysctl_init()
 * kmemdup()s this template and rewrites table[0]..table[9] for each
 * namespace, so the entry order below is load-bearing — keep it in
 * sync with that function.  Terminated by an empty sentinel entry.
 */
ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger, see ipv6_sysctl_rtcache_flush() */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		/* dst garbage-collection threshold (shared ops template) */
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* seconds, converted to/from jiffies by the handler */
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same variable as gc_min_interval, but in milliseconds */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
/*
 * Build the per-namespace route sysctl table: duplicate the template
 * and repoint each entry's .data at the given namespace's variables.
 * The table[N] indices must match the entry order in
 * ipv6_route_table_template[].  Returns NULL on allocation failure;
 * the caller owns the returned memory (kfree()d on netns teardown).
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		/* gc_thresh lives in the per-netns dst_ops, not sysctl */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms aliases the same variable as [3] */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
2645 #endif
/*
 * Per-namespace init for the IPv6 routing subsystem: clone the dst_ops
 * template, allocate the special null/prohibit/blackhole route entries,
 * set sysctl defaults and create the /proc files.  On allocation
 * failure the goto chain unwinds in reverse order of acquisition and
 * returns -ENOMEM.  Device pointers of the special entries are filled
 * in later (see ip6_route_init()/ip6_route_dev_notify()).
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	/* Always-present "no route" entry */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_ops;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* Policy-routing actions: administratively prohibited ... */
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;

	/* ... and silent discard (blackhole) */
	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
#endif

	/* Default sysctl values for this namespace */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif
	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

	/* error unwinding: free in reverse order of allocation */
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_ops:
	goto out;
}
/*
 * Per-namespace teardown: remove the /proc files and free the special
 * route entries allocated by ip6_route_net_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
}
/* Per-network-namespace lifecycle hooks for IPv6 routing. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
/* Netdevice event notifier (handler defined earlier in this file). */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
/*
 * Boot-time initialization of the IPv6 routing subsystem: dst slab
 * cache, pernet hooks, fib6 core, xfrm6, fib6 policy rules, rtnetlink
 * route handlers and the netdevice notifier.  Each error label undoes
 * everything initialized before the step it is named after (labels are
 * named after the step that failed, not the cleanup they perform).
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_kmem_cache;

	/* Blackhole dsts share the same slab cache as regular ones */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
/*
 * Module-exit counterpart of ip6_route_init(): tear everything down in
 * strict reverse order of initialization.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}