x86, mce, AMD: Fix leaving freed data in a list
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / route.c
bloba89ff5add15a16600015287105646b8df948f46e
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
57 #include <asm/uaccess.h>
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
/* The debug helpers compile away entirely unless RT6_DEBUG >= 3. */
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
/* When non-zero, ip6_pol_route() also clones off-link (non-gateway)
 * routes per destination; disabled by default. */
74 #define CLONE_OFFLINK_ROUTE 0
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void ip6_dst_destroy(struct dst_entry *);
80 static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
82 static int ip6_dst_gc(struct dst_ops *ops);
84 static int ip6_pkt_discard(struct sk_buff *skb);
85 static int ip6_pkt_discard_out(struct sk_buff *skb);
86 static void ip6_link_failure(struct sk_buff *skb);
87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94 static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
96 struct in6_addr *gwaddr, int ifindex);
97 #endif
/* dst_ops template for ordinary IPv6 routes; instantiated per netns. */
99 static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6,
101 .protocol = cpu_to_be16(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
110 .local_out = __ip6_local_out,
111 .entries = ATOMIC_INIT(0),
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
/* dst_ops for the blackhole copies made by ip6_dst_blackhole():
 * no gc hook, and PMTU updates go to the no-op handler above. */
118 static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entries = ATOMIC_INIT(0),
/* Catch-all "no route" entry: any packet hitting it is discarded
 * with -ENETUNREACH.  Copied into each netns at init. */
127 static struct rt6_info ip6_null_entry_template = {
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
/* Metric ~0 keeps this entry sorted last in the fib node. */
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_protocol = RTPROT_KERNEL,
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
147 static int ip6_pkt_prohibit(struct sk_buff *skb);
148 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/* Policy-routing "prohibit" entry: rejects traffic with -EACCES. */
150 static struct rt6_info ip6_prohibit_entry_template = {
151 .u = {
152 .dst = {
153 .__refcnt = ATOMIC_INIT(1),
154 .__use = 1,
155 .obsolete = -1,
156 .error = -EACCES,
157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
158 .input = ip6_pkt_prohibit,
159 .output = ip6_pkt_prohibit_out,
162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
163 .rt6i_protocol = RTPROT_KERNEL,
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
/* Policy-routing "blackhole" entry: silently drops via dst_discard. */
168 static struct rt6_info ip6_blk_hole_entry_template = {
169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
176 .input = dst_discard,
177 .output = dst_discard,
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_protocol = RTPROT_KERNEL,
182 .rt6i_metric = ~(u32) 0,
183 .rt6i_ref = ATOMIC_INIT(1),
186 #endif
/* Allocate a dst_entry from the given ops and view it as the rt6_info
 * that embeds it (u.dst is the first member, so the cast is valid). */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *base = dst_alloc(ops);

	return (struct rt6_info *)base;
}
194 static void ip6_dst_destroy(struct dst_entry *dst)
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
/*
 * dst_ops->ifdown hook: 'dev' is being unregistered.  Routes whose
 * inet6_dev sits on that device are re-pointed at the namespace's
 * loopback inet6_dev so later dereferences stay safe.
 */
205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
210 struct net_device *loopback_dev =
211 dev_net(dev)->loopback_dev;
/* Only swap when the route's idev really lives on the dying device. */
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
216 if (loopback_idev != NULL) {
/* Reference transfer: keep loopback's ref, release the old idev. */
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
223 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
229 static inline int rt6_need_strict(struct in6_addr *daddr)
231 return (ipv6_addr_type(daddr) &
232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
236 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walk the fib node's route list and pick the entry that matches the
 * requested outgoing interface (oif) and/or source address.  Returns
 * the input list head unchanged when no constraint applies, or the
 * null entry when a strict interface match was demanded but missed.
 */
239 static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
241 struct in6_addr *saddr,
242 int oif,
243 int flags)
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
/* Nothing to constrain on: return the head route as-is. */
248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
254 if (oif) {
255 if (dev->ifindex == oif)
256 return sprt;
/* Loopback routes may still satisfy oif via their idev. */
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (flags & RT6_LOOKUP_F_IFACE && oif)
261 continue;
262 if (local && (!oif ||
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
266 local = sprt;
268 } else {
/* No oif given: match on the source address instead. */
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
/* Fall back to a remembered local route, or reject outright
 * when the caller insisted on an interface match. */
275 if (oif) {
276 if (local)
277 return local;
279 if (flags & RT6_LOOKUP_F_IFACE)
280 return net->ipv6.ip6_null_entry;
282 out:
283 return rt;
286 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Kick off a unicast Neighbour Solicitation towards the route's
 * next hop if its reachability is unconfirmed, rate-limited by
 * rtr_probe_interval.  No-op stub when router preferences are off.
 */
287 static void rt6_probe(struct rt6_info *rt)
289 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
291 * Okay, this does not seem to be appropriate
292 * for now, however, we need to check if it
293 * is really so; aka Router Reachability Probing.
295 * Router Reachability Probe MUST be rate-limited
296 * to no more than one per minute.
298 if (!neigh || (neigh->nud_state & NUD_VALID))
299 return;
/* Re-check state under the neigh lock; drop it before sending. */
300 read_lock_bh(&neigh->lock);
301 if (!(neigh->nud_state & NUD_VALID) &&
302 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
303 struct in6_addr mcaddr;
304 struct in6_addr *target;
306 neigh->updated = jiffies;
307 read_unlock_bh(&neigh->lock);
309 target = (struct in6_addr *)&neigh->primary_key;
310 addrconf_addr_solict_mult(target, &mcaddr);
311 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
312 } else
313 read_unlock_bh(&neigh->lock);
315 #else
316 static inline void rt6_probe(struct rt6_info *rt)
318 return;
320 #endif
323 * Default Router Selection (RFC 2461 6.3.6)
325 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
327 struct net_device *dev = rt->rt6i_dev;
328 if (!oif || dev->ifindex == oif)
329 return 2;
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
/*
 * Score the route's neighbour reachability:
 * 2 = confirmed reachable, 1 = unknown (or no next hop needed),
 * 0 = failed / no neighbour entry.
 */
336 static inline int rt6_check_neigh(struct rt6_info *rt)
338 struct neighbour *neigh = rt->rt6i_nexthop;
339 int m;
/* Non-gateway routes have no next hop to confirm. */
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
346 m = 2;
347 #ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350 #endif
351 else
352 m = 1;
353 read_unlock_bh(&neigh->lock);
354 } else
355 m = 0;
356 return m;
/*
 * Combine device match, (optional) router preference and neighbour
 * reachability into a single score; -1 means the route is unusable
 * under the given strictness flags.
 */
359 static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
362 int m, n;
364 m = rt6_check_dev(rt, oif);
365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
366 return -1;
367 #ifdef CONFIG_IPV6_ROUTER_PREF
/* Router preference occupies the bits above the device score. */
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369 #endif
370 n = rt6_check_neigh(rt);
371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
372 return -1;
373 return m;
/*
 * Compare one candidate route against the best match so far (*mpri
 * holds the best score).  Returns the new best match; probes losing
 * or replaced candidates when reachability is required.
 */
376 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
379 int m;
381 if (rt6_check_expired(rt))
382 goto out;
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
388 if (m > *mpri) {
/* The displaced best match gets probed before being replaced. */
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
397 out:
398 return match;
/*
 * Scan all routes of the fib node sharing 'metric', starting at the
 * round-robin head and wrapping to the leaf, returning the best match
 * (or NULL when none scores).
 */
401 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
405 struct rt6_info *rt, *match;
406 int mpri = -1;
408 match = NULL;
/* First half: from the round-robin pointer to the end of the run. */
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
/* Second half: wrap around from the leaf up to rr_head. */
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
416 return match;
/*
 * Default router selection (RFC 2461 6.3.6): pick the best route in
 * the node, rotating fn->rr_ptr for round-robin behaviour when no
 * candidate is (probably) reachable.
 */
419 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
421 struct rt6_info *match, *rt0;
422 struct net *net;
424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
425 __func__, fn->leaf, oif);
427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
433 if (!match &&
434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
437 /* no entries matched; do round-robin */
438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
441 if (next != rt0)
442 fn->rr_ptr = next;
445 RT6_TRACE("%s() => %p\n",
446 __func__, match);
/* Fall back to the per-netns null entry when nothing matched. */
448 net = dev_net(rt0->rt6i_dev);
449 return (match ? match : net->ipv6.ip6_null_entry);
452 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (RFC 4191) received in a Router
 * Advertisement from 'gwaddr' on 'dev': validate it, then add, update
 * or delete the corresponding RTF_ROUTEINFO route.  Returns 0 on
 * success or -EINVAL on a malformed option.
 */
453 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
454 struct in6_addr *gwaddr)
456 struct net *net = dev_net(dev);
457 struct route_info *rinfo = (struct route_info *) opt;
458 struct in6_addr prefix_buf, *prefix;
459 unsigned int pref;
460 unsigned long lifetime;
461 struct rt6_info *rt;
463 if (len < sizeof(struct route_info)) {
464 return -EINVAL;
467 /* Sanity check for prefix_len and length */
468 if (rinfo->length > 3) {
469 return -EINVAL;
470 } else if (rinfo->prefix_len > 128) {
471 return -EINVAL;
472 } else if (rinfo->prefix_len > 64) {
473 if (rinfo->length < 2) {
474 return -EINVAL;
476 } else if (rinfo->prefix_len > 0) {
477 if (rinfo->length < 1) {
478 return -EINVAL;
482 pref = rinfo->route_pref;
483 if (pref == ICMPV6_ROUTER_PREF_INVALID)
484 return -EINVAL;
486 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
/* length == 3 carries a full 128-bit prefix; otherwise pad it out. */
488 if (rinfo->length == 3)
489 prefix = (struct in6_addr *)rinfo->prefix;
490 else {
491 /* this function is safe */
492 ipv6_addr_prefix(&prefix_buf,
493 (struct in6_addr *)rinfo->prefix,
494 rinfo->prefix_len);
495 prefix = &prefix_buf;
498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
499 dev->ifindex);
/* Zero lifetime withdraws an existing route. */
501 if (rt && !lifetime) {
502 ip6_del_rt(rt);
503 rt = NULL;
506 if (!rt && lifetime)
507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
508 pref);
509 else if (rt)
510 rt->rt6i_flags = RTF_ROUTEINFO |
511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
513 if (rt) {
514 if (!addrconf_finite_timeout(lifetime)) {
515 rt->rt6i_flags &= ~RTF_EXPIRES;
516 } else {
517 rt->rt6i_expires = jiffies + HZ * lifetime;
518 rt->rt6i_flags |= RTF_EXPIRES;
520 dst_release(&rt->u.dst);
522 return 0;
524 #endif
/*
 * Lookup backtracking helper: when the current fib node yielded only
 * the null entry, climb towards the tree root (descending into source
 * subtrees where present) and jump back to the caller's 'restart'
 * label at the first node carrying route info; bail out to 'out' at
 * the tree root.  Expects 'rt' and 'fn' in the caller's scope.
 */
526 #define BACKTRACK(__net, saddr) \
527 do { \
528 if (rt == __net->ipv6.ip6_null_entry) { \
529 struct fib6_node *pn; \
530 while (1) { \
531 if (fn->fn_flags & RTN_TL_ROOT) \
532 goto out; \
533 pn = fn->parent; \
534 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
535 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
536 else \
537 fn = pn; \
538 if (fn->fn_flags & RTN_RTINFO) \
539 goto restart; \
542 } while(0)
/*
 * Simple (non-cloning) policy lookup: find the best fib node for the
 * flow, filter by device/source, and return the route with its use
 * counter bumped.  Never returns NULL — worst case is the null entry.
 */
544 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
546 struct flowi *fl, int flags)
548 struct fib6_node *fn;
549 struct rt6_info *rt;
551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553 restart:
554 rt = fn->leaf;
555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
/* BACKTRACK may jump back to 'restart' with a shallower node. */
556 BACKTRACK(net, &fl->fl6_src);
557 out:
558 dst_use(&rt->u.dst, jiffies);
559 read_unlock_bh(&table->tb6_lock);
560 return rt;
/*
 * Convenience wrapper around the policy lookup: builds a flow from
 * daddr/saddr/oif and returns a referenced rt6_info, or NULL when the
 * lookup resolved to an error entry.  'strict' forces an exact
 * interface match.
 */
564 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
575 struct dst_entry *dst;
576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
/* Error entries (null/prohibit/blackhole) are not handed out. */
587 dst_release(dst);
589 return NULL;
592 EXPORT_SYMBOL(rt6_lookup);
594 /* ip6_ins_rt is called with FREE table->tb6_lock.
595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
/*
 * Insert 'rt' into its fib table under the table write lock,
 * propagating the fib6_add() result (0 or -errno) to the caller.
 */
600 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
602 int err;
603 struct fib6_table *table;
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
607 err = fib6_add(&table->tb6_root, rt, info);
608 write_unlock_bh(&table->tb6_lock);
610 return err;
613 int ip6_ins_rt(struct rt6_info *rt)
615 struct nl_info info = {
616 .nl_net = dev_net(rt->rt6i_dev),
618 return __ip6_ins_rt(rt, &info);
/*
 * Clone 'ort' into a host (/128) RTF_CACHE entry for 'daddr' and bind
 * a neighbour to it.  On neighbour-table overflow one emergency GC
 * pass is attempted (outside softirq) before giving up; returns NULL
 * on any failure.
 */
621 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
624 struct rt6_info *rt;
627 * Clone the route.
630 rt = ip6_rt_copy(ort);
632 if (rt) {
633 struct neighbour *neigh;
/* Only one retry, and only when not in softirq context. */
634 int attempts = !in_softirq();
636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
648 #ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
653 #endif
655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
/* Temporarily force aggressive GC, then restore the knobs. */
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
683 rt->rt6i_nexthop = neigh;
687 return rt;
690 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
700 return rt;
/*
 * Full policy routing lookup with on-demand cloning.  Selects the
 * best route (preferring reachable routers on non-forwarding hosts,
 * then relaxing that), clones gateway routes into RTF_CACHE entries,
 * and retries the lookup when a concurrent insert races with us.
 * Always returns a referenced route; worst case the null entry.
 */
703 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
706 struct fib6_node *fn;
707 struct rt6_info *rt, *nrt;
708 int strict = 0;
709 int attempts = 3;
710 int err;
/* Hosts (forwarding off) first insist on reachable routers. */
711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
713 strict |= flags & RT6_LOOKUP_F_IFACE;
715 relookup:
716 read_lock_bh(&table->tb6_lock);
718 restart_2:
719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
721 restart:
722 rt = rt6_select(fn, oif, strict | reachable);
724 BACKTRACK(net, &fl->fl6_src);
/* Null entry or an existing cache entry needs no cloning. */
725 if (rt == net->ipv6.ip6_null_entry ||
726 rt->rt6i_flags & RTF_CACHE)
727 goto out;
729 dst_hold(&rt->u.dst);
730 read_unlock_bh(&table->tb6_lock);
732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
734 else {
735 #if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737 #else
738 goto out2;
739 #endif
742 dst_release(&rt->u.dst);
743 rt = nrt ? : net->ipv6.ip6_null_entry;
745 dst_hold(&rt->u.dst);
746 if (nrt) {
747 err = ip6_ins_rt(nrt);
748 if (!err)
749 goto out2;
752 if (--attempts <= 0)
753 goto out2;
756 * Race condition! In the gap, when table->tb6_lock was
757 * released someone could insert this route. Relookup.
759 dst_release(&rt->u.dst);
760 goto relookup;
762 out:
/* First pass required reachability; retry once without it. */
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
767 dst_hold(&rt->u.dst);
768 read_unlock_bh(&table->tb6_lock);
769 out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
773 return rt;
776 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
777 struct flowi *fl, int flags)
779 return ip6_pol_route(net, table, fl->iif, fl, flags);
/*
 * Route an incoming IPv6 packet: build a flow from its header and
 * attach the looked-up dst to the skb.  Scoped destinations force a
 * strict interface match (except on PIM register devices).
 */
782 void ip6_route_input(struct sk_buff *skb)
784 struct ipv6hdr *iph = ipv6_hdr(skb);
785 struct net *net = dev_net(skb->dev);
786 int flags = RT6_LOOKUP_F_HAS_SADDR;
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
796 .mark = skb->mark,
797 .proto = iph->nexthdr,
800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
801 flags |= RT6_LOOKUP_F_IFACE;
803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
806 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
807 struct flowi *fl, int flags)
809 return ip6_pol_route(net, table, fl->oif, fl, flags);
/*
 * Output route lookup for locally generated traffic.  When no source
 * address is given yet, the socket's source-address preferences
 * (temporary/public/care-of) are passed down to guide selection.
 */
812 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
815 int flags = 0;
817 if (rt6_need_strict(&fl->fl6_dst))
818 flags |= RT6_LOOKUP_F_IFACE;
820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
835 EXPORT_SYMBOL(ip6_route_output);
/*
 * Replace *dstp with a "blackhole" copy of itself: same metrics,
 * device and addresses, but input/output discard packets and the
 * entry uses ip6_dst_blackhole_ops (no gc, PMTU ignored).  The
 * original reference is always released; returns -ENOMEM on
 * allocation failure.
 */
837 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
844 if (rt) {
845 new = &rt->u.dst;
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
849 new->input = dst_discard;
850 new->output = dst_discard;
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866 #ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868 #endif
/* The copy was never inserted anywhere, so dst_free is correct. */
870 dst_free(new);
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
877 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
880 * Destination cache support functions
883 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
885 struct rt6_info *rt;
887 rt = (struct rt6_info *) dst;
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
892 return NULL;
/*
 * dst_ops->negative_advice hook: the caller reports this dst as bad.
 * Expired cache entries are deleted from the table; any other entry
 * just has the caller's reference dropped.  Returns NULL when the
 * caller should forget the dst.
 */
895 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
897 struct rt6_info *rt = (struct rt6_info *) dst;
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE) {
901 if (rt6_check_expired(rt)) {
/* ip6_del_rt consumes the table's reference. */
902 ip6_del_rt(rt);
903 dst = NULL;
905 } else {
906 dst_release(dst);
907 dst = NULL;
910 return dst;
/*
 * dst_ops->link_failure hook: tell the sender the address is
 * unreachable, then retire the route — cache entries are expired
 * immediately, default routes invalidate their fib node's serial
 * number so cached lookups are re-validated.
 */
913 static void ip6_link_failure(struct sk_buff *skb)
915 struct rt6_info *rt;
917 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
919 rt = (struct rt6_info *) skb_dst(skb);
920 if (rt) {
921 if (rt->rt6i_flags&RTF_CACHE) {
922 dst_set_expires(&rt->u.dst, 0);
923 rt->rt6i_flags |= RTF_EXPIRES;
924 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
925 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops->update_pmtu hook: lower the cached MTU of a host (/128)
 * route.  Values below IPV6_MIN_MTU are clamped and ALLFRAG is set so
 * upper layers fragment to the minimum (RFC 2460 tunnelling case).
 */
929 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
931 struct rt6_info *rt6 = (struct rt6_info*)dst;
933 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
934 rt6->rt6i_flags |= RTF_MODIFIED;
935 if (mtu < IPV6_MIN_MTU) {
936 mtu = IPV6_MIN_MTU;
937 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
939 dst->metrics[RTAX_MTU-1] = mtu;
940 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
944 static int ipv6_get_mtu(struct net_device *dev);
946 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
948 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
950 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
951 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
954 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
955 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
956 * IPV6_MAXPLEN is also valid and means: "any MSS,
957 * rely only on pmtu discovery"
959 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
960 mtu = IPV6_MAXPLEN;
961 return mtu;
/* ICMPv6/ndisc dsts are never inserted into the fib; they live on
 * this private list and are reaped by icmp6_dst_gc(). */
964 static struct dst_entry *icmp6_dst_gc_list;
965 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Allocate a standalone dst for ndisc/ICMPv6 traffic towards 'addr'
 * on 'dev'.  Takes a reference on 'neigh' if supplied, otherwise
 * resolves one itself.  The entry is chained onto icmp6_dst_gc_list
 * instead of the fib.  Returns NULL when the device has no IPv6
 * state or allocation fails.
 */
967 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
968 struct neighbour *neigh,
969 const struct in6_addr *addr)
971 struct rt6_info *rt;
972 struct inet6_dev *idev = in6_dev_get(dev);
973 struct net *net = dev_net(dev);
975 if (unlikely(idev == NULL))
976 return NULL;
978 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
979 if (unlikely(rt == NULL)) {
980 in6_dev_put(idev);
981 goto out;
984 dev_hold(dev);
985 if (neigh)
986 neigh_hold(neigh);
987 else {
988 neigh = ndisc_get_neigh(dev, addr);
989 if (IS_ERR(neigh))
990 neigh = NULL;
993 rt->rt6i_dev = dev;
994 rt->rt6i_idev = idev;
995 rt->rt6i_nexthop = neigh;
996 atomic_set(&rt->u.dst.__refcnt, 1);
997 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
998 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
999 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1000 rt->u.dst.output = ip6_output;
1002 #if 0 /* there's no chance to use these for ndisc */
1003 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1004 ? DST_HOST
1005 : 0;
1006 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1007 rt->rt6i_dst.plen = 128;
1008 #endif
/* Chain onto the private gc list under its lock. */
1010 spin_lock_bh(&icmp6_dst_lock);
1011 rt->u.dst.next = icmp6_dst_gc_list;
1012 icmp6_dst_gc_list = &rt->u.dst;
1013 spin_unlock_bh(&icmp6_dst_lock);
1015 fib6_force_start_gc(net);
1017 out:
1018 return &rt->u.dst;
/*
 * Reap unreferenced entries from icmp6_dst_gc_list; returns the
 * number of entries still in use (so the caller knows to re-run).
 */
1021 int icmp6_dst_gc(void)
1023 struct dst_entry *dst, *next, **pprev;
1024 int more = 0;
1026 next = NULL;
1028 spin_lock_bh(&icmp6_dst_lock);
1029 pprev = &icmp6_dst_gc_list;
/* Classic unlink-through-pprev singly-linked-list walk. */
1031 while ((dst = *pprev) != NULL) {
1032 if (!atomic_read(&dst->__refcnt)) {
1033 *pprev = dst->next;
1034 dst_free(dst);
1035 } else {
1036 pprev = &dst->next;
1037 ++more;
1041 spin_unlock_bh(&icmp6_dst_lock);
1043 return more;
/*
 * Remove every entry on icmp6_dst_gc_list for which 'func' returns
 * non-zero (used e.g. on device teardown to purge matching dsts).
 */
1046 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1047 void *arg)
1049 struct dst_entry *dst, **pprev;
1051 spin_lock_bh(&icmp6_dst_lock);
1052 pprev = &icmp6_dst_gc_list;
1053 while ((dst = *pprev) != NULL) {
1054 struct rt6_info *rt = (struct rt6_info *) dst;
1055 if (func(rt, arg)) {
1056 *pprev = dst->next;
1057 dst_free(dst);
1058 } else {
1059 pprev = &dst->next;
1062 spin_unlock_bh(&icmp6_dst_lock);
/*
 * dst_ops->gc hook: run fib6 garbage collection when the entry count
 * exceeds rt_max_size or the rate limit has elapsed.  The gc expire
 * interval adapts: it grows on each forced run and decays by the
 * elasticity factor afterwards.  Returns non-zero while the table is
 * still over its size limit.
 */
1065 static int ip6_dst_gc(struct dst_ops *ops)
1067 unsigned long now = jiffies;
1068 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1069 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1070 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1071 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1072 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1073 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1075 if (time_after(rt_last_gc + rt_min_interval, now) &&
1076 atomic_read(&ops->entries) <= rt_max_size)
1077 goto out;
1079 net->ipv6.ip6_rt_gc_expire++;
1080 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1081 net->ipv6.ip6_rt_last_gc = now;
1082 if (atomic_read(&ops->entries) < ops->gc_thresh)
1083 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1084 out:
1085 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1086 return (atomic_read(&ops->entries) > rt_max_size);
1089 /* Clean host part of a prefix. Not necessary in radix tree,
1090 but results in cleaner routing tables.
1092 Remove it only when all the things will work!
1095 static int ipv6_get_mtu(struct net_device *dev)
1097 int mtu = IPV6_MIN_MTU;
1098 struct inet6_dev *idev;
1100 idev = in6_dev_get(dev);
1101 if (idev) {
1102 mtu = idev->cnf.mtu6;
1103 in6_dev_put(idev);
1105 return mtu;
1108 int ip6_dst_hoplimit(struct dst_entry *dst)
1110 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1111 if (hoplimit < 0) {
1112 struct net_device *dev = dst->dev;
1113 struct inet6_dev *idev = in6_dev_get(dev);
1114 if (idev) {
1115 hoplimit = idev->cnf.hop_limit;
1116 in6_dev_put(idev);
1117 } else
1118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1120 return hoplimit;
/*
 * Build and insert a new route from a validated fib6_config.
 * Resolves the device/idev (promoting loopback routes to reject
 * routes), validates gateway reachability, fills metrics from the
 * netlink attributes, and inserts via __ip6_ins_rt().  All failure
 * paths converge on 'out', which releases dev/idev and frees the
 * never-inserted rt.
 */
1127 int ip6_route_add(struct fib6_config *cfg)
1129 int err;
1130 struct net *net = cfg->fc_nlinfo.nl_net;
1131 struct rt6_info *rt = NULL;
1132 struct net_device *dev = NULL;
1133 struct inet6_dev *idev = NULL;
1134 struct fib6_table *table;
1135 int addr_type;
1137 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1138 return -EINVAL;
1139 #ifndef CONFIG_IPV6_SUBTREES
1140 if (cfg->fc_src_len)
1141 return -EINVAL;
1142 #endif
1143 if (cfg->fc_ifindex) {
1144 err = -ENODEV;
1145 dev = dev_get_by_index(net, cfg->fc_ifindex);
1146 if (!dev)
1147 goto out;
1148 idev = in6_dev_get(dev);
1149 if (!idev)
1150 goto out;
1153 if (cfg->fc_metric == 0)
1154 cfg->fc_metric = IP6_RT_PRIO_USER;
1156 table = fib6_new_table(net, cfg->fc_table);
1157 if (table == NULL) {
1158 err = -ENOBUFS;
1159 goto out;
1162 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1164 if (rt == NULL) {
1165 err = -ENOMEM;
1166 goto out;
1169 rt->u.dst.obsolete = -1;
1170 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1171 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1174 if (cfg->fc_protocol == RTPROT_UNSPEC)
1175 cfg->fc_protocol = RTPROT_BOOT;
1176 rt->rt6i_protocol = cfg->fc_protocol;
1178 addr_type = ipv6_addr_type(&cfg->fc_dst);
1180 if (addr_type & IPV6_ADDR_MULTICAST)
1181 rt->u.dst.input = ip6_mc_input;
1182 else
1183 rt->u.dst.input = ip6_forward;
1185 rt->u.dst.output = ip6_output;
1187 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1188 rt->rt6i_dst.plen = cfg->fc_dst_len;
1189 if (rt->rt6i_dst.plen == 128)
1190 rt->u.dst.flags = DST_HOST;
1192 #ifdef CONFIG_IPV6_SUBTREES
1193 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1194 rt->rt6i_src.plen = cfg->fc_src_len;
1195 #endif
1197 rt->rt6i_metric = cfg->fc_metric;
1199 /* We cannot add true routes via loopback here,
1200 they would result in kernel looping; promote them to reject routes
1202 if ((cfg->fc_flags & RTF_REJECT) ||
1203 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1204 /* hold loopback dev/idev if we haven't done so. */
1205 if (dev != net->loopback_dev) {
1206 if (dev) {
1207 dev_put(dev);
1208 in6_dev_put(idev);
1210 dev = net->loopback_dev;
1211 dev_hold(dev);
1212 idev = in6_dev_get(dev);
1213 if (!idev) {
1214 err = -ENODEV;
1215 goto out;
1218 rt->u.dst.output = ip6_pkt_discard_out;
1219 rt->u.dst.input = ip6_pkt_discard;
1220 rt->u.dst.error = -ENETUNREACH;
1221 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1222 goto install_route;
1225 if (cfg->fc_flags & RTF_GATEWAY) {
1226 struct in6_addr *gw_addr;
1227 int gwa_type;
1229 gw_addr = &cfg->fc_gateway;
1230 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1231 gwa_type = ipv6_addr_type(gw_addr);
/* Non-link-local gateways must themselves be directly reachable. */
1233 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1234 struct rt6_info *grt;
1236 /* IPv6 strictly inhibits using not link-local
1237 addresses as nexthop address.
1238 Otherwise, router will not able to send redirects.
1239 It is very good, but in some (rare!) circumstances
1240 (SIT, PtP, NBMA NOARP links) it is handy to allow
1241 some exceptions. --ANK
1243 err = -EINVAL;
1244 if (!(gwa_type&IPV6_ADDR_UNICAST))
1245 goto out;
1247 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1249 err = -EHOSTUNREACH;
1250 if (grt == NULL)
1251 goto out;
1252 if (dev) {
1253 if (dev != grt->rt6i_dev) {
1254 dst_release(&grt->u.dst);
1255 goto out;
1257 } else {
/* Inherit device/idev from the route to the gateway. */
1258 dev = grt->rt6i_dev;
1259 idev = grt->rt6i_idev;
1260 dev_hold(dev);
1261 in6_dev_hold(grt->rt6i_idev);
1263 if (!(grt->rt6i_flags&RTF_GATEWAY))
1264 err = 0;
1265 dst_release(&grt->u.dst);
1267 if (err)
1268 goto out;
1270 err = -EINVAL;
1271 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1272 goto out;
1275 err = -ENODEV;
1276 if (dev == NULL)
1277 goto out;
1279 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1280 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1281 if (IS_ERR(rt->rt6i_nexthop)) {
1282 err = PTR_ERR(rt->rt6i_nexthop);
1283 rt->rt6i_nexthop = NULL;
1284 goto out;
1288 rt->rt6i_flags = cfg->fc_flags;
1290 install_route:
/* Copy user-supplied metrics, then fill defaults for the rest. */
1291 if (cfg->fc_mx) {
1292 struct nlattr *nla;
1293 int remaining;
1295 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1296 int type = nla_type(nla);
1298 if (type) {
1299 if (type > RTAX_MAX) {
1300 err = -EINVAL;
1301 goto out;
1304 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1309 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1310 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1311 if (!dst_mtu(&rt->u.dst))
1312 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1313 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1314 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1315 rt->u.dst.dev = dev;
1316 rt->rt6i_idev = idev;
1317 rt->rt6i_table = table;
1319 cfg->fc_nlinfo.nl_net = dev_net(dev);
1321 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1323 out:
1324 if (dev)
1325 dev_put(dev);
1326 if (idev)
1327 in6_dev_put(idev);
1328 if (rt)
1329 dst_free(&rt->u.dst);
1330 return err;
/*
 * Unlink @rt from its FIB table under tb6_lock and drop the reference
 * the caller holds on it.  The per-namespace null entry is never
 * deleted.  Returns fib6_del()'s result, or -ENOENT for the null entry.
 */
1333 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1335 int err;
1336 struct fib6_table *table;
1337 struct net *net = dev_net(rt->rt6i_dev);
/* The blackhole/null route must stay in the tree forever. */
1339 if (rt == net->ipv6.ip6_null_entry)
1340 return -ENOENT;
1342 table = rt->rt6i_table;
1343 write_lock_bh(&table->tb6_lock);
1345 err = fib6_del(rt, info);
/* Drop the hold the caller took (e.g. ip6_route_del() did dst_hold). */
1346 dst_release(&rt->u.dst);
1348 write_unlock_bh(&table->tb6_lock);
1350 return err;
/*
 * Public route-deletion entry point: delete @rt with a kernel-originated
 * nl_info (no netlink pid/nlh, only the owning namespace).
 */
1353 int ip6_del_rt(struct rt6_info *rt)
1355 struct nl_info info = {
1356 .nl_net = dev_net(rt->rt6i_dev),
1358 return __ip6_del_rt(rt, &info);
/*
 * Delete the route described by @cfg (typically built from a netlink
 * RTM_DELROUTE or an SIOCDELRT ioctl).  Locates the fib6 node for the
 * dst/src prefixes, then scans its leaf chain for the first route that
 * also matches the optional ifindex / gateway / metric selectors.
 * Returns -ESRCH when no table or no matching route exists.
 */
1361 static int ip6_route_del(struct fib6_config *cfg)
1363 struct fib6_table *table;
1364 struct fib6_node *fn;
1365 struct rt6_info *rt;
1366 int err = -ESRCH;
1368 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1369 if (table == NULL)
1370 return err;
1372 read_lock_bh(&table->tb6_lock);
1374 fn = fib6_locate(&table->tb6_root,
1375 &cfg->fc_dst, cfg->fc_dst_len,
1376 &cfg->fc_src, cfg->fc_src_len);
1378 if (fn) {
1379 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
/* Zero selectors act as wildcards: only filter when set. */
1380 if (cfg->fc_ifindex &&
1381 (rt->rt6i_dev == NULL ||
1382 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1383 continue;
1384 if (cfg->fc_flags & RTF_GATEWAY &&
1385 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1386 continue;
1387 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1388 continue;
/* Hold the route across the unlock; __ip6_del_rt() releases it. */
1389 dst_hold(&rt->u.dst);
1390 read_unlock_bh(&table->tb6_lock);
1392 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1395 read_unlock_bh(&table->tb6_lock);
1397 return err;
1401 * Handle redirects
/*
 * Flow key extended with the redirecting router's address, so the
 * redirect lookup can match against rt6i_gateway (see
 * __ip6_route_redirect(), which downcasts the flowi back to this).
 */
1403 struct ip6rd_flowi {
1404 struct flowi fl;
1405 struct in6_addr gateway;
/*
 * Table-lookup callback used by ip6_route_redirect() (via
 * fib6_rule_lookup).  Finds the "current" route for the redirect's
 * destination and accepts it only when the redirect arrived from the
 * nexthop router actually serving that route (RFC 2461 rule).  Falls
 * back to the null entry, always returning a held dst.
 */
1408 static struct rt6_info *__ip6_route_redirect(struct net *net,
1409 struct fib6_table *table,
1410 struct flowi *fl,
1411 int flags)
/* @fl is really the embedded member of a struct ip6rd_flowi. */
1413 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1414 struct rt6_info *rt;
1415 struct fib6_node *fn;
1418 * Get the "current" route for this destination and
1419 * check if the redirect has come from approriate router.
1421 * RFC 2461 specifies that redirects should only be
1422 * accepted if they come from the nexthop to the target.
1423 * Due to the way the routes are chosen, this notion
1424 * is a bit fuzzy and one might need to check all possible
1425 * routes.
1428 read_lock_bh(&table->tb6_lock);
1429 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1430 restart:
1431 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1433 * Current route is on-link; redirect is always invalid.
1435 * Seems, previous statement is not true. It could
1436 * be node, which looks for us as on-link (f.e. proxy ndisc)
1437 * But then router serving it might decide, that we should
1438 * know truth 8)8) --ANK (980726).
/* Skip expired, on-link and wrong-interface routes, and routes
 * whose gateway is not the router that sent the redirect. */
1440 if (rt6_check_expired(rt))
1441 continue;
1442 if (!(rt->rt6i_flags & RTF_GATEWAY))
1443 continue;
1444 if (fl->oif != rt->rt6i_dev->ifindex)
1445 continue;
1446 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1447 continue;
1448 break;
1451 if (!rt)
1452 rt = net->ipv6.ip6_null_entry;
/* BACKTRACK may jump to restart: to retry at a less-specific node. */
1453 BACKTRACK(net, &fl->fl6_src);
1454 out:
1455 dst_hold(&rt->u.dst);
1457 read_unlock_bh(&table->tb6_lock);
1459 return rt;
/*
 * Build an ip6rd_flowi for a received redirect (dest/src of the
 * triggering traffic, the redirecting @gateway, inbound @dev) and run
 * the policy-routing lookup with __ip6_route_redirect as resolver.
 * Returns a held rt6_info (possibly the null entry).
 */
1462 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1463 struct in6_addr *src,
1464 struct in6_addr *gateway,
1465 struct net_device *dev)
1467 int flags = RT6_LOOKUP_F_HAS_SADDR;
1468 struct net *net = dev_net(dev);
1469 struct ip6rd_flowi rdfl = {
1470 .fl = {
1471 .oif = dev->ifindex,
1472 .nl_u = {
1473 .ip6_u = {
1474 .daddr = *dest,
1475 .saddr = *src,
1481 ipv6_addr_copy(&rdfl.gateway, gateway);
/* Link-local / multicast destinations are interface-scoped. */
1483 if (rt6_need_strict(dest))
1484 flags |= RT6_LOOKUP_F_IFACE;
1486 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1487 flags, __ip6_route_redirect);
/*
 * Process an accepted NDISC redirect: validate that the sender is our
 * current nexthop for @dest, update the neighbour cache, then install a
 * cloned RTF_CACHE host route pointing at the new nexthop @neigh.
 * @on_link distinguishes "target is on-link" redirects (no gateway).
 */
1490 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1491 struct in6_addr *saddr,
1492 struct neighbour *neigh, u8 *lladdr, int on_link)
1494 struct rt6_info *rt, *nrt = NULL;
1495 struct netevent_redirect netevent;
1496 struct net *net = dev_net(neigh->dev);
1498 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
/* Lookup returns the null entry when the redirect did not come from
 * the router currently serving this destination -> reject it. */
1500 if (rt == net->ipv6.ip6_null_entry) {
1501 if (net_ratelimit())
1502 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1503 "for redirect target\n");
1504 goto out;
1508 * We have finally decided to accept it.
1511 neigh_update(neigh, lladdr, NUD_STALE,
1512 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1513 NEIGH_UPDATE_F_OVERRIDE|
1514 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1515 NEIGH_UPDATE_F_ISROUTER))
1519 * Redirect received -> path was valid.
1520 * Look, redirects are sent only in response to data packets,
1521 * so that this nexthop apparently is reachable. --ANK
1523 dst_confirm(&rt->u.dst);
1525 /* Duplicate redirect: silently ignore. */
1526 if (neigh == rt->u.dst.neighbour)
1527 goto out;
/* Clone the matched route into a dynamic host cache entry. */
1529 nrt = ip6_rt_copy(rt);
1530 if (nrt == NULL)
1531 goto out;
1533 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1534 if (on_link)
1535 nrt->rt6i_flags &= ~RTF_GATEWAY;
1537 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1538 nrt->rt6i_dst.plen = 128;
1539 nrt->u.dst.flags |= DST_HOST;
1541 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1542 nrt->rt6i_nexthop = neigh_clone(neigh);
1543 /* Reset pmtu, it may be better */
1544 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1545 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1546 dst_mtu(&nrt->u.dst));
1548 if (ip6_ins_rt(nrt))
1549 goto out;
/* Tell interested parties (e.g. netfilter, IB) the path changed. */
1551 netevent.old = &rt->u.dst;
1552 netevent.new = &nrt->u.dst;
1553 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* An old cache entry is superseded: delete it, which also consumes
 * our hold on rt via __ip6_del_rt(). */
1555 if (rt->rt6i_flags&RTF_CACHE) {
1556 ip6_del_rt(rt);
1557 return;
1560 out:
1561 dst_release(&rt->u.dst);
1562 return;
1566 * Handle ICMP "packet too big" messages
1567 * i.e. Path MTU discovery
/*
 * Apply a Packet Too Big report for @daddr on interface @ifindex
 * (0 = any).  Decreases the path MTU either in place (RTF_CACHE host
 * route) or by installing a cloned host route with the reduced MTU and
 * an expiry, per RFC 1981.  @allfrag marks "add fragment header even
 * below the minimum MTU" (RFC 2460 behaviour for pmtu < 1280).
 */
1570 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1571 struct net *net, u32 pmtu, int ifindex)
1573 struct rt6_info *rt, *nrt;
1574 int allfrag = 0;
1576 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1577 if (rt == NULL)
1578 return;
/* Only ever shrink the cached path MTU; ignore stale/larger reports. */
1580 if (pmtu >= dst_mtu(&rt->u.dst))
1581 goto out;
1583 if (pmtu < IPV6_MIN_MTU) {
1585 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1586 * MTU (1280) and a fragment header should always be included
1587 * after a node receiving Too Big message reporting PMTU is
1588 * less than the IPv6 Minimum Link MTU.
1590 pmtu = IPV6_MIN_MTU;
1591 allfrag = 1;
1594 /* New mtu received -> path was valid.
1595 They are sent only in response to data packets,
1596 so that this nexthop apparently is reachable. --ANK
1598 dst_confirm(&rt->u.dst);
1600 /* Host route. If it is static, it would be better
1601 not to override it, but add new one, so that
1602 when cache entry will expire old pmtu
1603 would return automatically.
1605 if (rt->rt6i_flags & RTF_CACHE) {
1606 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1607 if (allfrag)
1608 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1609 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1610 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1611 goto out;
1614 /* Network route.
1615 Two cases are possible:
1616 1. It is connected route. Action: COW
1617 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1619 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1620 nrt = rt6_alloc_cow(rt, daddr, saddr);
1621 else
1622 nrt = rt6_alloc_clone(rt, daddr);
1624 if (nrt) {
1625 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1626 if (allfrag)
1627 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1629 /* According to RFC 1981, detecting PMTU increase shouldn't be
1630 * happened within 5 mins, the recommended timer is 10 mins.
1631 * Here this route expiration time is set to ip6_rt_mtu_expires
1632 * which is 10 mins. After 10 mins the decreased pmtu is expired
1633 * and detecting PMTU increase will be automatically happened.
1635 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1636 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1638 ip6_ins_rt(nrt);
1640 out:
1641 dst_release(&rt->u.dst);
/*
 * Entry point for ICMPv6 Packet Too Big handling: apply the new @pmtu
 * both unscoped (any interface) and scoped to the receiving @dev, for
 * the reason explained in the comment below.
 */
1644 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1645 struct net_device *dev, u32 pmtu)
1647 struct net *net = dev_net(dev);
1650 * RFC 1981 states that a node "MUST reduce the size of the packets it
1651 * is sending along the path" that caused the Packet Too Big message.
1652 * Since it's not possible in the general case to determine which
1653 * interface was used to send the original packet, we update the MTU
1654 * on the interface that will be used to send future packets. We also
1655 * update the MTU on the interface that received the Packet Too Big in
1656 * case the original packet was forced out that interface with
1657 * SO_BINDTODEVICE or similar. This is the next best thing to the
1658 * correct behaviour, which would be to update the MTU on all
1659 * interfaces.
1661 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1662 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1666 * Misc support functions
/*
 * Allocate a new rt6_info that duplicates @ort: handlers, metrics,
 * device/idev (with references taken), gateway, dst key and table.
 * The copy is born non-expiring (rt6i_expires = 0, RTF_EXPIRES
 * cleared) with metric 0; callers adjust flags afterwards.
 * Returns NULL on allocation failure.
 */
1669 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1671 struct net *net = dev_net(ort->rt6i_dev);
1672 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1674 if (rt) {
1675 rt->u.dst.input = ort->u.dst.input;
1676 rt->u.dst.output = ort->u.dst.output;
1678 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1679 rt->u.dst.error = ort->u.dst.error;
1680 rt->u.dst.dev = ort->u.dst.dev;
/* The copy owns its own device/idev references. */
1681 if (rt->u.dst.dev)
1682 dev_hold(rt->u.dst.dev);
1683 rt->rt6i_idev = ort->rt6i_idev;
1684 if (rt->rt6i_idev)
1685 in6_dev_hold(rt->rt6i_idev);
1686 rt->u.dst.lastuse = jiffies;
1687 rt->rt6i_expires = 0;
1689 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1690 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1691 rt->rt6i_metric = 0;
1693 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1694 #ifdef CONFIG_IPV6_SUBTREES
1695 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1696 #endif
1697 rt->rt6i_table = ort->rt6i_table;
1699 return rt;
1702 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an RA "Route Information" route (RTF_ROUTEINFO|RTF_GATEWAY) for
 * @prefix/@prefixlen learnt from router @gwaddr on @ifindex in the
 * RT6_TABLE_INFO table.  Returns the route with a hold, or NULL.
 */
1703 static struct rt6_info *rt6_get_route_info(struct net *net,
1704 struct in6_addr *prefix, int prefixlen,
1705 struct in6_addr *gwaddr, int ifindex)
1707 struct fib6_node *fn;
1708 struct rt6_info *rt = NULL;
1709 struct fib6_table *table;
1711 table = fib6_get_table(net, RT6_TABLE_INFO);
1712 if (table == NULL)
1713 return NULL;
/* NOTE(review): write lock taken although this path only reads the
 * tree -- presumably to serialize with concurrent RA updates. */
1715 write_lock_bh(&table->tb6_lock);
1716 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1717 if (!fn)
1718 goto out;
1720 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1721 if (rt->rt6i_dev->ifindex != ifindex)
1722 continue;
1723 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1724 continue;
1725 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1726 continue;
1727 dst_hold(&rt->u.dst);
1728 break;
1730 out:
1731 write_unlock_bh(&table->tb6_lock);
1732 return rt;
/*
 * Install a route learnt from an RA Route Information option into the
 * RT6_TABLE_INFO table with the given preference, then look it up
 * again (the add path does not hand the new route back).  A zero
 * prefix length is treated as a default route.
 */
1735 static struct rt6_info *rt6_add_route_info(struct net *net,
1736 struct in6_addr *prefix, int prefixlen,
1737 struct in6_addr *gwaddr, int ifindex,
1738 unsigned pref)
1740 struct fib6_config cfg = {
1741 .fc_table = RT6_TABLE_INFO,
1742 .fc_metric = IP6_RT_PRIO_USER,
1743 .fc_ifindex = ifindex,
1744 .fc_dst_len = prefixlen,
1745 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1746 RTF_UP | RTF_PREF(pref),
1747 .fc_nlinfo.pid = 0,
1748 .fc_nlinfo.nlh = NULL,
1749 .fc_nlinfo.nl_net = net,
1752 ipv6_addr_copy(&cfg.fc_dst, prefix);
1753 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1755 /* We should treat it as a default route if prefix length is 0. */
1756 if (!prefixlen)
1757 cfg.fc_flags |= RTF_DEFAULT;
/* Errors from ip6_route_add() are deliberately ignored; the
 * follow-up lookup simply returns NULL in that case. */
1759 ip6_route_add(&cfg);
1761 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1763 #endif
/*
 * Find the RA-learnt default route (RTF_ADDRCONF|RTF_DEFAULT) via
 * router @addr on @dev in the RT6_TABLE_DFLT table.  Returns the route
 * with a hold taken, or NULL.
 */
1765 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1767 struct rt6_info *rt;
1768 struct fib6_table *table;
1770 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1771 if (table == NULL)
1772 return NULL;
1774 write_lock_bh(&table->tb6_lock);
/* Default routes all live directly on the root node's leaf chain. */
1775 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1776 if (dev == rt->rt6i_dev &&
1777 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1778 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1779 break;
1781 if (rt)
1782 dst_hold(&rt->u.dst);
1783 write_unlock_bh(&table->tb6_lock);
1784 return rt;
/*
 * Install an RA-learnt default router @gwaddr on @dev into
 * RT6_TABLE_DFLT (expiring, with the RA's preference), then return the
 * freshly-installed route via rt6_get_dflt_router() -- which also
 * supplies the reference the caller receives.
 */
1787 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1788 struct net_device *dev,
1789 unsigned int pref)
1791 struct fib6_config cfg = {
1792 .fc_table = RT6_TABLE_DFLT,
1793 .fc_metric = IP6_RT_PRIO_USER,
1794 .fc_ifindex = dev->ifindex,
1795 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1796 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1797 .fc_nlinfo.pid = 0,
1798 .fc_nlinfo.nlh = NULL,
1799 .fc_nlinfo.nl_net = dev_net(dev),
1802 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
/* Add errors are ignored; the lookup below returns NULL then. */
1804 ip6_route_add(&cfg);
1806 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Delete every autoconf default route in @net's RT6_TABLE_DFLT table.
 * Deletion needs the lock dropped, so after each ip6_del_rt() the scan
 * restarts from the head of the (now modified) leaf chain.
 */
1809 void rt6_purge_dflt_routers(struct net *net)
1811 struct rt6_info *rt;
1812 struct fib6_table *table;
1814 /* NOTE: Keep consistent with rt6_get_dflt_router */
1815 table = fib6_get_table(net, RT6_TABLE_DFLT);
1816 if (table == NULL)
1817 return;
1819 restart:
1820 read_lock_bh(&table->tb6_lock);
1821 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1822 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* Hold across the unlock; ip6_del_rt() consumes the hold. */
1823 dst_hold(&rt->u.dst);
1824 read_unlock_bh(&table->tb6_lock);
1825 ip6_del_rt(rt);
1826 goto restart;
1829 read_unlock_bh(&table->tb6_lock);
/*
 * Translate a legacy ioctl struct in6_rtmsg into the common
 * struct fib6_config used by ip6_route_add()/ip6_route_del().
 * Always targets the main table; unset fields stay zero.
 */
1832 static void rtmsg_to_fib6_config(struct net *net,
1833 struct in6_rtmsg *rtmsg,
1834 struct fib6_config *cfg)
1836 memset(cfg, 0, sizeof(*cfg));
1838 cfg->fc_table = RT6_TABLE_MAIN;
1839 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1840 cfg->fc_metric = rtmsg->rtmsg_metric;
1841 cfg->fc_expires = rtmsg->rtmsg_info;
1842 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1843 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1844 cfg->fc_flags = rtmsg->rtmsg_flags;
1846 cfg->fc_nlinfo.nl_net = net;
1848 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1849 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1850 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * Handle the legacy SIOCADDRT/SIOCDELRT route ioctls: copy the
 * in6_rtmsg from userspace, convert it to a fib6_config, and add or
 * delete the route under the RTNL.  Requires CAP_NET_ADMIN.
 * Returns 0/-errno; -EINVAL for any other command.
 */
1853 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1855 struct fib6_config cfg;
1856 struct in6_rtmsg rtmsg;
1857 int err;
1859 switch(cmd) {
1860 case SIOCADDRT: /* Add a route */
1861 case SIOCDELRT: /* Delete a route */
1862 if (!capable(CAP_NET_ADMIN))
1863 return -EPERM;
/* copy_from_user() returns the number of bytes NOT copied. */
1864 err = copy_from_user(&rtmsg, arg,
1865 sizeof(struct in6_rtmsg));
1866 if (err)
1867 return -EFAULT;
1869 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1871 rtnl_lock();
1872 switch (cmd) {
1873 case SIOCADDRT:
1874 err = ip6_route_add(&cfg);
1875 break;
1876 case SIOCDELRT:
1877 err = ip6_route_del(&cfg);
1878 break;
1879 default:
1880 err = -EINVAL;
1882 rtnl_unlock();
1884 return err;
1887 return -EINVAL;
1891 * Drop the packet on the floor
/*
 * Common sink for packets hitting a discard/prohibit route: bump the
 * appropriate SNMP counter, send an ICMPv6 Destination Unreachable
 * with @code (unless the destination is unspecified/reserved on
 * input), and free the skb.  Always returns 0.
 */
1894 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1896 int type;
1897 struct dst_entry *dst = skb_dst(skb);
1898 switch (ipstats_mib_noroutes) {
1899 case IPSTATS_MIB_INNOROUTES:
1900 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* Bogus destinations count as address errors, not "no route",
 * and must not trigger an ICMP error either. */
1901 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1902 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1903 IPSTATS_MIB_INADDRERRORS);
1904 break;
1906 /* FALLTHROUGH */
1907 case IPSTATS_MIB_OUTNOROUTES:
1908 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1909 ipstats_mib_noroutes);
1910 break;
1912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1913 kfree_skb(skb);
1914 return 0;
/* dst input handler for blackhole/unreachable routes (inbound). */
1917 static int ip6_pkt_discard(struct sk_buff *skb)
1919 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst output handler: same as ip6_pkt_discard but counts as an
 * output no-route and points skb->dev at the dst device first. */
1922 static int ip6_pkt_discard_out(struct sk_buff *skb)
1924 skb->dev = skb_dst(skb)->dev;
1925 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1928 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Input handler for prohibit routes: ICMP "administratively
 * prohibited" instead of "no route" (multiple-tables builds only). */
1930 static int ip6_pkt_prohibit(struct sk_buff *skb)
1932 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* Output-side counterpart of ip6_pkt_prohibit. */
1935 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1937 skb->dev = skb_dst(skb)->dev;
1938 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1941 #endif
1944 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build the host route for a local unicast (@anycast == 0) or anycast
 * address: a /128 through the loopback device, inserted later into the
 * local table by the caller.  Takes references on loopback and @idev.
 * Returns the route with refcnt 1, or an ERR_PTR on failure.
 */
1947 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1948 const struct in6_addr *addr,
1949 int anycast)
1951 struct net *net = dev_net(idev->dev);
1952 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1953 struct neighbour *neigh;
1955 if (rt == NULL)
1956 return ERR_PTR(-ENOMEM);
1958 dev_hold(net->loopback_dev);
1959 in6_dev_hold(idev);
1961 rt->u.dst.flags = DST_HOST;
1962 rt->u.dst.input = ip6_input;
1963 rt->u.dst.output = ip6_output;
/* Local delivery always goes through loopback; idev stays the
 * real interface that owns the address. */
1964 rt->rt6i_dev = net->loopback_dev;
1965 rt->rt6i_idev = idev;
1966 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1967 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
/* -1 hop limit means "use the per-device/namespace default". */
1968 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1969 rt->u.dst.obsolete = -1;
1971 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1972 if (anycast)
1973 rt->rt6i_flags |= RTF_ANYCAST;
1974 else
1975 rt->rt6i_flags |= RTF_LOCAL;
1976 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1977 if (IS_ERR(neigh)) {
1978 dst_free(&rt->u.dst);
1980 /* We are casting this because that is the return
1981 * value type. But an errno encoded pointer is the
1982 * same regardless of the underlying pointer type,
1983 * and that's what we are returning. So this is OK.
1985 return (struct rt6_info *) neigh;
1987 rt->rt6i_nexthop = neigh;
1989 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1990 rt->rt6i_dst.plen = 128;
1991 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1993 atomic_set(&rt->u.dst.__refcnt, 1);
1995 return rt;
/* Argument bundle for fib6_ifdown(): the device going down (NULL =
 * all devices) and the namespace being cleaned. */
1998 struct arg_dev_net {
1999 struct net_device *dev;
2000 struct net *net;
/*
 * fib6_clean_all() callback: return -1 ("delete this route") for every
 * route on the downed device -- or every route at all when dev is NULL
 * -- except the namespace's null entry, which must survive.
 */
2003 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2005 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2006 struct net *net = ((struct arg_dev_net *)arg)->net;
2008 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2009 rt != net->ipv6.ip6_null_entry) {
2010 RT6_TRACE("deleted by ifdown %p\n", rt);
2011 return -1;
2013 return 0;
/*
 * Purge all routes referencing @dev (NULL = all) from the FIB trees
 * and from ICMPv6's cached dsts when an interface goes down.
 */
2016 void rt6_ifdown(struct net *net, struct net_device *dev)
2018 struct arg_dev_net adn = {
2019 .dev = dev,
2020 .net = net,
2023 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2024 icmp6_clean_all(fib6_ifdown, &adn);
/* Argument bundle for rt6_mtu_change_route(): the device whose MTU
 * changed and its new value. */
2027 struct rt6_mtu_change_arg
2029 struct net_device *dev;
2030 unsigned mtu;
/*
 * fib6_clean_all() callback applied on device-MTU change: update a
 * route's cached MTU/ADVMSS when the route uses the changed device,
 * its MTU metric isn't administratively locked, and either the MTU
 * shrank or the route was previously capped by the old device MTU
 * (so increases propagate too -- see the long comment below).
 * Always returns 0 (never deletes routes).
 */
2033 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2035 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2036 struct inet6_dev *idev;
2037 struct net *net = dev_net(arg->dev);
2039 /* In IPv6 pmtu discovery is not optional,
2040 so that RTAX_MTU lock cannot disable it.
2041 We still use this lock to block changes
2042 caused by addrconf/ndisc.
2045 idev = __in6_dev_get(arg->dev);
2046 if (idev == NULL)
2047 return 0;
2049 /* For administrative MTU increase, there is no way to discover
2050 IPv6 PMTU increase, so PMTU increase should be updated here.
2051 Since RFC 1981 doesn't include administrative MTU increase
2052 update PMTU increase is a MUST. (i.e. jumbo frame)
2055 If new MTU is less than route PMTU, this new MTU will be the
2056 lowest MTU in the path, update the route PMTU to reflect PMTU
2057 decreases; if new MTU is greater than route PMTU, and the
2058 old MTU is the lowest MTU in the path, update the route PMTU
2059 to reflect the increase. In this case if the other nodes' MTU
2060 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2061 PMTU discouvery.
2063 if (rt->rt6i_dev == arg->dev &&
2064 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2065 (dst_mtu(&rt->u.dst) >= arg->mtu ||
2066 (dst_mtu(&rt->u.dst) < arg->mtu &&
2067 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2068 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2069 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2071 return 0;
/* Walk every FIB table in @dev's namespace and refresh cached route
 * MTUs after @dev's MTU changed to @mtu. */
2074 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2076 struct rt6_mtu_change_arg arg = {
2077 .dev = dev,
2078 .mtu = mtu,
2081 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE.
 * Attributes not listed (RTA_DST/RTA_SRC) are length-checked by hand
 * in rtm_to_fib6_config()/inet6_rtm_getroute(). */
2084 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2085 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2086 [RTA_OIF] = { .type = NLA_U32 },
2087 [RTA_IIF] = { .type = NLA_U32 },
2088 [RTA_PRIORITY] = { .type = NLA_U32 },
2089 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * Parse an RTM_NEWROUTE/RTM_DELROUTE request into a fib6_config.
 * Validates attributes against rtm_ipv6_policy; dst/src prefixes are
 * copied with their prefix-derived byte length.  Returns 0 or -errno.
 * Note: cfg->fc_mx points into the request skb, so it is only valid
 * while the message is being processed.
 */
2092 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2093 struct fib6_config *cfg)
2095 struct rtmsg *rtm;
2096 struct nlattr *tb[RTA_MAX+1];
2097 int err;
2099 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2100 if (err < 0)
2101 goto errout;
2103 err = -EINVAL;
2104 rtm = nlmsg_data(nlh);
2105 memset(cfg, 0, sizeof(*cfg));
2107 cfg->fc_table = rtm->rtm_table;
2108 cfg->fc_dst_len = rtm->rtm_dst_len;
2109 cfg->fc_src_len = rtm->rtm_src_len;
2110 cfg->fc_flags = RTF_UP;
2111 cfg->fc_protocol = rtm->rtm_protocol;
2113 if (rtm->rtm_type == RTN_UNREACHABLE)
2114 cfg->fc_flags |= RTF_REJECT;
2116 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2117 cfg->fc_nlinfo.nlh = nlh;
2118 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2120 if (tb[RTA_GATEWAY]) {
2121 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2122 cfg->fc_flags |= RTF_GATEWAY;
2125 if (tb[RTA_DST]) {
/* Only rtm_dst_len bits are meaningful; round up to bytes. */
2126 int plen = (rtm->rtm_dst_len + 7) >> 3;
2128 if (nla_len(tb[RTA_DST]) < plen)
2129 goto errout;
2131 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2134 if (tb[RTA_SRC]) {
2135 int plen = (rtm->rtm_src_len + 7) >> 3;
2137 if (nla_len(tb[RTA_SRC]) < plen)
2138 goto errout;
2140 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2143 if (tb[RTA_OIF])
2144 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2146 if (tb[RTA_PRIORITY])
2147 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2149 if (tb[RTA_METRICS]) {
2150 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2151 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2154 if (tb[RTA_TABLE])
2155 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2157 err = 0;
2158 errout:
2159 return err;
/* Netlink RTM_DELROUTE handler: parse the request and delete. */
2162 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2164 struct fib6_config cfg;
2165 int err;
2167 err = rtm_to_fib6_config(skb, nlh, &cfg);
2168 if (err < 0)
2169 return err;
2171 return ip6_route_del(&cfg);
/* Netlink RTM_NEWROUTE handler: parse the request and add. */
2174 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2176 struct fib6_config cfg;
2177 int err;
2179 err = rtm_to_fib6_config(skb, nlh, &cfg);
2180 if (err < 0)
2181 return err;
2183 return ip6_route_add(&cfg);
/*
 * Worst-case notification message size for one route, used to size the
 * skb in inet6_rt_notify().  Must stay in sync with what
 * rt6_fill_node() can emit (a mismatch trips its WARN_ON(-EMSGSIZE)).
 */
2186 static inline size_t rt6_nlmsg_size(void)
2188 return NLMSG_ALIGN(sizeof(struct rtmsg))
2189 + nla_total_size(16) /* RTA_SRC */
2190 + nla_total_size(16) /* RTA_DST */
2191 + nla_total_size(16) /* RTA_GATEWAY */
2192 + nla_total_size(16) /* RTA_PREFSRC */
2193 + nla_total_size(4) /* RTA_TABLE */
2194 + nla_total_size(4) /* RTA_IIF */
2195 + nla_total_size(4) /* RTA_OIF */
2196 + nla_total_size(4) /* RTA_PRIORITY */
2197 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2198 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * Serialize @rt into @skb as one RTM_* netlink message.
 * @dst/@src: when non-NULL (RTM_GETROUTE replies), the queried
 *            addresses are reported as /128 instead of the route key.
 * @iif:      inbound ifindex for input-path queries (0 = none).
 * @prefix:   caller only wants RTF_PREFIX_RT routes; others return 1
 *            ("success, skipped").
 * @nowait:   passed to ip6mr_get_route() for multicast resolution.
 * Returns nlmsg_end()'s length, 1 when skipped, 0 when ip6mr queued
 * the request, or -EMSGSIZE when @skb ran out of room.
 */
2201 static int rt6_fill_node(struct net *net,
2202 struct sk_buff *skb, struct rt6_info *rt,
2203 struct in6_addr *dst, struct in6_addr *src,
2204 int iif, int type, u32 pid, u32 seq,
2205 int prefix, int nowait, unsigned int flags)
2207 struct rtmsg *rtm;
2208 struct nlmsghdr *nlh;
2209 long expires;
2210 u32 table;
2212 if (prefix) { /* user wants prefix routes only */
2213 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2214 /* success since this is not a prefix route */
2215 return 1;
2219 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2220 if (nlh == NULL)
2221 return -EMSGSIZE;
2223 rtm = nlmsg_data(nlh);
2224 rtm->rtm_family = AF_INET6;
2225 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2226 rtm->rtm_src_len = rt->rt6i_src.plen;
2227 rtm->rtm_tos = 0;
2228 if (rt->rt6i_table)
2229 table = rt->rt6i_table->tb6_id;
2230 else
2231 table = RT6_TABLE_UNSPEC;
2232 rtm->rtm_table = table;
2233 NLA_PUT_U32(skb, RTA_TABLE, table);
/* Map route flags onto the rtnetlink route type. */
2234 if (rt->rt6i_flags&RTF_REJECT)
2235 rtm->rtm_type = RTN_UNREACHABLE;
2236 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2237 rtm->rtm_type = RTN_LOCAL;
2238 else
2239 rtm->rtm_type = RTN_UNICAST;
2240 rtm->rtm_flags = 0;
2241 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2242 rtm->rtm_protocol = rt->rt6i_protocol;
2243 if (rt->rt6i_flags&RTF_DYNAMIC)
2244 rtm->rtm_protocol = RTPROT_REDIRECT;
2245 else if (rt->rt6i_flags & RTF_ADDRCONF)
2246 rtm->rtm_protocol = RTPROT_KERNEL;
2247 else if (rt->rt6i_flags&RTF_DEFAULT)
2248 rtm->rtm_protocol = RTPROT_RA;
2250 if (rt->rt6i_flags&RTF_CACHE)
2251 rtm->rtm_flags |= RTM_F_CLONED;
2253 if (dst) {
2254 NLA_PUT(skb, RTA_DST, 16, dst);
2255 rtm->rtm_dst_len = 128;
2256 } else if (rtm->rtm_dst_len)
2257 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2258 #ifdef CONFIG_IPV6_SUBTREES
2259 if (src) {
2260 NLA_PUT(skb, RTA_SRC, 16, src);
2261 rtm->rtm_src_len = 128;
2262 } else if (rtm->rtm_src_len)
2263 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2264 #endif
2265 if (iif) {
2266 #ifdef CONFIG_IPV6_MROUTE
/* Multicast destinations resolve through ip6mr; err == 0 with
 * !nowait means the answer will be delivered asynchronously. */
2267 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2268 int err = ip6mr_get_route(net, skb, rtm, nowait);
2269 if (err <= 0) {
2270 if (!nowait) {
2271 if (err == 0)
2272 return 0;
2273 goto nla_put_failure;
2274 } else {
2275 if (err == -EMSGSIZE)
2276 goto nla_put_failure;
2279 } else
2280 #endif
2281 NLA_PUT_U32(skb, RTA_IIF, iif);
2282 } else if (dst) {
/* Output query: report the source address we would pick. */
2283 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2284 struct in6_addr saddr_buf;
2285 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2286 dst, 0, &saddr_buf) == 0)
2287 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2290 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2291 goto nla_put_failure;
2293 if (rt->u.dst.neighbour)
2294 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2296 if (rt->u.dst.dev)
2297 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2299 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* Clamp remaining lifetime into the s32 the cacheinfo carries. */
2301 if (!(rt->rt6i_flags & RTF_EXPIRES))
2302 expires = 0;
2303 else if (rt->rt6i_expires - jiffies < INT_MAX)
2304 expires = rt->rt6i_expires - jiffies;
2305 else
2306 expires = INT_MAX;
2308 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2309 expires, rt->u.dst.error) < 0)
2310 goto nla_put_failure;
2312 return nlmsg_end(skb, nlh);
2314 nla_put_failure:
2315 nlmsg_cancel(skb, nlh);
2316 return -EMSGSIZE;
/*
 * Per-route callback for RTM_GETROUTE dumps: honour the RTM_F_PREFIX
 * filter if the request carried a full rtmsg, then emit the route via
 * rt6_fill_node() with NLM_F_MULTI.
 */
2319 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2321 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2322 int prefix;
2324 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2325 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2326 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2327 } else
2328 prefix = 0;
2330 return rt6_fill_node(arg->net,
2331 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2332 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2333 prefix, 0, NLM_F_MULTI);
/*
 * RTM_GETROUTE handler: resolve the route for the src/dst/oif given in
 * the request, render it with rt6_fill_node(), and unicast the reply
 * to the requester.  An RTA_IIF is only validated to exist here; the
 * lookup itself always goes through ip6_route_output().
 */
2336 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2338 struct net *net = sock_net(in_skb->sk);
2339 struct nlattr *tb[RTA_MAX+1];
2340 struct rt6_info *rt;
2341 struct sk_buff *skb;
2342 struct rtmsg *rtm;
2343 struct flowi fl;
2344 int err, iif = 0;
2346 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2347 if (err < 0)
2348 goto errout;
2350 err = -EINVAL;
2351 memset(&fl, 0, sizeof(fl));
2353 if (tb[RTA_SRC]) {
2354 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2355 goto errout;
2357 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2360 if (tb[RTA_DST]) {
2361 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2362 goto errout;
2364 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2367 if (tb[RTA_IIF])
2368 iif = nla_get_u32(tb[RTA_IIF]);
2370 if (tb[RTA_OIF])
2371 fl.oif = nla_get_u32(tb[RTA_OIF]);
2373 if (iif) {
/* Only check the inbound device exists; nothing else uses it
 * before the output lookup below. */
2374 struct net_device *dev;
2375 dev = __dev_get_by_index(net, iif);
2376 if (!dev) {
2377 err = -ENODEV;
2378 goto errout;
2382 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2383 if (skb == NULL) {
2384 err = -ENOBUFS;
2385 goto errout;
2388 /* Reserve room for dummy headers, this skb can pass
2389 through good chunk of routing engine.
2391 skb_reset_mac_header(skb);
2392 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* ip6_route_output() never returns NULL (null entry at worst),
 * and the reply skb takes ownership of the dst reference. */
2394 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2395 skb_dst_set(skb, &rt->u.dst);
2397 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2398 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2399 nlh->nlmsg_seq, 0, 0, 0);
2400 if (err < 0) {
2401 kfree_skb(skb);
2402 goto errout;
2405 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2406 errout:
2407 return err;
/*
 * Broadcast a route change (@event = RTM_NEWROUTE/RTM_DELROUTE) to the
 * RTNLGRP_IPV6_ROUTE multicast group.  Allocation and fill failures
 * are reported to listeners via rtnl_set_sk_err().
 */
2410 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2412 struct sk_buff *skb;
2413 struct net *net = info->nl_net;
2414 u32 seq;
2415 int err;
2417 err = -ENOBUFS;
2418 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
/* rt6_nlmsg_size() is the worst case, so filling cannot overflow
 * unless the two functions fall out of sync. */
2420 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2421 if (skb == NULL)
2422 goto errout;
2424 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2425 event, info->pid, seq, 0, 0, 0);
2426 if (err < 0) {
2427 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2428 WARN_ON(err == -EMSGSIZE);
2429 kfree_skb(skb);
2430 goto errout;
2432 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2433 info->nlh, gfp_any());
2434 return;
2435 errout:
2436 if (err < 0)
2437 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
/*
 * Netdevice notifier: when a namespace's loopback device registers,
 * attach it (and its inet6_dev) to the special null/prohibit/blackhole
 * routes so they have a valid device before first use.
 */
2440 static int ip6_route_dev_notify(struct notifier_block *this,
2441 unsigned long event, void *data)
2443 struct net_device *dev = (struct net_device *)data;
2444 struct net *net = dev_net(dev);
2446 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2447 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2448 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2449 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2450 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2451 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2452 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2453 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2454 #endif
2457 return NOTIFY_OK;
2461 * /proc
2464 #ifdef CONFIG_PROC_FS
/* Historical helpers for the old read_proc-style /proc/net/ipv6_route
 * implementation; the seq_file path below (rt6_info_route) does not
 * appear to use them -- presumably left over, TODO confirm. */
2466 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2468 struct rt6_proc_arg
2470 char *buffer;
2471 int offset;
2472 int length;
2473 int skip;
2474 int len;
/*
 * fib6_clean_all() callback printing one /proc/net/ipv6_route line:
 * dst/plen, src/plen (zeros without subtrees), nexthop, metric,
 * refcnt, use count, flags and device name.  Always returns 0 so the
 * walk never deletes anything.
 */
2477 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2479 struct seq_file *m = p_arg;
2481 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2483 #ifdef CONFIG_IPV6_SUBTREES
2484 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2485 #else
2486 seq_puts(m, "00000000000000000000000000000000 00 ");
2487 #endif
2489 if (rt->rt6i_nexthop) {
2490 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2491 } else {
2492 seq_puts(m, "00000000000000000000000000000000");
2494 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2495 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2496 rt->u.dst.__use, rt->rt6i_flags,
2497 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2498 return 0;
/* seq_file show: walk all FIB tables of the namespace, printing each
 * route via rt6_info_route(). */
2501 static int ipv6_route_show(struct seq_file *m, void *v)
2503 struct net *net = (struct net *)m->private;
2504 fib6_clean_all(net, rt6_info_route, 0, m);
2505 return 0;
/* open() for /proc/net/ipv6_route: namespace-aware single_open. */
2508 static int ipv6_route_open(struct inode *inode, struct file *file)
2510 return single_open_net(inode, file, ipv6_route_show)
2513 static const struct file_operations ipv6_route_proc_fops = {
2514 .owner = THIS_MODULE,
2515 .open = ipv6_route_open,
2516 .read = seq_read,
2517 .llseek = seq_lseek,
2518 .release = single_release_net,
2521 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2523 struct net *net = (struct net *)seq->private;
2524 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2525 net->ipv6.rt6_stats->fib_nodes,
2526 net->ipv6.rt6_stats->fib_route_nodes,
2527 net->ipv6.rt6_stats->fib_rt_alloc,
2528 net->ipv6.rt6_stats->fib_rt_entries,
2529 net->ipv6.rt6_stats->fib_rt_cache,
2530 atomic_read(&net->ipv6.ip6_dst_ops.entries),
2531 net->ipv6.rt6_stats->fib_discarded_routes);
2533 return 0;
2536 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2538 return single_open_net(inode, file, rt6_stats_seq_show);
2541 static const struct file_operations rt6_stats_seq_fops = {
2542 .owner = THIS_MODULE,
2543 .open = rt6_stats_seq_open,
2544 .read = seq_read,
2545 .llseek = seq_lseek,
2546 .release = single_release_net,
2548 #endif /* CONFIG_PROC_FS */
2550 #ifdef CONFIG_SYSCTL
2552 static
2553 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2554 void __user *buffer, size_t *lenp, loff_t *ppos)
2556 struct net *net = current->nsproxy->net_ns;
2557 int delay = net->ipv6.sysctl.flush_delay;
2558 if (write) {
2559 proc_dointvec(ctl, write, buffer, lenp, ppos);
2560 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2561 return 0;
2562 } else
2563 return -EINVAL;
2566 ctl_table ipv6_route_table_template[] = {
2568 .procname = "flush",
2569 .data = &init_net.ipv6.sysctl.flush_delay,
2570 .maxlen = sizeof(int),
2571 .mode = 0200,
2572 .proc_handler = ipv6_sysctl_rtcache_flush
2575 .procname = "gc_thresh",
2576 .data = &ip6_dst_ops_template.gc_thresh,
2577 .maxlen = sizeof(int),
2578 .mode = 0644,
2579 .proc_handler = proc_dointvec,
2582 .procname = "max_size",
2583 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
2586 .proc_handler = proc_dointvec,
2589 .procname = "gc_min_interval",
2590 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
2593 .proc_handler = proc_dointvec_jiffies,
2596 .procname = "gc_timeout",
2597 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
2600 .proc_handler = proc_dointvec_jiffies,
2603 .procname = "gc_interval",
2604 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2605 .maxlen = sizeof(int),
2606 .mode = 0644,
2607 .proc_handler = proc_dointvec_jiffies,
2610 .procname = "gc_elasticity",
2611 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
2614 .proc_handler = proc_dointvec_jiffies,
2617 .procname = "mtu_expires",
2618 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2619 .maxlen = sizeof(int),
2620 .mode = 0644,
2621 .proc_handler = proc_dointvec_jiffies,
2624 .procname = "min_adv_mss",
2625 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2626 .maxlen = sizeof(int),
2627 .mode = 0644,
2628 .proc_handler = proc_dointvec_jiffies,
2631 .procname = "gc_min_interval_ms",
2632 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
2635 .proc_handler = proc_dointvec_ms_jiffies,
2640 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2642 struct ctl_table *table;
2644 table = kmemdup(ipv6_route_table_template,
2645 sizeof(ipv6_route_table_template),
2646 GFP_KERNEL);
2648 if (table) {
2649 table[0].data = &net->ipv6.sysctl.flush_delay;
2650 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2651 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2652 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2653 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2654 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2655 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2656 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2657 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2658 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2661 return table;
2663 #endif
2665 static int ip6_route_net_init(struct net *net)
2667 int ret = -ENOMEM;
2669 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2670 sizeof(net->ipv6.ip6_dst_ops));
2672 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2673 sizeof(*net->ipv6.ip6_null_entry),
2674 GFP_KERNEL);
2675 if (!net->ipv6.ip6_null_entry)
2676 goto out_ip6_dst_ops;
2677 net->ipv6.ip6_null_entry->u.dst.path =
2678 (struct dst_entry *)net->ipv6.ip6_null_entry;
2679 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2681 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2682 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2683 sizeof(*net->ipv6.ip6_prohibit_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_prohibit_entry)
2686 goto out_ip6_null_entry;
2687 net->ipv6.ip6_prohibit_entry->u.dst.path =
2688 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2689 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2691 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2692 sizeof(*net->ipv6.ip6_blk_hole_entry),
2693 GFP_KERNEL);
2694 if (!net->ipv6.ip6_blk_hole_entry)
2695 goto out_ip6_prohibit_entry;
2696 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2697 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2698 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2699 #endif
2701 net->ipv6.sysctl.flush_delay = 0;
2702 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2703 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2704 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2705 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2706 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2707 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2708 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2710 #ifdef CONFIG_PROC_FS
2711 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2712 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2713 #endif
2714 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2716 ret = 0;
2717 out:
2718 return ret;
2720 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2721 out_ip6_prohibit_entry:
2722 kfree(net->ipv6.ip6_prohibit_entry);
2723 out_ip6_null_entry:
2724 kfree(net->ipv6.ip6_null_entry);
2725 #endif
2726 out_ip6_dst_ops:
2727 goto out;
2730 static void ip6_route_net_exit(struct net *net)
2732 #ifdef CONFIG_PROC_FS
2733 proc_net_remove(net, "ipv6_route");
2734 proc_net_remove(net, "rt6_stats");
2735 #endif
2736 kfree(net->ipv6.ip6_null_entry);
2737 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2738 kfree(net->ipv6.ip6_prohibit_entry);
2739 kfree(net->ipv6.ip6_blk_hole_entry);
2740 #endif
2743 static struct pernet_operations ip6_route_net_ops = {
2744 .init = ip6_route_net_init,
2745 .exit = ip6_route_net_exit,
2748 static struct notifier_block ip6_route_dev_notifier = {
2749 .notifier_call = ip6_route_dev_notify,
2750 .priority = 0,
2753 int __init ip6_route_init(void)
2755 int ret;
2757 ret = -ENOMEM;
2758 ip6_dst_ops_template.kmem_cachep =
2759 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2760 SLAB_HWCACHE_ALIGN, NULL);
2761 if (!ip6_dst_ops_template.kmem_cachep)
2762 goto out;
2764 ret = register_pernet_subsys(&ip6_route_net_ops);
2765 if (ret)
2766 goto out_kmem_cache;
2768 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2770 /* Registering of the loopback is done before this portion of code,
2771 * the loopback reference in rt6_info will not be taken, do it
2772 * manually for init_net */
2773 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2774 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2775 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2776 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2777 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2778 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2779 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2780 #endif
2781 ret = fib6_init();
2782 if (ret)
2783 goto out_register_subsys;
2785 ret = xfrm6_init();
2786 if (ret)
2787 goto out_fib6_init;
2789 ret = fib6_rules_init();
2790 if (ret)
2791 goto xfrm6_init;
2793 ret = -ENOBUFS;
2794 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2795 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2796 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2797 goto fib6_rules_init;
2799 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2800 if (ret)
2801 goto fib6_rules_init;
2803 out:
2804 return ret;
2806 fib6_rules_init:
2807 fib6_rules_cleanup();
2808 xfrm6_init:
2809 xfrm6_fini();
2810 out_fib6_init:
2811 fib6_gc_cleanup();
2812 out_register_subsys:
2813 unregister_pernet_subsys(&ip6_route_net_ops);
2814 out_kmem_cache:
2815 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2816 goto out;
2819 void ip6_route_cleanup(void)
2821 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2822 fib6_rules_cleanup();
2823 xfrm6_fini();
2824 fib6_gc_cleanup();
2825 unregister_pernet_subsys(&ip6_route_net_ops);
2826 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);