thinkpad-acpi: don't fail to load the entire module due to ALSA problems
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / route.c
blobe307517cbddcfa3767f7777b0e69aa55973119e8
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
57 #include <asm/uaccess.h>
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
/* Debug verbosity for this file; set to 3 (or more) to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: both helpers forward to printk. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Selects, in ip6_pol_route(), whether a route that does not need
 * rt6_alloc_cow() is cloned with rt6_alloc_clone() (non-zero) or
 * returned as-is (zero).
 */
#define CLONE_OFFLINK_ROUTE 0
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void ip6_dst_destroy(struct dst_entry *);
80 static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
82 static int ip6_dst_gc(struct dst_ops *ops);
84 static int ip6_pkt_discard(struct sk_buff *skb);
85 static int ip6_pkt_discard_out(struct sk_buff *skb);
86 static void ip6_link_failure(struct sk_buff *skb);
87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94 static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
96 struct in6_addr *gwaddr, int ifindex);
97 #endif
99 static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6,
101 .protocol = cpu_to_be16(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
110 .local_out = __ip6_local_out,
111 .entries = ATOMIC_INIT(0),
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
118 static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entries = ATOMIC_INIT(0),
127 static struct rt6_info ip6_null_entry_template = {
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_protocol = RTPROT_KERNEL,
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" entry: same shape as the null entry but
 * the dst reports -EACCES and uses the ip6_pkt_prohibit*() handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: the dst reports -EINVAL and both
 * input and output are dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
/*
 * Allocate a dst entry from @ops via dst_alloc() and hand it back typed
 * as an rt6_info.  Returns whatever dst_alloc() returned, cast.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
194 static void ip6_dst_destroy(struct dst_entry *dst)
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
210 struct net_device *loopback_dev =
211 dev_net(dev)->loopback_dev;
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
223 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
229 static inline int rt6_need_strict(struct in6_addr *daddr)
231 return (ipv6_addr_type(daddr) &
232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
236 * Route lookup. Any table->tb6_lock is implied.
239 static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
241 struct in6_addr *saddr,
242 int oif,
243 int flags)
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
254 if (oif) {
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (flags & RT6_LOOKUP_F_IFACE && oif)
261 continue;
262 if (local && (!oif ||
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
266 local = sprt;
268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
275 if (oif) {
276 if (local)
277 return local;
279 if (flags & RT6_LOOKUP_F_IFACE)
280 return net->ipv6.ip6_null_entry;
282 out:
283 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for the nexthop of @rt.
 *
 * Does nothing when @rt is NULL, has no nexthop neighbour, or the
 * neighbour is already in a NUD_VALID state.  Otherwise, if more than
 * rtr_probe_interval jiffies have passed since neigh->updated, the
 * timestamp is refreshed and a probe is sent with ndisc_send_ns() to
 * the multicast address computed by addrconf_addr_solict_mult().
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing; the probe itself is sent after the lock has been dropped.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	/* Re-check under the lock: the state may have changed meanwhile. */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
323 * Default Router Selection (RFC 2461 6.3.6)
325 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
327 struct net_device *dev = rt->rt6i_dev;
328 if (!oif || dev->ifindex == oif)
329 return 2;
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
336 static inline int rt6_check_neigh(struct rt6_info *rt)
338 struct neighbour *neigh = rt->rt6i_nexthop;
339 int m;
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
346 m = 2;
347 #ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350 #endif
351 else
352 m = 1;
353 read_unlock_bh(&neigh->lock);
354 } else
355 m = 0;
356 return m;
359 static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
362 int m, n;
364 m = rt6_check_dev(rt, oif);
365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
366 return -1;
367 #ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369 #endif
370 n = rt6_check_neigh(rt);
371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
372 return -1;
373 return m;
376 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
379 int m;
381 if (rt6_check_expired(rt))
382 goto out;
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
397 out:
398 return match;
401 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
405 struct rt6_info *rt, *match;
406 int mpri = -1;
408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
416 return match;
419 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
421 struct rt6_info *match, *rt0;
422 struct net *net;
424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
425 __func__, fn->leaf, oif);
427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
433 if (!match &&
434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
437 /* no entries matched; do round-robin */
438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
441 if (next != rt0)
442 fn->rr_ptr = next;
445 RT6_TRACE("%s() => %p\n",
446 __func__, match);
448 net = dev_net(rt0->rt6i_dev);
449 return (match ? match : net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on
 * @dev from gateway @gwaddr.
 *
 * Validates the option (size, length/prefix_len consistency, route
 * preference), then looks up the matching route-info route:
 *   - lifetime 0 with an existing route: the route is deleted;
 *   - non-zero lifetime without a route: a new one is added;
 *   - existing route: its preference bits are refreshed.
 * Finally the expiry of the resulting route is updated and the
 * reference held on it is released.
 *
 * Returns 0 on success, -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * Must be expanded inside a function that provides the locals `rt`
 * (current result) and `fn` (current node) and the labels `out` and
 * `restart`.  When `rt` is the null entry of @__net, the macro walks
 * towards the root: it jumps to `out` once a node flagged RTN_TL_ROOT
 * is reached, descends into the parent's subtree (looked up with
 * @saddr) when there is one that is not the current node, and jumps to
 * `restart` as soon as it lands on a node flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
544 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
546 struct flowi *fl, int flags)
548 struct fib6_node *fn;
549 struct rt6_info *rt;
551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553 restart:
554 rt = fn->leaf;
555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
556 BACKTRACK(net, &fl->fl6_src);
557 out:
558 dst_use(&rt->u.dst, jiffies);
559 read_unlock_bh(&table->tb6_lock);
560 return rt;
564 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
575 struct dst_entry *dst;
576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
587 dst_release(dst);
589 return NULL;
592 EXPORT_SYMBOL(rt6_lookup);
594 /* ip6_ins_rt is called with FREE table->tb6_lock.
595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
600 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
602 int err;
603 struct fib6_table *table;
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
607 err = fib6_add(&table->tb6_root, rt, info);
608 write_unlock_bh(&table->tb6_lock);
610 return err;
613 int ip6_ins_rt(struct rt6_info *rt)
615 struct nl_info info = {
616 .nl_net = dev_net(rt->rt6i_dev),
618 return __ip6_ins_rt(rt, &info);
621 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
624 struct rt6_info *rt;
627 * Clone the route.
630 rt = ip6_rt_copy(ort);
632 if (rt) {
633 struct neighbour *neigh;
634 int attempts = !in_softirq();
636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
648 #ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
653 #endif
655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
683 rt->rt6i_nexthop = neigh;
687 return rt;
690 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
700 return rt;
703 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
706 struct fib6_node *fn;
707 struct rt6_info *rt, *nrt;
708 int strict = 0;
709 int attempts = 3;
710 int err;
711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
713 strict |= flags & RT6_LOOKUP_F_IFACE;
715 relookup:
716 read_lock_bh(&table->tb6_lock);
718 restart_2:
719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
721 restart:
722 rt = rt6_select(fn, oif, strict | reachable);
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
726 rt->rt6i_flags & RTF_CACHE)
727 goto out;
729 dst_hold(&rt->u.dst);
730 read_unlock_bh(&table->tb6_lock);
732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
734 else {
735 #if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737 #else
738 goto out2;
739 #endif
742 dst_release(&rt->u.dst);
743 rt = nrt ? : net->ipv6.ip6_null_entry;
745 dst_hold(&rt->u.dst);
746 if (nrt) {
747 err = ip6_ins_rt(nrt);
748 if (!err)
749 goto out2;
752 if (--attempts <= 0)
753 goto out2;
756 * Race condition! In the gap, when table->tb6_lock was
757 * released someone could insert this route. Relookup.
759 dst_release(&rt->u.dst);
760 goto relookup;
762 out:
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
767 dst_hold(&rt->u.dst);
768 read_unlock_bh(&table->tb6_lock);
769 out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
773 return rt;
776 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
777 struct flowi *fl, int flags)
779 return ip6_pol_route(net, table, fl->iif, fl, flags);
782 void ip6_route_input(struct sk_buff *skb)
784 struct ipv6hdr *iph = ipv6_hdr(skb);
785 struct net *net = dev_net(skb->dev);
786 int flags = RT6_LOOKUP_F_HAS_SADDR;
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
796 .mark = skb->mark,
797 .proto = iph->nexthdr,
800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
801 flags |= RT6_LOOKUP_F_IFACE;
803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
806 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
807 struct flowi *fl, int flags)
809 return ip6_pol_route(net, table, fl->oif, fl, flags);
812 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
815 int flags = 0;
817 if (rt6_need_strict(&fl->fl6_dst))
818 flags |= RT6_LOOKUP_F_IFACE;
820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
835 EXPORT_SYMBOL(ip6_route_output);
837 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
844 if (rt) {
845 new = &rt->u.dst;
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
849 new->input = dst_discard;
850 new->output = dst_discard;
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866 #ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868 #endif
870 dst_free(new);
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
877 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
880 * Destination cache support functions
883 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
885 struct rt6_info *rt;
887 rt = (struct rt6_info *) dst;
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
892 return NULL;
895 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
897 struct rt6_info *rt = (struct rt6_info *) dst;
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
901 ip6_del_rt(rt);
902 else
903 dst_release(dst);
905 return NULL;
908 static void ip6_link_failure(struct sk_buff *skb)
910 struct rt6_info *rt;
912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
914 rt = (struct rt6_info *) skb_dst(skb);
915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
924 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
934 dst->metrics[RTAX_MTU-1] = mtu;
935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
939 static int ipv6_get_mtu(struct net_device *dev);
941 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
952 * rely only on pmtu discovery"
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
959 static struct dst_entry *icmp6_dst_gc_list;
960 static DEFINE_SPINLOCK(icmp6_dst_lock);
962 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
963 struct neighbour *neigh,
964 const struct in6_addr *addr)
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
968 struct net *net = dev_net(dev);
970 if (unlikely(idev == NULL))
971 return NULL;
973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
982 else {
983 neigh = ndisc_get_neigh(dev, addr);
984 if (IS_ERR(neigh))
985 neigh = NULL;
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
995 rt->u.dst.output = ip6_output;
997 #if 0 /* there's no chance to use these for ndisc */
998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
1000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003 #endif
1005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
1010 fib6_force_start_gc(net);
1012 out:
1013 return &rt->u.dst;
1016 int icmp6_dst_gc(void)
1018 struct dst_entry *dst, *next, **pprev;
1019 int more = 0;
1021 next = NULL;
1023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
1026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
1030 } else {
1031 pprev = &dst->next;
1032 ++more;
1036 spin_unlock_bh(&icmp6_dst_lock);
1038 return more;
1041 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1044 struct dst_entry *dst, **pprev;
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1057 spin_unlock_bh(&icmp6_dst_lock);
1060 static int ip6_dst_gc(struct dst_ops *ops)
1062 unsigned long now = jiffies;
1063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
1072 goto out;
1074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
1077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1079 out:
1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
1084 /* Clean host part of a prefix. Not necessary in radix tree,
1085 but results in cleaner routing tables.
1087 Remove it only when all the things will work!
1090 static int ipv6_get_mtu(struct net_device *dev)
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1100 return mtu;
1103 int ip6_dst_hoplimit(struct dst_entry *dst)
1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1115 return hoplimit;
1122 int ip6_route_add(struct fib6_config *cfg)
1124 int err;
1125 struct net *net = cfg->fc_nlinfo.nl_net;
1126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
1129 struct fib6_table *table;
1130 int addr_type;
1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1133 return -EINVAL;
1134 #ifndef CONFIG_IPV6_SUBTREES
1135 if (cfg->fc_src_len)
1136 return -EINVAL;
1137 #endif
1138 if (cfg->fc_ifindex) {
1139 err = -ENODEV;
1140 dev = dev_get_by_index(net, cfg->fc_ifindex);
1141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
1151 table = fib6_new_table(net, cfg->fc_table);
1152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1164 rt->u.dst.obsolete = -1;
1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1180 rt->u.dst.output = ip6_output;
1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
1184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1187 #ifdef CONFIG_IPV6_SUBTREES
1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
1190 #endif
1192 rt->rt6i_metric = cfg->fc_metric;
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1197 if ((cfg->fc_flags & RTF_REJECT) ||
1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
1200 if (dev != net->loopback_dev) {
1201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1205 dev = net->loopback_dev;
1206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1220 if (cfg->fc_flags & RTF_GATEWAY) {
1221 struct in6_addr *gw_addr;
1222 int gwa_type;
1224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1226 gwa_type = ipv6_addr_type(gw_addr);
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1231 /* IPv6 strictly inhibits using not link-local
1232 addresses as nexthop address.
1233 Otherwise, router will not able to send redirects.
1234 It is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1262 if (err)
1263 goto out;
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1283 rt->rt6i_flags = cfg->fc_flags;
1285 install_route:
1286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1291 int type = nla_type(nla);
1293 if (type) {
1294 if (type > RTAX_MAX) {
1295 err = -EINVAL;
1296 goto out;
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1306 if (!dst_mtu(&rt->u.dst))
1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
1312 rt->rt6i_table = table;
1314 cfg->fc_nlinfo.nl_net = dev_net(dev);
1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1318 out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
1324 dst_free(&rt->u.dst);
1325 return err;
1328 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1330 int err;
1331 struct fib6_table *table;
1332 struct net *net = dev_net(rt->rt6i_dev);
1334 if (rt == net->ipv6.ip6_null_entry)
1335 return -ENOENT;
1337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
1340 err = fib6_del(rt, info);
1341 dst_release(&rt->u.dst);
1343 write_unlock_bh(&table->tb6_lock);
1345 return err;
1348 int ip6_del_rt(struct rt6_info *rt)
1350 struct nl_info info = {
1351 .nl_net = dev_net(rt->rt6i_dev),
1353 return __ip6_del_rt(rt, &info);
1356 static int ip6_route_del(struct fib6_config *cfg)
1358 struct fib6_table *table;
1359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1364 if (table == NULL)
1365 return err;
1367 read_lock_bh(&table->tb6_lock);
1369 fn = fib6_locate(&table->tb6_root,
1370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
1373 if (fn) {
1374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1375 if (cfg->fc_ifindex &&
1376 (rt->rt6i_dev == NULL ||
1377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1378 continue;
1379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1381 continue;
1382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1383 continue;
1384 dst_hold(&rt->u.dst);
1385 read_unlock_bh(&table->tb6_lock);
1387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1390 read_unlock_bh(&table->tb6_lock);
1392 return err;
1396 * Handle redirects
1398 struct ip6rd_flowi {
1399 struct flowi fl;
1400 struct in6_addr gateway;
1403 static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
1405 struct flowi *fl,
1406 int flags)
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
1410 struct fib6_node *fn;
1413 * Get the "current" route for this destination and
1414 * check if the redirect has come from approriate router.
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
1423 read_lock_bh(&table->tb6_lock);
1424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1425 restart:
1426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1428 * Current route is on-link; redirect is always invalid.
1430 * Seems, previous statement is not true. It could
1431 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432 * But then router serving it might decide, that we should
1433 * know truth 8)8) --ANK (980726).
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
1439 if (fl->oif != rt->rt6i_dev->ifindex)
1440 continue;
1441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1442 continue;
1443 break;
1446 if (!rt)
1447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
1449 out:
1450 dst_hold(&rt->u.dst);
1452 read_unlock_bh(&table->tb6_lock);
1454 return rt;
1457 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1462 int flags = RT6_LOOKUP_F_HAS_SADDR;
1463 struct net *net = dev_net(dev);
1464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1474 .gateway = *gateway,
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1481 flags, __ip6_route_redirect);
1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
1490 struct net *net = dev_net(neigh->dev);
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494 if (rt == net->ipv6.ip6_null_entry) {
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
1498 goto out;
1502 * We have finally decided to accept it.
1505 neigh_update(neigh, lladdr, NUD_STALE,
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1517 dst_confirm(&rt->u.dst);
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->u.dst.neighbour)
1521 goto out;
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->u.dst.flags |= DST_HOST;
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540 dst_mtu(&nrt->u.dst));
1542 if (ip6_ins_rt(nrt))
1543 goto out;
1545 netevent.old = &rt->u.dst;
1546 netevent.new = &nrt->u.dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549 if (rt->rt6i_flags&RTF_CACHE) {
1550 ip6_del_rt(rt);
1551 return;
1554 out:
1555 dst_release(&rt->u.dst);
1556 return;
1560 * Handle ICMP "packet too big" messages
1561 * i.e. Path MTU discovery
1564 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1565 struct net *net, u32 pmtu, int ifindex)
1567 struct rt6_info *rt, *nrt;
1568 int allfrag = 0;
1570 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1571 if (rt == NULL)
1572 return;
1574 if (pmtu >= dst_mtu(&rt->u.dst))
1575 goto out;
1577 if (pmtu < IPV6_MIN_MTU) {
1579 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1580 * MTU (1280) and a fragment header should always be included
1581 * after a node receiving Too Big message reporting PMTU is
1582 * less than the IPv6 Minimum Link MTU.
1584 pmtu = IPV6_MIN_MTU;
1585 allfrag = 1;
1588 /* New mtu received -> path was valid.
1589 They are sent only in response to data packets,
1590 so that this nexthop apparently is reachable. --ANK
1592 dst_confirm(&rt->u.dst);
1594 /* Host route. If it is static, it would be better
1595 not to override it, but add new one, so that
1596 when cache entry will expire old pmtu
1597 would return automatically.
1599 if (rt->rt6i_flags & RTF_CACHE) {
1600 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1601 if (allfrag)
1602 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1603 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1604 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1605 goto out;
1608 /* Network route.
1609 Two cases are possible:
1610 1. It is connected route. Action: COW
1611 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1614 nrt = rt6_alloc_cow(rt, daddr, saddr);
1615 else
1616 nrt = rt6_alloc_clone(rt, daddr);
1618 if (nrt) {
1619 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1620 if (allfrag)
1621 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1623 /* According to RFC 1981, detecting PMTU increase shouldn't be
1624 * happened within 5 mins, the recommended timer is 10 mins.
1625 * Here this route expiration time is set to ip6_rt_mtu_expires
1626 * which is 10 mins. After 10 mins the decreased pmtu is expired
1627 * and detecting PMTU increase will be automatically happened.
1629 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1630 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1632 ip6_ins_rt(nrt);
1634 out:
1635 dst_release(&rt->u.dst);
1638 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1639 struct net_device *dev, u32 pmtu)
1641 struct net *net = dev_net(dev);
1644 * RFC 1981 states that a node "MUST reduce the size of the packets it
1645 * is sending along the path" that caused the Packet Too Big message.
1646 * Since it's not possible in the general case to determine which
1647 * interface was used to send the original packet, we update the MTU
1648 * on the interface that will be used to send future packets. We also
1649 * update the MTU on the interface that received the Packet Too Big in
1650 * case the original packet was forced out that interface with
1651 * SO_BINDTODEVICE or similar. This is the next best thing to the
1652 * correct behaviour, which would be to update the MTU on all
1653 * interfaces.
1655 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1656 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1660 * Misc support functions
1663 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1665 struct net *net = dev_net(ort->rt6i_dev);
1666 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1668 if (rt) {
1669 rt->u.dst.input = ort->u.dst.input;
1670 rt->u.dst.output = ort->u.dst.output;
1672 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1673 rt->u.dst.error = ort->u.dst.error;
1674 rt->u.dst.dev = ort->u.dst.dev;
1675 if (rt->u.dst.dev)
1676 dev_hold(rt->u.dst.dev);
1677 rt->rt6i_idev = ort->rt6i_idev;
1678 if (rt->rt6i_idev)
1679 in6_dev_hold(rt->rt6i_idev);
1680 rt->u.dst.lastuse = jiffies;
1681 rt->rt6i_expires = 0;
1683 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1684 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1685 rt->rt6i_metric = 0;
1687 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1688 #ifdef CONFIG_IPV6_SUBTREES
1689 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1690 #endif
1691 rt->rt6i_table = ort->rt6i_table;
1693 return rt;
1696 #ifdef CONFIG_IPV6_ROUTE_INFO
1697 static struct rt6_info *rt6_get_route_info(struct net *net,
1698 struct in6_addr *prefix, int prefixlen,
1699 struct in6_addr *gwaddr, int ifindex)
1701 struct fib6_node *fn;
1702 struct rt6_info *rt = NULL;
1703 struct fib6_table *table;
1705 table = fib6_get_table(net, RT6_TABLE_INFO);
1706 if (table == NULL)
1707 return NULL;
1709 write_lock_bh(&table->tb6_lock);
1710 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1711 if (!fn)
1712 goto out;
1714 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1715 if (rt->rt6i_dev->ifindex != ifindex)
1716 continue;
1717 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1718 continue;
1719 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1720 continue;
1721 dst_hold(&rt->u.dst);
1722 break;
1724 out:
1725 write_unlock_bh(&table->tb6_lock);
1726 return rt;
1729 static struct rt6_info *rt6_add_route_info(struct net *net,
1730 struct in6_addr *prefix, int prefixlen,
1731 struct in6_addr *gwaddr, int ifindex,
1732 unsigned pref)
1734 struct fib6_config cfg = {
1735 .fc_table = RT6_TABLE_INFO,
1736 .fc_metric = IP6_RT_PRIO_USER,
1737 .fc_ifindex = ifindex,
1738 .fc_dst_len = prefixlen,
1739 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1740 RTF_UP | RTF_PREF(pref),
1741 .fc_nlinfo.pid = 0,
1742 .fc_nlinfo.nlh = NULL,
1743 .fc_nlinfo.nl_net = net,
1746 ipv6_addr_copy(&cfg.fc_dst, prefix);
1747 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1749 /* We should treat it as a default route if prefix length is 0. */
1750 if (!prefixlen)
1751 cfg.fc_flags |= RTF_DEFAULT;
1753 ip6_route_add(&cfg);
1755 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1757 #endif
1759 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1761 struct rt6_info *rt;
1762 struct fib6_table *table;
1764 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1765 if (table == NULL)
1766 return NULL;
1768 write_lock_bh(&table->tb6_lock);
1769 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1770 if (dev == rt->rt6i_dev &&
1771 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1772 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1773 break;
1775 if (rt)
1776 dst_hold(&rt->u.dst);
1777 write_unlock_bh(&table->tb6_lock);
1778 return rt;
1781 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1782 struct net_device *dev,
1783 unsigned int pref)
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_DFLT,
1787 .fc_metric = IP6_RT_PRIO_USER,
1788 .fc_ifindex = dev->ifindex,
1789 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1790 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1791 .fc_nlinfo.pid = 0,
1792 .fc_nlinfo.nlh = NULL,
1793 .fc_nlinfo.nl_net = dev_net(dev),
1796 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1798 ip6_route_add(&cfg);
1800 return rt6_get_dflt_router(gwaddr, dev);
1803 void rt6_purge_dflt_routers(struct net *net)
1805 struct rt6_info *rt;
1806 struct fib6_table *table;
1808 /* NOTE: Keep consistent with rt6_get_dflt_router */
1809 table = fib6_get_table(net, RT6_TABLE_DFLT);
1810 if (table == NULL)
1811 return;
1813 restart:
1814 read_lock_bh(&table->tb6_lock);
1815 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1816 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1817 dst_hold(&rt->u.dst);
1818 read_unlock_bh(&table->tb6_lock);
1819 ip6_del_rt(rt);
1820 goto restart;
1823 read_unlock_bh(&table->tb6_lock);
1826 static void rtmsg_to_fib6_config(struct net *net,
1827 struct in6_rtmsg *rtmsg,
1828 struct fib6_config *cfg)
1830 memset(cfg, 0, sizeof(*cfg));
1832 cfg->fc_table = RT6_TABLE_MAIN;
1833 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1834 cfg->fc_metric = rtmsg->rtmsg_metric;
1835 cfg->fc_expires = rtmsg->rtmsg_info;
1836 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1837 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1838 cfg->fc_flags = rtmsg->rtmsg_flags;
1840 cfg->fc_nlinfo.nl_net = net;
1842 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1843 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1844 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1847 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1849 struct fib6_config cfg;
1850 struct in6_rtmsg rtmsg;
1851 int err;
1853 switch(cmd) {
1854 case SIOCADDRT: /* Add a route */
1855 case SIOCDELRT: /* Delete a route */
1856 if (!capable(CAP_NET_ADMIN))
1857 return -EPERM;
1858 err = copy_from_user(&rtmsg, arg,
1859 sizeof(struct in6_rtmsg));
1860 if (err)
1861 return -EFAULT;
1863 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1865 rtnl_lock();
1866 switch (cmd) {
1867 case SIOCADDRT:
1868 err = ip6_route_add(&cfg);
1869 break;
1870 case SIOCDELRT:
1871 err = ip6_route_del(&cfg);
1872 break;
1873 default:
1874 err = -EINVAL;
1876 rtnl_unlock();
1878 return err;
1881 return -EINVAL;
1885 * Drop the packet on the floor
1888 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1890 int type;
1891 struct dst_entry *dst = skb_dst(skb);
1892 switch (ipstats_mib_noroutes) {
1893 case IPSTATS_MIB_INNOROUTES:
1894 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1895 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1896 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1897 IPSTATS_MIB_INADDRERRORS);
1898 break;
1900 /* FALLTHROUGH */
1901 case IPSTATS_MIB_OUTNOROUTES:
1902 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1903 ipstats_mib_noroutes);
1904 break;
1906 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1907 kfree_skb(skb);
1908 return 0;
1911 static int ip6_pkt_discard(struct sk_buff *skb)
1913 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1916 static int ip6_pkt_discard_out(struct sk_buff *skb)
1918 skb->dev = skb_dst(skb)->dev;
1919 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1922 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1924 static int ip6_pkt_prohibit(struct sk_buff *skb)
1926 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1929 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1931 skb->dev = skb_dst(skb)->dev;
1932 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1935 #endif
1938 * Allocate a dst for local (unicast / anycast) address.
1941 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1942 const struct in6_addr *addr,
1943 int anycast)
1945 struct net *net = dev_net(idev->dev);
1946 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1947 struct neighbour *neigh;
1949 if (rt == NULL)
1950 return ERR_PTR(-ENOMEM);
1952 dev_hold(net->loopback_dev);
1953 in6_dev_hold(idev);
1955 rt->u.dst.flags = DST_HOST;
1956 rt->u.dst.input = ip6_input;
1957 rt->u.dst.output = ip6_output;
1958 rt->rt6i_dev = net->loopback_dev;
1959 rt->rt6i_idev = idev;
1960 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1961 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1962 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1963 rt->u.dst.obsolete = -1;
1965 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1966 if (anycast)
1967 rt->rt6i_flags |= RTF_ANYCAST;
1968 else
1969 rt->rt6i_flags |= RTF_LOCAL;
1970 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1971 if (IS_ERR(neigh)) {
1972 dst_free(&rt->u.dst);
1974 /* We are casting this because that is the return
1975 * value type. But an errno encoded pointer is the
1976 * same regardless of the underlying pointer type,
1977 * and that's what we are returning. So this is OK.
1979 return (struct rt6_info *) neigh;
1981 rt->rt6i_nexthop = neigh;
1983 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1984 rt->rt6i_dst.plen = 128;
1985 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1987 atomic_set(&rt->u.dst.__refcnt, 1);
1989 return rt;
/* Argument bundle handed to fib6_ifdown() through its void pointer. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose null entry must be kept */
};
1997 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1999 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2000 struct net *net = ((struct arg_dev_net *)arg)->net;
2002 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2003 rt != net->ipv6.ip6_null_entry) {
2004 RT6_TRACE("deleted by ifdown %p\n", rt);
2005 return -1;
2007 return 0;
2010 void rt6_ifdown(struct net *net, struct net_device *dev)
2012 struct arg_dev_net adn = {
2013 .dev = dev,
2014 .net = net,
2017 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2018 icmp6_clean_all(fib6_ifdown, &adn);
/* Argument bundle handed to rt6_mtu_change_route() through its void pointer. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2027 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2029 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2030 struct inet6_dev *idev;
2031 struct net *net = dev_net(arg->dev);
2033 /* In IPv6 pmtu discovery is not optional,
2034 so that RTAX_MTU lock cannot disable it.
2035 We still use this lock to block changes
2036 caused by addrconf/ndisc.
2039 idev = __in6_dev_get(arg->dev);
2040 if (idev == NULL)
2041 return 0;
2043 /* For administrative MTU increase, there is no way to discover
2044 IPv6 PMTU increase, so PMTU increase should be updated here.
2045 Since RFC 1981 doesn't include administrative MTU increase
2046 update PMTU increase is a MUST. (i.e. jumbo frame)
2049 If new MTU is less than route PMTU, this new MTU will be the
2050 lowest MTU in the path, update the route PMTU to reflect PMTU
2051 decreases; if new MTU is greater than route PMTU, and the
2052 old MTU is the lowest MTU in the path, update the route PMTU
2053 to reflect the increase. In this case if the other nodes' MTU
2054 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2055 PMTU discouvery.
2057 if (rt->rt6i_dev == arg->dev &&
2058 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2059 (dst_mtu(&rt->u.dst) >= arg->mtu ||
2060 (dst_mtu(&rt->u.dst) < arg->mtu &&
2061 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2062 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2063 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2065 return 0;
2068 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2070 struct rt6_mtu_change_arg arg = {
2071 .dev = dev,
2072 .mtu = mtu,
2075 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2078 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2079 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2080 [RTA_OIF] = { .type = NLA_U32 },
2081 [RTA_IIF] = { .type = NLA_U32 },
2082 [RTA_PRIORITY] = { .type = NLA_U32 },
2083 [RTA_METRICS] = { .type = NLA_NESTED },
2086 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2087 struct fib6_config *cfg)
2089 struct rtmsg *rtm;
2090 struct nlattr *tb[RTA_MAX+1];
2091 int err;
2093 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 if (err < 0)
2095 goto errout;
2097 err = -EINVAL;
2098 rtm = nlmsg_data(nlh);
2099 memset(cfg, 0, sizeof(*cfg));
2101 cfg->fc_table = rtm->rtm_table;
2102 cfg->fc_dst_len = rtm->rtm_dst_len;
2103 cfg->fc_src_len = rtm->rtm_src_len;
2104 cfg->fc_flags = RTF_UP;
2105 cfg->fc_protocol = rtm->rtm_protocol;
2107 if (rtm->rtm_type == RTN_UNREACHABLE)
2108 cfg->fc_flags |= RTF_REJECT;
2110 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2111 cfg->fc_nlinfo.nlh = nlh;
2112 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2114 if (tb[RTA_GATEWAY]) {
2115 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2116 cfg->fc_flags |= RTF_GATEWAY;
2119 if (tb[RTA_DST]) {
2120 int plen = (rtm->rtm_dst_len + 7) >> 3;
2122 if (nla_len(tb[RTA_DST]) < plen)
2123 goto errout;
2125 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2128 if (tb[RTA_SRC]) {
2129 int plen = (rtm->rtm_src_len + 7) >> 3;
2131 if (nla_len(tb[RTA_SRC]) < plen)
2132 goto errout;
2134 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2137 if (tb[RTA_OIF])
2138 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2140 if (tb[RTA_PRIORITY])
2141 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2143 if (tb[RTA_METRICS]) {
2144 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2145 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2148 if (tb[RTA_TABLE])
2149 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2151 err = 0;
2152 errout:
2153 return err;
2156 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2158 struct fib6_config cfg;
2159 int err;
2161 err = rtm_to_fib6_config(skb, nlh, &cfg);
2162 if (err < 0)
2163 return err;
2165 return ip6_route_del(&cfg);
2168 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2170 struct fib6_config cfg;
2171 int err;
2173 err = rtm_to_fib6_config(skb, nlh, &cfg);
2174 if (err < 0)
2175 return err;
2177 return ip6_route_add(&cfg);
2180 static inline size_t rt6_nlmsg_size(void)
2182 return NLMSG_ALIGN(sizeof(struct rtmsg))
2183 + nla_total_size(16) /* RTA_SRC */
2184 + nla_total_size(16) /* RTA_DST */
2185 + nla_total_size(16) /* RTA_GATEWAY */
2186 + nla_total_size(16) /* RTA_PREFSRC */
2187 + nla_total_size(4) /* RTA_TABLE */
2188 + nla_total_size(4) /* RTA_IIF */
2189 + nla_total_size(4) /* RTA_OIF */
2190 + nla_total_size(4) /* RTA_PRIORITY */
2191 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2192 + nla_total_size(sizeof(struct rta_cacheinfo));
2195 static int rt6_fill_node(struct net *net,
2196 struct sk_buff *skb, struct rt6_info *rt,
2197 struct in6_addr *dst, struct in6_addr *src,
2198 int iif, int type, u32 pid, u32 seq,
2199 int prefix, int nowait, unsigned int flags)
2201 struct rtmsg *rtm;
2202 struct nlmsghdr *nlh;
2203 long expires;
2204 u32 table;
2206 if (prefix) { /* user wants prefix routes only */
2207 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2208 /* success since this is not a prefix route */
2209 return 1;
2213 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2214 if (nlh == NULL)
2215 return -EMSGSIZE;
2217 rtm = nlmsg_data(nlh);
2218 rtm->rtm_family = AF_INET6;
2219 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2220 rtm->rtm_src_len = rt->rt6i_src.plen;
2221 rtm->rtm_tos = 0;
2222 if (rt->rt6i_table)
2223 table = rt->rt6i_table->tb6_id;
2224 else
2225 table = RT6_TABLE_UNSPEC;
2226 rtm->rtm_table = table;
2227 NLA_PUT_U32(skb, RTA_TABLE, table);
2228 if (rt->rt6i_flags&RTF_REJECT)
2229 rtm->rtm_type = RTN_UNREACHABLE;
2230 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2231 rtm->rtm_type = RTN_LOCAL;
2232 else
2233 rtm->rtm_type = RTN_UNICAST;
2234 rtm->rtm_flags = 0;
2235 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2236 rtm->rtm_protocol = rt->rt6i_protocol;
2237 if (rt->rt6i_flags&RTF_DYNAMIC)
2238 rtm->rtm_protocol = RTPROT_REDIRECT;
2239 else if (rt->rt6i_flags & RTF_ADDRCONF)
2240 rtm->rtm_protocol = RTPROT_KERNEL;
2241 else if (rt->rt6i_flags&RTF_DEFAULT)
2242 rtm->rtm_protocol = RTPROT_RA;
2244 if (rt->rt6i_flags&RTF_CACHE)
2245 rtm->rtm_flags |= RTM_F_CLONED;
2247 if (dst) {
2248 NLA_PUT(skb, RTA_DST, 16, dst);
2249 rtm->rtm_dst_len = 128;
2250 } else if (rtm->rtm_dst_len)
2251 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2252 #ifdef CONFIG_IPV6_SUBTREES
2253 if (src) {
2254 NLA_PUT(skb, RTA_SRC, 16, src);
2255 rtm->rtm_src_len = 128;
2256 } else if (rtm->rtm_src_len)
2257 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2258 #endif
2259 if (iif) {
2260 #ifdef CONFIG_IPV6_MROUTE
2261 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2262 int err = ip6mr_get_route(net, skb, rtm, nowait);
2263 if (err <= 0) {
2264 if (!nowait) {
2265 if (err == 0)
2266 return 0;
2267 goto nla_put_failure;
2268 } else {
2269 if (err == -EMSGSIZE)
2270 goto nla_put_failure;
2273 } else
2274 #endif
2275 NLA_PUT_U32(skb, RTA_IIF, iif);
2276 } else if (dst) {
2277 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2278 struct in6_addr saddr_buf;
2279 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2280 dst, 0, &saddr_buf) == 0)
2281 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2284 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2285 goto nla_put_failure;
2287 if (rt->u.dst.neighbour)
2288 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2290 if (rt->u.dst.dev)
2291 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2293 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2295 if (!(rt->rt6i_flags & RTF_EXPIRES))
2296 expires = 0;
2297 else if (rt->rt6i_expires - jiffies < INT_MAX)
2298 expires = rt->rt6i_expires - jiffies;
2299 else
2300 expires = INT_MAX;
2302 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2303 expires, rt->u.dst.error) < 0)
2304 goto nla_put_failure;
2306 return nlmsg_end(skb, nlh);
2308 nla_put_failure:
2309 nlmsg_cancel(skb, nlh);
2310 return -EMSGSIZE;
2313 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2315 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2316 int prefix;
2318 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2319 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2320 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2321 } else
2322 prefix = 0;
2324 return rt6_fill_node(arg->net,
2325 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2326 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2327 prefix, 0, NLM_F_MULTI);
2330 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2332 struct net *net = sock_net(in_skb->sk);
2333 struct nlattr *tb[RTA_MAX+1];
2334 struct rt6_info *rt;
2335 struct sk_buff *skb;
2336 struct rtmsg *rtm;
2337 struct flowi fl;
2338 int err, iif = 0;
2340 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2341 if (err < 0)
2342 goto errout;
2344 err = -EINVAL;
2345 memset(&fl, 0, sizeof(fl));
2347 if (tb[RTA_SRC]) {
2348 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2349 goto errout;
2351 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2354 if (tb[RTA_DST]) {
2355 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2356 goto errout;
2358 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2361 if (tb[RTA_IIF])
2362 iif = nla_get_u32(tb[RTA_IIF]);
2364 if (tb[RTA_OIF])
2365 fl.oif = nla_get_u32(tb[RTA_OIF]);
2367 if (iif) {
2368 struct net_device *dev;
2369 dev = __dev_get_by_index(net, iif);
2370 if (!dev) {
2371 err = -ENODEV;
2372 goto errout;
2376 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2377 if (skb == NULL) {
2378 err = -ENOBUFS;
2379 goto errout;
2382 /* Reserve room for dummy headers, this skb can pass
2383 through good chunk of routing engine.
2385 skb_reset_mac_header(skb);
2386 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2388 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2389 skb_dst_set(skb, &rt->u.dst);
2391 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2392 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2393 nlh->nlmsg_seq, 0, 0, 0);
2394 if (err < 0) {
2395 kfree_skb(skb);
2396 goto errout;
2399 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2400 errout:
2401 return err;
2404 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2406 struct sk_buff *skb;
2407 struct net *net = info->nl_net;
2408 u32 seq;
2409 int err;
2411 err = -ENOBUFS;
2412 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2414 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2415 if (skb == NULL)
2416 goto errout;
2418 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2419 event, info->pid, seq, 0, 0, 0);
2420 if (err < 0) {
2421 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2422 WARN_ON(err == -EMSGSIZE);
2423 kfree_skb(skb);
2424 goto errout;
2426 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2427 info->nlh, gfp_any());
2428 return;
2429 errout:
2430 if (err < 0)
2431 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2434 static int ip6_route_dev_notify(struct notifier_block *this,
2435 unsigned long event, void *data)
2437 struct net_device *dev = (struct net_device *)data;
2438 struct net *net = dev_net(dev);
2440 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2441 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2442 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2443 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2445 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2446 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2447 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2448 #endif
2451 return NOTIFY_OK;
2455 * /proc
2458 #ifdef CONFIG_PROC_FS
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * in the part of the file reviewed here; they look like leftovers from a
 * buffer-based /proc implementation - confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2471 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2473 struct seq_file *m = p_arg;
2475 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2477 #ifdef CONFIG_IPV6_SUBTREES
2478 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2479 #else
2480 seq_puts(m, "00000000000000000000000000000000 00 ");
2481 #endif
2483 if (rt->rt6i_nexthop) {
2484 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2485 } else {
2486 seq_puts(m, "00000000000000000000000000000000");
2488 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2489 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2490 rt->u.dst.__use, rt->rt6i_flags,
2491 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2492 return 0;
2495 static int ipv6_route_show(struct seq_file *m, void *v)
2497 struct net *net = (struct net *)m->private;
2498 fib6_clean_all(net, rt6_info_route, 0, m);
2499 return 0;
2502 static int ipv6_route_open(struct inode *inode, struct file *file)
2504 return single_open_net(inode, file, ipv6_route_show);
2507 static const struct file_operations ipv6_route_proc_fops = {
2508 .owner = THIS_MODULE,
2509 .open = ipv6_route_open,
2510 .read = seq_read,
2511 .llseek = seq_lseek,
2512 .release = single_release_net,
2515 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2517 struct net *net = (struct net *)seq->private;
2518 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2519 net->ipv6.rt6_stats->fib_nodes,
2520 net->ipv6.rt6_stats->fib_route_nodes,
2521 net->ipv6.rt6_stats->fib_rt_alloc,
2522 net->ipv6.rt6_stats->fib_rt_entries,
2523 net->ipv6.rt6_stats->fib_rt_cache,
2524 atomic_read(&net->ipv6.ip6_dst_ops.entries),
2525 net->ipv6.rt6_stats->fib_discarded_routes);
2527 return 0;
2530 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2532 return single_open_net(inode, file, rt6_stats_seq_show);
2535 static const struct file_operations rt6_stats_seq_fops = {
2536 .owner = THIS_MODULE,
2537 .open = rt6_stats_seq_open,
2538 .read = seq_read,
2539 .llseek = seq_lseek,
2540 .release = single_release_net,
2542 #endif /* CONFIG_PROC_FS */
2544 #ifdef CONFIG_SYSCTL
2546 static
2547 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2548 void __user *buffer, size_t *lenp, loff_t *ppos)
2550 struct net *net = current->nsproxy->net_ns;
2551 int delay = net->ipv6.sysctl.flush_delay;
2552 if (write) {
2553 proc_dointvec(ctl, write, buffer, lenp, ppos);
2554 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2555 return 0;
2556 } else
2557 return -EINVAL;
2560 ctl_table ipv6_route_table_template[] = {
2562 .procname = "flush",
2563 .data = &init_net.ipv6.sysctl.flush_delay,
2564 .maxlen = sizeof(int),
2565 .mode = 0200,
2566 .proc_handler = ipv6_sysctl_rtcache_flush
2569 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2570 .procname = "gc_thresh",
2571 .data = &ip6_dst_ops_template.gc_thresh,
2572 .maxlen = sizeof(int),
2573 .mode = 0644,
2574 .proc_handler = proc_dointvec,
2577 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2578 .procname = "max_size",
2579 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2580 .maxlen = sizeof(int),
2581 .mode = 0644,
2582 .proc_handler = proc_dointvec,
2585 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2586 .procname = "gc_min_interval",
2587 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2588 .maxlen = sizeof(int),
2589 .mode = 0644,
2590 .proc_handler = proc_dointvec_jiffies,
2591 .strategy = sysctl_jiffies,
2594 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2595 .procname = "gc_timeout",
2596 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2597 .maxlen = sizeof(int),
2598 .mode = 0644,
2599 .proc_handler = proc_dointvec_jiffies,
2600 .strategy = sysctl_jiffies,
2603 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2604 .procname = "gc_interval",
2605 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
2608 .proc_handler = proc_dointvec_jiffies,
2609 .strategy = sysctl_jiffies,
2612 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2613 .procname = "gc_elasticity",
2614 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2615 .maxlen = sizeof(int),
2616 .mode = 0644,
2617 .proc_handler = proc_dointvec_jiffies,
2618 .strategy = sysctl_jiffies,
2621 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2622 .procname = "mtu_expires",
2623 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
2626 .proc_handler = proc_dointvec_jiffies,
2627 .strategy = sysctl_jiffies,
2630 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2631 .procname = "min_adv_mss",
2632 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
2635 .proc_handler = proc_dointvec_jiffies,
2636 .strategy = sysctl_jiffies,
2639 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2640 .procname = "gc_min_interval_ms",
2641 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2642 .maxlen = sizeof(int),
2643 .mode = 0644,
2644 .proc_handler = proc_dointvec_ms_jiffies,
2645 .strategy = sysctl_ms_jiffies,
2647 { .ctl_name = 0 }
2650 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2652 struct ctl_table *table;
2654 table = kmemdup(ipv6_route_table_template,
2655 sizeof(ipv6_route_table_template),
2656 GFP_KERNEL);
2658 if (table) {
2659 table[0].data = &net->ipv6.sysctl.flush_delay;
2660 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2661 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2662 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2663 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2664 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2665 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2666 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2667 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2670 return table;
2672 #endif
2674 static int ip6_route_net_init(struct net *net)
2676 int ret = -ENOMEM;
2678 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2679 sizeof(net->ipv6.ip6_dst_ops));
2681 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2682 sizeof(*net->ipv6.ip6_null_entry),
2683 GFP_KERNEL);
2684 if (!net->ipv6.ip6_null_entry)
2685 goto out_ip6_dst_ops;
2686 net->ipv6.ip6_null_entry->u.dst.path =
2687 (struct dst_entry *)net->ipv6.ip6_null_entry;
2688 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2690 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2691 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2692 sizeof(*net->ipv6.ip6_prohibit_entry),
2693 GFP_KERNEL);
2694 if (!net->ipv6.ip6_prohibit_entry)
2695 goto out_ip6_null_entry;
2696 net->ipv6.ip6_prohibit_entry->u.dst.path =
2697 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2698 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2700 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2701 sizeof(*net->ipv6.ip6_blk_hole_entry),
2702 GFP_KERNEL);
2703 if (!net->ipv6.ip6_blk_hole_entry)
2704 goto out_ip6_prohibit_entry;
2705 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2706 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2707 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2708 #endif
2710 net->ipv6.sysctl.flush_delay = 0;
2711 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2712 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2713 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2714 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2715 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2716 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2717 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2719 #ifdef CONFIG_PROC_FS
2720 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2721 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2722 #endif
2723 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2725 ret = 0;
2726 out:
2727 return ret;
2729 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2730 out_ip6_prohibit_entry:
2731 kfree(net->ipv6.ip6_prohibit_entry);
2732 out_ip6_null_entry:
2733 kfree(net->ipv6.ip6_null_entry);
2734 #endif
2735 out_ip6_dst_ops:
2736 goto out;
2739 static void ip6_route_net_exit(struct net *net)
2741 #ifdef CONFIG_PROC_FS
2742 proc_net_remove(net, "ipv6_route");
2743 proc_net_remove(net, "rt6_stats");
2744 #endif
2745 kfree(net->ipv6.ip6_null_entry);
2746 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2747 kfree(net->ipv6.ip6_prohibit_entry);
2748 kfree(net->ipv6.ip6_blk_hole_entry);
2749 #endif
2752 static struct pernet_operations ip6_route_net_ops = {
2753 .init = ip6_route_net_init,
2754 .exit = ip6_route_net_exit,
2757 static struct notifier_block ip6_route_dev_notifier = {
2758 .notifier_call = ip6_route_dev_notify,
2759 .priority = 0,
2762 int __init ip6_route_init(void)
2764 int ret;
2766 ret = -ENOMEM;
2767 ip6_dst_ops_template.kmem_cachep =
2768 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2769 SLAB_HWCACHE_ALIGN, NULL);
2770 if (!ip6_dst_ops_template.kmem_cachep)
2771 goto out;
2773 ret = register_pernet_subsys(&ip6_route_net_ops);
2774 if (ret)
2775 goto out_kmem_cache;
2777 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2779 /* Registering of the loopback is done before this portion of code,
2780 * the loopback reference in rt6_info will not be taken, do it
2781 * manually for init_net */
2782 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2783 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2784 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2785 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2786 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2787 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2788 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2789 #endif
2790 ret = fib6_init();
2791 if (ret)
2792 goto out_register_subsys;
2794 ret = xfrm6_init();
2795 if (ret)
2796 goto out_fib6_init;
2798 ret = fib6_rules_init();
2799 if (ret)
2800 goto xfrm6_init;
2802 ret = -ENOBUFS;
2803 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2804 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2805 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2806 goto fib6_rules_init;
2808 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2809 if (ret)
2810 goto fib6_rules_init;
2812 out:
2813 return ret;
2815 fib6_rules_init:
2816 fib6_rules_cleanup();
2817 xfrm6_init:
2818 xfrm6_fini();
2819 out_fib6_init:
2820 fib6_gc_cleanup();
2821 out_register_subsys:
2822 unregister_pernet_subsys(&ip6_route_net_ops);
2823 out_kmem_cache:
2824 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2825 goto out;
2828 void ip6_route_cleanup(void)
2830 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2831 fib6_rules_cleanup();
2832 xfrm6_fini();
2833 fib6_gc_cleanup();
2834 unregister_pernet_subsys(&ip6_route_net_ops);
2835 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);