ipv6: Use ip6_dst_hoplimit() instead of direct dst_metric() calls.
[linux-2.6/cjktty.git] / net / ipv6 / route.c
blob 9b2d7bc7beda4679ffd2a64e911d6c69ad9a2b4e
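The subject line describes the change this blob belongs to: direct dst_metric(dst, RTAX_HOPLIMIT) reads are replaced by the ip6_dst_hoplimit() helper defined further down in this file, which resolves the -1 "unset" sentinel against the interface's inet6_dev configuration (or the all-device default). A minimal caller sketch follows; example_hoplimit() is a hypothetical name used only for illustration and assumes the per-socket hop_limit field of struct ipv6_pinfo plus the headers this file already includes:

	static inline int example_hoplimit(struct ipv6_pinfo *np, struct dst_entry *dst)
	{
		int hlimit = np ? np->hop_limit : -1;	/* per-socket override, if set */

		if (hlimit < 0)
			hlimit = ip6_dst_hoplimit(dst);	/* RTAX_HOPLIMIT metric, then inet6_dev fallback */
		return hlimit;
	}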
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 #define CLONE_OFFLINK_ROUTE 0
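/* With CLONE_OFFLINK_ROUTE set to 0, ip6_pol_route() only clones connected
 * routes (copy-on-write via rt6_alloc_cow()); routes that already carry a
 * nexthop (gatewayed or RTF_NONEXTHOP) are used as-is instead of being
 * cloned per destination with rt6_alloc_clone().
 */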
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
100 static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out,
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
118 static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 static struct rt6_info ip6_null_entry_template = {
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
135 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
136 .rt6i_protocol = RTPROT_KERNEL,
137 .rt6i_metric = ~(u32) 0,
138 .rt6i_ref = ATOMIC_INIT(1),
141 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
143 static int ip6_pkt_prohibit(struct sk_buff *skb);
144 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
146 static struct rt6_info ip6_prohibit_entry_template = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .obsolete = -1,
151 .error = -EACCES,
152 .input = ip6_pkt_prohibit,
153 .output = ip6_pkt_prohibit_out,
155 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
156 .rt6i_protocol = RTPROT_KERNEL,
157 .rt6i_metric = ~(u32) 0,
158 .rt6i_ref = ATOMIC_INIT(1),
161 static struct rt6_info ip6_blk_hole_entry_template = {
162 .dst = {
163 .__refcnt = ATOMIC_INIT(1),
164 .__use = 1,
165 .obsolete = -1,
166 .error = -EINVAL,
167 .input = dst_discard,
168 .output = dst_discard,
170 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
171 .rt6i_protocol = RTPROT_KERNEL,
172 .rt6i_metric = ~(u32) 0,
173 .rt6i_ref = ATOMIC_INIT(1),
176 #endif
178 /* allocate dst with ip6_dst_ops */
179 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
181 return (struct rt6_info *)dst_alloc(ops);
184 static void ip6_dst_destroy(struct dst_entry *dst)
186 struct rt6_info *rt = (struct rt6_info *)dst;
187 struct inet6_dev *idev = rt->rt6i_idev;
188 struct inet_peer *peer = rt->rt6i_peer;
190 if (idev != NULL) {
191 rt->rt6i_idev = NULL;
192 in6_dev_put(idev);
194 if (peer) {
195 BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
196 rt->rt6i_peer = NULL;
197 inet_putpeer(peer);
201 void rt6_bind_peer(struct rt6_info *rt, int create)
203 struct inet_peer *peer;
205 if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
206 return;
208 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
209 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
210 inet_putpeer(peer);
213 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 int how)
216 struct rt6_info *rt = (struct rt6_info *)dst;
217 struct inet6_dev *idev = rt->rt6i_idev;
218 struct net_device *loopback_dev =
219 dev_net(dev)->loopback_dev;
221 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
222 struct inet6_dev *loopback_idev =
223 in6_dev_get(loopback_dev);
224 if (loopback_idev != NULL) {
225 rt->rt6i_idev = loopback_idev;
226 in6_dev_put(idev);
231 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
233 return (rt->rt6i_flags & RTF_EXPIRES) &&
234 time_after(jiffies, rt->rt6i_expires);
237 static inline int rt6_need_strict(struct in6_addr *daddr)
239 return ipv6_addr_type(daddr) &
240 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
244 * Route lookup. Any table->tb6_lock is implied.
247 static inline struct rt6_info *rt6_device_match(struct net *net,
248 struct rt6_info *rt,
249 struct in6_addr *saddr,
250 int oif,
251 int flags)
253 struct rt6_info *local = NULL;
254 struct rt6_info *sprt;
256 if (!oif && ipv6_addr_any(saddr))
257 goto out;
259 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
260 struct net_device *dev = sprt->rt6i_dev;
262 if (oif) {
263 if (dev->ifindex == oif)
264 return sprt;
265 if (dev->flags & IFF_LOOPBACK) {
266 if (sprt->rt6i_idev == NULL ||
267 sprt->rt6i_idev->dev->ifindex != oif) {
268 if (flags & RT6_LOOKUP_F_IFACE && oif)
269 continue;
270 if (local && (!oif ||
271 local->rt6i_idev->dev->ifindex == oif))
272 continue;
274 local = sprt;
276 } else {
277 if (ipv6_chk_addr(net, saddr, dev,
278 flags & RT6_LOOKUP_F_IFACE))
279 return sprt;
283 if (oif) {
284 if (local)
285 return local;
287 if (flags & RT6_LOOKUP_F_IFACE)
288 return net->ipv6.ip6_null_entry;
290 out:
291 return rt;
294 #ifdef CONFIG_IPV6_ROUTER_PREF
295 static void rt6_probe(struct rt6_info *rt)
297 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
299 * Okay, this does not seem to be appropriate
300 * for now, however, we need to check if it
301 * is really so; aka Router Reachability Probing.
303 * Router Reachability Probe MUST be rate-limited
304 * to no more than one per minute.
306 if (!neigh || (neigh->nud_state & NUD_VALID))
307 return;
308 read_lock_bh(&neigh->lock);
309 if (!(neigh->nud_state & NUD_VALID) &&
310 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
311 struct in6_addr mcaddr;
312 struct in6_addr *target;
314 neigh->updated = jiffies;
315 read_unlock_bh(&neigh->lock);
317 target = (struct in6_addr *)&neigh->primary_key;
318 addrconf_addr_solict_mult(target, &mcaddr);
319 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
320 } else
321 read_unlock_bh(&neigh->lock);
323 #else
324 static inline void rt6_probe(struct rt6_info *rt)
327 #endif
330 * Default Router Selection (RFC 2461 6.3.6)
332 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
334 struct net_device *dev = rt->rt6i_dev;
335 if (!oif || dev->ifindex == oif)
336 return 2;
337 if ((dev->flags & IFF_LOOPBACK) &&
338 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
339 return 1;
340 return 0;
343 static inline int rt6_check_neigh(struct rt6_info *rt)
345 struct neighbour *neigh = rt->rt6i_nexthop;
346 int m;
347 if (rt->rt6i_flags & RTF_NONEXTHOP ||
348 !(rt->rt6i_flags & RTF_GATEWAY))
349 m = 1;
350 else if (neigh) {
351 read_lock_bh(&neigh->lock);
352 if (neigh->nud_state & NUD_VALID)
353 m = 2;
354 #ifdef CONFIG_IPV6_ROUTER_PREF
355 else if (neigh->nud_state & NUD_FAILED)
356 m = 0;
357 #endif
358 else
359 m = 1;
360 read_unlock_bh(&neigh->lock);
361 } else
362 m = 0;
363 return m;
366 static int rt6_score_route(struct rt6_info *rt, int oif,
367 int strict)
369 int m, n;
371 m = rt6_check_dev(rt, oif);
372 if (!m && (strict & RT6_LOOKUP_F_IFACE))
373 return -1;
374 #ifdef CONFIG_IPV6_ROUTER_PREF
375 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
376 #endif
377 n = rt6_check_neigh(rt);
378 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
379 return -1;
380 return m;
383 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
384 int *mpri, struct rt6_info *match)
386 int m;
388 if (rt6_check_expired(rt))
389 goto out;
391 m = rt6_score_route(rt, oif, strict);
392 if (m < 0)
393 goto out;
395 if (m > *mpri) {
396 if (strict & RT6_LOOKUP_F_REACHABLE)
397 rt6_probe(match);
398 *mpri = m;
399 match = rt;
400 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
401 rt6_probe(rt);
404 out:
405 return match;
408 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
409 struct rt6_info *rr_head,
410 u32 metric, int oif, int strict)
412 struct rt6_info *rt, *match;
413 int mpri = -1;
415 match = NULL;
416 for (rt = rr_head; rt && rt->rt6i_metric == metric;
417 rt = rt->dst.rt6_next)
418 match = find_match(rt, oif, strict, &mpri, match);
419 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
420 rt = rt->dst.rt6_next)
421 match = find_match(rt, oif, strict, &mpri, match);
423 return match;
426 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
428 struct rt6_info *match, *rt0;
429 struct net *net;
431 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
432 __func__, fn->leaf, oif);
434 rt0 = fn->rr_ptr;
435 if (!rt0)
436 fn->rr_ptr = rt0 = fn->leaf;
438 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
440 if (!match &&
441 (strict & RT6_LOOKUP_F_REACHABLE)) {
442 struct rt6_info *next = rt0->dst.rt6_next;
444 /* no entries matched; do round-robin */
445 if (!next || next->rt6i_metric != rt0->rt6i_metric)
446 next = fn->leaf;
448 if (next != rt0)
449 fn->rr_ptr = next;
452 RT6_TRACE("%s() => %p\n",
453 __func__, match);
455 net = dev_net(rt0->rt6i_dev);
456 return match ? match : net->ipv6.ip6_null_entry;
459 #ifdef CONFIG_IPV6_ROUTE_INFO
460 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
461 struct in6_addr *gwaddr)
463 struct net *net = dev_net(dev);
464 struct route_info *rinfo = (struct route_info *) opt;
465 struct in6_addr prefix_buf, *prefix;
466 unsigned int pref;
467 unsigned long lifetime;
468 struct rt6_info *rt;
470 if (len < sizeof(struct route_info)) {
471 return -EINVAL;
474 /* Sanity check for prefix_len and length */
475 if (rinfo->length > 3) {
476 return -EINVAL;
477 } else if (rinfo->prefix_len > 128) {
478 return -EINVAL;
479 } else if (rinfo->prefix_len > 64) {
480 if (rinfo->length < 2) {
481 return -EINVAL;
483 } else if (rinfo->prefix_len > 0) {
484 if (rinfo->length < 1) {
485 return -EINVAL;
489 pref = rinfo->route_pref;
490 if (pref == ICMPV6_ROUTER_PREF_INVALID)
491 return -EINVAL;
493 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
495 if (rinfo->length == 3)
496 prefix = (struct in6_addr *)rinfo->prefix;
497 else {
498 /* this function is safe */
499 ipv6_addr_prefix(&prefix_buf,
500 (struct in6_addr *)rinfo->prefix,
501 rinfo->prefix_len);
502 prefix = &prefix_buf;
505 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
506 dev->ifindex);
508 if (rt && !lifetime) {
509 ip6_del_rt(rt);
510 rt = NULL;
513 if (!rt && lifetime)
514 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
515 pref);
516 else if (rt)
517 rt->rt6i_flags = RTF_ROUTEINFO |
518 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
520 if (rt) {
521 if (!addrconf_finite_timeout(lifetime)) {
522 rt->rt6i_flags &= ~RTF_EXPIRES;
523 } else {
524 rt->rt6i_expires = jiffies + HZ * lifetime;
525 rt->rt6i_flags |= RTF_EXPIRES;
527 dst_release(&rt->dst);
529 return 0;
531 #endif
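/* BACKTRACK() below is shared by the lookup paths in this file: when a
 * lookup lands on the null entry, it walks back up the fib6 tree,
 * descending into a parent's source-address subtree (FIB6_SUBTREE) when
 * one exists, otherwise moving to the parent itself, and restarts route
 * selection at the first node carrying route info (RTN_RTINFO); it bails
 * out once the tree root (RTN_TL_ROOT) is reached.
 */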
533 #define BACKTRACK(__net, saddr) \
534 do { \
535 if (rt == __net->ipv6.ip6_null_entry) { \
536 struct fib6_node *pn; \
537 while (1) { \
538 if (fn->fn_flags & RTN_TL_ROOT) \
539 goto out; \
540 pn = fn->parent; \
541 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
542 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
543 else \
544 fn = pn; \
545 if (fn->fn_flags & RTN_RTINFO) \
546 goto restart; \
549 } while(0)
551 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
552 struct fib6_table *table,
553 struct flowi *fl, int flags)
555 struct fib6_node *fn;
556 struct rt6_info *rt;
558 read_lock_bh(&table->tb6_lock);
559 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
560 restart:
561 rt = fn->leaf;
562 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
563 BACKTRACK(net, &fl->fl6_src);
564 out:
565 dst_use(&rt->dst, jiffies);
566 read_unlock_bh(&table->tb6_lock);
567 return rt;
571 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
572 const struct in6_addr *saddr, int oif, int strict)
574 struct flowi fl = {
575 .oif = oif,
576 .fl6_dst = *daddr,
578 struct dst_entry *dst;
579 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
581 if (saddr) {
582 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
583 flags |= RT6_LOOKUP_F_HAS_SADDR;
586 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
587 if (dst->error == 0)
588 return (struct rt6_info *) dst;
590 dst_release(dst);
592 return NULL;
595 EXPORT_SYMBOL(rt6_lookup);
 597 /* ip6_ins_rt is called with table->tb6_lock NOT held.
 598    It takes a new route entry; if the addition fails for any reason,
 599    the route is freed. In any case, if the caller does not hold a
 600    reference, it may be destroyed.
603 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
605 int err;
606 struct fib6_table *table;
608 table = rt->rt6i_table;
609 write_lock_bh(&table->tb6_lock);
610 err = fib6_add(&table->tb6_root, rt, info);
611 write_unlock_bh(&table->tb6_lock);
613 return err;
616 int ip6_ins_rt(struct rt6_info *rt)
618 struct nl_info info = {
619 .nl_net = dev_net(rt->rt6i_dev),
621 return __ip6_ins_rt(rt, &info);
624 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
625 struct in6_addr *saddr)
627 struct rt6_info *rt;
630 * Clone the route.
633 rt = ip6_rt_copy(ort);
635 if (rt) {
636 struct neighbour *neigh;
637 int attempts = !in_softirq();
639 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
640 if (rt->rt6i_dst.plen != 128 &&
641 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
642 rt->rt6i_flags |= RTF_ANYCAST;
643 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
646 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
647 rt->rt6i_dst.plen = 128;
648 rt->rt6i_flags |= RTF_CACHE;
649 rt->dst.flags |= DST_HOST;
651 #ifdef CONFIG_IPV6_SUBTREES
652 if (rt->rt6i_src.plen && saddr) {
653 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
654 rt->rt6i_src.plen = 128;
656 #endif
658 retry:
659 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
660 if (IS_ERR(neigh)) {
661 struct net *net = dev_net(rt->rt6i_dev);
662 int saved_rt_min_interval =
663 net->ipv6.sysctl.ip6_rt_gc_min_interval;
664 int saved_rt_elasticity =
665 net->ipv6.sysctl.ip6_rt_gc_elasticity;
667 if (attempts-- > 0) {
668 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
671 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
673 net->ipv6.sysctl.ip6_rt_gc_elasticity =
674 saved_rt_elasticity;
675 net->ipv6.sysctl.ip6_rt_gc_min_interval =
676 saved_rt_min_interval;
677 goto retry;
680 if (net_ratelimit())
681 printk(KERN_WARNING
682 "ipv6: Neighbour table overflow.\n");
683 dst_free(&rt->dst);
684 return NULL;
686 rt->rt6i_nexthop = neigh;
690 return rt;
693 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
695 struct rt6_info *rt = ip6_rt_copy(ort);
696 if (rt) {
697 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
698 rt->rt6i_dst.plen = 128;
699 rt->rt6i_flags |= RTF_CACHE;
700 rt->dst.flags |= DST_HOST;
701 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
703 return rt;
706 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
707 struct flowi *fl, int flags)
709 struct fib6_node *fn;
710 struct rt6_info *rt, *nrt;
711 int strict = 0;
712 int attempts = 3;
713 int err;
714 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
716 strict |= flags & RT6_LOOKUP_F_IFACE;
718 relookup:
719 read_lock_bh(&table->tb6_lock);
721 restart_2:
722 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
724 restart:
725 rt = rt6_select(fn, oif, strict | reachable);
727 BACKTRACK(net, &fl->fl6_src);
728 if (rt == net->ipv6.ip6_null_entry ||
729 rt->rt6i_flags & RTF_CACHE)
730 goto out;
732 dst_hold(&rt->dst);
733 read_unlock_bh(&table->tb6_lock);
735 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
736 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
737 else {
738 #if CLONE_OFFLINK_ROUTE
739 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
740 #else
741 goto out2;
742 #endif
745 dst_release(&rt->dst);
746 rt = nrt ? : net->ipv6.ip6_null_entry;
748 dst_hold(&rt->dst);
749 if (nrt) {
750 err = ip6_ins_rt(nrt);
751 if (!err)
752 goto out2;
755 if (--attempts <= 0)
756 goto out2;
 759 	 * Race condition! In the gap, while table->tb6_lock was
 760 	 * released, someone could have inserted this route. Relookup.
762 dst_release(&rt->dst);
763 goto relookup;
765 out:
766 if (reachable) {
767 reachable = 0;
768 goto restart_2;
770 dst_hold(&rt->dst);
771 read_unlock_bh(&table->tb6_lock);
772 out2:
773 rt->dst.lastuse = jiffies;
774 rt->dst.__use++;
776 return rt;
779 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
780 struct flowi *fl, int flags)
782 return ip6_pol_route(net, table, fl->iif, fl, flags);
785 void ip6_route_input(struct sk_buff *skb)
787 struct ipv6hdr *iph = ipv6_hdr(skb);
788 struct net *net = dev_net(skb->dev);
789 int flags = RT6_LOOKUP_F_HAS_SADDR;
790 struct flowi fl = {
791 .iif = skb->dev->ifindex,
792 .fl6_dst = iph->daddr,
793 .fl6_src = iph->saddr,
794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
795 .mark = skb->mark,
796 .proto = iph->nexthdr,
799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
800 flags |= RT6_LOOKUP_F_IFACE;
802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
805 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
806 struct flowi *fl, int flags)
808 return ip6_pol_route(net, table, fl->oif, fl, flags);
811 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
812 struct flowi *fl)
814 int flags = 0;
816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
817 flags |= RT6_LOOKUP_F_IFACE;
819 if (!ipv6_addr_any(&fl->fl6_src))
820 flags |= RT6_LOOKUP_F_HAS_SADDR;
821 else if (sk)
822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
827 EXPORT_SYMBOL(ip6_route_output);
829 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
831 struct rt6_info *ort = (struct rt6_info *) *dstp;
832 struct rt6_info *rt = (struct rt6_info *)
833 dst_alloc(&ip6_dst_blackhole_ops);
834 struct dst_entry *new = NULL;
836 if (rt) {
837 new = &rt->dst;
839 atomic_set(&new->__refcnt, 1);
840 new->__use = 1;
841 new->input = dst_discard;
842 new->output = dst_discard;
844 dst_copy_metrics(new, &ort->dst);
845 new->dev = ort->dst.dev;
846 if (new->dev)
847 dev_hold(new->dev);
848 rt->rt6i_idev = ort->rt6i_idev;
849 if (rt->rt6i_idev)
850 in6_dev_hold(rt->rt6i_idev);
851 rt->rt6i_expires = 0;
853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
855 rt->rt6i_metric = 0;
857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
858 #ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
860 #endif
862 dst_free(new);
865 dst_release(*dstp);
866 *dstp = new;
867 return new ? 0 : -ENOMEM;
869 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
872 * Destination cache support functions
875 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
877 struct rt6_info *rt;
879 rt = (struct rt6_info *) dst;
881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
882 return dst;
884 return NULL;
887 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
889 struct rt6_info *rt = (struct rt6_info *) dst;
891 if (rt) {
892 if (rt->rt6i_flags & RTF_CACHE) {
893 if (rt6_check_expired(rt)) {
894 ip6_del_rt(rt);
895 dst = NULL;
897 } else {
898 dst_release(dst);
899 dst = NULL;
902 return dst;
905 static void ip6_link_failure(struct sk_buff *skb)
907 struct rt6_info *rt;
909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
911 rt = (struct rt6_info *) skb_dst(skb);
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
914 dst_set_expires(&rt->dst, 0);
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
921 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
928 u32 features = dst_metric(dst, RTAX_FEATURES);
929 mtu = IPV6_MIN_MTU;
930 features |= RTAX_FEATURE_ALLFRAG;
931 dst_metric_set(dst, RTAX_FEATURES, features);
933 dst_metric_set(dst, RTAX_MTU, mtu);
934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
938 static int ipv6_get_mtu(struct net_device *dev);
940 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
951 * rely only on pmtu discovery"
953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954 mtu = IPV6_MAXPLEN;
955 return mtu;
958 static struct dst_entry *icmp6_dst_gc_list;
959 static DEFINE_SPINLOCK(icmp6_dst_lock);
961 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
962 struct neighbour *neigh,
963 const struct in6_addr *addr)
965 struct rt6_info *rt;
966 struct inet6_dev *idev = in6_dev_get(dev);
967 struct net *net = dev_net(dev);
969 if (unlikely(idev == NULL))
970 return NULL;
972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
973 if (unlikely(rt == NULL)) {
974 in6_dev_put(idev);
975 goto out;
978 dev_hold(dev);
979 if (neigh)
980 neigh_hold(neigh);
981 else {
982 neigh = ndisc_get_neigh(dev, addr);
983 if (IS_ERR(neigh))
984 neigh = NULL;
987 rt->rt6i_dev = dev;
988 rt->rt6i_idev = idev;
989 rt->rt6i_nexthop = neigh;
990 atomic_set(&rt->dst.__refcnt, 1);
991 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
992 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
993 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
994 rt->dst.output = ip6_output;
996 #if 0 /* there's no chance to use these for ndisc */
997 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
998 ? DST_HOST
999 : 0;
1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001 rt->rt6i_dst.plen = 128;
1002 #endif
1004 spin_lock_bh(&icmp6_dst_lock);
1005 rt->dst.next = icmp6_dst_gc_list;
1006 icmp6_dst_gc_list = &rt->dst;
1007 spin_unlock_bh(&icmp6_dst_lock);
1009 fib6_force_start_gc(net);
1011 out:
1012 return &rt->dst;
1015 int icmp6_dst_gc(void)
1017 struct dst_entry *dst, *next, **pprev;
1018 int more = 0;
1020 next = NULL;
1022 spin_lock_bh(&icmp6_dst_lock);
1023 pprev = &icmp6_dst_gc_list;
1025 while ((dst = *pprev) != NULL) {
1026 if (!atomic_read(&dst->__refcnt)) {
1027 *pprev = dst->next;
1028 dst_free(dst);
1029 } else {
1030 pprev = &dst->next;
1031 ++more;
1035 spin_unlock_bh(&icmp6_dst_lock);
1037 return more;
1040 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1041 void *arg)
1043 struct dst_entry *dst, **pprev;
1045 spin_lock_bh(&icmp6_dst_lock);
1046 pprev = &icmp6_dst_gc_list;
1047 while ((dst = *pprev) != NULL) {
1048 struct rt6_info *rt = (struct rt6_info *) dst;
1049 if (func(rt, arg)) {
1050 *pprev = dst->next;
1051 dst_free(dst);
1052 } else {
1053 pprev = &dst->next;
1056 spin_unlock_bh(&icmp6_dst_lock);
1059 static int ip6_dst_gc(struct dst_ops *ops)
1061 unsigned long now = jiffies;
1062 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1063 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1064 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1065 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1066 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1067 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1068 int entries;
1070 entries = dst_entries_get_fast(ops);
1071 if (time_after(rt_last_gc + rt_min_interval, now) &&
1072 entries <= rt_max_size)
1073 goto out;
1075 net->ipv6.ip6_rt_gc_expire++;
1076 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1077 net->ipv6.ip6_rt_last_gc = now;
1078 entries = dst_entries_get_slow(ops);
1079 if (entries < ops->gc_thresh)
1080 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1081 out:
1082 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1083 return entries > rt_max_size;
1086 /* Clean host part of a prefix. Not necessary in radix tree,
1087 but results in cleaner routing tables.
1089    Remove it only when everything is known to work!
1092 static int ipv6_get_mtu(struct net_device *dev)
1094 int mtu = IPV6_MIN_MTU;
1095 struct inet6_dev *idev;
1097 rcu_read_lock();
1098 idev = __in6_dev_get(dev);
1099 if (idev)
1100 mtu = idev->cnf.mtu6;
1101 rcu_read_unlock();
1102 return mtu;
1105 int ip6_dst_hoplimit(struct dst_entry *dst)
1107 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1108 if (hoplimit < 0) {
1109 struct net_device *dev = dst->dev;
1110 struct inet6_dev *idev;
1112 rcu_read_lock();
1113 idev = __in6_dev_get(dev);
1114 if (idev)
1115 hoplimit = idev->cnf.hop_limit;
1116 else
1117 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1118 rcu_read_unlock();
1120 return hoplimit;
1122 EXPORT_SYMBOL(ip6_dst_hoplimit);
1128 int ip6_route_add(struct fib6_config *cfg)
1130 int err;
1131 struct net *net = cfg->fc_nlinfo.nl_net;
1132 struct rt6_info *rt = NULL;
1133 struct net_device *dev = NULL;
1134 struct inet6_dev *idev = NULL;
1135 struct fib6_table *table;
1136 int addr_type;
1138 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1139 return -EINVAL;
1140 #ifndef CONFIG_IPV6_SUBTREES
1141 if (cfg->fc_src_len)
1142 return -EINVAL;
1143 #endif
1144 if (cfg->fc_ifindex) {
1145 err = -ENODEV;
1146 dev = dev_get_by_index(net, cfg->fc_ifindex);
1147 if (!dev)
1148 goto out;
1149 idev = in6_dev_get(dev);
1150 if (!idev)
1151 goto out;
1154 if (cfg->fc_metric == 0)
1155 cfg->fc_metric = IP6_RT_PRIO_USER;
1157 table = fib6_new_table(net, cfg->fc_table);
1158 if (table == NULL) {
1159 err = -ENOBUFS;
1160 goto out;
1163 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1165 if (rt == NULL) {
1166 err = -ENOMEM;
1167 goto out;
1170 rt->dst.obsolete = -1;
1171 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1172 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1175 if (cfg->fc_protocol == RTPROT_UNSPEC)
1176 cfg->fc_protocol = RTPROT_BOOT;
1177 rt->rt6i_protocol = cfg->fc_protocol;
1179 addr_type = ipv6_addr_type(&cfg->fc_dst);
1181 if (addr_type & IPV6_ADDR_MULTICAST)
1182 rt->dst.input = ip6_mc_input;
1183 else if (cfg->fc_flags & RTF_LOCAL)
1184 rt->dst.input = ip6_input;
1185 else
1186 rt->dst.input = ip6_forward;
1188 rt->dst.output = ip6_output;
1190 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1191 rt->rt6i_dst.plen = cfg->fc_dst_len;
1192 if (rt->rt6i_dst.plen == 128)
1193 rt->dst.flags = DST_HOST;
1195 #ifdef CONFIG_IPV6_SUBTREES
1196 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1197 rt->rt6i_src.plen = cfg->fc_src_len;
1198 #endif
1200 rt->rt6i_metric = cfg->fc_metric;
1202 /* We cannot add true routes via loopback here,
1203 they would result in kernel looping; promote them to reject routes
1205 if ((cfg->fc_flags & RTF_REJECT) ||
1206 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1207 && !(cfg->fc_flags&RTF_LOCAL))) {
1208 /* hold loopback dev/idev if we haven't done so. */
1209 if (dev != net->loopback_dev) {
1210 if (dev) {
1211 dev_put(dev);
1212 in6_dev_put(idev);
1214 dev = net->loopback_dev;
1215 dev_hold(dev);
1216 idev = in6_dev_get(dev);
1217 if (!idev) {
1218 err = -ENODEV;
1219 goto out;
1222 rt->dst.output = ip6_pkt_discard_out;
1223 rt->dst.input = ip6_pkt_discard;
1224 rt->dst.error = -ENETUNREACH;
1225 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1226 goto install_route;
1229 if (cfg->fc_flags & RTF_GATEWAY) {
1230 struct in6_addr *gw_addr;
1231 int gwa_type;
1233 gw_addr = &cfg->fc_gateway;
1234 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1235 gwa_type = ipv6_addr_type(gw_addr);
1237 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1238 struct rt6_info *grt;
1240 			/* IPv6 strictly prohibits using non-link-local
1241 			   addresses as the nexthop address.
1242 			   Otherwise, the router will not be able to send redirects.
1243 It is very good, but in some (rare!) circumstances
1244 (SIT, PtP, NBMA NOARP links) it is handy to allow
1245 some exceptions. --ANK
1247 err = -EINVAL;
1248 if (!(gwa_type&IPV6_ADDR_UNICAST))
1249 goto out;
1251 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1253 err = -EHOSTUNREACH;
1254 if (grt == NULL)
1255 goto out;
1256 if (dev) {
1257 if (dev != grt->rt6i_dev) {
1258 dst_release(&grt->dst);
1259 goto out;
1261 } else {
1262 dev = grt->rt6i_dev;
1263 idev = grt->rt6i_idev;
1264 dev_hold(dev);
1265 in6_dev_hold(grt->rt6i_idev);
1267 if (!(grt->rt6i_flags&RTF_GATEWAY))
1268 err = 0;
1269 dst_release(&grt->dst);
1271 if (err)
1272 goto out;
1274 err = -EINVAL;
1275 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1276 goto out;
1279 err = -ENODEV;
1280 if (dev == NULL)
1281 goto out;
1283 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1284 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1285 if (IS_ERR(rt->rt6i_nexthop)) {
1286 err = PTR_ERR(rt->rt6i_nexthop);
1287 rt->rt6i_nexthop = NULL;
1288 goto out;
1292 rt->rt6i_flags = cfg->fc_flags;
1294 install_route:
1295 if (cfg->fc_mx) {
1296 struct nlattr *nla;
1297 int remaining;
1299 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1300 int type = nla_type(nla);
1302 if (type) {
1303 if (type > RTAX_MAX) {
1304 err = -EINVAL;
1305 goto out;
1308 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1313 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1314 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1315 if (!dst_mtu(&rt->dst))
1316 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev));
1317 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1318 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1319 rt->dst.dev = dev;
1320 rt->rt6i_idev = idev;
1321 rt->rt6i_table = table;
1323 cfg->fc_nlinfo.nl_net = dev_net(dev);
1325 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1327 out:
1328 if (dev)
1329 dev_put(dev);
1330 if (idev)
1331 in6_dev_put(idev);
1332 if (rt)
1333 dst_free(&rt->dst);
1334 return err;
1337 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1339 int err;
1340 struct fib6_table *table;
1341 struct net *net = dev_net(rt->rt6i_dev);
1343 if (rt == net->ipv6.ip6_null_entry)
1344 return -ENOENT;
1346 table = rt->rt6i_table;
1347 write_lock_bh(&table->tb6_lock);
1349 err = fib6_del(rt, info);
1350 dst_release(&rt->dst);
1352 write_unlock_bh(&table->tb6_lock);
1354 return err;
1357 int ip6_del_rt(struct rt6_info *rt)
1359 struct nl_info info = {
1360 .nl_net = dev_net(rt->rt6i_dev),
1362 return __ip6_del_rt(rt, &info);
1365 static int ip6_route_del(struct fib6_config *cfg)
1367 struct fib6_table *table;
1368 struct fib6_node *fn;
1369 struct rt6_info *rt;
1370 int err = -ESRCH;
1372 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1373 if (table == NULL)
1374 return err;
1376 read_lock_bh(&table->tb6_lock);
1378 fn = fib6_locate(&table->tb6_root,
1379 &cfg->fc_dst, cfg->fc_dst_len,
1380 &cfg->fc_src, cfg->fc_src_len);
1382 if (fn) {
1383 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1384 if (cfg->fc_ifindex &&
1385 (rt->rt6i_dev == NULL ||
1386 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1387 continue;
1388 if (cfg->fc_flags & RTF_GATEWAY &&
1389 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1390 continue;
1391 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1392 continue;
1393 dst_hold(&rt->dst);
1394 read_unlock_bh(&table->tb6_lock);
1396 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1399 read_unlock_bh(&table->tb6_lock);
1401 return err;
1405 * Handle redirects
1407 struct ip6rd_flowi {
1408 struct flowi fl;
1409 struct in6_addr gateway;
1412 static struct rt6_info *__ip6_route_redirect(struct net *net,
1413 struct fib6_table *table,
1414 struct flowi *fl,
1415 int flags)
1417 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1418 struct rt6_info *rt;
1419 struct fib6_node *fn;
1422 * Get the "current" route for this destination and
1423 	 * check if the redirect has come from the appropriate router.
1425 * RFC 2461 specifies that redirects should only be
1426 * accepted if they come from the nexthop to the target.
1427 * Due to the way the routes are chosen, this notion
1428 * is a bit fuzzy and one might need to check all possible
1429 * routes.
1432 read_lock_bh(&table->tb6_lock);
1433 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1434 restart:
1435 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1437 * Current route is on-link; redirect is always invalid.
1439 * Seems, previous statement is not true. It could
1440 * be node, which looks for us as on-link (f.e. proxy ndisc)
1441 * But then router serving it might decide, that we should
1442 * know truth 8)8) --ANK (980726).
1444 if (rt6_check_expired(rt))
1445 continue;
1446 if (!(rt->rt6i_flags & RTF_GATEWAY))
1447 continue;
1448 if (fl->oif != rt->rt6i_dev->ifindex)
1449 continue;
1450 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1451 continue;
1452 break;
1455 if (!rt)
1456 rt = net->ipv6.ip6_null_entry;
1457 BACKTRACK(net, &fl->fl6_src);
1458 out:
1459 dst_hold(&rt->dst);
1461 read_unlock_bh(&table->tb6_lock);
1463 return rt;
1466 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1467 struct in6_addr *src,
1468 struct in6_addr *gateway,
1469 struct net_device *dev)
1471 int flags = RT6_LOOKUP_F_HAS_SADDR;
1472 struct net *net = dev_net(dev);
1473 struct ip6rd_flowi rdfl = {
1474 .fl = {
1475 .oif = dev->ifindex,
1476 .fl6_dst = *dest,
1477 .fl6_src = *src,
1481 ipv6_addr_copy(&rdfl.gateway, gateway);
1483 if (rt6_need_strict(dest))
1484 flags |= RT6_LOOKUP_F_IFACE;
1486 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1487 flags, __ip6_route_redirect);
1490 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1491 struct in6_addr *saddr,
1492 struct neighbour *neigh, u8 *lladdr, int on_link)
1494 struct rt6_info *rt, *nrt = NULL;
1495 struct netevent_redirect netevent;
1496 struct net *net = dev_net(neigh->dev);
1498 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1500 if (rt == net->ipv6.ip6_null_entry) {
1501 if (net_ratelimit())
1502 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1503 "for redirect target\n");
1504 goto out;
1508 * We have finally decided to accept it.
1511 neigh_update(neigh, lladdr, NUD_STALE,
1512 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1513 NEIGH_UPDATE_F_OVERRIDE|
1514 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1515 NEIGH_UPDATE_F_ISROUTER))
1519 * Redirect received -> path was valid.
1520 * Look, redirects are sent only in response to data packets,
1521 * so that this nexthop apparently is reachable. --ANK
1523 dst_confirm(&rt->dst);
1525 /* Duplicate redirect: silently ignore. */
1526 if (neigh == rt->dst.neighbour)
1527 goto out;
1529 nrt = ip6_rt_copy(rt);
1530 if (nrt == NULL)
1531 goto out;
1533 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1534 if (on_link)
1535 nrt->rt6i_flags &= ~RTF_GATEWAY;
1537 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1538 nrt->rt6i_dst.plen = 128;
1539 nrt->dst.flags |= DST_HOST;
1541 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1542 nrt->rt6i_nexthop = neigh_clone(neigh);
1543 /* Reset pmtu, it may be better */
1544 dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev));
1545 dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev),
1546 dst_mtu(&nrt->dst)));
1548 if (ip6_ins_rt(nrt))
1549 goto out;
1551 netevent.old = &rt->dst;
1552 netevent.new = &nrt->dst;
1553 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1555 if (rt->rt6i_flags&RTF_CACHE) {
1556 ip6_del_rt(rt);
1557 return;
1560 out:
1561 dst_release(&rt->dst);
1565 * Handle ICMP "packet too big" messages
1566 * i.e. Path MTU discovery
1569 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1570 struct net *net, u32 pmtu, int ifindex)
1572 struct rt6_info *rt, *nrt;
1573 int allfrag = 0;
1575 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1576 if (rt == NULL)
1577 return;
1579 if (pmtu >= dst_mtu(&rt->dst))
1580 goto out;
1582 if (pmtu < IPV6_MIN_MTU) {
1584 		 * According to RFC 2460, PMTU is set to the IPv6 Minimum Link
1585 		 * MTU (1280), and a fragment header should always be included
1586 		 * after a node receives a Packet Too Big message reporting a
1587 		 * PMTU less than the IPv6 Minimum Link MTU.
1589 pmtu = IPV6_MIN_MTU;
1590 allfrag = 1;
1593 /* New mtu received -> path was valid.
1594 They are sent only in response to data packets,
1595 so that this nexthop apparently is reachable. --ANK
1597 dst_confirm(&rt->dst);
1599 	/* Host route. If it is static, it would be better
1600 	   not to override it, but to add a new one, so that
1601 	   when the cache entry expires the old pmtu
1602 	   is restored automatically.
1604 if (rt->rt6i_flags & RTF_CACHE) {
1605 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1606 if (allfrag) {
1607 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1608 features |= RTAX_FEATURE_ALLFRAG;
1609 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1611 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1612 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1613 goto out;
1616 /* Network route.
1617 Two cases are possible:
1618 1. It is connected route. Action: COW
1619 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1621 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1622 nrt = rt6_alloc_cow(rt, daddr, saddr);
1623 else
1624 nrt = rt6_alloc_clone(rt, daddr);
1626 if (nrt) {
1627 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1628 if (allfrag) {
1629 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1630 features |= RTAX_FEATURE_ALLFRAG;
1631 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1634 		/* According to RFC 1981, a PMTU increase should not be probed
1635 		 * for within 5 minutes; the recommended timer is 10 minutes.
1636 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1637 		 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1638 		 * and detection of a PMTU increase happens automatically.
1640 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1641 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1643 ip6_ins_rt(nrt);
1645 out:
1646 dst_release(&rt->dst);
1649 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1650 struct net_device *dev, u32 pmtu)
1652 struct net *net = dev_net(dev);
1655 * RFC 1981 states that a node "MUST reduce the size of the packets it
1656 * is sending along the path" that caused the Packet Too Big message.
1657 * Since it's not possible in the general case to determine which
1658 * interface was used to send the original packet, we update the MTU
1659 * on the interface that will be used to send future packets. We also
1660 * update the MTU on the interface that received the Packet Too Big in
1661 * case the original packet was forced out that interface with
1662 * SO_BINDTODEVICE or similar. This is the next best thing to the
1663 * correct behaviour, which would be to update the MTU on all
1664 * interfaces.
1666 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1667 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1671 * Misc support functions
1674 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1676 struct net *net = dev_net(ort->rt6i_dev);
1677 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1679 if (rt) {
1680 rt->dst.input = ort->dst.input;
1681 rt->dst.output = ort->dst.output;
1683 dst_copy_metrics(&rt->dst, &ort->dst);
1684 rt->dst.error = ort->dst.error;
1685 rt->dst.dev = ort->dst.dev;
1686 if (rt->dst.dev)
1687 dev_hold(rt->dst.dev);
1688 rt->rt6i_idev = ort->rt6i_idev;
1689 if (rt->rt6i_idev)
1690 in6_dev_hold(rt->rt6i_idev);
1691 rt->dst.lastuse = jiffies;
1692 rt->rt6i_expires = 0;
1694 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1695 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1696 rt->rt6i_metric = 0;
1698 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1699 #ifdef CONFIG_IPV6_SUBTREES
1700 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1701 #endif
1702 rt->rt6i_table = ort->rt6i_table;
1704 return rt;
1707 #ifdef CONFIG_IPV6_ROUTE_INFO
1708 static struct rt6_info *rt6_get_route_info(struct net *net,
1709 struct in6_addr *prefix, int prefixlen,
1710 struct in6_addr *gwaddr, int ifindex)
1712 struct fib6_node *fn;
1713 struct rt6_info *rt = NULL;
1714 struct fib6_table *table;
1716 table = fib6_get_table(net, RT6_TABLE_INFO);
1717 if (table == NULL)
1718 return NULL;
1720 write_lock_bh(&table->tb6_lock);
1721 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1722 if (!fn)
1723 goto out;
1725 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1726 if (rt->rt6i_dev->ifindex != ifindex)
1727 continue;
1728 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1729 continue;
1730 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1731 continue;
1732 dst_hold(&rt->dst);
1733 break;
1735 out:
1736 write_unlock_bh(&table->tb6_lock);
1737 return rt;
1740 static struct rt6_info *rt6_add_route_info(struct net *net,
1741 struct in6_addr *prefix, int prefixlen,
1742 struct in6_addr *gwaddr, int ifindex,
1743 unsigned pref)
1745 struct fib6_config cfg = {
1746 .fc_table = RT6_TABLE_INFO,
1747 .fc_metric = IP6_RT_PRIO_USER,
1748 .fc_ifindex = ifindex,
1749 .fc_dst_len = prefixlen,
1750 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1751 RTF_UP | RTF_PREF(pref),
1752 .fc_nlinfo.pid = 0,
1753 .fc_nlinfo.nlh = NULL,
1754 .fc_nlinfo.nl_net = net,
1757 ipv6_addr_copy(&cfg.fc_dst, prefix);
1758 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1760 /* We should treat it as a default route if prefix length is 0. */
1761 if (!prefixlen)
1762 cfg.fc_flags |= RTF_DEFAULT;
1764 ip6_route_add(&cfg);
1766 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1768 #endif
1770 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1772 struct rt6_info *rt;
1773 struct fib6_table *table;
1775 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1776 if (table == NULL)
1777 return NULL;
1779 write_lock_bh(&table->tb6_lock);
1780 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1781 if (dev == rt->rt6i_dev &&
1782 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1783 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1784 break;
1786 if (rt)
1787 dst_hold(&rt->dst);
1788 write_unlock_bh(&table->tb6_lock);
1789 return rt;
1792 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1793 struct net_device *dev,
1794 unsigned int pref)
1796 struct fib6_config cfg = {
1797 .fc_table = RT6_TABLE_DFLT,
1798 .fc_metric = IP6_RT_PRIO_USER,
1799 .fc_ifindex = dev->ifindex,
1800 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1801 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1802 .fc_nlinfo.pid = 0,
1803 .fc_nlinfo.nlh = NULL,
1804 .fc_nlinfo.nl_net = dev_net(dev),
1807 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1809 ip6_route_add(&cfg);
1811 return rt6_get_dflt_router(gwaddr, dev);
1814 void rt6_purge_dflt_routers(struct net *net)
1816 struct rt6_info *rt;
1817 struct fib6_table *table;
1819 /* NOTE: Keep consistent with rt6_get_dflt_router */
1820 table = fib6_get_table(net, RT6_TABLE_DFLT);
1821 if (table == NULL)
1822 return;
1824 restart:
1825 read_lock_bh(&table->tb6_lock);
1826 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1827 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1828 dst_hold(&rt->dst);
1829 read_unlock_bh(&table->tb6_lock);
1830 ip6_del_rt(rt);
1831 goto restart;
1834 read_unlock_bh(&table->tb6_lock);
1837 static void rtmsg_to_fib6_config(struct net *net,
1838 struct in6_rtmsg *rtmsg,
1839 struct fib6_config *cfg)
1841 memset(cfg, 0, sizeof(*cfg));
1843 cfg->fc_table = RT6_TABLE_MAIN;
1844 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1845 cfg->fc_metric = rtmsg->rtmsg_metric;
1846 cfg->fc_expires = rtmsg->rtmsg_info;
1847 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1848 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1849 cfg->fc_flags = rtmsg->rtmsg_flags;
1851 cfg->fc_nlinfo.nl_net = net;
1853 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1854 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1855 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1858 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1860 struct fib6_config cfg;
1861 struct in6_rtmsg rtmsg;
1862 int err;
1864 switch(cmd) {
1865 case SIOCADDRT: /* Add a route */
1866 case SIOCDELRT: /* Delete a route */
1867 if (!capable(CAP_NET_ADMIN))
1868 return -EPERM;
1869 err = copy_from_user(&rtmsg, arg,
1870 sizeof(struct in6_rtmsg));
1871 if (err)
1872 return -EFAULT;
1874 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1876 rtnl_lock();
1877 switch (cmd) {
1878 case SIOCADDRT:
1879 err = ip6_route_add(&cfg);
1880 break;
1881 case SIOCDELRT:
1882 err = ip6_route_del(&cfg);
1883 break;
1884 default:
1885 err = -EINVAL;
1887 rtnl_unlock();
1889 return err;
1892 return -EINVAL;
1896 * Drop the packet on the floor
1899 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1901 int type;
1902 struct dst_entry *dst = skb_dst(skb);
1903 switch (ipstats_mib_noroutes) {
1904 case IPSTATS_MIB_INNOROUTES:
1905 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1906 if (type == IPV6_ADDR_ANY) {
1907 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1908 IPSTATS_MIB_INADDRERRORS);
1909 break;
1911 /* FALLTHROUGH */
1912 case IPSTATS_MIB_OUTNOROUTES:
1913 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1914 ipstats_mib_noroutes);
1915 break;
1917 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1918 kfree_skb(skb);
1919 return 0;
1922 static int ip6_pkt_discard(struct sk_buff *skb)
1924 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1927 static int ip6_pkt_discard_out(struct sk_buff *skb)
1929 skb->dev = skb_dst(skb)->dev;
1930 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1933 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1935 static int ip6_pkt_prohibit(struct sk_buff *skb)
1937 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1940 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1942 skb->dev = skb_dst(skb)->dev;
1943 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1946 #endif
1949 * Allocate a dst for local (unicast / anycast) address.
1952 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1953 const struct in6_addr *addr,
1954 int anycast)
1956 struct net *net = dev_net(idev->dev);
1957 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1958 struct neighbour *neigh;
1960 if (rt == NULL) {
1961 if (net_ratelimit())
1962 pr_warning("IPv6: Maximum number of routes reached,"
1963 " consider increasing route/max_size.\n");
1964 return ERR_PTR(-ENOMEM);
1967 dev_hold(net->loopback_dev);
1968 in6_dev_hold(idev);
1970 rt->dst.flags = DST_HOST;
1971 rt->dst.input = ip6_input;
1972 rt->dst.output = ip6_output;
1973 rt->rt6i_dev = net->loopback_dev;
1974 rt->rt6i_idev = idev;
1975 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev));
1976 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst)));
1977 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
1978 rt->dst.obsolete = -1;
1980 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1981 if (anycast)
1982 rt->rt6i_flags |= RTF_ANYCAST;
1983 else
1984 rt->rt6i_flags |= RTF_LOCAL;
1985 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1986 if (IS_ERR(neigh)) {
1987 dst_free(&rt->dst);
1989 /* We are casting this because that is the return
1990 * value type. But an errno encoded pointer is the
1991 * same regardless of the underlying pointer type,
1992 * and that's what we are returning. So this is OK.
1994 return (struct rt6_info *) neigh;
1996 rt->rt6i_nexthop = neigh;
1998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1999 rt->rt6i_dst.plen = 128;
2000 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2002 atomic_set(&rt->dst.__refcnt, 1);
2004 return rt;
2007 struct arg_dev_net {
2008 struct net_device *dev;
2009 struct net *net;
2012 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2014 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2015 struct net *net = ((struct arg_dev_net *)arg)->net;
2017 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2018 rt != net->ipv6.ip6_null_entry) {
2019 RT6_TRACE("deleted by ifdown %p\n", rt);
2020 return -1;
2022 return 0;
2025 void rt6_ifdown(struct net *net, struct net_device *dev)
2027 struct arg_dev_net adn = {
2028 .dev = dev,
2029 .net = net,
2032 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2033 icmp6_clean_all(fib6_ifdown, &adn);
2036 struct rt6_mtu_change_arg
2038 struct net_device *dev;
2039 unsigned mtu;
2042 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2044 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2045 struct inet6_dev *idev;
2046 struct net *net = dev_net(arg->dev);
2048 	/* In IPv6, pmtu discovery is not optional,
2049 	   so the RTAX_MTU lock cannot disable it.
2050 We still use this lock to block changes
2051 caused by addrconf/ndisc.
2054 idev = __in6_dev_get(arg->dev);
2055 if (idev == NULL)
2056 return 0;
2058 	/* For an administrative MTU increase, there is no way to discover
2059 	   an IPv6 PMTU increase, so the PMTU increase should be updated here.
2060 	   Since RFC 1981 doesn't cover administrative MTU increases,
2061 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
2064 	   If the new MTU is less than the route PMTU, the new MTU will be the
2065 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
2066 	   decrease. If the new MTU is greater than the route PMTU, and the
2067 	   old MTU was the lowest MTU in the path, update the route PMTU
2068 	   to reflect the increase. In that case, if the other nodes' MTU
2069 	   is also the lowest in the path, a Packet Too Big message will lead
2070 	   to PMTU discovery.
2072 if (rt->rt6i_dev == arg->dev &&
2073 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2074 (dst_mtu(&rt->dst) >= arg->mtu ||
2075 (dst_mtu(&rt->dst) < arg->mtu &&
2076 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2077 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2078 dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu));
2080 return 0;
2083 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2085 struct rt6_mtu_change_arg arg = {
2086 .dev = dev,
2087 .mtu = mtu,
2090 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2093 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2094 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2095 [RTA_OIF] = { .type = NLA_U32 },
2096 [RTA_IIF] = { .type = NLA_U32 },
2097 [RTA_PRIORITY] = { .type = NLA_U32 },
2098 [RTA_METRICS] = { .type = NLA_NESTED },
2101 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2102 struct fib6_config *cfg)
2104 struct rtmsg *rtm;
2105 struct nlattr *tb[RTA_MAX+1];
2106 int err;
2108 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2109 if (err < 0)
2110 goto errout;
2112 err = -EINVAL;
2113 rtm = nlmsg_data(nlh);
2114 memset(cfg, 0, sizeof(*cfg));
2116 cfg->fc_table = rtm->rtm_table;
2117 cfg->fc_dst_len = rtm->rtm_dst_len;
2118 cfg->fc_src_len = rtm->rtm_src_len;
2119 cfg->fc_flags = RTF_UP;
2120 cfg->fc_protocol = rtm->rtm_protocol;
2122 if (rtm->rtm_type == RTN_UNREACHABLE)
2123 cfg->fc_flags |= RTF_REJECT;
2125 if (rtm->rtm_type == RTN_LOCAL)
2126 cfg->fc_flags |= RTF_LOCAL;
2128 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2129 cfg->fc_nlinfo.nlh = nlh;
2130 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2132 if (tb[RTA_GATEWAY]) {
2133 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2134 cfg->fc_flags |= RTF_GATEWAY;
2137 if (tb[RTA_DST]) {
2138 int plen = (rtm->rtm_dst_len + 7) >> 3;
2140 if (nla_len(tb[RTA_DST]) < plen)
2141 goto errout;
2143 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2146 if (tb[RTA_SRC]) {
2147 int plen = (rtm->rtm_src_len + 7) >> 3;
2149 if (nla_len(tb[RTA_SRC]) < plen)
2150 goto errout;
2152 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2155 if (tb[RTA_OIF])
2156 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2158 if (tb[RTA_PRIORITY])
2159 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2161 if (tb[RTA_METRICS]) {
2162 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2163 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2166 if (tb[RTA_TABLE])
2167 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2169 err = 0;
2170 errout:
2171 return err;
2174 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2176 struct fib6_config cfg;
2177 int err;
2179 err = rtm_to_fib6_config(skb, nlh, &cfg);
2180 if (err < 0)
2181 return err;
2183 return ip6_route_del(&cfg);
2186 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2188 struct fib6_config cfg;
2189 int err;
2191 err = rtm_to_fib6_config(skb, nlh, &cfg);
2192 if (err < 0)
2193 return err;
2195 return ip6_route_add(&cfg);
2198 static inline size_t rt6_nlmsg_size(void)
2200 return NLMSG_ALIGN(sizeof(struct rtmsg))
2201 + nla_total_size(16) /* RTA_SRC */
2202 + nla_total_size(16) /* RTA_DST */
2203 + nla_total_size(16) /* RTA_GATEWAY */
2204 + nla_total_size(16) /* RTA_PREFSRC */
2205 + nla_total_size(4) /* RTA_TABLE */
2206 + nla_total_size(4) /* RTA_IIF */
2207 + nla_total_size(4) /* RTA_OIF */
2208 + nla_total_size(4) /* RTA_PRIORITY */
2209 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2210 + nla_total_size(sizeof(struct rta_cacheinfo));
2213 static int rt6_fill_node(struct net *net,
2214 struct sk_buff *skb, struct rt6_info *rt,
2215 struct in6_addr *dst, struct in6_addr *src,
2216 int iif, int type, u32 pid, u32 seq,
2217 int prefix, int nowait, unsigned int flags)
2219 struct rtmsg *rtm;
2220 struct nlmsghdr *nlh;
2221 long expires;
2222 u32 table;
2224 if (prefix) { /* user wants prefix routes only */
2225 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2226 /* success since this is not a prefix route */
2227 return 1;
2231 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2232 if (nlh == NULL)
2233 return -EMSGSIZE;
2235 rtm = nlmsg_data(nlh);
2236 rtm->rtm_family = AF_INET6;
2237 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2238 rtm->rtm_src_len = rt->rt6i_src.plen;
2239 rtm->rtm_tos = 0;
2240 if (rt->rt6i_table)
2241 table = rt->rt6i_table->tb6_id;
2242 else
2243 table = RT6_TABLE_UNSPEC;
2244 rtm->rtm_table = table;
2245 NLA_PUT_U32(skb, RTA_TABLE, table);
2246 if (rt->rt6i_flags&RTF_REJECT)
2247 rtm->rtm_type = RTN_UNREACHABLE;
2248 else if (rt->rt6i_flags&RTF_LOCAL)
2249 rtm->rtm_type = RTN_LOCAL;
2250 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2251 rtm->rtm_type = RTN_LOCAL;
2252 else
2253 rtm->rtm_type = RTN_UNICAST;
2254 rtm->rtm_flags = 0;
2255 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2256 rtm->rtm_protocol = rt->rt6i_protocol;
2257 if (rt->rt6i_flags&RTF_DYNAMIC)
2258 rtm->rtm_protocol = RTPROT_REDIRECT;
2259 else if (rt->rt6i_flags & RTF_ADDRCONF)
2260 rtm->rtm_protocol = RTPROT_KERNEL;
2261 else if (rt->rt6i_flags&RTF_DEFAULT)
2262 rtm->rtm_protocol = RTPROT_RA;
2264 if (rt->rt6i_flags&RTF_CACHE)
2265 rtm->rtm_flags |= RTM_F_CLONED;
2267 if (dst) {
2268 NLA_PUT(skb, RTA_DST, 16, dst);
2269 rtm->rtm_dst_len = 128;
2270 } else if (rtm->rtm_dst_len)
2271 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2272 #ifdef CONFIG_IPV6_SUBTREES
2273 if (src) {
2274 NLA_PUT(skb, RTA_SRC, 16, src);
2275 rtm->rtm_src_len = 128;
2276 } else if (rtm->rtm_src_len)
2277 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2278 #endif
2279 if (iif) {
2280 #ifdef CONFIG_IPV6_MROUTE
2281 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2282 int err = ip6mr_get_route(net, skb, rtm, nowait);
2283 if (err <= 0) {
2284 if (!nowait) {
2285 if (err == 0)
2286 return 0;
2287 goto nla_put_failure;
2288 } else {
2289 if (err == -EMSGSIZE)
2290 goto nla_put_failure;
2291 }
2292 }
2293 } else
2294 #endif
2295 NLA_PUT_U32(skb, RTA_IIF, iif);
2296 } else if (dst) {
2297 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2298 struct in6_addr saddr_buf;
2299 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2300 dst, 0, &saddr_buf) == 0)
2301 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2302 }
2304 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2305 goto nla_put_failure;
2307 if (rt->dst.neighbour)
2308 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2310 if (rt->dst.dev)
2311 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2313 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2315 if (!(rt->rt6i_flags & RTF_EXPIRES))
2316 expires = 0;
2317 else if (rt->rt6i_expires - jiffies < INT_MAX)
2318 expires = rt->rt6i_expires - jiffies;
2319 else
2320 expires = INT_MAX;
2322 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2323 expires, rt->dst.error) < 0)
2324 goto nla_put_failure;
2326 return nlmsg_end(skb, nlh);
2328 nla_put_failure:
2329 nlmsg_cancel(skb, nlh);
2330 return -EMSGSIZE;
2331 }
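/*
 * rt6_dump_route - fib6 tree walker callback for RTM_GETROUTE dumps;
 * honours the RTM_F_PREFIX flag from the request header and emits each
 * route via rt6_fill_node() with NLM_F_MULTI set.
 */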
2333 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2334 {
2335 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2336 int prefix;
2338 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2339 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2340 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2341 } else
2342 prefix = 0;
2344 return rt6_fill_node(arg->net,
2345 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2346 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2347 prefix, 0, NLM_F_MULTI);
2348 }
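/*
 * inet6_rtm_getroute - answer a unicast RTM_GETROUTE query: build a flow
 * from RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF, resolve it with ip6_route_output()
 * and return the result to the requesting socket via rtnl_unicast().
 */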
2350 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2351 {
2352 struct net *net = sock_net(in_skb->sk);
2353 struct nlattr *tb[RTA_MAX+1];
2354 struct rt6_info *rt;
2355 struct sk_buff *skb;
2356 struct rtmsg *rtm;
2357 struct flowi fl;
2358 int err, iif = 0;
2360 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2361 if (err < 0)
2362 goto errout;
2364 err = -EINVAL;
2365 memset(&fl, 0, sizeof(fl));
2367 if (tb[RTA_SRC]) {
2368 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2369 goto errout;
2371 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2372 }
2374 if (tb[RTA_DST]) {
2375 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2376 goto errout;
2378 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2379 }
2381 if (tb[RTA_IIF])
2382 iif = nla_get_u32(tb[RTA_IIF]);
2384 if (tb[RTA_OIF])
2385 fl.oif = nla_get_u32(tb[RTA_OIF]);
2387 if (iif) {
2388 struct net_device *dev;
2389 dev = __dev_get_by_index(net, iif);
2390 if (!dev) {
2391 err = -ENODEV;
2392 goto errout;
2393 }
2394 }
2396 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2397 if (skb == NULL) {
2398 err = -ENOBUFS;
2399 goto errout;
2400 }
2402 /* Reserve room for dummy headers, this skb can pass
2403 through a good chunk of the routing engine.
2404 */
2405 skb_reset_mac_header(skb);
2406 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2408 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2409 skb_dst_set(skb, &rt->dst);
2411 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2412 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2413 nlh->nlmsg_seq, 0, 0, 0);
2414 if (err < 0) {
2415 kfree_skb(skb);
2416 goto errout;
2417 }
2419 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2420 errout:
2421 return err;
2422 }
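/*
 * inet6_rt_notify - broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE)
 * to RTNLGRP_IPV6_ROUTE listeners.  -EMSGSIZE from rt6_fill_node() here
 * would indicate that rt6_nlmsg_size() is too small, hence the WARN_ON.
 */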
2424 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2425 {
2426 struct sk_buff *skb;
2427 struct net *net = info->nl_net;
2428 u32 seq;
2429 int err;
2431 err = -ENOBUFS;
2432 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2434 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2435 if (skb == NULL)
2436 goto errout;
2438 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2439 event, info->pid, seq, 0, 0, 0);
2440 if (err < 0) {
2441 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2442 WARN_ON(err == -EMSGSIZE);
2443 kfree_skb(skb);
2444 goto errout;
2445 }
2446 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2447 info->nlh, gfp_any());
2448 return;
2449 errout:
2450 if (err < 0)
2451 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2452 }
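/*
 * ip6_route_dev_notify - on NETDEV_REGISTER of the loopback device, point
 * the per-namespace null (and, with multiple tables, prohibit/blackhole)
 * entries at it and take their inet6_dev references.
 */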
2454 static int ip6_route_dev_notify(struct notifier_block *this,
2455 unsigned long event, void *data)
2456 {
2457 struct net_device *dev = (struct net_device *)data;
2458 struct net *net = dev_net(dev);
2460 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2461 net->ipv6.ip6_null_entry->dst.dev = dev;
2462 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2463 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2464 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2465 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2466 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2467 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2468 #endif
2469 }
2471 return NOTIFY_OK;
2472 }
2474 /*
2475  * /proc
2476  */
2478 #ifdef CONFIG_PROC_FS
2480 struct rt6_proc_arg
2481 {
2482 char *buffer;
2483 int offset;
2484 int length;
2485 int skip;
2486 int len;
2487 };
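/*
 * rt6_info_route - print one route as a line of /proc/net/ipv6_route:
 * destination/prefix, source/prefix (all-zero without subtrees), next hop,
 * metric, refcount, use count, flags and device name.
 */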
2489 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2490 {
2491 struct seq_file *m = p_arg;
2493 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2495 #ifdef CONFIG_IPV6_SUBTREES
2496 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2497 #else
2498 seq_puts(m, "00000000000000000000000000000000 00 ");
2499 #endif
2501 if (rt->rt6i_nexthop) {
2502 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2503 } else {
2504 seq_puts(m, "00000000000000000000000000000000");
2505 }
2506 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2507 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2508 rt->dst.__use, rt->rt6i_flags,
2509 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2510 return 0;
2511 }
2513 static int ipv6_route_show(struct seq_file *m, void *v)
2514 {
2515 struct net *net = (struct net *)m->private;
2516 fib6_clean_all(net, rt6_info_route, 0, m);
2517 return 0;
2518 }
2520 static int ipv6_route_open(struct inode *inode, struct file *file)
2521 {
2522 return single_open_net(inode, file, ipv6_route_show);
2523 }
2525 static const struct file_operations ipv6_route_proc_fops = {
2526 .owner = THIS_MODULE,
2527 .open = ipv6_route_open,
2528 .read = seq_read,
2529 .llseek = seq_lseek,
2530 .release = single_release_net,
2531 };
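/*
 * rt6_stats_seq_show - one-line /proc/net/rt6_stats dump: fib node and
 * route counters, the current dst entry count and discarded routes,
 * all printed as hex words.
 */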
2533 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2534 {
2535 struct net *net = (struct net *)seq->private;
2536 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2537 net->ipv6.rt6_stats->fib_nodes,
2538 net->ipv6.rt6_stats->fib_route_nodes,
2539 net->ipv6.rt6_stats->fib_rt_alloc,
2540 net->ipv6.rt6_stats->fib_rt_entries,
2541 net->ipv6.rt6_stats->fib_rt_cache,
2542 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2543 net->ipv6.rt6_stats->fib_discarded_routes);
2545 return 0;
2546 }
2548 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2549 {
2550 return single_open_net(inode, file, rt6_stats_seq_show);
2551 }
2553 static const struct file_operations rt6_stats_seq_fops = {
2554 .owner = THIS_MODULE,
2555 .open = rt6_stats_seq_open,
2556 .read = seq_read,
2557 .llseek = seq_lseek,
2558 .release = single_release_net,
2559 };
2560 #endif /* CONFIG_PROC_FS */
2562 #ifdef CONFIG_SYSCTL
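/*
 * ipv6_sysctl_rtcache_flush - handler for the write-only "flush" sysctl
 * (typically exposed as /proc/sys/net/ipv6/route/flush).  Note that delay
 * is sampled before proc_dointvec() stores the new value, so a write kicks
 * fib6_run_gc() with the previously configured flush_delay.
 */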
2564 static
2565 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2566 void __user *buffer, size_t *lenp, loff_t *ppos)
2567 {
2568 struct net *net = current->nsproxy->net_ns;
2569 int delay = net->ipv6.sysctl.flush_delay;
2570 if (write) {
2571 proc_dointvec(ctl, write, buffer, lenp, ppos);
2572 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2573 return 0;
2574 } else
2575 return -EINVAL;
2576 }
2578 ctl_table ipv6_route_table_template[] = {
2579 {
2580 .procname = "flush",
2581 .data = &init_net.ipv6.sysctl.flush_delay,
2582 .maxlen = sizeof(int),
2583 .mode = 0200,
2584 .proc_handler = ipv6_sysctl_rtcache_flush
2585 },
2586 {
2587 .procname = "gc_thresh",
2588 .data = &ip6_dst_ops_template.gc_thresh,
2589 .maxlen = sizeof(int),
2590 .mode = 0644,
2591 .proc_handler = proc_dointvec,
2592 },
2593 {
2594 .procname = "max_size",
2595 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2596 .maxlen = sizeof(int),
2597 .mode = 0644,
2598 .proc_handler = proc_dointvec,
2599 },
2600 {
2601 .procname = "gc_min_interval",
2602 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2603 .maxlen = sizeof(int),
2604 .mode = 0644,
2605 .proc_handler = proc_dointvec_jiffies,
2606 },
2607 {
2608 .procname = "gc_timeout",
2609 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2610 .maxlen = sizeof(int),
2611 .mode = 0644,
2612 .proc_handler = proc_dointvec_jiffies,
2613 },
2614 {
2615 .procname = "gc_interval",
2616 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2617 .maxlen = sizeof(int),
2618 .mode = 0644,
2619 .proc_handler = proc_dointvec_jiffies,
2620 },
2621 {
2622 .procname = "gc_elasticity",
2623 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
2626 .proc_handler = proc_dointvec,
2627 },
2628 {
2629 .procname = "mtu_expires",
2630 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2631 .maxlen = sizeof(int),
2632 .mode = 0644,
2633 .proc_handler = proc_dointvec_jiffies,
2634 },
2635 {
2636 .procname = "min_adv_mss",
2637 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2638 .maxlen = sizeof(int),
2639 .mode = 0644,
2640 .proc_handler = proc_dointvec,
2641 },
2642 {
2643 .procname = "gc_min_interval_ms",
2644 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2645 .maxlen = sizeof(int),
2646 .mode = 0644,
2647 .proc_handler = proc_dointvec_ms_jiffies,
2648 },
2649 { }
2650 };
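/*
 * ipv6_route_sysctl_init - clone the template table for a namespace and
 * repoint each entry's .data at the per-net counterpart; the indices must
 * match the order of ipv6_route_table_template above.
 */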
2652 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2653 {
2654 struct ctl_table *table;
2656 table = kmemdup(ipv6_route_table_template,
2657 sizeof(ipv6_route_table_template),
2658 GFP_KERNEL);
2660 if (table) {
2661 table[0].data = &net->ipv6.sysctl.flush_delay;
2662 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2663 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2664 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2665 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2666 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2667 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2668 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2669 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2670 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2671 }
2673 return table;
2674 }
2675 #endif
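/*
 * ip6_route_net_init - per-namespace setup: copy the dst_ops template,
 * allocate the null/prohibit/blackhole routes with a hop limit metric of
 * 255, seed the routing sysctl defaults and create the proc entries.
 */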
2677 static int __net_init ip6_route_net_init(struct net *net)
2678 {
2679 int ret = -ENOMEM;
2681 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2682 sizeof(net->ipv6.ip6_dst_ops));
2684 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2685 goto out_ip6_dst_ops;
2687 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2688 sizeof(*net->ipv6.ip6_null_entry),
2689 GFP_KERNEL);
2690 if (!net->ipv6.ip6_null_entry)
2691 goto out_ip6_dst_entries;
2692 net->ipv6.ip6_null_entry->dst.path =
2693 (struct dst_entry *)net->ipv6.ip6_null_entry;
2694 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2695 dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
2697 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2698 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2699 sizeof(*net->ipv6.ip6_prohibit_entry),
2700 GFP_KERNEL);
2701 if (!net->ipv6.ip6_prohibit_entry)
2702 goto out_ip6_null_entry;
2703 net->ipv6.ip6_prohibit_entry->dst.path =
2704 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2705 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2706 dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
2708 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2709 sizeof(*net->ipv6.ip6_blk_hole_entry),
2710 GFP_KERNEL);
2711 if (!net->ipv6.ip6_blk_hole_entry)
2712 goto out_ip6_prohibit_entry;
2713 net->ipv6.ip6_blk_hole_entry->dst.path =
2714 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2715 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2716 dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
2717 #endif
2719 net->ipv6.sysctl.flush_delay = 0;
2720 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2721 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2722 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2723 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2724 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2725 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2726 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2728 #ifdef CONFIG_PROC_FS
2729 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2730 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2731 #endif
2732 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2734 ret = 0;
2735 out:
2736 return ret;
2738 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2739 out_ip6_prohibit_entry:
2740 kfree(net->ipv6.ip6_prohibit_entry);
2741 out_ip6_null_entry:
2742 kfree(net->ipv6.ip6_null_entry);
2743 #endif
2744 out_ip6_dst_entries:
2745 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2746 out_ip6_dst_ops:
2747 goto out;
2748 }
2750 static void __net_exit ip6_route_net_exit(struct net *net)
2751 {
2752 #ifdef CONFIG_PROC_FS
2753 proc_net_remove(net, "ipv6_route");
2754 proc_net_remove(net, "rt6_stats");
2755 #endif
2756 kfree(net->ipv6.ip6_null_entry);
2757 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2758 kfree(net->ipv6.ip6_prohibit_entry);
2759 kfree(net->ipv6.ip6_blk_hole_entry);
2760 #endif
2761 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2762 }
2764 static struct pernet_operations ip6_route_net_ops = {
2765 .init = ip6_route_net_init,
2766 .exit = ip6_route_net_exit,
2767 };
2769 static struct notifier_block ip6_route_dev_notifier = {
2770 .notifier_call = ip6_route_dev_notify,
2771 .priority = 0,
2772 };
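/*
 * ip6_route_init - subsystem bring-up: dst cache, pernet state, fib6,
 * xfrm6 and policy rules, the three rtnetlink message handlers and the
 * netdevice notifier, torn down in reverse order on error or in
 * ip6_route_cleanup().
 */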
2774 int __init ip6_route_init(void)
2775 {
2776 int ret;
2778 ret = -ENOMEM;
2779 ip6_dst_ops_template.kmem_cachep =
2780 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2781 SLAB_HWCACHE_ALIGN, NULL);
2782 if (!ip6_dst_ops_template.kmem_cachep)
2783 goto out;
2785 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2786 if (ret)
2787 goto out_kmem_cache;
2789 ret = register_pernet_subsys(&ip6_route_net_ops);
2790 if (ret)
2791 goto out_dst_entries;
2793 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2795 /* The loopback device is registered before this point, so the loopback
2796 * reference in rt6_info is not taken automatically; take it manually
2797 * for init_net */
2798 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2799 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2800 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2801 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2802 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2803 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2804 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2805 #endif
2806 ret = fib6_init();
2807 if (ret)
2808 goto out_register_subsys;
2810 ret = xfrm6_init();
2811 if (ret)
2812 goto out_fib6_init;
2814 ret = fib6_rules_init();
2815 if (ret)
2816 goto xfrm6_init;
2818 ret = -ENOBUFS;
2819 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2820 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2821 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2822 goto fib6_rules_init;
2824 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2825 if (ret)
2826 goto fib6_rules_init;
2828 out:
2829 return ret;
2831 fib6_rules_init:
2832 fib6_rules_cleanup();
2833 xfrm6_init:
2834 xfrm6_fini();
2835 out_fib6_init:
2836 fib6_gc_cleanup();
2837 out_register_subsys:
2838 unregister_pernet_subsys(&ip6_route_net_ops);
2839 out_dst_entries:
2840 dst_entries_destroy(&ip6_dst_blackhole_ops);
2841 out_kmem_cache:
2842 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2843 goto out;
2844 }
2846 void ip6_route_cleanup(void)
2847 {
2848 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2849 fib6_rules_cleanup();
2850 xfrm6_fini();
2851 fib6_gc_cleanup();
2852 unregister_pernet_subsys(&ip6_route_net_ops);
2853 dst_entries_destroy(&ip6_dst_blackhole_ops);
2854 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2855 }