ALSA: hda - Workaround for buggy DMA position on ATI controllers
[linux-2.6/mini2440.git] / net / ipv6 / route.c
blobc4a59824ac2ce45129508517ef0627e0f0fb96b1
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <net/net_namespace.h>
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 #include <net/dst.h>
53 #include <net/xfrm.h>
54 #include <net/netevent.h>
55 #include <net/netlink.h>
57 #include <asm/uaccess.h>
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() only produce
 * output when RT6_DEBUG is 3 or higher; otherwise they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* Non-zero makes ip6_pol_route() clone off-link routes with rt6_alloc_clone(). */
#define CLONE_OFFLINK_ROUTE 0
76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79 static void ip6_dst_destroy(struct dst_entry *);
80 static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
82 static int ip6_dst_gc(struct dst_ops *ops);
84 static int ip6_pkt_discard(struct sk_buff *skb);
85 static int ip6_pkt_discard_out(struct sk_buff *skb);
86 static void ip6_link_failure(struct sk_buff *skb);
87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 #ifdef CONFIG_IPV6_ROUTE_INFO
90 static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94 static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
96 struct in6_addr *gwaddr, int ifindex);
97 #endif
99 static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
110 .local_out = __ip6_local_out,
111 .entries = ATOMIC_INIT(0),
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
118 static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entries = ATOMIC_INIT(0),
127 static struct rt6_info ip6_null_entry_template = {
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_metric = ~(u32) 0,
141 .rt6i_ref = ATOMIC_INIT(1),
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template reject route whose dst reports -EACCES ("prohibit"). */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template reject route that hands packets to dst_discard ("blackhole"). */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
/* Allocate a dst from @ops and hand it back typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
191 static void ip6_dst_destroy(struct dst_entry *dst)
193 struct rt6_info *rt = (struct rt6_info *)dst;
194 struct inet6_dev *idev = rt->rt6i_idev;
196 if (idev != NULL) {
197 rt->rt6i_idev = NULL;
198 in6_dev_put(idev);
202 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
203 int how)
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
207 struct net_device *loopback_dev =
208 dev_net(dev)->loopback_dev;
210 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev =
212 in6_dev_get(loopback_dev);
213 if (loopback_idev != NULL) {
214 rt->rt6i_idev = loopback_idev;
215 in6_dev_put(idev);
220 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
222 return (rt->rt6i_flags & RTF_EXPIRES &&
223 time_after(jiffies, rt->rt6i_expires));
226 static inline int rt6_need_strict(struct in6_addr *daddr)
228 return (ipv6_addr_type(daddr) &
229 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
233 * Route lookup. Any table->tb6_lock is implied.
236 static inline struct rt6_info *rt6_device_match(struct net *net,
237 struct rt6_info *rt,
238 struct in6_addr *saddr,
239 int oif,
240 int flags)
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
245 if (!oif && ipv6_addr_any(saddr))
246 goto out;
248 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249 struct net_device *dev = sprt->rt6i_dev;
251 if (oif) {
252 if (dev->ifindex == oif)
253 return sprt;
254 if (dev->flags & IFF_LOOPBACK) {
255 if (sprt->rt6i_idev == NULL ||
256 sprt->rt6i_idev->dev->ifindex != oif) {
257 if (flags & RT6_LOOKUP_F_IFACE && oif)
258 continue;
259 if (local && (!oif ||
260 local->rt6i_idev->dev->ifindex == oif))
261 continue;
263 local = sprt;
265 } else {
266 if (ipv6_chk_addr(net, saddr, dev,
267 flags & RT6_LOOKUP_F_IFACE))
268 return sprt;
272 if (oif) {
273 if (local)
274 return local;
276 if (flags & RT6_LOOKUP_F_IFACE)
277 return net->ipv6.ip6_null_entry;
279 out:
280 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the next hop of @rt when that
 * neighbour is not in a NUD_VALID state and the per-device
 * rtr_probe_interval has elapsed since neigh->updated.
 *
 * Okay, this does not seem to be appropriate
 * for now, however, we need to check if it
 * is really so; aka Router Reachability Probing.
 *
 * Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; the state is re-read under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}

	/* Stamp the probe time so that later calls are rate-limited. */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
320 * Default Router Selection (RFC 2461 6.3.6)
322 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
324 struct net_device *dev = rt->rt6i_dev;
325 if (!oif || dev->ifindex == oif)
326 return 2;
327 if ((dev->flags & IFF_LOOPBACK) &&
328 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 return 1;
330 return 0;
333 static inline int rt6_check_neigh(struct rt6_info *rt)
335 struct neighbour *neigh = rt->rt6i_nexthop;
336 int m;
337 if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 !(rt->rt6i_flags & RTF_GATEWAY))
339 m = 1;
340 else if (neigh) {
341 read_lock_bh(&neigh->lock);
342 if (neigh->nud_state & NUD_VALID)
343 m = 2;
344 #ifdef CONFIG_IPV6_ROUTER_PREF
345 else if (neigh->nud_state & NUD_FAILED)
346 m = 0;
347 #endif
348 else
349 m = 1;
350 read_unlock_bh(&neigh->lock);
351 } else
352 m = 0;
353 return m;
356 static int rt6_score_route(struct rt6_info *rt, int oif,
357 int strict)
359 int m, n;
361 m = rt6_check_dev(rt, oif);
362 if (!m && (strict & RT6_LOOKUP_F_IFACE))
363 return -1;
364 #ifdef CONFIG_IPV6_ROUTER_PREF
365 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
366 #endif
367 n = rt6_check_neigh(rt);
368 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
369 return -1;
370 return m;
373 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
374 int *mpri, struct rt6_info *match)
376 int m;
378 if (rt6_check_expired(rt))
379 goto out;
381 m = rt6_score_route(rt, oif, strict);
382 if (m < 0)
383 goto out;
385 if (m > *mpri) {
386 if (strict & RT6_LOOKUP_F_REACHABLE)
387 rt6_probe(match);
388 *mpri = m;
389 match = rt;
390 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
391 rt6_probe(rt);
394 out:
395 return match;
398 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
399 struct rt6_info *rr_head,
400 u32 metric, int oif, int strict)
402 struct rt6_info *rt, *match;
403 int mpri = -1;
405 match = NULL;
406 for (rt = rr_head; rt && rt->rt6i_metric == metric;
407 rt = rt->u.dst.rt6_next)
408 match = find_match(rt, oif, strict, &mpri, match);
409 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
413 return match;
416 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
418 struct rt6_info *match, *rt0;
419 struct net *net;
421 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
422 __func__, fn->leaf, oif);
424 rt0 = fn->rr_ptr;
425 if (!rt0)
426 fn->rr_ptr = rt0 = fn->leaf;
428 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
430 if (!match &&
431 (strict & RT6_LOOKUP_F_REACHABLE)) {
432 struct rt6_info *next = rt0->u.dst.rt6_next;
434 /* no entries matched; do round-robin */
435 if (!next || next->rt6i_metric != rt0->rt6i_metric)
436 next = fn->leaf;
438 if (next != rt0)
439 fn->rr_ptr = next;
442 RT6_TRACE("%s() => %p\n",
443 __func__, match);
445 net = dev_net(rt0->rt6i_dev);
446 return (match ? match : net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option (@opt, @len bytes) received on
 * @dev from the router @gwaddr.
 *
 * A zero lifetime deletes an existing matching route; otherwise the route
 * is added or has its preference refreshed, and its expiry is set from
 * the lifetime (or cleared when the lifetime is not finite).
 *
 * Returns 0, or -EINVAL when the option is too short or its length and
 * prefix_len fields are inconsistent.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the full prefix field; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
/*
 * BACKTRACK - climb the fib6 tree when the lookup produced the null entry.
 *
 * Relies on the expanding function having local variables 'rt' and 'fn'
 * and labels 'out' and 'restart'.  While 'rt' is the namespace's
 * ip6_null_entry it walks to the parent node (descending into the parent's
 * source subtree via fib6_lookup() when there is one and we did not come
 * from it), jumping to 'restart' at the first node flagged RTN_RTINFO and
 * to 'out' once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while(0)
541 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
543 struct flowi *fl, int flags)
545 struct fib6_node *fn;
546 struct rt6_info *rt;
548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550 restart:
551 rt = fn->leaf;
552 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
553 BACKTRACK(net, &fl->fl6_src);
554 out:
555 dst_use(&rt->u.dst, jiffies);
556 read_unlock_bh(&table->tb6_lock);
557 return rt;
561 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
562 const struct in6_addr *saddr, int oif, int strict)
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
572 struct dst_entry *dst;
573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
584 dst_release(dst);
586 return NULL;
589 EXPORT_SYMBOL(rt6_lookup);
591 /* ip6_ins_rt is called with FREE table->tb6_lock.
592 It takes new route entry, the addition fails by any reason the
593 route is freed. In any case, if caller does not hold it, it may
594 be destroyed.
597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
599 int err;
600 struct fib6_table *table;
602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
604 err = fib6_add(&table->tb6_root, rt, info);
605 write_unlock_bh(&table->tb6_lock);
607 return err;
610 int ip6_ins_rt(struct rt6_info *rt)
612 struct nl_info info = {
613 .nl_net = dev_net(rt->rt6i_dev),
615 return __ip6_ins_rt(rt, &info);
618 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
621 struct rt6_info *rt;
624 * Clone the route.
627 rt = ip6_rt_copy(ort);
629 if (rt) {
630 struct neighbour *neigh;
631 int attempts = !in_softirq();
633 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
634 if (rt->rt6i_dst.plen != 128 &&
635 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
636 rt->rt6i_flags |= RTF_ANYCAST;
637 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
640 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
641 rt->rt6i_dst.plen = 128;
642 rt->rt6i_flags |= RTF_CACHE;
643 rt->u.dst.flags |= DST_HOST;
645 #ifdef CONFIG_IPV6_SUBTREES
646 if (rt->rt6i_src.plen && saddr) {
647 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
648 rt->rt6i_src.plen = 128;
650 #endif
652 retry:
653 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
654 if (IS_ERR(neigh)) {
655 struct net *net = dev_net(rt->rt6i_dev);
656 int saved_rt_min_interval =
657 net->ipv6.sysctl.ip6_rt_gc_min_interval;
658 int saved_rt_elasticity =
659 net->ipv6.sysctl.ip6_rt_gc_elasticity;
661 if (attempts-- > 0) {
662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
663 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
665 ip6_dst_gc(net->ipv6.ip6_dst_ops);
667 net->ipv6.sysctl.ip6_rt_gc_elasticity =
668 saved_rt_elasticity;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval =
670 saved_rt_min_interval;
671 goto retry;
674 if (net_ratelimit())
675 printk(KERN_WARNING
676 "Neighbour table overflow.\n");
677 dst_free(&rt->u.dst);
678 return NULL;
680 rt->rt6i_nexthop = neigh;
684 return rt;
687 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
689 struct rt6_info *rt = ip6_rt_copy(ort);
690 if (rt) {
691 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
692 rt->rt6i_dst.plen = 128;
693 rt->rt6i_flags |= RTF_CACHE;
694 rt->u.dst.flags |= DST_HOST;
695 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
697 return rt;
700 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
701 struct flowi *fl, int flags)
703 struct fib6_node *fn;
704 struct rt6_info *rt, *nrt;
705 int strict = 0;
706 int attempts = 3;
707 int err;
708 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
710 strict |= flags & RT6_LOOKUP_F_IFACE;
712 relookup:
713 read_lock_bh(&table->tb6_lock);
715 restart_2:
716 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
718 restart:
719 rt = rt6_select(fn, oif, strict | reachable);
721 BACKTRACK(net, &fl->fl6_src);
722 if (rt == net->ipv6.ip6_null_entry ||
723 rt->rt6i_flags & RTF_CACHE)
724 goto out;
726 dst_hold(&rt->u.dst);
727 read_unlock_bh(&table->tb6_lock);
729 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
730 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
731 else {
732 #if CLONE_OFFLINK_ROUTE
733 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
734 #else
735 goto out2;
736 #endif
739 dst_release(&rt->u.dst);
740 rt = nrt ? : net->ipv6.ip6_null_entry;
742 dst_hold(&rt->u.dst);
743 if (nrt) {
744 err = ip6_ins_rt(nrt);
745 if (!err)
746 goto out2;
749 if (--attempts <= 0)
750 goto out2;
753 * Race condition! In the gap, when table->tb6_lock was
754 * released someone could insert this route. Relookup.
756 dst_release(&rt->u.dst);
757 goto relookup;
759 out:
760 if (reachable) {
761 reachable = 0;
762 goto restart_2;
764 dst_hold(&rt->u.dst);
765 read_unlock_bh(&table->tb6_lock);
766 out2:
767 rt->u.dst.lastuse = jiffies;
768 rt->u.dst.__use++;
770 return rt;
773 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
774 struct flowi *fl, int flags)
776 return ip6_pol_route(net, table, fl->iif, fl, flags);
779 void ip6_route_input(struct sk_buff *skb)
781 struct ipv6hdr *iph = ipv6_hdr(skb);
782 struct net *net = dev_net(skb->dev);
783 int flags = RT6_LOOKUP_F_HAS_SADDR;
784 struct flowi fl = {
785 .iif = skb->dev->ifindex,
786 .nl_u = {
787 .ip6_u = {
788 .daddr = iph->daddr,
789 .saddr = iph->saddr,
790 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
793 .mark = skb->mark,
794 .proto = iph->nexthdr,
797 if (rt6_need_strict(&iph->daddr))
798 flags |= RT6_LOOKUP_F_IFACE;
800 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
803 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
804 struct flowi *fl, int flags)
806 return ip6_pol_route(net, table, fl->oif, fl, flags);
809 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
810 struct flowi *fl)
812 int flags = 0;
814 if (rt6_need_strict(&fl->fl6_dst))
815 flags |= RT6_LOOKUP_F_IFACE;
817 if (!ipv6_addr_any(&fl->fl6_src))
818 flags |= RT6_LOOKUP_F_HAS_SADDR;
819 else if (sk) {
820 unsigned int prefs = inet6_sk(sk)->srcprefs;
821 if (prefs & IPV6_PREFER_SRC_TMP)
822 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
823 if (prefs & IPV6_PREFER_SRC_PUBLIC)
824 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
825 if (prefs & IPV6_PREFER_SRC_COA)
826 flags |= RT6_LOOKUP_F_SRCPREF_COA;
829 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
832 EXPORT_SYMBOL(ip6_route_output);
834 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
836 struct rt6_info *ort = (struct rt6_info *) *dstp;
837 struct rt6_info *rt = (struct rt6_info *)
838 dst_alloc(&ip6_dst_blackhole_ops);
839 struct dst_entry *new = NULL;
841 if (rt) {
842 new = &rt->u.dst;
844 atomic_set(&new->__refcnt, 1);
845 new->__use = 1;
846 new->input = dst_discard;
847 new->output = dst_discard;
849 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
850 new->dev = ort->u.dst.dev;
851 if (new->dev)
852 dev_hold(new->dev);
853 rt->rt6i_idev = ort->rt6i_idev;
854 if (rt->rt6i_idev)
855 in6_dev_hold(rt->rt6i_idev);
856 rt->rt6i_expires = 0;
858 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
859 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
860 rt->rt6i_metric = 0;
862 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
863 #ifdef CONFIG_IPV6_SUBTREES
864 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
865 #endif
867 dst_free(new);
870 dst_release(*dstp);
871 *dstp = new;
872 return (new ? 0 : -ENOMEM);
874 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
877 * Destination cache support functions
880 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
882 struct rt6_info *rt;
884 rt = (struct rt6_info *) dst;
886 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
887 return dst;
889 return NULL;
892 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
894 struct rt6_info *rt = (struct rt6_info *) dst;
896 if (rt) {
897 if (rt->rt6i_flags & RTF_CACHE)
898 ip6_del_rt(rt);
899 else
900 dst_release(dst);
902 return NULL;
905 static void ip6_link_failure(struct sk_buff *skb)
907 struct rt6_info *rt;
909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
911 rt = (struct rt6_info *) skb->dst;
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
914 dst_set_expires(&rt->u.dst, 0);
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
921 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
928 mtu = IPV6_MIN_MTU;
929 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
931 dst->metrics[RTAX_MTU-1] = mtu;
932 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
936 static int ipv6_get_mtu(struct net_device *dev);
938 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
940 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
942 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
943 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
946 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
947 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
948 * IPV6_MAXPLEN is also valid and means: "any MSS,
949 * rely only on pmtu discovery"
951 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
952 mtu = IPV6_MAXPLEN;
953 return mtu;
/*
 * Singly linked list (through dst->next) of the dsts created by
 * icmp6_dst_alloc(), and the spinlock that protects it.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
959 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
960 struct neighbour *neigh,
961 const struct in6_addr *addr)
963 struct rt6_info *rt;
964 struct inet6_dev *idev = in6_dev_get(dev);
965 struct net *net = dev_net(dev);
967 if (unlikely(idev == NULL))
968 return NULL;
970 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
971 if (unlikely(rt == NULL)) {
972 in6_dev_put(idev);
973 goto out;
976 dev_hold(dev);
977 if (neigh)
978 neigh_hold(neigh);
979 else {
980 neigh = ndisc_get_neigh(dev, addr);
981 if (IS_ERR(neigh))
982 neigh = NULL;
985 rt->rt6i_dev = dev;
986 rt->rt6i_idev = idev;
987 rt->rt6i_nexthop = neigh;
988 atomic_set(&rt->u.dst.__refcnt, 1);
989 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
990 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
992 rt->u.dst.output = ip6_output;
994 #if 0 /* there's no chance to use these for ndisc */
995 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
996 ? DST_HOST
997 : 0;
998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
999 rt->rt6i_dst.plen = 128;
1000 #endif
1002 spin_lock_bh(&icmp6_dst_lock);
1003 rt->u.dst.next = icmp6_dst_gc_list;
1004 icmp6_dst_gc_list = &rt->u.dst;
1005 spin_unlock_bh(&icmp6_dst_lock);
1007 fib6_force_start_gc(net);
1009 out:
1010 return &rt->u.dst;
1013 int icmp6_dst_gc(void)
1015 struct dst_entry *dst, *next, **pprev;
1016 int more = 0;
1018 next = NULL;
1020 spin_lock_bh(&icmp6_dst_lock);
1021 pprev = &icmp6_dst_gc_list;
1023 while ((dst = *pprev) != NULL) {
1024 if (!atomic_read(&dst->__refcnt)) {
1025 *pprev = dst->next;
1026 dst_free(dst);
1027 } else {
1028 pprev = &dst->next;
1029 ++more;
1033 spin_unlock_bh(&icmp6_dst_lock);
1035 return more;
1038 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1039 void *arg)
1041 struct dst_entry *dst, **pprev;
1043 spin_lock_bh(&icmp6_dst_lock);
1044 pprev = &icmp6_dst_gc_list;
1045 while ((dst = *pprev) != NULL) {
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047 if (func(rt, arg)) {
1048 *pprev = dst->next;
1049 dst_free(dst);
1050 } else {
1051 pprev = &dst->next;
1054 spin_unlock_bh(&icmp6_dst_lock);
1057 static int ip6_dst_gc(struct dst_ops *ops)
1059 unsigned long now = jiffies;
1060 struct net *net = ops->dst_net;
1061 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1062 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1063 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1064 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1065 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1067 if (time_after(rt_last_gc + rt_min_interval, now) &&
1068 atomic_read(&ops->entries) <= rt_max_size)
1069 goto out;
1071 net->ipv6.ip6_rt_gc_expire++;
1072 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1073 net->ipv6.ip6_rt_last_gc = now;
1074 if (atomic_read(&ops->entries) < ops->gc_thresh)
1075 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1076 out:
1077 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1078 return (atomic_read(&ops->entries) > rt_max_size);
1081 /* Clean host part of a prefix. Not necessary in radix tree,
1082 but results in cleaner routing tables.
1084 Remove it only when all the things will work!
1087 static int ipv6_get_mtu(struct net_device *dev)
1089 int mtu = IPV6_MIN_MTU;
1090 struct inet6_dev *idev;
1092 idev = in6_dev_get(dev);
1093 if (idev) {
1094 mtu = idev->cnf.mtu6;
1095 in6_dev_put(idev);
1097 return mtu;
1100 int ip6_dst_hoplimit(struct dst_entry *dst)
1102 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1103 if (hoplimit < 0) {
1104 struct net_device *dev = dst->dev;
1105 struct inet6_dev *idev = in6_dev_get(dev);
1106 if (idev) {
1107 hoplimit = idev->cnf.hop_limit;
1108 in6_dev_put(idev);
1109 } else
1110 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1112 return hoplimit;
1119 int ip6_route_add(struct fib6_config *cfg)
1121 int err;
1122 struct net *net = cfg->fc_nlinfo.nl_net;
1123 struct rt6_info *rt = NULL;
1124 struct net_device *dev = NULL;
1125 struct inet6_dev *idev = NULL;
1126 struct fib6_table *table;
1127 int addr_type;
1129 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1130 return -EINVAL;
1131 #ifndef CONFIG_IPV6_SUBTREES
1132 if (cfg->fc_src_len)
1133 return -EINVAL;
1134 #endif
1135 if (cfg->fc_ifindex) {
1136 err = -ENODEV;
1137 dev = dev_get_by_index(net, cfg->fc_ifindex);
1138 if (!dev)
1139 goto out;
1140 idev = in6_dev_get(dev);
1141 if (!idev)
1142 goto out;
1145 if (cfg->fc_metric == 0)
1146 cfg->fc_metric = IP6_RT_PRIO_USER;
1148 table = fib6_new_table(net, cfg->fc_table);
1149 if (table == NULL) {
1150 err = -ENOBUFS;
1151 goto out;
1154 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1156 if (rt == NULL) {
1157 err = -ENOMEM;
1158 goto out;
1161 rt->u.dst.obsolete = -1;
1162 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1163 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1166 if (cfg->fc_protocol == RTPROT_UNSPEC)
1167 cfg->fc_protocol = RTPROT_BOOT;
1168 rt->rt6i_protocol = cfg->fc_protocol;
1170 addr_type = ipv6_addr_type(&cfg->fc_dst);
1172 if (addr_type & IPV6_ADDR_MULTICAST)
1173 rt->u.dst.input = ip6_mc_input;
1174 else
1175 rt->u.dst.input = ip6_forward;
1177 rt->u.dst.output = ip6_output;
1179 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1180 rt->rt6i_dst.plen = cfg->fc_dst_len;
1181 if (rt->rt6i_dst.plen == 128)
1182 rt->u.dst.flags = DST_HOST;
1184 #ifdef CONFIG_IPV6_SUBTREES
1185 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1186 rt->rt6i_src.plen = cfg->fc_src_len;
1187 #endif
1189 rt->rt6i_metric = cfg->fc_metric;
1191 /* We cannot add true routes via loopback here,
1192 they would result in kernel looping; promote them to reject routes
1194 if ((cfg->fc_flags & RTF_REJECT) ||
1195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1196 /* hold loopback dev/idev if we haven't done so. */
1197 if (dev != net->loopback_dev) {
1198 if (dev) {
1199 dev_put(dev);
1200 in6_dev_put(idev);
1202 dev = net->loopback_dev;
1203 dev_hold(dev);
1204 idev = in6_dev_get(dev);
1205 if (!idev) {
1206 err = -ENODEV;
1207 goto out;
1210 rt->u.dst.output = ip6_pkt_discard_out;
1211 rt->u.dst.input = ip6_pkt_discard;
1212 rt->u.dst.error = -ENETUNREACH;
1213 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1214 goto install_route;
1217 if (cfg->fc_flags & RTF_GATEWAY) {
1218 struct in6_addr *gw_addr;
1219 int gwa_type;
1221 gw_addr = &cfg->fc_gateway;
1222 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1223 gwa_type = ipv6_addr_type(gw_addr);
1225 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1226 struct rt6_info *grt;
1228 /* IPv6 strictly inhibits using not link-local
1229 addresses as nexthop address.
1230 Otherwise, router will not able to send redirects.
1231 It is very good, but in some (rare!) circumstances
1232 (SIT, PtP, NBMA NOARP links) it is handy to allow
1233 some exceptions. --ANK
1235 err = -EINVAL;
1236 if (!(gwa_type&IPV6_ADDR_UNICAST))
1237 goto out;
1239 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1241 err = -EHOSTUNREACH;
1242 if (grt == NULL)
1243 goto out;
1244 if (dev) {
1245 if (dev != grt->rt6i_dev) {
1246 dst_release(&grt->u.dst);
1247 goto out;
1249 } else {
1250 dev = grt->rt6i_dev;
1251 idev = grt->rt6i_idev;
1252 dev_hold(dev);
1253 in6_dev_hold(grt->rt6i_idev);
1255 if (!(grt->rt6i_flags&RTF_GATEWAY))
1256 err = 0;
1257 dst_release(&grt->u.dst);
1259 if (err)
1260 goto out;
1262 err = -EINVAL;
1263 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1264 goto out;
1267 err = -ENODEV;
1268 if (dev == NULL)
1269 goto out;
1271 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1272 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1273 if (IS_ERR(rt->rt6i_nexthop)) {
1274 err = PTR_ERR(rt->rt6i_nexthop);
1275 rt->rt6i_nexthop = NULL;
1276 goto out;
1280 rt->rt6i_flags = cfg->fc_flags;
1282 install_route:
1283 if (cfg->fc_mx) {
1284 struct nlattr *nla;
1285 int remaining;
1287 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1288 int type = nla_type(nla);
1290 if (type) {
1291 if (type > RTAX_MAX) {
1292 err = -EINVAL;
1293 goto out;
1296 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1301 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1302 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1303 if (!dst_mtu(&rt->u.dst))
1304 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1305 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1306 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1307 rt->u.dst.dev = dev;
1308 rt->rt6i_idev = idev;
1309 rt->rt6i_table = table;
1311 cfg->fc_nlinfo.nl_net = dev_net(dev);
1313 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1315 out:
1316 if (dev)
1317 dev_put(dev);
1318 if (idev)
1319 in6_dev_put(idev);
1320 if (rt)
1321 dst_free(&rt->u.dst);
1322 return err;
1325 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1327 int err;
1328 struct fib6_table *table;
1329 struct net *net = dev_net(rt->rt6i_dev);
1331 if (rt == net->ipv6.ip6_null_entry)
1332 return -ENOENT;
1334 table = rt->rt6i_table;
1335 write_lock_bh(&table->tb6_lock);
1337 err = fib6_del(rt, info);
1338 dst_release(&rt->u.dst);
1340 write_unlock_bh(&table->tb6_lock);
1342 return err;
1345 int ip6_del_rt(struct rt6_info *rt)
1347 struct nl_info info = {
1348 .nl_net = dev_net(rt->rt6i_dev),
1350 return __ip6_del_rt(rt, &info);
1353 static int ip6_route_del(struct fib6_config *cfg)
1355 struct fib6_table *table;
1356 struct fib6_node *fn;
1357 struct rt6_info *rt;
1358 int err = -ESRCH;
1360 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1361 if (table == NULL)
1362 return err;
1364 read_lock_bh(&table->tb6_lock);
1366 fn = fib6_locate(&table->tb6_root,
1367 &cfg->fc_dst, cfg->fc_dst_len,
1368 &cfg->fc_src, cfg->fc_src_len);
1370 if (fn) {
1371 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1372 if (cfg->fc_ifindex &&
1373 (rt->rt6i_dev == NULL ||
1374 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1375 continue;
1376 if (cfg->fc_flags & RTF_GATEWAY &&
1377 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1378 continue;
1379 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1380 continue;
1381 dst_hold(&rt->u.dst);
1382 read_unlock_bh(&table->tb6_lock);
1384 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1387 read_unlock_bh(&table->tb6_lock);
1389 return err;
/*
 *	Handle redirects
 */
1395 struct ip6rd_flowi {
1396 struct flowi fl;
1397 struct in6_addr gateway;
1400 static struct rt6_info *__ip6_route_redirect(struct net *net,
1401 struct fib6_table *table,
1402 struct flowi *fl,
1403 int flags)
1405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1406 struct rt6_info *rt;
1407 struct fib6_node *fn;
1410 * Get the "current" route for this destination and
1411 * check if the redirect has come from approriate router.
1413 * RFC 2461 specifies that redirects should only be
1414 * accepted if they come from the nexthop to the target.
1415 * Due to the way the routes are chosen, this notion
1416 * is a bit fuzzy and one might need to check all possible
1417 * routes.
1420 read_lock_bh(&table->tb6_lock);
1421 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1422 restart:
1423 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1425 * Current route is on-link; redirect is always invalid.
1427 * Seems, previous statement is not true. It could
1428 * be node, which looks for us as on-link (f.e. proxy ndisc)
1429 * But then router serving it might decide, that we should
1430 * know truth 8)8) --ANK (980726).
1432 if (rt6_check_expired(rt))
1433 continue;
1434 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 continue;
1436 if (fl->oif != rt->rt6i_dev->ifindex)
1437 continue;
1438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1439 continue;
1440 break;
1443 if (!rt)
1444 rt = net->ipv6.ip6_null_entry;
1445 BACKTRACK(net, &fl->fl6_src);
1446 out:
1447 dst_hold(&rt->u.dst);
1449 read_unlock_bh(&table->tb6_lock);
1451 return rt;
1454 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1455 struct in6_addr *src,
1456 struct in6_addr *gateway,
1457 struct net_device *dev)
1459 int flags = RT6_LOOKUP_F_HAS_SADDR;
1460 struct net *net = dev_net(dev);
1461 struct ip6rd_flowi rdfl = {
1462 .fl = {
1463 .oif = dev->ifindex,
1464 .nl_u = {
1465 .ip6_u = {
1466 .daddr = *dest,
1467 .saddr = *src,
1471 .gateway = *gateway,
1474 if (rt6_need_strict(dest))
1475 flags |= RT6_LOOKUP_F_IFACE;
1477 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1478 flags, __ip6_route_redirect);
1481 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1482 struct in6_addr *saddr,
1483 struct neighbour *neigh, u8 *lladdr, int on_link)
1485 struct rt6_info *rt, *nrt = NULL;
1486 struct netevent_redirect netevent;
1487 struct net *net = dev_net(neigh->dev);
1489 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1491 if (rt == net->ipv6.ip6_null_entry) {
1492 if (net_ratelimit())
1493 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1494 "for redirect target\n");
1495 goto out;
1499 * We have finally decided to accept it.
1502 neigh_update(neigh, lladdr, NUD_STALE,
1503 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1504 NEIGH_UPDATE_F_OVERRIDE|
1505 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1506 NEIGH_UPDATE_F_ISROUTER))
1510 * Redirect received -> path was valid.
1511 * Look, redirects are sent only in response to data packets,
1512 * so that this nexthop apparently is reachable. --ANK
1514 dst_confirm(&rt->u.dst);
1516 /* Duplicate redirect: silently ignore. */
1517 if (neigh == rt->u.dst.neighbour)
1518 goto out;
1520 nrt = ip6_rt_copy(rt);
1521 if (nrt == NULL)
1522 goto out;
1524 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1525 if (on_link)
1526 nrt->rt6i_flags &= ~RTF_GATEWAY;
1528 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1529 nrt->rt6i_dst.plen = 128;
1530 nrt->u.dst.flags |= DST_HOST;
1532 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1533 nrt->rt6i_nexthop = neigh_clone(neigh);
1534 /* Reset pmtu, it may be better */
1535 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1536 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1537 dst_mtu(&nrt->u.dst));
1539 if (ip6_ins_rt(nrt))
1540 goto out;
1542 netevent.old = &rt->u.dst;
1543 netevent.new = &nrt->u.dst;
1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1546 if (rt->rt6i_flags&RTF_CACHE) {
1547 ip6_del_rt(rt);
1548 return;
1551 out:
1552 dst_release(&rt->u.dst);
1553 return;
/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */
1561 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1562 struct net_device *dev, u32 pmtu)
1564 struct rt6_info *rt, *nrt;
1565 struct net *net = dev_net(dev);
1566 int allfrag = 0;
1568 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1569 if (rt == NULL)
1570 return;
1572 if (pmtu >= dst_mtu(&rt->u.dst))
1573 goto out;
1575 if (pmtu < IPV6_MIN_MTU) {
1577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1590 dst_confirm(&rt->u.dst);
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1597 if (rt->rt6i_flags & RTF_CACHE) {
1598 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599 if (allfrag)
1600 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1601 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1602 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1603 goto out;
1606 /* Network route.
1607 Two cases are possible:
1608 1. It is connected route. Action: COW
1609 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1611 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1612 nrt = rt6_alloc_cow(rt, daddr, saddr);
1613 else
1614 nrt = rt6_alloc_clone(rt, daddr);
1616 if (nrt) {
1617 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1618 if (allfrag)
1619 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1621 /* According to RFC 1981, detecting PMTU increase shouldn't be
1622 * happened within 5 mins, the recommended timer is 10 mins.
1623 * Here this route expiration time is set to ip6_rt_mtu_expires
1624 * which is 10 mins. After 10 mins the decreased pmtu is expired
1625 * and detecting PMTU increase will be automatically happened.
1627 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1628 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1630 ip6_ins_rt(nrt);
1632 out:
1633 dst_release(&rt->u.dst);
/*
 *	Misc support functions
 */
1640 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1642 struct net *net = dev_net(ort->rt6i_dev);
1643 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1645 if (rt) {
1646 rt->u.dst.input = ort->u.dst.input;
1647 rt->u.dst.output = ort->u.dst.output;
1649 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1650 rt->u.dst.error = ort->u.dst.error;
1651 rt->u.dst.dev = ort->u.dst.dev;
1652 if (rt->u.dst.dev)
1653 dev_hold(rt->u.dst.dev);
1654 rt->rt6i_idev = ort->rt6i_idev;
1655 if (rt->rt6i_idev)
1656 in6_dev_hold(rt->rt6i_idev);
1657 rt->u.dst.lastuse = jiffies;
1658 rt->rt6i_expires = 0;
1660 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1661 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1662 rt->rt6i_metric = 0;
1664 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1665 #ifdef CONFIG_IPV6_SUBTREES
1666 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1667 #endif
1668 rt->rt6i_table = ort->rt6i_table;
1670 return rt;
1673 #ifdef CONFIG_IPV6_ROUTE_INFO
1674 static struct rt6_info *rt6_get_route_info(struct net *net,
1675 struct in6_addr *prefix, int prefixlen,
1676 struct in6_addr *gwaddr, int ifindex)
1678 struct fib6_node *fn;
1679 struct rt6_info *rt = NULL;
1680 struct fib6_table *table;
1682 table = fib6_get_table(net, RT6_TABLE_INFO);
1683 if (table == NULL)
1684 return NULL;
1686 write_lock_bh(&table->tb6_lock);
1687 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1688 if (!fn)
1689 goto out;
1691 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1692 if (rt->rt6i_dev->ifindex != ifindex)
1693 continue;
1694 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1695 continue;
1696 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1697 continue;
1698 dst_hold(&rt->u.dst);
1699 break;
1701 out:
1702 write_unlock_bh(&table->tb6_lock);
1703 return rt;
1706 static struct rt6_info *rt6_add_route_info(struct net *net,
1707 struct in6_addr *prefix, int prefixlen,
1708 struct in6_addr *gwaddr, int ifindex,
1709 unsigned pref)
1711 struct fib6_config cfg = {
1712 .fc_table = RT6_TABLE_INFO,
1713 .fc_metric = IP6_RT_PRIO_USER,
1714 .fc_ifindex = ifindex,
1715 .fc_dst_len = prefixlen,
1716 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1717 RTF_UP | RTF_PREF(pref),
1718 .fc_nlinfo.pid = 0,
1719 .fc_nlinfo.nlh = NULL,
1720 .fc_nlinfo.nl_net = net,
1723 ipv6_addr_copy(&cfg.fc_dst, prefix);
1724 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1726 /* We should treat it as a default route if prefix length is 0. */
1727 if (!prefixlen)
1728 cfg.fc_flags |= RTF_DEFAULT;
1730 ip6_route_add(&cfg);
1732 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1734 #endif
1736 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1738 struct rt6_info *rt;
1739 struct fib6_table *table;
1741 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1742 if (table == NULL)
1743 return NULL;
1745 write_lock_bh(&table->tb6_lock);
1746 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1747 if (dev == rt->rt6i_dev &&
1748 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1749 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1750 break;
1752 if (rt)
1753 dst_hold(&rt->u.dst);
1754 write_unlock_bh(&table->tb6_lock);
1755 return rt;
1758 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1759 struct net_device *dev,
1760 unsigned int pref)
1762 struct fib6_config cfg = {
1763 .fc_table = RT6_TABLE_DFLT,
1764 .fc_metric = IP6_RT_PRIO_USER,
1765 .fc_ifindex = dev->ifindex,
1766 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1767 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1768 .fc_nlinfo.pid = 0,
1769 .fc_nlinfo.nlh = NULL,
1770 .fc_nlinfo.nl_net = dev_net(dev),
1773 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1775 ip6_route_add(&cfg);
1777 return rt6_get_dflt_router(gwaddr, dev);
1780 void rt6_purge_dflt_routers(struct net *net)
1782 struct rt6_info *rt;
1783 struct fib6_table *table;
1785 /* NOTE: Keep consistent with rt6_get_dflt_router */
1786 table = fib6_get_table(net, RT6_TABLE_DFLT);
1787 if (table == NULL)
1788 return;
1790 restart:
1791 read_lock_bh(&table->tb6_lock);
1792 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1793 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1794 dst_hold(&rt->u.dst);
1795 read_unlock_bh(&table->tb6_lock);
1796 ip6_del_rt(rt);
1797 goto restart;
1800 read_unlock_bh(&table->tb6_lock);
1803 static void rtmsg_to_fib6_config(struct net *net,
1804 struct in6_rtmsg *rtmsg,
1805 struct fib6_config *cfg)
1807 memset(cfg, 0, sizeof(*cfg));
1809 cfg->fc_table = RT6_TABLE_MAIN;
1810 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1811 cfg->fc_metric = rtmsg->rtmsg_metric;
1812 cfg->fc_expires = rtmsg->rtmsg_info;
1813 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1814 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1815 cfg->fc_flags = rtmsg->rtmsg_flags;
1817 cfg->fc_nlinfo.nl_net = net;
1819 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1820 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1821 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1824 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1826 struct fib6_config cfg;
1827 struct in6_rtmsg rtmsg;
1828 int err;
1830 switch(cmd) {
1831 case SIOCADDRT: /* Add a route */
1832 case SIOCDELRT: /* Delete a route */
1833 if (!capable(CAP_NET_ADMIN))
1834 return -EPERM;
1835 err = copy_from_user(&rtmsg, arg,
1836 sizeof(struct in6_rtmsg));
1837 if (err)
1838 return -EFAULT;
1840 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1842 rtnl_lock();
1843 switch (cmd) {
1844 case SIOCADDRT:
1845 err = ip6_route_add(&cfg);
1846 break;
1847 case SIOCDELRT:
1848 err = ip6_route_del(&cfg);
1849 break;
1850 default:
1851 err = -EINVAL;
1853 rtnl_unlock();
1855 return err;
1858 return -EINVAL;
/*
 *	Drop the packet on the floor
 */
1865 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1867 int type;
1868 struct dst_entry *dst = skb->dst;
1869 switch (ipstats_mib_noroutes) {
1870 case IPSTATS_MIB_INNOROUTES:
1871 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1872 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1873 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1874 IPSTATS_MIB_INADDRERRORS);
1875 break;
1877 /* FALLTHROUGH */
1878 case IPSTATS_MIB_OUTNOROUTES:
1879 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1880 ipstats_mib_noroutes);
1881 break;
1883 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1884 kfree_skb(skb);
1885 return 0;
1888 static int ip6_pkt_discard(struct sk_buff *skb)
1890 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1893 static int ip6_pkt_discard_out(struct sk_buff *skb)
1895 skb->dev = skb->dst->dev;
1896 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1899 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1901 static int ip6_pkt_prohibit(struct sk_buff *skb)
1903 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1906 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1908 skb->dev = skb->dst->dev;
1909 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1912 #endif
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */
1918 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1919 const struct in6_addr *addr,
1920 int anycast)
1922 struct net *net = dev_net(idev->dev);
1923 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1924 struct neighbour *neigh;
1926 if (rt == NULL)
1927 return ERR_PTR(-ENOMEM);
1929 dev_hold(net->loopback_dev);
1930 in6_dev_hold(idev);
1932 rt->u.dst.flags = DST_HOST;
1933 rt->u.dst.input = ip6_input;
1934 rt->u.dst.output = ip6_output;
1935 rt->rt6i_dev = net->loopback_dev;
1936 rt->rt6i_idev = idev;
1937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1939 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1940 rt->u.dst.obsolete = -1;
1942 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1943 if (anycast)
1944 rt->rt6i_flags |= RTF_ANYCAST;
1945 else
1946 rt->rt6i_flags |= RTF_LOCAL;
1947 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1948 if (IS_ERR(neigh)) {
1949 dst_free(&rt->u.dst);
1951 /* We are casting this because that is the return
1952 * value type. But an errno encoded pointer is the
1953 * same regardless of the underlying pointer type,
1954 * and that's what we are returning. So this is OK.
1956 return (struct rt6_info *) neigh;
1958 rt->rt6i_nexthop = neigh;
1960 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1961 rt->rt6i_dst.plen = 128;
1962 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1964 atomic_set(&rt->u.dst.__refcnt, 1);
1966 return rt;
/* Argument bundle that rt6_ifdown() hands to the fib6_ifdown() callback. */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any device */
	struct net		*net;	/* namespace whose null entry must be spared */
};
1974 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1976 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1977 struct net *net = ((struct arg_dev_net *)arg)->net;
1979 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1980 rt != net->ipv6.ip6_null_entry) {
1981 RT6_TRACE("deleted by ifdown %p\n", rt);
1982 return -1;
1984 return 0;
1987 void rt6_ifdown(struct net *net, struct net_device *dev)
1989 struct arg_dev_net adn = {
1990 .dev = dev,
1991 .net = net,
1994 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1995 icmp6_clean_all(fib6_ifdown, &adn);
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2004 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2006 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2007 struct inet6_dev *idev;
2008 struct net *net = dev_net(arg->dev);
2010 /* In IPv6 pmtu discovery is not optional,
2011 so that RTAX_MTU lock cannot disable it.
2012 We still use this lock to block changes
2013 caused by addrconf/ndisc.
2016 idev = __in6_dev_get(arg->dev);
2017 if (idev == NULL)
2018 return 0;
2020 /* For administrative MTU increase, there is no way to discover
2021 IPv6 PMTU increase, so PMTU increase should be updated here.
2022 Since RFC 1981 doesn't include administrative MTU increase
2023 update PMTU increase is a MUST. (i.e. jumbo frame)
2026 If new MTU is less than route PMTU, this new MTU will be the
2027 lowest MTU in the path, update the route PMTU to reflect PMTU
2028 decreases; if new MTU is greater than route PMTU, and the
2029 old MTU is the lowest MTU in the path, update the route PMTU
2030 to reflect the increase. In this case if the other nodes' MTU
2031 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2032 PMTU discouvery.
2034 if (rt->rt6i_dev == arg->dev &&
2035 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2036 (dst_mtu(&rt->u.dst) >= arg->mtu ||
2037 (dst_mtu(&rt->u.dst) < arg->mtu &&
2038 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2039 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2040 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2042 return 0;
2045 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2047 struct rt6_mtu_change_arg arg = {
2048 .dev = dev,
2049 .mtu = mtu,
2052 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2055 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2056 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2057 [RTA_OIF] = { .type = NLA_U32 },
2058 [RTA_IIF] = { .type = NLA_U32 },
2059 [RTA_PRIORITY] = { .type = NLA_U32 },
2060 [RTA_METRICS] = { .type = NLA_NESTED },
2063 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2064 struct fib6_config *cfg)
2066 struct rtmsg *rtm;
2067 struct nlattr *tb[RTA_MAX+1];
2068 int err;
2070 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2071 if (err < 0)
2072 goto errout;
2074 err = -EINVAL;
2075 rtm = nlmsg_data(nlh);
2076 memset(cfg, 0, sizeof(*cfg));
2078 cfg->fc_table = rtm->rtm_table;
2079 cfg->fc_dst_len = rtm->rtm_dst_len;
2080 cfg->fc_src_len = rtm->rtm_src_len;
2081 cfg->fc_flags = RTF_UP;
2082 cfg->fc_protocol = rtm->rtm_protocol;
2084 if (rtm->rtm_type == RTN_UNREACHABLE)
2085 cfg->fc_flags |= RTF_REJECT;
2087 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2088 cfg->fc_nlinfo.nlh = nlh;
2089 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2091 if (tb[RTA_GATEWAY]) {
2092 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2093 cfg->fc_flags |= RTF_GATEWAY;
2096 if (tb[RTA_DST]) {
2097 int plen = (rtm->rtm_dst_len + 7) >> 3;
2099 if (nla_len(tb[RTA_DST]) < plen)
2100 goto errout;
2102 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2105 if (tb[RTA_SRC]) {
2106 int plen = (rtm->rtm_src_len + 7) >> 3;
2108 if (nla_len(tb[RTA_SRC]) < plen)
2109 goto errout;
2111 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2114 if (tb[RTA_OIF])
2115 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2117 if (tb[RTA_PRIORITY])
2118 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2120 if (tb[RTA_METRICS]) {
2121 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2122 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2125 if (tb[RTA_TABLE])
2126 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2128 err = 0;
2129 errout:
2130 return err;
2133 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2135 struct fib6_config cfg;
2136 int err;
2138 err = rtm_to_fib6_config(skb, nlh, &cfg);
2139 if (err < 0)
2140 return err;
2142 return ip6_route_del(&cfg);
2145 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2147 struct fib6_config cfg;
2148 int err;
2150 err = rtm_to_fib6_config(skb, nlh, &cfg);
2151 if (err < 0)
2152 return err;
2154 return ip6_route_add(&cfg);
2157 static inline size_t rt6_nlmsg_size(void)
2159 return NLMSG_ALIGN(sizeof(struct rtmsg))
2160 + nla_total_size(16) /* RTA_SRC */
2161 + nla_total_size(16) /* RTA_DST */
2162 + nla_total_size(16) /* RTA_GATEWAY */
2163 + nla_total_size(16) /* RTA_PREFSRC */
2164 + nla_total_size(4) /* RTA_TABLE */
2165 + nla_total_size(4) /* RTA_IIF */
2166 + nla_total_size(4) /* RTA_OIF */
2167 + nla_total_size(4) /* RTA_PRIORITY */
2168 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2169 + nla_total_size(sizeof(struct rta_cacheinfo));
2172 static int rt6_fill_node(struct net *net,
2173 struct sk_buff *skb, struct rt6_info *rt,
2174 struct in6_addr *dst, struct in6_addr *src,
2175 int iif, int type, u32 pid, u32 seq,
2176 int prefix, int nowait, unsigned int flags)
2178 struct rtmsg *rtm;
2179 struct nlmsghdr *nlh;
2180 long expires;
2181 u32 table;
2183 if (prefix) { /* user wants prefix routes only */
2184 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2185 /* success since this is not a prefix route */
2186 return 1;
2190 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2191 if (nlh == NULL)
2192 return -EMSGSIZE;
2194 rtm = nlmsg_data(nlh);
2195 rtm->rtm_family = AF_INET6;
2196 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2197 rtm->rtm_src_len = rt->rt6i_src.plen;
2198 rtm->rtm_tos = 0;
2199 if (rt->rt6i_table)
2200 table = rt->rt6i_table->tb6_id;
2201 else
2202 table = RT6_TABLE_UNSPEC;
2203 rtm->rtm_table = table;
2204 NLA_PUT_U32(skb, RTA_TABLE, table);
2205 if (rt->rt6i_flags&RTF_REJECT)
2206 rtm->rtm_type = RTN_UNREACHABLE;
2207 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2208 rtm->rtm_type = RTN_LOCAL;
2209 else
2210 rtm->rtm_type = RTN_UNICAST;
2211 rtm->rtm_flags = 0;
2212 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2213 rtm->rtm_protocol = rt->rt6i_protocol;
2214 if (rt->rt6i_flags&RTF_DYNAMIC)
2215 rtm->rtm_protocol = RTPROT_REDIRECT;
2216 else if (rt->rt6i_flags & RTF_ADDRCONF)
2217 rtm->rtm_protocol = RTPROT_KERNEL;
2218 else if (rt->rt6i_flags&RTF_DEFAULT)
2219 rtm->rtm_protocol = RTPROT_RA;
2221 if (rt->rt6i_flags&RTF_CACHE)
2222 rtm->rtm_flags |= RTM_F_CLONED;
2224 if (dst) {
2225 NLA_PUT(skb, RTA_DST, 16, dst);
2226 rtm->rtm_dst_len = 128;
2227 } else if (rtm->rtm_dst_len)
2228 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2229 #ifdef CONFIG_IPV6_SUBTREES
2230 if (src) {
2231 NLA_PUT(skb, RTA_SRC, 16, src);
2232 rtm->rtm_src_len = 128;
2233 } else if (rtm->rtm_src_len)
2234 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2235 #endif
2236 if (iif) {
2237 #ifdef CONFIG_IPV6_MROUTE
2238 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2239 int err = ip6mr_get_route(net, skb, rtm, nowait);
2240 if (err <= 0) {
2241 if (!nowait) {
2242 if (err == 0)
2243 return 0;
2244 goto nla_put_failure;
2245 } else {
2246 if (err == -EMSGSIZE)
2247 goto nla_put_failure;
2250 } else
2251 #endif
2252 NLA_PUT_U32(skb, RTA_IIF, iif);
2253 } else if (dst) {
2254 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2255 struct in6_addr saddr_buf;
2256 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2257 dst, 0, &saddr_buf) == 0)
2258 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2261 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2262 goto nla_put_failure;
2264 if (rt->u.dst.neighbour)
2265 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2267 if (rt->u.dst.dev)
2268 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2270 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2272 if (!(rt->rt6i_flags & RTF_EXPIRES))
2273 expires = 0;
2274 else if (rt->rt6i_expires - jiffies < INT_MAX)
2275 expires = rt->rt6i_expires - jiffies;
2276 else
2277 expires = INT_MAX;
2279 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2280 expires, rt->u.dst.error) < 0)
2281 goto nla_put_failure;
2283 return nlmsg_end(skb, nlh);
2285 nla_put_failure:
2286 nlmsg_cancel(skb, nlh);
2287 return -EMSGSIZE;
2290 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2292 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2293 int prefix;
2295 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2296 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2297 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2298 } else
2299 prefix = 0;
2301 return rt6_fill_node(arg->net,
2302 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2303 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2304 prefix, 0, NLM_F_MULTI);
2307 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2309 struct net *net = sock_net(in_skb->sk);
2310 struct nlattr *tb[RTA_MAX+1];
2311 struct rt6_info *rt;
2312 struct sk_buff *skb;
2313 struct rtmsg *rtm;
2314 struct flowi fl;
2315 int err, iif = 0;
2317 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2318 if (err < 0)
2319 goto errout;
2321 err = -EINVAL;
2322 memset(&fl, 0, sizeof(fl));
2324 if (tb[RTA_SRC]) {
2325 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2326 goto errout;
2328 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2331 if (tb[RTA_DST]) {
2332 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2333 goto errout;
2335 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2338 if (tb[RTA_IIF])
2339 iif = nla_get_u32(tb[RTA_IIF]);
2341 if (tb[RTA_OIF])
2342 fl.oif = nla_get_u32(tb[RTA_OIF]);
2344 if (iif) {
2345 struct net_device *dev;
2346 dev = __dev_get_by_index(net, iif);
2347 if (!dev) {
2348 err = -ENODEV;
2349 goto errout;
2353 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2354 if (skb == NULL) {
2355 err = -ENOBUFS;
2356 goto errout;
2359 /* Reserve room for dummy headers, this skb can pass
2360 through good chunk of routing engine.
2362 skb_reset_mac_header(skb);
2363 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2365 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2366 skb->dst = &rt->u.dst;
2368 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2369 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2370 nlh->nlmsg_seq, 0, 0, 0);
2371 if (err < 0) {
2372 kfree_skb(skb);
2373 goto errout;
2376 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2377 errout:
2378 return err;
2381 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2383 struct sk_buff *skb;
2384 struct net *net = info->nl_net;
2385 u32 seq;
2386 int err;
2388 err = -ENOBUFS;
2389 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2391 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2392 if (skb == NULL)
2393 goto errout;
2395 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2396 event, info->pid, seq, 0, 0, 0);
2397 if (err < 0) {
2398 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2399 WARN_ON(err == -EMSGSIZE);
2400 kfree_skb(skb);
2401 goto errout;
2403 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2404 info->nlh, gfp_any());
2405 errout:
2406 if (err < 0)
2407 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2410 static int ip6_route_dev_notify(struct notifier_block *this,
2411 unsigned long event, void *data)
2413 struct net_device *dev = (struct net_device *)data;
2414 struct net *net = dev_net(dev);
2416 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2417 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2418 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2419 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2420 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2421 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2422 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2424 #endif
2427 return NOTIFY_OK;
/*
 *	/proc
 */
2434 #ifdef CONFIG_PROC_FS
/*
 * NOTE(review): presumably the length of one text line describing a
 * route (three 32-digit addresses, prefix lengths, counters, newline)
 * -- confirm; nothing in the visible /proc code references it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): looks like buffer bookkeeping for a hand-rolled /proc
 * read routine; the seq_file based handlers below do not use it --
 * confirm whether it still has users.
 */
struct rt6_proc_arg
{
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2447 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2449 struct seq_file *m = p_arg;
2451 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2453 #ifdef CONFIG_IPV6_SUBTREES
2454 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2455 #else
2456 seq_puts(m, "00000000000000000000000000000000 00 ");
2457 #endif
2459 if (rt->rt6i_nexthop) {
2460 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2461 } else {
2462 seq_puts(m, "00000000000000000000000000000000");
2464 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2465 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2466 rt->u.dst.__use, rt->rt6i_flags,
2467 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2468 return 0;
2471 static int ipv6_route_show(struct seq_file *m, void *v)
2473 struct net *net = (struct net *)m->private;
2474 fib6_clean_all(net, rt6_info_route, 0, m);
2475 return 0;
2478 static int ipv6_route_open(struct inode *inode, struct file *file)
2480 return single_open_net(inode, file, ipv6_route_show);
2483 static const struct file_operations ipv6_route_proc_fops = {
2484 .owner = THIS_MODULE,
2485 .open = ipv6_route_open,
2486 .read = seq_read,
2487 .llseek = seq_lseek,
2488 .release = single_release_net,
2491 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2493 struct net *net = (struct net *)seq->private;
2494 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2495 net->ipv6.rt6_stats->fib_nodes,
2496 net->ipv6.rt6_stats->fib_route_nodes,
2497 net->ipv6.rt6_stats->fib_rt_alloc,
2498 net->ipv6.rt6_stats->fib_rt_entries,
2499 net->ipv6.rt6_stats->fib_rt_cache,
2500 atomic_read(&net->ipv6.ip6_dst_ops->entries),
2501 net->ipv6.rt6_stats->fib_discarded_routes);
2503 return 0;
2506 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2508 return single_open_net(inode, file, rt6_stats_seq_show);
2511 static const struct file_operations rt6_stats_seq_fops = {
2512 .owner = THIS_MODULE,
2513 .open = rt6_stats_seq_open,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
2516 .release = single_release_net,
2518 #endif /* CONFIG_PROC_FS */
2520 #ifdef CONFIG_SYSCTL
2522 static
2523 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2524 void __user *buffer, size_t *lenp, loff_t *ppos)
2526 struct net *net = current->nsproxy->net_ns;
2527 int delay = net->ipv6.sysctl.flush_delay;
2528 if (write) {
2529 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2530 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2531 return 0;
2532 } else
2533 return -EINVAL;
2536 ctl_table ipv6_route_table_template[] = {
2538 .procname = "flush",
2539 .data = &init_net.ipv6.sysctl.flush_delay,
2540 .maxlen = sizeof(int),
2541 .mode = 0200,
2542 .proc_handler = ipv6_sysctl_rtcache_flush
2545 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2546 .procname = "gc_thresh",
2547 .data = &ip6_dst_ops_template.gc_thresh,
2548 .maxlen = sizeof(int),
2549 .mode = 0644,
2550 .proc_handler = proc_dointvec,
2553 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2554 .procname = "max_size",
2555 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2556 .maxlen = sizeof(int),
2557 .mode = 0644,
2558 .proc_handler = proc_dointvec,
2561 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2562 .procname = "gc_min_interval",
2563 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2564 .maxlen = sizeof(int),
2565 .mode = 0644,
2566 .proc_handler = proc_dointvec_jiffies,
2567 .strategy = sysctl_jiffies,
2570 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2571 .procname = "gc_timeout",
2572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2573 .maxlen = sizeof(int),
2574 .mode = 0644,
2575 .proc_handler = proc_dointvec_jiffies,
2576 .strategy = sysctl_jiffies,
2579 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2580 .procname = "gc_interval",
2581 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2582 .maxlen = sizeof(int),
2583 .mode = 0644,
2584 .proc_handler = proc_dointvec_jiffies,
2585 .strategy = sysctl_jiffies,
2588 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2589 .procname = "gc_elasticity",
2590 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
2593 .proc_handler = proc_dointvec_jiffies,
2594 .strategy = sysctl_jiffies,
2597 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2598 .procname = "mtu_expires",
2599 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2600 .maxlen = sizeof(int),
2601 .mode = 0644,
2602 .proc_handler = proc_dointvec_jiffies,
2603 .strategy = sysctl_jiffies,
2606 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2607 .procname = "min_adv_mss",
2608 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2609 .maxlen = sizeof(int),
2610 .mode = 0644,
2611 .proc_handler = proc_dointvec_jiffies,
2612 .strategy = sysctl_jiffies,
2615 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2616 .procname = "gc_min_interval_ms",
2617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2618 .maxlen = sizeof(int),
2619 .mode = 0644,
2620 .proc_handler = proc_dointvec_ms_jiffies,
2621 .strategy = sysctl_ms_jiffies,
2623 { .ctl_name = 0 }
2626 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2628 struct ctl_table *table;
2630 table = kmemdup(ipv6_route_table_template,
2631 sizeof(ipv6_route_table_template),
2632 GFP_KERNEL);
2634 if (table) {
2635 table[0].data = &net->ipv6.sysctl.flush_delay;
2636 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
2637 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2638 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2639 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2640 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2641 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2642 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2643 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2646 return table;
2648 #endif
2650 static int ip6_route_net_init(struct net *net)
2652 int ret = -ENOMEM;
2654 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2655 sizeof(*net->ipv6.ip6_dst_ops),
2656 GFP_KERNEL);
2657 if (!net->ipv6.ip6_dst_ops)
2658 goto out;
2659 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
2661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry)
2665 goto out_ip6_dst_ops;
2666 net->ipv6.ip6_null_entry->u.dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry;
2668 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2670 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2671 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2672 sizeof(*net->ipv6.ip6_prohibit_entry),
2673 GFP_KERNEL);
2674 if (!net->ipv6.ip6_prohibit_entry)
2675 goto out_ip6_null_entry;
2676 net->ipv6.ip6_prohibit_entry->u.dst.path =
2677 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2678 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2680 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2681 sizeof(*net->ipv6.ip6_blk_hole_entry),
2682 GFP_KERNEL);
2683 if (!net->ipv6.ip6_blk_hole_entry)
2684 goto out_ip6_prohibit_entry;
2685 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2686 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2687 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
2688 #endif
2690 net->ipv6.sysctl.flush_delay = 0;
2691 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2692 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2693 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2694 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2695 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2696 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2697 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2699 #ifdef CONFIG_PROC_FS
2700 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2701 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2702 #endif
2703 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2705 ret = 0;
2706 out:
2707 return ret;
2709 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710 out_ip6_prohibit_entry:
2711 kfree(net->ipv6.ip6_prohibit_entry);
2712 out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry);
2714 #endif
2715 out_ip6_dst_ops:
2716 release_net(net->ipv6.ip6_dst_ops->dst_net);
2717 kfree(net->ipv6.ip6_dst_ops);
2718 goto out;
2721 static void ip6_route_net_exit(struct net *net)
2723 #ifdef CONFIG_PROC_FS
2724 proc_net_remove(net, "ipv6_route");
2725 proc_net_remove(net, "rt6_stats");
2726 #endif
2727 kfree(net->ipv6.ip6_null_entry);
2728 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2729 kfree(net->ipv6.ip6_prohibit_entry);
2730 kfree(net->ipv6.ip6_blk_hole_entry);
2731 #endif
2732 release_net(net->ipv6.ip6_dst_ops->dst_net);
2733 kfree(net->ipv6.ip6_dst_ops);
2736 static struct pernet_operations ip6_route_net_ops = {
2737 .init = ip6_route_net_init,
2738 .exit = ip6_route_net_exit,
2741 static struct notifier_block ip6_route_dev_notifier = {
2742 .notifier_call = ip6_route_dev_notify,
2743 .priority = 0,
2746 int __init ip6_route_init(void)
2748 int ret;
2750 ret = -ENOMEM;
2751 ip6_dst_ops_template.kmem_cachep =
2752 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2753 SLAB_HWCACHE_ALIGN, NULL);
2754 if (!ip6_dst_ops_template.kmem_cachep)
2755 goto out;
2757 ret = register_pernet_subsys(&ip6_route_net_ops);
2758 if (ret)
2759 goto out_kmem_cache;
2761 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2763 /* Registering of the loopback is done before this portion of code,
2764 * the loopback reference in rt6_info will not be taken, do it
2765 * manually for init_net */
2766 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2767 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2768 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2769 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2770 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2771 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2772 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2773 #endif
2774 ret = fib6_init();
2775 if (ret)
2776 goto out_register_subsys;
2778 ret = xfrm6_init();
2779 if (ret)
2780 goto out_fib6_init;
2782 ret = fib6_rules_init();
2783 if (ret)
2784 goto xfrm6_init;
2786 ret = -ENOBUFS;
2787 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2788 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2789 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2790 goto fib6_rules_init;
2792 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2793 if (ret)
2794 goto fib6_rules_init;
2796 out:
2797 return ret;
2799 fib6_rules_init:
2800 fib6_rules_cleanup();
2801 xfrm6_init:
2802 xfrm6_fini();
2803 out_fib6_init:
2804 fib6_gc_cleanup();
2805 out_register_subsys:
2806 unregister_pernet_subsys(&ip6_route_net_ops);
2807 out_kmem_cache:
2808 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2809 goto out;
2812 void ip6_route_cleanup(void)
2814 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2815 fib6_rules_cleanup();
2816 xfrm6_fini();
2817 fib6_gc_cleanup();
2818 unregister_pernet_subsys(&ip6_route_net_ops);
2819 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);