drm/i915: Only bind to function 0 of the PCI device
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / route.c
blob7659d6f16e6bae4b7e8a1701161181f627c0e41f
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 #define CLONE_OFFLINK_ROUTE 0
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
100 static struct dst_ops ip6_dst_ops_template = {
101 .family = AF_INET6,
102 .protocol = cpu_to_be16(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out,
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
118 static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 static struct rt6_info ip6_null_entry_template = {
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
137 .rt6i_protocol = RTPROT_KERNEL,
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
142 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
144 static int ip6_pkt_prohibit(struct sk_buff *skb);
145 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
147 static struct rt6_info ip6_prohibit_entry_template = {
148 .dst = {
149 .__refcnt = ATOMIC_INIT(1),
150 .__use = 1,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_prohibit,
155 .output = ip6_pkt_prohibit_out,
157 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
158 .rt6i_protocol = RTPROT_KERNEL,
159 .rt6i_metric = ~(u32) 0,
160 .rt6i_ref = ATOMIC_INIT(1),
163 static struct rt6_info ip6_blk_hole_entry_template = {
164 .dst = {
165 .__refcnt = ATOMIC_INIT(1),
166 .__use = 1,
167 .obsolete = -1,
168 .error = -EINVAL,
169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
170 .input = dst_discard,
171 .output = dst_discard,
173 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
174 .rt6i_protocol = RTPROT_KERNEL,
175 .rt6i_metric = ~(u32) 0,
176 .rt6i_ref = ATOMIC_INIT(1),
179 #endif
/*
 * Allocate a dst entry through @ops and return it typed as an rt6_info.
 * Whatever dst_alloc() returns (including a failure value) is passed on.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *entry = (struct rt6_info *)dst_alloc(ops);

	return entry;
}
187 static void ip6_dst_destroy(struct dst_entry *dst)
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
198 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
199 int how)
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
203 struct net_device *loopback_dev =
204 dev_net(dev)->loopback_dev;
206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
211 in6_dev_put(idev);
216 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
222 static inline int rt6_need_strict(struct in6_addr *daddr)
224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
229 * Route lookup. Any table->tb6_lock is implied.
232 static inline struct rt6_info *rt6_device_match(struct net *net,
233 struct rt6_info *rt,
234 struct in6_addr *saddr,
235 int oif,
236 int flags)
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
241 if (!oif && ipv6_addr_any(saddr))
242 goto out;
244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
245 struct net_device *dev = sprt->rt6i_dev;
247 if (oif) {
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
253 if (flags & RT6_LOOKUP_F_IFACE && oif)
254 continue;
255 if (local && (!oif ||
256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
259 local = sprt;
261 } else {
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
264 return sprt;
268 if (oif) {
269 if (local)
270 return local;
272 if (flags & RT6_LOOKUP_F_IFACE)
273 return net->ipv6.ip6_null_entry;
275 out:
276 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a neighbour solicitation to the solicited-node
 * multicast address of the next hop.  The interval check is what
 * rate-limits the probes.  A NULL @rt or a route without a next hop is
 * ignored.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held -- confirm this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* cheap unlocked pre-check */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* became valid meanwhile, or probed too recently */
		read_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
315 * Default Router Selection (RFC 2461 6.3.6)
317 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
319 struct net_device *dev = rt->rt6i_dev;
320 if (!oif || dev->ifindex == oif)
321 return 2;
322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
328 static inline int rt6_check_neigh(struct rt6_info *rt)
330 struct neighbour *neigh = rt->rt6i_nexthop;
331 int m;
332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
338 m = 2;
339 #ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342 #endif
343 else
344 m = 1;
345 read_unlock_bh(&neigh->lock);
346 } else
347 m = 0;
348 return m;
351 static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
354 int m, n;
356 m = rt6_check_dev(rt, oif);
357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
358 return -1;
359 #ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 #endif
362 n = rt6_check_neigh(rt);
363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
364 return -1;
365 return m;
368 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
371 int m;
373 if (rt6_check_expired(rt))
374 goto out;
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
389 out:
390 return match;
393 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
397 struct rt6_info *rt, *match;
398 int mpri = -1;
400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 rt = rt->dst.rt6_next)
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 rt = rt->dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
408 return match;
411 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413 struct rt6_info *match, *rt0;
414 struct net *net;
416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __func__, fn->leaf, oif);
419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
425 if (!match &&
426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->dst.rt6_next;
429 /* no entries matched; do round-robin */
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
433 if (next != rt0)
434 fn->rr_ptr = next;
437 RT6_TRACE("%s() => %p\n",
438 __func__, match);
440 net = dev_net(rt0->rt6i_dev);
441 return match ? match : net->ipv6.ip6_null_entry;
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on @dev
 * from gateway @gwaddr.
 *
 * After validating the option, the matching route is looked up:
 *   - zero lifetime deletes an existing route;
 *   - non-zero lifetime adds the route if missing, otherwise refreshes
 *     its preference bits;
 *   - the expiry is then cleared (non-finite lifetime) or set to
 *     jiffies + HZ * lifetime.
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* the option carries a full address: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
/*
 * BACKTRACK - climb the FIB tree when the current lookup produced the
 * null entry.
 *
 * Expands in a function that provides the locals `rt` (struct rt6_info *)
 * and `fn` (struct fib6_node *) and the labels `out` and `restart`.
 * While `rt` is the namespace's null entry, the macro moves `fn` to its
 * parent (or into the parent's source subtree, looked up with @saddr,
 * when one exists and `fn` is not it).  It jumps to `restart` as soon as
 * a node carrying route info is reached, or to `out` once the top-level
 * root has been hit.
 *
 * Both arguments are parenthesized so that non-trivial expressions can
 * be passed safely.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
536 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
538 struct flowi *fl, int flags)
540 struct fib6_node *fn;
541 struct rt6_info *rt;
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545 restart:
546 rt = fn->leaf;
547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
548 BACKTRACK(net, &fl->fl6_src);
549 out:
550 dst_use(&rt->dst, jiffies);
551 read_unlock_bh(&table->tb6_lock);
552 return rt;
556 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
559 struct flowi fl = {
560 .oif = oif,
561 .nl_u = {
562 .ip6_u = {
563 .daddr = *daddr,
567 struct dst_entry *dst;
568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
570 if (saddr) {
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
575 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
576 if (dst->error == 0)
577 return (struct rt6_info *) dst;
579 dst_release(dst);
581 return NULL;
584 EXPORT_SYMBOL(rt6_lookup);
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In either case the route may be destroyed afterwards
   unless the caller holds its own reference to it.
 */
592 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
594 int err;
595 struct fib6_table *table;
597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
599 err = fib6_add(&table->tb6_root, rt, info);
600 write_unlock_bh(&table->tb6_lock);
602 return err;
605 int ip6_ins_rt(struct rt6_info *rt)
607 struct nl_info info = {
608 .nl_net = dev_net(rt->rt6i_dev),
610 return __ip6_ins_rt(rt, &info);
613 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
616 struct rt6_info *rt;
619 * Clone the route.
622 rt = ip6_rt_copy(ort);
624 if (rt) {
625 struct neighbour *neigh;
626 int attempts = !in_softirq();
628 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
629 if (rt->rt6i_dst.plen != 128 &&
630 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
631 rt->rt6i_flags |= RTF_ANYCAST;
632 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
635 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
636 rt->rt6i_dst.plen = 128;
637 rt->rt6i_flags |= RTF_CACHE;
638 rt->dst.flags |= DST_HOST;
640 #ifdef CONFIG_IPV6_SUBTREES
641 if (rt->rt6i_src.plen && saddr) {
642 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
643 rt->rt6i_src.plen = 128;
645 #endif
647 retry:
648 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649 if (IS_ERR(neigh)) {
650 struct net *net = dev_net(rt->rt6i_dev);
651 int saved_rt_min_interval =
652 net->ipv6.sysctl.ip6_rt_gc_min_interval;
653 int saved_rt_elasticity =
654 net->ipv6.sysctl.ip6_rt_gc_elasticity;
656 if (attempts-- > 0) {
657 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
658 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
660 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
662 net->ipv6.sysctl.ip6_rt_gc_elasticity =
663 saved_rt_elasticity;
664 net->ipv6.sysctl.ip6_rt_gc_min_interval =
665 saved_rt_min_interval;
666 goto retry;
669 if (net_ratelimit())
670 printk(KERN_WARNING
671 "ipv6: Neighbour table overflow.\n");
672 dst_free(&rt->dst);
673 return NULL;
675 rt->rt6i_nexthop = neigh;
679 return rt;
682 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
684 struct rt6_info *rt = ip6_rt_copy(ort);
685 if (rt) {
686 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
687 rt->rt6i_dst.plen = 128;
688 rt->rt6i_flags |= RTF_CACHE;
689 rt->dst.flags |= DST_HOST;
690 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
692 return rt;
/*
 * ip6_pol_route - per-table route lookup shared by the input and output
 * paths (see ip6_pol_route_input() / ip6_pol_route_output()).
 *
 * Selects a route for flow @fl in @table with rt6_select().  When the
 * selected route is neither the null entry nor already a cache entry
 * (RTF_CACHE), the table lock is dropped and a per-destination copy is
 * created and inserted with ip6_ins_rt().
 *
 * @oif:   interface index handed to rt6_select().
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here (as "strict").
 *
 * Every path that reaches out2 has taken a reference on the returned
 * route with dst_hold(); lastuse and __use are updated before returning.
 * The labels restart/out and the locals fn/rt are used by BACKTRACK().
 */
695 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
696 struct flowi *fl, int flags)
698 struct fib6_node *fn;
699 struct rt6_info *rt, *nrt;
700 int strict = 0;
/* bounds the number of relookups after a failed insert */
701 int attempts = 3;
702 int err;
/* reachability is only required while forwarding is disabled */
703 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
705 strict |= flags & RT6_LOOKUP_F_IFACE;
707 relookup:
708 read_lock_bh(&table->tb6_lock);
710 restart_2:
711 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
713 restart:
714 rt = rt6_select(fn, oif, strict | reachable);
/* climbs the tree while rt is the null entry; may jump to restart or out */
716 BACKTRACK(net, &fl->fl6_src);
717 if (rt == net->ipv6.ip6_null_entry ||
718 rt->rt6i_flags & RTF_CACHE)
719 goto out;
/* pin rt before the table lock is dropped */
721 dst_hold(&rt->dst);
722 read_unlock_bh(&table->tb6_lock);
/* no next hop bound yet: make a copy with its own neighbour entry */
724 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
725 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
726 else {
727 #if CLONE_OFFLINK_ROUTE
728 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
729 #else
/* cloning is compiled out: return rt itself with the reference taken above */
730 goto out2;
731 #endif
/* swap the reference from the original route to the copy (or the null entry) */
734 dst_release(&rt->dst);
735 rt = nrt ? : net->ipv6.ip6_null_entry;
737 dst_hold(&rt->dst);
738 if (nrt) {
739 err = ip6_ins_rt(nrt);
740 if (!err)
741 goto out2;
744 if (--attempts <= 0)
745 goto out2;
748 * Race condition! In the gap, when table->tb6_lock was
749 * released someone could insert this route. Relookup.
751 dst_release(&rt->dst);
752 goto relookup;
754 out:
/* first pass required reachability: retry once without that constraint */
755 if (reachable) {
756 reachable = 0;
757 goto restart_2;
759 dst_hold(&rt->dst);
760 read_unlock_bh(&table->tb6_lock);
761 out2:
762 rt->dst.lastuse = jiffies;
763 rt->dst.__use++;
765 return rt;
768 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
769 struct flowi *fl, int flags)
771 return ip6_pol_route(net, table, fl->iif, fl, flags);
774 void ip6_route_input(struct sk_buff *skb)
776 struct ipv6hdr *iph = ipv6_hdr(skb);
777 struct net *net = dev_net(skb->dev);
778 int flags = RT6_LOOKUP_F_HAS_SADDR;
779 struct flowi fl = {
780 .iif = skb->dev->ifindex,
781 .nl_u = {
782 .ip6_u = {
783 .daddr = iph->daddr,
784 .saddr = iph->saddr,
785 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
788 .mark = skb->mark,
789 .proto = iph->nexthdr,
792 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
793 flags |= RT6_LOOKUP_F_IFACE;
795 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
798 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
799 struct flowi *fl, int flags)
801 return ip6_pol_route(net, table, fl->oif, fl, flags);
804 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
805 struct flowi *fl)
807 int flags = 0;
809 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
810 flags |= RT6_LOOKUP_F_IFACE;
812 if (!ipv6_addr_any(&fl->fl6_src))
813 flags |= RT6_LOOKUP_F_HAS_SADDR;
814 else if (sk)
815 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
817 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
820 EXPORT_SYMBOL(ip6_route_output);
822 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
824 struct rt6_info *ort = (struct rt6_info *) *dstp;
825 struct rt6_info *rt = (struct rt6_info *)
826 dst_alloc(&ip6_dst_blackhole_ops);
827 struct dst_entry *new = NULL;
829 if (rt) {
830 new = &rt->dst;
832 atomic_set(&new->__refcnt, 1);
833 new->__use = 1;
834 new->input = dst_discard;
835 new->output = dst_discard;
837 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
838 new->dev = ort->dst.dev;
839 if (new->dev)
840 dev_hold(new->dev);
841 rt->rt6i_idev = ort->rt6i_idev;
842 if (rt->rt6i_idev)
843 in6_dev_hold(rt->rt6i_idev);
844 rt->rt6i_expires = 0;
846 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
847 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
848 rt->rt6i_metric = 0;
850 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
851 #ifdef CONFIG_IPV6_SUBTREES
852 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
853 #endif
855 dst_free(new);
858 dst_release(*dstp);
859 *dstp = new;
860 return new ? 0 : -ENOMEM;
862 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 * Destination cache support functions
868 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
870 struct rt6_info *rt;
872 rt = (struct rt6_info *) dst;
874 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
875 return dst;
877 return NULL;
880 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
882 struct rt6_info *rt = (struct rt6_info *) dst;
884 if (rt) {
885 if (rt->rt6i_flags & RTF_CACHE) {
886 if (rt6_check_expired(rt)) {
887 ip6_del_rt(rt);
888 dst = NULL;
890 } else {
891 dst_release(dst);
892 dst = NULL;
895 return dst;
898 static void ip6_link_failure(struct sk_buff *skb)
900 struct rt6_info *rt;
902 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
904 rt = (struct rt6_info *) skb_dst(skb);
905 if (rt) {
906 if (rt->rt6i_flags&RTF_CACHE) {
907 dst_set_expires(&rt->dst, 0);
908 rt->rt6i_flags |= RTF_EXPIRES;
909 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
910 rt->rt6i_node->fn_sernum = -1;
914 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
916 struct rt6_info *rt6 = (struct rt6_info*)dst;
918 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
919 rt6->rt6i_flags |= RTF_MODIFIED;
920 if (mtu < IPV6_MIN_MTU) {
921 mtu = IPV6_MIN_MTU;
922 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
924 dst->metrics[RTAX_MTU-1] = mtu;
925 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
929 static int ipv6_get_mtu(struct net_device *dev);
931 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
933 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
935 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
936 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
939 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
940 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
941 * IPV6_MAXPLEN is also valid and means: "any MSS,
942 * rely only on pmtu discovery"
944 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
945 mtu = IPV6_MAXPLEN;
946 return mtu;
949 static struct dst_entry *icmp6_dst_gc_list;
950 static DEFINE_SPINLOCK(icmp6_dst_lock);
952 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
953 struct neighbour *neigh,
954 const struct in6_addr *addr)
956 struct rt6_info *rt;
957 struct inet6_dev *idev = in6_dev_get(dev);
958 struct net *net = dev_net(dev);
960 if (unlikely(idev == NULL))
961 return NULL;
963 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
964 if (unlikely(rt == NULL)) {
965 in6_dev_put(idev);
966 goto out;
969 dev_hold(dev);
970 if (neigh)
971 neigh_hold(neigh);
972 else {
973 neigh = ndisc_get_neigh(dev, addr);
974 if (IS_ERR(neigh))
975 neigh = NULL;
978 rt->rt6i_dev = dev;
979 rt->rt6i_idev = idev;
980 rt->rt6i_nexthop = neigh;
981 atomic_set(&rt->dst.__refcnt, 1);
982 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
983 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
984 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
985 rt->dst.output = ip6_output;
987 #if 0 /* there's no chance to use these for ndisc */
988 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
989 ? DST_HOST
990 : 0;
991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
992 rt->rt6i_dst.plen = 128;
993 #endif
995 spin_lock_bh(&icmp6_dst_lock);
996 rt->dst.next = icmp6_dst_gc_list;
997 icmp6_dst_gc_list = &rt->dst;
998 spin_unlock_bh(&icmp6_dst_lock);
1000 fib6_force_start_gc(net);
1002 out:
1003 return &rt->dst;
1006 int icmp6_dst_gc(void)
1008 struct dst_entry *dst, *next, **pprev;
1009 int more = 0;
1011 next = NULL;
1013 spin_lock_bh(&icmp6_dst_lock);
1014 pprev = &icmp6_dst_gc_list;
1016 while ((dst = *pprev) != NULL) {
1017 if (!atomic_read(&dst->__refcnt)) {
1018 *pprev = dst->next;
1019 dst_free(dst);
1020 } else {
1021 pprev = &dst->next;
1022 ++more;
1026 spin_unlock_bh(&icmp6_dst_lock);
1028 return more;
1031 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1032 void *arg)
1034 struct dst_entry *dst, **pprev;
1036 spin_lock_bh(&icmp6_dst_lock);
1037 pprev = &icmp6_dst_gc_list;
1038 while ((dst = *pprev) != NULL) {
1039 struct rt6_info *rt = (struct rt6_info *) dst;
1040 if (func(rt, arg)) {
1041 *pprev = dst->next;
1042 dst_free(dst);
1043 } else {
1044 pprev = &dst->next;
1047 spin_unlock_bh(&icmp6_dst_lock);
1050 static int ip6_dst_gc(struct dst_ops *ops)
1052 unsigned long now = jiffies;
1053 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1054 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1055 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1059 int entries;
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 entries <= rt_max_size)
1064 goto out;
1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now;
1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1072 out:
1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1074 return entries > rt_max_size;
1077 /* Clean host part of a prefix. Not necessary in radix tree,
1078 but results in cleaner routing tables.
1080 Remove it only when all the things will work!
1083 static int ipv6_get_mtu(struct net_device *dev)
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1088 rcu_read_lock();
1089 idev = __in6_dev_get(dev);
1090 if (idev)
1091 mtu = idev->cnf.mtu6;
1092 rcu_read_unlock();
1093 return mtu;
1096 int ip6_dst_hoplimit(struct dst_entry *dst)
1098 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1099 if (hoplimit < 0) {
1100 struct net_device *dev = dst->dev;
1101 struct inet6_dev *idev;
1103 rcu_read_lock();
1104 idev = __in6_dev_get(dev);
1105 if (idev)
1106 hoplimit = idev->cnf.hop_limit;
1107 else
1108 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1109 rcu_read_unlock();
1111 return hoplimit;
/*
 * ip6_route_add - build a route from @cfg and insert it into the FIB
 * table selected by cfg->fc_table.
 *
 * On the success path the function returns the result of __ip6_ins_rt().
 * On failure it jumps to out:, which drops the device / inet6_dev
 * references taken here, frees the half-built route and returns a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or
 * the error from the neighbour lookup).
 *
 * Note that @cfg is modified: fc_metric, fc_protocol and
 * fc_nlinfo.nl_net may be overwritten with defaults.
 */
1118 int ip6_route_add(struct fib6_config *cfg)
1120 int err;
1121 struct net *net = cfg->fc_nlinfo.nl_net;
1122 struct rt6_info *rt = NULL;
1123 struct net_device *dev = NULL;
1124 struct inet6_dev *idev = NULL;
1125 struct fib6_table *table;
1126 int addr_type;
/* prefix lengths cannot exceed the 128 bits of an IPv6 address */
1128 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1129 return -EINVAL;
/* a source prefix is only accepted when subtrees are compiled in */
1130 #ifndef CONFIG_IPV6_SUBTREES
1131 if (cfg->fc_src_len)
1132 return -EINVAL;
1133 #endif
/* an explicit ifindex must name a device that has an inet6_dev */
1134 if (cfg->fc_ifindex) {
1135 err = -ENODEV;
1136 dev = dev_get_by_index(net, cfg->fc_ifindex);
1137 if (!dev)
1138 goto out;
1139 idev = in6_dev_get(dev);
1140 if (!idev)
1141 goto out;
/* a zero metric defaults to IP6_RT_PRIO_USER */
1144 if (cfg->fc_metric == 0)
1145 cfg->fc_metric = IP6_RT_PRIO_USER;
1147 table = fib6_new_table(net, cfg->fc_table);
1148 if (table == NULL) {
1149 err = -ENOBUFS;
1150 goto out;
1153 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1155 if (rt == NULL) {
1156 err = -ENOMEM;
1157 goto out;
1160 rt->dst.obsolete = -1;
/* NOTE(review): the embedded numbering skips 1163, so the false arm of this conditional is not visible in this copy */
1161 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1162 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
/* an unspecified protocol is recorded as RTPROT_BOOT */
1165 if (cfg->fc_protocol == RTPROT_UNSPEC)
1166 cfg->fc_protocol = RTPROT_BOOT;
1167 rt->rt6i_protocol = cfg->fc_protocol;
1169 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* input handler: multicast, local delivery, or forwarding */
1171 if (addr_type & IPV6_ADDR_MULTICAST)
1172 rt->dst.input = ip6_mc_input;
1173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
1175 else
1176 rt->dst.input = ip6_forward;
1178 rt->dst.output = ip6_output;
/* store the destination masked to its prefix length */
1180 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1181 rt->rt6i_dst.plen = cfg->fc_dst_len;
1182 if (rt->rt6i_dst.plen == 128)
1183 rt->dst.flags = DST_HOST;
1185 #ifdef CONFIG_IPV6_SUBTREES
1186 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1187 rt->rt6i_src.plen = cfg->fc_src_len;
1188 #endif
1190 rt->rt6i_metric = cfg->fc_metric;
1192 /* We cannot add true routes via loopback here,
1193 they would result in kernel looping; promote them to reject routes
1195 if ((cfg->fc_flags & RTF_REJECT) ||
1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
1198 /* hold loopback dev/idev if we haven't done so. */
1199 if (dev != net->loopback_dev) {
1200 if (dev) {
1201 dev_put(dev);
1202 in6_dev_put(idev);
1204 dev = net->loopback_dev;
1205 dev_hold(dev);
1206 idev = in6_dev_get(dev);
1207 if (!idev) {
1208 err = -ENODEV;
1209 goto out;
/* reject routes discard packets and report -ENETUNREACH */
1212 rt->dst.output = ip6_pkt_discard_out;
1213 rt->dst.input = ip6_pkt_discard;
1214 rt->dst.error = -ENETUNREACH;
1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1216 goto install_route;
/* gateway routes: validate the gateway and resolve the egress device */
1219 if (cfg->fc_flags & RTF_GATEWAY) {
1220 struct in6_addr *gw_addr;
1221 int gwa_type;
1223 gw_addr = &cfg->fc_gateway;
1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1225 gwa_type = ipv6_addr_type(gw_addr);
1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228 struct rt6_info *grt;
1230 /* IPv6 strictly inhibits using not link-local
1231 addresses as nexthop address.
1232 Otherwise, router will not able to send redirects.
1233 It is very good, but in some (rare!) circumstances
1234 (SIT, PtP, NBMA NOARP links) it is handy to allow
1235 some exceptions. --ANK
1237 err = -EINVAL;
1238 if (!(gwa_type&IPV6_ADDR_UNICAST))
1239 goto out;
/* strict lookup of the route towards the gateway itself */
1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1243 err = -EHOSTUNREACH;
1244 if (grt == NULL)
1245 goto out;
1246 if (dev) {
1247 if (dev != grt->rt6i_dev) {
1248 dst_release(&grt->dst);
1249 goto out;
1251 } else {
/* no device given: adopt the one used to reach the gateway, taking references */
1252 dev = grt->rt6i_dev;
1253 idev = grt->rt6i_idev;
1254 dev_hold(dev);
1255 in6_dev_hold(grt->rt6i_idev);
/* accepted only when the gateway is reached without another gateway */
1257 if (!(grt->rt6i_flags&RTF_GATEWAY))
1258 err = 0;
1259 dst_release(&grt->dst);
1261 if (err)
1262 goto out;
/* a gateway route needs a non-loopback device */
1264 err = -EINVAL;
1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1266 goto out;
1269 err = -ENODEV;
1270 if (dev == NULL)
1271 goto out;
/* bind a neighbour entry for the gateway address on dev */
1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275 if (IS_ERR(rt->rt6i_nexthop)) {
1276 err = PTR_ERR(rt->rt6i_nexthop);
1277 rt->rt6i_nexthop = NULL;
1278 goto out;
1282 rt->rt6i_flags = cfg->fc_flags;
1284 install_route:
/* copy caller-supplied metrics; attribute types above RTAX_MAX are rejected */
1285 if (cfg->fc_mx) {
1286 struct nlattr *nla;
1287 int remaining;
1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1290 int type = nla_type(nla);
1292 if (type) {
1293 if (type > RTAX_MAX) {
1294 err = -EINVAL;
1295 goto out;
1298 rt->dst.metrics[type - 1] = nla_get_u32(nla);
/* defaults for metrics left at zero: hop limit -1, device MTU, derived advmss */
1303 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1304 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1305 if (!dst_mtu(&rt->dst))
1306 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1307 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1308 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
/* the dev and idev pointers (and the references held on them) move into the route */
1309 rt->dst.dev = dev;
1310 rt->rt6i_idev = idev;
1311 rt->rt6i_table = table;
1313 cfg->fc_nlinfo.nl_net = dev_net(dev);
1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/* error path: drop the references taken above and free the unfinished route */
1317 out:
1318 if (dev)
1319 dev_put(dev);
1320 if (idev)
1321 in6_dev_put(idev);
1322 if (rt)
1323 dst_free(&rt->dst);
1324 return err;
1327 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1329 int err;
1330 struct fib6_table *table;
1331 struct net *net = dev_net(rt->rt6i_dev);
1333 if (rt == net->ipv6.ip6_null_entry)
1334 return -ENOENT;
1336 table = rt->rt6i_table;
1337 write_lock_bh(&table->tb6_lock);
1339 err = fib6_del(rt, info);
1340 dst_release(&rt->dst);
1342 write_unlock_bh(&table->tb6_lock);
1344 return err;
1347 int ip6_del_rt(struct rt6_info *rt)
1349 struct nl_info info = {
1350 .nl_net = dev_net(rt->rt6i_dev),
1352 return __ip6_del_rt(rt, &info);
1355 static int ip6_route_del(struct fib6_config *cfg)
1357 struct fib6_table *table;
1358 struct fib6_node *fn;
1359 struct rt6_info *rt;
1360 int err = -ESRCH;
1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1363 if (table == NULL)
1364 return err;
1366 read_lock_bh(&table->tb6_lock);
1368 fn = fib6_locate(&table->tb6_root,
1369 &cfg->fc_dst, cfg->fc_dst_len,
1370 &cfg->fc_src, cfg->fc_src_len);
1372 if (fn) {
1373 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1374 if (cfg->fc_ifindex &&
1375 (rt->rt6i_dev == NULL ||
1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1377 continue;
1378 if (cfg->fc_flags & RTF_GATEWAY &&
1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1380 continue;
1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1382 continue;
1383 dst_hold(&rt->dst);
1384 read_unlock_bh(&table->tb6_lock);
1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1389 read_unlock_bh(&table->tb6_lock);
1391 return err;
/*
 *	Handle redirects
 */
/*
 * Lookup key used while validating a redirect: a regular flow description
 * plus the gateway address the current route is required to have
 * (rt6_redirect() passes its @saddr argument as that gateway).
 * 'fl' is the first member because __ip6_route_redirect() casts the
 * struct flowi pointer it receives back to struct ip6rd_flowi.
 */
1397 struct ip6rd_flowi {
1398 struct flowi fl;
1399 struct in6_addr gateway;
/*
 * Per-table lookup callback handed to fib6_rule_lookup() by
 * ip6_route_redirect().
 *
 * @fl really points at a struct ip6rd_flowi (see the cast below), which
 * additionally carries the gateway the redirect must have come from.
 * Under the table read lock the node for (fl6_dst, fl6_src) is looked up
 * and its routes are walked; a route is accepted when it is not expired,
 * has RTF_GATEWAY set, uses the device with ifindex fl->oif and has a
 * gateway equal to rdfl->gateway.  If no route on the node qualifies, the
 * namespace's ip6_null_entry is used as the result.
 *
 * The returned route has had dst_hold() applied; the caller owns that
 * reference.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this block; the
 * 'restart' and 'out' labels are presumably its jump targets - confirm
 * against the macro definition before restructuring this function.
 * @flags is not referenced in the visible body.
 */
1402 static struct rt6_info *__ip6_route_redirect(struct net *net,
1403 struct fib6_table *table,
1404 struct flowi *fl,
1405 int flags)
1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408 struct rt6_info *rt;
1409 struct fib6_node *fn;
1412 * Get the "current" route for this destination and
1413 * check if the redirect has come from approriate router.
1415 * RFC 2461 specifies that redirects should only be
1416 * accepted if they come from the nexthop to the target.
1417 * Due to the way the routes are chosen, this notion
1418 * is a bit fuzzy and one might need to check all possible
1419 * routes.
1422 read_lock_bh(&table->tb6_lock);
1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1424 restart:
1425 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1427 * Current route is on-link; redirect is always invalid.
1429 * Seems, previous statement is not true. It could
1430 * be node, which looks for us as on-link (f.e. proxy ndisc)
1431 * But then router serving it might decide, that we should
1432 * know truth 8)8) --ANK (980726).
1434 if (rt6_check_expired(rt))
1435 continue;
1436 if (!(rt->rt6i_flags & RTF_GATEWAY))
1437 continue;
1438 if (fl->oif != rt->rt6i_dev->ifindex)
1439 continue;
1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1441 continue;
1442 break;
/* Nothing on this node qualified: fall back to the null entry. */
1445 if (!rt)
1446 rt = net->ipv6.ip6_null_entry;
1447 BACKTRACK(net, &fl->fl6_src);
1448 out:
/* Take the reference that is handed back to the caller. */
1449 dst_hold(&rt->dst);
1451 read_unlock_bh(&table->tb6_lock);
1453 return rt;
1456 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457 struct in6_addr *src,
1458 struct in6_addr *gateway,
1459 struct net_device *dev)
1461 int flags = RT6_LOOKUP_F_HAS_SADDR;
1462 struct net *net = dev_net(dev);
1463 struct ip6rd_flowi rdfl = {
1464 .fl = {
1465 .oif = dev->ifindex,
1466 .nl_u = {
1467 .ip6_u = {
1468 .daddr = *dest,
1469 .saddr = *src,
1475 ipv6_addr_copy(&rdfl.gateway, gateway);
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1481 flags, __ip6_route_redirect);
/*
 * Process an accepted-in-principle redirect for @dest.
 *
 * @dest:    destination the redirect applies to
 * @src:     source address used for the route lookup
 * @saddr:   address the redirect came from; it must be the gateway of the
 *           current route, otherwise the redirect is dropped
 * @neigh:   neighbour entry of the new next hop
 * @lladdr:  link-layer address passed on to neigh_update()
 * @on_link: non-zero when the new next hop is the destination itself;
 *           RTF_GATEWAY is then cleared on the new route and the
 *           ISROUTER neighbour flags are not passed
 *
 * Steps, in order:
 *  1. ip6_route_redirect() finds the current route (reference held).  If
 *     it is the null entry, a rate-limited debug message is printed and
 *     the redirect is ignored.
 *  2. The neighbour is updated to NUD_STALE and the old route confirmed.
 *  3. If the old route already uses @neigh the redirect is a duplicate.
 *  4. Otherwise the old route is copied into a /128 host route flagged
 *     RTF_UP|RTF_DYNAMIC|RTF_CACHE (plus RTF_GATEWAY unless @on_link),
 *     with gateway/nexthop taken from @neigh and MTU/ADVMSS recomputed
 *     from neigh->dev, and inserted with ip6_ins_rt().
 *  5. On successful insertion NETEVENT_REDIRECT is raised; if the old
 *     route was itself a cache entry it is deleted.
 *
 * Reference handling: every path through 'out' drops the reference taken
 * in step 1.  The RTF_CACHE path returns without going through 'out'
 * because ip6_del_rt() ends in __ip6_del_rt(), which performs a
 * dst_release() on the route.
 */
1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
1490 struct net *net = dev_net(neigh->dev);
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494 if (rt == net->ipv6.ip6_null_entry) {
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
1498 goto out;
1502 * We have finally decided to accept it.
1505 neigh_update(neigh, lladdr, NUD_STALE,
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1517 dst_confirm(&rt->dst);
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->dst.neighbour)
1521 goto out;
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
/* The new entry is a host (/128) route for exactly @dest. */
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->dst.flags |= DST_HOST;
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1539 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1540 dst_mtu(&nrt->dst));
1542 if (ip6_ins_rt(nrt))
1543 goto out;
1545 netevent.old = &rt->dst;
1546 netevent.new = &nrt->dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* ip6_del_rt() drops a dst reference itself, hence the plain return. */
1549 if (rt->rt6i_flags&RTF_CACHE) {
1550 ip6_del_rt(rt);
1551 return;
1554 out:
1555 dst_release(&rt->dst);
/*
 *	Handle ICMP "packet too big" messages,
 *	i.e. Path MTU discovery
 */
/*
 * Apply a path MTU of @pmtu, learned for (@daddr, @saddr), to the route
 * selected by rt6_lookup() for interface index @ifindex (0 is passed by
 * rt6_pmtu_discovery() for its first, unrestricted pass).
 *
 * Behaviour, in order:
 *  - No route found: nothing to do.
 *  - Route found but expired: it is deleted and the lookup is retried.
 *  - @pmtu not smaller than the route's current MTU: ignored.
 *  - @pmtu below IPV6_MIN_MTU: clamped to IPV6_MIN_MTU and the
 *    RTAX_FEATURE_ALLFRAG feature bit is set on the affected route.
 *  - The route is confirmed with dst_confirm().
 *  - RTF_CACHE route: updated in place (MTU, optional ALLFRAG, expiry set
 *    to the ip6_rt_mtu_expires sysctl, RTF_MODIFIED|RTF_EXPIRES added).
 *  - Any other route: a new entry is created with rt6_alloc_cow() when
 *    the route has no nexthop and RTF_NONEXTHOP is clear, otherwise with
 *    rt6_alloc_clone(); the new entry gets the MTU, optional ALLFRAG,
 *    the same expiry, RTF_DYNAMIC|RTF_EXPIRES, and is inserted with
 *    ip6_ins_rt().  Allocation failure is silently tolerated.
 *
 * The reference obtained from rt6_lookup() is dropped at 'out'.  The
 * expired-route path does not go through 'out'; ip6_del_rt() ends in
 * __ip6_del_rt(), which performs a dst_release() itself.
 */
1563 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1564 struct net *net, u32 pmtu, int ifindex)
1566 struct rt6_info *rt, *nrt;
1567 int allfrag = 0;
1568 again:
1569 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1570 if (rt == NULL)
1571 return;
1573 if (rt6_check_expired(rt)) {
1574 ip6_del_rt(rt);
1575 goto again;
1578 if (pmtu >= dst_mtu(&rt->dst))
1579 goto out;
1581 if (pmtu < IPV6_MIN_MTU) {
1583 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1584 * MTU (1280) and a fragment header should always be included
1585 * after a node receiving Too Big message reporting PMTU is
1586 * less than the IPv6 Minimum Link MTU.
1588 pmtu = IPV6_MIN_MTU;
1589 allfrag = 1;
1592 /* New mtu received -> path was valid.
1593 They are sent only in response to data packets,
1594 so that this nexthop apparently is reachable. --ANK
1596 dst_confirm(&rt->dst);
1598 /* Host route. If it is static, it would be better
1599 not to override it, but add new one, so that
1600 when cache entry will expire old pmtu
1601 would return automatically.
1603 if (rt->rt6i_flags & RTF_CACHE) {
1604 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1605 if (allfrag)
1606 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1607 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1608 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1609 goto out;
1612 /* Network route.
1613 Two cases are possible:
1614 1. It is connected route. Action: COW
1615 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1617 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1618 nrt = rt6_alloc_cow(rt, daddr, saddr);
1619 else
1620 nrt = rt6_alloc_clone(rt, daddr);
1622 if (nrt) {
1623 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
1624 if (allfrag)
1625 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1627 /* According to RFC 1981, detecting PMTU increase shouldn't be
1628 * happened within 5 mins, the recommended timer is 10 mins.
1629 * Here this route expiration time is set to ip6_rt_mtu_expires
1630 * which is 10 mins. After 10 mins the decreased pmtu is expired
1631 * and detecting PMTU increase will be automatically happened.
1633 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1634 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1636 ip6_ins_rt(nrt);
1638 out:
1639 dst_release(&rt->dst);
1642 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1643 struct net_device *dev, u32 pmtu)
1645 struct net *net = dev_net(dev);
1648 * RFC 1981 states that a node "MUST reduce the size of the packets it
1649 * is sending along the path" that caused the Packet Too Big message.
1650 * Since it's not possible in the general case to determine which
1651 * interface was used to send the original packet, we update the MTU
1652 * on the interface that will be used to send future packets. We also
1653 * update the MTU on the interface that received the Packet Too Big in
1654 * case the original packet was forced out that interface with
1655 * SO_BINDTODEVICE or similar. This is the next best thing to the
1656 * correct behaviour, which would be to update the MTU on all
1657 * interfaces.
1659 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1660 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
/*
 *	Misc support functions
 */
1667 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1669 struct net *net = dev_net(ort->rt6i_dev);
1670 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1672 if (rt) {
1673 rt->dst.input = ort->dst.input;
1674 rt->dst.output = ort->dst.output;
1676 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1677 rt->dst.error = ort->dst.error;
1678 rt->dst.dev = ort->dst.dev;
1679 if (rt->dst.dev)
1680 dev_hold(rt->dst.dev);
1681 rt->rt6i_idev = ort->rt6i_idev;
1682 if (rt->rt6i_idev)
1683 in6_dev_hold(rt->rt6i_idev);
1684 rt->dst.lastuse = jiffies;
1685 rt->rt6i_expires = 0;
1687 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1688 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1689 rt->rt6i_metric = 0;
1691 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1692 #ifdef CONFIG_IPV6_SUBTREES
1693 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1694 #endif
1695 rt->rt6i_table = ort->rt6i_table;
1697 return rt;
1700 #ifdef CONFIG_IPV6_ROUTE_INFO
1701 static struct rt6_info *rt6_get_route_info(struct net *net,
1702 struct in6_addr *prefix, int prefixlen,
1703 struct in6_addr *gwaddr, int ifindex)
1705 struct fib6_node *fn;
1706 struct rt6_info *rt = NULL;
1707 struct fib6_table *table;
1709 table = fib6_get_table(net, RT6_TABLE_INFO);
1710 if (table == NULL)
1711 return NULL;
1713 write_lock_bh(&table->tb6_lock);
1714 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1715 if (!fn)
1716 goto out;
1718 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1719 if (rt->rt6i_dev->ifindex != ifindex)
1720 continue;
1721 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1722 continue;
1723 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1724 continue;
1725 dst_hold(&rt->dst);
1726 break;
1728 out:
1729 write_unlock_bh(&table->tb6_lock);
1730 return rt;
1733 static struct rt6_info *rt6_add_route_info(struct net *net,
1734 struct in6_addr *prefix, int prefixlen,
1735 struct in6_addr *gwaddr, int ifindex,
1736 unsigned pref)
1738 struct fib6_config cfg = {
1739 .fc_table = RT6_TABLE_INFO,
1740 .fc_metric = IP6_RT_PRIO_USER,
1741 .fc_ifindex = ifindex,
1742 .fc_dst_len = prefixlen,
1743 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1744 RTF_UP | RTF_PREF(pref),
1745 .fc_nlinfo.pid = 0,
1746 .fc_nlinfo.nlh = NULL,
1747 .fc_nlinfo.nl_net = net,
1750 ipv6_addr_copy(&cfg.fc_dst, prefix);
1751 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1753 /* We should treat it as a default route if prefix length is 0. */
1754 if (!prefixlen)
1755 cfg.fc_flags |= RTF_DEFAULT;
1757 ip6_route_add(&cfg);
1759 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1761 #endif
1763 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1765 struct rt6_info *rt;
1766 struct fib6_table *table;
1768 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1769 if (table == NULL)
1770 return NULL;
1772 write_lock_bh(&table->tb6_lock);
1773 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1774 if (dev == rt->rt6i_dev &&
1775 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1776 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1777 break;
1779 if (rt)
1780 dst_hold(&rt->dst);
1781 write_unlock_bh(&table->tb6_lock);
1782 return rt;
1785 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1786 struct net_device *dev,
1787 unsigned int pref)
1789 struct fib6_config cfg = {
1790 .fc_table = RT6_TABLE_DFLT,
1791 .fc_metric = IP6_RT_PRIO_USER,
1792 .fc_ifindex = dev->ifindex,
1793 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1794 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1795 .fc_nlinfo.pid = 0,
1796 .fc_nlinfo.nlh = NULL,
1797 .fc_nlinfo.nl_net = dev_net(dev),
1800 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1802 ip6_route_add(&cfg);
1804 return rt6_get_dflt_router(gwaddr, dev);
1807 void rt6_purge_dflt_routers(struct net *net)
1809 struct rt6_info *rt;
1810 struct fib6_table *table;
1812 /* NOTE: Keep consistent with rt6_get_dflt_router */
1813 table = fib6_get_table(net, RT6_TABLE_DFLT);
1814 if (table == NULL)
1815 return;
1817 restart:
1818 read_lock_bh(&table->tb6_lock);
1819 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1820 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1821 dst_hold(&rt->dst);
1822 read_unlock_bh(&table->tb6_lock);
1823 ip6_del_rt(rt);
1824 goto restart;
1827 read_unlock_bh(&table->tb6_lock);
1830 static void rtmsg_to_fib6_config(struct net *net,
1831 struct in6_rtmsg *rtmsg,
1832 struct fib6_config *cfg)
1834 memset(cfg, 0, sizeof(*cfg));
1836 cfg->fc_table = RT6_TABLE_MAIN;
1837 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1838 cfg->fc_metric = rtmsg->rtmsg_metric;
1839 cfg->fc_expires = rtmsg->rtmsg_info;
1840 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1841 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1842 cfg->fc_flags = rtmsg->rtmsg_flags;
1844 cfg->fc_nlinfo.nl_net = net;
1846 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1847 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1848 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1851 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1853 struct fib6_config cfg;
1854 struct in6_rtmsg rtmsg;
1855 int err;
1857 switch(cmd) {
1858 case SIOCADDRT: /* Add a route */
1859 case SIOCDELRT: /* Delete a route */
1860 if (!capable(CAP_NET_ADMIN))
1861 return -EPERM;
1862 err = copy_from_user(&rtmsg, arg,
1863 sizeof(struct in6_rtmsg));
1864 if (err)
1865 return -EFAULT;
1867 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1869 rtnl_lock();
1870 switch (cmd) {
1871 case SIOCADDRT:
1872 err = ip6_route_add(&cfg);
1873 break;
1874 case SIOCDELRT:
1875 err = ip6_route_del(&cfg);
1876 break;
1877 default:
1878 err = -EINVAL;
1880 rtnl_unlock();
1882 return err;
1885 return -EINVAL;
/*
 *	Drop the packet on the floor
 */
1892 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1894 int type;
1895 struct dst_entry *dst = skb_dst(skb);
1896 switch (ipstats_mib_noroutes) {
1897 case IPSTATS_MIB_INNOROUTES:
1898 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1899 if (type == IPV6_ADDR_ANY) {
1900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 IPSTATS_MIB_INADDRERRORS);
1902 break;
1904 /* FALLTHROUGH */
1905 case IPSTATS_MIB_OUTNOROUTES:
1906 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1907 ipstats_mib_noroutes);
1908 break;
1910 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1911 kfree_skb(skb);
1912 return 0;
1915 static int ip6_pkt_discard(struct sk_buff *skb)
1917 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1920 static int ip6_pkt_discard_out(struct sk_buff *skb)
1922 skb->dev = skb_dst(skb)->dev;
1923 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1926 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1928 static int ip6_pkt_prohibit(struct sk_buff *skb)
1930 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1933 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1935 skb->dev = skb_dst(skb)->dev;
1936 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1939 #endif
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */
/*
 * Build the host route used for one of this node's own addresses.
 *
 * @idev:    inet6_dev the address is configured on (a reference is taken)
 * @addr:    the local address; becomes the /128 destination of the route
 * @anycast: non-zero marks the route RTF_ANYCAST, zero marks it RTF_LOCAL
 *
 * The route is bound to the namespace's loopback device (a reference is
 * taken), uses ip6_input/ip6_output, carries DST_HOST and
 * RTF_UP|RTF_NONEXTHOP, gets MTU/ADVMSS derived from the loopback device,
 * a hop limit metric of -1, dst.obsolete of -1, and is assigned to
 * RT6_TABLE_LOCAL.  Its reference count is set to 1 before returning.
 *
 * Returns the new route, ERR_PTR(-ENOMEM) when allocation fails (a
 * rate-limited warning is logged), or the error pointer produced by
 * ndisc_get_neigh() after the half-built route has been freed.
 *
 * NOTE(review): ndisc_get_neigh() is called with &rt->rt6i_gateway, which
 * this function never assigns; it holds whatever ip6_dst_alloc() left
 * there - confirm that this is intended.
 */
1945 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1946 const struct in6_addr *addr,
1947 int anycast)
1949 struct net *net = dev_net(idev->dev);
1950 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1951 struct neighbour *neigh;
1953 if (rt == NULL) {
1954 if (net_ratelimit())
1955 pr_warning("IPv6: Maximum number of routes reached,"
1956 " consider increasing route/max_size.\n");
1957 return ERR_PTR(-ENOMEM);
/* References for rt->rt6i_dev and rt->rt6i_idev, which are set below. */
1960 dev_hold(net->loopback_dev);
1961 in6_dev_hold(idev);
1963 rt->dst.flags = DST_HOST;
1964 rt->dst.input = ip6_input;
1965 rt->dst.output = ip6_output;
1966 rt->rt6i_dev = net->loopback_dev;
1967 rt->rt6i_idev = idev;
1968 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1969 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1970 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1971 rt->dst.obsolete = -1;
1973 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1974 if (anycast)
1975 rt->rt6i_flags |= RTF_ANYCAST;
1976 else
1977 rt->rt6i_flags |= RTF_LOCAL;
1978 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1979 if (IS_ERR(neigh)) {
1980 dst_free(&rt->dst);
1982 /* We are casting this because that is the return
1983 * value type. But an errno encoded pointer is the
1984 * same regardless of the underlying pointer type,
1985 * and that's what we are returning. So this is OK.
1987 return (struct rt6_info *) neigh;
1989 rt->rt6i_nexthop = neigh;
1991 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1992 rt->rt6i_dst.plen = 128;
1993 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1995 atomic_set(&rt->dst.__refcnt, 1);
1997 return rt;
/*
 * Argument bundle passed by rt6_ifdown() to the fib6_ifdown() walker
 * callback.
 */
2000 struct arg_dev_net {
/* Device whose routes are removed; NULL matches every device. */
2001 struct net_device *dev;
/* Namespace whose ip6_null_entry must be left alone. */
2002 struct net *net;
2005 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2007 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
2008 struct net *net = ((struct arg_dev_net *)arg)->net;
2010 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2011 rt != net->ipv6.ip6_null_entry) {
2012 RT6_TRACE("deleted by ifdown %p\n", rt);
2013 return -1;
2015 return 0;
2018 void rt6_ifdown(struct net *net, struct net_device *dev)
2020 struct arg_dev_net adn = {
2021 .dev = dev,
2022 .net = net,
2025 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2026 icmp6_clean_all(fib6_ifdown, &adn);
/*
 * Argument bundle passed by rt6_mtu_change() to the
 * rt6_mtu_change_route() walker callback.
 */
2029 struct rt6_mtu_change_arg
/* Device whose MTU changed. */
2031 struct net_device *dev;
/* The new MTU value to apply to matching routes. */
2032 unsigned mtu;
2035 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2037 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2038 struct inet6_dev *idev;
2039 struct net *net = dev_net(arg->dev);
2041 /* In IPv6 pmtu discovery is not optional,
2042 so that RTAX_MTU lock cannot disable it.
2043 We still use this lock to block changes
2044 caused by addrconf/ndisc.
2047 idev = __in6_dev_get(arg->dev);
2048 if (idev == NULL)
2049 return 0;
2051 /* For administrative MTU increase, there is no way to discover
2052 IPv6 PMTU increase, so PMTU increase should be updated here.
2053 Since RFC 1981 doesn't include administrative MTU increase
2054 update PMTU increase is a MUST. (i.e. jumbo frame)
2057 If new MTU is less than route PMTU, this new MTU will be the
2058 lowest MTU in the path, update the route PMTU to reflect PMTU
2059 decreases; if new MTU is greater than route PMTU, and the
2060 old MTU is the lowest MTU in the path, update the route PMTU
2061 to reflect the increase. In this case if the other nodes' MTU
2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2063 PMTU discouvery.
2065 if (rt->rt6i_dev == arg->dev &&
2066 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2067 (dst_mtu(&rt->dst) >= arg->mtu ||
2068 (dst_mtu(&rt->dst) < arg->mtu &&
2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2070 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2071 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2073 return 0;
2076 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2078 struct rt6_mtu_change_arg arg = {
2079 .dev = dev,
2080 .mtu = mtu,
2083 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
/*
 * Netlink attribute policy handed to nlmsg_parse() by
 * rtm_to_fib6_config() and inet6_rtm_getroute().
 * Only the attributes listed here have a declared length or type;
 * RTA_DST and RTA_SRC are absent, so the code using them checks their
 * lengths by hand.
 */
2086 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2087 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2088 [RTA_OIF] = { .type = NLA_U32 },
2089 [RTA_IIF] = { .type = NLA_U32 },
2090 [RTA_PRIORITY] = { .type = NLA_U32 },
2091 [RTA_METRICS] = { .type = NLA_NESTED },
2094 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2095 struct fib6_config *cfg)
2097 struct rtmsg *rtm;
2098 struct nlattr *tb[RTA_MAX+1];
2099 int err;
2101 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2102 if (err < 0)
2103 goto errout;
2105 err = -EINVAL;
2106 rtm = nlmsg_data(nlh);
2107 memset(cfg, 0, sizeof(*cfg));
2109 cfg->fc_table = rtm->rtm_table;
2110 cfg->fc_dst_len = rtm->rtm_dst_len;
2111 cfg->fc_src_len = rtm->rtm_src_len;
2112 cfg->fc_flags = RTF_UP;
2113 cfg->fc_protocol = rtm->rtm_protocol;
2115 if (rtm->rtm_type == RTN_UNREACHABLE)
2116 cfg->fc_flags |= RTF_REJECT;
2118 if (rtm->rtm_type == RTN_LOCAL)
2119 cfg->fc_flags |= RTF_LOCAL;
2121 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2122 cfg->fc_nlinfo.nlh = nlh;
2123 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2125 if (tb[RTA_GATEWAY]) {
2126 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2127 cfg->fc_flags |= RTF_GATEWAY;
2130 if (tb[RTA_DST]) {
2131 int plen = (rtm->rtm_dst_len + 7) >> 3;
2133 if (nla_len(tb[RTA_DST]) < plen)
2134 goto errout;
2136 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2139 if (tb[RTA_SRC]) {
2140 int plen = (rtm->rtm_src_len + 7) >> 3;
2142 if (nla_len(tb[RTA_SRC]) < plen)
2143 goto errout;
2145 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2148 if (tb[RTA_OIF])
2149 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2151 if (tb[RTA_PRIORITY])
2152 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2154 if (tb[RTA_METRICS]) {
2155 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2156 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2159 if (tb[RTA_TABLE])
2160 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2162 err = 0;
2163 errout:
2164 return err;
2167 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2169 struct fib6_config cfg;
2170 int err;
2172 err = rtm_to_fib6_config(skb, nlh, &cfg);
2173 if (err < 0)
2174 return err;
2176 return ip6_route_del(&cfg);
2179 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2181 struct fib6_config cfg;
2182 int err;
2184 err = rtm_to_fib6_config(skb, nlh, &cfg);
2185 if (err < 0)
2186 return err;
2188 return ip6_route_add(&cfg);
2191 static inline size_t rt6_nlmsg_size(void)
2193 return NLMSG_ALIGN(sizeof(struct rtmsg))
2194 + nla_total_size(16) /* RTA_SRC */
2195 + nla_total_size(16) /* RTA_DST */
2196 + nla_total_size(16) /* RTA_GATEWAY */
2197 + nla_total_size(16) /* RTA_PREFSRC */
2198 + nla_total_size(4) /* RTA_TABLE */
2199 + nla_total_size(4) /* RTA_IIF */
2200 + nla_total_size(4) /* RTA_OIF */
2201 + nla_total_size(4) /* RTA_PRIORITY */
2202 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2203 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * Append one route message describing @rt to @skb.
 *
 * @net:    namespace, used for the multicast-route and source-address
 *          helpers
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as a /128 RTA_DST instead of the route's
 *          own destination prefix
 * @src:    likewise for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index; non-zero emits RTA_IIF (or, for a
 *          multicast destination with CONFIG_IPV6_MROUTE, defers to
 *          ip6mr_get_route())
 * @type, @pid, @seq, @flags: passed through to nlmsg_put()
 * @prefix: non-zero restricts output to routes with RTF_PREFIX_RT set
 * @nowait: passed to ip6mr_get_route() and selects how its result is
 *          handled
 *
 * Route type reported: RTN_UNREACHABLE for RTF_REJECT, RTN_LOCAL for
 * RTF_LOCAL or a loopback device, RTN_UNICAST otherwise.
 *
 * Protocol reported: rt6i_protocol, overridden by RTPROT_REDIRECT for
 * RTF_DYNAMIC, else RTPROT_KERNEL for RTF_ADDRCONF, else RTPROT_RA for
 * RTF_DEFAULT.
 * NOTE(review): rt6_add_dflt_router() sets both RTF_ADDRCONF and
 * RTF_DEFAULT, so such routes take the RTPROT_KERNEL branch and never
 * reach RTPROT_RA - confirm whether that ordering is intended.
 *
 * When @iif is zero and @dst is given, a preferred source address is
 * looked up with ipv6_dev_get_saddr() and emitted as RTA_PREFSRC.
 *
 * The remaining lifetime given to rtnl_put_cacheinfo() is 0 without
 * RTF_EXPIRES, otherwise (rt6i_expires - jiffies) capped at INT_MAX.
 *
 * Returns:
 *   1           - @prefix was set and @rt is not a prefix route
 *   0           - ip6mr_get_route() returned 0 with @nowait clear
 *   -EMSGSIZE   - the header or an attribute did not fit (the partial
 *                 message is cancelled first), or ip6mr_get_route()
 *                 failed in a way routed to the failure label
 *   otherwise   - the value of nlmsg_end()
 *
 * NOTE(review): the NLA_PUT*() macros are defined elsewhere; the
 * nla_put_failure label is presumably their failure target.
 */
2206 static int rt6_fill_node(struct net *net,
2207 struct sk_buff *skb, struct rt6_info *rt,
2208 struct in6_addr *dst, struct in6_addr *src,
2209 int iif, int type, u32 pid, u32 seq,
2210 int prefix, int nowait, unsigned int flags)
2212 struct rtmsg *rtm;
2213 struct nlmsghdr *nlh;
2214 long expires;
2215 u32 table;
2217 if (prefix) { /* user wants prefix routes only */
2218 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2219 /* success since this is not a prefix route */
2220 return 1;
2224 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2225 if (nlh == NULL)
2226 return -EMSGSIZE;
2228 rtm = nlmsg_data(nlh);
2229 rtm->rtm_family = AF_INET6;
2230 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2231 rtm->rtm_src_len = rt->rt6i_src.plen;
2232 rtm->rtm_tos = 0;
2233 if (rt->rt6i_table)
2234 table = rt->rt6i_table->tb6_id;
2235 else
2236 table = RT6_TABLE_UNSPEC;
/* The table id goes both into the header and into RTA_TABLE. */
2237 rtm->rtm_table = table;
2238 NLA_PUT_U32(skb, RTA_TABLE, table);
2239 if (rt->rt6i_flags&RTF_REJECT)
2240 rtm->rtm_type = RTN_UNREACHABLE;
2241 else if (rt->rt6i_flags&RTF_LOCAL)
2242 rtm->rtm_type = RTN_LOCAL;
2243 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2244 rtm->rtm_type = RTN_LOCAL;
2245 else
2246 rtm->rtm_type = RTN_UNICAST;
2247 rtm->rtm_flags = 0;
2248 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2249 rtm->rtm_protocol = rt->rt6i_protocol;
2250 if (rt->rt6i_flags&RTF_DYNAMIC)
2251 rtm->rtm_protocol = RTPROT_REDIRECT;
2252 else if (rt->rt6i_flags & RTF_ADDRCONF)
2253 rtm->rtm_protocol = RTPROT_KERNEL;
2254 else if (rt->rt6i_flags&RTF_DEFAULT)
2255 rtm->rtm_protocol = RTPROT_RA;
2257 if (rt->rt6i_flags&RTF_CACHE)
2258 rtm->rtm_flags |= RTM_F_CLONED;
2260 if (dst) {
2261 NLA_PUT(skb, RTA_DST, 16, dst);
2262 rtm->rtm_dst_len = 128;
2263 } else if (rtm->rtm_dst_len)
2264 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2265 #ifdef CONFIG_IPV6_SUBTREES
2266 if (src) {
2267 NLA_PUT(skb, RTA_SRC, 16, src);
2268 rtm->rtm_src_len = 128;
2269 } else if (rtm->rtm_src_len)
2270 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2271 #endif
2272 if (iif) {
2273 #ifdef CONFIG_IPV6_MROUTE
2274 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2275 int err = ip6mr_get_route(net, skb, rtm, nowait);
2276 if (err <= 0) {
2277 if (!nowait) {
2278 if (err == 0)
2279 return 0;
2280 goto nla_put_failure;
2281 } else {
2282 if (err == -EMSGSIZE)
2283 goto nla_put_failure;
2286 } else
2287 #endif
2288 NLA_PUT_U32(skb, RTA_IIF, iif);
2289 } else if (dst) {
2290 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2291 struct in6_addr saddr_buf;
2292 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2293 dst, 0, &saddr_buf) == 0)
2294 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2297 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2298 goto nla_put_failure;
2300 if (rt->dst.neighbour)
2301 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2303 if (rt->dst.dev)
2304 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2306 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2308 if (!(rt->rt6i_flags & RTF_EXPIRES))
2309 expires = 0;
2310 else if (rt->rt6i_expires - jiffies < INT_MAX)
2311 expires = rt->rt6i_expires - jiffies;
2312 else
2313 expires = INT_MAX;
2315 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2316 expires, rt->dst.error) < 0)
2317 goto nla_put_failure;
2319 return nlmsg_end(skb, nlh);
2321 nla_put_failure:
2322 nlmsg_cancel(skb, nlh);
2323 return -EMSGSIZE;
2326 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2328 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2329 int prefix;
2331 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2332 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2333 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2334 } else
2335 prefix = 0;
2337 return rt6_fill_node(arg->net,
2338 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2339 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2340 prefix, 0, NLM_F_MULTI);
2343 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2345 struct net *net = sock_net(in_skb->sk);
2346 struct nlattr *tb[RTA_MAX+1];
2347 struct rt6_info *rt;
2348 struct sk_buff *skb;
2349 struct rtmsg *rtm;
2350 struct flowi fl;
2351 int err, iif = 0;
2353 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2354 if (err < 0)
2355 goto errout;
2357 err = -EINVAL;
2358 memset(&fl, 0, sizeof(fl));
2360 if (tb[RTA_SRC]) {
2361 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2362 goto errout;
2364 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2367 if (tb[RTA_DST]) {
2368 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2369 goto errout;
2371 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2374 if (tb[RTA_IIF])
2375 iif = nla_get_u32(tb[RTA_IIF]);
2377 if (tb[RTA_OIF])
2378 fl.oif = nla_get_u32(tb[RTA_OIF]);
2380 if (iif) {
2381 struct net_device *dev;
2382 dev = __dev_get_by_index(net, iif);
2383 if (!dev) {
2384 err = -ENODEV;
2385 goto errout;
2389 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2390 if (skb == NULL) {
2391 err = -ENOBUFS;
2392 goto errout;
2395 /* Reserve room for dummy headers, this skb can pass
2396 through good chunk of routing engine.
2398 skb_reset_mac_header(skb);
2399 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2401 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2402 skb_dst_set(skb, &rt->dst);
2404 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2405 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2406 nlh->nlmsg_seq, 0, 0, 0);
2407 if (err < 0) {
2408 kfree_skb(skb);
2409 goto errout;
2412 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2413 errout:
2414 return err;
2417 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2419 struct sk_buff *skb;
2420 struct net *net = info->nl_net;
2421 u32 seq;
2422 int err;
2424 err = -ENOBUFS;
2425 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2427 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2428 if (skb == NULL)
2429 goto errout;
2431 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2432 event, info->pid, seq, 0, 0, 0);
2433 if (err < 0) {
2434 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2435 WARN_ON(err == -EMSGSIZE);
2436 kfree_skb(skb);
2437 goto errout;
2439 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2440 info->nlh, gfp_any());
2441 return;
2442 errout:
2443 if (err < 0)
2444 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2447 static int ip6_route_dev_notify(struct notifier_block *this,
2448 unsigned long event, void *data)
2450 struct net_device *dev = (struct net_device *)data;
2451 struct net *net = dev_net(dev);
2453 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2454 net->ipv6.ip6_null_entry->dst.dev = dev;
2455 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2456 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2457 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2458 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2459 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2460 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2461 #endif
2464 return NOTIFY_OK;
/*
 *	/proc
 */
2471 #ifdef CONFIG_PROC_FS
/*
 * Length of one formatted route line, written as a sum of field widths.
 * NOTE(review): neither this constant nor struct rt6_proc_arg is
 * referenced by the seq_file based /proc code visible below; they look
 * like leftovers from an older buffer-based implementation - confirm
 * before removing.
 */
2473 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2475 struct rt6_proc_arg
2477 char *buffer;
2478 int offset;
2479 int length;
2480 int skip;
2481 int len;
2484 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2486 struct seq_file *m = p_arg;
2488 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2490 #ifdef CONFIG_IPV6_SUBTREES
2491 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2492 #else
2493 seq_puts(m, "00000000000000000000000000000000 00 ");
2494 #endif
2496 if (rt->rt6i_nexthop) {
2497 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2498 } else {
2499 seq_puts(m, "00000000000000000000000000000000");
2501 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2502 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2503 rt->dst.__use, rt->rt6i_flags,
2504 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2505 return 0;
2508 static int ipv6_route_show(struct seq_file *m, void *v)
2510 struct net *net = (struct net *)m->private;
2511 fib6_clean_all(net, rt6_info_route, 0, m);
2512 return 0;
2515 static int ipv6_route_open(struct inode *inode, struct file *file)
2517 return single_open_net(inode, file, ipv6_route_show);
2520 static const struct file_operations ipv6_route_proc_fops = {
2521 .owner = THIS_MODULE,
2522 .open = ipv6_route_open,
2523 .read = seq_read,
2524 .llseek = seq_lseek,
2525 .release = single_release_net,
2528 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2530 struct net *net = (struct net *)seq->private;
2531 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2532 net->ipv6.rt6_stats->fib_nodes,
2533 net->ipv6.rt6_stats->fib_route_nodes,
2534 net->ipv6.rt6_stats->fib_rt_alloc,
2535 net->ipv6.rt6_stats->fib_rt_entries,
2536 net->ipv6.rt6_stats->fib_rt_cache,
2537 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2538 net->ipv6.rt6_stats->fib_discarded_routes);
2540 return 0;
2543 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2545 return single_open_net(inode, file, rt6_stats_seq_show);
2548 static const struct file_operations rt6_stats_seq_fops = {
2549 .owner = THIS_MODULE,
2550 .open = rt6_stats_seq_open,
2551 .read = seq_read,
2552 .llseek = seq_lseek,
2553 .release = single_release_net,
2555 #endif /* CONFIG_PROC_FS */
2557 #ifdef CONFIG_SYSCTL
2559 static
2560 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2561 void __user *buffer, size_t *lenp, loff_t *ppos)
2563 struct net *net = current->nsproxy->net_ns;
2564 int delay = net->ipv6.sysctl.flush_delay;
2565 if (write) {
2566 proc_dointvec(ctl, write, buffer, lenp, ppos);
2567 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2568 return 0;
2569 } else
2570 return -EINVAL;
2573 ctl_table ipv6_route_table_template[] = {
2575 .procname = "flush",
2576 .data = &init_net.ipv6.sysctl.flush_delay,
2577 .maxlen = sizeof(int),
2578 .mode = 0200,
2579 .proc_handler = ipv6_sysctl_rtcache_flush
2582 .procname = "gc_thresh",
2583 .data = &ip6_dst_ops_template.gc_thresh,
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
2586 .proc_handler = proc_dointvec,
2589 .procname = "max_size",
2590 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
2593 .proc_handler = proc_dointvec,
2596 .procname = "gc_min_interval",
2597 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
2600 .proc_handler = proc_dointvec_jiffies,
2603 .procname = "gc_timeout",
2604 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2605 .maxlen = sizeof(int),
2606 .mode = 0644,
2607 .proc_handler = proc_dointvec_jiffies,
2610 .procname = "gc_interval",
2611 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
2614 .proc_handler = proc_dointvec_jiffies,
2617 .procname = "gc_elasticity",
2618 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2619 .maxlen = sizeof(int),
2620 .mode = 0644,
2621 .proc_handler = proc_dointvec,
2624 .procname = "mtu_expires",
2625 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2626 .maxlen = sizeof(int),
2627 .mode = 0644,
2628 .proc_handler = proc_dointvec_jiffies,
2631 .procname = "min_adv_mss",
2632 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2633 .maxlen = sizeof(int),
2634 .mode = 0644,
2635 .proc_handler = proc_dointvec,
2638 .procname = "gc_min_interval_ms",
2639 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2640 .maxlen = sizeof(int),
2641 .mode = 0644,
2642 .proc_handler = proc_dointvec_ms_jiffies,
2647 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2649 struct ctl_table *table;
2651 table = kmemdup(ipv6_route_table_template,
2652 sizeof(ipv6_route_table_template),
2653 GFP_KERNEL);
2655 if (table) {
2656 table[0].data = &net->ipv6.sysctl.flush_delay;
2657 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2658 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2659 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2660 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2661 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2662 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2663 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2664 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2665 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2668 return table;
2670 #endif
2672 static int __net_init ip6_route_net_init(struct net *net)
2674 int ret = -ENOMEM;
2676 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2677 sizeof(net->ipv6.ip6_dst_ops));
2679 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2680 goto out_ip6_dst_ops;
2682 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2683 sizeof(*net->ipv6.ip6_null_entry),
2684 GFP_KERNEL);
2685 if (!net->ipv6.ip6_null_entry)
2686 goto out_ip6_dst_entries;
2687 net->ipv6.ip6_null_entry->dst.path =
2688 (struct dst_entry *)net->ipv6.ip6_null_entry;
2689 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2691 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2692 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2693 sizeof(*net->ipv6.ip6_prohibit_entry),
2694 GFP_KERNEL);
2695 if (!net->ipv6.ip6_prohibit_entry)
2696 goto out_ip6_null_entry;
2697 net->ipv6.ip6_prohibit_entry->dst.path =
2698 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2699 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2701 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2702 sizeof(*net->ipv6.ip6_blk_hole_entry),
2703 GFP_KERNEL);
2704 if (!net->ipv6.ip6_blk_hole_entry)
2705 goto out_ip6_prohibit_entry;
2706 net->ipv6.ip6_blk_hole_entry->dst.path =
2707 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2708 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2709 #endif
2711 net->ipv6.sysctl.flush_delay = 0;
2712 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2713 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2714 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2715 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2716 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2717 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2718 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2720 #ifdef CONFIG_PROC_FS
2721 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2722 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2723 #endif
2724 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2726 ret = 0;
2727 out:
2728 return ret;
2730 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2731 out_ip6_prohibit_entry:
2732 kfree(net->ipv6.ip6_prohibit_entry);
2733 out_ip6_null_entry:
2734 kfree(net->ipv6.ip6_null_entry);
2735 #endif
2736 out_ip6_dst_entries:
2737 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2738 out_ip6_dst_ops:
2739 goto out;
2742 static void __net_exit ip6_route_net_exit(struct net *net)
2744 #ifdef CONFIG_PROC_FS
2745 proc_net_remove(net, "ipv6_route");
2746 proc_net_remove(net, "rt6_stats");
2747 #endif
2748 kfree(net->ipv6.ip6_null_entry);
2749 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2750 kfree(net->ipv6.ip6_prohibit_entry);
2751 kfree(net->ipv6.ip6_blk_hole_entry);
2752 #endif
2753 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2756 static struct pernet_operations ip6_route_net_ops = {
2757 .init = ip6_route_net_init,
2758 .exit = ip6_route_net_exit,
2761 static struct notifier_block ip6_route_dev_notifier = {
2762 .notifier_call = ip6_route_dev_notify,
2763 .priority = 0,
2766 int __init ip6_route_init(void)
2768 int ret;
2770 ret = -ENOMEM;
2771 ip6_dst_ops_template.kmem_cachep =
2772 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2773 SLAB_HWCACHE_ALIGN, NULL);
2774 if (!ip6_dst_ops_template.kmem_cachep)
2775 goto out;
2777 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2778 if (ret)
2779 goto out_kmem_cache;
2781 ret = register_pernet_subsys(&ip6_route_net_ops);
2782 if (ret)
2783 goto out_dst_entries;
2785 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2787 /* Registering of the loopback is done before this portion of code,
2788 * the loopback reference in rt6_info will not be taken, do it
2789 * manually for init_net */
2790 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2791 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2792 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2793 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2794 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2795 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2796 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2797 #endif
2798 ret = fib6_init();
2799 if (ret)
2800 goto out_register_subsys;
2802 ret = xfrm6_init();
2803 if (ret)
2804 goto out_fib6_init;
2806 ret = fib6_rules_init();
2807 if (ret)
2808 goto xfrm6_init;
2810 ret = -ENOBUFS;
2811 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2812 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2813 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2814 goto fib6_rules_init;
2816 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2817 if (ret)
2818 goto fib6_rules_init;
2820 out:
2821 return ret;
2823 fib6_rules_init:
2824 fib6_rules_cleanup();
2825 xfrm6_init:
2826 xfrm6_fini();
2827 out_fib6_init:
2828 fib6_gc_cleanup();
2829 out_register_subsys:
2830 unregister_pernet_subsys(&ip6_route_net_ops);
2831 out_dst_entries:
2832 dst_entries_destroy(&ip6_dst_blackhole_ops);
2833 out_kmem_cache:
2834 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2835 goto out;
2838 void ip6_route_cleanup(void)
2840 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2841 fib6_rules_cleanup();
2842 xfrm6_fini();
2843 fib6_gc_cleanup();
2844 unregister_pernet_subsys(&ip6_route_net_ops);
2845 dst_entries_destroy(&ip6_dst_blackhole_ops);
2846 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);