1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 /* Changes:
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 const struct in6_addr *prefix, int prefixlen,
97 const struct in6_addr *gwaddr, int ifindex);
98 #endif
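/* Copy-on-write for dst metrics: on the first write the shared read-only
 * template metrics are copied into the inet_peer bound to this route and
 * dst->_metrics is switched to the peer's array with cmpxchg(). If another
 * CPU changed the pointer first, that pointer is used instead, unless it is
 * still marked read-only, in which case NULL is returned.
 */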
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
127 return p;
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
147 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
149 return 0;
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
156 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 unsigned long old)
159 return NULL;
162 static struct dst_ops ip6_dst_blackhole_ops = {
163 .family = AF_INET6,
164 .protocol = cpu_to_be16(ETH_P_IPV6),
165 .destroy = ip6_dst_destroy,
166 .check = ip6_dst_check,
167 .default_mtu = ip6_blackhole_default_mtu,
168 .default_advmss = ip6_default_advmss,
169 .update_pmtu = ip6_rt_blackhole_update_pmtu,
170 .cow_metrics = ip6_rt_blackhole_cow_metrics,
173 static const u32 ip6_template_metrics[RTAX_MAX] = {
174 [RTAX_HOPLIMIT - 1] = 255,
177 static struct rt6_info ip6_null_entry_template = {
178 .dst = {
179 .__refcnt = ATOMIC_INIT(1),
180 .__use = 1,
181 .obsolete = -1,
182 .error = -ENETUNREACH,
183 .input = ip6_pkt_discard,
184 .output = ip6_pkt_discard_out,
186 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
187 .rt6i_protocol = RTPROT_KERNEL,
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
192 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
194 static int ip6_pkt_prohibit(struct sk_buff *skb);
195 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
197 static struct rt6_info ip6_prohibit_entry_template = {
198 .dst = {
199 .__refcnt = ATOMIC_INIT(1),
200 .__use = 1,
201 .obsolete = -1,
202 .error = -EACCES,
203 .input = ip6_pkt_prohibit,
204 .output = ip6_pkt_prohibit_out,
206 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
207 .rt6i_protocol = RTPROT_KERNEL,
208 .rt6i_metric = ~(u32) 0,
209 .rt6i_ref = ATOMIC_INIT(1),
212 static struct rt6_info ip6_blk_hole_entry_template = {
213 .dst = {
214 .__refcnt = ATOMIC_INIT(1),
215 .__use = 1,
216 .obsolete = -1,
217 .error = -EINVAL,
218 .input = dst_discard,
219 .output = dst_discard,
221 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
222 .rt6i_protocol = RTPROT_KERNEL,
223 .rt6i_metric = ~(u32) 0,
224 .rt6i_ref = ATOMIC_INIT(1),
227 #endif
229 /* allocate dst with ip6_dst_ops */
230 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
231 struct net_device *dev)
233 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, 0);
235 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
237 return rt;
240 static void ip6_dst_destroy(struct dst_entry *dst)
242 struct rt6_info *rt = (struct rt6_info *)dst;
243 struct inet6_dev *idev = rt->rt6i_idev;
244 struct inet_peer *peer = rt->rt6i_peer;
246 if (idev != NULL) {
247 rt->rt6i_idev = NULL;
248 in6_dev_put(idev);
250 if (peer) {
251 rt->rt6i_peer = NULL;
252 inet_putpeer(peer);
256 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
258 static u32 rt6_peer_genid(void)
260 return atomic_read(&__rt6_peer_genid);
263 void rt6_bind_peer(struct rt6_info *rt, int create)
265 struct inet_peer *peer;
267 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
268 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
269 inet_putpeer(peer);
270 else
271 rt->rt6i_peer_genid = rt6_peer_genid();
274 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
275 int how)
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
279 struct net_device *loopback_dev =
280 dev_net(dev)->loopback_dev;
282 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
283 struct inet6_dev *loopback_idev =
284 in6_dev_get(loopback_dev);
285 if (loopback_idev != NULL) {
286 rt->rt6i_idev = loopback_idev;
287 in6_dev_put(idev);
292 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
294 return (rt->rt6i_flags & RTF_EXPIRES) &&
295 time_after(jiffies, rt->rt6i_expires);
298 static inline int rt6_need_strict(const struct in6_addr *daddr)
300 return ipv6_addr_type(daddr) &
301 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
305 * Route lookup. Any table->tb6_lock is implied.
308 static inline struct rt6_info *rt6_device_match(struct net *net,
309 struct rt6_info *rt,
310 const struct in6_addr *saddr,
311 int oif,
312 int flags)
314 struct rt6_info *local = NULL;
315 struct rt6_info *sprt;
317 if (!oif && ipv6_addr_any(saddr))
318 goto out;
320 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
321 struct net_device *dev = sprt->rt6i_dev;
323 if (oif) {
324 if (dev->ifindex == oif)
325 return sprt;
326 if (dev->flags & IFF_LOOPBACK) {
327 if (sprt->rt6i_idev == NULL ||
328 sprt->rt6i_idev->dev->ifindex != oif) {
329 if (flags & RT6_LOOKUP_F_IFACE && oif)
330 continue;
331 if (local && (!oif ||
332 local->rt6i_idev->dev->ifindex == oif))
333 continue;
335 local = sprt;
337 } else {
338 if (ipv6_chk_addr(net, saddr, dev,
339 flags & RT6_LOOKUP_F_IFACE))
340 return sprt;
344 if (oif) {
345 if (local)
346 return local;
348 if (flags & RT6_LOOKUP_F_IFACE)
349 return net->ipv6.ip6_null_entry;
351 out:
352 return rt;
355 #ifdef CONFIG_IPV6_ROUTER_PREF
356 static void rt6_probe(struct rt6_info *rt)
358 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
360 * Okay, this does not seem to be appropriate
361 * for now, however, we need to check if it
362 * is really so; aka Router Reachability Probing.
364 * Router Reachability Probe MUST be rate-limited
365 * to no more than one per minute.
367 if (!neigh || (neigh->nud_state & NUD_VALID))
368 return;
369 read_lock_bh(&neigh->lock);
370 if (!(neigh->nud_state & NUD_VALID) &&
371 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
372 struct in6_addr mcaddr;
373 struct in6_addr *target;
375 neigh->updated = jiffies;
376 read_unlock_bh(&neigh->lock);
378 target = (struct in6_addr *)&neigh->primary_key;
379 addrconf_addr_solict_mult(target, &mcaddr);
380 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
381 } else
382 read_unlock_bh(&neigh->lock);
384 #else
385 static inline void rt6_probe(struct rt6_info *rt)
388 #endif
391 * Default Router Selection (RFC 2461 6.3.6)
393 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
395 struct net_device *dev = rt->rt6i_dev;
396 if (!oif || dev->ifindex == oif)
397 return 2;
398 if ((dev->flags & IFF_LOOPBACK) &&
399 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
400 return 1;
401 return 0;
404 static inline int rt6_check_neigh(struct rt6_info *rt)
406 struct neighbour *neigh = rt->rt6i_nexthop;
407 int m;
408 if (rt->rt6i_flags & RTF_NONEXTHOP ||
409 !(rt->rt6i_flags & RTF_GATEWAY))
410 m = 1;
411 else if (neigh) {
412 read_lock_bh(&neigh->lock);
413 if (neigh->nud_state & NUD_VALID)
414 m = 2;
415 #ifdef CONFIG_IPV6_ROUTER_PREF
416 else if (neigh->nud_state & NUD_FAILED)
417 m = 0;
418 #endif
419 else
420 m = 1;
421 read_unlock_bh(&neigh->lock);
422 } else
423 m = 0;
424 return m;
427 static int rt6_score_route(struct rt6_info *rt, int oif,
428 int strict)
430 int m, n;
432 m = rt6_check_dev(rt, oif);
433 if (!m && (strict & RT6_LOOKUP_F_IFACE))
434 return -1;
435 #ifdef CONFIG_IPV6_ROUTER_PREF
436 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
437 #endif
438 n = rt6_check_neigh(rt);
439 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
440 return -1;
441 return m;
444 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
445 int *mpri, struct rt6_info *match)
447 int m;
449 if (rt6_check_expired(rt))
450 goto out;
452 m = rt6_score_route(rt, oif, strict);
453 if (m < 0)
454 goto out;
456 if (m > *mpri) {
457 if (strict & RT6_LOOKUP_F_REACHABLE)
458 rt6_probe(match);
459 *mpri = m;
460 match = rt;
461 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
462 rt6_probe(rt);
465 out:
466 return match;
469 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
470 struct rt6_info *rr_head,
471 u32 metric, int oif, int strict)
473 struct rt6_info *rt, *match;
474 int mpri = -1;
476 match = NULL;
477 for (rt = rr_head; rt && rt->rt6i_metric == metric;
478 rt = rt->dst.rt6_next)
479 match = find_match(rt, oif, strict, &mpri, match);
480 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
481 rt = rt->dst.rt6_next)
482 match = find_match(rt, oif, strict, &mpri, match);
484 return match;
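/* Round-robin default router selection (see RFC 2461 6.3.6 below): scan the
 * routes sharing rt0's metric starting at fn->rr_ptr, score each against the
 * outgoing interface and neighbour reachability, and if nothing reachable
 * matched, advance rr_ptr so the next lookup tries the following router.
 */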
487 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
489 struct rt6_info *match, *rt0;
490 struct net *net;
492 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
493 __func__, fn->leaf, oif);
495 rt0 = fn->rr_ptr;
496 if (!rt0)
497 fn->rr_ptr = rt0 = fn->leaf;
499 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
501 if (!match &&
502 (strict & RT6_LOOKUP_F_REACHABLE)) {
503 struct rt6_info *next = rt0->dst.rt6_next;
505 /* no entries matched; do round-robin */
506 if (!next || next->rt6i_metric != rt0->rt6i_metric)
507 next = fn->leaf;
509 if (next != rt0)
510 fn->rr_ptr = next;
513 RT6_TRACE("%s() => %p\n",
514 __func__, match);
516 net = dev_net(rt0->rt6i_dev);
517 return match ? match : net->ipv6.ip6_null_entry;
520 #ifdef CONFIG_IPV6_ROUTE_INFO
521 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
522 const struct in6_addr *gwaddr)
524 struct net *net = dev_net(dev);
525 struct route_info *rinfo = (struct route_info *) opt;
526 struct in6_addr prefix_buf, *prefix;
527 unsigned int pref;
528 unsigned long lifetime;
529 struct rt6_info *rt;
531 if (len < sizeof(struct route_info)) {
532 return -EINVAL;
535 /* Sanity check for prefix_len and length */
536 if (rinfo->length > 3) {
537 return -EINVAL;
538 } else if (rinfo->prefix_len > 128) {
539 return -EINVAL;
540 } else if (rinfo->prefix_len > 64) {
541 if (rinfo->length < 2) {
542 return -EINVAL;
544 } else if (rinfo->prefix_len > 0) {
545 if (rinfo->length < 1) {
546 return -EINVAL;
550 pref = rinfo->route_pref;
551 if (pref == ICMPV6_ROUTER_PREF_INVALID)
552 return -EINVAL;
554 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
556 if (rinfo->length == 3)
557 prefix = (struct in6_addr *)rinfo->prefix;
558 else {
559 /* this function is safe */
560 ipv6_addr_prefix(&prefix_buf,
561 (struct in6_addr *)rinfo->prefix,
562 rinfo->prefix_len);
563 prefix = &prefix_buf;
566 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
567 dev->ifindex);
569 if (rt && !lifetime) {
570 ip6_del_rt(rt);
571 rt = NULL;
574 if (!rt && lifetime)
575 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
576 pref);
577 else if (rt)
578 rt->rt6i_flags = RTF_ROUTEINFO |
579 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
581 if (rt) {
582 if (!addrconf_finite_timeout(lifetime)) {
583 rt->rt6i_flags &= ~RTF_EXPIRES;
584 } else {
585 rt->rt6i_expires = jiffies + HZ * lifetime;
586 rt->rt6i_flags |= RTF_EXPIRES;
588 dst_release(&rt->dst);
590 return 0;
592 #endif
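/* BACKTRACK: when the lookup resolved to the null entry, walk back up the
 * fib6 tree (descending into source-routed subtrees where present) and retry
 * at the first ancestor node that carries route information; give up once
 * the tree root is reached.
 */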
594 #define BACKTRACK(__net, saddr) \
595 do { \
596 if (rt == __net->ipv6.ip6_null_entry) { \
597 struct fib6_node *pn; \
598 while (1) { \
599 if (fn->fn_flags & RTN_TL_ROOT) \
600 goto out; \
601 pn = fn->parent; \
602 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
603 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
604 else \
605 fn = pn; \
606 if (fn->fn_flags & RTN_RTINFO) \
607 goto restart; \
610 } while(0)
612 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
613 struct fib6_table *table,
614 struct flowi6 *fl6, int flags)
616 struct fib6_node *fn;
617 struct rt6_info *rt;
619 read_lock_bh(&table->tb6_lock);
620 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
621 restart:
622 rt = fn->leaf;
623 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
624 BACKTRACK(net, &fl6->saddr);
625 out:
626 dst_use(&rt->dst, jiffies);
627 read_unlock_bh(&table->tb6_lock);
628 return rt;
632 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
633 const struct in6_addr *saddr, int oif, int strict)
635 struct flowi6 fl6 = {
636 .flowi6_oif = oif,
637 .daddr = *daddr,
639 struct dst_entry *dst;
640 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
642 if (saddr) {
643 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
644 flags |= RT6_LOOKUP_F_HAS_SADDR;
647 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
648 if (dst->error == 0)
649 return (struct rt6_info *) dst;
651 dst_release(dst);
653 return NULL;
656 EXPORT_SYMBOL(rt6_lookup);
658 /* ip6_ins_rt is called with FREE table->tb6_lock.
659 It takes a new route entry; if the addition fails for any reason, the
660 route is freed. In any case, if the caller does not hold a reference,
661 it may be destroyed.
664 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
666 int err;
667 struct fib6_table *table;
669 table = rt->rt6i_table;
670 write_lock_bh(&table->tb6_lock);
671 err = fib6_add(&table->tb6_root, rt, info);
672 write_unlock_bh(&table->tb6_lock);
674 return err;
677 int ip6_ins_rt(struct rt6_info *rt)
679 struct nl_info info = {
680 .nl_net = dev_net(rt->rt6i_dev),
682 return __ip6_ins_rt(rt, &info);
685 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, const struct in6_addr *daddr,
686 const struct in6_addr *saddr)
688 struct rt6_info *rt;
691 * Clone the route.
694 rt = ip6_rt_copy(ort);
696 if (rt) {
697 struct neighbour *neigh;
698 int attempts = !in_softirq();
700 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
701 if (rt->rt6i_dst.plen != 128 &&
702 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
703 rt->rt6i_flags |= RTF_ANYCAST;
704 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
707 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
708 rt->rt6i_dst.plen = 128;
709 rt->rt6i_flags |= RTF_CACHE;
710 rt->dst.flags |= DST_HOST;
712 #ifdef CONFIG_IPV6_SUBTREES
713 if (rt->rt6i_src.plen && saddr) {
714 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
715 rt->rt6i_src.plen = 128;
717 #endif
719 retry:
720 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
721 if (IS_ERR(neigh)) {
722 struct net *net = dev_net(rt->rt6i_dev);
723 int saved_rt_min_interval =
724 net->ipv6.sysctl.ip6_rt_gc_min_interval;
725 int saved_rt_elasticity =
726 net->ipv6.sysctl.ip6_rt_gc_elasticity;
728 if (attempts-- > 0) {
729 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
730 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
732 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
734 net->ipv6.sysctl.ip6_rt_gc_elasticity =
735 saved_rt_elasticity;
736 net->ipv6.sysctl.ip6_rt_gc_min_interval =
737 saved_rt_min_interval;
738 goto retry;
741 if (net_ratelimit())
742 printk(KERN_WARNING
743 "ipv6: Neighbour table overflow.\n");
744 dst_free(&rt->dst);
745 return NULL;
747 rt->rt6i_nexthop = neigh;
751 return rt;
754 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, const struct in6_addr *daddr)
756 struct rt6_info *rt = ip6_rt_copy(ort);
757 if (rt) {
758 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
759 rt->rt6i_dst.plen = 128;
760 rt->rt6i_flags |= RTF_CACHE;
761 rt->dst.flags |= DST_HOST;
762 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
764 return rt;
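/* Core FIB lookup used for both the input and output paths: select the best
 * route for the flow, and unless it is already a cached host route, create a
 * per-destination copy (rt6_alloc_cow/rt6_alloc_clone), insert it, and retry
 * a few times if another CPU raced us while tb6_lock was dropped.
 */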
767 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
768 struct flowi6 *fl6, int flags)
770 struct fib6_node *fn;
771 struct rt6_info *rt, *nrt;
772 int strict = 0;
773 int attempts = 3;
774 int err;
775 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
777 strict |= flags & RT6_LOOKUP_F_IFACE;
779 relookup:
780 read_lock_bh(&table->tb6_lock);
782 restart_2:
783 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
785 restart:
786 rt = rt6_select(fn, oif, strict | reachable);
788 BACKTRACK(net, &fl6->saddr);
789 if (rt == net->ipv6.ip6_null_entry ||
790 rt->rt6i_flags & RTF_CACHE)
791 goto out;
793 dst_hold(&rt->dst);
794 read_unlock_bh(&table->tb6_lock);
796 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
797 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
798 else if (!(rt->dst.flags & DST_HOST))
799 nrt = rt6_alloc_clone(rt, &fl6->daddr);
800 else
801 goto out2;
803 dst_release(&rt->dst);
804 rt = nrt ? : net->ipv6.ip6_null_entry;
806 dst_hold(&rt->dst);
807 if (nrt) {
808 err = ip6_ins_rt(nrt);
809 if (!err)
810 goto out2;
813 if (--attempts <= 0)
814 goto out2;
817 * Race condition! In the gap, when table->tb6_lock was
818 * released someone could insert this route. Relookup.
820 dst_release(&rt->dst);
821 goto relookup;
823 out:
824 if (reachable) {
825 reachable = 0;
826 goto restart_2;
828 dst_hold(&rt->dst);
829 read_unlock_bh(&table->tb6_lock);
830 out2:
831 rt->dst.lastuse = jiffies;
832 rt->dst.__use++;
834 return rt;
837 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
838 struct flowi6 *fl6, int flags)
840 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
843 void ip6_route_input(struct sk_buff *skb)
845 const struct ipv6hdr *iph = ipv6_hdr(skb);
846 struct net *net = dev_net(skb->dev);
847 int flags = RT6_LOOKUP_F_HAS_SADDR;
848 struct flowi6 fl6 = {
849 .flowi6_iif = skb->dev->ifindex,
850 .daddr = iph->daddr,
851 .saddr = iph->saddr,
852 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
853 .flowi6_mark = skb->mark,
854 .flowi6_proto = iph->nexthdr,
857 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
858 flags |= RT6_LOOKUP_F_IFACE;
860 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
863 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
864 struct flowi6 *fl6, int flags)
866 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
869 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
870 struct flowi6 *fl6)
872 int flags = 0;
874 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
875 flags |= RT6_LOOKUP_F_IFACE;
877 if (!ipv6_addr_any(&fl6->saddr))
878 flags |= RT6_LOOKUP_F_HAS_SADDR;
879 else if (sk)
880 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
882 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
885 EXPORT_SYMBOL(ip6_route_output);
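/* Build a "blackhole" copy of a route: same device, metrics and keys as the
 * original, but with input/output set to dst_discard so packets using it are
 * silently dropped. The reference on dst_orig is released.
 */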
887 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
889 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
890 struct dst_entry *new = NULL;
892 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
893 if (rt) {
894 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
896 new = &rt->dst;
898 new->__use = 1;
899 new->input = dst_discard;
900 new->output = dst_discard;
902 dst_copy_metrics(new, &ort->dst);
903 rt->rt6i_idev = ort->rt6i_idev;
904 if (rt->rt6i_idev)
905 in6_dev_hold(rt->rt6i_idev);
906 rt->rt6i_expires = 0;
908 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
909 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
910 rt->rt6i_metric = 0;
912 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
913 #ifdef CONFIG_IPV6_SUBTREES
914 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
915 #endif
917 dst_free(new);
920 dst_release(dst_orig);
921 return new ? new : ERR_PTR(-ENOMEM);
925 * Destination cache support functions
928 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
930 struct rt6_info *rt;
932 rt = (struct rt6_info *) dst;
934 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
935 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
936 if (!rt->rt6i_peer)
937 rt6_bind_peer(rt, 0);
938 rt->rt6i_peer_genid = rt6_peer_genid();
940 return dst;
942 return NULL;
945 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
947 struct rt6_info *rt = (struct rt6_info *) dst;
949 if (rt) {
950 if (rt->rt6i_flags & RTF_CACHE) {
951 if (rt6_check_expired(rt)) {
952 ip6_del_rt(rt);
953 dst = NULL;
955 } else {
956 dst_release(dst);
957 dst = NULL;
960 return dst;
963 static void ip6_link_failure(struct sk_buff *skb)
965 struct rt6_info *rt;
967 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
969 rt = (struct rt6_info *) skb_dst(skb);
970 if (rt) {
971 if (rt->rt6i_flags&RTF_CACHE) {
972 dst_set_expires(&rt->dst, 0);
973 rt->rt6i_flags |= RTF_EXPIRES;
974 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
975 rt->rt6i_node->fn_sernum = -1;
979 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
981 struct rt6_info *rt6 = (struct rt6_info*)dst;
983 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
984 rt6->rt6i_flags |= RTF_MODIFIED;
985 if (mtu < IPV6_MIN_MTU) {
986 u32 features = dst_metric(dst, RTAX_FEATURES);
987 mtu = IPV6_MIN_MTU;
988 features |= RTAX_FEATURE_ALLFRAG;
989 dst_metric_set(dst, RTAX_FEATURES, features);
991 dst_metric_set(dst, RTAX_MTU, mtu);
995 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
997 struct net_device *dev = dst->dev;
998 unsigned int mtu = dst_mtu(dst);
999 struct net *net = dev_net(dev);
1001 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1003 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1004 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1007 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1008 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1009 * IPV6_MAXPLEN is also valid and means: "any MSS,
1010 * rely only on pmtu discovery"
1012 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1013 mtu = IPV6_MAXPLEN;
1014 return mtu;
1017 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1019 unsigned int mtu = IPV6_MIN_MTU;
1020 struct inet6_dev *idev;
1022 rcu_read_lock();
1023 idev = __in6_dev_get(dst->dev);
1024 if (idev)
1025 mtu = idev->cnf.mtu6;
1026 rcu_read_unlock();
1028 return mtu;
1031 static struct dst_entry *icmp6_dst_gc_list;
1032 static DEFINE_SPINLOCK(icmp6_dst_lock);
1034 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1035 struct neighbour *neigh,
1036 const struct in6_addr *addr)
1038 struct rt6_info *rt;
1039 struct inet6_dev *idev = in6_dev_get(dev);
1040 struct net *net = dev_net(dev);
1042 if (unlikely(idev == NULL))
1043 return NULL;
1045 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev);
1046 if (unlikely(rt == NULL)) {
1047 in6_dev_put(idev);
1048 goto out;
1051 if (neigh)
1052 neigh_hold(neigh);
1053 else {
1054 neigh = ndisc_get_neigh(dev, addr);
1055 if (IS_ERR(neigh))
1056 neigh = NULL;
1059 rt->rt6i_idev = idev;
1060 rt->rt6i_nexthop = neigh;
1061 atomic_set(&rt->dst.__refcnt, 1);
1062 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1063 rt->dst.output = ip6_output;
1065 #if 0 /* there's no chance to use these for ndisc */
1066 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1067 ? DST_HOST
1068 : 0;
1069 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1070 rt->rt6i_dst.plen = 128;
1071 #endif
1073 spin_lock_bh(&icmp6_dst_lock);
1074 rt->dst.next = icmp6_dst_gc_list;
1075 icmp6_dst_gc_list = &rt->dst;
1076 spin_unlock_bh(&icmp6_dst_lock);
1078 fib6_force_start_gc(net);
1080 out:
1081 return &rt->dst;
1084 int icmp6_dst_gc(void)
1086 struct dst_entry *dst, **pprev;
1087 int more = 0;
1089 spin_lock_bh(&icmp6_dst_lock);
1090 pprev = &icmp6_dst_gc_list;
1092 while ((dst = *pprev) != NULL) {
1093 if (!atomic_read(&dst->__refcnt)) {
1094 *pprev = dst->next;
1095 dst_free(dst);
1096 } else {
1097 pprev = &dst->next;
1098 ++more;
1102 spin_unlock_bh(&icmp6_dst_lock);
1104 return more;
1107 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1108 void *arg)
1110 struct dst_entry *dst, **pprev;
1112 spin_lock_bh(&icmp6_dst_lock);
1113 pprev = &icmp6_dst_gc_list;
1114 while ((dst = *pprev) != NULL) {
1115 struct rt6_info *rt = (struct rt6_info *) dst;
1116 if (func(rt, arg)) {
1117 *pprev = dst->next;
1118 dst_free(dst);
1119 } else {
1120 pprev = &dst->next;
1123 spin_unlock_bh(&icmp6_dst_lock);
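/* dst garbage collection, rate limited by ip6_rt_gc_min_interval unless the
 * number of cached entries already exceeds ip6_rt_max_size; the effective
 * expiry interval grows on every forced run and decays according to
 * ip6_rt_gc_elasticity.
 */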
1126 static int ip6_dst_gc(struct dst_ops *ops)
1128 unsigned long now = jiffies;
1129 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1130 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1131 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1132 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1133 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1134 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1135 int entries;
1137 entries = dst_entries_get_fast(ops);
1138 if (time_after(rt_last_gc + rt_min_interval, now) &&
1139 entries <= rt_max_size)
1140 goto out;
1142 net->ipv6.ip6_rt_gc_expire++;
1143 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1144 net->ipv6.ip6_rt_last_gc = now;
1145 entries = dst_entries_get_slow(ops);
1146 if (entries < ops->gc_thresh)
1147 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1148 out:
1149 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1150 return entries > rt_max_size;
1153 /* Clean host part of a prefix. Not necessary in radix tree,
1154 but results in cleaner routing tables.
1156 Remove it only when all the things will work!
1159 int ip6_dst_hoplimit(struct dst_entry *dst)
1161 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1162 if (hoplimit == 0) {
1163 struct net_device *dev = dst->dev;
1164 struct inet6_dev *idev;
1166 rcu_read_lock();
1167 idev = __in6_dev_get(dev);
1168 if (idev)
1169 hoplimit = idev->cnf.hop_limit;
1170 else
1171 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1172 rcu_read_unlock();
1174 return hoplimit;
1176 EXPORT_SYMBOL(ip6_dst_hoplimit);
1182 int ip6_route_add(struct fib6_config *cfg)
1184 int err;
1185 struct net *net = cfg->fc_nlinfo.nl_net;
1186 struct rt6_info *rt = NULL;
1187 struct net_device *dev = NULL;
1188 struct inet6_dev *idev = NULL;
1189 struct fib6_table *table;
1190 int addr_type;
1192 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1193 return -EINVAL;
1194 #ifndef CONFIG_IPV6_SUBTREES
1195 if (cfg->fc_src_len)
1196 return -EINVAL;
1197 #endif
1198 if (cfg->fc_ifindex) {
1199 err = -ENODEV;
1200 dev = dev_get_by_index(net, cfg->fc_ifindex);
1201 if (!dev)
1202 goto out;
1203 idev = in6_dev_get(dev);
1204 if (!idev)
1205 goto out;
1208 if (cfg->fc_metric == 0)
1209 cfg->fc_metric = IP6_RT_PRIO_USER;
1211 table = fib6_new_table(net, cfg->fc_table);
1212 if (table == NULL) {
1213 err = -ENOBUFS;
1214 goto out;
1217 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL);
1219 if (rt == NULL) {
1220 err = -ENOMEM;
1221 goto out;
1224 rt->dst.obsolete = -1;
1225 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1226 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1229 if (cfg->fc_protocol == RTPROT_UNSPEC)
1230 cfg->fc_protocol = RTPROT_BOOT;
1231 rt->rt6i_protocol = cfg->fc_protocol;
1233 addr_type = ipv6_addr_type(&cfg->fc_dst);
1235 if (addr_type & IPV6_ADDR_MULTICAST)
1236 rt->dst.input = ip6_mc_input;
1237 else if (cfg->fc_flags & RTF_LOCAL)
1238 rt->dst.input = ip6_input;
1239 else
1240 rt->dst.input = ip6_forward;
1242 rt->dst.output = ip6_output;
1244 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1245 rt->rt6i_dst.plen = cfg->fc_dst_len;
1246 if (rt->rt6i_dst.plen == 128)
1247 rt->dst.flags = DST_HOST;
1249 #ifdef CONFIG_IPV6_SUBTREES
1250 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1251 rt->rt6i_src.plen = cfg->fc_src_len;
1252 #endif
1254 rt->rt6i_metric = cfg->fc_metric;
1256 /* We cannot add true routes via loopback here,
1257 they would result in kernel looping; promote them to reject routes
1259 if ((cfg->fc_flags & RTF_REJECT) ||
1260 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1261 && !(cfg->fc_flags&RTF_LOCAL))) {
1262 /* hold loopback dev/idev if we haven't done so. */
1263 if (dev != net->loopback_dev) {
1264 if (dev) {
1265 dev_put(dev);
1266 in6_dev_put(idev);
1268 dev = net->loopback_dev;
1269 dev_hold(dev);
1270 idev = in6_dev_get(dev);
1271 if (!idev) {
1272 err = -ENODEV;
1273 goto out;
1276 rt->dst.output = ip6_pkt_discard_out;
1277 rt->dst.input = ip6_pkt_discard;
1278 rt->dst.error = -ENETUNREACH;
1279 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1280 goto install_route;
1283 if (cfg->fc_flags & RTF_GATEWAY) {
1284 const struct in6_addr *gw_addr;
1285 int gwa_type;
1287 gw_addr = &cfg->fc_gateway;
1288 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1289 gwa_type = ipv6_addr_type(gw_addr);
1291 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1292 struct rt6_info *grt;
1294 /* IPv6 strictly prohibits using non-link-local
1295 addresses as a nexthop address.
1296 Otherwise, the router will not be able to send redirects.
1297 It is very good, but in some (rare!) circumstances
1298 (SIT, PtP, NBMA NOARP links) it is handy to allow
1299 some exceptions. --ANK
1301 err = -EINVAL;
1302 if (!(gwa_type&IPV6_ADDR_UNICAST))
1303 goto out;
1305 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1307 err = -EHOSTUNREACH;
1308 if (grt == NULL)
1309 goto out;
1310 if (dev) {
1311 if (dev != grt->rt6i_dev) {
1312 dst_release(&grt->dst);
1313 goto out;
1315 } else {
1316 dev = grt->rt6i_dev;
1317 idev = grt->rt6i_idev;
1318 dev_hold(dev);
1319 in6_dev_hold(grt->rt6i_idev);
1321 if (!(grt->rt6i_flags&RTF_GATEWAY))
1322 err = 0;
1323 dst_release(&grt->dst);
1325 if (err)
1326 goto out;
1328 err = -EINVAL;
1329 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1330 goto out;
1333 err = -ENODEV;
1334 if (dev == NULL)
1335 goto out;
1337 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1338 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1339 err = -EINVAL;
1340 goto out;
1342 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1343 rt->rt6i_prefsrc.plen = 128;
1344 } else
1345 rt->rt6i_prefsrc.plen = 0;
1347 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1348 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1349 if (IS_ERR(rt->rt6i_nexthop)) {
1350 err = PTR_ERR(rt->rt6i_nexthop);
1351 rt->rt6i_nexthop = NULL;
1352 goto out;
1356 rt->rt6i_flags = cfg->fc_flags;
1358 install_route:
1359 if (cfg->fc_mx) {
1360 struct nlattr *nla;
1361 int remaining;
1363 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1364 int type = nla_type(nla);
1366 if (type) {
1367 if (type > RTAX_MAX) {
1368 err = -EINVAL;
1369 goto out;
1372 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1377 rt->dst.dev = dev;
1378 rt->rt6i_idev = idev;
1379 rt->rt6i_table = table;
1381 cfg->fc_nlinfo.nl_net = dev_net(dev);
1383 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1385 out:
1386 if (dev)
1387 dev_put(dev);
1388 if (idev)
1389 in6_dev_put(idev);
1390 if (rt)
1391 dst_free(&rt->dst);
1392 return err;
1395 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1397 int err;
1398 struct fib6_table *table;
1399 struct net *net = dev_net(rt->rt6i_dev);
1401 if (rt == net->ipv6.ip6_null_entry)
1402 return -ENOENT;
1404 table = rt->rt6i_table;
1405 write_lock_bh(&table->tb6_lock);
1407 err = fib6_del(rt, info);
1408 dst_release(&rt->dst);
1410 write_unlock_bh(&table->tb6_lock);
1412 return err;
1415 int ip6_del_rt(struct rt6_info *rt)
1417 struct nl_info info = {
1418 .nl_net = dev_net(rt->rt6i_dev),
1420 return __ip6_del_rt(rt, &info);
1423 static int ip6_route_del(struct fib6_config *cfg)
1425 struct fib6_table *table;
1426 struct fib6_node *fn;
1427 struct rt6_info *rt;
1428 int err = -ESRCH;
1430 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1431 if (table == NULL)
1432 return err;
1434 read_lock_bh(&table->tb6_lock);
1436 fn = fib6_locate(&table->tb6_root,
1437 &cfg->fc_dst, cfg->fc_dst_len,
1438 &cfg->fc_src, cfg->fc_src_len);
1440 if (fn) {
1441 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1442 if (cfg->fc_ifindex &&
1443 (rt->rt6i_dev == NULL ||
1444 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1445 continue;
1446 if (cfg->fc_flags & RTF_GATEWAY &&
1447 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1448 continue;
1449 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1450 continue;
1451 dst_hold(&rt->dst);
1452 read_unlock_bh(&table->tb6_lock);
1454 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1457 read_unlock_bh(&table->tb6_lock);
1459 return err;
1463 * Handle redirects
1465 struct ip6rd_flowi {
1466 struct flowi6 fl6;
1467 struct in6_addr gateway;
1470 static struct rt6_info *__ip6_route_redirect(struct net *net,
1471 struct fib6_table *table,
1472 struct flowi6 *fl6,
1473 int flags)
1475 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1476 struct rt6_info *rt;
1477 struct fib6_node *fn;
1480 * Get the "current" route for this destination and
1481 * check if the redirect has come from an appropriate router.
1483 * RFC 2461 specifies that redirects should only be
1484 * accepted if they come from the nexthop to the target.
1485 * Due to the way the routes are chosen, this notion
1486 * is a bit fuzzy and one might need to check all possible
1487 * routes.
1490 read_lock_bh(&table->tb6_lock);
1491 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1492 restart:
1493 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1495 * Current route is on-link; redirect is always invalid.
1497 * Seems, previous statement is not true. It could
1498 * be node, which looks for us as on-link (f.e. proxy ndisc)
1499 * But then router serving it might decide, that we should
1500 * know truth 8)8) --ANK (980726).
1502 if (rt6_check_expired(rt))
1503 continue;
1504 if (!(rt->rt6i_flags & RTF_GATEWAY))
1505 continue;
1506 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1507 continue;
1508 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1509 continue;
1510 break;
1513 if (!rt)
1514 rt = net->ipv6.ip6_null_entry;
1515 BACKTRACK(net, &fl6->saddr);
1516 out:
1517 dst_hold(&rt->dst);
1519 read_unlock_bh(&table->tb6_lock);
1521 return rt;
1524 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1525 const struct in6_addr *src,
1526 const struct in6_addr *gateway,
1527 struct net_device *dev)
1529 int flags = RT6_LOOKUP_F_HAS_SADDR;
1530 struct net *net = dev_net(dev);
1531 struct ip6rd_flowi rdfl = {
1532 .fl6 = {
1533 .flowi6_oif = dev->ifindex,
1534 .daddr = *dest,
1535 .saddr = *src,
1539 ipv6_addr_copy(&rdfl.gateway, gateway);
1541 if (rt6_need_strict(dest))
1542 flags |= RT6_LOOKUP_F_IFACE;
1544 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1545 flags, __ip6_route_redirect);
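/* Process a received ICMPv6 redirect: verify it came from the router we are
 * currently using for the destination, update the neighbour cache entry for
 * the new next hop, and install a cloned RTF_CACHE host route through it.
 */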
1548 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1549 const struct in6_addr *saddr,
1550 struct neighbour *neigh, u8 *lladdr, int on_link)
1552 struct rt6_info *rt, *nrt = NULL;
1553 struct netevent_redirect netevent;
1554 struct net *net = dev_net(neigh->dev);
1556 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1558 if (rt == net->ipv6.ip6_null_entry) {
1559 if (net_ratelimit())
1560 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1561 "for redirect target\n");
1562 goto out;
1566 * We have finally decided to accept it.
1569 neigh_update(neigh, lladdr, NUD_STALE,
1570 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1571 NEIGH_UPDATE_F_OVERRIDE|
1572 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1573 NEIGH_UPDATE_F_ISROUTER))
1577 * Redirect received -> path was valid.
1578 * Look, redirects are sent only in response to data packets,
1579 * so that this nexthop apparently is reachable. --ANK
1581 dst_confirm(&rt->dst);
1583 /* Duplicate redirect: silently ignore. */
1584 if (neigh == rt->dst.neighbour)
1585 goto out;
1587 nrt = ip6_rt_copy(rt);
1588 if (nrt == NULL)
1589 goto out;
1591 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1592 if (on_link)
1593 nrt->rt6i_flags &= ~RTF_GATEWAY;
1595 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1596 nrt->rt6i_dst.plen = 128;
1597 nrt->dst.flags |= DST_HOST;
1599 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1600 nrt->rt6i_nexthop = neigh_clone(neigh);
1602 if (ip6_ins_rt(nrt))
1603 goto out;
1605 netevent.old = &rt->dst;
1606 netevent.new = &nrt->dst;
1607 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1609 if (rt->rt6i_flags&RTF_CACHE) {
1610 ip6_del_rt(rt);
1611 return;
1614 out:
1615 dst_release(&rt->dst);
1619 * Handle ICMP "packet too big" messages
1620 * i.e. Path MTU discovery
1623 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1624 struct net *net, u32 pmtu, int ifindex)
1626 struct rt6_info *rt, *nrt;
1627 int allfrag = 0;
1628 again:
1629 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1630 if (rt == NULL)
1631 return;
1633 if (rt6_check_expired(rt)) {
1634 ip6_del_rt(rt);
1635 goto again;
1638 if (pmtu >= dst_mtu(&rt->dst))
1639 goto out;
1641 if (pmtu < IPV6_MIN_MTU) {
1643 * According to RFC 2460, the PMTU is set to the IPv6 minimum link
1644 * MTU (1280) and a fragment header should always be included
1645 * once a node has received a Packet Too Big message reporting a PMTU
1646 * less than the IPv6 minimum link MTU.
1648 pmtu = IPV6_MIN_MTU;
1649 allfrag = 1;
1652 /* New mtu received -> path was valid.
1653 They are sent only in response to data packets,
1654 so that this nexthop apparently is reachable. --ANK
1656 dst_confirm(&rt->dst);
1658 /* Host route. If it is static, it would be better
1659 not to override it, but add new one, so that
1660 when cache entry will expire old pmtu
1661 would return automatically.
1663 if (rt->rt6i_flags & RTF_CACHE) {
1664 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1665 if (allfrag) {
1666 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1667 features |= RTAX_FEATURE_ALLFRAG;
1668 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1670 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1671 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1672 goto out;
1675 /* Network route.
1676 Two cases are possible:
1677 1. It is connected route. Action: COW
1678 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1680 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1681 nrt = rt6_alloc_cow(rt, daddr, saddr);
1682 else
1683 nrt = rt6_alloc_clone(rt, daddr);
1685 if (nrt) {
1686 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1687 if (allfrag) {
1688 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1689 features |= RTAX_FEATURE_ALLFRAG;
1690 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1693 /* According to RFC 1981, a PMTU increase should not be detected
1694 * within 5 minutes; the recommended timer is 10 minutes.
1695 * Here this route's expiration time is set to ip6_rt_mtu_expires,
1696 * which is 10 minutes. After 10 minutes the decreased PMTU expires
1697 * and a PMTU increase is detected automatically.
1699 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1700 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1702 ip6_ins_rt(nrt);
1704 out:
1705 dst_release(&rt->dst);
1708 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1709 struct net_device *dev, u32 pmtu)
1711 struct net *net = dev_net(dev);
1714 * RFC 1981 states that a node "MUST reduce the size of the packets it
1715 * is sending along the path" that caused the Packet Too Big message.
1716 * Since it's not possible in the general case to determine which
1717 * interface was used to send the original packet, we update the MTU
1718 * on the interface that will be used to send future packets. We also
1719 * update the MTU on the interface that received the Packet Too Big in
1720 * case the original packet was forced out that interface with
1721 * SO_BINDTODEVICE or similar. This is the next best thing to the
1722 * correct behaviour, which would be to update the MTU on all
1723 * interfaces.
1725 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1726 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1730 * Misc support functions
1733 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1735 struct net *net = dev_net(ort->rt6i_dev);
1736 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1737 ort->dst.dev);
1739 if (rt) {
1740 rt->dst.input = ort->dst.input;
1741 rt->dst.output = ort->dst.output;
1743 dst_copy_metrics(&rt->dst, &ort->dst);
1744 rt->dst.error = ort->dst.error;
1745 rt->rt6i_idev = ort->rt6i_idev;
1746 if (rt->rt6i_idev)
1747 in6_dev_hold(rt->rt6i_idev);
1748 rt->dst.lastuse = jiffies;
1749 rt->rt6i_expires = 0;
1751 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1752 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1753 rt->rt6i_metric = 0;
1755 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1756 #ifdef CONFIG_IPV6_SUBTREES
1757 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1758 #endif
1759 rt->rt6i_table = ort->rt6i_table;
1761 return rt;
1764 #ifdef CONFIG_IPV6_ROUTE_INFO
1765 static struct rt6_info *rt6_get_route_info(struct net *net,
1766 const struct in6_addr *prefix, int prefixlen,
1767 const struct in6_addr *gwaddr, int ifindex)
1769 struct fib6_node *fn;
1770 struct rt6_info *rt = NULL;
1771 struct fib6_table *table;
1773 table = fib6_get_table(net, RT6_TABLE_INFO);
1774 if (table == NULL)
1775 return NULL;
1777 write_lock_bh(&table->tb6_lock);
1778 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1779 if (!fn)
1780 goto out;
1782 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1783 if (rt->rt6i_dev->ifindex != ifindex)
1784 continue;
1785 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1786 continue;
1787 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1788 continue;
1789 dst_hold(&rt->dst);
1790 break;
1792 out:
1793 write_unlock_bh(&table->tb6_lock);
1794 return rt;
1797 static struct rt6_info *rt6_add_route_info(struct net *net,
1798 const struct in6_addr *prefix, int prefixlen,
1799 const struct in6_addr *gwaddr, int ifindex,
1800 unsigned pref)
1802 struct fib6_config cfg = {
1803 .fc_table = RT6_TABLE_INFO,
1804 .fc_metric = IP6_RT_PRIO_USER,
1805 .fc_ifindex = ifindex,
1806 .fc_dst_len = prefixlen,
1807 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1808 RTF_UP | RTF_PREF(pref),
1809 .fc_nlinfo.pid = 0,
1810 .fc_nlinfo.nlh = NULL,
1811 .fc_nlinfo.nl_net = net,
1814 ipv6_addr_copy(&cfg.fc_dst, prefix);
1815 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1817 /* We should treat it as a default route if prefix length is 0. */
1818 if (!prefixlen)
1819 cfg.fc_flags |= RTF_DEFAULT;
1821 ip6_route_add(&cfg);
1823 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1825 #endif
1827 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1829 struct rt6_info *rt;
1830 struct fib6_table *table;
1832 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1833 if (table == NULL)
1834 return NULL;
1836 write_lock_bh(&table->tb6_lock);
1837 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1838 if (dev == rt->rt6i_dev &&
1839 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1840 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1841 break;
1843 if (rt)
1844 dst_hold(&rt->dst);
1845 write_unlock_bh(&table->tb6_lock);
1846 return rt;
1849 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1850 struct net_device *dev,
1851 unsigned int pref)
1853 struct fib6_config cfg = {
1854 .fc_table = RT6_TABLE_DFLT,
1855 .fc_metric = IP6_RT_PRIO_USER,
1856 .fc_ifindex = dev->ifindex,
1857 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1858 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1859 .fc_nlinfo.pid = 0,
1860 .fc_nlinfo.nlh = NULL,
1861 .fc_nlinfo.nl_net = dev_net(dev),
1864 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1866 ip6_route_add(&cfg);
1868 return rt6_get_dflt_router(gwaddr, dev);
1871 void rt6_purge_dflt_routers(struct net *net)
1873 struct rt6_info *rt;
1874 struct fib6_table *table;
1876 /* NOTE: Keep consistent with rt6_get_dflt_router */
1877 table = fib6_get_table(net, RT6_TABLE_DFLT);
1878 if (table == NULL)
1879 return;
1881 restart:
1882 read_lock_bh(&table->tb6_lock);
1883 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1884 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1885 dst_hold(&rt->dst);
1886 read_unlock_bh(&table->tb6_lock);
1887 ip6_del_rt(rt);
1888 goto restart;
1891 read_unlock_bh(&table->tb6_lock);
1894 static void rtmsg_to_fib6_config(struct net *net,
1895 struct in6_rtmsg *rtmsg,
1896 struct fib6_config *cfg)
1898 memset(cfg, 0, sizeof(*cfg));
1900 cfg->fc_table = RT6_TABLE_MAIN;
1901 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1902 cfg->fc_metric = rtmsg->rtmsg_metric;
1903 cfg->fc_expires = rtmsg->rtmsg_info;
1904 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1905 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1906 cfg->fc_flags = rtmsg->rtmsg_flags;
1908 cfg->fc_nlinfo.nl_net = net;
1910 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1911 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1912 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1915 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1917 struct fib6_config cfg;
1918 struct in6_rtmsg rtmsg;
1919 int err;
1921 switch(cmd) {
1922 case SIOCADDRT: /* Add a route */
1923 case SIOCDELRT: /* Delete a route */
1924 if (!capable(CAP_NET_ADMIN))
1925 return -EPERM;
1926 err = copy_from_user(&rtmsg, arg,
1927 sizeof(struct in6_rtmsg));
1928 if (err)
1929 return -EFAULT;
1931 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1933 rtnl_lock();
1934 switch (cmd) {
1935 case SIOCADDRT:
1936 err = ip6_route_add(&cfg);
1937 break;
1938 case SIOCDELRT:
1939 err = ip6_route_del(&cfg);
1940 break;
1941 default:
1942 err = -EINVAL;
1944 rtnl_unlock();
1946 return err;
1949 return -EINVAL;
1953 * Drop the packet on the floor
1956 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1958 int type;
1959 struct dst_entry *dst = skb_dst(skb);
1960 switch (ipstats_mib_noroutes) {
1961 case IPSTATS_MIB_INNOROUTES:
1962 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1963 if (type == IPV6_ADDR_ANY) {
1964 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1965 IPSTATS_MIB_INADDRERRORS);
1966 break;
1968 /* FALLTHROUGH */
1969 case IPSTATS_MIB_OUTNOROUTES:
1970 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1971 ipstats_mib_noroutes);
1972 break;
1974 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1975 kfree_skb(skb);
1976 return 0;
1979 static int ip6_pkt_discard(struct sk_buff *skb)
1981 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1984 static int ip6_pkt_discard_out(struct sk_buff *skb)
1986 skb->dev = skb_dst(skb)->dev;
1987 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1990 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1992 static int ip6_pkt_prohibit(struct sk_buff *skb)
1994 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1997 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1999 skb->dev = skb_dst(skb)->dev;
2000 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2003 #endif
2006 * Allocate a dst for local (unicast / anycast) address.
2009 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2010 const struct in6_addr *addr,
2011 int anycast)
2013 struct net *net = dev_net(idev->dev);
2014 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2015 net->loopback_dev);
2016 struct neighbour *neigh;
2018 if (rt == NULL) {
2019 if (net_ratelimit())
2020 pr_warning("IPv6: Maximum number of routes reached,"
2021 " consider increasing route/max_size.\n");
2022 return ERR_PTR(-ENOMEM);
2025 in6_dev_hold(idev);
2027 rt->dst.flags = DST_HOST;
2028 rt->dst.input = ip6_input;
2029 rt->dst.output = ip6_output;
2030 rt->rt6i_idev = idev;
2031 rt->dst.obsolete = -1;
2033 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2034 if (anycast)
2035 rt->rt6i_flags |= RTF_ANYCAST;
2036 else
2037 rt->rt6i_flags |= RTF_LOCAL;
2038 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2039 if (IS_ERR(neigh)) {
2040 dst_free(&rt->dst);
2042 return ERR_CAST(neigh);
2044 rt->rt6i_nexthop = neigh;
2046 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2047 rt->rt6i_dst.plen = 128;
2048 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2050 atomic_set(&rt->dst.__refcnt, 1);
2052 return rt;
2055 int ip6_route_get_saddr(struct net *net,
2056 struct rt6_info *rt,
2057 const struct in6_addr *daddr,
2058 unsigned int prefs,
2059 struct in6_addr *saddr)
2061 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2062 int err = 0;
2063 if (rt->rt6i_prefsrc.plen)
2064 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2065 else
2066 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2067 daddr, prefs, saddr);
2068 return err;
2071 /* remove deleted ip from prefsrc entries */
2072 struct arg_dev_net_ip {
2073 struct net_device *dev;
2074 struct net *net;
2075 struct in6_addr *addr;
2078 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2080 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2081 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2082 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2084 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2085 rt != net->ipv6.ip6_null_entry &&
2086 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2087 /* remove prefsrc entry */
2088 rt->rt6i_prefsrc.plen = 0;
2090 return 0;
2093 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2095 struct net *net = dev_net(ifp->idev->dev);
2096 struct arg_dev_net_ip adni = {
2097 .dev = ifp->idev->dev,
2098 .net = net,
2099 .addr = &ifp->addr,
2101 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2104 struct arg_dev_net {
2105 struct net_device *dev;
2106 struct net *net;
2109 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2111 const struct arg_dev_net *adn = arg;
2112 const struct net_device *dev = adn->dev;
2114 if ((rt->rt6i_dev == dev || dev == NULL) &&
2115 rt != adn->net->ipv6.ip6_null_entry) {
2116 RT6_TRACE("deleted by ifdown %p\n", rt);
2117 return -1;
2119 return 0;
2122 void rt6_ifdown(struct net *net, struct net_device *dev)
2124 struct arg_dev_net adn = {
2125 .dev = dev,
2126 .net = net,
2129 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2130 icmp6_clean_all(fib6_ifdown, &adn);
2133 struct rt6_mtu_change_arg
2135 struct net_device *dev;
2136 unsigned mtu;
2139 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2141 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2142 struct inet6_dev *idev;
2144 /* In IPv6 pmtu discovery is not optional,
2145 so that RTAX_MTU lock cannot disable it.
2146 We still use this lock to block changes
2147 caused by addrconf/ndisc.
2150 idev = __in6_dev_get(arg->dev);
2151 if (idev == NULL)
2152 return 0;
2154 /* For an administrative MTU increase there is no way to discover
2155 an IPv6 PMTU increase, so the PMTU has to be updated here.
2156 Since RFC 1981 doesn't cover administrative MTU increases,
2157 updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2160 If the new MTU is less than the route PMTU, the new MTU will be the
2161 lowest MTU in the path; update the route PMTU to reflect the PMTU
2162 decrease. If the new MTU is greater than the route PMTU, and the
2163 old MTU is the lowest MTU in the path, update the route PMTU
2164 to reflect the increase. In this case, if the other nodes' MTU
2165 is also the lowest in the path, a Packet Too Big message will lead to
2166 PMTU discovery.
2168 if (rt->rt6i_dev == arg->dev &&
2169 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2170 (dst_mtu(&rt->dst) >= arg->mtu ||
2171 (dst_mtu(&rt->dst) < arg->mtu &&
2172 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2173 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2175 return 0;
2178 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2180 struct rt6_mtu_change_arg arg = {
2181 .dev = dev,
2182 .mtu = mtu,
2185 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2188 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2189 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2190 [RTA_OIF] = { .type = NLA_U32 },
2191 [RTA_IIF] = { .type = NLA_U32 },
2192 [RTA_PRIORITY] = { .type = NLA_U32 },
2193 [RTA_METRICS] = { .type = NLA_NESTED },
2196 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2197 struct fib6_config *cfg)
2199 struct rtmsg *rtm;
2200 struct nlattr *tb[RTA_MAX+1];
2201 int err;
2203 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2204 if (err < 0)
2205 goto errout;
2207 err = -EINVAL;
2208 rtm = nlmsg_data(nlh);
2209 memset(cfg, 0, sizeof(*cfg));
2211 cfg->fc_table = rtm->rtm_table;
2212 cfg->fc_dst_len = rtm->rtm_dst_len;
2213 cfg->fc_src_len = rtm->rtm_src_len;
2214 cfg->fc_flags = RTF_UP;
2215 cfg->fc_protocol = rtm->rtm_protocol;
2217 if (rtm->rtm_type == RTN_UNREACHABLE)
2218 cfg->fc_flags |= RTF_REJECT;
2220 if (rtm->rtm_type == RTN_LOCAL)
2221 cfg->fc_flags |= RTF_LOCAL;
2223 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2224 cfg->fc_nlinfo.nlh = nlh;
2225 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2227 if (tb[RTA_GATEWAY]) {
2228 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2229 cfg->fc_flags |= RTF_GATEWAY;
2232 if (tb[RTA_DST]) {
2233 int plen = (rtm->rtm_dst_len + 7) >> 3;
2235 if (nla_len(tb[RTA_DST]) < plen)
2236 goto errout;
2238 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2241 if (tb[RTA_SRC]) {
2242 int plen = (rtm->rtm_src_len + 7) >> 3;
2244 if (nla_len(tb[RTA_SRC]) < plen)
2245 goto errout;
2247 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2250 if (tb[RTA_PREFSRC])
2251 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2253 if (tb[RTA_OIF])
2254 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2256 if (tb[RTA_PRIORITY])
2257 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2259 if (tb[RTA_METRICS]) {
2260 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2261 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2264 if (tb[RTA_TABLE])
2265 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2267 err = 0;
2268 errout:
2269 return err;
2272 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2274 struct fib6_config cfg;
2275 int err;
2277 err = rtm_to_fib6_config(skb, nlh, &cfg);
2278 if (err < 0)
2279 return err;
2281 return ip6_route_del(&cfg);
2284 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2286 struct fib6_config cfg;
2287 int err;
2289 err = rtm_to_fib6_config(skb, nlh, &cfg);
2290 if (err < 0)
2291 return err;
2293 return ip6_route_add(&cfg);
2296 static inline size_t rt6_nlmsg_size(void)
2298 return NLMSG_ALIGN(sizeof(struct rtmsg))
2299 + nla_total_size(16) /* RTA_SRC */
2300 + nla_total_size(16) /* RTA_DST */
2301 + nla_total_size(16) /* RTA_GATEWAY */
2302 + nla_total_size(16) /* RTA_PREFSRC */
2303 + nla_total_size(4) /* RTA_TABLE */
2304 + nla_total_size(4) /* RTA_IIF */
2305 + nla_total_size(4) /* RTA_OIF */
2306 + nla_total_size(4) /* RTA_PRIORITY */
2307 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2308 + nla_total_size(sizeof(struct rta_cacheinfo));
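/* rt6_nlmsg_size() - worst-case length of a single route message;
 * inet6_rt_notify() below uses it to size the notification skb.
 */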
2311 static int rt6_fill_node(struct net *net,
2312 struct sk_buff *skb, struct rt6_info *rt,
2313 struct in6_addr *dst, struct in6_addr *src,
2314 int iif, int type, u32 pid, u32 seq,
2315 int prefix, int nowait, unsigned int flags)
2317 struct rtmsg *rtm;
2318 struct nlmsghdr *nlh;
2319 long expires;
2320 u32 table;
2322 if (prefix) { /* user wants prefix routes only */
2323 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2324 /* success since this is not a prefix route */
2325 return 1;
2329 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2330 if (nlh == NULL)
2331 return -EMSGSIZE;
2333 rtm = nlmsg_data(nlh);
2334 rtm->rtm_family = AF_INET6;
2335 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2336 rtm->rtm_src_len = rt->rt6i_src.plen;
2337 rtm->rtm_tos = 0;
2338 if (rt->rt6i_table)
2339 table = rt->rt6i_table->tb6_id;
2340 else
2341 table = RT6_TABLE_UNSPEC;
2342 rtm->rtm_table = table;
2343 NLA_PUT_U32(skb, RTA_TABLE, table);
2344 if (rt->rt6i_flags&RTF_REJECT)
2345 rtm->rtm_type = RTN_UNREACHABLE;
2346 else if (rt->rt6i_flags&RTF_LOCAL)
2347 rtm->rtm_type = RTN_LOCAL;
2348 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2349 rtm->rtm_type = RTN_LOCAL;
2350 else
2351 rtm->rtm_type = RTN_UNICAST;
2352 rtm->rtm_flags = 0;
2353 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2354 rtm->rtm_protocol = rt->rt6i_protocol;
2355 if (rt->rt6i_flags&RTF_DYNAMIC)
2356 rtm->rtm_protocol = RTPROT_REDIRECT;
2357 else if (rt->rt6i_flags & RTF_ADDRCONF)
2358 rtm->rtm_protocol = RTPROT_KERNEL;
2359 else if (rt->rt6i_flags&RTF_DEFAULT)
2360 rtm->rtm_protocol = RTPROT_RA;
2362 if (rt->rt6i_flags&RTF_CACHE)
2363 rtm->rtm_flags |= RTM_F_CLONED;
2365 if (dst) {
2366 NLA_PUT(skb, RTA_DST, 16, dst);
2367 rtm->rtm_dst_len = 128;
2368 } else if (rtm->rtm_dst_len)
2369 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2370 #ifdef CONFIG_IPV6_SUBTREES
2371 if (src) {
2372 NLA_PUT(skb, RTA_SRC, 16, src);
2373 rtm->rtm_src_len = 128;
2374 } else if (rtm->rtm_src_len)
2375 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2376 #endif
2377 if (iif) {
2378 #ifdef CONFIG_IPV6_MROUTE
2379 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2380 int err = ip6mr_get_route(net, skb, rtm, nowait);
2381 if (err <= 0) {
2382 if (!nowait) {
2383 if (err == 0)
2384 return 0;
2385 goto nla_put_failure;
2386 } else {
2387 if (err == -EMSGSIZE)
2388 goto nla_put_failure;
2391 } else
2392 #endif
2393 NLA_PUT_U32(skb, RTA_IIF, iif);
2394 } else if (dst) {
2395 struct in6_addr saddr_buf;
2396 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2397 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2400 if (rt->rt6i_prefsrc.plen) {
2401 struct in6_addr saddr_buf;
2402 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2403 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2406 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2407 goto nla_put_failure;
2409 if (rt->dst.neighbour)
2410 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2412 if (rt->dst.dev)
2413 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2415 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2417 if (!(rt->rt6i_flags & RTF_EXPIRES))
2418 expires = 0;
2419 else if (rt->rt6i_expires - jiffies < INT_MAX)
2420 expires = rt->rt6i_expires - jiffies;
2421 else
2422 expires = INT_MAX;
2424 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2425 expires, rt->dst.error) < 0)
2426 goto nla_put_failure;
2428 return nlmsg_end(skb, nlh);
2430 nla_put_failure:
2431 nlmsg_cancel(skb, nlh);
2432 return -EMSGSIZE;
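/* rt6_dump_route() - per-route callback used while dumping a routing
 * table to userspace; when the request carried RTM_F_PREFIX, only prefix
 * routes are emitted.
 */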
2435 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2437 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2438 int prefix;
2440 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2441 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2442 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2443 } else
2444 prefix = 0;
2446 return rt6_fill_node(arg->net,
2447 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2448 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2449 prefix, 0, NLM_F_MULTI);
2452 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2454 struct net *net = sock_net(in_skb->sk);
2455 struct nlattr *tb[RTA_MAX+1];
2456 struct rt6_info *rt;
2457 struct sk_buff *skb;
2458 struct rtmsg *rtm;
2459 struct flowi6 fl6;
2460 int err, iif = 0;
2462 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2463 if (err < 0)
2464 goto errout;
2466 err = -EINVAL;
2467 memset(&fl6, 0, sizeof(fl6));
2469 if (tb[RTA_SRC]) {
2470 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2471 goto errout;
2473 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2476 if (tb[RTA_DST]) {
2477 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2478 goto errout;
2480 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2483 if (tb[RTA_IIF])
2484 iif = nla_get_u32(tb[RTA_IIF]);
2486 if (tb[RTA_OIF])
2487 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2489 if (iif) {
2490 struct net_device *dev;
2491 dev = __dev_get_by_index(net, iif);
2492 if (!dev) {
2493 err = -ENODEV;
2494 goto errout;
2498 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2499 if (skb == NULL) {
2500 err = -ENOBUFS;
2501 goto errout;
2504 /* Reserve room for dummy headers; this skb can pass
2505 through a good chunk of the routing engine.
2507 skb_reset_mac_header(skb);
2508 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2510 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2511 skb_dst_set(skb, &rt->dst);
2513 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2514 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2515 nlh->nlmsg_seq, 0, 0, 0);
2516 if (err < 0) {
2517 kfree_skb(skb);
2518 goto errout;
2521 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2522 errout:
2523 return err;
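/* inet6_rt_notify() - send an rtnetlink notification (RTM_NEWROUTE or
 * RTM_DELROUTE, as given by @event) to RTNLGRP_IPV6_ROUTE listeners when a
 * route changes.
 */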
2526 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2528 struct sk_buff *skb;
2529 struct net *net = info->nl_net;
2530 u32 seq;
2531 int err;
2533 err = -ENOBUFS;
2534 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2536 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2537 if (skb == NULL)
2538 goto errout;
2540 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2541 event, info->pid, seq, 0, 0, 0);
2542 if (err < 0) {
2543 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2544 WARN_ON(err == -EMSGSIZE);
2545 kfree_skb(skb);
2546 goto errout;
2548 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2549 info->nlh, gfp_any());
2550 return;
2551 errout:
2552 if (err < 0)
2553 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
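/* When the loopback device registers in a namespace, point the special
 * null (and, with CONFIG_IPV6_MULTIPLE_TABLES, prohibit and blackhole)
 * route entries at it.
 */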
2556 static int ip6_route_dev_notify(struct notifier_block *this,
2557 unsigned long event, void *data)
2559 struct net_device *dev = (struct net_device *)data;
2560 struct net *net = dev_net(dev);
2562 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2563 net->ipv6.ip6_null_entry->dst.dev = dev;
2564 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2565 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2566 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2567 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2568 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2569 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2570 #endif
2573 return NOTIFY_OK;
2577 * /proc
2580 #ifdef CONFIG_PROC_FS
2582 struct rt6_proc_arg
2584 char *buffer;
2585 int offset;
2586 int length;
2587 int skip;
2588 int len;
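/* One line of /proc/net/ipv6_route per route: destination and prefix
 * length, source and prefix length, next hop, metric, reference count,
 * use count, flags and device name; every field is hex except the
 * device name.
 */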
2591 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2593 struct seq_file *m = p_arg;
2595 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2597 #ifdef CONFIG_IPV6_SUBTREES
2598 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2599 #else
2600 seq_puts(m, "00000000000000000000000000000000 00 ");
2601 #endif
2603 if (rt->rt6i_nexthop) {
2604 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2605 } else {
2606 seq_puts(m, "00000000000000000000000000000000");
2608 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2609 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2610 rt->dst.__use, rt->rt6i_flags,
2611 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2612 return 0;
2615 static int ipv6_route_show(struct seq_file *m, void *v)
2617 struct net *net = (struct net *)m->private;
2618 fib6_clean_all(net, rt6_info_route, 0, m);
2619 return 0;
2622 static int ipv6_route_open(struct inode *inode, struct file *file)
2624 return single_open_net(inode, file, ipv6_route_show);
2627 static const struct file_operations ipv6_route_proc_fops = {
2628 .owner = THIS_MODULE,
2629 .open = ipv6_route_open,
2630 .read = seq_read,
2631 .llseek = seq_lseek,
2632 .release = single_release_net,
2635 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2637 struct net *net = (struct net *)seq->private;
2638 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2639 net->ipv6.rt6_stats->fib_nodes,
2640 net->ipv6.rt6_stats->fib_route_nodes,
2641 net->ipv6.rt6_stats->fib_rt_alloc,
2642 net->ipv6.rt6_stats->fib_rt_entries,
2643 net->ipv6.rt6_stats->fib_rt_cache,
2644 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2645 net->ipv6.rt6_stats->fib_discarded_routes);
2647 return 0;
2650 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2652 return single_open_net(inode, file, rt6_stats_seq_show);
2655 static const struct file_operations rt6_stats_seq_fops = {
2656 .owner = THIS_MODULE,
2657 .open = rt6_stats_seq_open,
2658 .read = seq_read,
2659 .llseek = seq_lseek,
2660 .release = single_release_net,
2662 #endif /* CONFIG_PROC_FS */
2664 #ifdef CONFIG_SYSCTL
2666 static
2667 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2668 void __user *buffer, size_t *lenp, loff_t *ppos)
2670 struct net *net;
2671 int delay;
2672 if (!write)
2673 return -EINVAL;
2675 net = (struct net *)ctl->extra1;
2676 delay = net->ipv6.sysctl.flush_delay;
2677 proc_dointvec(ctl, write, buffer, lenp, ppos);
2678 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2679 return 0;
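/* Template for the net.ipv6.route.* sysctls under
 * /proc/sys/net/ipv6/route/; ipv6_route_sysctl_init() below duplicates it
 * per namespace and repoints each .data field at the per-net value.
 * Writing to the "flush" entry (e.g. "sysctl -w net.ipv6.route.flush=1")
 * triggers an immediate fib6 garbage collection via
 * ipv6_sysctl_rtcache_flush() above.
 */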
2682 ctl_table ipv6_route_table_template[] = {
2684 .procname = "flush",
2685 .data = &init_net.ipv6.sysctl.flush_delay,
2686 .maxlen = sizeof(int),
2687 .mode = 0200,
2688 .proc_handler = ipv6_sysctl_rtcache_flush
2691 .procname = "gc_thresh",
2692 .data = &ip6_dst_ops_template.gc_thresh,
2693 .maxlen = sizeof(int),
2694 .mode = 0644,
2695 .proc_handler = proc_dointvec,
2698 .procname = "max_size",
2699 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2700 .maxlen = sizeof(int),
2701 .mode = 0644,
2702 .proc_handler = proc_dointvec,
2705 .procname = "gc_min_interval",
2706 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2707 .maxlen = sizeof(int),
2708 .mode = 0644,
2709 .proc_handler = proc_dointvec_jiffies,
2712 .procname = "gc_timeout",
2713 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2714 .maxlen = sizeof(int),
2715 .mode = 0644,
2716 .proc_handler = proc_dointvec_jiffies,
2719 .procname = "gc_interval",
2720 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2721 .maxlen = sizeof(int),
2722 .mode = 0644,
2723 .proc_handler = proc_dointvec_jiffies,
2726 .procname = "gc_elasticity",
2727 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2728 .maxlen = sizeof(int),
2729 .mode = 0644,
2730 .proc_handler = proc_dointvec,
2733 .procname = "mtu_expires",
2734 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2735 .maxlen = sizeof(int),
2736 .mode = 0644,
2737 .proc_handler = proc_dointvec_jiffies,
2740 .procname = "min_adv_mss",
2741 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2742 .maxlen = sizeof(int),
2743 .mode = 0644,
2744 .proc_handler = proc_dointvec,
2747 .procname = "gc_min_interval_ms",
2748 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2749 .maxlen = sizeof(int),
2750 .mode = 0644,
2751 .proc_handler = proc_dointvec_ms_jiffies,
2756 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2758 struct ctl_table *table;
2760 table = kmemdup(ipv6_route_table_template,
2761 sizeof(ipv6_route_table_template),
2762 GFP_KERNEL);
2764 if (table) {
2765 table[0].data = &net->ipv6.sysctl.flush_delay;
2766 table[0].extra1 = net;
2767 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2768 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2769 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2770 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2771 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2772 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2773 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2774 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2775 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2778 return table;
2780 #endif
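/* Per-namespace setup: clone the dst_ops template, allocate the null
 * (and, if enabled, prohibit and blackhole) route templates, install the
 * routing sysctl defaults and create the proc entries.
 */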
2782 static int __net_init ip6_route_net_init(struct net *net)
2784 int ret = -ENOMEM;
2786 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2787 sizeof(net->ipv6.ip6_dst_ops));
2789 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2790 goto out_ip6_dst_ops;
2792 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2793 sizeof(*net->ipv6.ip6_null_entry),
2794 GFP_KERNEL);
2795 if (!net->ipv6.ip6_null_entry)
2796 goto out_ip6_dst_entries;
2797 net->ipv6.ip6_null_entry->dst.path =
2798 (struct dst_entry *)net->ipv6.ip6_null_entry;
2799 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2800 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2801 ip6_template_metrics, true);
2803 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2804 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2805 sizeof(*net->ipv6.ip6_prohibit_entry),
2806 GFP_KERNEL);
2807 if (!net->ipv6.ip6_prohibit_entry)
2808 goto out_ip6_null_entry;
2809 net->ipv6.ip6_prohibit_entry->dst.path =
2810 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2811 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2812 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2813 ip6_template_metrics, true);
2815 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2816 sizeof(*net->ipv6.ip6_blk_hole_entry),
2817 GFP_KERNEL);
2818 if (!net->ipv6.ip6_blk_hole_entry)
2819 goto out_ip6_prohibit_entry;
2820 net->ipv6.ip6_blk_hole_entry->dst.path =
2821 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2822 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2823 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2824 ip6_template_metrics, true);
2825 #endif
2827 net->ipv6.sysctl.flush_delay = 0;
2828 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2829 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2830 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2831 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2832 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2833 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2834 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2836 #ifdef CONFIG_PROC_FS
2837 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2838 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2839 #endif
2840 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2842 ret = 0;
2843 out:
2844 return ret;
2846 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2847 out_ip6_prohibit_entry:
2848 kfree(net->ipv6.ip6_prohibit_entry);
2849 out_ip6_null_entry:
2850 kfree(net->ipv6.ip6_null_entry);
2851 #endif
2852 out_ip6_dst_entries:
2853 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2854 out_ip6_dst_ops:
2855 goto out;
2858 static void __net_exit ip6_route_net_exit(struct net *net)
2860 #ifdef CONFIG_PROC_FS
2861 proc_net_remove(net, "ipv6_route");
2862 proc_net_remove(net, "rt6_stats");
2863 #endif
2864 kfree(net->ipv6.ip6_null_entry);
2865 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2866 kfree(net->ipv6.ip6_prohibit_entry);
2867 kfree(net->ipv6.ip6_blk_hole_entry);
2868 #endif
2869 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2872 static struct pernet_operations ip6_route_net_ops = {
2873 .init = ip6_route_net_init,
2874 .exit = ip6_route_net_exit,
2877 static struct notifier_block ip6_route_dev_notifier = {
2878 .notifier_call = ip6_route_dev_notify,
2879 .priority = 0,
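/* Subsystem init: create the rt6_info slab cache, register the per-net
 * operations, attach the special routes of init_net to the already
 * registered loopback device, bring up fib6, xfrm6 and the policy rules,
 * then register the rtnetlink handlers and the device notifier. Each
 * failure path unwinds the steps completed so far.
 */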
2882 int __init ip6_route_init(void)
2884 int ret;
2886 ret = -ENOMEM;
2887 ip6_dst_ops_template.kmem_cachep =
2888 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2889 SLAB_HWCACHE_ALIGN, NULL);
2890 if (!ip6_dst_ops_template.kmem_cachep)
2891 goto out;
2893 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2894 if (ret)
2895 goto out_kmem_cache;
2897 ret = register_pernet_subsys(&ip6_route_net_ops);
2898 if (ret)
2899 goto out_dst_entries;
2901 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2903 /* Registration of the loopback device happens before this code runs,
2904 * so the loopback reference in rt6_info has not been taken; take it
2905 * manually for init_net */
2906 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2907 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2908 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2909 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2910 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2911 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2912 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2913 #endif
2914 ret = fib6_init();
2915 if (ret)
2916 goto out_register_subsys;
2918 ret = xfrm6_init();
2919 if (ret)
2920 goto out_fib6_init;
2922 ret = fib6_rules_init();
2923 if (ret)
2924 goto xfrm6_init;
2926 ret = -ENOBUFS;
2927 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2928 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2929 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2930 goto fib6_rules_init;
2932 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2933 if (ret)
2934 goto fib6_rules_init;
2936 out:
2937 return ret;
2939 fib6_rules_init:
2940 fib6_rules_cleanup();
2941 xfrm6_init:
2942 xfrm6_fini();
2943 out_fib6_init:
2944 fib6_gc_cleanup();
2945 out_register_subsys:
2946 unregister_pernet_subsys(&ip6_route_net_ops);
2947 out_dst_entries:
2948 dst_entries_destroy(&ip6_dst_blackhole_ops);
2949 out_kmem_cache:
2950 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2951 goto out;
2954 void ip6_route_cleanup(void)
2956 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2957 fib6_rules_cleanup();
2958 xfrm6_fini();
2959 fib6_gc_cleanup();
2960 unregister_pernet_subsys(&ip6_route_net_ops);
2961 dst_entries_destroy(&ip6_dst_blackhole_ops);
2962 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);