[IPV6] NDISC: Search over all possible rules on receipt of redirect.
[linux-2.6/x86.git] / net / ipv6 / route.c
blob8d00a9d77f019e0a47efaff154be2a5dddcfd7d9
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 /* Changes:
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
40 #ifdef CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #endif
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above,
 * RDBG() and RT6_TRACE() become printk() calls; below that they
 * expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* When non-zero, the policy lookups clone routes that already have a nexthop. */
#define CLONE_OFFLINK_ROUTE 0

/* Bits of the "strict" argument taken by rt6_select()/rt6_score_route(). */
#define RT6_SELECT_F_IFACE	0x1	/* reject routes on a non-matching device */
#define RT6_SELECT_F_REACHABLE	0x2	/* reject routes scored as unreachable */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107 #endif
109 static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
123 struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Static reject routes available when multiple tables are configured.
 * They mirror ip6_null_entry and differ only in the dst error they carry.
 */

/* Reject route reporting -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
		}
	},
};

/* Reject route reporting -EINVAL. */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
		}
	},
};

#endif
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193 static void ip6_dst_destroy(struct dst_entry *dst)
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
225 static inline int rt6_need_strict(struct in6_addr *daddr)
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 * Route lookup. Any table->tb6_lock is implied.
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
256 local = sprt;
260 if (local)
261 return local;
263 if (strict)
264 return &ip6_null_entry;
266 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state and was last
 * updated more than rtr_probe_interval ago, stamp it with the current time
 * and send a Neighbour Solicitation to its solicited-node multicast
 * address.  The timestamp is what rate-limits the probes.
 * A NULL @rt, or a route without a nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *nexthop = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr solicited;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked test first; it is repeated under the lock below. */
	if (!nexthop || (nexthop->nud_state & NUD_VALID))
		return;

	read_lock_bh(&nexthop->lock);
	due = !(nexthop->nud_state & NUD_VALID) &&
	      time_after(jiffies, nexthop->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		nexthop->updated = jiffies;
	read_unlock_bh(&nexthop->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&nexthop->primary_key;
	addrconf_addr_solict_mult(target, &solicited);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &solicited, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
306 * Default Router Selection (RFC 2461 6.3.6)
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
319 static int inline rt6_check_neigh(struct rt6_info *rt)
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
329 m = 2;
330 read_unlock_bh(&neigh->lock);
332 return m;
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
338 int m, n;
340 m = rt6_check_dev(rt, oif);
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346 n = rt6_check_neigh(rt);
347 if (n > 1)
348 m |= 16;
349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
350 return -1;
351 return m;
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
365 for (rt = rt0, metric = rt0->rt6i_metric;
366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367 rt = rt->u.next) {
368 int m;
370 if (rt6_check_expired(rt))
371 continue;
373 last = rt;
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 continue;
379 if (m > mpri) {
380 rt6_probe(match);
381 match = rt;
382 mpri = m;
383 } else {
384 rt6_probe(rt);
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
392 static DEFINE_SPINLOCK(lock);
393 spin_lock(&lock);
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
397 spin_unlock(&lock);
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
403 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * The option is validated, then the matching route (prefix, gateway,
 * interface) is looked up.  A lifetime of zero deletes an existing route;
 * a non-zero lifetime creates the route if it is missing or refreshes the
 * preference of an existing one.  A lifetime of 0xffffffff means "never
 * expires"; any other value arms RTF_EXPIRES.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * A bare "len < sizeof(...)" converts a negative len to a huge
	 * unsigned value and lets it through, so test the sign explicitly.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order; bring it to host order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3) {
		/* the option carries a full 128-bit prefix */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* copy only prefix_len bits into a local buffer */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* zero lifetime withdraws the route */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
/*
 * BACKTRACK() - climb the FIB tree after a strict lookup came up empty.
 *
 * Expects the following names in the expanding function: variables 'rt',
 * 'fn' and 'flags', and labels 'restart' and 'out'.
 *
 * When 'rt' is &ip6_null_entry and RT6_F_STRICT is set in 'flags', walk
 * towards the root through fn->parent:
 *  - on a node flagged RTN_TL_ROOT, take a reference on 'rt' and jump
 *    to 'out';
 *  - on a node flagged RTN_RTINFO, jump to 'restart' to retry the
 *    selection from that node.
 * If the walk runs out of parents, execution simply falls through.
 *
 * NOTE(review): the RTN_TL_ROOT path already holds 'rt' before jumping,
 * so an 'out' label that takes its own reference ends up holding
 * ip6_null_entry twice - confirm that is intended.
 *
 * The body is wrapped in do { } while (0) so that "BACKTRACK();" behaves
 * as a single statement, including next to an if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
499 struct fib6_node *fn;
500 struct rt6_info *rt;
502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504 restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
508 dst_hold(&rt->u.dst);
509 out:
510 read_unlock_bh(&table->tb6_lock);
512 rt->u.dst.lastuse = jiffies;
513 rt->u.dst.__use++;
515 return rt;
519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
538 dst_release(dst);
540 return NULL;
543 /* ip6_ins_rt is called with FREE table->tb6_lock.
544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
549 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
551 int err;
552 struct fib6_table *table;
554 table = rt->rt6i_table;
555 write_lock_bh(&table->tb6_lock);
556 err = fib6_add(&table->tb6_root, rt, info);
557 write_unlock_bh(&table->tb6_lock);
559 return err;
562 int ip6_ins_rt(struct rt6_info *rt)
564 return __ip6_ins_rt(rt, NULL);
567 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
568 struct in6_addr *saddr)
570 struct rt6_info *rt;
573 * Clone the route.
576 rt = ip6_rt_copy(ort);
578 if (rt) {
579 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
580 if (rt->rt6i_dst.plen != 128 &&
581 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
582 rt->rt6i_flags |= RTF_ANYCAST;
583 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
586 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
587 rt->rt6i_dst.plen = 128;
588 rt->rt6i_flags |= RTF_CACHE;
589 rt->u.dst.flags |= DST_HOST;
591 #ifdef CONFIG_IPV6_SUBTREES
592 if (rt->rt6i_src.plen && saddr) {
593 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
594 rt->rt6i_src.plen = 128;
596 #endif
598 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
602 return rt;
605 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
607 struct rt6_info *rt = ip6_rt_copy(ort);
608 if (rt) {
609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 if (rt->rt6i_flags & RTF_REJECT)
613 rt->u.dst.error = ort->u.dst.error;
614 rt->u.dst.flags |= DST_HOST;
615 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
617 return rt;
620 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
621 struct flowi *fl, int flags)
623 struct fib6_node *fn;
624 struct rt6_info *rt, *nrt;
625 int strict = 0;
626 int attempts = 3;
627 int err;
628 int reachable = RT6_SELECT_F_REACHABLE;
630 if (flags & RT6_F_STRICT)
631 strict = RT6_SELECT_F_IFACE;
633 relookup:
634 read_lock_bh(&table->tb6_lock);
636 restart_2:
637 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
639 restart:
640 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
641 BACKTRACK();
642 if (rt == &ip6_null_entry ||
643 rt->rt6i_flags & RTF_CACHE)
644 goto out;
646 dst_hold(&rt->u.dst);
647 read_unlock_bh(&table->tb6_lock);
649 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
650 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
651 else {
652 #if CLONE_OFFLINK_ROUTE
653 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
654 #else
655 goto out2;
656 #endif
659 dst_release(&rt->u.dst);
660 rt = nrt ? : &ip6_null_entry;
662 dst_hold(&rt->u.dst);
663 if (nrt) {
664 err = ip6_ins_rt(nrt);
665 if (!err)
666 goto out2;
669 if (--attempts <= 0)
670 goto out2;
673 * Race condition! In the gap, when table->tb6_lock was
674 * released someone could insert this route. Relookup.
676 dst_release(&rt->u.dst);
677 goto relookup;
679 out:
680 if (reachable) {
681 reachable = 0;
682 goto restart_2;
684 dst_hold(&rt->u.dst);
685 read_unlock_bh(&table->tb6_lock);
686 out2:
687 rt->u.dst.lastuse = jiffies;
688 rt->u.dst.__use++;
690 return rt;
693 void ip6_route_input(struct sk_buff *skb)
695 struct ipv6hdr *iph = skb->nh.ipv6h;
696 struct flowi fl = {
697 .iif = skb->dev->ifindex,
698 .nl_u = {
699 .ip6_u = {
700 .daddr = iph->daddr,
701 .saddr = iph->saddr,
702 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
705 .proto = iph->nexthdr,
707 int flags = 0;
709 if (rt6_need_strict(&iph->daddr))
710 flags |= RT6_F_STRICT;
712 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
715 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
716 struct flowi *fl, int flags)
718 struct fib6_node *fn;
719 struct rt6_info *rt, *nrt;
720 int strict = 0;
721 int attempts = 3;
722 int err;
723 int reachable = RT6_SELECT_F_REACHABLE;
725 if (flags & RT6_F_STRICT)
726 strict = RT6_SELECT_F_IFACE;
728 relookup:
729 read_lock_bh(&table->tb6_lock);
731 restart_2:
732 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
734 restart:
735 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
736 BACKTRACK();
737 if (rt == &ip6_null_entry ||
738 rt->rt6i_flags & RTF_CACHE)
739 goto out;
741 dst_hold(&rt->u.dst);
742 read_unlock_bh(&table->tb6_lock);
744 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
745 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
746 else {
747 #if CLONE_OFFLINK_ROUTE
748 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
749 #else
750 goto out2;
751 #endif
754 dst_release(&rt->u.dst);
755 rt = nrt ? : &ip6_null_entry;
757 dst_hold(&rt->u.dst);
758 if (nrt) {
759 err = ip6_ins_rt(nrt);
760 if (!err)
761 goto out2;
764 if (--attempts <= 0)
765 goto out2;
768 * Race condition! In the gap, when table->tb6_lock was
769 * released someone could insert this route. Relookup.
771 dst_release(&rt->u.dst);
772 goto relookup;
774 out:
775 if (reachable) {
776 reachable = 0;
777 goto restart_2;
779 dst_hold(&rt->u.dst);
780 read_unlock_bh(&table->tb6_lock);
781 out2:
782 rt->u.dst.lastuse = jiffies;
783 rt->u.dst.__use++;
784 return rt;
787 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
789 int flags = 0;
791 if (rt6_need_strict(&fl->fl6_dst))
792 flags |= RT6_F_STRICT;
794 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
799 * Destination cache support functions
802 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
804 struct rt6_info *rt;
806 rt = (struct rt6_info *) dst;
808 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
809 return dst;
811 return NULL;
814 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
816 struct rt6_info *rt = (struct rt6_info *) dst;
818 if (rt) {
819 if (rt->rt6i_flags & RTF_CACHE)
820 ip6_del_rt(rt);
821 else
822 dst_release(dst);
824 return NULL;
827 static void ip6_link_failure(struct sk_buff *skb)
829 struct rt6_info *rt;
831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
833 rt = (struct rt6_info *) skb->dst;
834 if (rt) {
835 if (rt->rt6i_flags&RTF_CACHE) {
836 dst_set_expires(&rt->u.dst, 0);
837 rt->rt6i_flags |= RTF_EXPIRES;
838 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839 rt->rt6i_node->fn_sernum = -1;
843 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
845 struct rt6_info *rt6 = (struct rt6_info*)dst;
847 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848 rt6->rt6i_flags |= RTF_MODIFIED;
849 if (mtu < IPV6_MIN_MTU) {
850 mtu = IPV6_MIN_MTU;
851 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
853 dst->metrics[RTAX_MTU-1] = mtu;
854 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
858 static int ipv6_get_mtu(struct net_device *dev);
860 static inline unsigned int ipv6_advmss(unsigned int mtu)
862 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
864 if (mtu < ip6_rt_min_advmss)
865 mtu = ip6_rt_min_advmss;
868 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
869 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
870 * IPV6_MAXPLEN is also valid and means: "any MSS,
871 * rely only on pmtu discovery"
873 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
874 mtu = IPV6_MAXPLEN;
875 return mtu;
878 static struct dst_entry *ndisc_dst_gc_list;
879 static DEFINE_SPINLOCK(ndisc_lock);
881 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
882 struct neighbour *neigh,
883 struct in6_addr *addr,
884 int (*output)(struct sk_buff *))
886 struct rt6_info *rt;
887 struct inet6_dev *idev = in6_dev_get(dev);
889 if (unlikely(idev == NULL))
890 return NULL;
892 rt = ip6_dst_alloc();
893 if (unlikely(rt == NULL)) {
894 in6_dev_put(idev);
895 goto out;
898 dev_hold(dev);
899 if (neigh)
900 neigh_hold(neigh);
901 else
902 neigh = ndisc_get_neigh(dev, addr);
904 rt->rt6i_dev = dev;
905 rt->rt6i_idev = idev;
906 rt->rt6i_nexthop = neigh;
907 atomic_set(&rt->u.dst.__refcnt, 1);
908 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911 rt->u.dst.output = output;
913 #if 0 /* there's no chance to use these for ndisc */
914 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
915 ? DST_HOST
916 : 0;
917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918 rt->rt6i_dst.plen = 128;
919 #endif
921 spin_lock_bh(&ndisc_lock);
922 rt->u.dst.next = ndisc_dst_gc_list;
923 ndisc_dst_gc_list = &rt->u.dst;
924 spin_unlock_bh(&ndisc_lock);
926 fib6_force_start_gc();
928 out:
929 return (struct dst_entry *)rt;
932 int ndisc_dst_gc(int *more)
934 struct dst_entry *dst, *next, **pprev;
935 int freed;
937 next = NULL;
938 freed = 0;
940 spin_lock_bh(&ndisc_lock);
941 pprev = &ndisc_dst_gc_list;
943 while ((dst = *pprev) != NULL) {
944 if (!atomic_read(&dst->__refcnt)) {
945 *pprev = dst->next;
946 dst_free(dst);
947 freed++;
948 } else {
949 pprev = &dst->next;
950 (*more)++;
954 spin_unlock_bh(&ndisc_lock);
956 return freed;
959 static int ip6_dst_gc(void)
961 static unsigned expire = 30*HZ;
962 static unsigned long last_gc;
963 unsigned long now = jiffies;
965 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
967 goto out;
969 expire++;
970 fib6_run_gc(expire);
971 last_gc = now;
972 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973 expire = ip6_rt_gc_timeout>>1;
975 out:
976 expire -= expire>>ip6_rt_gc_elasticity;
977 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
980 /* Clean host part of a prefix. Not necessary in radix tree,
981 but results in cleaner routing tables.
983 Remove it only when all the things will work!
986 static int ipv6_get_mtu(struct net_device *dev)
988 int mtu = IPV6_MIN_MTU;
989 struct inet6_dev *idev;
991 idev = in6_dev_get(dev);
992 if (idev) {
993 mtu = idev->cnf.mtu6;
994 in6_dev_put(idev);
996 return mtu;
999 int ipv6_get_hoplimit(struct net_device *dev)
1001 int hoplimit = ipv6_devconf.hop_limit;
1002 struct inet6_dev *idev;
1004 idev = in6_dev_get(dev);
1005 if (idev) {
1006 hoplimit = idev->cnf.hop_limit;
1007 in6_dev_put(idev);
1009 return hoplimit;
1016 int ip6_route_add(struct fib6_config *cfg)
1018 int err;
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
1022 struct fib6_table *table;
1023 int addr_type;
1025 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1026 return -EINVAL;
1027 #ifndef CONFIG_IPV6_SUBTREES
1028 if (cfg->fc_src_len)
1029 return -EINVAL;
1030 #endif
1031 if (cfg->fc_ifindex) {
1032 err = -ENODEV;
1033 dev = dev_get_by_index(cfg->fc_ifindex);
1034 if (!dev)
1035 goto out;
1036 idev = in6_dev_get(dev);
1037 if (!idev)
1038 goto out;
1041 if (cfg->fc_metric == 0)
1042 cfg->fc_metric = IP6_RT_PRIO_USER;
1044 table = fib6_new_table(cfg->fc_table);
1045 if (table == NULL) {
1046 err = -ENOBUFS;
1047 goto out;
1050 rt = ip6_dst_alloc();
1052 if (rt == NULL) {
1053 err = -ENOMEM;
1054 goto out;
1057 rt->u.dst.obsolete = -1;
1058 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1060 if (cfg->fc_protocol == RTPROT_UNSPEC)
1061 cfg->fc_protocol = RTPROT_BOOT;
1062 rt->rt6i_protocol = cfg->fc_protocol;
1064 addr_type = ipv6_addr_type(&cfg->fc_dst);
1066 if (addr_type & IPV6_ADDR_MULTICAST)
1067 rt->u.dst.input = ip6_mc_input;
1068 else
1069 rt->u.dst.input = ip6_forward;
1071 rt->u.dst.output = ip6_output;
1073 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1074 rt->rt6i_dst.plen = cfg->fc_dst_len;
1075 if (rt->rt6i_dst.plen == 128)
1076 rt->u.dst.flags = DST_HOST;
1078 #ifdef CONFIG_IPV6_SUBTREES
1079 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1080 rt->rt6i_src.plen = cfg->fc_src_len;
1081 #endif
1083 rt->rt6i_metric = cfg->fc_metric;
1085 /* We cannot add true routes via loopback here,
1086 they would result in kernel looping; promote them to reject routes
1088 if ((cfg->fc_flags & RTF_REJECT) ||
1089 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1090 /* hold loopback dev/idev if we haven't done so. */
1091 if (dev != &loopback_dev) {
1092 if (dev) {
1093 dev_put(dev);
1094 in6_dev_put(idev);
1096 dev = &loopback_dev;
1097 dev_hold(dev);
1098 idev = in6_dev_get(dev);
1099 if (!idev) {
1100 err = -ENODEV;
1101 goto out;
1104 rt->u.dst.output = ip6_pkt_discard_out;
1105 rt->u.dst.input = ip6_pkt_discard;
1106 rt->u.dst.error = -ENETUNREACH;
1107 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1108 goto install_route;
1111 if (cfg->fc_flags & RTF_GATEWAY) {
1112 struct in6_addr *gw_addr;
1113 int gwa_type;
1115 gw_addr = &cfg->fc_gateway;
1116 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1117 gwa_type = ipv6_addr_type(gw_addr);
1119 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1120 struct rt6_info *grt;
1122 /* IPv6 strictly inhibits using not link-local
1123 addresses as nexthop address.
1124 Otherwise, router will not able to send redirects.
1125 It is very good, but in some (rare!) circumstances
1126 (SIT, PtP, NBMA NOARP links) it is handy to allow
1127 some exceptions. --ANK
1129 err = -EINVAL;
1130 if (!(gwa_type&IPV6_ADDR_UNICAST))
1131 goto out;
1133 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1135 err = -EHOSTUNREACH;
1136 if (grt == NULL)
1137 goto out;
1138 if (dev) {
1139 if (dev != grt->rt6i_dev) {
1140 dst_release(&grt->u.dst);
1141 goto out;
1143 } else {
1144 dev = grt->rt6i_dev;
1145 idev = grt->rt6i_idev;
1146 dev_hold(dev);
1147 in6_dev_hold(grt->rt6i_idev);
1149 if (!(grt->rt6i_flags&RTF_GATEWAY))
1150 err = 0;
1151 dst_release(&grt->u.dst);
1153 if (err)
1154 goto out;
1156 err = -EINVAL;
1157 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1158 goto out;
1161 err = -ENODEV;
1162 if (dev == NULL)
1163 goto out;
1165 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1166 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1167 if (IS_ERR(rt->rt6i_nexthop)) {
1168 err = PTR_ERR(rt->rt6i_nexthop);
1169 rt->rt6i_nexthop = NULL;
1170 goto out;
1174 rt->rt6i_flags = cfg->fc_flags;
1176 install_route:
1177 if (cfg->fc_mx) {
1178 struct nlattr *nla;
1179 int remaining;
1181 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1182 int type = nla->nla_type;
1184 if (type) {
1185 if (type > RTAX_MAX) {
1186 err = -EINVAL;
1187 goto out;
1190 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1195 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1196 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1197 if (!rt->u.dst.metrics[RTAX_MTU-1])
1198 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1199 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1200 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1201 rt->u.dst.dev = dev;
1202 rt->rt6i_idev = idev;
1203 rt->rt6i_table = table;
1204 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1206 out:
1207 if (dev)
1208 dev_put(dev);
1209 if (idev)
1210 in6_dev_put(idev);
1211 if (rt)
1212 dst_free((struct dst_entry *) rt);
1213 return err;
1216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1218 int err;
1219 struct fib6_table *table;
1221 if (rt == &ip6_null_entry)
1222 return -ENOENT;
1224 table = rt->rt6i_table;
1225 write_lock_bh(&table->tb6_lock);
1227 err = fib6_del(rt, info);
1228 dst_release(&rt->u.dst);
1230 write_unlock_bh(&table->tb6_lock);
1232 return err;
1235 int ip6_del_rt(struct rt6_info *rt)
1237 return __ip6_del_rt(rt, NULL);
1240 static int ip6_route_del(struct fib6_config *cfg)
1242 struct fib6_table *table;
1243 struct fib6_node *fn;
1244 struct rt6_info *rt;
1245 int err = -ESRCH;
1247 table = fib6_get_table(cfg->fc_table);
1248 if (table == NULL)
1249 return err;
1251 read_lock_bh(&table->tb6_lock);
1253 fn = fib6_locate(&table->tb6_root,
1254 &cfg->fc_dst, cfg->fc_dst_len,
1255 &cfg->fc_src, cfg->fc_src_len);
1257 if (fn) {
1258 for (rt = fn->leaf; rt; rt = rt->u.next) {
1259 if (cfg->fc_ifindex &&
1260 (rt->rt6i_dev == NULL ||
1261 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1262 continue;
1263 if (cfg->fc_flags & RTF_GATEWAY &&
1264 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1265 continue;
1266 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1267 continue;
1268 dst_hold(&rt->u.dst);
1269 read_unlock_bh(&table->tb6_lock);
1271 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1274 read_unlock_bh(&table->tb6_lock);
1276 return err;
1280 * Handle redirects
1282 struct ip6rd_flowi {
1283 struct flowi fl;
1284 struct in6_addr gateway;
1287 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1288 struct flowi *fl,
1289 int flags)
1291 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1292 struct rt6_info *rt;
1293 struct fib6_node *fn;
1296 * Get the "current" route for this destination and
1297 * check if the redirect has come from approriate router.
1299 * RFC 2461 specifies that redirects should only be
1300 * accepted if they come from the nexthop to the target.
1301 * Due to the way the routes are chosen, this notion
1302 * is a bit fuzzy and one might need to check all possible
1303 * routes.
1306 read_lock_bh(&table->tb6_lock);
1307 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1308 restart:
1309 for (rt = fn->leaf; rt; rt = rt->u.next) {
1311 * Current route is on-link; redirect is always invalid.
1313 * Seems, previous statement is not true. It could
1314 * be node, which looks for us as on-link (f.e. proxy ndisc)
1315 * But then router serving it might decide, that we should
1316 * know truth 8)8) --ANK (980726).
1318 if (rt6_check_expired(rt))
1319 continue;
1320 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321 continue;
1322 if (fl->oif != rt->rt6i_dev->ifindex)
1323 continue;
1324 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1325 continue;
1326 break;
1329 if (!rt) {
1330 if (rt6_need_strict(&fl->fl6_dst)) {
1331 while ((fn = fn->parent) != NULL) {
1332 if (fn->fn_flags & RTN_ROOT)
1333 break;
1334 if (fn->fn_flags & RTN_RTINFO)
1335 goto restart;
1338 rt = &ip6_null_entry;
1340 dst_hold(&rt->u.dst);
1342 read_unlock_bh(&table->tb6_lock);
1344 return rt;
1347 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1348 struct in6_addr *src,
1349 struct in6_addr *gateway,
1350 struct net_device *dev)
1352 struct ip6rd_flowi rdfl = {
1353 .fl = {
1354 .oif = dev->ifindex,
1355 .nl_u = {
1356 .ip6_u = {
1357 .daddr = *dest,
1358 .saddr = *src,
1362 .gateway = *gateway,
1364 int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1366 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1369 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1370 struct in6_addr *saddr,
1371 struct neighbour *neigh, u8 *lladdr, int on_link)
1373 struct rt6_info *rt, *nrt = NULL;
1374 struct netevent_redirect netevent;
1376 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1378 if (rt == &ip6_null_entry) {
1379 if (net_ratelimit())
1380 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1381 "for redirect target\n");
1382 goto out;
1386 * We have finally decided to accept it.
1389 neigh_update(neigh, lladdr, NUD_STALE,
1390 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1391 NEIGH_UPDATE_F_OVERRIDE|
1392 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1393 NEIGH_UPDATE_F_ISROUTER))
1397 * Redirect received -> path was valid.
1398 * Look, redirects are sent only in response to data packets,
1399 * so that this nexthop apparently is reachable. --ANK
1401 dst_confirm(&rt->u.dst);
1403 /* Duplicate redirect: silently ignore. */
1404 if (neigh == rt->u.dst.neighbour)
1405 goto out;
1407 nrt = ip6_rt_copy(rt);
1408 if (nrt == NULL)
1409 goto out;
1411 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1412 if (on_link)
1413 nrt->rt6i_flags &= ~RTF_GATEWAY;
1415 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1416 nrt->rt6i_dst.plen = 128;
1417 nrt->u.dst.flags |= DST_HOST;
1419 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1420 nrt->rt6i_nexthop = neigh_clone(neigh);
1421 /* Reset pmtu, it may be better */
1422 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1423 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1425 if (ip6_ins_rt(nrt))
1426 goto out;
1428 netevent.old = &rt->u.dst;
1429 netevent.new = &nrt->u.dst;
1430 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1432 if (rt->rt6i_flags&RTF_CACHE) {
1433 ip6_del_rt(rt);
1434 return;
1437 out:
1438 dst_release(&rt->u.dst);
1439 return;
1443 * Handle ICMP "packet too big" messages
1444 * i.e. Path MTU discovery
1447 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1448 struct net_device *dev, u32 pmtu)
1450 struct rt6_info *rt, *nrt;
1451 int allfrag = 0;
1453 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1454 if (rt == NULL)
1455 return;
1457 if (pmtu >= dst_mtu(&rt->u.dst))
1458 goto out;
1460 if (pmtu < IPV6_MIN_MTU) {
1462 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1463 * MTU (1280) and a fragment header should always be included
1464 * after a node receiving Too Big message reporting PMTU is
1465 * less than the IPv6 Minimum Link MTU.
1467 pmtu = IPV6_MIN_MTU;
1468 allfrag = 1;
1471 /* New mtu received -> path was valid.
1472 They are sent only in response to data packets,
1473 so that this nexthop apparently is reachable. --ANK
1475 dst_confirm(&rt->u.dst);
1477 /* Host route. If it is static, it would be better
1478 not to override it, but add new one, so that
1479 when cache entry will expire old pmtu
1480 would return automatically.
1482 if (rt->rt6i_flags & RTF_CACHE) {
1483 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1484 if (allfrag)
1485 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1486 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1487 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1488 goto out;
1491 /* Network route.
1492 Two cases are possible:
1493 1. It is connected route. Action: COW
1494 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1496 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1497 nrt = rt6_alloc_cow(rt, daddr, saddr);
1498 else
1499 nrt = rt6_alloc_clone(rt, daddr);
1501 if (nrt) {
1502 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1503 if (allfrag)
1504 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1506 /* According to RFC 1981, detecting PMTU increase shouldn't be
1507 * happened within 5 mins, the recommended timer is 10 mins.
1508 * Here this route expiration time is set to ip6_rt_mtu_expires
1509 * which is 10 mins. After 10 mins the decreased pmtu is expired
1510 * and detecting PMTU increase will be automatically happened.
1512 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1513 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1515 ip6_ins_rt(nrt);
1517 out:
1518 dst_release(&rt->u.dst);
1522 * Misc support functions
1525 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1527 struct rt6_info *rt = ip6_dst_alloc();
1529 if (rt) {
1530 rt->u.dst.input = ort->u.dst.input;
1531 rt->u.dst.output = ort->u.dst.output;
1533 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1534 rt->u.dst.dev = ort->u.dst.dev;
1535 if (rt->u.dst.dev)
1536 dev_hold(rt->u.dst.dev);
1537 rt->rt6i_idev = ort->rt6i_idev;
1538 if (rt->rt6i_idev)
1539 in6_dev_hold(rt->rt6i_idev);
1540 rt->u.dst.lastuse = jiffies;
1541 rt->rt6i_expires = 0;
1543 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1544 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1545 rt->rt6i_metric = 0;
1547 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1548 #ifdef CONFIG_IPV6_SUBTREES
1549 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1550 #endif
1551 rt->rt6i_table = ort->rt6i_table;
1553 return rt;
1556 #ifdef CONFIG_IPV6_ROUTE_INFO
1557 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1558 struct in6_addr *gwaddr, int ifindex)
1560 struct fib6_node *fn;
1561 struct rt6_info *rt = NULL;
1562 struct fib6_table *table;
1564 table = fib6_get_table(RT6_TABLE_INFO);
1565 if (table == NULL)
1566 return NULL;
1568 write_lock_bh(&table->tb6_lock);
1569 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1570 if (!fn)
1571 goto out;
1573 for (rt = fn->leaf; rt; rt = rt->u.next) {
1574 if (rt->rt6i_dev->ifindex != ifindex)
1575 continue;
1576 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1577 continue;
1578 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1579 continue;
1580 dst_hold(&rt->u.dst);
1581 break;
1583 out:
1584 write_unlock_bh(&table->tb6_lock);
1585 return rt;
1588 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1589 struct in6_addr *gwaddr, int ifindex,
1590 unsigned pref)
1592 struct fib6_config cfg = {
1593 .fc_table = RT6_TABLE_INFO,
1594 .fc_metric = 1024,
1595 .fc_ifindex = ifindex,
1596 .fc_dst_len = prefixlen,
1597 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1598 RTF_UP | RTF_PREF(pref),
1601 ipv6_addr_copy(&cfg.fc_dst, prefix);
1602 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1604 /* We should treat it as a default route if prefix length is 0. */
1605 if (!prefixlen)
1606 cfg.fc_flags |= RTF_DEFAULT;
1608 ip6_route_add(&cfg);
1610 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1612 #endif
1614 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1616 struct rt6_info *rt;
1617 struct fib6_table *table;
1619 table = fib6_get_table(RT6_TABLE_DFLT);
1620 if (table == NULL)
1621 return NULL;
1623 write_lock_bh(&table->tb6_lock);
1624 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1625 if (dev == rt->rt6i_dev &&
1626 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1627 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1628 break;
1630 if (rt)
1631 dst_hold(&rt->u.dst);
1632 write_unlock_bh(&table->tb6_lock);
1633 return rt;
1636 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1637 struct net_device *dev,
1638 unsigned int pref)
1640 struct fib6_config cfg = {
1641 .fc_table = RT6_TABLE_DFLT,
1642 .fc_metric = 1024,
1643 .fc_ifindex = dev->ifindex,
1644 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1645 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1648 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1650 ip6_route_add(&cfg);
1652 return rt6_get_dflt_router(gwaddr, dev);
1655 void rt6_purge_dflt_routers(void)
1657 struct rt6_info *rt;
1658 struct fib6_table *table;
1660 /* NOTE: Keep consistent with rt6_get_dflt_router */
1661 table = fib6_get_table(RT6_TABLE_DFLT);
1662 if (table == NULL)
1663 return;
1665 restart:
1666 read_lock_bh(&table->tb6_lock);
1667 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1668 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1669 dst_hold(&rt->u.dst);
1670 read_unlock_bh(&table->tb6_lock);
1671 ip6_del_rt(rt);
1672 goto restart;
1675 read_unlock_bh(&table->tb6_lock);
1678 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1679 struct fib6_config *cfg)
1681 memset(cfg, 0, sizeof(*cfg));
1683 cfg->fc_table = RT6_TABLE_MAIN;
1684 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1685 cfg->fc_metric = rtmsg->rtmsg_metric;
1686 cfg->fc_expires = rtmsg->rtmsg_info;
1687 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1688 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1689 cfg->fc_flags = rtmsg->rtmsg_flags;
1691 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1692 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1693 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1696 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1698 struct fib6_config cfg;
1699 struct in6_rtmsg rtmsg;
1700 int err;
1702 switch(cmd) {
1703 case SIOCADDRT: /* Add a route */
1704 case SIOCDELRT: /* Delete a route */
1705 if (!capable(CAP_NET_ADMIN))
1706 return -EPERM;
1707 err = copy_from_user(&rtmsg, arg,
1708 sizeof(struct in6_rtmsg));
1709 if (err)
1710 return -EFAULT;
1712 rtmsg_to_fib6_config(&rtmsg, &cfg);
1714 rtnl_lock();
1715 switch (cmd) {
1716 case SIOCADDRT:
1717 err = ip6_route_add(&cfg);
1718 break;
1719 case SIOCDELRT:
1720 err = ip6_route_del(&cfg);
1721 break;
1722 default:
1723 err = -EINVAL;
1725 rtnl_unlock();
1727 return err;
1730 return -EINVAL;
1734 * Drop the packet on the floor
1737 static int ip6_pkt_discard(struct sk_buff *skb)
1739 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1740 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1741 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1743 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1744 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1745 kfree_skb(skb);
1746 return 0;
1749 static int ip6_pkt_discard_out(struct sk_buff *skb)
1751 skb->dev = skb->dst->dev;
1752 return ip6_pkt_discard(skb);
1756 * Allocate a dst for local (unicast / anycast) address.
1759 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1760 const struct in6_addr *addr,
1761 int anycast)
1763 struct rt6_info *rt = ip6_dst_alloc();
1765 if (rt == NULL)
1766 return ERR_PTR(-ENOMEM);
1768 dev_hold(&loopback_dev);
1769 in6_dev_hold(idev);
1771 rt->u.dst.flags = DST_HOST;
1772 rt->u.dst.input = ip6_input;
1773 rt->u.dst.output = ip6_output;
1774 rt->rt6i_dev = &loopback_dev;
1775 rt->rt6i_idev = idev;
1776 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1777 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1778 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1779 rt->u.dst.obsolete = -1;
1781 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1782 if (anycast)
1783 rt->rt6i_flags |= RTF_ANYCAST;
1784 else
1785 rt->rt6i_flags |= RTF_LOCAL;
1786 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1787 if (rt->rt6i_nexthop == NULL) {
1788 dst_free((struct dst_entry *) rt);
1789 return ERR_PTR(-ENOMEM);
1792 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1793 rt->rt6i_dst.plen = 128;
1794 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1796 atomic_set(&rt->u.dst.__refcnt, 1);
1798 return rt;
1801 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1803 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1804 rt != &ip6_null_entry) {
1805 RT6_TRACE("deleted by ifdown %p\n", rt);
1806 return -1;
1808 return 0;
1811 void rt6_ifdown(struct net_device *dev)
1813 fib6_clean_all(fib6_ifdown, 0, dev);
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
1822 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1824 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1825 struct inet6_dev *idev;
1827 /* In IPv6 pmtu discovery is not optional,
1828 so that RTAX_MTU lock cannot disable it.
1829 We still use this lock to block changes
1830 caused by addrconf/ndisc.
1833 idev = __in6_dev_get(arg->dev);
1834 if (idev == NULL)
1835 return 0;
1837 /* For administrative MTU increase, there is no way to discover
1838 IPv6 PMTU increase, so PMTU increase should be updated here.
1839 Since RFC 1981 doesn't include administrative MTU increase
1840 update PMTU increase is a MUST. (i.e. jumbo frame)
1843 If new MTU is less than route PMTU, this new MTU will be the
1844 lowest MTU in the path, update the route PMTU to reflect PMTU
1845 decreases; if new MTU is greater than route PMTU, and the
1846 old MTU is the lowest MTU in the path, update the route PMTU
1847 to reflect the increase. In this case if the other nodes' MTU
1848 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1849 PMTU discouvery.
1851 if (rt->rt6i_dev == arg->dev &&
1852 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1853 (dst_mtu(&rt->u.dst) > arg->mtu ||
1854 (dst_mtu(&rt->u.dst) < arg->mtu &&
1855 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1856 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1857 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1858 return 0;
1861 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1863 struct rt6_mtu_change_arg arg = {
1864 .dev = dev,
1865 .mtu = mtu,
1868 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1871 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1872 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1873 [RTA_OIF] = { .type = NLA_U32 },
1874 [RTA_IIF] = { .type = NLA_U32 },
1875 [RTA_PRIORITY] = { .type = NLA_U32 },
1876 [RTA_METRICS] = { .type = NLA_NESTED },
1879 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1880 struct fib6_config *cfg)
1882 struct rtmsg *rtm;
1883 struct nlattr *tb[RTA_MAX+1];
1884 int err;
1886 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1887 if (err < 0)
1888 goto errout;
1890 err = -EINVAL;
1891 rtm = nlmsg_data(nlh);
1892 memset(cfg, 0, sizeof(*cfg));
1894 cfg->fc_table = rtm->rtm_table;
1895 cfg->fc_dst_len = rtm->rtm_dst_len;
1896 cfg->fc_src_len = rtm->rtm_src_len;
1897 cfg->fc_flags = RTF_UP;
1898 cfg->fc_protocol = rtm->rtm_protocol;
1900 if (rtm->rtm_type == RTN_UNREACHABLE)
1901 cfg->fc_flags |= RTF_REJECT;
1903 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1904 cfg->fc_nlinfo.nlh = nlh;
1906 if (tb[RTA_GATEWAY]) {
1907 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1908 cfg->fc_flags |= RTF_GATEWAY;
1911 if (tb[RTA_DST]) {
1912 int plen = (rtm->rtm_dst_len + 7) >> 3;
1914 if (nla_len(tb[RTA_DST]) < plen)
1915 goto errout;
1917 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1920 if (tb[RTA_SRC]) {
1921 int plen = (rtm->rtm_src_len + 7) >> 3;
1923 if (nla_len(tb[RTA_SRC]) < plen)
1924 goto errout;
1926 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1929 if (tb[RTA_OIF])
1930 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1932 if (tb[RTA_PRIORITY])
1933 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1935 if (tb[RTA_METRICS]) {
1936 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1937 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1940 if (tb[RTA_TABLE])
1941 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1943 err = 0;
1944 errout:
1945 return err;
1948 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1950 struct fib6_config cfg;
1951 int err;
1953 err = rtm_to_fib6_config(skb, nlh, &cfg);
1954 if (err < 0)
1955 return err;
1957 return ip6_route_del(&cfg);
1960 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1962 struct fib6_config cfg;
1963 int err;
1965 err = rtm_to_fib6_config(skb, nlh, &cfg);
1966 if (err < 0)
1967 return err;
1969 return ip6_route_add(&cfg);
1972 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1973 struct in6_addr *dst, struct in6_addr *src,
1974 int iif, int type, u32 pid, u32 seq,
1975 int prefix, unsigned int flags)
1977 struct rtmsg *rtm;
1978 struct nlmsghdr *nlh;
1979 struct rta_cacheinfo ci;
1980 u32 table;
1982 if (prefix) { /* user wants prefix routes only */
1983 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1984 /* success since this is not a prefix route */
1985 return 1;
1989 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1990 if (nlh == NULL)
1991 return -ENOBUFS;
1993 rtm = nlmsg_data(nlh);
1994 rtm->rtm_family = AF_INET6;
1995 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1996 rtm->rtm_src_len = rt->rt6i_src.plen;
1997 rtm->rtm_tos = 0;
1998 if (rt->rt6i_table)
1999 table = rt->rt6i_table->tb6_id;
2000 else
2001 table = RT6_TABLE_UNSPEC;
2002 rtm->rtm_table = table;
2003 NLA_PUT_U32(skb, RTA_TABLE, table);
2004 if (rt->rt6i_flags&RTF_REJECT)
2005 rtm->rtm_type = RTN_UNREACHABLE;
2006 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2007 rtm->rtm_type = RTN_LOCAL;
2008 else
2009 rtm->rtm_type = RTN_UNICAST;
2010 rtm->rtm_flags = 0;
2011 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2012 rtm->rtm_protocol = rt->rt6i_protocol;
2013 if (rt->rt6i_flags&RTF_DYNAMIC)
2014 rtm->rtm_protocol = RTPROT_REDIRECT;
2015 else if (rt->rt6i_flags & RTF_ADDRCONF)
2016 rtm->rtm_protocol = RTPROT_KERNEL;
2017 else if (rt->rt6i_flags&RTF_DEFAULT)
2018 rtm->rtm_protocol = RTPROT_RA;
2020 if (rt->rt6i_flags&RTF_CACHE)
2021 rtm->rtm_flags |= RTM_F_CLONED;
2023 if (dst) {
2024 NLA_PUT(skb, RTA_DST, 16, dst);
2025 rtm->rtm_dst_len = 128;
2026 } else if (rtm->rtm_dst_len)
2027 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2028 #ifdef CONFIG_IPV6_SUBTREES
2029 if (src) {
2030 NLA_PUT(skb, RTA_SRC, 16, src);
2031 rtm->rtm_src_len = 128;
2032 } else if (rtm->rtm_src_len)
2033 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2034 #endif
2035 if (iif)
2036 NLA_PUT_U32(skb, RTA_IIF, iif);
2037 else if (dst) {
2038 struct in6_addr saddr_buf;
2039 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2040 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2043 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2044 goto nla_put_failure;
2046 if (rt->u.dst.neighbour)
2047 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2049 if (rt->u.dst.dev)
2050 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2052 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2053 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2054 if (rt->rt6i_expires)
2055 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2056 else
2057 ci.rta_expires = 0;
2058 ci.rta_used = rt->u.dst.__use;
2059 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2060 ci.rta_error = rt->u.dst.error;
2061 ci.rta_id = 0;
2062 ci.rta_ts = 0;
2063 ci.rta_tsage = 0;
2064 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2066 return nlmsg_end(skb, nlh);
2068 nla_put_failure:
2069 return nlmsg_cancel(skb, nlh);
2072 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2074 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2075 int prefix;
2077 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2078 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2079 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2080 } else
2081 prefix = 0;
2083 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2084 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2085 prefix, NLM_F_MULTI);
2088 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2090 struct nlattr *tb[RTA_MAX+1];
2091 struct rt6_info *rt;
2092 struct sk_buff *skb;
2093 struct rtmsg *rtm;
2094 struct flowi fl;
2095 int err, iif = 0;
2097 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2098 if (err < 0)
2099 goto errout;
2101 err = -EINVAL;
2102 memset(&fl, 0, sizeof(fl));
2104 if (tb[RTA_SRC]) {
2105 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2106 goto errout;
2108 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2111 if (tb[RTA_DST]) {
2112 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2113 goto errout;
2115 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2118 if (tb[RTA_IIF])
2119 iif = nla_get_u32(tb[RTA_IIF]);
2121 if (tb[RTA_OIF])
2122 fl.oif = nla_get_u32(tb[RTA_OIF]);
2124 if (iif) {
2125 struct net_device *dev;
2126 dev = __dev_get_by_index(iif);
2127 if (!dev) {
2128 err = -ENODEV;
2129 goto errout;
2133 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2134 if (skb == NULL) {
2135 err = -ENOBUFS;
2136 goto errout;
2139 /* Reserve room for dummy headers, this skb can pass
2140 through good chunk of routing engine.
2142 skb->mac.raw = skb->data;
2143 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2145 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2146 skb->dst = &rt->u.dst;
2148 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2149 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2150 nlh->nlmsg_seq, 0, 0);
2151 if (err < 0) {
2152 kfree_skb(skb);
2153 goto errout;
2156 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2157 errout:
2158 return err;
2161 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2163 struct sk_buff *skb;
2164 u32 pid = 0, seq = 0;
2165 struct nlmsghdr *nlh = NULL;
2166 int payload = sizeof(struct rtmsg) + 256;
2167 int err = -ENOBUFS;
2169 if (info) {
2170 pid = info->pid;
2171 nlh = info->nlh;
2172 if (nlh)
2173 seq = nlh->nlmsg_seq;
2176 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2177 if (skb == NULL)
2178 goto errout;
2180 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2181 if (err < 0) {
2182 kfree_skb(skb);
2183 goto errout;
2186 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2187 errout:
2188 if (err < 0)
2189 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2193 * /proc
2196 #ifdef CONFIG_PROC_FS
2198 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
2209 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2211 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2212 int i;
2214 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2215 arg->skip++;
2216 return 0;
2219 if (arg->len >= arg->length)
2220 return 0;
2222 for (i=0; i<16; i++) {
2223 sprintf(arg->buffer + arg->len, "%02x",
2224 rt->rt6i_dst.addr.s6_addr[i]);
2225 arg->len += 2;
2227 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2228 rt->rt6i_dst.plen);
2230 #ifdef CONFIG_IPV6_SUBTREES
2231 for (i=0; i<16; i++) {
2232 sprintf(arg->buffer + arg->len, "%02x",
2233 rt->rt6i_src.addr.s6_addr[i]);
2234 arg->len += 2;
2236 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2237 rt->rt6i_src.plen);
2238 #else
2239 sprintf(arg->buffer + arg->len,
2240 "00000000000000000000000000000000 00 ");
2241 arg->len += 36;
2242 #endif
2244 if (rt->rt6i_nexthop) {
2245 for (i=0; i<16; i++) {
2246 sprintf(arg->buffer + arg->len, "%02x",
2247 rt->rt6i_nexthop->primary_key[i]);
2248 arg->len += 2;
2250 } else {
2251 sprintf(arg->buffer + arg->len,
2252 "00000000000000000000000000000000");
2253 arg->len += 32;
2255 arg->len += sprintf(arg->buffer + arg->len,
2256 " %08x %08x %08x %08x %8s\n",
2257 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2258 rt->u.dst.__use, rt->rt6i_flags,
2259 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2260 return 0;
2263 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2265 struct rt6_proc_arg arg = {
2266 .buffer = buffer,
2267 .offset = offset,
2268 .length = length,
2271 fib6_clean_all(rt6_info_route, 0, &arg);
2273 *start = buffer;
2274 if (offset)
2275 *start += offset % RT6_INFO_LEN;
2277 arg.len -= offset % RT6_INFO_LEN;
2279 if (arg.len > length)
2280 arg.len = length;
2281 if (arg.len < 0)
2282 arg.len = 0;
2284 return arg.len;
2287 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2289 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2290 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2291 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2292 rt6_stats.fib_rt_cache,
2293 atomic_read(&ip6_dst_ops.entries),
2294 rt6_stats.fib_discarded_routes);
2296 return 0;
2299 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2301 return single_open(file, rt6_stats_seq_show, NULL);
2304 static struct file_operations rt6_stats_seq_fops = {
2305 .owner = THIS_MODULE,
2306 .open = rt6_stats_seq_open,
2307 .read = seq_read,
2308 .llseek = seq_lseek,
2309 .release = single_release,
2311 #endif /* CONFIG_PROC_FS */
2313 #ifdef CONFIG_SYSCTL
2315 static int flush_delay;
2317 static
2318 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2319 void __user *buffer, size_t *lenp, loff_t *ppos)
2321 if (write) {
2322 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2323 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2324 return 0;
2325 } else
2326 return -EINVAL;
2329 ctl_table ipv6_route_table[] = {
2331 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2332 .procname = "flush",
2333 .data = &flush_delay,
2334 .maxlen = sizeof(int),
2335 .mode = 0200,
2336 .proc_handler = &ipv6_sysctl_rtcache_flush
2339 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2340 .procname = "gc_thresh",
2341 .data = &ip6_dst_ops.gc_thresh,
2342 .maxlen = sizeof(int),
2343 .mode = 0644,
2344 .proc_handler = &proc_dointvec,
2347 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2348 .procname = "max_size",
2349 .data = &ip6_rt_max_size,
2350 .maxlen = sizeof(int),
2351 .mode = 0644,
2352 .proc_handler = &proc_dointvec,
2355 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2356 .procname = "gc_min_interval",
2357 .data = &ip6_rt_gc_min_interval,
2358 .maxlen = sizeof(int),
2359 .mode = 0644,
2360 .proc_handler = &proc_dointvec_jiffies,
2361 .strategy = &sysctl_jiffies,
2364 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2365 .procname = "gc_timeout",
2366 .data = &ip6_rt_gc_timeout,
2367 .maxlen = sizeof(int),
2368 .mode = 0644,
2369 .proc_handler = &proc_dointvec_jiffies,
2370 .strategy = &sysctl_jiffies,
2373 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2374 .procname = "gc_interval",
2375 .data = &ip6_rt_gc_interval,
2376 .maxlen = sizeof(int),
2377 .mode = 0644,
2378 .proc_handler = &proc_dointvec_jiffies,
2379 .strategy = &sysctl_jiffies,
2382 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2383 .procname = "gc_elasticity",
2384 .data = &ip6_rt_gc_elasticity,
2385 .maxlen = sizeof(int),
2386 .mode = 0644,
2387 .proc_handler = &proc_dointvec_jiffies,
2388 .strategy = &sysctl_jiffies,
2391 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2392 .procname = "mtu_expires",
2393 .data = &ip6_rt_mtu_expires,
2394 .maxlen = sizeof(int),
2395 .mode = 0644,
2396 .proc_handler = &proc_dointvec_jiffies,
2397 .strategy = &sysctl_jiffies,
2400 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2401 .procname = "min_adv_mss",
2402 .data = &ip6_rt_min_advmss,
2403 .maxlen = sizeof(int),
2404 .mode = 0644,
2405 .proc_handler = &proc_dointvec_jiffies,
2406 .strategy = &sysctl_jiffies,
2409 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2410 .procname = "gc_min_interval_ms",
2411 .data = &ip6_rt_gc_min_interval,
2412 .maxlen = sizeof(int),
2413 .mode = 0644,
2414 .proc_handler = &proc_dointvec_ms_jiffies,
2415 .strategy = &sysctl_ms_jiffies,
2417 { .ctl_name = 0 }
2420 #endif
2422 void __init ip6_route_init(void)
2424 struct proc_dir_entry *p;
2426 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2427 sizeof(struct rt6_info),
2428 0, SLAB_HWCACHE_ALIGN,
2429 NULL, NULL);
2430 if (!ip6_dst_ops.kmem_cachep)
2431 panic("cannot create ip6_dst_cache");
2433 fib6_init();
2434 #ifdef CONFIG_PROC_FS
2435 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2436 if (p)
2437 p->owner = THIS_MODULE;
2439 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2440 #endif
2441 #ifdef CONFIG_XFRM
2442 xfrm6_init();
2443 #endif
2444 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2445 fib6_rules_init();
2446 #endif
2449 void ip6_route_cleanup(void)
2451 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2452 fib6_rules_cleanup();
2453 #endif
2454 #ifdef CONFIG_PROC_FS
2455 proc_net_remove("ipv6_route");
2456 proc_net_remove("rt6_stats");
2457 #endif
2458 #ifdef CONFIG_XFRM
2459 xfrm6_fini();
2460 #endif
2461 rt6_ifdown(NULL);
2462 fib6_gc_cleanup();
2463 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);