[IPV6]: Multiple Routing Tables
[firewire-audio.git] / net / ipv6 / route.c
blob73efdadb9ab89ab3ef98c63137237adaa359c447
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 */
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
41 #ifdef CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
75 #define CLONE_OFFLINK_ROUTE 0
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107 #endif
109 static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
123 struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
143 /* allocate dst with ip6_dst_ops */
144 static __inline__ struct rt6_info *ip6_dst_alloc(void)
146 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 static void ip6_dst_destroy(struct dst_entry *dst)
151 struct rt6_info *rt = (struct rt6_info *)dst;
152 struct inet6_dev *idev = rt->rt6i_idev;
154 if (idev != NULL) {
155 rt->rt6i_idev = NULL;
156 in6_dev_put(idev);
160 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 int how)
163 struct rt6_info *rt = (struct rt6_info *)dst;
164 struct inet6_dev *idev = rt->rt6i_idev;
166 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
167 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
168 if (loopback_idev != NULL) {
169 rt->rt6i_idev = loopback_idev;
170 in6_dev_put(idev);
175 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
177 return (rt->rt6i_flags & RTF_EXPIRES &&
178 time_after(jiffies, rt->rt6i_expires));
181 static inline int rt6_need_strict(struct in6_addr *daddr)
183 return (ipv6_addr_type(daddr) &
184 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
188 * Route lookup. Any table->tb6_lock is implied.
191 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
192 int oif,
193 int strict)
195 struct rt6_info *local = NULL;
196 struct rt6_info *sprt;
198 if (oif) {
199 for (sprt = rt; sprt; sprt = sprt->u.next) {
200 struct net_device *dev = sprt->rt6i_dev;
201 if (dev->ifindex == oif)
202 return sprt;
203 if (dev->flags & IFF_LOOPBACK) {
204 if (sprt->rt6i_idev == NULL ||
205 sprt->rt6i_idev->dev->ifindex != oif) {
206 if (strict && oif)
207 continue;
208 if (local && (!oif ||
209 local->rt6i_idev->dev->ifindex == oif))
210 continue;
212 local = sprt;
216 if (local)
217 return local;
219 if (strict)
220 return &ip6_null_entry;
222 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the nexthop of @rt when its
 * neighbour entry is not in a NUD_VALID state and the per-device
 * rtr_probe_interval has elapsed since neigh->updated.  A NULL @rt, or a
 * route without nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check the state and the rate limit under the neighbour lock. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (!due) {
		read_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	/* Solicit the neighbour via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
262 * Default Router Selection (RFC 2461 6.3.6)
264 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
266 struct net_device *dev = rt->rt6i_dev;
267 if (!oif || dev->ifindex == oif)
268 return 2;
269 if ((dev->flags & IFF_LOOPBACK) &&
270 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
271 return 1;
272 return 0;
275 static int inline rt6_check_neigh(struct rt6_info *rt)
277 struct neighbour *neigh = rt->rt6i_nexthop;
278 int m = 0;
279 if (rt->rt6i_flags & RTF_NONEXTHOP ||
280 !(rt->rt6i_flags & RTF_GATEWAY))
281 m = 1;
282 else if (neigh) {
283 read_lock_bh(&neigh->lock);
284 if (neigh->nud_state & NUD_VALID)
285 m = 2;
286 read_unlock_bh(&neigh->lock);
288 return m;
291 static int rt6_score_route(struct rt6_info *rt, int oif,
292 int strict)
294 int m, n;
296 m = rt6_check_dev(rt, oif);
297 if (!m && (strict & RT6_SELECT_F_IFACE))
298 return -1;
299 #ifdef CONFIG_IPV6_ROUTER_PREF
300 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
301 #endif
302 n = rt6_check_neigh(rt);
303 if (n > 1)
304 m |= 16;
305 else if (!n && strict & RT6_SELECT_F_REACHABLE)
306 return -1;
307 return m;
310 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
311 int strict)
313 struct rt6_info *match = NULL, *last = NULL;
314 struct rt6_info *rt, *rt0 = *head;
315 u32 metric;
316 int mpri = -1;
318 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
319 __FUNCTION__, head, head ? *head : NULL, oif);
321 for (rt = rt0, metric = rt0->rt6i_metric;
322 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
323 rt = rt->u.next) {
324 int m;
326 if (rt6_check_expired(rt))
327 continue;
329 last = rt;
331 m = rt6_score_route(rt, oif, strict);
332 if (m < 0)
333 continue;
335 if (m > mpri) {
336 rt6_probe(match);
337 match = rt;
338 mpri = m;
339 } else {
340 rt6_probe(rt);
344 if (!match &&
345 (strict & RT6_SELECT_F_REACHABLE) &&
346 last && last != rt0) {
347 /* no entries matched; do round-robin */
348 static DEFINE_SPINLOCK(lock);
349 spin_lock(&lock);
350 *head = rt0->u.next;
351 rt0->u.next = last->u.next;
352 last->u.next = rt0;
353 spin_unlock(&lock);
356 RT6_TRACE("%s() => %p, score=%d\n",
357 __FUNCTION__, match, mpri);
359 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option.
 *
 * @dev:    device the option arrived on (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address used as gateway of the route
 *
 * A lifetime of 0 deletes an existing route, 0xffffffff makes the route
 * permanent, any other value (re)arms its expiry.  Returns 0 on success
 * and -EINVAL when the option is malformed or must be ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, 2.3: an option carrying the reserved preference value
	 * MUST be ignored, so do not install it with a substitute
	 * preference.
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* the lifetime arrives in network byte order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws a route we already have */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
/*
 * Shared tail of the lookup functions.  When a strict lookup ended on
 * ip6_null_entry, climb towards the root of the tree: a node carrying
 * route info restarts the selection there, reaching a table root gives
 * up (taking a reference on the null entry first).
 *
 * Expects the locals "rt", "fn" and "flags" and the labels "restart" and
 * "out" in the expanding function.
 */
#define BACKTRACK() \
do { \
	if ((rt == &ip6_null_entry) && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
452 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
453 struct flowi *fl, int flags)
455 struct fib6_node *fn;
456 struct rt6_info *rt;
458 read_lock_bh(&table->tb6_lock);
459 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
460 restart:
461 rt = fn->leaf;
462 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
463 BACKTRACK();
464 dst_hold(&rt->u.dst);
465 out:
466 read_unlock_bh(&table->tb6_lock);
468 rt->u.dst.lastuse = jiffies;
469 rt->u.dst.__use++;
471 return rt;
475 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
476 int oif, int strict)
478 struct flowi fl = {
479 .oif = oif,
480 .nl_u = {
481 .ip6_u = {
482 .daddr = *daddr,
483 /* TODO: saddr */
487 struct dst_entry *dst;
488 int flags = strict ? RT6_F_STRICT : 0;
490 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
491 if (dst->error == 0)
492 return (struct rt6_info *) dst;
494 dst_release(dst);
496 return NULL;
499 /* ip6_ins_rt is called with FREE table->tb6_lock.
500 It takes new route entry, the addition fails by any reason the
501 route is freed. In any case, if caller does not hold it, it may
502 be destroyed.
505 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
506 void *_rtattr, struct netlink_skb_parms *req)
508 int err;
509 struct fib6_table *table;
511 table = rt->rt6i_table;
512 write_lock_bh(&table->tb6_lock);
513 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
514 write_unlock_bh(&table->tb6_lock);
516 return err;
519 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
520 struct in6_addr *saddr)
522 struct rt6_info *rt;
525 * Clone the route.
528 rt = ip6_rt_copy(ort);
530 if (rt) {
531 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
532 if (rt->rt6i_dst.plen != 128 &&
533 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
534 rt->rt6i_flags |= RTF_ANYCAST;
535 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
538 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
539 rt->rt6i_dst.plen = 128;
540 rt->rt6i_flags |= RTF_CACHE;
541 rt->u.dst.flags |= DST_HOST;
543 #ifdef CONFIG_IPV6_SUBTREES
544 if (rt->rt6i_src.plen && saddr) {
545 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
546 rt->rt6i_src.plen = 128;
548 #endif
550 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
554 return rt;
557 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
559 struct rt6_info *rt = ip6_rt_copy(ort);
560 if (rt) {
561 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
562 rt->rt6i_dst.plen = 128;
563 rt->rt6i_flags |= RTF_CACHE;
564 if (rt->rt6i_flags & RTF_REJECT)
565 rt->u.dst.error = ort->u.dst.error;
566 rt->u.dst.flags |= DST_HOST;
567 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
569 return rt;
572 struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
573 int flags)
575 struct fib6_node *fn;
576 struct rt6_info *rt, *nrt;
577 int strict = 0;
578 int attempts = 3;
579 int err;
580 int reachable = RT6_SELECT_F_REACHABLE;
582 if (flags & RT6_F_STRICT)
583 strict = RT6_SELECT_F_IFACE;
585 relookup:
586 read_lock_bh(&table->tb6_lock);
588 restart_2:
589 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
591 restart:
592 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
593 BACKTRACK();
594 if (rt == &ip6_null_entry ||
595 rt->rt6i_flags & RTF_CACHE)
596 goto out;
598 dst_hold(&rt->u.dst);
599 read_unlock_bh(&table->tb6_lock);
601 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
602 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
603 else {
604 #if CLONE_OFFLINK_ROUTE
605 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
606 #else
607 goto out2;
608 #endif
611 dst_release(&rt->u.dst);
612 rt = nrt ? : &ip6_null_entry;
614 dst_hold(&rt->u.dst);
615 if (nrt) {
616 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
617 if (!err)
618 goto out2;
621 if (--attempts <= 0)
622 goto out2;
625 * Race condition! In the gap, when table->tb6_lock was
626 * released someone could insert this route. Relookup.
628 dst_release(&rt->u.dst);
629 goto relookup;
631 out:
632 if (reachable) {
633 reachable = 0;
634 goto restart_2;
636 dst_hold(&rt->u.dst);
637 read_unlock_bh(&table->tb6_lock);
638 out2:
639 rt->u.dst.lastuse = jiffies;
640 rt->u.dst.__use++;
642 return rt;
645 void ip6_route_input(struct sk_buff *skb)
647 struct ipv6hdr *iph = skb->nh.ipv6h;
648 struct flowi fl = {
649 .iif = skb->dev->ifindex,
650 .nl_u = {
651 .ip6_u = {
652 .daddr = iph->daddr,
653 .saddr = iph->saddr,
654 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
657 .proto = iph->nexthdr,
659 int flags = 0;
661 if (rt6_need_strict(&iph->daddr))
662 flags |= RT6_F_STRICT;
664 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
667 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
668 struct flowi *fl, int flags)
670 struct fib6_node *fn;
671 struct rt6_info *rt, *nrt;
672 int strict = 0;
673 int attempts = 3;
674 int err;
675 int reachable = RT6_SELECT_F_REACHABLE;
677 if (flags & RT6_F_STRICT)
678 strict = RT6_SELECT_F_IFACE;
680 relookup:
681 read_lock_bh(&table->tb6_lock);
683 restart_2:
684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
686 restart:
687 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
688 BACKTRACK();
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
691 goto out;
693 dst_hold(&rt->u.dst);
694 read_unlock_bh(&table->tb6_lock);
696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
698 else {
699 #if CLONE_OFFLINK_ROUTE
700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701 #else
702 goto out2;
703 #endif
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
709 dst_hold(&rt->u.dst);
710 if (nrt) {
711 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
712 if (!err)
713 goto out2;
716 if (--attempts <= 0)
717 goto out2;
720 * Race condition! In the gap, when table->tb6_lock was
721 * released someone could insert this route. Relookup.
723 dst_release(&rt->u.dst);
724 goto relookup;
726 out:
727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
731 dst_hold(&rt->u.dst);
732 read_unlock_bh(&table->tb6_lock);
733 out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
736 return rt;
739 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
741 int flags = 0;
743 if (rt6_need_strict(&fl->fl6_dst))
744 flags |= RT6_F_STRICT;
746 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
751 * Destination cache support functions
754 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
756 struct rt6_info *rt;
758 rt = (struct rt6_info *) dst;
760 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
761 return dst;
763 return NULL;
766 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
768 struct rt6_info *rt = (struct rt6_info *) dst;
770 if (rt) {
771 if (rt->rt6i_flags & RTF_CACHE)
772 ip6_del_rt(rt, NULL, NULL, NULL);
773 else
774 dst_release(dst);
776 return NULL;
779 static void ip6_link_failure(struct sk_buff *skb)
781 struct rt6_info *rt;
783 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
785 rt = (struct rt6_info *) skb->dst;
786 if (rt) {
787 if (rt->rt6i_flags&RTF_CACHE) {
788 dst_set_expires(&rt->u.dst, 0);
789 rt->rt6i_flags |= RTF_EXPIRES;
790 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
791 rt->rt6i_node->fn_sernum = -1;
795 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
797 struct rt6_info *rt6 = (struct rt6_info*)dst;
799 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
800 rt6->rt6i_flags |= RTF_MODIFIED;
801 if (mtu < IPV6_MIN_MTU) {
802 mtu = IPV6_MIN_MTU;
803 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
805 dst->metrics[RTAX_MTU-1] = mtu;
806 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
810 static int ipv6_get_mtu(struct net_device *dev);
812 static inline unsigned int ipv6_advmss(unsigned int mtu)
814 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
816 if (mtu < ip6_rt_min_advmss)
817 mtu = ip6_rt_min_advmss;
820 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
821 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
822 * IPV6_MAXPLEN is also valid and means: "any MSS,
823 * rely only on pmtu discovery"
825 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
826 mtu = IPV6_MAXPLEN;
827 return mtu;
830 static struct dst_entry *ndisc_dst_gc_list;
831 DEFINE_SPINLOCK(ndisc_lock);
833 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
834 struct neighbour *neigh,
835 struct in6_addr *addr,
836 int (*output)(struct sk_buff *))
838 struct rt6_info *rt;
839 struct inet6_dev *idev = in6_dev_get(dev);
841 if (unlikely(idev == NULL))
842 return NULL;
844 rt = ip6_dst_alloc();
845 if (unlikely(rt == NULL)) {
846 in6_dev_put(idev);
847 goto out;
850 dev_hold(dev);
851 if (neigh)
852 neigh_hold(neigh);
853 else
854 neigh = ndisc_get_neigh(dev, addr);
856 rt->rt6i_dev = dev;
857 rt->rt6i_idev = idev;
858 rt->rt6i_nexthop = neigh;
859 atomic_set(&rt->u.dst.__refcnt, 1);
860 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
861 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
862 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
863 rt->u.dst.output = output;
865 #if 0 /* there's no chance to use these for ndisc */
866 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
867 ? DST_HOST
868 : 0;
869 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
870 rt->rt6i_dst.plen = 128;
871 #endif
873 spin_lock_bh(&ndisc_lock);
874 rt->u.dst.next = ndisc_dst_gc_list;
875 ndisc_dst_gc_list = &rt->u.dst;
876 spin_unlock_bh(&ndisc_lock);
878 fib6_force_start_gc();
880 out:
881 return (struct dst_entry *)rt;
884 int ndisc_dst_gc(int *more)
886 struct dst_entry *dst, *next, **pprev;
887 int freed;
889 next = NULL;
890 freed = 0;
892 spin_lock_bh(&ndisc_lock);
893 pprev = &ndisc_dst_gc_list;
895 while ((dst = *pprev) != NULL) {
896 if (!atomic_read(&dst->__refcnt)) {
897 *pprev = dst->next;
898 dst_free(dst);
899 freed++;
900 } else {
901 pprev = &dst->next;
902 (*more)++;
906 spin_unlock_bh(&ndisc_lock);
908 return freed;
911 static int ip6_dst_gc(void)
913 static unsigned expire = 30*HZ;
914 static unsigned long last_gc;
915 unsigned long now = jiffies;
917 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
918 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
919 goto out;
921 expire++;
922 fib6_run_gc(expire);
923 last_gc = now;
924 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
925 expire = ip6_rt_gc_timeout>>1;
927 out:
928 expire -= expire>>ip6_rt_gc_elasticity;
929 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
932 /* Clean host part of a prefix. Not necessary in radix tree,
933 but results in cleaner routing tables.
935 Remove it only when all the things will work!
938 static int ipv6_get_mtu(struct net_device *dev)
940 int mtu = IPV6_MIN_MTU;
941 struct inet6_dev *idev;
943 idev = in6_dev_get(dev);
944 if (idev) {
945 mtu = idev->cnf.mtu6;
946 in6_dev_put(idev);
948 return mtu;
951 int ipv6_get_hoplimit(struct net_device *dev)
953 int hoplimit = ipv6_devconf.hop_limit;
954 struct inet6_dev *idev;
956 idev = in6_dev_get(dev);
957 if (idev) {
958 hoplimit = idev->cnf.hop_limit;
959 in6_dev_put(idev);
961 return hoplimit;
968 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
969 void *_rtattr, struct netlink_skb_parms *req,
970 u32 table_id)
972 int err;
973 struct rtmsg *r;
974 struct rtattr **rta;
975 struct rt6_info *rt = NULL;
976 struct net_device *dev = NULL;
977 struct inet6_dev *idev = NULL;
978 struct fib6_table *table;
979 int addr_type;
981 rta = (struct rtattr **) _rtattr;
983 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
984 return -EINVAL;
985 #ifndef CONFIG_IPV6_SUBTREES
986 if (rtmsg->rtmsg_src_len)
987 return -EINVAL;
988 #endif
989 if (rtmsg->rtmsg_ifindex) {
990 err = -ENODEV;
991 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
992 if (!dev)
993 goto out;
994 idev = in6_dev_get(dev);
995 if (!idev)
996 goto out;
999 if (rtmsg->rtmsg_metric == 0)
1000 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1002 table = fib6_new_table(table_id);
1003 if (table == NULL) {
1004 err = -ENOBUFS;
1005 goto out;
1008 rt = ip6_dst_alloc();
1010 if (rt == NULL) {
1011 err = -ENOMEM;
1012 goto out;
1015 rt->u.dst.obsolete = -1;
1016 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1017 if (nlh && (r = NLMSG_DATA(nlh))) {
1018 rt->rt6i_protocol = r->rtm_protocol;
1019 } else {
1020 rt->rt6i_protocol = RTPROT_BOOT;
1023 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1025 if (addr_type & IPV6_ADDR_MULTICAST)
1026 rt->u.dst.input = ip6_mc_input;
1027 else
1028 rt->u.dst.input = ip6_forward;
1030 rt->u.dst.output = ip6_output;
1032 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1033 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1034 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1035 if (rt->rt6i_dst.plen == 128)
1036 rt->u.dst.flags = DST_HOST;
1038 #ifdef CONFIG_IPV6_SUBTREES
1039 ipv6_addr_prefix(&rt->rt6i_src.addr,
1040 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1041 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1042 #endif
1044 rt->rt6i_metric = rtmsg->rtmsg_metric;
1046 /* We cannot add true routes via loopback here,
1047 they would result in kernel looping; promote them to reject routes
1049 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1050 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1051 /* hold loopback dev/idev if we haven't done so. */
1052 if (dev != &loopback_dev) {
1053 if (dev) {
1054 dev_put(dev);
1055 in6_dev_put(idev);
1057 dev = &loopback_dev;
1058 dev_hold(dev);
1059 idev = in6_dev_get(dev);
1060 if (!idev) {
1061 err = -ENODEV;
1062 goto out;
1065 rt->u.dst.output = ip6_pkt_discard_out;
1066 rt->u.dst.input = ip6_pkt_discard;
1067 rt->u.dst.error = -ENETUNREACH;
1068 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1069 goto install_route;
1072 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1073 struct in6_addr *gw_addr;
1074 int gwa_type;
1076 gw_addr = &rtmsg->rtmsg_gateway;
1077 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1078 gwa_type = ipv6_addr_type(gw_addr);
1080 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1081 struct rt6_info *grt;
1083 /* IPv6 strictly inhibits using not link-local
1084 addresses as nexthop address.
1085 Otherwise, router will not able to send redirects.
1086 It is very good, but in some (rare!) circumstances
1087 (SIT, PtP, NBMA NOARP links) it is handy to allow
1088 some exceptions. --ANK
1090 err = -EINVAL;
1091 if (!(gwa_type&IPV6_ADDR_UNICAST))
1092 goto out;
1094 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1096 err = -EHOSTUNREACH;
1097 if (grt == NULL)
1098 goto out;
1099 if (dev) {
1100 if (dev != grt->rt6i_dev) {
1101 dst_release(&grt->u.dst);
1102 goto out;
1104 } else {
1105 dev = grt->rt6i_dev;
1106 idev = grt->rt6i_idev;
1107 dev_hold(dev);
1108 in6_dev_hold(grt->rt6i_idev);
1110 if (!(grt->rt6i_flags&RTF_GATEWAY))
1111 err = 0;
1112 dst_release(&grt->u.dst);
1114 if (err)
1115 goto out;
1117 err = -EINVAL;
1118 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1119 goto out;
1122 err = -ENODEV;
1123 if (dev == NULL)
1124 goto out;
1126 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1127 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1128 if (IS_ERR(rt->rt6i_nexthop)) {
1129 err = PTR_ERR(rt->rt6i_nexthop);
1130 rt->rt6i_nexthop = NULL;
1131 goto out;
1135 rt->rt6i_flags = rtmsg->rtmsg_flags;
1137 install_route:
1138 if (rta && rta[RTA_METRICS-1]) {
1139 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1140 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1142 while (RTA_OK(attr, attrlen)) {
1143 unsigned flavor = attr->rta_type;
1144 if (flavor) {
1145 if (flavor > RTAX_MAX) {
1146 err = -EINVAL;
1147 goto out;
1149 rt->u.dst.metrics[flavor-1] =
1150 *(u32 *)RTA_DATA(attr);
1152 attr = RTA_NEXT(attr, attrlen);
1156 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1157 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1158 if (!rt->u.dst.metrics[RTAX_MTU-1])
1159 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1160 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1161 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1162 rt->u.dst.dev = dev;
1163 rt->rt6i_idev = idev;
1164 rt->rt6i_table = table;
1165 return ip6_ins_rt(rt, nlh, _rtattr, req);
1167 out:
1168 if (dev)
1169 dev_put(dev);
1170 if (idev)
1171 in6_dev_put(idev);
1172 if (rt)
1173 dst_free((struct dst_entry *) rt);
1174 return err;
1177 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1179 int err;
1180 struct fib6_table *table;
1182 table = rt->rt6i_table;
1183 write_lock_bh(&table->tb6_lock);
1185 err = fib6_del(rt, nlh, _rtattr, req);
1186 dst_release(&rt->u.dst);
1188 write_unlock_bh(&table->tb6_lock);
1190 return err;
1193 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1194 void *_rtattr, struct netlink_skb_parms *req,
1195 u32 table_id)
1197 struct fib6_table *table;
1198 struct fib6_node *fn;
1199 struct rt6_info *rt;
1200 int err = -ESRCH;
1202 table = fib6_get_table(table_id);
1203 if (table == NULL)
1204 return err;
1206 read_lock_bh(&table->tb6_lock);
1208 fn = fib6_locate(&table->tb6_root,
1209 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1210 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1212 if (fn) {
1213 for (rt = fn->leaf; rt; rt = rt->u.next) {
1214 if (rtmsg->rtmsg_ifindex &&
1215 (rt->rt6i_dev == NULL ||
1216 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1217 continue;
1218 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1219 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1220 continue;
1221 if (rtmsg->rtmsg_metric &&
1222 rtmsg->rtmsg_metric != rt->rt6i_metric)
1223 continue;
1224 dst_hold(&rt->u.dst);
1225 read_unlock_bh(&table->tb6_lock);
1227 return ip6_del_rt(rt, nlh, _rtattr, req);
1230 read_unlock_bh(&table->tb6_lock);
1232 return err;
1236 * Handle redirects
1238 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1239 struct neighbour *neigh, u8 *lladdr, int on_link)
1241 struct rt6_info *rt, *nrt = NULL;
1242 struct fib6_node *fn;
1243 struct fib6_table *table;
1244 struct netevent_redirect netevent;
1246 /* TODO: Very lazy, might need to check all tables */
1247 table = fib6_get_table(RT6_TABLE_MAIN);
1248 if (table == NULL)
1249 return;
1252 * Get the "current" route for this destination and
1253 * check if the redirect has come from approriate router.
1255 * RFC 2461 specifies that redirects should only be
1256 * accepted if they come from the nexthop to the target.
1257 * Due to the way the routes are chosen, this notion
1258 * is a bit fuzzy and one might need to check all possible
1259 * routes.
1262 read_lock_bh(&table->tb6_lock);
1263 fn = fib6_lookup(&table->tb6_root, dest, NULL);
1264 restart:
1265 for (rt = fn->leaf; rt; rt = rt->u.next) {
1267 * Current route is on-link; redirect is always invalid.
1269 * Seems, previous statement is not true. It could
1270 * be node, which looks for us as on-link (f.e. proxy ndisc)
1271 * But then router serving it might decide, that we should
1272 * know truth 8)8) --ANK (980726).
1274 if (rt6_check_expired(rt))
1275 continue;
1276 if (!(rt->rt6i_flags & RTF_GATEWAY))
1277 continue;
1278 if (neigh->dev != rt->rt6i_dev)
1279 continue;
1280 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1281 continue;
1282 break;
1284 if (rt)
1285 dst_hold(&rt->u.dst);
1286 else if (rt6_need_strict(dest)) {
1287 while ((fn = fn->parent) != NULL) {
1288 if (fn->fn_flags & RTN_ROOT)
1289 break;
1290 if (fn->fn_flags & RTN_RTINFO)
1291 goto restart;
1294 read_unlock_bh(&table->tb6_lock);
1296 if (!rt) {
1297 if (net_ratelimit())
1298 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1299 "for redirect target\n");
1300 return;
1304 * We have finally decided to accept it.
1307 neigh_update(neigh, lladdr, NUD_STALE,
1308 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1309 NEIGH_UPDATE_F_OVERRIDE|
1310 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1311 NEIGH_UPDATE_F_ISROUTER))
1315 * Redirect received -> path was valid.
1316 * Look, redirects are sent only in response to data packets,
1317 * so that this nexthop apparently is reachable. --ANK
1319 dst_confirm(&rt->u.dst);
1321 /* Duplicate redirect: silently ignore. */
1322 if (neigh == rt->u.dst.neighbour)
1323 goto out;
1325 nrt = ip6_rt_copy(rt);
1326 if (nrt == NULL)
1327 goto out;
1329 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1330 if (on_link)
1331 nrt->rt6i_flags &= ~RTF_GATEWAY;
1333 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1334 nrt->rt6i_dst.plen = 128;
1335 nrt->u.dst.flags |= DST_HOST;
1337 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1338 nrt->rt6i_nexthop = neigh_clone(neigh);
1339 /* Reset pmtu, it may be better */
1340 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1341 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1343 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1344 goto out;
1346 netevent.old = &rt->u.dst;
1347 netevent.new = &nrt->u.dst;
1348 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1350 if (rt->rt6i_flags&RTF_CACHE) {
1351 ip6_del_rt(rt, NULL, NULL, NULL);
1352 return;
1355 out:
1356 dst_release(&rt->u.dst);
1357 return;
1361 * Handle ICMP "packet too big" messages
1362 * i.e. Path MTU discovery
1365 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1366 struct net_device *dev, u32 pmtu)
1368 struct rt6_info *rt, *nrt;
1369 int allfrag = 0;
1371 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1372 if (rt == NULL)
1373 return;
1375 if (pmtu >= dst_mtu(&rt->u.dst))
1376 goto out;
1378 if (pmtu < IPV6_MIN_MTU) {
1380 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1381 * MTU (1280) and a fragment header should always be included
1382 * after a node receiving Too Big message reporting PMTU is
1383 * less than the IPv6 Minimum Link MTU.
1385 pmtu = IPV6_MIN_MTU;
1386 allfrag = 1;
1389 /* New mtu received -> path was valid.
1390 They are sent only in response to data packets,
1391 so that this nexthop apparently is reachable. --ANK
1393 dst_confirm(&rt->u.dst);
1395 /* Host route. If it is static, it would be better
1396 not to override it, but add new one, so that
1397 when cache entry will expire old pmtu
1398 would return automatically.
1400 if (rt->rt6i_flags & RTF_CACHE) {
1401 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1402 if (allfrag)
1403 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1404 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1405 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1406 goto out;
1409 /* Network route.
1410 Two cases are possible:
1411 1. It is connected route. Action: COW
1412 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1415 nrt = rt6_alloc_cow(rt, daddr, saddr);
1416 else
1417 nrt = rt6_alloc_clone(rt, daddr);
1419 if (nrt) {
1420 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1421 if (allfrag)
1422 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1424 /* According to RFC 1981, detecting PMTU increase shouldn't be
1425 * happened within 5 mins, the recommended timer is 10 mins.
1426 * Here this route expiration time is set to ip6_rt_mtu_expires
1427 * which is 10 mins. After 10 mins the decreased pmtu is expired
1428 * and detecting PMTU increase will be automatically happened.
1430 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1431 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1433 ip6_ins_rt(nrt, NULL, NULL, NULL);
1435 out:
1436 dst_release(&rt->u.dst);
/*
 *	Misc support functions
 */
1443 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1445 struct rt6_info *rt = ip6_dst_alloc();
1447 if (rt) {
1448 rt->u.dst.input = ort->u.dst.input;
1449 rt->u.dst.output = ort->u.dst.output;
1451 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1452 rt->u.dst.dev = ort->u.dst.dev;
1453 if (rt->u.dst.dev)
1454 dev_hold(rt->u.dst.dev);
1455 rt->rt6i_idev = ort->rt6i_idev;
1456 if (rt->rt6i_idev)
1457 in6_dev_hold(rt->rt6i_idev);
1458 rt->u.dst.lastuse = jiffies;
1459 rt->rt6i_expires = 0;
1461 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1462 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1463 rt->rt6i_metric = 0;
1465 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1466 #ifdef CONFIG_IPV6_SUBTREES
1467 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1468 #endif
1469 rt->rt6i_table = ort->rt6i_table;
1471 return rt;
1474 #ifdef CONFIG_IPV6_ROUTE_INFO
1475 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1476 struct in6_addr *gwaddr, int ifindex)
1478 struct fib6_node *fn;
1479 struct rt6_info *rt = NULL;
1480 struct fib6_table *table;
1482 table = fib6_get_table(RT6_TABLE_INFO);
1483 if (table == NULL)
1484 return NULL;
1486 write_lock_bh(&table->tb6_lock);
1487 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1488 if (!fn)
1489 goto out;
1491 for (rt = fn->leaf; rt; rt = rt->u.next) {
1492 if (rt->rt6i_dev->ifindex != ifindex)
1493 continue;
1494 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1495 continue;
1496 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1497 continue;
1498 dst_hold(&rt->u.dst);
1499 break;
1501 out:
1502 write_unlock_bh(&table->tb6_lock);
1503 return rt;
1506 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1507 struct in6_addr *gwaddr, int ifindex,
1508 unsigned pref)
1510 struct in6_rtmsg rtmsg;
1512 memset(&rtmsg, 0, sizeof(rtmsg));
1513 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1514 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1515 rtmsg.rtmsg_dst_len = prefixlen;
1516 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1517 rtmsg.rtmsg_metric = 1024;
1518 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1519 /* We should treat it as a default route if prefix length is 0. */
1520 if (!prefixlen)
1521 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1522 rtmsg.rtmsg_ifindex = ifindex;
1524 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
1526 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1528 #endif
1530 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1532 struct rt6_info *rt;
1533 struct fib6_table *table;
1535 table = fib6_get_table(RT6_TABLE_DFLT);
1536 if (table == NULL)
1537 return NULL;
1539 write_lock_bh(&table->tb6_lock);
1540 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1541 if (dev == rt->rt6i_dev &&
1542 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1543 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1544 break;
1546 if (rt)
1547 dst_hold(&rt->u.dst);
1548 write_unlock_bh(&table->tb6_lock);
1549 return rt;
1552 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1553 struct net_device *dev,
1554 unsigned int pref)
1556 struct in6_rtmsg rtmsg;
1558 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1559 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1560 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1561 rtmsg.rtmsg_metric = 1024;
1562 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1563 RTF_PREF(pref);
1565 rtmsg.rtmsg_ifindex = dev->ifindex;
1567 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
1568 return rt6_get_dflt_router(gwaddr, dev);
1571 void rt6_purge_dflt_routers(void)
1573 struct rt6_info *rt;
1574 struct fib6_table *table;
1576 /* NOTE: Keep consistent with rt6_get_dflt_router */
1577 table = fib6_get_table(RT6_TABLE_DFLT);
1578 if (table == NULL)
1579 return;
1581 restart:
1582 read_lock_bh(&table->tb6_lock);
1583 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1584 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1585 dst_hold(&rt->u.dst);
1586 read_unlock_bh(&table->tb6_lock);
1587 ip6_del_rt(rt, NULL, NULL, NULL);
1588 goto restart;
1591 read_unlock_bh(&table->tb6_lock);
1594 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1596 struct in6_rtmsg rtmsg;
1597 int err;
1599 switch(cmd) {
1600 case SIOCADDRT: /* Add a route */
1601 case SIOCDELRT: /* Delete a route */
1602 if (!capable(CAP_NET_ADMIN))
1603 return -EPERM;
1604 err = copy_from_user(&rtmsg, arg,
1605 sizeof(struct in6_rtmsg));
1606 if (err)
1607 return -EFAULT;
1609 rtnl_lock();
1610 switch (cmd) {
1611 case SIOCADDRT:
1612 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1613 RT6_TABLE_MAIN);
1614 break;
1615 case SIOCDELRT:
1616 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1617 RT6_TABLE_MAIN);
1618 break;
1619 default:
1620 err = -EINVAL;
1622 rtnl_unlock();
1624 return err;
1627 return -EINVAL;
1631 * Drop the packet on the floor
1634 static int ip6_pkt_discard(struct sk_buff *skb)
1636 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1637 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1638 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1640 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1641 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1642 kfree_skb(skb);
1643 return 0;
1646 static int ip6_pkt_discard_out(struct sk_buff *skb)
1648 skb->dev = skb->dst->dev;
1649 return ip6_pkt_discard(skb);
1653 * Allocate a dst for local (unicast / anycast) address.
1656 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1657 const struct in6_addr *addr,
1658 int anycast)
1660 struct rt6_info *rt = ip6_dst_alloc();
1662 if (rt == NULL)
1663 return ERR_PTR(-ENOMEM);
1665 dev_hold(&loopback_dev);
1666 in6_dev_hold(idev);
1668 rt->u.dst.flags = DST_HOST;
1669 rt->u.dst.input = ip6_input;
1670 rt->u.dst.output = ip6_output;
1671 rt->rt6i_dev = &loopback_dev;
1672 rt->rt6i_idev = idev;
1673 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1674 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1675 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1676 rt->u.dst.obsolete = -1;
1678 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1679 if (anycast)
1680 rt->rt6i_flags |= RTF_ANYCAST;
1681 else
1682 rt->rt6i_flags |= RTF_LOCAL;
1683 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1684 if (rt->rt6i_nexthop == NULL) {
1685 dst_free((struct dst_entry *) rt);
1686 return ERR_PTR(-ENOMEM);
1689 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1690 rt->rt6i_dst.plen = 128;
1691 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1693 atomic_set(&rt->u.dst.__refcnt, 1);
1695 return rt;
1698 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1700 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1701 rt != &ip6_null_entry) {
1702 RT6_TRACE("deleted by ifdown %p\n", rt);
1703 return -1;
1705 return 0;
1708 void rt6_ifdown(struct net_device *dev)
1710 fib6_clean_all(fib6_ifdown, 0, dev);
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1719 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1721 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1722 struct inet6_dev *idev;
1724 /* In IPv6 pmtu discovery is not optional,
1725 so that RTAX_MTU lock cannot disable it.
1726 We still use this lock to block changes
1727 caused by addrconf/ndisc.
1730 idev = __in6_dev_get(arg->dev);
1731 if (idev == NULL)
1732 return 0;
1734 /* For administrative MTU increase, there is no way to discover
1735 IPv6 PMTU increase, so PMTU increase should be updated here.
1736 Since RFC 1981 doesn't include administrative MTU increase
1737 update PMTU increase is a MUST. (i.e. jumbo frame)
1740 If new MTU is less than route PMTU, this new MTU will be the
1741 lowest MTU in the path, update the route PMTU to reflect PMTU
1742 decreases; if new MTU is greater than route PMTU, and the
1743 old MTU is the lowest MTU in the path, update the route PMTU
1744 to reflect the increase. In this case if the other nodes' MTU
1745 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1746 PMTU discouvery.
1748 if (rt->rt6i_dev == arg->dev &&
1749 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1750 (dst_mtu(&rt->u.dst) > arg->mtu ||
1751 (dst_mtu(&rt->u.dst) < arg->mtu &&
1752 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1753 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1754 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1755 return 0;
1758 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1760 struct rt6_mtu_change_arg arg = {
1761 .dev = dev,
1762 .mtu = mtu,
1765 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1768 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1769 struct in6_rtmsg *rtmsg)
1771 memset(rtmsg, 0, sizeof(*rtmsg));
1773 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1774 rtmsg->rtmsg_src_len = r->rtm_src_len;
1775 rtmsg->rtmsg_flags = RTF_UP;
1776 if (r->rtm_type == RTN_UNREACHABLE)
1777 rtmsg->rtmsg_flags |= RTF_REJECT;
1779 if (rta[RTA_GATEWAY-1]) {
1780 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1781 return -EINVAL;
1782 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1783 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1785 if (rta[RTA_DST-1]) {
1786 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1787 return -EINVAL;
1788 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1790 if (rta[RTA_SRC-1]) {
1791 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1792 return -EINVAL;
1793 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1795 if (rta[RTA_OIF-1]) {
1796 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1797 return -EINVAL;
1798 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1800 if (rta[RTA_PRIORITY-1]) {
1801 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1802 return -EINVAL;
1803 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1805 return 0;
1808 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1810 struct rtmsg *r = NLMSG_DATA(nlh);
1811 struct in6_rtmsg rtmsg;
1813 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1814 return -EINVAL;
1815 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
1818 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1820 struct rtmsg *r = NLMSG_DATA(nlh);
1821 struct in6_rtmsg rtmsg;
1823 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1824 return -EINVAL;
1825 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
/* Per-call context handed to rt6_dump_route() through the fib6 walker. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* reply buffer being filled */
	struct netlink_callback *cb;	/* dump callback state */
};
1834 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1835 struct in6_addr *dst, struct in6_addr *src,
1836 int iif, int type, u32 pid, u32 seq,
1837 int prefix, unsigned int flags)
1839 struct rtmsg *rtm;
1840 struct nlmsghdr *nlh;
1841 unsigned char *b = skb->tail;
1842 struct rta_cacheinfo ci;
1844 if (prefix) { /* user wants prefix routes only */
1845 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1846 /* success since this is not a prefix route */
1847 return 1;
1851 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1852 rtm = NLMSG_DATA(nlh);
1853 rtm->rtm_family = AF_INET6;
1854 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1855 rtm->rtm_src_len = rt->rt6i_src.plen;
1856 rtm->rtm_tos = 0;
1857 if (rt->rt6i_table)
1858 rtm->rtm_table = rt->rt6i_table->tb6_id;
1859 else
1860 rtm->rtm_table = RT6_TABLE_UNSPEC;
1861 rtm->rtm_table = RT_TABLE_MAIN;
1862 if (rt->rt6i_flags&RTF_REJECT)
1863 rtm->rtm_type = RTN_UNREACHABLE;
1864 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1865 rtm->rtm_type = RTN_LOCAL;
1866 else
1867 rtm->rtm_type = RTN_UNICAST;
1868 rtm->rtm_flags = 0;
1869 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1870 rtm->rtm_protocol = rt->rt6i_protocol;
1871 if (rt->rt6i_flags&RTF_DYNAMIC)
1872 rtm->rtm_protocol = RTPROT_REDIRECT;
1873 else if (rt->rt6i_flags & RTF_ADDRCONF)
1874 rtm->rtm_protocol = RTPROT_KERNEL;
1875 else if (rt->rt6i_flags&RTF_DEFAULT)
1876 rtm->rtm_protocol = RTPROT_RA;
1878 if (rt->rt6i_flags&RTF_CACHE)
1879 rtm->rtm_flags |= RTM_F_CLONED;
1881 if (dst) {
1882 RTA_PUT(skb, RTA_DST, 16, dst);
1883 rtm->rtm_dst_len = 128;
1884 } else if (rtm->rtm_dst_len)
1885 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1886 #ifdef CONFIG_IPV6_SUBTREES
1887 if (src) {
1888 RTA_PUT(skb, RTA_SRC, 16, src);
1889 rtm->rtm_src_len = 128;
1890 } else if (rtm->rtm_src_len)
1891 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1892 #endif
1893 if (iif)
1894 RTA_PUT(skb, RTA_IIF, 4, &iif);
1895 else if (dst) {
1896 struct in6_addr saddr_buf;
1897 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1898 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1900 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1901 goto rtattr_failure;
1902 if (rt->u.dst.neighbour)
1903 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1904 if (rt->u.dst.dev)
1905 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1906 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1907 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1908 if (rt->rt6i_expires)
1909 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1910 else
1911 ci.rta_expires = 0;
1912 ci.rta_used = rt->u.dst.__use;
1913 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1914 ci.rta_error = rt->u.dst.error;
1915 ci.rta_id = 0;
1916 ci.rta_ts = 0;
1917 ci.rta_tsage = 0;
1918 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1919 nlh->nlmsg_len = skb->tail - b;
1920 return skb->len;
1922 nlmsg_failure:
1923 rtattr_failure:
1924 skb_trim(skb, b - skb->data);
1925 return -1;
1928 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1930 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1931 int prefix;
1933 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1934 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1935 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1936 } else
1937 prefix = 0;
1939 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1940 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1941 prefix, NLM_F_MULTI);
1944 static int fib6_dump_node(struct fib6_walker_t *w)
1946 int res;
1947 struct rt6_info *rt;
1949 for (rt = w->leaf; rt; rt = rt->u.next) {
1950 res = rt6_dump_route(rt, w->args);
1951 if (res < 0) {
1952 /* Frame is full, suspend walking */
1953 w->leaf = rt;
1954 return 1;
1956 BUG_TRAP(res!=0);
1958 w->leaf = NULL;
1959 return 0;
1962 static void fib6_dump_end(struct netlink_callback *cb)
1964 struct fib6_walker_t *w = (void*)cb->args[0];
1966 if (w) {
1967 cb->args[0] = 0;
1968 kfree(w);
1970 cb->done = (void*)cb->args[1];
1971 cb->args[1] = 0;
1974 static int fib6_dump_done(struct netlink_callback *cb)
1976 fib6_dump_end(cb);
1977 return cb->done ? cb->done(cb) : 0;
1980 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1982 struct fib6_table *table;
1983 struct rt6_rtnl_dump_arg arg;
1984 struct fib6_walker_t *w;
1985 int i, res = 0;
1987 arg.skb = skb;
1988 arg.cb = cb;
1991 * cb->args[0] = pointer to walker structure
1992 * cb->args[1] = saved cb->done() pointer
1993 * cb->args[2] = current table being dumped
1996 w = (void*)cb->args[0];
1997 if (w == NULL) {
1998 /* New dump:
2000 * 1. hook callback destructor.
2002 cb->args[1] = (long)cb->done;
2003 cb->done = fib6_dump_done;
2006 * 2. allocate and initialize walker.
2008 w = kzalloc(sizeof(*w), GFP_ATOMIC);
2009 if (w == NULL)
2010 return -ENOMEM;
2011 w->func = fib6_dump_node;
2012 w->args = &arg;
2013 cb->args[0] = (long)w;
2014 cb->args[2] = FIB6_TABLE_MIN;
2015 } else {
2016 w->args = &arg;
2017 i = cb->args[2];
2018 if (i > FIB6_TABLE_MAX)
2019 goto end;
2021 table = fib6_get_table(i);
2022 if (table != NULL) {
2023 read_lock_bh(&table->tb6_lock);
2024 w->root = &table->tb6_root;
2025 res = fib6_walk_continue(w);
2026 read_unlock_bh(&table->tb6_lock);
2027 if (res != 0) {
2028 if (res < 0)
2029 fib6_walker_unlink(w);
2030 goto end;
2034 fib6_walker_unlink(w);
2035 cb->args[2] = ++i;
2038 for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
2039 table = fib6_get_table(i);
2040 if (table == NULL)
2041 continue;
2043 read_lock_bh(&table->tb6_lock);
2044 w->root = &table->tb6_root;
2045 res = fib6_walk(w);
2046 read_unlock_bh(&table->tb6_lock);
2047 if (res)
2048 break;
2050 end:
2051 cb->args[2] = i;
2053 res = res < 0 ? res : skb->len;
2054 /* res < 0 is an error. (really, impossible)
2055 res == 0 means that dump is complete, but skb still can contain data.
2056 res > 0 dump is not complete, but frame is full.
2058 /* Destroy walker, if dump of this table is complete. */
2059 if (res <= 0)
2060 fib6_dump_end(cb);
2061 return res;
2064 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2066 struct rtattr **rta = arg;
2067 int iif = 0;
2068 int err = -ENOBUFS;
2069 struct sk_buff *skb;
2070 struct flowi fl;
2071 struct rt6_info *rt;
2073 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2074 if (skb == NULL)
2075 goto out;
2077 /* Reserve room for dummy headers, this skb can pass
2078 through good chunk of routing engine.
2080 skb->mac.raw = skb->data;
2081 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2083 memset(&fl, 0, sizeof(fl));
2084 if (rta[RTA_SRC-1])
2085 ipv6_addr_copy(&fl.fl6_src,
2086 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2087 if (rta[RTA_DST-1])
2088 ipv6_addr_copy(&fl.fl6_dst,
2089 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2091 if (rta[RTA_IIF-1])
2092 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2094 if (iif) {
2095 struct net_device *dev;
2096 dev = __dev_get_by_index(iif);
2097 if (!dev) {
2098 err = -ENODEV;
2099 goto out_free;
2103 fl.oif = 0;
2104 if (rta[RTA_OIF-1])
2105 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2107 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2109 skb->dst = &rt->u.dst;
2111 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2112 err = rt6_fill_node(skb, rt,
2113 &fl.fl6_dst, &fl.fl6_src,
2114 iif,
2115 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2116 nlh->nlmsg_seq, 0, 0);
2117 if (err < 0) {
2118 err = -EMSGSIZE;
2119 goto out_free;
2122 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2123 if (err > 0)
2124 err = 0;
2125 out:
2126 return err;
2127 out_free:
2128 kfree_skb(skb);
2129 goto out;
2132 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2133 struct netlink_skb_parms *req)
2135 struct sk_buff *skb;
2136 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
2137 u32 pid = current->pid;
2138 u32 seq = 0;
2140 if (req)
2141 pid = req->pid;
2142 if (nlh)
2143 seq = nlh->nlmsg_seq;
2145 skb = alloc_skb(size, gfp_any());
2146 if (!skb) {
2147 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
2148 return;
2150 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
2151 kfree_skb(skb);
2152 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
2153 return;
2155 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2156 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
/*
 *	/proc
 */
2163 #ifdef CONFIG_PROC_FS
2165 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer supplied by the proc layer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* requested length */
	int skip;	/* routes skipped so far to reach @offset */
	int len;	/* bytes written into @buffer so far */
};
2176 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2178 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2179 int i;
2181 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2182 arg->skip++;
2183 return 0;
2186 if (arg->len >= arg->length)
2187 return 0;
2189 for (i=0; i<16; i++) {
2190 sprintf(arg->buffer + arg->len, "%02x",
2191 rt->rt6i_dst.addr.s6_addr[i]);
2192 arg->len += 2;
2194 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2195 rt->rt6i_dst.plen);
2197 #ifdef CONFIG_IPV6_SUBTREES
2198 for (i=0; i<16; i++) {
2199 sprintf(arg->buffer + arg->len, "%02x",
2200 rt->rt6i_src.addr.s6_addr[i]);
2201 arg->len += 2;
2203 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2204 rt->rt6i_src.plen);
2205 #else
2206 sprintf(arg->buffer + arg->len,
2207 "00000000000000000000000000000000 00 ");
2208 arg->len += 36;
2209 #endif
2211 if (rt->rt6i_nexthop) {
2212 for (i=0; i<16; i++) {
2213 sprintf(arg->buffer + arg->len, "%02x",
2214 rt->rt6i_nexthop->primary_key[i]);
2215 arg->len += 2;
2217 } else {
2218 sprintf(arg->buffer + arg->len,
2219 "00000000000000000000000000000000");
2220 arg->len += 32;
2222 arg->len += sprintf(arg->buffer + arg->len,
2223 " %08x %08x %08x %08x %8s\n",
2224 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2225 rt->u.dst.__use, rt->rt6i_flags,
2226 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2227 return 0;
2230 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2232 struct rt6_proc_arg arg = {
2233 .buffer = buffer,
2234 .offset = offset,
2235 .length = length,
2238 fib6_clean_all(rt6_info_route, 0, &arg);
2240 *start = buffer;
2241 if (offset)
2242 *start += offset % RT6_INFO_LEN;
2244 arg.len -= offset % RT6_INFO_LEN;
2246 if (arg.len > length)
2247 arg.len = length;
2248 if (arg.len < 0)
2249 arg.len = 0;
2251 return arg.len;
2254 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2256 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2257 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2258 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2259 rt6_stats.fib_rt_cache,
2260 atomic_read(&ip6_dst_ops.entries),
2261 rt6_stats.fib_discarded_routes);
2263 return 0;
2266 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2268 return single_open(file, rt6_stats_seq_show, NULL);
2271 static struct file_operations rt6_stats_seq_fops = {
2272 .owner = THIS_MODULE,
2273 .open = rt6_stats_seq_open,
2274 .read = seq_read,
2275 .llseek = seq_lseek,
2276 .release = single_release,
2278 #endif /* CONFIG_PROC_FS */
2280 #ifdef CONFIG_SYSCTL
2282 static int flush_delay;
2284 static
2285 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2286 void __user *buffer, size_t *lenp, loff_t *ppos)
2288 if (write) {
2289 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2290 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2291 return 0;
2292 } else
2293 return -EINVAL;
2296 ctl_table ipv6_route_table[] = {
2298 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2299 .procname = "flush",
2300 .data = &flush_delay,
2301 .maxlen = sizeof(int),
2302 .mode = 0200,
2303 .proc_handler = &ipv6_sysctl_rtcache_flush
2306 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2307 .procname = "gc_thresh",
2308 .data = &ip6_dst_ops.gc_thresh,
2309 .maxlen = sizeof(int),
2310 .mode = 0644,
2311 .proc_handler = &proc_dointvec,
2314 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2315 .procname = "max_size",
2316 .data = &ip6_rt_max_size,
2317 .maxlen = sizeof(int),
2318 .mode = 0644,
2319 .proc_handler = &proc_dointvec,
2322 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2323 .procname = "gc_min_interval",
2324 .data = &ip6_rt_gc_min_interval,
2325 .maxlen = sizeof(int),
2326 .mode = 0644,
2327 .proc_handler = &proc_dointvec_jiffies,
2328 .strategy = &sysctl_jiffies,
2331 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2332 .procname = "gc_timeout",
2333 .data = &ip6_rt_gc_timeout,
2334 .maxlen = sizeof(int),
2335 .mode = 0644,
2336 .proc_handler = &proc_dointvec_jiffies,
2337 .strategy = &sysctl_jiffies,
2340 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2341 .procname = "gc_interval",
2342 .data = &ip6_rt_gc_interval,
2343 .maxlen = sizeof(int),
2344 .mode = 0644,
2345 .proc_handler = &proc_dointvec_jiffies,
2346 .strategy = &sysctl_jiffies,
2349 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2350 .procname = "gc_elasticity",
2351 .data = &ip6_rt_gc_elasticity,
2352 .maxlen = sizeof(int),
2353 .mode = 0644,
2354 .proc_handler = &proc_dointvec_jiffies,
2355 .strategy = &sysctl_jiffies,
2358 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2359 .procname = "mtu_expires",
2360 .data = &ip6_rt_mtu_expires,
2361 .maxlen = sizeof(int),
2362 .mode = 0644,
2363 .proc_handler = &proc_dointvec_jiffies,
2364 .strategy = &sysctl_jiffies,
2367 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2368 .procname = "min_adv_mss",
2369 .data = &ip6_rt_min_advmss,
2370 .maxlen = sizeof(int),
2371 .mode = 0644,
2372 .proc_handler = &proc_dointvec_jiffies,
2373 .strategy = &sysctl_jiffies,
2376 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2377 .procname = "gc_min_interval_ms",
2378 .data = &ip6_rt_gc_min_interval,
2379 .maxlen = sizeof(int),
2380 .mode = 0644,
2381 .proc_handler = &proc_dointvec_ms_jiffies,
2382 .strategy = &sysctl_ms_jiffies,
2384 { .ctl_name = 0 }
2387 #endif
2389 void __init ip6_route_init(void)
2391 struct proc_dir_entry *p;
2393 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2394 sizeof(struct rt6_info),
2395 0, SLAB_HWCACHE_ALIGN,
2396 NULL, NULL);
2397 if (!ip6_dst_ops.kmem_cachep)
2398 panic("cannot create ip6_dst_cache");
2400 fib6_init();
2401 #ifdef CONFIG_PROC_FS
2402 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2403 if (p)
2404 p->owner = THIS_MODULE;
2406 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2407 #endif
2408 #ifdef CONFIG_XFRM
2409 xfrm6_init();
2410 #endif
2413 void ip6_route_cleanup(void)
2415 #ifdef CONFIG_PROC_FS
2416 proc_net_remove("ipv6_route");
2417 proc_net_remove("rt6_stats");
2418 #endif
2419 #ifdef CONFIG_XFRM
2420 xfrm6_fini();
2421 #endif
2422 rt6_ifdown(NULL);
2423 fib6_gc_cleanup();
2424 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);