[NET]: Make code static.
[linux-2.6/verdex.git] / net / ipv6 / route.c
blob e08d84063c1fdfaa7b46bdcc10ab9e01929d1339
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 /* Changes:
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
41 #ifdef CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
/*
 * Debug switches and route-selection flags local to this file.
 * With RT6_DEBUG below 3, RDBG() expands to nothing and RT6_TRACE() to an
 * empty do/while; at 3 or above both forward their arguments to printk().
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
/*
 * Tested with #if in ip6_pol_route_input()/ip6_pol_route_output(): when 0,
 * a selected route that is not copied via rt6_alloc_cow() is returned as-is
 * instead of being cloned with rt6_alloc_clone().
 */
75 #define CLONE_OFFLINK_ROUTE 0
/* Bits of the "strict" argument taken by rt6_score_route()/rt6_select(). */
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
/*
 * Route cache tunables (times are in jiffies).
 * ip6_dst_gc() reads ip6_rt_max_size, ip6_rt_gc_min_interval,
 * ip6_rt_gc_timeout and ip6_rt_gc_elasticity; ipv6_advmss() reads
 * ip6_rt_min_advmss.
 * NOTE(review): ip6_rt_gc_interval is the only non-static one and is not
 * read in the visible part of this file - presumably used from another
 * translation unit; confirm before changing its linkage.
 */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/*
 * Forward declarations of file-local helpers. Most of them are needed
 * because the ip6_dst_ops table and the static reject entries below take
 * their addresses before the definitions appear.
 */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* Route Information option helpers, used by rt6_route_rcv(). */
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107 #endif
/*
 * dst_ops callback table for IPv6 routes. ip6_dst_alloc() passes it to
 * dst_alloc(), and ip6_dst_gc() reads its entries counter and gc_thresh.
 */
109 static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
/*
 * Statically allocated reject route (error -ENETUNREACH, bound to the
 * loopback device). rt6_device_match() and rt6_select() return its address
 * when no usable route is found, and ip6_del_rt() refuses to delete it.
 */
123 struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Two more static reject routes, built only with CONFIG_IPV6_MULTIPLE_TABLES.
 * They are initialised like ip6_null_entry except for the error code:
 * -EACCES for the prohibit entry and -EINVAL for the blackhole entry.
 * NOTE(review): neither is referenced in the visible part of this file -
 * presumably they are used by policy-rule code elsewhere; confirm.
 */
143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
145 struct rt6_info ip6_prohibit_entry = {
146 .u = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .dev = &loopback_dev,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
156 .ops = &ip6_dst_ops,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
165 struct rt6_info ip6_blk_hole_entry = {
166 .u = {
167 .dst = {
168 .__refcnt = ATOMIC_INIT(1),
169 .__use = 1,
170 .dev = &loopback_dev,
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
176 .ops = &ip6_dst_ops,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
185 #endif
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
193 static void ip6_dst_destroy(struct dst_entry *dst)
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
225 static inline int rt6_need_strict(struct in6_addr *daddr)
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 * Route lookup. Any table->tb6_lock is implied.
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
256 local = sprt;
260 if (local)
261 return local;
263 if (strict)
264 return &ip6_null_entry;
266 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the per-device rtr_probe_interval has elapsed since
 * the neighbour entry was last updated, send a Neighbor Solicitation to it.
 *
 * The decision and the timestamp update are made under neigh->lock; the
 * solicitation itself is sent after the lock is dropped.
 * A NULL @rt or a route without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	if (rt == NULL)
		return;
	neigh = rt->rt6i_nexthop;
	if (neigh == NULL || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
306 * Default Router Selection (RFC 2461 6.3.6)
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
319 static int inline rt6_check_neigh(struct rt6_info *rt)
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
329 m = 2;
330 read_unlock_bh(&neigh->lock);
332 return m;
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
338 int m, n;
340 m = rt6_check_dev(rt, oif);
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346 n = rt6_check_neigh(rt);
347 if (n > 1)
348 m |= 16;
349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
350 return -1;
351 return m;
/*
 * Pick the best route among the leading entries of the list at *head that
 * share the metric of the first entry.
 *
 * Each non-expired candidate is scored with rt6_score_route(); the highest
 * score wins and the first entry reaching that score is kept. Candidates
 * that lose are handed to rt6_probe().
 * If nothing matched while RT6_SELECT_F_REACHABLE was requested, the first
 * entry is moved behind the last examined one (round-robin), which rewrites
 * *head.
 * Returns the chosen route, or &ip6_null_entry when there is none.
 */
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
/* "!last || rt != rt0" stops the walk if the list wraps back to its head. */
365 for (rt = rt0, metric = rt0->rt6i_metric;
366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367 rt = rt->u.next) {
368 int m;
370 if (rt6_check_expired(rt))
371 continue;
/* last tracks the final non-expired entry seen, for the rotation below */
373 last = rt;
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 continue;
/* on a new best, the previous best (possibly NULL) gets probed instead */
379 if (m > mpri) {
380 rt6_probe(match);
381 match = rt;
382 mpri = m;
383 } else {
384 rt6_probe(rt);
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
/* the function-local static lock serialises concurrent list rotations */
392 static DEFINE_SPINLOCK(lock);
393 spin_lock(&lock);
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
397 spin_unlock(&lock);
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
403 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option (RFC 4191, section 2.3).
 *
 * @dev:    device the option arrived on; its ifindex keys the route
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Adds, refreshes or (for a zero lifetime) deletes the route described by
 * the option. Returns 0 when the option was handled and -EINVAL when it is
 * malformed and was ignored.
 *
 * Validation follows RFC 4191:
 *  - Length must be 1..3; a prefix longer than 64 bits needs Length 3 and
 *    any non-empty prefix needs at least Length 2. This also keeps
 *    ipv6_addr_prefix() from reading prefix bytes the option does not carry.
 *  - An option carrying the reserved preference value is ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* compare as signed so that a negative length cannot slip through */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	/* the reserved preference value means "ignore this option" */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* lifetime arrives in network byte order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* shorter option: copy only prefix_len bits, zero the rest */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
/*
 * Shared backtracking step for the policy lookup functions.
 *
 * Expects these names in the expanding function: locals "rt", "fn" and
 * "flags", and labels "restart" and "out".
 * When the lookup produced &ip6_null_entry in strict mode, climb towards
 * the root through fn->parent: at a node flagged RTN_TL_ROOT take a
 * reference on rt and jump to "out"; at a node flagged RTN_RTINFO jump to
 * "restart". If the parent chain ends first, execution falls through.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single statement
 * and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
/*
 * Per-table lookup callback handed to fib6_rule_lookup() by rt6_lookup().
 *
 * Finds the FIB node for the flow's destination/source under the table read
 * lock and picks an entry with rt6_device_match(). BACKTRACK() may jump to
 * "restart" (retry on a parent node) or to "out" (reference already taken).
 * Returns the route with a reference held and its usage stamps updated.
 */
496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
499 struct fib6_node *fn;
500 struct rt6_info *rt;
502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504 restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
/* normal path: take the reference here; the BACKTRACK exit path took its own */
508 dst_hold(&rt->u.dst);
509 out:
510 read_unlock_bh(&table->tb6_lock);
512 rt->u.dst.lastuse = jiffies;
513 rt->u.dst.__use++;
515 return rt;
519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
538 dst_release(dst);
540 return NULL;
543 /* ip6_ins_rt is called with FREE table->tb6_lock.
544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
549 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
550 void *_rtattr, struct netlink_skb_parms *req)
552 int err;
553 struct fib6_table *table;
555 table = rt->rt6i_table;
556 write_lock_bh(&table->tb6_lock);
557 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
558 write_unlock_bh(&table->tb6_lock);
560 return err;
563 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
564 struct in6_addr *saddr)
566 struct rt6_info *rt;
569 * Clone the route.
572 rt = ip6_rt_copy(ort);
574 if (rt) {
575 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
576 if (rt->rt6i_dst.plen != 128 &&
577 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
578 rt->rt6i_flags |= RTF_ANYCAST;
579 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
582 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
583 rt->rt6i_dst.plen = 128;
584 rt->rt6i_flags |= RTF_CACHE;
585 rt->u.dst.flags |= DST_HOST;
587 #ifdef CONFIG_IPV6_SUBTREES
588 if (rt->rt6i_src.plen && saddr) {
589 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
590 rt->rt6i_src.plen = 128;
592 #endif
594 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
598 return rt;
601 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
603 struct rt6_info *rt = ip6_rt_copy(ort);
604 if (rt) {
605 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
606 rt->rt6i_dst.plen = 128;
607 rt->rt6i_flags |= RTF_CACHE;
608 if (rt->rt6i_flags & RTF_REJECT)
609 rt->u.dst.error = ort->u.dst.error;
610 rt->u.dst.flags |= DST_HOST;
611 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
613 return rt;
/*
 * Per-table lookup callback for received packets (see ip6_route_input()).
 *
 * Selects a route with rt6_select() keyed on the incoming interface
 * (fl->iif). The first pass asks for reachable routers only; if that yields
 * the null entry or a cached route, the "out" path retries once without
 * RT6_SELECT_F_REACHABLE before returning.
 * A selected non-cache route with no next hop (and without RTF_NONEXTHOP)
 * is copied with rt6_alloc_cow() and inserted; if insertion fails the
 * lookup is repeated, at most three attempts in total.
 * Returns a route with a reference held and its usage stamps updated.
 */
616 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
617 struct flowi *fl, int flags)
619 struct fib6_node *fn;
620 struct rt6_info *rt, *nrt;
621 int strict = 0;
622 int attempts = 3;
623 int err;
624 int reachable = RT6_SELECT_F_REACHABLE;
626 if (flags & RT6_F_STRICT)
627 strict = RT6_SELECT_F_IFACE;
629 relookup:
630 read_lock_bh(&table->tb6_lock);
632 restart_2:
633 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
635 restart:
636 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
637 BACKTRACK();
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
640 goto out;
/* hold the selected route across the unlocked copy-and-insert below */
642 dst_hold(&rt->u.dst);
643 read_unlock_bh(&table->tb6_lock);
645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
647 else {
648 #if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
650 #else
651 goto out2;
652 #endif
/* switch from the original route to the copy (or to the null entry if the copy failed) */
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
658 dst_hold(&rt->u.dst);
659 if (nrt) {
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
661 if (!err)
662 goto out2;
665 if (--attempts <= 0)
666 goto out2;
669 * Race condition! In the gap, when table->tb6_lock was
670 * released someone could insert this route. Relookup.
672 dst_release(&rt->u.dst);
673 goto relookup;
675 out:
/* first pass found nothing usable: retry once without the reachability filter */
676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
680 dst_hold(&rt->u.dst);
681 read_unlock_bh(&table->tb6_lock);
682 out2:
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
686 return rt;
689 void ip6_route_input(struct sk_buff *skb)
691 struct ipv6hdr *iph = skb->nh.ipv6h;
692 struct flowi fl = {
693 .iif = skb->dev->ifindex,
694 .nl_u = {
695 .ip6_u = {
696 .daddr = iph->daddr,
697 .saddr = iph->saddr,
698 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
701 .proto = iph->nexthdr,
703 int flags = 0;
705 if (rt6_need_strict(&iph->daddr))
706 flags |= RT6_F_STRICT;
708 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
/*
 * Per-table lookup callback for locally generated traffic (see
 * ip6_route_output()).
 *
 * Same algorithm as ip6_pol_route_input(), except that rt6_select() is
 * keyed on the outgoing interface (fl->oif): a first pass restricted to
 * reachable routers, one retry without that restriction, and a
 * copy-and-insert step (rt6_alloc_cow()) for routes without a next hop,
 * repeated for at most three attempts when the insertion fails.
 * Returns a route with a reference held and its usage stamps updated.
 */
711 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
712 struct flowi *fl, int flags)
714 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt;
716 int strict = 0;
717 int attempts = 3;
718 int err;
719 int reachable = RT6_SELECT_F_REACHABLE;
721 if (flags & RT6_F_STRICT)
722 strict = RT6_SELECT_F_IFACE;
724 relookup:
725 read_lock_bh(&table->tb6_lock);
727 restart_2:
728 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
730 restart:
731 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
732 BACKTRACK();
733 if (rt == &ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE)
735 goto out;
/* hold the selected route across the unlocked copy-and-insert below */
737 dst_hold(&rt->u.dst);
738 read_unlock_bh(&table->tb6_lock);
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else {
743 #if CLONE_OFFLINK_ROUTE
744 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
745 #else
746 goto out2;
747 #endif
/* switch from the original route to the copy (or to the null entry if the copy failed) */
750 dst_release(&rt->u.dst);
751 rt = nrt ? : &ip6_null_entry;
753 dst_hold(&rt->u.dst);
754 if (nrt) {
755 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
756 if (!err)
757 goto out2;
760 if (--attempts <= 0)
761 goto out2;
764 * Race condition! In the gap, when table->tb6_lock was
765 * released someone could insert this route. Relookup.
767 dst_release(&rt->u.dst);
768 goto relookup;
770 out:
/* first pass found nothing usable: retry once without the reachability filter */
771 if (reachable) {
772 reachable = 0;
773 goto restart_2;
775 dst_hold(&rt->u.dst);
776 read_unlock_bh(&table->tb6_lock);
777 out2:
778 rt->u.dst.lastuse = jiffies;
779 rt->u.dst.__use++;
780 return rt;
783 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
785 int flags = 0;
787 if (rt6_need_strict(&fl->fl6_dst))
788 flags |= RT6_F_STRICT;
790 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
795 * Destination cache support functions
798 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
800 struct rt6_info *rt;
802 rt = (struct rt6_info *) dst;
804 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
805 return dst;
807 return NULL;
810 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
812 struct rt6_info *rt = (struct rt6_info *) dst;
814 if (rt) {
815 if (rt->rt6i_flags & RTF_CACHE)
816 ip6_del_rt(rt, NULL, NULL, NULL);
817 else
818 dst_release(dst);
820 return NULL;
823 static void ip6_link_failure(struct sk_buff *skb)
825 struct rt6_info *rt;
827 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
829 rt = (struct rt6_info *) skb->dst;
830 if (rt) {
831 if (rt->rt6i_flags&RTF_CACHE) {
832 dst_set_expires(&rt->u.dst, 0);
833 rt->rt6i_flags |= RTF_EXPIRES;
834 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
835 rt->rt6i_node->fn_sernum = -1;
839 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
841 struct rt6_info *rt6 = (struct rt6_info*)dst;
843 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
844 rt6->rt6i_flags |= RTF_MODIFIED;
845 if (mtu < IPV6_MIN_MTU) {
846 mtu = IPV6_MIN_MTU;
847 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
849 dst->metrics[RTAX_MTU-1] = mtu;
850 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
854 static int ipv6_get_mtu(struct net_device *dev);
856 static inline unsigned int ipv6_advmss(unsigned int mtu)
858 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
860 if (mtu < ip6_rt_min_advmss)
861 mtu = ip6_rt_min_advmss;
864 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
865 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
866 * IPV6_MAXPLEN is also valid and means: "any MSS,
867 * rely only on pmtu discovery"
869 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
870 mtu = IPV6_MAXPLEN;
871 return mtu;
/*
 * Singly linked list (through dst->next) of entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees those whose refcount is zero.
 * Both functions access the list under ndisc_lock.
 */
874 static struct dst_entry *ndisc_dst_gc_list;
875 static DEFINE_SPINLOCK(ndisc_lock);
877 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
878 struct neighbour *neigh,
879 struct in6_addr *addr,
880 int (*output)(struct sk_buff *))
882 struct rt6_info *rt;
883 struct inet6_dev *idev = in6_dev_get(dev);
885 if (unlikely(idev == NULL))
886 return NULL;
888 rt = ip6_dst_alloc();
889 if (unlikely(rt == NULL)) {
890 in6_dev_put(idev);
891 goto out;
894 dev_hold(dev);
895 if (neigh)
896 neigh_hold(neigh);
897 else
898 neigh = ndisc_get_neigh(dev, addr);
900 rt->rt6i_dev = dev;
901 rt->rt6i_idev = idev;
902 rt->rt6i_nexthop = neigh;
903 atomic_set(&rt->u.dst.__refcnt, 1);
904 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
905 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
906 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
907 rt->u.dst.output = output;
909 #if 0 /* there's no chance to use these for ndisc */
910 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
911 ? DST_HOST
912 : 0;
913 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
914 rt->rt6i_dst.plen = 128;
915 #endif
917 spin_lock_bh(&ndisc_lock);
918 rt->u.dst.next = ndisc_dst_gc_list;
919 ndisc_dst_gc_list = &rt->u.dst;
920 spin_unlock_bh(&ndisc_lock);
922 fib6_force_start_gc();
924 out:
925 return (struct dst_entry *)rt;
928 int ndisc_dst_gc(int *more)
930 struct dst_entry *dst, *next, **pprev;
931 int freed;
933 next = NULL;
934 freed = 0;
936 spin_lock_bh(&ndisc_lock);
937 pprev = &ndisc_dst_gc_list;
939 while ((dst = *pprev) != NULL) {
940 if (!atomic_read(&dst->__refcnt)) {
941 *pprev = dst->next;
942 dst_free(dst);
943 freed++;
944 } else {
945 pprev = &dst->next;
946 (*more)++;
950 spin_unlock_bh(&ndisc_lock);
952 return freed;
955 static int ip6_dst_gc(void)
957 static unsigned expire = 30*HZ;
958 static unsigned long last_gc;
959 unsigned long now = jiffies;
961 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
962 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
963 goto out;
965 expire++;
966 fib6_run_gc(expire);
967 last_gc = now;
968 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
969 expire = ip6_rt_gc_timeout>>1;
971 out:
972 expire -= expire>>ip6_rt_gc_elasticity;
973 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
976 /* Clean host part of a prefix. Not necessary in radix tree,
977 but results in cleaner routing tables.
979 Remove it only when all the things will work!
982 static int ipv6_get_mtu(struct net_device *dev)
984 int mtu = IPV6_MIN_MTU;
985 struct inet6_dev *idev;
987 idev = in6_dev_get(dev);
988 if (idev) {
989 mtu = idev->cnf.mtu6;
990 in6_dev_put(idev);
992 return mtu;
995 int ipv6_get_hoplimit(struct net_device *dev)
997 int hoplimit = ipv6_devconf.hop_limit;
998 struct inet6_dev *idev;
1000 idev = in6_dev_get(dev);
1001 if (idev) {
1002 hoplimit = idev->cnf.hop_limit;
1003 in6_dev_put(idev);
1005 return hoplimit;
/*
 * Build a route from the request in @rtmsg and insert it into the table
 * identified by @table_id.
 *
 * @nlh, @_rtattr, @req: netlink context; may be NULL. When @nlh is given the
 *   route protocol is taken from its rtmsg payload, otherwise RTPROT_BOOT is
 *   used. @_rtattr is read as an array of struct rtattr pointers and only
 *   its RTA_METRICS slot is consulted here.
 *
 * Returns the result of ip6_ins_rt() once the route has been built, or a
 * negative errno (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH, or the
 * neighbour lookup error) if building it failed. On the failure path the
 * device, inet6_dev and route references taken so far are released.
 * Note that a zero rtmsg_metric is overwritten in the caller's @rtmsg.
 */
1012 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1013 void *_rtattr, struct netlink_skb_parms *req,
1014 u32 table_id)
1016 int err;
1017 struct rtmsg *r;
1018 struct rtattr **rta;
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
1022 struct fib6_table *table;
1023 int addr_type;
1025 rta = (struct rtattr **) _rtattr;
/* prefix lengths are in bits; source prefixes need CONFIG_IPV6_SUBTREES */
1027 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
1028 return -EINVAL;
1029 #ifndef CONFIG_IPV6_SUBTREES
1030 if (rtmsg->rtmsg_src_len)
1031 return -EINVAL;
1032 #endif
/* an explicit interface must exist and have IPv6 state attached */
1033 if (rtmsg->rtmsg_ifindex) {
1034 err = -ENODEV;
1035 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
1036 if (!dev)
1037 goto out;
1038 idev = in6_dev_get(dev);
1039 if (!idev)
1040 goto out;
1043 if (rtmsg->rtmsg_metric == 0)
1044 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1046 table = fib6_new_table(table_id);
1047 if (table == NULL) {
1048 err = -ENOBUFS;
1049 goto out;
1052 rt = ip6_dst_alloc();
1054 if (rt == NULL) {
1055 err = -ENOMEM;
1056 goto out;
1059 rt->u.dst.obsolete = -1;
1060 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1061 if (nlh && (r = NLMSG_DATA(nlh))) {
1062 rt->rt6i_protocol = r->rtm_protocol;
1063 } else {
1064 rt->rt6i_protocol = RTPROT_BOOT;
1067 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
/* choose the input handler by destination type */
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1074 rt->u.dst.output = ip6_output;
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1077 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1078 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1079 if (rt->rt6i_dst.plen == 128)
1080 rt->u.dst.flags = DST_HOST;
1082 #ifdef CONFIG_IPV6_SUBTREES
1083 ipv6_addr_prefix(&rt->rt6i_src.addr,
1084 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1085 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1086 #endif
1088 rt->rt6i_metric = rtmsg->rtmsg_metric;
1090 /* We cannot add true routes via loopback here,
1091 they would result in kernel looping; promote them to reject routes
1093 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1094 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1095 /* hold loopback dev/idev if we haven't done so. */
1096 if (dev != &loopback_dev) {
1097 if (dev) {
1098 dev_put(dev);
1099 in6_dev_put(idev);
1101 dev = &loopback_dev;
1102 dev_hold(dev);
1103 idev = in6_dev_get(dev);
1104 if (!idev) {
1105 err = -ENODEV;
1106 goto out;
/* reject routes discard traffic in both directions and skip gateway setup */
1109 rt->u.dst.output = ip6_pkt_discard_out;
1110 rt->u.dst.input = ip6_pkt_discard;
1111 rt->u.dst.error = -ENETUNREACH;
1112 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1113 goto install_route;
1116 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1117 struct in6_addr *gw_addr;
1118 int gwa_type;
1120 gw_addr = &rtmsg->rtmsg_gateway;
1121 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1122 gwa_type = ipv6_addr_type(gw_addr);
1124 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1125 struct rt6_info *grt;
1127 /* IPv6 strictly inhibits using not link-local
1128 addresses as nexthop address.
1129 Otherwise, router will not able to send redirects.
1130 It is very good, but in some (rare!) circumstances
1131 (SIT, PtP, NBMA NOARP links) it is handy to allow
1132 some exceptions. --ANK
1134 err = -EINVAL;
1135 if (!(gwa_type&IPV6_ADDR_UNICAST))
1136 goto out;
/* the non-link-local gateway must itself be reachable through an existing route */
1138 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1140 err = -EHOSTUNREACH;
1141 if (grt == NULL)
1142 goto out;
1143 if (dev) {
1144 if (dev != grt->rt6i_dev) {
1145 dst_release(&grt->u.dst);
1146 goto out;
1148 } else {
/* no interface was given: inherit device and inet6_dev from the gateway's route */
1149 dev = grt->rt6i_dev;
1150 idev = grt->rt6i_idev;
1151 dev_hold(dev);
1152 in6_dev_hold(grt->rt6i_idev);
/* err stays -EHOSTUNREACH unless the gateway's route is not itself a gateway route */
1154 if (!(grt->rt6i_flags&RTF_GATEWAY))
1155 err = 0;
1156 dst_release(&grt->u.dst);
1158 if (err)
1159 goto out;
1161 err = -EINVAL;
1162 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1163 goto out;
1166 err = -ENODEV;
1167 if (dev == NULL)
1168 goto out;
1170 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1171 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1172 if (IS_ERR(rt->rt6i_nexthop)) {
1173 err = PTR_ERR(rt->rt6i_nexthop);
1174 rt->rt6i_nexthop = NULL;
1175 goto out;
1179 rt->rt6i_flags = rtmsg->rtmsg_flags;
1181 install_route:
/* copy caller-supplied metrics; attribute type N lands in metrics[N-1] */
1182 if (rta && rta[RTA_METRICS-1]) {
1183 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1184 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1186 while (RTA_OK(attr, attrlen)) {
1187 unsigned flavor = attr->rta_type;
1188 if (flavor) {
1189 if (flavor > RTAX_MAX) {
1190 err = -EINVAL;
1191 goto out;
1193 rt->u.dst.metrics[flavor-1] =
1194 *(u32 *)RTA_DATA(attr);
1196 attr = RTA_NEXT(attr, attrlen);
/* fill in defaults for metrics the caller left at zero */
1200 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1201 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1202 if (!rt->u.dst.metrics[RTAX_MTU-1])
1203 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1204 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1205 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
/* the route now carries the dev and idev references taken above */
1206 rt->u.dst.dev = dev;
1207 rt->rt6i_idev = idev;
1208 rt->rt6i_table = table;
1209 return ip6_ins_rt(rt, nlh, _rtattr, req);
1211 out:
1212 if (dev)
1213 dev_put(dev);
1214 if (idev)
1215 in6_dev_put(idev);
1216 if (rt)
1217 dst_free((struct dst_entry *) rt);
1218 return err;
1221 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1223 int err;
1224 struct fib6_table *table;
1226 if (rt == &ip6_null_entry)
1227 return -ENOENT;
1229 table = rt->rt6i_table;
1230 write_lock_bh(&table->tb6_lock);
1232 err = fib6_del(rt, nlh, _rtattr, req);
1233 dst_release(&rt->u.dst);
1235 write_unlock_bh(&table->tb6_lock);
1237 return err;
1240 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1241 void *_rtattr, struct netlink_skb_parms *req,
1242 u32 table_id)
1244 struct fib6_table *table;
1245 struct fib6_node *fn;
1246 struct rt6_info *rt;
1247 int err = -ESRCH;
1249 table = fib6_get_table(table_id);
1250 if (table == NULL)
1251 return err;
1253 read_lock_bh(&table->tb6_lock);
1255 fn = fib6_locate(&table->tb6_root,
1256 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1257 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1259 if (fn) {
1260 for (rt = fn->leaf; rt; rt = rt->u.next) {
1261 if (rtmsg->rtmsg_ifindex &&
1262 (rt->rt6i_dev == NULL ||
1263 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1264 continue;
1265 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1266 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1267 continue;
1268 if (rtmsg->rtmsg_metric &&
1269 rtmsg->rtmsg_metric != rt->rt6i_metric)
1270 continue;
1271 dst_hold(&rt->u.dst);
1272 read_unlock_bh(&table->tb6_lock);
1274 return ip6_del_rt(rt, nlh, _rtattr, req);
1277 read_unlock_bh(&table->tb6_lock);
1279 return err;
1283 * Handle redirects
/*
 * Process an accepted-looking ICMPv6 redirect for destination @dest.
 *
 * @saddr:   source address of the redirect; it must equal the gateway of a
 *           current route to @dest
 * @neigh:   neighbour entry of the new next hop
 * @lladdr:  link-layer address handed to neigh_update()
 * @on_link: non-zero when the target is the destination itself, in which
 *           case the new route is not a gateway route
 *
 * Only the main table (RT6_TABLE_MAIN) is consulted. If a non-expired
 * gateway route on neigh->dev with gateway @saddr is found, the neighbour
 * is updated, a host cache route via @neigh is inserted, netevent listeners
 * are notified with NETEVENT_REDIRECT, and a superseded cache route is
 * deleted. Otherwise a rate-limited debug message is logged and nothing
 * changes.
 */
1285 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1286 struct neighbour *neigh, u8 *lladdr, int on_link)
1288 struct rt6_info *rt, *nrt = NULL;
1289 struct fib6_node *fn;
1290 struct fib6_table *table;
1291 struct netevent_redirect netevent;
1293 /* TODO: Very lazy, might need to check all tables */
1294 table = fib6_get_table(RT6_TABLE_MAIN);
1295 if (table == NULL)
1296 return;
1299 * Get the "current" route for this destination and
1300 * check if the redirect has come from approriate router.
1302 * RFC 2461 specifies that redirects should only be
1303 * accepted if they come from the nexthop to the target.
1304 * Due to the way the routes are chosen, this notion
1305 * is a bit fuzzy and one might need to check all possible
1306 * routes.
1309 read_lock_bh(&table->tb6_lock);
1310 fn = fib6_lookup(&table->tb6_root, dest, NULL);
1311 restart:
1312 for (rt = fn->leaf; rt; rt = rt->u.next) {
1314 * Current route is on-link; redirect is always invalid.
1316 * Seems, previous statement is not true. It could
1317 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 * But then router serving it might decide, that we should
1319 * know truth 8)8) --ANK (980726).
1321 if (rt6_check_expired(rt))
1322 continue;
1323 if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 continue;
1325 if (neigh->dev != rt->rt6i_dev)
1326 continue;
1327 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1328 continue;
1329 break;
/* found: pin the route; not found: for scoped destinations retry on parent nodes */
1331 if (rt)
1332 dst_hold(&rt->u.dst);
1333 else if (rt6_need_strict(dest)) {
1334 while ((fn = fn->parent) != NULL) {
1335 if (fn->fn_flags & RTN_ROOT)
1336 break;
1337 if (fn->fn_flags & RTN_RTINFO)
1338 goto restart;
1341 read_unlock_bh(&table->tb6_lock);
1343 if (!rt) {
1344 if (net_ratelimit())
1345 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1346 "for redirect target\n");
1347 return;
1351 * We have finally decided to accept it.
1354 neigh_update(neigh, lladdr, NUD_STALE,
1355 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1356 NEIGH_UPDATE_F_OVERRIDE|
1357 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1358 NEIGH_UPDATE_F_ISROUTER))
1362 * Redirect received -> path was valid.
1363 * Look, redirects are sent only in response to data packets,
1364 * so that this nexthop apparently is reachable. --ANK
1366 dst_confirm(&rt->u.dst);
1368 /* Duplicate redirect: silently ignore. */
1369 if (neigh == rt->u.dst.neighbour)
1370 goto out;
1372 nrt = ip6_rt_copy(rt);
1373 if (nrt == NULL)
1374 goto out;
/* the copy becomes a dynamic host cache route through the new next hop */
1376 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1377 if (on_link)
1378 nrt->rt6i_flags &= ~RTF_GATEWAY;
1380 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1381 nrt->rt6i_dst.plen = 128;
1382 nrt->u.dst.flags |= DST_HOST;
1384 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1385 nrt->rt6i_nexthop = neigh_clone(neigh);
1386 /* Reset pmtu, it may be better */
1387 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1388 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1390 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1391 goto out;
1393 netevent.old = &rt->u.dst;
1394 netevent.new = &nrt->u.dst;
1395 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
/* ip6_del_rt() drops the reference taken above, hence the direct return */
1397 if (rt->rt6i_flags&RTF_CACHE) {
1398 ip6_del_rt(rt, NULL, NULL, NULL);
1399 return;
1402 out:
1403 dst_release(&rt->u.dst);
1404 return;
1408 * Handle ICMP "packet too big" messages
1409 * i.e. Path MTU discovery
1412 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1413 struct net_device *dev, u32 pmtu)
1415 struct rt6_info *rt, *nrt;
1416 int allfrag = 0;
1418 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1419 if (rt == NULL)
1420 return;
1422 if (pmtu >= dst_mtu(&rt->u.dst))
1423 goto out;
1425 if (pmtu < IPV6_MIN_MTU) {
1427 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1428 * MTU (1280) and a fragment header should always be included
1429 * after a node receiving Too Big message reporting PMTU is
1430 * less than the IPv6 Minimum Link MTU.
1432 pmtu = IPV6_MIN_MTU;
1433 allfrag = 1;
1436 /* New mtu received -> path was valid.
1437 They are sent only in response to data packets,
1438 so that this nexthop apparently is reachable. --ANK
1440 dst_confirm(&rt->u.dst);
1442 /* Host route. If it is static, it would be better
1443 not to override it, but add new one, so that
1444 when cache entry will expire old pmtu
1445 would return automatically.
1447 if (rt->rt6i_flags & RTF_CACHE) {
1448 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1449 if (allfrag)
1450 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1451 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1452 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1453 goto out;
1456 /* Network route.
1457 Two cases are possible:
1458 1. It is connected route. Action: COW
1459 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1461 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1462 nrt = rt6_alloc_cow(rt, daddr, saddr);
1463 else
1464 nrt = rt6_alloc_clone(rt, daddr);
1466 if (nrt) {
1467 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1468 if (allfrag)
1469 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1471 /* According to RFC 1981, detecting PMTU increase shouldn't be
1472 * happened within 5 mins, the recommended timer is 10 mins.
1473 * Here this route expiration time is set to ip6_rt_mtu_expires
1474 * which is 10 mins. After 10 mins the decreased pmtu is expired
1475 * and detecting PMTU increase will be automatically happened.
1477 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1478 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1480 ip6_ins_rt(nrt, NULL, NULL, NULL);
1482 out:
1483 dst_release(&rt->u.dst);
1487 * Misc support functions
1490 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1492 struct rt6_info *rt = ip6_dst_alloc();
1494 if (rt) {
1495 rt->u.dst.input = ort->u.dst.input;
1496 rt->u.dst.output = ort->u.dst.output;
1498 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1499 rt->u.dst.dev = ort->u.dst.dev;
1500 if (rt->u.dst.dev)
1501 dev_hold(rt->u.dst.dev);
1502 rt->rt6i_idev = ort->rt6i_idev;
1503 if (rt->rt6i_idev)
1504 in6_dev_hold(rt->rt6i_idev);
1505 rt->u.dst.lastuse = jiffies;
1506 rt->rt6i_expires = 0;
1508 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1509 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1510 rt->rt6i_metric = 0;
1512 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1513 #ifdef CONFIG_IPV6_SUBTREES
1514 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1515 #endif
1516 rt->rt6i_table = ort->rt6i_table;
1518 return rt;
1521 #ifdef CONFIG_IPV6_ROUTE_INFO
1522 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1523 struct in6_addr *gwaddr, int ifindex)
1525 struct fib6_node *fn;
1526 struct rt6_info *rt = NULL;
1527 struct fib6_table *table;
1529 table = fib6_get_table(RT6_TABLE_INFO);
1530 if (table == NULL)
1531 return NULL;
1533 write_lock_bh(&table->tb6_lock);
1534 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1535 if (!fn)
1536 goto out;
1538 for (rt = fn->leaf; rt; rt = rt->u.next) {
1539 if (rt->rt6i_dev->ifindex != ifindex)
1540 continue;
1541 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1542 continue;
1543 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1544 continue;
1545 dst_hold(&rt->u.dst);
1546 break;
1548 out:
1549 write_unlock_bh(&table->tb6_lock);
1550 return rt;
1553 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1554 struct in6_addr *gwaddr, int ifindex,
1555 unsigned pref)
1557 struct in6_rtmsg rtmsg;
1559 memset(&rtmsg, 0, sizeof(rtmsg));
1560 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1561 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1562 rtmsg.rtmsg_dst_len = prefixlen;
1563 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1564 rtmsg.rtmsg_metric = 1024;
1565 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1566 /* We should treat it as a default route if prefix length is 0. */
1567 if (!prefixlen)
1568 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1569 rtmsg.rtmsg_ifindex = ifindex;
1571 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
1573 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1575 #endif
1577 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1579 struct rt6_info *rt;
1580 struct fib6_table *table;
1582 table = fib6_get_table(RT6_TABLE_DFLT);
1583 if (table == NULL)
1584 return NULL;
1586 write_lock_bh(&table->tb6_lock);
1587 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1588 if (dev == rt->rt6i_dev &&
1589 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1590 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1591 break;
1593 if (rt)
1594 dst_hold(&rt->u.dst);
1595 write_unlock_bh(&table->tb6_lock);
1596 return rt;
1599 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1600 struct net_device *dev,
1601 unsigned int pref)
1603 struct in6_rtmsg rtmsg;
1605 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1606 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1607 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1608 rtmsg.rtmsg_metric = 1024;
1609 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1610 RTF_PREF(pref);
1612 rtmsg.rtmsg_ifindex = dev->ifindex;
1614 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
1615 return rt6_get_dflt_router(gwaddr, dev);
1618 void rt6_purge_dflt_routers(void)
1620 struct rt6_info *rt;
1621 struct fib6_table *table;
1623 /* NOTE: Keep consistent with rt6_get_dflt_router */
1624 table = fib6_get_table(RT6_TABLE_DFLT);
1625 if (table == NULL)
1626 return;
1628 restart:
1629 read_lock_bh(&table->tb6_lock);
1630 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1631 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1632 dst_hold(&rt->u.dst);
1633 read_unlock_bh(&table->tb6_lock);
1634 ip6_del_rt(rt, NULL, NULL, NULL);
1635 goto restart;
1638 read_unlock_bh(&table->tb6_lock);
1641 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1643 struct in6_rtmsg rtmsg;
1644 int err;
1646 switch(cmd) {
1647 case SIOCADDRT: /* Add a route */
1648 case SIOCDELRT: /* Delete a route */
1649 if (!capable(CAP_NET_ADMIN))
1650 return -EPERM;
1651 err = copy_from_user(&rtmsg, arg,
1652 sizeof(struct in6_rtmsg));
1653 if (err)
1654 return -EFAULT;
1656 rtnl_lock();
1657 switch (cmd) {
1658 case SIOCADDRT:
1659 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1660 RT6_TABLE_MAIN);
1661 break;
1662 case SIOCDELRT:
1663 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1664 RT6_TABLE_MAIN);
1665 break;
1666 default:
1667 err = -EINVAL;
1669 rtnl_unlock();
1671 return err;
1674 return -EINVAL;
1678 * Drop the packet on the floor
1681 static int ip6_pkt_discard(struct sk_buff *skb)
1683 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1684 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1685 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1687 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1688 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1689 kfree_skb(skb);
1690 return 0;
1693 static int ip6_pkt_discard_out(struct sk_buff *skb)
1695 skb->dev = skb->dst->dev;
1696 return ip6_pkt_discard(skb);
1700 * Allocate a dst for local (unicast / anycast) address.
1703 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1704 const struct in6_addr *addr,
1705 int anycast)
1707 struct rt6_info *rt = ip6_dst_alloc();
1709 if (rt == NULL)
1710 return ERR_PTR(-ENOMEM);
1712 dev_hold(&loopback_dev);
1713 in6_dev_hold(idev);
1715 rt->u.dst.flags = DST_HOST;
1716 rt->u.dst.input = ip6_input;
1717 rt->u.dst.output = ip6_output;
1718 rt->rt6i_dev = &loopback_dev;
1719 rt->rt6i_idev = idev;
1720 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1721 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1722 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1723 rt->u.dst.obsolete = -1;
1725 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1726 if (anycast)
1727 rt->rt6i_flags |= RTF_ANYCAST;
1728 else
1729 rt->rt6i_flags |= RTF_LOCAL;
1730 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1731 if (rt->rt6i_nexthop == NULL) {
1732 dst_free((struct dst_entry *) rt);
1733 return ERR_PTR(-ENOMEM);
1736 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1737 rt->rt6i_dst.plen = 128;
1738 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1740 atomic_set(&rt->u.dst.__refcnt, 1);
1742 return rt;
1745 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1747 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1748 rt != &ip6_null_entry) {
1749 RT6_TRACE("deleted by ifdown %p\n", rt);
1750 return -1;
1752 return 0;
1755 void rt6_ifdown(struct net_device *dev)
1757 fib6_clean_all(fib6_ifdown, 0, dev);
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1766 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1768 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1769 struct inet6_dev *idev;
1771 /* In IPv6 pmtu discovery is not optional,
1772 so that RTAX_MTU lock cannot disable it.
1773 We still use this lock to block changes
1774 caused by addrconf/ndisc.
1777 idev = __in6_dev_get(arg->dev);
1778 if (idev == NULL)
1779 return 0;
1781 /* For administrative MTU increase, there is no way to discover
1782 IPv6 PMTU increase, so PMTU increase should be updated here.
1783 Since RFC 1981 doesn't include administrative MTU increase
1784 update PMTU increase is a MUST. (i.e. jumbo frame)
1787 If new MTU is less than route PMTU, this new MTU will be the
1788 lowest MTU in the path, update the route PMTU to reflect PMTU
1789 decreases; if new MTU is greater than route PMTU, and the
1790 old MTU is the lowest MTU in the path, update the route PMTU
1791 to reflect the increase. In this case if the other nodes' MTU
1792 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1793 PMTU discouvery.
1795 if (rt->rt6i_dev == arg->dev &&
1796 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1797 (dst_mtu(&rt->u.dst) > arg->mtu ||
1798 (dst_mtu(&rt->u.dst) < arg->mtu &&
1799 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1800 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1801 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1802 return 0;
1805 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1807 struct rt6_mtu_change_arg arg = {
1808 .dev = dev,
1809 .mtu = mtu,
1812 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1815 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1816 struct in6_rtmsg *rtmsg)
1818 memset(rtmsg, 0, sizeof(*rtmsg));
1820 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1821 rtmsg->rtmsg_src_len = r->rtm_src_len;
1822 rtmsg->rtmsg_flags = RTF_UP;
1823 if (r->rtm_type == RTN_UNREACHABLE)
1824 rtmsg->rtmsg_flags |= RTF_REJECT;
1826 if (rta[RTA_GATEWAY-1]) {
1827 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1828 return -EINVAL;
1829 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1830 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1832 if (rta[RTA_DST-1]) {
1833 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1834 return -EINVAL;
1835 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1837 if (rta[RTA_SRC-1]) {
1838 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1839 return -EINVAL;
1840 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1842 if (rta[RTA_OIF-1]) {
1843 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1844 return -EINVAL;
1845 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1847 if (rta[RTA_PRIORITY-1]) {
1848 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1849 return -EINVAL;
1850 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1852 return 0;
1855 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1857 struct rtmsg *r = NLMSG_DATA(nlh);
1858 struct in6_rtmsg rtmsg;
1860 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1861 return -EINVAL;
1862 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
1865 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1867 struct rtmsg *r = NLMSG_DATA(nlh);
1868 struct in6_rtmsg rtmsg;
1870 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1871 return -EINVAL;
1872 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
/* Per-call state passed to rt6_dump_route() through the walker's args. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* message being filled */
	struct netlink_callback *cb;	/* dump request this belongs to */
};
1881 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1882 struct in6_addr *dst, struct in6_addr *src,
1883 int iif, int type, u32 pid, u32 seq,
1884 int prefix, unsigned int flags)
1886 struct rtmsg *rtm;
1887 struct nlmsghdr *nlh;
1888 unsigned char *b = skb->tail;
1889 struct rta_cacheinfo ci;
1891 if (prefix) { /* user wants prefix routes only */
1892 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1893 /* success since this is not a prefix route */
1894 return 1;
1898 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1899 rtm = NLMSG_DATA(nlh);
1900 rtm->rtm_family = AF_INET6;
1901 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1902 rtm->rtm_src_len = rt->rt6i_src.plen;
1903 rtm->rtm_tos = 0;
1904 if (rt->rt6i_table)
1905 rtm->rtm_table = rt->rt6i_table->tb6_id;
1906 else
1907 rtm->rtm_table = RT6_TABLE_UNSPEC;
1908 if (rt->rt6i_flags&RTF_REJECT)
1909 rtm->rtm_type = RTN_UNREACHABLE;
1910 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1911 rtm->rtm_type = RTN_LOCAL;
1912 else
1913 rtm->rtm_type = RTN_UNICAST;
1914 rtm->rtm_flags = 0;
1915 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1916 rtm->rtm_protocol = rt->rt6i_protocol;
1917 if (rt->rt6i_flags&RTF_DYNAMIC)
1918 rtm->rtm_protocol = RTPROT_REDIRECT;
1919 else if (rt->rt6i_flags & RTF_ADDRCONF)
1920 rtm->rtm_protocol = RTPROT_KERNEL;
1921 else if (rt->rt6i_flags&RTF_DEFAULT)
1922 rtm->rtm_protocol = RTPROT_RA;
1924 if (rt->rt6i_flags&RTF_CACHE)
1925 rtm->rtm_flags |= RTM_F_CLONED;
1927 if (dst) {
1928 RTA_PUT(skb, RTA_DST, 16, dst);
1929 rtm->rtm_dst_len = 128;
1930 } else if (rtm->rtm_dst_len)
1931 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1932 #ifdef CONFIG_IPV6_SUBTREES
1933 if (src) {
1934 RTA_PUT(skb, RTA_SRC, 16, src);
1935 rtm->rtm_src_len = 128;
1936 } else if (rtm->rtm_src_len)
1937 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1938 #endif
1939 if (iif)
1940 RTA_PUT(skb, RTA_IIF, 4, &iif);
1941 else if (dst) {
1942 struct in6_addr saddr_buf;
1943 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1944 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1946 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1947 goto rtattr_failure;
1948 if (rt->u.dst.neighbour)
1949 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1950 if (rt->u.dst.dev)
1951 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1952 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1953 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1954 if (rt->rt6i_expires)
1955 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1956 else
1957 ci.rta_expires = 0;
1958 ci.rta_used = rt->u.dst.__use;
1959 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1960 ci.rta_error = rt->u.dst.error;
1961 ci.rta_id = 0;
1962 ci.rta_ts = 0;
1963 ci.rta_tsage = 0;
1964 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1965 nlh->nlmsg_len = skb->tail - b;
1966 return skb->len;
1968 nlmsg_failure:
1969 rtattr_failure:
1970 skb_trim(skb, b - skb->data);
1971 return -1;
1974 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1976 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1977 int prefix;
1979 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1980 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1981 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1982 } else
1983 prefix = 0;
1985 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1986 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1987 prefix, NLM_F_MULTI);
1990 static int fib6_dump_node(struct fib6_walker_t *w)
1992 int res;
1993 struct rt6_info *rt;
1995 for (rt = w->leaf; rt; rt = rt->u.next) {
1996 res = rt6_dump_route(rt, w->args);
1997 if (res < 0) {
1998 /* Frame is full, suspend walking */
1999 w->leaf = rt;
2000 return 1;
2002 BUG_TRAP(res!=0);
2004 w->leaf = NULL;
2005 return 0;
2008 static void fib6_dump_end(struct netlink_callback *cb)
2010 struct fib6_walker_t *w = (void*)cb->args[0];
2012 if (w) {
2013 cb->args[0] = 0;
2014 kfree(w);
2016 cb->done = (void*)cb->args[1];
2017 cb->args[1] = 0;
2020 static int fib6_dump_done(struct netlink_callback *cb)
2022 fib6_dump_end(cb);
2023 return cb->done ? cb->done(cb) : 0;
2026 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
2028 struct fib6_table *table;
2029 struct rt6_rtnl_dump_arg arg;
2030 struct fib6_walker_t *w;
2031 int i, res = 0;
2033 arg.skb = skb;
2034 arg.cb = cb;
2037 * cb->args[0] = pointer to walker structure
2038 * cb->args[1] = saved cb->done() pointer
2039 * cb->args[2] = current table being dumped
2042 w = (void*)cb->args[0];
2043 if (w == NULL) {
2044 /* New dump:
2046 * 1. hook callback destructor.
2048 cb->args[1] = (long)cb->done;
2049 cb->done = fib6_dump_done;
2052 * 2. allocate and initialize walker.
2054 w = kzalloc(sizeof(*w), GFP_ATOMIC);
2055 if (w == NULL)
2056 return -ENOMEM;
2057 w->func = fib6_dump_node;
2058 w->args = &arg;
2059 cb->args[0] = (long)w;
2060 cb->args[2] = FIB6_TABLE_MIN;
2061 } else {
2062 w->args = &arg;
2063 i = cb->args[2];
2064 if (i > FIB6_TABLE_MAX)
2065 goto end;
2067 table = fib6_get_table(i);
2068 if (table != NULL) {
2069 read_lock_bh(&table->tb6_lock);
2070 w->root = &table->tb6_root;
2071 res = fib6_walk_continue(w);
2072 read_unlock_bh(&table->tb6_lock);
2073 if (res != 0) {
2074 if (res < 0)
2075 fib6_walker_unlink(w);
2076 goto end;
2080 fib6_walker_unlink(w);
2081 cb->args[2] = ++i;
2084 for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
2085 table = fib6_get_table(i);
2086 if (table == NULL)
2087 continue;
2089 read_lock_bh(&table->tb6_lock);
2090 w->root = &table->tb6_root;
2091 res = fib6_walk(w);
2092 read_unlock_bh(&table->tb6_lock);
2093 if (res)
2094 break;
2096 end:
2097 cb->args[2] = i;
2099 res = res < 0 ? res : skb->len;
2100 /* res < 0 is an error. (really, impossible)
2101 res == 0 means that dump is complete, but skb still can contain data.
2102 res > 0 dump is not complete, but frame is full.
2104 /* Destroy walker, if dump of this table is complete. */
2105 if (res <= 0)
2106 fib6_dump_end(cb);
2107 return res;
2110 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2112 struct rtattr **rta = arg;
2113 int iif = 0;
2114 int err = -ENOBUFS;
2115 struct sk_buff *skb;
2116 struct flowi fl;
2117 struct rt6_info *rt;
2119 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2120 if (skb == NULL)
2121 goto out;
2123 /* Reserve room for dummy headers, this skb can pass
2124 through good chunk of routing engine.
2126 skb->mac.raw = skb->data;
2127 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2129 memset(&fl, 0, sizeof(fl));
2130 if (rta[RTA_SRC-1])
2131 ipv6_addr_copy(&fl.fl6_src,
2132 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2133 if (rta[RTA_DST-1])
2134 ipv6_addr_copy(&fl.fl6_dst,
2135 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2137 if (rta[RTA_IIF-1])
2138 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2140 if (iif) {
2141 struct net_device *dev;
2142 dev = __dev_get_by_index(iif);
2143 if (!dev) {
2144 err = -ENODEV;
2145 goto out_free;
2149 fl.oif = 0;
2150 if (rta[RTA_OIF-1])
2151 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2153 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2155 skb->dst = &rt->u.dst;
2157 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2158 err = rt6_fill_node(skb, rt,
2159 &fl.fl6_dst, &fl.fl6_src,
2160 iif,
2161 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2162 nlh->nlmsg_seq, 0, 0);
2163 if (err < 0) {
2164 err = -EMSGSIZE;
2165 goto out_free;
2168 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2169 if (err > 0)
2170 err = 0;
2171 out:
2172 return err;
2173 out_free:
2174 kfree_skb(skb);
2175 goto out;
2178 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2179 struct netlink_skb_parms *req)
2181 struct sk_buff *skb;
2182 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
2183 u32 pid = current->pid;
2184 u32 seq = 0;
2186 if (req)
2187 pid = req->pid;
2188 if (nlh)
2189 seq = nlh->nlmsg_seq;
2191 skb = alloc_skb(size, gfp_any());
2192 if (!skb) {
2193 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
2194 return;
2196 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
2197 kfree_skb(skb);
2198 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
2199 return;
2201 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2202 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
2206 * /proc
2209 #ifdef CONFIG_PROC_FS
2211 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/* Cursor shared by rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2222 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2224 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2225 int i;
2227 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2228 arg->skip++;
2229 return 0;
2232 if (arg->len >= arg->length)
2233 return 0;
2235 for (i=0; i<16; i++) {
2236 sprintf(arg->buffer + arg->len, "%02x",
2237 rt->rt6i_dst.addr.s6_addr[i]);
2238 arg->len += 2;
2240 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2241 rt->rt6i_dst.plen);
2243 #ifdef CONFIG_IPV6_SUBTREES
2244 for (i=0; i<16; i++) {
2245 sprintf(arg->buffer + arg->len, "%02x",
2246 rt->rt6i_src.addr.s6_addr[i]);
2247 arg->len += 2;
2249 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2250 rt->rt6i_src.plen);
2251 #else
2252 sprintf(arg->buffer + arg->len,
2253 "00000000000000000000000000000000 00 ");
2254 arg->len += 36;
2255 #endif
2257 if (rt->rt6i_nexthop) {
2258 for (i=0; i<16; i++) {
2259 sprintf(arg->buffer + arg->len, "%02x",
2260 rt->rt6i_nexthop->primary_key[i]);
2261 arg->len += 2;
2263 } else {
2264 sprintf(arg->buffer + arg->len,
2265 "00000000000000000000000000000000");
2266 arg->len += 32;
2268 arg->len += sprintf(arg->buffer + arg->len,
2269 " %08x %08x %08x %08x %8s\n",
2270 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2271 rt->u.dst.__use, rt->rt6i_flags,
2272 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2273 return 0;
2276 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2278 struct rt6_proc_arg arg = {
2279 .buffer = buffer,
2280 .offset = offset,
2281 .length = length,
2284 fib6_clean_all(rt6_info_route, 0, &arg);
2286 *start = buffer;
2287 if (offset)
2288 *start += offset % RT6_INFO_LEN;
2290 arg.len -= offset % RT6_INFO_LEN;
2292 if (arg.len > length)
2293 arg.len = length;
2294 if (arg.len < 0)
2295 arg.len = 0;
2297 return arg.len;
2300 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2302 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2303 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2304 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2305 rt6_stats.fib_rt_cache,
2306 atomic_read(&ip6_dst_ops.entries),
2307 rt6_stats.fib_discarded_routes);
2309 return 0;
2312 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2314 return single_open(file, rt6_stats_seq_show, NULL);
2317 static struct file_operations rt6_stats_seq_fops = {
2318 .owner = THIS_MODULE,
2319 .open = rt6_stats_seq_open,
2320 .read = seq_read,
2321 .llseek = seq_lseek,
2322 .release = single_release,
2324 #endif /* CONFIG_PROC_FS */
2326 #ifdef CONFIG_SYSCTL
2328 static int flush_delay;
2330 static
2331 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2332 void __user *buffer, size_t *lenp, loff_t *ppos)
2334 if (write) {
2335 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2336 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2337 return 0;
2338 } else
2339 return -EINVAL;
2342 ctl_table ipv6_route_table[] = {
2344 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2345 .procname = "flush",
2346 .data = &flush_delay,
2347 .maxlen = sizeof(int),
2348 .mode = 0200,
2349 .proc_handler = &ipv6_sysctl_rtcache_flush
2352 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2353 .procname = "gc_thresh",
2354 .data = &ip6_dst_ops.gc_thresh,
2355 .maxlen = sizeof(int),
2356 .mode = 0644,
2357 .proc_handler = &proc_dointvec,
2360 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2361 .procname = "max_size",
2362 .data = &ip6_rt_max_size,
2363 .maxlen = sizeof(int),
2364 .mode = 0644,
2365 .proc_handler = &proc_dointvec,
2368 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2369 .procname = "gc_min_interval",
2370 .data = &ip6_rt_gc_min_interval,
2371 .maxlen = sizeof(int),
2372 .mode = 0644,
2373 .proc_handler = &proc_dointvec_jiffies,
2374 .strategy = &sysctl_jiffies,
2377 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2378 .procname = "gc_timeout",
2379 .data = &ip6_rt_gc_timeout,
2380 .maxlen = sizeof(int),
2381 .mode = 0644,
2382 .proc_handler = &proc_dointvec_jiffies,
2383 .strategy = &sysctl_jiffies,
2386 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2387 .procname = "gc_interval",
2388 .data = &ip6_rt_gc_interval,
2389 .maxlen = sizeof(int),
2390 .mode = 0644,
2391 .proc_handler = &proc_dointvec_jiffies,
2392 .strategy = &sysctl_jiffies,
2395 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2396 .procname = "gc_elasticity",
2397 .data = &ip6_rt_gc_elasticity,
2398 .maxlen = sizeof(int),
2399 .mode = 0644,
2400 .proc_handler = &proc_dointvec_jiffies,
2401 .strategy = &sysctl_jiffies,
2404 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2405 .procname = "mtu_expires",
2406 .data = &ip6_rt_mtu_expires,
2407 .maxlen = sizeof(int),
2408 .mode = 0644,
2409 .proc_handler = &proc_dointvec_jiffies,
2410 .strategy = &sysctl_jiffies,
2413 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2414 .procname = "min_adv_mss",
2415 .data = &ip6_rt_min_advmss,
2416 .maxlen = sizeof(int),
2417 .mode = 0644,
2418 .proc_handler = &proc_dointvec_jiffies,
2419 .strategy = &sysctl_jiffies,
2422 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2423 .procname = "gc_min_interval_ms",
2424 .data = &ip6_rt_gc_min_interval,
2425 .maxlen = sizeof(int),
2426 .mode = 0644,
2427 .proc_handler = &proc_dointvec_ms_jiffies,
2428 .strategy = &sysctl_ms_jiffies,
2430 { .ctl_name = 0 }
2433 #endif
2435 void __init ip6_route_init(void)
2437 struct proc_dir_entry *p;
2439 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2440 sizeof(struct rt6_info),
2441 0, SLAB_HWCACHE_ALIGN,
2442 NULL, NULL);
2443 if (!ip6_dst_ops.kmem_cachep)
2444 panic("cannot create ip6_dst_cache");
2446 fib6_init();
2447 #ifdef CONFIG_PROC_FS
2448 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2449 if (p)
2450 p->owner = THIS_MODULE;
2452 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2453 #endif
2454 #ifdef CONFIG_XFRM
2455 xfrm6_init();
2456 #endif
2457 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2458 fib6_rules_init();
2459 #endif
2462 void ip6_route_cleanup(void)
2464 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2465 fib6_rules_cleanup();
2466 #endif
2467 #ifdef CONFIG_PROC_FS
2468 proc_net_remove("ipv6_route");
2469 proc_net_remove("rt6_stats");
2470 #endif
2471 #ifdef CONFIG_XFRM
2472 xfrm6_fini();
2473 #endif
2474 rt6_ifdown(NULL);
2475 fib6_gc_cleanup();
2476 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);