Import 2.4.0-test5pre2
[davej-history.git] / net / ipv6 / route.c
blobdc5ddffd8ba3a5fc2c08bfd2bc6513100e0d3642
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.46 2000/07/07 22:40:35 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/config.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/net.h>
22 #include <linux/route.h>
23 #include <linux/netdevice.h>
24 #include <linux/in6.h>
25 #include <linux/init.h>
26 #include <linux/netlink.h>
27 #include <linux/if_arp.h>
29 #ifdef CONFIG_PROC_FS
30 #include <linux/proc_fs.h>
31 #endif
33 #include <net/snmp.h>
34 #include <net/ipv6.h>
35 #include <net/ip6_fib.h>
36 #include <net/ip6_route.h>
37 #include <net/ndisc.h>
38 #include <net/addrconf.h>
39 #include <net/tcp.h>
40 #include <linux/rtnetlink.h>
42 #include <asm/uaccess.h>
44 #ifdef CONFIG_SYSCTL
45 #include <linux/sysctl.h>
46 #endif
48 #undef CONFIG_RT6_POLICY
50 /* Set to 3 to get tracing. */
51 #define RT6_DEBUG 2
53 #if RT6_DEBUG >= 3
54 #define RDBG(x) printk x
55 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
56 #else
57 #define RDBG(x)
58 #define RT6_TRACE(x...) do { ; } while (0)
59 #endif
62 int ip6_rt_max_size = 4096;
63 int ip6_rt_gc_min_interval = 5*HZ;
64 int ip6_rt_gc_timeout = 60*HZ;
65 int ip6_rt_gc_interval = 30*HZ;
66 int ip6_rt_gc_elasticity = 9;
67 int ip6_rt_mtu_expires = 10*60*HZ;
68 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
70 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
71 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
72 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
73 struct sk_buff *skb);
74 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
75 static int ip6_dst_gc(void);
77 static int ip6_pkt_discard(struct sk_buff *skb);
78 static void ip6_link_failure(struct sk_buff *skb);
/*
 * Destination cache operations for IPv6 routes.  The initializer is
 * positional: it supplies the address family, the protocol id, a numeric
 * threshold, the callbacks defined in this file (ip6_dst_gc,
 * ip6_dst_check, ip6_dst_reroute, ip6_negative_advice, ip6_link_failure)
 * and the size of struct rt6_info.
 * NOTE(review): the mapping of each value onto a struct dst_ops field
 * depends on the struct layout, which is not visible here -- confirm
 * against the header before reordering anything.
 */
80 struct dst_ops ip6_dst_ops = {
81 AF_INET6,
82 __constant_htons(ETH_P_IPV6),
83 1024,
85 ip6_dst_gc,
86 ip6_dst_check,
87 ip6_dst_reroute,
88 NULL,
89 ip6_negative_advice,
90 ip6_link_failure,
91 sizeof(struct rt6_info),
/*
 * Sentinel "no route" entry.  It is bound to loopback_dev, carries
 * -ENETUNREACH, uses ip6_pkt_discard for both packet handlers and is
 * flagged RTF_REJECT|RTF_NONEXTHOP.  Lookup code in this file returns a
 * pointer to it instead of NULL when nothing matches.
 * NOTE(review): the initializer is positional and this listing jumps from
 * line 99 to line 101 inside the CONFIG_NET_CLS_ROUTE conditional, so one
 * initializer line appears to be missing from this copy -- confirm against
 * the original file.
 */
94 struct rt6_info ip6_null_entry = {
95 {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
96 -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97 -ENETUNREACH, NULL, NULL,
98 ip6_pkt_discard, ip6_pkt_discard,
99 #ifdef CONFIG_NET_CLS_ROUTE
101 #endif
102 &ip6_dst_ops}},
103 NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
104 255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
/*
 * Root node of the IPv6 routing tree.  Its initializer references
 * &ip6_null_entry and sets the flags RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO.
 * NOTE(review): the initializer is positional; which struct fib6_node
 * field each value lands in is not visible from this file.
 */
107 struct fib6_node ip6_routing_table = {
108 NULL, NULL, NULL, NULL,
109 &ip6_null_entry,
110 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
113 #ifdef CONFIG_RT6_POLICY
114 int ip6_rt_policy = 0;
116 struct pol_chain *rt6_pol_list = NULL;
119 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
120 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
122 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
123 struct in6_addr *daddr,
124 struct in6_addr *saddr,
125 struct fl_acc_args *args);
127 #else
128 #define ip6_rt_policy (0)
129 #endif
131 /* Protects all the ip6 fib */
133 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
137 * Route lookup. Any rt6_lock is implied.
140 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
141 int oif,
142 int strict)
144 struct rt6_info *local = NULL;
145 struct rt6_info *sprt;
147 if (oif) {
148 for (sprt = rt; sprt; sprt = sprt->u.next) {
149 struct net_device *dev = sprt->rt6i_dev;
150 if (dev->ifindex == oif)
151 return sprt;
152 if (dev->flags&IFF_LOOPBACK)
153 local = sprt;
156 if (local)
157 return local;
159 if (strict)
160 return &ip6_null_entry;
162 return rt;
166 * pointer to the last default router chosen. BH is disabled locally.
168 static struct rt6_info *rt6_dflt_pointer = NULL;
169 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
171 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
173 struct rt6_info *match = NULL;
174 struct rt6_info *sprt;
175 int mpri = 0;
177 for (sprt = rt; sprt; sprt = sprt->u.next) {
178 struct neighbour *neigh;
180 if ((neigh = sprt->rt6i_nexthop) != NULL) {
181 int m = -1;
183 switch (neigh->nud_state) {
184 case NUD_REACHABLE:
185 if (sprt != rt6_dflt_pointer) {
186 rt = sprt;
187 goto out;
189 m = 2;
190 break;
192 case NUD_DELAY:
193 m = 1;
194 break;
196 case NUD_STALE:
197 m = 1;
198 break;
201 if (oif && sprt->rt6i_dev->ifindex == oif) {
202 m += 2;
205 if (m >= mpri) {
206 mpri = m;
207 match = sprt;
212 if (match) {
213 rt = match;
214 } else {
216 * No default routers are known to be reachable.
217 * SHOULD round robin
219 spin_lock(&rt6_dflt_lock);
220 if (rt6_dflt_pointer) {
221 struct rt6_info *next;
223 if ((next = rt6_dflt_pointer->u.next) != NULL &&
224 next->u.dst.obsolete <= 0 &&
225 next->u.dst.error == 0)
226 rt = next;
228 spin_unlock(&rt6_dflt_lock);
231 out:
232 spin_lock(&rt6_dflt_lock);
233 rt6_dflt_pointer = rt;
234 spin_unlock(&rt6_dflt_lock);
235 return rt;
238 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
239 int oif, int strict)
241 struct fib6_node *fn;
242 struct rt6_info *rt;
244 read_lock_bh(&rt6_lock);
245 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
246 rt = rt6_device_match(fn->leaf, oif, strict);
247 dst_hold(&rt->u.dst);
248 rt->u.dst.__use++;
249 read_unlock_bh(&rt6_lock);
251 rt->u.dst.lastuse = jiffies;
252 if (rt->u.dst.error == 0)
253 return rt;
254 dst_release(&rt->u.dst);
255 return NULL;
258 /* rt6_ins is called with FREE rt6_lock.
259 It takes new route entry, the addition fails by any reason the
260 route is freed. In any case, if caller does not hold it, it may
261 be destroyed.
264 static int rt6_ins(struct rt6_info *rt)
266 int err;
268 write_lock_bh(&rt6_lock);
269 err = fib6_add(&ip6_routing_table, rt);
270 write_unlock_bh(&rt6_lock);
272 return err;
275 /* No rt6_lock! If COW faild, the function returns dead route entry
276 with dst->error set to errno value.
279 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
280 struct in6_addr *saddr)
282 int err;
283 struct rt6_info *rt;
286 * Clone the route.
289 rt = ip6_rt_copy(ort);
291 if (rt) {
292 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
294 if (!(rt->rt6i_flags&RTF_GATEWAY))
295 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
297 rt->rt6i_dst.plen = 128;
298 rt->rt6i_flags |= RTF_CACHE;
299 rt->u.dst.flags |= DST_HOST;
301 #ifdef CONFIG_IPV6_SUBTREES
302 if (rt->rt6i_src.plen && saddr) {
303 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
304 rt->rt6i_src.plen = 128;
306 #endif
308 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
310 dst_clone(&rt->u.dst);
312 err = rt6_ins(rt);
313 if (err == 0)
314 return rt;
316 rt->u.dst.error = err;
318 return rt;
320 dst_clone(&ip6_null_entry.u.dst);
321 return &ip6_null_entry;
324 #ifdef CONFIG_RT6_POLICY
325 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
326 struct sk_buff *skb)
328 struct in6_addr *daddr, *saddr;
329 struct fl_acc_args arg;
331 arg.type = FL_ARG_FORWARD;
332 arg.fl_u.skb = skb;
334 saddr = &skb->nh.ipv6h->saddr;
335 daddr = &skb->nh.ipv6h->daddr;
337 return rt6_flow_lookup(rt, daddr, saddr, &arg);
340 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
341 struct sock *sk,
342 struct flowi *fl)
344 struct fl_acc_args arg;
346 arg.type = FL_ARG_ORIGIN;
347 arg.fl_u.fl_o.sk = sk;
348 arg.fl_u.fl_o.flow = fl;
350 return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
351 &arg);
354 #endif
/*
 * BACKTRACK() is only usable inside ip6_route_input()/ip6_route_output():
 * it reads the caller's locals rt, fn and strict and jumps to the caller's
 * labels "out" and "restart".  When a strict lookup ended on
 * ip6_null_entry it walks fn up through its parents: on reaching a node
 * flagged RTN_ROOT it takes a reference on rt and jumps to "out"; on
 * reaching a node flagged RTN_RTINFO it jumps to "restart" so the caller
 * retries with that node.
 */
356 #define BACKTRACK() \
357 if (rt == &ip6_null_entry && strict) { \
358 while ((fn = fn->parent) != NULL) { \
359 if (fn->fn_flags & RTN_ROOT) { \
360 dst_clone(&rt->u.dst); \
361 goto out; \
363 if (fn->fn_flags & RTN_RTINFO) \
364 goto restart; \
/*
 * Attach a route to a received packet.
 *
 * The lookup uses the addresses in skb's IPv6 header and runs under the
 * rt6_lock read lock.  "strict" is non-zero for multicast and link-local
 * destinations.  If the leaf is a cache entry (RTF_CACHE) it is matched
 * against the receiving device and returned; otherwise the chain is
 * matched non-strictly, and when the chosen route has no nexthop and is
 * not RTF_NONEXTHOP the lock is dropped and rt6_cow() creates a host
 * clone.  A clone that fails with -EEXIST triggers a fresh lookup, at
 * most "attempts" times.  The selected entry (possibly ip6_null_entry or
 * a clone carrying an error) has its lastuse/__use updated and is stored
 * in skb->dst.
 */
369 void ip6_route_input(struct sk_buff *skb)
371 struct fib6_node *fn;
372 struct rt6_info *rt;
373 int strict;
374 int attempts = 3;
376 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
378 relookup:
379 read_lock_bh(&rt6_lock);
381 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
382 &skb->nh.ipv6h->saddr);
384 restart:
385 rt = fn->leaf;
387 if ((rt->rt6i_flags & RTF_CACHE)) {
388 if (ip6_rt_policy == 0) {
389 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
390 BACKTRACK();
391 dst_clone(&rt->u.dst);
392 goto out;
395 #ifdef CONFIG_RT6_POLICY
396 if ((rt->rt6i_flags & RTF_FLOW)) {
397 struct rt6_info *sprt;
399 for (sprt = rt; sprt; sprt = sprt->u.next) {
400 if (rt6_flow_match_in(sprt, skb)) {
401 rt = sprt;
402 dst_clone(&rt->u.dst);
403 goto out;
407 #endif
410 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
411 BACKTRACK();
413 if (ip6_rt_policy == 0) {
414 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
415 read_unlock_bh(&rt6_lock);
417 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
418 &skb->nh.ipv6h->saddr);
420 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
421 goto out2;
422 /* Race condition! In the gap, when rt6_lock was
423 released someone could insert this route. Relookup.
425 goto relookup;
427 dst_clone(&rt->u.dst);
428 } else {
429 #ifdef CONFIG_RT6_POLICY
430 rt = rt6_flow_lookup_in(rt, skb);
431 #else
432 /* NEVER REACHED */
433 #endif
436 out:
437 read_unlock_bh(&rt6_lock);
438 out2:
439 rt->u.dst.lastuse = jiffies;
440 rt->u.dst.__use++;
441 skb->dst = (struct dst_entry *) rt;
/*
 * Find the route for an outgoing flow and return its dst entry.
 *
 * Mirrors ip6_route_input() but takes the addresses and output interface
 * from fl.  Non-cache leaves flagged RTF_DEFAULT with a metric of at
 * least IP6_RT_PRIO_ADDRCONF go through rt6_best_dflt(); other leaves go
 * through rt6_device_match() and BACKTRACK().  A route without a nexthop
 * that is not RTF_NONEXTHOP is cloned with rt6_cow() after dropping
 * rt6_lock; a clone failing with -EEXIST triggers a fresh lookup, at most
 * "attempts" times.  The entry returned has had its lastuse/__use
 * updated; it may be ip6_null_entry or a clone carrying an error.
 */
444 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
446 struct fib6_node *fn;
447 struct rt6_info *rt;
448 int strict;
449 int attempts = 3;
451 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
453 relookup:
454 read_lock_bh(&rt6_lock);
456 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
457 fl->nl_u.ip6_u.saddr);
459 restart:
460 rt = fn->leaf;
462 if ((rt->rt6i_flags & RTF_CACHE)) {
463 if (ip6_rt_policy == 0) {
464 rt = rt6_device_match(rt, fl->oif, strict);
465 BACKTRACK();
466 dst_clone(&rt->u.dst);
467 goto out;
470 #ifdef CONFIG_RT6_POLICY
471 if ((rt->rt6i_flags & RTF_FLOW)) {
472 struct rt6_info *sprt;
474 for (sprt = rt; sprt; sprt = sprt->u.next) {
475 if (rt6_flow_match_out(sprt, sk)) {
476 rt = sprt;
477 dst_clone(&rt->u.dst);
478 goto out;
482 #endif
484 if (rt->rt6i_flags & RTF_DEFAULT) {
485 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
486 rt = rt6_best_dflt(rt, fl->oif);
487 } else {
488 rt = rt6_device_match(rt, fl->oif, strict);
489 BACKTRACK();
492 if (ip6_rt_policy == 0) {
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 read_unlock_bh(&rt6_lock);
496 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
497 fl->nl_u.ip6_u.saddr);
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 goto out2;
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
505 goto relookup;
507 dst_clone(&rt->u.dst);
508 } else {
509 #ifdef CONFIG_RT6_POLICY
510 rt = rt6_flow_lookup_out(rt, sk, fl);
511 #else
512 /* NEVER REACHED */
513 #endif
516 out:
517 read_unlock_bh(&rt6_lock);
518 out2:
519 rt->u.dst.lastuse = jiffies;
520 rt->u.dst.__use++;
521 return &rt->u.dst;
526 * Destination cache support functions
529 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
531 struct rt6_info *rt;
533 rt = (struct rt6_info *) dst;
535 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
536 return dst;
538 dst_release(dst);
539 return NULL;
542 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
545 * FIXME
547 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
548 __builtin_return_address(0)));
549 return NULL;
552 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
554 struct rt6_info *rt = (struct rt6_info *) dst;
556 if (rt) {
557 if (rt->rt6i_flags & RTF_CACHE)
558 ip6_del_rt(rt);
559 else
560 dst_release(dst);
562 return NULL;
565 static void ip6_link_failure(struct sk_buff *skb)
567 struct rt6_info *rt;
569 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
571 rt = (struct rt6_info *) skb->dst;
572 if (rt) {
573 if (rt->rt6i_flags&RTF_CACHE) {
574 dst_set_expires(&rt->u.dst, 0);
575 rt->rt6i_flags |= RTF_EXPIRES;
576 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
577 rt->rt6i_node->fn_sernum = -1;
581 static int ip6_dst_gc()
583 static unsigned expire = 30*HZ;
584 static unsigned long last_gc;
585 unsigned long now = jiffies;
587 if ((long)(now - last_gc) < ip6_rt_gc_min_interval &&
588 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
589 goto out;
591 expire++;
592 fib6_run_gc(expire);
593 last_gc = now;
594 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
595 expire = ip6_rt_gc_timeout>>1;
597 out:
598 expire -= expire>>ip6_rt_gc_elasticity;
599 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!

   Every bit of *pfx beyond the first plen bits is cleared in place.
 */

static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
{
	int partial_bits = plen & 0x7;		/* prefix bits in the split byte */
	int first_clear = (plen + 7) >> 3;	/* first byte entirely outside   */

	if (first_clear < 16)
		memset(pfx->s6_addr + first_clear, 0, 16 - first_clear);

	/* Keep only the leading partial_bits bits of the split byte. */
	if (partial_bits != 0)
		pfx->s6_addr[plen >> 3] &= (0xFF << (8 - partial_bits));
}
619 static int ipv6_get_mtu(struct net_device *dev)
621 int mtu = IPV6_MIN_MTU;
622 struct inet6_dev *idev;
624 idev = in6_dev_get(dev);
625 if (idev) {
626 mtu = idev->cnf.mtu6;
627 in6_dev_put(idev);
629 return mtu;
632 static int ipv6_get_hoplimit(struct net_device *dev)
634 int hoplimit = ipv6_devconf.hop_limit;
635 struct inet6_dev *idev;
637 idev = in6_dev_get(dev);
638 if (idev) {
639 hoplimit = idev->cnf.hop_limit;
640 in6_dev_put(idev);
642 return hoplimit;
/*
 * Build a route from *rtmsg and insert it into the routing table.
 *
 * Prefix lengths above 128 are rejected with -EINVAL (as is any source
 * prefix when CONFIG_IPV6_SUBTREES is off); a zero metric becomes
 * IP6_RT_PRIO_USER.  RTF_REJECT requests, and routes through a loopback
 * device to a non-loopback destination, are turned into reject routes on
 * loopback_dev that discard packets with -ENETUNREACH.  For RTF_GATEWAY
 * routes whose gateway is not a link-local unicast address, the gateway
 * must be unicast and must itself be reachable through a non-gateway
 * route (found with rt6_lookup) on the same device.  A device is
 * required for every non-reject route.
 *
 * Returns the result of rt6_ins() on success.  On failure the device
 * reference and the new entry are released and a negative errno is
 * returned (-ENOMEM, -ENODEV, -EINVAL, -EHOSTUNREACH or the neighbour
 * lookup error).
 */
649 int ip6_route_add(struct in6_rtmsg *rtmsg)
651 int err;
652 struct rt6_info *rt;
653 struct net_device *dev = NULL;
654 int addr_type;
656 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
657 return -EINVAL;
658 #ifndef CONFIG_IPV6_SUBTREES
659 if (rtmsg->rtmsg_src_len)
660 return -EINVAL;
661 #endif
662 if (rtmsg->rtmsg_metric == 0)
663 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
665 rt = dst_alloc(&ip6_dst_ops);
667 if (rt == NULL)
668 return -ENOMEM;
670 rt->u.dst.obsolete = -1;
671 rt->rt6i_expires = rtmsg->rtmsg_info;
673 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
675 if (addr_type & IPV6_ADDR_MULTICAST)
676 rt->u.dst.input = ip6_mc_input;
677 else
678 rt->u.dst.input = ip6_forward;
680 rt->u.dst.output = ip6_output;
682 if (rtmsg->rtmsg_ifindex) {
683 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
684 err = -ENODEV;
685 if (dev == NULL)
686 goto out;
689 ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
690 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
691 if (rt->rt6i_dst.plen == 128)
692 rt->u.dst.flags = DST_HOST;
693 ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
695 #ifdef CONFIG_IPV6_SUBTREES
696 ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
697 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
698 ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
699 #endif
701 rt->rt6i_metric = rtmsg->rtmsg_metric;
703 /* We cannot add true routes via loopback here,
704 they would result in kernel looping; promote them to reject routes
706 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
707 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
708 if (dev)
709 dev_put(dev);
710 dev = &loopback_dev;
711 dev_hold(dev);
712 rt->u.dst.output = ip6_pkt_discard;
713 rt->u.dst.input = ip6_pkt_discard;
714 rt->u.dst.error = -ENETUNREACH;
715 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
716 goto install_route;
719 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
720 struct in6_addr *gw_addr;
721 int gwa_type;
723 gw_addr = &rtmsg->rtmsg_gateway;
724 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
725 gwa_type = ipv6_addr_type(gw_addr);
727 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
728 struct rt6_info *grt;
730 /* IPv6 strictly inhibits using not link-local
731 addresses as nexthop address.
732 Otherwise, router will not able to send redirects.
733 It is very good, but in some (rare!) curcumstances
734 (SIT, PtP, NBMA NOARP links) it is handy to allow
735 some exceptions. --ANK
737 err = -EINVAL;
738 if (!(gwa_type&IPV6_ADDR_UNICAST))
739 goto out;
741 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
743 err = -EHOSTUNREACH;
744 if (grt == NULL)
745 goto out;
746 if (dev) {
747 if (dev != grt->rt6i_dev) {
748 dst_release(&grt->u.dst);
749 goto out;
751 } else {
752 dev = grt->rt6i_dev;
753 dev_hold(dev);
755 if (!(grt->rt6i_flags&RTF_GATEWAY))
756 err = 0;
757 dst_release(&grt->u.dst);
759 if (err)
760 goto out;
762 err = -EINVAL;
763 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
764 goto out;
767 err = -ENODEV;
768 if (dev == NULL)
769 goto out;
771 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
772 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
773 if (IS_ERR(rt->rt6i_nexthop)) {
774 err = PTR_ERR(rt->rt6i_nexthop);
775 rt->rt6i_nexthop = NULL;
776 goto out;
780 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
781 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
782 else
783 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
784 rt->rt6i_flags = rtmsg->rtmsg_flags;
786 install_route:
787 rt->u.dst.pmtu = ipv6_get_mtu(dev);
788 rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
789 /* Maximal non-jumbo IPv6 payload is 65535 and corresponding
790 MSS is 65535 - tcp_header_size. 65535 is also valid and
791 means: "any MSS, rely only on pmtu discovery"
793 if (rt->u.dst.advmss > 65535-20)
794 rt->u.dst.advmss = 65535;
795 rt->u.dst.dev = dev;
796 return rt6_ins(rt);
798 out:
799 if (dev)
800 dev_put(dev);
801 dst_free((struct dst_entry *) rt);
802 return err;
805 int ip6_del_rt(struct rt6_info *rt)
807 int err;
809 write_lock_bh(&rt6_lock);
811 spin_lock_bh(&rt6_dflt_lock);
812 rt6_dflt_pointer = NULL;
813 spin_unlock_bh(&rt6_dflt_lock);
815 dst_release(&rt->u.dst);
817 err = fib6_del(rt);
818 write_unlock_bh(&rt6_lock);
820 return err;
823 int ip6_route_del(struct in6_rtmsg *rtmsg)
825 struct fib6_node *fn;
826 struct rt6_info *rt;
827 int err = -ESRCH;
829 read_lock_bh(&rt6_lock);
831 fn = fib6_locate(&ip6_routing_table,
832 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
833 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
835 if (fn) {
836 for (rt = fn->leaf; rt; rt = rt->u.next) {
837 if (rtmsg->rtmsg_ifindex &&
838 (rt->rt6i_dev == NULL ||
839 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
840 continue;
841 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
842 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
843 continue;
844 if (rtmsg->rtmsg_metric &&
845 rtmsg->rtmsg_metric != rt->rt6i_metric)
846 continue;
847 dst_clone(&rt->u.dst);
848 read_unlock_bh(&rt6_lock);
850 return ip6_del_rt(rt);
853 read_unlock_bh(&rt6_lock);
855 return err;
859 * Handle redirects
861 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
862 struct neighbour *neigh, int on_link)
864 struct rt6_info *rt, *nrt;
866 /* Locate old route to this destination. */
867 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
869 if (rt == NULL)
870 return;
872 if (neigh->dev != rt->rt6i_dev)
873 goto out;
875 /* Redirect received -> path was valid.
876 Look, redirects are sent only in response to data packets,
877 so that this nexthop apparently is reachable. --ANK
879 dst_confirm(&rt->u.dst);
881 /* Duplicate redirect: silently ignore. */
882 if (neigh == rt->u.dst.neighbour)
883 goto out;
885 /* Current route is on-link; redirect is always invalid.
887 Seems, previous statement is not true. It could
888 be node, which looks for us as on-link (f.e. proxy ndisc)
889 But then router serving it might decide, that we should
890 know truth 8)8) --ANK (980726).
892 if (!(rt->rt6i_flags&RTF_GATEWAY))
893 goto out;
895 #if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB)
897 * During transition gateways have more than
898 * one link local address. Certainly, it is violation
899 * of basic principles, but it is temporary.
902 * RFC 1970 specifies that redirects should only be
903 * accepted if they come from the nexthop to the target.
904 * Due to the way default routers are chosen, this notion
905 * is a bit fuzzy and one might need to check all default
906 * routers.
909 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
910 if (rt->rt6i_flags & RTF_DEFAULT) {
911 struct rt6_info *rt1;
913 read_lock(&rt6_lock);
914 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
915 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
916 dst_clone(&rt1->u.dst);
917 dst_release(&rt->u.dst);
918 read_unlock(&rt6_lock);
919 rt = rt1;
920 goto source_ok;
923 read_unlock(&rt6_lock);
925 if (net_ratelimit())
926 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
927 "for redirect target\n");
928 goto out;
931 source_ok:
932 #endif
935 * We have finally decided to accept it.
938 nrt = ip6_rt_copy(rt);
939 if (nrt == NULL)
940 goto out;
942 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
943 if (on_link)
944 nrt->rt6i_flags &= ~RTF_GATEWAY;
946 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
947 nrt->rt6i_dst.plen = 128;
948 nrt->u.dst.flags |= DST_HOST;
950 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
951 nrt->rt6i_nexthop = neigh_clone(neigh);
952 /* Reset pmtu, it may be better */
953 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
954 nrt->u.dst.advmss = max(nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
955 if (rt->u.dst.advmss > 65535-20)
956 rt->u.dst.advmss = 65535;
957 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
959 if (rt6_ins(nrt))
960 goto out;
962 if (rt->rt6i_flags&RTF_CACHE) {
963 ip6_del_rt(rt);
964 return;
967 out:
968 dst_release(&rt->u.dst);
969 return;
973 * Handle ICMP "packet too big" messages
974 * i.e. Path MTU discovery
977 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
978 struct net_device *dev, u32 pmtu)
980 struct rt6_info *rt, *nrt;
982 if (pmtu < IPV6_MIN_MTU) {
983 if (net_ratelimit())
984 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
985 pmtu);
986 return;
989 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
991 if (rt == NULL)
992 return;
994 if (pmtu >= rt->u.dst.pmtu)
995 goto out;
997 /* New mtu received -> path was valid.
998 They are sent only in response to data packets,
999 so that this nexthop apparently is reachable. --ANK
1001 dst_confirm(&rt->u.dst);
1003 /* Host route. If it is static, it would be better
1004 not to override it, but add new one, so that
1005 when cache entry will expire old pmtu
1006 would return automatically.
1008 if (rt->rt6i_flags & RTF_CACHE) {
1009 rt->u.dst.pmtu = pmtu;
1010 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1011 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1012 goto out;
1015 /* Network route.
1016 Two cases are possible:
1017 1. It is connected route. Action: COW
1018 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1020 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1021 nrt = rt6_cow(rt, daddr, saddr);
1022 if (!nrt->u.dst.error) {
1023 nrt->u.dst.pmtu = pmtu;
1024 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1025 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1026 dst_release(&nrt->u.dst);
1028 } else {
1029 nrt = ip6_rt_copy(rt);
1030 if (nrt == NULL)
1031 goto out;
1032 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1033 nrt->rt6i_dst.plen = 128;
1034 nrt->u.dst.flags |= DST_HOST;
1035 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1036 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1037 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1038 nrt->u.dst.pmtu = pmtu;
1039 rt6_ins(nrt);
1042 out:
1043 dst_release(&rt->u.dst);
1047 * Misc support functions
1050 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1052 struct rt6_info *rt;
1054 rt = dst_alloc(&ip6_dst_ops);
1056 if (rt) {
1057 rt->u.dst.input = ort->u.dst.input;
1058 rt->u.dst.output = ort->u.dst.output;
1060 memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1061 rt->u.dst.dev = ort->u.dst.dev;
1062 if (rt->u.dst.dev)
1063 dev_hold(rt->u.dst.dev);
1064 rt->u.dst.lastuse = jiffies;
1065 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1066 rt->rt6i_expires = 0;
1068 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1069 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1070 rt->rt6i_metric = 0;
1072 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1073 #ifdef CONFIG_IPV6_SUBTREES
1074 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1075 #endif
1077 return rt;
1080 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1082 struct rt6_info *rt;
1083 struct fib6_node *fn;
1085 fn = &ip6_routing_table;
1087 write_lock_bh(&rt6_lock);
1088 for (rt = fn->leaf; rt; rt=rt->u.next) {
1089 if (dev == rt->rt6i_dev &&
1090 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1091 break;
1093 if (rt)
1094 dst_clone(&rt->u.dst);
1095 write_unlock_bh(&rt6_lock);
1096 return rt;
1099 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1100 struct net_device *dev)
1102 struct in6_rtmsg rtmsg;
1104 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1105 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1106 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1107 rtmsg.rtmsg_metric = 1024;
1108 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1110 rtmsg.rtmsg_ifindex = dev->ifindex;
1112 ip6_route_add(&rtmsg);
1113 return rt6_get_dflt_router(gwaddr, dev);
1116 void rt6_purge_dflt_routers(int last_resort)
1118 struct rt6_info *rt;
1119 u32 flags;
1121 if (last_resort)
1122 flags = RTF_ALLONLINK;
1123 else
1124 flags = RTF_DEFAULT | RTF_ADDRCONF;
1126 restart:
1127 read_lock_bh(&rt6_lock);
1128 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1129 if (rt->rt6i_flags & flags) {
1130 dst_hold(&rt->u.dst);
1132 spin_lock_bh(&rt6_dflt_lock);
1133 rt6_dflt_pointer = NULL;
1134 spin_unlock_bh(&rt6_dflt_lock);
1136 read_unlock_bh(&rt6_lock);
1138 ip6_del_rt(rt);
1140 goto restart;
1143 read_unlock_bh(&rt6_lock);
1146 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1148 struct in6_rtmsg rtmsg;
1149 int err;
1151 switch(cmd) {
1152 case SIOCADDRT: /* Add a route */
1153 case SIOCDELRT: /* Delete a route */
1154 if (!capable(CAP_NET_ADMIN))
1155 return -EPERM;
1156 err = copy_from_user(&rtmsg, arg,
1157 sizeof(struct in6_rtmsg));
1158 if (err)
1159 return -EFAULT;
1161 rtnl_lock();
1162 switch (cmd) {
1163 case SIOCADDRT:
1164 err = ip6_route_add(&rtmsg);
1165 break;
1166 case SIOCDELRT:
1167 err = ip6_route_del(&rtmsg);
1168 break;
1169 default:
1170 err = -EINVAL;
1172 rtnl_unlock();
1174 return err;
1177 return -EINVAL;
1181 * Drop the packet on the floor
1184 int ip6_pkt_discard(struct sk_buff *skb)
1186 IP6_INC_STATS(Ip6OutNoRoutes);
1187 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
1188 kfree_skb(skb);
1189 return 0;
1193 * Add address
1196 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1198 struct rt6_info *rt;
1200 rt = dst_alloc(&ip6_dst_ops);
1201 if (rt == NULL)
1202 return -ENOMEM;
1204 rt->u.dst.flags = DST_HOST;
1205 rt->u.dst.input = ip6_input;
1206 rt->u.dst.output = ip6_output;
1207 rt->rt6i_dev = dev_get_by_name("lo");
1208 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1209 rt->u.dst.advmss = max(rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1210 if (rt->u.dst.advmss > 65535-20)
1211 rt->u.dst.advmss = 65535;
1212 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1213 rt->u.dst.obsolete = -1;
1215 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1216 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1217 if (rt->rt6i_nexthop == NULL) {
1218 dst_free((struct dst_entry *) rt);
1219 return -ENOMEM;
1222 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1223 rt->rt6i_dst.plen = 128;
1224 rt6_ins(rt);
1226 return 0;
1229 /* Delete address. Warning: you should check that this address
1230 disappeared before calling this function.
1233 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1235 struct rt6_info *rt;
1236 int err = -ENOENT;
1238 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1239 if (rt) {
1240 if (rt->rt6i_dst.plen == 128)
1241 err = ip6_del_rt(rt);
1242 else
1243 dst_release(&rt->u.dst);
1246 return err;
1249 #ifdef CONFIG_RT6_POLICY
1251 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1253 struct flow_filter *frule;
1254 struct pkt_filter *filter;
1255 int res = 1;
1257 if ((frule = rt->rt6i_filter) == NULL)
1258 goto out;
1260 if (frule->type != FLR_INPUT) {
1261 res = 0;
1262 goto out;
1265 for (filter = frule->u.filter; filter; filter = filter->next) {
1266 __u32 *word;
1268 word = (__u32 *) skb->h.raw;
1269 word += filter->offset;
1271 if ((*word ^ filter->value) & filter->mask) {
1272 res = 0;
1273 break;
1277 out:
1278 return res;
1281 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1283 struct flow_filter *frule;
1284 int res = 1;
1286 if ((frule = rt->rt6i_filter) == NULL)
1287 goto out;
1289 if (frule->type != FLR_INPUT) {
1290 res = 0;
1291 goto out;
1294 if (frule->u.sk != sk)
1295 res = 0;
1296 out:
1297 return res;
1300 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1301 struct in6_addr *daddr,
1302 struct in6_addr *saddr,
1303 struct fl_acc_args *args)
1305 struct flow_rule *frule;
1306 struct rt6_info *nrt = NULL;
1307 struct pol_chain *pol;
1309 for (pol = rt6_pol_list; pol; pol = pol->next) {
1310 struct fib6_node *fn;
1311 struct rt6_info *sprt;
1313 fn = fib6_lookup(pol->rules, daddr, saddr);
1315 do {
1316 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1317 int res;
1319 frule = sprt->rt6i_flowr;
1320 #if RT6_DEBUG >= 2
1321 if (frule == NULL) {
1322 printk(KERN_DEBUG "NULL flowr\n");
1323 goto error;
1325 #endif
1326 res = frule->ops->accept(rt, sprt, args, &nrt);
1328 switch (res) {
1329 case FLOWR_SELECT:
1330 goto found;
1331 case FLOWR_CLEAR:
1332 goto next_policy;
1333 case FLOWR_NODECISION:
1334 break;
1335 default:
1336 goto error;
1340 fn = fn->parent;
1342 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1344 next_policy:
1347 error:
1348 dst_clone(&ip6_null_entry.u.dst);
1349 return &ip6_null_entry;
1351 found:
1352 if (nrt == NULL)
1353 goto error;
1355 nrt->rt6i_flags |= RTF_CACHE;
1356 dst_clone(&nrt->u.dst);
1357 err = rt6_ins(nrt);
1358 if (err)
1359 nrt->u.dst.error = err;
1360 return nrt;
1362 #endif
1364 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1366 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1367 rt != &ip6_null_entry) {
1368 RT6_TRACE("deleted by ifdown %p\n", rt);
1369 return -1;
1371 return 0;
1374 void rt6_ifdown(struct net_device *dev)
1376 write_lock_bh(&rt6_lock);
1377 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1378 write_unlock_bh(&rt6_lock);
/* Arguments rt6_mtu_change() passes to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU              */
};
1387 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1389 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1391 /* In IPv6 pmtu discovery is not optional,
1392 so that RTAX_MTU lock cannot disable it.
1393 We still use this lock to block changes
1394 caused by addrconf/ndisc.
1396 if (rt->rt6i_dev == arg->dev &&
1397 !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
1398 rt->u.dst.pmtu = arg->mtu;
1399 rt->u.dst.advmss = max(arg->mtu - 60, ip6_rt_min_advmss);
1400 if (rt->u.dst.advmss > 65535-20)
1401 rt->u.dst.advmss = 65535;
1402 return 0;
1405 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1407 struct rt6_mtu_change_arg arg;
1409 arg.dev = dev;
1410 arg.mtu = mtu;
1411 read_lock_bh(&rt6_lock);
1412 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1413 read_unlock_bh(&rt6_lock);
1416 #ifdef CONFIG_RTNETLINK
1418 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1419 struct in6_rtmsg *rtmsg)
1421 memset(rtmsg, 0, sizeof(*rtmsg));
1423 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1424 rtmsg->rtmsg_src_len = r->rtm_src_len;
1425 rtmsg->rtmsg_flags = RTF_UP;
1426 if (r->rtm_type == RTN_UNREACHABLE)
1427 rtmsg->rtmsg_flags |= RTF_REJECT;
1429 if (rta[RTA_GATEWAY-1]) {
1430 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1431 return -EINVAL;
1432 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1433 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1435 if (rta[RTA_DST-1]) {
1436 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1437 return -EINVAL;
1438 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1440 if (rta[RTA_SRC-1]) {
1441 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1442 return -EINVAL;
1443 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1445 if (rta[RTA_OIF-1]) {
1446 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1447 return -EINVAL;
1448 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1450 if (rta[RTA_PRIORITY-1]) {
1451 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1452 return -EINVAL;
1453 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1455 return 0;
1458 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1460 struct rtmsg *r = NLMSG_DATA(nlh);
1461 struct in6_rtmsg rtmsg;
1463 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1464 return -EINVAL;
1465 return ip6_route_del(&rtmsg);
1468 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1470 struct rtmsg *r = NLMSG_DATA(nlh);
1471 struct in6_rtmsg rtmsg;
1473 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1474 return -EINVAL;
1475 return ip6_route_add(&rtmsg);
/*
 *	Per-call context for a netlink route dump, handed to the tree
 *	walker callbacks through their "void *" argument.
 */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* reply buffer being filled */
	struct netlink_callback *cb;	/* dump state of the request */
};
1484 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1485 struct in6_addr *dst,
1486 struct in6_addr *src,
1487 int iif,
1488 int type, u32 pid, u32 seq)
1490 struct rtmsg *rtm;
1491 struct nlmsghdr *nlh;
1492 unsigned char *b = skb->tail;
1493 struct rta_cacheinfo ci;
1495 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1496 rtm = NLMSG_DATA(nlh);
1497 rtm->rtm_family = AF_INET6;
1498 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1499 rtm->rtm_src_len = rt->rt6i_src.plen;
1500 rtm->rtm_tos = 0;
1501 rtm->rtm_table = RT_TABLE_MAIN;
1502 if (rt->rt6i_flags&RTF_REJECT)
1503 rtm->rtm_type = RTN_UNREACHABLE;
1504 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1505 rtm->rtm_type = RTN_LOCAL;
1506 else
1507 rtm->rtm_type = RTN_UNICAST;
1508 rtm->rtm_flags = 0;
1509 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1510 rtm->rtm_protocol = RTPROT_BOOT;
1511 if (rt->rt6i_flags&RTF_DYNAMIC)
1512 rtm->rtm_protocol = RTPROT_REDIRECT;
1513 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1514 rtm->rtm_protocol = RTPROT_KERNEL;
1515 else if (rt->rt6i_flags&RTF_DEFAULT)
1516 rtm->rtm_protocol = RTPROT_RA;
1518 if (rt->rt6i_flags&RTF_CACHE)
1519 rtm->rtm_flags |= RTM_F_CLONED;
1521 if (dst) {
1522 RTA_PUT(skb, RTA_DST, 16, dst);
1523 rtm->rtm_dst_len = 128;
1524 } else if (rtm->rtm_dst_len)
1525 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1526 #ifdef CONFIG_IPV6_SUBTREES
1527 if (src) {
1528 RTA_PUT(skb, RTA_SRC, 16, src);
1529 rtm->rtm_src_len = 128;
1530 } else if (rtm->rtm_src_len)
1531 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1532 #endif
1533 if (iif)
1534 RTA_PUT(skb, RTA_IIF, 4, &iif);
1535 else if (dst) {
1536 struct in6_addr saddr_buf;
1537 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1538 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1540 if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1541 goto rtattr_failure;
1542 if (rt->u.dst.neighbour)
1543 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1544 if (rt->u.dst.dev)
1545 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1546 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1547 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1548 if (rt->rt6i_expires)
1549 ci.rta_expires = rt->rt6i_expires - jiffies;
1550 else
1551 ci.rta_expires = 0;
1552 ci.rta_used = rt->u.dst.__use;
1553 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1554 ci.rta_error = rt->u.dst.error;
1555 ci.rta_id = 0;
1556 ci.rta_ts = 0;
1557 ci.rta_tsage = 0;
1558 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1559 nlh->nlmsg_len = skb->tail - b;
1560 return skb->len;
1562 nlmsg_failure:
1563 rtattr_failure:
1564 skb_trim(skb, b - skb->data);
1565 return -1;
1568 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1570 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1572 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1573 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq);
1576 static int fib6_dump_node(struct fib6_walker_t *w)
1578 int res;
1579 struct rt6_info *rt;
1581 for (rt = w->leaf; rt; rt = rt->u.next) {
1582 res = rt6_dump_route(rt, w->args);
1583 if (res < 0) {
1584 /* Frame is full, suspend walking */
1585 w->leaf = rt;
1586 return 1;
1588 BUG_TRAP(res!=0);
1590 w->leaf = NULL;
1591 return 0;
1594 static void fib6_dump_end(struct netlink_callback *cb)
1596 struct fib6_walker_t *w = (void*)cb->args[0];
1598 if (w) {
1599 cb->args[0] = 0;
1600 fib6_walker_unlink(w);
1601 kfree(w);
1603 if (cb->args[1]) {
1604 cb->done = (void*)cb->args[1];
1605 cb->args[1] = 0;
1609 static int fib6_dump_done(struct netlink_callback *cb)
1611 fib6_dump_end(cb);
1612 return cb->done(cb);
1615 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1617 struct rt6_rtnl_dump_arg arg;
1618 struct fib6_walker_t *w;
1619 int res;
1621 arg.skb = skb;
1622 arg.cb = cb;
1624 w = (void*)cb->args[0];
1625 if (w == NULL) {
1626 /* New dump:
1628 * 1. hook callback destructor.
1630 cb->args[1] = (long)cb->done;
1631 cb->done = fib6_dump_done;
1634 * 2. allocate and initialize walker.
1636 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1637 if (w == NULL)
1638 return -ENOMEM;
1639 RT6_TRACE("dump<%p", w);
1640 memset(w, 0, sizeof(*w));
1641 w->root = &ip6_routing_table;
1642 w->func = fib6_dump_node;
1643 w->args = &arg;
1644 cb->args[0] = (long)w;
1645 read_lock_bh(&rt6_lock);
1646 res = fib6_walk(w);
1647 read_unlock_bh(&rt6_lock);
1648 } else {
1649 w->args = &arg;
1650 read_lock_bh(&rt6_lock);
1651 res = fib6_walk_continue(w);
1652 read_unlock_bh(&rt6_lock);
1654 #if RT6_DEBUG >= 3
1655 if (res <= 0 && skb->len == 0)
1656 RT6_TRACE("%p>dump end\n", w);
1657 #endif
1658 res = res < 0 ? res : skb->len;
1659 /* res < 0 is an error. (really, impossible)
1660 res == 0 means that dump is complete, but skb still can contain data.
1661 res > 0 dump is not complete, but frame is full.
1663 /* Destroy walker, if dump of this table is complete. */
1664 if (res <= 0)
1665 fib6_dump_end(cb);
1666 return res;
1669 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1671 struct rtattr **rta = arg;
1672 int iif = 0;
1673 int err;
1674 struct sk_buff *skb;
1675 struct flowi fl;
1676 struct rt6_info *rt;
1678 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1679 if (skb == NULL)
1680 return -ENOBUFS;
1682 /* Reserve room for dummy headers, this skb can pass
1683 through good chunk of routing engine.
1685 skb->mac.raw = skb->data;
1686 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1688 fl.proto = 0;
1689 fl.nl_u.ip6_u.daddr = NULL;
1690 fl.nl_u.ip6_u.saddr = NULL;
1691 fl.uli_u.icmpt.type = 0;
1692 fl.uli_u.icmpt.code = 0;
1693 if (rta[RTA_SRC-1])
1694 fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1695 if (rta[RTA_DST-1])
1696 fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1698 if (rta[RTA_IIF-1])
1699 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1701 if (iif) {
1702 struct net_device *dev;
1703 dev = __dev_get_by_index(iif);
1704 if (!dev)
1705 return -ENODEV;
1708 fl.oif = 0;
1709 if (rta[RTA_OIF-1])
1710 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1712 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1714 skb->dst = &rt->u.dst;
1716 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1717 err = rt6_fill_node(skb, rt,
1718 fl.nl_u.ip6_u.daddr,
1719 fl.nl_u.ip6_u.saddr,
1720 iif,
1721 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq);
1722 if (err < 0)
1723 return -EMSGSIZE;
1725 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1726 if (err < 0)
1727 return err;
1728 return 0;
1731 void inet6_rt_notify(int event, struct rt6_info *rt)
1733 struct sk_buff *skb;
1734 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1736 skb = alloc_skb(size, gfp_any());
1737 if (!skb) {
1738 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1739 return;
1741 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0) < 0) {
1742 kfree_skb(skb);
1743 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1744 return;
1746 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1747 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1750 #endif
/*
 *	/proc
 */
1756 #ifdef CONFIG_PROC_FS
/*
 * Length in bytes of one line of the "ipv6_route" proc file:
 * three 32-digit hex addresses, two 4-byte prefix-length fields,
 * 40 bytes for the metric/refcnt/use/flags words with their separators,
 * then 5 + 1 bytes, presumably the rest of the device-name column and
 * the newline -- confirm against the format in rt6_info_route().
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 *	State shared between rt6_proc_info() and rt6_info_route() while
 *	the routing table is rendered into a proc read buffer.
 */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* file offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written into buffer so far */
};
1769 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1771 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1772 int i;
1774 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1775 arg->skip++;
1776 return 0;
1779 if (arg->len >= arg->length)
1780 return 0;
1782 for (i=0; i<16; i++) {
1783 sprintf(arg->buffer + arg->len, "%02x",
1784 rt->rt6i_dst.addr.s6_addr[i]);
1785 arg->len += 2;
1787 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1788 rt->rt6i_dst.plen);
1790 #ifdef CONFIG_IPV6_SUBTREES
1791 for (i=0; i<16; i++) {
1792 sprintf(arg->buffer + arg->len, "%02x",
1793 rt->rt6i_src.addr.s6_addr[i]);
1794 arg->len += 2;
1796 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1797 rt->rt6i_src.plen);
1798 #else
1799 sprintf(arg->buffer + arg->len,
1800 "00000000000000000000000000000000 00 ");
1801 arg->len += 36;
1802 #endif
1804 if (rt->rt6i_nexthop) {
1805 for (i=0; i<16; i++) {
1806 sprintf(arg->buffer + arg->len, "%02x",
1807 rt->rt6i_nexthop->primary_key[i]);
1808 arg->len += 2;
1810 } else {
1811 sprintf(arg->buffer + arg->len,
1812 "00000000000000000000000000000000");
1813 arg->len += 32;
1815 arg->len += sprintf(arg->buffer + arg->len,
1816 " %08x %08x %08x %08x %8s\n",
1817 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1818 rt->u.dst.__use, rt->rt6i_flags,
1819 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1820 return 0;
1823 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1825 struct rt6_proc_arg arg;
1826 arg.buffer = buffer;
1827 arg.offset = offset;
1828 arg.length = length;
1829 arg.skip = 0;
1830 arg.len = 0;
1832 read_lock_bh(&rt6_lock);
1833 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1834 read_unlock_bh(&rt6_lock);
1836 *start = buffer;
1837 if (offset)
1838 *start += offset % RT6_INFO_LEN;
1840 arg.len -= offset % RT6_INFO_LEN;
1842 if (arg.len > length)
1843 arg.len = length;
1844 if (arg.len < 0)
1845 arg.len = 0;
1847 return arg.len;
1850 extern struct rt6_statistics rt6_stats;
1852 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1854 int len;
1856 len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1857 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1858 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1859 rt6_stats.fib_rt_cache,
1860 atomic_read(&ip6_dst_ops.entries));
1862 len -= offset;
1864 if (len > length)
1865 len = length;
1866 if(len < 0)
1867 len = 0;
1869 *start = buffer + offset;
1871 return len;
1873 #endif /* CONFIG_PROC_FS */
1875 #ifdef CONFIG_SYSCTL
1877 static int flush_delay;
1879 static
1880 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1881 void *buffer, size_t *lenp)
1883 if (write) {
1884 proc_dointvec(ctl, write, filp, buffer, lenp);
1885 if (flush_delay < 0)
1886 flush_delay = 0;
1887 fib6_run_gc((unsigned long)flush_delay);
1888 return 0;
1889 } else
1890 return -EINVAL;
1893 ctl_table ipv6_route_table[] = {
1894 {NET_IPV6_ROUTE_FLUSH, "flush",
1895 &flush_delay, sizeof(int), 0644, NULL,
1896 &ipv6_sysctl_rtcache_flush},
1897 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
1898 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
1899 &proc_dointvec},
1900 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
1901 &ip6_rt_max_size, sizeof(int), 0644, NULL,
1902 &proc_dointvec},
1903 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
1904 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
1905 &proc_dointvec_jiffies},
1906 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
1907 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
1908 &proc_dointvec_jiffies},
1909 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
1910 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
1911 &proc_dointvec_jiffies},
1912 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
1913 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
1914 &proc_dointvec_jiffies},
1915 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
1916 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
1917 &proc_dointvec_jiffies},
1918 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
1919 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
1920 &proc_dointvec_jiffies},
1924 #endif
1927 void __init ip6_route_init(void)
1929 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
1930 sizeof(struct rt6_info),
1931 0, SLAB_HWCACHE_ALIGN,
1932 NULL, NULL);
1933 fib6_init();
1934 #ifdef CONFIG_PROC_FS
1935 proc_net_create("ipv6_route", 0, rt6_proc_info);
1936 proc_net_create("rt6_stats", 0, rt6_proc_stats);
1937 #endif
#ifdef MODULE
/*
 *	Module unload: remove the proc files registered by
 *	ip6_route_init(), run rt6_ifdown() with no specific device, then
 *	shut down the FIB garbage collector.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif

	/* NULL matches routes on every device. */
	rt6_ifdown(NULL);
	fib6_gc_cleanup();
}
#endif	/* MODULE */