Import 2.1.99pre2
[davej-history.git] / net / ipv6 / route.c
bloba71c9c0e583315b54d207dd74ebc38bf270bac7d
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.28 1998/04/28 06:22:04 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 #include <linux/config.h>
17 #include <linux/errno.h>
18 #include <linux/types.h>
19 #include <linux/socket.h>
20 #include <linux/sockios.h>
21 #include <linux/net.h>
22 #include <linux/route.h>
23 #include <linux/netdevice.h>
24 #include <linux/in6.h>
25 #include <linux/init.h>
26 #include <linux/netlink.h>
27 #include <linux/if_arp.h>
29 #ifdef CONFIG_PROC_FS
30 #include <linux/proc_fs.h>
31 #endif
33 #include <net/snmp.h>
34 #include <net/ipv6.h>
35 #include <net/ip6_fib.h>
36 #include <net/ip6_route.h>
37 #include <net/ndisc.h>
38 #include <net/addrconf.h>
39 #include <net/tcp.h>
40 #include <linux/netlink.h>
41 #include <linux/rtnetlink.h>
43 #include <asm/uaccess.h>
45 #ifdef CONFIG_SYSCTL
46 #include <linux/sysctl.h>
47 #endif
/* Policy routing is force-disabled for this file: every
   #ifdef CONFIG_RT6_POLICY section below is compiled out. */
49 #undef CONFIG_RT6_POLICY
51 /* Set to 3 to get tracing. */
52 #define RT6_DEBUG 2
/* RDBG((fmt, args...)) takes a parenthesised printk argument list; it
   expands to a printk call when RT6_DEBUG >= 3 and to nothing otherwise. */
54 #if RT6_DEBUG >= 3
55 #define RDBG(x) printk x
56 #else
57 #define RDBG(x)
58 #endif
/* Route cache tunables.  As used by ip6_dst_gc() in this file:
   - ip6_rt_max_size: ip6_dst_gc() returns non-zero while the dst entry
     count exceeds this value.
   - ip6_rt_gc_min_interval: minimum number of jiffies between two GC runs.
   - ip6_rt_gc_timeout: half of it becomes the expiry value once the entry
     count is below gc_thresh.
   - ip6_rt_gc_elasticity: shift used to decay the expiry value on each call.
   ip6_rt_gc_interval is not referenced in the visible part of this file. */
60 int ip6_rt_max_size = 4096;
61 int ip6_rt_gc_min_interval = 5*HZ;
62 int ip6_rt_gc_timeout = 60*HZ;
63 int ip6_rt_gc_interval = 30*HZ;
64 int ip6_rt_gc_elasticity = 9;
/* Forward declarations for helpers defined later in this file; most of
   them are needed by the ip6_dst_ops and ip6_null_entry initializers. */
66 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
67 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
69 struct sk_buff *skb);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static int ip6_dst_gc(void);
73 static int ip6_pkt_discard(struct sk_buff *skb);
74 static void ip6_link_failure(struct sk_buff *skb);
/* Destination-cache operations for IPv6 routes, initialised positionally.
   Every route allocated in this file passes &ip6_dst_ops to dst_alloc().
   NOTE(review): the meaning of each position depends on struct dst_ops
   (include/net/dst.h), which is not visible here.  The literal 1024 is
   presumably gc_thresh (ip6_dst_gc() compares the entry count against
   ip6_dst_ops.gc_thresh) -- confirm against the struct definition. */
76 struct dst_ops ip6_dst_ops = {
77 AF_INET6,
78 __constant_htons(ETH_P_IPV6),
79 1024,
81 ip6_dst_gc,
82 ip6_dst_check,
83 ip6_dst_reroute,
84 NULL,
85 ip6_negative_advice,
86 ip6_link_failure,
/* The "no route" sentinel.  It is the leaf of the routing table root, is
   returned by rt6_device_match() on a strict miss and by rt6_cow() when a
   clone cannot be allocated.  Both packet handlers are ip6_pkt_discard,
   the initializer carries -ENETUNREACH, and the flags are
   RTF_REJECT|RTF_NONEXTHOP.
   NOTE(review): the remaining positional values depend on the layouts of
   struct dst_entry and struct rt6_info, which are not visible here. */
89 struct rt6_info ip6_null_entry = {
90 {{NULL, ATOMIC_INIT(1), ATOMIC_INIT(1), NULL,
91 -1, 0, 0, 0, 0, 0, 0, 0, 0,
92 -ENETUNREACH, NULL, NULL,
93 ip6_pkt_discard, ip6_pkt_discard,
94 #ifdef CONFIG_NET_CLS_ROUTE
96 #endif
97 &ip6_dst_ops}},
98 NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0U,
99 255, 0, {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
/* Root node of the IPv6 FIB.  Its leaf starts out as ip6_null_entry and
   it is flagged RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO.  The default-router
   helpers below walk ip6_routing_table.leaf directly. */
102 struct fib6_node ip6_routing_table = {
103 NULL, NULL, NULL, NULL,
104 &ip6_null_entry,
105 0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
/* Policy (flow) routing state.  CONFIG_RT6_POLICY is #undef'd near the
   top of this file, so only the #else branch is compiled and
   ip6_rt_policy is the constant 0; the "ip6_rt_policy == 0" tests in the
   lookup paths are therefore always true. */
108 #ifdef CONFIG_RT6_POLICY
109 int ip6_rt_policy = 0;
111 struct pol_chain *rt6_pol_list = NULL;
114 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
115 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
117 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
118 struct in6_addr *daddr,
119 struct in6_addr *saddr,
120 struct fl_acc_args *args);
122 #else
123 #define ip6_rt_policy (0)
124 #endif
/* rt6_tbl_lock counts the current users of the routing table (see
   rt6_lock()/rt6_unlock()).  rt6_bh_mask records pending deferred work;
   rt6_unlock() runs __rt6_run_bh() when the count drops to zero and the
   mask is non-zero.  RT_BH_GC is defined but not used in the visible
   part of this file. */
126 static atomic_t rt6_tbl_lock = ATOMIC_INIT(0);
127 static int rt6_bh_mask = 0;
129 #define RT_BH_REQUEST 1
130 #define RT_BH_GC 2
132 static void __rt6_run_bh(void);
135 * request queue operations
136 * FIFO queue/dequeue
/* Sentinel of a circular doubly linked list of deferred table operations:
   an empty queue is the sentinel pointing at itself in both directions. */
139 static struct rt6_req request_queue = {
140 0, NULL, &request_queue, &request_queue
143 static __inline__ void rtreq_queue(struct rt6_req * req)
145 unsigned long flags;
146 struct rt6_req *next = &request_queue;
148 save_flags(flags);
149 cli();
151 req->prev = next->prev;
152 req->prev->next = req;
153 next->prev = req;
154 req->next = next;
155 restore_flags(flags);
158 static __inline__ struct rt6_req * rtreq_dequeue(void)
160 struct rt6_req *next = &request_queue;
161 struct rt6_req *head;
163 head = next->next;
165 if (head == next)
166 return NULL;
168 head->next->prev = head->prev;
169 next->next = head->next;
171 head->next = NULL;
172 head->prev = NULL;
174 return head;
177 void rtreq_add(struct rt6_info *rt, int operation)
179 struct rt6_req *rtreq;
181 rtreq = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC);
183 if (rtreq == NULL)
184 return;
186 memset(rtreq, 0, sizeof(struct rt6_req));
188 rtreq->operation = operation;
189 rtreq->ptr = rt;
190 rtreq_queue(rtreq);
192 rt6_bh_mask |= RT_BH_REQUEST;
195 static __inline__ void rt6_lock(void)
197 atomic_inc(&rt6_tbl_lock);
200 static __inline__ void rt6_unlock(void)
202 if (atomic_dec_and_test(&rt6_tbl_lock) && rt6_bh_mask) {
203 start_bh_atomic();
204 __rt6_run_bh();
205 end_bh_atomic();
210 * Route lookup
213 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
214 int oif,
215 int strict)
217 struct rt6_info *local = NULL;
218 struct rt6_info *sprt;
220 if (oif) {
221 for (sprt = rt; sprt; sprt = sprt->u.next) {
222 if (sprt->rt6i_dev) {
223 if (sprt->rt6i_dev->ifindex == oif)
224 return sprt;
225 if (sprt->rt6i_dev->flags&IFF_LOOPBACK)
226 local = sprt;
230 if (local)
231 return local;
233 if (strict) {
234 RDBG(("nomatch & STRICT --> ip6_null_entry\n"));
235 return &ip6_null_entry;
238 RDBG(("!dev or (no match and !strict) --> rt(%p)\n", rt));
239 return rt;
/* rt6_best_dflt(): pick a default router from the route list @rt.

   Every route that has a nexthop neighbour is scored by neighbour state:
   NUD_REACHABLE scores 2, NUD_DELAY and NUD_STALE score 1, any other
   state keeps the initial -1.  A REACHABLE router that is not the one
   chosen last time (rt6_dflt_pointer) wins immediately.  A route on the
   requested interface @oif gets +2.  The route with the highest score
   >= 0 becomes the match; because the test is "m >= mpri", later entries
   win ties.

   If nothing matched, the entry following the previously chosen router
   is used when it exists and its dst.error is 0; otherwise the incoming
   @rt is kept.  The result is stored in rt6_dflt_pointer and returned. */
243 * pointer to the last default router chosen
245 static struct rt6_info *rt6_dflt_pointer = NULL;
247 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
249 struct rt6_info *match = NULL;
250 struct rt6_info *sprt;
251 int mpri = 0;
253 for (sprt = rt; sprt; sprt = sprt->u.next) {
254 struct neighbour *neigh;
256 RDBG(("sprt(%p): ", sprt));
257 if ((neigh = sprt->rt6i_nexthop)) {
258 int m = -1;
260 RDBG(("nxthop(%p,%d) ", neigh, neigh->nud_state));
261 switch (neigh->nud_state) {
262 case NUD_REACHABLE:
263 RDBG(("NUD_REACHABLE "));
264 if (sprt != rt6_dflt_pointer) {
265 rt = sprt;
266 RDBG(("sprt!=dflt_ptr -> %p\n",
267 sprt));
268 goto out;
270 RDBG(("m=2, "));
271 m = 2;
272 break;
274 case NUD_DELAY:
275 RDBG(("NUD_DELAY, m=1, "));
276 m = 1;
277 break;
279 case NUD_STALE:
280 RDBG(("NUD_STALE, m=1, "));
281 m = 1;
282 break;
/* Bonus for a route on the requested output interface. */
285 if (oif && sprt->rt6i_dev && sprt->rt6i_dev->ifindex == oif) {
286 m += 2;
289 if (m >= mpri) {
290 RDBG(("m>=mpri setmatch, "));
291 mpri = m;
292 match = sprt;
297 if (match) {
298 RDBG(("match, set rt, "));
299 rt = match;
300 } else {
302 * No default routers are known to be reachable.
303 * SHOULD round robin
305 RDBG(("!match, trying rt6_dflt_pointer, "));
306 if (rt6_dflt_pointer) {
307 struct rt6_info *next;
309 if ((next = rt6_dflt_pointer->u.next) &&
310 next->u.dst.error == 0)
311 rt = next;
315 out:
316 rt6_dflt_pointer = rt;
317 RDBG(("returning %p, dflt_ptr set\n", rt));
318 return rt;
321 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
322 int oif, int flags)
324 struct fib6_node *fn;
325 struct rt6_info *rt;
327 rt6_lock();
328 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
329 rt = rt6_device_match(fn->leaf, oif, flags&RTF_LINKRT);
330 rt6_unlock();
331 return rt;
334 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
335 struct in6_addr *saddr)
337 struct rt6_info *rt;
340 * Clone the route.
343 rt = ip6_rt_copy(ort);
345 if (rt) {
346 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
348 if (!(rt->rt6i_flags&RTF_GATEWAY))
349 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
351 rt->rt6i_dst.plen = 128;
352 rt->rt6i_flags |= RTF_CACHE;
354 if (rt->rt6i_src.plen) {
355 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
356 rt->rt6i_src.plen = 128;
359 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
361 rtreq_add(rt, RT_OPER_ADD);
362 } else {
363 rt = &ip6_null_entry;
365 return rt;
#ifdef CONFIG_RT6_POLICY
/*
 * Flow lookup for a forwarded packet: the addresses come from the IPv6
 * header of @skb and the request is tagged FL_ARG_FORWARD.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
						      struct sk_buff *skb)
{
	struct fl_acc_args args;

	args.type = FL_ARG_FORWARD;
	args.fl_u.skb = skb;

	return rt6_flow_lookup(rt, &skb->nh.ipv6h->daddr,
			       &skb->nh.ipv6h->saddr, &args);
}

/*
 * Flow lookup for a locally originated packet: the addresses come from
 * the flow descriptor @fl and the request is tagged FL_ARG_ORIGIN.
 */
static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
						       struct sock *sk,
						       struct flowi *fl)
{
	struct fl_acc_args args;

	args.type = FL_ARG_ORIGIN;
	args.fl_u.fl_o.sk = sk;
	args.fl_u.fl_o.flow = fl;

	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
			       &args);
}

#endif
400 void ip6_route_input(struct sk_buff *skb)
402 struct fib6_node *fn;
403 struct rt6_info *rt;
404 struct dst_entry *dst;
406 RDBG(("ip6_route_input(%p) from %p\n", skb, __builtin_return_address(0)));
407 if ((dst = skb->dst) != NULL)
408 goto looped_back;
409 rt6_lock();
410 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
411 &skb->nh.ipv6h->saddr);
413 rt = fn->leaf;
415 if ((rt->rt6i_flags & RTF_CACHE)) {
416 if (ip6_rt_policy == 0) {
417 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
418 goto out;
421 #ifdef CONFIG_RT6_POLICY
422 if ((rt->rt6i_flags & RTF_FLOW)) {
423 struct rt6_info *sprt;
425 for (sprt = rt; sprt; sprt = sprt->u.next) {
426 if (rt6_flow_match_in(sprt, skb)) {
427 rt = sprt;
428 goto out;
432 #endif
435 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
437 if (ip6_rt_policy == 0) {
438 if (!rt->rt6i_nexthop && rt->rt6i_dev &&
439 ((rt->rt6i_flags & RTF_NONEXTHOP) == 0)) {
440 rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
441 &skb->nh.ipv6h->saddr);
443 } else {
444 #ifdef CONFIG_RT6_POLICY
445 rt = rt6_flow_lookup_in(rt, skb);
446 #endif
449 out:
450 dst = dst_clone((struct dst_entry *) rt);
451 rt6_unlock();
453 skb->dst = dst;
454 looped_back:
455 dst->input(skb);
/* ip6_route_output(): find the route for a locally generated flow @fl and
   return it after dst_clone().

   "strict" is non-zero when the destination is multicast or link-local;
   rt6_device_match() then refuses routes on any interface other than
   fl->oif (or loopback).

   Cached leaf (RTF_CACHE): narrowed by device only.  If strict matching
   yields ip6_null_entry, the parents of the node are walked: reaching a
   node flagged RTN_ROOT ends the search with the current result, while
   the first parent that carries route info (RTN_RTINFO) restarts the
   selection from that node's leaf.

   Non-cached leaf: an RTF_DEFAULT leaf whose metric is at least
   IP6_RT_PRIO_ADDRCONF is resolved with rt6_best_dflt(); a
   non-RTF_DEFAULT leaf is narrowed with rt6_device_match().  A route
   with a device but no nexthop, and without RTF_NONEXTHOP, is then
   cloned into a host route by rt6_cow().

   @sk is only used by the compiled-out CONFIG_RT6_POLICY code. */
458 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
460 struct fib6_node *fn;
461 struct rt6_info *rt;
462 struct dst_entry *dst;
463 int strict;
465 strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
467 rt6_lock();
468 fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
469 fl->nl_u.ip6_u.saddr);
471 restart:
472 rt = fn->leaf;
474 if ((rt->rt6i_flags & RTF_CACHE)) {
475 RDBG(("RTF_CACHE "));
476 if (ip6_rt_policy == 0) {
477 rt = rt6_device_match(rt, fl->oif, strict);
479 /* BUGGGG! It is capital bug, that was hidden
480 by not-cloning multicast routes. However,
481 the same problem was with link-local addresses.
482 Fix is the following if-statement,
483 but it will not properly handle Pedro's subtrees --ANK
485 if (rt == &ip6_null_entry && strict) {
486 while ((fn = fn->parent) != NULL) {
487 if (fn->fn_flags & RTN_ROOT)
488 goto out;
489 if (fn->fn_flags & RTN_RTINFO)
490 goto restart;
493 RDBG(("devmatch(%p) ", rt));
494 goto out;
497 #ifdef CONFIG_RT6_POLICY
498 if ((rt->rt6i_flags & RTF_FLOW)) {
499 struct rt6_info *sprt;
501 for (sprt = rt; sprt; sprt = sprt->u.next) {
502 if (rt6_flow_match_out(sprt, sk)) {
503 rt = sprt;
504 goto out;
508 #endif
510 RDBG(("!RTF_CACHE "));
511 if (rt->rt6i_flags & RTF_DEFAULT) {
512 RDBG(("RTF_DEFAULT "));
513 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) {
514 rt = rt6_best_dflt(rt, fl->oif);
515 RDBG(("best_dflt(%p) ", rt));
517 } else {
518 rt = rt6_device_match(rt, fl->oif, strict);
519 RDBG(("!RTF_DEFAULT devmatch(%p) ", rt));
522 if (ip6_rt_policy == 0) {
523 if (!rt->rt6i_nexthop && rt->rt6i_dev &&
524 ((rt->rt6i_flags & RTF_NONEXTHOP) == 0)) {
525 rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
526 fl->nl_u.ip6_u.saddr);
527 RDBG(("(!nhop&&rt6i_dev&&!RTF_NONEXTHOP) cow(%p) ", rt));
529 } else {
530 #ifdef CONFIG_RT6_POLICY
531 rt = rt6_flow_lookup_out(rt, sk, fl);
532 #endif
535 out:
536 dst = dst_clone((struct dst_entry *) rt);
537 rt6_unlock();
538 RDBG(("dclone/ret(%p)\n", dst));
539 return dst;
543 static void rt6_ins(struct rt6_info *rt)
545 start_bh_atomic();
546 if (atomic_read(&rt6_tbl_lock) == 1)
547 fib6_add(&ip6_routing_table, rt);
548 else
549 rtreq_add(rt, RT_OPER_ADD);
550 end_bh_atomic();
554 * Destination cache support functions
556 * BUGGG! This function is absolutely wrong.
557 * First of all it is never called. (look at include/net/dst.h)
558 * Second, even when it is called rt->rt6i_node == NULL
559 * ** partially fixed: now dst->obsolete = -1 for IPv6 not cache routes.
560 * Third, even we fixed previous bugs,
561 * it will not work because sernum is incorrectly checked/updated and
562 * it does not handle change of the parent of cloned route.
563 * Purging stray clones is not easy task, it would require
564 * massive remake of ip6_fib.c. Alas...
565 * --ANK
568 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
570 struct rt6_info *rt;
572 rt = (struct rt6_info *) dst;
574 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
575 return dst;
577 dst_release(dst);
578 return NULL;
581 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
584 * FIXME
586 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
587 __builtin_return_address(0)));
588 return NULL;
591 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
593 dst_release(dst);
594 return NULL;
597 static void ip6_link_failure(struct sk_buff *skb)
599 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
602 static int ip6_dst_gc()
604 static unsigned expire = 30*HZ;
605 static unsigned long last_gc;
606 unsigned long now = jiffies;
608 start_bh_atomic();
609 if ((long)(now - last_gc) < ip6_rt_gc_min_interval)
610 goto out;
612 expire++;
613 fib6_run_gc(expire);
614 last_gc = now;
615 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
616 expire = ip6_rt_gc_timeout>>1;
618 out:
619 expire -= expire>>ip6_rt_gc_elasticity;
620 end_bh_atomic();
621 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */

/*
 * Zero every bit of @pfx beyond the first @plen bits.
 *
 * @plen is expected to be in 0..128.  A length of 128 leaves the
 * address untouched.  Values outside that range are ignored (the
 * address is left as is) instead of indexing or clearing memory outside
 * the 16-byte address.
 */
static void ipv6_wash_prefix(struct in6_addr *pfx, int plen)
{
	int first_clear;	/* index of the first byte to zero entirely */
	int tail_bits;		/* prefix bits living in a partial byte */

	if (plen < 0 || plen >= 128)
		return;

	first_clear = plen >> 3;
	tail_bits = plen & 0x7;

	if (tail_bits != 0) {
		/* Keep the top tail_bits bits of the boundary byte. */
		pfx->s6_addr[first_clear] &= (unsigned char)(0xFF << (8 - tail_bits));
		first_clear++;
	}

	memset(pfx->s6_addr + first_clear, 0, 16 - first_clear);
}
641 static int ipv6_get_mtu(struct device *dev)
643 struct inet6_dev *idev;
645 idev = ipv6_get_idev(dev);
646 if (idev)
647 return idev->cnf.mtu6;
648 else
649 return 576;
652 static int ipv6_get_hoplimit(struct device *dev)
654 struct inet6_dev *idev;
656 idev = ipv6_get_idev(dev);
657 if (idev)
658 return idev->cnf.hop_limit;
659 else
660 return ipv6_devconf.hop_limit;
/* ip6_route_add(): build a route from the user-supplied @rtmsg and
   install it.

   Status is reported through *err only (0 on success, negative errno
   otherwise): the function ALWAYS returns NULL, see the closing "#if 0"
   and the comments by ANK.

   Steps, in order:
   - reject prefix lengths above 128 (-EINVAL); a zero metric is
     replaced by IP6_RT_PRIO_USER (this modifies @rtmsg);
   - allocate the route (-ENOMEM on failure), set obsolete = -1 and copy
     rtmsg_info into rt6i_expires;
   - choose the input handler: ip6_mc_input for a multicast destination,
     ip6_forward otherwise; the output handler is ip6_output;
   - resolve rtmsg_ifindex to a device when one is given (-ENODEV);
   - copy and "wash" the destination and source prefixes;
   - RTF_REJECT requests, and routes via a loopback device to a
     non-loopback destination, become reject routes on "lo" that discard
     packets with -ENETUNREACH;
   - for RTF_GATEWAY a gateway that is not link-local unicast must at
     least be unicast (-EINVAL) and must itself be reachable through a
     non-gateway route (-EHOSTUNREACH) on a non-loopback device
     (-EINVAL); that route's device is then used;
   - a device is mandatory from here on (-ENODEV);
   - with RTF_GATEWAY or RTF_NONEXTHOP the nexthop neighbour is looked
     up immediately (-ENOMEM if that fails);
   - metric, device, pmtu, rtt, hop limit and flags are filled in and
     the route is handed to rt6_ins() under rt6_lock().

   On any error the half-built route is released with dst_free(). */
667 struct rt6_info *ip6_route_add(struct in6_rtmsg *rtmsg, int *err)
669 struct rt6_info *rt;
670 struct device *dev = NULL;
671 int addr_type;
673 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) {
674 *err = -EINVAL;
675 return NULL;
677 if (rtmsg->rtmsg_metric == 0)
678 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
680 *err = 0;
682 rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops);
684 if (rt == NULL) {
685 RDBG(("dalloc fails, "));
686 *err = -ENOMEM;
687 return NULL;
690 rt->u.dst.obsolete = -1;
691 rt->rt6i_expires = rtmsg->rtmsg_info;
693 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
695 if (addr_type & IPV6_ADDR_MULTICAST) {
696 RDBG(("MCAST, "));
697 rt->u.dst.input = ip6_mc_input;
698 } else {
699 RDBG(("!MCAST "));
700 rt->u.dst.input = ip6_forward;
703 rt->u.dst.output = ip6_output;
705 if (rtmsg->rtmsg_ifindex) {
706 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
707 if (dev == NULL) {
708 *err = -ENODEV;
709 goto out;
713 ipv6_addr_copy(&rt->rt6i_dst.addr, &rtmsg->rtmsg_dst);
714 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
715 ipv6_wash_prefix(&rt->rt6i_dst.addr, rt->rt6i_dst.plen);
717 ipv6_addr_copy(&rt->rt6i_src.addr, &rtmsg->rtmsg_src);
718 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
719 ipv6_wash_prefix(&rt->rt6i_src.addr, rt->rt6i_src.plen);
721 /* We cannot add true routes via loopback here,
722 they would result in kernel looping; promote them to reject routes
724 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
725 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
726 dev = dev_get("lo");
727 rt->u.dst.output = ip6_pkt_discard;
728 rt->u.dst.input = ip6_pkt_discard;
729 rt->u.dst.error = -ENETUNREACH;
730 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
731 rt->rt6i_metric = rtmsg->rtmsg_metric;
732 rt->rt6i_dev = dev;
733 goto install_route;
736 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
737 struct in6_addr *gw_addr;
738 int gwa_type;
740 gw_addr = &rtmsg->rtmsg_gateway;
741 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
742 gwa_type = ipv6_addr_type(gw_addr);
744 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
745 struct rt6_info *grt;
747 /* IPv6 strictly inhibits using not link-local
748 addresses as nexthop address.
749 It is very good, but in some (rare!) circumstances
750 (SIT, NBMA NOARP links) it is handy to allow
751 some exceptions.
753 if (!(gwa_type&IPV6_ADDR_UNICAST)) {
754 *err = -EINVAL;
755 goto out;
758 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, RTF_LINKRT);
760 if (grt == NULL || (grt->rt6i_flags&RTF_GATEWAY)) {
761 *err = -EHOSTUNREACH;
762 goto out;
764 dev = grt->rt6i_dev;
766 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) {
767 *err = -EINVAL;
768 goto out;
772 if (dev == NULL) {
773 RDBG(("!dev, "));
774 *err = -ENODEV;
775 goto out;
778 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
779 rt->rt6i_nexthop = ndisc_get_neigh(dev, &rt->rt6i_gateway);
780 if (rt->rt6i_nexthop == NULL) {
781 RDBG(("!nxthop, "));
782 *err = -ENOMEM;
783 goto out;
785 RDBG(("nxthop, "));
788 rt->rt6i_metric = rtmsg->rtmsg_metric;
790 rt->rt6i_dev = dev;
791 rt->u.dst.pmtu = ipv6_get_mtu(dev);
792 rt->u.dst.rtt = TCP_TIMEOUT_INIT;
793 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
794 rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
795 else
796 rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
797 rt->rt6i_flags = rtmsg->rtmsg_flags;
799 install_route:
800 RDBG(("rt6ins(%p) ", rt));
802 rt6_lock();
803 rt6_ins(rt);
804 rt6_unlock();
806 /* BUGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG!
808 If rt6_ins will fail (and it occurs regularly f.e. if route
809 already existed), the route will be freed -> Finita.
810 Crash. No recovery. NO FIX. Unfortunately, it is not the only
811 place will it is fatal. It is sad, I believed this
812 code is a bit more accurate :-(
814 Really, the problem can be solved in two ways:
816 * As I did in old 2.0 IPv4: to increase use count and force
817 user to destroy stray route. It requires some care,
818 well, much more care.
819 * Second and the best: to get rid of this damn backlogging
820 system. I wonder why Pedro so liked it. It was the most
821 unhappy day when I invented it (well, by a strange reason
822 I believed that it is very clever :-)),
823 and when I managed to clean IPv4 of this crap,
824 it was really great win.
825 BTW I forgot how 2.0 route/arp works :-) :-)
826 --ANK
829 out:
830 if (*err) {
831 RDBG(("dfree(%p) ", rt));
832 dst_free((struct dst_entry *) rt);
833 rt = NULL;
835 RDBG(("ret(%p)\n", rt));
836 #if 0
837 return rt;
838 #else
839 /* BUGGG! For now always return NULL. (see above)
841 Really, it was used only in two places, and one of them
842 (rt6_add_dflt_router) is repaired, ip6_fw is not essential
843 at all. --ANK
845 return NULL;
846 #endif
849 int ip6_del_rt(struct rt6_info *rt)
851 rt6_lock();
853 start_bh_atomic();
855 /* I'd add here couple of cli()
856 cli(); cli(); cli();
858 Now it is really LOCKED. :-) :-) --ANK
861 rt6_dflt_pointer = NULL;
863 if (atomic_read(&rt6_tbl_lock) == 1)
864 fib6_del(rt);
865 else
866 rtreq_add(rt, RT_OPER_DEL);
867 end_bh_atomic();
868 rt6_unlock();
869 return 0;
/* ip6_route_del(): delete the first route that matches @rtmsg.

   The FIB is searched by destination/source address.  If the leaf found
   is more specific than requested (its dst prefix length is larger than
   rtmsg_dst_len), the parents of its node are walked up to, but not
   including, the root; the first parent carrying route info restarts
   the comparison with that node's leaf.

   Once a leaf with the exact destination and source prefix length is
   found, its route list is scanned.  A non-zero rtmsg_ifindex must
   match the route's device, the gateway must match when RTF_GATEWAY is
   set in rtmsg_flags, and a non-zero rtmsg_metric must match.  The
   first route that passes is removed with ip6_del_rt().

   Returns 0 when a route was deleted, -ESRCH otherwise. */
872 int ip6_route_del(struct in6_rtmsg *rtmsg)
874 struct fib6_node *fn;
875 struct rt6_info *rt;
877 rt6_lock();
878 fn = fib6_lookup(&ip6_routing_table, &rtmsg->rtmsg_dst, &rtmsg->rtmsg_src);
879 rt = fn->leaf;
882 * Blow it away
884 * BUGGGG It will not help with Pedro's subtrees.
885 * We urgently need fib6_locate_node function, and
886 * it is not the only place where rt6_lookup is used
887 * for wrong purpose.
888 * --ANK
890 restart:
891 if (rt && rt->rt6i_src.plen == rtmsg->rtmsg_src_len) {
892 if (rt->rt6i_dst.plen > rtmsg->rtmsg_dst_len) {
893 struct fib6_node *fn = rt->rt6i_node;
894 while ((fn = fn->parent) != NULL) {
895 if (fn->fn_flags & RTN_ROOT)
896 break;
897 if (fn->fn_flags & RTN_RTINFO) {
898 rt = fn->leaf;
899 goto restart;
904 if (rt->rt6i_dst.plen == rtmsg->rtmsg_dst_len) {
905 for ( ; rt; rt = rt->u.next) {
906 if (rtmsg->rtmsg_ifindex &&
907 (rt->rt6i_dev == NULL ||
908 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
909 continue;
910 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
911 ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
912 continue;
913 if (rtmsg->rtmsg_metric &&
914 rtmsg->rtmsg_metric != rt->rt6i_metric)
915 continue;
916 ip6_del_rt(rt);
917 rt6_unlock();
918 return 0;
922 rt6_unlock();
924 return -ESRCH;
929 * bottom handler, runs with atomic_bh protection
931 void __rt6_run_bh(void)
933 struct rt6_req *rtreq;
935 while ((rtreq = rtreq_dequeue())) {
936 switch (rtreq->operation) {
937 case RT_OPER_ADD:
938 fib6_add(&ip6_routing_table, rtreq->ptr);
939 break;
940 case RT_OPER_DEL:
941 fib6_del(rtreq->ptr);
942 break;
944 kfree(rtreq);
946 rt6_bh_mask = 0;
949 #ifdef CONFIG_IPV6_NETLINK
951 * NETLINK interface
952 * routing socket moral equivalent
955 static int rt6_msgrcv(int unit, struct sk_buff *skb)
957 int count = 0;
958 struct in6_rtmsg *rtmsg;
959 int err;
961 rtnl_lock();
962 while (skb->len) {
963 if (skb->len < sizeof(struct in6_rtmsg)) {
964 count = -EINVAL;
965 goto out;
968 rtmsg = (struct in6_rtmsg *) skb->data;
969 skb_pull(skb, sizeof(struct in6_rtmsg));
970 count += sizeof(struct in6_rtmsg);
972 switch (rtmsg->rtmsg_type) {
973 case RTMSG_NEWROUTE:
974 ip6_route_add(rtmsg, &err);
975 break;
976 case RTMSG_DELROUTE:
977 ip6_route_del(rtmsg);
978 break;
979 default:
980 count = -EINVAL;
981 goto out;
985 out:
986 rtnl_unlock();
987 kfree_skb(skb);
988 return count;
991 static void rt6_sndrtmsg(struct in6_rtmsg *rtmsg)
993 struct sk_buff *skb;
995 skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC);
996 if (skb == NULL)
997 return;
999 memcpy(skb_put(skb, sizeof(struct in6_rtmsg)), &rtmsg,
1000 sizeof(struct in6_rtmsg));
1002 if (netlink_post(NETLINK_ROUTE6, skb))
1003 kfree_skb(skb);
1006 void rt6_sndmsg(int type, struct in6_addr *dst, struct in6_addr *src,
1007 struct in6_addr *gw, struct device *dev,
1008 int dstlen, int srclen, int metric, __u32 flags)
1010 struct sk_buff *skb;
1011 struct in6_rtmsg *msg;
1013 skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC);
1014 if (skb == NULL)
1015 return;
1017 msg = (struct in6_rtmsg *) skb_put(skb, sizeof(struct in6_rtmsg));
1019 memset(msg, 0, sizeof(struct in6_rtmsg));
1021 msg->rtmsg_type = type;
1023 if (dst)
1024 ipv6_addr_copy(&msg->rtmsg_dst, dst);
1026 if (src) {
1027 ipv6_addr_copy(&msg->rtmsg_src, src);
1028 msg->rtmsg_src_len = srclen;
1031 if (gw)
1032 ipv6_addr_copy(&msg->rtmsg_gateway, gw);
1034 msg->rtmsg_dst_len = dstlen;
1035 msg->rtmsg_metric = metric;
1037 if (dev)
1038 msg->rtmsg_ifindex = dev->ifindex;
1040 msg->rtmsg_flags = flags;
1042 if (netlink_post(NETLINK_ROUTE6, skb))
1043 kfree_skb(skb);
1045 #endif /* CONFIG_IPV6_NETLINK */
1048 * Handle redirects
1050 struct rt6_info *rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051 struct in6_addr *target, struct device *dev,
1052 int on_link)
1054 struct rt6_info *rt, *nrt;
1056 /* Locate old route to this destination. */
1057 rt = rt6_lookup(dest, NULL, dev->ifindex, 0);
1059 if (rt == NULL || rt->u.dst.error)
1060 return NULL;
1062 /* Redirect received -> path was valid.
1063 Look, redirects are sent only in response to data packets,
1064 so that this nexthop apparently is reachable. --ANK
1066 dst_confirm(&rt->u.dst);
1068 /* Duplicate redirect: silently ignore. */
1069 if (ipv6_addr_cmp(target, &rt->rt6i_gateway) == 0)
1070 return NULL;
1072 /* Current route is on-link; redirect is always invalid. */
1073 if (!(rt->rt6i_flags&RTF_GATEWAY))
1074 return NULL;
1076 #if !defined(CONFIG_IPV6_EUI64) || defined(CONFIG_IPV6_NO_PB)
1078 * During transition gateways have more than
1079 * one link local address. Certainly, it is violation
1080 * of basic principles, but it is temparary.
1083 * RFC 1970 specifies that redirects should only be
1084 * accepted if they come from the nexthop to the target.
1085 * Due to the way default routers are chosen, this notion
1086 * is a bit fuzzy and one might need to check all default
1087 * routers.
1090 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
1091 if (rt->rt6i_flags & RTF_DEFAULT) {
1092 rt = ip6_routing_table.leaf;
1094 for (; rt; rt = rt->u.next) {
1095 if (!ipv6_addr_cmp(saddr, &rt->rt6i_gateway))
1096 goto source_ok;
1099 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100 "for redirect target\n");
1101 return NULL;
1104 source_ok:
1105 #endif
1108 * We have finally decided to accept it.
1110 if (rt->rt6i_dst.plen == 128) {
1111 /* BUGGGG! Very bad bug. Fast path code does not protect
1112 * itself of changing nexthop on the fly, it was supposed
1113 * that crucial parameters (dev, nexthop, hh) ARE VOLATILE.
1114 * --ANK
1115 * Not fixed!! I plugged it to avoid random crashes
1116 * (they are very unlikely, but I do not want to shrug
1117 * every time when redirect arrives)
1118 * but the plug must be removed. --ANK
1121 #if 0
1123 * Already a host route.
1126 if (rt->rt6i_nexthop)
1127 neigh_release(rt->rt6i_nexthop);
1128 rt->rt6i_flags |= RTF_MODIFIED | RTF_CACHE;
1129 if (on_link)
1130 rt->rt6i_flags &= ~RTF_GATEWAY;
1131 ipv6_addr_copy(&rt->rt6i_gateway, target);
1132 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, target);
1133 return rt;
1134 #else
1135 return NULL;
1136 #endif
1139 nrt = ip6_rt_copy(rt);
1140 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1141 if (on_link)
1142 nrt->rt6i_flags &= ~RTF_GATEWAY;
1144 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1145 nrt->rt6i_dst.plen = 128;
1147 ipv6_addr_copy(&nrt->rt6i_gateway, target);
1148 nrt->rt6i_nexthop = ndisc_get_neigh(nrt->rt6i_dev, target);
1149 nrt->rt6i_dev = dev;
1150 nrt->u.dst.pmtu = ipv6_get_mtu(dev);
1151 if (!ipv6_addr_is_multicast(&nrt->rt6i_dst.addr))
1152 nrt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
1154 rt6_lock();
1155 rt6_ins(nrt);
1156 rt6_unlock();
1158 /* BUGGGGGGG! nrt can point to nowhere. */
1159 return nrt;
1163 * Handle ICMP "packet too big" messages
1164 * i.e. Path MTU discovery
1167 void rt6_pmtu_discovery(struct in6_addr *addr, struct device *dev, int pmtu)
1169 struct rt6_info *rt, *nrt;
1171 if (pmtu < 576 || pmtu > 65536) {
1172 #if RT6_DEBUG >= 1
1173 printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1174 pmtu);
1175 #endif
1176 return;
1179 rt = rt6_lookup(addr, NULL, dev->ifindex, 0);
1181 if (rt == NULL || rt->u.dst.error) {
1182 #if RT6_DEBUG >= 2
1183 printk(KERN_DEBUG "rt6_pmtu_discovery: no route to host\n");
1184 #endif
1185 return;
1188 if (pmtu >= rt->u.dst.pmtu)
1189 return;
1191 /* New mtu received -> path was valid.
1192 They are sent only in response to data packets,
1193 so that this nexthop apparently is reachable. --ANK
1195 dst_confirm(&rt->u.dst);
1197 /* It is wrong, but I plugged the hole here.
1198 On-link routes are cloned differently,
1199 look at rt6_redirect --ANK
1201 if (!(rt->rt6i_flags&RTF_GATEWAY))
1202 return;
1204 if (rt->rt6i_dst.plen == 128) {
1206 * host route
1208 rt->u.dst.pmtu = pmtu;
1209 rt->rt6i_flags |= RTF_MODIFIED;
1211 return;
1214 nrt = ip6_rt_copy(rt);
1215 ipv6_addr_copy(&nrt->rt6i_dst.addr, addr);
1216 nrt->rt6i_dst.plen = 128;
1218 nrt->rt6i_flags |= (RTF_DYNAMIC | RTF_CACHE);
1220 /* It was missing. :-) :-)
1221 I wonder, kernel was deemed to crash after pkt_too_big
1222 and nobody noticed it. Hey, guys, do someone really
1223 use it? --ANK
1225 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1227 rt6_lock();
1228 rt6_ins(rt);
1229 rt6_unlock();
1233 * Misc support functions
1236 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1238 struct rt6_info *rt;
1240 rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops);
1242 if (rt) {
1243 rt->u.dst.input = ort->u.dst.input;
1244 rt->u.dst.output = ort->u.dst.output;
1246 rt->u.dst.pmtu = ort->u.dst.pmtu;
1247 rt->u.dst.rtt = ort->u.dst.rtt;
1248 rt->u.dst.window = ort->u.dst.window;
1249 rt->u.dst.mxlock = ort->u.dst.mxlock;
1250 rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1251 rt->rt6i_dev = ort->rt6i_dev;
1253 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1254 rt->rt6i_keylen = ort->rt6i_keylen;
1255 rt->rt6i_flags = ort->rt6i_flags;
1256 rt->rt6i_metric = ort->rt6i_metric;
1258 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1259 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1261 return rt;
1264 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct device *dev)
1266 struct rt6_info *rt;
1267 struct fib6_node *fn;
1269 RDBG(("rt6_get_dflt_router(%p,%p)[%p]", addr, dev,
1270 __builtin_return_address(0)));
1271 #if RT6_DEBUG >= 3
1273 int i;
1275 RDBG(("addr["));
1276 for(i = 0; i < 8; i++) {
1277 RDBG(("%04x%c", addr->s6_addr16[i],
1278 i == 7 ? ']' : ':'));
1281 #endif
1282 RDBG(("\n"));
1283 rt6_lock();
1285 fn = &ip6_routing_table;
1287 for (rt = fn->leaf; rt; rt=rt->u.next) {
1288 if (dev == rt->rt6i_dev &&
1289 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1290 break;
1293 rt6_unlock();
1294 return rt;
1297 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1298 struct device *dev)
1300 struct in6_rtmsg rtmsg;
1301 struct rt6_info *rt;
1302 int err;
1304 RDBG(("rt6_add_dflt_router(%p,%p)[%p] ", gwaddr, dev,
1305 __builtin_return_address(0)));
1306 #if RT6_DEBUG >= 3
1308 struct in6_addr *addr = gwaddr;
1309 int i;
1311 RDBG(("gwaddr["));
1312 for(i = 0; i < 8; i++) {
1313 RDBG(("%04x%c", addr->s6_addr16[i],
1314 i == 7 ? ']' : ':'));
1317 #endif
1318 RDBG(("\n"));
1320 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1321 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1322 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1323 rtmsg.rtmsg_metric = 1024;
1324 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1326 rtmsg.rtmsg_ifindex = dev->ifindex;
1328 rt = ip6_route_add(&rtmsg, &err);
1330 /* BUGGGGGGGGGGGGGGGGGGGG!
1331 rt can be not NULL, but point to heavens.
1334 if (err) {
1335 printk(KERN_DEBUG "rt6_add_dflt: ip6_route_add error %d\n",
1336 err);
1338 return rt;
1341 void rt6_purge_dflt_routers(int last_resort)
1343 struct rt6_info *rt;
1344 struct fib6_node *fn;
1345 u32 flags;
1347 RDBG(("rt6_purge_dflt_routers(%d)[%p]\n", last_resort,
1348 __builtin_return_address(0)));
1349 fn = &ip6_routing_table;
1351 rt6_dflt_pointer = NULL;
1353 if (last_resort)
1354 flags = RTF_ALLONLINK;
1355 else
1356 flags = RTF_DEFAULT | RTF_ADDRCONF;
1358 for (rt = fn->leaf; rt; ) {
1359 if ((rt->rt6i_flags & flags)) {
1360 struct rt6_info *drt;
1361 #if RT6_DEBUG >= 2
1362 printk(KERN_DEBUG "rt6_purge_dflt: deleting entry\n");
1363 #endif
1364 drt = rt;
1365 rt = rt->u.next;
1366 ip6_del_rt(drt);
1367 continue;
1369 rt = rt->u.next;
1373 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1375 struct in6_rtmsg rtmsg;
1376 int err;
1378 RDBG(("ipv6_route_ioctl(%d,%p)\n", cmd, arg));
1379 switch(cmd) {
1380 case SIOCADDRT: /* Add a route */
1381 case SIOCDELRT: /* Delete a route */
1382 if (!suser())
1383 return -EPERM;
1384 err = copy_from_user(&rtmsg, arg,
1385 sizeof(struct in6_rtmsg));
1386 if (err)
1387 return -EFAULT;
1389 rtnl_lock();
1390 switch (cmd) {
1391 case SIOCADDRT:
1392 ip6_route_add(&rtmsg, &err);
1393 break;
1394 case SIOCDELRT:
1395 err = ip6_route_del(&rtmsg);
1396 break;
1397 default:
1398 err = -EINVAL;
1400 rtnl_unlock();
1402 #ifdef CONFIG_IPV6_NETLINK
1403 if (err == 0)
1404 rt6_sndrtmsg(&rtmsg);
1405 #endif
1406 return err;
1409 return -EINVAL;
1413 * Drop the packet on the floor
1416 int ip6_pkt_discard(struct sk_buff *skb)
1418 ipv6_statistics.Ip6OutNoRoutes++;
1419 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
1420 kfree_skb(skb);
1421 return 0;
1425 * Add address
1428 int ip6_rt_addr_add(struct in6_addr *addr, struct device *dev)
1430 struct rt6_info *rt;
1432 RDBG(("ip6_rt_addr_add(%p,%p)[%p]\n", addr, dev,
1433 __builtin_return_address(0)));
1434 #if RT6_DEBUG >= 3
1436 int i;
1438 RDBG(("addr["));
1439 for(i = 0; i < 8; i++) {
1440 RDBG(("%04x%c", addr->s6_addr16[i],
1441 i == 7 ? ']' : ':'));
1444 #endif
1445 RDBG(("\n"));
1447 rt = dst_alloc(sizeof(struct rt6_info), &ip6_dst_ops);
1448 if (rt == NULL)
1449 return -ENOMEM;
1451 rt->u.dst.input = ip6_input;
1452 rt->u.dst.output = ip6_output;
1453 rt->rt6i_dev = dev_get("lo");
1454 rt->u.dst.rtt = TCP_TIMEOUT_INIT;
1455 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1456 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1457 rt->u.dst.obsolete = -1;
1459 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1460 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1461 if (rt->rt6i_nexthop == NULL) {
1462 dst_free((struct dst_entry *) rt);
1463 return -ENOMEM;
1466 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1467 rt->rt6i_dst.plen = 128;
1469 rt6_lock();
1470 rt6_ins(rt);
1471 rt6_unlock();
1473 return 0;
1476 /* Delete address. Warning: you should check that this address
1477 disappeared before calling this function.
1480 int ip6_rt_addr_del(struct in6_addr *addr, struct device *dev)
1482 struct rt6_info *rt;
1484 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, RTF_LINKRT);
1485 if (rt && rt->rt6i_dst.plen == 128)
1486 return ip6_del_rt(rt);
1488 return 0;
1491 #ifdef CONFIG_RT6_POLICY
1493 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1495 struct flow_filter *frule;
1496 struct pkt_filter *filter;
1497 int res = 1;
1499 if ((frule = rt->rt6i_filter) == NULL)
1500 goto out;
1502 if (frule->type != FLR_INPUT) {
1503 res = 0;
1504 goto out;
1507 for (filter = frule->u.filter; filter; filter = filter->next) {
1508 __u32 *word;
1510 word = (__u32 *) skb->h.raw;
1511 word += filter->offset;
1513 if ((*word ^ filter->value) & filter->mask) {
1514 res = 0;
1515 break;
1519 out:
1520 return res;
1523 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1525 struct flow_filter *frule;
1526 int res = 1;
1528 if ((frule = rt->rt6i_filter) == NULL)
1529 goto out;
1531 if (frule->type != FLR_INPUT) {
1532 res = 0;
1533 goto out;
1536 if (frule->u.sk != sk)
1537 res = 0;
1538 out:
1539 return res;
1542 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1543 struct in6_addr *daddr,
1544 struct in6_addr *saddr,
1545 struct fl_acc_args *args)
1547 struct flow_rule *frule;
1548 struct rt6_info *nrt = NULL;
1549 struct pol_chain *pol;
1551 for (pol = rt6_pol_list; pol; pol = pol->next) {
1552 struct fib6_node *fn;
1553 struct rt6_info *sprt;
1555 fn = fib6_lookup(pol->rules, daddr, saddr);
1557 do {
1558 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1559 int res;
1561 frule = sprt->rt6i_flowr;
1562 #if RT6_DEBUG >= 2
1563 if (frule == NULL) {
1564 printk(KERN_DEBUG "NULL flowr\n");
1565 goto error;
1567 #endif
1568 res = frule->ops->accept(rt, sprt, args, &nrt);
1570 switch (res) {
1571 case FLOWR_SELECT:
1572 goto found;
1573 case FLOWR_CLEAR:
1574 goto next_policy;
1575 case FLOWR_NODECISION:
1576 break;
1577 default:
1578 goto error;
1582 fn = fn->parent;
1584 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1586 next_policy:
1589 error:
1590 return &ip6_null_entry;
1592 found:
1594 if (nrt == NULL)
1595 goto error;
1597 nrt->rt6i_flags |= RTF_CACHE;
1598 /* BUGGGG! nrt can point to nowhere! */
1599 rt6_ins(nrt);
1601 return nrt;
1603 #endif
/*
 *	No, I am not an idiot: I can see that this is the ugliest of ugly
 *	routines.  Anyone is welcome to write a better one. --ANK
 */
/* Walker state shared between rt6_ifdown() and rt6_ifdown_node(). */
struct rt6_ifdown_arg {
	struct device	*dev;	/* device to match; NULL matches any route */
	struct rt6_info	*rt;	/* first matching route found, else NULL */
};
1616 static void rt6_ifdown_node(struct fib6_node *fn, void *p_arg)
1618 struct rt6_info *rt;
1619 struct rt6_ifdown_arg *arg = (struct rt6_ifdown_arg *) p_arg;
1621 if (arg->rt != NULL)
1622 return;
1624 for (rt = fn->leaf; rt; rt = rt->u.next) {
1625 if (rt->rt6i_dev == arg->dev || arg->dev == NULL) {
1626 arg->rt = rt;
1627 return;
1632 void rt6_ifdown(struct device *dev)
1634 int count = 0;
1635 struct rt6_ifdown_arg arg;
1636 struct rt6_info *rt;
1638 do {
1639 arg.dev = dev;
1640 arg.rt = NULL;
1641 fib6_walk_tree(&ip6_routing_table, rt6_ifdown_node, &arg,
1642 RT6_FILTER_RTNODES);
1643 if (arg.rt != NULL)
1644 ip6_del_rt(arg.rt);
1645 count++;
1646 } while (arg.rt != NULL);
1648 /* And default routes ... */
1650 for (rt = ip6_routing_table.leaf; rt; ) {
1651 if (rt != &ip6_null_entry && (rt->rt6i_dev == dev || dev == NULL)) {
1652 struct rt6_info *deleting = rt;
1653 rt = rt->u.next;
1654 ip6_del_rt(deleting);
1655 continue;
1657 rt = rt->u.next;
1661 #ifdef CONFIG_RTNETLINK
1663 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1664 struct in6_rtmsg *rtmsg)
1666 memset(rtmsg, 0, sizeof(*rtmsg));
1668 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1669 rtmsg->rtmsg_src_len = r->rtm_src_len;
1670 rtmsg->rtmsg_flags = RTF_UP;
1671 if (r->rtm_type == RTN_UNREACHABLE)
1672 rtmsg->rtmsg_flags |= RTF_REJECT;
1674 if (rta[RTA_GATEWAY-1]) {
1675 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1676 return -EINVAL;
1677 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1678 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1680 if (rta[RTA_DST-1]) {
1681 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1682 return -EINVAL;
1683 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1685 if (rta[RTA_SRC-1]) {
1686 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1687 return -EINVAL;
1688 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1690 if (rta[RTA_OIF-1]) {
1691 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1692 return -EINVAL;
1693 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1695 if (rta[RTA_PRIORITY-1]) {
1696 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1697 return -EINVAL;
1698 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1700 return 0;
1703 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1705 struct rtmsg *r = NLMSG_DATA(nlh);
1706 struct in6_rtmsg rtmsg;
1708 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1709 return -EINVAL;
1710 return ip6_route_del(&rtmsg);
1713 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1715 struct rtmsg *r = NLMSG_DATA(nlh);
1716 struct in6_rtmsg rtmsg;
1717 int err = 0;
1719 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1720 return -EINVAL;
1721 ip6_route_add(&rtmsg, &err);
1722 return err;
/* Walker state shared between inet6_dump_fib() and rt6_dump_node(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* buffer the route messages go into */
	struct netlink_callback	*cb;	/* dump request: supplies pid and seq */
	int			skip;	/* routes to pass over before emitting */
	int			count;	/* routes passed over or emitted so far */
	int			stop;	/* set once a route could not be emitted */
};
1735 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1736 int type, pid_t pid, u32 seq)
1738 struct rtmsg *rtm;
1739 struct nlmsghdr *nlh;
1740 unsigned char *b = skb->tail;
1741 #ifdef CONFIG_RTNL_OLD_IFINFO
1742 unsigned char *o;
1743 #else
1744 struct rtattr *mx;
1745 #endif
1746 struct rta_cacheinfo ci;
1748 nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1749 rtm = NLMSG_DATA(nlh);
1750 rtm->rtm_family = AF_INET6;
1751 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1752 rtm->rtm_src_len = rt->rt6i_src.plen;
1753 rtm->rtm_tos = 0;
1754 rtm->rtm_table = RT_TABLE_MAIN;
1755 if (rt->rt6i_flags&RTF_REJECT)
1756 rtm->rtm_type = RTN_UNREACHABLE;
1757 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1758 rtm->rtm_type = RTN_LOCAL;
1759 else
1760 rtm->rtm_type = RTN_UNICAST;
1761 rtm->rtm_flags = 0;
1762 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1763 #ifdef CONFIG_RTNL_OLD_IFINFO
1764 rtm->rtm_nhs = 0;
1765 #endif
1766 rtm->rtm_protocol = RTPROT_BOOT;
1767 if (rt->rt6i_flags&RTF_DYNAMIC)
1768 rtm->rtm_protocol = RTPROT_REDIRECT;
1769 else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1770 rtm->rtm_protocol = RTPROT_KERNEL;
1771 else if (rt->rt6i_flags&RTF_DEFAULT)
1772 rtm->rtm_protocol = RTPROT_RA;
1774 if (rt->rt6i_flags&RTF_CACHE)
1775 rtm->rtm_flags |= RTM_F_CLONED;
1777 #ifdef CONFIG_RTNL_OLD_IFINFO
1778 o = skb->tail;
1779 #endif
1780 if (rtm->rtm_dst_len)
1781 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1782 if (rtm->rtm_src_len)
1783 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1784 #ifdef CONFIG_RTNL_OLD_IFINFO
1785 if (rt->u.dst.pmtu)
1786 RTA_PUT(skb, RTA_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
1787 if (rt->u.dst.window)
1788 RTA_PUT(skb, RTA_WINDOW, sizeof(unsigned), &rt->u.dst.window);
1789 if (rt->u.dst.rtt)
1790 RTA_PUT(skb, RTA_RTT, sizeof(unsigned), &rt->u.dst.rtt);
1791 #else
1792 mx = (struct rtattr*)skb->tail;
1793 RTA_PUT(skb, RTA_METRICS, 0, NULL);
1794 if (rt->u.dst.mxlock)
1795 RTA_PUT(skb, RTAX_LOCK, sizeof(unsigned), &rt->u.dst.mxlock);
1796 if (rt->u.dst.pmtu)
1797 RTA_PUT(skb, RTAX_MTU, sizeof(unsigned), &rt->u.dst.pmtu);
1798 if (rt->u.dst.window)
1799 RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window);
1800 if (rt->u.dst.rtt)
1801 RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt);
1802 mx->rta_len = skb->tail - (u8*)mx;
1803 if (mx->rta_len == RTA_LENGTH(0))
1804 skb_trim(skb, (u8*)mx - skb->data);
1805 #endif
1806 if (rt->u.dst.neighbour)
1807 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1808 if (rt->u.dst.dev)
1809 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1810 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1811 ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1812 if (rt->rt6i_expires)
1813 ci.rta_expires = rt->rt6i_expires - jiffies;
1814 else
1815 ci.rta_expires = 0;
1816 ci.rta_used = 0;
1817 ci.rta_clntref = atomic_read(&rt->u.dst.use);
1818 ci.rta_error = rt->u.dst.error;
1819 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1820 #ifdef CONFIG_RTNL_OLD_IFINFO
1821 rtm->rtm_optlen = skb->tail - o;
1822 #endif
1823 nlh->nlmsg_len = skb->tail - b;
1824 return skb->len;
1826 nlmsg_failure:
1827 rtattr_failure:
1828 skb_trim(skb, b - skb->data);
1829 return -1;
1832 static void rt6_dump_node(struct fib6_node *fn, void *p_arg)
1834 struct rt6_info *rt;
1835 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1837 if (arg->stop)
1838 return;
1840 for (rt = fn->leaf; rt; rt = rt->u.next) {
1841 if (arg->count < arg->skip) {
1842 arg->count++;
1843 continue;
1845 if (rt6_fill_node(arg->skb, rt, RTM_NEWROUTE,
1846 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq) <= 0) {
1847 arg->stop = 1;
1848 break;
1850 arg->count++;
1855 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1857 struct rt6_rtnl_dump_arg arg;
1859 arg.skb = skb;
1860 arg.cb = cb;
1861 arg.skip = cb->args[0];
1862 arg.count = 0;
1863 arg.stop = 0;
1864 start_bh_atomic();
1865 fib6_walk_tree(&ip6_routing_table, rt6_dump_node, &arg, RT6_FILTER_RTNODES);
1866 if (arg.stop == 0)
1867 rt6_dump_node(&ip6_routing_table, &arg);
1868 end_bh_atomic();
1869 cb->args[0] = arg.count;
1870 return skb->len;
1873 void inet6_rt_notify(int event, struct rt6_info *rt)
1875 struct sk_buff *skb;
1876 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1878 skb = alloc_skb(size, GFP_ATOMIC);
1879 if (!skb) {
1880 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1881 return;
1883 if (rt6_fill_node(skb, rt, event, 0, 0) < 0) {
1884 kfree_skb(skb);
1885 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1886 return;
1888 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1889 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, GFP_ATOMIC);
1892 #endif
1895 * /proc
1898 #ifdef CONFIG_PROC_FS
/*
 * Size of one line as produced by rt6_info_node(): three 32-digit hex
 * addresses, two 4-character prefix-length fields, four " %08x" fields
 * plus the leading space of the name field (40), and the rest.
 * NOTE(review): the total only holds for device names of at most eight
 * characters -- the name is printed with "%8s".
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State threaded through the /proc tree walkers. */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* file offset requested by the reader */
	int	length;		/* number of bytes requested */
	int	skip;		/* whole entries passed over so far */
	int	len;		/* bytes written to buffer so far */
};
1911 static void rt6_info_node(struct fib6_node *fn, void *p_arg)
1913 struct rt6_info *rt;
1914 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1916 for (rt = fn->leaf; rt; rt = rt->u.next) {
1917 int i;
1919 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1920 arg->skip++;
1921 continue;
1924 if (arg->len >= arg->length)
1925 return;
1927 for (i=0; i<16; i++) {
1928 sprintf(arg->buffer + arg->len, "%02x",
1929 rt->rt6i_dst.addr.s6_addr[i]);
1930 arg->len += 2;
1932 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1933 rt->rt6i_dst.plen);
1935 for (i=0; i<16; i++) {
1936 sprintf(arg->buffer + arg->len, "%02x",
1937 rt->rt6i_src.addr.s6_addr[i]);
1938 arg->len += 2;
1940 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1941 rt->rt6i_src.plen);
1943 if (rt->rt6i_nexthop) {
1944 for (i=0; i<16; i++) {
1945 sprintf(arg->buffer + arg->len, "%02x",
1946 rt->rt6i_nexthop->primary_key[i]);
1947 arg->len += 2;
1949 } else {
1950 sprintf(arg->buffer + arg->len,
1951 "00000000000000000000000000000000");
1952 arg->len += 32;
1954 arg->len += sprintf(arg->buffer + arg->len,
1955 " %08x %08x %08x %08x %8s\n",
1956 rt->rt6i_metric, atomic_read(&rt->rt6i_use),
1957 atomic_read(&rt->rt6i_ref), rt->rt6i_flags,
1958 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1962 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length,
1963 int dummy)
1965 struct rt6_proc_arg arg;
1966 arg.buffer = buffer;
1967 arg.offset = offset;
1968 arg.length = length;
1969 arg.skip = 0;
1970 arg.len = 0;
1972 fib6_walk_tree(&ip6_routing_table, rt6_info_node, &arg,
1973 RT6_FILTER_RTNODES);
1975 rt6_info_node(&ip6_routing_table, &arg);
1977 *start = buffer;
1978 if (offset)
1979 *start += offset % RT6_INFO_LEN;
1981 arg.len -= offset % RT6_INFO_LEN;
1983 if(arg.len > length)
1984 arg.len = length;
1985 if(arg.len < 0)
1986 arg.len = 0;
1988 return arg.len;
1991 #define PTR_SZ (sizeof(void *) * 2)
1992 #define FI_LINE_SZ (2 * (PTR_SZ) + 7 + 32 + 4 + 32 + 4)
1994 static void rt6_tree_node(struct fib6_node *fn, void *p_arg)
1996 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1997 struct rt6_info *rt;
1998 char f;
1999 int i;
2001 rt = fn->leaf;
2003 if (arg->skip < arg->offset / FI_LINE_SZ) {
2004 arg->skip++;
2005 return;
2008 if (arg->len + FI_LINE_SZ >= arg->length)
2009 return;
2011 f = (fn->fn_flags & RTN_RTINFO) ? 'r' : 'n';
2012 arg->len += sprintf(arg->buffer + arg->len, "%p %p %02x %c ",
2013 fn, fn->parent, fn->fn_bit, f);
2015 for (i=0; i<16; i++) {
2016 sprintf(arg->buffer + arg->len, "%02x",
2017 rt->rt6i_dst.addr.s6_addr[i]);
2018 arg->len += 2;
2020 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2021 rt->rt6i_dst.plen);
2023 for (i=0; i<16; i++) {
2024 sprintf(arg->buffer + arg->len, "%02x",
2025 rt->rt6i_src.addr.s6_addr[i]);
2026 arg->len += 2;
2028 arg->len += sprintf(arg->buffer + arg->len, " %02x\n",
2029 rt->rt6i_src.plen);
2033 static int rt6_proc_tree(char *buffer, char **start, off_t offset, int length,
2034 int dummy)
2036 struct rt6_proc_arg arg;
2037 arg.buffer = buffer;
2038 arg.offset = offset;
2039 arg.length = length;
2040 arg.skip = 0;
2041 arg.len = 0;
2043 fib6_walk_tree(&ip6_routing_table, rt6_tree_node, &arg, 0);
2045 *start = buffer;
2046 if (offset)
2047 *start += offset % RT6_INFO_LEN;
2049 arg.len -= offset % RT6_INFO_LEN;
2051 if(arg.len > length)
2052 arg.len = length;
2053 if(arg.len < 0)
2054 arg.len = 0;
2056 return arg.len;
2060 extern struct rt6_statistics rt6_stats;
2062 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length,
2063 int dummy)
2065 int len;
2067 len = sprintf(buffer, "%04x %04x %04x %04x %04x\n",
2068 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2069 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2070 rt6_stats.fib_rt_cache);
2072 len -= offset;
2074 if (len > length)
2075 len = length;
2076 if(len < 0)
2077 len = 0;
2079 *start = buffer + offset;
2081 return len;
2084 static struct proc_dir_entry proc_rt6_info = {
2085 PROC_NET_RT6, 10, "ipv6_route",
2086 S_IFREG | S_IRUGO, 1, 0, 0,
2087 0, &proc_net_inode_operations,
2088 rt6_proc_info
2090 static struct proc_dir_entry proc_rt6_tree = {
2091 PROC_NET_RT6_TREE, 7, "ip6_fib",
2092 S_IFREG | S_IRUGO, 1, 0, 0,
2093 0, &proc_net_inode_operations,
2094 rt6_proc_tree
2096 static struct proc_dir_entry proc_rt6_stats = {
2097 PROC_NET_RT6_STATS, 9, "rt6_stats",
2098 S_IFREG | S_IRUGO, 1, 0, 0,
2099 0, &proc_net_inode_operations,
2100 rt6_proc_stats
2102 #endif /* CONFIG_PROC_FS */
2104 #ifdef CONFIG_SYSCTL
2106 static int flush_delay;
2108 static
2109 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2110 void *buffer, size_t *lenp)
2112 if (write) {
2113 proc_dointvec(ctl, write, filp, buffer, lenp);
2114 if (flush_delay < 0)
2115 flush_delay = 0;
2116 start_bh_atomic();
2117 fib6_run_gc((unsigned long)flush_delay);
2118 end_bh_atomic();
2119 return 0;
2120 } else
2121 return -EINVAL;
2124 ctl_table ipv6_route_table[] = {
2125 {NET_IPV6_ROUTE_FLUSH, "flush",
2126 &flush_delay, sizeof(int), 0644, NULL,
2127 &ipv6_sysctl_rtcache_flush},
2128 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
2129 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
2130 &proc_dointvec},
2131 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
2132 &ip6_rt_max_size, sizeof(int), 0644, NULL,
2133 &proc_dointvec},
2134 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
2135 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
2136 &proc_dointvec_jiffies},
2137 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
2138 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
2139 &proc_dointvec_jiffies},
2140 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
2141 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
2142 &proc_dointvec_jiffies},
2143 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
2144 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
2145 &proc_dointvec_jiffies},
2149 #endif
/*
 *	Boot-time setup: register the three /proc entries (when procfs is
 *	configured) and attach the NETLINK_ROUTE6 receiver (when IPv6
 *	netlink is configured).
 */
__initfunc(void ip6_route_init(void))
{
#ifdef CONFIG_PROC_FS
	proc_net_register(&proc_rt6_info);
	proc_net_register(&proc_rt6_tree);
	proc_net_register(&proc_rt6_stats);
#endif

#ifdef CONFIG_IPV6_NETLINK
	netlink_attach(NETLINK_ROUTE6, rt6_msgrcv);
#endif
}
2164 #ifdef MODULE
2165 void ip6_route_cleanup(void)
2167 #ifdef CONFIG_PROC_FS
2168 proc_net_unregister(PROC_NET_RT6);
2169 proc_net_unregister(PROC_NET_RT6_TREE);
2170 proc_net_unregister(PROC_NET_RT6_STATS);
2171 #endif
2172 #ifdef CONFIG_IPV6_NETLINK
2173 netlink_detach(NETLINK_ROUTE6);
2174 #endif
2175 rt6_ifdown(NULL);
2177 #endif /* MODULE */