[IPV6]: ROUTE: Flag RTF_DEFAULT for Route Information for ::/0.
[linux-2.6/mini2440.git] / net / ipv6 / route.c
blobf587a0e6cf34d04f9e57a9757d522f7f120d0d97
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
16 /* Changes:
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
42 #ifdef CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
58 #include <asm/uaccess.h>
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
/*
 * Compile-time switches for this file.
 *
 * RDBG() and RT6_TRACE() only expand to printk calls when RT6_DEBUG is
 * at least 3; otherwise they compile to nothing.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already have a nexthop via rt6_alloc_clone() instead of using them as is.
 */
75 #define CLONE_OFFLINK_ROUTE 0
/*
 * Bits passed as the "strict" argument of rt6_select()/rt6_score_route():
 * IFACE rejects routes whose device does not match the requested oif,
 * REACHABLE rejects routes whose nexthop neighbour is not NUD_VALID.
 */
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
/*
 * Tunables for route garbage collection and path-MTU handling.
 *
 * ip6_rt_max_size        - ip6_dst_gc() forces a GC run and reports failure
 *                          while the dst entry count exceeds this value.
 * ip6_rt_gc_min_interval - minimum spacing between GC runs in ip6_dst_gc()
 *                          unless the table is over ip6_rt_max_size.
 * ip6_rt_gc_timeout      - ip6_dst_gc() resets its expiry to half of this
 *                          once the entry count drops below gc_thresh.
 * ip6_rt_gc_interval     - not static; presumably used by the fib6 GC timer
 *                          outside this file (TODO confirm).
 * ip6_rt_gc_elasticity   - shift used by ip6_dst_gc() to decay its expiry.
 * ip6_rt_mtu_expires     - lifetime given to entries created or updated by
 *                          rt6_pmtu_discovery().
 * ip6_rt_min_advmss      - lower bound applied by ipv6_advmss().
 */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/*
 * Forward declarations for helpers defined later in this file. Most of
 * them are referenced by the ip6_dst_ops and ip6_null_entry initializers
 * below, so they must be visible before those tables.
 */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* Route Information option support (used by rt6_route_rcv()). */
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107 #endif
/*
 * dst_ops table for IPv6 routes: wires the callbacks implemented in this
 * file into the destination cache and sizes entries as struct rt6_info.
 * ip6_dst_alloc() allocates every rt6_info from this table.
 */
109 static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
/*
 * Statically allocated "no route" entry. rt6_select() and
 * rt6_device_match() return it when nothing matches. It carries
 * -ENETUNREACH, discards packets in both directions and is flagged
 * RTF_REJECT | RTF_NONEXTHOP. Its metric is the largest u32 value.
 */
123 struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Root node of the IPv6 routing tree. Its leaf list initially holds only
 * ip6_null_entry. All lookups and modifications in this file go through
 * this root while holding rt6_lock.
 */
143 struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
148 /* Protects all the ip6 fib */
150 DEFINE_RWLOCK(rt6_lock);
153 /* allocate dst with ip6_dst_ops */
154 static __inline__ struct rt6_info *ip6_dst_alloc(void)
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
159 static void ip6_dst_destroy(struct dst_entry *dst)
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
170 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
185 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
192 * Route lookup. Any rt6_lock is implied.
195 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
216 local = sprt;
220 if (local)
221 return local;
223 if (strict)
224 return &ip6_null_entry;
226 return rt;
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in a
 * NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to its solicited-node
 * multicast address. A NULL @rt, or a route without nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write side of the lock
	 * is required; the state is re-checked once the lock is held.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* The solicitation is sent after the lock has been dropped. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Without router preference support probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
266 * Default Router Selection (RFC 2461 6.3.6)
268 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
279 static int inline rt6_check_neigh(struct rt6_info *rt)
281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
283 if (neigh) {
284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
289 return m;
292 static int rt6_score_route(struct rt6_info *rt, int oif,
293 int strict)
295 int m = rt6_check_dev(rt, oif);
296 if (!m && (strict & RT6_SELECT_F_IFACE))
297 return -1;
298 #ifdef CONFIG_IPV6_ROUTER_PREF
299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
300 #endif
301 if (rt6_check_neigh(rt))
302 m |= 16;
303 else if (strict & RT6_SELECT_F_REACHABLE)
304 return -1;
305 return m;
/*
 * Choose the best route among the entries at the front of *head that share
 * the metric of the first entry.
 *
 * Expired entries are skipped. Every other entry is scored with
 * rt6_score_route(); the highest score wins and the earliest entry wins
 * ties, because only a strictly greater score replaces the current match.
 * Entries that lose (including a displaced previous match) are handed to
 * rt6_probe().
 *
 * If nothing matched while RT6_SELECT_F_REACHABLE was requested, the first
 * entry is moved behind the last unexpired entry of the run, so the next
 * lookup starts from a different router.
 *
 * Returns the chosen route, or &ip6_null_entry when nothing matched.
 * Note that *head is dereferenced unconditionally, so both head and *head
 * must be non-NULL even though the trace statement tests head.
 */
308 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
309 int strict)
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
317 __FUNCTION__, head, head ? *head : NULL, oif);
319 for (rt = rt0, metric = rt0->rt6i_metric;
320 rt && rt->rt6i_metric == metric;
321 rt = rt->u.next) {
322 int m;
324 if (rt6_check_expired(rt))
325 continue;
/* remember the last live entry of the run for the rotation below */
327 last = rt;
329 m = rt6_score_route(rt, oif, strict);
330 if (m < 0)
331 continue;
333 if (m > mpri) {
334 rt6_probe(match);
335 match = rt;
336 mpri = m;
337 } else {
338 rt6_probe(rt);
342 if (!match &&
343 (strict & RT6_SELECT_F_REACHABLE) &&
344 last && last != rt0) {
345 /* no entries matched; do round-robin */
346 *head = rt0->u.next;
347 rt0->u.next = last->u.next;
348 last->u.next = rt0;
351 RT6_TRACE("%s() => %p, score=%d\n",
352 __FUNCTION__, match, mpri);
354 return (match ? match : &ip6_null_entry);
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement.
 *
 * @dev:    interface the advertisement arrived on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as gateway
 *
 * The option is validated, then the matching route-info route is deleted
 * (lifetime 0), created (none exists and lifetime is non-zero) or has its
 * preference refreshed. A lifetime of 0xffffffff means "never expires";
 * any other lifetime sets the route's expiry.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * len is signed: test the sign explicitly, otherwise a negative
	 * value is converted to a huge unsigned one by the comparison with
	 * sizeof and slips through.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
435 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
436 int oif, int strict)
438 struct fib6_node *fn;
439 struct rt6_info *rt;
441 read_lock_bh(&rt6_lock);
442 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
443 rt = rt6_device_match(fn->leaf, oif, strict);
444 dst_hold(&rt->u.dst);
445 rt->u.dst.__use++;
446 read_unlock_bh(&rt6_lock);
448 rt->u.dst.lastuse = jiffies;
449 if (rt->u.dst.error == 0)
450 return rt;
451 dst_release(&rt->u.dst);
452 return NULL;
455 /* ip6_ins_rt is called with FREE rt6_lock.
456 It takes new route entry, the addition fails by any reason the
457 route is freed. In any case, if caller does not hold it, it may
458 be destroyed.
461 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
462 void *_rtattr, struct netlink_skb_parms *req)
464 int err;
466 write_lock_bh(&rt6_lock);
467 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
468 write_unlock_bh(&rt6_lock);
470 return err;
473 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
474 struct in6_addr *saddr)
476 struct rt6_info *rt;
479 * Clone the route.
482 rt = ip6_rt_copy(ort);
484 if (rt) {
485 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
486 if (rt->rt6i_dst.plen != 128 &&
487 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
488 rt->rt6i_flags |= RTF_ANYCAST;
489 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
492 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
493 rt->rt6i_dst.plen = 128;
494 rt->rt6i_flags |= RTF_CACHE;
495 rt->u.dst.flags |= DST_HOST;
497 #ifdef CONFIG_IPV6_SUBTREES
498 if (rt->rt6i_src.plen && saddr) {
499 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
500 rt->rt6i_src.plen = 128;
502 #endif
504 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
508 return rt;
511 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
513 struct rt6_info *rt = ip6_rt_copy(ort);
514 if (rt) {
515 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
516 rt->rt6i_dst.plen = 128;
517 rt->rt6i_flags |= RTF_CACHE;
518 if (rt->rt6i_flags & RTF_REJECT)
519 rt->u.dst.error = ort->u.dst.error;
520 rt->u.dst.flags |= DST_HOST;
521 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
523 return rt;
/*
 * Used by ip6_route_input()/ip6_route_output() right after rt6_select().
 * When the selection produced the null entry, walk up the tree from the
 * current node: reaching a root node jumps to the caller's "out" label,
 * while the first ancestor that carries route information jumps to the
 * caller's "restart" label. The macro relies on the caller's local
 * variables "rt" and "fn" and on those two labels.
 */
526 #define BACKTRACK() \
527 if (rt == &ip6_null_entry) { \
528 while ((fn = fn->parent) != NULL) { \
529 if (fn->fn_flags & RTN_ROOT) { \
530 goto out; \
532 if (fn->fn_flags & RTN_RTINFO) \
533 goto restart; \
/*
 * Resolve the route for a received packet and attach it to skb->dst.
 *
 * The lookup runs under rt6_lock (read side) with rt6_select(), keyed by
 * the packet's destination/source addresses and the receiving interface.
 * Multicast and link-local destinations force an interface match.
 *
 * The first pass requires a reachable router. Whenever the "out" label is
 * reached with that requirement still set, it is cleared and the lookup is
 * repeated once without it.
 *
 * A selected non-cache route that has neither a nexthop nor RTF_NONEXTHOP
 * is turned into a host cache entry with rt6_alloc_cow() and inserted with
 * ip6_ins_rt(). If the insertion fails the whole lookup is retried, at
 * most "attempts" times; if the copy cannot be allocated the null entry is
 * used instead. The route finally stored in skb->dst has a reference held
 * and its usage statistics updated.
 */
538 void ip6_route_input(struct sk_buff *skb)
540 struct fib6_node *fn;
541 struct rt6_info *rt, *nrt;
542 int strict;
543 int attempts = 3;
544 int err;
545 int reachable = RT6_SELECT_F_REACHABLE;
547 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
549 relookup:
550 read_lock_bh(&rt6_lock);
552 restart_2:
553 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
554 &skb->nh.ipv6h->saddr);
556 restart:
557 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
558 BACKTRACK();
559 if (rt == &ip6_null_entry ||
560 rt->rt6i_flags & RTF_CACHE)
561 goto out;
/* keep the route alive while the table lock is dropped */
563 dst_hold(&rt->u.dst);
564 read_unlock_bh(&rt6_lock);
566 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
567 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
568 else {
569 #if CLONE_OFFLINK_ROUTE
570 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
571 #else
572 goto out2;
573 #endif
576 dst_release(&rt->u.dst);
577 rt = nrt ? : &ip6_null_entry;
579 dst_hold(&rt->u.dst);
580 if (nrt) {
/*
 * NOTE(review): skb is a received IPv6 packet here; confirm that reading
 * its control block through NETLINK_CB() is intended.
 */
581 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
582 if (!err)
583 goto out2;
586 if (--attempts <= 0)
587 goto out2;
590 * Race condition! In the gap, when rt6_lock was
591 * released someone could insert this route. Relookup.
593 dst_release(&rt->u.dst);
594 goto relookup;
596 out:
597 if (reachable) {
598 reachable = 0;
599 goto restart_2;
601 dst_hold(&rt->u.dst);
602 read_unlock_bh(&rt6_lock);
603 out2:
604 rt->u.dst.lastuse = jiffies;
605 rt->u.dst.__use++;
606 skb->dst = (struct dst_entry *) rt;
607 return;
/*
 * Resolve the output route for flow @fl and return its dst entry with a
 * reference held. @sk is not used by this function.
 *
 * The lookup runs under rt6_lock (read side) with rt6_select(), keyed by
 * the flow's destination/source addresses and fl->oif. Multicast and
 * link-local destinations force an interface match.
 *
 * The first pass requires a reachable router. Whenever the "out" label is
 * reached with that requirement still set, it is cleared and the lookup is
 * repeated once without it.
 *
 * A selected non-cache route that has neither a nexthop nor RTF_NONEXTHOP
 * is turned into a host cache entry with rt6_alloc_cow() and inserted with
 * ip6_ins_rt(). If the insertion fails the whole lookup is retried, at
 * most "attempts" times; if the copy cannot be allocated the null entry is
 * returned instead. Usage statistics of the returned entry are updated.
 */
610 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
612 struct fib6_node *fn;
613 struct rt6_info *rt, *nrt;
614 int strict;
615 int attempts = 3;
616 int err;
617 int reachable = RT6_SELECT_F_REACHABLE;
619 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
621 relookup:
622 read_lock_bh(&rt6_lock);
624 restart_2:
625 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
627 restart:
628 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
629 BACKTRACK();
630 if (rt == &ip6_null_entry ||
631 rt->rt6i_flags & RTF_CACHE)
632 goto out;
/* keep the route alive while the table lock is dropped */
634 dst_hold(&rt->u.dst);
635 read_unlock_bh(&rt6_lock);
637 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
638 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
639 else {
640 #if CLONE_OFFLINK_ROUTE
641 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
642 #else
643 goto out2;
644 #endif
647 dst_release(&rt->u.dst);
648 rt = nrt ? : &ip6_null_entry;
650 dst_hold(&rt->u.dst);
651 if (nrt) {
652 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
653 if (!err)
654 goto out2;
657 if (--attempts <= 0)
658 goto out2;
661 * Race condition! In the gap, when rt6_lock was
662 * released someone could insert this route. Relookup.
664 dst_release(&rt->u.dst);
665 goto relookup;
667 out:
668 if (reachable) {
669 reachable = 0;
670 goto restart_2;
672 dst_hold(&rt->u.dst);
673 read_unlock_bh(&rt6_lock);
674 out2:
675 rt->u.dst.lastuse = jiffies;
676 rt->u.dst.__use++;
677 return &rt->u.dst;
682 * Destination cache support functions
685 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
687 struct rt6_info *rt;
689 rt = (struct rt6_info *) dst;
691 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
692 return dst;
694 return NULL;
697 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
699 struct rt6_info *rt = (struct rt6_info *) dst;
701 if (rt) {
702 if (rt->rt6i_flags & RTF_CACHE)
703 ip6_del_rt(rt, NULL, NULL, NULL);
704 else
705 dst_release(dst);
707 return NULL;
710 static void ip6_link_failure(struct sk_buff *skb)
712 struct rt6_info *rt;
714 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
716 rt = (struct rt6_info *) skb->dst;
717 if (rt) {
718 if (rt->rt6i_flags&RTF_CACHE) {
719 dst_set_expires(&rt->u.dst, 0);
720 rt->rt6i_flags |= RTF_EXPIRES;
721 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
722 rt->rt6i_node->fn_sernum = -1;
726 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
728 struct rt6_info *rt6 = (struct rt6_info*)dst;
730 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
731 rt6->rt6i_flags |= RTF_MODIFIED;
732 if (mtu < IPV6_MIN_MTU) {
733 mtu = IPV6_MIN_MTU;
734 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
736 dst->metrics[RTAX_MTU-1] = mtu;
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the ones whose reference count
 * has dropped to zero.
 */
740 /* Protected by rt6_lock. */
741 static struct dst_entry *ndisc_dst_gc_list;
/* Defined further down; needed by ndisc_dst_alloc(). */
742 static int ipv6_get_mtu(struct net_device *dev);
744 static inline unsigned int ipv6_advmss(unsigned int mtu)
746 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
748 if (mtu < ip6_rt_min_advmss)
749 mtu = ip6_rt_min_advmss;
752 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
753 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
754 * IPV6_MAXPLEN is also valid and means: "any MSS,
755 * rely only on pmtu discovery"
757 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
758 mtu = IPV6_MAXPLEN;
759 return mtu;
762 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
763 struct neighbour *neigh,
764 struct in6_addr *addr,
765 int (*output)(struct sk_buff *))
767 struct rt6_info *rt;
768 struct inet6_dev *idev = in6_dev_get(dev);
770 if (unlikely(idev == NULL))
771 return NULL;
773 rt = ip6_dst_alloc();
774 if (unlikely(rt == NULL)) {
775 in6_dev_put(idev);
776 goto out;
779 dev_hold(dev);
780 if (neigh)
781 neigh_hold(neigh);
782 else
783 neigh = ndisc_get_neigh(dev, addr);
785 rt->rt6i_dev = dev;
786 rt->rt6i_idev = idev;
787 rt->rt6i_nexthop = neigh;
788 atomic_set(&rt->u.dst.__refcnt, 1);
789 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
790 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
791 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
792 rt->u.dst.output = output;
794 #if 0 /* there's no chance to use these for ndisc */
795 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
796 ? DST_HOST
797 : 0;
798 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
799 rt->rt6i_dst.plen = 128;
800 #endif
802 write_lock_bh(&rt6_lock);
803 rt->u.dst.next = ndisc_dst_gc_list;
804 ndisc_dst_gc_list = &rt->u.dst;
805 write_unlock_bh(&rt6_lock);
807 fib6_force_start_gc();
809 out:
810 return (struct dst_entry *)rt;
813 int ndisc_dst_gc(int *more)
815 struct dst_entry *dst, *next, **pprev;
816 int freed;
818 next = NULL;
819 pprev = &ndisc_dst_gc_list;
820 freed = 0;
821 while ((dst = *pprev) != NULL) {
822 if (!atomic_read(&dst->__refcnt)) {
823 *pprev = dst->next;
824 dst_free(dst);
825 freed++;
826 } else {
827 pprev = &dst->next;
828 (*more)++;
832 return freed;
835 static int ip6_dst_gc(void)
837 static unsigned expire = 30*HZ;
838 static unsigned long last_gc;
839 unsigned long now = jiffies;
841 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
842 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
843 goto out;
845 expire++;
846 fib6_run_gc(expire);
847 last_gc = now;
848 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
849 expire = ip6_rt_gc_timeout>>1;
851 out:
852 expire -= expire>>ip6_rt_gc_elasticity;
853 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
856 /* Clean host part of a prefix. Not necessary in radix tree,
857 but results in cleaner routing tables.
859 Remove it only when all the things will work!
862 static int ipv6_get_mtu(struct net_device *dev)
864 int mtu = IPV6_MIN_MTU;
865 struct inet6_dev *idev;
867 idev = in6_dev_get(dev);
868 if (idev) {
869 mtu = idev->cnf.mtu6;
870 in6_dev_put(idev);
872 return mtu;
875 int ipv6_get_hoplimit(struct net_device *dev)
877 int hoplimit = ipv6_devconf.hop_limit;
878 struct inet6_dev *idev;
880 idev = in6_dev_get(dev);
881 if (idev) {
882 hoplimit = idev->cnf.hop_limit;
883 in6_dev_put(idev);
885 return hoplimit;
/*
 * Create a route from the request in @rtmsg and insert it into the table.
 *
 * @rtmsg:   route description (prefixes, gateway, interface, metric, flags)
 * @nlh:     netlink header of the request, or NULL; supplies rt6i_protocol
 *           (RTPROT_BOOT is used without it)
 * @_rtattr: array of struct rtattr pointers, or NULL; RTA_METRICS is parsed
 *           into the route's metrics
 * @req:     passed through to ip6_ins_rt()
 *
 * Outline:
 *  - prefix lengths above 128 are rejected, as is a source prefix when
 *    CONFIG_IPV6_SUBTREES is off;
 *  - if an interface index is given, the device and its inet6_dev are
 *    looked up and referenced (-ENODEV on failure);
 *  - a metric of 0 is replaced by IP6_RT_PRIO_USER (this writes to @rtmsg);
 *  - RTF_REJECT requests, and routes through a loopback device to a
 *    non-loopback destination, become reject routes bound to loopback_dev;
 *  - for RTF_GATEWAY a gateway that is not link-local unicast must be a
 *    unicast address reachable through a non-gateway route, which also
 *    supplies the device when none was given;
 *  - missing HOPLIMIT, MTU and ADVMSS metrics get defaults before the
 *    route is handed to ip6_ins_rt().
 *
 * Returns the result of ip6_ins_rt() or a negative errno. On the error
 * path the device and inet6_dev references are dropped and the partially
 * built route is freed.
 */
892 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
893 void *_rtattr, struct netlink_skb_parms *req)
895 int err;
896 struct rtmsg *r;
897 struct rtattr **rta;
898 struct rt6_info *rt = NULL;
899 struct net_device *dev = NULL;
900 struct inet6_dev *idev = NULL;
901 int addr_type;
903 rta = (struct rtattr **) _rtattr;
905 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
906 return -EINVAL;
907 #ifndef CONFIG_IPV6_SUBTREES
908 if (rtmsg->rtmsg_src_len)
909 return -EINVAL;
910 #endif
911 if (rtmsg->rtmsg_ifindex) {
912 err = -ENODEV;
913 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
914 if (!dev)
915 goto out;
916 idev = in6_dev_get(dev);
917 if (!idev)
918 goto out;
921 if (rtmsg->rtmsg_metric == 0)
922 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
924 rt = ip6_dst_alloc();
926 if (rt == NULL) {
927 err = -ENOMEM;
928 goto out;
931 rt->u.dst.obsolete = -1;
932 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
933 if (nlh && (r = NLMSG_DATA(nlh))) {
934 rt->rt6i_protocol = r->rtm_protocol;
935 } else {
936 rt->rt6i_protocol = RTPROT_BOOT;
939 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
941 if (addr_type & IPV6_ADDR_MULTICAST)
942 rt->u.dst.input = ip6_mc_input;
943 else
944 rt->u.dst.input = ip6_forward;
946 rt->u.dst.output = ip6_output;
948 ipv6_addr_prefix(&rt->rt6i_dst.addr,
949 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
950 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
951 if (rt->rt6i_dst.plen == 128)
952 rt->u.dst.flags = DST_HOST;
954 #ifdef CONFIG_IPV6_SUBTREES
955 ipv6_addr_prefix(&rt->rt6i_src.addr,
956 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
957 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
958 #endif
960 rt->rt6i_metric = rtmsg->rtmsg_metric;
962 /* We cannot add true routes via loopback here,
963 they would result in kernel looping; promote them to reject routes
965 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
966 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
967 /* hold loopback dev/idev if we haven't done so. */
968 if (dev != &loopback_dev) {
969 if (dev) {
970 dev_put(dev);
971 in6_dev_put(idev);
973 dev = &loopback_dev;
974 dev_hold(dev);
975 idev = in6_dev_get(dev);
976 if (!idev) {
977 err = -ENODEV;
978 goto out;
981 rt->u.dst.output = ip6_pkt_discard_out;
982 rt->u.dst.input = ip6_pkt_discard;
983 rt->u.dst.error = -ENETUNREACH;
984 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
985 goto install_route;
988 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
989 struct in6_addr *gw_addr;
990 int gwa_type;
992 gw_addr = &rtmsg->rtmsg_gateway;
993 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
994 gwa_type = ipv6_addr_type(gw_addr);
996 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
997 struct rt6_info *grt;
999 /* IPv6 strictly inhibits using not link-local
1000 addresses as nexthop address.
1001 Otherwise, router will not able to send redirects.
1002 It is very good, but in some (rare!) circumstances
1003 (SIT, PtP, NBMA NOARP links) it is handy to allow
1004 some exceptions. --ANK
1006 err = -EINVAL;
1007 if (!(gwa_type&IPV6_ADDR_UNICAST))
1008 goto out;
1010 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1012 err = -EHOSTUNREACH;
1013 if (grt == NULL)
1014 goto out;
1015 if (dev) {
1016 if (dev != grt->rt6i_dev) {
1017 dst_release(&grt->u.dst);
1018 goto out;
1020 } else {
1021 dev = grt->rt6i_dev;
1022 idev = grt->rt6i_idev;
1023 dev_hold(dev);
1024 in6_dev_hold(grt->rt6i_idev);
1026 if (!(grt->rt6i_flags&RTF_GATEWAY))
1027 err = 0;
1028 dst_release(&grt->u.dst);
1030 if (err)
1031 goto out;
1033 err = -EINVAL;
1034 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1035 goto out;
1038 err = -ENODEV;
1039 if (dev == NULL)
1040 goto out;
1042 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1043 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1044 if (IS_ERR(rt->rt6i_nexthop)) {
1045 err = PTR_ERR(rt->rt6i_nexthop);
1046 rt->rt6i_nexthop = NULL;
1047 goto out;
1051 rt->rt6i_flags = rtmsg->rtmsg_flags;
1053 install_route:
1054 if (rta && rta[RTA_METRICS-1]) {
1055 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1056 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1058 while (RTA_OK(attr, attrlen)) {
1059 unsigned flavor = attr->rta_type;
1060 if (flavor) {
1061 if (flavor > RTAX_MAX) {
1062 err = -EINVAL;
1063 goto out;
1065 rt->u.dst.metrics[flavor-1] =
1066 *(u32 *)RTA_DATA(attr);
1068 attr = RTA_NEXT(attr, attrlen);
1072 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1073 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1074 if (!rt->u.dst.metrics[RTAX_MTU-1])
1075 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1076 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1077 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1078 rt->u.dst.dev = dev;
1079 rt->rt6i_idev = idev;
1080 return ip6_ins_rt(rt, nlh, _rtattr, req);
1082 out:
1083 if (dev)
1084 dev_put(dev);
1085 if (idev)
1086 in6_dev_put(idev);
1087 if (rt)
1088 dst_free((struct dst_entry *) rt);
1089 return err;
1092 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1094 int err;
1096 write_lock_bh(&rt6_lock);
1098 err = fib6_del(rt, nlh, _rtattr, req);
1099 dst_release(&rt->u.dst);
1101 write_unlock_bh(&rt6_lock);
1103 return err;
1106 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1108 struct fib6_node *fn;
1109 struct rt6_info *rt;
1110 int err = -ESRCH;
1112 read_lock_bh(&rt6_lock);
1114 fn = fib6_locate(&ip6_routing_table,
1115 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1116 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1118 if (fn) {
1119 for (rt = fn->leaf; rt; rt = rt->u.next) {
1120 if (rtmsg->rtmsg_ifindex &&
1121 (rt->rt6i_dev == NULL ||
1122 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1123 continue;
1124 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1125 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1126 continue;
1127 if (rtmsg->rtmsg_metric &&
1128 rtmsg->rtmsg_metric != rt->rt6i_metric)
1129 continue;
1130 dst_hold(&rt->u.dst);
1131 read_unlock_bh(&rt6_lock);
1133 return ip6_del_rt(rt, nlh, _rtattr, req);
1136 read_unlock_bh(&rt6_lock);
1138 return err;
1142 * Handle redirects
/*
 * Handle an accepted-looking ICMPv6 redirect.
 *
 * @dest:    destination the redirect refers to
 * @saddr:   source address of the redirect (the router that sent it)
 * @neigh:   neighbour entry of the new first hop
 * @lladdr:  link-layer address passed on to neigh_update()
 * @on_link: non-zero when the destination itself is the new first hop
 *
 * The current route to @dest is looked up on the receiving interface. The
 * redirect is dropped when that route uses another device, is not a
 * gateway route, or when @saddr is not its gateway. In the last case, for
 * a default route, the leaf list of the table root is searched for a
 * route whose gateway is @saddr, which then replaces the looked-up route
 * (the reference is moved from one to the other).
 *
 * On acceptance the neighbour is updated, the old route is confirmed and,
 * unless the redirect is a duplicate, a host cache entry pointing at the
 * new first hop is inserted. If the old route was itself a cache entry it
 * is deleted via ip6_del_rt(), which also drops the reference; otherwise
 * the reference is dropped at the "out" label.
 */
1144 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1145 struct neighbour *neigh, u8 *lladdr, int on_link)
1147 struct rt6_info *rt, *nrt;
1149 /* Locate old route to this destination. */
1150 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1152 if (rt == NULL)
1153 return;
1155 if (neigh->dev != rt->rt6i_dev)
1156 goto out;
1159 * Current route is on-link; redirect is always invalid.
1161 * Seems, previous statement is not true. It could
1162 * be node, which looks for us as on-link (f.e. proxy ndisc)
1163 * But then router serving it might decide, that we should
1164 * know truth 8)8) --ANK (980726).
1166 if (!(rt->rt6i_flags&RTF_GATEWAY))
1167 goto out;
1170 * RFC 2461 specifies that redirects should only be
1171 * accepted if they come from the nexthop to the target.
1172 * Due to the way default routers are chosen, this notion
1173 * is a bit fuzzy and one might need to check all default
1174 * routers.
1176 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1177 if (rt->rt6i_flags & RTF_DEFAULT) {
1178 struct rt6_info *rt1;
1180 read_lock(&rt6_lock);
1181 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1182 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1183 dst_hold(&rt1->u.dst);
1184 dst_release(&rt->u.dst);
1185 read_unlock(&rt6_lock);
1186 rt = rt1;
1187 goto source_ok;
1190 read_unlock(&rt6_lock);
1192 if (net_ratelimit())
1193 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1194 "for redirect target\n");
1195 goto out;
1198 source_ok:
1201 * We have finally decided to accept it.
1204 neigh_update(neigh, lladdr, NUD_STALE,
1205 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1206 NEIGH_UPDATE_F_OVERRIDE|
1207 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1208 NEIGH_UPDATE_F_ISROUTER))
1212 * Redirect received -> path was valid.
1213 * Look, redirects are sent only in response to data packets,
1214 * so that this nexthop apparently is reachable. --ANK
1216 dst_confirm(&rt->u.dst);
1218 /* Duplicate redirect: silently ignore. */
1219 if (neigh == rt->u.dst.neighbour)
1220 goto out;
1222 nrt = ip6_rt_copy(rt);
1223 if (nrt == NULL)
1224 goto out;
1226 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1227 if (on_link)
1228 nrt->rt6i_flags &= ~RTF_GATEWAY;
1230 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1231 nrt->rt6i_dst.plen = 128;
1232 nrt->u.dst.flags |= DST_HOST;
1234 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1235 nrt->rt6i_nexthop = neigh_clone(neigh);
1236 /* Reset pmtu, it may be better */
1237 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1238 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1240 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1241 goto out;
1243 if (rt->rt6i_flags&RTF_CACHE) {
1244 ip6_del_rt(rt, NULL, NULL, NULL);
1245 return;
1248 out:
1249 dst_release(&rt->u.dst);
1250 return;
1254 * Handle ICMP "packet too big" messages
1255 * i.e. Path MTU discovery
1258 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1259 struct net_device *dev, u32 pmtu)
1261 struct rt6_info *rt, *nrt;
1262 int allfrag = 0;
1264 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1265 if (rt == NULL)
1266 return;
1268 if (pmtu >= dst_mtu(&rt->u.dst))
1269 goto out;
1271 if (pmtu < IPV6_MIN_MTU) {
1273 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1274 * MTU (1280) and a fragment header should always be included
1275 * after a node receiving Too Big message reporting PMTU is
1276 * less than the IPv6 Minimum Link MTU.
1278 pmtu = IPV6_MIN_MTU;
1279 allfrag = 1;
1282 /* New mtu received -> path was valid.
1283 They are sent only in response to data packets,
1284 so that this nexthop apparently is reachable. --ANK
1286 dst_confirm(&rt->u.dst);
1288 /* Host route. If it is static, it would be better
1289 not to override it, but add new one, so that
1290 when cache entry will expire old pmtu
1291 would return automatically.
1293 if (rt->rt6i_flags & RTF_CACHE) {
1294 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1295 if (allfrag)
1296 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1297 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1298 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1299 goto out;
1302 /* Network route.
1303 Two cases are possible:
1304 1. It is connected route. Action: COW
1305 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1307 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1308 nrt = rt6_alloc_cow(rt, daddr, saddr);
1309 else
1310 nrt = rt6_alloc_clone(rt, daddr);
1312 if (nrt) {
1313 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1314 if (allfrag)
1315 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1317 /* According to RFC 1981, detecting PMTU increase shouldn't be
1318 * happened within 5 mins, the recommended timer is 10 mins.
1319 * Here this route expiration time is set to ip6_rt_mtu_expires
1320 * which is 10 mins. After 10 mins the decreased pmtu is expired
1321 * and detecting PMTU increase will be automatically happened.
1323 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1324 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1326 ip6_ins_rt(nrt, NULL, NULL, NULL);
1328 out:
1329 dst_release(&rt->u.dst);
1333 * Misc support functions
1336 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1338 struct rt6_info *rt = ip6_dst_alloc();
1340 if (rt) {
1341 rt->u.dst.input = ort->u.dst.input;
1342 rt->u.dst.output = ort->u.dst.output;
1344 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1345 rt->u.dst.dev = ort->u.dst.dev;
1346 if (rt->u.dst.dev)
1347 dev_hold(rt->u.dst.dev);
1348 rt->rt6i_idev = ort->rt6i_idev;
1349 if (rt->rt6i_idev)
1350 in6_dev_hold(rt->rt6i_idev);
1351 rt->u.dst.lastuse = jiffies;
1352 rt->rt6i_expires = 0;
1354 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1355 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1356 rt->rt6i_metric = 0;
1358 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1359 #ifdef CONFIG_IPV6_SUBTREES
1360 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1361 #endif
1363 return rt;
1366 #ifdef CONFIG_IPV6_ROUTE_INFO
1367 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1368 struct in6_addr *gwaddr, int ifindex)
1370 struct fib6_node *fn;
1371 struct rt6_info *rt = NULL;
1373 write_lock_bh(&rt6_lock);
1374 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1375 if (!fn)
1376 goto out;
1378 for (rt = fn->leaf; rt; rt = rt->u.next) {
1379 if (rt->rt6i_dev->ifindex != ifindex)
1380 continue;
1381 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1382 continue;
1383 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1384 continue;
1385 dst_hold(&rt->u.dst);
1386 break;
1388 out:
1389 write_unlock_bh(&rt6_lock);
1390 return rt;
1393 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1394 struct in6_addr *gwaddr, int ifindex,
1395 unsigned pref)
1397 struct in6_rtmsg rtmsg;
1399 memset(&rtmsg, 0, sizeof(rtmsg));
1400 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1401 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1402 rtmsg.rtmsg_dst_len = prefixlen;
1403 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1404 rtmsg.rtmsg_metric = 1024;
1405 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1406 /* We should treat it as a default route if prefix length is 0. */
1407 if (!prefixlen)
1408 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1409 rtmsg.rtmsg_ifindex = ifindex;
1411 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1413 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1415 #endif
1417 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1419 struct rt6_info *rt;
1420 struct fib6_node *fn;
1422 fn = &ip6_routing_table;
1424 write_lock_bh(&rt6_lock);
1425 for (rt = fn->leaf; rt; rt=rt->u.next) {
1426 if (dev == rt->rt6i_dev &&
1427 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1428 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1429 break;
1431 if (rt)
1432 dst_hold(&rt->u.dst);
1433 write_unlock_bh(&rt6_lock);
1434 return rt;
1437 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1438 struct net_device *dev,
1439 unsigned int pref)
1441 struct in6_rtmsg rtmsg;
1443 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1444 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1445 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1446 rtmsg.rtmsg_metric = 1024;
1447 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1448 RTF_PREF(pref);
1450 rtmsg.rtmsg_ifindex = dev->ifindex;
1452 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1453 return rt6_get_dflt_router(gwaddr, dev);
1456 void rt6_purge_dflt_routers(void)
1458 struct rt6_info *rt;
1460 restart:
1461 read_lock_bh(&rt6_lock);
1462 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1463 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1464 dst_hold(&rt->u.dst);
1466 read_unlock_bh(&rt6_lock);
1468 ip6_del_rt(rt, NULL, NULL, NULL);
1470 goto restart;
1473 read_unlock_bh(&rt6_lock);
1476 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1478 struct in6_rtmsg rtmsg;
1479 int err;
1481 switch(cmd) {
1482 case SIOCADDRT: /* Add a route */
1483 case SIOCDELRT: /* Delete a route */
1484 if (!capable(CAP_NET_ADMIN))
1485 return -EPERM;
1486 err = copy_from_user(&rtmsg, arg,
1487 sizeof(struct in6_rtmsg));
1488 if (err)
1489 return -EFAULT;
1491 rtnl_lock();
1492 switch (cmd) {
1493 case SIOCADDRT:
1494 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1495 break;
1496 case SIOCDELRT:
1497 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1498 break;
1499 default:
1500 err = -EINVAL;
1502 rtnl_unlock();
1504 return err;
1507 return -EINVAL;
1511 * Drop the packet on the floor
1514 static int ip6_pkt_discard(struct sk_buff *skb)
1516 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1517 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1518 kfree_skb(skb);
1519 return 0;
1522 static int ip6_pkt_discard_out(struct sk_buff *skb)
1524 skb->dev = skb->dst->dev;
1525 return ip6_pkt_discard(skb);
1529 * Allocate a dst for local (unicast / anycast) address.
1532 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1533 const struct in6_addr *addr,
1534 int anycast)
1536 struct rt6_info *rt = ip6_dst_alloc();
1538 if (rt == NULL)
1539 return ERR_PTR(-ENOMEM);
1541 dev_hold(&loopback_dev);
1542 in6_dev_hold(idev);
1544 rt->u.dst.flags = DST_HOST;
1545 rt->u.dst.input = ip6_input;
1546 rt->u.dst.output = ip6_output;
1547 rt->rt6i_dev = &loopback_dev;
1548 rt->rt6i_idev = idev;
1549 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1550 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1551 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1552 rt->u.dst.obsolete = -1;
1554 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1555 if (anycast)
1556 rt->rt6i_flags |= RTF_ANYCAST;
1557 else
1558 rt->rt6i_flags |= RTF_LOCAL;
1559 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1560 if (rt->rt6i_nexthop == NULL) {
1561 dst_free((struct dst_entry *) rt);
1562 return ERR_PTR(-ENOMEM);
1565 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1566 rt->rt6i_dst.plen = 128;
1568 atomic_set(&rt->u.dst.__refcnt, 1);
1570 return rt;
1573 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1575 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1576 rt != &ip6_null_entry) {
1577 RT6_TRACE("deleted by ifdown %p\n", rt);
1578 return -1;
1580 return 0;
1583 void rt6_ifdown(struct net_device *dev)
1585 write_lock_bh(&rt6_lock);
1586 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1587 write_unlock_bh(&rt6_lock);
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU of that device */
};
1596 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1598 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1599 struct inet6_dev *idev;
1601 /* In IPv6 pmtu discovery is not optional,
1602 so that RTAX_MTU lock cannot disable it.
1603 We still use this lock to block changes
1604 caused by addrconf/ndisc.
1607 idev = __in6_dev_get(arg->dev);
1608 if (idev == NULL)
1609 return 0;
1611 /* For administrative MTU increase, there is no way to discover
1612 IPv6 PMTU increase, so PMTU increase should be updated here.
1613 Since RFC 1981 doesn't include administrative MTU increase
1614 update PMTU increase is a MUST. (i.e. jumbo frame)
1617 If new MTU is less than route PMTU, this new MTU will be the
1618 lowest MTU in the path, update the route PMTU to reflect PMTU
1619 decreases; if new MTU is greater than route PMTU, and the
1620 old MTU is the lowest MTU in the path, update the route PMTU
1621 to reflect the increase. In this case if the other nodes' MTU
1622 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1623 PMTU discouvery.
1625 if (rt->rt6i_dev == arg->dev &&
1626 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1627 (dst_mtu(&rt->u.dst) > arg->mtu ||
1628 (dst_mtu(&rt->u.dst) < arg->mtu &&
1629 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1630 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1631 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1632 return 0;
1635 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1637 struct rt6_mtu_change_arg arg;
1639 arg.dev = dev;
1640 arg.mtu = mtu;
1641 read_lock_bh(&rt6_lock);
1642 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1643 read_unlock_bh(&rt6_lock);
1646 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1647 struct in6_rtmsg *rtmsg)
1649 memset(rtmsg, 0, sizeof(*rtmsg));
1651 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1652 rtmsg->rtmsg_src_len = r->rtm_src_len;
1653 rtmsg->rtmsg_flags = RTF_UP;
1654 if (r->rtm_type == RTN_UNREACHABLE)
1655 rtmsg->rtmsg_flags |= RTF_REJECT;
1657 if (rta[RTA_GATEWAY-1]) {
1658 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1659 return -EINVAL;
1660 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1661 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1663 if (rta[RTA_DST-1]) {
1664 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1665 return -EINVAL;
1666 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1668 if (rta[RTA_SRC-1]) {
1669 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1670 return -EINVAL;
1671 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1673 if (rta[RTA_OIF-1]) {
1674 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1675 return -EINVAL;
1676 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1678 if (rta[RTA_PRIORITY-1]) {
1679 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1680 return -EINVAL;
1681 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1683 return 0;
1686 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1688 struct rtmsg *r = NLMSG_DATA(nlh);
1689 struct in6_rtmsg rtmsg;
1691 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1692 return -EINVAL;
1693 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1696 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1698 struct rtmsg *r = NLMSG_DATA(nlh);
1699 struct in6_rtmsg rtmsg;
1701 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1702 return -EINVAL;
1703 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* Per-call state passed from inet6_dump_fib() to rt6_dump_route(). */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* the dump request being served */
};
1712 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1713 struct in6_addr *dst, struct in6_addr *src,
1714 int iif, int type, u32 pid, u32 seq,
1715 int prefix, unsigned int flags)
1717 struct rtmsg *rtm;
1718 struct nlmsghdr *nlh;
1719 unsigned char *b = skb->tail;
1720 struct rta_cacheinfo ci;
1722 if (prefix) { /* user wants prefix routes only */
1723 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1724 /* success since this is not a prefix route */
1725 return 1;
1729 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1730 rtm = NLMSG_DATA(nlh);
1731 rtm->rtm_family = AF_INET6;
1732 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1733 rtm->rtm_src_len = rt->rt6i_src.plen;
1734 rtm->rtm_tos = 0;
1735 rtm->rtm_table = RT_TABLE_MAIN;
1736 if (rt->rt6i_flags&RTF_REJECT)
1737 rtm->rtm_type = RTN_UNREACHABLE;
1738 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1739 rtm->rtm_type = RTN_LOCAL;
1740 else
1741 rtm->rtm_type = RTN_UNICAST;
1742 rtm->rtm_flags = 0;
1743 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1744 rtm->rtm_protocol = rt->rt6i_protocol;
1745 if (rt->rt6i_flags&RTF_DYNAMIC)
1746 rtm->rtm_protocol = RTPROT_REDIRECT;
1747 else if (rt->rt6i_flags & RTF_ADDRCONF)
1748 rtm->rtm_protocol = RTPROT_KERNEL;
1749 else if (rt->rt6i_flags&RTF_DEFAULT)
1750 rtm->rtm_protocol = RTPROT_RA;
1752 if (rt->rt6i_flags&RTF_CACHE)
1753 rtm->rtm_flags |= RTM_F_CLONED;
1755 if (dst) {
1756 RTA_PUT(skb, RTA_DST, 16, dst);
1757 rtm->rtm_dst_len = 128;
1758 } else if (rtm->rtm_dst_len)
1759 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1760 #ifdef CONFIG_IPV6_SUBTREES
1761 if (src) {
1762 RTA_PUT(skb, RTA_SRC, 16, src);
1763 rtm->rtm_src_len = 128;
1764 } else if (rtm->rtm_src_len)
1765 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1766 #endif
1767 if (iif)
1768 RTA_PUT(skb, RTA_IIF, 4, &iif);
1769 else if (dst) {
1770 struct in6_addr saddr_buf;
1771 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1772 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1774 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1775 goto rtattr_failure;
1776 if (rt->u.dst.neighbour)
1777 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1778 if (rt->u.dst.dev)
1779 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1780 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1781 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1782 if (rt->rt6i_expires)
1783 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1784 else
1785 ci.rta_expires = 0;
1786 ci.rta_used = rt->u.dst.__use;
1787 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1788 ci.rta_error = rt->u.dst.error;
1789 ci.rta_id = 0;
1790 ci.rta_ts = 0;
1791 ci.rta_tsage = 0;
1792 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1793 nlh->nlmsg_len = skb->tail - b;
1794 return skb->len;
1796 nlmsg_failure:
1797 rtattr_failure:
1798 skb_trim(skb, b - skb->data);
1799 return -1;
1802 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1804 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1805 int prefix;
1807 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1808 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1809 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1810 } else
1811 prefix = 0;
1813 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1814 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1815 prefix, NLM_F_MULTI);
1818 static int fib6_dump_node(struct fib6_walker_t *w)
1820 int res;
1821 struct rt6_info *rt;
1823 for (rt = w->leaf; rt; rt = rt->u.next) {
1824 res = rt6_dump_route(rt, w->args);
1825 if (res < 0) {
1826 /* Frame is full, suspend walking */
1827 w->leaf = rt;
1828 return 1;
1830 BUG_TRAP(res!=0);
1832 w->leaf = NULL;
1833 return 0;
1836 static void fib6_dump_end(struct netlink_callback *cb)
1838 struct fib6_walker_t *w = (void*)cb->args[0];
1840 if (w) {
1841 cb->args[0] = 0;
1842 fib6_walker_unlink(w);
1843 kfree(w);
1845 cb->done = (void*)cb->args[1];
1846 cb->args[1] = 0;
1849 static int fib6_dump_done(struct netlink_callback *cb)
1851 fib6_dump_end(cb);
1852 return cb->done ? cb->done(cb) : 0;
1855 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1857 struct rt6_rtnl_dump_arg arg;
1858 struct fib6_walker_t *w;
1859 int res;
1861 arg.skb = skb;
1862 arg.cb = cb;
1864 w = (void*)cb->args[0];
1865 if (w == NULL) {
1866 /* New dump:
1868 * 1. hook callback destructor.
1870 cb->args[1] = (long)cb->done;
1871 cb->done = fib6_dump_done;
1874 * 2. allocate and initialize walker.
1876 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1877 if (w == NULL)
1878 return -ENOMEM;
1879 RT6_TRACE("dump<%p", w);
1880 memset(w, 0, sizeof(*w));
1881 w->root = &ip6_routing_table;
1882 w->func = fib6_dump_node;
1883 w->args = &arg;
1884 cb->args[0] = (long)w;
1885 read_lock_bh(&rt6_lock);
1886 res = fib6_walk(w);
1887 read_unlock_bh(&rt6_lock);
1888 } else {
1889 w->args = &arg;
1890 read_lock_bh(&rt6_lock);
1891 res = fib6_walk_continue(w);
1892 read_unlock_bh(&rt6_lock);
1894 #if RT6_DEBUG >= 3
1895 if (res <= 0 && skb->len == 0)
1896 RT6_TRACE("%p>dump end\n", w);
1897 #endif
1898 res = res < 0 ? res : skb->len;
1899 /* res < 0 is an error. (really, impossible)
1900 res == 0 means that dump is complete, but skb still can contain data.
1901 res > 0 dump is not complete, but frame is full.
1903 /* Destroy walker, if dump of this table is complete. */
1904 if (res <= 0)
1905 fib6_dump_end(cb);
1906 return res;
1909 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1911 struct rtattr **rta = arg;
1912 int iif = 0;
1913 int err = -ENOBUFS;
1914 struct sk_buff *skb;
1915 struct flowi fl;
1916 struct rt6_info *rt;
1918 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1919 if (skb == NULL)
1920 goto out;
1922 /* Reserve room for dummy headers, this skb can pass
1923 through good chunk of routing engine.
1925 skb->mac.raw = skb->data;
1926 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1928 memset(&fl, 0, sizeof(fl));
1929 if (rta[RTA_SRC-1])
1930 ipv6_addr_copy(&fl.fl6_src,
1931 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1932 if (rta[RTA_DST-1])
1933 ipv6_addr_copy(&fl.fl6_dst,
1934 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1936 if (rta[RTA_IIF-1])
1937 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1939 if (iif) {
1940 struct net_device *dev;
1941 dev = __dev_get_by_index(iif);
1942 if (!dev) {
1943 err = -ENODEV;
1944 goto out_free;
1948 fl.oif = 0;
1949 if (rta[RTA_OIF-1])
1950 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1952 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1954 skb->dst = &rt->u.dst;
1956 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1957 err = rt6_fill_node(skb, rt,
1958 &fl.fl6_dst, &fl.fl6_src,
1959 iif,
1960 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1961 nlh->nlmsg_seq, 0, 0);
1962 if (err < 0) {
1963 err = -EMSGSIZE;
1964 goto out_free;
1967 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1968 if (err > 0)
1969 err = 0;
1970 out:
1971 return err;
1972 out_free:
1973 kfree_skb(skb);
1974 goto out;
1977 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1978 struct netlink_skb_parms *req)
1980 struct sk_buff *skb;
1981 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1982 u32 pid = current->pid;
1983 u32 seq = 0;
1985 if (req)
1986 pid = req->pid;
1987 if (nlh)
1988 seq = nlh->nlmsg_seq;
1990 skb = alloc_skb(size, gfp_any());
1991 if (!skb) {
1992 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1993 return;
1995 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1996 kfree_skb(skb);
1997 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1998 return;
2000 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2001 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
2005 * /proc
2008 #ifdef CONFIG_PROC_FS
2010 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2021 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2023 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2024 int i;
2026 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2027 arg->skip++;
2028 return 0;
2031 if (arg->len >= arg->length)
2032 return 0;
2034 for (i=0; i<16; i++) {
2035 sprintf(arg->buffer + arg->len, "%02x",
2036 rt->rt6i_dst.addr.s6_addr[i]);
2037 arg->len += 2;
2039 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2040 rt->rt6i_dst.plen);
2042 #ifdef CONFIG_IPV6_SUBTREES
2043 for (i=0; i<16; i++) {
2044 sprintf(arg->buffer + arg->len, "%02x",
2045 rt->rt6i_src.addr.s6_addr[i]);
2046 arg->len += 2;
2048 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2049 rt->rt6i_src.plen);
2050 #else
2051 sprintf(arg->buffer + arg->len,
2052 "00000000000000000000000000000000 00 ");
2053 arg->len += 36;
2054 #endif
2056 if (rt->rt6i_nexthop) {
2057 for (i=0; i<16; i++) {
2058 sprintf(arg->buffer + arg->len, "%02x",
2059 rt->rt6i_nexthop->primary_key[i]);
2060 arg->len += 2;
2062 } else {
2063 sprintf(arg->buffer + arg->len,
2064 "00000000000000000000000000000000");
2065 arg->len += 32;
2067 arg->len += sprintf(arg->buffer + arg->len,
2068 " %08x %08x %08x %08x %8s\n",
2069 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2070 rt->u.dst.__use, rt->rt6i_flags,
2071 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2072 return 0;
2075 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2077 struct rt6_proc_arg arg;
2078 arg.buffer = buffer;
2079 arg.offset = offset;
2080 arg.length = length;
2081 arg.skip = 0;
2082 arg.len = 0;
2084 read_lock_bh(&rt6_lock);
2085 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2086 read_unlock_bh(&rt6_lock);
2088 *start = buffer;
2089 if (offset)
2090 *start += offset % RT6_INFO_LEN;
2092 arg.len -= offset % RT6_INFO_LEN;
2094 if (arg.len > length)
2095 arg.len = length;
2096 if (arg.len < 0)
2097 arg.len = 0;
2099 return arg.len;
2102 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2104 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2105 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2106 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2107 rt6_stats.fib_rt_cache,
2108 atomic_read(&ip6_dst_ops.entries),
2109 rt6_stats.fib_discarded_routes);
2111 return 0;
2114 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2116 return single_open(file, rt6_stats_seq_show, NULL);
2119 static struct file_operations rt6_stats_seq_fops = {
2120 .owner = THIS_MODULE,
2121 .open = rt6_stats_seq_open,
2122 .read = seq_read,
2123 .llseek = seq_lseek,
2124 .release = single_release,
2126 #endif /* CONFIG_PROC_FS */
2128 #ifdef CONFIG_SYSCTL
2130 static int flush_delay;
2132 static
2133 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2134 void __user *buffer, size_t *lenp, loff_t *ppos)
2136 if (write) {
2137 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2138 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2139 return 0;
2140 } else
2141 return -EINVAL;
2144 ctl_table ipv6_route_table[] = {
2146 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2147 .procname = "flush",
2148 .data = &flush_delay,
2149 .maxlen = sizeof(int),
2150 .mode = 0200,
2151 .proc_handler = &ipv6_sysctl_rtcache_flush
2154 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2155 .procname = "gc_thresh",
2156 .data = &ip6_dst_ops.gc_thresh,
2157 .maxlen = sizeof(int),
2158 .mode = 0644,
2159 .proc_handler = &proc_dointvec,
2162 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2163 .procname = "max_size",
2164 .data = &ip6_rt_max_size,
2165 .maxlen = sizeof(int),
2166 .mode = 0644,
2167 .proc_handler = &proc_dointvec,
2170 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2171 .procname = "gc_min_interval",
2172 .data = &ip6_rt_gc_min_interval,
2173 .maxlen = sizeof(int),
2174 .mode = 0644,
2175 .proc_handler = &proc_dointvec_jiffies,
2176 .strategy = &sysctl_jiffies,
2179 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2180 .procname = "gc_timeout",
2181 .data = &ip6_rt_gc_timeout,
2182 .maxlen = sizeof(int),
2183 .mode = 0644,
2184 .proc_handler = &proc_dointvec_jiffies,
2185 .strategy = &sysctl_jiffies,
2188 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2189 .procname = "gc_interval",
2190 .data = &ip6_rt_gc_interval,
2191 .maxlen = sizeof(int),
2192 .mode = 0644,
2193 .proc_handler = &proc_dointvec_jiffies,
2194 .strategy = &sysctl_jiffies,
2197 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2198 .procname = "gc_elasticity",
2199 .data = &ip6_rt_gc_elasticity,
2200 .maxlen = sizeof(int),
2201 .mode = 0644,
2202 .proc_handler = &proc_dointvec_jiffies,
2203 .strategy = &sysctl_jiffies,
2206 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2207 .procname = "mtu_expires",
2208 .data = &ip6_rt_mtu_expires,
2209 .maxlen = sizeof(int),
2210 .mode = 0644,
2211 .proc_handler = &proc_dointvec_jiffies,
2212 .strategy = &sysctl_jiffies,
2215 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2216 .procname = "min_adv_mss",
2217 .data = &ip6_rt_min_advmss,
2218 .maxlen = sizeof(int),
2219 .mode = 0644,
2220 .proc_handler = &proc_dointvec_jiffies,
2221 .strategy = &sysctl_jiffies,
2224 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2225 .procname = "gc_min_interval_ms",
2226 .data = &ip6_rt_gc_min_interval,
2227 .maxlen = sizeof(int),
2228 .mode = 0644,
2229 .proc_handler = &proc_dointvec_ms_jiffies,
2230 .strategy = &sysctl_ms_jiffies,
2232 { .ctl_name = 0 }
2235 #endif
2237 void __init ip6_route_init(void)
2239 struct proc_dir_entry *p;
2241 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2242 sizeof(struct rt6_info),
2243 0, SLAB_HWCACHE_ALIGN,
2244 NULL, NULL);
2245 if (!ip6_dst_ops.kmem_cachep)
2246 panic("cannot create ip6_dst_cache");
2248 fib6_init();
2249 #ifdef CONFIG_PROC_FS
2250 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2251 if (p)
2252 p->owner = THIS_MODULE;
2254 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2255 #endif
2256 #ifdef CONFIG_XFRM
2257 xfrm6_init();
2258 #endif
2261 void ip6_route_cleanup(void)
2263 #ifdef CONFIG_PROC_FS
2264 proc_net_remove("ipv6_route");
2265 proc_net_remove("rt6_stats");
2266 #endif
2267 #ifdef CONFIG_XFRM
2268 xfrm6_fini();
2269 #endif
2270 rt6_ifdown(NULL);
2271 fib6_gc_cleanup();
2272 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);