neigh: new unresolved queue limits
[linux-2.6/cjktty.git] / net / ipv6 / ndisc.c
blob4a2098222625ecc271999a4ea86725a16592fd97
/*
 *	Neighbour Discovery for IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Mike Shaver		<shaver@ingenia.com>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 *	Changes:
 *
 *	Pierre Ynard			:	export userland ND options
 *						through netlink (RDNSS support)
 *	Lars Fenneberg			:	fixed MTU setting on receipt
 *						of an RA.
 *	Janos Farkas			:	kmalloc failure checks
 *	Alexey Kuznetsov		:	state machine reworked
 *						and moved to net/core.
 *	Pekka Savola			:	RFC2461 validation
 *	YOSHIFUJI Hideaki @USAGI	:	Verify ND options properly
 */
/*
 * Debug output helpers.
 *
 * ND_PRINTK() prints through printk() only when net_ratelimit() allows it.
 * ND_PRINTK0() always maps to ND_PRINTK(); ND_PRINTK<n>() (n = 1..3) maps to
 * ND_PRINTK() when ND_DEBUG >= n and to the empty ND_NOPRINTK() otherwise.
 */
/* Set to 3 to get tracing... */
#define ND_DEBUG 1

#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
#define ND_NOPRINTK(x...) do { ; } while(0)
#define ND_PRINTK0 ND_PRINTK
#define ND_PRINTK1 ND_NOPRINTK
#define ND_PRINTK2 ND_NOPRINTK
#define ND_PRINTK3 ND_NOPRINTK
#if ND_DEBUG >= 1
#undef ND_PRINTK1
#define ND_PRINTK1 ND_PRINTK
#endif
#if ND_DEBUG >= 2
#undef ND_PRINTK2
#define ND_PRINTK2 ND_PRINTK
#endif
#if ND_DEBUG >= 3
#undef ND_PRINTK3
#define ND_PRINTK3 ND_PRINTK
#endif
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #include <linux/slab.h>
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
67 #include <linux/if_addr.h>
68 #include <linux/if_arp.h>
69 #include <linux/ipv6.h>
70 #include <linux/icmpv6.h>
71 #include <linux/jhash.h>
73 #include <net/sock.h>
74 #include <net/snmp.h>
76 #include <net/ipv6.h>
77 #include <net/protocol.h>
78 #include <net/ndisc.h>
79 #include <net/ip6_route.h>
80 #include <net/addrconf.h>
81 #include <net/icmp.h>
83 #include <net/netlink.h>
84 #include <linux/rtnetlink.h>
86 #include <net/flow.h>
87 #include <net/ip6_checksum.h>
88 #include <net/inet_common.h>
89 #include <linux/proc_fs.h>
91 #include <linux/netfilter.h>
92 #include <linux/netfilter_ipv6.h>
94 static u32 ndisc_hash(const void *pkey,
95 const struct net_device *dev,
96 __u32 rnd);
97 static int ndisc_constructor(struct neighbour *neigh);
98 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
99 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
100 static int pndisc_constructor(struct pneigh_entry *n);
101 static void pndisc_destructor(struct pneigh_entry *n);
102 static void pndisc_redo(struct sk_buff *skb);
104 static const struct neigh_ops ndisc_generic_ops = {
105 .family = AF_INET6,
106 .solicit = ndisc_solicit,
107 .error_report = ndisc_error_report,
108 .output = neigh_resolve_output,
109 .connected_output = neigh_connected_output,
112 static const struct neigh_ops ndisc_hh_ops = {
113 .family = AF_INET6,
114 .solicit = ndisc_solicit,
115 .error_report = ndisc_error_report,
116 .output = neigh_resolve_output,
117 .connected_output = neigh_resolve_output,
121 static const struct neigh_ops ndisc_direct_ops = {
122 .family = AF_INET6,
123 .output = neigh_direct_output,
124 .connected_output = neigh_direct_output,
127 struct neigh_table nd_tbl = {
128 .family = AF_INET6,
129 .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
130 .key_len = sizeof(struct in6_addr),
131 .hash = ndisc_hash,
132 .constructor = ndisc_constructor,
133 .pconstructor = pndisc_constructor,
134 .pdestructor = pndisc_destructor,
135 .proxy_redo = pndisc_redo,
136 .id = "ndisc_cache",
137 .parms = {
138 .tbl = &nd_tbl,
139 .base_reachable_time = ND_REACHABLE_TIME,
140 .retrans_time = ND_RETRANS_TIMER,
141 .gc_staletime = 60 * HZ,
142 .reachable_time = ND_REACHABLE_TIME,
143 .delay_probe_time = 5 * HZ,
144 .queue_len_bytes = 64*1024,
145 .ucast_probes = 3,
146 .mcast_probes = 3,
147 .anycast_delay = 1 * HZ,
148 .proxy_delay = (8 * HZ) / 10,
149 .proxy_qlen = 64,
151 .gc_interval = 30 * HZ,
152 .gc_thresh1 = 128,
153 .gc_thresh2 = 512,
154 .gc_thresh3 = 1024,
157 /* ND options */
158 struct ndisc_options {
159 struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
160 #ifdef CONFIG_IPV6_ROUTE_INFO
161 struct nd_opt_hdr *nd_opts_ri;
162 struct nd_opt_hdr *nd_opts_ri_end;
163 #endif
164 struct nd_opt_hdr *nd_useropts;
165 struct nd_opt_hdr *nd_useropts_end;
/* Named accessors for the per-type slots of ndisc_options.nd_opt_array. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]

/*
 * Bytes occupied by an option carrying @len payload bytes: @len plus the two
 * leading octets (type and length), rounded up to a multiple of 8.
 */
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
178 * Return the padding between the option length and the start of the
179 * link addr. Currently only IP-over-InfiniBand needs this, although
180 * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
181 * also need a pad of 2.
183 static int ndisc_addr_option_pad(unsigned short type)
185 switch (type) {
186 case ARPHRD_INFINIBAND: return 2;
187 default: return 0;
191 static inline int ndisc_opt_addr_space(struct net_device *dev)
193 return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
196 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
197 unsigned short addr_type)
199 int space = NDISC_OPT_SPACE(data_len);
200 int pad = ndisc_addr_option_pad(addr_type);
202 opt[0] = type;
203 opt[1] = space>>3;
205 memset(opt + 2, 0, pad);
206 opt += pad;
207 space -= pad;
209 memcpy(opt+2, data, data_len);
210 data_len += 2;
211 opt += data_len;
212 if ((space -= data_len) > 0)
213 memset(opt, 0, space);
214 return opt + space;
217 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
218 struct nd_opt_hdr *end)
220 int type;
221 if (!cur || !end || cur >= end)
222 return NULL;
223 type = cur->nd_opt_type;
224 do {
225 cur = ((void *)cur) + (cur->nd_opt_len << 3);
226 } while(cur < end && cur->nd_opt_type != type);
227 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
230 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
232 return opt->nd_opt_type == ND_OPT_RDNSS;
235 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
236 struct nd_opt_hdr *end)
238 if (!cur || !end || cur >= end)
239 return NULL;
240 do {
241 cur = ((void *)cur) + (cur->nd_opt_len << 3);
242 } while(cur < end && !ndisc_is_useropt(cur));
243 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
246 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
247 struct ndisc_options *ndopts)
249 struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
251 if (!nd_opt || opt_len < 0 || !ndopts)
252 return NULL;
253 memset(ndopts, 0, sizeof(*ndopts));
254 while (opt_len) {
255 int l;
256 if (opt_len < sizeof(struct nd_opt_hdr))
257 return NULL;
258 l = nd_opt->nd_opt_len << 3;
259 if (opt_len < l || l == 0)
260 return NULL;
261 switch (nd_opt->nd_opt_type) {
262 case ND_OPT_SOURCE_LL_ADDR:
263 case ND_OPT_TARGET_LL_ADDR:
264 case ND_OPT_MTU:
265 case ND_OPT_REDIRECT_HDR:
266 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
267 ND_PRINTK2(KERN_WARNING
268 "%s(): duplicated ND6 option found: type=%d\n",
269 __func__,
270 nd_opt->nd_opt_type);
271 } else {
272 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
274 break;
275 case ND_OPT_PREFIX_INFO:
276 ndopts->nd_opts_pi_end = nd_opt;
277 if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
278 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
279 break;
280 #ifdef CONFIG_IPV6_ROUTE_INFO
281 case ND_OPT_ROUTE_INFO:
282 ndopts->nd_opts_ri_end = nd_opt;
283 if (!ndopts->nd_opts_ri)
284 ndopts->nd_opts_ri = nd_opt;
285 break;
286 #endif
287 default:
288 if (ndisc_is_useropt(nd_opt)) {
289 ndopts->nd_useropts_end = nd_opt;
290 if (!ndopts->nd_useropts)
291 ndopts->nd_useropts = nd_opt;
292 } else {
294 * Unknown options must be silently ignored,
295 * to accommodate future extension to the
296 * protocol.
298 ND_PRINTK2(KERN_NOTICE
299 "%s(): ignored unsupported option; type=%d, len=%d\n",
300 __func__,
301 nd_opt->nd_opt_type, nd_opt->nd_opt_len);
304 opt_len -= l;
305 nd_opt = ((void *)nd_opt) + l;
307 return ndopts;
310 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
311 struct net_device *dev)
313 u8 *lladdr = (u8 *)(p + 1);
314 int lladdrlen = p->nd_opt_len << 3;
315 int prepad = ndisc_addr_option_pad(dev->type);
316 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
317 return NULL;
318 return lladdr + prepad;
321 int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
323 switch (dev->type) {
324 case ARPHRD_ETHER:
325 case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */
326 case ARPHRD_FDDI:
327 ipv6_eth_mc_map(addr, buf);
328 return 0;
329 case ARPHRD_IEEE802_TR:
330 ipv6_tr_mc_map(addr,buf);
331 return 0;
332 case ARPHRD_ARCNET:
333 ipv6_arcnet_mc_map(addr, buf);
334 return 0;
335 case ARPHRD_INFINIBAND:
336 ipv6_ib_mc_map(addr, dev->broadcast, buf);
337 return 0;
338 case ARPHRD_IPGRE:
339 return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
340 default:
341 if (dir) {
342 memcpy(buf, dev->broadcast, dev->addr_len);
343 return 0;
346 return -EINVAL;
349 EXPORT_SYMBOL(ndisc_mc_map);
351 static u32 ndisc_hash(const void *pkey,
352 const struct net_device *dev,
353 __u32 hash_rnd)
355 const u32 *p32 = pkey;
356 u32 addr_hash, i;
358 addr_hash = 0;
359 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
360 addr_hash ^= *p32++;
362 return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
365 static int ndisc_constructor(struct neighbour *neigh)
367 struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
368 struct net_device *dev = neigh->dev;
369 struct inet6_dev *in6_dev;
370 struct neigh_parms *parms;
371 int is_multicast = ipv6_addr_is_multicast(addr);
373 in6_dev = in6_dev_get(dev);
374 if (in6_dev == NULL) {
375 return -EINVAL;
378 parms = in6_dev->nd_parms;
379 __neigh_parms_put(neigh->parms);
380 neigh->parms = neigh_parms_clone(parms);
382 neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
383 if (!dev->header_ops) {
384 neigh->nud_state = NUD_NOARP;
385 neigh->ops = &ndisc_direct_ops;
386 neigh->output = neigh_direct_output;
387 } else {
388 if (is_multicast) {
389 neigh->nud_state = NUD_NOARP;
390 ndisc_mc_map(addr, neigh->ha, dev, 1);
391 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
392 neigh->nud_state = NUD_NOARP;
393 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
394 if (dev->flags&IFF_LOOPBACK)
395 neigh->type = RTN_LOCAL;
396 } else if (dev->flags&IFF_POINTOPOINT) {
397 neigh->nud_state = NUD_NOARP;
398 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
400 if (dev->header_ops->cache)
401 neigh->ops = &ndisc_hh_ops;
402 else
403 neigh->ops = &ndisc_generic_ops;
404 if (neigh->nud_state&NUD_VALID)
405 neigh->output = neigh->ops->connected_output;
406 else
407 neigh->output = neigh->ops->output;
409 in6_dev_put(in6_dev);
410 return 0;
413 static int pndisc_constructor(struct pneigh_entry *n)
415 struct in6_addr *addr = (struct in6_addr*)&n->key;
416 struct in6_addr maddr;
417 struct net_device *dev = n->dev;
419 if (dev == NULL || __in6_dev_get(dev) == NULL)
420 return -EINVAL;
421 addrconf_addr_solict_mult(addr, &maddr);
422 ipv6_dev_mc_inc(dev, &maddr);
423 return 0;
426 static void pndisc_destructor(struct pneigh_entry *n)
428 struct in6_addr *addr = (struct in6_addr*)&n->key;
429 struct in6_addr maddr;
430 struct net_device *dev = n->dev;
432 if (dev == NULL || __in6_dev_get(dev) == NULL)
433 return;
434 addrconf_addr_solict_mult(addr, &maddr);
435 ipv6_dev_mc_dec(dev, &maddr);
438 struct sk_buff *ndisc_build_skb(struct net_device *dev,
439 const struct in6_addr *daddr,
440 const struct in6_addr *saddr,
441 struct icmp6hdr *icmp6h,
442 const struct in6_addr *target,
443 int llinfo)
445 struct net *net = dev_net(dev);
446 struct sock *sk = net->ipv6.ndisc_sk;
447 struct sk_buff *skb;
448 struct icmp6hdr *hdr;
449 int len;
450 int err;
451 u8 *opt;
453 if (!dev->addr_len)
454 llinfo = 0;
456 len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
457 if (llinfo)
458 len += ndisc_opt_addr_space(dev);
460 skb = sock_alloc_send_skb(sk,
461 (MAX_HEADER + sizeof(struct ipv6hdr) +
462 len + LL_ALLOCATED_SPACE(dev)),
463 1, &err);
464 if (!skb) {
465 ND_PRINTK0(KERN_ERR
466 "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
467 __func__, err);
468 return NULL;
471 skb_reserve(skb, LL_RESERVED_SPACE(dev));
472 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
474 skb->transport_header = skb->tail;
475 skb_put(skb, len);
477 hdr = (struct icmp6hdr *)skb_transport_header(skb);
478 memcpy(hdr, icmp6h, sizeof(*hdr));
480 opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
481 if (target) {
482 ipv6_addr_copy((struct in6_addr *)opt, target);
483 opt += sizeof(*target);
486 if (llinfo)
487 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
488 dev->addr_len, dev->type);
490 hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
491 IPPROTO_ICMPV6,
492 csum_partial(hdr,
493 len, 0));
495 return skb;
498 EXPORT_SYMBOL(ndisc_build_skb);
500 void ndisc_send_skb(struct sk_buff *skb,
501 struct net_device *dev,
502 struct neighbour *neigh,
503 const struct in6_addr *daddr,
504 const struct in6_addr *saddr,
505 struct icmp6hdr *icmp6h)
507 struct flowi6 fl6;
508 struct dst_entry *dst;
509 struct net *net = dev_net(dev);
510 struct sock *sk = net->ipv6.ndisc_sk;
511 struct inet6_dev *idev;
512 int err;
513 u8 type;
515 type = icmp6h->icmp6_type;
517 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
519 dst = icmp6_dst_alloc(dev, neigh, daddr);
520 if (!dst) {
521 kfree_skb(skb);
522 return;
525 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
526 if (IS_ERR(dst)) {
527 kfree_skb(skb);
528 return;
531 skb_dst_set(skb, dst);
533 rcu_read_lock();
534 idev = __in6_dev_get(dst->dev);
535 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
537 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
538 dst_output);
539 if (!err) {
540 ICMP6MSGOUT_INC_STATS(net, idev, type);
541 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
544 rcu_read_unlock();
547 EXPORT_SYMBOL(ndisc_send_skb);
/*
 *	Send a Neighbour Discover packet
 *
 *	Builds the message with ndisc_build_skb() and hands it to
 *	ndisc_send_skb().  Returns silently when the skb cannot be built.
 */
static void __ndisc_send(struct net_device *dev,
			 struct neighbour *neigh,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr,
			 struct icmp6hdr *icmp6h, const struct in6_addr *target,
			 int llinfo)
{
	struct sk_buff *skb;

	skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo);
	if (!skb)
		return;

	ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
}
568 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
569 const struct in6_addr *daddr,
570 const struct in6_addr *solicited_addr,
571 int router, int solicited, int override, int inc_opt)
573 struct in6_addr tmpaddr;
574 struct inet6_ifaddr *ifp;
575 const struct in6_addr *src_addr;
576 struct icmp6hdr icmp6h = {
577 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
580 /* for anycast or proxy, solicited_addr != src_addr */
581 ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
582 if (ifp) {
583 src_addr = solicited_addr;
584 if (ifp->flags & IFA_F_OPTIMISTIC)
585 override = 0;
586 inc_opt |= ifp->idev->cnf.force_tllao;
587 in6_ifa_put(ifp);
588 } else {
589 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
590 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
591 &tmpaddr))
592 return;
593 src_addr = &tmpaddr;
596 icmp6h.icmp6_router = router;
597 icmp6h.icmp6_solicited = solicited;
598 icmp6h.icmp6_override = override;
600 __ndisc_send(dev, neigh, daddr, src_addr,
601 &icmp6h, solicited_addr,
602 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
605 static void ndisc_send_unsol_na(struct net_device *dev)
607 struct inet6_dev *idev;
608 struct inet6_ifaddr *ifa;
609 struct in6_addr mcaddr;
611 idev = in6_dev_get(dev);
612 if (!idev)
613 return;
615 read_lock_bh(&idev->lock);
616 list_for_each_entry(ifa, &idev->addr_list, if_list) {
617 addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
618 ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
619 /*router=*/ !!idev->cnf.forwarding,
620 /*solicited=*/ false, /*override=*/ true,
621 /*inc_opt=*/ true);
623 read_unlock_bh(&idev->lock);
625 in6_dev_put(idev);
628 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
629 const struct in6_addr *solicit,
630 const struct in6_addr *daddr, const struct in6_addr *saddr)
632 struct in6_addr addr_buf;
633 struct icmp6hdr icmp6h = {
634 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
637 if (saddr == NULL) {
638 if (ipv6_get_lladdr(dev, &addr_buf,
639 (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
640 return;
641 saddr = &addr_buf;
644 __ndisc_send(dev, neigh, daddr, saddr,
645 &icmp6h, solicit,
646 !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
649 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
650 const struct in6_addr *daddr)
652 struct icmp6hdr icmp6h = {
653 .icmp6_type = NDISC_ROUTER_SOLICITATION,
655 int send_sllao = dev->addr_len;
657 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
659 * According to section 2.2 of RFC 4429, we must not
660 * send router solicitations with a sllao from
661 * optimistic addresses, but we may send the solicitation
662 * if we don't include the sllao. So here we check
663 * if our address is optimistic, and if so, we
664 * suppress the inclusion of the sllao.
666 if (send_sllao) {
667 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
668 dev, 1);
669 if (ifp) {
670 if (ifp->flags & IFA_F_OPTIMISTIC) {
671 send_sllao = 0;
673 in6_ifa_put(ifp);
674 } else {
675 send_sllao = 0;
678 #endif
679 __ndisc_send(dev, NULL, daddr, saddr,
680 &icmp6h, NULL,
681 send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
/*
 * error_report hook of the ndisc neigh_ops tables: report the link failure
 * for @skb through dst_link_failure(), then free the packet.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 *	"The sender MUST return an ICMP
	 *	 destination unreachable"
	 */
	dst_link_failure(skb);
	kfree_skb(skb);
}
695 /* Called with locked neigh: either read or both */
697 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
699 struct in6_addr *saddr = NULL;
700 struct in6_addr mcaddr;
701 struct net_device *dev = neigh->dev;
702 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
703 int probes = atomic_read(&neigh->probes);
705 if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
706 saddr = &ipv6_hdr(skb)->saddr;
708 if ((probes -= neigh->parms->ucast_probes) < 0) {
709 if (!(neigh->nud_state & NUD_VALID)) {
710 ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
711 __func__, target);
713 ndisc_send_ns(dev, neigh, target, target, saddr);
714 } else if ((probes -= neigh->parms->app_probes) < 0) {
715 #ifdef CONFIG_ARPD
716 neigh_app_ns(neigh);
717 #endif
718 } else {
719 addrconf_addr_solict_mult(target, &mcaddr);
720 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
724 static int pndisc_is_router(const void *pkey,
725 struct net_device *dev)
727 struct pneigh_entry *n;
728 int ret = -1;
730 read_lock_bh(&nd_tbl.lock);
731 n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
732 if (n)
733 ret = !!(n->flags & NTF_ROUTER);
734 read_unlock_bh(&nd_tbl.lock);
736 return ret;
739 static void ndisc_recv_ns(struct sk_buff *skb)
741 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
742 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
743 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
744 u8 *lladdr = NULL;
745 u32 ndoptlen = skb->tail - (skb->transport_header +
746 offsetof(struct nd_msg, opt));
747 struct ndisc_options ndopts;
748 struct net_device *dev = skb->dev;
749 struct inet6_ifaddr *ifp;
750 struct inet6_dev *idev = NULL;
751 struct neighbour *neigh;
752 int dad = ipv6_addr_any(saddr);
753 int inc;
754 int is_router = -1;
756 if (ipv6_addr_is_multicast(&msg->target)) {
757 ND_PRINTK2(KERN_WARNING
758 "ICMPv6 NS: multicast target address");
759 return;
763 * RFC2461 7.1.1:
764 * DAD has to be destined for solicited node multicast address.
766 if (dad &&
767 !(daddr->s6_addr32[0] == htonl(0xff020000) &&
768 daddr->s6_addr32[1] == htonl(0x00000000) &&
769 daddr->s6_addr32[2] == htonl(0x00000001) &&
770 daddr->s6_addr [12] == 0xff )) {
771 ND_PRINTK2(KERN_WARNING
772 "ICMPv6 NS: bad DAD packet (wrong destination)\n");
773 return;
776 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
777 ND_PRINTK2(KERN_WARNING
778 "ICMPv6 NS: invalid ND options\n");
779 return;
782 if (ndopts.nd_opts_src_lladdr) {
783 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
784 if (!lladdr) {
785 ND_PRINTK2(KERN_WARNING
786 "ICMPv6 NS: invalid link-layer address length\n");
787 return;
790 /* RFC2461 7.1.1:
791 * If the IP source address is the unspecified address,
792 * there MUST NOT be source link-layer address option
793 * in the message.
795 if (dad) {
796 ND_PRINTK2(KERN_WARNING
797 "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
798 return;
802 inc = ipv6_addr_is_multicast(daddr);
804 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
805 if (ifp) {
807 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
808 if (dad) {
809 if (dev->type == ARPHRD_IEEE802_TR) {
810 const unsigned char *sadr;
811 sadr = skb_mac_header(skb);
812 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
813 sadr[9] == dev->dev_addr[1] &&
814 sadr[10] == dev->dev_addr[2] &&
815 sadr[11] == dev->dev_addr[3] &&
816 sadr[12] == dev->dev_addr[4] &&
817 sadr[13] == dev->dev_addr[5]) {
818 /* looped-back to us */
819 goto out;
824 * We are colliding with another node
825 * who is doing DAD
826 * so fail our DAD process
828 addrconf_dad_failure(ifp);
829 return;
830 } else {
832 * This is not a dad solicitation.
833 * If we are an optimistic node,
834 * we should respond.
835 * Otherwise, we should ignore it.
837 if (!(ifp->flags & IFA_F_OPTIMISTIC))
838 goto out;
842 idev = ifp->idev;
843 } else {
844 struct net *net = dev_net(dev);
846 idev = in6_dev_get(dev);
847 if (!idev) {
848 /* XXX: count this drop? */
849 return;
852 if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
853 (idev->cnf.forwarding &&
854 (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
855 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
856 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
857 skb->pkt_type != PACKET_HOST &&
858 inc != 0 &&
859 idev->nd_parms->proxy_delay != 0) {
861 * for anycast or proxy,
862 * sender should delay its response
863 * by a random time between 0 and
864 * MAX_ANYCAST_DELAY_TIME seconds.
865 * (RFC2461) -- yoshfuji
867 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
868 if (n)
869 pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
870 goto out;
872 } else
873 goto out;
876 if (is_router < 0)
877 is_router = !!idev->cnf.forwarding;
879 if (dad) {
880 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
881 is_router, 0, (ifp != NULL), 1);
882 goto out;
885 if (inc)
886 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
887 else
888 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
891 * update / create cache entry
892 * for the source address
894 neigh = __neigh_lookup(&nd_tbl, saddr, dev,
895 !inc || lladdr || !dev->addr_len);
896 if (neigh)
897 neigh_update(neigh, lladdr, NUD_STALE,
898 NEIGH_UPDATE_F_WEAK_OVERRIDE|
899 NEIGH_UPDATE_F_OVERRIDE);
900 if (neigh || !dev->header_ops) {
901 ndisc_send_na(dev, neigh, saddr, &msg->target,
902 is_router,
903 1, (ifp != NULL && inc), inc);
904 if (neigh)
905 neigh_release(neigh);
908 out:
909 if (ifp)
910 in6_ifa_put(ifp);
911 else
912 in6_dev_put(idev);
915 static void ndisc_recv_na(struct sk_buff *skb)
917 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
918 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
919 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
920 u8 *lladdr = NULL;
921 u32 ndoptlen = skb->tail - (skb->transport_header +
922 offsetof(struct nd_msg, opt));
923 struct ndisc_options ndopts;
924 struct net_device *dev = skb->dev;
925 struct inet6_ifaddr *ifp;
926 struct neighbour *neigh;
928 if (skb->len < sizeof(struct nd_msg)) {
929 ND_PRINTK2(KERN_WARNING
930 "ICMPv6 NA: packet too short\n");
931 return;
934 if (ipv6_addr_is_multicast(&msg->target)) {
935 ND_PRINTK2(KERN_WARNING
936 "ICMPv6 NA: target address is multicast.\n");
937 return;
940 if (ipv6_addr_is_multicast(daddr) &&
941 msg->icmph.icmp6_solicited) {
942 ND_PRINTK2(KERN_WARNING
943 "ICMPv6 NA: solicited NA is multicasted.\n");
944 return;
947 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
948 ND_PRINTK2(KERN_WARNING
949 "ICMPv6 NS: invalid ND option\n");
950 return;
952 if (ndopts.nd_opts_tgt_lladdr) {
953 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
954 if (!lladdr) {
955 ND_PRINTK2(KERN_WARNING
956 "ICMPv6 NA: invalid link-layer address length\n");
957 return;
960 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
961 if (ifp) {
962 if (skb->pkt_type != PACKET_LOOPBACK
963 && (ifp->flags & IFA_F_TENTATIVE)) {
964 addrconf_dad_failure(ifp);
965 return;
967 /* What should we make now? The advertisement
968 is invalid, but ndisc specs say nothing
969 about it. It could be misconfiguration, or
970 an smart proxy agent tries to help us :-)
972 We should not print the error if NA has been
973 received from loopback - it is just our own
974 unsolicited advertisement.
976 if (skb->pkt_type != PACKET_LOOPBACK)
977 ND_PRINTK1(KERN_WARNING
978 "ICMPv6 NA: someone advertises our address %pI6 on %s!\n",
979 &ifp->addr, ifp->idev->dev->name);
980 in6_ifa_put(ifp);
981 return;
983 neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
985 if (neigh) {
986 u8 old_flags = neigh->flags;
987 struct net *net = dev_net(dev);
989 if (neigh->nud_state & NUD_FAILED)
990 goto out;
993 * Don't update the neighbor cache entry on a proxy NA from
994 * ourselves because either the proxied node is off link or it
995 * has already sent a NA to us.
997 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
998 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
999 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
1000 /* XXX: idev->cnf.prixy_ndp */
1001 goto out;
1004 neigh_update(neigh, lladdr,
1005 msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
1006 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1007 (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
1008 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1009 (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
1011 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
1013 * Change: router to host
1015 struct rt6_info *rt;
1016 rt = rt6_get_dflt_router(saddr, dev);
1017 if (rt)
1018 ip6_del_rt(rt);
1021 out:
1022 neigh_release(neigh);
1026 static void ndisc_recv_rs(struct sk_buff *skb)
1028 struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1029 unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1030 struct neighbour *neigh;
1031 struct inet6_dev *idev;
1032 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1033 struct ndisc_options ndopts;
1034 u8 *lladdr = NULL;
1036 if (skb->len < sizeof(*rs_msg))
1037 return;
1039 idev = __in6_dev_get(skb->dev);
1040 if (!idev) {
1041 if (net_ratelimit())
1042 ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
1043 return;
1046 /* Don't accept RS if we're not in router mode */
1047 if (!idev->cnf.forwarding)
1048 goto out;
1051 * Don't update NCE if src = ::;
1052 * this implies that the source node has no ip address assigned yet.
1054 if (ipv6_addr_any(saddr))
1055 goto out;
1057 /* Parse ND options */
1058 if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
1059 if (net_ratelimit())
1060 ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
1061 goto out;
1064 if (ndopts.nd_opts_src_lladdr) {
1065 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1066 skb->dev);
1067 if (!lladdr)
1068 goto out;
1071 neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1072 if (neigh) {
1073 neigh_update(neigh, lladdr, NUD_STALE,
1074 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1075 NEIGH_UPDATE_F_OVERRIDE|
1076 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1077 neigh_release(neigh);
1079 out:
1080 return;
1083 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1085 struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1086 struct sk_buff *skb;
1087 struct nlmsghdr *nlh;
1088 struct nduseroptmsg *ndmsg;
1089 struct net *net = dev_net(ra->dev);
1090 int err;
1091 int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1092 + (opt->nd_opt_len << 3));
1093 size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1095 skb = nlmsg_new(msg_size, GFP_ATOMIC);
1096 if (skb == NULL) {
1097 err = -ENOBUFS;
1098 goto errout;
1101 nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1102 if (nlh == NULL) {
1103 goto nla_put_failure;
1106 ndmsg = nlmsg_data(nlh);
1107 ndmsg->nduseropt_family = AF_INET6;
1108 ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1109 ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1110 ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1111 ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1113 memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1115 NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1116 &ipv6_hdr(ra)->saddr);
1117 nlmsg_end(skb, nlh);
1119 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1120 return;
1122 nla_put_failure:
1123 nlmsg_free(skb);
1124 err = -EMSGSIZE;
1125 errout:
1126 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1129 static inline int accept_ra(struct inet6_dev *in6_dev)
1132 * If forwarding is enabled, RA are not accepted unless the special
1133 * hybrid mode (accept_ra=2) is enabled.
1135 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1136 return 0;
1138 return in6_dev->cnf.accept_ra;
1141 static void ndisc_router_discovery(struct sk_buff *skb)
1143 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1144 struct neighbour *neigh = NULL;
1145 struct inet6_dev *in6_dev;
1146 struct rt6_info *rt = NULL;
1147 int lifetime;
1148 struct ndisc_options ndopts;
1149 int optlen;
1150 unsigned int pref = 0;
1152 __u8 * opt = (__u8 *)(ra_msg + 1);
1154 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1156 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1157 ND_PRINTK2(KERN_WARNING
1158 "ICMPv6 RA: source address is not link-local.\n");
1159 return;
1161 if (optlen < 0) {
1162 ND_PRINTK2(KERN_WARNING
1163 "ICMPv6 RA: packet too short\n");
1164 return;
1167 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1168 if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1169 ND_PRINTK2(KERN_WARNING
1170 "ICMPv6 RA: from host or unauthorized router\n");
1171 return;
1173 #endif
1176 * set the RA_RECV flag in the interface
1179 in6_dev = __in6_dev_get(skb->dev);
1180 if (in6_dev == NULL) {
1181 ND_PRINTK0(KERN_ERR
1182 "ICMPv6 RA: can't find inet6 device for %s.\n",
1183 skb->dev->name);
1184 return;
1187 if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1188 ND_PRINTK2(KERN_WARNING
1189 "ICMP6 RA: invalid ND options\n");
1190 return;
1193 if (!accept_ra(in6_dev))
1194 goto skip_linkparms;
1196 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1197 /* skip link-specific parameters from interior routers */
1198 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1199 goto skip_linkparms;
1200 #endif
1202 if (in6_dev->if_flags & IF_RS_SENT) {
1204 * flag that an RA was received after an RS was sent
1205 * out on this interface.
1207 in6_dev->if_flags |= IF_RA_RCVD;
1211 * Remember the managed/otherconf flags from most recently
1212 * received RA message (RFC 2462) -- yoshfuji
1214 in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1215 IF_RA_OTHERCONF)) |
1216 (ra_msg->icmph.icmp6_addrconf_managed ?
1217 IF_RA_MANAGED : 0) |
1218 (ra_msg->icmph.icmp6_addrconf_other ?
1219 IF_RA_OTHERCONF : 0);
1221 if (!in6_dev->cnf.accept_ra_defrtr)
1222 goto skip_defrtr;
1224 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1225 goto skip_defrtr;
1227 lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1229 #ifdef CONFIG_IPV6_ROUTER_PREF
1230 pref = ra_msg->icmph.icmp6_router_pref;
1231 /* 10b is handled as if it were 00b (medium) */
1232 if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1233 !in6_dev->cnf.accept_ra_rtr_pref)
1234 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1235 #endif
1237 rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1239 if (rt)
1240 neigh = dst_get_neighbour(&rt->dst);
1242 if (rt && lifetime == 0) {
1243 neigh_clone(neigh);
1244 ip6_del_rt(rt);
1245 rt = NULL;
1248 if (rt == NULL && lifetime) {
1249 ND_PRINTK3(KERN_DEBUG
1250 "ICMPv6 RA: adding default router.\n");
1252 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1253 if (rt == NULL) {
1254 ND_PRINTK0(KERN_ERR
1255 "ICMPv6 RA: %s() failed to add default route.\n",
1256 __func__);
1257 return;
1260 neigh = dst_get_neighbour(&rt->dst);
1261 if (neigh == NULL) {
1262 ND_PRINTK0(KERN_ERR
1263 "ICMPv6 RA: %s() got default router without neighbour.\n",
1264 __func__);
1265 dst_release(&rt->dst);
1266 return;
1268 neigh->flags |= NTF_ROUTER;
1269 } else if (rt) {
1270 rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1273 if (rt)
1274 rt->rt6i_expires = jiffies + (HZ * lifetime);
1276 if (ra_msg->icmph.icmp6_hop_limit) {
1277 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1278 if (rt)
1279 dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
1280 ra_msg->icmph.icmp6_hop_limit);
1283 skip_defrtr:
1286 * Update Reachable Time and Retrans Timer
1289 if (in6_dev->nd_parms) {
1290 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1292 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1293 rtime = (rtime*HZ)/1000;
1294 if (rtime < HZ/10)
1295 rtime = HZ/10;
1296 in6_dev->nd_parms->retrans_time = rtime;
1297 in6_dev->tstamp = jiffies;
1298 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1301 rtime = ntohl(ra_msg->reachable_time);
1302 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1303 rtime = (rtime*HZ)/1000;
1305 if (rtime < HZ/10)
1306 rtime = HZ/10;
1308 if (rtime != in6_dev->nd_parms->base_reachable_time) {
1309 in6_dev->nd_parms->base_reachable_time = rtime;
1310 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1311 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1312 in6_dev->tstamp = jiffies;
1313 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1318 skip_linkparms:
1321 * Process options.
1324 if (!neigh)
1325 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1326 skb->dev, 1);
1327 if (neigh) {
1328 u8 *lladdr = NULL;
1329 if (ndopts.nd_opts_src_lladdr) {
1330 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1331 skb->dev);
1332 if (!lladdr) {
1333 ND_PRINTK2(KERN_WARNING
1334 "ICMPv6 RA: invalid link-layer address length\n");
1335 goto out;
1338 neigh_update(neigh, lladdr, NUD_STALE,
1339 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1340 NEIGH_UPDATE_F_OVERRIDE|
1341 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1342 NEIGH_UPDATE_F_ISROUTER);
1345 if (!accept_ra(in6_dev))
1346 goto out;
1348 #ifdef CONFIG_IPV6_ROUTE_INFO
1349 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1350 goto skip_routeinfo;
1352 if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1353 struct nd_opt_hdr *p;
1354 for (p = ndopts.nd_opts_ri;
1356 p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1357 struct route_info *ri = (struct route_info *)p;
1358 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1359 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1360 ri->prefix_len == 0)
1361 continue;
1362 #endif
1363 if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1364 continue;
1365 rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1366 &ipv6_hdr(skb)->saddr);
1370 skip_routeinfo:
1371 #endif
1373 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1374 /* skip link-specific ndopts from interior routers */
1375 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1376 goto out;
1377 #endif
1379 if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1380 struct nd_opt_hdr *p;
1381 for (p = ndopts.nd_opts_pi;
1383 p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1384 addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1388 if (ndopts.nd_opts_mtu) {
1389 __be32 n;
1390 u32 mtu;
1392 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1393 mtu = ntohl(n);
1395 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1396 ND_PRINTK2(KERN_WARNING
1397 "ICMPv6 RA: invalid mtu: %d\n",
1398 mtu);
1399 } else if (in6_dev->cnf.mtu6 != mtu) {
1400 in6_dev->cnf.mtu6 = mtu;
1402 if (rt)
1403 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
1405 rt6_mtu_change(skb->dev, mtu);
1409 if (ndopts.nd_useropts) {
1410 struct nd_opt_hdr *p;
1411 for (p = ndopts.nd_useropts;
1413 p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1414 ndisc_ra_useropt(skb, p);
1418 if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1419 ND_PRINTK2(KERN_WARNING
1420 "ICMPv6 RA: invalid RA options");
1422 out:
1423 if (rt)
1424 dst_release(&rt->dst);
1425 else if (neigh)
1426 neigh_release(neigh);
1429 static void ndisc_redirect_rcv(struct sk_buff *skb)
1431 struct inet6_dev *in6_dev;
1432 struct icmp6hdr *icmph;
1433 const struct in6_addr *dest;
1434 const struct in6_addr *target; /* new first hop to destination */
1435 struct neighbour *neigh;
1436 int on_link = 0;
1437 struct ndisc_options ndopts;
1438 int optlen;
1439 u8 *lladdr = NULL;
1441 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1442 switch (skb->ndisc_nodetype) {
1443 case NDISC_NODETYPE_HOST:
1444 case NDISC_NODETYPE_NODEFAULT:
1445 ND_PRINTK2(KERN_WARNING
1446 "ICMPv6 Redirect: from host or unauthorized router\n");
1447 return;
1449 #endif
1451 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1452 ND_PRINTK2(KERN_WARNING
1453 "ICMPv6 Redirect: source address is not link-local.\n");
1454 return;
1457 optlen = skb->tail - skb->transport_header;
1458 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1460 if (optlen < 0) {
1461 ND_PRINTK2(KERN_WARNING
1462 "ICMPv6 Redirect: packet too short\n");
1463 return;
1466 icmph = icmp6_hdr(skb);
1467 target = (const struct in6_addr *) (icmph + 1);
1468 dest = target + 1;
1470 if (ipv6_addr_is_multicast(dest)) {
1471 ND_PRINTK2(KERN_WARNING
1472 "ICMPv6 Redirect: destination address is multicast.\n");
1473 return;
1476 if (ipv6_addr_equal(dest, target)) {
1477 on_link = 1;
1478 } else if (ipv6_addr_type(target) !=
1479 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1480 ND_PRINTK2(KERN_WARNING
1481 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1482 return;
1485 in6_dev = __in6_dev_get(skb->dev);
1486 if (!in6_dev)
1487 return;
1488 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1489 return;
1491 /* RFC2461 8.1:
1492 * The IP source address of the Redirect MUST be the same as the current
1493 * first-hop router for the specified ICMP Destination Address.
1496 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1497 ND_PRINTK2(KERN_WARNING
1498 "ICMPv6 Redirect: invalid ND options\n");
1499 return;
1501 if (ndopts.nd_opts_tgt_lladdr) {
1502 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1503 skb->dev);
1504 if (!lladdr) {
1505 ND_PRINTK2(KERN_WARNING
1506 "ICMPv6 Redirect: invalid link-layer address length\n");
1507 return;
1511 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1512 if (neigh) {
1513 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1514 &ipv6_hdr(skb)->saddr, neigh, lladdr,
1515 on_link);
1516 neigh_release(neigh);
1520 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1521 const struct in6_addr *target)
1523 struct net_device *dev = skb->dev;
1524 struct net *net = dev_net(dev);
1525 struct sock *sk = net->ipv6.ndisc_sk;
1526 int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1527 struct sk_buff *buff;
1528 struct icmp6hdr *icmph;
1529 struct in6_addr saddr_buf;
1530 struct in6_addr *addrp;
1531 struct rt6_info *rt;
1532 struct dst_entry *dst;
1533 struct inet6_dev *idev;
1534 struct flowi6 fl6;
1535 u8 *opt;
1536 int rd_len;
1537 int err;
1538 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1540 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1541 ND_PRINTK2(KERN_WARNING
1542 "ICMPv6 Redirect: no link-local address on %s\n",
1543 dev->name);
1544 return;
1547 if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1548 ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1549 ND_PRINTK2(KERN_WARNING
1550 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1551 return;
1554 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1555 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1557 dst = ip6_route_output(net, NULL, &fl6);
1558 if (dst == NULL)
1559 return;
1561 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1562 if (IS_ERR(dst))
1563 return;
1565 rt = (struct rt6_info *) dst;
1567 if (rt->rt6i_flags & RTF_GATEWAY) {
1568 ND_PRINTK2(KERN_WARNING
1569 "ICMPv6 Redirect: destination is not a neighbour.\n");
1570 goto release;
1572 if (!rt->rt6i_peer)
1573 rt6_bind_peer(rt, 1);
1574 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1575 goto release;
1577 if (dev->addr_len) {
1578 read_lock_bh(&neigh->lock);
1579 if (neigh->nud_state & NUD_VALID) {
1580 memcpy(ha_buf, neigh->ha, dev->addr_len);
1581 read_unlock_bh(&neigh->lock);
1582 ha = ha_buf;
1583 len += ndisc_opt_addr_space(dev);
1584 } else
1585 read_unlock_bh(&neigh->lock);
1588 rd_len = min_t(unsigned int,
1589 IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1590 rd_len &= ~0x7;
1591 len += rd_len;
1593 buff = sock_alloc_send_skb(sk,
1594 (MAX_HEADER + sizeof(struct ipv6hdr) +
1595 len + LL_ALLOCATED_SPACE(dev)),
1596 1, &err);
1597 if (buff == NULL) {
1598 ND_PRINTK0(KERN_ERR
1599 "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n",
1600 __func__, err);
1601 goto release;
1604 skb_reserve(buff, LL_RESERVED_SPACE(dev));
1605 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1606 IPPROTO_ICMPV6, len);
1608 skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1609 skb_put(buff, len);
1610 icmph = icmp6_hdr(buff);
1612 memset(icmph, 0, sizeof(struct icmp6hdr));
1613 icmph->icmp6_type = NDISC_REDIRECT;
1616 * copy target and destination addresses
1619 addrp = (struct in6_addr *)(icmph + 1);
1620 ipv6_addr_copy(addrp, target);
1621 addrp++;
1622 ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1624 opt = (u8*) (addrp + 1);
1627 * include target_address option
1630 if (ha)
1631 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1632 dev->addr_len, dev->type);
1635 * build redirect option and copy skb over to the new packet.
1638 memset(opt, 0, 8);
1639 *(opt++) = ND_OPT_REDIRECT_HDR;
1640 *(opt++) = (rd_len >> 3);
1641 opt += 6;
1643 memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1645 icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1646 len, IPPROTO_ICMPV6,
1647 csum_partial(icmph, len, 0));
1649 skb_dst_set(buff, dst);
1650 rcu_read_lock();
1651 idev = __in6_dev_get(dst->dev);
1652 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1653 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1654 dst_output);
1655 if (!err) {
1656 ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
1657 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1660 rcu_read_unlock();
1661 return;
1663 release:
1664 dst_release(dst);
/*
 * Re-run Neighbour Solicitation processing on @skb and then free it.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);

	/* This function owns the skb, so release it once processing is done. */
	kfree_skb(skb);
}
1673 int ndisc_rcv(struct sk_buff *skb)
1675 struct nd_msg *msg;
1677 if (!pskb_may_pull(skb, skb->len))
1678 return 0;
1680 msg = (struct nd_msg *)skb_transport_header(skb);
1682 __skb_push(skb, skb->data - skb_transport_header(skb));
1684 if (ipv6_hdr(skb)->hop_limit != 255) {
1685 ND_PRINTK2(KERN_WARNING
1686 "ICMPv6 NDISC: invalid hop-limit: %d\n",
1687 ipv6_hdr(skb)->hop_limit);
1688 return 0;
1691 if (msg->icmph.icmp6_code != 0) {
1692 ND_PRINTK2(KERN_WARNING
1693 "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1694 msg->icmph.icmp6_code);
1695 return 0;
1698 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1700 switch (msg->icmph.icmp6_type) {
1701 case NDISC_NEIGHBOUR_SOLICITATION:
1702 ndisc_recv_ns(skb);
1703 break;
1705 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1706 ndisc_recv_na(skb);
1707 break;
1709 case NDISC_ROUTER_SOLICITATION:
1710 ndisc_recv_rs(skb);
1711 break;
1713 case NDISC_ROUTER_ADVERTISEMENT:
1714 ndisc_router_discovery(skb);
1715 break;
1717 case NDISC_REDIRECT:
1718 ndisc_redirect_rcv(skb);
1719 break;
1722 return 0;
1725 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1727 struct net_device *dev = ptr;
1728 struct net *net = dev_net(dev);
1730 switch (event) {
1731 case NETDEV_CHANGEADDR:
1732 neigh_changeaddr(&nd_tbl, dev);
1733 fib6_run_gc(~0UL, net);
1734 break;
1735 case NETDEV_DOWN:
1736 neigh_ifdown(&nd_tbl, dev);
1737 fib6_run_gc(~0UL, net);
1738 break;
1739 case NETDEV_NOTIFY_PEERS:
1740 ndisc_send_unsol_na(dev);
1741 break;
1742 default:
1743 break;
1746 return NOTIFY_DONE;
1749 static struct notifier_block ndisc_netdev_notifier = {
1750 .notifier_call = ndisc_netdev_event,
#ifdef CONFIG_SYSCTL
/*
 * Warn that a process used one of the deprecated neigh sysctls.
 *
 * At most five warnings are printed in total, and none while the calling
 * process has the same name as the one warned about last.
 *
 * @ctl:      the sysctl entry being accessed.
 * @func:     label for the access path, printed verbatim.
 * @dev_name: device name component of the sysctl path.
 */
static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
					 const char *func, const char *dev_name)
{
	static char last_comm[TASK_COMM_LEN];
	static int nr_warned;

	if (strcmp(last_comm, current->comm) == 0 || nr_warned >= 5)
		return;

	strcpy(last_comm, current->comm);
	printk(KERN_WARNING
	       "process `%s' is using deprecated sysctl (%s) "
	       "net.ipv6.neigh.%s.%s; "
	       "Use net.ipv6.neigh.%s.%s_ms "
	       "instead.\n",
	       last_comm, func,
	       dev_name, ctl->procname,
	       dev_name, ctl->procname);
	nr_warned++;
}

/*
 * proc handler for the retrans_time / base_reachable_time sysctls and
 * their _ms variants.
 *
 * Picks the integer parser by entry name, and after a successful write to
 * a per-device entry recomputes reachable_time (when base_reachable_time
 * was the value written) and sends an RTM_NEWLINK notification.
 *
 * Returns the parser's result, or -1 for an unknown entry name.
 */
int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net_device *dev = ctl->extra1;
	const char *name = ctl->procname;
	int is_retrans = (strcmp(name, "retrans_time") == 0);
	int is_reach = (strcmp(name, "base_reachable_time") == 0);
	struct inet6_dev *idev;
	int ret;

	/* The entries without the _ms suffix are deprecated. */
	if (is_retrans || is_reach)
		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");

	if (is_retrans) {
		ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	} else if (is_reach) {
		ret = proc_dointvec_jiffies(ctl, write,
					    buffer, lenp, ppos);
	} else if (strcmp(name, "retrans_time_ms") == 0 ||
		   strcmp(name, "base_reachable_time_ms") == 0) {
		ret = proc_dointvec_ms_jiffies(ctl, write,
					       buffer, lenp, ppos);
	} else {
		ret = -1;
	}

	/* Only a successful write to a per-device entry needs follow-up. */
	if (!write || ret != 0 || !dev)
		return ret;

	idev = in6_dev_get(dev);
	if (idev == NULL)
		return ret;

	if (ctl->data == &idev->nd_parms->base_reachable_time)
		idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
	idev->tstamp = jiffies;
	inet6_ifinfo_notify(RTM_NEWLINK, idev);
	in6_dev_put(idev);

	return ret;
}


#endif
1810 static int __net_init ndisc_net_init(struct net *net)
1812 struct ipv6_pinfo *np;
1813 struct sock *sk;
1814 int err;
1816 err = inet_ctl_sock_create(&sk, PF_INET6,
1817 SOCK_RAW, IPPROTO_ICMPV6, net);
1818 if (err < 0) {
1819 ND_PRINTK0(KERN_ERR
1820 "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1821 err);
1822 return err;
1825 net->ipv6.ndisc_sk = sk;
1827 np = inet6_sk(sk);
1828 np->hop_limit = 255;
1829 /* Do not loopback ndisc messages */
1830 np->mc_loop = 0;
1832 return 0;
1835 static void __net_exit ndisc_net_exit(struct net *net)
1837 inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1840 static struct pernet_operations ndisc_net_ops = {
1841 .init = ndisc_net_init,
1842 .exit = ndisc_net_exit,
1845 int __init ndisc_init(void)
1847 int err;
1849 err = register_pernet_subsys(&ndisc_net_ops);
1850 if (err)
1851 return err;
1853 * Initialize the neighbour table
1855 neigh_table_init(&nd_tbl);
1857 #ifdef CONFIG_SYSCTL
1858 err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
1859 &ndisc_ifinfo_sysctl_change);
1860 if (err)
1861 goto out_unregister_pernet;
1862 #endif
1863 err = register_netdevice_notifier(&ndisc_netdev_notifier);
1864 if (err)
1865 goto out_unregister_sysctl;
1866 out:
1867 return err;
1869 out_unregister_sysctl:
1870 #ifdef CONFIG_SYSCTL
1871 neigh_sysctl_unregister(&nd_tbl.parms);
1872 out_unregister_pernet:
1873 #endif
1874 unregister_pernet_subsys(&ndisc_net_ops);
1875 goto out;
1878 void ndisc_cleanup(void)
1880 unregister_netdevice_notifier(&ndisc_netdev_notifier);
1881 #ifdef CONFIG_SYSCTL
1882 neigh_sysctl_unregister(&nd_tbl.parms);
1883 #endif
1884 neigh_table_clear(&nd_tbl);
1885 unregister_pernet_subsys(&ndisc_net_ops);