Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / ipv6 / ndisc.c
blob2854705b15ea0f59187743c5a36a3cb933915f43
1 /*
2 * Neighbour Discovery for IPv6
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Mike Shaver <shaver@ingenia.com>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
16 * Changes:
18 * Pierre Ynard : export userland ND options
19 * through netlink (RDNSS support)
20 * Lars Fenneberg : fixed MTU setting on receipt
21 * of an RA.
22 * Janos Farkas : kmalloc failure checks
23 * Alexey Kuznetsov : state machine reworked
24 * and moved to net/core.
25 * Pekka Savola : RFC2461 validation
26 * YOSHIFUJI Hideaki @USAGI : Verify ND options properly
/*
 * Debug verbosity for this file.  A level-N message (ND_PRINTKn) is
 * emitted only when ND_DEBUG >= N; level 0 is always emitted.
 * Set to 3 to get tracing...
 */
#define ND_DEBUG 1

/* Rate-limited printk(); every enabled level expands to this. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
/* Empty statement; every disabled level expands to this. */
#define ND_NOPRINTK(x...) do { ; } while(0)

#define ND_PRINTK0 ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1 ND_PRINTK
#else
#define ND_PRINTK1 ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2 ND_PRINTK
#else
#define ND_PRINTK2 ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3 ND_PRINTK
#else
#define ND_PRINTK3 ND_NOPRINTK
#endif
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #include <linux/slab.h>
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
67 #include <linux/if_addr.h>
68 #include <linux/if_arp.h>
69 #include <linux/ipv6.h>
70 #include <linux/icmpv6.h>
71 #include <linux/jhash.h>
73 #include <net/sock.h>
74 #include <net/snmp.h>
76 #include <net/ipv6.h>
77 #include <net/protocol.h>
78 #include <net/ndisc.h>
79 #include <net/ip6_route.h>
80 #include <net/addrconf.h>
81 #include <net/icmp.h>
83 #include <net/netlink.h>
84 #include <linux/rtnetlink.h>
86 #include <net/flow.h>
87 #include <net/ip6_checksum.h>
88 #include <net/inet_common.h>
89 #include <linux/proc_fs.h>
91 #include <linux/netfilter.h>
92 #include <linux/netfilter_ipv6.h>
94 static u32 ndisc_hash(const void *pkey,
95 const struct net_device *dev,
96 __u32 rnd);
97 static int ndisc_constructor(struct neighbour *neigh);
98 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
99 static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
100 static int pndisc_constructor(struct pneigh_entry *n);
101 static void pndisc_destructor(struct pneigh_entry *n);
102 static void pndisc_redo(struct sk_buff *skb);
104 static const struct neigh_ops ndisc_generic_ops = {
105 .family = AF_INET6,
106 .solicit = ndisc_solicit,
107 .error_report = ndisc_error_report,
108 .output = neigh_resolve_output,
109 .connected_output = neigh_connected_output,
112 static const struct neigh_ops ndisc_hh_ops = {
113 .family = AF_INET6,
114 .solicit = ndisc_solicit,
115 .error_report = ndisc_error_report,
116 .output = neigh_resolve_output,
117 .connected_output = neigh_resolve_output,
121 static const struct neigh_ops ndisc_direct_ops = {
122 .family = AF_INET6,
123 .output = neigh_direct_output,
124 .connected_output = neigh_direct_output,
127 struct neigh_table nd_tbl = {
128 .family = AF_INET6,
129 .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr),
130 .key_len = sizeof(struct in6_addr),
131 .hash = ndisc_hash,
132 .constructor = ndisc_constructor,
133 .pconstructor = pndisc_constructor,
134 .pdestructor = pndisc_destructor,
135 .proxy_redo = pndisc_redo,
136 .id = "ndisc_cache",
137 .parms = {
138 .tbl = &nd_tbl,
139 .base_reachable_time = ND_REACHABLE_TIME,
140 .retrans_time = ND_RETRANS_TIMER,
141 .gc_staletime = 60 * HZ,
142 .reachable_time = ND_REACHABLE_TIME,
143 .delay_probe_time = 5 * HZ,
144 .queue_len_bytes = 64*1024,
145 .ucast_probes = 3,
146 .mcast_probes = 3,
147 .anycast_delay = 1 * HZ,
148 .proxy_delay = (8 * HZ) / 10,
149 .proxy_qlen = 64,
151 .gc_interval = 30 * HZ,
152 .gc_thresh1 = 128,
153 .gc_thresh2 = 512,
154 .gc_thresh3 = 1024,
157 /* ND options */
158 struct ndisc_options {
159 struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
160 #ifdef CONFIG_IPV6_ROUTE_INFO
161 struct nd_opt_hdr *nd_opts_ri;
162 struct nd_opt_hdr *nd_opts_ri_end;
163 #endif
164 struct nd_opt_hdr *nd_useropts;
165 struct nd_opt_hdr *nd_useropts_end;
/* Named accessors for the slots of ndisc_options::nd_opt_array. */
#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end		nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]

/*
 * Size of an option carrying @len payload bytes: adds the 2-byte
 * type/length header and rounds up to a multiple of 8.
 */
#define NDISC_OPT_SPACE(len) (((len) + 2 + 7) & ~7)
178 * Return the padding between the option length and the start of the
179 * link addr. Currently only IP-over-InfiniBand needs this, although
180 * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
181 * also need a pad of 2.
183 static int ndisc_addr_option_pad(unsigned short type)
185 switch (type) {
186 case ARPHRD_INFINIBAND: return 2;
187 default: return 0;
191 static inline int ndisc_opt_addr_space(struct net_device *dev)
193 return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
196 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
197 unsigned short addr_type)
199 int space = NDISC_OPT_SPACE(data_len);
200 int pad = ndisc_addr_option_pad(addr_type);
202 opt[0] = type;
203 opt[1] = space>>3;
205 memset(opt + 2, 0, pad);
206 opt += pad;
207 space -= pad;
209 memcpy(opt+2, data, data_len);
210 data_len += 2;
211 opt += data_len;
212 if ((space -= data_len) > 0)
213 memset(opt, 0, space);
214 return opt + space;
217 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
218 struct nd_opt_hdr *end)
220 int type;
221 if (!cur || !end || cur >= end)
222 return NULL;
223 type = cur->nd_opt_type;
224 do {
225 cur = ((void *)cur) + (cur->nd_opt_len << 3);
226 } while(cur < end && cur->nd_opt_type != type);
227 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
230 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
232 return opt->nd_opt_type == ND_OPT_RDNSS;
235 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
236 struct nd_opt_hdr *end)
238 if (!cur || !end || cur >= end)
239 return NULL;
240 do {
241 cur = ((void *)cur) + (cur->nd_opt_len << 3);
242 } while(cur < end && !ndisc_is_useropt(cur));
243 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
246 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
247 struct ndisc_options *ndopts)
249 struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
251 if (!nd_opt || opt_len < 0 || !ndopts)
252 return NULL;
253 memset(ndopts, 0, sizeof(*ndopts));
254 while (opt_len) {
255 int l;
256 if (opt_len < sizeof(struct nd_opt_hdr))
257 return NULL;
258 l = nd_opt->nd_opt_len << 3;
259 if (opt_len < l || l == 0)
260 return NULL;
261 switch (nd_opt->nd_opt_type) {
262 case ND_OPT_SOURCE_LL_ADDR:
263 case ND_OPT_TARGET_LL_ADDR:
264 case ND_OPT_MTU:
265 case ND_OPT_REDIRECT_HDR:
266 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
267 ND_PRINTK2(KERN_WARNING
268 "%s(): duplicated ND6 option found: type=%d\n",
269 __func__,
270 nd_opt->nd_opt_type);
271 } else {
272 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
274 break;
275 case ND_OPT_PREFIX_INFO:
276 ndopts->nd_opts_pi_end = nd_opt;
277 if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
278 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
279 break;
280 #ifdef CONFIG_IPV6_ROUTE_INFO
281 case ND_OPT_ROUTE_INFO:
282 ndopts->nd_opts_ri_end = nd_opt;
283 if (!ndopts->nd_opts_ri)
284 ndopts->nd_opts_ri = nd_opt;
285 break;
286 #endif
287 default:
288 if (ndisc_is_useropt(nd_opt)) {
289 ndopts->nd_useropts_end = nd_opt;
290 if (!ndopts->nd_useropts)
291 ndopts->nd_useropts = nd_opt;
292 } else {
294 * Unknown options must be silently ignored,
295 * to accommodate future extension to the
296 * protocol.
298 ND_PRINTK2(KERN_NOTICE
299 "%s(): ignored unsupported option; type=%d, len=%d\n",
300 __func__,
301 nd_opt->nd_opt_type, nd_opt->nd_opt_len);
304 opt_len -= l;
305 nd_opt = ((void *)nd_opt) + l;
307 return ndopts;
310 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
311 struct net_device *dev)
313 u8 *lladdr = (u8 *)(p + 1);
314 int lladdrlen = p->nd_opt_len << 3;
315 int prepad = ndisc_addr_option_pad(dev->type);
316 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
317 return NULL;
318 return lladdr + prepad;
321 int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
323 switch (dev->type) {
324 case ARPHRD_ETHER:
325 case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */
326 case ARPHRD_FDDI:
327 ipv6_eth_mc_map(addr, buf);
328 return 0;
329 case ARPHRD_IEEE802_TR:
330 ipv6_tr_mc_map(addr,buf);
331 return 0;
332 case ARPHRD_ARCNET:
333 ipv6_arcnet_mc_map(addr, buf);
334 return 0;
335 case ARPHRD_INFINIBAND:
336 ipv6_ib_mc_map(addr, dev->broadcast, buf);
337 return 0;
338 case ARPHRD_IPGRE:
339 return ipv6_ipgre_mc_map(addr, dev->broadcast, buf);
340 default:
341 if (dir) {
342 memcpy(buf, dev->broadcast, dev->addr_len);
343 return 0;
346 return -EINVAL;
349 EXPORT_SYMBOL(ndisc_mc_map);
351 static u32 ndisc_hash(const void *pkey,
352 const struct net_device *dev,
353 __u32 hash_rnd)
355 const u32 *p32 = pkey;
356 u32 addr_hash, i;
358 addr_hash = 0;
359 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
360 addr_hash ^= *p32++;
362 return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
365 static int ndisc_constructor(struct neighbour *neigh)
367 struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
368 struct net_device *dev = neigh->dev;
369 struct inet6_dev *in6_dev;
370 struct neigh_parms *parms;
371 int is_multicast = ipv6_addr_is_multicast(addr);
373 in6_dev = in6_dev_get(dev);
374 if (in6_dev == NULL) {
375 return -EINVAL;
378 parms = in6_dev->nd_parms;
379 __neigh_parms_put(neigh->parms);
380 neigh->parms = neigh_parms_clone(parms);
382 neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
383 if (!dev->header_ops) {
384 neigh->nud_state = NUD_NOARP;
385 neigh->ops = &ndisc_direct_ops;
386 neigh->output = neigh_direct_output;
387 } else {
388 if (is_multicast) {
389 neigh->nud_state = NUD_NOARP;
390 ndisc_mc_map(addr, neigh->ha, dev, 1);
391 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
392 neigh->nud_state = NUD_NOARP;
393 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
394 if (dev->flags&IFF_LOOPBACK)
395 neigh->type = RTN_LOCAL;
396 } else if (dev->flags&IFF_POINTOPOINT) {
397 neigh->nud_state = NUD_NOARP;
398 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
400 if (dev->header_ops->cache)
401 neigh->ops = &ndisc_hh_ops;
402 else
403 neigh->ops = &ndisc_generic_ops;
404 if (neigh->nud_state&NUD_VALID)
405 neigh->output = neigh->ops->connected_output;
406 else
407 neigh->output = neigh->ops->output;
409 in6_dev_put(in6_dev);
410 return 0;
413 static int pndisc_constructor(struct pneigh_entry *n)
415 struct in6_addr *addr = (struct in6_addr*)&n->key;
416 struct in6_addr maddr;
417 struct net_device *dev = n->dev;
419 if (dev == NULL || __in6_dev_get(dev) == NULL)
420 return -EINVAL;
421 addrconf_addr_solict_mult(addr, &maddr);
422 ipv6_dev_mc_inc(dev, &maddr);
423 return 0;
426 static void pndisc_destructor(struct pneigh_entry *n)
428 struct in6_addr *addr = (struct in6_addr*)&n->key;
429 struct in6_addr maddr;
430 struct net_device *dev = n->dev;
432 if (dev == NULL || __in6_dev_get(dev) == NULL)
433 return;
434 addrconf_addr_solict_mult(addr, &maddr);
435 ipv6_dev_mc_dec(dev, &maddr);
438 struct sk_buff *ndisc_build_skb(struct net_device *dev,
439 const struct in6_addr *daddr,
440 const struct in6_addr *saddr,
441 struct icmp6hdr *icmp6h,
442 const struct in6_addr *target,
443 int llinfo)
445 struct net *net = dev_net(dev);
446 struct sock *sk = net->ipv6.ndisc_sk;
447 struct sk_buff *skb;
448 struct icmp6hdr *hdr;
449 int hlen = LL_RESERVED_SPACE(dev);
450 int tlen = dev->needed_tailroom;
451 int len;
452 int err;
453 u8 *opt;
455 if (!dev->addr_len)
456 llinfo = 0;
458 len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
459 if (llinfo)
460 len += ndisc_opt_addr_space(dev);
462 skb = sock_alloc_send_skb(sk,
463 (MAX_HEADER + sizeof(struct ipv6hdr) +
464 len + hlen + tlen),
465 1, &err);
466 if (!skb) {
467 ND_PRINTK0(KERN_ERR
468 "ICMPv6 ND: %s() failed to allocate an skb, err=%d.\n",
469 __func__, err);
470 return NULL;
473 skb_reserve(skb, hlen);
474 ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
476 skb->transport_header = skb->tail;
477 skb_put(skb, len);
479 hdr = (struct icmp6hdr *)skb_transport_header(skb);
480 memcpy(hdr, icmp6h, sizeof(*hdr));
482 opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
483 if (target) {
484 *(struct in6_addr *)opt = *target;
485 opt += sizeof(*target);
488 if (llinfo)
489 ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
490 dev->addr_len, dev->type);
492 hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
493 IPPROTO_ICMPV6,
494 csum_partial(hdr,
495 len, 0));
497 return skb;
500 EXPORT_SYMBOL(ndisc_build_skb);
502 void ndisc_send_skb(struct sk_buff *skb,
503 struct net_device *dev,
504 struct neighbour *neigh,
505 const struct in6_addr *daddr,
506 const struct in6_addr *saddr,
507 struct icmp6hdr *icmp6h)
509 struct flowi6 fl6;
510 struct dst_entry *dst;
511 struct net *net = dev_net(dev);
512 struct sock *sk = net->ipv6.ndisc_sk;
513 struct inet6_dev *idev;
514 int err;
515 u8 type;
517 type = icmp6h->icmp6_type;
519 icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex);
521 dst = icmp6_dst_alloc(dev, neigh, daddr);
522 if (!dst) {
523 kfree_skb(skb);
524 return;
527 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
528 if (IS_ERR(dst)) {
529 kfree_skb(skb);
530 return;
533 skb_dst_set(skb, dst);
535 rcu_read_lock();
536 idev = __in6_dev_get(dst->dev);
537 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
539 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
540 dst_output);
541 if (!err) {
542 ICMP6MSGOUT_INC_STATS(net, idev, type);
543 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
546 rcu_read_unlock();
549 EXPORT_SYMBOL(ndisc_send_skb);
/*
 *	Send a Neighbour Discover packet: build the skb and, if that
 *	succeeded, hand it to ndisc_send_skb().
 */
static void __ndisc_send(struct net_device *dev,
			 struct neighbour *neigh,
			 const struct in6_addr *daddr,
			 const struct in6_addr *saddr,
			 struct icmp6hdr *icmp6h, const struct in6_addr *target,
			 int llinfo)
{
	struct sk_buff *skb = ndisc_build_skb(dev, daddr, saddr, icmp6h,
					      target, llinfo);

	if (skb)
		ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h);
}
570 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
571 const struct in6_addr *daddr,
572 const struct in6_addr *solicited_addr,
573 int router, int solicited, int override, int inc_opt)
575 struct in6_addr tmpaddr;
576 struct inet6_ifaddr *ifp;
577 const struct in6_addr *src_addr;
578 struct icmp6hdr icmp6h = {
579 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
582 /* for anycast or proxy, solicited_addr != src_addr */
583 ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
584 if (ifp) {
585 src_addr = solicited_addr;
586 if (ifp->flags & IFA_F_OPTIMISTIC)
587 override = 0;
588 inc_opt |= ifp->idev->cnf.force_tllao;
589 in6_ifa_put(ifp);
590 } else {
591 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
592 inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
593 &tmpaddr))
594 return;
595 src_addr = &tmpaddr;
598 icmp6h.icmp6_router = router;
599 icmp6h.icmp6_solicited = solicited;
600 icmp6h.icmp6_override = override;
602 __ndisc_send(dev, neigh, daddr, src_addr,
603 &icmp6h, solicited_addr,
604 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
607 static void ndisc_send_unsol_na(struct net_device *dev)
609 struct inet6_dev *idev;
610 struct inet6_ifaddr *ifa;
611 struct in6_addr mcaddr;
613 idev = in6_dev_get(dev);
614 if (!idev)
615 return;
617 read_lock_bh(&idev->lock);
618 list_for_each_entry(ifa, &idev->addr_list, if_list) {
619 addrconf_addr_solict_mult(&ifa->addr, &mcaddr);
620 ndisc_send_na(dev, NULL, &mcaddr, &ifa->addr,
621 /*router=*/ !!idev->cnf.forwarding,
622 /*solicited=*/ false, /*override=*/ true,
623 /*inc_opt=*/ true);
625 read_unlock_bh(&idev->lock);
627 in6_dev_put(idev);
630 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
631 const struct in6_addr *solicit,
632 const struct in6_addr *daddr, const struct in6_addr *saddr)
634 struct in6_addr addr_buf;
635 struct icmp6hdr icmp6h = {
636 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
639 if (saddr == NULL) {
640 if (ipv6_get_lladdr(dev, &addr_buf,
641 (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
642 return;
643 saddr = &addr_buf;
646 __ndisc_send(dev, neigh, daddr, saddr,
647 &icmp6h, solicit,
648 !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
651 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
652 const struct in6_addr *daddr)
654 struct icmp6hdr icmp6h = {
655 .icmp6_type = NDISC_ROUTER_SOLICITATION,
657 int send_sllao = dev->addr_len;
659 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
661 * According to section 2.2 of RFC 4429, we must not
662 * send router solicitations with a sllao from
663 * optimistic addresses, but we may send the solicitation
664 * if we don't include the sllao. So here we check
665 * if our address is optimistic, and if so, we
666 * suppress the inclusion of the sllao.
668 if (send_sllao) {
669 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
670 dev, 1);
671 if (ifp) {
672 if (ifp->flags & IFA_F_OPTIMISTIC) {
673 send_sllao = 0;
675 in6_ifa_put(ifp);
676 } else {
677 send_sllao = 0;
680 #endif
681 __ndisc_send(dev, NULL, daddr, saddr,
682 &icmp6h, NULL,
683 send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
/*
 * error_report hook of the ND neigh_ops: report link failure on the
 * skb's dst, then free the skb.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
	/*
	 *	"The sender MUST return an ICMP
	 *	 destination unreachable"
	 */
	dst_link_failure(skb);

	kfree_skb(skb);
}
697 /* Called with locked neigh: either read or both */
699 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
701 struct in6_addr *saddr = NULL;
702 struct in6_addr mcaddr;
703 struct net_device *dev = neigh->dev;
704 struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
705 int probes = atomic_read(&neigh->probes);
707 if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
708 saddr = &ipv6_hdr(skb)->saddr;
710 if ((probes -= neigh->parms->ucast_probes) < 0) {
711 if (!(neigh->nud_state & NUD_VALID)) {
712 ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n",
713 __func__, target);
715 ndisc_send_ns(dev, neigh, target, target, saddr);
716 } else if ((probes -= neigh->parms->app_probes) < 0) {
717 #ifdef CONFIG_ARPD
718 neigh_app_ns(neigh);
719 #endif
720 } else {
721 addrconf_addr_solict_mult(target, &mcaddr);
722 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
726 static int pndisc_is_router(const void *pkey,
727 struct net_device *dev)
729 struct pneigh_entry *n;
730 int ret = -1;
732 read_lock_bh(&nd_tbl.lock);
733 n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
734 if (n)
735 ret = !!(n->flags & NTF_ROUTER);
736 read_unlock_bh(&nd_tbl.lock);
738 return ret;
741 static void ndisc_recv_ns(struct sk_buff *skb)
743 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
744 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
745 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
746 u8 *lladdr = NULL;
747 u32 ndoptlen = skb->tail - (skb->transport_header +
748 offsetof(struct nd_msg, opt));
749 struct ndisc_options ndopts;
750 struct net_device *dev = skb->dev;
751 struct inet6_ifaddr *ifp;
752 struct inet6_dev *idev = NULL;
753 struct neighbour *neigh;
754 int dad = ipv6_addr_any(saddr);
755 int inc;
756 int is_router = -1;
758 if (ipv6_addr_is_multicast(&msg->target)) {
759 ND_PRINTK2(KERN_WARNING
760 "ICMPv6 NS: multicast target address");
761 return;
765 * RFC2461 7.1.1:
766 * DAD has to be destined for solicited node multicast address.
768 if (dad &&
769 !(daddr->s6_addr32[0] == htonl(0xff020000) &&
770 daddr->s6_addr32[1] == htonl(0x00000000) &&
771 daddr->s6_addr32[2] == htonl(0x00000001) &&
772 daddr->s6_addr [12] == 0xff )) {
773 ND_PRINTK2(KERN_WARNING
774 "ICMPv6 NS: bad DAD packet (wrong destination)\n");
775 return;
778 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
779 ND_PRINTK2(KERN_WARNING
780 "ICMPv6 NS: invalid ND options\n");
781 return;
784 if (ndopts.nd_opts_src_lladdr) {
785 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
786 if (!lladdr) {
787 ND_PRINTK2(KERN_WARNING
788 "ICMPv6 NS: invalid link-layer address length\n");
789 return;
792 /* RFC2461 7.1.1:
793 * If the IP source address is the unspecified address,
794 * there MUST NOT be source link-layer address option
795 * in the message.
797 if (dad) {
798 ND_PRINTK2(KERN_WARNING
799 "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
800 return;
804 inc = ipv6_addr_is_multicast(daddr);
806 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
807 if (ifp) {
809 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
810 if (dad) {
811 if (dev->type == ARPHRD_IEEE802_TR) {
812 const unsigned char *sadr;
813 sadr = skb_mac_header(skb);
814 if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
815 sadr[9] == dev->dev_addr[1] &&
816 sadr[10] == dev->dev_addr[2] &&
817 sadr[11] == dev->dev_addr[3] &&
818 sadr[12] == dev->dev_addr[4] &&
819 sadr[13] == dev->dev_addr[5]) {
820 /* looped-back to us */
821 goto out;
826 * We are colliding with another node
827 * who is doing DAD
828 * so fail our DAD process
830 addrconf_dad_failure(ifp);
831 return;
832 } else {
834 * This is not a dad solicitation.
835 * If we are an optimistic node,
836 * we should respond.
837 * Otherwise, we should ignore it.
839 if (!(ifp->flags & IFA_F_OPTIMISTIC))
840 goto out;
844 idev = ifp->idev;
845 } else {
846 struct net *net = dev_net(dev);
848 idev = in6_dev_get(dev);
849 if (!idev) {
850 /* XXX: count this drop? */
851 return;
854 if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
855 (idev->cnf.forwarding &&
856 (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
857 (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
858 if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
859 skb->pkt_type != PACKET_HOST &&
860 inc != 0 &&
861 idev->nd_parms->proxy_delay != 0) {
863 * for anycast or proxy,
864 * sender should delay its response
865 * by a random time between 0 and
866 * MAX_ANYCAST_DELAY_TIME seconds.
867 * (RFC2461) -- yoshfuji
869 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
870 if (n)
871 pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
872 goto out;
874 } else
875 goto out;
878 if (is_router < 0)
879 is_router = !!idev->cnf.forwarding;
881 if (dad) {
882 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
883 is_router, 0, (ifp != NULL), 1);
884 goto out;
887 if (inc)
888 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
889 else
890 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
893 * update / create cache entry
894 * for the source address
896 neigh = __neigh_lookup(&nd_tbl, saddr, dev,
897 !inc || lladdr || !dev->addr_len);
898 if (neigh)
899 neigh_update(neigh, lladdr, NUD_STALE,
900 NEIGH_UPDATE_F_WEAK_OVERRIDE|
901 NEIGH_UPDATE_F_OVERRIDE);
902 if (neigh || !dev->header_ops) {
903 ndisc_send_na(dev, neigh, saddr, &msg->target,
904 is_router,
905 1, (ifp != NULL && inc), inc);
906 if (neigh)
907 neigh_release(neigh);
910 out:
911 if (ifp)
912 in6_ifa_put(ifp);
913 else
914 in6_dev_put(idev);
917 static void ndisc_recv_na(struct sk_buff *skb)
919 struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
920 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
921 const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
922 u8 *lladdr = NULL;
923 u32 ndoptlen = skb->tail - (skb->transport_header +
924 offsetof(struct nd_msg, opt));
925 struct ndisc_options ndopts;
926 struct net_device *dev = skb->dev;
927 struct inet6_ifaddr *ifp;
928 struct neighbour *neigh;
930 if (skb->len < sizeof(struct nd_msg)) {
931 ND_PRINTK2(KERN_WARNING
932 "ICMPv6 NA: packet too short\n");
933 return;
936 if (ipv6_addr_is_multicast(&msg->target)) {
937 ND_PRINTK2(KERN_WARNING
938 "ICMPv6 NA: target address is multicast.\n");
939 return;
942 if (ipv6_addr_is_multicast(daddr) &&
943 msg->icmph.icmp6_solicited) {
944 ND_PRINTK2(KERN_WARNING
945 "ICMPv6 NA: solicited NA is multicasted.\n");
946 return;
949 if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
950 ND_PRINTK2(KERN_WARNING
951 "ICMPv6 NS: invalid ND option\n");
952 return;
954 if (ndopts.nd_opts_tgt_lladdr) {
955 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
956 if (!lladdr) {
957 ND_PRINTK2(KERN_WARNING
958 "ICMPv6 NA: invalid link-layer address length\n");
959 return;
962 ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
963 if (ifp) {
964 if (skb->pkt_type != PACKET_LOOPBACK
965 && (ifp->flags & IFA_F_TENTATIVE)) {
966 addrconf_dad_failure(ifp);
967 return;
969 /* What should we make now? The advertisement
970 is invalid, but ndisc specs say nothing
971 about it. It could be misconfiguration, or
972 an smart proxy agent tries to help us :-)
974 We should not print the error if NA has been
975 received from loopback - it is just our own
976 unsolicited advertisement.
978 if (skb->pkt_type != PACKET_LOOPBACK)
979 ND_PRINTK1(KERN_WARNING
980 "ICMPv6 NA: someone advertises our address %pI6 on %s!\n",
981 &ifp->addr, ifp->idev->dev->name);
982 in6_ifa_put(ifp);
983 return;
985 neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
987 if (neigh) {
988 u8 old_flags = neigh->flags;
989 struct net *net = dev_net(dev);
991 if (neigh->nud_state & NUD_FAILED)
992 goto out;
995 * Don't update the neighbor cache entry on a proxy NA from
996 * ourselves because either the proxied node is off link or it
997 * has already sent a NA to us.
999 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
1000 net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
1001 pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
1002 /* XXX: idev->cnf.prixy_ndp */
1003 goto out;
1006 neigh_update(neigh, lladdr,
1007 msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
1008 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1009 (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
1010 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1011 (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
1013 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
1015 * Change: router to host
1017 struct rt6_info *rt;
1018 rt = rt6_get_dflt_router(saddr, dev);
1019 if (rt)
1020 ip6_del_rt(rt);
1023 out:
1024 neigh_release(neigh);
1028 static void ndisc_recv_rs(struct sk_buff *skb)
1030 struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
1031 unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
1032 struct neighbour *neigh;
1033 struct inet6_dev *idev;
1034 const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
1035 struct ndisc_options ndopts;
1036 u8 *lladdr = NULL;
1038 if (skb->len < sizeof(*rs_msg))
1039 return;
1041 idev = __in6_dev_get(skb->dev);
1042 if (!idev) {
1043 if (net_ratelimit())
1044 ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
1045 return;
1048 /* Don't accept RS if we're not in router mode */
1049 if (!idev->cnf.forwarding)
1050 goto out;
1053 * Don't update NCE if src = ::;
1054 * this implies that the source node has no ip address assigned yet.
1056 if (ipv6_addr_any(saddr))
1057 goto out;
1059 /* Parse ND options */
1060 if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
1061 if (net_ratelimit())
1062 ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
1063 goto out;
1066 if (ndopts.nd_opts_src_lladdr) {
1067 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1068 skb->dev);
1069 if (!lladdr)
1070 goto out;
1073 neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1074 if (neigh) {
1075 neigh_update(neigh, lladdr, NUD_STALE,
1076 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1077 NEIGH_UPDATE_F_OVERRIDE|
1078 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1079 neigh_release(neigh);
1081 out:
1082 return;
1085 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1087 struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1088 struct sk_buff *skb;
1089 struct nlmsghdr *nlh;
1090 struct nduseroptmsg *ndmsg;
1091 struct net *net = dev_net(ra->dev);
1092 int err;
1093 int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1094 + (opt->nd_opt_len << 3));
1095 size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1097 skb = nlmsg_new(msg_size, GFP_ATOMIC);
1098 if (skb == NULL) {
1099 err = -ENOBUFS;
1100 goto errout;
1103 nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1104 if (nlh == NULL) {
1105 goto nla_put_failure;
1108 ndmsg = nlmsg_data(nlh);
1109 ndmsg->nduseropt_family = AF_INET6;
1110 ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1111 ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1112 ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1113 ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1115 memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1117 NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1118 &ipv6_hdr(ra)->saddr);
1119 nlmsg_end(skb, nlh);
1121 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1122 return;
1124 nla_put_failure:
1125 nlmsg_free(skb);
1126 err = -EMSGSIZE;
1127 errout:
1128 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1131 static inline int accept_ra(struct inet6_dev *in6_dev)
1134 * If forwarding is enabled, RA are not accepted unless the special
1135 * hybrid mode (accept_ra=2) is enabled.
1137 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1138 return 0;
1140 return in6_dev->cnf.accept_ra;
1143 static void ndisc_router_discovery(struct sk_buff *skb)
1145 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1146 struct neighbour *neigh = NULL;
1147 struct inet6_dev *in6_dev;
1148 struct rt6_info *rt = NULL;
1149 int lifetime;
1150 struct ndisc_options ndopts;
1151 int optlen;
1152 unsigned int pref = 0;
1154 __u8 * opt = (__u8 *)(ra_msg + 1);
1156 optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1158 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1159 ND_PRINTK2(KERN_WARNING
1160 "ICMPv6 RA: source address is not link-local.\n");
1161 return;
1163 if (optlen < 0) {
1164 ND_PRINTK2(KERN_WARNING
1165 "ICMPv6 RA: packet too short\n");
1166 return;
1169 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1170 if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1171 ND_PRINTK2(KERN_WARNING
1172 "ICMPv6 RA: from host or unauthorized router\n");
1173 return;
1175 #endif
1178 * set the RA_RECV flag in the interface
1181 in6_dev = __in6_dev_get(skb->dev);
1182 if (in6_dev == NULL) {
1183 ND_PRINTK0(KERN_ERR
1184 "ICMPv6 RA: can't find inet6 device for %s.\n",
1185 skb->dev->name);
1186 return;
1189 if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1190 ND_PRINTK2(KERN_WARNING
1191 "ICMP6 RA: invalid ND options\n");
1192 return;
1195 if (!accept_ra(in6_dev))
1196 goto skip_linkparms;
1198 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1199 /* skip link-specific parameters from interior routers */
1200 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1201 goto skip_linkparms;
1202 #endif
1204 if (in6_dev->if_flags & IF_RS_SENT) {
1206 * flag that an RA was received after an RS was sent
1207 * out on this interface.
1209 in6_dev->if_flags |= IF_RA_RCVD;
1213 * Remember the managed/otherconf flags from most recently
1214 * received RA message (RFC 2462) -- yoshfuji
1216 in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1217 IF_RA_OTHERCONF)) |
1218 (ra_msg->icmph.icmp6_addrconf_managed ?
1219 IF_RA_MANAGED : 0) |
1220 (ra_msg->icmph.icmp6_addrconf_other ?
1221 IF_RA_OTHERCONF : 0);
1223 if (!in6_dev->cnf.accept_ra_defrtr)
1224 goto skip_defrtr;
1226 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1227 goto skip_defrtr;
1229 lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1231 #ifdef CONFIG_IPV6_ROUTER_PREF
1232 pref = ra_msg->icmph.icmp6_router_pref;
1233 /* 10b is handled as if it were 00b (medium) */
1234 if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1235 !in6_dev->cnf.accept_ra_rtr_pref)
1236 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1237 #endif
1239 rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1241 if (rt)
1242 neigh = dst_get_neighbour(&rt->dst);
1244 if (rt && lifetime == 0) {
1245 neigh_clone(neigh);
1246 ip6_del_rt(rt);
1247 rt = NULL;
1250 if (rt == NULL && lifetime) {
1251 ND_PRINTK3(KERN_DEBUG
1252 "ICMPv6 RA: adding default router.\n");
1254 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1255 if (rt == NULL) {
1256 ND_PRINTK0(KERN_ERR
1257 "ICMPv6 RA: %s() failed to add default route.\n",
1258 __func__);
1259 return;
1262 neigh = dst_get_neighbour(&rt->dst);
1263 if (neigh == NULL) {
1264 ND_PRINTK0(KERN_ERR
1265 "ICMPv6 RA: %s() got default router without neighbour.\n",
1266 __func__);
1267 dst_release(&rt->dst);
1268 return;
1270 neigh->flags |= NTF_ROUTER;
1271 } else if (rt) {
1272 rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1275 if (rt)
1276 rt->rt6i_expires = jiffies + (HZ * lifetime);
1278 if (ra_msg->icmph.icmp6_hop_limit) {
1279 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1280 if (rt)
1281 dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
1282 ra_msg->icmph.icmp6_hop_limit);
1285 skip_defrtr:
1288 * Update Reachable Time and Retrans Timer
1291 if (in6_dev->nd_parms) {
1292 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1294 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1295 rtime = (rtime*HZ)/1000;
1296 if (rtime < HZ/10)
1297 rtime = HZ/10;
1298 in6_dev->nd_parms->retrans_time = rtime;
1299 in6_dev->tstamp = jiffies;
1300 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1303 rtime = ntohl(ra_msg->reachable_time);
1304 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1305 rtime = (rtime*HZ)/1000;
1307 if (rtime < HZ/10)
1308 rtime = HZ/10;
1310 if (rtime != in6_dev->nd_parms->base_reachable_time) {
1311 in6_dev->nd_parms->base_reachable_time = rtime;
1312 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1313 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1314 in6_dev->tstamp = jiffies;
1315 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1320 skip_linkparms:
1323 * Process options.
1326 if (!neigh)
1327 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1328 skb->dev, 1);
1329 if (neigh) {
1330 u8 *lladdr = NULL;
1331 if (ndopts.nd_opts_src_lladdr) {
1332 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1333 skb->dev);
1334 if (!lladdr) {
1335 ND_PRINTK2(KERN_WARNING
1336 "ICMPv6 RA: invalid link-layer address length\n");
1337 goto out;
1340 neigh_update(neigh, lladdr, NUD_STALE,
1341 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1342 NEIGH_UPDATE_F_OVERRIDE|
1343 NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1344 NEIGH_UPDATE_F_ISROUTER);
1347 if (!accept_ra(in6_dev))
1348 goto out;
1350 #ifdef CONFIG_IPV6_ROUTE_INFO
1351 if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, NULL, 0))
1352 goto skip_routeinfo;
1354 if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1355 struct nd_opt_hdr *p;
1356 for (p = ndopts.nd_opts_ri;
1358 p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1359 struct route_info *ri = (struct route_info *)p;
1360 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1361 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1362 ri->prefix_len == 0)
1363 continue;
1364 #endif
1365 if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1366 continue;
1367 rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1368 &ipv6_hdr(skb)->saddr);
1372 skip_routeinfo:
1373 #endif
1375 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1376 /* skip link-specific ndopts from interior routers */
1377 if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1378 goto out;
1379 #endif
1381 if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1382 struct nd_opt_hdr *p;
1383 for (p = ndopts.nd_opts_pi;
1385 p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1386 addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1390 if (ndopts.nd_opts_mtu) {
1391 __be32 n;
1392 u32 mtu;
1394 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1395 mtu = ntohl(n);
1397 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1398 ND_PRINTK2(KERN_WARNING
1399 "ICMPv6 RA: invalid mtu: %d\n",
1400 mtu);
1401 } else if (in6_dev->cnf.mtu6 != mtu) {
1402 in6_dev->cnf.mtu6 = mtu;
1404 if (rt)
1405 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
1407 rt6_mtu_change(skb->dev, mtu);
1411 if (ndopts.nd_useropts) {
1412 struct nd_opt_hdr *p;
1413 for (p = ndopts.nd_useropts;
1415 p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1416 ndisc_ra_useropt(skb, p);
1420 if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1421 ND_PRINTK2(KERN_WARNING
1422 "ICMPv6 RA: invalid RA options");
1424 out:
1425 if (rt)
1426 dst_release(&rt->dst);
1427 else if (neigh)
1428 neigh_release(neigh);
1431 static void ndisc_redirect_rcv(struct sk_buff *skb)
1433 struct inet6_dev *in6_dev;
1434 struct icmp6hdr *icmph;
1435 const struct in6_addr *dest;
1436 const struct in6_addr *target; /* new first hop to destination */
1437 struct neighbour *neigh;
1438 int on_link = 0;
1439 struct ndisc_options ndopts;
1440 int optlen;
1441 u8 *lladdr = NULL;
1443 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1444 switch (skb->ndisc_nodetype) {
1445 case NDISC_NODETYPE_HOST:
1446 case NDISC_NODETYPE_NODEFAULT:
1447 ND_PRINTK2(KERN_WARNING
1448 "ICMPv6 Redirect: from host or unauthorized router\n");
1449 return;
1451 #endif
1453 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1454 ND_PRINTK2(KERN_WARNING
1455 "ICMPv6 Redirect: source address is not link-local.\n");
1456 return;
1459 optlen = skb->tail - skb->transport_header;
1460 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1462 if (optlen < 0) {
1463 ND_PRINTK2(KERN_WARNING
1464 "ICMPv6 Redirect: packet too short\n");
1465 return;
1468 icmph = icmp6_hdr(skb);
1469 target = (const struct in6_addr *) (icmph + 1);
1470 dest = target + 1;
1472 if (ipv6_addr_is_multicast(dest)) {
1473 ND_PRINTK2(KERN_WARNING
1474 "ICMPv6 Redirect: destination address is multicast.\n");
1475 return;
1478 if (ipv6_addr_equal(dest, target)) {
1479 on_link = 1;
1480 } else if (ipv6_addr_type(target) !=
1481 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1482 ND_PRINTK2(KERN_WARNING
1483 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1484 return;
1487 in6_dev = __in6_dev_get(skb->dev);
1488 if (!in6_dev)
1489 return;
1490 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1491 return;
1493 /* RFC2461 8.1:
1494 * The IP source address of the Redirect MUST be the same as the current
1495 * first-hop router for the specified ICMP Destination Address.
1498 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1499 ND_PRINTK2(KERN_WARNING
1500 "ICMPv6 Redirect: invalid ND options\n");
1501 return;
1503 if (ndopts.nd_opts_tgt_lladdr) {
1504 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1505 skb->dev);
1506 if (!lladdr) {
1507 ND_PRINTK2(KERN_WARNING
1508 "ICMPv6 Redirect: invalid link-layer address length\n");
1509 return;
1513 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1514 if (neigh) {
1515 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1516 &ipv6_hdr(skb)->saddr, neigh, lladdr,
1517 on_link);
1518 neigh_release(neigh);
1522 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1523 const struct in6_addr *target)
1525 struct net_device *dev = skb->dev;
1526 struct net *net = dev_net(dev);
1527 struct sock *sk = net->ipv6.ndisc_sk;
1528 int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1529 struct sk_buff *buff;
1530 struct icmp6hdr *icmph;
1531 struct in6_addr saddr_buf;
1532 struct in6_addr *addrp;
1533 struct rt6_info *rt;
1534 struct dst_entry *dst;
1535 struct inet6_dev *idev;
1536 struct flowi6 fl6;
1537 u8 *opt;
1538 int hlen, tlen;
1539 int rd_len;
1540 int err;
1541 u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1543 if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1544 ND_PRINTK2(KERN_WARNING
1545 "ICMPv6 Redirect: no link-local address on %s\n",
1546 dev->name);
1547 return;
1550 if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1551 ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1552 ND_PRINTK2(KERN_WARNING
1553 "ICMPv6 Redirect: target address is not link-local unicast.\n");
1554 return;
1557 icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
1558 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1560 dst = ip6_route_output(net, NULL, &fl6);
1561 if (dst == NULL)
1562 return;
1564 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
1565 if (IS_ERR(dst))
1566 return;
1568 rt = (struct rt6_info *) dst;
1570 if (rt->rt6i_flags & RTF_GATEWAY) {
1571 ND_PRINTK2(KERN_WARNING
1572 "ICMPv6 Redirect: destination is not a neighbour.\n");
1573 goto release;
1575 if (!rt->rt6i_peer)
1576 rt6_bind_peer(rt, 1);
1577 if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
1578 goto release;
1580 if (dev->addr_len) {
1581 read_lock_bh(&neigh->lock);
1582 if (neigh->nud_state & NUD_VALID) {
1583 memcpy(ha_buf, neigh->ha, dev->addr_len);
1584 read_unlock_bh(&neigh->lock);
1585 ha = ha_buf;
1586 len += ndisc_opt_addr_space(dev);
1587 } else
1588 read_unlock_bh(&neigh->lock);
1591 rd_len = min_t(unsigned int,
1592 IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1593 rd_len &= ~0x7;
1594 len += rd_len;
1596 hlen = LL_RESERVED_SPACE(dev);
1597 tlen = dev->needed_tailroom;
1598 buff = sock_alloc_send_skb(sk,
1599 (MAX_HEADER + sizeof(struct ipv6hdr) +
1600 len + hlen + tlen),
1601 1, &err);
1602 if (buff == NULL) {
1603 ND_PRINTK0(KERN_ERR
1604 "ICMPv6 Redirect: %s() failed to allocate an skb, err=%d.\n",
1605 __func__, err);
1606 goto release;
1609 skb_reserve(buff, hlen);
1610 ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1611 IPPROTO_ICMPV6, len);
1613 skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1614 skb_put(buff, len);
1615 icmph = icmp6_hdr(buff);
1617 memset(icmph, 0, sizeof(struct icmp6hdr));
1618 icmph->icmp6_type = NDISC_REDIRECT;
1621 * copy target and destination addresses
1624 addrp = (struct in6_addr *)(icmph + 1);
1625 *addrp = *target;
1626 addrp++;
1627 *addrp = ipv6_hdr(skb)->daddr;
1629 opt = (u8*) (addrp + 1);
1632 * include target_address option
1635 if (ha)
1636 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1637 dev->addr_len, dev->type);
1640 * build redirect option and copy skb over to the new packet.
1643 memset(opt, 0, 8);
1644 *(opt++) = ND_OPT_REDIRECT_HDR;
1645 *(opt++) = (rd_len >> 3);
1646 opt += 6;
1648 memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1650 icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1651 len, IPPROTO_ICMPV6,
1652 csum_partial(icmph, len, 0));
1654 skb_dst_set(buff, dst);
1655 rcu_read_lock();
1656 idev = __in6_dev_get(dst->dev);
1657 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1658 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1659 dst_output);
1660 if (!err) {
1661 ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
1662 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1665 rcu_read_unlock();
1666 return;
1668 release:
1669 dst_release(dst);
/*
 * Run Neighbour Solicitation processing on @skb, then free the buffer.
 */
static void pndisc_redo(struct sk_buff *skb)
{
	ndisc_recv_ns(skb);
	kfree_skb(skb);
}
1678 int ndisc_rcv(struct sk_buff *skb)
1680 struct nd_msg *msg;
1682 if (!pskb_may_pull(skb, skb->len))
1683 return 0;
1685 msg = (struct nd_msg *)skb_transport_header(skb);
1687 __skb_push(skb, skb->data - skb_transport_header(skb));
1689 if (ipv6_hdr(skb)->hop_limit != 255) {
1690 ND_PRINTK2(KERN_WARNING
1691 "ICMPv6 NDISC: invalid hop-limit: %d\n",
1692 ipv6_hdr(skb)->hop_limit);
1693 return 0;
1696 if (msg->icmph.icmp6_code != 0) {
1697 ND_PRINTK2(KERN_WARNING
1698 "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1699 msg->icmph.icmp6_code);
1700 return 0;
1703 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1705 switch (msg->icmph.icmp6_type) {
1706 case NDISC_NEIGHBOUR_SOLICITATION:
1707 ndisc_recv_ns(skb);
1708 break;
1710 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1711 ndisc_recv_na(skb);
1712 break;
1714 case NDISC_ROUTER_SOLICITATION:
1715 ndisc_recv_rs(skb);
1716 break;
1718 case NDISC_ROUTER_ADVERTISEMENT:
1719 ndisc_router_discovery(skb);
1720 break;
1722 case NDISC_REDIRECT:
1723 ndisc_redirect_rcv(skb);
1724 break;
1727 return 0;
1730 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1732 struct net_device *dev = ptr;
1733 struct net *net = dev_net(dev);
1735 switch (event) {
1736 case NETDEV_CHANGEADDR:
1737 neigh_changeaddr(&nd_tbl, dev);
1738 fib6_run_gc(~0UL, net);
1739 break;
1740 case NETDEV_DOWN:
1741 neigh_ifdown(&nd_tbl, dev);
1742 fib6_run_gc(~0UL, net);
1743 break;
1744 case NETDEV_NOTIFY_PEERS:
1745 ndisc_send_unsol_na(dev);
1746 break;
1747 default:
1748 break;
1751 return NOTIFY_DONE;
1754 static struct notifier_block ndisc_netdev_notifier = {
1755 .notifier_call = ndisc_netdev_event,
1758 #ifdef CONFIG_SYSCTL
1759 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1760 const char *func, const char *dev_name)
1762 static char warncomm[TASK_COMM_LEN];
1763 static int warned;
1764 if (strcmp(warncomm, current->comm) && warned < 5) {
1765 strcpy(warncomm, current->comm);
1766 printk(KERN_WARNING
1767 "process `%s' is using deprecated sysctl (%s) "
1768 "net.ipv6.neigh.%s.%s; "
1769 "Use net.ipv6.neigh.%s.%s_ms "
1770 "instead.\n",
1771 warncomm, func,
1772 dev_name, ctl->procname,
1773 dev_name, ctl->procname);
1774 warned++;
1778 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
1780 struct net_device *dev = ctl->extra1;
1781 struct inet6_dev *idev;
1782 int ret;
1784 if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1785 (strcmp(ctl->procname, "base_reachable_time") == 0))
1786 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1788 if (strcmp(ctl->procname, "retrans_time") == 0)
1789 ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1791 else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1792 ret = proc_dointvec_jiffies(ctl, write,
1793 buffer, lenp, ppos);
1795 else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1796 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1797 ret = proc_dointvec_ms_jiffies(ctl, write,
1798 buffer, lenp, ppos);
1799 else
1800 ret = -1;
1802 if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1803 if (ctl->data == &idev->nd_parms->base_reachable_time)
1804 idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1805 idev->tstamp = jiffies;
1806 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1807 in6_dev_put(idev);
1809 return ret;
1813 #endif
1815 static int __net_init ndisc_net_init(struct net *net)
1817 struct ipv6_pinfo *np;
1818 struct sock *sk;
1819 int err;
1821 err = inet_ctl_sock_create(&sk, PF_INET6,
1822 SOCK_RAW, IPPROTO_ICMPV6, net);
1823 if (err < 0) {
1824 ND_PRINTK0(KERN_ERR
1825 "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1826 err);
1827 return err;
1830 net->ipv6.ndisc_sk = sk;
1832 np = inet6_sk(sk);
1833 np->hop_limit = 255;
1834 /* Do not loopback ndisc messages */
1835 np->mc_loop = 0;
1837 return 0;
1840 static void __net_exit ndisc_net_exit(struct net *net)
1842 inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1845 static struct pernet_operations ndisc_net_ops = {
1846 .init = ndisc_net_init,
1847 .exit = ndisc_net_exit,
1850 int __init ndisc_init(void)
1852 int err;
1854 err = register_pernet_subsys(&ndisc_net_ops);
1855 if (err)
1856 return err;
1858 * Initialize the neighbour table
1860 neigh_table_init(&nd_tbl);
1862 #ifdef CONFIG_SYSCTL
1863 err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6",
1864 &ndisc_ifinfo_sysctl_change);
1865 if (err)
1866 goto out_unregister_pernet;
1867 #endif
1868 err = register_netdevice_notifier(&ndisc_netdev_notifier);
1869 if (err)
1870 goto out_unregister_sysctl;
1871 out:
1872 return err;
1874 out_unregister_sysctl:
1875 #ifdef CONFIG_SYSCTL
1876 neigh_sysctl_unregister(&nd_tbl.parms);
1877 out_unregister_pernet:
1878 #endif
1879 unregister_pernet_subsys(&ndisc_net_ops);
1880 goto out;
1883 void ndisc_cleanup(void)
1885 unregister_netdevice_notifier(&ndisc_netdev_notifier);
1886 #ifdef CONFIG_SYSCTL
1887 neigh_sysctl_unregister(&nd_tbl.parms);
1888 #endif
1889 neigh_table_clear(&nd_tbl);
1890 unregister_pernet_subsys(&ndisc_net_ops);