2 * Internet Control Message Protocol (ICMPv6)
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
10 * Based on net/ipv4/icmp.c
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
23 * Andi Kleen : exception handling
24 * Andi Kleen add rate limits. never reply to a icmp.
25 * add more length checks and other fixes.
26 * yoshfuji : ensure to sent parameter problem for
28 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
30 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
31 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
34 #include <linux/module.h>
35 #include <linux/errno.h>
36 #include <linux/types.h>
37 #include <linux/socket.h>
39 #include <linux/kernel.h>
40 #include <linux/sched.h>
41 #include <linux/sockios.h>
42 #include <linux/net.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
47 #include <linux/sysctl.h>
50 #include <linux/inet.h>
51 #include <linux/netdevice.h>
52 #include <linux/icmpv6.h>
58 #include <net/ip6_checksum.h>
59 #include <net/protocol.h>
61 #include <net/rawv6.h>
62 #include <net/transp_v6.h>
63 #include <net/ip6_route.h>
64 #include <net/addrconf.h>
67 #include <asm/uaccess.h>
68 #include <asm/system.h>
70 DEFINE_SNMP_STAT(struct icmpv6_mib
, icmpv6_statistics
) __read_mostly
;
73 * The ICMP socket(s). This is the most convenient way to flow control
74 * our ICMP output as well as maintain a clean interface throughout
75 * all layers. All Socketless IP sends will soon be gone.
77 * On SMP we have one ICMP socket per-cpu.
79 static DEFINE_PER_CPU(struct socket
*, __icmpv6_socket
) = NULL
;
80 #define icmpv6_socket __get_cpu_var(__icmpv6_socket)
82 static int icmpv6_rcv(struct sk_buff
**pskb
, unsigned int *nhoffp
);
84 static struct inet6_protocol icmpv6_protocol
= {
85 .handler
= icmpv6_rcv
,
86 .flags
= INET6_PROTO_FINAL
,
89 static __inline__
int icmpv6_xmit_lock(void)
93 if (unlikely(!spin_trylock(&icmpv6_socket
->sk
->sk_lock
.slock
))) {
94 /* This can happen if the output path (f.e. SIT or
95 * ip6ip6 tunnel) signals dst_link_failure() for an
96 * outgoing ICMP6 packet.
104 static __inline__
void icmpv6_xmit_unlock(void)
106 spin_unlock_bh(&icmpv6_socket
->sk
->sk_lock
.slock
);
110 * Slightly more convenient version of icmpv6_send.
112 void icmpv6_param_prob(struct sk_buff
*skb
, int code
, int pos
)
114 icmpv6_send(skb
, ICMPV6_PARAMPROB
, code
, pos
, skb
->dev
);
119 * Decide whether we may reply to this packet with an ICMP error.
121 * We do not reply if:
122 * - it was an ICMP error message.
123 * - it is truncated, so that it is known, that protocol is ICMPV6
124 * (i.e. in the middle of some exthdr)
129 static int is_ineligible(struct sk_buff
*skb
)
131 int ptr
= (u8
*)(skb
->nh
.ipv6h
+1) - skb
->data
;
132 int len
= skb
->len
- ptr
;
133 __u8 nexthdr
= skb
->nh
.ipv6h
->nexthdr
;
138 ptr
= ipv6_skip_exthdr(skb
, ptr
, &nexthdr
);
141 if (nexthdr
== IPPROTO_ICMPV6
) {
143 tp
= skb_header_pointer(skb
,
144 ptr
+offsetof(struct icmp6hdr
, icmp6_type
),
145 sizeof(_type
), &_type
);
147 !(*tp
& ICMPV6_INFOMSG_MASK
))
153 static int sysctl_icmpv6_time
= 1*HZ
;
156 * Check the ICMP output rate limit
158 static inline int icmpv6_xrlim_allow(struct sock
*sk
, int type
,
161 struct dst_entry
*dst
;
164 /* Informational messages are not limited. */
165 if (type
& ICMPV6_INFOMSG_MASK
)
168 /* Do not limit pmtu discovery, it would break it. */
169 if (type
== ICMPV6_PKT_TOOBIG
)
173 * Look up the output route.
174 * XXX: perhaps the expiry of routing entries cloned by
175 * this lookup should be more aggressive (no longer than the timeout).
177 dst
= ip6_route_output(sk
, fl
);
179 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES
);
180 } else if (dst
->dev
&& (dst
->dev
->flags
&IFF_LOOPBACK
)) {
183 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
184 int tmo
= sysctl_icmpv6_time
;
186 /* Give more bandwidth to wider prefixes. */
187 if (rt
->rt6i_dst
.plen
< 128)
188 tmo
>>= ((128 - rt
->rt6i_dst
.plen
)>>5);
190 res
= xrlim_allow(dst
, tmo
);
197 * An inline helper for the "simple" if statement below:
198 * checks whether the parameter problem report is caused by an
199 * unrecognized IPv6 option whose Option Type has its
200 * highest-order two bits set to 10.
203 static __inline__
int opt_unrec(struct sk_buff
*skb
, __u32 offset
)
207 offset
+= skb
->nh
.raw
- skb
->data
;
208 op
= skb_header_pointer(skb
, offset
, sizeof(_optval
), &_optval
);
211 return (*op
& 0xC0) == 0x80;
214 static int icmpv6_push_pending_frames(struct sock
*sk
, struct flowi
*fl
, struct icmp6hdr
*thdr
, int len
)
217 struct icmp6hdr
*icmp6h
;
220 if ((skb
= skb_peek(&sk
->sk_write_queue
)) == NULL
)
223 icmp6h
= (struct icmp6hdr
*) skb
->h
.raw
;
224 memcpy(icmp6h
, thdr
, sizeof(struct icmp6hdr
));
225 icmp6h
->icmp6_cksum
= 0;
227 if (skb_queue_len(&sk
->sk_write_queue
) == 1) {
228 skb
->csum
= csum_partial((char *)icmp6h
,
229 sizeof(struct icmp6hdr
), skb
->csum
);
230 icmp6h
->icmp6_cksum
= csum_ipv6_magic(&fl
->fl6_src
,
237 skb_queue_walk(&sk
->sk_write_queue
, skb
) {
238 tmp_csum
= csum_add(tmp_csum
, skb
->csum
);
241 tmp_csum
= csum_partial((char *)icmp6h
,
242 sizeof(struct icmp6hdr
), tmp_csum
);
243 tmp_csum
= csum_ipv6_magic(&fl
->fl6_src
,
245 len
, fl
->proto
, tmp_csum
);
246 icmp6h
->icmp6_cksum
= tmp_csum
;
248 if (icmp6h
->icmp6_cksum
== 0)
249 icmp6h
->icmp6_cksum
= -1;
250 ip6_push_pending_frames(sk
);
260 static int icmpv6_getfrag(void *from
, char *to
, int offset
, int len
, int odd
, struct sk_buff
*skb
)
262 struct icmpv6_msg
*msg
= (struct icmpv6_msg
*) from
;
263 struct sk_buff
*org_skb
= msg
->skb
;
266 csum
= skb_copy_and_csum_bits(org_skb
, msg
->offset
+ offset
,
268 skb
->csum
= csum_block_add(skb
->csum
, csum
, odd
);
273 * Send an ICMP message in response to a packet in error
275 void icmpv6_send(struct sk_buff
*skb
, int type
, int code
, __u32 info
,
276 struct net_device
*dev
)
278 struct inet6_dev
*idev
= NULL
;
279 struct ipv6hdr
*hdr
= skb
->nh
.ipv6h
;
281 struct ipv6_pinfo
*np
;
282 struct in6_addr
*saddr
= NULL
;
283 struct dst_entry
*dst
;
284 struct icmp6hdr tmp_hdr
;
286 struct icmpv6_msg msg
;
293 if ((u8
*)hdr
< skb
->head
|| (u8
*)(hdr
+1) > skb
->tail
)
297 * Make sure we respect the rules
298 * i.e. RFC 1885 2.4(e)
299 * Rule (e.1) is enforced by not using icmpv6_send
300 * in any code that processes icmp errors.
302 addr_type
= ipv6_addr_type(&hdr
->daddr
);
304 if (ipv6_chk_addr(&hdr
->daddr
, skb
->dev
, 0))
311 if ((addr_type
& IPV6_ADDR_MULTICAST
|| skb
->pkt_type
!= PACKET_HOST
)) {
312 if (type
!= ICMPV6_PKT_TOOBIG
&&
313 !(type
== ICMPV6_PARAMPROB
&&
314 code
== ICMPV6_UNK_OPTION
&&
315 (opt_unrec(skb
, info
))))
321 addr_type
= ipv6_addr_type(&hdr
->saddr
);
327 if (addr_type
& IPV6_ADDR_LINKLOCAL
)
328 iif
= skb
->dev
->ifindex
;
331 * Must not send if we know that source is Anycast also.
332 * for now we don't know that.
334 if ((addr_type
== IPV6_ADDR_ANY
) || (addr_type
& IPV6_ADDR_MULTICAST
)) {
335 LIMIT_NETDEBUG(KERN_DEBUG
"icmpv6_send: addr_any/mcast source\n");
340 * Never reply to an ICMP error packet.
342 if (is_ineligible(skb
)) {
343 LIMIT_NETDEBUG(KERN_DEBUG
"icmpv6_send: no reply to icmp error\n");
347 memset(&fl
, 0, sizeof(fl
));
348 fl
.proto
= IPPROTO_ICMPV6
;
349 ipv6_addr_copy(&fl
.fl6_dst
, &hdr
->saddr
);
351 ipv6_addr_copy(&fl
.fl6_src
, saddr
);
353 fl
.fl_icmp_type
= type
;
354 fl
.fl_icmp_code
= code
;
356 if (icmpv6_xmit_lock())
359 sk
= icmpv6_socket
->sk
;
362 if (!icmpv6_xrlim_allow(sk
, type
, &fl
))
365 tmp_hdr
.icmp6_type
= type
;
366 tmp_hdr
.icmp6_code
= code
;
367 tmp_hdr
.icmp6_cksum
= 0;
368 tmp_hdr
.icmp6_pointer
= htonl(info
);
370 if (!fl
.oif
&& ipv6_addr_is_multicast(&fl
.fl6_dst
))
371 fl
.oif
= np
->mcast_oif
;
373 err
= ip6_dst_lookup(sk
, &dst
, &fl
);
376 if ((err
= xfrm_lookup(&dst
, &fl
, sk
, 0)) < 0)
379 if (ipv6_addr_is_multicast(&fl
.fl6_dst
))
380 hlimit
= np
->mcast_hops
;
382 hlimit
= np
->hop_limit
;
384 hlimit
= dst_metric(dst
, RTAX_HOPLIMIT
);
386 hlimit
= ipv6_get_hoplimit(dst
->dev
);
388 tclass
= np
->cork
.tclass
;
393 msg
.offset
= skb
->nh
.raw
- skb
->data
;
395 len
= skb
->len
- msg
.offset
;
396 len
= min_t(unsigned int, len
, IPV6_MIN_MTU
- sizeof(struct ipv6hdr
) -sizeof(struct icmp6hdr
));
398 LIMIT_NETDEBUG(KERN_DEBUG
"icmp: len problem\n");
399 goto out_dst_release
;
402 idev
= in6_dev_get(skb
->dev
);
404 err
= ip6_append_data(sk
, icmpv6_getfrag
, &msg
,
405 len
+ sizeof(struct icmp6hdr
),
406 sizeof(struct icmp6hdr
),
407 hlimit
, tclass
, NULL
, &fl
, (struct rt6_info
*)dst
,
410 ip6_flush_pending_frames(sk
);
413 err
= icmpv6_push_pending_frames(sk
, &fl
, &tmp_hdr
, len
+ sizeof(struct icmp6hdr
));
415 if (type
>= ICMPV6_DEST_UNREACH
&& type
<= ICMPV6_PARAMPROB
)
416 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_OUTDESTUNREACHS
, type
- ICMPV6_DEST_UNREACH
);
417 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTMSGS
);
420 if (likely(idev
!= NULL
))
425 icmpv6_xmit_unlock();
428 static void icmpv6_echo_reply(struct sk_buff
*skb
)
431 struct inet6_dev
*idev
;
432 struct ipv6_pinfo
*np
;
433 struct in6_addr
*saddr
= NULL
;
434 struct icmp6hdr
*icmph
= (struct icmp6hdr
*) skb
->h
.raw
;
435 struct icmp6hdr tmp_hdr
;
437 struct icmpv6_msg msg
;
438 struct dst_entry
*dst
;
443 saddr
= &skb
->nh
.ipv6h
->daddr
;
445 if (!ipv6_unicast_destination(skb
))
448 memcpy(&tmp_hdr
, icmph
, sizeof(tmp_hdr
));
449 tmp_hdr
.icmp6_type
= ICMPV6_ECHO_REPLY
;
451 memset(&fl
, 0, sizeof(fl
));
452 fl
.proto
= IPPROTO_ICMPV6
;
453 ipv6_addr_copy(&fl
.fl6_dst
, &skb
->nh
.ipv6h
->saddr
);
455 ipv6_addr_copy(&fl
.fl6_src
, saddr
);
456 fl
.oif
= skb
->dev
->ifindex
;
457 fl
.fl_icmp_type
= ICMPV6_ECHO_REPLY
;
459 if (icmpv6_xmit_lock())
462 sk
= icmpv6_socket
->sk
;
465 if (!fl
.oif
&& ipv6_addr_is_multicast(&fl
.fl6_dst
))
466 fl
.oif
= np
->mcast_oif
;
468 err
= ip6_dst_lookup(sk
, &dst
, &fl
);
471 if ((err
= xfrm_lookup(&dst
, &fl
, sk
, 0)) < 0)
474 if (ipv6_addr_is_multicast(&fl
.fl6_dst
))
475 hlimit
= np
->mcast_hops
;
477 hlimit
= np
->hop_limit
;
479 hlimit
= dst_metric(dst
, RTAX_HOPLIMIT
);
481 hlimit
= ipv6_get_hoplimit(dst
->dev
);
483 tclass
= np
->cork
.tclass
;
487 idev
= in6_dev_get(skb
->dev
);
492 err
= ip6_append_data(sk
, icmpv6_getfrag
, &msg
, skb
->len
+ sizeof(struct icmp6hdr
),
493 sizeof(struct icmp6hdr
), hlimit
, tclass
, NULL
, &fl
,
494 (struct rt6_info
*)dst
, MSG_DONTWAIT
);
497 ip6_flush_pending_frames(sk
);
500 err
= icmpv6_push_pending_frames(sk
, &fl
, &tmp_hdr
, skb
->len
+ sizeof(struct icmp6hdr
));
502 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTECHOREPLIES
);
503 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_OUTMSGS
);
506 if (likely(idev
!= NULL
))
510 icmpv6_xmit_unlock();
513 static void icmpv6_notify(struct sk_buff
*skb
, int type
, int code
, u32 info
)
515 struct in6_addr
*saddr
, *daddr
;
516 struct inet6_protocol
*ipprot
;
522 if (!pskb_may_pull(skb
, sizeof(struct ipv6hdr
)))
525 nexthdr
= ((struct ipv6hdr
*)skb
->data
)->nexthdr
;
526 if (ipv6_ext_hdr(nexthdr
)) {
527 /* now skip over extension headers */
528 inner_offset
= ipv6_skip_exthdr(skb
, sizeof(struct ipv6hdr
), &nexthdr
);
532 inner_offset
= sizeof(struct ipv6hdr
);
535 /* Check the header, including 8 bytes of the inner protocol header. */
536 if (!pskb_may_pull(skb
, inner_offset
+8))
539 saddr
= &skb
->nh
.ipv6h
->saddr
;
540 daddr
= &skb
->nh
.ipv6h
->daddr
;
542 /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
543 Without this we will not able f.e. to make source routed
545 Corresponding argument (opt) to notifiers is already added.
549 hash
= nexthdr
& (MAX_INET_PROTOS
- 1);
552 ipprot
= rcu_dereference(inet6_protos
[hash
]);
553 if (ipprot
&& ipprot
->err_handler
)
554 ipprot
->err_handler(skb
, NULL
, type
, code
, inner_offset
, info
);
557 read_lock(&raw_v6_lock
);
558 if ((sk
= sk_head(&raw_v6_htable
[hash
])) != NULL
) {
559 while((sk
= __raw_v6_lookup(sk
, nexthdr
, daddr
, saddr
,
561 rawv6_err(sk
, skb
, NULL
, type
, code
, inner_offset
, info
);
565 read_unlock(&raw_v6_lock
);
569 * Handle icmp messages
572 static int icmpv6_rcv(struct sk_buff
**pskb
, unsigned int *nhoffp
)
574 struct sk_buff
*skb
= *pskb
;
575 struct net_device
*dev
= skb
->dev
;
576 struct inet6_dev
*idev
= __in6_dev_get(dev
);
577 struct in6_addr
*saddr
, *daddr
;
578 struct ipv6hdr
*orig_hdr
;
579 struct icmp6hdr
*hdr
;
582 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_INMSGS
);
584 saddr
= &skb
->nh
.ipv6h
->saddr
;
585 daddr
= &skb
->nh
.ipv6h
->daddr
;
587 /* Perform checksum. */
588 if (skb
->ip_summed
== CHECKSUM_HW
) {
589 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
590 if (csum_ipv6_magic(saddr
, daddr
, skb
->len
, IPPROTO_ICMPV6
,
592 LIMIT_NETDEBUG(KERN_DEBUG
"ICMPv6 hw checksum failed\n");
593 skb
->ip_summed
= CHECKSUM_NONE
;
596 if (skb
->ip_summed
== CHECKSUM_NONE
) {
597 if (csum_ipv6_magic(saddr
, daddr
, skb
->len
, IPPROTO_ICMPV6
,
598 skb_checksum(skb
, 0, skb
->len
, 0))) {
599 LIMIT_NETDEBUG(KERN_DEBUG
"ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
600 NIP6(*saddr
), NIP6(*daddr
));
605 if (!pskb_pull(skb
, sizeof(struct icmp6hdr
)))
608 hdr
= (struct icmp6hdr
*) skb
->h
.raw
;
610 type
= hdr
->icmp6_type
;
612 if (type
>= ICMPV6_DEST_UNREACH
&& type
<= ICMPV6_PARAMPROB
)
613 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_INDESTUNREACHS
, type
- ICMPV6_DEST_UNREACH
);
614 else if (type
>= ICMPV6_ECHO_REQUEST
&& type
<= NDISC_REDIRECT
)
615 ICMP6_INC_STATS_OFFSET_BH(idev
, ICMP6_MIB_INECHOS
, type
- ICMPV6_ECHO_REQUEST
);
618 case ICMPV6_ECHO_REQUEST
:
619 icmpv6_echo_reply(skb
);
622 case ICMPV6_ECHO_REPLY
:
623 /* we couldn't care less */
626 case ICMPV6_PKT_TOOBIG
:
627 /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
628 standard destination cache. Seems, only "advanced"
629 destination cache will allow to solve this problem
632 if (!pskb_may_pull(skb
, sizeof(struct ipv6hdr
)))
634 hdr
= (struct icmp6hdr
*) skb
->h
.raw
;
635 orig_hdr
= (struct ipv6hdr
*) (hdr
+ 1);
636 rt6_pmtu_discovery(&orig_hdr
->daddr
, &orig_hdr
->saddr
, dev
,
637 ntohl(hdr
->icmp6_mtu
));
640 * Drop through to notify
643 case ICMPV6_DEST_UNREACH
:
644 case ICMPV6_TIME_EXCEED
:
645 case ICMPV6_PARAMPROB
:
646 icmpv6_notify(skb
, type
, hdr
->icmp6_code
, hdr
->icmp6_mtu
);
649 case NDISC_ROUTER_SOLICITATION
:
650 case NDISC_ROUTER_ADVERTISEMENT
:
651 case NDISC_NEIGHBOUR_SOLICITATION
:
652 case NDISC_NEIGHBOUR_ADVERTISEMENT
:
657 case ICMPV6_MGM_QUERY
:
658 igmp6_event_query(skb
);
661 case ICMPV6_MGM_REPORT
:
662 igmp6_event_report(skb
);
665 case ICMPV6_MGM_REDUCTION
:
666 case ICMPV6_NI_QUERY
:
667 case ICMPV6_NI_REPLY
:
668 case ICMPV6_MLD2_REPORT
:
669 case ICMPV6_DHAAD_REQUEST
:
670 case ICMPV6_DHAAD_REPLY
:
671 case ICMPV6_MOBILE_PREFIX_SOL
:
672 case ICMPV6_MOBILE_PREFIX_ADV
:
676 LIMIT_NETDEBUG(KERN_DEBUG
"icmpv6: msg of unknown type\n");
679 if (type
& ICMPV6_INFOMSG_MASK
)
683 * error of unknown type.
684 * must pass to upper level
687 icmpv6_notify(skb
, type
, hdr
->icmp6_code
, hdr
->icmp6_mtu
);
693 ICMP6_INC_STATS_BH(idev
, ICMP6_MIB_INERRORS
);
698 int __init
icmpv6_init(struct net_proto_family
*ops
)
704 err
= sock_create_kern(PF_INET6
, SOCK_RAW
, IPPROTO_ICMPV6
,
705 &per_cpu(__icmpv6_socket
, i
));
708 "Failed to initialize the ICMP6 control socket "
714 sk
= per_cpu(__icmpv6_socket
, i
)->sk
;
715 sk
->sk_allocation
= GFP_ATOMIC
;
717 /* Enough space for 2 64K ICMP packets, including
718 * sk_buff struct overhead.
721 (2 * ((64 * 1024) + sizeof(struct sk_buff
)));
723 sk
->sk_prot
->unhash(sk
);
727 if (inet6_add_protocol(&icmpv6_protocol
, IPPROTO_ICMPV6
) < 0) {
728 printk(KERN_ERR
"Failed to register ICMP6 protocol\n");
736 for (j
= 0; j
< i
; j
++) {
737 if (!cpu_possible(j
))
739 sock_release(per_cpu(__icmpv6_socket
, j
));
745 void icmpv6_cleanup(void)
750 sock_release(per_cpu(__icmpv6_socket
, i
));
752 inet6_del_protocol(&icmpv6_protocol
, IPPROTO_ICMPV6
);
755 static struct icmp6_err
{
763 { /* ADM_PROHIBITED */
767 { /* Was NOT_NEIGHBOUR, now reserved */
781 int icmpv6_err_convert(int type
, int code
, int *err
)
788 case ICMPV6_DEST_UNREACH
:
790 if (code
<= ICMPV6_PORT_UNREACH
) {
791 *err
= tab_unreach
[code
].err
;
792 fatal
= tab_unreach
[code
].fatal
;
796 case ICMPV6_PKT_TOOBIG
:
800 case ICMPV6_PARAMPROB
:
805 case ICMPV6_TIME_EXCEED
:
814 ctl_table ipv6_icmp_table
[] = {
816 .ctl_name
= NET_IPV6_ICMP_RATELIMIT
,
817 .procname
= "ratelimit",
818 .data
= &sysctl_icmpv6_time
,
819 .maxlen
= sizeof(int),
821 .proc_handler
= &proc_dointvec