2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
30 * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.39.2.19 2003/01/24 05:11:34 sam Exp $
33 #include "opt_icmpprintfs.h"
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h> /* for M_NOWAIT */
39 #include <sys/protosw.h>
40 #include <sys/socket.h>
41 #include <sys/socketops.h>
43 #include <sys/kernel.h>
44 #include <sys/sysctl.h>
45 #include <sys/in_cksum.h>
47 #include <machine/stdarg.h>
50 #include <net/if_types.h>
51 #include <net/netisr2.h>
52 #include <net/netmsg2.h>
53 #include <net/route.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip.h>
60 #include <netinet/ip_icmp.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/icmp_var.h>
/*
 * ICMP routines: error generation, receive packet processing,
 * routines to turn packets around back to the originator, and
 * host table maintenance routines.
 */
70 struct icmpstat icmpstat
;
71 SYSCTL_STRUCT(_net_inet_icmp
, ICMPCTL_STATS
, stats
, CTLFLAG_RW
,
72 &icmpstat
, icmpstat
, "ICMP statistics");
74 static int icmpmaskrepl
= 0;
75 SYSCTL_INT(_net_inet_icmp
, ICMPCTL_MASKREPL
, maskrepl
, CTLFLAG_RW
,
76 &icmpmaskrepl
, 0, "Allow replies to netmask requests");
78 static int drop_redirect
= 0;
79 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, drop_redirect
, CTLFLAG_RW
,
80 &drop_redirect
, 0, "Ignore ICMP redirects");
82 static int log_redirect
= 0;
83 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, log_redirect
, CTLFLAG_RW
,
84 &log_redirect
, 0, "Enable output about ICMP redirects");
86 static int discard_sourcequench
= 1;
87 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, discard_sourcequench
, CTLFLAG_RW
,
88 &discard_sourcequench
, 0, "Discard ICMP Source Quench");
/*
 * ICMP error-response bandwidth limiting sysctl.  If not enabled, the
 * sysctl variable content is -1 and it is read-only.
 */
97 static int icmplim
= 200;
98 SYSCTL_INT(_net_inet_icmp
, ICMPCTL_ICMPLIM
, icmplim
, CTLFLAG_RW
,
99 &icmplim
, 0, "ICMP bandwidth limit");
102 static int icmplim
= -1;
103 SYSCTL_INT(_net_inet_icmp
, ICMPCTL_ICMPLIM
, icmplim
, CTLFLAG_RD
,
104 &icmplim
, 0, "ICMP bandwidth limit");
108 static int icmplim_output
= 0;
109 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, icmplim_output
, CTLFLAG_RW
,
110 &icmplim_output
, 0, "Enable output about ICMP bandwidth limits");
113 * ICMP broadcast echo sysctl
116 static int icmpbmcastecho
= 0;
117 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, bmcastecho
, CTLFLAG_RW
,
118 &icmpbmcastecho
, 0, "");
120 static char icmp_reply_src
[IFNAMSIZ
];
121 SYSCTL_STRING(_net_inet_icmp
, OID_AUTO
, reply_src
, CTLFLAG_RW
,
122 icmp_reply_src
, IFNAMSIZ
, "icmp reply source for non-local packets.");
125 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, reply_from_interface
, CTLFLAG_RW
,
126 &icmp_rfi
, 0, "ICMP reply from incoming interface for "
127 "non-local packets");
130 static int icmpprintfs
= 0;
131 SYSCTL_INT(_net_inet_icmp
, OID_AUTO
, debug_prints
, CTLFLAG_RW
,
132 &icmpprintfs
, 0, "extra ICMP debug prints");
135 static void icmp_reflect (struct mbuf
*);
136 static void icmp_send (struct mbuf
*, struct mbuf
*, struct route
*);
138 extern struct protosw inetsw
[];
/*
 * Generate an error packet of type error
 * in response to bad packet ip.
 */
145 icmp_error(struct mbuf
*n
, int type
, int code
, n_long dest
, int destmtu
)
147 struct ip
*oip
= mtod(n
, struct ip
*), *nip
;
148 unsigned oiplen
= IP_VHL_HL(oip
->ip_vhl
) << 2;
155 kprintf("icmp_error(%p, %d, %d)\n", oip
, type
, code
);
157 if (type
!= ICMP_REDIRECT
)
158 icmpstat
.icps_error
++;
160 * Don't send error if the original packet was encrypted.
161 * Don't send error if not the first fragment of message.
162 * Don't error if the old packet protocol was ICMP
163 * error message, only known informational types.
165 if (n
->m_flags
& M_DECRYPTED
)
167 if (oip
->ip_off
& htons(~(IP_MF
|IP_DF
)))
169 if (oip
->ip_p
== IPPROTO_ICMP
&& type
!= ICMP_REDIRECT
&&
170 n
->m_len
>= oiplen
+ ICMP_MINLEN
&&
171 !ICMP_INFOTYPE(((struct icmp
*)((caddr_t
)oip
+ oiplen
))->icmp_type
)) {
172 icmpstat
.icps_oldicmp
++;
175 /* Don't send error in response to a multicast or broadcast packet */
176 if (n
->m_flags
& (M_BCAST
|M_MCAST
))
179 * First, formulate icmp message
181 m
= m_gethdr(M_NOWAIT
, MT_HEADER
);
184 icmplen
= min(oiplen
+ 8, ntohs(oip
->ip_len
));
185 if (icmplen
< sizeof(struct ip
))
186 panic("icmp_error: bad length");
187 m
->m_len
= icmplen
+ ICMP_MINLEN
;
188 MH_ALIGN(m
, m
->m_len
);
189 icp
= mtod(m
, struct icmp
*);
190 if ((u_int
)type
> ICMP_MAXTYPE
)
192 icmpstat
.icps_outhist
[type
]++;
193 icp
->icmp_type
= type
;
194 if (type
== ICMP_REDIRECT
)
195 icp
->icmp_gwaddr
.s_addr
= dest
;
199 * The following assignments assume an overlay with the
200 * zeroed icmp_void field.
202 if (type
== ICMP_PARAMPROB
) {
203 icp
->icmp_pptr
= code
;
205 } else if (type
== ICMP_UNREACH
&&
206 code
== ICMP_UNREACH_NEEDFRAG
&& destmtu
) {
207 icp
->icmp_nextmtu
= htons(destmtu
);
211 icp
->icmp_code
= code
;
212 m_copydata(n
, 0, icmplen
, &icp
->icmp_ip
);
216 * Now, copy old ip header (without options)
217 * in front of icmp message.
219 if (m
->m_data
- sizeof(struct ip
) < m
->m_pktdat
)
221 m
->m_data
-= sizeof(struct ip
);
222 m
->m_len
+= sizeof(struct ip
);
223 m
->m_pkthdr
.len
= m
->m_len
;
224 m
->m_pkthdr
.rcvif
= n
->m_pkthdr
.rcvif
;
225 nip
= mtod(m
, struct ip
*);
226 bcopy(oip
, nip
, sizeof(struct ip
));
227 nip
->ip_len
= htons(m
->m_len
);
228 nip
->ip_vhl
= IP_VHL_BORING
;
229 nip
->ip_p
= IPPROTO_ICMP
;
231 m
->m_pkthdr
.fw_flags
|= n
->m_pkthdr
.fw_flags
& FW_MBUF_GENERATED
;
239 icmp_ctlinput_done_handler(netmsg_t nmsg
)
241 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
242 struct mbuf
*m
= msg
->m
;
243 int hlen
= msg
->hlen
;
245 rip_input(&m
, &hlen
, msg
->proto
);
249 icmp_ctlinput_done(struct mbuf
*m
)
251 struct netmsg_ctlinput
*msg
= &m
->m_hdr
.mh_ctlmsg
;
253 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
254 icmp_ctlinput_done_handler
);
255 lwkt_sendmsg(netisr_cpuport(0), &msg
->base
.lmsg
);
259 icmp_mtudisc(struct mbuf
*m
, int hlen
)
261 struct sockaddr_in icmpsrc
= { sizeof(struct sockaddr_in
), AF_INET
};
265 KASSERT(curthread
->td_type
== TD_TYPE_NETISR
, ("not in netisr"));
267 icp
= mtodoff(m
, struct icmp
*, hlen
);
268 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
272 * If we got a needfrag and there is a host route to the original
273 * destination, and the MTU is not locked, then set the MTU in the
274 * route to the suggested new value (if given) and then notify as
275 * usual. The ULPs will notice that the MTU has changed and adapt
276 * accordingly. If no new MTU was suggested, then we guess a new
277 * one less than the current value. If the new MTU is unreasonably
278 * small (arbitrarily set at 296), then we reset the MTU to the
279 * interface value and enable the lock bit, indicating that we are
280 * no longer doing MTU discovery.
282 rt
= rtpurelookup((struct sockaddr
*)&icmpsrc
);
283 if (rt
!= NULL
&& (rt
->rt_flags
& RTF_HOST
) &&
284 !(rt
->rt_rmx
.rmx_locks
& RTV_MTU
)) {
286 char src_buf
[INET_ADDRSTRLEN
];
290 mtu
= ntohs(icp
->icmp_nextmtu
);
292 mtu
= ip_next_mtu(rt
->rt_rmx
.rmx_mtu
, 1);
294 kprintf("MTU for %s reduced to %d\n",
295 inet_ntop(AF_INET
, &icmpsrc
.sin_addr
,
296 src_buf
, INET_ADDRSTRLEN
), mtu
);
299 /* rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; */
300 rt
->rt_rmx
.rmx_locks
|= RTV_MTU
;
301 } else if (rt
->rt_rmx
.rmx_mtu
> mtu
) {
302 rt
->rt_rmx
.rmx_mtu
= mtu
;
309 * XXX if the packet contains [IPv4 AH TCP], we can't make a
310 * notification to TCP layer.
312 so_pr_ctlinput_direct(&inetsw
[ip_protox
[icp
->icmp_ip
.ip_p
]],
313 PRC_MSGSIZE
, (struct sockaddr
*)&icmpsrc
, &icp
->icmp_ip
);
317 icmp_mtudisc_handler(netmsg_t nmsg
)
319 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
322 ASSERT_NETISR_NCPUS(mycpuid
);
324 icmp_mtudisc(msg
->m
, msg
->hlen
);
326 nextcpu
= mycpuid
+ 1;
327 if (nextcpu
< netisr_ncpus
)
328 lwkt_forwardmsg(netisr_cpuport(nextcpu
), &msg
->base
.lmsg
);
330 icmp_ctlinput_done(msg
->m
);
334 icmp_mtudisc_start(struct mbuf
*m
, int hlen
, int proto
)
336 struct netmsg_ctlinput
*msg
;
340 icmp_mtudisc(m
, hlen
);
342 if (netisr_ncpus
== 1) {
343 /* There is only one netisr; done */
347 msg
= &m
->m_hdr
.mh_ctlmsg
;
348 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
349 icmp_mtudisc_handler
);
351 msg
->cmd
= PRC_MSGSIZE
;
355 lwkt_sendmsg(netisr_cpuport(1), &msg
->base
.lmsg
);
360 icmp_ctlinput(struct mbuf
*m
, int cmd
, int hlen
)
362 struct sockaddr_in icmpsrc
= { sizeof(struct sockaddr_in
), AF_INET
};
365 KASSERT(curthread
->td_type
== TD_TYPE_NETISR
, ("not in netisr"));
367 icp
= mtodoff(m
, struct icmp
*, hlen
);
368 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
371 * XXX if the packet contains [IPv4 AH TCP], we can't make a
372 * notification to TCP layer.
374 so_pr_ctlinput_direct(&inetsw
[ip_protox
[icp
->icmp_ip
.ip_p
]],
375 cmd
, (struct sockaddr
*)&icmpsrc
, &icp
->icmp_ip
);
379 icmp_ctlinput_handler(netmsg_t nmsg
)
381 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
383 ASSERT_NETISR_NCPUS(mycpuid
);
385 icmp_ctlinput(msg
->m
, msg
->cmd
, msg
->hlen
);
386 icmp_ctlinput_done(msg
->m
);
390 icmp_ctlinput_start(struct mbuf
*m
, struct lwkt_port
*port
,
391 int cmd
, int hlen
, int proto
)
393 struct netmsg_ctlinput
*msg
;
395 KASSERT(&curthread
->td_msgport
!= port
,
396 ("send icmp ctlinput to the current netisr"));
398 msg
= &m
->m_hdr
.mh_ctlmsg
;
399 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
400 icmp_ctlinput_handler
);
406 lwkt_sendmsg(port
, &msg
->base
.lmsg
);
410 icmp_ctlinput_global_handler(netmsg_t nmsg
)
412 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
415 ASSERT_NETISR_NCPUS(mycpuid
);
417 icmp_ctlinput(msg
->m
, msg
->cmd
, msg
->hlen
);
419 nextcpu
= mycpuid
+ 1;
420 if (nextcpu
< netisr_ncpus
)
421 lwkt_forwardmsg(netisr_cpuport(nextcpu
), &msg
->base
.lmsg
);
423 icmp_ctlinput_done(msg
->m
);
427 icmp_ctlinput_global_start(struct mbuf
*m
, int cmd
, int hlen
, int proto
)
429 struct netmsg_ctlinput
*msg
;
432 KASSERT(netisr_ncpus
> 1, ("there is only 1 netisr cpu"));
434 icmp_ctlinput(m
, cmd
, hlen
);
436 msg
= &m
->m_hdr
.mh_ctlmsg
;
437 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
438 icmp_ctlinput_global_handler
);
444 lwkt_sendmsg(netisr_cpuport(1), &msg
->base
.lmsg
);
/*
 * Route flags used for ICMP redirects; passed to both rtredirect_oncpu()
 * and rt_missmsg() below.
 */
#define ICMP_RTREDIRECT_FLAGS	(RTF_GATEWAY | RTF_HOST)
450 icmp_redirect(struct mbuf
*m
, int hlen
, boolean_t prt
)
452 struct sockaddr_in icmpsrc
= { sizeof(struct sockaddr_in
), AF_INET
};
453 struct sockaddr_in icmpdst
= { sizeof(struct sockaddr_in
), AF_INET
};
454 struct sockaddr_in icmpgw
= { sizeof(struct sockaddr_in
), AF_INET
};
458 KASSERT(curthread
->td_type
== TD_TYPE_NETISR
, ("not in netisr"));
460 ip
= mtod(m
, struct ip
*);
461 icp
= mtodoff(m
, struct icmp
*, hlen
);
464 * Short circuit routing redirects to force immediate change
465 * in the kernel's routing tables. The message is also handed
466 * to anyone listening on a raw socket (e.g. the routing daemon
467 * for use in updating its tables).
470 if (icmpprintfs
&& prt
) {
471 char dst_buf
[INET_ADDRSTRLEN
], gw_buf
[INET_ADDRSTRLEN
];
473 kprintf("redirect dst %s to %s\n",
474 inet_ntop(AF_INET
, &icp
->icmp_ip
.ip_dst
,
475 dst_buf
, INET_ADDRSTRLEN
),
476 inet_ntop(AF_INET
, &icp
->icmp_gwaddr
,
477 gw_buf
, INET_ADDRSTRLEN
));
480 icmpgw
.sin_addr
= ip
->ip_src
;
481 icmpdst
.sin_addr
= icp
->icmp_gwaddr
;
482 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
483 rtredirect_oncpu((struct sockaddr
*)&icmpsrc
,
484 (struct sockaddr
*)&icmpdst
, NULL
, ICMP_RTREDIRECT_FLAGS
,
485 (struct sockaddr
*)&icmpgw
);
486 kpfctlinput_direct(PRC_REDIRECT_HOST
, (struct sockaddr
*)&icmpsrc
);
490 icmp_redirect_done_handler(netmsg_t nmsg
)
492 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
493 struct mbuf
*m
= msg
->m
;
494 int hlen
= msg
->hlen
;
496 rip_input(&m
, &hlen
, msg
->proto
);
500 icmp_redirect_done(struct mbuf
*m
, int hlen
, boolean_t dispatch_rip
)
502 struct rt_addrinfo rtinfo
;
503 struct sockaddr_in icmpsrc
= { sizeof(struct sockaddr_in
), AF_INET
};
504 struct sockaddr_in icmpdst
= { sizeof(struct sockaddr_in
), AF_INET
};
505 struct sockaddr_in icmpgw
= { sizeof(struct sockaddr_in
), AF_INET
};
509 ip
= mtod(m
, struct ip
*);
510 icp
= mtodoff(m
, struct icmp
*, hlen
);
512 icmpgw
.sin_addr
= ip
->ip_src
;
513 icmpdst
.sin_addr
= icp
->icmp_gwaddr
;
514 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
516 bzero(&rtinfo
, sizeof(struct rt_addrinfo
));
517 rtinfo
.rti_info
[RTAX_DST
] = (struct sockaddr
*)&icmpsrc
;
518 rtinfo
.rti_info
[RTAX_GATEWAY
] = (struct sockaddr
*)&icmpdst
;
519 rtinfo
.rti_info
[RTAX_NETMASK
] = NULL
;
520 rtinfo
.rti_info
[RTAX_AUTHOR
] = (struct sockaddr
*)&icmpgw
;
521 rt_missmsg(RTM_REDIRECT
, &rtinfo
, ICMP_RTREDIRECT_FLAGS
, 0);
524 struct netmsg_ctlinput
*msg
= &m
->m_hdr
.mh_ctlmsg
;
526 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
527 icmp_redirect_done_handler
);
528 lwkt_sendmsg(netisr_cpuport(0), &msg
->base
.lmsg
);
533 icmp_redirect_handler(netmsg_t nmsg
)
535 struct netmsg_ctlinput
*msg
= (struct netmsg_ctlinput
*)nmsg
;
538 ASSERT_NETISR_NCPUS(mycpuid
);
540 icmp_redirect(msg
->m
, msg
->hlen
, FALSE
);
542 nextcpu
= mycpuid
+ 1;
543 if (nextcpu
< netisr_ncpus
)
544 lwkt_forwardmsg(netisr_cpuport(nextcpu
), &msg
->base
.lmsg
);
546 icmp_redirect_done(msg
->m
, msg
->hlen
, TRUE
);
550 icmp_redirect_start(struct mbuf
*m
, int hlen
, int proto
)
552 struct netmsg_ctlinput
*msg
;
556 icmp_redirect(m
, hlen
, TRUE
);
558 if (netisr_ncpus
== 1) {
559 /* There is only one netisr; done */
560 icmp_redirect_done(m
, hlen
, FALSE
);
564 msg
= &m
->m_hdr
.mh_ctlmsg
;
565 netmsg_init(&msg
->base
, NULL
, &netisr_apanic_rport
, 0,
566 icmp_redirect_handler
);
568 msg
->cmd
= PRC_REDIRECT_HOST
;
572 lwkt_sendmsg(netisr_cpuport(1), &msg
->base
.lmsg
);
/*
 * Process a received ICMP message.
 */
580 icmp_input(struct mbuf
**mp
, int *offp
, int proto
)
582 struct sockaddr_in icmpsrc
= { sizeof(struct sockaddr_in
), AF_INET
};
583 struct sockaddr_in icmpdst
= { sizeof(struct sockaddr_in
), AF_INET
};
585 struct in_ifaddr
*ia
;
586 struct mbuf
*m
= *mp
;
596 ip
= mtod(m
, struct ip
*);
597 icmplen
= ntohs(ip
->ip_len
) - hlen
;
600 * Locate icmp structure in mbuf, and check
601 * that not corrupted and of at least minimum length.
605 char src_buf
[INET_ADDRSTRLEN
], dst_buf
[INET_ADDRSTRLEN
];
607 kprintf("icmp_input from %s to %s, len %d\n",
608 inet_ntop(AF_INET
, &ip
->ip_src
, src_buf
, INET_ADDRSTRLEN
),
609 inet_ntop(AF_INET
, &ip
->ip_dst
, dst_buf
, INET_ADDRSTRLEN
),
613 if (icmplen
< ICMP_MINLEN
) {
614 icmpstat
.icps_tooshort
++;
617 i
= hlen
+ min(icmplen
, ICMP_ADVLENMIN
);
618 if (m
->m_len
< i
&& (m
= m_pullup(m
, i
)) == NULL
) {
619 icmpstat
.icps_tooshort
++;
620 return(IPPROTO_DONE
);
622 ip
= mtod(m
, struct ip
*);
624 if (in_cksum_skip(m
, hlen
+ icmplen
, hlen
)) {
625 icmpstat
.icps_checksum
++;
628 icp
= (struct icmp
*)((caddr_t
)ip
+ hlen
);
632 kprintf("icmp_input, type %d code %d\n", icp
->icmp_type
,
637 * Message type specific processing.
639 if (icp
->icmp_type
> ICMP_MAXTYPE
)
641 icmpstat
.icps_inhist
[icp
->icmp_type
]++;
642 code
= icp
->icmp_code
;
643 switch (icp
->icmp_type
) {
647 case ICMP_UNREACH_NET
:
648 case ICMP_UNREACH_HOST
:
649 case ICMP_UNREACH_SRCFAIL
:
650 case ICMP_UNREACH_NET_UNKNOWN
:
651 case ICMP_UNREACH_HOST_UNKNOWN
:
652 case ICMP_UNREACH_ISOLATED
:
653 case ICMP_UNREACH_TOSNET
:
654 case ICMP_UNREACH_TOSHOST
:
655 case ICMP_UNREACH_HOST_PRECEDENCE
:
656 case ICMP_UNREACH_PRECEDENCE_CUTOFF
:
657 code
= PRC_UNREACH_NET
;
660 case ICMP_UNREACH_NEEDFRAG
:
665 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
666 * Treat subcodes 2,3 as immediate RST
668 case ICMP_UNREACH_PROTOCOL
:
669 case ICMP_UNREACH_PORT
:
670 code
= PRC_UNREACH_PORT
;
673 case ICMP_UNREACH_NET_PROHIB
:
674 case ICMP_UNREACH_HOST_PROHIB
:
675 case ICMP_UNREACH_FILTER_PROHIB
:
676 code
= PRC_UNREACH_ADMIN_PROHIB
;
687 code
+= PRC_TIMXCEED_INTRANS
;
693 code
= PRC_PARAMPROB
;
696 case ICMP_SOURCEQUENCH
:
699 if (discard_sourcequench
)
704 * Problem with datagram; advise higher level routines.
706 if (icmplen
< ICMP_ADVLENMIN
|| icmplen
< ICMP_ADVLEN(icp
) ||
707 IP_VHL_HL(icp
->icmp_ip
.ip_vhl
) < (sizeof(struct ip
) >> 2)) {
708 icmpstat
.icps_badlen
++;
711 /* Discard ICMP's in response to multicast packets */
712 if (IN_MULTICAST(ntohl(icp
->icmp_ip
.ip_dst
.s_addr
)))
716 kprintf("deliver to protocol %d\n", icp
->icmp_ip
.ip_p
);
718 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
723 if (code
== PRC_MSGSIZE
) {
724 /* Run MTU discovery in all netisrs */
725 if (icmp_mtudisc_start(m
, hlen
, proto
)) {
726 /* Forwarded; done */
729 /* Move on; run rip_input() directly */
732 struct lwkt_port
*port
;
735 pr
= &inetsw
[ip_protox
[icp
->icmp_ip
.ip_p
]];
736 port
= so_pr_ctlport(pr
, code
,
737 (struct sockaddr
*)&icmpsrc
, &icp
->icmp_ip
, &cpu
);
739 if (cpu
== netisr_ncpus
) {
740 if (netisr_ncpus
> 1) {
742 * Run pr_ctlinput in all
745 icmp_ctlinput_global_start(m
,
750 * There is only one netisr; run
751 * pr_ctlinput directly.
753 } else if (cpu
!= mycpuid
) {
755 * Send to the target netisr to run
758 icmp_ctlinput_start(m
, port
,
764 * The target netisr is this netisr.
766 * XXX if the packet contains [IPv4 AH TCP],
767 * we can't make a notification to TCP layer.
769 so_pr_ctlinput_direct(pr
, code
,
770 (struct sockaddr
*)&icmpsrc
, &icp
->icmp_ip
);
772 /* Move on; run rip_input() directly */
776 icmpstat
.icps_badcode
++;
781 && (m
->m_flags
& (M_MCAST
| M_BCAST
)) != 0) {
782 icmpstat
.icps_bmcastecho
++;
785 icp
->icmp_type
= ICMP_ECHOREPLY
;
787 if (badport_bandlim(BANDLIM_ICMP_ECHO
) < 0)
795 && (m
->m_flags
& (M_MCAST
| M_BCAST
)) != 0) {
796 icmpstat
.icps_bmcasttstamp
++;
799 if (icmplen
< ICMP_TSLEN
) {
800 icmpstat
.icps_badlen
++;
803 icp
->icmp_type
= ICMP_TSTAMPREPLY
;
804 icp
->icmp_rtime
= iptime();
805 icp
->icmp_ttime
= icp
->icmp_rtime
; /* bogus, do later! */
807 if (badport_bandlim(BANDLIM_ICMP_TSTAMP
) < 0)
814 if (icmpmaskrepl
== 0)
817 * We are not able to respond with all ones broadcast
818 * unless we receive it over a point-to-point interface.
820 if (icmplen
< ICMP_MASKLEN
)
822 switch (ip
->ip_dst
.s_addr
) {
824 case INADDR_BROADCAST
:
826 icmpdst
.sin_addr
= ip
->ip_src
;
830 icmpdst
.sin_addr
= ip
->ip_dst
;
832 ia
= (struct in_ifaddr
*)ifaof_ifpforaddr(
833 (struct sockaddr
*)&icmpdst
, m
->m_pkthdr
.rcvif
);
838 icp
->icmp_type
= ICMP_MASKREPLY
;
839 icp
->icmp_mask
= ia
->ia_sockmask
.sin_addr
.s_addr
;
840 if (ip
->ip_src
.s_addr
== 0) {
841 if (ia
->ia_ifp
->if_flags
& IFF_BROADCAST
)
842 ip
->ip_src
= satosin(&ia
->ia_broadaddr
)->sin_addr
;
843 else if (ia
->ia_ifp
->if_flags
& IFF_POINTOPOINT
)
844 ip
->ip_src
= satosin(&ia
->ia_dstaddr
)->sin_addr
;
847 icmpstat
.icps_reflect
++;
848 icmpstat
.icps_outhist
[icp
->icmp_type
]++;
850 return(IPPROTO_DONE
);
854 char src_buf
[INET_ADDRSTRLEN
];
855 char dst_buf
[INET_ADDRSTRLEN
];
856 char gwy_buf
[INET_ADDRSTRLEN
];
858 kprintf("icmp redirect from %s: %s => %s\n",
859 inet_ntop(AF_INET
, &ip
->ip_src
,
860 src_buf
, INET_ADDRSTRLEN
),
861 inet_ntop(AF_INET
, &icp
->icmp_ip
.ip_dst
,
862 dst_buf
, INET_ADDRSTRLEN
),
863 inet_ntop(AF_INET
, &icp
->icmp_gwaddr
,
864 gwy_buf
, INET_ADDRSTRLEN
));
870 if (icmplen
< ICMP_ADVLENMIN
|| icmplen
< ICMP_ADVLEN(icp
) ||
871 IP_VHL_HL(icp
->icmp_ip
.ip_vhl
) < (sizeof(struct ip
) >> 2)) {
872 icmpstat
.icps_badlen
++;
877 char dst_buf
[INET_ADDRSTRLEN
], gw_buf
[INET_ADDRSTRLEN
];
879 kprintf("redirect dst %s to %s\n",
880 inet_ntop(AF_INET
, &icp
->icmp_ip
.ip_dst
,
881 dst_buf
, INET_ADDRSTRLEN
),
882 inet_ntop(AF_INET
, &icp
->icmp_gwaddr
,
883 gw_buf
, INET_ADDRSTRLEN
));
886 icmpsrc
.sin_addr
= icp
->icmp_ip
.ip_dst
;
888 /* Run redirect in all netisrs */
889 if (icmp_redirect_start(m
, hlen
, proto
)) {
890 /* Forwarded; done */
893 /* Move on; run rip_input() directly */
897 * No kernel processing for the following;
898 * just fall through to send to raw listener.
901 case ICMP_ROUTERADVERT
:
902 case ICMP_ROUTERSOLICIT
:
903 case ICMP_TSTAMPREPLY
:
912 rip_input(mp
, offp
, proto
);
913 return(IPPROTO_DONE
);
917 return(IPPROTO_DONE
);
/*
 * Reflect the IP packet back to the source.
 */
924 icmp_reflect(struct mbuf
*m
)
926 struct ip
*ip
= mtod(m
, struct ip
*);
927 struct in_ifaddr
*ia
;
928 struct in_ifaddr_container
*iac
;
929 struct ifaddr_container
*ifac
;
932 struct mbuf
*opts
= NULL
;
933 int optlen
= (IP_VHL_HL(ip
->ip_vhl
) << 2) - sizeof(struct ip
);
934 struct route
*ro
= NULL
, rt
;
936 if (!in_canforward(ip
->ip_src
) &&
937 ((ntohl(ip
->ip_src
.s_addr
) & IN_CLASSA_NET
) !=
938 (IN_LOOPBACKNET
<< IN_CLASSA_NSHIFT
))) {
939 m_freem(m
); /* Bad return address */
940 icmpstat
.icps_badaddr
++;
941 goto done
; /* Ip_output() will check for broadcast */
944 ip
->ip_dst
= ip
->ip_src
;
947 bzero(ro
, sizeof *ro
);
950 * If the incoming packet was addressed directly to us,
951 * use dst as the src for the reply. Otherwise (broadcast
952 * or anonymous), use the address which corresponds
953 * to the incoming interface.
956 LIST_FOREACH(iac
, INADDR_HASH(t
.s_addr
), ia_hash
) {
957 if (t
.s_addr
== IA_SIN(iac
->ia
)->sin_addr
.s_addr
) {
962 ifp
= m
->m_pkthdr
.rcvif
;
963 if (ifp
!= NULL
&& (ifp
->if_flags
& IFF_BROADCAST
)) {
964 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
965 struct ifaddr
*ifa
= ifac
->ifa
;
967 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
970 if (satosin(&ia
->ia_broadaddr
)->sin_addr
.s_addr
==
976 * If the packet was transiting through us, use the address of
977 * the interface the packet came through in. If that interface
978 * doesn't have a suitable IP address, the normal selection
981 if (icmp_rfi
&& ifp
!= NULL
) {
982 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
983 struct ifaddr
*ifa
= ifac
->ifa
;
985 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
992 * If the incoming packet was not addressed directly to us, use
993 * designated interface for icmp replies specified by sysctl
994 * net.inet.icmp.reply_src (default not set). Otherwise continue
995 * with normal source selection.
997 if (icmp_reply_src
[0] != '\0' &&
998 (ifp
= ifunit_netisr(icmp_reply_src
))) {
999 TAILQ_FOREACH(ifac
, &ifp
->if_addrheads
[mycpuid
], ifa_link
) {
1000 struct ifaddr
*ifa
= ifac
->ifa
;
1002 if (ifa
->ifa_addr
->sa_family
!= AF_INET
)
1009 * If the packet was transiting through us, use the address of
1010 * the interface that is the closest to the packet source.
1011 * When we don't have a route back to the packet source, stop here
1012 * and drop the packet.
1014 ia
= ip_rtaddr(ip
->ip_dst
, ro
);
1017 icmpstat
.icps_noroute
++;
1021 t
= IA_SIN(ia
)->sin_addr
;
1023 ip
->ip_ttl
= ip_defttl
;
1031 * Retrieve any source routing from the incoming packet;
1032 * add on any record-route or timestamp options.
1034 cp
= (u_char
*) (ip
+ 1);
1035 if ((opts
= ip_srcroute(m
)) == NULL
&&
1036 (opts
= m_gethdr(M_NOWAIT
, MT_HEADER
))) {
1037 opts
->m_len
= sizeof(struct in_addr
);
1038 mtod(opts
, struct in_addr
*)->s_addr
= 0;
1043 kprintf("icmp_reflect optlen %d rt %d => ",
1044 optlen
, opts
->m_len
);
1046 for (cnt
= optlen
; cnt
> 0; cnt
-= len
, cp
+= len
) {
1047 opt
= cp
[IPOPT_OPTVAL
];
1048 if (opt
== IPOPT_EOL
)
1050 if (opt
== IPOPT_NOP
)
1053 if (cnt
< IPOPT_OLEN
+ sizeof *cp
)
1055 len
= cp
[IPOPT_OLEN
];
1056 if (len
< IPOPT_OLEN
+ sizeof *cp
||
1061 * Should check for overflow, but it
1064 if (opt
== IPOPT_RR
|| opt
== IPOPT_TS
||
1065 opt
== IPOPT_SECURITY
) {
1067 mtod(opts
, caddr_t
) + opts
->m_len
,
1072 /* Terminate & pad, if necessary */
1073 cnt
= opts
->m_len
% 4;
1075 for (; cnt
< 4; cnt
++) {
1076 *(mtod(opts
, caddr_t
) + opts
->m_len
) =
1083 kprintf("%d\n", opts
->m_len
);
1087 * Now strip out original options by copying rest of first
1088 * mbuf's data back, and adjust the IP length.
1090 ip
->ip_len
= htons(ntohs(ip
->ip_len
) - optlen
);
1091 ip
->ip_vhl
= IP_VHL_BORING
;
1093 if (m
->m_flags
& M_PKTHDR
)
1094 m
->m_pkthdr
.len
-= optlen
;
1095 optlen
+= sizeof(struct ip
);
1096 bcopy((caddr_t
)ip
+ optlen
, ip
+ 1,
1097 m
->m_len
- sizeof(struct ip
));
1099 m
->m_pkthdr
.fw_flags
&= FW_MBUF_GENERATED
;
1100 m
->m_flags
&= ~(M_BCAST
|M_MCAST
);
1101 icmp_send(m
, opts
, ro
);
1105 if (ro
&& ro
->ro_rt
)
/*
 * Send an ICMP packet back to the IP level,
 * after supplying a checksum.
 */
1114 icmp_send(struct mbuf
*m
, struct mbuf
*opts
, struct route
*rt
)
1116 struct ip
*ip
= mtod(m
, struct ip
*);
1120 hlen
= IP_VHL_HL(ip
->ip_vhl
) << 2;
1123 icp
= mtod(m
, struct icmp
*);
1124 icp
->icmp_cksum
= 0;
1125 icp
->icmp_cksum
= in_cksum(m
, ntohs(ip
->ip_len
) - hlen
);
1128 m
->m_pkthdr
.rcvif
= NULL
;
1131 char dst_buf
[INET_ADDRSTRLEN
], src_buf
[INET_ADDRSTRLEN
];
1133 kprintf("icmp_send dst %s src %s\n",
1134 inet_ntop(AF_INET
, &ip
->ip_dst
, dst_buf
, INET_ADDRSTRLEN
),
1135 inet_ntop(AF_INET
, &ip
->ip_src
, src_buf
, INET_ADDRSTRLEN
));
1138 ip_output(m
, opts
, rt
, 0, NULL
, NULL
);
1148 t
= (atv
.tv_sec
% (24*60*60)) * 1000 + atv
.tv_usec
/ 1000;
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 */
1159 ip_next_mtu(int mtu
, int dir
)
1161 static int mtutab
[] = {
1162 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
1167 for (i
= 0; i
< (sizeof mtutab
) / (sizeof mtutab
[0]); i
++) {
1168 if (mtu
>= mtutab
[i
])
1176 return mtutab
[i
- 1];
1179 if (mtutab
[i
] == 0) {
1181 } else if(mtu
> mtutab
[i
]) {
1184 return mtutab
[i
+ 1];
1192 * badport_bandlim() - check for ICMP bandwidth limit
1194 * Return 0 if it is ok to send an ICMP error response, -1 if we have
1195 * hit our bandwidth limit and it is not ok.
1197 * If icmplim is <= 0, the feature is disabled and 0 is returned.
1199 * For now we separate the TCP and UDP subsystems w/ different 'which'
1200 * values. We may eventually remove this separation (and simplify the
1203 * Note that the printing of the error message is delayed so we can
1204 * properly print the icmp error rate that the system was trying to do
1205 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing
1206 * the 'final' error, but it doesn't make sense to solve the printing
1207 * delay with more complex code.
1210 badport_bandlim(int which
)
1212 static int lticks
[BANDLIM_MAX
+ 1];
1213 static int lpackets
[BANDLIM_MAX
+ 1];
1215 const char *bandlimittype
[] = {
1216 "Limiting icmp unreach response",
1217 "Limiting icmp ping response",
1218 "Limiting icmp tstamp response",
1219 "Limiting closed port RST response",
1220 "Limiting open port RST response"
1224 * Return ok status if feature disabled or argument out of
1228 if (icmplim
<= 0 || which
> BANDLIM_MAX
|| which
< 0)
1230 dticks
= ticks
- lticks
[which
];
1233 * reset stats when cumulative dt exceeds one second.
1236 if ((unsigned int)dticks
> hz
) {
1237 if (lpackets
[which
] > icmplim
&& icmplim_output
) {
1238 kprintf("%s from %d to %d packets per second\n",
1239 bandlimittype
[which
],
1244 lticks
[which
] = ticks
;
1245 lpackets
[which
] = 0;
1252 if (++lpackets
[which
] > icmplim
) {