4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright (c) 2018, Joyent, Inc.
26 * Copyright 2024 Oxide Computer Company
28 /* Copyright (c) 1990 Mentat Inc. */
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #include <sys/strsun.h>
35 #define _SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/suntpi.h>
42 #include <sys/xti_inet.h>
43 #include <sys/cmn_err.h>
46 #include <sys/policy.h>
48 #include <sys/ucred.h>
51 #include <sys/sockio.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/vtrace.h>
56 #include <sys/debug.h>
57 #include <sys/isa_defs.h>
58 #include <sys/random.h>
59 #include <netinet/in.h>
60 #include <netinet/ip6.h>
61 #include <netinet/icmp6.h>
62 #include <netinet/udp.h>
64 #include <inet/common.h>
66 #include <inet/ip_impl.h>
67 #include <inet/ipsec_impl.h>
69 #include <inet/ip_ire.h>
70 #include <inet/ip_if.h>
71 #include <inet/ip_multi.h>
72 #include <inet/ip_ndp.h>
73 #include <inet/proto_set.h>
74 #include <inet/mib2.h>
76 #include <inet/optcom.h>
77 #include <inet/snmpcom.h>
78 #include <inet/kstatcom.h>
79 #include <inet/ipclassifier.h>
81 #include <sys/tsol/label.h>
82 #include <sys/tsol/tnet.h>
84 #include <inet/rawip_impl.h>
89 * Synchronization notes:
91 * RAWIP is MT and uses the usual kernel synchronization primitives. We use
92 * conn_lock to protect the icmp_t.
95 * ICMP is always a device driver. For compatibility with mibopen() code
96 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
99 static void icmp_addr_req(queue_t
*q
, mblk_t
*mp
);
100 static void icmp_tpi_bind(queue_t
*q
, mblk_t
*mp
);
101 static void icmp_bind_proto(icmp_t
*icmp
);
102 static int icmp_build_hdr_template(conn_t
*, const in6_addr_t
*,
103 const in6_addr_t
*, uint32_t);
104 static void icmp_capability_req(queue_t
*q
, mblk_t
*mp
);
105 static int icmp_close(queue_t
*q
, int flags
, cred_t
*);
106 static void icmp_close_free(conn_t
*);
107 static void icmp_tpi_connect(queue_t
*q
, mblk_t
*mp
);
108 static void icmp_tpi_disconnect(queue_t
*q
, mblk_t
*mp
);
109 static void icmp_err_ack(queue_t
*q
, mblk_t
*mp
, t_scalar_t t_error
,
111 static void icmp_err_ack_prim(queue_t
*q
, mblk_t
*mp
, t_scalar_t primitive
,
112 t_scalar_t tlierr
, int sys_error
);
113 static void icmp_icmp_input(void *arg1
, mblk_t
*mp
, void *arg2
,
115 static void icmp_icmp_error_ipv6(conn_t
*connp
, mblk_t
*mp
,
117 static void icmp_info_req(queue_t
*q
, mblk_t
*mp
);
118 static void icmp_input(void *, mblk_t
*, void *, ip_recv_attr_t
*);
119 static conn_t
*icmp_open(int family
, cred_t
*credp
, int *err
, int flags
);
120 static int icmp_openv4(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
,
122 static int icmp_openv6(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
,
124 static boolean_t
icmp_opt_allow_udr_set(t_scalar_t level
, t_scalar_t name
);
125 int icmp_opt_set(conn_t
*connp
, uint_t optset_context
,
126 int level
, int name
, uint_t inlen
,
127 uchar_t
*invalp
, uint_t
*outlenp
, uchar_t
*outvalp
,
128 void *thisdg_attrs
, cred_t
*cr
);
129 int icmp_opt_get(conn_t
*connp
, int level
, int name
,
131 static int icmp_output_newdst(conn_t
*connp
, mblk_t
*data_mp
, sin_t
*sin
,
132 sin6_t
*sin6
, cred_t
*cr
, pid_t pid
, ip_xmit_attr_t
*ixa
);
133 static mblk_t
*icmp_prepend_hdr(conn_t
*, ip_xmit_attr_t
*, const ip_pkt_t
*,
134 const in6_addr_t
*, const in6_addr_t
*, uint32_t, mblk_t
*, int *);
135 static mblk_t
*icmp_prepend_header_template(conn_t
*, ip_xmit_attr_t
*,
136 mblk_t
*, const in6_addr_t
*, uint32_t, int *);
137 static int icmp_snmp_set(queue_t
*q
, t_scalar_t level
, t_scalar_t name
,
138 uchar_t
*ptr
, int len
);
139 static void icmp_ud_err(queue_t
*q
, mblk_t
*mp
, t_scalar_t err
);
140 static void icmp_tpi_unbind(queue_t
*q
, mblk_t
*mp
);
141 static int icmp_wput(queue_t
*q
, mblk_t
*mp
);
142 static int icmp_wput_fallback(queue_t
*q
, mblk_t
*mp
);
143 static void icmp_wput_other(queue_t
*q
, mblk_t
*mp
);
144 static void icmp_wput_iocdata(queue_t
*q
, mblk_t
*mp
);
145 static void icmp_wput_restricted(queue_t
*q
, mblk_t
*mp
);
146 static void icmp_ulp_recv(conn_t
*, mblk_t
*, uint_t
);
148 static void *rawip_stack_init(netstackid_t stackid
, netstack_t
*ns
);
149 static void rawip_stack_fini(netstackid_t stackid
, void *arg
);
151 static void *rawip_kstat_init(netstackid_t stackid
);
152 static void rawip_kstat_fini(netstackid_t stackid
, kstat_t
*ksp
);
153 static int rawip_kstat_update(kstat_t
*kp
, int rw
);
154 static void rawip_stack_shutdown(netstackid_t stackid
, void *arg
);
156 /* Common routines for TPI and socket module */
157 static conn_t
*rawip_do_open(int, cred_t
*, int *, int);
158 static void rawip_do_close(conn_t
*);
159 static int rawip_do_bind(conn_t
*, struct sockaddr
*, socklen_t
);
160 static int rawip_do_unbind(conn_t
*);
161 static int rawip_do_connect(conn_t
*, const struct sockaddr
*, socklen_t
,
164 int rawip_getsockname(sock_lower_handle_t
, struct sockaddr
*,
165 socklen_t
*, cred_t
*);
166 int rawip_getpeername(sock_lower_handle_t
, struct sockaddr
*,
167 socklen_t
*, cred_t
*);
169 static struct module_info icmp_mod_info
= {
170 5707, "icmp", 1, INFPSZ
, 512, 128
174 * Entry points for ICMP as a device.
175 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
177 static struct qinit icmprinitv4
= {
178 NULL
, NULL
, icmp_openv4
, icmp_close
, NULL
, &icmp_mod_info
181 static struct qinit icmprinitv6
= {
182 NULL
, NULL
, icmp_openv6
, icmp_close
, NULL
, &icmp_mod_info
185 static struct qinit icmpwinit
= {
186 icmp_wput
, ip_wsrv
, NULL
, NULL
, NULL
, &icmp_mod_info
189 /* ICMP entry point during fallback */
190 static struct qinit icmp_fallback_sock_winit
= {
191 icmp_wput_fallback
, NULL
, NULL
, NULL
, NULL
, &icmp_mod_info
194 /* For AF_INET aka /dev/icmp */
195 struct streamtab icmpinfov4
= {
196 &icmprinitv4
, &icmpwinit
199 /* For AF_INET6 aka /dev/icmp6 */
200 struct streamtab icmpinfov6
= {
201 &icmprinitv6
, &icmpwinit
204 /* Default structure copied into T_INFO_ACK messages */
205 static struct T_info_ack icmp_g_t_info_ack
= {
207 IP_MAXPACKET
, /* TSDU_size. icmp allows maximum size messages. */
208 T_INVALID
, /* ETSDU_size. icmp does not support expedited data. */
209 T_INVALID
, /* CDATA_size. icmp does not support connect data. */
210 T_INVALID
, /* DDATA_size. icmp does not support disconnect data. */
211 0, /* ADDR_size - filled in later. */
212 0, /* OPT_size - not initialized here */
213 IP_MAXPACKET
, /* TIDU_size. icmp allows maximum size messages. */
214 T_CLTS
, /* SERV_type. icmp supports connection-less. */
215 TS_UNBND
, /* CURRENT_state. This is set from icmp_state. */
216 (XPG4_1
|SENDZERO
) /* PROVIDER_flag */
220 icmp_set_buf_prop(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
221 const char *ifname
, const void *pval
, uint_t flags
)
223 return (mod_set_buf_prop(stack
->netstack_icmp
->is_propinfo_tbl
,
224 stack
, cr
, pinfo
, ifname
, pval
, flags
));
228 icmp_get_buf_prop(netstack_t
*stack
, mod_prop_info_t
*pinfo
, const char *ifname
,
229 void *val
, uint_t psize
, uint_t flags
)
231 return (mod_get_buf_prop(stack
->netstack_icmp
->is_propinfo_tbl
, stack
,
232 pinfo
, ifname
, val
, psize
, flags
));
236 * All of these are alterable, within the min/max values given, at run time.
238 * Note: All those tunables which do not start with "icmp_" are Committed and
239 * therefore are public. See PSARC 2010/080.
241 static mod_prop_info_t icmp_propinfo_tbl
[] = {
243 { "_wroff_extra", MOD_PROTO_RAWIP
,
244 mod_set_uint32
, mod_get_uint32
,
245 {0, 128, 32}, {32} },
247 { "_ipv4_ttl", MOD_PROTO_RAWIP
,
248 mod_set_uint32
, mod_get_uint32
,
249 {1, 255, 255}, {255} },
251 { "_ipv6_hoplimit", MOD_PROTO_RAWIP
,
252 mod_set_uint32
, mod_get_uint32
,
253 {0, IPV6_MAX_HOPS
, IPV6_DEFAULT_HOPS
},
254 {IPV6_DEFAULT_HOPS
} },
256 { "_bsd_compat", MOD_PROTO_RAWIP
,
257 mod_set_boolean
, mod_get_boolean
,
258 {B_TRUE
}, {B_TRUE
} },
260 { "send_buf", MOD_PROTO_RAWIP
,
261 icmp_set_buf_prop
, icmp_get_buf_prop
,
262 {4096, 65536, 8192}, {8192} },
264 { "_xmit_lowat", MOD_PROTO_RAWIP
,
265 mod_set_uint32
, mod_get_uint32
,
266 {0, 65536, 1024}, {1024} },
268 { "recv_buf", MOD_PROTO_RAWIP
,
269 icmp_set_buf_prop
, icmp_get_buf_prop
,
270 {4096, 65536, 8192}, {8192} },
272 { "max_buf", MOD_PROTO_RAWIP
,
273 mod_set_uint32
, mod_get_uint32
,
274 {65536, ULP_MAX_BUF
, 256*1024}, {256*1024} },
276 { "_pmtu_discovery", MOD_PROTO_RAWIP
,
277 mod_set_boolean
, mod_get_boolean
,
278 {B_FALSE
}, {B_FALSE
} },
280 { "_sendto_ignerr", MOD_PROTO_RAWIP
,
281 mod_set_boolean
, mod_get_boolean
,
282 {B_FALSE
}, {B_FALSE
} },
284 { "?", MOD_PROTO_RAWIP
, NULL
, mod_get_allprop
, {0}, {0} },
286 { NULL
, 0, NULL
, NULL
, {0}, {0} }
289 #define is_wroff_extra is_propinfo_tbl[0].prop_cur_uval
290 #define is_ipv4_ttl is_propinfo_tbl[1].prop_cur_uval
291 #define is_ipv6_hoplimit is_propinfo_tbl[2].prop_cur_uval
292 #define is_bsd_compat is_propinfo_tbl[3].prop_cur_bval
293 #define is_xmit_hiwat is_propinfo_tbl[4].prop_cur_uval
294 #define is_xmit_lowat is_propinfo_tbl[5].prop_cur_uval
295 #define is_recv_hiwat is_propinfo_tbl[6].prop_cur_uval
296 #define is_max_buf is_propinfo_tbl[7].prop_cur_uval
297 #define is_pmtu_discovery is_propinfo_tbl[8].prop_cur_bval
298 #define is_sendto_ignerr is_propinfo_tbl[9].prop_cur_bval
300 typedef union T_primitives
*t_primp_t
;
303 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
304 * passed to icmp_wput.
305 * It calls IP to verify the local IP address, and calls IP to insert
306 * the conn_t in the fanout table.
307 * If everything is ok it then sends the T_BIND_ACK back up.
310 icmp_tpi_bind(queue_t
*q
, mblk_t
*mp
)
314 struct T_bind_req
*tbr
;
319 conn_t
*connp
= Q_TO_CONN(q
);
324 * All Solaris components should pass a db_credp
325 * for this TPI message, hence we ASSERT.
326 * But in case there is some other M_PROTO that looks
327 * like a TPI message sent by some other kernel
328 * component, we check and return an error.
330 cr
= msg_getcred(mp
, NULL
);
333 icmp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
337 icmp
= connp
->conn_icmp
;
338 if ((mp
->b_wptr
- mp
->b_rptr
) < sizeof (*tbr
)) {
339 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
340 "icmp_bind: bad req, len %u",
341 (uint_t
)(mp
->b_wptr
- mp
->b_rptr
));
342 icmp_err_ack(q
, mp
, TPROTO
, 0);
346 if (icmp
->icmp_state
!= TS_UNBND
) {
347 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
348 "icmp_bind: bad state, %u", icmp
->icmp_state
);
349 icmp_err_ack(q
, mp
, TOUTSTATE
, 0);
354 * Reallocate the message to make sure we have enough room for an
357 mp1
= reallocb(mp
, sizeof (struct T_bind_ack
) + sizeof (sin6_t
), 1);
359 icmp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
364 /* Reset the message type in preparation for shipping it back. */
365 DB_TYPE(mp
) = M_PCPROTO
;
366 tbr
= (struct T_bind_req
*)mp
->b_rptr
;
367 len
= tbr
->ADDR_length
;
369 case 0: /* request for a generic port */
370 tbr
->ADDR_offset
= sizeof (struct T_bind_req
);
371 if (connp
->conn_family
== AF_INET
) {
372 tbr
->ADDR_length
= sizeof (sin_t
);
373 sin
= (sin_t
*)&tbr
[1];
375 sin
->sin_family
= AF_INET
;
376 mp
->b_wptr
= (uchar_t
*)&sin
[1];
377 sa
= (struct sockaddr
*)sin
;
378 len
= sizeof (sin_t
);
380 ASSERT(connp
->conn_family
== AF_INET6
);
381 tbr
->ADDR_length
= sizeof (sin6_t
);
382 sin6
= (sin6_t
*)&tbr
[1];
384 sin6
->sin6_family
= AF_INET6
;
385 mp
->b_wptr
= (uchar_t
*)&sin6
[1];
386 sa
= (struct sockaddr
*)sin6
;
387 len
= sizeof (sin6_t
);
391 case sizeof (sin_t
): /* Complete IPv4 address */
392 sa
= (struct sockaddr
*)mi_offset_param(mp
, tbr
->ADDR_offset
,
396 case sizeof (sin6_t
): /* Complete IPv6 address */
397 sa
= (struct sockaddr
*)mi_offset_param(mp
,
398 tbr
->ADDR_offset
, sizeof (sin6_t
));
402 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
403 "icmp_bind: bad ADDR_length %u", tbr
->ADDR_length
);
404 icmp_err_ack(q
, mp
, TBADADDR
, 0);
408 error
= rawip_do_bind(connp
, sa
, len
);
411 icmp_err_ack(q
, mp
, TSYSERR
, error
);
413 icmp_err_ack(q
, mp
, -error
, 0);
416 tbr
->PRIM_type
= T_BIND_ACK
;
422 rawip_do_bind(conn_t
*connp
, struct sockaddr
*sa
, socklen_t len
)
426 icmp_t
*icmp
= connp
->conn_icmp
;
428 ip_laddr_t laddr_type
= IPVL_UNICAST_UP
; /* INADDR_ANY */
429 in_port_t lport
; /* Network byte order */
430 ipaddr_t v4src
; /* Set if AF_INET */
433 zoneid_t zoneid
= IPCL_ZONEID(connp
);
434 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
436 if (sa
== NULL
|| !OK_32PTR((char *)sa
)) {
441 case sizeof (sin_t
): /* Complete IPv4 address */
443 if (sin
->sin_family
!= AF_INET
||
444 connp
->conn_family
!= AF_INET
) {
445 /* TSYSERR, EAFNOSUPPORT */
446 return (EAFNOSUPPORT
);
448 v4src
= sin
->sin_addr
.s_addr
;
449 IN6_IPADDR_TO_V4MAPPED(v4src
, &v6src
);
450 if (v4src
!= INADDR_ANY
) {
451 laddr_type
= ip_laddr_verify_v4(v4src
, zoneid
, ipst
,
454 lport
= sin
->sin_port
;
456 case sizeof (sin6_t
): /* Complete IPv6 address */
458 if (sin6
->sin6_family
!= AF_INET6
||
459 connp
->conn_family
!= AF_INET6
) {
460 /* TSYSERR, EAFNOSUPPORT */
461 return (EAFNOSUPPORT
);
463 /* No support for mapped addresses on raw sockets */
464 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
465 /* TSYSERR, EADDRNOTAVAIL */
466 return (EADDRNOTAVAIL
);
468 v6src
= sin6
->sin6_addr
;
469 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
470 if (IN6_IS_ADDR_LINKSCOPE(&v6src
))
471 scopeid
= sin6
->sin6_scope_id
;
472 laddr_type
= ip_laddr_verify_v6(&v6src
, zoneid
, ipst
,
475 lport
= sin6
->sin6_port
;
480 return (EADDRNOTAVAIL
);
483 /* Is the local address a valid unicast, multicast, or broadcast? */
484 if (laddr_type
== IPVL_BAD
)
485 return (EADDRNOTAVAIL
);
488 * The state must be TS_UNBND.
490 mutex_enter(&connp
->conn_lock
);
491 if (icmp
->icmp_state
!= TS_UNBND
) {
492 mutex_exit(&connp
->conn_lock
);
497 * Copy the source address into our icmp structure. This address
498 * may still be zero; if so, ip will fill in the correct address
499 * each time an outbound packet is passed to it.
500 * If we are binding to a broadcast or multicast address then
501 * we just set the conn_bound_addr since we don't want to use
502 * that as the source address when sending.
504 connp
->conn_bound_addr_v6
= v6src
;
505 connp
->conn_laddr_v6
= v6src
;
507 connp
->conn_ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
508 connp
->conn_ixa
->ixa_scopeid
= scopeid
;
509 connp
->conn_incoming_ifindex
= scopeid
;
511 connp
->conn_ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
512 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
515 switch (laddr_type
) {
516 case IPVL_UNICAST_UP
:
517 case IPVL_UNICAST_DOWN
:
518 connp
->conn_saddr_v6
= v6src
;
519 connp
->conn_mcbc_bind
= B_FALSE
;
523 /* ip_set_destination will pick a source address later */
524 connp
->conn_saddr_v6
= ipv6_all_zeros
;
525 connp
->conn_mcbc_bind
= B_TRUE
;
529 /* Any errors after this point should use late_error */
532 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
535 connp
->conn_lport
= lport
;
536 connp
->conn_fport
= 0;
538 if (connp
->conn_family
== AF_INET
) {
539 ASSERT(connp
->conn_ipversion
== IPV4_VERSION
);
541 ASSERT(connp
->conn_ipversion
== IPV6_VERSION
);
544 icmp
->icmp_state
= TS_IDLE
;
547 * We create an initial header template here to make a subsequent
548 * sendto have a starting point. Since conn_last_dst is zero the
549 * first sendto will always follow the 'dst changed' code path.
550 * Note that we defer massaging options and the related checksum
551 * adjustment until we have a destination address.
553 error
= icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
554 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
556 mutex_exit(&connp
->conn_lock
);
560 connp
->conn_faddr_v6
= ipv6_all_zeros
;
561 connp
->conn_v6lastdst
= ipv6_all_zeros
;
562 mutex_exit(&connp
->conn_lock
);
564 error
= ip_laddr_fanout_insert(connp
);
572 mutex_enter(&connp
->conn_lock
);
573 connp
->conn_saddr_v6
= ipv6_all_zeros
;
574 connp
->conn_bound_addr_v6
= ipv6_all_zeros
;
575 connp
->conn_laddr_v6
= ipv6_all_zeros
;
577 connp
->conn_ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
578 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
580 icmp
->icmp_state
= TS_UNBND
;
581 connp
->conn_v6lastdst
= ipv6_all_zeros
;
582 connp
->conn_lport
= 0;
584 /* Restore the header that was built above - different source address */
585 (void) icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
586 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
587 mutex_exit(&connp
->conn_lock
);
592 * Tell IP to just bind to the protocol.
595 icmp_bind_proto(icmp_t
*icmp
)
597 conn_t
*connp
= icmp
->icmp_connp
;
599 mutex_enter(&connp
->conn_lock
);
600 connp
->conn_saddr_v6
= ipv6_all_zeros
;
601 connp
->conn_laddr_v6
= ipv6_all_zeros
;
602 connp
->conn_faddr_v6
= ipv6_all_zeros
;
603 connp
->conn_v6lastdst
= ipv6_all_zeros
;
604 mutex_exit(&connp
->conn_lock
);
606 (void) ip_laddr_fanout_insert(connp
);
610 * This routine handles each T_CONN_REQ message passed to icmp. It
611 * associates a default destination address with the stream.
613 * After various error checks are completed, icmp_connect() lays
614 * the target address and port into the composite header template.
615 * Then we ask IP for information, including a source address if we didn't
616 * already have one. Finally we send up the T_OK_ACK reply message.
619 icmp_tpi_connect(queue_t
*q
, mblk_t
*mp
)
621 conn_t
*connp
= Q_TO_CONN(q
);
622 struct T_conn_req
*tcr
;
629 * All Solaris components should pass a db_credp
630 * for this TPI message, hence we ASSERT.
631 * But in case there is some other M_PROTO that looks
632 * like a TPI message sent by some other kernel
633 * component, we check and return an error.
635 cr
= msg_getcred(mp
, &pid
);
638 icmp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
642 tcr
= (struct T_conn_req
*)mp
->b_rptr
;
644 if ((mp
->b_wptr
- mp
->b_rptr
) < sizeof (struct T_conn_req
)) {
645 icmp_err_ack(q
, mp
, TPROTO
, 0);
649 if (tcr
->OPT_length
!= 0) {
650 icmp_err_ack(q
, mp
, TBADOPT
, 0);
654 len
= tcr
->DEST_length
;
658 icmp_err_ack(q
, mp
, TBADADDR
, 0);
661 sa
= (struct sockaddr
*)mi_offset_param(mp
, tcr
->DEST_offset
,
664 case sizeof (sin6_t
):
665 sa
= (struct sockaddr
*)mi_offset_param(mp
,
666 tcr
->DEST_offset
, sizeof (sin6_t
));
670 error
= proto_verify_ip_addr(connp
->conn_family
, sa
, len
);
672 icmp_err_ack(q
, mp
, TSYSERR
, error
);
676 error
= rawip_do_connect(connp
, sa
, len
, cr
, pid
);
679 icmp_err_ack(q
, mp
, -error
, 0);
681 icmp_err_ack(q
, mp
, 0, error
);
687 * We have to send a connection confirmation to
690 if (connp
->conn_family
== AF_INET
) {
691 mp1
= mi_tpi_conn_con(NULL
, (char *)sa
,
692 sizeof (sin_t
), NULL
, 0);
694 ASSERT(connp
->conn_family
== AF_INET6
);
695 mp1
= mi_tpi_conn_con(NULL
, (char *)sa
,
696 sizeof (sin6_t
), NULL
, 0);
699 icmp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
704 * Send ok_ack for T_CONN_REQ
706 mp
= mi_tpi_ok_ack_alloc(mp
);
708 /* Unable to reuse the T_CONN_REQ for the ack. */
709 icmp_err_ack_prim(q
, mp1
, T_CONN_REQ
, TSYSERR
, ENOMEM
);
712 putnext(connp
->conn_rq
, mp
);
713 putnext(connp
->conn_rq
, mp1
);
718 rawip_do_connect(conn_t
*connp
, const struct sockaddr
*sa
, socklen_t len
,
719 cred_t
*cr
, pid_t pid
)
730 ip_xmit_attr_t
*oldixa
;
733 in6_addr_t v6src
= connp
->conn_saddr_v6
;
735 icmp
= connp
->conn_icmp
;
737 if (sa
== NULL
|| !OK_32PTR((char *)sa
)) {
741 ASSERT(sa
!= NULL
&& len
!= 0);
749 * Determine packet type based on type of address passed in
750 * the request should contain an IPv4 or IPv6 address.
751 * Make sure that address family matches the type of
752 * family of the address passed down.
758 v4dst
= sin
->sin_addr
.s_addr
;
759 dstport
= sin
->sin_port
;
760 IN6_IPADDR_TO_V4MAPPED(v4dst
, &v6dst
);
761 ASSERT(connp
->conn_ipversion
== IPV4_VERSION
);
764 case sizeof (sin6_t
):
767 /* No support for mapped addresses on raw sockets */
768 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
769 return (EADDRNOTAVAIL
);
771 v6dst
= sin6
->sin6_addr
;
772 dstport
= sin6
->sin6_port
;
773 ASSERT(connp
->conn_ipversion
== IPV6_VERSION
);
774 flowinfo
= sin6
->sin6_flowinfo
;
775 if (IN6_IS_ADDR_LINKLOCAL(&sin6
->sin6_addr
))
776 scopeid
= sin6
->sin6_scope_id
;
777 srcid
= sin6
->__sin6_src_id
;
778 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
779 /* Due to check above, we know sin6_addr is v6-only. */
780 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
781 B_FALSE
, connp
->conn_netstack
)) {
782 /* Mismatch - v6src would be v4mapped. */
783 return (EADDRNOTAVAIL
);
790 * If there is a different thread using conn_ixa then we get a new
791 * copy and cut the old one loose from conn_ixa. Otherwise we use
792 * conn_ixa and prevent any other thread from using/changing it.
793 * Once connect() is done other threads can use conn_ixa since the
794 * refcnt will be back at one.
795 * We defer updating conn_ixa until later to handle any concurrent
796 * conn_ixa_cleanup thread.
798 ixa
= conn_get_ixa(connp
, B_FALSE
);
802 mutex_enter(&connp
->conn_lock
);
804 * This icmp_t must have bound already before doing a connect.
805 * Reject if a connect is in progress (we drop conn_lock during
808 if (icmp
->icmp_state
== TS_UNBND
|| icmp
->icmp_state
== TS_WCON_CREQ
) {
809 mutex_exit(&connp
->conn_lock
);
814 if (icmp
->icmp_state
== TS_DATA_XFER
) {
815 /* Already connected - clear out state */
816 if (connp
->conn_mcbc_bind
)
817 connp
->conn_saddr_v6
= ipv6_all_zeros
;
819 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
820 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
821 connp
->conn_faddr_v6
= ipv6_all_zeros
;
822 icmp
->icmp_state
= TS_IDLE
;
826 * Use sin_port/sin6_port since applications like psh use SOCK_RAW
829 connp
->conn_fport
= dstport
;
830 if (connp
->conn_ipversion
== IPV4_VERSION
) {
832 * Interpret a zero destination to mean loopback.
833 * Update the T_CONN_REQ (sin/sin6) since it is used to
834 * generate the T_CONN_CON.
836 if (v4dst
== INADDR_ANY
) {
837 v4dst
= htonl(INADDR_LOOPBACK
);
838 IN6_IPADDR_TO_V4MAPPED(v4dst
, &v6dst
);
839 ASSERT(connp
->conn_family
== AF_INET
);
840 sin
->sin_addr
.s_addr
= v4dst
;
842 connp
->conn_faddr_v6
= v6dst
;
843 connp
->conn_flowinfo
= 0;
845 ASSERT(connp
->conn_ipversion
== IPV6_VERSION
);
847 * Interpret a zero destination to mean loopback.
848 * Update the T_CONN_REQ (sin/sin6) since it is used to
849 * generate the T_CONN_CON.
851 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst
)) {
852 v6dst
= ipv6_loopback
;
853 sin6
->sin6_addr
= v6dst
;
855 connp
->conn_faddr_v6
= v6dst
;
856 connp
->conn_flowinfo
= flowinfo
;
860 * We update our cred/cpid based on the caller of connect
862 if (connp
->conn_cred
!= cr
) {
864 crfree(connp
->conn_cred
);
865 connp
->conn_cred
= cr
;
867 connp
->conn_cpid
= pid
;
868 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
871 if (is_system_labeled()) {
872 /* We need to restart with a label based on the cred */
873 ip_xmit_attr_restore_tsl(ixa
, ixa
->ixa_cred
);
877 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
878 ixa
->ixa_scopeid
= scopeid
;
879 connp
->conn_incoming_ifindex
= scopeid
;
881 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
882 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
886 * conn_connect will drop conn_lock and reacquire it.
887 * To prevent a send* from messing with this icmp_t while the lock
888 * is dropped we set icmp_state and clear conn_v6lastdst.
889 * That will make all send* fail with EISCONN.
891 connp
->conn_v6lastdst
= ipv6_all_zeros
;
892 icmp
->icmp_state
= TS_WCON_CREQ
;
894 error
= conn_connect(connp
, NULL
, IPDF_ALLOW_MCBC
);
895 mutex_exit(&connp
->conn_lock
);
900 * The addresses have been verified. Time to insert in
901 * the correct fanout list.
903 error
= ipcl_conn_insert(connp
);
907 mutex_enter(&connp
->conn_lock
);
908 error
= icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
909 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
911 mutex_exit(&connp
->conn_lock
);
915 icmp
->icmp_state
= TS_DATA_XFER
;
916 /* Record this as the "last" send even though we haven't sent any */
917 connp
->conn_v6lastdst
= connp
->conn_faddr_v6
;
918 connp
->conn_lastipversion
= connp
->conn_ipversion
;
919 connp
->conn_lastdstport
= connp
->conn_fport
;
920 connp
->conn_lastflowinfo
= connp
->conn_flowinfo
;
921 connp
->conn_lastscopeid
= scopeid
;
922 connp
->conn_lastsrcid
= srcid
;
923 /* Also remember a source to use together with lastdst */
924 connp
->conn_v6lastsrc
= v6src
;
926 oldixa
= conn_replace_ixa(connp
, ixa
);
927 mutex_exit(&connp
->conn_lock
);
936 mutex_enter(&connp
->conn_lock
);
937 icmp
->icmp_state
= TS_IDLE
;
938 /* In case the source address was set above */
939 if (connp
->conn_mcbc_bind
)
940 connp
->conn_saddr_v6
= ipv6_all_zeros
;
942 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
943 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
944 connp
->conn_faddr_v6
= ipv6_all_zeros
;
945 connp
->conn_v6lastdst
= ipv6_all_zeros
;
946 connp
->conn_flowinfo
= 0;
948 (void) icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
949 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
950 mutex_exit(&connp
->conn_lock
);
955 rawip_do_close(conn_t
*connp
)
957 ASSERT(connp
!= NULL
&& IPCL_IS_RAWIP(connp
));
959 ip_quiesce_conn(connp
);
961 if (!IPCL_IS_NONSTR(connp
)) {
962 qprocsoff(connp
->conn_rq
);
965 icmp_close_free(connp
);
968 * Now we are truly single threaded on this stream, and can
969 * delete the things hanging off the connp, and finally the connp.
970 * We removed this connp from the fanout list, it cannot be
971 * accessed thru the fanouts, and we already waited for the
972 * conn_ref to drop to 0. We are already in close, so
973 * there cannot be any other thread from the top. qprocsoff
974 * has completed, and service has completed or won't run in
977 ASSERT(connp
->conn_ref
== 1);
979 if (!IPCL_IS_NONSTR(connp
)) {
980 inet_minor_free(connp
->conn_minor_arena
, connp
->conn_dev
);
982 ip_free_helper_stream(connp
);
986 ipcl_conn_destroy(connp
);
991 icmp_close(queue_t
*q
, int flags
, cred_t
*credp __unused
)
995 if (flags
& SO_FALLBACK
) {
997 * stream is being closed while in fallback
998 * simply free the resources that were allocated
1000 inet_minor_free(WR(q
)->q_ptr
, (dev_t
)(RD(q
)->q_ptr
));
1005 connp
= Q_TO_CONN(q
);
1006 (void) rawip_do_close(connp
);
1008 q
->q_ptr
= WR(q
)->q_ptr
= NULL
;
1013 icmp_close_free(conn_t
*connp
)
1015 icmp_t
*icmp
= connp
->conn_icmp
;
1017 if (icmp
->icmp_filter
!= NULL
) {
1018 kmem_free(icmp
->icmp_filter
, sizeof (icmp6_filter_t
));
1019 icmp
->icmp_filter
= NULL
;
1023 * Clear any fields which the kmem_cache constructor clears.
1024 * Only icmp_connp needs to be preserved.
1025 * TBD: We should make this more efficient to avoid clearing
1028 ASSERT(icmp
->icmp_connp
== connp
);
1029 bzero(icmp
, sizeof (icmp_t
));
1030 icmp
->icmp_connp
= connp
;
1034 * This routine handles each T_DISCON_REQ message passed to icmp
1035 * as an indication that ICMP is no longer connected. This results
1036 * in telling IP to restore the binding to just the local address.
1039 icmp_do_disconnect(conn_t
*connp
)
1041 icmp_t
*icmp
= connp
->conn_icmp
;
1044 mutex_enter(&connp
->conn_lock
);
1045 if (icmp
->icmp_state
!= TS_DATA_XFER
) {
1046 mutex_exit(&connp
->conn_lock
);
1047 return (-TOUTSTATE
);
1049 if (connp
->conn_mcbc_bind
)
1050 connp
->conn_saddr_v6
= ipv6_all_zeros
;
1052 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
1053 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
1054 connp
->conn_faddr_v6
= ipv6_all_zeros
;
1055 icmp
->icmp_state
= TS_IDLE
;
1057 connp
->conn_v6lastdst
= ipv6_all_zeros
;
1058 error
= icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
1059 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
1060 mutex_exit(&connp
->conn_lock
);
1065 * Tell IP to remove the full binding and revert
1066 * to the local address binding.
1068 return (ip_laddr_fanout_insert(connp
));
1072 icmp_tpi_disconnect(queue_t
*q
, mblk_t
*mp
)
1074 conn_t
*connp
= Q_TO_CONN(q
);
1078 * Allocate the largest primitive we need to send back
1079 * T_error_ack is larger than T_ok_ack
1081 mp
= reallocb(mp
, sizeof (struct T_error_ack
), 1);
1083 /* Unable to reuse the T_DISCON_REQ for the ack. */
1084 icmp_err_ack_prim(q
, mp
, T_DISCON_REQ
, TSYSERR
, ENOMEM
);
1088 error
= icmp_do_disconnect(connp
);
1092 icmp_err_ack(q
, mp
, 0, error
);
1094 icmp_err_ack(q
, mp
, -error
, 0);
1097 mp
= mi_tpi_ok_ack_alloc(mp
);
1104 icmp_disconnect(conn_t
*connp
)
1108 connp
->conn_dgram_errind
= B_FALSE
;
1110 error
= icmp_do_disconnect(connp
);
1113 error
= proto_tlitosyserr(-error
);
1117 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1119 icmp_err_ack(queue_t
*q
, mblk_t
*mp
, t_scalar_t t_error
, int sys_error
)
1121 if ((mp
= mi_tpi_err_ack_alloc(mp
, t_error
, sys_error
)) != NULL
)
1125 /* Shorthand to generate and send TPI error acks to our client */
1127 icmp_err_ack_prim(queue_t
*q
, mblk_t
*mp
, t_scalar_t primitive
,
1128 t_scalar_t t_error
, int sys_error
)
1130 struct T_error_ack
*teackp
;
1132 if ((mp
= tpi_ack_alloc(mp
, sizeof (struct T_error_ack
),
1133 M_PCPROTO
, T_ERROR_ACK
)) != NULL
) {
1134 teackp
= (struct T_error_ack
*)mp
->b_rptr
;
1135 teackp
->ERROR_prim
= primitive
;
1136 teackp
->TLI_error
= t_error
;
1137 teackp
->UNIX_error
= sys_error
;
1143 * icmp_icmp_input is called as conn_recvicmp to process ICMP messages.
1144 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1145 * Assumes that IP has pulled up everything up to and including the ICMP header.
1149 icmp_icmp_input(void *arg1
, mblk_t
*mp
, void *arg2
, ip_recv_attr_t
*ira
)
1151 conn_t
*connp
= (conn_t
*)arg1
;
1152 icmp_t
*icmp
= connp
->conn_icmp
;
1160 ipha
= (ipha_t
*)mp
->b_rptr
;
1162 ASSERT(OK_32PTR(mp
->b_rptr
));
1164 if (IPH_HDR_VERSION(ipha
) != IPV4_VERSION
) {
1165 ASSERT(IPH_HDR_VERSION(ipha
) == IPV6_VERSION
);
1166 icmp_icmp_error_ipv6(connp
, mp
, ira
);
1169 ASSERT(IPH_HDR_VERSION(ipha
) == IPV4_VERSION
);
1171 /* Skip past the outer IP and ICMP headers */
1172 ASSERT(IPH_HDR_LENGTH(ipha
) == ira
->ira_ip_hdr_length
);
1173 iph_hdr_length
= ira
->ira_ip_hdr_length
;
1174 icmph
= (icmph_t
*)&mp
->b_rptr
[iph_hdr_length
];
1175 ipha
= (ipha_t
*)&icmph
[1]; /* Inner IP header */
1177 iph_hdr_length
= IPH_HDR_LENGTH(ipha
);
1179 switch (icmph
->icmph_type
) {
1180 case ICMP_DEST_UNREACHABLE
:
1181 switch (icmph
->icmph_code
) {
1182 case ICMP_FRAGMENTATION_NEEDED
: {
1184 ip_xmit_attr_t
*ixa
;
1186 * IP has already adjusted the path MTU.
1187 * But we need to adjust DF for IPv4.
1189 if (connp
->conn_ipversion
!= IPV4_VERSION
)
1192 ixa
= conn_get_ixa(connp
, B_FALSE
);
1193 if (ixa
== NULL
|| ixa
->ixa_ire
== NULL
) {
1195 * Some other thread holds conn_ixa. We will
1196 * redo this on the next ICMP too big.
1202 (void) ip_get_pmtu(ixa
);
1204 mutex_enter(&connp
->conn_lock
);
1205 ipha
= (ipha_t
*)connp
->conn_ht_iphc
;
1206 if (ixa
->ixa_flags
& IXAF_PMTU_IPV4_DF
) {
1207 ipha
->ipha_fragment_offset_and_flags
|=
1210 ipha
->ipha_fragment_offset_and_flags
&=
1213 mutex_exit(&connp
->conn_lock
);
1217 case ICMP_PORT_UNREACHABLE
:
1218 case ICMP_PROTOCOL_UNREACHABLE
:
1219 error
= ECONNREFUSED
;
1222 /* Transient errors */
1227 /* Transient errors */
1236 * Deliver T_UDERROR_IND when the application has asked for it.
1237 * The socket layer enables this automatically when connected.
1239 if (!connp
->conn_dgram_errind
) {
1245 sin
.sin_family
= AF_INET
;
1246 sin
.sin_addr
.s_addr
= ipha
->ipha_dst
;
1248 if (IPCL_IS_NONSTR(connp
)) {
1249 mutex_enter(&connp
->conn_lock
);
1250 if (icmp
->icmp_state
== TS_DATA_XFER
) {
1251 if (sin
.sin_addr
.s_addr
== connp
->conn_faddr_v4
) {
1252 mutex_exit(&connp
->conn_lock
);
1253 (*connp
->conn_upcalls
->su_set_error
)
1254 (connp
->conn_upper_handle
, error
);
1258 icmp
->icmp_delayed_error
= error
;
1259 *((sin_t
*)&icmp
->icmp_delayed_addr
) = sin
;
1261 mutex_exit(&connp
->conn_lock
);
1263 mp1
= mi_tpi_uderror_ind((char *)&sin
, sizeof (sin_t
), NULL
, 0,
1266 putnext(connp
->conn_rq
, mp1
);
1273 * icmp_icmp_error_ipv6 is called by icmp_icmp_input to process ICMP for IPv6.
1274 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1275 * Assumes that IP has pulled up all the extension headers as well as the
1279 icmp_icmp_error_ipv6(conn_t
*connp
, mblk_t
*mp
, ip_recv_attr_t
*ira
)
1282 ip6_t
*ip6h
, *outer_ip6h
;
1283 uint16_t iph_hdr_length
;
1288 icmp_t
*icmp
= connp
->conn_icmp
;
1290 outer_ip6h
= (ip6_t
*)mp
->b_rptr
;
1292 if (outer_ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
)
1293 iph_hdr_length
= ip_hdr_length_v6(mp
, outer_ip6h
);
1295 iph_hdr_length
= IPV6_HDR_LEN
;
1296 ASSERT(iph_hdr_length
== ira
->ira_ip_hdr_length
);
1298 /* Skip past the outer IP and ICMP headers */
1299 iph_hdr_length
= ira
->ira_ip_hdr_length
;
1300 icmp6
= (icmp6_t
*)&mp
->b_rptr
[iph_hdr_length
];
1302 ip6h
= (ip6_t
*)&icmp6
[1]; /* Inner IP header */
1303 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &iph_hdr_length
, &nexthdrp
)) {
1308 switch (icmp6
->icmp6_type
) {
1309 case ICMP6_DST_UNREACH
:
1310 switch (icmp6
->icmp6_code
) {
1311 case ICMP6_DST_UNREACH_NOPORT
:
1312 error
= ECONNREFUSED
;
1314 case ICMP6_DST_UNREACH_ADMIN
:
1315 case ICMP6_DST_UNREACH_NOROUTE
:
1316 case ICMP6_DST_UNREACH_BEYONDSCOPE
:
1317 case ICMP6_DST_UNREACH_ADDR
:
1318 /* Transient errors */
1324 case ICMP6_PACKET_TOO_BIG
: {
1325 struct T_unitdata_ind
*tudi
;
1326 struct T_opthdr
*toh
;
1329 t_scalar_t opt_length
= sizeof (struct T_opthdr
) +
1330 sizeof (struct ip6_mtuinfo
);
1332 struct ip6_mtuinfo
*mtuinfo
;
1335 * If the application has requested to receive path mtu
1336 * information, send up an empty message containing an
1337 * IPV6_PATHMTU ancillary data item.
1339 if (!connp
->conn_ipv6_recvpathmtu
)
1342 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
) +
1344 if ((newmp
= allocb(udi_size
, BPRI_MED
)) == NULL
) {
1345 BUMP_MIB(&icmp
->icmp_is
->is_rawip_mib
, rawipInErrors
);
1350 * newmp->b_cont is left to NULL on purpose. This is an
1351 * empty message containing only ancillary data.
1353 newmp
->b_datap
->db_type
= M_PROTO
;
1354 tudi
= (struct T_unitdata_ind
*)newmp
->b_rptr
;
1355 newmp
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
1356 tudi
->PRIM_type
= T_UNITDATA_IND
;
1357 tudi
->SRC_length
= sizeof (sin6_t
);
1358 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
1359 tudi
->OPT_offset
= tudi
->SRC_offset
+ sizeof (sin6_t
);
1360 tudi
->OPT_length
= opt_length
;
1362 sin6
= (sin6_t
*)&tudi
[1];
1363 bzero(sin6
, sizeof (sin6_t
));
1364 sin6
->sin6_family
= AF_INET6
;
1365 sin6
->sin6_addr
= connp
->conn_faddr_v6
;
1367 toh
= (struct T_opthdr
*)&sin6
[1];
1368 toh
->level
= IPPROTO_IPV6
;
1369 toh
->name
= IPV6_PATHMTU
;
1370 toh
->len
= opt_length
;
1373 mtuinfo
= (struct ip6_mtuinfo
*)&toh
[1];
1374 bzero(mtuinfo
, sizeof (struct ip6_mtuinfo
));
1375 mtuinfo
->ip6m_addr
.sin6_family
= AF_INET6
;
1376 mtuinfo
->ip6m_addr
.sin6_addr
= ip6h
->ip6_dst
;
1377 mtuinfo
->ip6m_mtu
= icmp6
->icmp6_mtu
;
1379 * We've consumed everything we need from the original
1380 * message. Free it, then send our empty message.
1383 icmp_ulp_recv(connp
, newmp
, msgdsize(newmp
));
1386 case ICMP6_TIME_EXCEEDED
:
1387 /* Transient errors */
1389 case ICMP6_PARAM_PROB
:
1390 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1391 if (icmp6
->icmp6_code
== ICMP6_PARAMPROB_NEXTHEADER
&&
1392 (uchar_t
*)ip6h
+ icmp6
->icmp6_pptr
==
1393 (uchar_t
*)nexthdrp
) {
1394 error
= ECONNREFUSED
;
1405 * Deliver T_UDERROR_IND when the application has asked for it.
1406 * The socket layer enables this automatically when connected.
1408 if (!connp
->conn_dgram_errind
) {
1414 sin6
.sin6_family
= AF_INET6
;
1415 sin6
.sin6_addr
= ip6h
->ip6_dst
;
1416 sin6
.sin6_flowinfo
= ip6h
->ip6_vcf
& ~IPV6_VERS_AND_FLOW_MASK
;
1417 if (IPCL_IS_NONSTR(connp
)) {
1418 mutex_enter(&connp
->conn_lock
);
1419 if (icmp
->icmp_state
== TS_DATA_XFER
) {
1420 if (IN6_ARE_ADDR_EQUAL(&sin6
.sin6_addr
,
1421 &connp
->conn_faddr_v6
)) {
1422 mutex_exit(&connp
->conn_lock
);
1423 (*connp
->conn_upcalls
->su_set_error
)
1424 (connp
->conn_upper_handle
, error
);
1428 icmp
->icmp_delayed_error
= error
;
1429 *((sin6_t
*)&icmp
->icmp_delayed_addr
) = sin6
;
1431 mutex_exit(&connp
->conn_lock
);
1433 mp1
= mi_tpi_uderror_ind((char *)&sin6
, sizeof (sin6_t
),
1436 putnext(connp
->conn_rq
, mp1
);
1443 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput.
1444 * The local address is filled in if endpoint is bound. The remote address
1445 * is filled in if remote address has been specified ("connected endpoint")
1446 * (The concept of connected CLTS sockets is alien to published TPI
1447 * but we support it anyway).
1450 icmp_addr_req(queue_t
*q
, mblk_t
*mp
)
1452 struct sockaddr
*sa
;
1454 struct T_addr_ack
*taa
;
1455 icmp_t
*icmp
= Q_TO_ICMP(q
);
1456 conn_t
*connp
= icmp
->icmp_connp
;
1459 /* Make it large enough for worst case */
1460 ackmp
= reallocb(mp
, sizeof (struct T_addr_ack
) +
1461 2 * sizeof (sin6_t
), 1);
1462 if (ackmp
== NULL
) {
1463 icmp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
1466 taa
= (struct T_addr_ack
*)ackmp
->b_rptr
;
1468 bzero(taa
, sizeof (struct T_addr_ack
));
1469 ackmp
->b_wptr
= (uchar_t
*)&taa
[1];
1471 taa
->PRIM_type
= T_ADDR_ACK
;
1472 ackmp
->b_datap
->db_type
= M_PCPROTO
;
1474 if (connp
->conn_family
== AF_INET
)
1475 addrlen
= sizeof (sin_t
);
1477 addrlen
= sizeof (sin6_t
);
1479 mutex_enter(&connp
->conn_lock
);
1481 * Note: Following code assumes 32 bit alignment of basic
1482 * data structures like sin_t and struct T_addr_ack.
1484 if (icmp
->icmp_state
!= TS_UNBND
) {
1486 * Fill in local address first
1488 taa
->LOCADDR_offset
= sizeof (*taa
);
1489 taa
->LOCADDR_length
= addrlen
;
1490 sa
= (struct sockaddr
*)&taa
[1];
1491 (void) conn_getsockname(connp
, sa
, &addrlen
);
1492 ackmp
->b_wptr
+= addrlen
;
1494 if (icmp
->icmp_state
== TS_DATA_XFER
) {
1496 * connected, fill remote address too
1498 taa
->REMADDR_length
= addrlen
;
1499 /* assumed 32-bit alignment */
1500 taa
->REMADDR_offset
= taa
->LOCADDR_offset
+ taa
->LOCADDR_length
;
1501 sa
= (struct sockaddr
*)(ackmp
->b_rptr
+ taa
->REMADDR_offset
);
1502 (void) conn_getpeername(connp
, sa
, &addrlen
);
1503 ackmp
->b_wptr
+= addrlen
;
1505 mutex_exit(&connp
->conn_lock
);
1506 ASSERT(ackmp
->b_wptr
<= ackmp
->b_datap
->db_lim
);
1511 icmp_copy_info(struct T_info_ack
*tap
, icmp_t
*icmp
)
1513 conn_t
*connp
= icmp
->icmp_connp
;
1515 *tap
= icmp_g_t_info_ack
;
1517 if (connp
->conn_family
== AF_INET6
)
1518 tap
->ADDR_size
= sizeof (sin6_t
);
1520 tap
->ADDR_size
= sizeof (sin_t
);
1521 tap
->CURRENT_state
= icmp
->icmp_state
;
1522 tap
->OPT_size
= icmp_max_optsize
;
1526 icmp_do_capability_ack(icmp_t
*icmp
, struct T_capability_ack
*tcap
,
1527 t_uscalar_t cap_bits1
)
1529 tcap
->CAP_bits1
= 0;
1531 if (cap_bits1
& TC1_INFO
) {
1532 icmp_copy_info(&tcap
->INFO_ack
, icmp
);
1533 tcap
->CAP_bits1
|= TC1_INFO
;
1538 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1539 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from
1540 * icmp_g_t_info_ack. The current state of the stream is copied from
1544 icmp_capability_req(queue_t
*q
, mblk_t
*mp
)
1546 icmp_t
*icmp
= Q_TO_ICMP(q
);
1547 t_uscalar_t cap_bits1
;
1548 struct T_capability_ack
*tcap
;
1550 cap_bits1
= ((struct T_capability_req
*)mp
->b_rptr
)->CAP_bits1
;
1552 mp
= tpi_ack_alloc(mp
, sizeof (struct T_capability_ack
),
1553 mp
->b_datap
->db_type
, T_CAPABILITY_ACK
);
1557 tcap
= (struct T_capability_ack
*)mp
->b_rptr
;
1559 icmp_do_capability_ack(icmp
, tcap
, cap_bits1
);
1565 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput.
1566 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1567 * The current state of the stream is copied from icmp_state.
1570 icmp_info_req(queue_t
*q
, mblk_t
*mp
)
1572 icmp_t
*icmp
= Q_TO_ICMP(q
);
1574 /* Create a T_INFO_ACK message. */
1575 mp
= tpi_ack_alloc(mp
, sizeof (struct T_info_ack
), M_PCPROTO
,
1579 icmp_copy_info((struct T_info_ack
*)mp
->b_rptr
, icmp
);
1584 icmp_tpi_open(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
,
1591 /* If the stream is already open, return immediately. */
1592 if (q
->q_ptr
!= NULL
)
1595 if (sflag
== MODOPEN
)
1599 * Since ICMP is not used so heavily, allocating from the small
1600 * arena should be sufficient.
1602 if ((conn_dev
= inet_minor_alloc(ip_minor_arena_sa
)) == 0) {
1606 if (flag
& SO_FALLBACK
) {
1608 * A non-STREAMS socket needs a stream to fall back to
1610 RD(q
)->q_ptr
= (void *)conn_dev
;
1611 WR(q
)->q_qinfo
= &icmp_fallback_sock_winit
;
1612 WR(q
)->q_ptr
= (void *)ip_minor_arena_sa
;
1617 connp
= rawip_do_open(family
, credp
, &error
, KM_SLEEP
);
1618 if (connp
== NULL
) {
1620 inet_minor_free(ip_minor_arena_sa
, conn_dev
);
1624 *devp
= makedevice(getemajor(*devp
), (minor_t
)conn_dev
);
1625 connp
->conn_dev
= conn_dev
;
1626 connp
->conn_minor_arena
= ip_minor_arena_sa
;
1629 * Initialize the icmp_t structure for this stream.
1632 WR(q
)->q_ptr
= connp
;
1634 connp
->conn_wq
= WR(q
);
1636 WR(q
)->q_hiwat
= connp
->conn_sndbuf
;
1637 WR(q
)->q_lowat
= connp
->conn_sndlowat
;
1641 /* Set the Stream head write offset. */
1642 (void) proto_set_tx_wroff(q
, connp
, connp
->conn_wroff
);
1643 (void) proto_set_rx_hiwat(connp
->conn_rq
, connp
, connp
->conn_rcvbuf
);
1645 mutex_enter(&connp
->conn_lock
);
1646 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
1647 mutex_exit(&connp
->conn_lock
);
1649 icmp_bind_proto(connp
->conn_icmp
);
1654 /* For /dev/icmp aka AF_INET open */
1656 icmp_openv4(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
)
1658 return (icmp_tpi_open(q
, devp
, flag
, sflag
, credp
, AF_INET
));
1661 /* For /dev/icmp6 aka AF_INET6 open */
1663 icmp_openv6(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
)
1665 return (icmp_tpi_open(q
, devp
, flag
, sflag
, credp
, AF_INET6
));
1669 * This is the open routine for icmp. It allocates an icmp_t structure for
1670 * the stream and, on the first open of the module, creates an ND table.
1673 rawip_do_open(int family
, cred_t
*credp
, int *err
, int flags
)
1681 boolean_t isv6
= B_FALSE
;
1683 *err
= secpolicy_net_icmpaccess(credp
);
1687 if (family
== AF_INET6
)
1690 ns
= netstack_find_by_cred(credp
);
1692 is
= ns
->netstack_icmp
;
1696 * For exclusive stacks we set the zoneid to zero
1697 * to make ICMP operate as if in the global zone.
1699 if (ns
->netstack_stackid
!= GLOBAL_NETSTACKID
)
1700 zoneid
= GLOBAL_ZONEID
;
1702 zoneid
= crgetzoneid(credp
);
1704 ASSERT(flags
== KM_SLEEP
|| flags
== KM_NOSLEEP
);
1706 connp
= ipcl_conn_create(IPCL_RAWIPCONN
, flags
, ns
);
1707 icmp
= connp
->conn_icmp
;
1710 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1711 * done by netstack_find_by_cred()
1716 * Since this conn_t/icmp_t is not yet visible to anybody else we don't
1717 * need to lock anything.
1719 ASSERT(connp
->conn_proto
== IPPROTO_ICMP
);
1720 ASSERT(connp
->conn_icmp
== icmp
);
1721 ASSERT(icmp
->icmp_connp
== connp
);
1723 /* Set the initial state of the stream and the privilege status. */
1724 icmp
->icmp_state
= TS_UNBND
;
1725 connp
->conn_ixa
->ixa_flags
|= IXAF_VERIFY_SOURCE
;
1727 connp
->conn_family
= AF_INET6
;
1728 connp
->conn_ipversion
= IPV6_VERSION
;
1729 connp
->conn_ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
1730 connp
->conn_proto
= IPPROTO_ICMPV6
;
1731 /* May be changed by a SO_PROTOTYPE socket option. */
1732 connp
->conn_proto
= IPPROTO_ICMPV6
;
1733 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
1734 connp
->conn_ixa
->ixa_raw_cksum_offset
= 2;
1735 connp
->conn_default_ttl
= is
->is_ipv6_hoplimit
;
1736 len
= sizeof (ip6_t
);
1738 connp
->conn_family
= AF_INET
;
1739 connp
->conn_ipversion
= IPV4_VERSION
;
1740 connp
->conn_ixa
->ixa_flags
|= IXAF_IS_IPV4
;
1741 /* May be changed by a SO_PROTOTYPE socket option. */
1742 connp
->conn_proto
= IPPROTO_ICMP
;
1743 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
1744 connp
->conn_default_ttl
= is
->is_ipv4_ttl
;
1745 len
= sizeof (ipha_t
);
1747 connp
->conn_xmit_ipp
.ipp_unicast_hops
= connp
->conn_default_ttl
;
1749 connp
->conn_ixa
->ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
1752 * For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set,
1753 * the checksum is provided in the pre-built packet. We clear
1754 * IXAF_SET_ULP_CKSUM to tell IP that the application has sent a
1755 * complete IP header and not to compute the transport checksum.
1757 connp
->conn_ixa
->ixa_flags
|= IXAF_MULTICAST_LOOP
| IXAF_SET_ULP_CKSUM
;
1758 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1759 connp
->conn_ixa
->ixa_zoneid
= zoneid
;
1761 connp
->conn_zoneid
= zoneid
;
1764 * If the caller has the process-wide flag set, then default to MAC
1765 * exempt mode. This allows read-down to unlabeled hosts.
1767 if (getpflags(NET_MAC_AWARE
, credp
) != 0)
1768 connp
->conn_mac_mode
= CONN_MAC_AWARE
;
1770 connp
->conn_zone_is_global
= (crgetzoneid(credp
) == GLOBAL_ZONEID
);
1774 connp
->conn_rcvbuf
= is
->is_recv_hiwat
;
1775 connp
->conn_sndbuf
= is
->is_xmit_hiwat
;
1776 connp
->conn_sndlowat
= is
->is_xmit_lowat
;
1777 connp
->conn_rcvlowat
= icmp_mod_info
.mi_lowat
;
1779 connp
->conn_wroff
= len
+ is
->is_wroff_extra
;
1780 connp
->conn_so_type
= SOCK_RAW
;
1782 connp
->conn_recv
= icmp_input
;
1783 connp
->conn_recvicmp
= icmp_icmp_input
;
1785 connp
->conn_cred
= credp
;
1786 connp
->conn_cpid
= curproc
->p_pid
;
1787 connp
->conn_open_time
= ddi_get_lbolt64();
1788 /* Cache things in ixa without an extra refhold */
1789 ASSERT(!(connp
->conn_ixa
->ixa_free_flags
& IXA_FREE_CRED
));
1790 connp
->conn_ixa
->ixa_cred
= connp
->conn_cred
;
1791 connp
->conn_ixa
->ixa_cpid
= connp
->conn_cpid
;
1792 if (is_system_labeled())
1793 connp
->conn_ixa
->ixa_tsl
= crgetlabel(connp
->conn_cred
);
1795 connp
->conn_flow_cntrld
= B_FALSE
;
1797 if (is
->is_pmtu_discovery
)
1798 connp
->conn_ixa
->ixa_flags
|= IXAF_PMTU_DISCOVERY
;
1804 * Which ICMP options OK to set through T_UNITDATA_REQ...
1808 icmp_opt_allow_udr_set(t_scalar_t level
, t_scalar_t name
)
1814 * This routine gets default values of certain options whose default
1815 * values are maintained by protocol-specific code
1818 icmp_opt_default(queue_t
*q
, t_scalar_t level
, t_scalar_t name
, uchar_t
*ptr
)
1820 icmp_t
*icmp
= Q_TO_ICMP(q
);
1821 icmp_stack_t
*is
= icmp
->icmp_is
;
1822 int *i1
= (int *)ptr
;
1827 case IP_MULTICAST_TTL
:
1828 *ptr
= (uchar_t
)IP_DEFAULT_MULTICAST_TTL
;
1829 return (sizeof (uchar_t
));
1830 case IP_MULTICAST_LOOP
:
1831 *ptr
= (uchar_t
)IP_DEFAULT_MULTICAST_LOOP
;
1832 return (sizeof (uchar_t
));
1837 case IPV6_MULTICAST_HOPS
:
1838 *i1
= IP_DEFAULT_MULTICAST_TTL
;
1839 return (sizeof (int));
1840 case IPV6_MULTICAST_LOOP
:
1841 *i1
= IP_DEFAULT_MULTICAST_LOOP
;
1842 return (sizeof (int));
1843 case IPV6_UNICAST_HOPS
:
1844 *i1
= is
->is_ipv6_hoplimit
;
1845 return (sizeof (int));
1848 case IPPROTO_ICMPV6
:
1851 /* Make it look like "pass all" */
1852 ICMP6_FILTER_SETPASSALL((icmp6_filter_t
*)ptr
);
1853 return (sizeof (icmp6_filter_t
));
1861 * This routine retrieves the current status of socket options.
1862 * It returns the size of the option retrieved, or -1.
1865 icmp_opt_get(conn_t
*connp
, int level
, int name
, uchar_t
*ptr
)
1867 icmp_t
*icmp
= connp
->conn_icmp
;
1868 int *i1
= (int *)ptr
;
1869 conn_opt_arg_t coas
;
1872 coas
.coa_connp
= connp
;
1873 coas
.coa_ixa
= connp
->conn_ixa
;
1874 coas
.coa_ipp
= &connp
->conn_xmit_ipp
;
1875 coas
.coa_ancillary
= B_FALSE
;
1876 coas
.coa_changed
= 0;
1879 * We assume that the optcom framework has checked for the set
1880 * of levels and names that are supported, hence we don't worry
1881 * about rejecting based on that.
1882 * First check for ICMP specific handling, then pass to common routine.
1887 * Only allow IPv4 option processing on IPv4 sockets.
1889 if (connp
->conn_family
!= AF_INET
)
1895 /* Options are passed up with each packet */
1898 mutex_enter(&connp
->conn_lock
);
1899 *i1
= (int)icmp
->icmp_hdrincl
;
1900 mutex_exit(&connp
->conn_lock
);
1901 return (sizeof (int));
1907 * Only allow IPv6 option processing on native IPv6 sockets.
1909 if (connp
->conn_family
!= AF_INET6
)
1915 * Return offset or -1 if no checksum offset.
1916 * Does not apply to IPPROTO_ICMPV6
1918 if (connp
->conn_proto
== IPPROTO_ICMPV6
)
1921 mutex_enter(&connp
->conn_lock
);
1922 if (connp
->conn_ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
)
1923 *i1
= connp
->conn_ixa
->ixa_raw_cksum_offset
;
1926 mutex_exit(&connp
->conn_lock
);
1927 return (sizeof (int));
1931 case IPPROTO_ICMPV6
:
1933 * Only allow IPv6 option processing on native IPv6 sockets.
1935 if (connp
->conn_family
!= AF_INET6
)
1938 if (connp
->conn_proto
!= IPPROTO_ICMPV6
)
1943 mutex_enter(&connp
->conn_lock
);
1944 if (icmp
->icmp_filter
== NULL
) {
1945 /* Make it look like "pass all" */
1946 ICMP6_FILTER_SETPASSALL((icmp6_filter_t
*)ptr
);
1948 (void) bcopy(icmp
->icmp_filter
, ptr
,
1949 sizeof (icmp6_filter_t
));
1951 mutex_exit(&connp
->conn_lock
);
1952 return (sizeof (icmp6_filter_t
));
1955 mutex_enter(&connp
->conn_lock
);
1956 retval
= conn_opt_get(&coas
, level
, name
, ptr
);
1957 mutex_exit(&connp
->conn_lock
);
1962 * This routine retrieves the current status of socket options.
1963 * It returns the size of the option retrieved, or -1.
1966 icmp_tpi_opt_get(queue_t
*q
, int level
, int name
, uchar_t
*ptr
)
1968 conn_t
*connp
= Q_TO_CONN(q
);
1971 err
= icmp_opt_get(connp
, level
, name
, ptr
);
1976 * This routine sets socket options.
1979 icmp_do_opt_set(conn_opt_arg_t
*coa
, int level
, int name
,
1980 uint_t inlen
, uchar_t
*invalp
, cred_t
*cr
, boolean_t checkonly
)
1982 conn_t
*connp
= coa
->coa_connp
;
1983 ip_xmit_attr_t
*ixa
= coa
->coa_ixa
;
1984 icmp_t
*icmp
= connp
->conn_icmp
;
1985 icmp_stack_t
*is
= icmp
->icmp_is
;
1986 int *i1
= (int *)invalp
;
1987 boolean_t onoff
= (*i1
== 0) ? 0 : 1;
1990 ASSERT(MUTEX_NOT_HELD(&coa
->coa_connp
->conn_lock
));
1993 * For fixed length options, no sanity check
1994 * of passed in length is done. It is assumed *_optcom_req()
1995 * routines do the right thing.
2002 if ((*i1
& 0xFF) != IPPROTO_ICMP
&&
2003 (*i1
& 0xFF) != IPPROTO_ICMPV6
&&
2004 secpolicy_net_rawaccess(cr
) != 0) {
2010 mutex_enter(&connp
->conn_lock
);
2011 connp
->conn_proto
= *i1
& 0xFF;
2012 ixa
->ixa_protocol
= connp
->conn_proto
;
2013 if ((connp
->conn_proto
== IPPROTO_RAW
||
2014 connp
->conn_proto
== IPPROTO_IGMP
) &&
2015 connp
->conn_family
== AF_INET
) {
2016 icmp
->icmp_hdrincl
= 1;
2017 ixa
->ixa_flags
&= ~IXAF_SET_ULP_CKSUM
;
2018 } else if (connp
->conn_proto
== IPPROTO_UDP
||
2019 connp
->conn_proto
== IPPROTO_TCP
||
2020 connp
->conn_proto
== IPPROTO_SCTP
) {
2021 /* Used by test applications like psh */
2022 icmp
->icmp_hdrincl
= 0;
2023 ixa
->ixa_flags
&= ~IXAF_SET_ULP_CKSUM
;
2025 icmp
->icmp_hdrincl
= 0;
2026 ixa
->ixa_flags
|= IXAF_SET_ULP_CKSUM
;
2029 if (connp
->conn_family
== AF_INET6
&&
2030 connp
->conn_proto
== IPPROTO_ICMPV6
) {
2031 /* Set offset for icmp6_cksum */
2032 ixa
->ixa_flags
&= ~IXAF_SET_RAW_CKSUM
;
2033 ixa
->ixa_raw_cksum_offset
= 2;
2035 if (icmp
->icmp_filter
!= NULL
&&
2036 connp
->conn_proto
!= IPPROTO_ICMPV6
) {
2037 kmem_free(icmp
->icmp_filter
,
2038 sizeof (icmp6_filter_t
));
2039 icmp
->icmp_filter
= NULL
;
2041 mutex_exit(&connp
->conn_lock
);
2043 coa
->coa_changed
|= COA_HEADER_CHANGED
;
2045 * For SCTP, we don't use icmp_bind_proto() for
2046 * raw socket binding.
2048 if (connp
->conn_proto
== IPPROTO_SCTP
)
2051 coa
->coa_changed
|= COA_ICMP_BIND_NEEDED
;
2055 if (*i1
> is
->is_max_buf
) {
2060 if (*i1
> is
->is_max_buf
) {
2069 * Only allow IPv4 option processing on IPv4 sockets.
2071 if (connp
->conn_family
!= AF_INET
)
2077 mutex_enter(&connp
->conn_lock
);
2078 icmp
->icmp_hdrincl
= onoff
;
2080 ixa
->ixa_flags
&= ~IXAF_SET_ULP_CKSUM
;
2082 ixa
->ixa_flags
|= IXAF_SET_ULP_CKSUM
;
2083 mutex_exit(&connp
->conn_lock
);
2090 if (connp
->conn_family
!= AF_INET6
)
2096 * Integer offset into the user data of where the
2097 * checksum is located.
2098 * Offset of -1 disables option.
2099 * Does not apply to IPPROTO_ICMPV6.
2101 if (connp
->conn_proto
== IPPROTO_ICMPV6
||
2102 coa
->coa_ancillary
) {
2105 if ((*i1
!= -1) && ((*i1
< 0) || (*i1
& 0x1) != 0)) {
2106 /* Negative or not 16 bit aligned offset */
2112 mutex_enter(&connp
->conn_lock
);
2114 ixa
->ixa_flags
&= ~IXAF_SET_RAW_CKSUM
;
2115 ixa
->ixa_raw_cksum_offset
= 0;
2116 ixa
->ixa_flags
&= ~IXAF_SET_ULP_CKSUM
;
2118 ixa
->ixa_flags
|= IXAF_SET_RAW_CKSUM
;
2119 ixa
->ixa_raw_cksum_offset
= *i1
;
2120 ixa
->ixa_flags
|= IXAF_SET_ULP_CKSUM
;
2122 mutex_exit(&connp
->conn_lock
);
2127 case IPPROTO_ICMPV6
:
2129 * Only allow IPv6 option processing on IPv6 sockets.
2131 if (connp
->conn_family
!= AF_INET6
)
2133 if (connp
->conn_proto
!= IPPROTO_ICMPV6
)
2142 (inlen
!= sizeof (icmp6_filter_t
)))
2145 mutex_enter(&connp
->conn_lock
);
2147 if (icmp
->icmp_filter
!= NULL
) {
2148 kmem_free(icmp
->icmp_filter
,
2149 sizeof (icmp6_filter_t
));
2150 icmp
->icmp_filter
= NULL
;
2153 if (icmp
->icmp_filter
== NULL
) {
2154 icmp
->icmp_filter
= kmem_alloc(
2155 sizeof (icmp6_filter_t
),
2157 if (icmp
->icmp_filter
== NULL
) {
2158 mutex_exit(&connp
->conn_lock
);
2162 (void) bcopy(invalp
, icmp
->icmp_filter
, inlen
);
2164 mutex_exit(&connp
->conn_lock
);
2169 error
= conn_opt_set(coa
, level
, name
, inlen
, invalp
,
2175 * This routine sets socket options.
2178 icmp_opt_set(conn_t
*connp
, uint_t optset_context
, int level
, int name
,
2179 uint_t inlen
, uchar_t
*invalp
, uint_t
*outlenp
, uchar_t
*outvalp
,
2180 void *thisdg_attrs
, cred_t
*cr
)
2182 icmp_t
*icmp
= connp
->conn_icmp
;
2184 conn_opt_arg_t coas
, *coa
;
2185 boolean_t checkonly
;
2186 icmp_stack_t
*is
= icmp
->icmp_is
;
2188 switch (optset_context
) {
2189 case SETFN_OPTCOM_CHECKONLY
:
2192 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
2193 * inlen != 0 implies value supplied and
2194 * we have to "pretend" to set it.
2195 * inlen == 0 implies that there is no
2196 * value part in T_CHECK request and just validation
2197 * done elsewhere should be enough, we just return here.
2204 case SETFN_OPTCOM_NEGOTIATE
:
2205 checkonly
= B_FALSE
;
2207 case SETFN_UD_NEGOTIATE
:
2208 case SETFN_CONN_NEGOTIATE
:
2209 checkonly
= B_FALSE
;
2211 * Negotiating local and "association-related" options
2212 * through T_UNITDATA_REQ.
2214 * Following routine can filter out ones we do not
2215 * want to be "set" this way.
2217 if (!icmp_opt_allow_udr_set(level
, name
)) {
2224 * We should never get here
2230 ASSERT((optset_context
!= SETFN_OPTCOM_CHECKONLY
) ||
2231 (optset_context
== SETFN_OPTCOM_CHECKONLY
&& inlen
!= 0));
2233 if (thisdg_attrs
!= NULL
) {
2234 /* Options from T_UNITDATA_REQ */
2235 coa
= (conn_opt_arg_t
*)thisdg_attrs
;
2236 ASSERT(coa
->coa_connp
== connp
);
2237 ASSERT(coa
->coa_ixa
!= NULL
);
2238 ASSERT(coa
->coa_ipp
!= NULL
);
2239 ASSERT(coa
->coa_ancillary
);
2242 coas
.coa_connp
= connp
;
2243 /* Get a reference on conn_ixa to prevent concurrent mods */
2244 coas
.coa_ixa
= conn_get_ixa(connp
, B_TRUE
);
2245 if (coas
.coa_ixa
== NULL
) {
2249 coas
.coa_ipp
= &connp
->conn_xmit_ipp
;
2250 coas
.coa_ancillary
= B_FALSE
;
2251 coas
.coa_changed
= 0;
2254 err
= icmp_do_opt_set(coa
, level
, name
, inlen
, invalp
,
2258 if (!coa
->coa_ancillary
)
2259 ixa_refrele(coa
->coa_ixa
);
2265 * Common case of OK return with outval same as inval.
2267 if (invalp
!= outvalp
) {
2268 /* don't trust bcopy for identical src/dst */
2269 (void) bcopy(invalp
, outvalp
, inlen
);
2274 * If this was not ancillary data, then we rebuild the headers,
2275 * update the IRE/NCE, and IPsec as needed.
2276 * Since the label depends on the destination we go through
2277 * ip_set_destination first.
2279 if (coa
->coa_ancillary
) {
2283 if (coa
->coa_changed
& COA_ROUTE_CHANGED
) {
2284 in6_addr_t saddr
, faddr
, nexthop
;
2288 * We clear lastdst to make sure we pick up the change
2289 * next time sending.
2290 * If we are connected we re-cache the information.
2291 * We ignore errors to preserve BSD behavior.
2292 * Note that we don't redo IPsec policy lookup here
2293 * since the final destination (or source) didn't change.
2295 mutex_enter(&connp
->conn_lock
);
2296 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2298 ip_attr_nexthop(coa
->coa_ipp
, coa
->coa_ixa
,
2299 &connp
->conn_faddr_v6
, &nexthop
);
2300 saddr
= connp
->conn_saddr_v6
;
2301 faddr
= connp
->conn_faddr_v6
;
2302 fport
= connp
->conn_fport
;
2303 mutex_exit(&connp
->conn_lock
);
2305 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr
) &&
2306 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr
)) {
2307 (void) ip_attr_connect(connp
, coa
->coa_ixa
,
2308 &saddr
, &faddr
, &nexthop
, fport
, NULL
, NULL
,
2309 IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
);
2313 ixa_refrele(coa
->coa_ixa
);
2315 if (coa
->coa_changed
& COA_HEADER_CHANGED
) {
2317 * Rebuild the header template if we are connected.
2318 * Otherwise clear conn_v6lastdst so we rebuild the header
2321 mutex_enter(&connp
->conn_lock
);
2322 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
) &&
2323 !IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_faddr_v6
)) {
2324 err
= icmp_build_hdr_template(connp
,
2325 &connp
->conn_saddr_v6
, &connp
->conn_faddr_v6
,
2326 connp
->conn_flowinfo
);
2328 mutex_exit(&connp
->conn_lock
);
2332 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2334 mutex_exit(&connp
->conn_lock
);
2336 if (coa
->coa_changed
& COA_RCVBUF_CHANGED
) {
2337 (void) proto_set_rx_hiwat(connp
->conn_rq
, connp
,
2338 connp
->conn_rcvbuf
);
2340 if ((coa
->coa_changed
& COA_SNDBUF_CHANGED
) && !IPCL_IS_NONSTR(connp
)) {
2341 connp
->conn_wq
->q_hiwat
= connp
->conn_sndbuf
;
2343 if (coa
->coa_changed
& COA_WROFF_CHANGED
) {
2344 /* Increase wroff if needed */
2347 mutex_enter(&connp
->conn_lock
);
2348 wroff
= connp
->conn_ht_iphc_allocated
+ is
->is_wroff_extra
;
2349 if (wroff
> connp
->conn_wroff
) {
2350 connp
->conn_wroff
= wroff
;
2351 mutex_exit(&connp
->conn_lock
);
2352 (void) proto_set_tx_wroff(connp
->conn_rq
, connp
, wroff
);
2354 mutex_exit(&connp
->conn_lock
);
2357 if (coa
->coa_changed
& COA_ICMP_BIND_NEEDED
) {
2358 icmp_bind_proto(icmp
);
2363 /* This routine sets socket options. */
2365 icmp_tpi_opt_set(queue_t
*q
, uint_t optset_context
, int level
, int name
,
2366 uint_t inlen
, uchar_t
*invalp
, uint_t
*outlenp
, uchar_t
*outvalp
,
2367 void *thisdg_attrs
, cred_t
*cr
)
2369 conn_t
*connp
= Q_TO_CONN(q
);
2372 error
= icmp_opt_set(connp
, optset_context
, level
, name
, inlen
, invalp
,
2373 outlenp
, outvalp
, thisdg_attrs
, cr
);
2380 * Note that IP_HDRINCL has ipha_protocol that is different than conn_proto,
2381 * but icmp_output_hdrincl restores ipha_protocol once we return.
2384 icmp_prepend_hdr(conn_t
*connp
, ip_xmit_attr_t
*ixa
, const ip_pkt_t
*ipp
,
2385 const in6_addr_t
*v6src
, const in6_addr_t
*v6dst
, uint32_t flowinfo
,
2386 mblk_t
*data_mp
, int *errorp
)
2389 icmp_stack_t
*is
= connp
->conn_netstack
->netstack_icmp
;
2393 data_len
= msgdsize(data_mp
);
2394 mp
= conn_prepend_hdr(ixa
, ipp
, v6src
, v6dst
, connp
->conn_proto
,
2395 flowinfo
, 0, data_mp
, data_len
, is
->is_wroff_extra
, &cksum
, errorp
);
2397 ASSERT(*errorp
!= 0);
2401 ixa
->ixa_pktlen
= data_len
+ ixa
->ixa_ip_hdr_length
;
2404 * If there was a routing option/header then conn_prepend_hdr
2405 * has massaged it and placed the pseudo-header checksum difference
2406 * in the cksum argument.
2408 * Prepare for ICMPv6 checksum done in IP.
2410 * We make it easy for IP to include our pseudo header
2411 * by putting our length (and any routing header adjustment)
2412 * in the ICMPv6 checksum field.
2413 * The IP source, destination, and length have already been set by
2417 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
2418 ASSERT(cksum
< 0x10000);
2420 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
2421 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
2423 ASSERT(ntohs(ipha
->ipha_length
) == ixa
->ixa_pktlen
);
2425 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
2426 uint_t cksum_offset
= 0;
2428 ASSERT(ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
== ixa
->ixa_pktlen
);
2430 if (ixa
->ixa_flags
& IXAF_SET_ULP_CKSUM
) {
2431 if (connp
->conn_proto
== IPPROTO_ICMPV6
) {
2432 cksum_offset
= ixa
->ixa_ip_hdr_length
+
2433 offsetof(icmp6_t
, icmp6_cksum
);
2434 } else if (ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
) {
2435 cksum_offset
= ixa
->ixa_ip_hdr_length
+
2436 ixa
->ixa_raw_cksum_offset
;
2439 if (cksum_offset
!= 0) {
2442 /* Make sure the checksum fits in the first mblk */
2443 if (cksum_offset
+ sizeof (short) > MBLKL(mp
)) {
2447 cksum_offset
+ sizeof (short));
2454 ip6h
= (ip6_t
*)mp
->b_rptr
;
2456 ptr
= (uint16_t *)(mp
->b_rptr
+ cksum_offset
);
2457 *ptr
= htons(cksum
);
2461 /* Note that we don't try to update wroff due to ancillary data */
2466 icmp_build_hdr_template(conn_t
*connp
, const in6_addr_t
*v6src
,
2467 const in6_addr_t
*v6dst
, uint32_t flowinfo
)
2471 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
2473 * We clear lastdst to make sure we don't use the lastdst path
2474 * next time sending since we might not have set v6dst yet.
2476 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2478 error
= conn_build_hdr_template(connp
, 0, 0, v6src
, v6dst
, flowinfo
);
2483 * Any routing header/option has been massaged. The checksum difference
2484 * is stored in conn_sum.
2490 icmp_queue_fallback(icmp_t
*icmp
, mblk_t
*mp
)
2492 ASSERT(MUTEX_HELD(&icmp
->icmp_recv_lock
));
2493 if (IPCL_IS_NONSTR(icmp
->icmp_connp
)) {
2495 * fallback has started but messages have not been moved yet
2497 if (icmp
->icmp_fallback_queue_head
== NULL
) {
2498 ASSERT(icmp
->icmp_fallback_queue_tail
== NULL
);
2499 icmp
->icmp_fallback_queue_head
= mp
;
2500 icmp
->icmp_fallback_queue_tail
= mp
;
2502 ASSERT(icmp
->icmp_fallback_queue_tail
!= NULL
);
2503 icmp
->icmp_fallback_queue_tail
->b_next
= mp
;
2504 icmp
->icmp_fallback_queue_tail
= mp
;
2509 * Fallback completed, let the caller putnext() the mblk.
2516 * Deliver data to ULP. In case we have a socket, and it's falling back to
2517 * TPI, then we'll queue the mp for later processing.
2520 icmp_ulp_recv(conn_t
*connp
, mblk_t
*mp
, uint_t len
)
2522 if (IPCL_IS_NONSTR(connp
)) {
2523 icmp_t
*icmp
= connp
->conn_icmp
;
2526 ASSERT(len
== msgdsize(mp
));
2527 if ((*connp
->conn_upcalls
->su_recv
)
2528 (connp
->conn_upper_handle
, mp
, len
, 0, &error
, NULL
) < 0) {
2529 mutex_enter(&icmp
->icmp_recv_lock
);
2530 if (error
== ENOSPC
) {
2532 * let's confirm while holding the lock
2534 if ((*connp
->conn_upcalls
->su_recv
)
2535 (connp
->conn_upper_handle
, NULL
, 0, 0,
2536 &error
, NULL
) < 0) {
2537 ASSERT(error
== ENOSPC
);
2538 if (error
== ENOSPC
) {
2539 connp
->conn_flow_cntrld
=
2543 mutex_exit(&icmp
->icmp_recv_lock
);
2545 ASSERT(error
== EOPNOTSUPP
);
2546 mp
= icmp_queue_fallback(icmp
, mp
);
2547 mutex_exit(&icmp
->icmp_recv_lock
);
2549 putnext(connp
->conn_rq
, mp
);
2552 ASSERT(MUTEX_NOT_HELD(&icmp
->icmp_recv_lock
));
2554 putnext(connp
->conn_rq
, mp
);
2559 * This is the inbound data path.
2560 * IP has already pulled up the IP headers and verified alignment
2565 icmp_input(void *arg1
, mblk_t
*mp
, void *arg2
, ip_recv_attr_t
*ira
)
2567 conn_t
*connp
= (conn_t
*)arg1
;
2568 struct T_unitdata_ind
*tudi
;
2569 uchar_t
*rptr
; /* Pointer to IP header */
2571 int udi_size
; /* Size of T_unitdata_ind */
2577 crb_t recv_ancillary
;
2583 ASSERT(connp
->conn_flags
& IPCL_RAWIPCONN
);
2585 icmp
= connp
->conn_icmp
;
2589 ASSERT(DB_TYPE(mp
) == M_DATA
);
2590 ASSERT(OK_32PTR(rptr
));
2591 ASSERT(ira
->ira_pktlen
== msgdsize(mp
));
2592 pkt_len
= ira
->ira_pktlen
;
2595 * Get a snapshot of these and allow other threads to change
2596 * them after that. We need the same recv_ancillary when determining
2597 * the size as when adding the ancillary data items.
2599 mutex_enter(&connp
->conn_lock
);
2600 recv_ancillary
= connp
->conn_recv_ancillary
;
2601 mutex_exit(&connp
->conn_lock
);
2603 ip_hdr_length
= ira
->ira_ip_hdr_length
;
2604 ASSERT(MBLKL(mp
) >= ip_hdr_length
); /* IP did a pullup */
2606 /* Initialize regardless of IP version */
2607 ipps
.ipp_fields
= 0;
2609 if (ira
->ira_flags
& IRAF_IS_IPV4
) {
2610 ASSERT(IPH_HDR_VERSION(rptr
) == IPV4_VERSION
);
2611 ASSERT(MBLKL(mp
) >= sizeof (ipha_t
));
2612 ASSERT(ira
->ira_ip_hdr_length
== IPH_HDR_LENGTH(rptr
));
2614 ipha
= (ipha_t
*)mp
->b_rptr
;
2615 if (recv_ancillary
.crb_all
!= 0)
2616 (void) ip_find_hdr_v4(ipha
, &ipps
, B_FALSE
);
2619 * BSD for some reason adjusts ipha_length to exclude the
2620 * IP header length. We do the same.
2622 if (is
->is_bsd_compat
) {
2625 len
= ntohs(ipha
->ipha_length
);
2626 if (mp
->b_datap
->db_ref
> 1) {
2628 * Allocate a new IP header so that we can
2629 * modify ipha_length.
2633 mp1
= allocb(ip_hdr_length
, BPRI_MED
);
2636 BUMP_MIB(&is
->is_rawip_mib
,
2640 bcopy(rptr
, mp1
->b_rptr
, ip_hdr_length
);
2641 mp
->b_rptr
= rptr
+ ip_hdr_length
;
2643 ipha
= (ipha_t
*)rptr
;
2645 mp1
->b_wptr
= rptr
+ ip_hdr_length
;
2648 len
-= ip_hdr_length
;
2649 ipha
->ipha_length
= htons(len
);
2653 * For RAW sockets we do not pass ICMP/IPv4 packets to AF_INET6
2654 * sockets. This is ensured by icmp_bind and the IP fanout code.
2656 ASSERT(connp
->conn_family
== AF_INET
);
2659 * This is the inbound data path. Packets are passed upstream
2660 * as T_UNITDATA_IND messages with full IPv4 headers still
2665 * Normally only send up the source address.
2666 * If any ancillary data items are wanted we add those.
2668 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin_t
);
2669 if (recv_ancillary
.crb_all
!= 0) {
2670 udi_size
+= conn_recvancillary_size(connp
,
2671 recv_ancillary
, ira
, mp
, &ipps
);
2674 /* Allocate a message block for the T_UNITDATA_IND structure. */
2675 mp1
= allocb(udi_size
, BPRI_MED
);
2678 BUMP_MIB(&is
->is_rawip_mib
, rawipInErrors
);
2682 tudi
= (struct T_unitdata_ind
*)mp1
->b_rptr
;
2683 mp1
->b_datap
->db_type
= M_PROTO
;
2684 mp1
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
2685 tudi
->PRIM_type
= T_UNITDATA_IND
;
2686 tudi
->SRC_length
= sizeof (sin_t
);
2687 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
2688 sin
= (sin_t
*)&tudi
[1];
2690 sin
->sin_family
= AF_INET
;
2691 sin
->sin_addr
.s_addr
= ipha
->ipha_src
;
2692 *(uint32_t *)&sin
->sin_zero
[0] = 0;
2693 *(uint32_t *)&sin
->sin_zero
[4] = 0;
2694 tudi
->OPT_offset
= sizeof (struct T_unitdata_ind
) +
2696 udi_size
-= (sizeof (struct T_unitdata_ind
) + sizeof (sin_t
));
2697 tudi
->OPT_length
= udi_size
;
2700 * Add options if IP_RECVIF etc is set
2702 if (udi_size
!= 0) {
2703 conn_recvancillary_add(connp
, recv_ancillary
, ira
,
2704 &ipps
, (uchar_t
*)&sin
[1], udi_size
);
2709 ASSERT(IPH_HDR_VERSION(rptr
) == IPV6_VERSION
);
2711 * IPv6 packets can only be received by applications
2712 * that are prepared to receive IPv6 addresses.
2713 * The IP fanout must ensure this.
2715 ASSERT(connp
->conn_family
== AF_INET6
);
2718 * Handle IPv6 packets. We don't pass up the IP headers with the
2722 ip6h
= (ip6_t
*)rptr
;
2723 if (recv_ancillary
.crb_all
!= 0) {
2725 * Call on ip_find_hdr_v6 which gets individual lengths of
2726 * extension headers (and pointers to them).
2730 /* We don't care about the length or nextheader. */
2731 (void) ip_find_hdr_v6(mp
, ip6h
, B_TRUE
, &ipps
, &nexthdr
);
2734 * We do not pass up hop-by-hop options or any other
2735 * extension header as part of the packet. Applications
2736 * that want to see them have to specify IPV6_RECV* socket
2737 * options. And conn_recvancillary_size/add explicitly
2738 * drops the TX option from IPV6_HOPOPTS as it does for UDP.
2740 * If we had multilevel ICMP sockets, then we'd want to
2741 * modify conn_recvancillary_size/add to
2742 * allow the user to see the label.
2747 * Check a filter for ICMPv6 types if needed.
2748 * Verify raw checksums if needed.
2750 mutex_enter(&connp
->conn_lock
);
2751 if (icmp
->icmp_filter
!= NULL
) {
2754 /* Assumes that IP has done the pullupmsg */
2755 type
= mp
->b_rptr
[ip_hdr_length
];
2757 ASSERT(mp
->b_rptr
+ ip_hdr_length
<= mp
->b_wptr
);
2758 if (ICMP6_FILTER_WILLBLOCK(type
, icmp
->icmp_filter
)) {
2759 mutex_exit(&connp
->conn_lock
);
2764 if (connp
->conn_ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
) {
2770 up
= (uint16_t *)&ip6h
->ip6_src
;
2772 remlen
= msgdsize(mp
) - ip_hdr_length
;
2773 sum
= htons(connp
->conn_proto
+ remlen
)
2774 + up
[0] + up
[1] + up
[2] + up
[3]
2775 + up
[4] + up
[5] + up
[6] + up
[7]
2776 + up
[8] + up
[9] + up
[10] + up
[11]
2777 + up
[12] + up
[13] + up
[14] + up
[15];
2778 sum
= (sum
& 0xffff) + (sum
>> 16);
2779 sum
= IP_CSUM(mp
, ip_hdr_length
, sum
);
2781 /* IPv6 RAW checksum failed */
2782 ip0dbg(("icmp_rput: RAW checksum failed %x\n", sum
));
2783 mutex_exit(&connp
->conn_lock
);
2785 BUMP_MIB(&is
->is_rawip_mib
, rawipInCksumErrs
);
2789 mutex_exit(&connp
->conn_lock
);
2791 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
);
2793 if (recv_ancillary
.crb_all
!= 0) {
2794 udi_size
+= conn_recvancillary_size(connp
,
2795 recv_ancillary
, ira
, mp
, &ipps
);
2798 mp1
= allocb(udi_size
, BPRI_MED
);
2801 BUMP_MIB(&is
->is_rawip_mib
, rawipInErrors
);
2805 mp1
->b_datap
->db_type
= M_PROTO
;
2806 tudi
= (struct T_unitdata_ind
*)mp1
->b_rptr
;
2807 mp1
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
2808 tudi
->PRIM_type
= T_UNITDATA_IND
;
2809 tudi
->SRC_length
= sizeof (sin6_t
);
2810 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
2811 tudi
->OPT_offset
= sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
);
2812 udi_size
-= (sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
));
2813 tudi
->OPT_length
= udi_size
;
2814 sin6
= (sin6_t
*)&tudi
[1];
2816 sin6
->sin6_port
= 0;
2817 sin6
->sin6_family
= AF_INET6
;
2819 sin6
->sin6_addr
= ip6h
->ip6_src
;
2820 /* No sin6_flowinfo per API */
2821 sin6
->sin6_flowinfo
= 0;
2822 /* For link-scope pass up scope id */
2823 if (IN6_IS_ADDR_LINKSCOPE(&ip6h
->ip6_src
))
2824 sin6
->sin6_scope_id
= ira
->ira_ruifindex
;
2826 sin6
->sin6_scope_id
= 0;
2827 sin6
->__sin6_src_id
= ip_srcid_find_addr(&ip6h
->ip6_dst
,
2828 IPCL_ZONEID(connp
), is
->is_netstack
);
2830 if (udi_size
!= 0) {
2831 conn_recvancillary_add(connp
, recv_ancillary
, ira
,
2832 &ipps
, (uchar_t
*)&sin6
[1], udi_size
);
2835 /* Skip all the IPv6 headers per API */
2836 mp
->b_rptr
+= ip_hdr_length
;
2837 pkt_len
-= ip_hdr_length
;
2840 BUMP_MIB(&is
->is_rawip_mib
, rawipInDatagrams
);
2841 icmp_ulp_recv(connp
, mp1
, pkt_len
);
2845 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
2846 * information that can be changing beneath us.
2849 icmp_snmp_get(queue_t
*q
, mblk_t
*mpctl
)
2852 struct opthdr
*optp
;
2853 conn_t
*connp
= Q_TO_CONN(q
);
2854 icmp_stack_t
*is
= connp
->conn_netstack
->netstack_icmp
;
2858 * make a copy of the original message
2860 mp2ctl
= copymsg(mpctl
);
2862 if (mpctl
== NULL
||
2863 (mpdata
= mpctl
->b_cont
) == NULL
) {
2869 /* fixed length structure for IPv4 and IPv6 counters */
2870 optp
= (struct opthdr
*)&mpctl
->b_rptr
[sizeof (struct T_optmgmt_ack
)];
2871 optp
->level
= EXPER_RAWIP
;
2873 (void) snmp_append_data(mpdata
, (char *)&is
->is_rawip_mib
,
2874 sizeof (is
->is_rawip_mib
));
2875 optp
->len
= msgdsize(mpdata
);
2882 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
2883 * TODO: If this ever actually tries to set anything, it needs
2884 * to do the appropriate locking.
2888 icmp_snmp_set(queue_t
*q
, t_scalar_t level
, t_scalar_t name
,
2889 uchar_t
*ptr
, int len
)
2900 * This routine creates a T_UDERROR_IND message and passes it upstream.
2901 * The address and options are copied from the T_UNITDATA_REQ message
2902 * passed in mp. This message is freed.
2905 icmp_ud_err(queue_t
*q
, mblk_t
*mp
, t_scalar_t err
)
2907 struct T_unitdata_req
*tudr
;
2914 if ((mp
->b_wptr
< mp
->b_rptr
) ||
2915 (MBLKL(mp
)) < sizeof (struct T_unitdata_req
)) {
2918 tudr
= (struct T_unitdata_req
*)mp
->b_rptr
;
2919 destaddr
= mp
->b_rptr
+ tudr
->DEST_offset
;
2920 if (destaddr
< mp
->b_rptr
|| destaddr
>= mp
->b_wptr
||
2921 destaddr
+ tudr
->DEST_length
< mp
->b_rptr
||
2922 destaddr
+ tudr
->DEST_length
> mp
->b_wptr
) {
2925 optaddr
= mp
->b_rptr
+ tudr
->OPT_offset
;
2926 if (optaddr
< mp
->b_rptr
|| optaddr
>= mp
->b_wptr
||
2927 optaddr
+ tudr
->OPT_length
< mp
->b_rptr
||
2928 optaddr
+ tudr
->OPT_length
> mp
->b_wptr
) {
2931 destlen
= tudr
->DEST_length
;
2932 optlen
= tudr
->OPT_length
;
2934 mp1
= mi_tpi_uderror_ind((char *)destaddr
, destlen
,
2935 (char *)optaddr
, optlen
, err
);
2944 rawip_do_unbind(conn_t
*connp
)
2946 icmp_t
*icmp
= connp
->conn_icmp
;
2948 mutex_enter(&connp
->conn_lock
);
2949 /* If a bind has not been done, we can't unbind. */
2950 if (icmp
->icmp_state
== TS_UNBND
) {
2951 mutex_exit(&connp
->conn_lock
);
2952 return (-TOUTSTATE
);
2954 connp
->conn_saddr_v6
= ipv6_all_zeros
;
2955 connp
->conn_bound_addr_v6
= ipv6_all_zeros
;
2956 connp
->conn_laddr_v6
= ipv6_all_zeros
;
2957 connp
->conn_mcbc_bind
= B_FALSE
;
2958 connp
->conn_lport
= 0;
2959 connp
->conn_fport
= 0;
2960 /* In case we were also connected */
2961 connp
->conn_faddr_v6
= ipv6_all_zeros
;
2962 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2964 icmp
->icmp_state
= TS_UNBND
;
2966 (void) icmp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
2967 &connp
->conn_faddr_v6
, connp
->conn_flowinfo
);
2968 mutex_exit(&connp
->conn_lock
);
2975 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
2976 * After some error checking, the message is passed downstream to ip.
2979 icmp_tpi_unbind(queue_t
*q
, mblk_t
*mp
)
2981 conn_t
*connp
= Q_TO_CONN(q
);
2984 ASSERT(mp
->b_cont
== NULL
);
2985 error
= rawip_do_unbind(connp
);
2988 icmp_err_ack(q
, mp
, -error
, 0);
2990 icmp_err_ack(q
, mp
, 0, error
);
2996 * Convert mp into a T_OK_ACK
2999 mp
= mi_tpi_ok_ack_alloc(mp
);
3002 * should not happen in practice... T_OK_ACK is smaller than the
3006 ASSERT(((struct T_ok_ack
*)mp
->b_rptr
)->PRIM_type
== T_OK_ACK
);
3011 * Process IPv4 packets that already include an IP header.
3012 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
3014 * In this case we ignore the address and any options in the T_UNITDATA_REQ.
3016 * The packet is assumed to have a base (20 byte) IP header followed
3017 * by the upper-layer protocol. We include any IP_OPTIONS including a
3018 * CIPSO label but otherwise preserve the base IP header.
3021 icmp_output_hdrincl(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
, pid_t pid
)
3023 icmp_t
*icmp
= connp
->conn_icmp
;
3024 icmp_stack_t
*is
= icmp
->icmp_is
;
3029 ip_xmit_attr_t
*ixa
;
3033 in6_addr_t v6nexthop
;
3038 * We need an exclusive copy of conn_ixa since the included IP
3039 * header could have any destination.
3040 * That copy has no pointers hence we
3041 * need to set them up once we've parsed the ancillary data.
3043 ixa
= conn_get_ixa_exclusive(connp
);
3045 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3051 * Caller has a reference on cr; from db_credp or because we
3052 * are running in process context.
3054 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3056 ixa
->ixa_cpid
= pid
;
3057 if (is_system_labeled()) {
3058 /* We need to restart with a label based on the cred */
3059 ip_xmit_attr_restore_tsl(ixa
, ixa
->ixa_cred
);
3062 /* In case previous destination was multicast or multirt */
3063 ip_attr_newdst(ixa
);
3065 /* Get a copy of conn_xmit_ipp since the TX label might change it */
3066 ipp
= kmem_zalloc(sizeof (*ipp
), KM_NOSLEEP
);
3068 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3069 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3070 ixa
->ixa_cpid
= connp
->conn_cpid
;
3072 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3076 mutex_enter(&connp
->conn_lock
);
3077 error
= ip_pkt_copy(&connp
->conn_xmit_ipp
, ipp
, KM_NOSLEEP
);
3078 mutex_exit(&connp
->conn_lock
);
3080 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3085 /* Sanity check length of packet */
3086 ipha
= (ipha_t
*)mp
->b_rptr
;
3088 ip_hdr_length
= IP_SIMPLE_HDR_LENGTH
;
3089 if ((mp
->b_wptr
- mp
->b_rptr
) < IP_SIMPLE_HDR_LENGTH
) {
3090 if (!pullupmsg(mp
, IP_SIMPLE_HDR_LENGTH
)) {
3091 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3095 ipha
= (ipha_t
*)mp
->b_rptr
;
3097 ipha
->ipha_version_and_hdr_length
=
3098 (IP_VERSION
<<4) | (ip_hdr_length
>>2);
3101 * We set IXAF_DONTFRAG if the application set DF which makes
3104 ipha
->ipha_fragment_offset_and_flags
&= htons(IPH_DF
);
3105 if (ipha
->ipha_fragment_offset_and_flags
& htons(IPH_DF
))
3106 ixa
->ixa_flags
|= (IXAF_DONTFRAG
| IXAF_PMTU_IPV4_DF
);
3108 ixa
->ixa_flags
&= ~(IXAF_DONTFRAG
| IXAF_PMTU_IPV4_DF
);
3110 /* Even for multicast and broadcast we honor the apps ttl */
3111 ixa
->ixa_flags
|= IXAF_NO_TTL_CHANGE
;
3114 * No source verification for non-local addresses
3116 if (ipha
->ipha_src
!= INADDR_ANY
&&
3117 ip_laddr_verify_v4(ipha
->ipha_src
, ixa
->ixa_zoneid
,
3118 is
->is_netstack
->netstack_ip
, B_FALSE
)
3119 != IPVL_UNICAST_UP
) {
3120 ixa
->ixa_flags
&= ~IXAF_VERIFY_SOURCE
;
3123 if (ipha
->ipha_dst
== INADDR_ANY
)
3124 ipha
->ipha_dst
= htonl(INADDR_LOOPBACK
);
3126 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_src
, &v6src
);
3127 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_dst
, &v6dst
);
3129 /* Defer IPsec if it might need to look at ICMP type/code */
3130 do_ipsec
= ipha
->ipha_protocol
!= IPPROTO_ICMP
;
3131 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
3133 ip_attr_nexthop(ipp
, ixa
, &v6dst
, &v6nexthop
);
3134 error
= ip_attr_connect(connp
, ixa
, &v6src
, &v6dst
, &v6nexthop
,
3135 connp
->conn_fport
, &v6src
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
|
3136 (do_ipsec
? IPDF_IPSEC
: 0));
3142 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3143 * Don't have the application see that errno
3145 error
= ENETUNREACH
;
3149 * Have !ipif_addr_ready address; drop packet silently
3150 * until we can get applications to not send until we
3157 if (ixa
->ixa_ire
!= NULL
) {
3159 * Let conn_ip_output/ire_send_noroute return
3160 * the error and send any local ICMP error.
3169 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3172 if (ipha
->ipha_src
== INADDR_ANY
)
3173 IN6_V4MAPPED_TO_IPADDR(&v6src
, ipha
->ipha_src
);
3176 * We might be going to a different destination than last time,
3177 * thus check that TX allows the communication and compute any
3180 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3181 * don't have to worry about concurrent threads.
3183 if (is_system_labeled()) {
3185 * Check whether Trusted Solaris policy allows communication
3186 * with this host, and pretend that the destination is
3187 * unreachable if not.
3188 * Compute any needed label and place it in ipp_label_v4/v6.
3190 * Later conn_build_hdr_template/conn_prepend_hdr takes
3191 * ipp_label_v4/v6 to form the packet.
3193 * Tsol note: We have ipp structure local to this thread so
3194 * no locking is needed.
3196 error
= conn_update_label(connp
, ixa
, &v6dst
, ipp
);
3199 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3205 * Save away a copy of the IPv4 header the application passed down
3206 * and then prepend an IPv4 header complete with any IP options
3208 * We need a struct copy since icmp_prepend_hdr will reuse the available
3209 * space in the mblk.
3212 mp
->b_rptr
+= IP_SIMPLE_HDR_LENGTH
;
3214 mp
= icmp_prepend_hdr(connp
, ixa
, ipp
, &v6src
, &v6dst
, 0, mp
, &error
);
3216 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3220 if (ixa
->ixa_pktlen
> IP_MAXPACKET
) {
3222 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3226 /* Restore key parts of the header that the application passed down */
3227 ipha
= (ipha_t
*)mp
->b_rptr
;
3228 ipha
->ipha_type_of_service
= iphas
.ipha_type_of_service
;
3229 ipha
->ipha_ident
= iphas
.ipha_ident
;
3230 ipha
->ipha_fragment_offset_and_flags
=
3231 iphas
.ipha_fragment_offset_and_flags
;
3232 ipha
->ipha_ttl
= iphas
.ipha_ttl
;
3233 ipha
->ipha_protocol
= iphas
.ipha_protocol
;
3234 ipha
->ipha_src
= iphas
.ipha_src
;
3235 ipha
->ipha_dst
= iphas
.ipha_dst
;
3237 ixa
->ixa_protocol
= ipha
->ipha_protocol
;
3240 * Make sure that the IP header plus any transport header that is
3241 * checksummed by ip_output is in the first mblk. (ip_output assumes
3242 * that at least the checksum field is in the first mblk.)
3244 switch (ipha
->ipha_protocol
) {
3255 ip_hdr_length
= IPH_HDR_LENGTH(ipha
);
3256 if (mp
->b_wptr
- mp
->b_rptr
< ip_hdr_length
+ tp_hdr_len
) {
3257 if (!pullupmsg(mp
, ip_hdr_length
+ tp_hdr_len
)) {
3258 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3259 if (mp
->b_cont
== NULL
)
3269 /* Policy might differ for different ICMP type/code */
3270 if (ixa
->ixa_ipsec_policy
!= NULL
) {
3271 IPPOL_REFRELE(ixa
->ixa_ipsec_policy
);
3272 ixa
->ixa_ipsec_policy
= NULL
;
3273 ixa
->ixa_flags
&= ~IXAF_IPSEC_SECURE
;
3275 mp
= ip_output_attach_policy(mp
, ipha
, NULL
, connp
, ixa
);
3277 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3278 error
= EHOSTUNREACH
; /* IPsec policy failure */
3283 /* We're done. Pass the packet to ip. */
3284 BUMP_MIB(&is
->is_rawip_mib
, rawipOutDatagrams
);
3286 error
= conn_ip_output(mp
, ixa
);
3287 /* No rawipOutErrors if an error since IP increases its error counter */
3292 (void) ixa_check_drain_insert(connp
, ixa
);
3297 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3298 * Don't have the application see that errno
3300 error
= ENETUNREACH
;
3304 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3305 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3306 ixa
->ixa_cpid
= connp
->conn_cpid
;
3309 kmem_free(ipp
, sizeof (*ipp
));
3314 icmp_output_attach_policy(mblk_t
*mp
, conn_t
*connp
, ip_xmit_attr_t
*ixa
)
3316 ipha_t
*ipha
= NULL
;
3319 if (ixa
->ixa_flags
& IXAF_IS_IPV4
)
3320 ipha
= (ipha_t
*)mp
->b_rptr
;
3322 ip6h
= (ip6_t
*)mp
->b_rptr
;
3324 if (ixa
->ixa_ipsec_policy
!= NULL
) {
3325 IPPOL_REFRELE(ixa
->ixa_ipsec_policy
);
3326 ixa
->ixa_ipsec_policy
= NULL
;
3327 ixa
->ixa_flags
&= ~IXAF_IPSEC_SECURE
;
3329 return (ip_output_attach_policy(mp
, ipha
, ip6h
, connp
, ixa
));
3333 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
3334 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
3335 * the TPI options, otherwise we take them from msg_control.
3336 * If both sin and sin6 are NULL it is a connected socket and we use conn_faddr.
3337 * Always consumes mp; never consumes tudr_mp.
3340 icmp_output_ancillary(conn_t
*connp
, sin_t
*sin
, sin6_t
*sin6
, mblk_t
*mp
,
3341 mblk_t
*tudr_mp
, struct nmsghdr
*msg
, cred_t
*cr
, pid_t pid
)
3343 icmp_t
*icmp
= connp
->conn_icmp
;
3344 icmp_stack_t
*is
= icmp
->icmp_is
;
3346 ip_xmit_attr_t
*ixa
;
3350 in6_addr_t v6nexthop
;
3353 int is_absreq_failure
= 0;
3354 conn_opt_arg_t coas
, *coa
;
3356 ASSERT(tudr_mp
!= NULL
|| msg
!= NULL
);
3359 * Get ixa before checking state to handle a disconnect race.
3361 * We need an exclusive copy of conn_ixa since the ancillary data
3362 * options might modify it. That copy has no pointers hence we
3363 * need to set them up once we've parsed the ancillary data.
3365 ixa
= conn_get_ixa_exclusive(connp
);
3367 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3372 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3374 ixa
->ixa_cpid
= pid
;
3375 if (is_system_labeled()) {
3376 /* We need to restart with a label based on the cred */
3377 ip_xmit_attr_restore_tsl(ixa
, ixa
->ixa_cred
);
3380 /* In case previous destination was multicast or multirt */
3381 ip_attr_newdst(ixa
);
3383 /* Get a copy of conn_xmit_ipp since the options might change it */
3384 ipp
= kmem_zalloc(sizeof (*ipp
), KM_NOSLEEP
);
3386 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3387 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3388 ixa
->ixa_cpid
= connp
->conn_cpid
;
3390 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3394 mutex_enter(&connp
->conn_lock
);
3395 error
= ip_pkt_copy(&connp
->conn_xmit_ipp
, ipp
, KM_NOSLEEP
);
3396 mutex_exit(&connp
->conn_lock
);
3398 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3404 * Parse the options and update ixa and ipp as a result.
3408 coa
->coa_connp
= connp
;
3411 coa
->coa_ancillary
= B_TRUE
;
3412 coa
->coa_changed
= 0;
3415 error
= process_auxiliary_options(connp
, msg
->msg_control
,
3416 msg
->msg_controllen
, coa
, &icmp_opt_obj
, icmp_opt_set
, cr
);
3418 struct T_unitdata_req
*tudr
;
3420 tudr
= (struct T_unitdata_req
*)tudr_mp
->b_rptr
;
3421 ASSERT(tudr
->PRIM_type
== T_UNITDATA_REQ
);
3422 error
= tpi_optcom_buf(connp
->conn_wq
, tudr_mp
,
3423 &tudr
->OPT_length
, tudr
->OPT_offset
, cr
, &icmp_opt_obj
,
3424 coa
, &is_absreq_failure
);
3428 * Note: No special action needed in this
3429 * module for "is_absreq_failure"
3432 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3435 ASSERT(is_absreq_failure
== 0);
3437 mutex_enter(&connp
->conn_lock
);
3439 * If laddr is unspecified then we look at sin6_src_id.
3440 * We will give precedence to a source address set with IPV6_PKTINFO
3441 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3442 * want ip_attr_connect to select a source (since it can fail) when
3443 * IPV6_PKTINFO is specified.
3444 * If this doesn't result in a source address then we get a source
3445 * from ip_attr_connect() below.
3447 v6src
= connp
->conn_saddr_v6
;
3449 IN6_IPADDR_TO_V4MAPPED(sin
->sin_addr
.s_addr
, &v6dst
);
3450 dstport
= sin
->sin_port
;
3452 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
3453 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
3454 } else if (sin6
!= NULL
) {
3458 v6dst
= sin6
->sin6_addr
;
3459 dstport
= sin6
->sin6_port
;
3460 flowinfo
= sin6
->sin6_flowinfo
;
3461 srcid
= sin6
->__sin6_src_id
;
3462 if (IN6_IS_ADDR_LINKSCOPE(&v6dst
) && sin6
->sin6_scope_id
!= 0) {
3463 ixa
->ixa_scopeid
= sin6
->sin6_scope_id
;
3464 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
3466 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
3468 v4mapped
= IN6_IS_ADDR_V4MAPPED(&v6dst
);
3470 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
3472 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
3473 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
3474 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
3475 v4mapped
, connp
->conn_netstack
)) {
3476 /* Mismatched v4mapped/v6 specified by srcid. */
3477 mutex_exit(&connp
->conn_lock
);
3478 error
= EADDRNOTAVAIL
;
3479 goto failed
; /* Does freemsg() and mib. */
3483 /* Connected case */
3484 dstport
= connp
->conn_fport
;
3485 v6dst
= connp
->conn_faddr_v6
;
3486 flowinfo
= connp
->conn_flowinfo
;
3488 mutex_exit(&connp
->conn_lock
);
3489 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3490 if (ipp
->ipp_fields
& IPPF_ADDR
) {
3491 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
3492 if (IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
3493 v6src
= ipp
->ipp_addr
;
3495 if (!IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
3496 v6src
= ipp
->ipp_addr
;
3500 * Allow source not assigned to the system
3501 * only if it is not a local address
3503 if (!V6_OR_V4_INADDR_ANY(v6src
)) {
3504 ip_laddr_t laddr_type
;
3506 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
3509 IN6_V4MAPPED_TO_IPADDR(&v6src
, v4src
);
3510 laddr_type
= ip_laddr_verify_v4(v4src
, ixa
->ixa_zoneid
,
3511 is
->is_netstack
->netstack_ip
, B_FALSE
);
3513 laddr_type
= ip_laddr_verify_v6(&v6src
, ixa
->ixa_zoneid
,
3514 is
->is_netstack
->netstack_ip
, B_FALSE
, B_FALSE
);
3516 if (laddr_type
!= IPVL_UNICAST_UP
)
3517 ixa
->ixa_flags
&= ~IXAF_VERIFY_SOURCE
;
3520 ip_attr_nexthop(ipp
, ixa
, &v6dst
, &v6nexthop
);
3521 error
= ip_attr_connect(connp
, ixa
, &v6src
, &v6dst
, &v6nexthop
, dstport
,
3522 &v6src
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
);
3529 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3530 * Don't have the application see that errno
3532 error
= ENETUNREACH
;
3536 * Have !ipif_addr_ready address; drop packet silently
3537 * until we can get applications to not send until we
3544 if (ixa
->ixa_ire
!= NULL
) {
3546 * Let conn_ip_output/ire_send_noroute return
3547 * the error and send any local ICMP error.
3556 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3561 * We might be going to a different destination than last time,
3562 * thus check that TX allows the communication and compute any
3565 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
3566 * don't have to worry about concurrent threads.
3568 if (is_system_labeled()) {
3570 * Check whether Trusted Solaris policy allows communication
3571 * with this host, and pretend that the destination is
3572 * unreachable if not.
3573 * Compute any needed label and place it in ipp_label_v4/v6.
3575 * Later conn_build_hdr_template/conn_prepend_hdr takes
3576 * ipp_label_v4/v6 to form the packet.
3578 * Tsol note: We have ipp structure local to this thread so
3579 * no locking is needed.
3581 error
= conn_update_label(connp
, ixa
, &v6dst
, ipp
);
3584 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3588 mp
= icmp_prepend_hdr(connp
, ixa
, ipp
, &v6src
, &v6dst
, flowinfo
, mp
,
3591 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3595 if (ixa
->ixa_pktlen
> IP_MAXPACKET
) {
3597 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3602 /* Policy might differ for different ICMP type/code */
3603 mp
= icmp_output_attach_policy(mp
, connp
, ixa
);
3605 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3606 error
= EHOSTUNREACH
; /* IPsec policy failure */
3610 /* We're done. Pass the packet to ip. */
3611 BUMP_MIB(&is
->is_rawip_mib
, rawipOutDatagrams
);
3613 error
= conn_ip_output(mp
, ixa
);
3614 if (!connp
->conn_unspec_src
)
3615 ixa
->ixa_flags
|= IXAF_VERIFY_SOURCE
;
3616 /* No rawipOutErrors if an error since IP increases its error counter */
3621 (void) ixa_check_drain_insert(connp
, ixa
);
3626 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3627 * Don't have the application see that errno
3629 error
= ENETUNREACH
;
3632 mutex_enter(&connp
->conn_lock
);
3634 * Clear the source and v6lastdst so we call ip_attr_connect
3635 * for the next packet and try to pick a better source.
3637 if (connp
->conn_mcbc_bind
)
3638 connp
->conn_saddr_v6
= ipv6_all_zeros
;
3640 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
3641 connp
->conn_v6lastdst
= ipv6_all_zeros
;
3642 mutex_exit(&connp
->conn_lock
);
3646 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3647 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3648 ixa
->ixa_cpid
= connp
->conn_cpid
;
3651 kmem_free(ipp
, sizeof (*ipp
));
3656 * Handle sending an M_DATA for a connected socket.
3657 * Handles both IPv4 and IPv6.
3660 icmp_output_connected(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
, pid_t pid
)
3662 icmp_t
*icmp
= connp
->conn_icmp
;
3663 icmp_stack_t
*is
= icmp
->icmp_is
;
3665 ip_xmit_attr_t
*ixa
;
3669 * If no other thread is using conn_ixa this just gets a reference to
3670 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
3672 ixa
= conn_get_ixa(connp
, B_FALSE
);
3674 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3680 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3682 ixa
->ixa_cpid
= pid
;
3684 /* Defer IPsec if it might need to look at ICMP type/code */
3685 switch (ixa
->ixa_protocol
) {
3687 case IPPROTO_ICMPV6
:
3694 mutex_enter(&connp
->conn_lock
);
3695 mp
= icmp_prepend_header_template(connp
, ixa
, mp
,
3696 &connp
->conn_saddr_v6
, connp
->conn_flowinfo
, &error
);
3700 mutex_exit(&connp
->conn_lock
);
3701 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3702 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3703 ixa
->ixa_cpid
= connp
->conn_cpid
;
3705 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3711 /* Policy might differ for different ICMP type/code */
3712 mp
= icmp_output_attach_policy(mp
, connp
, ixa
);
3714 mutex_exit(&connp
->conn_lock
);
3715 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3716 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3717 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3718 ixa
->ixa_cpid
= connp
->conn_cpid
;
3720 return (EHOSTUNREACH
); /* IPsec policy failure */
3725 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3726 * safe copy, then we need to fill in any pointers in it.
3728 if (ixa
->ixa_ire
== NULL
) {
3729 in6_addr_t faddr
, saddr
;
3733 saddr
= connp
->conn_saddr_v6
;
3734 faddr
= connp
->conn_faddr_v6
;
3735 fport
= connp
->conn_fport
;
3736 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &faddr
, &nexthop
);
3737 mutex_exit(&connp
->conn_lock
);
3739 error
= ip_attr_connect(connp
, ixa
, &saddr
, &faddr
, &nexthop
,
3740 fport
, NULL
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
|
3741 (do_ipsec
? IPDF_IPSEC
: 0));
3747 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3748 * Don't have the application see that errno
3750 error
= ENETUNREACH
;
3754 * Have !ipif_addr_ready address; drop packet silently
3755 * until we can get applications to not send until we
3762 if (ixa
->ixa_ire
!= NULL
) {
3764 * Let conn_ip_output/ire_send_noroute return
3765 * the error and send any local ICMP error.
3773 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3774 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3775 ixa
->ixa_cpid
= connp
->conn_cpid
;
3777 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3782 /* Done with conn_t */
3783 mutex_exit(&connp
->conn_lock
);
3786 /* We're done. Pass the packet to ip. */
3787 BUMP_MIB(&is
->is_rawip_mib
, rawipOutDatagrams
);
3789 error
= conn_ip_output(mp
, ixa
);
3790 /* No rawipOutErrors if an error since IP increases its error counter */
3795 (void) ixa_check_drain_insert(connp
, ixa
);
3800 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3801 * Don't have the application see that errno
3803 error
= ENETUNREACH
;
3806 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3807 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3808 ixa
->ixa_cpid
= connp
->conn_cpid
;
3814 * Handle sending an M_DATA to the last destination.
3815 * Handles both IPv4 and IPv6.
3817 * NOTE: The caller must hold conn_lock and we drop it here.
3820 icmp_output_lastdst(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
, pid_t pid
,
3821 ip_xmit_attr_t
*ixa
)
3823 icmp_t
*icmp
= connp
->conn_icmp
;
3824 icmp_stack_t
*is
= icmp
->icmp_is
;
3828 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
3829 ASSERT(ixa
!= NULL
);
3832 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3834 ixa
->ixa_cpid
= pid
;
3836 /* Defer IPsec if it might need to look at ICMP type/code */
3837 switch (ixa
->ixa_protocol
) {
3839 case IPPROTO_ICMPV6
:
3847 mp
= icmp_prepend_header_template(connp
, ixa
, mp
,
3848 &connp
->conn_v6lastsrc
, connp
->conn_lastflowinfo
, &error
);
3852 mutex_exit(&connp
->conn_lock
);
3853 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3854 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3855 ixa
->ixa_cpid
= connp
->conn_cpid
;
3857 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3863 /* Policy might differ for different ICMP type/code */
3864 mp
= icmp_output_attach_policy(mp
, connp
, ixa
);
3866 mutex_exit(&connp
->conn_lock
);
3867 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3868 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3869 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3870 ixa
->ixa_cpid
= connp
->conn_cpid
;
3872 return (EHOSTUNREACH
); /* IPsec policy failure */
3877 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
3878 * safe copy, then we need to fill in any pointers in it.
3880 if (ixa
->ixa_ire
== NULL
) {
3881 in6_addr_t lastdst
, lastsrc
;
3885 lastsrc
= connp
->conn_v6lastsrc
;
3886 lastdst
= connp
->conn_v6lastdst
;
3887 lastport
= connp
->conn_lastdstport
;
3888 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &lastdst
, &nexthop
);
3889 mutex_exit(&connp
->conn_lock
);
3891 error
= ip_attr_connect(connp
, ixa
, &lastsrc
, &lastdst
,
3892 &nexthop
, lastport
, NULL
, NULL
, IPDF_ALLOW_MCBC
|
3893 IPDF_VERIFY_DST
| (do_ipsec
? IPDF_IPSEC
: 0));
3899 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3900 * Don't have the application see that errno
3902 error
= ENETUNREACH
;
3906 * Have !ipif_addr_ready address; drop packet silently
3907 * until we can get applications to not send until we
3914 if (ixa
->ixa_ire
!= NULL
) {
3916 * Let conn_ip_output/ire_send_noroute return
3917 * the error and send any local ICMP error.
3925 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3926 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3927 ixa
->ixa_cpid
= connp
->conn_cpid
;
3929 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
3934 /* Done with conn_t */
3935 mutex_exit(&connp
->conn_lock
);
3938 /* We're done. Pass the packet to ip. */
3939 BUMP_MIB(&is
->is_rawip_mib
, rawipOutDatagrams
);
3940 error
= conn_ip_output(mp
, ixa
);
3941 /* No rawipOutErrors if an error since IP increases its error counter */
3946 (void) ixa_check_drain_insert(connp
, ixa
);
3951 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3952 * Don't have the application see that errno
3954 error
= ENETUNREACH
;
3957 mutex_enter(&connp
->conn_lock
);
3959 * Clear the source and v6lastdst so we call ip_attr_connect
3960 * for the next packet and try to pick a better source.
3962 if (connp
->conn_mcbc_bind
)
3963 connp
->conn_saddr_v6
= ipv6_all_zeros
;
3965 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
3966 connp
->conn_v6lastdst
= ipv6_all_zeros
;
3967 mutex_exit(&connp
->conn_lock
);
3970 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3971 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3972 ixa
->ixa_cpid
= connp
->conn_cpid
;
3979 * Prepend the header template and then fill in the source and
3980 * flowinfo. The caller needs to handle the destination address since
3981 * its setting is different if rthdr or source route.
3983 * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
3984 * When it returns NULL it sets errorp.
3987 icmp_prepend_header_template(conn_t
*connp
, ip_xmit_attr_t
*ixa
, mblk_t
*mp
,
3988 const in6_addr_t
*v6src
, uint32_t flowinfo
, int *errorp
)
3990 icmp_t
*icmp
= connp
->conn_icmp
;
3991 icmp_stack_t
*is
= icmp
->icmp_is
;
3995 uint_t ip_hdr_length
;
3999 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
4002 * Copy the header template.
4004 copylen
= connp
->conn_ht_iphc_len
;
4005 pktlen
= copylen
+ msgdsize(mp
);
4006 if (pktlen
> IP_MAXPACKET
) {
4011 ixa
->ixa_pktlen
= pktlen
;
4013 /* check/fix buffer config, setup pointers into it */
4014 iph
= mp
->b_rptr
- copylen
;
4015 if (DB_REF(mp
) != 1 || iph
< DB_BASE(mp
) || !OK_32PTR(iph
)) {
4018 mp1
= allocb(copylen
+ is
->is_wroff_extra
, BPRI_MED
);
4024 mp1
->b_wptr
= DB_LIM(mp1
);
4027 iph
= (mp
->b_wptr
- copylen
);
4030 bcopy(connp
->conn_ht_iphc
, iph
, copylen
);
4031 ip_hdr_length
= (uint_t
)(connp
->conn_ht_ulp
- connp
->conn_ht_iphc
);
4033 ixa
->ixa_ip_hdr_length
= ip_hdr_length
;
4036 * Prepare for ICMPv6 checksum done in IP.
4038 * icmp_build_hdr_template has already massaged any routing header
4039 * and placed the result in conn_sum.
4041 * We make it easy for IP to include our pseudo header
4042 * by putting our length (and any routing header adjustment)
4043 * in the ICMPv6 checksum field.
4045 cksum
= pktlen
- ip_hdr_length
;
4047 cksum
+= connp
->conn_sum
;
4048 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
4049 ASSERT(cksum
< 0x10000);
4051 ipp
= &connp
->conn_xmit_ipp
;
4052 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
4053 ipha_t
*ipha
= (ipha_t
*)iph
;
4055 ipha
->ipha_length
= htons((uint16_t)pktlen
);
4057 /* if IP_PKTINFO specified an address it wins over bind() */
4058 if ((ipp
->ipp_fields
& IPPF_ADDR
) &&
4059 IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
)) {
4060 ASSERT(ipp
->ipp_addr_v4
!= INADDR_ANY
);
4061 ipha
->ipha_src
= ipp
->ipp_addr_v4
;
4063 IN6_V4MAPPED_TO_IPADDR(v6src
, ipha
->ipha_src
);
4066 ip6_t
*ip6h
= (ip6_t
*)iph
;
4067 uint_t cksum_offset
= 0;
4069 ip6h
->ip6_plen
= htons((uint16_t)(pktlen
- IPV6_HDR_LEN
));
4071 /* if IP_PKTINFO specified an address it wins over bind() */
4072 if ((ipp
->ipp_fields
& IPPF_ADDR
) &&
4073 !IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
)) {
4074 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp
->ipp_addr
));
4075 ip6h
->ip6_src
= ipp
->ipp_addr
;
4077 ip6h
->ip6_src
= *v6src
;
4080 (IPV6_DEFAULT_VERS_AND_FLOW
& IPV6_VERS_AND_FLOW_MASK
) |
4081 (flowinfo
& ~IPV6_VERS_AND_FLOW_MASK
);
4082 if (ipp
->ipp_fields
& IPPF_TCLASS
) {
4083 /* Overrides the class part of flowinfo */
4084 ip6h
->ip6_vcf
= IPV6_TCLASS_FLOW(ip6h
->ip6_vcf
,
4088 if (ixa
->ixa_flags
& IXAF_SET_ULP_CKSUM
) {
4089 if (connp
->conn_proto
== IPPROTO_ICMPV6
) {
4090 cksum_offset
= ixa
->ixa_ip_hdr_length
+
4091 offsetof(icmp6_t
, icmp6_cksum
);
4092 } else if (ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
) {
4093 cksum_offset
= ixa
->ixa_ip_hdr_length
+
4094 ixa
->ixa_raw_cksum_offset
;
4097 if (cksum_offset
!= 0) {
4100 /* Make sure the checksum fits in the first mblk */
4101 if (cksum_offset
+ sizeof (short) > MBLKL(mp
)) {
4105 cksum_offset
+ sizeof (short));
4113 ip6h
= (ip6_t
*)iph
;
4115 ptr
= (uint16_t *)(mp
->b_rptr
+ cksum_offset
);
4116 *ptr
= htons(cksum
);
4124 * This routine handles all messages passed downstream. It either
4125 * consumes the message or passes it downstream; it never queues a
4129 icmp_wput(queue_t
*q
, mblk_t
*mp
)
4134 conn_t
*connp
= Q_TO_CONN(q
);
4135 icmp_t
*icmp
= connp
->conn_icmp
;
4137 struct sockaddr
*addr
= NULL
;
4139 icmp_stack_t
*is
= icmp
->icmp_is
;
4140 struct T_unitdata_req
*tudr
;
4146 * We directly handle several cases here: T_UNITDATA_REQ message
4147 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
4150 switch (DB_TYPE(mp
)) {
4152 /* sockfs never sends down M_DATA */
4153 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
4159 tudr
= (struct T_unitdata_req
*)mp
->b_rptr
;
4160 if (MBLKL(mp
) < sizeof (*tudr
) ||
4161 ((t_primp_t
)mp
->b_rptr
)->type
!= T_UNITDATA_REQ
) {
4162 icmp_wput_other(q
, mp
);
4168 icmp_wput_other(q
, mp
);
4172 /* Handle valid T_UNITDATA_REQ here */
4173 data_mp
= mp
->b_cont
;
4174 if (data_mp
== NULL
) {
4180 if (!MBLKIN(mp
, 0, tudr
->DEST_offset
+ tudr
->DEST_length
)) {
4181 error
= EADDRNOTAVAIL
;
4186 * All Solaris components should pass a db_credp
4187 * for this message, hence we ASSERT.
4188 * On production kernels we return an error to be robust against
4189 * random streams modules sitting on top of us.
4191 cr
= msg_getcred(mp
, &pid
);
4199 * If a port has not been bound to the stream, fail.
4200 * This is not a problem when sockfs is directly
4201 * above us, because it will ensure that the socket
4202 * is first bound before allowing data to be sent.
4204 if (icmp
->icmp_state
== TS_UNBND
) {
4208 addr
= (struct sockaddr
*)&mp
->b_rptr
[tudr
->DEST_offset
];
4209 addrlen
= tudr
->DEST_length
;
4211 switch (connp
->conn_family
) {
4213 sin6
= (sin6_t
*)addr
;
4214 if (!OK_32PTR((char *)sin6
) || (addrlen
!= sizeof (sin6_t
)) ||
4215 (sin6
->sin6_family
!= AF_INET6
)) {
4216 error
= EADDRNOTAVAIL
;
4220 /* No support for mapped addresses on raw sockets */
4221 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
4222 error
= EADDRNOTAVAIL
;
4225 srcid
= sin6
->__sin6_src_id
;
4228 * If the local address is a mapped address return
4230 * It would be possible to send an IPv6 packet but the
4231 * response would never make it back to the application
4232 * since it is bound to a mapped address.
4234 if (IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
)) {
4235 error
= EADDRNOTAVAIL
;
4239 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
))
4240 sin6
->sin6_addr
= ipv6_loopback
;
4242 if (tudr
->OPT_length
!= 0) {
4244 * If we are connected then the destination needs to be
4245 * the same as the connected one.
4247 if (icmp
->icmp_state
== TS_DATA_XFER
&&
4248 !conn_same_as_last_v6(connp
, sin6
)) {
4252 error
= icmp_output_ancillary(connp
, NULL
, sin6
,
4253 data_mp
, mp
, NULL
, cr
, pid
);
4255 ip_xmit_attr_t
*ixa
;
4258 * We have to allocate an ip_xmit_attr_t before we grab
4259 * conn_lock and we need to hold conn_lock once we've
4260 * checked conn_same_as_last_v6 to handle concurrent
4261 * send* calls on a socket.
4263 ixa
= conn_get_ixa(connp
, B_FALSE
);
4268 mutex_enter(&connp
->conn_lock
);
4270 if (conn_same_as_last_v6(connp
, sin6
) &&
4271 connp
->conn_lastsrcid
== srcid
&&
4272 ipsec_outbound_policy_current(ixa
)) {
4273 /* icmp_output_lastdst drops conn_lock */
4274 error
= icmp_output_lastdst(connp
, data_mp
, cr
,
4277 /* icmp_output_newdst drops conn_lock */
4278 error
= icmp_output_newdst(connp
, data_mp
, NULL
,
4279 sin6
, cr
, pid
, ixa
);
4281 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
4290 sin
= (sin_t
*)addr
;
4291 if ((!OK_32PTR((char *)sin
) || addrlen
!= sizeof (sin_t
)) ||
4292 (sin
->sin_family
!= AF_INET
)) {
4293 error
= EADDRNOTAVAIL
;
4296 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
4297 sin
->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
4299 /* Protocol 255 contains full IP headers */
4300 /* Read without holding lock */
4301 if (icmp
->icmp_hdrincl
) {
4302 if (MBLKL(data_mp
) < IP_SIMPLE_HDR_LENGTH
) {
4303 if (!pullupmsg(data_mp
, IP_SIMPLE_HDR_LENGTH
)) {
4308 error
= icmp_output_hdrincl(connp
, data_mp
, cr
, pid
);
4313 /* data_mp consumed above */
4318 if (tudr
->OPT_length
!= 0) {
4320 * If we are connected then the destination needs to be
4321 * the same as the connected one.
4323 if (icmp
->icmp_state
== TS_DATA_XFER
&&
4324 !conn_same_as_last_v4(connp
, sin
)) {
4328 error
= icmp_output_ancillary(connp
, sin
, NULL
,
4329 data_mp
, mp
, NULL
, cr
, pid
);
4331 ip_xmit_attr_t
*ixa
;
4334 * We have to allocate an ip_xmit_attr_t before we grab
4335 * conn_lock and we need to hold conn_lock once we've
4336 * checked conn_same_as_last_v4 to handle concurrent
4337 * send* calls on a socket.
4339 ixa
= conn_get_ixa(connp
, B_FALSE
);
4344 mutex_enter(&connp
->conn_lock
);
4346 if (conn_same_as_last_v4(connp
, sin
) &&
4347 ipsec_outbound_policy_current(ixa
)) {
4348 /* icmp_output_lastdst drops conn_lock */
4349 error
= icmp_output_lastdst(connp
, data_mp
, cr
,
4352 /* icmp_output_newdst drops conn_lock */
4353 error
= icmp_output_newdst(connp
, data_mp
, sin
,
4354 NULL
, cr
, pid
, ixa
);
4356 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
4365 /* mp is freed by the following routine */
4366 icmp_ud_err(q
, mp
, (t_scalar_t
)error
);
4370 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
4373 /* mp is freed by the following routine */
4374 icmp_ud_err(q
, mp
, (t_scalar_t
)error
);
4379 * Handle the case of the IP address or flow label being different
4380 * for both IPv4 and IPv6.
4382 * NOTE: The caller must hold conn_lock and we drop it here.
4385 icmp_output_newdst(conn_t
*connp
, mblk_t
*data_mp
, sin_t
*sin
, sin6_t
*sin6
,
4386 cred_t
*cr
, pid_t pid
, ip_xmit_attr_t
*ixa
)
4388 icmp_t
*icmp
= connp
->conn_icmp
;
4389 icmp_stack_t
*is
= icmp
->icmp_is
;
4391 ip_xmit_attr_t
*oldixa
;
4397 in6_addr_t v6nexthop
;
4400 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
4401 ASSERT(ixa
!= NULL
);
4404 * We hold conn_lock across all the use and modifications of
4405 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
4410 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
4412 ixa
->ixa_cpid
= pid
;
4413 if (is_system_labeled()) {
4414 /* We need to restart with a label based on the cred */
4415 ip_xmit_attr_restore_tsl(ixa
, ixa
->ixa_cred
);
4418 * If we are connected then the destination needs to be the
4419 * same as the connected one, which is not the case here since we
4420 * checked for that above.
4422 if (icmp
->icmp_state
== TS_DATA_XFER
) {
4423 mutex_exit(&connp
->conn_lock
);
4429 * Before we modify the ixa at all, invalidate our most recent address
4430 * to assure that any subsequent call to conn_same_as_last_v6() will
4431 * not indicate a match: any thread that picks up conn_lock after we
4432 * drop it (but before we pick it up again and properly set the most
4433 * recent address) must not associate the ixa with the (now old) last
4436 connp
->conn_v6lastdst
= ipv6_all_zeros
;
4438 /* In case previous destination was multicast or multirt */
4439 ip_attr_newdst(ixa
);
4442 * If laddr is unspecified then we look at sin6_src_id.
4443 * We will give precedence to a source address set with IPV6_PKTINFO
4444 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
4445 * want ip_attr_connect to select a source (since it can fail) when
4446 * IPV6_PKTINFO is specified.
4447 * If this doesn't result in a source address then we get a source
4448 * from ip_attr_connect() below.
4450 v6src
= connp
->conn_saddr_v6
;
4452 IN6_IPADDR_TO_V4MAPPED(sin
->sin_addr
.s_addr
, &v6dst
);
4453 dstport
= sin
->sin_port
;
4455 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */
4457 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
4458 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
4462 v6dst
= sin6
->sin6_addr
;
4463 dstport
= sin6
->sin6_port
;
4464 flowinfo
= sin6
->sin6_flowinfo
;
4465 srcid
= sin6
->__sin6_src_id
;
4466 if (IN6_IS_ADDR_LINKSCOPE(&v6dst
) && sin6
->sin6_scope_id
!= 0) {
4467 ixa
->ixa_scopeid
= sin6
->sin6_scope_id
;
4468 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
4470 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
4472 v4mapped
= IN6_IS_ADDR_V4MAPPED(&v6dst
);
4474 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
4476 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
4477 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
4478 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
4479 v4mapped
, connp
->conn_netstack
)) {
4480 /* Mismatched v4mapped/v6 specified by srcid. */
4481 mutex_exit(&connp
->conn_lock
);
4482 error
= EADDRNOTAVAIL
;
4487 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
4488 if (connp
->conn_xmit_ipp
.ipp_fields
& IPPF_ADDR
) {
4489 ip_pkt_t
*ipp
= &connp
->conn_xmit_ipp
;
4491 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
4492 if (IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
4493 v6src
= ipp
->ipp_addr
;
4495 if (!IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
4496 v6src
= ipp
->ipp_addr
;
4500 /* Defer IPsec if it might need to look at ICMP type/code */
4501 switch (ixa
->ixa_protocol
) {
4503 case IPPROTO_ICMPV6
:
4510 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &v6dst
, &v6nexthop
);
4511 mutex_exit(&connp
->conn_lock
);
4513 error
= ip_attr_connect(connp
, ixa
, &v6src
, &v6dst
, &v6nexthop
, dstport
,
4514 &v6src
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
|
4515 (do_ipsec
? IPDF_IPSEC
: 0));
4521 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4522 * Don't have the application see that errno
4524 error
= ENETUNREACH
;
4528 * Have !ipif_addr_ready address; drop packet silently
4529 * until we can get applications to not send until we
4536 if (ixa
->ixa_ire
!= NULL
) {
4538 * Let conn_ip_output/ire_send_noroute return
4539 * the error and send any local ICMP error.
4550 mutex_enter(&connp
->conn_lock
);
4552 * While we dropped the lock some other thread might have connected
4553 * this socket. If so we bail out with EISCONN to ensure that the
4554 * connecting thread is the one that updates conn_ixa, conn_ht_*
4557 if (icmp
->icmp_state
== TS_DATA_XFER
) {
4558 mutex_exit(&connp
->conn_lock
);
4564 * We need to rebuild the headers if
4565 * - we are labeling packets (could be different for different
4567 * - we have a source route (or routing header) since we need to
4568 * massage that to get the pseudo-header checksum
4569 * - a socket option with COA_HEADER_CHANGED has been set which
4570 * set conn_v6lastdst to zero.
4572 * Otherwise the prepend function will just update the src, dst,
4575 if (is_system_labeled()) {
4576 /* TX MLP requires SCM_UCRED and don't have that here */
4577 if (connp
->conn_mlp_type
!= mlptSingle
) {
4578 mutex_exit(&connp
->conn_lock
);
4579 error
= ECONNREFUSED
;
4583 * Check whether Trusted Solaris policy allows communication
4584 * with this host, and pretend that the destination is
4585 * unreachable if not.
4586 * Compute any needed label and place it in ipp_label_v4/v6.
4588 * Later conn_build_hdr_template/conn_prepend_hdr takes
4589 * ipp_label_v4/v6 to form the packet.
4591 * Tsol note: Since we hold conn_lock we know no other
4592 * thread manipulates conn_xmit_ipp.
4594 error
= conn_update_label(connp
, ixa
, &v6dst
,
4595 &connp
->conn_xmit_ipp
);
4597 mutex_exit(&connp
->conn_lock
);
4600 /* Rebuild the header template */
4601 error
= icmp_build_hdr_template(connp
, &v6src
, &v6dst
,
4604 mutex_exit(&connp
->conn_lock
);
4607 } else if (connp
->conn_xmit_ipp
.ipp_fields
&
4608 (IPPF_IPV4_OPTIONS
|IPPF_RTHDR
) ||
4609 IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_v6lastdst
)) {
4610 /* Rebuild the header template */
4611 error
= icmp_build_hdr_template(connp
, &v6src
, &v6dst
,
4614 mutex_exit(&connp
->conn_lock
);
4618 /* Simply update the destination address if no source route */
4619 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
4620 ipha_t
*ipha
= (ipha_t
*)connp
->conn_ht_iphc
;
4622 IN6_V4MAPPED_TO_IPADDR(&v6dst
, ipha
->ipha_dst
);
4623 if (ixa
->ixa_flags
& IXAF_PMTU_IPV4_DF
) {
4624 ipha
->ipha_fragment_offset_and_flags
|=
4627 ipha
->ipha_fragment_offset_and_flags
&=
4631 ip6_t
*ip6h
= (ip6_t
*)connp
->conn_ht_iphc
;
4632 ip6h
->ip6_dst
= v6dst
;
4637 * Remember the dst etc which corresponds to the built header
4638 * template and conn_ixa.
4640 oldixa
= conn_replace_ixa(connp
, ixa
);
4641 connp
->conn_v6lastdst
= v6dst
;
4642 connp
->conn_lastflowinfo
= flowinfo
;
4643 connp
->conn_lastscopeid
= ixa
->ixa_scopeid
;
4644 connp
->conn_lastsrcid
= srcid
;
4645 /* Also remember a source to use together with lastdst */
4646 connp
->conn_v6lastsrc
= v6src
;
4648 data_mp
= icmp_prepend_header_template(connp
, ixa
, data_mp
, &v6src
,
4651 /* Done with conn_t */
4652 mutex_exit(&connp
->conn_lock
);
4653 ixa_refrele(oldixa
);
4655 if (data_mp
== NULL
) {
4661 /* Policy might differ for different ICMP type/code */
4662 data_mp
= icmp_output_attach_policy(data_mp
, connp
, ixa
);
4663 if (data_mp
== NULL
) {
4664 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
4665 error
= EHOSTUNREACH
; /* IPsec policy failure */
4670 /* We're done. Pass the packet to ip. */
4671 BUMP_MIB(&is
->is_rawip_mib
, rawipOutDatagrams
);
4673 error
= conn_ip_output(data_mp
, ixa
);
4674 /* No rawipOutErrors if an error since IP increases its error counter */
4679 (void) ixa_check_drain_insert(connp
, ixa
);
4684 * IXAF_VERIFY_SOURCE tells us to pick a better source.
4685 * Don't have the application see that errno
4687 error
= ENETUNREACH
;
4690 mutex_enter(&connp
->conn_lock
);
4692 * Clear the source and v6lastdst so we call ip_attr_connect
4693 * for the next packet and try to pick a better source.
4695 if (connp
->conn_mcbc_bind
)
4696 connp
->conn_saddr_v6
= ipv6_all_zeros
;
4698 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
4699 connp
->conn_v6lastdst
= ipv6_all_zeros
;
4700 mutex_exit(&connp
->conn_lock
);
4704 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
4705 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
4706 ixa
->ixa_cpid
= connp
->conn_cpid
;
4711 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
4712 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
4713 ixa
->ixa_cpid
= connp
->conn_cpid
;
4716 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
4723 icmp_wput_fallback(queue_t
*q
, mblk_t
*mp
)
4726 cmn_err(CE_CONT
, "icmp_wput_fallback: Message during fallback \n");
4733 icmp_wput_other(queue_t
*q
, mblk_t
*mp
)
4735 uchar_t
*rptr
= mp
->b_rptr
;
4736 struct iocblk
*iocp
;
4737 conn_t
*connp
= Q_TO_CONN(q
);
4738 icmp_t
*icmp
= connp
->conn_icmp
;
4741 switch (mp
->b_datap
->db_type
) {
4744 if (mp
->b_wptr
- rptr
< sizeof (t_scalar_t
)) {
4746 * If the message does not contain a PRIM_type,
4752 switch (((t_primp_t
)rptr
)->type
) {
4754 icmp_addr_req(q
, mp
);
4758 icmp_tpi_bind(q
, mp
);
4761 icmp_tpi_connect(q
, mp
);
4763 case T_CAPABILITY_REQ
:
4764 icmp_capability_req(q
, mp
);
4767 icmp_info_req(q
, mp
);
4769 case T_UNITDATA_REQ
:
4771 * If a T_UNITDATA_REQ gets here, the address must
4772 * be bad. Valid T_UNITDATA_REQs are handled
4775 icmp_ud_err(q
, mp
, EADDRNOTAVAIL
);
4778 icmp_tpi_unbind(q
, mp
);
4780 case T_SVR4_OPTMGMT_REQ
:
4782 * All Solaris components should pass a db_credp
4783 * for this TPI message, hence we ASSERT.
4784 * But in case there is some other M_PROTO that looks
4785 * like a TPI message sent by some other kernel
4786 * component, we check and return an error.
4788 cr
= msg_getcred(mp
, NULL
);
4791 icmp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
4795 if (!snmpcom_req(q
, mp
, icmp_snmp_set
, ip_snmp_get
,
4797 svr4_optcom_req(q
, mp
, cr
, &icmp_opt_obj
);
4803 * All Solaris components should pass a db_credp
4804 * for this TPI message, hence we ASSERT.
4805 * But in case there is some other M_PROTO that looks
4806 * like a TPI message sent by some other kernel
4807 * component, we check and return an error.
4809 cr
= msg_getcred(mp
, NULL
);
4812 icmp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
4815 tpi_optcom_req(q
, mp
, cr
, &icmp_opt_obj
);
4819 icmp_tpi_disconnect(q
, mp
);
4822 /* The following TPI message is not supported by icmp. */
4825 icmp_err_ack(q
, mp
, TNOTSUPPORT
, 0);
4828 /* The following 3 TPI requests are illegal for icmp. */
4832 icmp_err_ack(q
, mp
, TNOTSUPPORT
, 0);
4840 flushq(q
, FLUSHDATA
);
4843 iocp
= (struct iocblk
*)mp
->b_rptr
;
4844 switch (iocp
->ioc_cmd
) {
4845 case TI_GETPEERNAME
:
4846 if (icmp
->icmp_state
!= TS_DATA_XFER
) {
4848 * If a default destination address has not
4849 * been associated with the stream, then we
4850 * don't know the peer's name.
4852 iocp
->ioc_error
= ENOTCONN
;
4853 iocp
->ioc_count
= 0;
4854 mp
->b_datap
->db_type
= M_IOCACK
;
4861 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4862 * need to copyin the user's strbuf structure.
4863 * Processing will continue in the M_IOCDATA case
4866 mi_copyin(q
, mp
, NULL
,
4867 SIZEOF_STRUCT(strbuf
, iocp
->ioc_flag
));
4874 icmp_wput_iocdata(q
, mp
);
4877 /* Unrecognized messages are passed through without change. */
4880 ip_wput_nondata(q
, mp
);
4884 * icmp_wput_iocdata is called by icmp_wput_other to handle all M_IOCDATA
4888 icmp_wput_iocdata(queue_t
*q
, mblk_t
*mp
)
4891 STRUCT_HANDLE(strbuf
, sb
);
4893 conn_t
*connp
= Q_TO_CONN(q
);
4894 icmp_t
*icmp
= connp
->conn_icmp
;
4896 /* Make sure it is one of ours. */
4897 switch (((struct iocblk
*)mp
->b_rptr
)->ioc_cmd
) {
4899 case TI_GETPEERNAME
:
4902 ip_wput_nondata(q
, mp
);
4906 switch (mi_copy_state(q
, mp
, &mp1
)) {
4909 case MI_COPY_CASE(MI_COPY_IN
, 1):
4911 case MI_COPY_CASE(MI_COPY_OUT
, 1):
4913 * The address has been copied out, so now
4914 * copyout the strbuf.
4918 case MI_COPY_CASE(MI_COPY_OUT
, 2):
4920 * The address and strbuf have been copied out.
4921 * We're done, so just acknowledge the original
4924 mi_copy_done(q
, mp
, 0);
4928 * Something strange has happened, so acknowledge
4929 * the original M_IOCTL with an EPROTO error.
4931 mi_copy_done(q
, mp
, EPROTO
);
4936 * Now we have the strbuf structure for TI_GETMYNAME
4937 * and TI_GETPEERNAME. Next we copyout the requested
4938 * address and then we'll copyout the strbuf.
4940 STRUCT_SET_HANDLE(sb
, ((struct iocblk
*)mp
->b_rptr
)->ioc_flag
,
4941 (void *)mp1
->b_rptr
);
4943 if (connp
->conn_family
== AF_INET
)
4944 addrlen
= sizeof (sin_t
);
4946 addrlen
= sizeof (sin6_t
);
4948 if (STRUCT_FGET(sb
, maxlen
) < addrlen
) {
4949 mi_copy_done(q
, mp
, EINVAL
);
4952 switch (((struct iocblk
*)mp
->b_rptr
)->ioc_cmd
) {
4955 case TI_GETPEERNAME
:
4956 if (icmp
->icmp_state
!= TS_DATA_XFER
) {
4957 mi_copy_done(q
, mp
, ENOTCONN
);
4962 mi_copy_done(q
, mp
, EPROTO
);
4965 mp1
= mi_copyout_alloc(q
, mp
, STRUCT_FGETP(sb
, buf
), addrlen
, B_TRUE
);
4969 STRUCT_FSET(sb
, len
, addrlen
);
4970 switch (((struct iocblk
*)mp
->b_rptr
)->ioc_cmd
) {
4972 (void) conn_getsockname(connp
, (struct sockaddr
*)mp1
->b_wptr
,
4975 case TI_GETPEERNAME
:
4976 (void) conn_getpeername(connp
, (struct sockaddr
*)mp1
->b_wptr
,
4980 mp1
->b_wptr
+= addrlen
;
4981 /* Copy out the address */
4986 icmp_ddi_g_init(void)
4988 icmp_max_optsize
= optcom_max_optsize(icmp_opt_obj
.odb_opt_des_arr
,
4989 icmp_opt_obj
.odb_opt_arr_cnt
);
4992 * We want to be informed each time a stack is created or
4993 * destroyed in the kernel, so we can maintain the
4994 * set of icmp_stack_t's.
4996 netstack_register(NS_ICMP
, rawip_stack_init
, NULL
, rawip_stack_fini
);
5000 icmp_ddi_g_destroy(void)
5002 netstack_unregister(NS_ICMP
);
5005 #define INET_NAME "ip"
5008 * Initialize the ICMP stack instance.
5011 rawip_stack_init(netstackid_t stackid
, netstack_t
*ns
)
5018 is
= (icmp_stack_t
*)kmem_zalloc(sizeof (*is
), KM_SLEEP
);
5019 is
->is_netstack
= ns
;
5021 arrsz
= sizeof (icmp_propinfo_tbl
);
5022 is
->is_propinfo_tbl
= (mod_prop_info_t
*)kmem_alloc(arrsz
, KM_SLEEP
);
5023 bcopy(icmp_propinfo_tbl
, is
->is_propinfo_tbl
, arrsz
);
5025 is
->is_ksp
= rawip_kstat_init(stackid
);
5027 major
= mod_name_to_major(INET_NAME
);
5028 error
= ldi_ident_from_major(major
, &is
->is_ldi_ident
);
5034 * Free the ICMP stack instance.
5037 rawip_stack_fini(netstackid_t stackid
, void *arg
)
5039 icmp_stack_t
*is
= (icmp_stack_t
*)arg
;
5041 kmem_free(is
->is_propinfo_tbl
, sizeof (icmp_propinfo_tbl
));
5042 is
->is_propinfo_tbl
= NULL
;
5044 rawip_kstat_fini(stackid
, is
->is_ksp
);
5046 ldi_ident_release(is
->is_ldi_ident
);
5047 kmem_free(is
, sizeof (*is
));
5051 rawip_kstat_init(netstackid_t stackid
)
5055 rawip_named_kstat_t
template = {
5056 { "inDatagrams", KSTAT_DATA_UINT32
, 0 },
5057 { "inCksumErrs", KSTAT_DATA_UINT32
, 0 },
5058 { "inErrors", KSTAT_DATA_UINT32
, 0 },
5059 { "outDatagrams", KSTAT_DATA_UINT32
, 0 },
5060 { "outErrors", KSTAT_DATA_UINT32
, 0 },
5063 ksp
= kstat_create_netstack("icmp", 0, "rawip", "mib2",
5064 KSTAT_TYPE_NAMED
, NUM_OF_FIELDS(rawip_named_kstat_t
), 0, stackid
);
5065 if (ksp
== NULL
|| ksp
->ks_data
== NULL
)
5068 bcopy(&template, ksp
->ks_data
, sizeof (template));
5069 ksp
->ks_update
= rawip_kstat_update
;
5070 ksp
->ks_private
= (void *)(uintptr_t)stackid
;
5077 rawip_kstat_fini(netstackid_t stackid
, kstat_t
*ksp
)
5080 ASSERT(stackid
== (netstackid_t
)(uintptr_t)ksp
->ks_private
);
5081 kstat_delete_netstack(ksp
, stackid
);
5086 rawip_kstat_update(kstat_t
*ksp
, int rw
)
5088 rawip_named_kstat_t
*rawipkp
;
5089 netstackid_t stackid
= (netstackid_t
)(uintptr_t)ksp
->ks_private
;
5093 if (ksp
->ks_data
== NULL
)
5096 if (rw
== KSTAT_WRITE
)
5099 rawipkp
= (rawip_named_kstat_t
*)ksp
->ks_data
;
5101 ns
= netstack_find_by_stackid(stackid
);
5104 is
= ns
->netstack_icmp
;
5109 rawipkp
->inDatagrams
.value
.ui32
= is
->is_rawip_mib
.rawipInDatagrams
;
5110 rawipkp
->inCksumErrs
.value
.ui32
= is
->is_rawip_mib
.rawipInCksumErrs
;
5111 rawipkp
->inErrors
.value
.ui32
= is
->is_rawip_mib
.rawipInErrors
;
5112 rawipkp
->outDatagrams
.value
.ui32
= is
->is_rawip_mib
.rawipOutDatagrams
;
5113 rawipkp
->outErrors
.value
.ui32
= is
->is_rawip_mib
.rawipOutErrors
;
5120 rawip_accept(sock_lower_handle_t lproto_handle
,
5121 sock_lower_handle_t eproto_handle
, sock_upper_handle_t sock_handle
,
5124 return (EOPNOTSUPP
);
5129 rawip_bind(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5130 socklen_t len
, cred_t
*cr
)
5132 conn_t
*connp
= (conn_t
*)proto_handle
;
5135 /* All Solaris components should pass a cred for this operation. */
5138 /* Binding to a NULL address really means unbind */
5140 error
= rawip_do_unbind(connp
);
5142 error
= rawip_do_bind(connp
, sa
, len
);
5145 if (error
== -TOUTSTATE
)
5148 error
= proto_tlitosyserr(-error
);
5154 rawip_implicit_bind(conn_t
*connp
)
5162 if (connp
->conn_family
== AF_INET
) {
5163 len
= sizeof (struct sockaddr_in
);
5164 sin
= (sin_t
*)&sin6addr
;
5166 sin
->sin_family
= AF_INET
;
5167 sin
->sin_addr
.s_addr
= INADDR_ANY
;
5169 ASSERT(connp
->conn_family
== AF_INET6
);
5170 len
= sizeof (sin6_t
);
5171 sin6
= (sin6_t
*)&sin6addr
;
5173 sin6
->sin6_family
= AF_INET6
;
5174 V6_SET_ZERO(sin6
->sin6_addr
);
5177 error
= rawip_do_bind(connp
, (struct sockaddr
*)&sin6addr
, len
);
5179 return ((error
< 0) ? proto_tlitosyserr(-error
) : error
);
5183 rawip_unbind(conn_t
*connp
)
5187 error
= rawip_do_unbind(connp
);
5189 error
= proto_tlitosyserr(-error
);
5196 rawip_listen(sock_lower_handle_t proto_handle
, int backlog
, cred_t
*cr
)
5198 return (EOPNOTSUPP
);
5202 rawip_connect(sock_lower_handle_t proto_handle
, const struct sockaddr
*sa
,
5203 socklen_t len
, sock_connid_t
*id
, cred_t
*cr
)
5205 conn_t
*connp
= (conn_t
*)proto_handle
;
5206 icmp_t
*icmp
= connp
->conn_icmp
;
5208 boolean_t did_bind
= B_FALSE
;
5209 pid_t pid
= curproc
->p_pid
;
5211 /* All Solaris components should pass a cred for this operation. */
5217 * Make sure we are connected
5219 if (icmp
->icmp_state
!= TS_DATA_XFER
)
5222 error
= icmp_disconnect(connp
);
5226 error
= proto_verify_ip_addr(connp
->conn_family
, sa
, len
);
5230 /* do an implicit bind if necessary */
5231 if (icmp
->icmp_state
== TS_UNBND
) {
5232 error
= rawip_implicit_bind(connp
);
5234 * We could be racing with an actual bind, in which case
5235 * we would see EPROTO. We cross our fingers and try
5238 if (!(error
== 0 || error
== EPROTO
))
5244 * set SO_DGRAM_ERRIND
5246 connp
->conn_dgram_errind
= B_TRUE
;
5248 error
= rawip_do_connect(connp
, sa
, len
, cr
, pid
);
5249 if (error
!= 0 && did_bind
) {
5252 unbind_err
= rawip_unbind(connp
);
5253 ASSERT(unbind_err
== 0);
5258 (*connp
->conn_upcalls
->su_connected
)(connp
->conn_upper_handle
,
5260 } else if (error
< 0) {
5261 error
= proto_tlitosyserr(-error
);
/*
 * rawip_fallback: move a raw IP endpoint from its non-STREAMS socket form
 * onto the STREAMS queue pair q supplied by the caller.
 *
 * proto_handle is the conn_t for the endpoint; quiesced_cb and arg are
 * handed the collected endpoint state once the stream is wired up.
 * direct_sockfs is not referenced in the visible statements.
 *
 * NOTE(review): this text is a lossy extract. Original line numbers are
 * fused into the statements and several lines (braces, some declarations,
 * the return) are absent. The comments describe only what is visible.
 */
5268 rawip_fallback(sock_lower_handle_t proto_handle
, queue_t
*q
,
5269 boolean_t direct_sockfs
, so_proto_quiesced_cb_t quiesced_cb
,
5270 sock_quiesce_arg_t
*arg
)
5272 conn_t
*connp
= (conn_t
*)proto_handle
;
5274 struct T_capability_ack tca
;
5275 struct sockaddr_in6 laddr
, faddr
;
5276 socklen_t laddrlen
, faddrlen
;
5278 struct stroptions
*stropt
;
5279 mblk_t
*mp
, *stropt_mp
;
5282 icmp
= connp
->conn_icmp
;
/*
 * Allocate the M_SETOPTS message up front with allocb_wait and STR_NOSIG,
 * before any queue or conn state is modified.
 */
5284 stropt_mp
= allocb_wait(sizeof (*stropt
), BPRI_HI
, STR_NOSIG
, NULL
);
5287 * setup the fallback stream that was allocated
/*
 * The incoming q_ptr values are saved into conn_dev and conn_minor_arena
 * before both q_ptr fields are overwritten with connp below.
 */
5289 connp
->conn_dev
= (dev_t
)RD(q
)->q_ptr
;
5290 connp
->conn_minor_arena
= WR(q
)->q_ptr
;
5292 RD(q
)->q_ptr
= WR(q
)->q_ptr
= connp
;
/* Install icmpwinit as the qinit of the write side. */
5294 WR(q
)->q_qinfo
= &icmpwinit
;
5296 connp
->conn_rq
= RD(q
);
5297 connp
->conn_wq
= WR(q
);
5299 /* Notify stream head about options before sending up data */
5300 stropt_mp
->b_datap
->db_type
= M_SETOPTS
;
5301 stropt_mp
->b_wptr
+= sizeof (*stropt
);
5302 stropt
= (struct stroptions
*)stropt_mp
->b_rptr
;
/* Only the write offset and read high-water mark are conveyed. */
5303 stropt
->so_flags
= SO_WROFF
| SO_HIWAT
;
5304 stropt
->so_wroff
= connp
->conn_wroff
;
5305 stropt
->so_hiwat
= connp
->conn_rcvbuf
;
5306 putnext(RD(q
), stropt_mp
);
5309 * free helper stream
5311 ip_free_helper_stream(connp
);
5314 * Collect the information needed to sync with the sonode
5316 icmp_do_capability_ack(icmp
, &tca
, TC1_INFO
);
/*
 * Both address buffers are sockaddr_in6 sized, so one length covers either
 * address family. The getsockname result is deliberately discarded; the
 * getpeername result is kept in error.
 */
5318 laddrlen
= faddrlen
= sizeof (sin6_t
);
5319 (void) rawip_getsockname((sock_lower_handle_t
)connp
,
5320 (struct sockaddr
*)&laddr
, &laddrlen
, CRED());
5321 error
= rawip_getpeername((sock_lower_handle_t
)connp
,
5322 (struct sockaddr
*)&faddr
, &faddrlen
, CRED());
/* Translate conn level settings into socket option flags for the callback. */
5326 if (connp
->conn_dgram_errind
)
5327 opts
|= SO_DGRAM_ERRIND
;
5328 if (connp
->conn_ixa
->ixa_flags
& IXAF_DONTROUTE
)
5329 opts
|= SO_DONTROUTE
;
/*
 * Hand the capability ack, both addresses and the option flags to the
 * quiesce callback. It returns an mblk chain in mp.
 */
5331 mp
= (*quiesced_cb
)(connp
->conn_upper_handle
, arg
, &tca
,
5332 (struct sockaddr
*)&laddr
, laddrlen
,
5333 (struct sockaddr
*)&faddr
, faddrlen
, opts
);
5336 * Attempts to send data up during fallback will result in it being
5337 * queued in icmp_t. Now we push up any queued packets.
5339 mutex_enter(&icmp
->icmp_recv_lock
);
/* The mblk returned by the callback is placed ahead of the queued packets. */
5341 mp
->b_next
= icmp
->icmp_fallback_queue_head
;
5342 icmp
->icmp_fallback_queue_head
= mp
;
/*
 * Drain the fallback queue one message at a time. icmp_recv_lock is dropped
 * and re-taken around each message.
 * NOTE(review): the statement between the unlock and relock is not visible
 * here; presumably it passes mp upstream - confirm against the full file.
 */
5344 while (icmp
->icmp_fallback_queue_head
!= NULL
) {
5345 mp
= icmp
->icmp_fallback_queue_head
;
5346 icmp
->icmp_fallback_queue_head
= mp
->b_next
;
5348 mutex_exit(&icmp
->icmp_recv_lock
);
5350 mutex_enter(&icmp
->icmp_recv_lock
);
/* The head is NULL once the loop ends, so this resets the tail as well. */
5352 icmp
->icmp_fallback_queue_tail
= icmp
->icmp_fallback_queue_head
;
5355 * No longer a streams less socket
/*
 * IPCL_NONSTR is cleared under conn_lock while icmp_recv_lock is still held,
 * so the flag change and the emptied queue are observed together by anyone
 * taking icmp_recv_lock.
 */
5357 mutex_enter(&connp
->conn_lock
);
5358 connp
->conn_flags
&= ~IPCL_NONSTR
;
5359 mutex_exit(&connp
->conn_lock
);
5361 mutex_exit(&icmp
->icmp_recv_lock
);
5363 ASSERT(icmp
->icmp_fallback_queue_head
== NULL
&&
5364 icmp
->icmp_fallback_queue_tail
== NULL
);
5366 ASSERT(connp
->conn_ref
>= 1);
/*
 * rawip_create: socket creation entry point for raw IP.
 *
 * Accepts only SOCK_RAW over AF_INET or AF_INET6; anything else stores
 * EPROTONOSUPPORT in *errorp. On success the new conn_t is returned as the
 * lower handle, *sock_downcalls is pointed at sock_rawip_downcalls and
 * *smodep is set to SM_ATOMIC. proto is not referenced in the visible
 * statements.
 *
 * NOTE(review): lossy extract; braces, the local declaration and the early
 * return are not visible.
 */
5373 rawip_create(int family
, int type
, int proto
, sock_downcalls_t
**sock_downcalls
,
5374 uint_t
*smodep
, int *errorp
, int flags
, cred_t
*credp
)
5378 if (type
!= SOCK_RAW
|| (family
!= AF_INET
&& family
!= AF_INET6
)) {
5379 *errorp
= EPROTONOSUPPORT
;
/* rawip_do_open reports failure through errorp and a NULL return. */
5383 connp
= rawip_do_open(family
, credp
, errorp
, flags
);
5384 if (connp
!= NULL
) {
/* Mark the conn as a non-STREAMS socket. */
5385 connp
->conn_flags
|= IPCL_NONSTR
;
/* Clearing CONN_INCIPIENT is done under conn_lock. */
5387 mutex_enter(&connp
->conn_lock
);
5388 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
5389 mutex_exit(&connp
->conn_lock
);
5390 *sock_downcalls
= &sock_rawip_downcalls
;
5391 *smodep
= SM_ATOMIC
;
/* Failure side: rawip_do_open must have stored a non-zero error. */
5393 ASSERT(*errorp
!= 0);
5396 return ((sock_lower_handle_t
)connp
);
/*
 * rawip_activate: attach the upper (socket) layer to a raw IP conn.
 *
 * Records the upcall vector and upper handle on the conn, publishes the
 * protocol properties through su_set_proto_props, and then calls
 * icmp_bind_proto on the endpoint. flags is not referenced in the visible
 * statements.
 *
 * NOTE(review): lossy extract; the final parameter line, braces and the
 * cred assertion are not visible.
 */
5401 rawip_activate(sock_lower_handle_t proto_handle
,
5402 sock_upper_handle_t sock_handle
, sock_upcalls_t
*sock_upcalls
, int flags
,
5405 conn_t
*connp
= (conn_t
*)proto_handle
;
5406 struct sock_proto_props sopp
;
5408 /* All Solaris components should pass a cred for this operation. */
/* The upcalls must be stored before the su_set_proto_props call below. */
5411 connp
->conn_upcalls
= sock_upcalls
;
5412 connp
->conn_upper_handle
= sock_handle
;
/* sopp_flags names exactly the fields that are filled in below. */
5414 sopp
.sopp_flags
= SOCKOPT_WROFF
| SOCKOPT_RCVHIWAT
| SOCKOPT_RCVLOWAT
|
5415 SOCKOPT_MAXBLK
| SOCKOPT_MAXPSZ
| SOCKOPT_MINPSZ
;
5416 sopp
.sopp_wroff
= connp
->conn_wroff
;
5417 sopp
.sopp_rxhiwat
= connp
->conn_rcvbuf
;
5418 sopp
.sopp_rxlowat
= connp
->conn_rcvlowat
;
5419 sopp
.sopp_maxblk
= INFPSZ
;
5420 sopp
.sopp_maxpsz
= IP_MAXPACKET
;
/* A module minimum packet size of 1 is reported to the socket as 0. */
5421 sopp
.sopp_minpsz
= (icmp_mod_info
.mi_minpsz
== 1) ? 0 :
5422 icmp_mod_info
.mi_minpsz
;
5424 (*connp
->conn_upcalls
->su_set_proto_props
)
5425 (connp
->conn_upper_handle
, &sopp
);
5427 icmp_bind_proto(connp
->conn_icmp
);
/*
 * rawip_getpeername: copy the peer address of the endpoint into sa and
 * update *salenp, using conn_getpeername under conn_lock.
 *
 * The endpoint state is compared against TS_DATA_XFER first.
 * NOTE(review): the statement taken when the state differs is not visible
 * in this extract (presumably an error is selected instead of calling
 * conn_getpeername) - confirm against the full file.
 */
5432 rawip_getpeername(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5433 socklen_t
*salenp
, cred_t
*cr
)
5435 conn_t
*connp
= (conn_t
*)proto_handle
;
5436 icmp_t
*icmp
= connp
->conn_icmp
;
5439 /* All Solaris components should pass a cred for this operation. */
/* Both the state check and the address copy happen under conn_lock. */
5442 mutex_enter(&connp
->conn_lock
);
5443 if (icmp
->icmp_state
!= TS_DATA_XFER
)
5446 error
= conn_getpeername(connp
, sa
, salenp
);
5447 mutex_exit(&connp
->conn_lock
);
/*
 * rawip_getsockname: copy the local address of the endpoint into sa and
 * update *salenp by calling conn_getsockname with conn_lock held.
 *
 * NOTE(review): lossy extract; the declaration of error, the cred
 * assertion and the return are not visible.
 */
5453 rawip_getsockname(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5454 socklen_t
*salenp
, cred_t
*cr
)
5456 conn_t
*connp
= (conn_t
*)proto_handle
;
5459 /* All Solaris components should pass a cred for this operation. */
5462 mutex_enter(&connp
->conn_lock
);
5463 error
= conn_getsockname(connp
, sa
, salenp
);
5464 mutex_exit(&connp
->conn_lock
);
/*
 * rawip_setsockopt: set a socket option on a raw IP endpoint.
 *
 * The request is first validated with proto_opt_check against the ICMP
 * option table (icmp_opt_obj), then applied with icmp_opt_set using
 * SETFN_OPTCOM_NEGOTIATE. Negative results are negated and passed through
 * proto_tlitosyserr to obtain an errno value.
 *
 * NOTE(review): lossy extract; the conditions guarding the error
 * conversions and the returns are not visible.
 */
5469 rawip_setsockopt(sock_lower_handle_t proto_handle
, int level
, int option_name
,
5470 const void *optvalp
, socklen_t optlen
, cred_t
*cr
)
5472 conn_t
*connp
= (conn_t
*)proto_handle
;
5475 /* All Solaris components should pass a cred for this operation. */
/* No maximum length is requested here (NULL fourth argument). */
5478 error
= proto_opt_check(level
, option_name
, optlen
, NULL
,
5479 icmp_opt_obj
.odb_opt_des_arr
,
5480 icmp_opt_obj
.odb_opt_arr_cnt
,
5481 B_TRUE
, B_FALSE
, cr
);
5485 * option not recognized
5488 error
= proto_tlitosyserr(-error
);
/*
 * optvalp is passed as both the input and the output value buffer, and the
 * by-value optlen parameter doubles as the output length slot.
 */
5493 error
= icmp_opt_set(connp
, SETFN_OPTCOM_NEGOTIATE
, level
,
5494 option_name
, optlen
, (uchar_t
*)optvalp
, (uint_t
*)&optlen
,
5495 (uchar_t
*)optvalp
, NULL
, cr
);
/*
 * rawip_getsockopt: fetch a socket option value from a raw IP endpoint.
 *
 * proto_opt_check validates the request against the ICMP option table and
 * reports the largest buffer the option may need in max_optbuf_len. The
 * value is read into a temporary kernel buffer of that size and then
 * truncated to the smaller of the returned length and *optlen before being
 * copied to optvalp; *optlen is updated with the copied size.
 *
 * NOTE(review): lossy extract; the error checks and returns are not
 * visible. The first kmem_free presumably belongs to the failure path of
 * icmp_opt_get - confirm against the full file.
 */
5503 rawip_getsockopt(sock_lower_handle_t proto_handle
, int level
, int option_name
,
5504 void *optvalp
, socklen_t
*optlen
, cred_t
*cr
)
5507 conn_t
*connp
= (conn_t
*)proto_handle
;
5508 t_uscalar_t max_optbuf_len
;
5512 /* All Solaris components should pass a cred for this operation. */
5515 error
= proto_opt_check(level
, option_name
, *optlen
, &max_optbuf_len
,
5516 icmp_opt_obj
.odb_opt_des_arr
,
5517 icmp_opt_obj
.odb_opt_arr_cnt
,
5518 B_FALSE
, B_TRUE
, cr
);
/* Negative check results are converted to an errno value. */
5522 error
= proto_tlitosyserr(-error
);
/* KM_SLEEP allocation sized by the limit reported by proto_opt_check. */
5527 optvalp_buf
= kmem_alloc(max_optbuf_len
, KM_SLEEP
);
5528 len
= icmp_opt_get(connp
, level
, option_name
, optvalp_buf
);
5530 kmem_free(optvalp_buf
, max_optbuf_len
);
5535 * update optlen and copy option value
/* Never copy more than the caller said its buffer can hold. */
5537 t_uscalar_t size
= MIN(len
, *optlen
);
5539 bcopy(optvalp_buf
, optvalp
, size
);
5540 bcopy(&size
, optlen
, sizeof (size
));
/* The temporary buffer is released with the same size it was allocated at. */
5542 kmem_free(optvalp_buf
, max_optbuf_len
);
/*
 * rawip_close: close downcall for a raw IP socket. Delegates to
 * rawip_do_close on the conn and discards its result. flags and cr are not
 * referenced in the visible statements.
 *
 * NOTE(review): lossy extract; the cred assertion and the return are not
 * visible.
 */
5548 rawip_close(sock_lower_handle_t proto_handle
, int flags
, cred_t
*cr
)
5550 conn_t
*connp
= (conn_t
*)proto_handle
;
5552 /* All Solaris components should pass a cred for this operation. */
5555 (void) rawip_do_close(connp
);
/*
 * rawip_shutdown: shutdown downcall for a raw IP socket. The visible work
 * is purely notification: the upper layer is told through su_opctl that
 * the send side and/or the receive side has been shut down.
 *
 * NOTE(review): lossy extract; the tests on how that select which of the
 * two upcalls runs are not visible.
 */
5561 rawip_shutdown(sock_lower_handle_t proto_handle
, int how
, cred_t
*cr
)
5563 conn_t
*connp
= (conn_t
*)proto_handle
;
5565 /* All Solaris components should pass a cred for this operation. */
5568 /* shut down the send side */
5570 (*connp
->conn_upcalls
->su_opctl
)(connp
->conn_upper_handle
,
5571 SOCK_OPCTL_SHUT_SEND
, 0);
5572 /* shut down the recv side */
5574 (*connp
->conn_upcalls
->su_opctl
)(connp
->conn_upper_handle
,
5575 SOCK_OPCTL_SHUT_RECV
, 0);
/*
 * rawip_clr_flowctrl: clear the receive flow control indication on the
 * conn. conn_flow_cntrld is reset to B_FALSE while holding icmp_recv_lock.
 */
5580 rawip_clr_flowctrl(sock_lower_handle_t proto_handle
)
5582 conn_t
*connp
= (conn_t
*)proto_handle
;
5583 icmp_t
*icmp
= connp
->conn_icmp
;
5585 mutex_enter(&icmp
->icmp_recv_lock
);
5586 connp
->conn_flow_cntrld
= B_FALSE
;
5587 mutex_exit(&icmp
->icmp_recv_lock
);
/*
 * rawip_ioctl: ioctl downcall for a raw IP socket.
 *
 * A helper stream to IP is created on first use (conn_helper_info is NULL)
 * and the ioctl is forwarded over it with ldi_ioctl. The commands
 * _SIOCSOCKFALLBACK and TI_GETPEERNAME are singled out by case labels that
 * lead to a cmn_err message rather than to the forwarding call.
 *
 * NOTE(review): lossy extract; the switch statement, the remaining case
 * labels, the error checks and the returns are not visible.
 */
5591 rawip_ioctl(sock_lower_handle_t proto_handle
, int cmd
, intptr_t arg
,
5592 int mode
, int32_t *rvalp
, cred_t
*cr
)
5594 conn_t
*connp
= (conn_t
*)proto_handle
;
5597 /* All Solaris components should pass a cred for this operation. */
5601 * If we don't have a helper stream then create one.
5602 * ip_create_helper_stream takes care of locking the conn_t,
5603 * so this check for NULL is just a performance optimization.
5605 if (connp
->conn_helper_info
== NULL
) {
5606 icmp_stack_t
*is
= connp
->conn_icmp
->icmp_is
;
5608 ASSERT(is
->is_ldi_ident
!= NULL
);
5611 * Create a helper stream for non-STREAMS socket.
5613 error
= ip_create_helper_stream(connp
, is
->is_ldi_ident
);
5615 ip0dbg(("rawip_ioctl: create of IP helper stream "
5616 "failed %d\n", error
));
5622 case _SIOCSOCKFALLBACK
:
5623 case TI_GETPEERNAME
:
5626 cmn_err(CE_CONT
, "icmp_ioctl cmd 0x%x on non streams"
5633 * Pass on to IP using helper stream
5635 error
= ldi_ioctl(connp
->conn_helper_info
->iphs_handle
,
5636 cmd
, arg
, mode
, cr
, rvalp
);
/*
 * rawip_send: send downcall for a raw IP socket. mp is an M_DATA message
 * holding the payload and msg carries the optional destination
 * (msg_name, msg_namelen) and ancillary data (msg_controllen).
 *
 * Visible flow, in order:
 *  1. An unbound endpoint is implicitly bound first; EPROTO from that bind
 *     is tolerated.
 *  2. With icmp_hdrincl set the message already carries an IP header and
 *     goes to icmp_output_hdrincl.
 *  3. Without a destination the endpoint must be in TS_DATA_XFER; output
 *     goes through icmp_output_ancillary or icmp_output_connected.
 *  4. With a destination the endpoint must not be in TS_DATA_XFER. The
 *     address is verified and then handled per address family, choosing
 *     between icmp_output_ancillary, icmp_output_lastdst and
 *     icmp_output_newdst.
 * Every visible failure path bumps rawipOutErrors in the rawip MIB.
 *
 * NOTE(review): lossy extract. Case labels, braces, several declarations
 * and most return statements are absent, including the statements that
 * follow each test of is_sendto_ignerr. The comments describe only what is
 * visible.
 */
5643 rawip_send(sock_lower_handle_t proto_handle
, mblk_t
*mp
, struct nmsghdr
*msg
,
5649 conn_t
*connp
= (conn_t
*)proto_handle
;
5650 icmp_t
*icmp
= connp
->conn_icmp
;
5652 icmp_stack_t
*is
= icmp
->icmp_is
;
/* The pid of the calling process is handed to every output routine. */
5653 pid_t pid
= curproc
->p_pid
;
5654 ip_xmit_attr_t
*ixa
;
5656 ASSERT(DB_TYPE(mp
) == M_DATA
);
5658 /* All Solaris components should pass a cred for this operation. */
5661 /* do an implicit bind if necessary */
5662 if (icmp
->icmp_state
== TS_UNBND
) {
5663 error
= rawip_implicit_bind(connp
);
5665 * We could be racing with an actual bind, in which case
5666 * we would see EPROTO. We cross our fingers and try
/* Any bind result other than success or EPROTO takes the branch below. */
5669 if (!(error
== 0 || error
== EPROTO
)) {
5675 /* Protocol 255 contains full IP headers */
5676 /* Read without holding lock */
5677 if (icmp
->icmp_hdrincl
) {
5678 ASSERT(connp
->conn_ipversion
== IPV4_VERSION
);
/*
 * The first mblk must hold at least a simple IPv4 header; pull the message
 * up when it does not, and count an output error if the pullup fails.
 */
5679 if (mp
->b_wptr
- mp
->b_rptr
< IP_SIMPLE_HDR_LENGTH
) {
5680 if (!pullupmsg(mp
, IP_SIMPLE_HDR_LENGTH
)) {
5681 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5686 error
= icmp_output_hdrincl(connp
, mp
, cr
, pid
);
5687 if (is
->is_sendto_ignerr
)
/* No destination supplied: only valid on an endpoint in TS_DATA_XFER. */
5694 if (msg
->msg_name
== NULL
) {
5695 if (icmp
->icmp_state
!= TS_DATA_XFER
) {
5696 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5697 return (EDESTADDRREQ
);
/* Ancillary data selects the ancillary output path, else the connected one. */
5699 if (msg
->msg_controllen
!= 0) {
5700 error
= icmp_output_ancillary(connp
, NULL
, NULL
, mp
,
5701 NULL
, msg
, cr
, pid
);
5703 error
= icmp_output_connected(connp
, mp
, cr
, pid
);
5705 if (is
->is_sendto_ignerr
)
/* A destination was supplied although the endpoint is in TS_DATA_XFER. */
5710 if (icmp
->icmp_state
== TS_DATA_XFER
) {
5711 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
/* Validate the supplied address against the address family of the conn. */
5714 error
= proto_verify_ip_addr(connp
->conn_family
,
5715 (struct sockaddr
*)msg
->msg_name
, msg
->msg_namelen
);
5717 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5720 switch (connp
->conn_family
) {
/* From here the destination is treated as a sin6_t (IPv6 handling). */
5722 sin6
= (sin6_t
*)msg
->msg_name
;
5724 /* No support for mapped addresses on raw sockets */
5725 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
5726 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5727 return (EADDRNOTAVAIL
);
5729 srcid
= sin6
->__sin6_src_id
;
5732 * If the local address is a mapped address return
5734 * It would be possible to send an IPv6 packet but the
5735 * response would never make it back to the application
5736 * since it is bound to a mapped address.
5738 if (IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
)) {
5739 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5740 return (EADDRNOTAVAIL
);
/* An unspecified destination is rewritten in place to the loopback address. */
5743 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
))
5744 sin6
->sin6_addr
= ipv6_loopback
;
/*
 * Remainder of the IPv6 handling, described here in one place:
 *  - ixa is obtained from conn_get_ixa only when there is no ancillary
 *    data; the ancillary path asserts ixa is NULL.
 *  - Under conn_lock a pending icmp_delayed_error is consumed (read, then
 *    cleared). When the delayed address matches this destination in both
 *    address and family, the lock is dropped and an output error counted.
 *  - Dispatch: ancillary data drops conn_lock and uses
 *    icmp_output_ancillary; same destination, same source id and current
 *    IPsec policy use icmp_output_lastdst; anything else uses
 *    icmp_output_newdst. Both of the latter drop conn_lock themselves,
 *    which the closing MUTEX_NOT_HELD assertion checks.
 */
5747 * We have to allocate an ip_xmit_attr_t before we grab
5748 * conn_lock and we need to hold conn_lock once we've check
5749 * conn_same_as_last_v6 to handle concurrent send* calls on a
5752 if (msg
->msg_controllen
== 0) {
5753 ixa
= conn_get_ixa(connp
, B_FALSE
);
5755 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5761 mutex_enter(&connp
->conn_lock
);
5762 if (icmp
->icmp_delayed_error
!= 0) {
5763 sin6_t
*sin2
= (sin6_t
*)&icmp
->icmp_delayed_addr
;
5765 error
= icmp
->icmp_delayed_error
;
5766 icmp
->icmp_delayed_error
= 0;
5768 /* Compare IP address and family */
5770 if (IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
5771 &sin2
->sin6_addr
) &&
5772 sin6
->sin6_family
== sin2
->sin6_family
) {
5773 mutex_exit(&connp
->conn_lock
);
5774 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
5780 if (msg
->msg_controllen
!= 0) {
5781 mutex_exit(&connp
->conn_lock
);
5782 ASSERT(ixa
== NULL
);
5783 error
= icmp_output_ancillary(connp
, NULL
, sin6
, mp
,
5784 NULL
, msg
, cr
, pid
);
5785 } else if (conn_same_as_last_v6(connp
, sin6
) &&
5786 connp
->conn_lastsrcid
== srcid
&&
5787 ipsec_outbound_policy_current(ixa
)) {
5788 /* icmp_output_lastdst drops conn_lock */
5789 error
= icmp_output_lastdst(connp
, mp
, cr
, pid
, ixa
);
5791 /* icmp_output_newdst drops conn_lock */
5792 error
= icmp_output_newdst(connp
, mp
, NULL
, sin6
, cr
,
5795 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
5796 if (is
->is_sendto_ignerr
)
5801 sin
= (sin_t
*)msg
->msg_name
;
5803 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
5804 sin
->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
5807 * We have to allocate an ip_xmit_attr_t before we grab
5808 * conn_lock and we need to hold conn_lock once we've check
5809 * conn_same_as_last_v6 to handle concurrent send* on a socket.
/* As in the IPv6 handling, ixa is only obtained without ancillary data. */
5811 if (msg
->msg_controllen
== 0) {
5812 ixa
= conn_get_ixa(connp
, B_FALSE
);
5814 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
/*
 * Consume any delayed error under conn_lock. Only the IPv4 address is
 * compared here; the family is not part of the test.
 */
5820 mutex_enter(&connp
->conn_lock
);
5821 if (icmp
->icmp_delayed_error
!= 0) {
5822 sin_t
*sin2
= (sin_t
*)&icmp
->icmp_delayed_addr
;
5824 error
= icmp
->icmp_delayed_error
;
5825 icmp
->icmp_delayed_error
= 0;
5827 /* Compare IP address */
5829 if (sin
->sin_addr
.s_addr
== sin2
->sin_addr
.s_addr
) {
5830 mutex_exit(&connp
->conn_lock
);
5831 BUMP_MIB(&is
->is_rawip_mib
, rawipOutErrors
);
/*
 * IPv4 dispatch. Unlike the IPv6 test, the last-destination shortcut here
 * does not compare conn_lastsrcid.
 */
5838 if (msg
->msg_controllen
!= 0) {
5839 mutex_exit(&connp
->conn_lock
);
5840 ASSERT(ixa
== NULL
);
5841 error
= icmp_output_ancillary(connp
, sin
, NULL
, mp
,
5842 NULL
, msg
, cr
, pid
);
5843 } else if (conn_same_as_last_v4(connp
, sin
) &&
5844 ipsec_outbound_policy_current(ixa
)) {
5845 /* icmp_output_lastdst drops conn_lock */
5846 error
= icmp_output_lastdst(connp
, mp
, cr
, pid
, ixa
);
5848 /* icmp_output_newdst drops conn_lock */
5849 error
= icmp_output_newdst(connp
, mp
, sin
, NULL
, cr
,
/* Every dispatch branch above must have released conn_lock by now. */
5852 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
5853 if (is
->is_sendto_ignerr
)
5862 sock_downcalls_t sock_rawip_downcalls
= {