4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
26 /* Copyright (c) 1990 Mentat Inc. */
28 #include <sys/sysmacros.h>
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #include <sys/strsun.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/timod.h>
38 #include <sys/sunddi.h>
39 #include <sys/strsubr.h>
40 #include <sys/suntpi.h>
41 #include <sys/xti_inet.h>
43 #include <sys/cred_impl.h>
44 #include <sys/policy.h>
46 #include <sys/ucred.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sockio.h>
52 #include <sys/vtrace.h>
54 #include <sys/debug.h>
55 #include <sys/isa_defs.h>
56 #include <sys/random.h>
57 #include <netinet/in.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <netinet/udp.h>
62 #include <inet/common.h>
64 #include <inet/ip_impl.h>
65 #include <inet/ipsec_impl.h>
67 #include <inet/ip_ire.h>
68 #include <inet/ip_if.h>
69 #include <inet/ip_multi.h>
70 #include <inet/ip_ndp.h>
71 #include <inet/proto_set.h>
72 #include <inet/mib2.h>
73 #include <inet/optcom.h>
74 #include <inet/snmpcom.h>
75 #include <inet/kstatcom.h>
76 #include <inet/ipclassifier.h>
77 #include <sys/squeue_impl.h>
78 #include <inet/ipnet.h>
79 #include <sys/ethernet.h>
81 #include <rpc/pmap_prot.h>
83 #include <inet/udp_impl.h>
86 * Synchronization notes:
88 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
89 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
90 * protects the contents of the udp_t. uf_lock protects the address and the
92 * The lock order is conn_lock -> uf_lock.
94 * The fanout lock uf_lock:
95 * When a UDP endpoint is bound to a local port, it is inserted into
96 * a bind hash list. The list consists of an array of udp_fanout_t buckets.
97 * The size of the array is controlled by the udp_bind_fanout_size variable.
98 * This variable can be changed in /etc/system if the default value is
99 * not large enough. Each bind hash bucket is protected by a per bucket
100 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
101 * structure and a few other fields in the udp_t. A UDP endpoint is removed
102 * from the bind hash list only when it is being unbound or being closed.
103 * The per bucket lock also protects a UDP endpoint's state changes.
106 * UDP is always a device driver. For compatibility with mibopen() code
107 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
110 * The above implies that we don't support any intermediate module to
111 * reside in between /dev/ip and udp -- in fact, we never supported such
112 * scenario in the past as the inter-layer communication semantics have
113 * always been private.
116 /* For /etc/system control */
117 uint_t udp_bind_fanout_size
= UDP_BIND_FANOUT_SIZE
;
119 static void udp_addr_req(queue_t
*q
, mblk_t
*mp
);
120 static void udp_tpi_bind(queue_t
*q
, mblk_t
*mp
);
121 static void udp_bind_hash_insert(udp_fanout_t
*uf
, udp_t
*udp
);
122 static void udp_bind_hash_remove(udp_t
*udp
, boolean_t caller_holds_lock
);
123 static int udp_build_hdr_template(conn_t
*, const in6_addr_t
*,
124 const in6_addr_t
*, in_port_t
, uint32_t);
125 static void udp_capability_req(queue_t
*q
, mblk_t
*mp
);
126 static int udp_tpi_close(queue_t
*q
, int flags
);
127 static void udp_close_free(conn_t
*);
128 static void udp_tpi_connect(queue_t
*q
, mblk_t
*mp
);
129 static void udp_tpi_disconnect(queue_t
*q
, mblk_t
*mp
);
130 static void udp_err_ack(queue_t
*q
, mblk_t
*mp
, t_scalar_t t_error
,
132 static void udp_err_ack_prim(queue_t
*q
, mblk_t
*mp
, t_scalar_t primitive
,
133 t_scalar_t tlierr
, int sys_error
);
134 static int udp_extra_priv_ports_get(queue_t
*q
, mblk_t
*mp
, caddr_t cp
,
136 static int udp_extra_priv_ports_add(queue_t
*q
, mblk_t
*mp
,
137 char *value
, caddr_t cp
, cred_t
*cr
);
138 static int udp_extra_priv_ports_del(queue_t
*q
, mblk_t
*mp
,
139 char *value
, caddr_t cp
, cred_t
*cr
);
140 static void udp_icmp_input(void *, mblk_t
*, void *, ip_recv_attr_t
*);
141 static void udp_icmp_error_ipv6(conn_t
*connp
, mblk_t
*mp
,
142 ip_recv_attr_t
*ira
);
143 static void udp_info_req(queue_t
*q
, mblk_t
*mp
);
144 static void udp_input(void *, mblk_t
*, void *, ip_recv_attr_t
*);
145 static void udp_lrput(queue_t
*, mblk_t
*);
146 static void udp_lwput(queue_t
*, mblk_t
*);
147 static int udp_open(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
,
148 cred_t
*credp
, boolean_t isv6
);
149 static int udp_openv4(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
,
151 static int udp_openv6(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
,
153 static boolean_t
udp_opt_allow_udr_set(t_scalar_t level
, t_scalar_t name
);
154 int udp_opt_set(conn_t
*connp
, uint_t optset_context
,
155 int level
, int name
, uint_t inlen
,
156 uchar_t
*invalp
, uint_t
*outlenp
, uchar_t
*outvalp
,
157 void *thisdg_attrs
, cred_t
*cr
);
158 int udp_opt_get(conn_t
*connp
, int level
, int name
,
160 static int udp_output_connected(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
,
162 static int udp_output_lastdst(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
,
163 pid_t pid
, ip_xmit_attr_t
*ixa
);
164 static int udp_output_newdst(conn_t
*connp
, mblk_t
*data_mp
, sin_t
*sin
,
165 sin6_t
*sin6
, ushort_t ipversion
, cred_t
*cr
, pid_t
,
166 ip_xmit_attr_t
*ixa
);
167 static mblk_t
*udp_prepend_hdr(conn_t
*, ip_xmit_attr_t
*, const ip_pkt_t
*,
168 const in6_addr_t
*, const in6_addr_t
*, in_port_t
, uint32_t, mblk_t
*,
170 static mblk_t
*udp_prepend_header_template(conn_t
*, ip_xmit_attr_t
*,
171 mblk_t
*, const in6_addr_t
*, in_port_t
, uint32_t, int *);
172 static void udp_ud_err(queue_t
*q
, mblk_t
*mp
, t_scalar_t err
);
173 static void udp_ud_err_connected(conn_t
*, t_scalar_t
);
174 static void udp_tpi_unbind(queue_t
*q
, mblk_t
*mp
);
175 static in_port_t
udp_update_next_port(udp_t
*udp
, in_port_t port
,
177 static void udp_wput_other(queue_t
*q
, mblk_t
*mp
);
178 static void udp_wput_iocdata(queue_t
*q
, mblk_t
*mp
);
179 static void udp_wput_fallback(queue_t
*q
, mblk_t
*mp
);
180 static size_t udp_set_rcv_hiwat(udp_t
*udp
, size_t size
);
182 static void *udp_stack_init(netstackid_t stackid
, netstack_t
*ns
);
183 static void udp_stack_fini(netstackid_t stackid
, void *arg
);
185 /* Common routines for TPI and socket module */
186 static void udp_ulp_recv(conn_t
*, mblk_t
*, uint_t
, ip_recv_attr_t
*);
188 /* Common routine for TPI and socket module */
189 static conn_t
*udp_do_open(cred_t
*, boolean_t
, int, int *);
190 static void udp_do_close(conn_t
*);
191 static int udp_do_bind(conn_t
*, struct sockaddr
*, socklen_t
, cred_t
*,
193 static int udp_do_unbind(conn_t
*);
195 int udp_getsockname(sock_lower_handle_t
,
196 struct sockaddr
*, socklen_t
*, cred_t
*);
197 int udp_getpeername(sock_lower_handle_t
,
198 struct sockaddr
*, socklen_t
*, cred_t
*);
199 static int udp_do_connect(conn_t
*, const struct sockaddr
*, socklen_t
,
202 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
205 * Checks if the given destination addr/port is allowed out.
206 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
207 * Called for each connect() and for sendto()/sendmsg() to a different
209 * For connect(), called in udp_connect().
210 * For sendto()/sendmsg(), called in udp_output_newdst().
212 * This macro assumes that the cl_inet_connect2 hook is not NULL.
213 * Please check this before calling this macro.
216 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
217 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
219 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \
222 * Running in cluster mode - check and register active \
223 * "connection" information \
225 if ((cp)->conn_ipversion == IPV4_VERSION) \
226 (err) = (*cl_inet_connect2)( \
227 (cp)->conn_netstack->netstack_stackid, \
228 IPPROTO_UDP, is_outgoing, AF_INET, \
229 (uint8_t *)&((cp)->conn_laddr_v4), \
231 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \
232 (in_port_t)(fport), NULL); \
234 (err) = (*cl_inet_connect2)( \
235 (cp)->conn_netstack->netstack_stackid, \
236 IPPROTO_UDP, is_outgoing, AF_INET6, \
237 (uint8_t *)&((cp)->conn_laddr_v6), \
239 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \
242 static struct module_info udp_mod_info
= {
243 UDP_MOD_ID
, UDP_MOD_NAME
, 1, INFPSZ
, UDP_RECV_HIWATER
, UDP_RECV_LOWATER
247 * Entry points for UDP as a device.
248 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
250 static struct qinit udp_rinitv4
= {
251 NULL
, NULL
, udp_openv4
, udp_tpi_close
, NULL
, &udp_mod_info
, NULL
254 static struct qinit udp_rinitv6
= {
255 NULL
, NULL
, udp_openv6
, udp_tpi_close
, NULL
, &udp_mod_info
, NULL
258 static struct qinit udp_winit
= {
259 (pfi_t
)udp_wput
, (pfi_t
)ip_wsrv
, NULL
, NULL
, NULL
, &udp_mod_info
262 /* UDP entry point during fallback */
263 struct qinit udp_fallback_sock_winit
= {
264 (pfi_t
)udp_wput_fallback
, NULL
, NULL
, NULL
, NULL
, &udp_mod_info
268 * UDP needs to handle I_LINK and I_PLINK since ifconfig
269 * likes to use it as a place to hang the various streams.
271 static struct qinit udp_lrinit
= {
272 (pfi_t
)udp_lrput
, NULL
, udp_openv4
, udp_tpi_close
, NULL
, &udp_mod_info
275 static struct qinit udp_lwinit
= {
276 (pfi_t
)udp_lwput
, NULL
, udp_openv4
, udp_tpi_close
, NULL
, &udp_mod_info
279 /* For AF_INET aka /dev/udp */
280 struct streamtab udpinfov4
= {
281 &udp_rinitv4
, &udp_winit
, &udp_lrinit
, &udp_lwinit
284 /* For AF_INET6 aka /dev/udp6 */
285 struct streamtab udpinfov6
= {
286 &udp_rinitv6
, &udp_winit
, &udp_lrinit
, &udp_lwinit
289 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
291 /* Default structure copied into T_INFO_ACK messages */
/*
 * IPv4 template for T_INFO_ACK replies. The TSDU/TIDU limits use
 * UDP_MAXPACKET_IPV4, i.e. IP_MAXPACKET minus the UDP header and a
 * simple (option-less) IPv4 header.
 */
292 static struct T_info_ack udp_g_t_info_ack_ipv4
= {
294 UDP_MAXPACKET_IPV4
, /* TSDU_size. Excl. headers */
295 T_INVALID
, /* ETSDU_size. udp does not support expedited data. */
296 T_INVALID
, /* CDATA_size. udp does not support connect data. */
297 T_INVALID
, /* DDATA_size. udp does not support disconnect data. */
298 sizeof (sin_t
), /* ADDR_size. */
299 0, /* OPT_size - not initialized here */
300 UDP_MAXPACKET_IPV4
, /* TIDU_size. Excl. headers */
301 T_CLTS
, /* SERV_type. udp supports connection-less. */
302 TS_UNBND
, /* CURRENT_state. This is set from udp_state. */
303 (XPG4_1
|SENDZERO
) /* PROVIDER_flag */
306 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
/*
 * IPv6 template for T_INFO_ACK replies. The TSDU/TIDU limits use
 * UDP_MAXPACKET_IPV6, i.e. IP_MAXPACKET minus the UDP header and the
 * fixed IPv6 header; the address size is that of a sin6_t.
 */
308 static struct T_info_ack udp_g_t_info_ack_ipv6
= {
310 UDP_MAXPACKET_IPV6
, /* TSDU_size. Excl. headers */
311 T_INVALID
, /* ETSDU_size. udp does not support expedited data. */
312 T_INVALID
, /* CDATA_size. udp does not support connect data. */
313 T_INVALID
, /* DDATA_size. udp does not support disconnect data. */
314 sizeof (sin6_t
), /* ADDR_size. */
315 0, /* OPT_size - not initialized here */
316 UDP_MAXPACKET_IPV6
, /* TIDU_size. Excl. headers */
317 T_CLTS
, /* SERV_type. udp supports connection-less. */
318 TS_UNBND
, /* CURRENT_state. This is set from udp_state. */
319 (XPG4_1
|SENDZERO
) /* PROVIDER_flag */
323 * UDP tunables related declarations. Definitions are in udp_tunables.c
325 extern mod_prop_info_t udp_propinfo_tbl
[];
326 extern int udp_propinfo_count
;
328 /* Setable in /etc/system */
329 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
330 uint32_t udp_random_anon_port
= 1;
333 * Hook functions to enable cluster networking.
334 * On non-clustered systems these vectors must always be NULL
337 void (*cl_inet_bind
)(netstackid_t stack_id
, uchar_t protocol
,
338 sa_family_t addr_family
, uint8_t *laddrp
, in_port_t lport
,
340 void (*cl_inet_unbind
)(netstackid_t stack_id
, uint8_t protocol
,
341 sa_family_t addr_family
, uint8_t *laddrp
, in_port_t lport
,
344 typedef union T_primitives
*t_primp_t
;
347 * Return the next anonymous port in the privileged port range for
/*
 * udp_get_next_priv_port: hand out the next candidate anonymous port from
 * the privileged range.
 *
 * A function-static cursor (next_priv_port) starts at IPPORT_RESERVED - 1
 * and is post-decremented on every return, so successive calls walk the
 * range downward. When the cursor has dropped below the stack's
 * us_min_anonpriv_port, or is at/above IPPORT_RESERVED, it is wrapped back
 * to IPPORT_RESERVED - 1.
 *
 * NOTE(review): the cursor is a single static shared by every caller, and
 * no lock is visible around it here - confirm callers serialize access.
 * NOTE(review): lines are missing from this view of the function (the
 * embedded numbering skips), including whatever consumes `restart`.
 */
351 udp_get_next_priv_port(udp_t
*udp
)
353 static in_port_t next_priv_port
= IPPORT_RESERVED
- 1;
355 boolean_t restart
= B_FALSE
;
356 udp_stack_t
*us
= udp
->udp_us
;
/* Wrap the cursor when it has left [us_min_anonpriv_port, IPPORT_RESERVED). */
359 if (next_priv_port
< us
->us_min_anonpriv_port
||
360 next_priv_port
>= IPPORT_RESERVED
) {
361 next_priv_port
= IPPORT_RESERVED
- 1;
/* Return the current candidate and step the cursor down for the next call. */
367 return (next_priv_port
--);
371 * Hash list removal routine for udp_t structures.
/*
 * udp_bind_hash_remove: unlink a udp_t from its bind hash chain.
 *
 * udp               - endpoint to unlink; udp_ptpbhn == NULL means it is
 *                     not currently on a chain.
 * caller_holds_lock - when false, this routine locates the bucket lock
 *                     itself by hashing conn_lport with UDP_BIND_HASH over
 *                     us_bind_fanout_size buckets.
 *
 * The chain is singly linked through udp_bind_hash, with udp_ptpbhn
 * pointing back at whichever pointer currently references this entry.
 * Unlinking therefore repoints the successor's back-pointer, stores the
 * successor through our own back-pointer, and clears both of our links.
 *
 * NOTE(review): the lock acquire/release calls and the early return are on
 * lines missing from this view.
 */
374 udp_bind_hash_remove(udp_t
*udp
, boolean_t caller_holds_lock
)
378 udp_stack_t
*us
= udp
->udp_us
;
379 conn_t
*connp
= udp
->udp_connp
;
/* Unlocked check: is the endpoint linked into a chain at all? */
381 if (udp
->udp_ptpbhn
== NULL
)
385 * Extract the lock pointer in case there are concurrent
386 * hash_remove's for this instance.
388 ASSERT(connp
->conn_lport
!= 0);
389 if (!caller_holds_lock
) {
390 lockp
= &us
->us_bind_fanout
[UDP_BIND_HASH(connp
->conn_lport
,
391 us
->us_bind_fanout_size
)].uf_lock
;
392 ASSERT(lockp
!= NULL
);
/* Re-test the link before unlinking (see the concurrency remark above). */
395 if (udp
->udp_ptpbhn
!= NULL
) {
396 udpnext
= udp
->udp_bind_hash
;
397 if (udpnext
!= NULL
) {
398 udpnext
->udp_ptpbhn
= udp
->udp_ptpbhn
;
399 udp
->udp_bind_hash
= NULL
;
401 *udp
->udp_ptpbhn
= udpnext
;
402 udp
->udp_ptpbhn
= NULL
;
404 if (!caller_holds_lock
) {
/*
 * udp_bind_hash_insert: link a udp_t into the bind hash bucket `uf`.
 *
 * The caller must hold uf->uf_lock, and the endpoint must not already be on
 * a chain (both are asserted). Endpoints bound to a specific address are
 * kept ahead of those bound to INADDR_ANY: when the new endpoint is bound
 * to INADDR_ANY and the current head is not, the insertion point is moved
 * down the chain before the new entry is linked in.
 *
 * NOTE(review): in the lines visible here the skip loop re-reads udpnext on
 * every iteration but keeps testing `connext`, which was taken from the
 * first entry and is not updated by any visible statement. If that is the
 * full loop, it runs to the end of the chain rather than stopping at the
 * first INADDR_ANY entry. Lines are missing from this view - confirm
 * against the complete source.
 */
410 udp_bind_hash_insert(udp_fanout_t
*uf
, udp_t
*udp
)
412 conn_t
*connp
= udp
->udp_connp
;
417 ASSERT(MUTEX_HELD(&uf
->uf_lock
));
418 ASSERT(udp
->udp_ptpbhn
== NULL
);
421 if (udpnext
!= NULL
) {
423 * If the new udp bound to the INADDR_ANY address
424 * and the first one in the list is not bound to
425 * INADDR_ANY we skip all entries until we find the
426 * first one bound to INADDR_ANY.
427 * This makes sure that applications binding to a
428 * specific address get preference over those binding to
431 connext
= udpnext
->udp_connp
;
432 if (V6_OR_V4_INADDR_ANY(connp
->conn_bound_addr_v6
) &&
433 !V6_OR_V4_INADDR_ANY(connext
->conn_bound_addr_v6
)) {
434 while ((udpnext
= udpp
[0]) != NULL
&&
435 !V6_OR_V4_INADDR_ANY(connext
->conn_bound_addr_v6
)) {
436 udpp
= &(udpnext
->udp_bind_hash
);
439 udpnext
->udp_ptpbhn
= &udp
->udp_bind_hash
;
441 udpnext
->udp_ptpbhn
= &udp
->udp_bind_hash
;
/* Splice the new entry in front of udpnext, at the slot udpp points to. */
444 udp
->udp_bind_hash
= udpnext
;
445 udp
->udp_ptpbhn
= udpp
;
450 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
451 * passed to udp_wput.
452 * It associates a port number and local address with the stream.
453 * It calls IP to verify the local IP address, and calls IP to insert
454 * the conn_t in the fanout table.
455 * If everything is ok it then sends the T_BIND_ACK back up.
457 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
458 * without setting SO_REUSEADDR. This is needed so that they
459 * can be viewed as two independent transport protocols.
460 * However, anonymous ports are allocated from the same range to avoid
461 * duplicating the us->us_next_port_to_try.
/*
 * udp_tpi_bind: TPI entry point for O_T_BIND_REQ / T_BIND_REQ.
 *
 * q  - queue the request arrived on; the conn_t is derived with Q_TO_CONN.
 * mp - the bind request. It is reallocated so it can be reused for the
 *      reply, retyped to M_PCPROTO and turned into a T_BIND_ACK on success.
 *
 * Visible validation, each answered with udp_err_ack():
 *   - credential problem                      -> TSYSERR / EINVAL
 *   - message shorter than a T_bind_req       -> TPROTO
 *   - endpoint not in TS_UNBND                -> TOUTSTATE
 *   - reallocb failure                        -> TSYSERR / ENOMEM
 *   - address missing or not 32-bit aligned   -> TSYSERR / EINVAL
 *   - address family mismatch                 -> TSYSERR / EAFNOSUPPORT
 *   - ADDR_length not 0, sin_t or sin6_t size -> TBADADDR
 *
 * udp_do_bind() failures are reported in one of two forms: as a system
 * errno under TSYSERR, or negated as a TLI error code. The condition that
 * selects between them is on a line missing from this view.
 */
464 udp_tpi_bind(queue_t
*q
, mblk_t
*mp
)
469 struct T_bind_req
*tbr
;
477 * All Solaris components should pass a db_credp
478 * for this TPI message, hence we ASSERT.
479 * But in case there is some other M_PROTO that looks
480 * like a TPI message sent by some other kernel
481 * component, we check and return an error.
483 cr
= msg_getcred(mp
, NULL
);
486 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
490 connp
= Q_TO_CONN(q
);
491 udp
= connp
->conn_udp
;
/* The message must hold at least a complete T_bind_req. */
492 if ((mp
->b_wptr
- mp
->b_rptr
) < sizeof (*tbr
)) {
493 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
494 "udp_bind: bad req, len %u",
495 (uint_t
)(mp
->b_wptr
- mp
->b_rptr
));
496 udp_err_ack(q
, mp
, TPROTO
, 0);
/* Binding is only legal from the unbound state. */
499 if (udp
->udp_state
!= TS_UNBND
) {
500 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
501 "udp_bind: bad state, %u", udp
->udp_state
);
502 udp_err_ack(q
, mp
, TOUTSTATE
, 0);
506 * Reallocate the message to make sure we have enough room for an
509 mp1
= reallocb(mp
, sizeof (struct T_bind_ack
) + sizeof (sin6_t
), 1);
511 udp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
517 /* Reset the message type in preparation for shipping it back. */
518 DB_TYPE(mp
) = M_PCPROTO
;
520 tbr
= (struct T_bind_req
*)mp
->b_rptr
;
/* ADDR_length selects between "pick for me", IPv4 and IPv6 requests. */
521 switch (tbr
->ADDR_length
) {
522 case 0: /* Request for a generic port */
/* Build an address of the endpoint's family in place after the header. */
523 tbr
->ADDR_offset
= sizeof (struct T_bind_req
);
524 if (connp
->conn_family
== AF_INET
) {
525 tbr
->ADDR_length
= sizeof (sin_t
);
526 sin
= (sin_t
*)&tbr
[1];
528 sin
->sin_family
= AF_INET
;
529 mp
->b_wptr
= (uchar_t
*)&sin
[1];
530 sa
= (struct sockaddr
*)sin
;
532 ASSERT(connp
->conn_family
== AF_INET6
);
533 tbr
->ADDR_length
= sizeof (sin6_t
);
534 sin6
= (sin6_t
*)&tbr
[1];
536 sin6
->sin6_family
= AF_INET6
;
537 mp
->b_wptr
= (uchar_t
*)&sin6
[1];
538 sa
= (struct sockaddr
*)sin6
;
542 case sizeof (sin_t
): /* Complete IPv4 address */
543 sa
= (struct sockaddr
*)mi_offset_param(mp
, tbr
->ADDR_offset
,
545 if (sa
== NULL
|| !OK_32PTR((char *)sa
)) {
546 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
549 if (connp
->conn_family
!= AF_INET
||
550 sa
->sa_family
!= AF_INET
) {
551 udp_err_ack(q
, mp
, TSYSERR
, EAFNOSUPPORT
);
556 case sizeof (sin6_t
): /* complete IPv6 address */
557 sa
= (struct sockaddr
*)mi_offset_param(mp
, tbr
->ADDR_offset
,
559 if (sa
== NULL
|| !OK_32PTR((char *)sa
)) {
560 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
563 if (connp
->conn_family
!= AF_INET6
||
564 sa
->sa_family
!= AF_INET6
) {
565 udp_err_ack(q
, mp
, TSYSERR
, EAFNOSUPPORT
);
570 default: /* Invalid request */
571 (void) mi_strlog(q
, 1, SL_ERROR
|SL_TRACE
,
572 "udp_bind: bad ADDR_length length %u", tbr
->ADDR_length
);
573 udp_err_ack(q
, mp
, TBADADDR
, 0);
/* The last argument is true for T_BIND_REQ and false for O_T_BIND_REQ. */
577 error
= udp_do_bind(connp
, sa
, tbr
->ADDR_length
, cr
,
578 tbr
->PRIM_type
!= O_T_BIND_REQ
);
582 udp_err_ack(q
, mp
, TSYSERR
, error
);
584 udp_err_ack(q
, mp
, -error
, 0);
/* Success: the request buffer becomes the acknowledgement. */
587 tbr
->PRIM_type
= T_BIND_ACK
;
593 * This routine handles each T_CONN_REQ message passed to udp. It
594 * associates a default destination address with the stream.
596 * After various error checks are completed, udp_connect() lays
597 * the target address and port into the composite header template.
598 * Then we ask IP for information, including a source address if we didn't
599 * already have one. Finally we send up the T_OK_ACK reply message.
/*
 * udp_tpi_connect: TPI entry point for T_CONN_REQ.
 *
 * q  - queue the request arrived on.
 * mp - the T_CONN_REQ; consumed either by an error ack or by being turned
 *      into the T_OK_ACK.
 *
 * Visible flow: fetch the sender's credential and pid from the message,
 * check the message is at least a T_conn_req (TPROTO otherwise), reject any
 * options (TBADOPT), locate the destination address by DEST_length
 * (TBADADDR for unsupported lengths), verify it against the endpoint's
 * address family with proto_verify_ip_addr(), then call udp_do_connect().
 * On success a T_CONN_CON sized for the endpoint's family is built and both
 * the T_OK_ACK and the T_CONN_CON are sent up conn_rq, in that order.
 *
 * udp_do_connect() failures are reported either negated as a TLI error or
 * as a system errno under TSYSERR; the selecting condition is on a line
 * missing from this view.
 */
602 udp_tpi_connect(queue_t
*q
, mblk_t
*mp
)
604 conn_t
*connp
= Q_TO_CONN(q
);
608 struct T_conn_req
*tcr
;
612 * All Solaris components should pass a db_credp
613 * for this TPI message, hence we ASSERT.
614 * But in case there is some other M_PROTO that looks
615 * like a TPI message sent by some other kernel
616 * component, we check and return an error.
618 cr
= msg_getcred(mp
, &pid
);
621 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
/* Only a cast here; tcr is not dereferenced until the length check passes. */
625 tcr
= (struct T_conn_req
*)mp
->b_rptr
;
627 /* A bit of sanity checking */
628 if ((mp
->b_wptr
- mp
->b_rptr
) < sizeof (struct T_conn_req
)) {
629 udp_err_ack(q
, mp
, TPROTO
, 0);
/* Connect requests carrying options are refused. */
633 if (tcr
->OPT_length
!= 0) {
634 udp_err_ack(q
, mp
, TBADOPT
, 0);
639 * Determine packet type based on type of address passed in
640 * the request should contain an IPv4 or IPv6 address.
641 * Make sure that address family matches the type of
642 * family of the address passed down.
644 len
= tcr
->DEST_length
;
645 switch (tcr
->DEST_length
) {
647 udp_err_ack(q
, mp
, TBADADDR
, 0);
651 sa
= (struct sockaddr
*)mi_offset_param(mp
, tcr
->DEST_offset
,
655 case sizeof (sin6_t
):
656 sa
= (struct sockaddr
*)mi_offset_param(mp
, tcr
->DEST_offset
,
661 error
= proto_verify_ip_addr(connp
->conn_family
, sa
, len
);
663 udp_err_ack(q
, mp
, TSYSERR
, error
);
667 error
= udp_do_connect(connp
, sa
, len
, cr
, pid
);
670 udp_err_ack(q
, mp
, -error
, 0);
672 udp_err_ack(q
, mp
, TSYSERR
, error
);
676 * We have to send a connection confirmation to
/* The confirmation carries the peer address, sized by address family. */
679 if (connp
->conn_family
== AF_INET
) {
680 mp1
= mi_tpi_conn_con(NULL
, (char *)sa
,
681 sizeof (sin_t
), NULL
, 0);
683 mp1
= mi_tpi_conn_con(NULL
, (char *)sa
,
684 sizeof (sin6_t
), NULL
, 0);
687 udp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
692 * Send ok_ack for T_CONN_REQ
694 mp
= mi_tpi_ok_ack_alloc(mp
);
/* On failure the already-built T_CONN_CON (mp1) is recycled as the error ack. */
696 /* Unable to reuse the T_CONN_REQ for the ack. */
697 udp_err_ack_prim(q
, mp1
, T_CONN_REQ
, TSYSERR
, ENOMEM
);
/* T_OK_ACK goes up first, followed by the T_CONN_CON. */
701 putnext(connp
->conn_rq
, mp
);
702 putnext(connp
->conn_rq
, mp1
);
/*
 * udp_tpi_close: STREAMS close routine, installed in the qinit tables for
 * both the v4 and v6 devices.
 *
 * q     - read queue of the stream being closed.
 * flags - close flags; SO_FALLBACK marks a stream closed while in fallback.
 *
 * In the fallback case only the minor number is released with
 * inet_minor_free(), using the values stashed in the write and read queue
 * q_ptr fields. Otherwise the conn_t is looked up from the queue and, once
 * torn down, both q_ptr fields are cleared.
 *
 * NOTE(review): the teardown calls and return statements are on lines
 * missing from this view.
 */
707 udp_tpi_close(queue_t
*q
, int flags
)
711 if (flags
& SO_FALLBACK
) {
713 * stream is being closed while in fallback
714 * simply free the resources that were allocated
716 inet_minor_free(WR(q
)->q_ptr
, (dev_t
)(RD(q
)->q_ptr
));
721 connp
= Q_TO_CONN(q
);
/* Detach the conn_t from both halves of the queue pair. */
724 q
->q_ptr
= WR(q
)->q_ptr
= NULL
;
/*
 * udp_close_free: release per-endpoint receive option state and reset the
 * udp_t so it can be reused.
 *
 * connp - connection whose conn_udp is being reset.
 *
 * Any saved receive packet options (udp_recv_ipp) are freed, then the whole
 * udp_t is zeroed. The back-pointer udp_connp is the only field that
 * survives: it is asserted before the bzero and restored afterwards.
 */
729 udp_close_free(conn_t
*connp
)
731 udp_t
*udp
= connp
->conn_udp
;
733 /* If there are any options associated with the stream, free them. */
734 if (udp
->udp_recv_ipp
.ipp_fields
!= 0)
735 ip_pkt_free(&udp
->udp_recv_ipp
);
738 * Clear any fields which the kmem_cache constructor clears.
739 * Only udp_connp needs to be preserved.
740 * TBD: We should make this more efficient to avoid clearing
743 ASSERT(udp
->udp_connp
== connp
);
744 bzero(udp
, sizeof (udp_t
));
/* Re-establish the back-pointer wiped by the bzero above. */
745 udp
->udp_connp
= connp
;
/*
 * udp_do_disconnect: dissolve a connected endpoint's association with its
 * peer and fall back to the plain local binding.
 *
 * connp - the connection to disconnect.
 *
 * Returns the result of ip_laddr_fanout_insert() on the visible success
 * path. NOTE(review): the error returns (endpoint not in TS_DATA_XFER,
 * header template failure) are on lines missing from this view.
 *
 * Locking: conn_lock is taken first and the bind-hash bucket lock (uf_lock)
 * is nested inside it while the addresses, foreign port and udp_state are
 * rewritten. conn_lock stays held across the header template rebuild and is
 * dropped before the call into IP.
 */
749 udp_do_disconnect(conn_t
*connp
)
756 udp
= connp
->conn_udp
;
758 mutex_enter(&connp
->conn_lock
);
/* Only an endpoint in TS_DATA_XFER can be disconnected. */
759 if (udp
->udp_state
!= TS_DATA_XFER
) {
760 mutex_exit(&connp
->conn_lock
);
763 udpf
= &us
->us_bind_fanout
[UDP_BIND_HASH(connp
->conn_lport
,
764 us
->us_bind_fanout_size
)];
765 mutex_enter(&udpf
->uf_lock
);
/* Source address: all-zeros when conn_mcbc_bind is set, else the bound address. */
766 if (connp
->conn_mcbc_bind
)
767 connp
->conn_saddr_v6
= ipv6_all_zeros
;
769 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
770 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
/* Forget the peer and drop back to the bound-but-unconnected state. */
771 connp
->conn_faddr_v6
= ipv6_all_zeros
;
772 connp
->conn_fport
= 0;
773 udp
->udp_state
= TS_IDLE
;
774 mutex_exit(&udpf
->uf_lock
);
776 /* Remove any remnants of mapped address binding */
777 if (connp
->conn_family
== AF_INET6
)
778 connp
->conn_ipversion
= IPV6_VERSION
;
780 connp
->conn_v6lastdst
= ipv6_all_zeros
;
/* Rebuild the header template for the now-cleared foreign address/port. */
781 error
= udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
782 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
783 mutex_exit(&connp
->conn_lock
);
788 * Tell IP to remove the full binding and revert
789 * to the local address binding.
791 return (ip_laddr_fanout_insert(connp
));
/*
 * udp_tpi_disconnect: TPI entry point for T_DISCON_REQ.
 *
 * q  - queue the request arrived on.
 * mp - the T_DISCON_REQ; resized up front so it can carry either reply.
 *
 * Calls udp_do_disconnect() and answers with a T_OK_ACK on success, or with
 * an error ack carrying either the negated value as a TLI error or the
 * value as a system errno under TSYSERR. The selecting condition is on a
 * line missing from this view.
 *
 * NOTE(review): the reallocb() result is assigned straight back to mp. If
 * reallocb() leaves its argument allocated when it fails, the original
 * request is no longer reachable on the ENOMEM path, and
 * udp_err_ack_prim() is then handed a NULL mp - confirm both against the
 * reallocb/tpi_ack_alloc contracts.
 */
795 udp_tpi_disconnect(queue_t
*q
, mblk_t
*mp
)
797 conn_t
*connp
= Q_TO_CONN(q
);
801 * Allocate the largest primitive we need to send back
802 * T_error_ack is > than T_ok_ack
804 mp
= reallocb(mp
, sizeof (struct T_error_ack
), 1);
806 /* Unable to reuse the T_DISCON_REQ for the ack. */
807 udp_err_ack_prim(q
, mp
, T_DISCON_REQ
, TSYSERR
, ENOMEM
);
811 error
= udp_do_disconnect(connp
);
815 udp_err_ack(q
, mp
, -error
, 0);
817 udp_err_ack(q
, mp
, TSYSERR
, error
);
/* Success: convert the request into the T_OK_ACK. */
820 mp
= mi_tpi_ok_ack_alloc(mp
);
/*
 * udp_disconnect: disconnect wrapper around udp_do_disconnect() that also
 * turns off datagram error indications (conn_dgram_errind) first.
 *
 * A TLI-style result from udp_do_disconnect() is converted to a system
 * errno by negating it and passing it to proto_tlitosyserr().
 * NOTE(review): the test that decides when the conversion applies, and the
 * return statement, are on lines missing from this view.
 */
827 udp_disconnect(conn_t
*connp
)
831 connp
->conn_dgram_errind
= B_FALSE
;
832 error
= udp_do_disconnect(connp
);
834 error
= proto_tlitosyserr(-error
);
839 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
/*
 * udp_err_ack: build a T_ERROR_ACK from an incoming TPI message.
 *
 * q         - queue the offending request arrived on.
 * mp        - the request; handed to mi_tpi_err_ack_alloc(), which returns
 *             the message to send or NULL.
 * t_error   - TLI error code for the ack.
 * sys_error - UNIX errno for the ack (callers in this file pass 0 unless
 *             t_error is TSYSERR).
 *
 * NOTE(review): the statement that sends the ack when allocation succeeds
 * is on a line missing from this view.
 */
841 udp_err_ack(queue_t
*q
, mblk_t
*mp
, t_scalar_t t_error
, int sys_error
)
843 if ((mp
= mi_tpi_err_ack_alloc(mp
, t_error
, sys_error
)) != NULL
)
847 /* Shorthand to generate and send TPI error acks to our client */
/*
 * udp_err_ack_prim: build a T_ERROR_ACK that names the failed primitive
 * explicitly, for callers that no longer have the original request.
 *
 * q         - queue the offending request arrived on.
 * mp        - message passed to tpi_ack_alloc() for the ack.
 * primitive - value stored in ERROR_prim.
 * t_error   - value stored in TLI_error.
 * sys_error - value stored in UNIX_error.
 *
 * The ack is requested as an M_PCPROTO T_ERROR_ACK the size of a
 * T_error_ack; nothing is filled in if tpi_ack_alloc() returns NULL.
 * NOTE(review): the statement that sends the ack is on a line missing from
 * this view.
 */
849 udp_err_ack_prim(queue_t
*q
, mblk_t
*mp
, t_scalar_t primitive
,
850 t_scalar_t t_error
, int sys_error
)
852 struct T_error_ack
*teackp
;
854 if ((mp
= tpi_ack_alloc(mp
, sizeof (struct T_error_ack
),
855 M_PCPROTO
, T_ERROR_ACK
)) != NULL
) {
856 teackp
= (struct T_error_ack
*)mp
->b_rptr
;
857 teackp
->ERROR_prim
= primitive
;
858 teackp
->TLI_error
= t_error
;
859 teackp
->UNIX_error
= sys_error
;
864 /* At minimum we need 4 bytes of UDP header */
865 #define ICMP_MIN_UDP_HDR 4
868 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
869 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
870 * Assumes that IP has pulled up everything up to and including the ICMP header.
/*
 * udp_icmp_input: handle an ICMP error delivered for this UDP endpoint.
 *
 * arg1 - the conn_t the error was matched to.
 * mp   - the ICMP packet, starting at the outer IP header.
 * arg2 - not referenced in the lines visible here.
 * ira  - receive attributes; ira_ip_hdr_length gives the outer header size.
 *
 * Non-IPv4 packets are handed to udp_icmp_error_ipv6(). For IPv4 the outer
 * IP and ICMP headers are skipped to reach the embedded (inner) IP header
 * and the UDP header behind it:
 *   - fragmentation needed: re-evaluates the path MTU via ip_get_pmtu() and
 *     updates the fragment flags in the cached header template
 *     (conn_ht_iphc) according to IXAF_PMTU_IPV4_DF;
 *   - port / protocol unreachable: mapped to ECONNREFUSED;
 *   - the remaining cases are marked as transient.
 *
 * The error is only reported when conn_dgram_errind is set. The
 * destination of the failed datagram is taken from the inner headers, as a
 * sin_t or a v4-mapped sin6_t depending on conn_family. For non-STREAMS
 * sockets in TS_DATA_XFER whose peer matches it, the error is raised
 * through the su_set_error upcall; the error and address are also recorded
 * in udp_delayed_error / udp_delayed_addr. Otherwise a T_UDERROR_IND is
 * sent up conn_rq.
 *
 * NOTE(review): many lines (declarations, break/return statements, else
 * arms) are missing from this view, so the exact branch structure cannot be
 * confirmed here.
 */
874 udp_icmp_input(void *arg1
, mblk_t
*mp
, void *arg2
, ip_recv_attr_t
*ira
)
876 conn_t
*connp
= (conn_t
*)arg1
;
885 udp_t
*udp
= connp
->conn_udp
;
887 ipha
= (ipha_t
*)mp
->b_rptr
;
889 ASSERT(OK_32PTR(mp
->b_rptr
));
/* Anything that is not IPv4 is treated as IPv6 and handled separately. */
891 if (IPH_HDR_VERSION(ipha
) != IPV4_VERSION
) {
892 ASSERT(IPH_HDR_VERSION(ipha
) == IPV6_VERSION
);
893 udp_icmp_error_ipv6(connp
, mp
, ira
);
896 ASSERT(IPH_HDR_VERSION(ipha
) == IPV4_VERSION
);
898 /* Skip past the outer IP and ICMP headers */
899 ASSERT(IPH_HDR_LENGTH(ipha
) == ira
->ira_ip_hdr_length
);
900 iph_hdr_length
= ira
->ira_ip_hdr_length
;
901 icmph
= (icmph_t
*)&mp
->b_rptr
[iph_hdr_length
];
902 ipha
= (ipha_t
*)&icmph
[1]; /* Inner IP header */
904 /* Skip past the inner IP and find the ULP header */
905 iph_hdr_length
= IPH_HDR_LENGTH(ipha
);
906 udpha
= (udpha_t
*)((char *)ipha
+ iph_hdr_length
);
908 switch (icmph
->icmph_type
) {
909 case ICMP_DEST_UNREACHABLE
:
910 switch (icmph
->icmph_code
) {
911 case ICMP_FRAGMENTATION_NEEDED
: {
915 * IP has already adjusted the path MTU.
916 * But we need to adjust DF for IPv4.
918 if (connp
->conn_ipversion
!= IPV4_VERSION
)
921 ixa
= conn_get_ixa(connp
, B_FALSE
);
922 if (ixa
== NULL
|| ixa
->ixa_ire
== NULL
) {
924 * Some other thread holds conn_ixa. We will
925 * redo this on the next ICMP too big.
931 (void) ip_get_pmtu(ixa
);
/* From here on ipha refers to the cached header template, not the packet. */
933 mutex_enter(&connp
->conn_lock
);
934 ipha
= (ipha_t
*)connp
->conn_ht_iphc
;
935 if (ixa
->ixa_flags
& IXAF_PMTU_IPV4_DF
) {
936 ipha
->ipha_fragment_offset_and_flags
|=
939 ipha
->ipha_fragment_offset_and_flags
&=
942 mutex_exit(&connp
->conn_lock
);
946 case ICMP_PORT_UNREACHABLE
:
947 case ICMP_PROTOCOL_UNREACHABLE
:
948 error
= ECONNREFUSED
;
951 /* Transient errors */
956 /* Transient errors */
965 * Deliver T_UDERROR_IND when the application has asked for it.
966 * The socket layer enables this automatically when connected.
968 if (!connp
->conn_dgram_errind
) {
973 switch (connp
->conn_family
) {
/* The failed datagram's destination comes from the inner IP/UDP headers. */
976 sin
.sin_family
= AF_INET
;
977 sin
.sin_addr
.s_addr
= ipha
->ipha_dst
;
978 sin
.sin_port
= udpha
->uha_dst_port
;
979 if (IPCL_IS_NONSTR(connp
)) {
980 mutex_enter(&connp
->conn_lock
);
981 if (udp
->udp_state
== TS_DATA_XFER
) {
982 if (sin
.sin_port
== connp
->conn_fport
&&
983 sin
.sin_addr
.s_addr
==
984 connp
->conn_faddr_v4
) {
/* conn_lock is dropped before calling up into the socket layer. */
985 mutex_exit(&connp
->conn_lock
);
986 (*connp
->conn_upcalls
->su_set_error
)
987 (connp
->conn_upper_handle
, error
);
991 udp
->udp_delayed_error
= error
;
992 *((sin_t
*)&udp
->udp_delayed_addr
) = sin
;
994 mutex_exit(&connp
->conn_lock
);
996 mp1
= mi_tpi_uderror_ind((char *)&sin
, sizeof (sin_t
),
999 putnext(connp
->conn_rq
, mp1
);
/* Same handling, with the IPv4 destination expressed as a v4-mapped address. */
1004 sin6
.sin6_family
= AF_INET6
;
1005 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_dst
, &sin6
.sin6_addr
);
1006 sin6
.sin6_port
= udpha
->uha_dst_port
;
1007 if (IPCL_IS_NONSTR(connp
)) {
1008 mutex_enter(&connp
->conn_lock
);
1009 if (udp
->udp_state
== TS_DATA_XFER
) {
1010 if (sin6
.sin6_port
== connp
->conn_fport
&&
1011 IN6_ARE_ADDR_EQUAL(&sin6
.sin6_addr
,
1012 &connp
->conn_faddr_v6
)) {
1013 mutex_exit(&connp
->conn_lock
);
1014 (*connp
->conn_upcalls
->su_set_error
)
1015 (connp
->conn_upper_handle
, error
);
1019 udp
->udp_delayed_error
= error
;
1020 *((sin6_t
*)&udp
->udp_delayed_addr
) = sin6
;
1022 mutex_exit(&connp
->conn_lock
);
1024 mp1
= mi_tpi_uderror_ind((char *)&sin6
, sizeof (sin6_t
),
1027 putnext(connp
->conn_rq
, mp1
);
1036 * udp_icmp_error_ipv6 is called by udp_icmp_input to process ICMP for IPv6.
1037 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
1038 * Assumes that IP has pulled up all the extension headers as well as the
/*
 * udp_icmp_error_ipv6: IPv6 counterpart of the ICMP error handling, called
 * from udp_icmp_input() for non-IPv4 packets.
 *
 * connp - the conn_t the error was matched to.
 * mp    - the ICMPv6 packet, starting at the outer IPv6 header.
 * ira   - receive attributes; ira_ip_hdr_length gives the outer header size.
 *
 * The outer IPv6 and ICMPv6 headers are skipped to reach the embedded
 * (inner) IPv6 header, whose extension headers are walked with
 * ip_hdr_length_nexthdr_v6() to find the UDP header:
 *   - destination unreachable / no port: ECONNREFUSED; the other
 *     unreachable codes are marked transient;
 *   - packet too big: if conn_ipv6_recvpathmtu is set, a data-less
 *     T_UNITDATA_IND carrying an IPV6_PATHMTU option (struct ip6_mtuinfo
 *     with the inner destination and the reported MTU) is built and passed
 *     to udp_ulp_recv(); allocation failure bumps udpInErrors;
 *   - time exceeded: transient;
 *   - parameter problem: ECONNREFUSED only when the code is
 *     ICMP6_PARAMPROB_NEXTHEADER and the pointer lands on the inner
 *     packet's next-header byte.
 *
 * The error is only reported when conn_dgram_errind is set. For
 * non-STREAMS sockets in TS_DATA_XFER whose peer matches the failed
 * destination, the error is raised through the su_set_error upcall; the
 * error and address are also recorded in udp_delayed_error /
 * udp_delayed_addr. Otherwise a T_UDERROR_IND is sent up conn_rq.
 *
 * NOTE(review): many lines (declarations, break/return statements, else
 * arms, message frees) are missing from this view, so the exact branch
 * structure cannot be confirmed here.
 */
1042 udp_icmp_error_ipv6(conn_t
*connp
, mblk_t
*mp
, ip_recv_attr_t
*ira
)
1045 ip6_t
*ip6h
, *outer_ip6h
;
1046 uint16_t iph_hdr_length
;
1052 udp_t
*udp
= connp
->conn_udp
;
1053 udp_stack_t
*us
= udp
->udp_us
;
1055 outer_ip6h
= (ip6_t
*)mp
->b_rptr
;
/* Extension headers precede ICMPv6 unless it is the first next-header. */
1057 if (outer_ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
)
1058 iph_hdr_length
= ip_hdr_length_v6(mp
, outer_ip6h
);
1060 iph_hdr_length
= IPV6_HDR_LEN
;
1061 ASSERT(iph_hdr_length
== ira
->ira_ip_hdr_length
);
1063 /* Skip past the outer IP and ICMP headers */
1064 iph_hdr_length
= ira
->ira_ip_hdr_length
;
1065 icmp6
= (icmp6_t
*)&mp
->b_rptr
[iph_hdr_length
];
1067 /* Skip past the inner IP and find the ULP header */
1068 ip6h
= (ip6_t
*)&icmp6
[1]; /* Inner IP header */
1069 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &iph_hdr_length
, &nexthdrp
)) {
1073 udpha
= (udpha_t
*)((char *)ip6h
+ iph_hdr_length
);
1075 switch (icmp6
->icmp6_type
) {
1076 case ICMP6_DST_UNREACH
:
1077 switch (icmp6
->icmp6_code
) {
1078 case ICMP6_DST_UNREACH_NOPORT
:
1079 error
= ECONNREFUSED
;
1081 case ICMP6_DST_UNREACH_ADMIN
:
1082 case ICMP6_DST_UNREACH_NOROUTE
:
1083 case ICMP6_DST_UNREACH_BEYONDSCOPE
:
1084 case ICMP6_DST_UNREACH_ADDR
:
1085 /* Transient errors */
1091 case ICMP6_PACKET_TOO_BIG
: {
1092 struct T_unitdata_ind
*tudi
;
1093 struct T_opthdr
*toh
;
/* One option: a T_opthdr immediately followed by its ip6_mtuinfo payload. */
1096 t_scalar_t opt_length
= sizeof (struct T_opthdr
) +
1097 sizeof (struct ip6_mtuinfo
);
1099 struct ip6_mtuinfo
*mtuinfo
;
1102 * If the application has requested to receive path mtu
1103 * information, send up an empty message containing an
1104 * IPV6_PATHMTU ancillary data item.
1106 if (!connp
->conn_ipv6_recvpathmtu
)
1109 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
) +
1111 if ((newmp
= allocb(udi_size
, BPRI_MED
)) == NULL
) {
1112 UDPS_BUMP_MIB(us
, udpInErrors
);
1117 * newmp->b_cont is left to NULL on purpose. This is an
1118 * empty message containing only ancillary data.
1120 newmp
->b_datap
->db_type
= M_PROTO
;
1121 tudi
= (struct T_unitdata_ind
*)newmp
->b_rptr
;
1122 newmp
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
/* Layout: T_unitdata_ind, then the source sin6_t, then the option. */
1123 tudi
->PRIM_type
= T_UNITDATA_IND
;
1124 tudi
->SRC_length
= sizeof (sin6_t
);
1125 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
1126 tudi
->OPT_offset
= tudi
->SRC_offset
+ sizeof (sin6_t
);
1127 tudi
->OPT_length
= opt_length
;
/* The source address reported is the connected peer (conn_faddr_v6). */
1129 sin6
= (sin6_t
*)&tudi
[1];
1130 bzero(sin6
, sizeof (sin6_t
));
1131 sin6
->sin6_family
= AF_INET6
;
1132 sin6
->sin6_addr
= connp
->conn_faddr_v6
;
1134 toh
= (struct T_opthdr
*)&sin6
[1];
1135 toh
->level
= IPPROTO_IPV6
;
1136 toh
->name
= IPV6_PATHMTU
;
1137 toh
->len
= opt_length
;
1140 mtuinfo
= (struct ip6_mtuinfo
*)&toh
[1];
1141 bzero(mtuinfo
, sizeof (struct ip6_mtuinfo
));
1142 mtuinfo
->ip6m_addr
.sin6_family
= AF_INET6
;
1143 mtuinfo
->ip6m_addr
.sin6_addr
= ip6h
->ip6_dst
;
1144 mtuinfo
->ip6m_mtu
= icmp6
->icmp6_mtu
;
1146 * We've consumed everything we need from the original
1147 * message. Free it, then send our empty message.
1150 udp_ulp_recv(connp
, newmp
, msgdsize(newmp
), ira
);
1153 case ICMP6_TIME_EXCEEDED
:
1154 /* Transient errors */
1156 case ICMP6_PARAM_PROB
:
1157 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1158 if (icmp6
->icmp6_code
== ICMP6_PARAMPROB_NEXTHEADER
&&
1159 (uchar_t
*)ip6h
+ icmp6
->icmp6_pptr
==
1160 (uchar_t
*)nexthdrp
) {
1161 error
= ECONNREFUSED
;
1172 * Deliver T_UDERROR_IND when the application has asked for it.
1173 * The socket layer enables this automatically when connected.
1175 if (!connp
->conn_dgram_errind
) {
/* The failed datagram's destination comes from the inner IPv6/UDP headers. */
1181 sin6
.sin6_family
= AF_INET6
;
1182 sin6
.sin6_addr
= ip6h
->ip6_dst
;
1183 sin6
.sin6_port
= udpha
->uha_dst_port
;
1184 sin6
.sin6_flowinfo
= ip6h
->ip6_vcf
& ~IPV6_VERS_AND_FLOW_MASK
;
1186 if (IPCL_IS_NONSTR(connp
)) {
1187 mutex_enter(&connp
->conn_lock
);
1188 if (udp
->udp_state
== TS_DATA_XFER
) {
1189 if (sin6
.sin6_port
== connp
->conn_fport
&&
1190 IN6_ARE_ADDR_EQUAL(&sin6
.sin6_addr
,
1191 &connp
->conn_faddr_v6
)) {
/* conn_lock is dropped before calling up into the socket layer. */
1192 mutex_exit(&connp
->conn_lock
);
1193 (*connp
->conn_upcalls
->su_set_error
)
1194 (connp
->conn_upper_handle
, error
);
1198 udp
->udp_delayed_error
= error
;
1199 *((sin6_t
*)&udp
->udp_delayed_addr
) = sin6
;
1201 mutex_exit(&connp
->conn_lock
);
1203 mp1
= mi_tpi_uderror_ind((char *)&sin6
, sizeof (sin6_t
),
1206 putnext(connp
->conn_rq
, mp1
);
1213 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput.
1214 * The local address is filled in if endpoint is bound. The remote address
1215 * is filled in if remote address has been specified ("connected endpoint")
1216 * (The concept of connected CLTS sockets is alien to published TPI
1217 * but we support it anyway).
1220 udp_addr_req(queue_t
*q
, mblk_t
*mp
)
1222 struct sockaddr
*sa
;
1224 struct T_addr_ack
*taa
;
1225 udp_t
*udp
= Q_TO_UDP(q
);
1226 conn_t
*connp
= udp
->udp_connp
;
1229 /* Make it large enough for worst case */
1230 ackmp
= reallocb(mp
, sizeof (struct T_addr_ack
) +
1231 2 * sizeof (sin6_t
), 1);
1232 if (ackmp
== NULL
) {
1233 udp_err_ack(q
, mp
, TSYSERR
, ENOMEM
);
1236 taa
= (struct T_addr_ack
*)ackmp
->b_rptr
;
1238 bzero(taa
, sizeof (struct T_addr_ack
));
1239 ackmp
->b_wptr
= (uchar_t
*)&taa
[1];
1241 taa
->PRIM_type
= T_ADDR_ACK
;
1242 ackmp
->b_datap
->db_type
= M_PCPROTO
;
1244 if (connp
->conn_family
== AF_INET
)
1245 addrlen
= sizeof (sin_t
);
1247 addrlen
= sizeof (sin6_t
);
1249 mutex_enter(&connp
->conn_lock
);
1251 * Note: Following code assumes 32 bit alignment of basic
1252 * data structures like sin_t and struct T_addr_ack.
1254 if (udp
->udp_state
!= TS_UNBND
) {
1256 * Fill in local address first
1258 taa
->LOCADDR_offset
= sizeof (*taa
);
1259 taa
->LOCADDR_length
= addrlen
;
1260 sa
= (struct sockaddr
*)&taa
[1];
1261 (void) conn_getsockname(connp
, sa
, &addrlen
);
1262 ackmp
->b_wptr
+= addrlen
;
1264 if (udp
->udp_state
== TS_DATA_XFER
) {
1266 * connected, fill remote address too
1268 taa
->REMADDR_length
= addrlen
;
1269 /* assumed 32-bit alignment */
1270 taa
->REMADDR_offset
= taa
->LOCADDR_offset
+ taa
->LOCADDR_length
;
1271 sa
= (struct sockaddr
*)(ackmp
->b_rptr
+ taa
->REMADDR_offset
);
1272 (void) conn_getpeername(connp
, sa
, &addrlen
);
1273 ackmp
->b_wptr
+= addrlen
;
1275 mutex_exit(&connp
->conn_lock
);
1276 ASSERT(ackmp
->b_wptr
<= ackmp
->b_datap
->db_lim
);
1281 udp_copy_info(struct T_info_ack
*tap
, udp_t
*udp
)
1283 conn_t
*connp
= udp
->udp_connp
;
1285 if (connp
->conn_family
== AF_INET
) {
1286 *tap
= udp_g_t_info_ack_ipv4
;
1288 *tap
= udp_g_t_info_ack_ipv6
;
1290 tap
->CURRENT_state
= udp
->udp_state
;
1291 tap
->OPT_size
= udp_max_optsize
;
1295 udp_do_capability_ack(udp_t
*udp
, struct T_capability_ack
*tcap
,
1296 t_uscalar_t cap_bits1
)
1298 tcap
->CAP_bits1
= 0;
1300 if (cap_bits1
& TC1_INFO
) {
1301 udp_copy_info(&tcap
->INFO_ack
, udp
);
1302 tcap
->CAP_bits1
|= TC1_INFO
;
1307 * This routine responds to T_CAPABILITY_REQ messages. It is called by
1308 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from
1309 * udp_g_t_info_ack. The current state of the stream is copied from
1313 udp_capability_req(queue_t
*q
, mblk_t
*mp
)
1315 t_uscalar_t cap_bits1
;
1316 struct T_capability_ack
*tcap
;
1317 udp_t
*udp
= Q_TO_UDP(q
);
1319 cap_bits1
= ((struct T_capability_req
*)mp
->b_rptr
)->CAP_bits1
;
1321 mp
= tpi_ack_alloc(mp
, sizeof (struct T_capability_ack
),
1322 mp
->b_datap
->db_type
, T_CAPABILITY_ACK
);
1326 tcap
= (struct T_capability_ack
*)mp
->b_rptr
;
1327 udp_do_capability_ack(udp
, tcap
, cap_bits1
);
1333 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
1334 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
1335 * The current state of the stream is copied from udp_state.
1338 udp_info_req(queue_t
*q
, mblk_t
*mp
)
1340 udp_t
*udp
= Q_TO_UDP(q
);
1342 /* Create a T_INFO_ACK message. */
1343 mp
= tpi_ack_alloc(mp
, sizeof (struct T_info_ack
), M_PCPROTO
,
1347 udp_copy_info((struct T_info_ack
*)mp
->b_rptr
, udp
);
1351 /* For /dev/udp aka AF_INET open */
1353 udp_openv4(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
)
1355 return (udp_open(q
, devp
, flag
, sflag
, credp
, B_FALSE
));
1358 /* For /dev/udp6 aka AF_INET6 open */
1360 udp_openv6(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
)
1362 return (udp_open(q
, devp
, flag
, sflag
, credp
, B_TRUE
));
1366 * This is the open routine for udp. It allocates a udp_t structure for
1367 * the stream and, on the first open of the module, creates an ND table.
1370 udp_open(queue_t
*q
, dev_t
*devp
, int flag
, int sflag
, cred_t
*credp
,
1376 vmem_t
*minor_arena
;
1379 /* If the stream is already open, return immediately. */
1380 if (q
->q_ptr
!= NULL
)
1383 if (sflag
== MODOPEN
)
1386 if ((ip_minor_arena_la
!= NULL
) && (flag
& SO_SOCKSTR
) &&
1387 ((conn_dev
= inet_minor_alloc(ip_minor_arena_la
)) != 0)) {
1388 minor_arena
= ip_minor_arena_la
;
1391 * Either minor numbers in the large arena were exhausted
1392 * or a non socket application is doing the open.
1393 * Try to allocate from the small arena.
1395 if ((conn_dev
= inet_minor_alloc(ip_minor_arena_sa
)) == 0)
1398 minor_arena
= ip_minor_arena_sa
;
1401 if (flag
& SO_FALLBACK
) {
1403 * Non streams socket needs a stream to fallback to
1405 RD(q
)->q_ptr
= (void *)conn_dev
;
1406 WR(q
)->q_qinfo
= &udp_fallback_sock_winit
;
1407 WR(q
)->q_ptr
= (void *)minor_arena
;
1412 connp
= udp_do_open(credp
, isv6
, KM_SLEEP
, &err
);
1413 if (connp
== NULL
) {
1414 inet_minor_free(minor_arena
, conn_dev
);
1417 udp
= connp
->conn_udp
;
1419 *devp
= makedevice(getemajor(*devp
), (minor_t
)conn_dev
);
1420 connp
->conn_dev
= conn_dev
;
1421 connp
->conn_minor_arena
= minor_arena
;
1424 * Initialize the udp_t structure for this stream.
1427 WR(q
)->q_ptr
= connp
;
1429 connp
->conn_wq
= WR(q
);
1432 * Since this conn_t/udp_t is not yet visible to anybody else we don't
1433 * need to lock anything.
1435 ASSERT(connp
->conn_proto
== IPPROTO_UDP
);
1436 ASSERT(connp
->conn_udp
== udp
);
1437 ASSERT(udp
->udp_connp
== connp
);
1439 if (flag
& SO_SOCKSTR
) {
1440 udp
->udp_issocket
= B_TRUE
;
1443 WR(q
)->q_hiwat
= connp
->conn_sndbuf
;
1444 WR(q
)->q_lowat
= connp
->conn_sndlowat
;
1448 /* Set the Stream head write offset and high watermark. */
1449 (void) proto_set_tx_wroff(q
, connp
, connp
->conn_wroff
);
1450 (void) proto_set_rx_hiwat(q
, connp
,
1451 udp_set_rcv_hiwat(udp
, connp
->conn_rcvbuf
));
1453 mutex_enter(&connp
->conn_lock
);
1454 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
1455 mutex_exit(&connp
->conn_lock
);
1460 * Which UDP options OK to set through T_UNITDATA_REQ...
1464 udp_opt_allow_udr_set(t_scalar_t level
, t_scalar_t name
)
1470 * This routine gets default values of certain options whose default
1471 * values are maintained by protocol specific code
1474 udp_opt_default(queue_t
*q
, t_scalar_t level
, t_scalar_t name
, uchar_t
*ptr
)
1476 udp_t
*udp
= Q_TO_UDP(q
);
1477 udp_stack_t
*us
= udp
->udp_us
;
1478 int *i1
= (int *)ptr
;
1483 case IP_MULTICAST_TTL
:
1484 *ptr
= (uchar_t
)IP_DEFAULT_MULTICAST_TTL
;
1485 return (sizeof (uchar_t
));
1486 case IP_MULTICAST_LOOP
:
1487 *ptr
= (uchar_t
)IP_DEFAULT_MULTICAST_LOOP
;
1488 return (sizeof (uchar_t
));
1493 case IPV6_MULTICAST_HOPS
:
1494 *i1
= IP_DEFAULT_MULTICAST_TTL
;
1495 return (sizeof (int));
1496 case IPV6_MULTICAST_LOOP
:
1497 *i1
= IP_DEFAULT_MULTICAST_LOOP
;
1498 return (sizeof (int));
1499 case IPV6_UNICAST_HOPS
:
1500 *i1
= us
->us_ipv6_hoplimit
;
1501 return (sizeof (int));
1509 * This routine retrieves the current status of socket options.
1510 * It returns the size of the option retrieved, or -1.
1513 udp_opt_get(conn_t
*connp
, t_scalar_t level
, t_scalar_t name
,
1516 int *i1
= (int *)ptr
;
1517 udp_t
*udp
= connp
->conn_udp
;
1519 conn_opt_arg_t coas
;
1522 coas
.coa_connp
= connp
;
1523 coas
.coa_ixa
= connp
->conn_ixa
;
1524 coas
.coa_ipp
= &connp
->conn_xmit_ipp
;
1525 coas
.coa_ancillary
= B_FALSE
;
1526 coas
.coa_changed
= 0;
1529 * We assume that the optcom framework has checked for the set
1530 * of levels and names that are supported, hence we don't worry
1531 * about rejecting based on that.
1532 * First check for UDP specific handling, then pass to common routine.
1537 * Only allow IPv4 option processing on IPv4 sockets.
1539 if (connp
->conn_family
!= AF_INET
)
1545 mutex_enter(&connp
->conn_lock
);
1546 if (!(udp
->udp_recv_ipp
.ipp_fields
&
1547 IPPF_IPV4_OPTIONS
)) {
1548 mutex_exit(&connp
->conn_lock
);
1552 len
= udp
->udp_recv_ipp
.ipp_ipv4_options_len
;
1554 bcopy(udp
->udp_recv_ipp
.ipp_ipv4_options
, ptr
, len
);
1555 mutex_exit(&connp
->conn_lock
);
1561 case UDP_NAT_T_ENDPOINT
:
1562 mutex_enter(&connp
->conn_lock
);
1563 *i1
= udp
->udp_nat_t_endpoint
;
1564 mutex_exit(&connp
->conn_lock
);
1565 return (sizeof (int));
1567 mutex_enter(&connp
->conn_lock
);
1568 *i1
= udp
->udp_rcvhdr
? 1 : 0;
1569 mutex_exit(&connp
->conn_lock
);
1570 return (sizeof (int));
1573 mutex_enter(&connp
->conn_lock
);
1574 retval
= conn_opt_get(&coas
, level
, name
, ptr
);
1575 mutex_exit(&connp
->conn_lock
);
1580 * This routine retrieves the current status of socket options.
1581 * It returns the size of the option retrieved, or -1.
1584 udp_tpi_opt_get(queue_t
*q
, t_scalar_t level
, t_scalar_t name
, uchar_t
*ptr
)
1586 conn_t
*connp
= Q_TO_CONN(q
);
1589 err
= udp_opt_get(connp
, level
, name
, ptr
);
1594 * This routine sets socket options.
1597 udp_do_opt_set(conn_opt_arg_t
*coa
, int level
, int name
,
1598 uint_t inlen
, uchar_t
*invalp
, cred_t
*cr
, boolean_t checkonly
)
1600 conn_t
*connp
= coa
->coa_connp
;
1601 ip_xmit_attr_t
*ixa
= coa
->coa_ixa
;
1602 udp_t
*udp
= connp
->conn_udp
;
1603 udp_stack_t
*us
= udp
->udp_us
;
1604 int *i1
= (int *)invalp
;
1605 boolean_t onoff
= (*i1
== 0) ? 0 : 1;
1608 ASSERT(MUTEX_NOT_HELD(&coa
->coa_connp
->conn_lock
));
1610 * First do UDP specific sanity checks and handle UDP specific
1611 * options. Note that some IPPROTO_UDP options are handled
1618 if (*i1
> us
->us_max_buf
) {
1623 if (*i1
> us
->us_max_buf
) {
1634 case UDP_NAT_T_ENDPOINT
:
1635 if ((error
= secpolicy_ip_config(cr
, B_FALSE
)) != 0) {
1640 * Use conn_family instead so we can avoid ambiguities
1641 * with AF_INET6 sockets that may switch from IPv4
1644 if (connp
->conn_family
!= AF_INET
) {
1645 return (EAFNOSUPPORT
);
1649 mutex_enter(&connp
->conn_lock
);
1650 udp
->udp_nat_t_endpoint
= onoff
;
1651 mutex_exit(&connp
->conn_lock
);
1652 coa
->coa_changed
|= COA_HEADER_CHANGED
;
1653 coa
->coa_changed
|= COA_WROFF_CHANGED
;
1655 /* Fully handled this option. */
1658 mutex_enter(&connp
->conn_lock
);
1659 udp
->udp_rcvhdr
= onoff
;
1660 mutex_exit(&connp
->conn_lock
);
1665 error
= conn_opt_set(coa
, level
, name
, inlen
, invalp
,
1671 * This routine sets socket options.
1674 udp_opt_set(conn_t
*connp
, uint_t optset_context
, int level
,
1675 int name
, uint_t inlen
, uchar_t
*invalp
, uint_t
*outlenp
,
1676 uchar_t
*outvalp
, void *thisdg_attrs
, cred_t
*cr
)
1678 udp_t
*udp
= connp
->conn_udp
;
1680 conn_opt_arg_t coas
, *coa
;
1681 boolean_t checkonly
;
1682 udp_stack_t
*us
= udp
->udp_us
;
1684 switch (optset_context
) {
1685 case SETFN_OPTCOM_CHECKONLY
:
1688 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
1689 * inlen != 0 implies value supplied and
1690 * we have to "pretend" to set it.
1691 * inlen == 0 implies that there is no
1692 * value part in T_CHECK request and just validation
1693 * done elsewhere should be enough, we just return here.
1700 case SETFN_OPTCOM_NEGOTIATE
:
1701 checkonly
= B_FALSE
;
1703 case SETFN_UD_NEGOTIATE
:
1704 case SETFN_CONN_NEGOTIATE
:
1705 checkonly
= B_FALSE
;
1707 * Negotiating local and "association-related" options
1708 * through T_UNITDATA_REQ.
1710 * Following routine can filter out ones we do not
1711 * want to be "set" this way.
1713 if (!udp_opt_allow_udr_set(level
, name
)) {
1720 * We should never get here
1726 ASSERT((optset_context
!= SETFN_OPTCOM_CHECKONLY
) ||
1727 (optset_context
== SETFN_OPTCOM_CHECKONLY
&& inlen
!= 0));
1729 if (thisdg_attrs
!= NULL
) {
1730 /* Options from T_UNITDATA_REQ */
1731 coa
= (conn_opt_arg_t
*)thisdg_attrs
;
1732 ASSERT(coa
->coa_connp
== connp
);
1733 ASSERT(coa
->coa_ixa
!= NULL
);
1734 ASSERT(coa
->coa_ipp
!= NULL
);
1735 ASSERT(coa
->coa_ancillary
);
1738 coas
.coa_connp
= connp
;
1739 /* Get a reference on conn_ixa to prevent concurrent mods */
1740 coas
.coa_ixa
= conn_get_ixa(connp
, B_TRUE
);
1741 if (coas
.coa_ixa
== NULL
) {
1745 coas
.coa_ipp
= &connp
->conn_xmit_ipp
;
1746 coas
.coa_ancillary
= B_FALSE
;
1747 coas
.coa_changed
= 0;
1750 err
= udp_do_opt_set(coa
, level
, name
, inlen
, invalp
,
1754 if (!coa
->coa_ancillary
)
1755 ixa_refrele(coa
->coa_ixa
);
1759 /* Handle DHCPINIT here outside of lock */
1760 if (level
== IPPROTO_IP
&& name
== IP_DHCPINIT_IF
) {
1764 ifindex
= *(uint_t
*)invalp
;
1768 ill
= ill_lookup_on_ifindex(ifindex
, B_FALSE
,
1769 coa
->coa_ixa
->ixa_ipst
);
1775 mutex_enter(&ill
->ill_lock
);
1776 if (ill
->ill_state_flags
& ILL_CONDEMNED
) {
1777 mutex_exit(&ill
->ill_lock
);
1783 mutex_exit(&ill
->ill_lock
);
1789 mutex_enter(&connp
->conn_lock
);
1791 if (connp
->conn_dhcpinit_ill
!= NULL
) {
1793 * We've locked the conn so conn_cleanup_ill()
1794 * cannot clear conn_dhcpinit_ill -- so it's
1795 * safe to access the ill.
1797 ill_t
*oill
= connp
->conn_dhcpinit_ill
;
1799 ASSERT(oill
->ill_dhcpinit
!= 0);
1800 atomic_dec_32(&oill
->ill_dhcpinit
);
1801 ill_set_inputfn(connp
->conn_dhcpinit_ill
);
1802 connp
->conn_dhcpinit_ill
= NULL
;
1806 connp
->conn_dhcpinit_ill
= ill
;
1807 atomic_inc_32(&ill
->ill_dhcpinit
);
1808 ill_set_inputfn(ill
);
1809 mutex_exit(&connp
->conn_lock
);
1810 mutex_exit(&ill
->ill_lock
);
1813 mutex_exit(&connp
->conn_lock
);
1818 * Common case of OK return with outval same as inval.
1820 if (invalp
!= outvalp
) {
1821 /* don't trust bcopy for identical src/dst */
1822 (void) bcopy(invalp
, outvalp
, inlen
);
1827 * If this was not ancillary data, then we rebuild the headers,
1828 * update the IRE/NCE, and IPsec as needed.
1830 if (coa
->coa_ancillary
) {
1834 if (coa
->coa_changed
& COA_ROUTE_CHANGED
) {
1835 in6_addr_t saddr
, faddr
, nexthop
;
1839 * We clear lastdst to make sure we pick up the change
1840 * next time sending.
1841 * If we are connected we re-cache the information.
1842 * We ignore errors to preserve BSD behavior.
1843 * Note that we don't redo IPsec policy lookup here
1844 * since the final destination (or source) didn't change.
1846 mutex_enter(&connp
->conn_lock
);
1847 connp
->conn_v6lastdst
= ipv6_all_zeros
;
1849 ip_attr_nexthop(coa
->coa_ipp
, coa
->coa_ixa
,
1850 &connp
->conn_faddr_v6
, &nexthop
);
1851 saddr
= connp
->conn_saddr_v6
;
1852 faddr
= connp
->conn_faddr_v6
;
1853 fport
= connp
->conn_fport
;
1854 mutex_exit(&connp
->conn_lock
);
1856 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr
) &&
1857 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr
)) {
1858 (void) ip_attr_connect(connp
, coa
->coa_ixa
,
1859 &saddr
, &faddr
, &nexthop
, fport
, NULL
, NULL
,
1860 IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
);
1864 ixa_refrele(coa
->coa_ixa
);
1866 if (coa
->coa_changed
& COA_HEADER_CHANGED
) {
1868 * Rebuild the header template if we are connected.
1869 * Otherwise clear conn_v6lastdst so we rebuild the header
1872 mutex_enter(&connp
->conn_lock
);
1873 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
) &&
1874 !IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_faddr_v6
)) {
1875 err
= udp_build_hdr_template(connp
,
1876 &connp
->conn_saddr_v6
, &connp
->conn_faddr_v6
,
1877 connp
->conn_fport
, connp
->conn_flowinfo
);
1879 mutex_exit(&connp
->conn_lock
);
1883 connp
->conn_v6lastdst
= ipv6_all_zeros
;
1885 mutex_exit(&connp
->conn_lock
);
1887 if (coa
->coa_changed
& COA_RCVBUF_CHANGED
) {
1888 (void) proto_set_rx_hiwat(connp
->conn_rq
, connp
,
1889 connp
->conn_rcvbuf
);
1891 if ((coa
->coa_changed
& COA_SNDBUF_CHANGED
) && !IPCL_IS_NONSTR(connp
)) {
1892 connp
->conn_wq
->q_hiwat
= connp
->conn_sndbuf
;
1894 if (coa
->coa_changed
& COA_WROFF_CHANGED
) {
1895 /* Increase wroff if needed */
1898 mutex_enter(&connp
->conn_lock
);
1899 wroff
= connp
->conn_ht_iphc_allocated
+ us
->us_wroff_extra
;
1900 if (udp
->udp_nat_t_endpoint
)
1901 wroff
+= sizeof (uint32_t);
1902 if (wroff
> connp
->conn_wroff
) {
1903 connp
->conn_wroff
= wroff
;
1904 mutex_exit(&connp
->conn_lock
);
1905 (void) proto_set_tx_wroff(connp
->conn_rq
, connp
, wroff
);
1907 mutex_exit(&connp
->conn_lock
);
1913 /* This routine sets socket options. */
1915 udp_tpi_opt_set(queue_t
*q
, uint_t optset_context
, int level
, int name
,
1916 uint_t inlen
, uchar_t
*invalp
, uint_t
*outlenp
, uchar_t
*outvalp
,
1917 void *thisdg_attrs
, cred_t
*cr
)
1919 conn_t
*connp
= Q_TO_CONN(q
);
1922 error
= udp_opt_set(connp
, optset_context
, level
, name
, inlen
, invalp
,
1923 outlenp
, outvalp
, thisdg_attrs
, cr
);
1928 * Setup IP and UDP headers.
1929 * Returns NULL on allocation failure, in which case data_mp is freed.
1932 udp_prepend_hdr(conn_t
*connp
, ip_xmit_attr_t
*ixa
, const ip_pkt_t
*ipp
,
1933 const in6_addr_t
*v6src
, const in6_addr_t
*v6dst
, in_port_t dstport
,
1934 uint32_t flowinfo
, mblk_t
*data_mp
, int *errorp
)
1938 udp_stack_t
*us
= connp
->conn_netstack
->netstack_udp
;
1941 udp_t
*udp
= connp
->conn_udp
;
1942 boolean_t insert_spi
= udp
->udp_nat_t_endpoint
;
1945 data_len
= msgdsize(data_mp
);
1946 ulp_hdr_len
= UDPH_SIZE
;
1948 ulp_hdr_len
+= sizeof (uint32_t);
1950 mp
= conn_prepend_hdr(ixa
, ipp
, v6src
, v6dst
, IPPROTO_UDP
, flowinfo
,
1951 ulp_hdr_len
, data_mp
, data_len
, us
->us_wroff_extra
, &cksum
, errorp
);
1953 ASSERT(*errorp
!= 0);
1957 data_len
+= ulp_hdr_len
;
1958 ixa
->ixa_pktlen
= data_len
+ ixa
->ixa_ip_hdr_length
;
1960 udpha
= (udpha_t
*)(mp
->b_rptr
+ ixa
->ixa_ip_hdr_length
);
1961 udpha
->uha_src_port
= connp
->conn_lport
;
1962 udpha
->uha_dst_port
= dstport
;
1963 udpha
->uha_checksum
= 0;
1964 udpha
->uha_length
= htons(data_len
);
1967 * If there was a routing option/header then conn_prepend_hdr
1968 * has massaged it and placed the pseudo-header checksum difference
1969 * in the cksum argument.
1971 * Setup header length and prepare for ULP checksum done in IP.
1973 * We make it easy for IP to include our pseudo header
1974 * by putting our length in uha_checksum.
1975 * The IP source, destination, and length have already been set by
1979 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
1980 ASSERT(cksum
< 0x10000);
1982 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
1983 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
1985 ASSERT(ntohs(ipha
->ipha_length
) == ixa
->ixa_pktlen
);
1987 /* IP does the checksum if uha_checksum is non-zero */
1988 if (us
->us_do_checksum
) {
1990 udpha
->uha_checksum
= 0xffff;
1992 udpha
->uha_checksum
= htons(cksum
);
1994 udpha
->uha_checksum
= 0;
1997 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
1999 ASSERT(ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
== ixa
->ixa_pktlen
);
2001 udpha
->uha_checksum
= 0xffff;
2003 udpha
->uha_checksum
= htons(cksum
);
2006 /* Insert all-0s SPI now. */
2008 *((uint32_t *)(udpha
+ 1)) = 0;
2014 udp_build_hdr_template(conn_t
*connp
, const in6_addr_t
*v6src
,
2015 const in6_addr_t
*v6dst
, in_port_t dstport
, uint32_t flowinfo
)
2020 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
2022 * We clear lastdst to make sure we don't use the lastdst path
2023 * next time sending since we might not have set v6dst yet.
2025 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2027 error
= conn_build_hdr_template(connp
, UDPH_SIZE
, 0, v6src
, v6dst
,
2033 * Any routing header/option has been massaged. The checksum difference
2034 * is stored in conn_sum.
2036 udpha
= (udpha_t
*)connp
->conn_ht_ulp
;
2037 udpha
->uha_src_port
= connp
->conn_lport
;
2038 udpha
->uha_dst_port
= dstport
;
2039 udpha
->uha_checksum
= 0;
2040 udpha
->uha_length
= htons(UDPH_SIZE
); /* Filled in later */
2045 udp_queue_fallback(udp_t
*udp
, mblk_t
*mp
)
2047 ASSERT(MUTEX_HELD(&udp
->udp_recv_lock
));
2048 if (IPCL_IS_NONSTR(udp
->udp_connp
)) {
2050 * fallback has started but messages have not been moved yet
2052 if (udp
->udp_fallback_queue_head
== NULL
) {
2053 ASSERT(udp
->udp_fallback_queue_tail
== NULL
);
2054 udp
->udp_fallback_queue_head
= mp
;
2055 udp
->udp_fallback_queue_tail
= mp
;
2057 ASSERT(udp
->udp_fallback_queue_tail
!= NULL
);
2058 udp
->udp_fallback_queue_tail
->b_next
= mp
;
2059 udp
->udp_fallback_queue_tail
= mp
;
2064 * Fallback completed, let the caller putnext() the mblk.
2071 * Deliver data to ULP. In case we have a socket, and it's falling back to
2072 * TPI, then we'll queue the mp for later processing.
2075 udp_ulp_recv(conn_t
*connp
, mblk_t
*mp
, uint_t len
, ip_recv_attr_t
*ira
)
2077 if (IPCL_IS_NONSTR(connp
)) {
2078 udp_t
*udp
= connp
->conn_udp
;
2081 ASSERT(len
== msgdsize(mp
));
2082 if ((*connp
->conn_upcalls
->su_recv
)
2083 (connp
->conn_upper_handle
, mp
, len
, 0, &error
, NULL
) < 0) {
2084 mutex_enter(&udp
->udp_recv_lock
);
2085 if (error
== ENOSPC
) {
2087 * let's confirm while holding the lock
2089 if ((*connp
->conn_upcalls
->su_recv
)
2090 (connp
->conn_upper_handle
, NULL
, 0, 0,
2091 &error
, NULL
) < 0) {
2092 ASSERT(error
== ENOSPC
);
2093 if (error
== ENOSPC
) {
2094 connp
->conn_flow_cntrld
=
2098 mutex_exit(&udp
->udp_recv_lock
);
2100 ASSERT(error
== EOPNOTSUPP
);
2101 mp
= udp_queue_fallback(udp
, mp
);
2102 mutex_exit(&udp
->udp_recv_lock
);
2104 putnext(connp
->conn_rq
, mp
);
2107 ASSERT(MUTEX_NOT_HELD(&udp
->udp_recv_lock
));
2109 putnext(connp
->conn_rq
, mp
);
2113 * This is the inbound data path.
2114 * IP has already pulled up the IP plus UDP headers and verified alignment
2119 udp_input(void *arg1
, mblk_t
*mp
, void *arg2
, ip_recv_attr_t
*ira
)
2121 conn_t
*connp
= (conn_t
*)arg1
;
2122 struct T_unitdata_ind
*tudi
;
2123 uchar_t
*rptr
; /* Pointer to IP header */
2124 int hdr_length
; /* Length of IP+UDP headers */
2125 int udi_size
; /* Size of T_unitdata_ind */
2132 uint32_t udp_ipv4_options_len
;
2133 crb_t recv_ancillary
;
2136 ASSERT(connp
->conn_flags
& IPCL_UDPCONN
);
2138 udp
= connp
->conn_udp
;
2142 ASSERT(DB_TYPE(mp
) == M_DATA
);
2143 ASSERT(OK_32PTR(rptr
));
2144 ASSERT(ira
->ira_pktlen
== msgdsize(mp
));
2145 pkt_len
= ira
->ira_pktlen
;
2148 * Get a snapshot of these and allow other threads to change
2149 * them after that. We need the same recv_ancillary when determining
2150 * the size as when adding the ancillary data items.
2152 mutex_enter(&connp
->conn_lock
);
2153 udp_ipv4_options_len
= udp
->udp_recv_ipp
.ipp_ipv4_options_len
;
2154 recv_ancillary
= connp
->conn_recv_ancillary
;
2155 mutex_exit(&connp
->conn_lock
);
2157 hdr_length
= ira
->ira_ip_hdr_length
;
2160 * IP inspected the UDP header thus all of it must be in the mblk.
2161 * UDP length check is performed for IPv6 packets and IPv4 packets
2162 * to check if the size of the packet as specified
2163 * by the UDP header is the same as the length derived from the IP
2166 udpha
= (udpha_t
*)(rptr
+ hdr_length
);
2167 if (pkt_len
!= ntohs(udpha
->uha_length
) + hdr_length
)
2170 hdr_length
+= UDPH_SIZE
;
2171 ASSERT(MBLKL(mp
) >= hdr_length
); /* IP did a pullup */
2173 /* Initialize regardless of IP version */
2174 ipps
.ipp_fields
= 0;
2176 if (((ira
->ira_flags
& IRAF_IPV4_OPTIONS
) ||
2177 udp_ipv4_options_len
> 0) &&
2178 connp
->conn_family
== AF_INET
) {
2182 * Record/update udp_recv_ipp with the lock
2183 * held. Not needed for AF_INET6 sockets
2184 * since they don't support a getsockopt of IP_OPTIONS.
2186 mutex_enter(&connp
->conn_lock
);
2187 err
= ip_find_hdr_v4((ipha_t
*)rptr
, &udp
->udp_recv_ipp
,
2190 /* Allocation failed. Drop packet */
2191 mutex_exit(&connp
->conn_lock
);
2193 UDPS_BUMP_MIB(us
, udpInErrors
);
2196 mutex_exit(&connp
->conn_lock
);
2199 if (recv_ancillary
.crb_all
!= 0) {
2201 * Record packet information in the ip_pkt_t
2203 if (ira
->ira_flags
& IRAF_IS_IPV4
) {
2204 ASSERT(IPH_HDR_VERSION(rptr
) == IPV4_VERSION
);
2205 ASSERT(MBLKL(mp
) >= sizeof (ipha_t
));
2206 ASSERT(((ipha_t
*)rptr
)->ipha_protocol
== IPPROTO_UDP
);
2207 ASSERT(ira
->ira_ip_hdr_length
== IPH_HDR_LENGTH(rptr
));
2209 (void) ip_find_hdr_v4((ipha_t
*)rptr
, &ipps
, B_FALSE
);
2213 ASSERT(IPH_HDR_VERSION(rptr
) == IPV6_VERSION
);
2215 * IPv6 packets can only be received by applications
2216 * that are prepared to receive IPv6 addresses.
2217 * The IP fanout must ensure this.
2219 ASSERT(connp
->conn_family
== AF_INET6
);
2221 ip6h
= (ip6_t
*)rptr
;
2223 /* We don't care about the length, but need the ipp */
2224 hdr_length
= ip_find_hdr_v6(mp
, ip6h
, &ipps
,
2226 ASSERT(hdr_length
== ira
->ira_ip_hdr_length
);
2228 hdr_length
= ira
->ira_ip_hdr_length
+ UDPH_SIZE
;
2229 ASSERT(nexthdrp
== IPPROTO_UDP
);
2234 * This is the inbound data path. Packets are passed upstream as
2235 * T_UNITDATA_IND messages.
2237 if (connp
->conn_family
== AF_INET
) {
2240 ASSERT(IPH_HDR_VERSION((ipha_t
*)rptr
) == IPV4_VERSION
);
2243 * Normally only send up the source address.
2244 * If any ancillary data items are wanted we add those.
2246 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin_t
);
2247 if (recv_ancillary
.crb_all
!= 0) {
2248 udi_size
+= conn_recvancillary_size(connp
,
2249 recv_ancillary
, ira
, mp
, &ipps
);
2252 /* Allocate a message block for the T_UNITDATA_IND structure. */
2253 mp1
= allocb(udi_size
, BPRI_MED
);
2256 UDPS_BUMP_MIB(us
, udpInErrors
);
2260 mp1
->b_datap
->db_type
= M_PROTO
;
2261 tudi
= (struct T_unitdata_ind
*)mp1
->b_rptr
;
2262 mp1
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
2263 tudi
->PRIM_type
= T_UNITDATA_IND
;
2264 tudi
->SRC_length
= sizeof (sin_t
);
2265 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
2266 tudi
->OPT_offset
= sizeof (struct T_unitdata_ind
) +
2268 udi_size
-= (sizeof (struct T_unitdata_ind
) + sizeof (sin_t
));
2269 tudi
->OPT_length
= udi_size
;
2270 sin
= (sin_t
*)&tudi
[1];
2271 sin
->sin_addr
.s_addr
= ((ipha_t
*)rptr
)->ipha_src
;
2272 sin
->sin_port
= udpha
->uha_src_port
;
2273 sin
->sin_family
= connp
->conn_family
;
2274 *(uint32_t *)&sin
->sin_zero
[0] = 0;
2275 *(uint32_t *)&sin
->sin_zero
[4] = 0;
2278 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
2279 * IP_RECVTTL has been set.
2281 if (udi_size
!= 0) {
2282 conn_recvancillary_add(connp
, recv_ancillary
, ira
,
2283 &ipps
, (uchar_t
*)&sin
[1], udi_size
);
2289 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
2291 * Normally we only send up the address. If receiving of any
2292 * optional receive side information is enabled, we also send
2293 * that up as options.
2295 udi_size
= sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
);
2297 if (recv_ancillary
.crb_all
!= 0) {
2298 udi_size
+= conn_recvancillary_size(connp
,
2299 recv_ancillary
, ira
, mp
, &ipps
);
2302 mp1
= allocb(udi_size
, BPRI_MED
);
2305 UDPS_BUMP_MIB(us
, udpInErrors
);
2309 mp1
->b_datap
->db_type
= M_PROTO
;
2310 tudi
= (struct T_unitdata_ind
*)mp1
->b_rptr
;
2311 mp1
->b_wptr
= (uchar_t
*)tudi
+ udi_size
;
2312 tudi
->PRIM_type
= T_UNITDATA_IND
;
2313 tudi
->SRC_length
= sizeof (sin6_t
);
2314 tudi
->SRC_offset
= sizeof (struct T_unitdata_ind
);
2315 tudi
->OPT_offset
= sizeof (struct T_unitdata_ind
) +
2317 udi_size
-= (sizeof (struct T_unitdata_ind
) + sizeof (sin6_t
));
2318 tudi
->OPT_length
= udi_size
;
2319 sin6
= (sin6_t
*)&tudi
[1];
2320 if (ira
->ira_flags
& IRAF_IS_IPV4
) {
2323 IN6_IPADDR_TO_V4MAPPED(((ipha_t
*)rptr
)->ipha_src
,
2325 IN6_IPADDR_TO_V4MAPPED(((ipha_t
*)rptr
)->ipha_dst
,
2327 sin6
->sin6_flowinfo
= 0;
2328 sin6
->sin6_scope_id
= 0;
2329 sin6
->__sin6_src_id
= ip_srcid_find_addr(&v6dst
,
2330 IPCL_ZONEID(connp
), us
->us_netstack
);
2332 ip6h
= (ip6_t
*)rptr
;
2334 sin6
->sin6_addr
= ip6h
->ip6_src
;
2335 /* No sin6_flowinfo per API */
2336 sin6
->sin6_flowinfo
= 0;
2337 /* For link-scope pass up scope id */
2338 if (IN6_IS_ADDR_LINKSCOPE(&ip6h
->ip6_src
))
2339 sin6
->sin6_scope_id
= ira
->ira_ruifindex
;
2341 sin6
->sin6_scope_id
= 0;
2342 sin6
->__sin6_src_id
= ip_srcid_find_addr(
2343 &ip6h
->ip6_dst
, IPCL_ZONEID(connp
),
2346 sin6
->sin6_port
= udpha
->uha_src_port
;
2347 sin6
->sin6_family
= connp
->conn_family
;
2349 if (udi_size
!= 0) {
2350 conn_recvancillary_add(connp
, recv_ancillary
, ira
,
2351 &ipps
, (uchar_t
*)&sin6
[1], udi_size
);
2356 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
2357 * loopback traffic).
2359 DTRACE_UDP5(receive
, mblk_t
*, NULL
, ip_xmit_attr_t
*, connp
->conn_ixa
,
2360 void_ip_t
*, rptr
, udp_t
*, udp
, udpha_t
*, udpha
);
2362 /* Walk past the headers unless IP_RECVHDR was set. */
2363 if (!udp
->udp_rcvhdr
) {
2364 mp
->b_rptr
= rptr
+ hdr_length
;
2365 pkt_len
-= hdr_length
;
2368 UDPS_BUMP_MIB(us
, udpHCInDatagrams
);
2369 udp_ulp_recv(connp
, mp1
, pkt_len
, ira
);
2374 UDPS_BUMP_MIB(us
, udpInErrors
);
2378 * This routine creates a T_UDERROR_IND message and passes it upstream.
2379 * The address and options are copied from the T_UNITDATA_REQ message
2380 * passed in mp. This message is freed.
2383 udp_ud_err(queue_t
*q
, mblk_t
*mp
, t_scalar_t err
)
2385 struct T_unitdata_req
*tudr
;
2392 if ((mp
->b_wptr
< mp
->b_rptr
) ||
2393 (MBLKL(mp
)) < sizeof (struct T_unitdata_req
)) {
2396 tudr
= (struct T_unitdata_req
*)mp
->b_rptr
;
2397 destaddr
= mp
->b_rptr
+ tudr
->DEST_offset
;
2398 if (destaddr
< mp
->b_rptr
|| destaddr
>= mp
->b_wptr
||
2399 destaddr
+ tudr
->DEST_length
< mp
->b_rptr
||
2400 destaddr
+ tudr
->DEST_length
> mp
->b_wptr
) {
2403 optaddr
= mp
->b_rptr
+ tudr
->OPT_offset
;
2404 if (optaddr
< mp
->b_rptr
|| optaddr
>= mp
->b_wptr
||
2405 optaddr
+ tudr
->OPT_length
< mp
->b_rptr
||
2406 optaddr
+ tudr
->OPT_length
> mp
->b_wptr
) {
2409 destlen
= tudr
->DEST_length
;
2410 optlen
= tudr
->OPT_length
;
2412 mp1
= mi_tpi_uderror_ind((char *)destaddr
, destlen
,
2413 (char *)optaddr
, optlen
, err
);
2422 * This routine removes a port number association from a stream. It
2423 * is called by udp_wput to handle T_UNBIND_REQ messages.
2426 udp_tpi_unbind(queue_t
*q
, mblk_t
*mp
)
2428 conn_t
*connp
= Q_TO_CONN(q
);
2431 error
= udp_do_unbind(connp
);
2434 udp_err_ack(q
, mp
, -error
, 0);
2436 udp_err_ack(q
, mp
, TSYSERR
, error
);
2440 mp
= mi_tpi_ok_ack_alloc(mp
);
2442 ASSERT(((struct T_ok_ack
*)mp
->b_rptr
)->PRIM_type
== T_OK_ACK
);
2447 * Don't let port fall into the privileged range.
2448 * Since the extra privileged ports can be arbitrary we also
2449 * ensure that we exclude those from consideration.
2450 * us->us_epriv_ports is not sorted thus we loop over it until
2451 * there are no changes.
/*
 * udp_update_next_port(udp, port, random): adjust a candidate port number
 * using the limits stored in udp->udp_us.
 * Visible steps:
 *  - if random is set and udp_random_anon_port is non-zero, port is
 *    overwritten with sizeof (in_port_t) bytes from
 *    random_get_pseudo_bytes(); when that value is outside
 *    [us_smallest_anon_port, us_largest_anon_port] it is rebased as
 *    us_smallest_anon_port + port % (largest - smallest).  The
 *    smallest == largest test sits before the modulo, presumably to avoid
 *    a zero divisor (the statement it guards is not visible here);
 *  - port below us_smallest_anon_port or above us_largest_anon_port is
 *    reset to us_smallest_anon_port;
 *  - port below us_smallest_nonpriv_port is raised to that value;
 *  - port is compared with every us_epriv_ports[] entry.
 * NOTE(review): lines are missing from this listing (for example the
 * declarations of i and bump, the action taken on an epriv match, and the
 * return statement); verify against the complete source.
 */
2454 udp_update_next_port(udp_t
*udp
, in_port_t port
, boolean_t random
)
2458 boolean_t restart
= B_FALSE
;
2459 udp_stack_t
*us
= udp
->udp_us
;
2461 if (random
&& udp_random_anon_port
!= 0) {
2462 (void) random_get_pseudo_bytes((uint8_t *)&port
,
2463 sizeof (in_port_t
));
2465 * Unless changed by a sys admin, the smallest anon port
2466 * is 32768 and the largest anon port is 65535. It is
2467 * very likely (50%) for the random port to be smaller
2468 * than the smallest anon port. When that happens,
2469 * add port % (anon port range) to the smallest anon
2470 * port to get the random port. It should fall into the
2471 * valid anon port range.
2473 if ((port
< us
->us_smallest_anon_port
) ||
2474 (port
> us
->us_largest_anon_port
)) {
2475 if (us
->us_smallest_anon_port
==
2476 us
->us_largest_anon_port
) {
2479 bump
= port
% (us
->us_largest_anon_port
-
2480 us
->us_smallest_anon_port
);
2483 port
= us
->us_smallest_anon_port
+ bump
;
2488 if (port
< us
->us_smallest_anon_port
)
2489 port
= us
->us_smallest_anon_port
;
2491 if (port
> us
->us_largest_anon_port
) {
2492 port
= us
->us_smallest_anon_port
;
2498 if (port
< us
->us_smallest_nonpriv_port
)
2499 port
= us
->us_smallest_nonpriv_port
;
2501 for (i
= 0; i
< us
->us_num_epriv_ports
; i
++) {
2502 if (port
== us
->us_epriv_ports
[i
]) {
2505 * Make sure that the port is in the
2516 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
2517 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
2518 * the TPI options, otherwise we take them from msg_control.
2519 * If neither sin nor sin6 is set it is a connected socket and we use conn_faddr.
2520 * Always consumes mp; never consumes tudr_mp.
/*
 * udp_output_ancillary(connp, sin, sin6, mp, tudr_mp, msg, cr, pid):
 * send path used when ancillary data or TPI options accompany the datagram.
 * Visible steps, in order:
 *  - assert that tudr_mp or msg is non-NULL;
 *  - take an exclusive ixa with conn_get_ixa_exclusive() and set ixa_cpid
 *    to pid, then call ip_attr_newdst();
 *  - allocate ipp with kmem_zalloc(KM_NOSLEEP) and fill it from
 *    conn_xmit_ipp with ip_pkt_copy() while holding conn_lock;
 *  - parse options into coa either with process_auxiliary_options()
 *    (msg_control) or with tpi_optcom_buf() (tudr_mp);
 *  - under conn_lock pick v6src, v6dst, dstport and flowinfo from sin, from
 *    sin6, or from the conn foreign address (the branch commented
 *    Connected case); a failed ip_srcid_find_id() gives EADDRNOTAVAIL;
 *  - let an IPPF_ADDR address of the matching family replace v6src;
 *  - call ip_attr_nexthop() and ip_attr_connect(), then udp_prepend_hdr();
 *  - compare ixa_pktlen with IP_MAXPACKET, bump udpHCOutDatagrams, fire
 *    the DTRACE_UDP5 send probe and hand the packet to conn_ip_output();
 *  - restore ixa_cred and ixa_cpid from connp and kmem_free() ipp.
 * udpOutErrors is bumped at each visible failure site.
 * NOTE(review): many original lines are absent from this listing
 * (declarations, if/else/switch lines, labels, returns, ixa_refrele calls),
 * so the exact conditions and cleanup order cannot be read off here.
 */
2523 udp_output_ancillary(conn_t
*connp
, sin_t
*sin
, sin6_t
*sin6
, mblk_t
*mp
,
2524 mblk_t
*tudr_mp
, struct nmsghdr
*msg
, cred_t
*cr
, pid_t pid
)
2526 udp_t
*udp
= connp
->conn_udp
;
2527 udp_stack_t
*us
= udp
->udp_us
;
2529 ip_xmit_attr_t
*ixa
;
2533 in6_addr_t v6nexthop
;
2537 int is_absreq_failure
= 0;
2538 conn_opt_arg_t coas
, *coa
;
2540 ASSERT(tudr_mp
!= NULL
|| msg
!= NULL
);
2543 * Get ixa before checking state to handle a disconnect race.
2545 * We need an exclusive copy of conn_ixa since the ancillary data
2546 * options might modify it. That copy has no pointers hence we
2547 * need to set them up once we've parsed the ancillary data.
2549 ixa
= conn_get_ixa_exclusive(connp
);
2551 UDPS_BUMP_MIB(us
, udpOutErrors
);
2556 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2558 ixa
->ixa_cpid
= pid
;
2560 /* In case previous destination was multicast or multirt */
2561 ip_attr_newdst(ixa
);
2563 /* Get a copy of conn_xmit_ipp since the options might change it */
2564 ipp
= kmem_zalloc(sizeof (*ipp
), KM_NOSLEEP
);
2566 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2567 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2568 ixa
->ixa_cpid
= connp
->conn_cpid
;
2570 UDPS_BUMP_MIB(us
, udpOutErrors
);
2574 mutex_enter(&connp
->conn_lock
);
2575 error
= ip_pkt_copy(&connp
->conn_xmit_ipp
, ipp
, KM_NOSLEEP
);
2576 mutex_exit(&connp
->conn_lock
);
2578 UDPS_BUMP_MIB(us
, udpOutErrors
);
2584 * Parse the options and update ixa and ipp as a result.
2588 coa
->coa_connp
= connp
;
2591 coa
->coa_ancillary
= B_TRUE
;
2592 coa
->coa_changed
= 0;
2595 error
= process_auxiliary_options(connp
, msg
->msg_control
,
2596 msg
->msg_controllen
, coa
, &udp_opt_obj
, udp_opt_set
, cr
);
2598 struct T_unitdata_req
*tudr
;
2600 tudr
= (struct T_unitdata_req
*)tudr_mp
->b_rptr
;
2601 ASSERT(tudr
->PRIM_type
== T_UNITDATA_REQ
);
2602 error
= tpi_optcom_buf(connp
->conn_wq
, tudr_mp
,
2603 &tudr
->OPT_length
, tudr
->OPT_offset
, cr
, &udp_opt_obj
,
2604 coa
, &is_absreq_failure
);
2608 * Note: No special action needed in this
2609 * module for "is_absreq_failure"
2612 UDPS_BUMP_MIB(us
, udpOutErrors
);
2615 ASSERT(is_absreq_failure
== 0);
2617 mutex_enter(&connp
->conn_lock
);
2619 * If laddr is unspecified then we look at sin6_src_id.
2620 * We will give precedence to a source address set with IPV6_PKTINFO
2621 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
2622 * want ip_attr_connect to select a source (since it can fail) when
2623 * IPV6_PKTINFO is specified.
2624 * If this doesn't result in a source address then we get a source
2625 * from ip_attr_connect() below.
2627 v6src
= connp
->conn_saddr_v6
;
2629 IN6_IPADDR_TO_V4MAPPED(sin
->sin_addr
.s_addr
, &v6dst
);
2630 dstport
= sin
->sin_port
;
2632 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
2633 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
2634 } else if (sin6
!= NULL
) {
2637 v6dst
= sin6
->sin6_addr
;
2638 dstport
= sin6
->sin6_port
;
2639 flowinfo
= sin6
->sin6_flowinfo
;
2640 srcid
= sin6
->__sin6_src_id
;
2641 if (IN6_IS_ADDR_LINKSCOPE(&v6dst
) && sin6
->sin6_scope_id
!= 0) {
2642 ixa
->ixa_scopeid
= sin6
->sin6_scope_id
;
2643 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
2645 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
2647 v4mapped
= IN6_IS_ADDR_V4MAPPED(&v6dst
);
2649 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
2651 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
2652 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
2653 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
2654 v4mapped
, connp
->conn_netstack
)) {
2655 /* Mismatch - v4mapped/v6 specified by srcid. */
2656 mutex_exit(&connp
->conn_lock
);
2657 error
= EADDRNOTAVAIL
;
2658 goto failed
; /* Does freemsg() and mib. */
2662 /* Connected case */
2663 v6dst
= connp
->conn_faddr_v6
;
2664 dstport
= connp
->conn_fport
;
2665 flowinfo
= connp
->conn_flowinfo
;
2667 mutex_exit(&connp
->conn_lock
);
2669 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
2670 if (ipp
->ipp_fields
& IPPF_ADDR
) {
2671 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
2672 if (IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
2673 v6src
= ipp
->ipp_addr
;
2675 if (!IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
2676 v6src
= ipp
->ipp_addr
;
2680 ip_attr_nexthop(ipp
, ixa
, &v6dst
, &v6nexthop
);
2681 error
= ip_attr_connect(connp
, ixa
, &v6src
, &v6dst
, &v6nexthop
, dstport
,
2682 &v6src
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
| IPDF_IPSEC
);
2689 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2690 * Don't have the application see that errno
2692 error
= ENETUNREACH
;
2696 * Have !ipif_addr_ready address; drop packet silently
2697 * until we can get applications to not send until we
2704 if (ixa
->ixa_ire
!= NULL
) {
2706 * Let conn_ip_output/ire_send_noroute return
2707 * the error and send any local ICMP error.
2716 UDPS_BUMP_MIB(us
, udpOutErrors
);
2720 mp
= udp_prepend_hdr(connp
, ixa
, ipp
, &v6src
, &v6dst
, dstport
,
2721 flowinfo
, mp
, &error
);
2724 UDPS_BUMP_MIB(us
, udpOutErrors
);
2727 if (ixa
->ixa_pktlen
> IP_MAXPACKET
) {
2729 UDPS_BUMP_MIB(us
, udpOutErrors
);
2733 /* We're done. Pass the packet to ip. */
2734 UDPS_BUMP_MIB(us
, udpHCOutDatagrams
);
2736 DTRACE_UDP5(send
, mblk_t
*, NULL
, ip_xmit_attr_t
*, ixa
,
2737 void_ip_t
*, mp
->b_rptr
, udp_t
*, udp
, udpha_t
*,
2738 &mp
->b_rptr
[ixa
->ixa_ip_hdr_length
]);
2740 error
= conn_ip_output(mp
, ixa
);
2741 /* No udpOutErrors if an error since IP increases its error counter */
2746 (void) ixa_check_drain_insert(connp
, ixa
);
2751 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2752 * Don't have the application see that errno
2754 error
= ENETUNREACH
;
2757 mutex_enter(&connp
->conn_lock
);
2759 * Clear the source and v6lastdst so we call ip_attr_connect
2760 * for the next packet and try to pick a better source.
2762 if (connp
->conn_mcbc_bind
)
2763 connp
->conn_saddr_v6
= ipv6_all_zeros
;
2765 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
2766 connp
->conn_v6lastdst
= ipv6_all_zeros
;
2767 mutex_exit(&connp
->conn_lock
);
2771 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2772 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2773 ixa
->ixa_cpid
= connp
->conn_cpid
;
2776 kmem_free(ipp
, sizeof (*ipp
));
2781 * Handle sending an M_DATA for a connected socket.
2782 * Handles both IPv4 and IPv6.
/*
 * udp_output_connected(connp, mp, cr, pid): send an M_DATA on a connected
 * endpoint.
 * Visible steps, in order:
 *  - obtain ixa with conn_get_ixa(connp, B_FALSE) and set ixa_cpid to pid;
 *  - under conn_lock call udp_prepend_header_template() with
 *    conn_saddr_v6, conn_fport and conn_flowinfo; the lines that follow
 *    (unlock, restore ixa_cred/ixa_cpid, bump udpOutErrors) look like the
 *    failure path of that call;
 *  - if ixa_ire is NULL, snapshot conn_saddr_v6, conn_faddr_v6 and
 *    conn_fport, call ip_attr_nexthop(), drop conn_lock and call
 *    ip_attr_connect(); otherwise conn_lock is dropped at the
 *    Done with conn_t comment;
 *  - assert ixa_ire is non-NULL, bump udpHCOutDatagrams, fire the
 *    DTRACE_UDP5 send probe and call conn_ip_output();
 *  - restore ixa_cred and ixa_cpid from connp.
 * NOTE(review): declarations, if/switch/case lines, returns and any
 * ixa_refrele calls are missing from this listing, so which errno values
 * reach each visible branch cannot be determined here.
 */
2785 udp_output_connected(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
, pid_t pid
)
2787 udp_t
*udp
= connp
->conn_udp
;
2788 udp_stack_t
*us
= udp
->udp_us
;
2790 ip_xmit_attr_t
*ixa
;
2793 * If no other thread is using conn_ixa this just gets a reference to
2794 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
2796 ixa
= conn_get_ixa(connp
, B_FALSE
);
2798 UDPS_BUMP_MIB(us
, udpOutErrors
);
2804 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2806 ixa
->ixa_cpid
= pid
;
2808 mutex_enter(&connp
->conn_lock
);
2809 mp
= udp_prepend_header_template(connp
, ixa
, mp
, &connp
->conn_saddr_v6
,
2810 connp
->conn_fport
, connp
->conn_flowinfo
, &error
);
2814 mutex_exit(&connp
->conn_lock
);
2815 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2816 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2817 ixa
->ixa_cpid
= connp
->conn_cpid
;
2819 UDPS_BUMP_MIB(us
, udpOutErrors
);
2825 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
2826 * safe copy, then we need to fill in any pointers in it.
2828 if (ixa
->ixa_ire
== NULL
) {
2829 in6_addr_t faddr
, saddr
;
2833 saddr
= connp
->conn_saddr_v6
;
2834 faddr
= connp
->conn_faddr_v6
;
2835 fport
= connp
->conn_fport
;
2836 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &faddr
, &nexthop
);
2837 mutex_exit(&connp
->conn_lock
);
2839 error
= ip_attr_connect(connp
, ixa
, &saddr
, &faddr
, &nexthop
,
2840 fport
, NULL
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
|
2847 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2848 * Don't have the application see that errno
2850 error
= ENETUNREACH
;
2854 * Have !ipif_addr_ready address; drop packet silently
2855 * until we can get applications to not send until we
2862 if (ixa
->ixa_ire
!= NULL
) {
2864 * Let conn_ip_output/ire_send_noroute return
2865 * the error and send any local ICMP error.
2873 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2874 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2875 ixa
->ixa_cpid
= connp
->conn_cpid
;
2878 UDPS_BUMP_MIB(us
, udpOutErrors
);
2882 /* Done with conn_t */
2883 mutex_exit(&connp
->conn_lock
);
2885 ASSERT(ixa
->ixa_ire
!= NULL
);
2887 /* We're done. Pass the packet to ip. */
2888 UDPS_BUMP_MIB(us
, udpHCOutDatagrams
);
2890 DTRACE_UDP5(send
, mblk_t
*, NULL
, ip_xmit_attr_t
*, ixa
,
2891 void_ip_t
*, mp
->b_rptr
, udp_t
*, udp
, udpha_t
*,
2892 &mp
->b_rptr
[ixa
->ixa_ip_hdr_length
]);
2894 error
= conn_ip_output(mp
, ixa
);
2895 /* No udpOutErrors if an error since IP increases its error counter */
2900 (void) ixa_check_drain_insert(connp
, ixa
);
2905 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2906 * Don't have the application see that errno
2908 error
= ENETUNREACH
;
2911 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2912 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2913 ixa
->ixa_cpid
= connp
->conn_cpid
;
2919 * Handle sending an M_DATA to the last destination.
2920 * Handles both IPv4 and IPv6.
2922 * NOTE: The caller must hold conn_lock and we drop it here.
/*
 * udp_output_lastdst(connp, mp, cr, pid, ixa): send an M_DATA to the
 * destination remembered in the conn_*last* fields.
 * Entry contract visible in the ASSERTs: conn_lock is held and ixa is
 * non-NULL.  Every visible path drops conn_lock with mutex_exit().
 * Visible steps, in order:
 *  - set ixa_cpid to pid;
 *  - call udp_prepend_header_template() with conn_v6lastsrc,
 *    conn_lastdstport and conn_lastflowinfo; the lines after it (unlock,
 *    restore ixa_cred/ixa_cpid, bump udpOutErrors) look like its failure
 *    path;
 *  - if ixa_ire is NULL, snapshot conn_v6lastsrc, conn_v6lastdst and
 *    conn_lastdstport, call ip_attr_nexthop(), unlock, and call
 *    ip_attr_connect();
 *  - bump udpHCOutDatagrams, fire the DTRACE_UDP5 send probe and call
 *    conn_ip_output();
 *  - on one post-send path retake conn_lock, reset conn_saddr_v6 to
 *    ipv6_all_zeros (when conn_mcbc_bind) or to conn_bound_addr_v6, and
 *    zero conn_v6lastdst so the next packet goes through ip_attr_connect;
 *  - restore ixa_cred and ixa_cpid from connp.
 * NOTE(review): declarations, if/switch/case lines, returns and any
 * ixa_refrele calls are missing from this listing; confirm the branch
 * conditions against the complete source.
 */
2925 udp_output_lastdst(conn_t
*connp
, mblk_t
*mp
, cred_t
*cr
, pid_t pid
,
2926 ip_xmit_attr_t
*ixa
)
2928 udp_t
*udp
= connp
->conn_udp
;
2929 udp_stack_t
*us
= udp
->udp_us
;
2932 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
2933 ASSERT(ixa
!= NULL
);
2936 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2938 ixa
->ixa_cpid
= pid
;
2940 mp
= udp_prepend_header_template(connp
, ixa
, mp
, &connp
->conn_v6lastsrc
,
2941 connp
->conn_lastdstport
, connp
->conn_lastflowinfo
, &error
);
2945 mutex_exit(&connp
->conn_lock
);
2946 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
2947 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
2948 ixa
->ixa_cpid
= connp
->conn_cpid
;
2950 UDPS_BUMP_MIB(us
, udpOutErrors
);
2956 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
2957 * safe copy, then we need to fill in any pointers in it.
2959 if (ixa
->ixa_ire
== NULL
) {
2960 in6_addr_t lastdst
, lastsrc
;
2964 lastsrc
= connp
->conn_v6lastsrc
;
2965 lastdst
= connp
->conn_v6lastdst
;
2966 lastport
= connp
->conn_lastdstport
;
2967 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &lastdst
, &nexthop
);
2968 mutex_exit(&connp
->conn_lock
);
2970 error
= ip_attr_connect(connp
, ixa
, &lastsrc
, &lastdst
,
2971 &nexthop
, lastport
, NULL
, NULL
, IPDF_ALLOW_MCBC
|
2972 IPDF_VERIFY_DST
| IPDF_IPSEC
);
2978 * IXAF_VERIFY_SOURCE tells us to pick a better source.
2979 * Don't have the application see that errno
2981 error
= ENETUNREACH
;
2985 * Have !ipif_addr_ready address; drop packet silently
2986 * until we can get applications to not send until we
2993 if (ixa
->ixa_ire
!= NULL
) {
2995 * Let conn_ip_output/ire_send_noroute return
2996 * the error and send any local ICMP error.
3004 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3005 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3006 ixa
->ixa_cpid
= connp
->conn_cpid
;
3009 UDPS_BUMP_MIB(us
, udpOutErrors
);
3013 /* Done with conn_t */
3014 mutex_exit(&connp
->conn_lock
);
3017 /* We're done. Pass the packet to ip. */
3018 UDPS_BUMP_MIB(us
, udpHCOutDatagrams
);
3020 DTRACE_UDP5(send
, mblk_t
*, NULL
, ip_xmit_attr_t
*, ixa
,
3021 void_ip_t
*, mp
->b_rptr
, udp_t
*, udp
, udpha_t
*,
3022 &mp
->b_rptr
[ixa
->ixa_ip_hdr_length
]);
3024 error
= conn_ip_output(mp
, ixa
);
3025 /* No udpOutErrors if an error since IP increases its error counter */
3030 (void) ixa_check_drain_insert(connp
, ixa
);
3035 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3036 * Don't have the application see that errno
3038 error
= ENETUNREACH
;
3041 mutex_enter(&connp
->conn_lock
);
3043 * Clear the source and v6lastdst so we call ip_attr_connect
3044 * for the next packet and try to pick a better source.
3046 if (connp
->conn_mcbc_bind
)
3047 connp
->conn_saddr_v6
= ipv6_all_zeros
;
3049 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
3050 connp
->conn_v6lastdst
= ipv6_all_zeros
;
3051 mutex_exit(&connp
->conn_lock
);
3054 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3055 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3056 ixa
->ixa_cpid
= connp
->conn_cpid
;
3063 * Prepend the header template and then fill in the source and
3064 * flowinfo. The caller needs to handle the destination address since
3065 * its setting is different if rthdr or source route.
3067 * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
3068 * When it returns NULL it sets errorp.
/*
 * udp_prepend_header_template(connp, ixa, mp, v6src, dstport, flowinfo,
 * errorp): put the conn header template (conn_ht_iphc) in front of mp and
 * patch the per-packet fields.  Asserts that conn_lock is held.
 * Visible steps, in order:
 *  - copylen is conn_ht_iphc_len; alloclen adds sizeof (uint32_t) when
 *    udp_nat_t_endpoint is set (insert_spi); pktlen is alloclen plus
 *    msgdsize(mp) and is compared with IP_MAXPACKET before being stored in
 *    ixa_pktlen;
 *  - the header goes in the space before b_rptr unless DB_REF(mp) is not 1,
 *    that space starts before DB_BASE(mp), or the address fails OK_32PTR;
 *    in those cases a new mblk of alloclen + us_wroff_extra is allocated
 *    with allocb(BPRI_MED) and the header sits at its end;
 *  - the template is bcopy()ed in, ixa_ip_hdr_length is set from
 *    conn_ht_ulp - conn_ht_iphc, and udpha points just past the IP header;
 *  - uha_length is pktlen - ip_hdr_length; that length plus conn_sum is
 *    folded once to 16 bits for uha_checksum;
 *  - IPv4 (IXAF_IS_IPV4): set ipha_length, store the checksum when
 *    us_do_checksum is set, and take the source from a v4-mapped IPPF_ADDR
 *    if present, else from v6src;
 *  - IPv6: set ip6_plen, store the checksum, take the source from a
 *    non-mapped IPPF_ADDR if present, else from v6src, and merge flowinfo
 *    (and IPPF_TCLASS) into the version/class/flow word;
 *  - write a zero uint32_t after the UDP header (the all-0s SPI) and set
 *    uha_dst_port to dstport.
 * NOTE(review): declarations, several if/else lines, the error assignments
 * to errorp and the return statements are missing from this listing.
 */
3071 udp_prepend_header_template(conn_t
*connp
, ip_xmit_attr_t
*ixa
, mblk_t
*mp
,
3072 const in6_addr_t
*v6src
, in_port_t dstport
, uint32_t flowinfo
, int *errorp
)
3074 udp_t
*udp
= connp
->conn_udp
;
3075 udp_stack_t
*us
= udp
->udp_us
;
3076 boolean_t insert_spi
= udp
->udp_nat_t_endpoint
;
3081 uint_t ip_hdr_length
;
3086 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
3089 * Copy the header template and leave space for an SPI
3091 copylen
= connp
->conn_ht_iphc_len
;
3092 alloclen
= copylen
+ (insert_spi
? sizeof (uint32_t) : 0);
3093 pktlen
= alloclen
+ msgdsize(mp
);
3094 if (pktlen
> IP_MAXPACKET
) {
3099 ixa
->ixa_pktlen
= pktlen
;
3101 /* check/fix buffer config, setup pointers into it */
3102 iph
= mp
->b_rptr
- alloclen
;
3103 if (DB_REF(mp
) != 1 || iph
< DB_BASE(mp
) || !OK_32PTR(iph
)) {
3106 mp1
= allocb(alloclen
+ us
->us_wroff_extra
, BPRI_MED
);
3112 mp1
->b_wptr
= DB_LIM(mp1
);
3115 iph
= (mp
->b_wptr
- alloclen
);
3118 bcopy(connp
->conn_ht_iphc
, iph
, copylen
);
3119 ip_hdr_length
= (uint_t
)(connp
->conn_ht_ulp
- connp
->conn_ht_iphc
);
3121 ixa
->ixa_ip_hdr_length
= ip_hdr_length
;
3122 udpha
= (udpha_t
*)(iph
+ ip_hdr_length
);
3125 * Setup header length and prepare for ULP checksum done in IP.
3126 * udp_build_hdr_template has already massaged any routing header
3127 * and placed the result in conn_sum.
3129 * We make it easy for IP to include our pseudo header
3130 * by putting our length in uha_checksum.
3132 cksum
= pktlen
- ip_hdr_length
;
3133 udpha
->uha_length
= htons(cksum
);
3135 cksum
+= connp
->conn_sum
;
3136 cksum
= (cksum
>> 16) + (cksum
& 0xFFFF);
3137 ASSERT(cksum
< 0x10000);
3139 ipp
= &connp
->conn_xmit_ipp
;
3140 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
3141 ipha_t
*ipha
= (ipha_t
*)iph
;
3143 ipha
->ipha_length
= htons((uint16_t)pktlen
);
3145 /* IP does the checksum if uha_checksum is non-zero */
3146 if (us
->us_do_checksum
)
3147 udpha
->uha_checksum
= htons(cksum
);
3149 /* if IP_PKTINFO specified an address it wins over bind() */
3150 if ((ipp
->ipp_fields
& IPPF_ADDR
) &&
3151 IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
)) {
3152 ASSERT(ipp
->ipp_addr_v4
!= INADDR_ANY
);
3153 ipha
->ipha_src
= ipp
->ipp_addr_v4
;
3155 IN6_V4MAPPED_TO_IPADDR(v6src
, ipha
->ipha_src
);
3158 ip6_t
*ip6h
= (ip6_t
*)iph
;
3160 ip6h
->ip6_plen
= htons((uint16_t)(pktlen
- IPV6_HDR_LEN
));
3161 udpha
->uha_checksum
= htons(cksum
);
3163 /* if IP_PKTINFO specified an address it wins over bind() */
3164 if ((ipp
->ipp_fields
& IPPF_ADDR
) &&
3165 !IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
)) {
3166 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp
->ipp_addr
));
3167 ip6h
->ip6_src
= ipp
->ipp_addr
;
3169 ip6h
->ip6_src
= *v6src
;
3172 (IPV6_DEFAULT_VERS_AND_FLOW
& IPV6_VERS_AND_FLOW_MASK
) |
3173 (flowinfo
& ~IPV6_VERS_AND_FLOW_MASK
);
3174 if (ipp
->ipp_fields
& IPPF_TCLASS
) {
3175 /* Overrides the class part of flowinfo */
3176 ip6h
->ip6_vcf
= IPV6_TCLASS_FLOW(ip6h
->ip6_vcf
,
3181 /* Insert all-0s SPI now. */
3183 *((uint32_t *)(udpha
+ 1)) = 0;
3185 udpha
->uha_dst_port
= dstport
;
3190 * Send a T_UDERR_IND in response to an M_DATA
/*
 * udp_ud_err_connected(connp, error): report a send error upstream for a
 * connected endpoint.
 * Visible steps: under conn_lock a socket address is built in the local
 * sockaddr_storage ss from the conn peer fields.  For AF_INET it is a
 * sin_t with conn_fport and conn_faddr_v4.  Otherwise it is a sin6_t with
 * conn_fport, conn_flowinfo and conn_faddr_v6; sin6_scope_id is taken from
 * conn_ixa->ixa_scopeid only when the peer is link-scope and
 * IXAF_SCOPEID_SET is set, else 0, and __sin6_src_id is 0.  After the lock
 * is dropped the address and error go to mi_tpi_uderror_ind() with no
 * options, and the resulting mblk is sent with putnext() on conn_rq.
 * NOTE(review): the declarations, the else lines, the IPv4 sin = &ss
 * assignment and any NULL check on mp1 are missing from this listing.
 */
3193 udp_ud_err_connected(conn_t
*connp
, t_scalar_t error
)
3195 struct sockaddr_storage ss
;
3198 struct sockaddr
*addr
;
3202 mutex_enter(&connp
->conn_lock
);
3203 /* Initialize addr and addrlen as if they're passed in */
3204 if (connp
->conn_family
== AF_INET
) {
3207 sin
->sin_family
= AF_INET
;
3208 sin
->sin_port
= connp
->conn_fport
;
3209 sin
->sin_addr
.s_addr
= connp
->conn_faddr_v4
;
3210 addr
= (struct sockaddr
*)sin
;
3211 addrlen
= sizeof (*sin
);
3213 sin6
= (sin6_t
*)&ss
;
3215 sin6
->sin6_family
= AF_INET6
;
3216 sin6
->sin6_port
= connp
->conn_fport
;
3217 sin6
->sin6_flowinfo
= connp
->conn_flowinfo
;
3218 sin6
->sin6_addr
= connp
->conn_faddr_v6
;
3219 if (IN6_IS_ADDR_LINKSCOPE(&connp
->conn_faddr_v6
) &&
3220 (connp
->conn_ixa
->ixa_flags
& IXAF_SCOPEID_SET
)) {
3221 sin6
->sin6_scope_id
= connp
->conn_ixa
->ixa_scopeid
;
3223 sin6
->sin6_scope_id
= 0;
3225 sin6
->__sin6_src_id
= 0;
3226 addr
= (struct sockaddr
*)sin6
;
3227 addrlen
= sizeof (*sin6
);
3229 mutex_exit(&connp
->conn_lock
);
3231 mp1
= mi_tpi_uderror_ind((char *)addr
, addrlen
, NULL
, 0, error
);
3233 putnext(connp
->conn_rq
, mp1
);
3237 * This routine handles all messages passed downstream. It either
3238 * consumes the message or passes it downstream; it never queues a
3241 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
3242 * is valid when we are directly beneath the stream head, and thus sockfs
3243 * is able to bypass STREAMS and directly call us, passing along the sockaddr
3244 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
3245 * connected endpoints.
/*
 * udp_wput(q, mp): write-side entry point; dispatches on DB_TYPE(mp).
 * Visible behaviour:
 *  - data path: if the endpoint is not a socket or is not in TS_DATA_XFER
 *    the message is counted as an error (udpOutErrors, udp_data_notconn,
 *    udp_out_err_notconn).  Otherwise credentials come from msg_getcred()
 *    and the message goes to udp_output_connected(); on failure
 *    udp_out_err_output is bumped and, when conn_rq is non-NULL,
 *    udp_ud_err_connected() reports the error;
 *  - protocol path: messages shorter than a T_unitdata_req or whose type
 *    is not T_UNITDATA_REQ go to udp_wput_other().  For a T_UNITDATA_REQ
 *    the destination range is checked with MBLKIN() (EADDRNOTAVAIL on
 *    failure), credentials come from msg_getcred() with conn_cred and
 *    conn_cpid as the visible fallback, and udp_state is checked against
 *    TS_UNBND;
 *  - the destination is then validated per conn_family (alignment, length,
 *    family; unspecified addresses are rewritten to loopback) and
 *    ipversion is chosen;
 *  - with OPT_length non-zero the send goes through
 *    udp_output_ancillary(); otherwise an ixa is taken with conn_get_ixa()
 *    and, under conn_lock, either udp_output_lastdst() or
 *    udp_output_newdst() is called (both drop conn_lock);
 *  - failures end in udp_ud_err(q, mp, error), which frees mp.
 * NOTE(review): case labels, declarations, many if/else lines, returns,
 * freemsg calls and the errno values on several paths are missing from
 * this listing; confirm against the complete source.
 */
3248 udp_wput(queue_t
*q
, mblk_t
*mp
)
3253 conn_t
*connp
= Q_TO_CONN(q
);
3254 udp_t
*udp
= connp
->conn_udp
;
3256 struct sockaddr
*addr
= NULL
;
3258 udp_stack_t
*us
= udp
->udp_us
;
3259 struct T_unitdata_req
*tudr
;
3266 * We directly handle several cases here: T_UNITDATA_REQ message
3267 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
3270 switch (DB_TYPE(mp
)) {
3272 if (!udp
->udp_issocket
|| udp
->udp_state
!= TS_DATA_XFER
) {
3273 /* Not connected; address is required */
3274 UDPS_BUMP_MIB(us
, udpOutErrors
);
3275 UDP_DBGSTAT(us
, udp_data_notconn
);
3276 UDP_STAT(us
, udp_out_err_notconn
);
3281 * All Solaris components should pass a db_credp
3282 * for this message, hence we ASSERT.
3283 * On production kernels we return an error to be robust against
3284 * random streams modules sitting on top of us.
3286 cr
= msg_getcred(mp
, &pid
);
3289 UDPS_BUMP_MIB(us
, udpOutErrors
);
3293 ASSERT(udp
->udp_issocket
);
3294 UDP_DBGSTAT(us
, udp_data_conn
);
3295 error
= udp_output_connected(connp
, mp
, cr
, pid
);
3297 UDP_STAT(us
, udp_out_err_output
);
3298 if (connp
->conn_rq
!= NULL
)
3299 udp_ud_err_connected(connp
, (t_scalar_t
)error
);
3301 printf("udp_output_connected returned %d\n", error
);
3308 tudr
= (struct T_unitdata_req
*)mp
->b_rptr
;
3309 if (MBLKL(mp
) < sizeof (*tudr
) ||
3310 ((t_primp_t
)mp
->b_rptr
)->type
!= T_UNITDATA_REQ
) {
3311 udp_wput_other(q
, mp
);
3317 udp_wput_other(q
, mp
);
3321 /* Handle valid T_UNITDATA_REQ here */
3322 data_mp
= mp
->b_cont
;
3323 if (data_mp
== NULL
) {
3329 if (!MBLKIN(mp
, 0, tudr
->DEST_offset
+ tudr
->DEST_length
)) {
3330 error
= EADDRNOTAVAIL
;
3335 * All Solaris components should pass a db_credp
3336 * for this TPI message, hence we should ASSERT.
3337 * However, RPC (svc_clts_ksend) does this odd thing where it
3338 * passes the options from a T_UNITDATA_IND unchanged in a
3339 * T_UNITDATA_REQ. While that is the right thing to do for
3340 * some options, SCM_UCRED being the key one, this also makes it
3341 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
3343 cr
= msg_getcred(mp
, &pid
);
3345 cr
= connp
->conn_cred
;
3346 pid
= connp
->conn_cpid
;
3350 * If a port has not been bound to the stream, fail.
3351 * This is not a problem when sockfs is directly
3352 * above us, because it will ensure that the socket
3353 * is first bound before allowing data to be sent.
3355 if (udp
->udp_state
== TS_UNBND
) {
3359 addr
= (struct sockaddr
*)&mp
->b_rptr
[tudr
->DEST_offset
];
3360 addrlen
= tudr
->DEST_length
;
3362 switch (connp
->conn_family
) {
3364 sin6
= (sin6_t
*)addr
;
3365 if (!OK_32PTR((char *)sin6
) || (addrlen
!= sizeof (sin6_t
)) ||
3366 (sin6
->sin6_family
!= AF_INET6
)) {
3367 error
= EADDRNOTAVAIL
;
3371 srcid
= sin6
->__sin6_src_id
;
3372 if (!IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
3374 * Destination is a non-IPv4-compatible IPv6 address.
3375 * Send out an IPv6 format packet.
3379 * If the local address is a mapped address return
3381 * It would be possible to send an IPv6 packet but the
3382 * response would never make it back to the application
3383 * since it is bound to a mapped address.
3385 if (IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
)) {
3386 error
= EADDRNOTAVAIL
;
3390 UDP_DBGSTAT(us
, udp_out_ipv6
);
3392 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
))
3393 sin6
->sin6_addr
= ipv6_loopback
;
3394 ipversion
= IPV6_VERSION
;
3396 if (connp
->conn_ipv6_v6only
) {
3397 error
= EADDRNOTAVAIL
;
3402 * If the local address is not zero or a mapped address
3403 * return an error. It would be possible to send an
3404 * IPv4 packet but the response would never make it
3405 * back to the application since it is bound to a
3406 * non-mapped address.
3408 if (!IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
) &&
3409 !IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_saddr_v6
)) {
3410 error
= EADDRNOTAVAIL
;
3413 UDP_DBGSTAT(us
, udp_out_mapped
);
3415 if (V4_PART_OF_V6(sin6
->sin6_addr
) == INADDR_ANY
) {
3416 V4_PART_OF_V6(sin6
->sin6_addr
) =
3417 htonl(INADDR_LOOPBACK
);
3419 ipversion
= IPV4_VERSION
;
3422 if (tudr
->OPT_length
!= 0) {
3424 * If we are connected then the destination needs to be
3425 * the same as the connected one.
3427 if (udp
->udp_state
== TS_DATA_XFER
&&
3428 !conn_same_as_last_v6(connp
, sin6
)) {
3432 UDP_STAT(us
, udp_out_opt
);
3433 error
= udp_output_ancillary(connp
, NULL
, sin6
,
3434 data_mp
, mp
, NULL
, cr
, pid
);
3436 ip_xmit_attr_t
*ixa
;
3439 * We have to allocate an ip_xmit_attr_t before we grab
3440 * conn_lock and we need to hold conn_lock once we've
3441 * checked conn_same_as_last_v6 to handle concurrent
3442 * send* calls on a socket.
3444 ixa
= conn_get_ixa(connp
, B_FALSE
);
3449 mutex_enter(&connp
->conn_lock
);
3451 if (conn_same_as_last_v6(connp
, sin6
) &&
3452 connp
->conn_lastsrcid
== srcid
&&
3453 ipsec_outbound_policy_current(ixa
)) {
3454 UDP_DBGSTAT(us
, udp_out_lastdst
);
3455 /* udp_output_lastdst drops conn_lock */
3456 error
= udp_output_lastdst(connp
, data_mp
, cr
,
3459 UDP_DBGSTAT(us
, udp_out_diffdst
);
3460 /* udp_output_newdst drops conn_lock */
3461 error
= udp_output_newdst(connp
, data_mp
, NULL
,
3462 sin6
, ipversion
, cr
, pid
, ixa
);
3464 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
3473 sin
= (sin_t
*)addr
;
3474 if ((!OK_32PTR((char *)sin
) || addrlen
!= sizeof (sin_t
)) ||
3475 (sin
->sin_family
!= AF_INET
)) {
3476 error
= EADDRNOTAVAIL
;
3479 UDP_DBGSTAT(us
, udp_out_ipv4
);
3480 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
3481 sin
->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
3482 ipversion
= IPV4_VERSION
;
3485 if (tudr
->OPT_length
!= 0) {
3487 * If we are connected then the destination needs to be
3488 * the same as the connected one.
3490 if (udp
->udp_state
== TS_DATA_XFER
&&
3491 !conn_same_as_last_v4(connp
, sin
)) {
3495 UDP_STAT(us
, udp_out_opt
);
3496 error
= udp_output_ancillary(connp
, sin
, NULL
,
3497 data_mp
, mp
, NULL
, cr
, pid
);
3499 ip_xmit_attr_t
*ixa
;
3502 * We have to allocate an ip_xmit_attr_t before we grab
3503 * conn_lock and we need to hold conn_lock once we've
3504 * checked conn_same_as_last_v4 to handle concurrent
3505 * send* calls on a socket.
3507 ixa
= conn_get_ixa(connp
, B_FALSE
);
3512 mutex_enter(&connp
->conn_lock
);
3514 if (conn_same_as_last_v4(connp
, sin
) &&
3515 ipsec_outbound_policy_current(ixa
)) {
3516 UDP_DBGSTAT(us
, udp_out_lastdst
);
3517 /* udp_output_lastdst drops conn_lock */
3518 error
= udp_output_lastdst(connp
, data_mp
, cr
,
3521 UDP_DBGSTAT(us
, udp_out_diffdst
);
3522 /* udp_output_newdst drops conn_lock */
3523 error
= udp_output_newdst(connp
, data_mp
, sin
,
3524 NULL
, ipversion
, cr
, pid
, ixa
);
3526 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
3534 UDP_STAT(us
, udp_out_err_output
);
3536 /* mp is freed by the following routine */
3537 udp_ud_err(q
, mp
, (t_scalar_t
)error
);
3541 UDPS_BUMP_MIB(us
, udpOutErrors
);
3543 UDP_STAT(us
, udp_out_err_output
);
3545 /* mp is freed by the following routine */
3546 udp_ud_err(q
, mp
, (t_scalar_t
)error
);
3550 * Handle the case of the IP address, port, flow label being different
3551 * for both IPv4 and IPv6.
3553 * NOTE: The caller must hold conn_lock and we drop it here.
/*
 * udp_output_newdst(connp, data_mp, sin, sin6, ipversion, cr, pid, ixa):
 * send to a destination that differs from the one cached on the conn.
 * Entry contract visible in the ASSERTs: conn_lock is held and ixa is
 * non-NULL.  Every visible path drops conn_lock with mutex_exit().
 * Visible steps, in order:
 *  - set ixa_cpid to pid; if udp_state is TS_DATA_XFER unlock and bail out
 *    (the errno is not visible on this path);
 *  - ip_attr_newdst(), then derive v6src, v6dst, dstport, flowinfo and
 *    srcid from sin or sin6, updating IXAF_IS_IPV4 and IXAF_SCOPEID_SET; a
 *    failed ip_srcid_find_id() unlocks and gives EADDRNOTAVAIL;
 *  - let an IPPF_ADDR address of the matching family replace v6src;
 *  - ip_attr_nexthop(), unlock, ip_attr_connect();
 *  - if cl_inet_connect2 is non-NULL call CL_INET_UDP_CONNECT(); an
 *    EHOSTUNREACH assignment follows in that region;
 *  - retake conn_lock and recheck TS_DATA_XFER (the comment says this path
 *    returns EISCONN);
 *  - rebuild the header template with udp_build_hdr_template() when
 *    IPPF_IPV4_OPTIONS or IPPF_RTHDR is set, ipversion differs from
 *    conn_lastipversion, or conn_v6lastdst is unspecified; otherwise patch
 *    only the destination (and the IPv4 fragment flags word) in
 *    conn_ht_iphc;
 *  - install ixa with conn_replace_ixa(), record the conn_last* fields and
 *    conn_v6lastsrc, call udp_prepend_header_template(), unlock, and
 *    ixa_refrele() the old ixa;
 *  - bump udpHCOutDatagrams, fire the DTRACE_UDP5 send probe and call
 *    conn_ip_output(); one post-send path resets conn_saddr_v6 and zeroes
 *    conn_v6lastdst under conn_lock;
 *  - restore ixa_cred and ixa_cpid from connp; the final visible lines
 *    bump udpOutErrors and udp_out_err_output.
 * NOTE(review): declarations, if/else/switch/case lines, labels, gotos,
 * returns and freemsg calls are missing from this listing.
 */
3556 udp_output_newdst(conn_t
*connp
, mblk_t
*data_mp
, sin_t
*sin
, sin6_t
*sin6
,
3557 ushort_t ipversion
, cred_t
*cr
, pid_t pid
, ip_xmit_attr_t
*ixa
)
3561 udp_t
*udp
= connp
->conn_udp
;
3563 ip_xmit_attr_t
*oldixa
;
3564 udp_stack_t
*us
= udp
->udp_us
;
3567 in6_addr_t v6nexthop
;
3570 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
3571 ASSERT(ixa
!= NULL
);
3573 * We hold conn_lock across all the use and modifications of
3574 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
3579 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3581 ixa
->ixa_cpid
= pid
;
3584 * If we are connected then the destination needs to be the
3585 * same as the connected one, which is not the case here since we
3586 * checked for that above.
3588 if (udp
->udp_state
== TS_DATA_XFER
) {
3589 mutex_exit(&connp
->conn_lock
);
3594 /* In case previous destination was multicast or multirt */
3595 ip_attr_newdst(ixa
);
3598 * If laddr is unspecified then we look at sin6_src_id.
3599 * We will give precedence to a source address set with IPV6_PKTINFO
3600 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
3601 * want ip_attr_connect to select a source (since it can fail) when
3602 * IPV6_PKTINFO is specified.
3603 * If this doesn't result in a source address then we get a source
3604 * from ip_attr_connect() below.
3606 v6src
= connp
->conn_saddr_v6
;
3608 IN6_IPADDR_TO_V4MAPPED(sin
->sin_addr
.s_addr
, &v6dst
);
3609 dstport
= sin
->sin_port
;
3611 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */
3613 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
3614 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
3618 v6dst
= sin6
->sin6_addr
;
3619 dstport
= sin6
->sin6_port
;
3620 flowinfo
= sin6
->sin6_flowinfo
;
3621 srcid
= sin6
->__sin6_src_id
;
3622 if (IN6_IS_ADDR_LINKSCOPE(&v6dst
) && sin6
->sin6_scope_id
!= 0) {
3623 ixa
->ixa_scopeid
= sin6
->sin6_scope_id
;
3624 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
3626 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
3628 v4mapped
= IN6_IS_ADDR_V4MAPPED(&v6dst
);
3630 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
3632 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
3633 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
3634 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
3635 v4mapped
, connp
->conn_netstack
)) {
3636 /* Mismatched v4mapped/v6 specified by srcid. */
3637 mutex_exit(&connp
->conn_lock
);
3638 error
= EADDRNOTAVAIL
;
3643 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
3644 if (connp
->conn_xmit_ipp
.ipp_fields
& IPPF_ADDR
) {
3645 ip_pkt_t
*ipp
= &connp
->conn_xmit_ipp
;
3647 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
3648 if (IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
3649 v6src
= ipp
->ipp_addr
;
3651 if (!IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
3652 v6src
= ipp
->ipp_addr
;
3656 ip_attr_nexthop(&connp
->conn_xmit_ipp
, ixa
, &v6dst
, &v6nexthop
);
3657 mutex_exit(&connp
->conn_lock
);
3659 error
= ip_attr_connect(connp
, ixa
, &v6src
, &v6dst
, &v6nexthop
, dstport
,
3660 &v6src
, NULL
, IPDF_ALLOW_MCBC
| IPDF_VERIFY_DST
| IPDF_IPSEC
);
3666 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3667 * Don't have the application see that errno
3669 error
= ENETUNREACH
;
3673 * Have !ipif_addr_ready address; drop packet silently
3674 * until we can get applications to not send until we
3681 if (ixa
->ixa_ire
!= NULL
) {
3683 * Let conn_ip_output/ire_send_noroute return
3684 * the error and send any local ICMP error.
3697 * Cluster note: we let the cluster hook know that we are sending to a
3698 * new address and/or port.
3700 if (cl_inet_connect2
!= NULL
) {
3701 CL_INET_UDP_CONNECT(connp
, B_TRUE
, &v6dst
, dstport
, error
);
3703 error
= EHOSTUNREACH
;
3708 mutex_enter(&connp
->conn_lock
);
3710 * While we dropped the lock some other thread might have connected
3711 * this socket. If so we bail out with EISCONN to ensure that the
3712 * connecting thread is the one that updates conn_ixa, conn_ht_*
3715 if (udp
->udp_state
== TS_DATA_XFER
) {
3716 mutex_exit(&connp
->conn_lock
);
3722 * We need to rebuild the headers if
3723 * - we have a source route (or routing header) since we need to
3724 * massage that to get the pseudo-header checksum
3725 * - the IP version is different than the last time
3726 * - a socket option with COA_HEADER_CHANGED has been set which
3727 * set conn_v6lastdst to zero.
3729 * Otherwise the prepend function will just update the src, dst,
3730 * dstport, and flow label.
3732 if ((connp
->conn_xmit_ipp
.ipp_fields
&
3733 (IPPF_IPV4_OPTIONS
|IPPF_RTHDR
)) ||
3734 ipversion
!= connp
->conn_lastipversion
||
3735 IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_v6lastdst
)) {
3736 /* Rebuild the header template */
3737 error
= udp_build_hdr_template(connp
, &v6src
, &v6dst
, dstport
,
3740 mutex_exit(&connp
->conn_lock
);
3744 /* Simply update the destination address if no source route */
3745 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
3746 ipha_t
*ipha
= (ipha_t
*)connp
->conn_ht_iphc
;
3748 IN6_V4MAPPED_TO_IPADDR(&v6dst
, ipha
->ipha_dst
);
3749 if (ixa
->ixa_flags
& IXAF_PMTU_IPV4_DF
) {
3750 ipha
->ipha_fragment_offset_and_flags
|=
3753 ipha
->ipha_fragment_offset_and_flags
&=
3757 ip6_t
*ip6h
= (ip6_t
*)connp
->conn_ht_iphc
;
3758 ip6h
->ip6_dst
= v6dst
;
3763 * Remember the dst/dstport etc which corresponds to the built header
3764 * template and conn_ixa.
3766 oldixa
= conn_replace_ixa(connp
, ixa
);
3767 connp
->conn_v6lastdst
= v6dst
;
3768 connp
->conn_lastipversion
= ipversion
;
3769 connp
->conn_lastdstport
= dstport
;
3770 connp
->conn_lastflowinfo
= flowinfo
;
3771 connp
->conn_lastscopeid
= ixa
->ixa_scopeid
;
3772 connp
->conn_lastsrcid
= srcid
;
3773 /* Also remember a source to use together with lastdst */
3774 connp
->conn_v6lastsrc
= v6src
;
3776 data_mp
= udp_prepend_header_template(connp
, ixa
, data_mp
, &v6src
,
3777 dstport
, flowinfo
, &error
);
3779 /* Done with conn_t */
3780 mutex_exit(&connp
->conn_lock
);
3781 ixa_refrele(oldixa
);
3783 if (data_mp
== NULL
) {
3788 /* We're done. Pass the packet to ip. */
3789 UDPS_BUMP_MIB(us
, udpHCOutDatagrams
);
3791 DTRACE_UDP5(send
, mblk_t
*, NULL
, ip_xmit_attr_t
*, ixa
,
3792 void_ip_t
*, data_mp
->b_rptr
, udp_t
*, udp
, udpha_t
*,
3793 &data_mp
->b_rptr
[ixa
->ixa_ip_hdr_length
]);
3795 error
= conn_ip_output(data_mp
, ixa
);
3796 /* No udpOutErrors if an error since IP increases its error counter */
3801 (void) ixa_check_drain_insert(connp
, ixa
);
3806 * IXAF_VERIFY_SOURCE tells us to pick a better source.
3807 * Don't have the application see that errno
3809 error
= ENETUNREACH
;
3812 mutex_enter(&connp
->conn_lock
);
3814 * Clear the source and v6lastdst so we call ip_attr_connect
3815 * for the next packet and try to pick a better source.
3817 if (connp
->conn_mcbc_bind
)
3818 connp
->conn_saddr_v6
= ipv6_all_zeros
;
3820 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
3821 connp
->conn_v6lastdst
= ipv6_all_zeros
;
3822 mutex_exit(&connp
->conn_lock
);
3825 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3826 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3827 ixa
->ixa_cpid
= connp
->conn_cpid
;
3832 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
3833 ixa
->ixa_cred
= connp
->conn_cred
; /* Restore */
3834 ixa
->ixa_cpid
= connp
->conn_cpid
;
3838 UDPS_BUMP_MIB(us
, udpOutErrors
);
3839 UDP_STAT(us
, udp_out_err_output
);
/*
 * Write-side fallback entry point; takes the write queue and a message.
 * The visible body only emits a CE_CONT console message recording that a
 * message arrived on the fallback path.
 * NOTE(review): what happens to mp afterwards is not visible in this
 * excerpt - confirm against the full file.
 */
3845 udp_wput_fallback(queue_t
*wq
, mblk_t
*mp
)
3848 cmn_err(CE_CONT
, "udp_wput_fallback: Message in fallback \n");
3855 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
3858 udp_wput_cmdblk(queue_t
*q
, mblk_t
*mp
)
3861 mblk_t
*datamp
= mp
->b_cont
;
3862 conn_t
*connp
= Q_TO_CONN(q
);
3863 udp_t
*udp
= connp
->conn_udp
;
3864 cmdblk_t
*cmdp
= (cmdblk_t
*)mp
->b_rptr
;
3866 if (datamp
== NULL
|| MBLKL(datamp
) < cmdp
->cb_len
) {
3867 cmdp
->cb_error
= EPROTO
;
3871 data
= datamp
->b_rptr
;
3873 mutex_enter(&connp
->conn_lock
);
3874 switch (cmdp
->cb_cmd
) {
3875 case TI_GETPEERNAME
:
3876 if (udp
->udp_state
!= TS_DATA_XFER
)
3877 cmdp
->cb_error
= ENOTCONN
;
3879 cmdp
->cb_error
= conn_getpeername(connp
, data
,
3883 cmdp
->cb_error
= conn_getsockname(connp
, data
, &cmdp
->cb_len
);
3886 cmdp
->cb_error
= EINVAL
;
3889 mutex_exit(&connp
->conn_lock
);
/*
 * Take this UDP endpoint out of socket mode.
 *
 * udp: endpoint to update; its conn_t is reached through udp_connp.
 *
 * udp_issocket is cleared while conn_lock of that conn_t is held, and the
 * udp_sock_fallback statistic of the owning UDP stack (udp_us) is bumped
 * after the lock has been dropped.
 */
3895 udp_use_pure_tpi(udp_t
*udp
)
3897 conn_t
*connp
= udp
->udp_connp
;
/* The flag is only written with conn_lock held. */
3899 mutex_enter(&connp
->conn_lock
);
3900 udp
->udp_issocket
= B_FALSE
;
3901 mutex_exit(&connp
->conn_lock
);
/* Count the fallback in the per-stack statistics. */
3902 UDP_STAT(udp
->udp_us
, udp_sock_fallback
);
3906 udp_wput_other(queue_t
*q
, mblk_t
*mp
)
3908 uchar_t
*rptr
= mp
->b_rptr
;
3909 struct iocblk
*iocp
;
3910 conn_t
*connp
= Q_TO_CONN(q
);
3911 udp_t
*udp
= connp
->conn_udp
;
3914 switch (mp
->b_datap
->db_type
) {
3916 udp_wput_cmdblk(q
, mp
);
3921 if (mp
->b_wptr
- rptr
< sizeof (t_scalar_t
)) {
3923 * If the message does not contain a PRIM_type,
3929 switch (((t_primp_t
)rptr
)->type
) {
3931 udp_addr_req(q
, mp
);
3935 udp_tpi_bind(q
, mp
);
3938 udp_tpi_connect(q
, mp
);
3940 case T_CAPABILITY_REQ
:
3941 udp_capability_req(q
, mp
);
3944 udp_info_req(q
, mp
);
3946 case T_UNITDATA_REQ
:
3948 * If a T_UNITDATA_REQ gets here, the address must
3949 * be bad. Valid T_UNITDATA_REQs are handled
3952 udp_ud_err(q
, mp
, EADDRNOTAVAIL
);
3955 udp_tpi_unbind(q
, mp
);
3957 case T_SVR4_OPTMGMT_REQ
:
3959 * All Solaris components should pass a db_credp
3960 * for this TPI message, hence we ASSERT.
3961 * But in case there is some other M_PROTO that looks
3962 * like a TPI message sent by some other kernel
3963 * component, we check and return an error.
3965 cr
= msg_getcred(mp
, NULL
);
3968 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
3971 if (!snmpcom_req(q
, mp
, udp_snmp_set
, ip_snmp_get
,
3973 svr4_optcom_req(q
, mp
, cr
, &udp_opt_obj
);
3979 * All Solaris components should pass a db_credp
3980 * for this TPI message, hence we ASSERT.
3981 * But in case there is some other M_PROTO that looks
3982 * like a TPI message sent by some other kernel
3983 * component, we check and return an error.
3985 cr
= msg_getcred(mp
, NULL
);
3988 udp_err_ack(q
, mp
, TSYSERR
, EINVAL
);
3991 tpi_optcom_req(q
, mp
, cr
, &udp_opt_obj
);
3995 udp_tpi_disconnect(q
, mp
);
3998 /* The following TPI message is not supported by udp. */
4001 udp_err_ack(q
, mp
, TNOTSUPPORT
, 0);
4004 /* The following 3 TPI requests are illegal for udp. */
4008 udp_err_ack(q
, mp
, TNOTSUPPORT
, 0);
4016 flushq(q
, FLUSHDATA
);
4019 iocp
= (struct iocblk
*)mp
->b_rptr
;
4020 switch (iocp
->ioc_cmd
) {
4021 case TI_GETPEERNAME
:
4022 if (udp
->udp_state
!= TS_DATA_XFER
) {
4024 * If a default destination address has not
4025 * been associated with the stream, then we
4026 * don't know the peer's name.
4028 iocp
->ioc_error
= ENOTCONN
;
4029 iocp
->ioc_count
= 0;
4030 mp
->b_datap
->db_type
= M_IOCACK
;
4037 * For TI_GETPEERNAME and TI_GETMYNAME, we first
4038 * need to copyin the user's strbuf structure.
4039 * Processing will continue in the M_IOCDATA case
4042 mi_copyin(q
, mp
, NULL
,
4043 SIZEOF_STRUCT(strbuf
, iocp
->ioc_flag
));
4045 case _SIOCSOCKFALLBACK
:
4047 * Either sockmod is about to be popped and the
4048 * socket would now be treated as a plain stream,
4049 * or a module is about to be pushed so we have
4050 * to follow pure TPI semantics.
4052 if (!udp
->udp_issocket
) {
4053 DB_TYPE(mp
) = M_IOCNAK
;
4054 iocp
->ioc_error
= EINVAL
;
4056 udp_use_pure_tpi(udp
);
4058 DB_TYPE(mp
) = M_IOCACK
;
4059 iocp
->ioc_error
= 0;
4061 iocp
->ioc_count
= 0;
4070 udp_wput_iocdata(q
, mp
);
4073 /* Unrecognized messages are passed through without change. */
4076 ip_wput_nondata(q
, mp
);
4080 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
4084 udp_wput_iocdata(queue_t
*q
, mblk_t
*mp
)
4087 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
4088 STRUCT_HANDLE(strbuf
, sb
);
4090 conn_t
*connp
= Q_TO_CONN(q
);
4091 udp_t
*udp
= connp
->conn_udp
;
4093 /* Make sure it is one of ours. */
4094 switch (iocp
->ioc_cmd
) {
4096 case TI_GETPEERNAME
:
4099 ip_wput_nondata(q
, mp
);
4103 switch (mi_copy_state(q
, mp
, &mp1
)) {
4106 case MI_COPY_CASE(MI_COPY_IN
, 1):
4108 case MI_COPY_CASE(MI_COPY_OUT
, 1):
4110 * The address has been copied out, so now
4111 * copyout the strbuf.
4115 case MI_COPY_CASE(MI_COPY_OUT
, 2):
4117 * The address and strbuf have been copied out.
4118 * We're done, so just acknowledge the original
4121 mi_copy_done(q
, mp
, 0);
4125 * Something strange has happened, so acknowledge
4126 * the original M_IOCTL with an EPROTO error.
4128 mi_copy_done(q
, mp
, EPROTO
);
4133 * Now we have the strbuf structure for TI_GETMYNAME
4134 * and TI_GETPEERNAME. Next we copyout the requested
4135 * address and then we'll copyout the strbuf.
4137 STRUCT_SET_HANDLE(sb
, iocp
->ioc_flag
, (void *)mp1
->b_rptr
);
4139 if (connp
->conn_family
== AF_INET
)
4140 addrlen
= sizeof (sin_t
);
4142 addrlen
= sizeof (sin6_t
);
4144 if (STRUCT_FGET(sb
, maxlen
) < addrlen
) {
4145 mi_copy_done(q
, mp
, EINVAL
);
4149 switch (iocp
->ioc_cmd
) {
4152 case TI_GETPEERNAME
:
4153 if (udp
->udp_state
!= TS_DATA_XFER
) {
4154 mi_copy_done(q
, mp
, ENOTCONN
);
4159 mp1
= mi_copyout_alloc(q
, mp
, STRUCT_FGETP(sb
, buf
), addrlen
, B_TRUE
);
4163 STRUCT_FSET(sb
, len
, addrlen
);
4164 switch (((struct iocblk
*)mp
->b_rptr
)->ioc_cmd
) {
4166 (void) conn_getsockname(connp
, (struct sockaddr
*)mp1
->b_wptr
,
4169 case TI_GETPEERNAME
:
4170 (void) conn_getpeername(connp
, (struct sockaddr
*)mp1
->b_wptr
,
4174 mp1
->b_wptr
+= addrlen
;
4175 /* Copy out the address */
/*
 * Global (not per-stack) UDP initialization.
 *
 * Stores in udp_max_optsize the value optcom_max_optsize() returns for the
 * option descriptor array and count held in udp_opt_obj, then registers
 * udp_stack_init and udp_stack_fini with the netstack framework under
 * NS_UDP.  The callback slot between them is passed as NULL.
 */
4180 udp_ddi_g_init(void)
4182 udp_max_optsize
= optcom_max_optsize(udp_opt_obj
.odb_opt_des_arr
,
4183 udp_opt_obj
.odb_opt_arr_cnt
);
4186 * We want to be informed each time a stack is created or
4187 * destroyed in the kernel, so we can maintain the
4188 * set of udp_stack_t's.
4190 netstack_register(NS_UDP
, udp_stack_init
, NULL
, udp_stack_fini
);
/*
 * Global UDP teardown: removes the NS_UDP registration from the netstack
 * framework.
 */
4194 udp_ddi_g_destroy(void)
4196 netstack_unregister(NS_UDP
);
4199 #define INET_NAME "ip"
4202 * Initialize the UDP stack instance.
4205 udp_stack_init(netstackid_t stackid
, netstack_t
*ns
)
4213 us
= (udp_stack_t
*)kmem_zalloc(sizeof (*us
), KM_SLEEP
);
4214 us
->us_netstack
= ns
;
4216 mutex_init(&us
->us_epriv_port_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4217 us
->us_num_epriv_ports
= UDP_NUM_EPRIV_PORTS
;
4218 us
->us_epriv_ports
[0] = ULP_DEF_EPRIV_PORT1
;
4219 us
->us_epriv_ports
[1] = ULP_DEF_EPRIV_PORT2
;
4222 * The smallest anonymous port in the privileged port range in which UDP
4223 * looks for a free port. Used by the option UDP_ANONPRIVBIND.
4225 us
->us_min_anonpriv_port
= 512;
4227 us
->us_bind_fanout_size
= udp_bind_fanout_size
;
4229 /* Roundup variable that might have been modified in /etc/system */
4230 if (!ISP2(us
->us_bind_fanout_size
)) {
4231 /* Not a power of two. Round up to nearest power of two */
4232 for (i
= 0; i
< 31; i
++) {
4233 if (us
->us_bind_fanout_size
< (1 << i
))
4236 us
->us_bind_fanout_size
= 1 << i
;
4238 us
->us_bind_fanout
= kmem_zalloc(us
->us_bind_fanout_size
*
4239 sizeof (udp_fanout_t
), KM_SLEEP
);
4240 for (i
= 0; i
< us
->us_bind_fanout_size
; i
++) {
4241 mutex_init(&us
->us_bind_fanout
[i
].uf_lock
, NULL
, MUTEX_DEFAULT
,
4245 arrsz
= udp_propinfo_count
* sizeof (mod_prop_info_t
);
4246 us
->us_propinfo_tbl
= (mod_prop_info_t
*)kmem_alloc(arrsz
,
4248 bcopy(udp_propinfo_tbl
, us
->us_propinfo_tbl
, arrsz
);
4250 /* Allocate the per netstack stats */
4251 mutex_enter(&cpu_lock
);
4252 us
->us_sc_cnt
= MAX(ncpus
, boot_ncpus
);
4253 mutex_exit(&cpu_lock
);
4254 us
->us_sc
= kmem_zalloc(max_ncpus
* sizeof (udp_stats_cpu_t
*),
4256 for (i
= 0; i
< us
->us_sc_cnt
; i
++) {
4257 us
->us_sc
[i
] = kmem_zalloc(sizeof (udp_stats_cpu_t
),
4261 us
->us_kstat
= udp_kstat2_init(stackid
);
4262 us
->us_mibkp
= udp_kstat_init(stackid
);
4264 major
= mod_name_to_major(INET_NAME
);
4265 error
= ldi_ident_from_major(major
, &us
->us_ldi_ident
);
4271 * Free the UDP stack instance.
/*
 * Tear down one UDP stack instance.
 *
 * stackid: identifier of the stack; forwarded to the kstat teardown calls.
 * arg:     the udp_stack_t to destroy, passed as an untyped pointer.
 *
 * Releases, in order: the bind fanout locks and table, the per-CPU
 * statistics, the property table, both kstats, the epriv port lock, the
 * LDI identifier and finally the udp_stack_t itself.  Pointers are set to
 * NULL after the object they referenced has been released.
 */
4274 udp_stack_fini(netstackid_t stackid
, void *arg
)
4276 udp_stack_t
*us
= (udp_stack_t
*)arg
;
/* Destroy the lock of every bind fanout bucket before freeing the table. */
4279 for (i
= 0; i
< us
->us_bind_fanout_size
; i
++) {
4280 mutex_destroy(&us
->us_bind_fanout
[i
].uf_lock
);
/* Free the table using the same element count and element size. */
4283 kmem_free(us
->us_bind_fanout
, us
->us_bind_fanout_size
*
4284 sizeof (udp_fanout_t
));
4286 us
->us_bind_fanout
= NULL
;
/* Only the first us_sc_cnt per-CPU stat structures are freed here. */
4288 for (i
= 0; i
< us
->us_sc_cnt
; i
++)
4289 kmem_free(us
->us_sc
[i
], sizeof (udp_stats_cpu_t
));
/* The pointer array itself is freed with a size based on max_ncpus. */
4290 kmem_free(us
->us_sc
, max_ncpus
* sizeof (udp_stats_cpu_t
*));
/* Free the property table; its size is udp_propinfo_count entries. */
4292 kmem_free(us
->us_propinfo_tbl
,
4293 udp_propinfo_count
* sizeof (mod_prop_info_t
));
4294 us
->us_propinfo_tbl
= NULL
;
/* Remove both kstats that belong to this stack id. */
4296 udp_kstat_fini(stackid
, us
->us_mibkp
);
4297 us
->us_mibkp
= NULL
;
4299 udp_kstat2_fini(stackid
, us
->us_kstat
);
4300 us
->us_kstat
= NULL
;
/* Last: the remaining lock, the LDI identifier, and the structure itself. */
4302 mutex_destroy(&us
->us_epriv_port_lock
);
4303 ldi_ident_release(us
->us_ldi_ident
);
4304 kmem_free(us
, sizeof (*us
));
/*
 * Record the receive high-water mark for a UDP endpoint.
 *
 * udp:  endpoint whose udp_rcv_hiwat is written.
 * size: requested size.
 *
 * The value is capped at us_max_buf of the owning stack (udp_us) before it
 * is stored.
 * NOTE(review): the existing comment mentions extra buffering, but the
 * statement that adds it, and the return statement, are not visible in
 * this excerpt - confirm against the full file.
 */
4308 udp_set_rcv_hiwat(udp_t
*udp
, size_t size
)
4310 udp_stack_t
*us
= udp
->udp_us
;
4312 /* We add a bit of extra buffering */
/* Never exceed the per-stack maximum buffer size. */
4314 if (size
> us
->us_max_buf
)
4315 size
= us
->us_max_buf
;
4317 udp
->udp_rcv_hiwat
= size
;
4322 * For the lower queue so that UDP can be a dummy mux.
4323 * Nobody should be sending
4324 * packets up this stream
/*
 * Lower read-side put routine; takes a queue and a message.
 *
 * Dispatches on the message type (db_type).  In the visible branch, when
 * the first byte of the message has FLUSHW set, the FLUSHR bit is cleared
 * from that byte.
 * NOTE(review): the case labels and whatever is done with mp afterwards
 * are not visible in this excerpt - confirm against the full file.
 */
4327 udp_lrput(queue_t
*q
, mblk_t
*mp
)
4329 switch (mp
->b_datap
->db_type
) {
4332 if (*mp
->b_rptr
& FLUSHW
) {
4333 *mp
->b_rptr
&= ~FLUSHR
;
4343 * For the lower queue so that UDP can be a dummy mux.
4344 * Nobody should be sending packets down this stream.
4348 udp_lwput(queue_t
*q
, mblk_t
*mp
)
4354 * When a CPU is added, we need to allocate the per CPU stats struct.
/*
 * Extend the per-CPU statistics of a UDP stack so that cpu_seqid is covered.
 *
 * us:        stack whose us_sc array and us_sc_cnt are updated.
 * cpu_seqid: sequence id of the CPU that needs a statistics structure.
 *
 * A zeroed udp_stats_cpu_t is allocated for every slot from the current
 * us_sc_cnt up to and including cpu_seqid, and us_sc_cnt then becomes
 * cpu_seqid + 1.
 * NOTE(review): the statement guarded by the first test and the allocation
 * flag are not visible in this excerpt; presumably the function returns
 * early when the slot already exists - confirm.
 */
4357 udp_stack_cpu_add(udp_stack_t
*us
, processorid_t cpu_seqid
)
/* Slots below us_sc_cnt are the ones already populated. */
4361 if (cpu_seqid
< us
->us_sc_cnt
)
4363 for (i
= us
->us_sc_cnt
; i
<= cpu_seqid
; i
++) {
/* Each newly covered slot is expected to be empty. */
4364 ASSERT(us
->us_sc
[i
] == NULL
);
4365 us
->us_sc
[i
] = kmem_zalloc(sizeof (udp_stats_cpu_t
),
/* The count is written only after the new slots have been filled. */
4369 us
->us_sc_cnt
= cpu_seqid
+ 1;
4373 * Below routines for UDP socket module.
4377 udp_do_open(cred_t
*credp
, boolean_t isv6
, int flags
, int *errorp
)
4386 ASSERT(errorp
!= NULL
);
4388 if ((*errorp
= secpolicy_basic_net_access(credp
)) != 0)
4391 ns
= netstack_find_by_cred(credp
);
4393 us
= ns
->netstack_udp
;
4397 * For exclusive stacks we set the zoneid to zero
4398 * to make UDP operate as if in the global zone.
4400 if (ns
->netstack_stackid
!= GLOBAL_NETSTACKID
)
4401 zoneid
= GLOBAL_ZONEID
;
4403 zoneid
= crgetzoneid(credp
);
4405 ASSERT(flags
== KM_SLEEP
|| flags
== KM_NOSLEEP
);
4407 connp
= ipcl_conn_create(IPCL_UDPCONN
, flags
, ns
);
4408 if (connp
== NULL
) {
4413 udp
= connp
->conn_udp
;
4416 * ipcl_conn_create did a netstack_hold. Undo the hold that was
4417 * done by netstack_find_by_cred()
4422 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4423 * need to lock anything.
4425 ASSERT(connp
->conn_proto
== IPPROTO_UDP
);
4426 ASSERT(connp
->conn_udp
== udp
);
4427 ASSERT(udp
->udp_connp
== connp
);
4429 /* Set the initial state of the stream and the privilege status. */
4430 udp
->udp_state
= TS_UNBND
;
4431 connp
->conn_ixa
->ixa_flags
|= IXAF_VERIFY_SOURCE
;
4433 connp
->conn_family
= AF_INET6
;
4434 connp
->conn_ipversion
= IPV6_VERSION
;
4435 connp
->conn_ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
4436 connp
->conn_default_ttl
= us
->us_ipv6_hoplimit
;
4437 len
= sizeof (ip6_t
) + UDPH_SIZE
;
4439 connp
->conn_family
= AF_INET
;
4440 connp
->conn_ipversion
= IPV4_VERSION
;
4441 connp
->conn_ixa
->ixa_flags
|= IXAF_IS_IPV4
;
4442 connp
->conn_default_ttl
= us
->us_ipv4_ttl
;
4443 len
= sizeof (ipha_t
) + UDPH_SIZE
;
4446 ASSERT(connp
->conn_ixa
->ixa_protocol
== connp
->conn_proto
);
4447 connp
->conn_xmit_ipp
.ipp_unicast_hops
= connp
->conn_default_ttl
;
4449 connp
->conn_ixa
->ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
4450 connp
->conn_ixa
->ixa_flags
|= IXAF_MULTICAST_LOOP
| IXAF_SET_ULP_CKSUM
;
4451 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
4452 connp
->conn_ixa
->ixa_zoneid
= zoneid
;
4454 connp
->conn_zoneid
= zoneid
;
4456 connp
->conn_zone_is_global
= (crgetzoneid(credp
) == GLOBAL_ZONEID
);
4460 connp
->conn_rcvbuf
= us
->us_recv_hiwat
;
4461 connp
->conn_sndbuf
= us
->us_xmit_hiwat
;
4462 connp
->conn_sndlowat
= us
->us_xmit_lowat
;
4463 connp
->conn_rcvlowat
= udp_mod_info
.mi_lowat
;
4465 connp
->conn_wroff
= len
+ us
->us_wroff_extra
;
4466 connp
->conn_so_type
= SOCK_DGRAM
;
4468 connp
->conn_recv
= udp_input
;
4469 connp
->conn_recvicmp
= udp_icmp_input
;
4471 connp
->conn_cred
= credp
;
4472 connp
->conn_cpid
= curproc
->p_pid
;
4473 connp
->conn_open_time
= ddi_get_lbolt64();
4474 /* Cache things in ixa without an extra refhold */
4475 ASSERT(!(connp
->conn_ixa
->ixa_free_flags
& IXA_FREE_CRED
));
4476 connp
->conn_ixa
->ixa_cred
= connp
->conn_cred
;
4477 connp
->conn_ixa
->ixa_cpid
= connp
->conn_cpid
;
4479 *((sin6_t
*)&udp
->udp_delayed_addr
) = sin6_null
;
4481 if (us
->us_pmtu_discovery
)
4482 connp
->conn_ixa
->ixa_flags
|= IXAF_PMTU_DISCOVERY
;
4488 udp_create(int family
, int type
, int proto
, sock_downcalls_t
**sock_downcalls
,
4489 uint_t
*smodep
, int *errorp
, int flags
, cred_t
*credp
)
4496 if (type
!= SOCK_DGRAM
|| (family
!= AF_INET
&& family
!= AF_INET6
) ||
4497 (proto
!= 0 && proto
!= IPPROTO_UDP
)) {
4498 *errorp
= EPROTONOSUPPORT
;
4502 if (family
== AF_INET6
)
4507 connp
= udp_do_open(credp
, isv6
, flags
, errorp
);
4511 udp
= connp
->conn_udp
;
4512 ASSERT(udp
!= NULL
);
4516 udp
->udp_issocket
= B_TRUE
;
4517 connp
->conn_flags
|= IPCL_NONSTR
;
4521 * Since this conn_t/udp_t is not yet visible to anybody else we don't
4522 * need to lock anything.
4524 (void) udp_set_rcv_hiwat(udp
, connp
->conn_rcvbuf
);
4525 udp
->udp_rcv_disply_hiwat
= connp
->conn_rcvbuf
;
4527 connp
->conn_flow_cntrld
= B_FALSE
;
4529 mutex_enter(&connp
->conn_lock
);
4530 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
4531 mutex_exit(&connp
->conn_lock
);
4534 *smodep
= SM_ATOMIC
;
4535 *sock_downcalls
= &sock_udp_downcalls
;
4536 return ((sock_lower_handle_t
)connp
);
4541 udp_activate(sock_lower_handle_t proto_handle
, sock_upper_handle_t sock_handle
,
4542 sock_upcalls_t
*sock_upcalls
, int flags
, cred_t
*cr
)
4544 conn_t
*connp
= (conn_t
*)proto_handle
;
4545 struct sock_proto_props sopp
;
4547 /* All Solaris components should pass a cred for this operation. */
4550 connp
->conn_upcalls
= sock_upcalls
;
4551 connp
->conn_upper_handle
= sock_handle
;
4553 sopp
.sopp_flags
= SOCKOPT_WROFF
| SOCKOPT_RCVHIWAT
| SOCKOPT_RCVLOWAT
|
4554 SOCKOPT_MAXBLK
| SOCKOPT_MAXPSZ
| SOCKOPT_MINPSZ
;
4555 sopp
.sopp_wroff
= connp
->conn_wroff
;
4556 sopp
.sopp_maxblk
= INFPSZ
;
4557 sopp
.sopp_rxhiwat
= connp
->conn_rcvbuf
;
4558 sopp
.sopp_rxlowat
= connp
->conn_rcvlowat
;
4559 sopp
.sopp_maxaddrlen
= sizeof (sin6_t
);
4561 (connp
->conn_family
== AF_INET
) ? UDP_MAXPACKET_IPV4
:
4563 sopp
.sopp_minpsz
= (udp_mod_info
.mi_minpsz
== 1) ? 0 :
4564 udp_mod_info
.mi_minpsz
;
4566 (*connp
->conn_upcalls
->su_set_proto_props
)(connp
->conn_upper_handle
,
4571 udp_do_close(conn_t
*connp
)
4575 ASSERT(connp
!= NULL
&& IPCL_IS_UDP(connp
));
4576 udp
= connp
->conn_udp
;
4578 if (cl_inet_unbind
!= NULL
&& udp
->udp_state
== TS_IDLE
) {
4580 * Running in cluster mode - register unbind information
4582 if (connp
->conn_ipversion
== IPV4_VERSION
) {
4584 connp
->conn_netstack
->netstack_stackid
,
4585 IPPROTO_UDP
, AF_INET
,
4586 (uint8_t *)(&V4_PART_OF_V6(connp
->conn_laddr_v6
)),
4587 (in_port_t
)connp
->conn_lport
, NULL
);
4590 connp
->conn_netstack
->netstack_stackid
,
4591 IPPROTO_UDP
, AF_INET6
,
4592 (uint8_t *)&(connp
->conn_laddr_v6
),
4593 (in_port_t
)connp
->conn_lport
, NULL
);
4597 udp_bind_hash_remove(udp
, B_FALSE
);
4599 ip_quiesce_conn(connp
);
4601 if (!IPCL_IS_NONSTR(connp
)) {
4602 ASSERT(connp
->conn_wq
!= NULL
);
4603 ASSERT(connp
->conn_rq
!= NULL
);
4604 qprocsoff(connp
->conn_rq
);
4607 udp_close_free(connp
);
4610 * Now we are truly single threaded on this stream, and can
4611 * delete the things hanging off the connp, and finally the connp.
4612 * We removed this connp from the fanout list, it cannot be
4613 * accessed thru the fanouts, and we already waited for the
4614 * conn_ref to drop to 0. We are already in close, so
4615 * there cannot be any other thread from the top. qprocsoff
4616 * has completed, and service has completed or won't run in
4619 ASSERT(connp
->conn_ref
== 1);
4621 if (!IPCL_IS_NONSTR(connp
)) {
4622 inet_minor_free(connp
->conn_minor_arena
, connp
->conn_dev
);
4624 ip_free_helper_stream(connp
);
4628 ipcl_conn_destroy(connp
);
/*
 * Socket close downcall for UDP.
 *
 * proto_handle: lower handle of the socket; it is the conn_t of the
 *               endpoint and is cast back to that type here.
 * flags, cr:    not used by the statements visible in this excerpt.
 *
 * The actual teardown is delegated to udp_do_close().
 * NOTE(review): the cred check implied by the existing comment and the
 * return statement are not visible here - confirm against the full file.
 */
4633 udp_close(sock_lower_handle_t proto_handle
, int flags
, cred_t
*cr
)
4635 conn_t
*connp
= (conn_t
*)proto_handle
;
4637 /* All Solaris components should pass a cred for this operation. */
4640 udp_do_close(connp
);
4645 udp_do_bind(conn_t
*connp
, struct sockaddr
*sa
, socklen_t len
, cred_t
*cr
,
4646 boolean_t bind_to_req_port_only
)
4650 udp_t
*udp
= connp
->conn_udp
;
4652 ip_laddr_t laddr_type
= IPVL_UNICAST_UP
; /* INADDR_ANY */
4653 in_port_t port
; /* Host byte order */
4654 in_port_t requested_port
; /* Host byte order */
4656 ipaddr_t v4src
; /* Set if AF_INET */
4660 in_port_t lport
; /* Network byte order */
4662 zoneid_t zoneid
= IPCL_ZONEID(connp
);
4663 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
4664 boolean_t is_inaddr_any
;
4665 udp_stack_t
*us
= udp
->udp_us
;
4668 case sizeof (sin_t
): /* Complete IPv4 address */
4671 if (sin
== NULL
|| !OK_32PTR((char *)sin
))
4674 if (connp
->conn_family
!= AF_INET
||
4675 sin
->sin_family
!= AF_INET
) {
4676 return (EAFNOSUPPORT
);
4678 v4src
= sin
->sin_addr
.s_addr
;
4679 IN6_IPADDR_TO_V4MAPPED(v4src
, &v6src
);
4680 if (v4src
!= INADDR_ANY
) {
4681 laddr_type
= ip_laddr_verify_v4(v4src
, zoneid
, ipst
,
4684 port
= ntohs(sin
->sin_port
);
4687 case sizeof (sin6_t
): /* complete IPv6 address */
4688 sin6
= (sin6_t
*)sa
;
4690 if (sin6
== NULL
|| !OK_32PTR((char *)sin6
))
4693 if (connp
->conn_family
!= AF_INET6
||
4694 sin6
->sin6_family
!= AF_INET6
) {
4695 return (EAFNOSUPPORT
);
4697 v6src
= sin6
->sin6_addr
;
4698 if (IN6_IS_ADDR_V4MAPPED(&v6src
)) {
4699 if (connp
->conn_ipv6_v6only
)
4700 return (EADDRNOTAVAIL
);
4702 IN6_V4MAPPED_TO_IPADDR(&v6src
, v4src
);
4703 if (v4src
!= INADDR_ANY
) {
4704 laddr_type
= ip_laddr_verify_v4(v4src
,
4705 zoneid
, ipst
, B_FALSE
);
4708 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
4709 if (IN6_IS_ADDR_LINKSCOPE(&v6src
))
4710 scopeid
= sin6
->sin6_scope_id
;
4711 laddr_type
= ip_laddr_verify_v6(&v6src
,
4712 zoneid
, ipst
, B_TRUE
, scopeid
);
4715 port
= ntohs(sin6
->sin6_port
);
4718 default: /* Invalid request */
4719 (void) strlog(UDP_MOD_ID
, 0, 1, SL_ERROR
|SL_TRACE
,
4720 "udp_bind: bad ADDR_length length %u", len
);
4724 /* Is the local address a valid unicast, multicast, or broadcast? */
4725 if (laddr_type
== IPVL_BAD
)
4726 return (EADDRNOTAVAIL
);
4728 requested_port
= port
;
4730 if (requested_port
== 0 || !bind_to_req_port_only
)
4731 bind_to_req_port_only
= B_FALSE
;
4732 else /* T_BIND_REQ and requested_port != 0 */
4733 bind_to_req_port_only
= B_TRUE
;
4735 if (requested_port
== 0) {
4737 * If the application passed in zero for the port number, it
4738 * doesn't care which port number we bind to. Get one in the
4741 if (connp
->conn_anon_priv_bind
) {
4742 port
= udp_get_next_priv_port(udp
);
4744 port
= udp_update_next_port(udp
,
4745 us
->us_next_port_to_try
, B_TRUE
);
4749 * If the port is in the well-known privileged range,
4750 * make sure the caller was privileged.
4753 boolean_t priv
= B_FALSE
;
4755 if (port
< us
->us_smallest_nonpriv_port
) {
4758 for (i
= 0; i
< us
->us_num_epriv_ports
; i
++) {
4759 if (port
== us
->us_epriv_ports
[i
]) {
4767 if (secpolicy_net_privaddr(cr
, port
, IPPROTO_UDP
) != 0)
4776 * The state must be TS_UNBND. TPI mandates that users must send
4777 * TPI primitives only 1 at a time and wait for the response before
4778 * sending the next primitive.
4780 mutex_enter(&connp
->conn_lock
);
4781 if (udp
->udp_state
!= TS_UNBND
) {
4782 mutex_exit(&connp
->conn_lock
);
4783 (void) strlog(UDP_MOD_ID
, 0, 1, SL_ERROR
|SL_TRACE
,
4784 "udp_bind: bad state, %u", udp
->udp_state
);
4785 return (-TOUTSTATE
);
4788 * Copy the source address into our udp structure. This address
4789 * may still be zero; if so, IP will fill in the correct address
4790 * each time an outbound packet is passed to it. Since the udp is
4791 * not yet in the bind hash list, we don't grab the uf_lock to
4792 * change conn_ipversion
4794 if (connp
->conn_family
== AF_INET
) {
4795 ASSERT(sin
!= NULL
);
4796 ASSERT(connp
->conn_ixa
->ixa_flags
& IXAF_IS_IPV4
);
4798 if (IN6_IS_ADDR_V4MAPPED(&v6src
)) {
4800 * no need to hold the uf_lock to set the conn_ipversion
4801 * since we are not yet in the fanout list
4803 connp
->conn_ipversion
= IPV4_VERSION
;
4804 connp
->conn_ixa
->ixa_flags
|= IXAF_IS_IPV4
;
4806 connp
->conn_ipversion
= IPV6_VERSION
;
4807 connp
->conn_ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
4812 * If conn_reuseaddr is not set, then we have to make sure that
4813 * the IP address and port number the application requested
4814 * (or we selected for the application) is not being used by
4815 * another stream. If another stream is already using the
4816 * requested IP address and port, the behavior depends on
4817 * "bind_to_req_port_only". If set the bind fails; otherwise we
4818 * search for any unused port to bind to the stream.
4820 * As per the BSD semantics, as modified by the Deering multicast
4821 * changes, if conn_reuseaddr is set, then we allow multiple binds
4822 * to the same port independent of the local IP address.
4824 * This is slightly different than in SunOS 4.X which did not
4825 * support IP multicast. Note that the change implemented by the
4826 * Deering multicast code affects all binds - not only binding
4827 * to IP multicast addresses.
4829 * Note that when binding to port zero we ignore SO_REUSEADDR in
4830 * order to guarantee a unique port.
4834 if (connp
->conn_anon_priv_bind
) {
4836 * loopmax = (IPPORT_RESERVED-1) -
4837 * us->us_min_anonpriv_port + 1
4839 loopmax
= IPPORT_RESERVED
- us
->us_min_anonpriv_port
;
4841 loopmax
= us
->us_largest_anon_port
-
4842 us
->us_smallest_anon_port
+ 1;
4845 is_inaddr_any
= V6_OR_V4_INADDR_ANY(v6src
);
4849 boolean_t found_exclbind
= B_FALSE
;
4853 * Walk through the list of udp streams bound to
4854 * requested port with the same IP address.
4856 lport
= htons(port
);
4857 udpf
= &us
->us_bind_fanout
[UDP_BIND_HASH(lport
,
4858 us
->us_bind_fanout_size
)];
4859 mutex_enter(&udpf
->uf_lock
);
4860 for (udp1
= udpf
->uf_udp
; udp1
!= NULL
;
4861 udp1
= udp1
->udp_bind_hash
) {
4862 connp1
= udp1
->udp_connp
;
4864 if (lport
!= connp1
->conn_lport
)
4867 if (!IPCL_BIND_ZONE_MATCH(connp1
, connp
))
4871 * If UDP_EXCLBIND is set for either the bound or
4872 * binding endpoint, the semantics of bind
4873 * is changed according to the following chart.
4875 * spec = specified address (v4 or v6)
4876 * unspec = unspecified address (v4 or v6)
4877 * A = specified addresses are different for endpoints
4879 * bound bind to allowed?
4880 * -------------------------------------
4884 * spec spec yes if A
4886 if (connp1
->conn_exclbind
|| connp
->conn_exclbind
) {
4887 if (V6_OR_V4_INADDR_ANY(
4888 connp1
->conn_bound_addr_v6
) ||
4891 &connp1
->conn_bound_addr_v6
,
4893 found_exclbind
= B_TRUE
;
4900 * Check ipversion to allow IPv4 and IPv6 sockets to
4901 * have disjoint port number spaces.
4903 if (connp
->conn_ipversion
!= connp1
->conn_ipversion
) {
4906 * On the first time through the loop, if the
4907 * user intentionally specified a
4908 * particular port number, then ignore any
4909 * bindings of the other protocol that may
4910 * conflict. This allows the user to bind IPv6
4911 * alone and get both v4 and v6, or bind both
4912 * and get each separately. On subsequent
4913 * times through the loop, we're checking a
4914 * port that we chose (not the user) and thus
4915 * we do not allow casual duplicate bindings.
4917 if (count
== 0 && requested_port
!= 0)
4922 * No difference depending on SO_REUSEADDR.
4924 * If existing port is bound to a
4925 * non-wildcard IP address and
4926 * the requesting stream is bound to
4927 * a different IP address
4928 * (non-wildcard, also), keep going.
4930 if (!is_inaddr_any
&&
4931 !V6_OR_V4_INADDR_ANY(connp1
->conn_bound_addr_v6
) &&
4932 !IN6_ARE_ADDR_EQUAL(&connp1
->conn_laddr_v6
,
4939 if (!found_exclbind
&&
4940 (connp
->conn_reuseaddr
&& requested_port
!= 0)) {
4946 * No other stream has this IP address
4947 * and port number. We can use it.
4951 mutex_exit(&udpf
->uf_lock
);
4952 if (bind_to_req_port_only
) {
4954 * We get here only when requested port
4955 * is bound (and only first of the for()
4958 * The semantics of this bind request
4959 * require it to fail so we return from
4960 * the routine (and exit the loop).
4963 mutex_exit(&connp
->conn_lock
);
4964 return (-TADDRBUSY
);
4967 if (connp
->conn_anon_priv_bind
) {
4968 port
= udp_get_next_priv_port(udp
);
4970 if ((count
== 0) && (requested_port
!= 0)) {
4972 * If the application wants us to find
4973 * a port, get one to start with. Set
4974 * requested_port to 0, so that we will
4975 * update us->us_next_port_to_try below.
4977 port
= udp_update_next_port(udp
,
4978 us
->us_next_port_to_try
, B_TRUE
);
4981 port
= udp_update_next_port(udp
, port
+ 1,
4986 if (port
== 0 || ++count
>= loopmax
) {
4988 * We've tried every possible port number and
4989 * there are none available, so send an error
4992 mutex_exit(&connp
->conn_lock
);
4998 * Copy the source address into our udp structure. This address
4999 * may still be zero; if so, ip_attr_connect will fill in the correct
5000 * address when a packet is about to be sent.
5001 * If we are binding to a broadcast or multicast address then
5002 * we just set the conn_bound_addr since we don't want to use
5003 * that as the source address when sending.
5005 connp
->conn_bound_addr_v6
= v6src
;
5006 connp
->conn_laddr_v6
= v6src
;
5008 connp
->conn_ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
5009 connp
->conn_ixa
->ixa_scopeid
= scopeid
;
5010 connp
->conn_incoming_ifindex
= scopeid
;
5012 connp
->conn_ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
5013 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
5016 switch (laddr_type
) {
5017 case IPVL_UNICAST_UP
:
5018 case IPVL_UNICAST_DOWN
:
5019 connp
->conn_saddr_v6
= v6src
;
5020 connp
->conn_mcbc_bind
= B_FALSE
;
5024 /* ip_set_destination will pick a source address later */
5025 connp
->conn_saddr_v6
= ipv6_all_zeros
;
5026 connp
->conn_mcbc_bind
= B_TRUE
;
5030 /* Any errors after this point should use late_error */
5031 connp
->conn_lport
= lport
;
5034 * Now reset the next anonymous port if the application requested
5035 * an anonymous port, or we handed out the next anonymous port.
5037 if ((requested_port
== 0) && (!connp
->conn_anon_priv_bind
)) {
5038 us
->us_next_port_to_try
= port
+ 1;
5041 /* Initialize the T_BIND_ACK. */
5042 if (connp
->conn_family
== AF_INET
) {
5043 sin
->sin_port
= connp
->conn_lport
;
5045 sin6
->sin6_port
= connp
->conn_lport
;
5047 udp
->udp_state
= TS_IDLE
;
5048 udp_bind_hash_insert(udpf
, udp
);
5049 mutex_exit(&udpf
->uf_lock
);
5050 mutex_exit(&connp
->conn_lock
);
5054 * Running in cluster mode - register bind information
5056 if (connp
->conn_ipversion
== IPV4_VERSION
) {
5057 (*cl_inet_bind
)(connp
->conn_netstack
->netstack_stackid
,
5058 IPPROTO_UDP
, AF_INET
, (uint8_t *)&v4src
,
5059 (in_port_t
)connp
->conn_lport
, NULL
);
5061 (*cl_inet_bind
)(connp
->conn_netstack
->netstack_stackid
,
5062 IPPROTO_UDP
, AF_INET6
, (uint8_t *)&v6src
,
5063 (in_port_t
)connp
->conn_lport
, NULL
);
5067 mutex_enter(&connp
->conn_lock
);
5068 connp
->conn_anon_port
= B_FALSE
;
5071 * We create an initial header template here to make a subsequent
5072 * sendto have a starting point. Since conn_last_dst is zero the
5073 * first sendto will always follow the 'dst changed' code path.
5074 * Note that we defer massaging options and the related checksum
5075 * adjustment until we have a destination address.
5077 error
= udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
5078 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
5080 mutex_exit(&connp
->conn_lock
);
5084 connp
->conn_faddr_v6
= ipv6_all_zeros
;
5085 connp
->conn_fport
= 0;
5086 connp
->conn_v6lastdst
= ipv6_all_zeros
;
5087 mutex_exit(&connp
->conn_lock
);
5089 error
= ip_laddr_fanout_insert(connp
);
5093 /* Bind succeeded */
5097 /* We had already picked the port number, and then the bind failed */
5098 mutex_enter(&connp
->conn_lock
);
5099 udpf
= &us
->us_bind_fanout
[
5100 UDP_BIND_HASH(connp
->conn_lport
,
5101 us
->us_bind_fanout_size
)];
5102 mutex_enter(&udpf
->uf_lock
);
5103 connp
->conn_saddr_v6
= ipv6_all_zeros
;
5104 connp
->conn_bound_addr_v6
= ipv6_all_zeros
;
5105 connp
->conn_laddr_v6
= ipv6_all_zeros
;
5107 connp
->conn_ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
5108 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
5110 udp
->udp_state
= TS_UNBND
;
5111 udp_bind_hash_remove(udp
, B_TRUE
);
5112 connp
->conn_lport
= 0;
5113 mutex_exit(&udpf
->uf_lock
);
5114 connp
->conn_anon_port
= B_FALSE
;
5116 connp
->conn_v6lastdst
= ipv6_all_zeros
;
5118 /* Restore the header that was built above - different source address */
5119 (void) udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
5120 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
5121 mutex_exit(&connp
->conn_lock
);
5126 udp_bind(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5127 socklen_t len
, cred_t
*cr
)
5132 /* All Solaris components should pass a cred for this operation. */
5135 connp
= (conn_t
*)proto_handle
;
5138 error
= udp_do_unbind(connp
);
5140 error
= udp_do_bind(connp
, sa
, len
, cr
, B_TRUE
);
5143 if (error
== -TOUTSTATE
)
5146 error
= proto_tlitosyserr(-error
);
5153 udp_implicit_bind(conn_t
*connp
, cred_t
*cr
)
5161 /* All Solaris components should pass a cred for this operation. */
5164 if (connp
->conn_family
== AF_INET
) {
5165 len
= sizeof (struct sockaddr_in
);
5166 sin
= (sin_t
*)&sin6addr
;
5168 sin
->sin_family
= AF_INET
;
5169 sin
->sin_addr
.s_addr
= INADDR_ANY
;
5171 ASSERT(connp
->conn_family
== AF_INET6
);
5172 len
= sizeof (sin6_t
);
5173 sin6
= (sin6_t
*)&sin6addr
;
5175 sin6
->sin6_family
= AF_INET6
;
5176 V6_SET_ZERO(sin6
->sin6_addr
);
5179 error
= udp_do_bind(connp
, (struct sockaddr
*)&sin6addr
, len
,
5181 return ((error
< 0) ? proto_tlitosyserr(-error
) : error
);
5185 * This routine removes a port number association from a stream. It
5186 * is called by udp_unbind and udp_tpi_unbind.
/*
 * udp_do_unbind: drop the local port/address association of connp.
 *
 * Visible sequence:
 *   1. If the cluster hook cl_inet_unbind is set, the local address and
 *      port are reported (AF_INET or AF_INET6 form by conn_ipversion).
 *   2. Under conn_lock: if already TS_UNBND, return -TOUTSTATE.
 *   3. Under the bind-fanout bucket lock for conn_lport: remove the udp_t
 *      from the bind hash and zero the source, bound, local and foreign
 *      addresses plus the local and foreign ports.
 *   4. Clear conn_v6lastdst, set udp_state to TS_UNBND and rebuild the
 *      header template from the (now zeroed) addresses.
 *
 * Returns -TOUTSTATE (negative TLI code) when nothing is bound.
 * NOTE(review): the success return is not visible in this listing;
 * udp_connect() asserts that the result is 0 after a successful bind.
 */
5189 udp_do_unbind(conn_t
*connp
)
5191 udp_t
*udp
= connp
->conn_udp
;
5193 udp_stack_t
*us
= udp
->udp_us
;
5195 if (cl_inet_unbind
!= NULL
) {
5197 * Running in cluster mode - register unbind information
5199 if (connp
->conn_ipversion
== IPV4_VERSION
) {
/* IPv4: pass only the IPv4 part of the stored local address. */
5201 connp
->conn_netstack
->netstack_stackid
,
5202 IPPROTO_UDP
, AF_INET
,
5203 (uint8_t *)(&V4_PART_OF_V6(connp
->conn_laddr_v6
)),
5204 (in_port_t
)connp
->conn_lport
, NULL
);
/* IPv6: pass the full 128-bit local address. */
5207 connp
->conn_netstack
->netstack_stackid
,
5208 IPPROTO_UDP
, AF_INET6
,
5209 (uint8_t *)&(connp
->conn_laddr_v6
),
5210 (in_port_t
)connp
->conn_lport
, NULL
);
5214 mutex_enter(&connp
->conn_lock
);
5215 /* If a bind has not been done, we can't unbind. */
5216 if (udp
->udp_state
== TS_UNBND
) {
5217 mutex_exit(&connp
->conn_lock
);
5218 return (-TOUTSTATE
);
/* Locate the bind-hash bucket for the current local port. */
5220 udpf
= &us
->us_bind_fanout
[UDP_BIND_HASH(connp
->conn_lport
,
5221 us
->us_bind_fanout_size
)];
/* The address/port fields below are cleared while the bucket lock is held. */
5222 mutex_enter(&udpf
->uf_lock
);
5223 udp_bind_hash_remove(udp
, B_TRUE
);
5224 connp
->conn_saddr_v6
= ipv6_all_zeros
;
5225 connp
->conn_bound_addr_v6
= ipv6_all_zeros
;
5226 connp
->conn_laddr_v6
= ipv6_all_zeros
;
5227 connp
->conn_mcbc_bind
= B_FALSE
;
5228 connp
->conn_lport
= 0;
5229 /* In case we were also connected */
5230 connp
->conn_faddr_v6
= ipv6_all_zeros
;
5231 connp
->conn_fport
= 0;
5232 mutex_exit(&udpf
->uf_lock
);
/* Forget the cached last destination and mark the endpoint unbound. */
5234 connp
->conn_v6lastdst
= ipv6_all_zeros
;
5235 udp
->udp_state
= TS_UNBND
;
/* Rebuild the header template from the cleared state; result is ignored. */
5237 (void) udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
5238 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
5239 mutex_exit(&connp
->conn_lock
);
5247 * It associates a default destination address with the stream.
/*
 * udp_do_connect: record sa as the default destination of a bound UDP
 * endpoint and move it to TS_DATA_XFER.
 *
 *   connp   - connection to connect
 *   sa, len - destination; len selects the sin_t or sin6_t interpretation
 *   cr, pid - credentials and process id of the caller, stored on the
 *             conn_t and on the transmit attributes (ixa)
 *
 * Visible phases:
 *   1. Decode the destination (IPv4, IPv4-mapped IPv6, or native IPv6),
 *      resolving __sin6_src_id to a source address when no source is set.
 *   2. Obtain transmit attributes with conn_get_ixa().
 *   3. Under conn_lock, reject TS_UNBND / TS_WCON_CREQ with -TOUTSTATE;
 *      if already connected, reset to the bound-only state first.
 *   4. Store the new peer, mark TS_WCON_CREQ, then run conn_connect(),
 *      ipcl_conn_insert() and udp_build_hdr_template().
 *   5. Publish TS_DATA_XFER, prime the "last destination" cache, install
 *      the ixa, and scan the bind bucket for a conflicting connection.
 *   6. On failure, restore the bound-but-unconnected state (TS_IDLE).
 *
 * Return values seen here: EADDRNOTAVAIL (errno) and -TOUTSTATE
 * (negative TLI code).
 * NOTE(review): this listing is lossy - local declarations, several
 * error checks, the connect_failed label and the final returns are not
 * visible; confirm them against the full file.
 */
5250 udp_do_connect(conn_t
*connp
, const struct sockaddr
*sa
, socklen_t len
,
5251 cred_t
*cr
, pid_t pid
)
5265 ip_xmit_attr_t
*ixa
;
5266 ip_xmit_attr_t
*oldixa
;
/* Working copy of the source address; may be replaced via the srcid lookup. */
5269 in6_addr_t v6src
= connp
->conn_saddr_v6
;
5272 udp
= connp
->conn_udp
;
5276 * Address has been verified by the caller
5281 * Should never happen
/* IPv4 sockaddr: keep the destination in both v4 and v4-mapped v6 form. */
5285 case sizeof (sin_t
):
5287 v4dst
= sin
->sin_addr
.s_addr
;
5288 dstport
= sin
->sin_port
;
5289 IN6_IPADDR_TO_V4MAPPED(v4dst
, &v6dst
);
5290 ASSERT(connp
->conn_ipversion
== IPV4_VERSION
);
5291 ipversion
= IPV4_VERSION
;
/* IPv6 sockaddr: may carry a native or an IPv4-mapped destination. */
5294 case sizeof (sin6_t
):
5295 sin6
= (sin6_t
*)sa
;
5296 v6dst
= sin6
->sin6_addr
;
5297 dstport
= sin6
->sin6_port
;
5298 srcid
= sin6
->__sin6_src_id
;
5299 v4mapped
= IN6_IS_ADDR_V4MAPPED(&v6dst
);
/* A source id is only consulted when no source address is set yet. */
5300 if (srcid
!= 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src
)) {
5301 if (!ip_srcid_find_id(srcid
, &v6src
, IPCL_ZONEID(connp
),
5302 v4mapped
, connp
->conn_netstack
)) {
5303 /* Mismatch v4mapped/v6 specified by srcid. */
5304 return (EADDRNOTAVAIL
);
/* The conn_ipv6_v6only check below rejects the destination with EADDRNOTAVAIL. */
5308 if (connp
->conn_ipv6_v6only
)
5309 return (EADDRNOTAVAIL
);
5312 * Destination adress is mapped IPv6 address.
5313 * Source bound address should be unspecified or
5314 * IPv6 mapped address as well.
5316 if (!IN6_IS_ADDR_UNSPECIFIED(
5317 &connp
->conn_bound_addr_v6
) &&
5318 !IN6_IS_ADDR_V4MAPPED(&connp
->conn_bound_addr_v6
)) {
5319 return (EADDRNOTAVAIL
);
5321 IN6_V4MAPPED_TO_IPADDR(&v6dst
, v4dst
);
5322 ipversion
= IPV4_VERSION
;
/* Native IPv6 destination: keep flowinfo, and scope id for link-locals. */
5325 ipversion
= IPV6_VERSION
;
5326 flowinfo
= sin6
->sin6_flowinfo
;
5327 if (IN6_IS_ADDR_LINKLOCAL(&sin6
->sin6_addr
))
5328 scopeid
= sin6
->sin6_scope_id
;
5337 * If there is a different thread using conn_ixa then we get a new
5338 * copy and cut the old one loose from conn_ixa. Otherwise we use
5339 * conn_ixa and prevent any other thread from using/changing it.
5340 * Once connect() is done other threads can use conn_ixa since the
5341 * refcnt will be back at one.
5342 * We defer updating conn_ixa until later to handle any concurrent
5343 * conn_ixa_cleanup thread.
5345 ixa
= conn_get_ixa(connp
, B_FALSE
);
5349 mutex_enter(&connp
->conn_lock
);
5351 * This udp_t must have bound to a port already before doing a connect.
5352 * Reject if a connect is in progress (we drop conn_lock during
5355 if (udp
->udp_state
== TS_UNBND
|| udp
->udp_state
== TS_WCON_CREQ
) {
5356 mutex_exit(&connp
->conn_lock
);
5357 (void) strlog(UDP_MOD_ID
, 0, 1, SL_ERROR
|SL_TRACE
,
5358 "udp_connect: bad state, %u", udp
->udp_state
);
5360 return (-TOUTSTATE
);
5362 ASSERT(connp
->conn_lport
!= 0 && udp
->udp_ptpbhn
!= NULL
);
/* Bind-hash bucket for our local port; its lock guards the address fields. */
5364 udpf
= &us
->us_bind_fanout
[UDP_BIND_HASH(connp
->conn_lport
,
5365 us
->us_bind_fanout_size
)];
5367 mutex_enter(&udpf
->uf_lock
);
5368 if (udp
->udp_state
== TS_DATA_XFER
) {
5369 /* Already connected - clear out state */
5370 if (connp
->conn_mcbc_bind
)
5371 connp
->conn_saddr_v6
= ipv6_all_zeros
;
5373 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
5374 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
5375 connp
->conn_faddr_v6
= ipv6_all_zeros
;
5376 connp
->conn_fport
= 0;
5377 udp
->udp_state
= TS_IDLE
;
/* Record the new peer port and IP version on the conn_t. */
5380 connp
->conn_fport
= dstport
;
5381 connp
->conn_ipversion
= ipversion
;
5382 if (ipversion
== IPV4_VERSION
) {
5384 * Interpret a zero destination to mean loopback.
5385 * Update the T_CONN_REQ (sin/sin6) since it is used to
5386 * generate the T_CONN_CON.
5388 if (v4dst
== INADDR_ANY
) {
5389 v4dst
= htonl(INADDR_LOOPBACK
);
5390 IN6_IPADDR_TO_V4MAPPED(v4dst
, &v6dst
);
5391 if (connp
->conn_family
== AF_INET
) {
5392 sin
->sin_addr
.s_addr
= v4dst
;
5394 sin6
->sin6_addr
= v6dst
;
/* IPv4 destinations carry no flow label. */
5397 connp
->conn_faddr_v6
= v6dst
;
5398 connp
->conn_flowinfo
= 0;
5400 ASSERT(connp
->conn_ipversion
== IPV6_VERSION
);
5402 * Interpret a zero destination to mean loopback.
5403 * Update the T_CONN_REQ (sin/sin6) since it is used to
5404 * generate the T_CONN_CON.
5406 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst
)) {
5407 v6dst
= ipv6_loopback
;
5408 sin6
->sin6_addr
= v6dst
;
5410 connp
->conn_faddr_v6
= v6dst
;
5411 connp
->conn_flowinfo
= flowinfo
;
5413 mutex_exit(&udpf
->uf_lock
);
5416 * We update our cred/cpid based on the caller of connect
5418 if (connp
->conn_cred
!= cr
) {
/* NOTE(review): the hold taken on cr is on a line missing from this listing. */
5420 crfree(connp
->conn_cred
);
5421 connp
->conn_cred
= cr
;
5423 connp
->conn_cpid
= pid
;
5424 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
5426 ixa
->ixa_cpid
= pid
;
/* Scope id present: pin the ixa and the incoming interface to it. */
5429 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
5430 ixa
->ixa_scopeid
= scopeid
;
5431 connp
->conn_incoming_ifindex
= scopeid
;
/* No scope id: fall back to the bound interface. */
5433 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
5434 connp
->conn_incoming_ifindex
= connp
->conn_bound_if
;
5437 * conn_connect will drop conn_lock and reacquire it.
5438 * To prevent a send* from messing with this udp_t while the lock
5439 * is dropped we set udp_state and clear conn_v6lastdst.
5440 * That will make all send* fail with EISCONN.
5442 connp
->conn_v6lastdst
= ipv6_all_zeros
;
5443 udp
->udp_state
= TS_WCON_CREQ
;
5445 error
= conn_connect(connp
, NULL
, IPDF_ALLOW_MCBC
);
5446 mutex_exit(&connp
->conn_lock
);
5448 goto connect_failed
;
5451 * The addresses have been verified. Time to insert in
5452 * the correct fanout list.
5454 error
= ipcl_conn_insert(connp
);
5456 goto connect_failed
;
/* Rebuild the header template for the new source/destination pair. */
5458 mutex_enter(&connp
->conn_lock
);
5459 error
= udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
5460 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
5462 mutex_exit(&connp
->conn_lock
);
5463 goto connect_failed
;
/* Success so far: publish the connected state while conn_lock is held. */
5466 udp
->udp_state
= TS_DATA_XFER
;
5467 /* Record this as the "last" send even though we haven't sent any */
5468 connp
->conn_v6lastdst
= connp
->conn_faddr_v6
;
5469 connp
->conn_lastipversion
= connp
->conn_ipversion
;
5470 connp
->conn_lastdstport
= connp
->conn_fport
;
5471 connp
->conn_lastflowinfo
= connp
->conn_flowinfo
;
5472 connp
->conn_lastscopeid
= scopeid
;
5473 connp
->conn_lastsrcid
= srcid
;
5474 /* Also remember a source to use together with lastdst */
5475 connp
->conn_v6lastsrc
= v6src
;
/* Install the new ixa; the previous one is released after dropping the lock. */
5477 oldixa
= conn_replace_ixa(connp
, ixa
);
5478 mutex_exit(&connp
->conn_lock
);
5479 ixa_refrele(oldixa
);
5482 * We've picked a source address above. Now we can
5483 * verify that the src/port/dst/port is unique for all
5484 * connections in TS_DATA_XFER, skipping ourselves.
5486 mutex_enter(&udpf
->uf_lock
);
5487 for (udp1
= udpf
->uf_udp
; udp1
!= NULL
; udp1
= udp1
->udp_bind_hash
) {
5488 if (udp1
->udp_state
!= TS_DATA_XFER
)
5494 connp1
= udp1
->udp_connp
;
/*
 * Any mismatch in this test means udp1 is not a duplicate of us.
 * NOTE(review): the statement taken on a mismatch (presumably continue)
 * is not visible in this listing.
 */
5495 if (connp
->conn_lport
!= connp1
->conn_lport
||
5496 connp
->conn_ipversion
!= connp1
->conn_ipversion
||
5497 dstport
!= connp1
->conn_fport
||
5498 !IN6_ARE_ADDR_EQUAL(&connp
->conn_laddr_v6
,
5499 &connp1
->conn_laddr_v6
) ||
5500 !IN6_ARE_ADDR_EQUAL(&v6dst
, &connp1
->conn_faddr_v6
) ||
5501 !(IPCL_ZONE_MATCH(connp
, connp1
->conn_zoneid
) ||
5502 IPCL_ZONE_MATCH(connp1
, connp
->conn_zoneid
)))
/* The bucket lock is dropped and the connect is failed from here. */
5504 mutex_exit(&udpf
->uf_lock
);
5506 goto connect_failed
;
/* Cluster hook: report the connection; on the failing path shown, undo it. */
5508 if (cl_inet_connect2
!= NULL
) {
5509 CL_INET_UDP_CONNECT(connp
, B_TRUE
, &v6dst
, dstport
, error
);
5511 mutex_exit(&udpf
->uf_lock
);
5513 goto connect_failed
;
5516 mutex_exit(&udpf
->uf_lock
);
/*
 * Failure handling: return to the bound-but-unconnected state.
 * NOTE(review): the connect_failed label itself is on a line missing
 * from this listing; presumably it sits immediately above.
 */
5524 mutex_enter(&connp
->conn_lock
);
5525 mutex_enter(&udpf
->uf_lock
);
5526 udp
->udp_state
= TS_IDLE
;
5527 connp
->conn_faddr_v6
= ipv6_all_zeros
;
5528 connp
->conn_fport
= 0;
5529 /* In case the source address was set above */
5530 if (connp
->conn_mcbc_bind
)
5531 connp
->conn_saddr_v6
= ipv6_all_zeros
;
5533 connp
->conn_saddr_v6
= connp
->conn_bound_addr_v6
;
5534 connp
->conn_laddr_v6
= connp
->conn_bound_addr_v6
;
5535 mutex_exit(&udpf
->uf_lock
);
5537 connp
->conn_v6lastdst
= ipv6_all_zeros
;
5538 connp
->conn_flowinfo
= 0;
/* Restore the header template for the unconnected state; result ignored. */
5540 (void) udp_build_hdr_template(connp
, &connp
->conn_saddr_v6
,
5541 &connp
->conn_faddr_v6
, connp
->conn_fport
, connp
->conn_flowinfo
);
5542 mutex_exit(&connp
->conn_lock
);
/*
 * udp_connect: socket-layer connect entry point (installed as sd_connect
 * in sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   sa, len      - destination address, validated by proto_verify_ip_addr()
 *   id           - connection id out-parameter (not touched in visible lines)
 *   cr           - caller credentials
 *
 * Visible flow: a disconnect path guarded by a TS_DATA_XFER check;
 * address verification; an implicit bind when still TS_UNBND; forcing
 * conn_dgram_errind on; udp_do_connect(); undoing an implicit bind if
 * the connect failed; the su_connected upcall; and conversion of negative
 * TLI errors to errno with proto_tlitosyserr().
 *
 * NOTE(review): this listing is lossy - the condition selecting the
 * disconnect path (presumably sa == NULL), where did_bind is set, and the
 * return statements are not visible; confirm against the full file.
 */
5547 udp_connect(sock_lower_handle_t proto_handle
, const struct sockaddr
*sa
,
5548 socklen_t len
, sock_connid_t
*id
, cred_t
*cr
)
5550 conn_t
*connp
= (conn_t
*)proto_handle
;
5551 udp_t
*udp
= connp
->conn_udp
;
5553 boolean_t did_bind
= B_FALSE
;
/* The pid of the calling process is captured for udp_do_connect(). */
5554 pid_t pid
= curproc
->p_pid
;
5556 /* All Solaris components should pass a cred for this operation. */
5562 * Make sure we are connected
5564 if (udp
->udp_state
!= TS_DATA_XFER
)
5567 error
= udp_disconnect(connp
);
/* Check that sa/len form a valid address for this socket's family. */
5571 error
= proto_verify_ip_addr(connp
->conn_family
, sa
, len
);
5575 /* do an implicit bind if necessary */
5576 if (udp
->udp_state
== TS_UNBND
) {
5577 error
= udp_implicit_bind(connp
, cr
);
5579 * We could be racing with an actual bind, in which case
5580 * we would see EPROTO. We cross our fingers and try
5583 if (!(error
== 0 || error
== EPROTO
))
5588 * set SO_DGRAM_ERRIND
5590 connp
->conn_dgram_errind
= B_TRUE
;
5592 error
= udp_do_connect(connp
, sa
, len
, cr
, pid
);
/* Connect failed after we bound implicitly: undo that bind. */
5594 if (error
!= 0 && did_bind
) {
5597 unbind_err
= udp_do_unbind(connp
);
5598 ASSERT(unbind_err
== 0);
/* Tell the socket layer the connect completed (precedes the else-if below). */
5603 (*connp
->conn_upcalls
->su_connected
)
5604 (connp
->conn_upper_handle
, 0, NULL
, -1);
5605 } else if (error
< 0) {
5606 error
= proto_tlitosyserr(-error
);
5610 if (error
!= 0 && udp
->udp_state
== TS_DATA_XFER
) {
5612 * No need to hold locks to set state
5613 * after connect failure socket state is undefined
5614 * We set the state only to imitate old sockfs behavior
5616 udp
->udp_state
= TS_IDLE
;
/*
 * udp_send: socket-layer send entry point (installed as sd_send in
 * sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   mp           - payload; asserted to be an M_DATA mblk
 *   msg          - destination (msg_name/msg_namelen) and ancillary data
 *                  (msg_controllen)
 *
 * Visible flow:
 *   - Implicitly bind if the endpoint is still TS_UNBND.
 *   - No msg_name: the socket must be connected (else EDESTADDRREQ);
 *     send with udp_output_ancillary() if control data is present,
 *     otherwise udp_output_connected().
 *   - msg_name supplied: counted as an output error if the socket is
 *     connected; otherwise the address is verified and, per
 *     conn_family, sent through udp_output_ancillary(),
 *     udp_output_lastdst() (destination matches the cached one) or
 *     udp_output_newdst().
 *   - us_sendto_ignerr is consulted after each send path.
 *
 * Errors seen here: EDESTADDRREQ and EADDRNOTAVAIL; every failure path
 * shown bumps the udpOutErrors MIB counter.
 * NOTE(review): this listing is lossy - the cr parameter line, case
 * labels, several returns and the effect of us_sendto_ignerr are not
 * visible; confirm against the full file.
 */
5622 udp_send(sock_lower_handle_t proto_handle
, mblk_t
*mp
, struct nmsghdr
*msg
,
5628 conn_t
*connp
= (conn_t
*)proto_handle
;
5629 udp_t
*udp
= connp
->conn_udp
;
5631 udp_stack_t
*us
= udp
->udp_us
;
5633 pid_t pid
= curproc
->p_pid
;
5634 ip_xmit_attr_t
*ixa
;
5636 ASSERT(DB_TYPE(mp
) == M_DATA
);
5638 /* All Solaris components should pass a cred for this operation. */
5641 /* do an implicit bind if necessary */
5642 if (udp
->udp_state
== TS_UNBND
) {
5643 error
= udp_implicit_bind(connp
, cr
);
5645 * We could be racing with an actual bind, in which case
5646 * we would see EPROTO. We cross our fingers and try
5649 if (!(error
== 0 || error
== EPROTO
)) {
/* No destination supplied: only valid on a connected socket. */
5656 if (msg
->msg_name
== NULL
) {
5657 if (udp
->udp_state
!= TS_DATA_XFER
) {
5658 UDPS_BUMP_MIB(us
, udpOutErrors
);
5659 return (EDESTADDRREQ
);
/* Control data present: take the ancillary-data output path. */
5661 if (msg
->msg_controllen
!= 0) {
5662 error
= udp_output_ancillary(connp
, NULL
, NULL
, mp
,
5663 NULL
, msg
, cr
, pid
);
5665 error
= udp_output_connected(connp
, mp
, cr
, pid
);
5667 if (us
->us_sendto_ignerr
)
/*
 * A destination was supplied on a connected socket: counted as an error.
 * NOTE(review): the value returned is not visible here (presumably EISCONN).
 */
5672 if (udp
->udp_state
== TS_DATA_XFER
) {
5673 UDPS_BUMP_MIB(us
, udpOutErrors
);
5676 error
= proto_verify_ip_addr(connp
->conn_family
,
5677 (struct sockaddr
*)msg
->msg_name
, msg
->msg_namelen
);
5679 UDPS_BUMP_MIB(us
, udpOutErrors
);
5682 switch (connp
->conn_family
) {
/* Destination interpreted as a sockaddr_in6. */
5684 sin6
= (sin6_t
*)msg
->msg_name
;
5686 srcid
= sin6
->__sin6_src_id
;
5688 if (!IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
5690 * Destination is a non-IPv4-compatible IPv6 address.
5691 * Send out an IPv6 format packet.
5695 * If the local address is a mapped address return
5697 * It would be possible to send an IPv6 packet but the
5698 * response would never make it back to the application
5699 * since it is bound to a mapped address.
5701 if (IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
)) {
5702 UDPS_BUMP_MIB(us
, udpOutErrors
);
5703 return (EADDRNOTAVAIL
);
/* An unspecified IPv6 destination is rewritten to loopback. */
5705 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
))
5706 sin6
->sin6_addr
= ipv6_loopback
;
5707 ipversion
= IPV6_VERSION
;
/* The conn_ipv6_v6only check below fails the send with EADDRNOTAVAIL. */
5709 if (connp
->conn_ipv6_v6only
) {
5710 UDPS_BUMP_MIB(us
, udpOutErrors
);
5711 return (EADDRNOTAVAIL
);
5715 * If the local address is not zero or a mapped address
5716 * return an error. It would be possible to send an
5717 * IPv4 packet but the response would never make it
5718 * back to the application since it is bound to a
5719 * non-mapped address.
5721 if (!IN6_IS_ADDR_V4MAPPED(&connp
->conn_saddr_v6
) &&
5722 !IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_saddr_v6
)) {
5723 UDPS_BUMP_MIB(us
, udpOutErrors
);
5724 return (EADDRNOTAVAIL
);
/* A zero IPv4 part in the mapped destination is rewritten to loopback. */
5727 if (V4_PART_OF_V6(sin6
->sin6_addr
) == INADDR_ANY
) {
5728 V4_PART_OF_V6(sin6
->sin6_addr
) =
5729 htonl(INADDR_LOOPBACK
);
5731 ipversion
= IPV4_VERSION
;
5735 * We have to allocate an ip_xmit_attr_t before we grab
5736 * conn_lock and we need to hold conn_lock once we've check
5737 * conn_same_as_last_v6 to handle concurrent send* calls on a
5740 if (msg
->msg_controllen
== 0) {
5741 ixa
= conn_get_ixa(connp
, B_FALSE
);
5743 UDPS_BUMP_MIB(us
, udpOutErrors
);
5749 mutex_enter(&connp
->conn_lock
);
/* A pending delayed error is consumed (cleared) on this send. */
5750 if (udp
->udp_delayed_error
!= 0) {
5751 sin6_t
*sin2
= (sin6_t
*)&udp
->udp_delayed_addr
;
5753 error
= udp
->udp_delayed_error
;
5754 udp
->udp_delayed_error
= 0;
5756 /* Compare IP address, port, and family */
5758 if (sin6
->sin6_port
== sin2
->sin6_port
&&
5759 IN6_ARE_ADDR_EQUAL(&sin6
->sin6_addr
,
5760 &sin2
->sin6_addr
) &&
5761 sin6
->sin6_family
== sin2
->sin6_family
) {
5762 mutex_exit(&connp
->conn_lock
);
5763 UDPS_BUMP_MIB(us
, udpOutErrors
);
/* Choose the output routine; each non-ancillary routine drops conn_lock. */
5770 if (msg
->msg_controllen
!= 0) {
5771 mutex_exit(&connp
->conn_lock
);
5772 ASSERT(ixa
== NULL
);
5773 error
= udp_output_ancillary(connp
, NULL
, sin6
, mp
,
5774 NULL
, msg
, cr
, pid
);
5775 } else if (conn_same_as_last_v6(connp
, sin6
) &&
5776 connp
->conn_lastsrcid
== srcid
&&
5777 ipsec_outbound_policy_current(ixa
)) {
5778 /* udp_output_lastdst drops conn_lock */
5779 error
= udp_output_lastdst(connp
, mp
, cr
, pid
, ixa
);
5781 /* udp_output_newdst drops conn_lock */
5782 error
= udp_output_newdst(connp
, mp
, NULL
, sin6
,
5783 ipversion
, cr
, pid
, ixa
);
5785 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
5786 if (us
->us_sendto_ignerr
)
/* Destination interpreted as a sockaddr_in. */
5791 sin
= (sin_t
*)msg
->msg_name
;
5793 ipversion
= IPV4_VERSION
;
/* A zero IPv4 destination is rewritten to loopback. */
5795 if (sin
->sin_addr
.s_addr
== INADDR_ANY
)
5796 sin
->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
5799 * We have to allocate an ip_xmit_attr_t before we grab
5800 * conn_lock and we need to hold conn_lock once we've check
5801 * conn_same_as_last_v6 to handle concurrent send* on a socket.
5803 if (msg
->msg_controllen
== 0) {
5804 ixa
= conn_get_ixa(connp
, B_FALSE
);
5806 UDPS_BUMP_MIB(us
, udpOutErrors
);
5812 mutex_enter(&connp
->conn_lock
);
/* A pending delayed error is consumed (cleared) on this send. */
5813 if (udp
->udp_delayed_error
!= 0) {
5814 sin_t
*sin2
= (sin_t
*)&udp
->udp_delayed_addr
;
5816 error
= udp
->udp_delayed_error
;
5817 udp
->udp_delayed_error
= 0;
5819 /* Compare IP address and port */
5821 if (sin
->sin_port
== sin2
->sin_port
&&
5822 sin
->sin_addr
.s_addr
== sin2
->sin_addr
.s_addr
) {
5823 mutex_exit(&connp
->conn_lock
);
5824 UDPS_BUMP_MIB(us
, udpOutErrors
);
/* Choose the output routine; each non-ancillary routine drops conn_lock. */
5830 if (msg
->msg_controllen
!= 0) {
5831 mutex_exit(&connp
->conn_lock
);
5832 ASSERT(ixa
== NULL
);
5833 error
= udp_output_ancillary(connp
, sin
, NULL
, mp
,
5834 NULL
, msg
, cr
, pid
);
5835 } else if (conn_same_as_last_v4(connp
, sin
) &&
5836 ipsec_outbound_policy_current(ixa
)) {
5837 /* udp_output_lastdst drops conn_lock */
5838 error
= udp_output_lastdst(connp
, mp
, cr
, pid
, ixa
);
5840 /* udp_output_newdst drops conn_lock */
5841 error
= udp_output_newdst(connp
, mp
, sin
, NULL
,
5842 ipversion
, cr
, pid
, ixa
);
5844 ASSERT(MUTEX_NOT_HELD(&connp
->conn_lock
));
5845 if (us
->us_sendto_ignerr
)
/*
 * udp_fallback: convert a non-STREAMS UDP socket into a STREAMS endpoint
 * attached to queue q.
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   q            - queue of the fallback stream that was allocated
 *   issocket     - not referenced in the visible lines
 *   quiesced_cb  - callback invoked with the collected endpoint state;
 *                  its mblk result is pushed upstream ahead of anything
 *                  queued in the udp_t
 *   arg          - opaque argument passed through to quiesced_cb
 *
 * Visible sequence: allocate an M_SETOPTS message; attach connp to both
 * sides of q; send write-offset and high-water options to the stream
 * head; free the helper stream; switch to pure TPI; gather the
 * capability ack, local/peer addresses and option flags; call
 * quiesced_cb; drain udp_fallback_queue_head; clear IPCL_NONSTR.
 *
 * NOTE(review): this listing is lossy - the statement delivering each
 * drained message and the handling of the udp_getpeername() result are
 * not visible; confirm against the full file.
 */
5855 udp_fallback(sock_lower_handle_t proto_handle
, queue_t
*q
,
5856 boolean_t issocket
, so_proto_quiesced_cb_t quiesced_cb
,
5857 sock_quiesce_arg_t
*arg
)
5859 conn_t
*connp
= (conn_t
*)proto_handle
;
5861 struct T_capability_ack tca
;
5862 struct sockaddr_in6 laddr
, faddr
;
5863 socklen_t laddrlen
, faddrlen
;
5865 struct stroptions
*stropt
;
5866 mblk_t
*mp
, *stropt_mp
;
5869 udp
= connp
->conn_udp
;
/* Allocate the options message up front, before any state is changed. */
5871 stropt_mp
= allocb_wait(sizeof (*stropt
), BPRI_HI
, STR_NOSIG
, NULL
);
5874 * setup the fallback stream that was allocated
/* The q_ptr values are saved on the conn_t before being overwritten below. */
5876 connp
->conn_dev
= (dev_t
)RD(q
)->q_ptr
;
5877 connp
->conn_minor_arena
= WR(q
)->q_ptr
;
5879 RD(q
)->q_ptr
= WR(q
)->q_ptr
= connp
;
5881 WR(q
)->q_qinfo
= &udp_winit
;
5883 connp
->conn_rq
= RD(q
);
5884 connp
->conn_wq
= WR(q
);
5886 /* Notify stream head about options before sending up data */
5887 stropt_mp
->b_datap
->db_type
= M_SETOPTS
;
5888 stropt_mp
->b_wptr
+= sizeof (*stropt
);
5889 stropt
= (struct stroptions
*)stropt_mp
->b_rptr
;
5890 stropt
->so_flags
= SO_WROFF
| SO_HIWAT
;
5891 stropt
->so_wroff
= connp
->conn_wroff
;
5892 stropt
->so_hiwat
= udp
->udp_rcv_disply_hiwat
;
5893 putnext(RD(q
), stropt_mp
);
5896 * Free the helper stream
5898 ip_free_helper_stream(connp
);
5901 udp_use_pure_tpi(udp
);
5904 * Collect the information needed to sync with the sonode
5906 udp_do_capability_ack(udp
, &tca
, TC1_INFO
);
/* Both address buffers are sockaddr_in6 sized, large enough for either family. */
5908 laddrlen
= faddrlen
= sizeof (sin6_t
);
5909 (void) udp_getsockname((sock_lower_handle_t
)connp
,
5910 (struct sockaddr
*)&laddr
, &laddrlen
, CRED());
5911 error
= udp_getpeername((sock_lower_handle_t
)connp
,
5912 (struct sockaddr
*)&faddr
, &faddrlen
, CRED());
/* Translate conn_t / ixa settings into socket option flag bits. */
5917 if (connp
->conn_dgram_errind
)
5918 opts
|= SO_DGRAM_ERRIND
;
5919 if (connp
->conn_ixa
->ixa_flags
& IXAF_DONTROUTE
)
5920 opts
|= SO_DONTROUTE
;
/* Hand the collected state to the socket layer; mp is linked into the queue below. */
5922 mp
= (*quiesced_cb
)(connp
->conn_upper_handle
, arg
, &tca
,
5923 (struct sockaddr
*)&laddr
, laddrlen
,
5924 (struct sockaddr
*)&faddr
, faddrlen
, opts
);
5926 mutex_enter(&udp
->udp_recv_lock
);
5928 * Attempts to send data up during fallback will result in it being
5929 * queued in udp_t. First push up the datagrams obtained from the
5930 * socket, then any packets queued in udp_t.
/* Put the socket's datagrams at the head of the fallback queue. */
5933 mp
->b_next
= udp
->udp_fallback_queue_head
;
5934 udp
->udp_fallback_queue_head
= mp
;
/* Drain the queue; udp_recv_lock is dropped around each delivery. */
5936 while (udp
->udp_fallback_queue_head
!= NULL
) {
5937 mp
= udp
->udp_fallback_queue_head
;
5938 udp
->udp_fallback_queue_head
= mp
->b_next
;
5939 mutex_exit(&udp
->udp_recv_lock
);
5942 mutex_enter(&udp
->udp_recv_lock
);
/* Queue is empty here, so the tail is reset to the (NULL) head. */
5944 udp
->udp_fallback_queue_tail
= udp
->udp_fallback_queue_head
;
5946 * No longer a streams less socket
5948 mutex_enter(&connp
->conn_lock
);
5949 connp
->conn_flags
&= ~IPCL_NONSTR
;
5950 mutex_exit(&connp
->conn_lock
);
5952 mutex_exit(&udp
->udp_recv_lock
);
5954 ASSERT(connp
->conn_ref
>= 1);
/*
 * udp_getpeername: socket-layer getpeername entry point (installed as
 * sd_getpeername in sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   sa, salenp   - output buffer and in/out length, passed to
 *                  conn_getpeername()
 *
 * The state check and the lookup both run under conn_lock; the peer is
 * fetched with conn_getpeername().
 *
 * NOTE(review): the result used when udp_state is not TS_DATA_XFER is
 * not visible in this listing (presumably ENOTCONN) - confirm.
 */
5961 udp_getpeername(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5962 socklen_t
*salenp
, cred_t
*cr
)
5964 conn_t
*connp
= (conn_t
*)proto_handle
;
5965 udp_t
*udp
= connp
->conn_udp
;
5968 /* All Solaris components should pass a cred for this operation. */
5971 mutex_enter(&connp
->conn_lock
);
/* TS_DATA_XFER is the state udp_do_connect() sets on a successful connect. */
5972 if (udp
->udp_state
!= TS_DATA_XFER
)
5975 error
= conn_getpeername(connp
, sa
, salenp
);
5976 mutex_exit(&connp
->conn_lock
);
/*
 * udp_getsockname: socket-layer getsockname entry point (installed as
 * sd_getsockname in sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   sa, salenp   - output buffer and in/out length, passed to
 *                  conn_getsockname()
 *
 * The lookup is performed by conn_getsockname() while conn_lock is held.
 */
5982 udp_getsockname(sock_lower_handle_t proto_handle
, struct sockaddr
*sa
,
5983 socklen_t
*salenp
, cred_t
*cr
)
5985 conn_t
*connp
= (conn_t
*)proto_handle
;
5988 /* All Solaris components should pass a cred for this operation. */
5991 mutex_enter(&connp
->conn_lock
);
5992 error
= conn_getsockname(connp
, sa
, salenp
);
5993 mutex_exit(&connp
->conn_lock
);
/*
 * udp_getsockopt: socket-layer getsockopt entry point (installed as
 * sd_getsockopt in sock_udp_downcalls).
 *
 *   proto_handle       - lower handle; cast to the conn_t for this socket
 *   level, option_name - option to read
 *   optvalp            - caller buffer receiving the option value
 *   optlen             - in: size of optvalp; out: number of bytes copied
 *   cr                 - caller credentials, passed to proto_opt_check()
 *
 * The request is validated against the UDP option table with
 * proto_opt_check(), which also reports the largest buffer the option
 * may need.  The value is fetched into a temporary kernel buffer of that
 * size, then at most *optlen bytes are copied out and *optlen is updated.
 *
 * NOTE(review): this listing is lossy - the conditions guarding the
 * error conversion and the early kmem_free() are not visible; confirm
 * against the full file.
 */
5998 udp_getsockopt(sock_lower_handle_t proto_handle
, int level
, int option_name
,
5999 void *optvalp
, socklen_t
*optlen
, cred_t
*cr
)
6001 conn_t
*connp
= (conn_t
*)proto_handle
;
6003 t_uscalar_t max_optbuf_len
;
6007 /* All Solaris components should pass a cred for this operation. */
6010 error
= proto_opt_check(level
, option_name
, *optlen
, &max_optbuf_len
,
6011 udp_opt_obj
.odb_opt_des_arr
,
6012 udp_opt_obj
.odb_opt_arr_cnt
,
6013 B_FALSE
, B_TRUE
, cr
);
/* TLI-style results from the check are converted to errno. */
6016 error
= proto_tlitosyserr(-error
);
/* Temporary buffer sized to the maximum reported by proto_opt_check(). */
6020 optvalp_buf
= kmem_alloc(max_optbuf_len
, KM_SLEEP
);
6021 len
= udp_opt_get(connp
, level
, option_name
, optvalp_buf
);
/* Early-exit path releases the temporary buffer. */
6023 kmem_free(optvalp_buf
, max_optbuf_len
);
6028 * update optlen and copy option value
/* Never copy more than the caller's buffer can hold. */
6030 t_uscalar_t size
= MIN(len
, *optlen
);
6032 bcopy(optvalp_buf
, optvalp
, size
);
6033 bcopy(&size
, optlen
, sizeof (size
));
6035 kmem_free(optvalp_buf
, max_optbuf_len
);
/*
 * udp_setsockopt: socket-layer setsockopt entry point (installed as
 * sd_setsockopt in sock_udp_downcalls).
 *
 *   proto_handle       - lower handle; cast to the conn_t for this socket
 *   level, option_name - option to set
 *   optvalp, optlen    - new value and its length
 *   cr                 - caller credentials, passed to proto_opt_check()
 *
 * The request is validated against the UDP option table with
 * proto_opt_check() and then applied with udp_opt_set() in
 * SETFN_OPTCOM_NEGOTIATE mode.  The same buffer and length are passed as
 * both the input and the output arguments of udp_opt_set().
 *
 * NOTE(review): this listing is lossy - the trailing udp_opt_set()
 * arguments and the return paths are not visible; confirm against the
 * full file.
 */
6040 udp_setsockopt(sock_lower_handle_t proto_handle
, int level
, int option_name
,
6041 const void *optvalp
, socklen_t optlen
, cred_t
*cr
)
6043 conn_t
*connp
= (conn_t
*)proto_handle
;
6046 /* All Solaris components should pass a cred for this operation. */
6049 error
= proto_opt_check(level
, option_name
, optlen
, NULL
,
6050 udp_opt_obj
.odb_opt_des_arr
,
6051 udp_opt_obj
.odb_opt_arr_cnt
,
6052 B_TRUE
, B_FALSE
, cr
);
/* TLI-style results from the check are converted to errno. */
6056 error
= proto_tlitosyserr(-error
);
6060 error
= udp_opt_set(connp
, SETFN_OPTCOM_NEGOTIATE
, level
, option_name
,
6061 optlen
, (uchar_t
*)optvalp
, (uint_t
*)&optlen
, (uchar_t
*)optvalp
,
/*
 * udp_clr_flowctrl: clear the receive-side flow-control flag (installed
 * as sd_setflowctrl in sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *
 * conn_flow_cntrld is reset to B_FALSE while udp_recv_lock is held.
 */
6070 udp_clr_flowctrl(sock_lower_handle_t proto_handle
)
6072 conn_t
*connp
= (conn_t
*)proto_handle
;
6073 udp_t
*udp
= connp
->conn_udp
;
6075 mutex_enter(&udp
->udp_recv_lock
);
6076 connp
->conn_flow_cntrld
= B_FALSE
;
6077 mutex_exit(&udp
->udp_recv_lock
);
/*
 * udp_shutdown: socket-layer shutdown entry point (installed as
 * sd_shutdown in sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   how          - which direction(s) to shut down
 *
 * The shutdown is reported to the socket layer through the su_opctl
 * upcall: SOCK_OPCTL_SHUT_SEND for the send side and
 * SOCK_OPCTL_SHUT_RECV for the receive side.
 *
 * NOTE(review): the tests on how that guard each upcall are not visible
 * in this listing - confirm against the full file.
 */
6082 udp_shutdown(sock_lower_handle_t proto_handle
, int how
, cred_t
*cr
)
6084 conn_t
*connp
= (conn_t
*)proto_handle
;
6086 /* All Solaris components should pass a cred for this operation. */
6089 /* shut down the send side */
6091 (*connp
->conn_upcalls
->su_opctl
)(connp
->conn_upper_handle
,
6092 SOCK_OPCTL_SHUT_SEND
, 0);
6093 /* shut down the recv side */
6095 (*connp
->conn_upcalls
->su_opctl
)(connp
->conn_upper_handle
,
6096 SOCK_OPCTL_SHUT_RECV
, 0);
/*
 * udp_ioctl: socket-layer ioctl entry point (installed as sd_ioctl in
 * sock_udp_downcalls).
 *
 *   proto_handle - lower handle; cast to the conn_t for this socket
 *   cmd, arg     - ioctl command and argument
 *   mode, rvalp  - passed through to ldi_ioctl()
 *   cr           - caller credentials, passed through to ldi_ioctl()
 *
 * A helper stream is created on first use; commands that are not handled
 * locally are forwarded to IP through it with ldi_ioctl().
 * _SIOCSOCKFALLBACK and TI_GETPEERNAME are among the commands that are
 * logged rather than forwarded.
 *
 * The helper-stream failure diagnostic is prefixed "udp_ioctl:" so that
 * it names this function (it previously read "tcp_ioctl:").
 *
 * NOTE(review): this listing is lossy - the switch statement, further
 * case labels and the return statements are not visible; confirm against
 * the full file.
 */
6101 udp_ioctl(sock_lower_handle_t proto_handle
, int cmd
, intptr_t arg
,
6102 int mode
, int32_t *rvalp
, cred_t
*cr
)
6104 conn_t
*connp
= (conn_t
*)proto_handle
;
6107 /* All Solaris components should pass a cred for this operation. */
6111 * If we don't have a helper stream then create one.
6112 * ip_create_helper_stream takes care of locking the conn_t,
6113 * so this check for NULL is just a performance optimization.
6115 if (connp
->conn_helper_info
== NULL
) {
6116 udp_stack_t
*us
= connp
->conn_udp
->udp_us
;
6118 ASSERT(us
->us_ldi_ident
!= NULL
);
6121 * Create a helper stream for non-STREAMS socket.
6123 error
= ip_create_helper_stream(connp
, us
->us_ldi_ident
);
/* Report helper-stream creation failure under this function's own name. */
6125 ip0dbg(("udp_ioctl: create of IP helper stream "
6126 "failed %d\n", error
));
/* Commands below are logged instead of being forwarded to IP. */
6132 case _SIOCSOCKFALLBACK
:
6133 case TI_GETPEERNAME
:
6135 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
6141 * Pass on to IP using helper stream
6143 error
= ldi_ioctl(connp
->conn_helper_info
->iphs_handle
,
6144 cmd
, arg
, mode
, cr
, rvalp
);
/*
 * udp_accept: accept entry point (installed as sd_accept in
 * sock_udp_downcalls).  Always returns EOPNOTSUPP.
 * NOTE(review): the remaining parameters are on a line missing from
 * this listing.
 */
6152 udp_accept(sock_lower_handle_t lproto_handle
,
6153 sock_lower_handle_t eproto_handle
, sock_upper_handle_t sock_handle
,
6156 return (EOPNOTSUPP
);
/*
 * udp_listen: listen entry point (installed as sd_listen in
 * sock_udp_downcalls).  Always returns EOPNOTSUPP; no parameter is used
 * in the visible lines.
 */
6161 udp_listen(sock_lower_handle_t proto_handle
, int backlog
, cred_t
*cr
)
6163 return (EOPNOTSUPP
);
6166 sock_downcalls_t sock_udp_downcalls
= {
6167 udp_activate
, /* sd_activate */
6168 udp_accept
, /* sd_accept */
6169 udp_bind
, /* sd_bind */
6170 udp_listen
, /* sd_listen */
6171 udp_connect
, /* sd_connect */
6172 udp_getpeername
, /* sd_getpeername */
6173 udp_getsockname
, /* sd_getsockname */
6174 udp_getsockopt
, /* sd_getsockopt */
6175 udp_setsockopt
, /* sd_setsockopt */
6176 udp_send
, /* sd_send */
6177 NULL
, /* sd_send_uio */
6178 NULL
, /* sd_recv_uio */
6180 udp_shutdown
, /* sd_shutdown */
6181 udp_clr_flowctrl
, /* sd_setflowctrl */
6182 udp_ioctl
, /* sd_ioctl */
6183 udp_close
/* sd_close */