4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
31 #define _SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/strsubr.h>
35 #include <sys/socket.h>
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
40 #include <inet/common.h>
43 #include <inet/ipclassifier.h>
44 #include <inet/ipsec_impl.h>
46 #include <inet/sctp/sctp_impl.h>
47 #include <inet/sctp/sctp_addr.h>
50 * Common accept code. Called by sctp_conn_request.
51 * cr_pkt is the INIT / INIT ACK packet.
/*
 * sctp_accept_comm: common accept-side setup.  Fills in `acceptor` using the
 * `listener`, the received packet `cr_pkt` and the chunk pointer `iack`,
 * then inserts `acceptor` into the conn and bind fanouts.
 *
 *   listener   - association the request arrived on; supplies the local
 *                port, the PR-SCTP setting and the two cookie secrets.
 *   acceptor   - association being set up; asserted below to have no peer
 *                addresses yet.
 *   cr_pkt     - received packet; the SCTP header is read at
 *                b_rptr + ip_hdr_len.
 *   ip_hdr_len - byte offset of the SCTP header inside cr_pkt.
 *   iack       - sctp_init_chunk_t pointer; a chunk header (`ich`) and an
 *                init chunk (`init`) are taken to follow it in memory.
 *
 * NOTE(review): this listing is a one-token-per-line extraction with a
 * source line number fused onto many lines.  Several original lines
 * (return type, braces, some declarations and return statements) are not
 * visible, so the comments added here describe only the visible statements.
 */
54 sctp_accept_comm(sctp_t
*listener
, sctp_t
*acceptor
, mblk_t
*cr_pkt
,
55 uint_t ip_hdr_len
, sctp_init_chunk_t
*iack
)
59 sctp_chunk_hdr_t
*ich
;
60 sctp_init_chunk_t
*init
;
65 sctp_stack_t
*sctps
= listener
->sctp_sctps
;
/* The SCTP header sits ip_hdr_len bytes past the start of the packet data. */
67 sctph
= (sctp_hdr_t
*)(cr_pkt
->b_rptr
+ ip_hdr_len
);
68 ASSERT(OK_32PTR(sctph
));
/* Local port comes from the listener; foreign port from the header's sh_sport. */
70 aconnp
= acceptor
->sctp_connp
;
71 lconnp
= listener
->sctp_connp
;
72 aconnp
->conn_lport
= lconnp
->conn_lport
;
73 aconnp
->conn_fport
= sctph
->sh_sport
;
/*
 * Typed pointer arithmetic: ich starts right after *iack, and init starts
 * right after *ich.
 */
75 ich
= (sctp_chunk_hdr_t
*)(iack
+ 1);
76 init
= (sctp_init_chunk_t
*)(ich
+ 1);
78 /* acceptor isn't in any fanouts yet, so don't need to hold locks */
79 ASSERT(acceptor
->sctp_faddrs
== NULL
);
/*
 * NOTE(review): the tail of this call (remaining arguments) and the check of
 * err that presumably follows are not visible in this extraction.
 */
80 err
= sctp_get_addrparams(acceptor
, listener
, cr_pkt
, ich
,
85 if ((err
= sctp_set_hdraddrs(acceptor
)) != 0)
/* Header build uses KM_NOSLEEP on this path. */
88 if ((err
= sctp_build_hdrs(acceptor
, KM_NOSLEEP
)) != 0)
/*
 * PR-SCTP is enabled on the acceptor only when all three hold:
 * SCTP_PRSCTP_OPTION is set in sctp_options, the listener is PR-SCTP aware,
 * and the stack has it enabled.  The B_FALSE assignment below is the other
 * outcome (the line separating the two branches is not visible here).
 */
91 if ((sctp_options
& SCTP_PRSCTP_OPTION
) &&
92 listener
->sctp_prsctp_aware
&& sctps
->sctps_prsctp_enabled
) {
93 acceptor
->sctp_prsctp_aware
= B_TRUE
;
95 acceptor
->sctp_prsctp_aware
= B_FALSE
;
98 /* Get initial TSNs */
99 acceptor
->sctp_ltsn
= ntohl(iack
->sic_inittsn
);
/* recovery_tsn and lastack_rxd both start one below the initial local TSN. */
100 acceptor
->sctp_recovery_tsn
= acceptor
->sctp_lastack_rxd
=
101 acceptor
->sctp_ltsn
- 1;
102 acceptor
->sctp_adv_pap
= acceptor
->sctp_lastack_rxd
;
103 /* Serial numbers are initialized to the same value as the TSNs */
104 acceptor
->sctp_lcsn
= acceptor
->sctp_ltsn
;
/* The statement taken when sctp_initialize_params() fails is not visible here. */
106 if (!sctp_initialize_params(acceptor
, init
, iack
))
110 * Copy sctp_secret from the listener in case we need to validate
111 * a possibly delayed cookie.
113 bcopy(listener
->sctp_secret
, acceptor
->sctp_secret
, SCTP_SECRET_LEN
);
114 bcopy(listener
->sctp_old_secret
, acceptor
->sctp_old_secret
,
/* Timestamp the copied secrets with the current lbolt value. */
116 acceptor
->sctp_last_secret_update
= ddi_get_lbolt64();
119 * After acceptor is inserted in the hash list, it can be found.
120 * So we need to lock it here.
/*
 * NOTE(review): the locking statement the text above refers to is not
 * visible in this extraction.
 */
/* Conn fanout bucket is selected from conn_ports; bind bucket from the host-order local port. */
124 sctp_conn_hash_insert(&sctps
->sctps_conn_fanout
[
125 SCTP_CONN_HASH(sctps
, aconnp
->conn_ports
)], acceptor
, 0);
126 sctp_bind_hash_insert(&sctps
->sctps_bind_fanout
[
127 SCTP_BIND_HASH(ntohs(aconnp
->conn_lport
))], acceptor
, 0);
129 SCTP_ASSOC_EST(sctps
, acceptor
);
133 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
/*
 * sctp_conn_request: handle a connection request received on the listener
 * `sctp` in packet `mp`.
 *
 * Visible steps, in order:
 *   - enforce the per-listener association limit kept in sctp_listen_cnt;
 *   - create an eager association with sctp_create_eager();
 *   - inherit the listener's IPsec policy and, for IPsec-protected packets,
 *     cache the policy on the eager conn;
 *   - read local/peer addresses from the IP header at mp->b_rptr;
 *   - run sctp_accept_comm() on the eager;
 *   - for non-IPv4 packets with a link-local address, record `ifindex` as
 *     the scope id;
 *   - hand the eager to the upper layer through sctp_ulp_newconn.
 *
 *   sctp       - the listener.
 *   mp         - received packet; b_rptr points at the IP header.
 *   ifindex    - interface index stored as scope id / incoming ifindex.
 *   ip_hdr_len - passed through to sctp_accept_comm().
 *   iack       - passed through to sctp_accept_comm().
 *   ira        - receive attributes; ira_flags and ira_cpid are read here.
 *
 * NOTE(review): this listing is a one-token-per-line extraction; several
 * original lines (declarations, braces, return statements, some conditions)
 * are not visible, so comments describe only the visible statements.
 */
135 sctp_conn_request(sctp_t
*sctp
, mblk_t
*mp
, uint_t ifindex
, uint_t ip_hdr_len
,
136 sctp_init_chunk_t
*iack
, ip_recv_attr_t
*ira
)
141 conn_t
*connp
, *econnp
;
145 in6_addr_t faddr
, laddr
;
147 sctp_listen_cnt_t
*slc
= sctp
->sctp_listen_cnt
;
148 boolean_t slc_set
= B_FALSE
;
151 * No need to check for duplicate as this is the listener
152 * and we are holding the lock. This means that no new
153 * connection can be created out of it. And since the
154 * fanout already done cannot find a match, it means that
155 * there is no duplicate.
157 ASSERT(OK_32PTR(mp
->b_rptr
));
159 connp
= sctp
->sctp_connp
;
160 sctps
= sctp
->sctp_sctps
;
163 * Enforce the limit set on the number of connections per listener.
164 * Note that tlc_cnt starts with 1. So need to add 1 to tlc_max
/*
 * NOTE(review): the text above names tlc_cnt/tlc_max, but the check below
 * operates on slc_cnt/slc_max.
 *
 * The counter is incremented first; if the new value exceeds slc_max + 1 the
 * increment is undone and the drop is counted in the kstat.
 */
170 if (atomic_inc_32_nv(&slc
->slc_cnt
) > slc
->slc_max
+ 1) {
171 now
= ddi_get_lbolt64();
172 atomic_dec_32(&slc
->slc_cnt
);
173 SCTP_KSTAT(sctps
, sctp_listen_cnt_drop
);
/*
 * Rate-limit the warning: it is emitted only when more than
 * MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL) ticks have passed since
 * slc_report_time, which is then reset to `now`.
 */
175 if (now
- slc
->slc_report_time
>
176 MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL
)) {
177 zcmn_err(connp
->conn_zoneid
, CE_WARN
,
178 "SCTP listener (port %d) association max "
179 "(%u) reached: %u attempts dropped total\n",
180 ntohs(connp
->conn_lport
),
181 slc
->slc_max
, slc
->slc_drop
);
182 slc
->slc_report_time
= now
;
/* Eager creation failed: the slc_cnt decrement below appears on this path. */
189 if ((eager
= sctp_create_eager(sctp
)) == NULL
) {
191 atomic_dec_32(&slc
->slc_cnt
);
194 econnp
= eager
->sctp_connp
;
196 if (connp
->conn_policy
!= NULL
) {
197 /* Inherit the policy from the listener; use actions from ira */
198 if (!ip_ipsec_policy_inherit(econnp
, connp
, ira
)) {
199 sctp_close_eager(eager
);
200 SCTPS_BUMP_MIB(sctps
, sctpListenDrop
);
/*
 * The IP header is first viewed as ip6_t.  When the IPv4 flag is set in
 * ira_flags the same pointer is reinterpreted as ipha_t and both addresses
 * are stored in v4-mapped IPv6 form; otherwise the IPv6 addresses are copied.
 *
 * NOTE(review): ira_flags is tested against IXAF_IS_IPV4 here and in the
 * scope-id check further down, but against IRAF_IS_IPV4 in the
 * ipsec_conn_cache_policy() call -- confirm the two constants are
 * interchangeable.
 */
205 ip6h
= (ip6_t
*)mp
->b_rptr
;
206 if (ira
->ira_flags
& IXAF_IS_IPV4
) {
209 ipha
= (ipha_t
*)ip6h
;
210 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_dst
, &laddr
);
211 IN6_IPADDR_TO_V4MAPPED(ipha
->ipha_src
, &faddr
);
213 laddr
= ip6h
->ip6_dst
;
214 faddr
= ip6h
->ip6_src
;
217 if (ira
->ira_flags
& IRAF_IPSEC_SECURE
) {
219 * XXX need to fix the cached policy issue here.
220 * We temporarily set the conn_laddr/conn_faddr here so
221 * that IPsec can use it for the latched policy
222 * selector. This is obvioursly wrong as SCTP can
223 * use different addresses...
225 econnp
->conn_laddr_v6
= laddr
;
226 econnp
->conn_faddr_v6
= faddr
;
227 econnp
->conn_saddr_v6
= laddr
;
/* A non-zero result from ipsec_conn_cache_policy() closes the eager and counts a listen drop. */
229 if (ipsec_conn_cache_policy(econnp
,
230 (ira
->ira_flags
& IRAF_IS_IPV4
) != 0) != 0) {
231 sctp_close_eager(eager
);
232 SCTPS_BUMP_MIB(sctps
, sctpListenDrop
);
236 /* Save for getpeerucred */
238 cpid
= ira
->ira_cpid
;
/*
 * NOTE(review): the test of err guarding the close/bump pair that follows
 * the call is not visible in this extraction.
 */
240 err
= sctp_accept_comm(sctp
, eager
, mp
, ip_hdr_len
, iack
);
242 sctp_close_eager(eager
);
243 SCTPS_BUMP_MIB(sctps
, sctpListenDrop
);
247 ASSERT(eager
->sctp_current
->sf_ixa
!= NULL
);
249 ixa
= eager
->sctp_current
->sf_ixa
;
/*
 * Non-IPv4 packet: if either header address is link-local, mark the eager
 * link-local and pin the scope to the receiving interface index.
 */
250 if (!(ira
->ira_flags
& IXAF_IS_IPV4
)) {
251 ASSERT(!(ixa
->ixa_flags
& IXAF_IS_IPV4
));
253 if (IN6_IS_ADDR_LINKLOCAL(&ip6h
->ip6_src
) ||
254 IN6_IS_ADDR_LINKLOCAL(&ip6h
->ip6_dst
)) {
255 eager
->sctp_linklocal
= 1;
257 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
258 ixa
->ixa_scopeid
= ifindex
;
259 econnp
->conn_incoming_ifindex
= ifindex
;
263 /* Connection established, so send up the conn_ind */
/*
 * The upcall's return value becomes eager->sctp_ulpd; a NULL result closes
 * the eager and counts a listen drop.
 */
264 if ((eager
->sctp_ulpd
= sctp
->sctp_ulp_newconn(sctp
->sctp_ulpd
,
265 (sock_lower_handle_t
)eager
, NULL
, cr
, cpid
,
266 &eager
->sctp_upcalls
)) == NULL
) {
267 sctp_close_eager(eager
);
268 SCTPS_BUMP_MIB(sctps
, sctpListenDrop
);
/* The eager is asserted to be detached up to this point; clear the flag now. */
271 ASSERT(SCTP_IS_DETACHED(eager
));
272 eager
->sctp_detached
= B_FALSE
;
277 * Connect to a peer - this function inserts the sctp in the
278 * bind and conn fanouts, sends the INIT, and replies to the client
/*
 * sctp_connect: start an association from `sctp` to the peer address `dst`.
 *
 * Visible steps, in order:
 *   - validate `dst`/`addrlen` and convert the destination to an in6_addr_t
 *     (`dstaddr`) plus port (`dstport`);
 *   - store the caller's cred and pid on the conn;
 *   - depending on sctp_state, bind first with an unspecified address;
 *   - under the conn-fanout bucket lock, check for a duplicate connection,
 *     add the peer address and insert into the conn hash;
 *   - build the headers, clear the DF setting on the peer address, start
 *     its timer, enter SCTPS_COOKIE_WAIT and send the INIT.
 *
 *   sctp    - association to connect.
 *   dst     - destination socket address; sa_family selects the handling.
 *   addrlen - length of *dst in bytes.
 *   cr      - caller's credential, stored in conn_cred.
 *   pid     - caller's pid, stored in conn_cpid.
 *
 * Visible return values: EAFNOSUPPORT and EADDRNOTAVAIL; other return
 * statements are not visible in this listing.
 *
 * NOTE(review): this listing is a one-token-per-line extraction; several
 * original lines (declarations, braces, case labels, return statements,
 * some conditions) are not visible, so comments describe only the visible
 * statements.
 */
282 sctp_connect(sctp_t
*sctp
, const struct sockaddr
*dst
, uint32_t addrlen
,
283 cred_t
*cr
, pid_t pid
)
292 char buf
[INET6_ADDRSTRLEN
];
/* Allocation flag follows sctp_cansleep. */
293 int sleep
= sctp
->sctp_cansleep
? KM_SLEEP
: KM_NOSLEEP
;
295 sctp_faddr_t
*cur_fp
;
296 sctp_stack_t
*sctps
= sctp
->sctp_sctps
;
297 conn_t
*connp
= sctp
->sctp_connp
;
302 * Determine packet type based on type of address passed in
303 * the request should contain an IPv4 or IPv6 address.
304 * Make sure that address family matches the type of
305 * family of the address passed down.
/* A sockaddr shorter than sin_t is rejected; the statement taken is not visible here. */
307 if (addrlen
< sizeof (sin_t
)) {
310 switch (dst
->sa_family
) {
314 /* Check for attempt to connect to non-unicast */
315 if (CLASSD(sin
->sin_addr
.s_addr
) ||
316 (sin
->sin_addr
.s_addr
== INADDR_BROADCAST
)) {
317 ip0dbg(("sctp_connect: non-unicast\n"));
/* On the sin path a v6-only conn gets EAFNOSUPPORT. */
320 if (connp
->conn_ipv6_v6only
)
321 return (EAFNOSUPPORT
);
323 /* convert to v6 mapped */
324 /* Check for attempt to connect to INADDR_ANY */
325 if (sin
->sin_addr
.s_addr
== INADDR_ANY
) {
326 struct in_addr v4_addr
;
328 * SunOS 4.x and 4.3 BSD allow an application
329 * to connect a TCP socket to INADDR_ANY.
330 * When they do this, the kernel picks the
331 * address of one interface and uses it
332 * instead. The kernel usually ends up
333 * picking the address of the loopback
334 * interface. This is an undocumented feature.
335 * However, we provide the same thing here
336 * in case any TCP apps that use this feature
337 * are being ported to SCTP...
/* INADDR_ANY is replaced by the v4-mapped loopback address. */
339 v4_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
340 IN6_INADDR_TO_V4MAPPED(&v4_addr
, &dstaddr
);
342 IN6_INADDR_TO_V4MAPPED(&sin
->sin_addr
, &dstaddr
);
344 dstport
= sin
->sin_port
;
347 sin6
= (sin6_t
*)dst
;
348 /* Check for attempt to connect to non-unicast. */
/* The sin6 path also requires addrlen to cover a full sin6_t. */
349 if ((addrlen
< sizeof (sin6_t
)) ||
350 IN6_IS_ADDR_MULTICAST(&sin6
->sin6_addr
)) {
351 ip0dbg(("sctp_connect: non-unicast\n"));
/* A v6-only conn may not connect to a v4-mapped destination. */
354 if (connp
->conn_ipv6_v6only
&&
355 IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
356 return (EAFNOSUPPORT
);
358 /* check for attempt to connect to unspec */
359 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) {
360 dstaddr
= ipv6_loopback
;
362 dstaddr
= sin6
->sin6_addr
;
/* Link-local destination: flag the association and remember the caller's scope id. */
363 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr
)) {
364 sctp
->sctp_linklocal
= 1;
365 scope_id
= sin6
->sin6_scope_id
;
368 dstport
= sin6
->sin6_port
;
369 connp
->conn_flowinfo
= sin6
->sin6_flowinfo
;
372 dprint(1, ("sctp_connect: unknown family %d\n",
374 return (EAFNOSUPPORT
);
/* dstaddr is always formatted as AF_INET6 because IPv4 destinations were stored v4-mapped. */
377 (void) inet_ntop(AF_INET6
, &dstaddr
, buf
, sizeof (buf
));
378 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf
));
/*
 * The destination family must equal conn_family and the conn must not be
 * closing; the statements taken on failure are not visible here.
 */
382 if (connp
->conn_family
!= dst
->sa_family
||
383 (connp
->conn_state_flags
& CONN_CLOSING
)) {
388 /* We update our cred/cpid based on the caller of connect */
/*
 * NOTE(review): the old cred is freed and `cr` stored; a matching hold on
 * `cr` is not visible in this extraction -- confirm.
 */
389 if (connp
->conn_cred
!= cr
) {
391 crfree(connp
->conn_cred
);
392 connp
->conn_cred
= cr
;
394 connp
->conn_cpid
= pid
;
396 /* Cache things in conn_ixa without any refhold */
397 ixa
= connp
->conn_ixa
;
398 ASSERT(!(ixa
->ixa_free_flags
& IXA_FREE_CRED
));
402 switch (sctp
->sctp_state
) {
404 struct sockaddr_storage ss
;
407 * We support a quick connect capability here, allowing
408 * clients to transition directly from IDLE to COOKIE_WAIT.
409 * sctp_bindi will pick an unused port, insert the connection
410 * in the bind hash and transition to BOUND state. SCTP
411 * picks and uses what it considers the optimal local address
412 * set (just like specifiying INADDR_ANY to bind()).
414 dprint(1, ("sctp_connect: idle, attempting bind...\n"));
415 ASSERT(sctp
->sctp_nsaddrs
== 0);
/* Bind with a zeroed sockaddr_storage that carries only the conn's address family. */
417 bzero(&ss
, sizeof (ss
));
418 ss
.ss_family
= connp
->conn_family
;
420 if ((err
= sctp_bind(sctp
, (struct sockaddr
*)&ss
,
421 sizeof (ss
))) != 0) {
429 ASSERT(sctp
->sctp_nsaddrs
> 0);
432 /* XXX check for attempt to connect to self */
433 connp
->conn_fport
= dstport
;
436 * Don't allow this connection to completely duplicate
437 * an existing connection.
439 * Ensure that the duplicate check and insertion is atomic.
/*
 * Any existing conn-hash entry is removed first; the duplicate lookup then
 * runs with the bucket lock tf_lock held.
 */
441 sctp_conn_hash_remove(sctp
);
442 tbf
= &sctps
->sctps_conn_fanout
[SCTP_CONN_HASH(sctps
,
444 mutex_enter(&tbf
->tf_lock
);
445 lsctp
= sctp_lookup(sctp
, &dstaddr
, tbf
, &connp
->conn_ports
,
448 /* found a duplicate connection */
449 mutex_exit(&tbf
->tf_lock
);
456 * OK; set up the peer addr (this may grow after we get
457 * the INIT ACK from the peer with additional addresses).
/* sctp_add_faddr() receives the `sleep` flag computed at the top; failure drops tf_lock. */
459 if ((err
= sctp_add_faddr(sctp
, &dstaddr
, sleep
,
461 mutex_exit(&tbf
->tf_lock
);
465 cur_fp
= sctp
->sctp_faddrs
;
466 ASSERT(cur_fp
->sf_ixa
!= NULL
);
468 /* No valid src addr, return. */
469 if (cur_fp
->sf_state
== SCTP_FADDRS_UNREACH
) {
470 mutex_exit(&tbf
->tf_lock
);
472 return (EADDRNOTAVAIL
);
/* The head of sctp_faddrs becomes both primary and current; MSS comes from its sf_pmss. */
475 sctp
->sctp_primary
= cur_fp
;
476 sctp
->sctp_current
= cur_fp
;
477 sctp
->sctp_mss
= cur_fp
->sf_pmss
;
/* Inserted before tf_lock is released, so lookup and insertion share one lock hold. */
478 sctp_conn_hash_insert(tbf
, sctp
, 1);
479 mutex_exit(&tbf
->tf_lock
);
481 ixa
= cur_fp
->sf_ixa
;
482 ASSERT(ixa
->ixa_cred
!= NULL
);
/*
 * The scope id is either set from scope_id or the flag is cleared; the
 * condition selecting between the two is not visible here.
 */
485 ixa
->ixa_flags
|= IXAF_SCOPEID_SET
;
486 ixa
->ixa_scopeid
= scope_id
;
488 ixa
->ixa_flags
&= ~IXAF_SCOPEID_SET
;
491 /* initialize composite headers */
492 if ((err
= sctp_set_hdraddrs(sctp
)) != 0) {
493 sctp_conn_hash_remove(sctp
);
/*
 * NOTE(review): KM_SLEEP is passed unconditionally here, whereas the local
 * `sleep` flag (KM_NOSLEEP when sctp_cansleep is clear) is what
 * sctp_add_faddr() was given above -- confirm this is intended.
 */
498 if ((err
= sctp_build_hdrs(sctp
, KM_SLEEP
)) != 0) {
499 sctp_conn_hash_remove(sctp
);
505 * Turn off the don't fragment bit on the (only) faddr,
506 * so that if one of the messages exchanged during the
507 * initialization sequence exceeds the path mtu, it
508 * at least has a chance to get there. SCTP does no
509 * fragmentation of initialization messages. The DF bit
510 * will be turned on again in sctp_send_cookie_echo()
511 * (but the cookie echo will still be sent with the df bit
514 cur_fp
->sf_df
= B_FALSE
;
516 /* Mark this address as alive */
517 cur_fp
->sf_state
= SCTP_FADDRS_ALIVE
;
519 /* Send the INIT to the peer */
/* The peer-address timer is armed with sf_rto and the state set before the INIT is built. */
520 SCTP_FADDR_TIMER_RESTART(sctp
, cur_fp
, cur_fp
->sf_rto
);
521 sctp
->sctp_state
= SCTPS_COOKIE_WAIT
;
523 * sctp_init_mp() could result in modifying the source
524 * address list, so take the hash lock.
526 mutex_enter(&tbf
->tf_lock
);
527 initmp
= sctp_init_mp(sctp
, cur_fp
);
528 if (initmp
== NULL
) {
529 mutex_exit(&tbf
->tf_lock
);
531 * It may happen that all the source addresses
532 * (loopback/link local) are removed. In that case,
/* No source addresses left: undo the hash insertion, stop the timer, fail with EADDRNOTAVAIL. */
535 if (sctp
->sctp_nsaddrs
== 0) {
536 sctp_conn_hash_remove(sctp
);
537 SCTP_FADDR_TIMER_STOP(cur_fp
);
539 return (EADDRNOTAVAIL
);
542 /* Otherwise, let the retransmission timer retry */
546 mutex_exit(&tbf
->tf_lock
);
548 ASSERT(ixa
->ixa_cred
!= NULL
);
549 ASSERT(ixa
->ixa_ire
!= NULL
);
/* The result of conn_ip_output() is explicitly discarded; the packet counter is bumped regardless. */
551 (void) conn_ip_output(initmp
, ixa
);
552 BUMP_LOCAL(sctp
->sctp_opkts
);
556 sctp_set_ulp_prop(sctp
);
560 ip0dbg(("sctp_connect: invalid state. %d\n", sctp
->sctp_state
));