Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / kernel / net / sctp / sctp_conn.c
blob22b357ac3562befec2a76c7fa54bba59e5ba4683
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define _SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/strsubr.h>
35 #include <sys/socket.h>
37 #include <netinet/in.h>
38 #include <netinet/ip6.h>
40 #include <inet/common.h>
41 #include <inet/ip.h>
42 #include <inet/ip6.h>
43 #include <inet/ipclassifier.h>
44 #include <inet/ipsec_impl.h>
46 #include <inet/sctp/sctp_impl.h>
47 #include <inet/sctp/sctp_addr.h>
50 * Common accept code. Called by sctp_conn_request.
51 * cr_pkt is the INIT / INIT ACK packet.
53 static int
54 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
55 uint_t ip_hdr_len, sctp_init_chunk_t *iack)
58 sctp_hdr_t *sctph;
59 sctp_chunk_hdr_t *ich;
60 sctp_init_chunk_t *init;
61 int err;
62 uint_t sctp_options;
63 conn_t *aconnp;
64 conn_t *lconnp;
65 sctp_stack_t *sctps = listener->sctp_sctps;
67 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
68 ASSERT(OK_32PTR(sctph));
70 aconnp = acceptor->sctp_connp;
71 lconnp = listener->sctp_connp;
72 aconnp->conn_lport = lconnp->conn_lport;
73 aconnp->conn_fport = sctph->sh_sport;
75 ich = (sctp_chunk_hdr_t *)(iack + 1);
76 init = (sctp_init_chunk_t *)(ich + 1);
78 /* acceptor isn't in any fanouts yet, so don't need to hold locks */
79 ASSERT(acceptor->sctp_faddrs == NULL);
80 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
81 &sctp_options);
82 if (err != 0)
83 return (err);
85 if ((err = sctp_set_hdraddrs(acceptor)) != 0)
86 return (err);
88 if ((err = sctp_build_hdrs(acceptor, KM_NOSLEEP)) != 0)
89 return (err);
91 if ((sctp_options & SCTP_PRSCTP_OPTION) &&
92 listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) {
93 acceptor->sctp_prsctp_aware = B_TRUE;
94 } else {
95 acceptor->sctp_prsctp_aware = B_FALSE;
98 /* Get initial TSNs */
99 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
100 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
101 acceptor->sctp_ltsn - 1;
102 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
103 /* Serial numbers are initialized to the same value as the TSNs */
104 acceptor->sctp_lcsn = acceptor->sctp_ltsn;
106 if (!sctp_initialize_params(acceptor, init, iack))
107 return (ENOMEM);
110 * Copy sctp_secret from the listener in case we need to validate
111 * a possibly delayed cookie.
113 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
114 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
115 SCTP_SECRET_LEN);
116 acceptor->sctp_last_secret_update = ddi_get_lbolt64();
119 * After acceptor is inserted in the hash list, it can be found.
120 * So we need to lock it here.
122 RUN_SCTP(acceptor);
124 sctp_conn_hash_insert(&sctps->sctps_conn_fanout[
125 SCTP_CONN_HASH(sctps, aconnp->conn_ports)], acceptor, 0);
126 sctp_bind_hash_insert(&sctps->sctps_bind_fanout[
127 SCTP_BIND_HASH(ntohs(aconnp->conn_lport))], acceptor, 0);
129 SCTP_ASSOC_EST(sctps, acceptor);
130 return (0);
133 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
134 sctp_t *
135 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
136 sctp_init_chunk_t *iack, ip_recv_attr_t *ira)
138 sctp_t *eager;
139 ip6_t *ip6h;
140 int err;
141 conn_t *connp, *econnp;
142 sctp_stack_t *sctps;
143 cred_t *cr;
144 pid_t cpid;
145 in6_addr_t faddr, laddr;
146 ip_xmit_attr_t *ixa;
147 sctp_listen_cnt_t *slc = sctp->sctp_listen_cnt;
148 boolean_t slc_set = B_FALSE;
151 * No need to check for duplicate as this is the listener
152 * and we are holding the lock. This means that no new
153 * connection can be created out of it. And since the
154 * fanout already done cannot find a match, it means that
155 * there is no duplicate.
157 ASSERT(OK_32PTR(mp->b_rptr));
159 connp = sctp->sctp_connp;
160 sctps = sctp->sctp_sctps;
163 * Enforce the limit set on the number of connections per listener.
164 * Note that tlc_cnt starts with 1. So need to add 1 to tlc_max
165 * for comparison.
167 if (slc != NULL) {
168 int64_t now;
170 if (atomic_inc_32_nv(&slc->slc_cnt) > slc->slc_max + 1) {
171 now = ddi_get_lbolt64();
172 atomic_dec_32(&slc->slc_cnt);
173 SCTP_KSTAT(sctps, sctp_listen_cnt_drop);
174 slc->slc_drop++;
175 if (now - slc->slc_report_time >
176 MSEC_TO_TICK(SCTP_SLC_REPORT_INTERVAL)) {
177 zcmn_err(connp->conn_zoneid, CE_WARN,
178 "SCTP listener (port %d) association max "
179 "(%u) reached: %u attempts dropped total\n",
180 ntohs(connp->conn_lport),
181 slc->slc_max, slc->slc_drop);
182 slc->slc_report_time = now;
184 return (NULL);
186 slc_set = B_TRUE;
189 if ((eager = sctp_create_eager(sctp)) == NULL) {
190 if (slc_set)
191 atomic_dec_32(&slc->slc_cnt);
192 return (NULL);
194 econnp = eager->sctp_connp;
196 if (connp->conn_policy != NULL) {
197 /* Inherit the policy from the listener; use actions from ira */
198 if (!ip_ipsec_policy_inherit(econnp, connp, ira)) {
199 sctp_close_eager(eager);
200 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
201 return (NULL);
205 ip6h = (ip6_t *)mp->b_rptr;
206 if (ira->ira_flags & IXAF_IS_IPV4) {
207 ipha_t *ipha;
209 ipha = (ipha_t *)ip6h;
210 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &laddr);
211 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &faddr);
212 } else {
213 laddr = ip6h->ip6_dst;
214 faddr = ip6h->ip6_src;
217 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
219 * XXX need to fix the cached policy issue here.
220 * We temporarily set the conn_laddr/conn_faddr here so
221 * that IPsec can use it for the latched policy
222 * selector. This is obvioursly wrong as SCTP can
223 * use different addresses...
225 econnp->conn_laddr_v6 = laddr;
226 econnp->conn_faddr_v6 = faddr;
227 econnp->conn_saddr_v6 = laddr;
229 if (ipsec_conn_cache_policy(econnp,
230 (ira->ira_flags & IRAF_IS_IPV4) != 0) != 0) {
231 sctp_close_eager(eager);
232 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
233 return (NULL);
236 /* Save for getpeerucred */
237 cr = ira->ira_cred;
238 cpid = ira->ira_cpid;
240 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
241 if (err != 0) {
242 sctp_close_eager(eager);
243 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
244 return (NULL);
247 ASSERT(eager->sctp_current->sf_ixa != NULL);
249 ixa = eager->sctp_current->sf_ixa;
250 if (!(ira->ira_flags & IXAF_IS_IPV4)) {
251 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
253 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
254 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) {
255 eager->sctp_linklocal = 1;
257 ixa->ixa_flags |= IXAF_SCOPEID_SET;
258 ixa->ixa_scopeid = ifindex;
259 econnp->conn_incoming_ifindex = ifindex;
263 /* Connection established, so send up the conn_ind */
264 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
265 (sock_lower_handle_t)eager, NULL, cr, cpid,
266 &eager->sctp_upcalls)) == NULL) {
267 sctp_close_eager(eager);
268 SCTPS_BUMP_MIB(sctps, sctpListenDrop);
269 return (NULL);
271 ASSERT(SCTP_IS_DETACHED(eager));
272 eager->sctp_detached = B_FALSE;
273 return (eager);
277 * Connect to a peer - this function inserts the sctp in the
278 * bind and conn fanouts, sends the INIT, and replies to the client
279 * with an OK ack.
282 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen,
283 cred_t *cr, pid_t pid)
285 sin_t *sin;
286 sin6_t *sin6;
287 in6_addr_t dstaddr;
288 in_port_t dstport;
289 mblk_t *initmp;
290 sctp_tf_t *tbf;
291 sctp_t *lsctp;
292 char buf[INET6_ADDRSTRLEN];
293 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
294 int err;
295 sctp_faddr_t *cur_fp;
296 sctp_stack_t *sctps = sctp->sctp_sctps;
297 conn_t *connp = sctp->sctp_connp;
298 uint_t scope_id = 0;
299 ip_xmit_attr_t *ixa;
302 * Determine packet type based on type of address passed in
303 * the request should contain an IPv4 or IPv6 address.
304 * Make sure that address family matches the type of
305 * family of the address passed down.
307 if (addrlen < sizeof (sin_t)) {
308 return (EINVAL);
310 switch (dst->sa_family) {
311 case AF_INET:
312 sin = (sin_t *)dst;
314 /* Check for attempt to connect to non-unicast */
315 if (CLASSD(sin->sin_addr.s_addr) ||
316 (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
317 ip0dbg(("sctp_connect: non-unicast\n"));
318 return (EINVAL);
320 if (connp->conn_ipv6_v6only)
321 return (EAFNOSUPPORT);
323 /* convert to v6 mapped */
324 /* Check for attempt to connect to INADDR_ANY */
325 if (sin->sin_addr.s_addr == INADDR_ANY) {
326 struct in_addr v4_addr;
328 * SunOS 4.x and 4.3 BSD allow an application
329 * to connect a TCP socket to INADDR_ANY.
330 * When they do this, the kernel picks the
331 * address of one interface and uses it
332 * instead. The kernel usually ends up
333 * picking the address of the loopback
334 * interface. This is an undocumented feature.
335 * However, we provide the same thing here
336 * in case any TCP apps that use this feature
337 * are being ported to SCTP...
339 v4_addr.s_addr = htonl(INADDR_LOOPBACK);
340 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
341 } else {
342 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
344 dstport = sin->sin_port;
345 break;
346 case AF_INET6:
347 sin6 = (sin6_t *)dst;
348 /* Check for attempt to connect to non-unicast. */
349 if ((addrlen < sizeof (sin6_t)) ||
350 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
351 ip0dbg(("sctp_connect: non-unicast\n"));
352 return (EINVAL);
354 if (connp->conn_ipv6_v6only &&
355 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
356 return (EAFNOSUPPORT);
358 /* check for attempt to connect to unspec */
359 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
360 dstaddr = ipv6_loopback;
361 } else {
362 dstaddr = sin6->sin6_addr;
363 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) {
364 sctp->sctp_linklocal = 1;
365 scope_id = sin6->sin6_scope_id;
368 dstport = sin6->sin6_port;
369 connp->conn_flowinfo = sin6->sin6_flowinfo;
370 break;
371 default:
372 dprint(1, ("sctp_connect: unknown family %d\n",
373 dst->sa_family));
374 return (EAFNOSUPPORT);
377 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
378 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));
380 RUN_SCTP(sctp);
382 if (connp->conn_family != dst->sa_family ||
383 (connp->conn_state_flags & CONN_CLOSING)) {
384 WAKE_SCTP(sctp);
385 return (EINVAL);
388 /* We update our cred/cpid based on the caller of connect */
389 if (connp->conn_cred != cr) {
390 crhold(cr);
391 crfree(connp->conn_cred);
392 connp->conn_cred = cr;
394 connp->conn_cpid = pid;
396 /* Cache things in conn_ixa without any refhold */
397 ixa = connp->conn_ixa;
398 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
399 ixa->ixa_cred = cr;
400 ixa->ixa_cpid = pid;
402 switch (sctp->sctp_state) {
403 case SCTPS_IDLE: {
404 struct sockaddr_storage ss;
407 * We support a quick connect capability here, allowing
408 * clients to transition directly from IDLE to COOKIE_WAIT.
409 * sctp_bindi will pick an unused port, insert the connection
410 * in the bind hash and transition to BOUND state. SCTP
411 * picks and uses what it considers the optimal local address
412 * set (just like specifiying INADDR_ANY to bind()).
414 dprint(1, ("sctp_connect: idle, attempting bind...\n"));
415 ASSERT(sctp->sctp_nsaddrs == 0);
417 bzero(&ss, sizeof (ss));
418 ss.ss_family = connp->conn_family;
419 WAKE_SCTP(sctp);
420 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss,
421 sizeof (ss))) != 0) {
422 return (err);
424 RUN_SCTP(sctp);
426 /* FALLTHROUGH */
428 case SCTPS_BOUND:
429 ASSERT(sctp->sctp_nsaddrs > 0);
431 /* do the connect */
432 /* XXX check for attempt to connect to self */
433 connp->conn_fport = dstport;
436 * Don't allow this connection to completely duplicate
437 * an existing connection.
439 * Ensure that the duplicate check and insertion is atomic.
441 sctp_conn_hash_remove(sctp);
442 tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps,
443 connp->conn_ports)];
444 mutex_enter(&tbf->tf_lock);
445 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &connp->conn_ports,
446 SCTPS_COOKIE_WAIT);
447 if (lsctp != NULL) {
448 /* found a duplicate connection */
449 mutex_exit(&tbf->tf_lock);
450 SCTP_REFRELE(lsctp);
451 WAKE_SCTP(sctp);
452 return (EADDRINUSE);
456 * OK; set up the peer addr (this may grow after we get
457 * the INIT ACK from the peer with additional addresses).
459 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep,
460 B_FALSE)) != 0) {
461 mutex_exit(&tbf->tf_lock);
462 WAKE_SCTP(sctp);
463 return (err);
465 cur_fp = sctp->sctp_faddrs;
466 ASSERT(cur_fp->sf_ixa != NULL);
468 /* No valid src addr, return. */
469 if (cur_fp->sf_state == SCTP_FADDRS_UNREACH) {
470 mutex_exit(&tbf->tf_lock);
471 WAKE_SCTP(sctp);
472 return (EADDRNOTAVAIL);
475 sctp->sctp_primary = cur_fp;
476 sctp->sctp_current = cur_fp;
477 sctp->sctp_mss = cur_fp->sf_pmss;
478 sctp_conn_hash_insert(tbf, sctp, 1);
479 mutex_exit(&tbf->tf_lock);
481 ixa = cur_fp->sf_ixa;
482 ASSERT(ixa->ixa_cred != NULL);
484 if (scope_id != 0) {
485 ixa->ixa_flags |= IXAF_SCOPEID_SET;
486 ixa->ixa_scopeid = scope_id;
487 } else {
488 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
491 /* initialize composite headers */
492 if ((err = sctp_set_hdraddrs(sctp)) != 0) {
493 sctp_conn_hash_remove(sctp);
494 WAKE_SCTP(sctp);
495 return (err);
498 if ((err = sctp_build_hdrs(sctp, KM_SLEEP)) != 0) {
499 sctp_conn_hash_remove(sctp);
500 WAKE_SCTP(sctp);
501 return (err);
505 * Turn off the don't fragment bit on the (only) faddr,
506 * so that if one of the messages exchanged during the
507 * initialization sequence exceeds the path mtu, it
508 * at least has a chance to get there. SCTP does no
509 * fragmentation of initialization messages. The DF bit
510 * will be turned on again in sctp_send_cookie_echo()
511 * (but the cookie echo will still be sent with the df bit
512 * off).
514 cur_fp->sf_df = B_FALSE;
516 /* Mark this address as alive */
517 cur_fp->sf_state = SCTP_FADDRS_ALIVE;
519 /* Send the INIT to the peer */
520 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->sf_rto);
521 sctp->sctp_state = SCTPS_COOKIE_WAIT;
523 * sctp_init_mp() could result in modifying the source
524 * address list, so take the hash lock.
526 mutex_enter(&tbf->tf_lock);
527 initmp = sctp_init_mp(sctp, cur_fp);
528 if (initmp == NULL) {
529 mutex_exit(&tbf->tf_lock);
531 * It may happen that all the source addresses
532 * (loopback/link local) are removed. In that case,
533 * faile the connect.
535 if (sctp->sctp_nsaddrs == 0) {
536 sctp_conn_hash_remove(sctp);
537 SCTP_FADDR_TIMER_STOP(cur_fp);
538 WAKE_SCTP(sctp);
539 return (EADDRNOTAVAIL);
542 /* Otherwise, let the retransmission timer retry */
543 WAKE_SCTP(sctp);
544 goto notify_ulp;
546 mutex_exit(&tbf->tf_lock);
548 ASSERT(ixa->ixa_cred != NULL);
549 ASSERT(ixa->ixa_ire != NULL);
551 (void) conn_ip_output(initmp, ixa);
552 BUMP_LOCAL(sctp->sctp_opkts);
553 WAKE_SCTP(sctp);
555 notify_ulp:
556 sctp_set_ulp_prop(sctp);
558 return (0);
559 default:
560 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
561 WAKE_SCTP(sctp);
562 return (EINVAL);