kill tsol ("Trusted Solaris") aka TX ("Trusted Extensions")
[unleashed.git] / kernel / net / sctp / sctp_bind.c
blob72e1a4fac3d5a8510bf1c8cc0b9f44f923efbf8f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/stream.h>
29 #include <sys/cmn_err.h>
30 #include <sys/kmem.h>
31 #define _SUN_TPI_VERSION 2
32 #include <sys/tihdr.h>
33 #include <sys/stropts.h>
34 #include <sys/socket.h>
35 #include <sys/random.h>
36 #include <sys/policy.h>
38 #include <netinet/in.h>
39 #include <netinet/ip6.h>
41 #include <inet/common.h>
42 #include <inet/ip.h>
43 #include <inet/ip6.h>
44 #include <inet/ipclassifier.h>
45 #include <inet/sctp/sctp_impl.h>
46 #include <inet/sctp/sctp_asconf.h>
47 #include <inet/sctp/sctp_addr.h>
/*
 * Minimum number of associations which can be created per listener.  Used
 * when the listener association count is in effect: sctp_listen() raises a
 * computed per-listener limit to at least this value.
 */
static uint32_t sctp_min_assoc_listener = 2;
56 * Returns 0 on success, EACCES on permission failure.
58 static int
59 sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified)
61 sctp_stack_t *sctps = sctp->sctp_sctps;
62 conn_t *connp = sctp->sctp_connp;
65 * Get a valid port (within the anonymous range and should not
66 * be a privileged one) to use if the user has not given a port.
67 * If multiple threads are here, they may all start with
68 * with the same initial port. But, it should be fine as long as
69 * sctp_bindi will ensure that no two threads will be assigned
70 * the same port.
72 if (*requested_port == 0) {
73 *requested_port = sctp_update_next_port(
74 sctps->sctps_next_port_to_try,
75 crgetzone(connp->conn_cred), sctps);
76 if (*requested_port == 0)
77 return (EACCES);
78 *user_specified = 0;
79 } else {
80 int i;
81 boolean_t priv = B_FALSE;
84 * If the requested_port is in the well-known privileged range,
85 * verify that the stream was opened by a privileged user.
86 * Note: No locks are held when inspecting sctp_g_*epriv_ports
87 * but instead the code relies on:
88 * - the fact that the address of the array and its size never
89 * changes
90 * - the atomic assignment of the elements of the array
92 if (*requested_port < sctps->sctps_smallest_nonpriv_port) {
93 priv = B_TRUE;
94 } else {
95 for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) {
96 if (*requested_port ==
97 sctps->sctps_g_epriv_ports[i]) {
98 priv = B_TRUE;
99 break;
103 if (priv) {
105 * sctp_bind() should take a cred_t argument so that
106 * we can use it here.
108 if (secpolicy_net_privaddr(connp->conn_cred,
109 *requested_port, IPPROTO_SCTP) != 0) {
110 dprint(1,
111 ("sctp_bind(x): no prive for port %d",
112 *requested_port));
113 return (EACCES);
116 *user_specified = 1;
119 return (0);
123 sctp_listen(sctp_t *sctp)
125 sctp_tf_t *tf;
126 sctp_stack_t *sctps = sctp->sctp_sctps;
127 conn_t *connp = sctp->sctp_connp;
129 RUN_SCTP(sctp);
131 * TCP handles listen() increasing the backlog, need to check
132 * if it should be handled here too
134 if (sctp->sctp_state > SCTPS_BOUND ||
135 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
136 WAKE_SCTP(sctp);
137 return (EINVAL);
140 /* Do an anonymous bind for unbound socket doing listen(). */
141 if (sctp->sctp_nsaddrs == 0) {
142 struct sockaddr_storage ss;
143 int ret;
145 bzero(&ss, sizeof (ss));
146 ss.ss_family = connp->conn_family;
148 WAKE_SCTP(sctp);
149 if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
150 sizeof (ss))) != 0)
151 return (ret);
152 RUN_SCTP(sctp)
155 /* Cache things in the ixa without any refhold */
156 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
157 connp->conn_ixa->ixa_cred = connp->conn_cred;
158 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
160 sctp->sctp_state = SCTPS_LISTEN;
161 (void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
162 sctp->sctp_last_secret_update = ddi_get_lbolt64();
163 bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);
166 * If there is an association limit, allocate and initialize
167 * the counter struct. Note that since listen can be called
168 * multiple times, the struct may have been allready allocated.
170 if (!list_is_empty(&sctps->sctps_listener_conf) &&
171 sctp->sctp_listen_cnt == NULL) {
172 sctp_listen_cnt_t *slc;
173 uint32_t ratio;
175 ratio = sctp_find_listener_conf(sctps,
176 ntohs(connp->conn_lport));
177 if (ratio != 0) {
178 uint32_t mem_ratio, tot_buf;
180 slc = kmem_alloc(sizeof (sctp_listen_cnt_t), KM_SLEEP);
182 * Calculate the connection limit based on
183 * the configured ratio and maxusers. Maxusers
184 * are calculated based on memory size,
185 * ~ 1 user per MB. Note that the conn_rcvbuf
186 * and conn_sndbuf may change after a
187 * connection is accepted. So what we have
188 * is only an approximation.
190 if ((tot_buf = connp->conn_rcvbuf +
191 connp->conn_sndbuf) < MB) {
192 mem_ratio = MB / tot_buf;
193 slc->slc_max = maxusers / ratio * mem_ratio;
194 } else {
195 mem_ratio = tot_buf / MB;
196 slc->slc_max = maxusers / ratio / mem_ratio;
198 /* At least we should allow some associations! */
199 if (slc->slc_max < sctp_min_assoc_listener)
200 slc->slc_max = sctp_min_assoc_listener;
201 slc->slc_cnt = 1;
202 slc->slc_drop = 0;
203 sctp->sctp_listen_cnt = slc;
208 tf = &sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(
209 ntohs(connp->conn_lport))];
210 sctp_listen_hash_insert(tf, sctp);
212 WAKE_SCTP(sctp);
213 return (0);
217 * Bind the sctp_t to a sockaddr, which includes an address and other
218 * information, such as port or flowinfo.
221 sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len)
223 int user_specified;
224 boolean_t bind_to_req_port_only;
225 in_port_t requested_port;
226 in_port_t allocated_port;
227 int err = 0;
228 conn_t *connp = sctp->sctp_connp;
229 uint_t scope_id;
230 sin_t *sin;
231 sin6_t *sin6;
233 ASSERT(sctp != NULL);
235 RUN_SCTP(sctp);
237 if ((sctp->sctp_state >= SCTPS_BOUND) ||
238 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING) ||
239 (sa == NULL || len == 0)) {
241 * Multiple binds not allowed for any SCTP socket
242 * Also binding with null address is not supported.
244 err = EINVAL;
245 goto done;
248 switch (sa->sa_family) {
249 case AF_INET:
250 sin = (sin_t *)sa;
251 if (len < sizeof (struct sockaddr_in) ||
252 connp->conn_family == AF_INET6) {
253 err = EINVAL;
254 goto done;
256 requested_port = ntohs(sin->sin_port);
257 break;
258 case AF_INET6:
259 sin6 = (sin6_t *)sa;
260 if (len < sizeof (struct sockaddr_in6) ||
261 connp->conn_family == AF_INET) {
262 err = EINVAL;
263 goto done;
265 requested_port = ntohs(sin6->sin6_port);
266 /* Set the flowinfo. */
267 connp->conn_flowinfo =
268 sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK;
270 scope_id = sin6->sin6_scope_id;
271 if (scope_id != 0 && IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
272 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
273 connp->conn_ixa->ixa_scopeid = scope_id;
274 connp->conn_incoming_ifindex = scope_id;
275 } else {
276 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
277 connp->conn_incoming_ifindex = connp->conn_bound_if;
279 break;
280 default:
281 err = EAFNOSUPPORT;
282 goto done;
284 bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE;
286 err = sctp_select_port(sctp, &requested_port, &user_specified);
287 if (err != 0)
288 goto done;
290 if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE,
291 user_specified == 1 ? htons(requested_port) : 0)) != 0) {
292 goto done;
294 err = sctp_bindi(sctp, requested_port, bind_to_req_port_only,
295 user_specified, &allocated_port);
296 if (err != 0) {
297 sctp_free_saddrs(sctp);
298 } else {
299 ASSERT(sctp->sctp_state == SCTPS_BOUND);
301 done:
302 WAKE_SCTP(sctp);
303 return (err);
307 * Perform bind/unbind operation of a list of addresses on a sctp_t
310 sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop)
312 ASSERT(sctp != NULL);
313 ASSERT(addrs != NULL);
314 ASSERT(addrcnt > 0);
316 switch (bindop) {
317 case SCTP_BINDX_ADD_ADDR:
318 return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE,
319 sctp->sctp_connp->conn_lport));
320 case SCTP_BINDX_REM_ADDR:
321 return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE));
322 default:
323 return (EINVAL);
328 * Add a list of addresses to a sctp_t.
331 sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
332 boolean_t caller_hold_lock, in_port_t port)
334 int err = 0;
335 boolean_t do_asconf = B_FALSE;
336 sctp_stack_t *sctps = sctp->sctp_sctps;
337 conn_t *connp = sctp->sctp_connp;
339 if (!caller_hold_lock)
340 RUN_SCTP(sctp);
342 if (sctp->sctp_state > SCTPS_ESTABLISHED ||
343 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
344 if (!caller_hold_lock)
345 WAKE_SCTP(sctp);
346 return (EINVAL);
349 if (sctp->sctp_state > SCTPS_LISTEN) {
351 * Let's do some checking here rather than undoing the
352 * add later (for these reasons).
354 if (!sctps->sctps_addip_enabled ||
355 !sctp->sctp_understands_asconf ||
356 !sctp->sctp_understands_addip) {
357 if (!caller_hold_lock)
358 WAKE_SCTP(sctp);
359 return (EINVAL);
361 do_asconf = B_TRUE;
364 * On a clustered node, for an inaddr_any bind, we will pass the list
365 * of all the addresses in the global list, minus any address on the
366 * loopback interface, and expect the clustering susbsystem to give us
367 * the correct list for the 'port'. For explicit binds we give the
368 * list of addresses and the clustering module validates it for the
369 * 'port'.
371 * On a non-clustered node, cl_sctp_check_addrs will be NULL and
372 * we proceed as usual.
374 if (cl_sctp_check_addrs != NULL) {
375 uchar_t *addrlist = NULL;
376 size_t size = 0;
377 int unspec = 0;
378 boolean_t do_listen;
379 uchar_t *llist = NULL;
380 size_t lsize = 0;
383 * If we are adding addresses after listening, but before
384 * an association is established, we need to update the
385 * clustering module with this info.
387 do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND &&
388 cl_sctp_listen != NULL;
390 err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist,
391 &unspec, &size);
392 if (err != 0) {
393 ASSERT(addrlist == NULL);
394 ASSERT(addrcnt == 0);
395 ASSERT(size == 0);
396 if (!caller_hold_lock)
397 WAKE_SCTP(sctp);
398 SCTP_KSTAT(sctps, sctp_cl_check_addrs);
399 return (err);
401 ASSERT(addrlist != NULL);
402 (*cl_sctp_check_addrs)(connp->conn_family, port, &addrlist,
403 size, &addrcnt, unspec == 1);
404 if (addrcnt == 0) {
405 /* We free the list */
406 kmem_free(addrlist, size);
407 if (!caller_hold_lock)
408 WAKE_SCTP(sctp);
409 return (EINVAL);
411 if (do_listen) {
412 lsize = sizeof (in6_addr_t) * addrcnt;
413 llist = kmem_alloc(lsize, KM_SLEEP);
415 err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist,
416 lsize);
417 if (err == 0 && do_listen) {
418 (*cl_sctp_listen)(connp->conn_family, llist,
419 addrcnt, connp->conn_lport);
420 /* list will be freed by the clustering module */
421 } else if (err != 0 && llist != NULL) {
422 kmem_free(llist, lsize);
424 /* free the list we allocated */
425 kmem_free(addrlist, size);
426 } else {
427 err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0);
429 if (err != 0) {
430 if (!caller_hold_lock)
431 WAKE_SCTP(sctp);
432 return (err);
434 /* Need to send ASCONF messages */
435 if (do_asconf) {
436 err = sctp_add_ip(sctp, addrs, addrcnt);
437 if (err != 0) {
438 sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE);
439 if (!caller_hold_lock)
440 WAKE_SCTP(sctp);
441 return (err);
444 if (!caller_hold_lock)
445 WAKE_SCTP(sctp);
446 return (0);
450 * Remove one or more addresses bound to the sctp_t.
453 sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
454 boolean_t caller_hold_lock)
456 int error = 0;
457 boolean_t do_asconf = B_FALSE;
458 uchar_t *ulist = NULL;
459 size_t usize = 0;
460 sctp_stack_t *sctps = sctp->sctp_sctps;
461 conn_t *connp = sctp->sctp_connp;
463 if (!caller_hold_lock)
464 RUN_SCTP(sctp);
466 if (sctp->sctp_state > SCTPS_ESTABLISHED ||
467 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
468 if (!caller_hold_lock)
469 WAKE_SCTP(sctp);
470 return (EINVAL);
473 * Fail the remove if we are beyond listen, but can't send this
474 * to the peer.
476 if (sctp->sctp_state > SCTPS_LISTEN) {
477 if (!sctps->sctps_addip_enabled ||
478 !sctp->sctp_understands_asconf ||
479 !sctp->sctp_understands_addip) {
480 if (!caller_hold_lock)
481 WAKE_SCTP(sctp);
482 return (EINVAL);
484 do_asconf = B_TRUE;
487 /* Can't delete the last address nor all of the addresses */
488 if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) {
489 if (!caller_hold_lock)
490 WAKE_SCTP(sctp);
491 return (EINVAL);
494 if (cl_sctp_unlisten != NULL && !do_asconf &&
495 sctp->sctp_state > SCTPS_BOUND) {
496 usize = sizeof (in6_addr_t) * addrcnt;
497 ulist = kmem_alloc(usize, KM_SLEEP);
500 error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize);
501 if (error != 0) {
502 if (ulist != NULL)
503 kmem_free(ulist, usize);
504 if (!caller_hold_lock)
505 WAKE_SCTP(sctp);
506 return (error);
508 /* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */
509 if (ulist != NULL) {
510 ASSERT(cl_sctp_unlisten != NULL);
511 (*cl_sctp_unlisten)(connp->conn_family, ulist, addrcnt,
512 connp->conn_lport);
513 /* ulist will be freed by the clustering module */
515 if (!caller_hold_lock)
516 WAKE_SCTP(sctp);
517 return (error);
521 * Returns 0 for success, errno value otherwise.
523 * If the "bind_to_req_port_only" parameter is set and the requested port
524 * number is available, then set allocated_port to it. If not available,
525 * return an error.
527 * If the "bind_to_req_port_only" parameter is not set and the requested port
528 * number is available, then set allocated_port to it. If not available,
529 * find the first anonymous port we can and set allocated_port to that. If no
530 * anonymous ports are available, return an error.
532 * In either case, when succeeding, update the sctp_t to record the port number
533 * and insert it in the bind hash table.
536 sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only,
537 int user_specified, in_port_t *allocated_port)
539 /* number of times we have run around the loop */
540 int count = 0;
541 /* maximum number of times to run around the loop */
542 int loopmax;
543 sctp_stack_t *sctps = sctp->sctp_sctps;
544 conn_t *connp = sctp->sctp_connp;
545 zone_t *zone = crgetzone(connp->conn_cred);
546 zoneid_t zoneid = connp->conn_zoneid;
549 * Lookup for free addresses is done in a loop and "loopmax"
550 * influences how long we spin in the loop
552 if (bind_to_req_port_only) {
554 * If the requested port is busy, don't bother to look
555 * for a new one. Setting loop maximum count to 1 has
556 * that effect.
558 loopmax = 1;
559 } else {
561 * If the requested port is busy, look for a free one
562 * in the anonymous port range.
563 * Set loopmax appropriately so that one does not look
564 * forever in the case all of the anonymous ports are in use.
566 loopmax = (sctps->sctps_largest_anon_port -
567 sctps->sctps_smallest_anon_port + 1);
569 do {
570 uint16_t lport;
571 sctp_tf_t *tbf;
572 sctp_t *lsctp;
573 int addrcmp;
575 lport = htons(port);
578 * Ensure that the sctp_t is not currently in the bind hash.
579 * Hold the lock on the hash bucket to ensure that
580 * the duplicate check plus the insertion is an atomic
581 * operation.
583 * This function does an inline lookup on the bind hash list
584 * Make sure that we access only members of sctp_t
585 * and that we don't look at sctp_sctp, since we are not
586 * doing a SCTPB_REFHOLD. For more details please see the notes
587 * in sctp_compress()
589 sctp_bind_hash_remove(sctp);
590 tbf = &sctps->sctps_bind_fanout[SCTP_BIND_HASH(port)];
591 mutex_enter(&tbf->tf_lock);
592 for (lsctp = tbf->tf_sctp; lsctp != NULL;
593 lsctp = lsctp->sctp_bind_hash) {
594 conn_t *lconnp = lsctp->sctp_connp;
596 if (lport != lconnp->conn_lport ||
597 lsctp->sctp_state < SCTPS_BOUND)
598 continue;
600 if (lconnp->conn_zoneid != zoneid)
601 continue;
603 addrcmp = sctp_compare_saddrs(sctp, lsctp);
604 if (addrcmp != SCTP_ADDR_DISJOINT) {
605 if (!connp->conn_reuseaddr) {
606 /* in use */
607 break;
608 } else if (lsctp->sctp_state == SCTPS_BOUND ||
609 lsctp->sctp_state == SCTPS_LISTEN) {
611 * socket option SO_REUSEADDR is set
612 * on the binding sctp_t.
614 * We have found a match of IP source
615 * address and source port, which is
616 * refused regardless of the
617 * SO_REUSEADDR setting, so we break.
619 break;
623 if (lsctp != NULL) {
624 /* The port number is busy */
625 mutex_exit(&tbf->tf_lock);
626 } else {
628 * This port is ours. Insert in fanout and mark as
629 * bound to prevent others from getting the port
630 * number.
632 sctp->sctp_state = SCTPS_BOUND;
633 connp->conn_lport = lport;
635 ASSERT(&sctps->sctps_bind_fanout[
636 SCTP_BIND_HASH(port)] == tbf);
637 sctp_bind_hash_insert(tbf, sctp, 1);
639 mutex_exit(&tbf->tf_lock);
642 * We don't want sctp_next_port_to_try to "inherit"
643 * a port number supplied by the user in a bind.
645 * This is the only place where sctp_next_port_to_try
646 * is updated. After the update, it may or may not
647 * be in the valid range.
649 if (user_specified == 0)
650 sctps->sctps_next_port_to_try = port + 1;
652 *allocated_port = port;
654 return (0);
657 if ((count == 0) && (user_specified)) {
659 * We may have to return an anonymous port. So
660 * get one to start with.
662 port = sctp_update_next_port(
663 sctps->sctps_next_port_to_try,
664 zone, sctps);
665 user_specified = 0;
666 } else {
667 port = sctp_update_next_port(port + 1, zone, sctps);
669 if (port == 0)
670 break;
673 * Don't let this loop run forever in the case where
674 * all of the anonymous ports are in use.
676 } while (++count < loopmax);
678 return (bind_to_req_port_only ? EADDRINUSE : EADDRNOTAVAIL);
682 * Don't let port fall into the privileged range.
683 * Since the extra privileged ports can be arbitrary we also
684 * ensure that we exclude those from consideration.
685 * sctp_g_epriv_ports is not sorted thus we loop over it until
686 * there are no changes.
688 * Note: No locks are held when inspecting sctp_g_*epriv_ports
689 * but instead the code relies on:
690 * - the fact that the address of the array and its size never changes
691 * - the atomic assignment of the elements of the array
693 in_port_t
694 sctp_update_next_port(in_port_t port, zone_t *zone, sctp_stack_t *sctps)
696 int i;
697 boolean_t restart = B_FALSE;
699 retry:
700 if (port < sctps->sctps_smallest_anon_port)
701 port = sctps->sctps_smallest_anon_port;
703 if (port > sctps->sctps_largest_anon_port) {
704 if (restart)
705 return (0);
706 restart = B_TRUE;
707 port = sctps->sctps_smallest_anon_port;
710 if (port < sctps->sctps_smallest_nonpriv_port)
711 port = sctps->sctps_smallest_nonpriv_port;
713 for (i = 0; i < sctps->sctps_g_num_epriv_ports; i++) {
714 if (port == sctps->sctps_g_epriv_ports[i]) {
715 port++;
717 * Make sure whether the port is in the
718 * valid range.
720 * XXX Note that if sctp_g_epriv_ports contains
721 * all the anonymous ports this will be an
722 * infinite loop.
724 goto retry;
728 return (port);