kill tsol ("Trusted Solaris") aka TX ("Trusted Extensions")
[unleashed.git] / kernel / net / tcp / tcp_bind.c
blobc381ebe486b27ffc13958c5672b2e56e46c20387
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #include <sys/strsubr.h>
31 #include <sys/stropts.h>
32 #include <sys/strlog.h>
33 #define _SUN_TPI_VERSION 2
34 #include <sys/tihdr.h>
35 #include <sys/suntpi.h>
36 #include <sys/xti_inet.h>
37 #include <sys/policy.h>
38 #include <sys/squeue_impl.h>
39 #include <sys/squeue.h>
41 #include <rpc/pmap_prot.h>
43 #include <inet/common.h>
44 #include <inet/ip.h>
45 #include <inet/tcp.h>
46 #include <inet/tcp_impl.h>
47 #include <inet/proto_set.h>
48 #include <inet/ipsec_impl.h>
50 /* Setable in /etc/system */
51 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
52 static uint32_t tcp_random_anon_port = 1;
54 static int tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
55 cred_t *cr);
56 static in_port_t tcp_get_next_priv_port(const tcp_t *);
59 * Hash list insertion routine for tcp_t structures. Each hash bucket
60 * contains a list of tcp_t entries, and each entry is bound to a unique
61 * port. If there are multiple tcp_t's that are bound to the same port, then
62 * one of them will be linked into the hash bucket list, and the rest will
63 * hang off of that one entry. For each port, entries bound to a specific IP
64 * address will be inserted before those those bound to INADDR_ANY.
66 void
67 tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
69 tcp_t **tcpp;
70 tcp_t *tcpnext;
71 tcp_t *tcphash;
72 conn_t *connp = tcp->tcp_connp;
73 conn_t *connext;
75 if (tcp->tcp_ptpbhn != NULL) {
76 ASSERT(!caller_holds_lock);
77 tcp_bind_hash_remove(tcp);
79 tcpp = &tbf->tf_tcp;
80 if (!caller_holds_lock) {
81 mutex_enter(&tbf->tf_lock);
82 } else {
83 ASSERT(MUTEX_HELD(&tbf->tf_lock));
85 tcphash = tcpp[0];
86 tcpnext = NULL;
87 if (tcphash != NULL) {
88 /* Look for an entry using the same port */
89 while ((tcphash = tcpp[0]) != NULL &&
90 connp->conn_lport != tcphash->tcp_connp->conn_lport)
91 tcpp = &(tcphash->tcp_bind_hash);
93 /* The port was not found, just add to the end */
94 if (tcphash == NULL)
95 goto insert;
98 * OK, there already exists an entry bound to the
99 * same port.
101 * If the new tcp bound to the INADDR_ANY address
102 * and the first one in the list is not bound to
103 * INADDR_ANY we skip all entries until we find the
104 * first one bound to INADDR_ANY.
105 * This makes sure that applications binding to a
106 * specific address get preference over those binding to
107 * INADDR_ANY.
109 tcpnext = tcphash;
110 connext = tcpnext->tcp_connp;
111 tcphash = NULL;
112 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
113 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
114 while ((tcpnext = tcpp[0]) != NULL) {
115 connext = tcpnext->tcp_connp;
116 if (!V6_OR_V4_INADDR_ANY(
117 connext->conn_bound_addr_v6))
118 tcpp = &(tcpnext->tcp_bind_hash_port);
119 else
120 break;
122 if (tcpnext != NULL) {
123 tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
124 tcphash = tcpnext->tcp_bind_hash;
125 if (tcphash != NULL) {
126 tcphash->tcp_ptpbhn =
127 &(tcp->tcp_bind_hash);
128 tcpnext->tcp_bind_hash = NULL;
131 } else {
132 tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
133 tcphash = tcpnext->tcp_bind_hash;
134 if (tcphash != NULL) {
135 tcphash->tcp_ptpbhn =
136 &(tcp->tcp_bind_hash);
137 tcpnext->tcp_bind_hash = NULL;
141 insert:
142 tcp->tcp_bind_hash_port = tcpnext;
143 tcp->tcp_bind_hash = tcphash;
144 tcp->tcp_ptpbhn = tcpp;
145 tcpp[0] = tcp;
146 if (!caller_holds_lock)
147 mutex_exit(&tbf->tf_lock);
151 * Hash list removal routine for tcp_t structures.
153 void
154 tcp_bind_hash_remove(tcp_t *tcp)
156 tcp_t *tcpnext;
157 kmutex_t *lockp;
158 tcp_stack_t *tcps = tcp->tcp_tcps;
159 conn_t *connp = tcp->tcp_connp;
161 if (tcp->tcp_ptpbhn == NULL)
162 return;
165 * Extract the lock pointer in case there are concurrent
166 * hash_remove's for this instance.
168 ASSERT(connp->conn_lport != 0);
169 lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(
170 connp->conn_lport)].tf_lock;
172 ASSERT(lockp != NULL);
173 mutex_enter(lockp);
174 if (tcp->tcp_ptpbhn) {
175 tcpnext = tcp->tcp_bind_hash_port;
176 if (tcpnext != NULL) {
177 tcp->tcp_bind_hash_port = NULL;
178 tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
179 tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
180 if (tcpnext->tcp_bind_hash != NULL) {
181 tcpnext->tcp_bind_hash->tcp_ptpbhn =
182 &(tcpnext->tcp_bind_hash);
183 tcp->tcp_bind_hash = NULL;
185 } else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
186 tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
187 tcp->tcp_bind_hash = NULL;
189 *tcp->tcp_ptpbhn = tcpnext;
190 tcp->tcp_ptpbhn = NULL;
192 mutex_exit(lockp);
196 * Don't let port fall into the privileged range.
197 * Since the extra privileged ports can be arbitrary we also
198 * ensure that we exclude those from consideration.
199 * tcp_g_epriv_ports is not sorted thus we loop over it until
200 * there are no changes.
202 * Note: No locks are held when inspecting tcp_g_*epriv_ports
203 * but instead the code relies on:
204 * - the fact that the address of the array and its size never changes
205 * - the atomic assignment of the elements of the array
207 * Returns 0 if there are no more ports available.
209 * TS note: skip multilevel ports.
211 in_port_t
212 tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random)
214 int i, bump;
215 boolean_t restart = B_FALSE;
216 tcp_stack_t *tcps = tcp->tcp_tcps;
218 if (random && tcp_random_anon_port != 0) {
219 (void) random_get_pseudo_bytes((uint8_t *)&port,
220 sizeof (in_port_t));
222 * Unless changed by a sys admin, the smallest anon port
223 * is 32768 and the largest anon port is 65535. It is
224 * very likely (50%) for the random port to be smaller
225 * than the smallest anon port. When that happens,
226 * add port % (anon port range) to the smallest anon
227 * port to get the random port. It should fall into the
228 * valid anon port range.
230 if ((port < tcps->tcps_smallest_anon_port) ||
231 (port > tcps->tcps_largest_anon_port)) {
232 if (tcps->tcps_smallest_anon_port ==
233 tcps->tcps_largest_anon_port) {
234 bump = 0;
235 } else {
236 bump = port % (tcps->tcps_largest_anon_port -
237 tcps->tcps_smallest_anon_port);
239 port = tcps->tcps_smallest_anon_port + bump;
243 retry:
244 if (port < tcps->tcps_smallest_anon_port)
245 port = (in_port_t)tcps->tcps_smallest_anon_port;
247 if (port > tcps->tcps_largest_anon_port) {
248 if (restart)
249 return (0);
250 restart = B_TRUE;
251 port = (in_port_t)tcps->tcps_smallest_anon_port;
254 if (port < tcps->tcps_smallest_nonpriv_port)
255 port = (in_port_t)tcps->tcps_smallest_nonpriv_port;
257 for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
258 if (port == tcps->tcps_g_epriv_ports[i]) {
259 port++;
261 * Make sure whether the port is in the
262 * valid range.
264 goto retry;
267 return (port);
271 * Return the next anonymous port in the privileged port range for
272 * bind checking. It starts at IPPORT_RESERVED - 1 and goes
273 * downwards. This is the same behavior as documented in the userland
274 * library call rresvport(3N).
276 * TS note: skip multilevel ports.
278 static in_port_t
279 tcp_get_next_priv_port(const tcp_t *tcp)
281 static in_port_t next_priv_port = IPPORT_RESERVED - 1;
282 in_port_t nextport;
283 boolean_t restart = B_FALSE;
284 tcp_stack_t *tcps = tcp->tcp_tcps;
285 retry:
286 if (next_priv_port < tcps->tcps_min_anonpriv_port ||
287 next_priv_port >= IPPORT_RESERVED) {
288 next_priv_port = IPPORT_RESERVED - 1;
289 if (restart)
290 return (0);
291 restart = B_TRUE;
293 return (next_priv_port--);
296 static int
297 tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
298 boolean_t bind_to_req_port_only, cred_t *cr)
300 boolean_t user_specified;
301 in_port_t allocated_port;
302 in_port_t requested_port = *requested_port_ptr;
303 conn_t *connp = tcp->tcp_connp;
304 zone_t *zone;
305 tcp_stack_t *tcps = tcp->tcp_tcps;
306 in6_addr_t v6addr = connp->conn_laddr_v6;
309 * XXX It's up to the caller to specify bind_to_req_port_only or not.
311 ASSERT(cr != NULL);
314 * Get a valid port (within the anonymous range and should not
315 * be a privileged one) to use if the user has not given a port.
316 * If multiple threads are here, they may all start with
317 * with the same initial port. But, it should be fine as long as
318 * tcp_bindi will ensure that no two threads will be assigned
319 * the same port.
321 * NOTE: XXX If a privileged process asks for an anonymous port, we
322 * still check for ports only in the range > tcp_smallest_non_priv_port,
323 * unless TCP_ANONPRIVBIND option is set.
325 if (requested_port == 0) {
326 requested_port = connp->conn_anon_priv_bind ?
327 tcp_get_next_priv_port(tcp) :
328 tcp_update_next_port(tcps->tcps_next_port_to_try,
329 tcp, B_TRUE);
330 if (requested_port == 0) {
331 return (-TNOADDR);
333 user_specified = B_FALSE;
334 } else {
335 int i;
336 boolean_t priv = B_FALSE;
339 * If the requested_port is in the well-known privileged range,
340 * verify that the stream was opened by a privileged user.
341 * Note: No locks are held when inspecting tcp_g_*epriv_ports
342 * but instead the code relies on:
343 * - the fact that the address of the array and its size never
344 * changes
345 * - the atomic assignment of the elements of the array
347 if (requested_port < tcps->tcps_smallest_nonpriv_port) {
348 priv = B_TRUE;
349 } else {
350 for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
351 if (requested_port ==
352 tcps->tcps_g_epriv_ports[i]) {
353 priv = B_TRUE;
354 break;
358 if (priv) {
359 if (secpolicy_net_privaddr(cr, requested_port,
360 IPPROTO_TCP) != 0) {
361 if (connp->conn_debug) {
362 (void) strlog(TCP_MOD_ID, 0, 1,
363 SL_ERROR|SL_TRACE,
364 "tcp_bind: no priv for port %d",
365 requested_port);
367 return (-TACCES);
370 user_specified = B_TRUE;
372 connp = tcp->tcp_connp;
375 allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
376 connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only,
377 user_specified);
379 if (allocated_port == 0) {
380 if (bind_to_req_port_only) {
381 if (connp->conn_debug) {
382 (void) strlog(TCP_MOD_ID, 0, 1,
383 SL_ERROR|SL_TRACE,
384 "tcp_bind: requested addr busy");
386 return (-TADDRBUSY);
387 } else {
388 /* If we are out of ports, fail the bind. */
389 if (connp->conn_debug) {
390 (void) strlog(TCP_MOD_ID, 0, 1,
391 SL_ERROR|SL_TRACE,
392 "tcp_bind: out of ports?");
394 return (-TNOADDR);
398 /* Pass the allocated port back */
399 *requested_port_ptr = allocated_port;
400 return (0);
404 * Check the address and check/pick a local port number.
407 tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
408 boolean_t bind_to_req_port_only)
410 tcp_t *tcp = connp->conn_tcp;
411 sin_t *sin;
412 sin6_t *sin6;
413 in_port_t requested_port;
414 ipaddr_t v4addr;
415 in6_addr_t v6addr;
416 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */
417 zoneid_t zoneid = IPCL_ZONEID(connp);
418 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
419 uint_t scopeid = 0;
420 int error = 0;
421 ip_xmit_attr_t *ixa = connp->conn_ixa;
423 ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);
425 if (tcp->tcp_state == TCPS_BOUND) {
426 return (0);
427 } else if (tcp->tcp_state > TCPS_BOUND) {
428 if (connp->conn_debug) {
429 (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
430 "tcp_bind: bad state, %d", tcp->tcp_state);
432 return (-TOUTSTATE);
435 ASSERT(sa != NULL && len != 0);
437 if (!OK_32PTR((char *)sa)) {
438 if (connp->conn_debug) {
439 (void) strlog(TCP_MOD_ID, 0, 1,
440 SL_ERROR|SL_TRACE,
441 "tcp_bind: bad address parameter, "
442 "address %p, len %d",
443 (void *)sa, len);
445 return (-TPROTO);
448 error = proto_verify_ip_addr(connp->conn_family, sa, len);
449 if (error != 0) {
450 return (error);
453 switch (len) {
454 case sizeof (sin_t): /* Complete IPv4 address */
455 sin = (sin_t *)sa;
456 requested_port = ntohs(sin->sin_port);
457 v4addr = sin->sin_addr.s_addr;
458 IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
459 if (v4addr != INADDR_ANY) {
460 laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst,
461 B_FALSE);
463 break;
465 case sizeof (sin6_t): /* Complete IPv6 address */
466 sin6 = (sin6_t *)sa;
467 v6addr = sin6->sin6_addr;
468 requested_port = ntohs(sin6->sin6_port);
469 if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
470 if (connp->conn_ipv6_v6only)
471 return (EADDRNOTAVAIL);
473 IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr);
474 if (v4addr != INADDR_ANY) {
475 laddr_type = ip_laddr_verify_v4(v4addr,
476 zoneid, ipst, B_FALSE);
478 } else {
479 if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
480 if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
481 scopeid = sin6->sin6_scope_id;
482 laddr_type = ip_laddr_verify_v6(&v6addr,
483 zoneid, ipst, B_FALSE, scopeid);
486 break;
488 default:
489 if (connp->conn_debug) {
490 (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
491 "tcp_bind: bad address length, %d", len);
493 return (EAFNOSUPPORT);
494 /* return (-TBADADDR); */
497 /* Is the local address a valid unicast address? */
498 if (laddr_type == IPVL_BAD)
499 return (EADDRNOTAVAIL);
501 connp->conn_bound_addr_v6 = v6addr;
502 if (scopeid != 0) {
503 ixa->ixa_flags |= IXAF_SCOPEID_SET;
504 ixa->ixa_scopeid = scopeid;
505 connp->conn_incoming_ifindex = scopeid;
506 } else {
507 ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
508 connp->conn_incoming_ifindex = connp->conn_bound_if;
511 connp->conn_laddr_v6 = v6addr;
512 connp->conn_saddr_v6 = v6addr;
514 bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;
516 error = tcp_bind_select_lport(tcp, &requested_port,
517 bind_to_req_port_only, cr);
518 if (error != 0) {
519 connp->conn_laddr_v6 = ipv6_all_zeros;
520 connp->conn_saddr_v6 = ipv6_all_zeros;
521 connp->conn_bound_addr_v6 = ipv6_all_zeros;
523 return (error);
527 * If the "bind_to_req_port_only" parameter is set, if the requested port
528 * number is available, return it, If not return 0
530 * If "bind_to_req_port_only" parameter is not set and
531 * If the requested port number is available, return it. If not, return
532 * the first anonymous port we happen across. If no anonymous ports are
533 * available, return 0. addr is the requested local address, if any.
535 * In either case, when succeeding update the tcp_t to record the port number
536 * and insert it in the bind hash table.
538 * Note that TCP over IPv4 and IPv6 sockets can use the same port number
539 * without setting SO_REUSEADDR. This is needed so that they
540 * can be viewed as two independent transport protocols.
542 in_port_t
543 tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
544 int reuseaddr, boolean_t quick_connect,
545 boolean_t bind_to_req_port_only, boolean_t user_specified)
547 /* number of times we have run around the loop */
548 int count = 0;
549 /* maximum number of times to run around the loop */
550 int loopmax;
551 conn_t *connp = tcp->tcp_connp;
552 tcp_stack_t *tcps = tcp->tcp_tcps;
555 * Lookup for free addresses is done in a loop and "loopmax"
556 * influences how long we spin in the loop
558 if (bind_to_req_port_only) {
560 * If the requested port is busy, don't bother to look
561 * for a new one. Setting loop maximum count to 1 has
562 * that effect.
564 loopmax = 1;
565 } else {
567 * If the requested port is busy, look for a free one
568 * in the anonymous port range.
569 * Set loopmax appropriately so that one does not look
570 * forever in the case all of the anonymous ports are in use.
572 if (connp->conn_anon_priv_bind) {
574 * loopmax =
575 * (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
577 loopmax = IPPORT_RESERVED -
578 tcps->tcps_min_anonpriv_port;
579 } else {
580 loopmax = (tcps->tcps_largest_anon_port -
581 tcps->tcps_smallest_anon_port + 1);
584 do {
585 uint16_t lport;
586 tf_t *tbf;
587 tcp_t *ltcp;
588 conn_t *lconnp;
590 lport = htons(port);
593 * Ensure that the tcp_t is not currently in the bind hash.
594 * Hold the lock on the hash bucket to ensure that
595 * the duplicate check plus the insertion is an atomic
596 * operation.
598 * This function does an inline lookup on the bind hash list
599 * Make sure that we access only members of tcp_t
600 * and that we don't look at tcp_tcp, since we are not
601 * doing a CONN_INC_REF.
603 tcp_bind_hash_remove(tcp);
604 tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)];
605 mutex_enter(&tbf->tf_lock);
606 for (ltcp = tbf->tf_tcp; ltcp != NULL;
607 ltcp = ltcp->tcp_bind_hash) {
608 if (lport == ltcp->tcp_connp->conn_lport)
609 break;
612 for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
613 boolean_t not_socket;
614 boolean_t exclbind;
616 lconnp = ltcp->tcp_connp;
618 if (!IPCL_BIND_ZONE_MATCH(lconnp, connp))
619 continue;
622 * If TCP_EXCLBIND is set for either the bound or
623 * binding endpoint, the semantics of bind
624 * is changed according to the following.
626 * spec = specified address (v4 or v6)
627 * unspec = unspecified address (v4 or v6)
628 * A = specified addresses are different for endpoints
630 * bound bind to allowed
631 * -------------------------------------
632 * unspec unspec no
633 * unspec spec no
634 * spec unspec no
635 * spec spec yes if A
637 * Note:
639 * 1. Because of TLI semantics, an endpoint can go
640 * back from, say TCP_ESTABLISHED to TCPS_LISTEN or
641 * TCPS_BOUND, depending on whether it is originally
642 * a listener or not. That is why we need to check
643 * for states greater than or equal to TCPS_BOUND
644 * here.
646 * 2. Ideally, we should only check for state equals
647 * to TCPS_LISTEN. And the following check should be
648 * added.
650 * if (ltcp->tcp_state == TCPS_LISTEN ||
651 * !reuseaddr || !lconnp->conn_reuseaddr) {
652 * ...
655 * The semantics will be changed to this. If the
656 * endpoint on the list is in state not equal to
657 * TCPS_LISTEN and both endpoints have SO_REUSEADDR
658 * set, let the bind succeed.
660 * Because of (1), we cannot do that for TLI
661 * endpoints. But we can do that for socket endpoints.
662 * If in future, we can change this going back
663 * semantics, we can use the above check for TLI also.
665 not_socket = !(TCP_IS_SOCKET(ltcp) &&
666 TCP_IS_SOCKET(tcp));
667 exclbind = lconnp->conn_exclbind ||
668 connp->conn_exclbind;
670 if ((exclbind && (not_socket ||
671 ltcp->tcp_state <= TCPS_ESTABLISHED))) {
672 if (V6_OR_V4_INADDR_ANY(
673 lconnp->conn_bound_addr_v6) ||
674 V6_OR_V4_INADDR_ANY(*laddr) ||
675 IN6_ARE_ADDR_EQUAL(laddr,
676 &lconnp->conn_bound_addr_v6)) {
677 break;
679 continue;
683 * Check ipversion to allow IPv4 and IPv6 sockets to
684 * have disjoint port number spaces, if *_EXCLBIND
685 * is not set and only if the application binds to a
686 * specific port. We use the same autoassigned port
687 * number space for IPv4 and IPv6 sockets.
689 if (connp->conn_ipversion != lconnp->conn_ipversion &&
690 bind_to_req_port_only)
691 continue;
694 * Ideally, we should make sure that the source
695 * address, remote address, and remote port in the
696 * four tuple for this tcp-connection is unique.
697 * However, trying to find out the local source
698 * address would require too much code duplication
699 * with IP, since IP needs needs to have that code
700 * to support userland TCP implementations.
702 if (quick_connect &&
703 (ltcp->tcp_state > TCPS_LISTEN) &&
704 ((connp->conn_fport != lconnp->conn_fport) ||
705 !IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
706 &lconnp->conn_faddr_v6)))
707 continue;
709 if (!reuseaddr) {
711 * No socket option SO_REUSEADDR.
712 * If existing port is bound to
713 * a non-wildcard IP address
714 * and the requesting stream is
715 * bound to a distinct
716 * different IP addresses
717 * (non-wildcard, also), keep
718 * going.
720 if (!V6_OR_V4_INADDR_ANY(*laddr) &&
721 !V6_OR_V4_INADDR_ANY(
722 lconnp->conn_bound_addr_v6) &&
723 !IN6_ARE_ADDR_EQUAL(laddr,
724 &lconnp->conn_bound_addr_v6))
725 continue;
726 if (ltcp->tcp_state >= TCPS_BOUND) {
728 * This port is being used and
729 * its state is >= TCPS_BOUND,
730 * so we can't bind to it.
732 break;
734 } else {
736 * socket option SO_REUSEADDR is set on the
737 * binding tcp_t.
739 * If two streams are bound to
740 * same IP address or both addr
741 * and bound source are wildcards
742 * (INADDR_ANY), we want to stop
743 * searching.
744 * We have found a match of IP source
745 * address and source port, which is
746 * refused regardless of the
747 * SO_REUSEADDR setting, so we break.
749 if (IN6_ARE_ADDR_EQUAL(laddr,
750 &lconnp->conn_bound_addr_v6) &&
751 (ltcp->tcp_state == TCPS_LISTEN ||
752 ltcp->tcp_state == TCPS_BOUND))
753 break;
756 if (ltcp != NULL) {
757 /* The port number is busy */
758 mutex_exit(&tbf->tf_lock);
759 } else {
761 * This port is ours. Insert in fanout and mark as
762 * bound to prevent others from getting the port
763 * number.
765 tcp->tcp_state = TCPS_BOUND;
766 DTRACE_TCP6(state__change, void, NULL,
767 ip_xmit_attr_t *, connp->conn_ixa,
768 void, NULL, tcp_t *, tcp, void, NULL,
769 int32_t, TCPS_IDLE);
771 connp->conn_lport = htons(port);
773 ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
774 connp->conn_lport)] == tbf);
775 tcp_bind_hash_insert(tbf, tcp, 1);
777 mutex_exit(&tbf->tf_lock);
780 * We don't want tcp_next_port_to_try to "inherit"
781 * a port number supplied by the user in a bind.
783 if (user_specified)
784 return (port);
787 * This is the only place where tcp_next_port_to_try
788 * is updated. After the update, it may or may not
789 * be in the valid range.
791 if (!connp->conn_anon_priv_bind)
792 tcps->tcps_next_port_to_try = port + 1;
793 return (port);
796 if (connp->conn_anon_priv_bind) {
797 port = tcp_get_next_priv_port(tcp);
798 } else {
799 if (count == 0 && user_specified) {
801 * We may have to return an anonymous port. So
802 * get one to start with.
804 port =
805 tcp_update_next_port(
806 tcps->tcps_next_port_to_try,
807 tcp, B_TRUE);
808 user_specified = B_FALSE;
809 } else {
810 port = tcp_update_next_port(port + 1, tcp,
811 B_FALSE);
814 if (port == 0)
815 break;
818 * Don't let this loop run forever in the case where
819 * all of the anonymous ports are in use.
821 } while (++count < loopmax);
822 return (0);