4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
26 * IP PACKET CLASSIFIER
28 * The IP packet classifier provides mapping between IP packets and persistent
29 * connection state for connection-oriented protocols. It also provides
30 * interface for managing connection states.
32 * The connection state is kept in conn_t data structure and contains, among
35 * o local/remote address and ports
36 * o Transport protocol
37 * o squeue for the connection (for TCP only)
40 * o hash table linkage
41 * o interface/ire information
44 * o send and receive functions.
47 * Connections use a reference counting scheme. They are freed when the
48 * reference counter drops to zero. A reference is incremented when connection
49 * is placed in a list or table, when incoming packet for the connection arrives
50 * and when connection is processed via squeue (squeue processing may be
51 * asynchronous and the reference protects the connection from being destroyed
52 * before its processing is finished).
54 * conn_recv is used to pass up packets to the ULP.
55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56 * a listener, and changes to tcp_input_listener as the listener has picked a
57 * good squeue. For other cases it is set to tcp_input_data.
59 * conn_recvicmp is used to pass up ICMP errors to the ULP.
61 * Classifier uses several hash tables:
63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
64 * ipcl_bind_fanout: contains all connections in BOUND state
65 * ipcl_proto_fanout: IPv4 protocol fanout
66 * ipcl_proto_fanout_v6: IPv6 protocol fanout
67 * ipcl_udp_fanout: contains all UDP connections
68 * ipcl_iptun_fanout: contains all IP tunnel connections
69 * ipcl_globalhash_fanout: contains all connections
71 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72 * which need to view all existing connections.
74 * All tables are protected by per-bucket locks. When both per-bucket lock and
75 * connection lock need to be held, the per-bucket lock should be acquired
76 * first, followed by the connection lock.
78 * All functions doing search in one of these tables increment a reference
79 * counter on the connection found (if any). This reference should be dropped
80 * when the caller has finished processing the connection.
89 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
92 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93 * it can't find any associated connection. If the connection is found, its
94 * reference counter is incremented.
96 * mp: mblock, containing packet header. The full header should fit
97 * into a single mblock. It should also contain at least full IP
98 * and TCP or UDP header.
100 * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
102 * hdr_len: The size of IP header. It is used to find TCP or UDP header in
105 * ira->ira_zoneid: The zone in which the returned connection must be; the
106 * zoneid corresponding to the ire_zoneid on the IRE located for
107 * the packet's destination address.
109 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110 * IRAF_TX_SHARED_ADDR flags
112 * For TCP connections, the lookup order is as follows:
113 * 5-tuple {src, dst, protocol, local port, remote port}
114 * lookup in ipcl_conn_fanout table.
115 * 3-tuple {dst, remote port, protocol} lookup in
116 * ipcl_bind_fanout table.
118 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
119 * remote port} lookup is done on ipcl_udp_fanout. Note that,
120 * these interfaces do not handle cases where a packet belongs
121 * to multiple UDP clients, which is handled in IP itself.
123 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
124 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
127 * Lookup routine to find an exact match for {src, dst, local port,
128 * remote port} for TCP connections in ipcl_conn_fanout. The address and
129 * ports are read from the IP and TCP header respectively.
131 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
133 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
136 * Lookup routine to find a listener with the tuple {lport, laddr,
137 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
138 * parameter interface index is also compared.
140 * void ipcl_walk(func, arg, ip_stack)
142 * Apply 'func' to every connection available. The 'func' is called as
143 * (*func)(connp, arg). The walk is non-atomic so connections may be
144 * created and destroyed during the walk. The CONN_CONDEMNED and
145 * CONN_INCIPIENT flags ensure that connections which are newly created
146 * or being destroyed are not selected by the walker.
151 * int ipcl_conn_insert(connp);
152 * int ipcl_conn_insert_v4(connp);
153 * int ipcl_conn_insert_v6(connp);
155 * Insert 'connp' in the ipcl_conn_fanout.
157 * connp conn_t to be inserted
160 * 0 if connp was inserted
161 * EADDRINUSE if the connection with the same tuple
164 * int ipcl_bind_insert(connp);
165 * int ipcl_bind_insert_v4(connp);
166 * int ipcl_bind_insert_v6(connp);
168 * Insert 'connp' in ipcl_bind_fanout.
170 * connp conn_t to be inserted
173 * void ipcl_hash_remove(connp);
175 * Removes the 'connp' from the connection fanout table.
177 * Connection Creation/Destruction
178 * -------------------------------
180 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
182 * Creates a new conn based on the type flag, inserts it into
185 * type: This flag determines the type of conn_t which needs to be
186 * created i.e., which kmem_cache it comes from.
187 * IPCL_TCPCONN indicates a TCP connection
188 * IPCL_SCTPCONN indicates a SCTP connection
189 * IPCL_UDPCONN indicates a UDP conn_t.
190 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
191 * IPCL_RTSCONN indicates a RTS conn_t.
192 * IPCL_IPCCONN indicates all other connections.
194 * void ipcl_conn_destroy(connp)
196 * Destroys the connection state, removes it from the global
197 * connection hash table and frees its memory.
200 #include <sys/types.h>
201 #include <sys/stream.h>
202 #include <sys/stropts.h>
203 #include <sys/sysmacros.h>
204 #include <sys/strsubr.h>
205 #include <sys/strsun.h>
206 #define _SUN_TPI_VERSION 2
208 #include <sys/cmn_err.h>
209 #include <sys/debug.h>
211 #include <sys/systm.h>
212 #include <sys/param.h>
213 #include <sys/kmem.h>
214 #include <sys/isa_defs.h>
215 #include <inet/common.h>
216 #include <netinet/ip6.h>
217 #include <netinet/icmp6.h>
220 #include <inet/ip_if.h>
221 #include <inet/ip_ire.h>
222 #include <inet/ip6.h>
223 #include <inet/ip_ndp.h>
224 #include <inet/ip_impl.h>
225 #include <inet/udp_impl.h>
226 #include <inet/sctp_ip.h>
227 #include <inet/sctp/sctp_impl.h>
228 #include <inet/rawip_impl.h>
229 #include <inet/rts_impl.h>
230 #include <inet/iptun/iptun_impl.h>
232 #include <sys/cpuvar.h>
234 #include <inet/ipclassifier.h>
235 #include <inet/tcp.h>
236 #include <inet/ipsec_impl.h>
238 #include <sys/sockio.h>
/* Old value for compatibility. Settable in /etc/system */
uint_t tcp_conn_hash_size = 0;

/* New value. Zero means choose automatically. Settable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
/* Bytes of physical memory per conn-fanout bucket when auto-sizing. */
uint_t ipcl_conn_hash_memfactor = 8192;
/* Upper bound applied to the auto-computed conn-fanout size. */
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 * (Index by N; zeroes mark entries with no usable prime / end of table.)
 */
#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
		6143, 12281, 24571, 49139, 98299, 196597, 393209, \
		786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
		50331599, 100663291, 201326557, 0}
273 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
274 * are aligned on cache lines.
276 typedef union itc_s
{
278 char itcu_filler
[CACHE_ALIGN(conn_s
)];
/*
 * kmem caches, one per conn_t flavor; allocated in the global init
 * routine below.  sctp_conn_cache is owned by the SCTP module, hence
 * extern here.
 */
struct kmem_cache *tcp_conn_cache;
struct kmem_cache *ip_conn_cache;
extern struct kmem_cache *sctp_conn_cache;
struct kmem_cache *udp_conn_cache;
struct kmem_cache *rawip_conn_cache;
struct kmem_cache *rts_conn_cache;

/* TCP timer mblk alloc/free helpers, defined in the TCP module. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* kmem constructor/destructor pairs for each conn_t cache. */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void	udp_conn_destructor(void *, void *);

static int	rawip_conn_constructor(void *, void *, int);
static void	rawip_conn_destructor(void *, void *);

static int	rts_conn_constructor(void *, void *, int);
static void	rts_conn_destructor(void *, void *);
307 * Global (for all stack instances) init routine
312 ip_conn_cache
= kmem_cache_create("ip_conn_cache",
313 sizeof (conn_t
), CACHE_ALIGN_SIZE
,
314 ip_conn_constructor
, ip_conn_destructor
,
315 NULL
, NULL
, NULL
, 0);
317 tcp_conn_cache
= kmem_cache_create("tcp_conn_cache",
318 sizeof (itc_t
) + sizeof (tcp_t
), CACHE_ALIGN_SIZE
,
319 tcp_conn_constructor
, tcp_conn_destructor
,
320 tcp_conn_reclaim
, NULL
, NULL
, 0);
322 udp_conn_cache
= kmem_cache_create("udp_conn_cache",
323 sizeof (itc_t
) + sizeof (udp_t
), CACHE_ALIGN_SIZE
,
324 udp_conn_constructor
, udp_conn_destructor
,
325 NULL
, NULL
, NULL
, 0);
327 rawip_conn_cache
= kmem_cache_create("rawip_conn_cache",
328 sizeof (itc_t
) + sizeof (icmp_t
), CACHE_ALIGN_SIZE
,
329 rawip_conn_constructor
, rawip_conn_destructor
,
330 NULL
, NULL
, NULL
, 0);
332 rts_conn_cache
= kmem_cache_create("rts_conn_cache",
333 sizeof (itc_t
) + sizeof (rts_t
), CACHE_ALIGN_SIZE
,
334 rts_conn_constructor
, rts_conn_destructor
,
335 NULL
, NULL
, NULL
, 0);
339 * ipclassifier initialization routine, sets up hash tables.
342 ipcl_init(ip_stack_t
*ipst
)
345 int sizes
[] = P2Ps();
348 * Calculate size of conn fanout table from /etc/system settings
350 if (ipcl_conn_hash_size
!= 0) {
351 ipst
->ips_ipcl_conn_fanout_size
= ipcl_conn_hash_size
;
352 } else if (tcp_conn_hash_size
!= 0) {
353 ipst
->ips_ipcl_conn_fanout_size
= tcp_conn_hash_size
;
355 extern pgcnt_t freemem
;
357 ipst
->ips_ipcl_conn_fanout_size
=
358 (freemem
* PAGESIZE
) / ipcl_conn_hash_memfactor
;
360 if (ipst
->ips_ipcl_conn_fanout_size
> ipcl_conn_hash_maxsize
) {
361 ipst
->ips_ipcl_conn_fanout_size
=
362 ipcl_conn_hash_maxsize
;
366 for (i
= 9; i
< sizeof (sizes
) / sizeof (*sizes
) - 1; i
++) {
367 if (sizes
[i
] >= ipst
->ips_ipcl_conn_fanout_size
) {
371 if ((ipst
->ips_ipcl_conn_fanout_size
= sizes
[i
]) == 0) {
372 /* Out of range, use the 2^16 value */
373 ipst
->ips_ipcl_conn_fanout_size
= sizes
[16];
376 /* Take values from /etc/system */
377 ipst
->ips_ipcl_bind_fanout_size
= ipcl_bind_fanout_size
;
378 ipst
->ips_ipcl_udp_fanout_size
= ipcl_udp_fanout_size
;
379 ipst
->ips_ipcl_raw_fanout_size
= ipcl_raw_fanout_size
;
380 ipst
->ips_ipcl_iptun_fanout_size
= ipcl_iptun_fanout_size
;
382 ASSERT(ipst
->ips_ipcl_conn_fanout
== NULL
);
384 ipst
->ips_ipcl_conn_fanout
= kmem_zalloc(
385 ipst
->ips_ipcl_conn_fanout_size
* sizeof (connf_t
), KM_SLEEP
);
387 for (i
= 0; i
< ipst
->ips_ipcl_conn_fanout_size
; i
++) {
388 mutex_init(&ipst
->ips_ipcl_conn_fanout
[i
].connf_lock
, NULL
,
389 MUTEX_DEFAULT
, NULL
);
392 ipst
->ips_ipcl_bind_fanout
= kmem_zalloc(
393 ipst
->ips_ipcl_bind_fanout_size
* sizeof (connf_t
), KM_SLEEP
);
395 for (i
= 0; i
< ipst
->ips_ipcl_bind_fanout_size
; i
++) {
396 mutex_init(&ipst
->ips_ipcl_bind_fanout
[i
].connf_lock
, NULL
,
397 MUTEX_DEFAULT
, NULL
);
400 ipst
->ips_ipcl_proto_fanout_v4
= kmem_zalloc(IPPROTO_MAX
*
401 sizeof (connf_t
), KM_SLEEP
);
402 for (i
= 0; i
< IPPROTO_MAX
; i
++) {
403 mutex_init(&ipst
->ips_ipcl_proto_fanout_v4
[i
].connf_lock
, NULL
,
404 MUTEX_DEFAULT
, NULL
);
407 ipst
->ips_ipcl_proto_fanout_v6
= kmem_zalloc(IPPROTO_MAX
*
408 sizeof (connf_t
), KM_SLEEP
);
409 for (i
= 0; i
< IPPROTO_MAX
; i
++) {
410 mutex_init(&ipst
->ips_ipcl_proto_fanout_v6
[i
].connf_lock
, NULL
,
411 MUTEX_DEFAULT
, NULL
);
414 ipst
->ips_rts_clients
= kmem_zalloc(sizeof (connf_t
), KM_SLEEP
);
415 mutex_init(&ipst
->ips_rts_clients
->connf_lock
,
416 NULL
, MUTEX_DEFAULT
, NULL
);
418 ipst
->ips_ipcl_udp_fanout
= kmem_zalloc(
419 ipst
->ips_ipcl_udp_fanout_size
* sizeof (connf_t
), KM_SLEEP
);
420 for (i
= 0; i
< ipst
->ips_ipcl_udp_fanout_size
; i
++) {
421 mutex_init(&ipst
->ips_ipcl_udp_fanout
[i
].connf_lock
, NULL
,
422 MUTEX_DEFAULT
, NULL
);
425 ipst
->ips_ipcl_iptun_fanout
= kmem_zalloc(
426 ipst
->ips_ipcl_iptun_fanout_size
* sizeof (connf_t
), KM_SLEEP
);
427 for (i
= 0; i
< ipst
->ips_ipcl_iptun_fanout_size
; i
++) {
428 mutex_init(&ipst
->ips_ipcl_iptun_fanout
[i
].connf_lock
, NULL
,
429 MUTEX_DEFAULT
, NULL
);
432 ipst
->ips_ipcl_raw_fanout
= kmem_zalloc(
433 ipst
->ips_ipcl_raw_fanout_size
* sizeof (connf_t
), KM_SLEEP
);
434 for (i
= 0; i
< ipst
->ips_ipcl_raw_fanout_size
; i
++) {
435 mutex_init(&ipst
->ips_ipcl_raw_fanout
[i
].connf_lock
, NULL
,
436 MUTEX_DEFAULT
, NULL
);
439 ipst
->ips_ipcl_globalhash_fanout
= kmem_zalloc(
440 sizeof (connf_t
) * CONN_G_HASH_SIZE
, KM_SLEEP
);
441 for (i
= 0; i
< CONN_G_HASH_SIZE
; i
++) {
442 mutex_init(&ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
,
443 NULL
, MUTEX_DEFAULT
, NULL
);
450 kmem_cache_destroy(ip_conn_cache
);
451 kmem_cache_destroy(tcp_conn_cache
);
452 kmem_cache_destroy(udp_conn_cache
);
453 kmem_cache_destroy(rawip_conn_cache
);
454 kmem_cache_destroy(rts_conn_cache
);
458 * All user-level and kernel use of the stack must be gone
462 ipcl_destroy(ip_stack_t
*ipst
)
466 for (i
= 0; i
< ipst
->ips_ipcl_conn_fanout_size
; i
++) {
467 ASSERT(ipst
->ips_ipcl_conn_fanout
[i
].connf_head
== NULL
);
468 mutex_destroy(&ipst
->ips_ipcl_conn_fanout
[i
].connf_lock
);
470 kmem_free(ipst
->ips_ipcl_conn_fanout
, ipst
->ips_ipcl_conn_fanout_size
*
472 ipst
->ips_ipcl_conn_fanout
= NULL
;
474 for (i
= 0; i
< ipst
->ips_ipcl_bind_fanout_size
; i
++) {
475 ASSERT(ipst
->ips_ipcl_bind_fanout
[i
].connf_head
== NULL
);
476 mutex_destroy(&ipst
->ips_ipcl_bind_fanout
[i
].connf_lock
);
478 kmem_free(ipst
->ips_ipcl_bind_fanout
, ipst
->ips_ipcl_bind_fanout_size
*
480 ipst
->ips_ipcl_bind_fanout
= NULL
;
482 for (i
= 0; i
< IPPROTO_MAX
; i
++) {
483 ASSERT(ipst
->ips_ipcl_proto_fanout_v4
[i
].connf_head
== NULL
);
484 mutex_destroy(&ipst
->ips_ipcl_proto_fanout_v4
[i
].connf_lock
);
486 kmem_free(ipst
->ips_ipcl_proto_fanout_v4
,
487 IPPROTO_MAX
* sizeof (connf_t
));
488 ipst
->ips_ipcl_proto_fanout_v4
= NULL
;
490 for (i
= 0; i
< IPPROTO_MAX
; i
++) {
491 ASSERT(ipst
->ips_ipcl_proto_fanout_v6
[i
].connf_head
== NULL
);
492 mutex_destroy(&ipst
->ips_ipcl_proto_fanout_v6
[i
].connf_lock
);
494 kmem_free(ipst
->ips_ipcl_proto_fanout_v6
,
495 IPPROTO_MAX
* sizeof (connf_t
));
496 ipst
->ips_ipcl_proto_fanout_v6
= NULL
;
498 for (i
= 0; i
< ipst
->ips_ipcl_udp_fanout_size
; i
++) {
499 ASSERT(ipst
->ips_ipcl_udp_fanout
[i
].connf_head
== NULL
);
500 mutex_destroy(&ipst
->ips_ipcl_udp_fanout
[i
].connf_lock
);
502 kmem_free(ipst
->ips_ipcl_udp_fanout
, ipst
->ips_ipcl_udp_fanout_size
*
504 ipst
->ips_ipcl_udp_fanout
= NULL
;
506 for (i
= 0; i
< ipst
->ips_ipcl_iptun_fanout_size
; i
++) {
507 ASSERT(ipst
->ips_ipcl_iptun_fanout
[i
].connf_head
== NULL
);
508 mutex_destroy(&ipst
->ips_ipcl_iptun_fanout
[i
].connf_lock
);
510 kmem_free(ipst
->ips_ipcl_iptun_fanout
,
511 ipst
->ips_ipcl_iptun_fanout_size
* sizeof (connf_t
));
512 ipst
->ips_ipcl_iptun_fanout
= NULL
;
514 for (i
= 0; i
< ipst
->ips_ipcl_raw_fanout_size
; i
++) {
515 ASSERT(ipst
->ips_ipcl_raw_fanout
[i
].connf_head
== NULL
);
516 mutex_destroy(&ipst
->ips_ipcl_raw_fanout
[i
].connf_lock
);
518 kmem_free(ipst
->ips_ipcl_raw_fanout
, ipst
->ips_ipcl_raw_fanout_size
*
520 ipst
->ips_ipcl_raw_fanout
= NULL
;
522 for (i
= 0; i
< CONN_G_HASH_SIZE
; i
++) {
523 ASSERT(ipst
->ips_ipcl_globalhash_fanout
[i
].connf_head
== NULL
);
524 mutex_destroy(&ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
);
526 kmem_free(ipst
->ips_ipcl_globalhash_fanout
,
527 sizeof (connf_t
) * CONN_G_HASH_SIZE
);
528 ipst
->ips_ipcl_globalhash_fanout
= NULL
;
530 ASSERT(ipst
->ips_rts_clients
->connf_head
== NULL
);
531 mutex_destroy(&ipst
->ips_rts_clients
->connf_lock
);
532 kmem_free(ipst
->ips_rts_clients
, sizeof (connf_t
));
533 ipst
->ips_rts_clients
= NULL
;
537 * conn creation routine. initialize the conn, sets the reference
538 * and inserts it in the global hash table.
541 ipcl_conn_create(uint32_t type
, int sleep
, netstack_t
*ns
)
544 struct kmem_cache
*conn_cache
;
548 if ((connp
= kmem_cache_alloc(sctp_conn_cache
, sleep
)) == NULL
)
550 sctp_conn_init(connp
);
552 connp
->conn_netstack
= ns
;
553 connp
->conn_ixa
->ixa_ipst
= ns
->netstack_ip
;
554 connp
->conn_ixa
->ixa_conn_id
= (long)connp
;
555 ipcl_globalhash_insert(connp
);
559 conn_cache
= tcp_conn_cache
;
563 conn_cache
= udp_conn_cache
;
567 conn_cache
= rawip_conn_cache
;
571 conn_cache
= rts_conn_cache
;
575 conn_cache
= ip_conn_cache
;
583 if ((connp
= kmem_cache_alloc(conn_cache
, sleep
)) == NULL
)
588 connp
->conn_netstack
= ns
;
589 connp
->conn_ixa
->ixa_ipst
= ns
->netstack_ip
;
590 connp
->conn_ixa
->ixa_conn_id
= (long)connp
;
591 ipcl_globalhash_insert(connp
);
596 ipcl_conn_destroy(conn_t
*connp
)
599 netstack_t
*ns
= connp
->conn_netstack
;
601 ASSERT(!MUTEX_HELD(&connp
->conn_lock
));
602 ASSERT(connp
->conn_ref
== 0);
603 ASSERT(connp
->conn_ioctlref
== 0);
605 DTRACE_PROBE1(conn__destroy
, conn_t
*, connp
);
607 if (connp
->conn_cred
!= NULL
) {
608 crfree(connp
->conn_cred
);
609 connp
->conn_cred
= NULL
;
610 /* ixa_cred done in ipcl_conn_cleanup below */
613 if (connp
->conn_ht_iphc
!= NULL
) {
614 kmem_free(connp
->conn_ht_iphc
, connp
->conn_ht_iphc_allocated
);
615 connp
->conn_ht_iphc
= NULL
;
616 connp
->conn_ht_iphc_allocated
= 0;
617 connp
->conn_ht_iphc_len
= 0;
618 connp
->conn_ht_ulp
= NULL
;
619 connp
->conn_ht_ulp_len
= 0;
621 ip_pkt_free(&connp
->conn_xmit_ipp
);
623 ipcl_globalhash_remove(connp
);
625 if (connp
->conn_latch
!= NULL
) {
626 IPLATCH_REFRELE(connp
->conn_latch
);
627 connp
->conn_latch
= NULL
;
629 if (connp
->conn_latch_in_policy
!= NULL
) {
630 IPPOL_REFRELE(connp
->conn_latch_in_policy
);
631 connp
->conn_latch_in_policy
= NULL
;
633 if (connp
->conn_latch_in_action
!= NULL
) {
634 IPACT_REFRELE(connp
->conn_latch_in_action
);
635 connp
->conn_latch_in_action
= NULL
;
637 if (connp
->conn_policy
!= NULL
) {
638 IPPH_REFRELE(connp
->conn_policy
, ns
);
639 connp
->conn_policy
= NULL
;
642 if (connp
->conn_ipsec_opt_mp
!= NULL
) {
643 freemsg(connp
->conn_ipsec_opt_mp
);
644 connp
->conn_ipsec_opt_mp
= NULL
;
647 if (connp
->conn_flags
& IPCL_TCPCONN
) {
648 tcp_t
*tcp
= connp
->conn_tcp
;
651 mp
= tcp
->tcp_timercache
;
653 tcp
->tcp_tcps
= NULL
;
656 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
659 if (tcp
->tcp_rsrv_mp
!= NULL
) {
660 freeb(tcp
->tcp_rsrv_mp
);
661 tcp
->tcp_rsrv_mp
= NULL
;
662 mutex_destroy(&tcp
->tcp_rsrv_mp_lock
);
665 ipcl_conn_cleanup(connp
);
666 connp
->conn_flags
= IPCL_TCPCONN
;
668 ASSERT(tcp
->tcp_tcps
== NULL
);
669 connp
->conn_netstack
= NULL
;
670 connp
->conn_ixa
->ixa_ipst
= NULL
;
674 bzero(tcp
, sizeof (tcp_t
));
676 tcp
->tcp_timercache
= mp
;
677 tcp
->tcp_connp
= connp
;
678 kmem_cache_free(tcp_conn_cache
, connp
);
682 if (connp
->conn_flags
& IPCL_SCTPCONN
) {
688 ipcl_conn_cleanup(connp
);
690 connp
->conn_netstack
= NULL
;
691 connp
->conn_ixa
->ixa_ipst
= NULL
;
695 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
696 if (connp
->conn_flags
& IPCL_UDPCONN
) {
697 connp
->conn_flags
= IPCL_UDPCONN
;
698 kmem_cache_free(udp_conn_cache
, connp
);
699 } else if (connp
->conn_flags
& IPCL_RAWIPCONN
) {
700 connp
->conn_flags
= IPCL_RAWIPCONN
;
701 connp
->conn_proto
= IPPROTO_ICMP
;
702 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
703 kmem_cache_free(rawip_conn_cache
, connp
);
704 } else if (connp
->conn_flags
& IPCL_RTSCONN
) {
705 connp
->conn_flags
= IPCL_RTSCONN
;
706 kmem_cache_free(rts_conn_cache
, connp
);
708 connp
->conn_flags
= IPCL_IPCCONN
;
709 ASSERT(connp
->conn_flags
& IPCL_IPCCONN
);
710 ASSERT(connp
->conn_priv
== NULL
);
711 kmem_cache_free(ip_conn_cache
, connp
);
716 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
717 * which table the conn belonged to). So for debugging we can see which hash
718 * table this connection was in.
720 #define IPCL_HASH_REMOVE(connp) { \
721 connf_t *connfp = (connp)->conn_fanout; \
722 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \
723 if (connfp != NULL) { \
724 mutex_enter(&connfp->connf_lock); \
725 if ((connp)->conn_next != NULL) \
726 (connp)->conn_next->conn_prev = \
727 (connp)->conn_prev; \
728 if ((connp)->conn_prev != NULL) \
729 (connp)->conn_prev->conn_next = \
730 (connp)->conn_next; \
732 connfp->connf_head = (connp)->conn_next; \
733 (connp)->conn_fanout = NULL; \
734 (connp)->conn_next = NULL; \
735 (connp)->conn_prev = NULL; \
736 (connp)->conn_flags |= IPCL_REMOVED; \
737 CONN_DEC_REF((connp)); \
738 mutex_exit(&connfp->connf_lock); \
743 ipcl_hash_remove(conn_t
*connp
)
745 uint8_t protocol
= connp
->conn_proto
;
747 IPCL_HASH_REMOVE(connp
);
748 if (protocol
== IPPROTO_RSVP
)
749 ill_set_inputfn_all(connp
->conn_netstack
->netstack_ip
);
753 * The whole purpose of this function is allow removal of
754 * a conn_t from the connected hash for timewait reclaim.
755 * This is essentially a TW reclaim fastpath where timewait
756 * collector checks under fanout lock (so no one else can
757 * get access to the conn_t) that refcnt is 2 i.e. one for
758 * TCP and one for the classifier hash list. If ref count
759 * is indeed 2, we can just remove the conn under lock and
760 * avoid cleaning up the conn under squeue. This gives us
761 * improved performance.
764 ipcl_hash_remove_locked(conn_t
*connp
, connf_t
*connfp
)
766 ASSERT(MUTEX_HELD(&connfp
->connf_lock
));
767 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
769 if ((connp
)->conn_next
!= NULL
) {
770 (connp
)->conn_next
->conn_prev
= (connp
)->conn_prev
;
772 if ((connp
)->conn_prev
!= NULL
) {
773 (connp
)->conn_prev
->conn_next
= (connp
)->conn_next
;
775 connfp
->connf_head
= (connp
)->conn_next
;
777 (connp
)->conn_fanout
= NULL
;
778 (connp
)->conn_next
= NULL
;
779 (connp
)->conn_prev
= NULL
;
780 (connp
)->conn_flags
|= IPCL_REMOVED
;
781 ASSERT((connp
)->conn_ref
== 2);
785 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \
786 ASSERT((connp)->conn_fanout == NULL); \
787 ASSERT((connp)->conn_next == NULL); \
788 ASSERT((connp)->conn_prev == NULL); \
789 if ((connfp)->connf_head != NULL) { \
790 (connfp)->connf_head->conn_prev = (connp); \
791 (connp)->conn_next = (connfp)->connf_head; \
793 (connp)->conn_fanout = (connfp); \
794 (connfp)->connf_head = (connp); \
795 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
797 CONN_INC_REF(connp); \
800 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \
801 IPCL_HASH_REMOVE((connp)); \
802 mutex_enter(&(connfp)->connf_lock); \
803 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \
804 mutex_exit(&(connfp)->connf_lock); \
807 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \
808 conn_t *pconnp = NULL, *nconnp; \
809 IPCL_HASH_REMOVE((connp)); \
810 mutex_enter(&(connfp)->connf_lock); \
811 nconnp = (connfp)->connf_head; \
812 while (nconnp != NULL && \
813 !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \
815 nconnp = nconnp->conn_next; \
817 if (pconnp != NULL) { \
818 pconnp->conn_next = (connp); \
819 (connp)->conn_prev = pconnp; \
821 (connfp)->connf_head = (connp); \
823 if (nconnp != NULL) { \
824 (connp)->conn_next = nconnp; \
825 nconnp->conn_prev = (connp); \
827 (connp)->conn_fanout = (connfp); \
828 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
830 CONN_INC_REF(connp); \
831 mutex_exit(&(connfp)->connf_lock); \
834 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \
835 conn_t **list, *prev, *next; \
836 boolean_t isv4mapped = \
837 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \
838 IPCL_HASH_REMOVE((connp)); \
839 mutex_enter(&(connfp)->connf_lock); \
840 list = &(connfp)->connf_head; \
842 while ((next = *list) != NULL) { \
844 IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \
845 connp->conn_zoneid == next->conn_zoneid) { \
846 (connp)->conn_next = next; \
848 prev = next->conn_prev; \
849 next->conn_prev = (connp); \
852 list = &next->conn_next; \
855 (connp)->conn_prev = prev; \
857 (connp)->conn_fanout = (connfp); \
858 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
860 CONN_INC_REF((connp)); \
861 mutex_exit(&(connfp)->connf_lock); \
865 ipcl_hash_insert_wildcard(connf_t
*connfp
, conn_t
*connp
)
867 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
871 * Because the classifier is used to classify inbound packets, the destination
872 * address is meant to be our local tunnel address (tunnel source), and the
873 * source the remote tunnel address (tunnel destination).
875 * Note that conn_proto can't be used for fanout since the upper protocol
876 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
879 ipcl_iptun_classify_v4(ipaddr_t
*src
, ipaddr_t
*dst
, ip_stack_t
*ipst
)
884 /* first look for IPv4 tunnel links */
885 connfp
= &ipst
->ips_ipcl_iptun_fanout
[IPCL_IPTUN_HASH(*dst
, *src
)];
886 mutex_enter(&connfp
->connf_lock
);
887 for (connp
= connfp
->connf_head
; connp
!= NULL
;
888 connp
= connp
->conn_next
) {
889 if (IPCL_IPTUN_MATCH(connp
, *dst
, *src
))
895 mutex_exit(&connfp
->connf_lock
);
897 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
898 connfp
= &ipst
->ips_ipcl_iptun_fanout
[IPCL_IPTUN_HASH(*dst
,
900 mutex_enter(&connfp
->connf_lock
);
901 for (connp
= connfp
->connf_head
; connp
!= NULL
;
902 connp
= connp
->conn_next
) {
903 if (IPCL_IPTUN_MATCH(connp
, *dst
, INADDR_ANY
))
909 mutex_exit(&connfp
->connf_lock
);
914 ipcl_iptun_classify_v6(in6_addr_t
*src
, in6_addr_t
*dst
, ip_stack_t
*ipst
)
919 /* Look for an IPv6 tunnel link */
920 connfp
= &ipst
->ips_ipcl_iptun_fanout
[IPCL_IPTUN_HASH_V6(dst
, src
)];
921 mutex_enter(&connfp
->connf_lock
);
922 for (connp
= connfp
->connf_head
; connp
!= NULL
;
923 connp
= connp
->conn_next
) {
924 if (IPCL_IPTUN_MATCH_V6(connp
, dst
, src
)) {
929 mutex_exit(&connfp
->connf_lock
);
934 * This function is used only for inserting SCTP raw socket now.
935 * This may change later.
937 * Note that only one raw socket can be bound to a port. The param
938 * lport is in network byte order.
941 ipcl_sctp_hash_insert(conn_t
*connp
, in_port_t lport
)
945 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
947 connfp
= &ipst
->ips_ipcl_raw_fanout
[IPCL_RAW_HASH(ntohs(lport
), ipst
)];
949 /* Check for existing raw socket already bound to the port. */
950 mutex_enter(&connfp
->connf_lock
);
951 for (oconnp
= connfp
->connf_head
; oconnp
!= NULL
;
952 oconnp
= oconnp
->conn_next
) {
953 if (oconnp
->conn_lport
== lport
&&
954 oconnp
->conn_zoneid
== connp
->conn_zoneid
&&
955 oconnp
->conn_family
== connp
->conn_family
&&
956 ((IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_laddr_v6
) ||
957 IN6_IS_ADDR_UNSPECIFIED(&oconnp
->conn_laddr_v6
) ||
958 IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_laddr_v6
) ||
959 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp
->conn_laddr_v6
)) ||
960 IN6_ARE_ADDR_EQUAL(&oconnp
->conn_laddr_v6
,
961 &connp
->conn_laddr_v6
))) {
965 mutex_exit(&connfp
->connf_lock
);
967 return (EADDRNOTAVAIL
);
969 if (IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
) ||
970 IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_faddr_v6
)) {
971 if (IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_laddr_v6
) ||
972 IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_laddr_v6
)) {
973 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
975 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
978 IPCL_HASH_INSERT_CONNECTED(connfp
, connp
);
984 ipcl_iptun_hash_insert(conn_t
*connp
, ip_stack_t
*ipst
)
988 ipaddr_t laddr
= connp
->conn_laddr_v4
;
989 ipaddr_t faddr
= connp
->conn_faddr_v4
;
991 connfp
= &ipst
->ips_ipcl_iptun_fanout
[IPCL_IPTUN_HASH(laddr
, faddr
)];
992 mutex_enter(&connfp
->connf_lock
);
993 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
994 tconnp
= tconnp
->conn_next
) {
995 if (IPCL_IPTUN_MATCH(tconnp
, laddr
, faddr
)) {
996 /* A tunnel is already bound to these addresses. */
997 mutex_exit(&connfp
->connf_lock
);
1001 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp
, connp
);
1002 mutex_exit(&connfp
->connf_lock
);
1007 ipcl_iptun_hash_insert_v6(conn_t
*connp
, ip_stack_t
*ipst
)
1011 in6_addr_t
*laddr
= &connp
->conn_laddr_v6
;
1012 in6_addr_t
*faddr
= &connp
->conn_faddr_v6
;
1014 connfp
= &ipst
->ips_ipcl_iptun_fanout
[IPCL_IPTUN_HASH_V6(laddr
, faddr
)];
1015 mutex_enter(&connfp
->connf_lock
);
1016 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
1017 tconnp
= tconnp
->conn_next
) {
1018 if (IPCL_IPTUN_MATCH_V6(tconnp
, laddr
, faddr
)) {
1019 /* A tunnel is already bound to these addresses. */
1020 mutex_exit(&connfp
->connf_lock
);
1021 return (EADDRINUSE
);
1024 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp
, connp
);
1025 mutex_exit(&connfp
->connf_lock
);
1030 * (v4, v6) bind hash insertion routines
1031 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1035 ipcl_bind_insert(conn_t
*connp
)
1037 if (connp
->conn_ipversion
== IPV6_VERSION
)
1038 return (ipcl_bind_insert_v6(connp
));
1040 return (ipcl_bind_insert_v4(connp
));
1044 ipcl_bind_insert_v4(conn_t
*connp
)
1048 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
1049 uint16_t lport
= connp
->conn_lport
;
1050 uint8_t protocol
= connp
->conn_proto
;
1052 if (IPCL_IS_IPTUN(connp
))
1053 return (ipcl_iptun_hash_insert(connp
, ipst
));
1058 if (protocol
== IPPROTO_UDP
) {
1059 connfp
= &ipst
->ips_ipcl_udp_fanout
[
1060 IPCL_UDP_HASH(lport
, ipst
)];
1062 connfp
= &ipst
->ips_ipcl_proto_fanout_v4
[protocol
];
1065 if (connp
->conn_faddr_v4
!= INADDR_ANY
) {
1066 IPCL_HASH_INSERT_CONNECTED(connfp
, connp
);
1067 } else if (connp
->conn_laddr_v4
!= INADDR_ANY
) {
1068 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1070 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1072 if (protocol
== IPPROTO_RSVP
)
1073 ill_set_inputfn_all(ipst
);
1077 /* Insert it in the Bind Hash */
1078 ASSERT(connp
->conn_zoneid
!= ALL_ZONES
);
1079 connfp
= &ipst
->ips_ipcl_bind_fanout
[
1080 IPCL_BIND_HASH(lport
, ipst
)];
1081 if (connp
->conn_laddr_v4
!= INADDR_ANY
) {
1082 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1084 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1089 ret
= ipcl_sctp_hash_insert(connp
, lport
);
1097 ipcl_bind_insert_v6(conn_t
*connp
)
1101 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
1102 uint16_t lport
= connp
->conn_lport
;
1103 uint8_t protocol
= connp
->conn_proto
;
1105 if (IPCL_IS_IPTUN(connp
)) {
1106 return (ipcl_iptun_hash_insert_v6(connp
, ipst
));
1112 if (protocol
== IPPROTO_UDP
) {
1113 connfp
= &ipst
->ips_ipcl_udp_fanout
[
1114 IPCL_UDP_HASH(lport
, ipst
)];
1116 connfp
= &ipst
->ips_ipcl_proto_fanout_v6
[protocol
];
1119 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
)) {
1120 IPCL_HASH_INSERT_CONNECTED(connfp
, connp
);
1121 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_laddr_v6
)) {
1122 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1124 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1129 /* Insert it in the Bind Hash */
1130 ASSERT(connp
->conn_zoneid
!= ALL_ZONES
);
1131 connfp
= &ipst
->ips_ipcl_bind_fanout
[
1132 IPCL_BIND_HASH(lport
, ipst
)];
1133 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_laddr_v6
)) {
1134 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1136 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1141 ret
= ipcl_sctp_hash_insert(connp
, lport
);
1149 * ipcl_conn_hash insertion routines.
1150 * The caller has already set conn_proto and the addresses/ports in the conn_t.
1154 ipcl_conn_insert(conn_t
*connp
)
1156 if (connp
->conn_ipversion
== IPV6_VERSION
)
1157 return (ipcl_conn_insert_v6(connp
));
1159 return (ipcl_conn_insert_v4(connp
));
1163 ipcl_conn_insert_v4(conn_t
*connp
)
1168 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
1169 uint16_t lport
= connp
->conn_lport
;
1170 uint8_t protocol
= connp
->conn_proto
;
1172 if (IPCL_IS_IPTUN(connp
))
1173 return (ipcl_iptun_hash_insert(connp
, ipst
));
1178 * For TCP, we check whether the connection tuple already
1179 * exists before allowing the connection to proceed. We
1180 * also allow indexing on the zoneid. This is to allow
1181 * multiple shared stack zones to have the same tcp
1182 * connection tuple. In practice this only happens for
1183 * INADDR_LOOPBACK as it's the only local address which
1184 * doesn't have to be unique.
1186 connfp
= &ipst
->ips_ipcl_conn_fanout
[
1187 IPCL_CONN_HASH(connp
->conn_faddr_v4
,
1188 connp
->conn_ports
, ipst
)];
1189 mutex_enter(&connfp
->connf_lock
);
1190 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
1191 tconnp
= tconnp
->conn_next
) {
1192 if (IPCL_CONN_MATCH(tconnp
, connp
->conn_proto
,
1193 connp
->conn_faddr_v4
, connp
->conn_laddr_v4
,
1194 connp
->conn_ports
) &&
1195 IPCL_ZONE_MATCH(tconnp
, connp
->conn_zoneid
)) {
1196 /* Already have a conn. bail out */
1197 mutex_exit(&connfp
->connf_lock
);
1198 return (EADDRINUSE
);
1201 if (connp
->conn_fanout
!= NULL
) {
1203 * Probably a XTI/TLI application trying to do a
1204 * rebind. Let it happen.
1206 mutex_exit(&connfp
->connf_lock
);
1207 IPCL_HASH_REMOVE(connp
);
1208 mutex_enter(&connfp
->connf_lock
);
1211 ASSERT(connp
->conn_recv
!= NULL
);
1212 ASSERT(connp
->conn_recvicmp
!= NULL
);
1214 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp
, connp
);
1215 mutex_exit(&connfp
->connf_lock
);
1220 * The raw socket may have already been bound, remove it
1221 * from the hash first.
1223 IPCL_HASH_REMOVE(connp
);
1224 ret
= ipcl_sctp_hash_insert(connp
, lport
);
1229 if (protocol
== IPPROTO_UDP
) {
1230 connfp
= &ipst
->ips_ipcl_udp_fanout
[
1231 IPCL_UDP_HASH(lport
, ipst
)];
1233 connfp
= &ipst
->ips_ipcl_proto_fanout_v4
[protocol
];
1236 if (connp
->conn_faddr_v4
!= INADDR_ANY
) {
1237 IPCL_HASH_INSERT_CONNECTED(connfp
, connp
);
1238 } else if (connp
->conn_laddr_v4
!= INADDR_ANY
) {
1239 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1241 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1250 ipcl_conn_insert_v6(conn_t
*connp
)
1255 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
1256 uint16_t lport
= connp
->conn_lport
;
1257 uint8_t protocol
= connp
->conn_proto
;
1258 uint_t ifindex
= connp
->conn_bound_if
;
1260 if (IPCL_IS_IPTUN(connp
))
1261 return (ipcl_iptun_hash_insert_v6(connp
, ipst
));
1267 * For tcp, we check whether the connection tuple already
1268 * exists before allowing the connection to proceed. We
1269 * also allow indexing on the zoneid. This is to allow
1270 * multiple shared stack zones to have the same tcp
1271 * connection tuple. In practice this only happens for
1272 * ipv6_loopback as it's the only local address which
1273 * doesn't have to be unique.
1275 connfp
= &ipst
->ips_ipcl_conn_fanout
[
1276 IPCL_CONN_HASH_V6(connp
->conn_faddr_v6
, connp
->conn_ports
,
1278 mutex_enter(&connfp
->connf_lock
);
1279 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
1280 tconnp
= tconnp
->conn_next
) {
1281 /* NOTE: need to match zoneid. Bug in onnv-gate */
1282 if (IPCL_CONN_MATCH_V6(tconnp
, connp
->conn_proto
,
1283 connp
->conn_faddr_v6
, connp
->conn_laddr_v6
,
1284 connp
->conn_ports
) &&
1285 (tconnp
->conn_bound_if
== 0 ||
1286 tconnp
->conn_bound_if
== ifindex
) &&
1287 IPCL_ZONE_MATCH(tconnp
, connp
->conn_zoneid
)) {
1288 /* Already have a conn. bail out */
1289 mutex_exit(&connfp
->connf_lock
);
1290 return (EADDRINUSE
);
1293 if (connp
->conn_fanout
!= NULL
) {
1295 * Probably a XTI/TLI application trying to do a
1296 * rebind. Let it happen.
1298 mutex_exit(&connfp
->connf_lock
);
1299 IPCL_HASH_REMOVE(connp
);
1300 mutex_enter(&connfp
->connf_lock
);
1302 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp
, connp
);
1303 mutex_exit(&connfp
->connf_lock
);
1307 IPCL_HASH_REMOVE(connp
);
1308 ret
= ipcl_sctp_hash_insert(connp
, lport
);
1313 if (protocol
== IPPROTO_UDP
) {
1314 connfp
= &ipst
->ips_ipcl_udp_fanout
[
1315 IPCL_UDP_HASH(lport
, ipst
)];
1317 connfp
= &ipst
->ips_ipcl_proto_fanout_v6
[protocol
];
1320 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
)) {
1321 IPCL_HASH_INSERT_CONNECTED(connfp
, connp
);
1322 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_laddr_v6
)) {
1323 IPCL_HASH_INSERT_BOUND(connfp
, connp
);
1325 IPCL_HASH_INSERT_WILDCARD(connfp
, connp
);
1334 * v4 packet classifying function. looks up the fanout table to
1335 * find the conn, the packet belongs to. returns the conn with
1336 * the reference held, null otherwise.
1338 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1339 * Lookup" comment block are applied. Labels are also checked as described
1343 ipcl_classify_v4(mblk_t
*mp
, uint8_t protocol
, uint_t hdr_len
,
1344 ip_recv_attr_t
*ira
, ip_stack_t
*ipst
)
1347 connf_t
*connfp
, *bind_connfp
;
1353 zoneid_t zoneid
= ira
->ira_zoneid
;
1355 ipha
= (ipha_t
*)mp
->b_rptr
;
1356 up
= (uint16_t *)((uchar_t
*)ipha
+ hdr_len
+ TCP_PORTS_OFFSET
);
1360 ports
= *(uint32_t *)up
;
1362 &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH(ipha
->ipha_src
,
1364 mutex_enter(&connfp
->connf_lock
);
1365 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1366 connp
= connp
->conn_next
) {
1367 if (IPCL_CONN_MATCH(connp
, protocol
,
1368 ipha
->ipha_src
, ipha
->ipha_dst
, ports
) &&
1369 (connp
->conn_zoneid
== zoneid
||
1370 connp
->conn_allzones
))
1374 if (connp
!= NULL
) {
1375 /* We have a fully-bound TCP connection. */
1376 CONN_INC_REF(connp
);
1377 mutex_exit(&connfp
->connf_lock
);
1381 mutex_exit(&connfp
->connf_lock
);
1384 &ipst
->ips_ipcl_bind_fanout
[IPCL_BIND_HASH(lport
, ipst
)];
1385 mutex_enter(&bind_connfp
->connf_lock
);
1386 for (connp
= bind_connfp
->connf_head
; connp
!= NULL
;
1387 connp
= connp
->conn_next
) {
1388 if (IPCL_BIND_MATCH(connp
, protocol
, ipha
->ipha_dst
,
1390 (connp
->conn_zoneid
== zoneid
||
1391 connp
->conn_allzones
))
1395 if (connp
!= NULL
) {
1396 /* Have a listener at least */
1397 CONN_INC_REF(connp
);
1398 mutex_exit(&bind_connfp
->connf_lock
);
1402 mutex_exit(&bind_connfp
->connf_lock
);
1408 connfp
= &ipst
->ips_ipcl_udp_fanout
[IPCL_UDP_HASH(lport
, ipst
)];
1409 mutex_enter(&connfp
->connf_lock
);
1410 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1411 connp
= connp
->conn_next
) {
1412 if (IPCL_UDP_MATCH(connp
, lport
, ipha
->ipha_dst
,
1413 fport
, ipha
->ipha_src
) &&
1414 (connp
->conn_zoneid
== zoneid
||
1415 connp
->conn_allzones
))
1419 if (connp
!= NULL
) {
1420 CONN_INC_REF(connp
);
1421 mutex_exit(&connfp
->connf_lock
);
1426 * We shouldn't come here for multicast/broadcast packets
1428 mutex_exit(&connfp
->connf_lock
);
1434 return (ipcl_iptun_classify_v4(&ipha
->ipha_src
,
1435 &ipha
->ipha_dst
, ipst
));
1442 ipcl_classify_v6(mblk_t
*mp
, uint8_t protocol
, uint_t hdr_len
,
1443 ip_recv_attr_t
*ira
, ip_stack_t
*ipst
)
1446 connf_t
*connfp
, *bind_connfp
;
1453 zoneid_t zoneid
= ira
->ira_zoneid
;
1455 ip6h
= (ip6_t
*)mp
->b_rptr
;
1459 tcpha
= (tcpha_t
*)&mp
->b_rptr
[hdr_len
];
1460 up
= &tcpha
->tha_lport
;
1461 ports
= *(uint32_t *)up
;
1464 &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH_V6(ip6h
->ip6_src
,
1466 mutex_enter(&connfp
->connf_lock
);
1467 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1468 connp
= connp
->conn_next
) {
1469 if (IPCL_CONN_MATCH_V6(connp
, protocol
,
1470 ip6h
->ip6_src
, ip6h
->ip6_dst
, ports
) &&
1471 (connp
->conn_zoneid
== zoneid
||
1472 connp
->conn_allzones
))
1476 if (connp
!= NULL
) {
1477 /* We have a fully-bound TCP connection. */
1478 CONN_INC_REF(connp
);
1479 mutex_exit(&connfp
->connf_lock
);
1483 mutex_exit(&connfp
->connf_lock
);
1487 &ipst
->ips_ipcl_bind_fanout
[IPCL_BIND_HASH(lport
, ipst
)];
1488 mutex_enter(&bind_connfp
->connf_lock
);
1489 for (connp
= bind_connfp
->connf_head
; connp
!= NULL
;
1490 connp
= connp
->conn_next
) {
1491 if (IPCL_BIND_MATCH_V6(connp
, protocol
,
1492 ip6h
->ip6_dst
, lport
) &&
1493 (connp
->conn_zoneid
== zoneid
||
1494 connp
->conn_allzones
))
1498 if (connp
!= NULL
) {
1499 /* Have a listener at least */
1500 CONN_INC_REF(connp
);
1501 mutex_exit(&bind_connfp
->connf_lock
);
1505 mutex_exit(&bind_connfp
->connf_lock
);
1509 up
= (uint16_t *)&mp
->b_rptr
[hdr_len
];
1512 connfp
= &ipst
->ips_ipcl_udp_fanout
[IPCL_UDP_HASH(lport
, ipst
)];
1513 mutex_enter(&connfp
->connf_lock
);
1514 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1515 connp
= connp
->conn_next
) {
1516 if (IPCL_UDP_MATCH_V6(connp
, lport
, ip6h
->ip6_dst
,
1517 fport
, ip6h
->ip6_src
) &&
1518 (connp
->conn_zoneid
== zoneid
||
1519 connp
->conn_allzones
))
1523 if (connp
!= NULL
) {
1524 CONN_INC_REF(connp
);
1525 mutex_exit(&connfp
->connf_lock
);
1530 * We shouldn't come here for multicast/broadcast packets
1532 mutex_exit(&connfp
->connf_lock
);
1536 return (ipcl_iptun_classify_v6(&ip6h
->ip6_src
,
1537 &ip6h
->ip6_dst
, ipst
));
1544 * wrapper around ipcl_classify_(v4,v6) routines.
1547 ipcl_classify(mblk_t
*mp
, ip_recv_attr_t
*ira
, ip_stack_t
*ipst
)
1549 if (ira
->ira_flags
& IRAF_IS_IPV4
) {
1550 return (ipcl_classify_v4(mp
, ira
->ira_protocol
,
1551 ira
->ira_ip_hdr_length
, ira
, ipst
));
1553 return (ipcl_classify_v6(mp
, ira
->ira_protocol
,
1554 ira
->ira_ip_hdr_length
, ira
, ipst
));
1559 * Only used to classify SCTP RAW sockets
1562 ipcl_classify_raw(mblk_t
*mp
, uint8_t protocol
, uint32_t ports
,
1563 ipha_t
*ipha
, ip6_t
*ip6h
, ip_recv_attr_t
*ira
, ip_stack_t
*ipst
)
1570 zoneid_t zoneid
= ira
->ira_zoneid
;
1572 lport
= ((uint16_t *)&ports
)[1];
1573 if (ira
->ira_flags
& IRAF_IS_IPV4
) {
1574 dst
= (const void *)&ipha
->ipha_dst
;
1575 ipversion
= IPV4_VERSION
;
1577 dst
= (const void *)&ip6h
->ip6_dst
;
1578 ipversion
= IPV6_VERSION
;
1581 connfp
= &ipst
->ips_ipcl_raw_fanout
[IPCL_RAW_HASH(ntohs(lport
), ipst
)];
1582 mutex_enter(&connfp
->connf_lock
);
1583 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1584 connp
= connp
->conn_next
) {
1585 /* We don't allow v4 fallback for v6 raw socket. */
1586 if (ipversion
!= connp
->conn_ipversion
)
1588 if (!IN6_IS_ADDR_UNSPECIFIED(&connp
->conn_faddr_v6
) &&
1589 !IN6_IS_ADDR_V4MAPPED_ANY(&connp
->conn_faddr_v6
)) {
1590 if (ipversion
== IPV4_VERSION
) {
1591 if (!IPCL_CONN_MATCH(connp
, protocol
,
1592 ipha
->ipha_src
, ipha
->ipha_dst
, ports
))
1595 if (!IPCL_CONN_MATCH_V6(connp
, protocol
,
1596 ip6h
->ip6_src
, ip6h
->ip6_dst
, ports
))
1600 if (ipversion
== IPV4_VERSION
) {
1601 if (!IPCL_BIND_MATCH(connp
, protocol
,
1602 ipha
->ipha_dst
, lport
))
1605 if (!IPCL_BIND_MATCH_V6(connp
, protocol
,
1606 ip6h
->ip6_dst
, lport
))
1611 if (connp
->conn_zoneid
== zoneid
|| connp
->conn_allzones
)
1617 mutex_exit(&connfp
->connf_lock
);
1619 /* Try to look for a wildcard SCTP RAW socket match. */
1620 connfp
= &ipst
->ips_ipcl_raw_fanout
[IPCL_RAW_HASH(0, ipst
)];
1621 mutex_enter(&connfp
->connf_lock
);
1622 for (connp
= connfp
->connf_head
; connp
!= NULL
;
1623 connp
= connp
->conn_next
) {
1624 /* We don't allow v4 fallback for v6 raw socket. */
1625 if (ipversion
!= connp
->conn_ipversion
)
1627 if (!IPCL_ZONE_MATCH(connp
, zoneid
))
1630 if (ipversion
== IPV4_VERSION
) {
1631 if (IPCL_RAW_MATCH(connp
, protocol
, ipha
->ipha_dst
))
1634 if (IPCL_RAW_MATCH_V6(connp
, protocol
, ip6h
->ip6_dst
)) {
1643 mutex_exit(&connfp
->connf_lock
);
1647 ASSERT(connp
!= NULL
);
1648 CONN_INC_REF(connp
);
1649 mutex_exit(&connfp
->connf_lock
);
1655 tcp_conn_constructor(void *buf
, void *cdrarg
, int kmflags
)
1657 itc_t
*itc
= (itc_t
*)buf
;
1658 conn_t
*connp
= &itc
->itc_conn
;
1659 tcp_t
*tcp
= (tcp_t
*)&itc
[1];
1661 bzero(connp
, sizeof (conn_t
));
1662 bzero(tcp
, sizeof (tcp_t
));
1664 mutex_init(&connp
->conn_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1665 cv_init(&connp
->conn_cv
, NULL
, CV_DEFAULT
, NULL
);
1666 cv_init(&connp
->conn_sq_cv
, NULL
, CV_DEFAULT
, NULL
);
1667 tcp
->tcp_timercache
= tcp_timermp_alloc(kmflags
);
1668 if (tcp
->tcp_timercache
== NULL
)
1670 connp
->conn_tcp
= tcp
;
1671 connp
->conn_flags
= IPCL_TCPCONN
;
1672 connp
->conn_proto
= IPPROTO_TCP
;
1673 tcp
->tcp_connp
= connp
;
1674 rw_init(&connp
->conn_ilg_lock
, NULL
, RW_DEFAULT
, NULL
);
1676 connp
->conn_ixa
= kmem_zalloc(sizeof (ip_xmit_attr_t
), kmflags
);
1677 if (connp
->conn_ixa
== NULL
) {
1678 tcp_timermp_free(tcp
);
1681 connp
->conn_ixa
->ixa_refcnt
= 1;
1682 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
1683 connp
->conn_ixa
->ixa_xmit_hint
= CONN_TO_XMIT_HINT(connp
);
1689 tcp_conn_destructor(void *buf
, void *cdrarg
)
1691 itc_t
*itc
= (itc_t
*)buf
;
1692 conn_t
*connp
= &itc
->itc_conn
;
1693 tcp_t
*tcp
= (tcp_t
*)&itc
[1];
1695 ASSERT(connp
->conn_flags
& IPCL_TCPCONN
);
1696 ASSERT(tcp
->tcp_connp
== connp
);
1697 ASSERT(connp
->conn_tcp
== tcp
);
1698 tcp_timermp_free(tcp
);
1699 mutex_destroy(&connp
->conn_lock
);
1700 cv_destroy(&connp
->conn_cv
);
1701 cv_destroy(&connp
->conn_sq_cv
);
1702 rw_destroy(&connp
->conn_ilg_lock
);
1704 /* Can be NULL if constructor failed */
1705 if (connp
->conn_ixa
!= NULL
) {
1706 ASSERT(connp
->conn_ixa
->ixa_refcnt
== 1);
1707 ASSERT(connp
->conn_ixa
->ixa_ire
== NULL
);
1708 ASSERT(connp
->conn_ixa
->ixa_nce
== NULL
);
1709 ixa_refrele(connp
->conn_ixa
);
1715 ip_conn_constructor(void *buf
, void *cdrarg
, int kmflags
)
1717 itc_t
*itc
= (itc_t
*)buf
;
1718 conn_t
*connp
= &itc
->itc_conn
;
1720 bzero(connp
, sizeof (conn_t
));
1721 mutex_init(&connp
->conn_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1722 cv_init(&connp
->conn_cv
, NULL
, CV_DEFAULT
, NULL
);
1723 connp
->conn_flags
= IPCL_IPCCONN
;
1724 rw_init(&connp
->conn_ilg_lock
, NULL
, RW_DEFAULT
, NULL
);
1726 connp
->conn_ixa
= kmem_zalloc(sizeof (ip_xmit_attr_t
), kmflags
);
1727 if (connp
->conn_ixa
== NULL
)
1729 connp
->conn_ixa
->ixa_refcnt
= 1;
1730 connp
->conn_ixa
->ixa_xmit_hint
= CONN_TO_XMIT_HINT(connp
);
1736 ip_conn_destructor(void *buf
, void *cdrarg
)
1738 itc_t
*itc
= (itc_t
*)buf
;
1739 conn_t
*connp
= &itc
->itc_conn
;
1741 ASSERT(connp
->conn_flags
& IPCL_IPCCONN
);
1742 ASSERT(connp
->conn_priv
== NULL
);
1743 mutex_destroy(&connp
->conn_lock
);
1744 cv_destroy(&connp
->conn_cv
);
1745 rw_destroy(&connp
->conn_ilg_lock
);
1747 /* Can be NULL if constructor failed */
1748 if (connp
->conn_ixa
!= NULL
) {
1749 ASSERT(connp
->conn_ixa
->ixa_refcnt
== 1);
1750 ASSERT(connp
->conn_ixa
->ixa_ire
== NULL
);
1751 ASSERT(connp
->conn_ixa
->ixa_nce
== NULL
);
1752 ixa_refrele(connp
->conn_ixa
);
1758 udp_conn_constructor(void *buf
, void *cdrarg
, int kmflags
)
1760 itc_t
*itc
= (itc_t
*)buf
;
1761 conn_t
*connp
= &itc
->itc_conn
;
1762 udp_t
*udp
= (udp_t
*)&itc
[1];
1764 bzero(connp
, sizeof (conn_t
));
1765 bzero(udp
, sizeof (udp_t
));
1767 mutex_init(&connp
->conn_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1768 cv_init(&connp
->conn_cv
, NULL
, CV_DEFAULT
, NULL
);
1769 connp
->conn_udp
= udp
;
1770 connp
->conn_flags
= IPCL_UDPCONN
;
1771 connp
->conn_proto
= IPPROTO_UDP
;
1772 udp
->udp_connp
= connp
;
1773 rw_init(&connp
->conn_ilg_lock
, NULL
, RW_DEFAULT
, NULL
);
1774 connp
->conn_ixa
= kmem_zalloc(sizeof (ip_xmit_attr_t
), kmflags
);
1775 if (connp
->conn_ixa
== NULL
)
1777 connp
->conn_ixa
->ixa_refcnt
= 1;
1778 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
1779 connp
->conn_ixa
->ixa_xmit_hint
= CONN_TO_XMIT_HINT(connp
);
1785 udp_conn_destructor(void *buf
, void *cdrarg
)
1787 itc_t
*itc
= (itc_t
*)buf
;
1788 conn_t
*connp
= &itc
->itc_conn
;
1789 udp_t
*udp
= (udp_t
*)&itc
[1];
1791 ASSERT(connp
->conn_flags
& IPCL_UDPCONN
);
1792 ASSERT(udp
->udp_connp
== connp
);
1793 ASSERT(connp
->conn_udp
== udp
);
1794 mutex_destroy(&connp
->conn_lock
);
1795 cv_destroy(&connp
->conn_cv
);
1796 rw_destroy(&connp
->conn_ilg_lock
);
1798 /* Can be NULL if constructor failed */
1799 if (connp
->conn_ixa
!= NULL
) {
1800 ASSERT(connp
->conn_ixa
->ixa_refcnt
== 1);
1801 ASSERT(connp
->conn_ixa
->ixa_ire
== NULL
);
1802 ASSERT(connp
->conn_ixa
->ixa_nce
== NULL
);
1803 ixa_refrele(connp
->conn_ixa
);
1809 rawip_conn_constructor(void *buf
, void *cdrarg
, int kmflags
)
1811 itc_t
*itc
= (itc_t
*)buf
;
1812 conn_t
*connp
= &itc
->itc_conn
;
1813 icmp_t
*icmp
= (icmp_t
*)&itc
[1];
1815 bzero(connp
, sizeof (conn_t
));
1816 bzero(icmp
, sizeof (icmp_t
));
1818 mutex_init(&connp
->conn_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1819 cv_init(&connp
->conn_cv
, NULL
, CV_DEFAULT
, NULL
);
1820 connp
->conn_icmp
= icmp
;
1821 connp
->conn_flags
= IPCL_RAWIPCONN
;
1822 connp
->conn_proto
= IPPROTO_ICMP
;
1823 icmp
->icmp_connp
= connp
;
1824 rw_init(&connp
->conn_ilg_lock
, NULL
, RW_DEFAULT
, NULL
);
1825 connp
->conn_ixa
= kmem_zalloc(sizeof (ip_xmit_attr_t
), kmflags
);
1826 if (connp
->conn_ixa
== NULL
)
1828 connp
->conn_ixa
->ixa_refcnt
= 1;
1829 connp
->conn_ixa
->ixa_protocol
= connp
->conn_proto
;
1830 connp
->conn_ixa
->ixa_xmit_hint
= CONN_TO_XMIT_HINT(connp
);
1836 rawip_conn_destructor(void *buf
, void *cdrarg
)
1838 itc_t
*itc
= (itc_t
*)buf
;
1839 conn_t
*connp
= &itc
->itc_conn
;
1840 icmp_t
*icmp
= (icmp_t
*)&itc
[1];
1842 ASSERT(connp
->conn_flags
& IPCL_RAWIPCONN
);
1843 ASSERT(icmp
->icmp_connp
== connp
);
1844 ASSERT(connp
->conn_icmp
== icmp
);
1845 mutex_destroy(&connp
->conn_lock
);
1846 cv_destroy(&connp
->conn_cv
);
1847 rw_destroy(&connp
->conn_ilg_lock
);
1849 /* Can be NULL if constructor failed */
1850 if (connp
->conn_ixa
!= NULL
) {
1851 ASSERT(connp
->conn_ixa
->ixa_refcnt
== 1);
1852 ASSERT(connp
->conn_ixa
->ixa_ire
== NULL
);
1853 ASSERT(connp
->conn_ixa
->ixa_nce
== NULL
);
1854 ixa_refrele(connp
->conn_ixa
);
1860 rts_conn_constructor(void *buf
, void *cdrarg
, int kmflags
)
1862 itc_t
*itc
= (itc_t
*)buf
;
1863 conn_t
*connp
= &itc
->itc_conn
;
1864 rts_t
*rts
= (rts_t
*)&itc
[1];
1866 bzero(connp
, sizeof (conn_t
));
1867 bzero(rts
, sizeof (rts_t
));
1869 mutex_init(&connp
->conn_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1870 cv_init(&connp
->conn_cv
, NULL
, CV_DEFAULT
, NULL
);
1871 connp
->conn_rts
= rts
;
1872 connp
->conn_flags
= IPCL_RTSCONN
;
1873 rts
->rts_connp
= connp
;
1874 rw_init(&connp
->conn_ilg_lock
, NULL
, RW_DEFAULT
, NULL
);
1875 connp
->conn_ixa
= kmem_zalloc(sizeof (ip_xmit_attr_t
), kmflags
);
1876 if (connp
->conn_ixa
== NULL
)
1878 connp
->conn_ixa
->ixa_refcnt
= 1;
1879 connp
->conn_ixa
->ixa_xmit_hint
= CONN_TO_XMIT_HINT(connp
);
1885 rts_conn_destructor(void *buf
, void *cdrarg
)
1887 itc_t
*itc
= (itc_t
*)buf
;
1888 conn_t
*connp
= &itc
->itc_conn
;
1889 rts_t
*rts
= (rts_t
*)&itc
[1];
1891 ASSERT(connp
->conn_flags
& IPCL_RTSCONN
);
1892 ASSERT(rts
->rts_connp
== connp
);
1893 ASSERT(connp
->conn_rts
== rts
);
1894 mutex_destroy(&connp
->conn_lock
);
1895 cv_destroy(&connp
->conn_cv
);
1896 rw_destroy(&connp
->conn_ilg_lock
);
1898 /* Can be NULL if constructor failed */
1899 if (connp
->conn_ixa
!= NULL
) {
1900 ASSERT(connp
->conn_ixa
->ixa_refcnt
== 1);
1901 ASSERT(connp
->conn_ixa
->ixa_ire
== NULL
);
1902 ASSERT(connp
->conn_ixa
->ixa_nce
== NULL
);
1903 ixa_refrele(connp
->conn_ixa
);
1908 * Called as part of ipcl_conn_destroy to assert and clear any pointers
1911 * Below we list all the pointers in the conn_t as a documentation aid.
1912 * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
1913 * If you add any pointers to the conn_t please add an ASSERT here
1914 * and #ifdef it out if it can't be actually asserted to be NULL.
1915 * In any case, we bzero most of the conn_t at the end of the function.
1918 ipcl_conn_cleanup(conn_t
*connp
)
1920 ip_xmit_attr_t
*ixa
;
1922 ASSERT(connp
->conn_latch
== NULL
);
1923 ASSERT(connp
->conn_latch_in_policy
== NULL
);
1924 ASSERT(connp
->conn_latch_in_action
== NULL
);
1926 ASSERT(connp
->conn_rq
== NULL
);
1927 ASSERT(connp
->conn_wq
== NULL
);
1929 ASSERT(connp
->conn_cred
== NULL
);
1930 ASSERT(connp
->conn_g_fanout
== NULL
);
1931 ASSERT(connp
->conn_g_next
== NULL
);
1932 ASSERT(connp
->conn_g_prev
== NULL
);
1933 ASSERT(connp
->conn_policy
== NULL
);
1934 ASSERT(connp
->conn_fanout
== NULL
);
1935 ASSERT(connp
->conn_next
== NULL
);
1936 ASSERT(connp
->conn_prev
== NULL
);
1937 ASSERT(connp
->conn_oper_pending_ill
== NULL
);
1938 ASSERT(connp
->conn_ilg
== NULL
);
1939 ASSERT(connp
->conn_drain_next
== NULL
);
1940 ASSERT(connp
->conn_drain_prev
== NULL
);
1942 /* conn_idl is not cleared when removed from idl list */
1943 ASSERT(connp
->conn_idl
== NULL
);
1945 ASSERT(connp
->conn_ipsec_opt_mp
== NULL
);
1947 /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
1948 ASSERT(connp
->conn_netstack
== NULL
);
1951 ASSERT(connp
->conn_helper_info
== NULL
);
1952 ASSERT(connp
->conn_ixa
!= NULL
);
1953 ixa
= connp
->conn_ixa
;
1954 ASSERT(ixa
->ixa_refcnt
== 1);
1955 /* Need to preserve ixa_protocol */
1959 /* Clear out the conn_t fields that are not preserved */
1960 bzero(&connp
->conn_start_clr
,
1962 ((uchar_t
*)&connp
->conn_start_clr
- (uchar_t
*)connp
));
1966 * All conns are inserted in a global multi-list for the benefit of
1967 * walkers. The walk is guaranteed to walk all open conns at the time
1968 * of the start of the walk exactly once. This property is needed to
1969 * achieve some cleanups during unplumb of interfaces. This is achieved
1972 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
1973 * call the insert and delete functions below at creation and deletion
1974 * time respectively. The conn never moves or changes its position in this
1975 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
1976 * won't increase due to walkers, once the conn deletion has started. Note
1977 * that we can't remove the conn from the global list and then wait for
1978 * the refcnt to drop to zero, since walkers would then see a truncated
1979 * list. CONN_INCIPIENT ensures that walkers don't start looking at
1980 * conns until ip_open is ready to make them globally visible.
1981 * The global round robin multi-list locks are held only to get the
1982 * next member/insertion/deletion and contention should be negligible
1983 * if the multi-list is much greater than the number of cpus.
1986 ipcl_globalhash_insert(conn_t
*connp
)
1989 struct connf_s
*connfp
;
1990 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
1993 * No need for atomic here. Approximate even distribution
1994 * in the global lists is sufficient.
1996 ipst
->ips_conn_g_index
++;
1997 index
= ipst
->ips_conn_g_index
& (CONN_G_HASH_SIZE
- 1);
1999 connp
->conn_g_prev
= NULL
;
2001 * Mark as INCIPIENT, so that walkers will ignore this
2002 * for now, till ip_open is ready to make it visible globally.
2004 connp
->conn_state_flags
|= CONN_INCIPIENT
;
2006 connfp
= &ipst
->ips_ipcl_globalhash_fanout
[index
];
2007 /* Insert at the head of the list */
2008 mutex_enter(&connfp
->connf_lock
);
2009 connp
->conn_g_next
= connfp
->connf_head
;
2010 if (connp
->conn_g_next
!= NULL
)
2011 connp
->conn_g_next
->conn_g_prev
= connp
;
2012 connfp
->connf_head
= connp
;
2014 /* The fanout bucket this conn points to */
2015 connp
->conn_g_fanout
= connfp
;
2017 mutex_exit(&connfp
->connf_lock
);
2021 ipcl_globalhash_remove(conn_t
*connp
)
2023 struct connf_s
*connfp
;
2026 * We were never inserted in the global multi list.
2027 * IPCL_NONE variety is never inserted in the global multilist
2028 * since it is presumed to not need any cleanup and is transient.
2030 if (connp
->conn_g_fanout
== NULL
)
2033 connfp
= connp
->conn_g_fanout
;
2034 mutex_enter(&connfp
->connf_lock
);
2035 if (connp
->conn_g_prev
!= NULL
)
2036 connp
->conn_g_prev
->conn_g_next
= connp
->conn_g_next
;
2038 connfp
->connf_head
= connp
->conn_g_next
;
2039 if (connp
->conn_g_next
!= NULL
)
2040 connp
->conn_g_next
->conn_g_prev
= connp
->conn_g_prev
;
2041 mutex_exit(&connfp
->connf_lock
);
2043 /* Better to stumble on a null pointer than to corrupt memory */
2044 connp
->conn_g_next
= NULL
;
2045 connp
->conn_g_prev
= NULL
;
2046 connp
->conn_g_fanout
= NULL
;
2050 * Walk the list of all conn_t's in the system, calling the function provided
2051 * With the specified argument for each.
2052 * Applies to both IPv4 and IPv6.
2054 * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2055 * conn_oper_pending_ill). To guard against stale pointers
2056 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2057 * unplumbed or removed. New conn_t's that are created while we are walking
2058 * may be missed by this walk, because they are not necessarily inserted
2059 * at the tail of the list. They are new conn_t's and thus don't have any
2060 * stale pointers. The CONN_CLOSING flag ensures that no new reference
2061 * is created to the struct that is going away.
2064 ipcl_walk(pfv_t func
, void *arg
, ip_stack_t
*ipst
)
2070 for (i
= 0; i
< CONN_G_HASH_SIZE
; i
++) {
2071 mutex_enter(&ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
);
2073 connp
= ipst
->ips_ipcl_globalhash_fanout
[i
].connf_head
;
2074 while (connp
!= NULL
) {
2075 mutex_enter(&connp
->conn_lock
);
2076 if (connp
->conn_state_flags
&
2077 (CONN_CONDEMNED
| CONN_INCIPIENT
)) {
2078 mutex_exit(&connp
->conn_lock
);
2079 connp
= connp
->conn_g_next
;
2082 CONN_INC_REF_LOCKED(connp
);
2083 mutex_exit(&connp
->conn_lock
);
2085 &ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
);
2086 (*func
)(connp
, arg
);
2087 if (prev_connp
!= NULL
)
2088 CONN_DEC_REF(prev_connp
);
2090 &ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
);
2092 connp
= connp
->conn_g_next
;
2094 mutex_exit(&ipst
->ips_ipcl_globalhash_fanout
[i
].connf_lock
);
2095 if (prev_connp
!= NULL
)
2096 CONN_DEC_REF(prev_connp
);
2101 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2102 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2103 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2104 * (peer tcp in ESTABLISHED state).
2107 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t
*connp
, ipha_t
*ipha
, tcpha_t
*tcpha
,
2111 uint16_t *pports
= (uint16_t *)&ports
;
2117 * If either the source or destination address is loopback, then
2118 * both endpoints must be in the same Zone. Otherwise, both of
2119 * the addresses are system-wide unique (tcp is in ESTABLISHED
2120 * state) and the endpoints may reside in different Zones.
2122 zone_chk
= (ipha
->ipha_src
== htonl(INADDR_LOOPBACK
) ||
2123 ipha
->ipha_dst
== htonl(INADDR_LOOPBACK
));
2125 pports
[0] = tcpha
->tha_fport
;
2126 pports
[1] = tcpha
->tha_lport
;
2128 connfp
= &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH(ipha
->ipha_dst
,
2131 mutex_enter(&connfp
->connf_lock
);
2132 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
2133 tconnp
= tconnp
->conn_next
) {
2135 if (IPCL_CONN_MATCH(tconnp
, IPPROTO_TCP
,
2136 ipha
->ipha_dst
, ipha
->ipha_src
, ports
) &&
2137 tconnp
->conn_tcp
->tcp_state
== TCPS_ESTABLISHED
&&
2138 (!zone_chk
|| tconnp
->conn_zoneid
== connp
->conn_zoneid
)) {
2140 ASSERT(tconnp
!= connp
);
2141 CONN_INC_REF(tconnp
);
2142 mutex_exit(&connfp
->connf_lock
);
2146 mutex_exit(&connfp
->connf_lock
);
2151 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2152 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2153 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2154 * (peer tcp in ESTABLISHED state).
2157 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t
*connp
, ip6_t
*ip6h
, tcpha_t
*tcpha
,
2161 uint16_t *pports
= (uint16_t *)&ports
;
2167 * If either the source or destination address is loopback, then
2168 * both endpoints must be in the same Zone. Otherwise, both of
2169 * the addresses are system-wide unique (tcp is in ESTABLISHED
2170 * state) and the endpoints may reside in different Zones. We
2171 * don't do Zone check for link local address(es) because the
2172 * current Zone implementation treats each link local address as
2173 * being unique per system node, i.e. they belong to global Zone.
2175 zone_chk
= (IN6_IS_ADDR_LOOPBACK(&ip6h
->ip6_src
) ||
2176 IN6_IS_ADDR_LOOPBACK(&ip6h
->ip6_dst
));
2178 pports
[0] = tcpha
->tha_fport
;
2179 pports
[1] = tcpha
->tha_lport
;
2181 connfp
= &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH_V6(ip6h
->ip6_dst
,
2184 mutex_enter(&connfp
->connf_lock
);
2185 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
2186 tconnp
= tconnp
->conn_next
) {
2188 /* We skip conn_bound_if check here as this is loopback tcp */
2189 if (IPCL_CONN_MATCH_V6(tconnp
, IPPROTO_TCP
,
2190 ip6h
->ip6_dst
, ip6h
->ip6_src
, ports
) &&
2191 tconnp
->conn_tcp
->tcp_state
== TCPS_ESTABLISHED
&&
2192 (!zone_chk
|| tconnp
->conn_zoneid
== connp
->conn_zoneid
)) {
2194 ASSERT(tconnp
!= connp
);
2195 CONN_INC_REF(tconnp
);
2196 mutex_exit(&connfp
->connf_lock
);
2200 mutex_exit(&connfp
->connf_lock
);
2205 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2206 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2207 * Only checks for connected entries i.e. no INADDR_ANY checks.
2210 ipcl_tcp_lookup_reversed_ipv4(ipha_t
*ipha
, tcpha_t
*tcpha
, int min_state
,
2218 pports
= (uint16_t *)&ports
;
2219 pports
[0] = tcpha
->tha_fport
;
2220 pports
[1] = tcpha
->tha_lport
;
2222 connfp
= &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH(ipha
->ipha_dst
,
2225 mutex_enter(&connfp
->connf_lock
);
2226 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
2227 tconnp
= tconnp
->conn_next
) {
2229 if (IPCL_CONN_MATCH(tconnp
, IPPROTO_TCP
,
2230 ipha
->ipha_dst
, ipha
->ipha_src
, ports
) &&
2231 tconnp
->conn_tcp
->tcp_state
>= min_state
) {
2233 CONN_INC_REF(tconnp
);
2234 mutex_exit(&connfp
->connf_lock
);
2238 mutex_exit(&connfp
->connf_lock
);
2243 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2244 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2245 * Only checks for connected entries i.e. no INADDR_ANY checks.
2246 * Match on ifindex in addition to addresses.
2249 ipcl_tcp_lookup_reversed_ipv6(ip6_t
*ip6h
, tcpha_t
*tcpha
, int min_state
,
2250 uint_t ifindex
, ip_stack_t
*ipst
)
2258 pports
= (uint16_t *)&ports
;
2259 pports
[0] = tcpha
->tha_fport
;
2260 pports
[1] = tcpha
->tha_lport
;
2262 connfp
= &ipst
->ips_ipcl_conn_fanout
[IPCL_CONN_HASH_V6(ip6h
->ip6_dst
,
2265 mutex_enter(&connfp
->connf_lock
);
2266 for (tconnp
= connfp
->connf_head
; tconnp
!= NULL
;
2267 tconnp
= tconnp
->conn_next
) {
2269 tcp
= tconnp
->conn_tcp
;
2270 if (IPCL_CONN_MATCH_V6(tconnp
, IPPROTO_TCP
,
2271 ip6h
->ip6_dst
, ip6h
->ip6_src
, ports
) &&
2272 tcp
->tcp_state
>= min_state
&&
2273 (tconnp
->conn_bound_if
== 0 ||
2274 tconnp
->conn_bound_if
== ifindex
)) {
2276 CONN_INC_REF(tconnp
);
2277 mutex_exit(&connfp
->connf_lock
);
2281 mutex_exit(&connfp
->connf_lock
);
2286 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2287 * a listener when changing state.
2290 ipcl_lookup_listener_v4(uint16_t lport
, ipaddr_t laddr
, zoneid_t zoneid
,
2293 connf_t
*bind_connfp
;
2298 * Avoid false matches for packets sent to an IP destination of
2304 ASSERT(zoneid
!= ALL_ZONES
);
2306 bind_connfp
= &ipst
->ips_ipcl_bind_fanout
[IPCL_BIND_HASH(lport
, ipst
)];
2307 mutex_enter(&bind_connfp
->connf_lock
);
2308 for (connp
= bind_connfp
->connf_head
; connp
!= NULL
;
2309 connp
= connp
->conn_next
) {
2310 tcp
= connp
->conn_tcp
;
2311 if (IPCL_BIND_MATCH(connp
, IPPROTO_TCP
, laddr
, lport
) &&
2312 IPCL_ZONE_MATCH(connp
, zoneid
) &&
2313 (tcp
->tcp_listener
== NULL
)) {
2314 CONN_INC_REF(connp
);
2315 mutex_exit(&bind_connfp
->connf_lock
);
2319 mutex_exit(&bind_connfp
->connf_lock
);
2324 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2325 * a listener when changing state.
2328 ipcl_lookup_listener_v6(uint16_t lport
, in6_addr_t
*laddr
, uint_t ifindex
,
2329 zoneid_t zoneid
, ip_stack_t
*ipst
)
2331 connf_t
*bind_connfp
;
2332 conn_t
*connp
= NULL
;
2336 * Avoid false matches for packets sent to an IP destination of
2339 if (IN6_IS_ADDR_UNSPECIFIED(laddr
))
2342 ASSERT(zoneid
!= ALL_ZONES
);
2344 bind_connfp
= &ipst
->ips_ipcl_bind_fanout
[IPCL_BIND_HASH(lport
, ipst
)];
2345 mutex_enter(&bind_connfp
->connf_lock
);
2346 for (connp
= bind_connfp
->connf_head
; connp
!= NULL
;
2347 connp
= connp
->conn_next
) {
2348 tcp
= connp
->conn_tcp
;
2349 if (IPCL_BIND_MATCH_V6(connp
, IPPROTO_TCP
, *laddr
, lport
) &&
2350 IPCL_ZONE_MATCH(connp
, zoneid
) &&
2351 (connp
->conn_bound_if
== 0 ||
2352 connp
->conn_bound_if
== ifindex
) &&
2353 tcp
->tcp_listener
== NULL
) {
2354 CONN_INC_REF(connp
);
2355 mutex_exit(&bind_connfp
->connf_lock
);
2359 mutex_exit(&bind_connfp
->connf_lock
);
2364 * ipcl_get_next_conn
2365 * get the next entry in the conn global list
2366 * and put a reference on the next_conn.
2367 * decrement the reference on the current conn.
2369 * This is an iterator based walker function that also provides for
2370 * some selection by the caller. It walks through the conn_hash bucket
2371 * searching for the next valid connp in the list, and selects connections
2372 * that are neither closed nor condemned. It also REFHOLDS the conn
2373 * thus ensuring that the conn exists when the caller uses the conn.
2376 ipcl_get_next_conn(connf_t
*connfp
, conn_t
*connp
, uint32_t conn_flags
)
2383 mutex_enter(&connfp
->connf_lock
);
2385 next_connp
= (connp
== NULL
) ?
2386 connfp
->connf_head
: connp
->conn_g_next
;
2388 while (next_connp
!= NULL
) {
2389 mutex_enter(&next_connp
->conn_lock
);
2390 if (!(next_connp
->conn_flags
& conn_flags
) ||
2391 (next_connp
->conn_state_flags
&
2392 (CONN_CONDEMNED
| CONN_INCIPIENT
))) {
2394 * This conn has been condemned or
2395 * is closing, or the flags don't match
2397 mutex_exit(&next_connp
->conn_lock
);
2398 next_connp
= next_connp
->conn_g_next
;
2401 CONN_INC_REF_LOCKED(next_connp
);
2402 mutex_exit(&next_connp
->conn_lock
);
2406 mutex_exit(&connfp
->connf_lock
);
2409 CONN_DEC_REF(connp
);
2411 return (next_connp
);
2416 * Trace of the last NBUF refhold/refrele
2419 conn_trace_ref(conn_t
*connp
)
2424 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
2425 last
= connp
->conn_trace_last
;
2427 if (last
== CONN_TRACE_MAX
)
2430 ctb
= &connp
->conn_trace_buf
[last
];
2431 ctb
->ctb_depth
= getpcstack(ctb
->ctb_stack
, CONN_STACK_DEPTH
);
2432 connp
->conn_trace_last
= last
;
2437 conn_untrace_ref(conn_t
*connp
)
2442 ASSERT(MUTEX_HELD(&connp
->conn_lock
));
2443 last
= connp
->conn_trace_last
;
2445 if (last
== CONN_TRACE_MAX
)
2448 ctb
= &connp
->conn_trace_buf
[last
];
2449 ctb
->ctb_depth
= getpcstack(ctb
->ctb_stack
, CONN_STACK_DEPTH
);
2450 connp
->conn_trace_last
= last
;