Merge commit 'b1e7e97d3b60469b243b3b2e22c7d8cbd11c7c90'
[unleashed.git] / kernel / net / ip / ip_attr.c
blobc01ceda95c6e293d95eaabae214ad087a5f9b834
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* Copyright (c) 1990 Mentat Inc. */
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #include <sys/zone.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/debug.h>
35 #include <sys/atomic.h>
37 #include <sys/systm.h>
38 #include <sys/param.h>
39 #include <sys/kmem.h>
40 #include <sys/sdt.h>
41 #include <sys/socket.h>
42 #include <sys/mac.h>
43 #include <net/if.h>
44 #include <net/if_arp.h>
45 #include <net/route.h>
46 #include <sys/sockio.h>
47 #include <netinet/in.h>
48 #include <net/if_dl.h>
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/mib2.h>
53 #include <inet/nd.h>
54 #include <inet/arp.h>
55 #include <inet/snmpcom.h>
56 #include <inet/kstatcom.h>
58 #include <netinet/igmp_var.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <netinet/sctp.h>
63 #include <inet/ip.h>
64 #include <inet/ip_impl.h>
65 #include <inet/ip6.h>
66 #include <inet/ip6_asp.h>
67 #include <inet/tcp.h>
68 #include <inet/ip_multi.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 #include <inet/optcom.h>
74 #include <inet/ip_ndp.h>
75 #include <inet/ip_listutils.h>
76 #include <netinet/igmp.h>
77 #include <netinet/ip_mroute.h>
78 #include <inet/ipp_common.h>
80 #include <net/pfkeyv2.h>
81 #include <inet/sadb.h>
82 #include <inet/ipsec_impl.h>
83 #include <inet/ipdrop.h>
84 #include <inet/ip_netinfo.h>
85 #include <sys/squeue_impl.h>
86 #include <sys/squeue.h>
88 #include <inet/ipclassifier.h>
89 #include <inet/sctp_ip.h>
90 #include <inet/sctp/sctp_impl.h>
91 #include <inet/udp_impl.h>
92 #include <sys/sunddi.h>
/*
 * Release a reference on ip_xmit_attr.
 * The reference is acquired by conn_get_ixa().
 * When the count drops to zero, ixa_inactive() frees the structure.
 */
#define	IXA_REFRELE(ixa)					\
{								\
	if (atomic_dec_32_nv(&(ixa)->ixa_refcnt) == 0)		\
		ixa_inactive(ixa);				\
}

/*
 * Acquire an additional reference on an ixa that is already held
 * (the count must be non-zero, hence the ASSERT).
 */
#define	IXA_REFHOLD(ixa)					\
{								\
	ASSERT((ixa)->ixa_refcnt != 0);				\
	atomic_inc_32(&(ixa)->ixa_refcnt);			\
}
111 * When we need to handle a transmit side asynchronous operation, then we need
112 * to save sufficient information so that we can call the fragment and postfrag
113 * functions. That information is captured in an mblk containing this structure.
115 * Since this is currently only used for IPsec, we include information for
116 * the kernel crypto framework.
118 typedef struct ixamblk_s {
119 boolean_t ixm_inbound; /* B_FALSE */
120 iaflags_t ixm_flags; /* ixa_flags */
121 netstackid_t ixm_stackid; /* Verify it didn't go away */
122 uint_t ixm_ifindex; /* Used to find the nce */
123 in6_addr_t ixm_nceaddr_v6; /* Used to find nce */
124 #define ixm_nceaddr_v4 V4_PART_OF_V6(ixm_nceaddr_v6)
125 uint32_t ixm_fragsize;
126 uint_t ixm_pktlen;
127 uint16_t ixm_ip_hdr_length; /* Points to ULP header */
128 uint8_t ixm_protocol; /* Protocol number for ULP cksum */
129 pfirepostfrag_t ixm_postfragfn;
131 zoneid_t ixm_zoneid; /* Needed for ipobs */
132 zoneid_t ixm_no_loop_zoneid; /* IXAF_NO_LOOP_ZONEID_SET */
134 uint_t ixm_scopeid; /* For IPv6 link-locals */
136 uint32_t ixm_ident; /* For IPv6 fragment header */
137 uint32_t ixm_xmit_hint;
139 uint64_t ixm_conn_id; /* Used by DTrace */
140 cred_t *ixm_cred; /* For getpeerucred - refhold if set */
141 pid_t ixm_cpid; /* For getpeerucred */
144 * When the pointers below are set they have a refhold on the struct.
146 ipsec_latch_t *ixm_ipsec_latch;
147 struct ipsa_s *ixm_ipsec_ah_sa; /* SA for AH */
148 struct ipsa_s *ixm_ipsec_esp_sa; /* SA for ESP */
149 struct ipsec_policy_s *ixm_ipsec_policy; /* why are we here? */
150 struct ipsec_action_s *ixm_ipsec_action; /* For reflected packets */
152 ipsa_ref_t ixm_ipsec_ref[2]; /* Soft reference to SA */
154 /* Need these while waiting for SA */
155 uint16_t ixm_ipsec_src_port; /* Source port number of d-gram. */
156 uint16_t ixm_ipsec_dst_port; /* Destination port number of d-gram. */
157 uint8_t ixm_ipsec_icmp_type; /* ICMP type of d-gram */
158 uint8_t ixm_ipsec_icmp_code; /* ICMP code of d-gram */
160 sa_family_t ixm_ipsec_inaf; /* Inner address family */
161 uint32_t ixm_ipsec_insrc[IXA_MAX_ADDRLEN]; /* Inner src address */
162 uint32_t ixm_ipsec_indst[IXA_MAX_ADDRLEN]; /* Inner dest address */
163 uint8_t ixm_ipsec_insrcpfx; /* Inner source prefix */
164 uint8_t ixm_ipsec_indstpfx; /* Inner destination prefix */
166 uint8_t ixm_ipsec_proto; /* IP protocol number for d-gram. */
167 } ixamblk_t;
171 * When we need to handle a receive side asynchronous operation, then we need
172 * to save sufficient information so that we can call ip_fanout.
173 * That information is captured in an mblk containing this structure.
175 * Since this is currently only used for IPsec, we include information for
176 * the kernel crypto framework.
178 typedef struct iramblk_s {
179 boolean_t irm_inbound; /* B_TRUE */
180 iaflags_t irm_flags; /* ira_flags */
181 netstackid_t irm_stackid; /* Verify it didn't go away */
182 uint_t irm_ifindex; /* To find ira_ill */
184 uint_t irm_rifindex; /* ira_rifindex */
185 uint_t irm_ruifindex; /* ira_ruifindex */
186 uint_t irm_pktlen;
187 uint16_t irm_ip_hdr_length; /* Points to ULP header */
188 uint8_t irm_protocol; /* Protocol number for ULP cksum */
189 zoneid_t irm_zoneid; /* ALL_ZONES unless local delivery */
191 squeue_t *irm_sqp;
192 ill_rx_ring_t *irm_ring;
194 ipaddr_t irm_mroute_tunnel; /* IRAF_MROUTE_TUNNEL_SET */
195 zoneid_t irm_no_loop_zoneid; /* IRAF_NO_LOOP_ZONEID_SET */
196 uint32_t irm_esp_udp_ports; /* IRAF_ESP_UDP_PORTS */
198 char irm_l2src[IRA_L2SRC_SIZE]; /* If IRAF_L2SRC_SET */
200 cred_t *irm_cred; /* For getpeerucred - refhold if set */
201 pid_t irm_cpid; /* For getpeerucred */
204 * When set these correspond to a refhold on the object.
206 struct ipsa_s *irm_ipsec_ah_sa; /* SA for AH */
207 struct ipsa_s *irm_ipsec_esp_sa; /* SA for ESP */
208 struct ipsec_action_s *irm_ipsec_action; /* For reflected packets */
209 } iramblk_t;
213 * Take the information in ip_xmit_attr_t and stick it in an mblk
214 * that can later be passed to ip_xmit_attr_from_mblk to recreate the
215 * ip_xmit_attr_t.
217 * Returns NULL on memory allocation failure.
219 mblk_t *
220 ip_xmit_attr_to_mblk(ip_xmit_attr_t *ixa)
222 mblk_t *ixamp;
223 ixamblk_t *ixm;
224 nce_t *nce = ixa->ixa_nce;
226 ASSERT(nce != NULL);
227 ixamp = allocb(sizeof (*ixm), BPRI_MED);
228 if (ixamp == NULL)
229 return (NULL);
231 ixamp->b_datap->db_type = M_BREAK;
232 ixamp->b_wptr += sizeof (*ixm);
233 ixm = (ixamblk_t *)ixamp->b_rptr;
235 bzero(ixm, sizeof (*ixm));
236 ixm->ixm_inbound = B_FALSE;
237 ixm->ixm_flags = ixa->ixa_flags;
238 ixm->ixm_stackid = ixa->ixa_ipst->ips_netstack->netstack_stackid;
239 ixm->ixm_ifindex = nce->nce_ill->ill_phyint->phyint_ifindex;
240 ixm->ixm_nceaddr_v6 = nce->nce_addr;
241 ixm->ixm_fragsize = ixa->ixa_fragsize;
242 ixm->ixm_pktlen = ixa->ixa_pktlen;
243 ixm->ixm_ip_hdr_length = ixa->ixa_ip_hdr_length;
244 ixm->ixm_protocol = ixa->ixa_protocol;
245 ixm->ixm_postfragfn = ixa->ixa_postfragfn;
246 ixm->ixm_zoneid = ixa->ixa_zoneid;
247 ixm->ixm_no_loop_zoneid = ixa->ixa_no_loop_zoneid;
248 ixm->ixm_scopeid = ixa->ixa_scopeid;
249 ixm->ixm_ident = ixa->ixa_ident;
250 ixm->ixm_xmit_hint = ixa->ixa_xmit_hint;
252 if (ixa->ixa_cred != NULL) {
253 ixm->ixm_cred = ixa->ixa_cred;
254 crhold(ixa->ixa_cred);
256 ixm->ixm_cpid = ixa->ixa_cpid;
257 ixm->ixm_conn_id = ixa->ixa_conn_id;
259 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
260 if (ixa->ixa_ipsec_ah_sa != NULL) {
261 ixm->ixm_ipsec_ah_sa = ixa->ixa_ipsec_ah_sa;
262 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
264 if (ixa->ixa_ipsec_esp_sa != NULL) {
265 ixm->ixm_ipsec_esp_sa = ixa->ixa_ipsec_esp_sa;
266 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
268 if (ixa->ixa_ipsec_policy != NULL) {
269 ixm->ixm_ipsec_policy = ixa->ixa_ipsec_policy;
270 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
272 if (ixa->ixa_ipsec_action != NULL) {
273 ixm->ixm_ipsec_action = ixa->ixa_ipsec_action;
274 IPACT_REFHOLD(ixa->ixa_ipsec_action);
276 if (ixa->ixa_ipsec_latch != NULL) {
277 ixm->ixm_ipsec_latch = ixa->ixa_ipsec_latch;
278 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
280 ixm->ixm_ipsec_ref[0] = ixa->ixa_ipsec_ref[0];
281 ixm->ixm_ipsec_ref[1] = ixa->ixa_ipsec_ref[1];
282 ixm->ixm_ipsec_src_port = ixa->ixa_ipsec_src_port;
283 ixm->ixm_ipsec_dst_port = ixa->ixa_ipsec_dst_port;
284 ixm->ixm_ipsec_icmp_type = ixa->ixa_ipsec_icmp_type;
285 ixm->ixm_ipsec_icmp_code = ixa->ixa_ipsec_icmp_code;
286 ixm->ixm_ipsec_inaf = ixa->ixa_ipsec_inaf;
287 ixm->ixm_ipsec_insrc[0] = ixa->ixa_ipsec_insrc[0];
288 ixm->ixm_ipsec_insrc[1] = ixa->ixa_ipsec_insrc[1];
289 ixm->ixm_ipsec_insrc[2] = ixa->ixa_ipsec_insrc[2];
290 ixm->ixm_ipsec_insrc[3] = ixa->ixa_ipsec_insrc[3];
291 ixm->ixm_ipsec_indst[0] = ixa->ixa_ipsec_indst[0];
292 ixm->ixm_ipsec_indst[1] = ixa->ixa_ipsec_indst[1];
293 ixm->ixm_ipsec_indst[2] = ixa->ixa_ipsec_indst[2];
294 ixm->ixm_ipsec_indst[3] = ixa->ixa_ipsec_indst[3];
295 ixm->ixm_ipsec_insrcpfx = ixa->ixa_ipsec_insrcpfx;
296 ixm->ixm_ipsec_indstpfx = ixa->ixa_ipsec_indstpfx;
297 ixm->ixm_ipsec_proto = ixa->ixa_ipsec_proto;
299 return (ixamp);
303 * Extract the ip_xmit_attr_t from the mblk, checking that the
304 * ip_stack_t, ill_t, and nce_t still exist. Returns B_FALSE if that is
305 * not the case.
307 * Otherwise ixa is updated.
308 * Caller needs to release references on the ixa by calling ixa_refrele()
309 * which will imediately call ixa_inactive to release the references.
311 boolean_t
312 ip_xmit_attr_from_mblk(mblk_t *ixamp, ip_xmit_attr_t *ixa)
314 ixamblk_t *ixm;
315 netstack_t *ns;
316 ip_stack_t *ipst;
317 ill_t *ill;
318 nce_t *nce;
320 /* We assume the caller hasn't initialized ixa */
321 bzero(ixa, sizeof (*ixa));
323 ASSERT(DB_TYPE(ixamp) == M_BREAK);
324 ASSERT(ixamp->b_cont == NULL);
326 ixm = (ixamblk_t *)ixamp->b_rptr;
327 ASSERT(!ixm->ixm_inbound);
329 /* Verify the netstack is still around */
330 ns = netstack_find_by_stackid(ixm->ixm_stackid);
331 if (ns == NULL) {
332 /* Disappeared on us */
333 (void) ip_xmit_attr_free_mblk(ixamp);
334 return (B_FALSE);
336 ipst = ns->netstack_ip;
338 /* Verify the ill is still around */
339 ill = ill_lookup_on_ifindex(ixm->ixm_ifindex,
340 !(ixm->ixm_flags & IXAF_IS_IPV4), ipst);
342 /* We have the ill, hence the netstack can't go away */
343 netstack_rele(ns);
344 if (ill == NULL) {
345 /* Disappeared on us */
346 (void) ip_xmit_attr_free_mblk(ixamp);
347 return (B_FALSE);
350 * Find the nce. We don't load-spread (only lookup nce's on the ill)
351 * because we want to find the same nce as the one we had when
352 * ip_xmit_attr_to_mblk was called.
354 if (ixm->ixm_flags & IXAF_IS_IPV4) {
355 nce = nce_lookup_v4(ill, &ixm->ixm_nceaddr_v4);
356 } else {
357 nce = nce_lookup_v6(ill, &ixm->ixm_nceaddr_v6);
360 /* We have the nce, hence the ill can't go away */
361 ill_refrele(ill);
362 if (nce == NULL) {
364 * Since this is unusual and we don't know what type of
365 * nce it was, we drop the packet.
367 (void) ip_xmit_attr_free_mblk(ixamp);
368 return (B_FALSE);
371 ixa->ixa_flags = ixm->ixm_flags;
372 ixa->ixa_refcnt = 1;
373 ixa->ixa_ipst = ipst;
374 ixa->ixa_fragsize = ixm->ixm_fragsize;
375 ixa->ixa_pktlen = ixm->ixm_pktlen;
376 ixa->ixa_ip_hdr_length = ixm->ixm_ip_hdr_length;
377 ixa->ixa_protocol = ixm->ixm_protocol;
378 ixa->ixa_nce = nce;
379 ixa->ixa_postfragfn = ixm->ixm_postfragfn;
380 ixa->ixa_zoneid = ixm->ixm_zoneid;
381 ixa->ixa_no_loop_zoneid = ixm->ixm_no_loop_zoneid;
382 ixa->ixa_scopeid = ixm->ixm_scopeid;
383 ixa->ixa_ident = ixm->ixm_ident;
384 ixa->ixa_xmit_hint = ixm->ixm_xmit_hint;
386 if (ixm->ixm_cred != NULL) {
387 ixa->ixa_cred = ixm->ixm_cred;
388 ixa->ixa_free_flags |= IXA_FREE_CRED;
389 ixm->ixm_cred = NULL;
391 ixa->ixa_cpid = ixm->ixm_cpid;
392 ixa->ixa_conn_id = ixm->ixm_conn_id;
394 ixa->ixa_ipsec_ah_sa = ixm->ixm_ipsec_ah_sa;
395 ixa->ixa_ipsec_esp_sa = ixm->ixm_ipsec_esp_sa;
396 ixa->ixa_ipsec_policy = ixm->ixm_ipsec_policy;
397 ixa->ixa_ipsec_action = ixm->ixm_ipsec_action;
398 ixa->ixa_ipsec_latch = ixm->ixm_ipsec_latch;
400 ixa->ixa_ipsec_ref[0] = ixm->ixm_ipsec_ref[0];
401 ixa->ixa_ipsec_ref[1] = ixm->ixm_ipsec_ref[1];
402 ixa->ixa_ipsec_src_port = ixm->ixm_ipsec_src_port;
403 ixa->ixa_ipsec_dst_port = ixm->ixm_ipsec_dst_port;
404 ixa->ixa_ipsec_icmp_type = ixm->ixm_ipsec_icmp_type;
405 ixa->ixa_ipsec_icmp_code = ixm->ixm_ipsec_icmp_code;
406 ixa->ixa_ipsec_inaf = ixm->ixm_ipsec_inaf;
407 ixa->ixa_ipsec_insrc[0] = ixm->ixm_ipsec_insrc[0];
408 ixa->ixa_ipsec_insrc[1] = ixm->ixm_ipsec_insrc[1];
409 ixa->ixa_ipsec_insrc[2] = ixm->ixm_ipsec_insrc[2];
410 ixa->ixa_ipsec_insrc[3] = ixm->ixm_ipsec_insrc[3];
411 ixa->ixa_ipsec_indst[0] = ixm->ixm_ipsec_indst[0];
412 ixa->ixa_ipsec_indst[1] = ixm->ixm_ipsec_indst[1];
413 ixa->ixa_ipsec_indst[2] = ixm->ixm_ipsec_indst[2];
414 ixa->ixa_ipsec_indst[3] = ixm->ixm_ipsec_indst[3];
415 ixa->ixa_ipsec_insrcpfx = ixm->ixm_ipsec_insrcpfx;
416 ixa->ixa_ipsec_indstpfx = ixm->ixm_ipsec_indstpfx;
417 ixa->ixa_ipsec_proto = ixm->ixm_ipsec_proto;
419 freeb(ixamp);
420 return (B_TRUE);
424 * Free the ixm mblk and any references it holds
425 * Returns b_cont.
427 mblk_t *
428 ip_xmit_attr_free_mblk(mblk_t *ixamp)
430 ixamblk_t *ixm;
431 mblk_t *mp;
433 /* Consume mp */
434 ASSERT(DB_TYPE(ixamp) == M_BREAK);
435 mp = ixamp->b_cont;
437 ixm = (ixamblk_t *)ixamp->b_rptr;
438 ASSERT(!ixm->ixm_inbound);
440 if (ixm->ixm_ipsec_ah_sa != NULL) {
441 IPSA_REFRELE(ixm->ixm_ipsec_ah_sa);
442 ixm->ixm_ipsec_ah_sa = NULL;
444 if (ixm->ixm_ipsec_esp_sa != NULL) {
445 IPSA_REFRELE(ixm->ixm_ipsec_esp_sa);
446 ixm->ixm_ipsec_esp_sa = NULL;
448 if (ixm->ixm_ipsec_policy != NULL) {
449 IPPOL_REFRELE(ixm->ixm_ipsec_policy);
450 ixm->ixm_ipsec_policy = NULL;
452 if (ixm->ixm_ipsec_action != NULL) {
453 IPACT_REFRELE(ixm->ixm_ipsec_action);
454 ixm->ixm_ipsec_action = NULL;
456 if (ixm->ixm_ipsec_latch) {
457 IPLATCH_REFRELE(ixm->ixm_ipsec_latch);
458 ixm->ixm_ipsec_latch = NULL;
461 if (ixm->ixm_cred != NULL) {
462 crfree(ixm->ixm_cred);
463 ixm->ixm_cred = NULL;
465 freeb(ixamp);
466 return (mp);
470 * Take the information in ip_recv_attr_t and stick it in an mblk
471 * that can later be passed to ip_recv_attr_from_mblk to recreate the
472 * ip_recv_attr_t.
474 * Returns NULL on memory allocation failure.
476 mblk_t *
477 ip_recv_attr_to_mblk(ip_recv_attr_t *ira)
479 mblk_t *iramp;
480 iramblk_t *irm;
481 ill_t *ill = ira->ira_ill;
483 ASSERT(ira->ira_ill != NULL || ira->ira_ruifindex != 0);
485 iramp = allocb(sizeof (*irm), BPRI_MED);
486 if (iramp == NULL)
487 return (NULL);
489 iramp->b_datap->db_type = M_BREAK;
490 iramp->b_wptr += sizeof (*irm);
491 irm = (iramblk_t *)iramp->b_rptr;
493 bzero(irm, sizeof (*irm));
494 irm->irm_inbound = B_TRUE;
495 irm->irm_flags = ira->ira_flags;
496 if (ill != NULL) {
497 /* Internal to IP - preserve ip_stack_t, ill and rill */
498 irm->irm_stackid =
499 ill->ill_ipst->ips_netstack->netstack_stackid;
500 irm->irm_ifindex = ira->ira_ill->ill_phyint->phyint_ifindex;
501 ASSERT(ira->ira_rill->ill_phyint->phyint_ifindex ==
502 ira->ira_rifindex);
503 } else {
504 /* Let ip_recv_attr_from_stackid know there isn't one */
505 irm->irm_stackid = -1;
507 irm->irm_rifindex = ira->ira_rifindex;
508 irm->irm_ruifindex = ira->ira_ruifindex;
509 irm->irm_pktlen = ira->ira_pktlen;
510 irm->irm_ip_hdr_length = ira->ira_ip_hdr_length;
511 irm->irm_protocol = ira->ira_protocol;
513 irm->irm_sqp = ira->ira_sqp;
514 irm->irm_ring = ira->ira_ring;
516 irm->irm_zoneid = ira->ira_zoneid;
517 irm->irm_mroute_tunnel = ira->ira_mroute_tunnel;
518 irm->irm_no_loop_zoneid = ira->ira_no_loop_zoneid;
519 irm->irm_esp_udp_ports = ira->ira_esp_udp_ports;
521 if (ira->ira_cred != NULL) {
522 irm->irm_cred = ira->ira_cred;
523 crhold(ira->ira_cred);
525 irm->irm_cpid = ira->ira_cpid;
527 if (ira->ira_flags & IRAF_L2SRC_SET)
528 bcopy(ira->ira_l2src, irm->irm_l2src, IRA_L2SRC_SIZE);
530 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
531 if (ira->ira_ipsec_ah_sa != NULL) {
532 irm->irm_ipsec_ah_sa = ira->ira_ipsec_ah_sa;
533 IPSA_REFHOLD(ira->ira_ipsec_ah_sa);
535 if (ira->ira_ipsec_esp_sa != NULL) {
536 irm->irm_ipsec_esp_sa = ira->ira_ipsec_esp_sa;
537 IPSA_REFHOLD(ira->ira_ipsec_esp_sa);
539 if (ira->ira_ipsec_action != NULL) {
540 irm->irm_ipsec_action = ira->ira_ipsec_action;
541 IPACT_REFHOLD(ira->ira_ipsec_action);
544 return (iramp);
548 * Extract the ip_recv_attr_t from the mblk. If we are used inside IP
549 * then irm_stackid is not -1, in which case we check that the
550 * ip_stack_t and ill_t still exist. Returns B_FALSE if that is
551 * not the case.
552 * If irm_stackid is zero then we are used by an ULP (e.g., squeue_enter)
553 * and we just proceed with ira_ill and ira_rill as NULL.
555 * The caller needs to release any references on the pointers inside the ire
556 * by calling ira_cleanup.
558 boolean_t
559 ip_recv_attr_from_mblk(mblk_t *iramp, ip_recv_attr_t *ira)
561 iramblk_t *irm;
562 netstack_t *ns;
563 ip_stack_t *ipst = NULL;
564 ill_t *ill = NULL, *rill = NULL;
566 /* We assume the caller hasn't initialized ira */
567 bzero(ira, sizeof (*ira));
569 ASSERT(DB_TYPE(iramp) == M_BREAK);
570 ASSERT(iramp->b_cont == NULL);
572 irm = (iramblk_t *)iramp->b_rptr;
573 ASSERT(irm->irm_inbound);
575 if (irm->irm_stackid != -1) {
576 /* Verify the netstack is still around */
577 ns = netstack_find_by_stackid(irm->irm_stackid);
578 if (ns == NULL) {
579 /* Disappeared on us */
580 (void) ip_recv_attr_free_mblk(iramp);
581 return (B_FALSE);
583 ipst = ns->netstack_ip;
585 /* Verify the ill is still around */
586 ill = ill_lookup_on_ifindex(irm->irm_ifindex,
587 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
589 if (irm->irm_ifindex == irm->irm_rifindex) {
590 rill = ill;
591 } else {
592 rill = ill_lookup_on_ifindex(irm->irm_rifindex,
593 !(irm->irm_flags & IRAF_IS_IPV4), ipst);
596 /* We have the ill, hence the netstack can't go away */
597 netstack_rele(ns);
598 if (ill == NULL || rill == NULL) {
599 /* Disappeared on us */
600 if (ill != NULL)
601 ill_refrele(ill);
602 if (rill != NULL && rill != ill)
603 ill_refrele(rill);
604 (void) ip_recv_attr_free_mblk(iramp);
605 return (B_FALSE);
609 ira->ira_flags = irm->irm_flags;
610 /* Caller must ill_refele(ira_ill) by using ira_cleanup() */
611 ira->ira_ill = ill;
612 ira->ira_rill = rill;
614 ira->ira_rifindex = irm->irm_rifindex;
615 ira->ira_ruifindex = irm->irm_ruifindex;
616 ira->ira_pktlen = irm->irm_pktlen;
617 ira->ira_ip_hdr_length = irm->irm_ip_hdr_length;
618 ira->ira_protocol = irm->irm_protocol;
620 ira->ira_sqp = irm->irm_sqp;
621 /* The rest of IP assumes that the rings never go away. */
622 ira->ira_ring = irm->irm_ring;
624 ira->ira_zoneid = irm->irm_zoneid;
625 ira->ira_mroute_tunnel = irm->irm_mroute_tunnel;
626 ira->ira_no_loop_zoneid = irm->irm_no_loop_zoneid;
627 ira->ira_esp_udp_ports = irm->irm_esp_udp_ports;
629 if (irm->irm_cred != NULL) {
630 ira->ira_cred = irm->irm_cred;
631 ira->ira_free_flags |= IRA_FREE_CRED;
632 irm->irm_cred = NULL;
634 ira->ira_cpid = irm->irm_cpid;
636 if (ira->ira_flags & IRAF_L2SRC_SET)
637 bcopy(irm->irm_l2src, ira->ira_l2src, IRA_L2SRC_SIZE);
639 ira->ira_ipsec_ah_sa = irm->irm_ipsec_ah_sa;
640 ira->ira_ipsec_esp_sa = irm->irm_ipsec_esp_sa;
641 ira->ira_ipsec_action = irm->irm_ipsec_action;
643 freeb(iramp);
644 return (B_TRUE);
648 * Free the irm mblk and any references it holds
649 * Returns b_cont.
651 mblk_t *
652 ip_recv_attr_free_mblk(mblk_t *iramp)
654 iramblk_t *irm;
655 mblk_t *mp;
657 /* Consume mp */
658 ASSERT(DB_TYPE(iramp) == M_BREAK);
659 mp = iramp->b_cont;
661 irm = (iramblk_t *)iramp->b_rptr;
662 ASSERT(irm->irm_inbound);
664 if (irm->irm_ipsec_ah_sa != NULL) {
665 IPSA_REFRELE(irm->irm_ipsec_ah_sa);
666 irm->irm_ipsec_ah_sa = NULL;
668 if (irm->irm_ipsec_esp_sa != NULL) {
669 IPSA_REFRELE(irm->irm_ipsec_esp_sa);
670 irm->irm_ipsec_esp_sa = NULL;
672 if (irm->irm_ipsec_action != NULL) {
673 IPACT_REFRELE(irm->irm_ipsec_action);
674 irm->irm_ipsec_action = NULL;
676 if (irm->irm_cred != NULL) {
677 crfree(irm->irm_cred);
678 irm->irm_cred = NULL;
681 freeb(iramp);
682 return (mp);
686 * Returns true if the mblk contains an ip_recv_attr_t
687 * For now we just check db_type.
689 boolean_t
690 ip_recv_attr_is_mblk(mblk_t *mp)
693 * Need to handle the various forms of tcp_timermp which are tagged
694 * with b_wptr and might have a NULL b_datap.
696 if (mp->b_wptr == NULL || mp->b_wptr == (uchar_t *)-1)
697 return (B_FALSE);
699 #ifdef DEBUG
700 iramblk_t *irm;
702 if (DB_TYPE(mp) != M_BREAK)
703 return (B_FALSE);
705 irm = (iramblk_t *)mp->b_rptr;
706 ASSERT(irm->irm_inbound);
707 return (B_TRUE);
708 #else
709 return (DB_TYPE(mp) == M_BREAK);
710 #endif
713 static ip_xmit_attr_t *
714 conn_get_ixa_impl(conn_t *connp, boolean_t replace, int kmflag)
716 ip_xmit_attr_t *ixa;
717 ip_xmit_attr_t *oldixa;
719 mutex_enter(&connp->conn_lock);
720 ixa = connp->conn_ixa;
722 /* At least one references for the conn_t */
723 ASSERT(ixa->ixa_refcnt >= 1);
724 if (atomic_inc_32_nv(&ixa->ixa_refcnt) == 2) {
725 /* No other thread using conn_ixa */
726 mutex_exit(&connp->conn_lock);
727 return (ixa);
729 ixa = kmem_alloc(sizeof (*ixa), kmflag);
730 if (ixa == NULL) {
731 mutex_exit(&connp->conn_lock);
732 ixa_refrele(connp->conn_ixa);
733 return (NULL);
735 ixa_safe_copy(connp->conn_ixa, ixa);
737 /* Make sure we drop conn_lock before any refrele */
738 if (replace) {
739 ixa->ixa_refcnt++; /* No atomic needed - not visible */
740 oldixa = connp->conn_ixa;
741 connp->conn_ixa = ixa;
742 mutex_exit(&connp->conn_lock);
743 IXA_REFRELE(oldixa); /* Undo refcnt from conn_t */
744 } else {
745 oldixa = connp->conn_ixa;
746 mutex_exit(&connp->conn_lock);
748 IXA_REFRELE(oldixa); /* Undo above atomic_add_32_nv */
750 return (ixa);
754 * Return an ip_xmit_attr_t to use with a conn_t that ensures that only
755 * the caller can access the ip_xmit_attr_t.
757 * If nobody else is using conn_ixa we return it.
758 * Otherwise we make a "safe" copy of conn_ixa
759 * and return it. The "safe" copy has the pointers set to NULL
760 * (since the pointers might be changed by another thread using
761 * conn_ixa). The caller needs to check for NULL pointers to see
762 * if ip_set_destination needs to be called to re-establish the pointers.
764 * If 'replace' is set then we replace conn_ixa with the new ip_xmit_attr_t.
765 * That is used when we connect() the ULP.
767 ip_xmit_attr_t *
768 conn_get_ixa(conn_t *connp, boolean_t replace)
770 return (conn_get_ixa_impl(connp, replace, KM_NOSLEEP));
774 * Used only when the option is to have the kernel hang due to not
775 * cleaning up ixa references on ills etc.
777 ip_xmit_attr_t *
778 conn_get_ixa_tryhard(conn_t *connp, boolean_t replace)
780 return (conn_get_ixa_impl(connp, replace, KM_SLEEP));
784 * Replace conn_ixa with the ixa argument.
786 * The caller must hold conn_lock.
788 * We return the old ixa; the caller must ixa_refrele that after conn_lock
789 * has been dropped.
791 ip_xmit_attr_t *
792 conn_replace_ixa(conn_t *connp, ip_xmit_attr_t *ixa)
794 ip_xmit_attr_t *oldixa;
796 ASSERT(MUTEX_HELD(&connp->conn_lock));
798 oldixa = connp->conn_ixa;
799 IXA_REFHOLD(ixa);
800 ixa->ixa_conn_id = oldixa->ixa_conn_id;
801 connp->conn_ixa = ixa;
802 return (oldixa);
806 * Return a ip_xmit_attr_t to use with a conn_t that is based on but
807 * separate from conn_ixa.
809 * This "safe" copy has the pointers set to NULL
810 * (since the pointers might be changed by another thread using
811 * conn_ixa). The caller needs to check for NULL pointers to see
812 * if ip_set_destination needs to be called to re-establish the pointers.
814 ip_xmit_attr_t *
815 conn_get_ixa_exclusive(conn_t *connp)
817 ip_xmit_attr_t *ixa;
819 mutex_enter(&connp->conn_lock);
820 ixa = connp->conn_ixa;
822 /* At least one references for the conn_t */
823 ASSERT(ixa->ixa_refcnt >= 1);
825 /* Make sure conn_ixa doesn't disappear while we copy it */
826 atomic_inc_32(&ixa->ixa_refcnt);
828 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
829 if (ixa == NULL) {
830 mutex_exit(&connp->conn_lock);
831 ixa_refrele(connp->conn_ixa);
832 return (NULL);
834 ixa_safe_copy(connp->conn_ixa, ixa);
835 mutex_exit(&connp->conn_lock);
836 IXA_REFRELE(connp->conn_ixa);
837 return (ixa);
840 void
841 ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
843 bcopy(src, ixa, sizeof (*ixa));
844 ixa->ixa_refcnt = 1;
846 * Clear any pointers that have references and might be changed
847 * by ip_set_destination or the ULP
849 ixa->ixa_ire = NULL;
850 ixa->ixa_nce = NULL;
851 ixa->ixa_dce = NULL;
852 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
853 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
854 #ifdef DEBUG
855 ixa->ixa_curthread = NULL;
856 #endif
857 /* Clear all the IPsec pointers and the flag as well. */
858 ixa->ixa_flags &= ~IXAF_IPSEC_SECURE;
860 ixa->ixa_ipsec_latch = NULL;
861 ixa->ixa_ipsec_ah_sa = NULL;
862 ixa->ixa_ipsec_esp_sa = NULL;
863 ixa->ixa_ipsec_policy = NULL;
864 ixa->ixa_ipsec_action = NULL;
867 * We leave ixa_cred unchanged, but if it has a refhold we need
868 * to get an extra refhold.
870 if (ixa->ixa_free_flags & IXA_FREE_CRED)
871 crhold(ixa->ixa_cred);
874 * There is no cleanup in progress on this new copy.
876 ixa->ixa_tcpcleanup = IXATC_IDLE;
880 * Duplicate an ip_xmit_attr_t.
881 * Assumes that the caller controls the ixa, hence we do not need to use
882 * a safe copy. We just have to increase the refcnt on any pointers.
884 ip_xmit_attr_t *
885 ip_xmit_attr_duplicate(ip_xmit_attr_t *src_ixa)
887 ip_xmit_attr_t *ixa;
889 ixa = kmem_alloc(sizeof (*ixa), KM_NOSLEEP);
890 if (ixa == NULL)
891 return (NULL);
892 bcopy(src_ixa, ixa, sizeof (*ixa));
893 ixa->ixa_refcnt = 1;
895 if (ixa->ixa_ire != NULL)
896 ire_refhold_notr(ixa->ixa_ire);
897 if (ixa->ixa_nce != NULL)
898 nce_refhold(ixa->ixa_nce);
899 if (ixa->ixa_dce != NULL)
900 dce_refhold_notr(ixa->ixa_dce);
902 #ifdef DEBUG
903 ixa->ixa_curthread = NULL;
904 #endif
906 if (ixa->ixa_ipsec_latch != NULL)
907 IPLATCH_REFHOLD(ixa->ixa_ipsec_latch);
908 if (ixa->ixa_ipsec_ah_sa != NULL)
909 IPSA_REFHOLD(ixa->ixa_ipsec_ah_sa);
910 if (ixa->ixa_ipsec_esp_sa != NULL)
911 IPSA_REFHOLD(ixa->ixa_ipsec_esp_sa);
912 if (ixa->ixa_ipsec_policy != NULL)
913 IPPOL_REFHOLD(ixa->ixa_ipsec_policy);
914 if (ixa->ixa_ipsec_action != NULL)
915 IPACT_REFHOLD(ixa->ixa_ipsec_action);
917 if (ixa->ixa_cred != NULL) {
918 crhold(ixa->ixa_cred);
919 ixa->ixa_free_flags |= IXA_FREE_CRED;
921 return (ixa);
924 void
925 ixa_refrele(ip_xmit_attr_t *ixa)
927 IXA_REFRELE(ixa);
930 void
931 ixa_inactive(ip_xmit_attr_t *ixa)
933 ASSERT(ixa->ixa_refcnt == 0);
935 ixa_cleanup(ixa);
936 kmem_free(ixa, sizeof (*ixa));
940 * Release any references contained in the ixa.
941 * Also clear any fields that are not controlled by ixa_flags.
943 void
944 ixa_cleanup(ip_xmit_attr_t *ixa)
946 if (ixa->ixa_ire != NULL) {
947 ire_refrele_notr(ixa->ixa_ire);
948 ixa->ixa_ire = NULL;
950 if (ixa->ixa_dce != NULL) {
951 dce_refrele_notr(ixa->ixa_dce);
952 ixa->ixa_dce = NULL;
954 if (ixa->ixa_nce != NULL) {
955 nce_refrele(ixa->ixa_nce);
956 ixa->ixa_nce = NULL;
958 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
959 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
960 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) {
961 ipsec_out_release_refs(ixa);
963 if (ixa->ixa_free_flags & IXA_FREE_CRED) {
964 ASSERT(ixa->ixa_cred != NULL);
965 crfree(ixa->ixa_cred);
966 ixa->ixa_free_flags &= ~IXA_FREE_CRED;
968 ixa->ixa_cred = NULL;
969 ixa->ixa_src_preferences = 0;
970 ixa->ixa_ifindex = 0;
971 ixa->ixa_multicast_ifindex = 0;
972 ixa->ixa_multicast_ifaddr = INADDR_ANY;
976 * Release any references contained in the ira.
977 * Callers which use ip_recv_attr_from_mblk() would pass B_TRUE as the second
978 * argument.
980 void
981 ira_cleanup(ip_recv_attr_t *ira, boolean_t refrele_ill)
983 if (ira->ira_ill != NULL) {
984 if (ira->ira_rill != ira->ira_ill) {
985 /* Caused by async processing */
986 ill_refrele(ira->ira_rill);
988 if (refrele_ill)
989 ill_refrele(ira->ira_ill);
991 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
992 ipsec_in_release_refs(ira);
994 if (ira->ira_free_flags & IRA_FREE_CRED) {
995 ASSERT(ira->ira_cred != NULL);
996 crfree(ira->ira_cred);
997 ira->ira_free_flags &= ~IRA_FREE_CRED;
999 ira->ira_cred = NULL;
1003 * Function to help release any IRE, NCE, or DCEs that
1004 * have been deleted and are marked as condemned.
1005 * The caller is responsible for any serialization which is different
1006 * for TCP, SCTP, and others.
1008 static void
1009 ixa_cleanup_stale(ip_xmit_attr_t *ixa)
1011 ire_t *ire;
1012 nce_t *nce;
1013 dce_t *dce;
1015 ire = ixa->ixa_ire;
1016 nce = ixa->ixa_nce;
1017 dce = ixa->ixa_dce;
1019 if (ire != NULL && IRE_IS_CONDEMNED(ire)) {
1020 ire_refrele_notr(ire);
1021 ire = ire_blackhole(ixa->ixa_ipst,
1022 !(ixa->ixa_flags & IXAF_IS_IPV4));
1023 ASSERT(ire != NULL);
1024 #ifdef DEBUG
1025 ire_refhold_notr(ire);
1026 ire_refrele(ire);
1027 #endif
1028 ixa->ixa_ire = ire;
1029 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1031 if (nce != NULL && nce->nce_is_condemned) {
1032 /* Can make it NULL as long as we set IRE_GENERATION_VERIFY */
1033 nce_refrele(nce);
1034 ixa->ixa_nce = NULL;
1035 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
1037 if (dce != NULL && DCE_IS_CONDEMNED(dce)) {
1038 dce_refrele_notr(dce);
1039 dce = dce_get_default(ixa->ixa_ipst);
1040 ASSERT(dce != NULL);
1041 #ifdef DEBUG
1042 dce_refhold_notr(dce);
1043 dce_refrele(dce);
1044 #endif
1045 ixa->ixa_dce = dce;
1046 ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
/*
 * Acquire the per-stack shared cleanup mblk (tcps_ixa_cleanup_mp) on behalf
 * of connp, and mark connp's conn_ixa as IXATC_INPROGRESS so that no other
 * thread can start (or enqueue) a cleanup of this connection.
 *
 * Returns with the mblk in hand and tcps_ixa_cleanup_mp cleared; the mblk
 * is handed back — and waiters are woken — by tcp_ixa_cleanup() once the
 * squeue has run the cleanup.  Never returns NULL.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	int need_retry;
	mblk_t *mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * It's possible that someone else came in and started cleaning up
	 * another connection between the time we verified this one is not being
	 * cleaned up and the time we actually get the shared mblk.  If that's
	 * the case, we've dropped the lock, and some other thread may have
	 * cleaned up this connection again, and is still waiting for
	 * notification of that cleanup's completion.  Therefore we need to
	 * recheck.
	 */
	do {
		need_retry = 0;
		/* Wait until no cleanup of this conn is pending or running. */
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		/* Wait for the single shared mblk to be returned to us. */
		while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
			/*
			 * Multiple concurrent cleanups; need to have the last
			 * one run since it could be an unplumb.
			 */
			need_retry = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}
	} while (need_retry);

	/*
	 * We now have the lock and the mblk; now make sure that no one else can
	 * try to clean up this connection or enqueue it for cleanup, clear the
	 * mblk pointer for this stack, drop the lock, and return the mblk.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == mp);
	ASSERT(mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (mp);
}
1104 * Used to run ixa_cleanup_stale inside the tcp squeue.
1105 * When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
1106 * and waking up the caller.
1108 /* ARGSUSED2 */
1109 static void
1110 tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,
1111 ip_recv_attr_t *dummy)
1113 conn_t *connp = (conn_t *)arg;
1114 tcp_stack_t *tcps;
1116 tcps = connp->conn_netstack->netstack_tcp;
1118 ixa_cleanup_stale(connp->conn_ixa);
1120 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1121 ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
1122 connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
1123 tcps->tcps_ixa_cleanup_mp = mp;
1124 cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
1126 * It is possible for any number of threads to be waiting for cleanup of
1127 * different connections. Absent a per-connection (or per-IXA) CV, we
1128 * need to wake them all up even though only one can be waiting on this
1129 * particular cleanup.
1131 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1132 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1135 static void
1136 tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
1138 tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
1140 mutex_enter(&tcps->tcps_ixa_cleanup_lock);
1142 ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);
1144 while (connp->conn_ixa->ixa_tcpcleanup == IXATC_INPROGRESS) {
1145 cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
1146 &tcps->tcps_ixa_cleanup_lock);
1149 ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);
1150 connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
1151 cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
1153 mutex_exit(&tcps->tcps_ixa_cleanup_lock);
1157 * ipcl_walk() function to help release any IRE, NCE, or DCEs that
1158 * have been deleted and are marked as condemned.
1159 * Note that we can't cleanup the pointers since there can be threads
1160 * in conn_ip_output() sending while we are called.
1162 void
1163 conn_ixa_cleanup(conn_t *connp, void *arg)
1165 boolean_t tryhard = (boolean_t)arg;
1167 if (IPCL_IS_TCP(connp)) {
1168 mblk_t *mp;
1170 mp = tcp_ixa_cleanup_getmblk(connp);
1172 if (connp->conn_sqp->sq_run == curthread) {
1173 /* Already on squeue */
1174 tcp_ixa_cleanup(connp, mp, NULL, NULL);
1175 } else {
1176 CONN_INC_REF(connp);
1177 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
1178 connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);
1180 tcp_ixa_cleanup_wait_and_finish(connp);
1181 } else if (IPCL_IS_SCTP(connp)) {
1182 sctp_t *sctp;
1183 sctp_faddr_t *fp;
1185 sctp = CONN2SCTP(connp);
1186 RUN_SCTP(sctp);
1187 ixa_cleanup_stale(connp->conn_ixa);
1188 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next)
1189 ixa_cleanup_stale(fp->sf_ixa);
1190 WAKE_SCTP(sctp);
1191 } else {
1192 ip_xmit_attr_t *ixa;
1195 * If there is a different thread using conn_ixa then we get a
1196 * new copy and cut the old one loose from conn_ixa. Otherwise
1197 * we use conn_ixa and prevent any other thread from
1198 * using/changing it. Anybody using conn_ixa (e.g., a thread in
1199 * conn_ip_output) will do an ixa_refrele which will remove any
1200 * references on the ire etc.
1202 * Once we are done other threads can use conn_ixa since the
1203 * refcnt will be back at one.
1205 * We are called either because an ill is going away, or
1206 * due to memory reclaim. In the former case we wait for
1207 * memory since we must remove the refcnts on the ill.
1209 if (tryhard) {
1210 ixa = conn_get_ixa_tryhard(connp, B_TRUE);
1211 ASSERT(ixa != NULL);
1212 } else {
1213 ixa = conn_get_ixa(connp, B_TRUE);
1214 if (ixa == NULL) {
1216 * Somebody else was using it and kmem_alloc
1217 * failed! Next memory reclaim will try to
1218 * clean up.
1220 DTRACE_PROBE1(conn__ixa__cleanup__bail,
1221 conn_t *, connp);
1222 return;
1225 ixa_cleanup_stale(ixa);
1226 ixa_refrele(ixa);
/*
 * ixa needs to be an exclusive copy so that no one changes the cookie
 * or the ixa_nce.
 *
 * Check whether the tx path identified by ixa_cookie is still flow
 * controlled; if so, put connp on the matching idl drain list and mark it
 * conn_blocked.  Returns B_TRUE iff the conn was inserted on a drain list.
 *
 * NOTE: the 'tryinsert' label below is jumped to from outside the else
 * arm that contains it (for the cookie == 0 case); the insert code relies
 * only on idl_txl->txl_lock already being held at that point.
 */
boolean_t
ixa_check_drain_insert(conn_t *connp, ip_xmit_attr_t *ixa)
{
	uintptr_t cookie = ixa->ixa_cookie;
	ill_dld_direct_t *idd;
	idl_tx_list_t *idl_txl;
	ill_t *ill = ixa->ixa_nce->nce_ill;
	boolean_t inserted = B_FALSE;

	idd = &(ill)->ill_dld_capab->idc_direct;
	/* The cookie hashes to a per-stack tx drain list. */
	idl_txl = &ixa->ixa_ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
	mutex_enter(&idl_txl->txl_lock);

	/*
	 * If `cookie' is zero, ip_xmit() -> canputnext() failed -- i.e., flow
	 * control is asserted on an ill that does not support direct calls.
	 * Jump to insert.
	 */
	if (cookie == 0)
		goto tryinsert;

	ASSERT(ILL_DIRECT_CAPABLE(ill));

	if (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, cookie) == 0) {
		/* Flow control has already lifted; nothing to do. */
		DTRACE_PROBE1(ill__tx__not__blocked, uintptr_t, cookie);
	} else if (idl_txl->txl_cookie != (uintptr_t)NULL &&
	    idl_txl->txl_cookie != ixa->ixa_cookie) {
		/* Another cookie already owns this hash bucket's list. */
		DTRACE_PROBE2(ill__tx__cookie__collision, uintptr_t, cookie,
		    uintptr_t, idl_txl->txl_cookie);
		/* TODO: bump kstat for cookie collision */
	} else {
		/*
		 * Check/set conn_blocked under conn_lock. Note that txl_lock
		 * will not suffice since two separate UDP threads may be
		 * racing to send to different destinations that are
		 * associated with different cookies and thus may not be
		 * holding the same txl_lock. Further, since a given conn_t
		 * can only be on a single drain list, the conn_t will be
		 * enqueued on whichever thread wins this race.
		 */
	tryinsert:	mutex_enter(&connp->conn_lock);
		if (connp->conn_blocked) {
			DTRACE_PROBE1(ill__tx__conn__already__blocked,
			    conn_t *, connp);
			mutex_exit(&connp->conn_lock);
		} else {
			connp->conn_blocked = B_TRUE;
			mutex_exit(&connp->conn_lock);
			idl_txl->txl_cookie = cookie;
			conn_drain_insert(connp, idl_txl);
			/* STREAMS conns also shut off the write queue. */
			if (!IPCL_IS_NONSTR(connp))
				noenable(connp->conn_wq);
			inserted = B_TRUE;
		}
	}
	mutex_exit(&idl_txl->txl_lock);
	return (inserted);
}