3861 tcp buffer size tunables for iscsi connections
[unleashed.git] / usr / src / uts / common / io / idm / idm_so.c
blobc1c5a27a18a852eac2e7fbbcbfa85f5351ddb47a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2013 by Delphix. All rights reserved.
29 #include <sys/conf.h>
30 #include <sys/stat.h>
31 #include <sys/file.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/modctl.h>
35 #include <sys/priv.h>
36 #include <sys/cpuvar.h>
37 #include <sys/socket.h>
38 #include <sys/strsubr.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sdt.h>
41 #include <netinet/tcp.h>
42 #include <inet/tcp.h>
43 #include <sys/socketvar.h>
44 #include <sys/pathname.h>
45 #include <sys/fs/snode.h>
46 #include <sys/fs/dv_node.h>
47 #include <sys/vnode.h>
48 #include <netinet/in.h>
49 #include <net/if.h>
50 #include <sys/sockio.h>
51 #include <sys/ksocket.h>
52 #include <sys/filio.h> /* FIONBIO */
53 #include <sys/iscsi_protocol.h>
54 #include <sys/idm/idm.h>
55 #include <sys/idm/idm_so.h>
56 #include <sys/idm/idm_text.h>
58 #define IN_PROGRESS_DELAY 1
61 * in6addr_any is currently all zeroes, but use the macro in case this
62 * ever changes.
64 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
66 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
67 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
68 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
70 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
71 static void idm_so_conn_destroy_common(idm_conn_t *ic);
72 static void idm_so_conn_connect_common(idm_conn_t *ic);
74 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
75 boolean_t boot_conn);
76 static void idm_set_postconnect_options(ksocket_t so);
77 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
79 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
80 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
81 idm_buf_t *idb, uint32_t offset, uint32_t length);
82 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
83 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
84 idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
86 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
87 uint32_t ro, uint32_t dlength);
89 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
90 nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
92 static void idm_so_socket_set_nonblock(struct sonode *node);
93 static void idm_so_socket_set_block(struct sonode *node);
96 * Transport ops prototypes
98 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
99 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
100 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
101 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
102 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
103 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
104 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
105 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
106 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
107 static void idm_so_notice_key_values(idm_conn_t *it,
108 nvlist_t *negotiated_nvl);
109 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
110 nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
111 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
112 idm_transport_caps_t *caps);
113 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
114 static void idm_so_buf_free(idm_buf_t *idb);
115 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
116 static void idm_so_buf_teardown(idm_buf_t *idb);
117 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
118 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
119 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
120 static void idm_so_tgt_svc_offline(idm_svc_t *is);
121 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
122 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
123 static void idm_so_conn_disconnect(idm_conn_t *ic);
124 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
125 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
126 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
129 * IDM Native Sockets transport operations
131 static
132 idm_transport_ops_t idm_so_transport_ops = {
133 idm_so_tx, /* it_tx_pdu */
134 idm_so_buf_tx_to_ini, /* it_buf_tx_to_ini */
135 idm_so_buf_rx_from_ini, /* it_buf_rx_from_ini */
136 idm_so_rx_datain, /* it_rx_datain */
137 idm_so_rx_rtt, /* it_rx_rtt */
138 idm_so_rx_dataout, /* it_rx_dataout */
139 NULL, /* it_alloc_conn_rsrc */
140 NULL, /* it_free_conn_rsrc */
141 NULL, /* it_tgt_enable_datamover */
142 NULL, /* it_ini_enable_datamover */
143 NULL, /* it_conn_terminate */
144 idm_so_free_task_rsrc, /* it_free_task_rsrc */
145 idm_so_negotiate_key_values, /* it_negotiate_key_values */
146 idm_so_notice_key_values, /* it_notice_key_values */
147 idm_so_conn_is_capable, /* it_conn_is_capable */
148 idm_so_buf_alloc, /* it_buf_alloc */
149 idm_so_buf_free, /* it_buf_free */
150 idm_so_buf_setup, /* it_buf_setup */
151 idm_so_buf_teardown, /* it_buf_teardown */
152 idm_so_tgt_svc_create, /* it_tgt_svc_create */
153 idm_so_tgt_svc_destroy, /* it_tgt_svc_destroy */
154 idm_so_tgt_svc_online, /* it_tgt_svc_online */
155 idm_so_tgt_svc_offline, /* it_tgt_svc_offline */
156 idm_so_tgt_conn_destroy, /* it_tgt_conn_destroy */
157 idm_so_tgt_conn_connect, /* it_tgt_conn_connect */
158 idm_so_conn_disconnect, /* it_tgt_conn_disconnect */
159 idm_so_ini_conn_create, /* it_ini_conn_create */
160 idm_so_ini_conn_destroy, /* it_ini_conn_destroy */
161 idm_so_ini_conn_connect, /* it_ini_conn_connect */
162 idm_so_conn_disconnect, /* it_ini_conn_disconnect */
163 idm_so_declare_key_values /* it_declare_key_values */
166 kmutex_t idm_so_timed_socket_mutex;
168 int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
169 int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
172 * idm_so_init()
173 * Sockets transport initialization
175 void
176 idm_so_init(idm_transport_t *it)
178 /* Cache for IDM Data and R2T Transmit PDU's */
179 idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
180 sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
181 &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
183 /* Cache for IDM Receive PDU's */
184 idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
185 sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
186 &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
188 /* 128k buffer cache */
189 idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
190 IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
192 /* Set the sockets transport ops */
193 it->it_ops = &idm_so_transport_ops;
195 mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
200 * idm_so_fini()
201 * Sockets transport teardown
203 void
204 idm_so_fini(void)
206 kmem_cache_destroy(idm.idm_so_128k_buf_cache);
207 kmem_cache_destroy(idm.idm_sotx_pdu_cache);
208 kmem_cache_destroy(idm.idm_sorx_pdu_cache);
209 mutex_destroy(&idm_so_timed_socket_mutex);
212 ksocket_t
213 idm_socreate(int domain, int type, int protocol)
215 ksocket_t ks;
217 if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
218 CRED())) {
219 return (ks);
220 } else {
221 return (NULL);
226 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
227 * reception and transmission. The sonode still exists but its state
228 * gets modified to indicate it is no longer connected. Calls to
229 * idm_sorecv/idm_iov_sorecv will return so idm_soshutdown can be used
230 * regain control of a thread stuck in idm_sorecv.
232 void
233 idm_soshutdown(ksocket_t so)
235 (void) ksocket_shutdown(so, SHUT_RDWR, CRED());
239 * idm_sodestroy releases all resources associated with a socket previously
240 * created with idm_socreate. The socket must be shutdown using
241 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
242 * otherwise undefined behavior will result.
244 void
245 idm_sodestroy(ksocket_t ks)
247 (void) ksocket_close(ks, CRED());
251 * Function to compare two addresses in sockaddr_storage format
255 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
256 const struct sockaddr_storage *cmp_ss2,
257 boolean_t v4_mapped_as_v4,
258 boolean_t compare_ports)
260 struct sockaddr_storage mapped_v4_ss1, mapped_v4_ss2;
261 const struct sockaddr_storage *ss1, *ss2;
262 struct in_addr *in1, *in2;
263 struct in6_addr *in61, *in62;
264 int i;
267 * Normalize V4-mapped IPv6 addresses into V4 format if
268 * v4_mapped_as_v4 is B_TRUE.
270 ss1 = cmp_ss1;
271 ss2 = cmp_ss2;
272 if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
273 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
274 if (IN6_IS_ADDR_V4MAPPED(in61)) {
275 bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
276 mapped_v4_ss1.ss_family = AF_INET;
277 ((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
278 ((struct sockaddr_in *)ss1)->sin_port;
279 IN6_V4MAPPED_TO_INADDR(in61,
280 &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
281 ss1 = &mapped_v4_ss1;
284 ss2 = cmp_ss2;
285 if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
286 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
287 if (IN6_IS_ADDR_V4MAPPED(in62)) {
288 bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
289 mapped_v4_ss2.ss_family = AF_INET;
290 ((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
291 ((struct sockaddr_in *)ss2)->sin_port;
292 IN6_V4MAPPED_TO_INADDR(in62,
293 &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
294 ss2 = &mapped_v4_ss2;
299 * Compare ports, then address family, then ip address
301 if (compare_ports &&
302 (((struct sockaddr_in *)ss1)->sin_port !=
303 ((struct sockaddr_in *)ss2)->sin_port)) {
304 if (((struct sockaddr_in *)ss1)->sin_port >
305 ((struct sockaddr_in *)ss2)->sin_port)
306 return (1);
307 else
308 return (-1);
312 * ports are the same
314 if (ss1->ss_family != ss2->ss_family) {
315 if (ss1->ss_family == AF_INET)
316 return (1);
317 else
318 return (-1);
322 * address families are the same
324 if (ss1->ss_family == AF_INET) {
325 in1 = &((struct sockaddr_in *)ss1)->sin_addr;
326 in2 = &((struct sockaddr_in *)ss2)->sin_addr;
328 if (in1->s_addr > in2->s_addr)
329 return (1);
330 else if (in1->s_addr < in2->s_addr)
331 return (-1);
332 else
333 return (0);
334 } else if (ss1->ss_family == AF_INET6) {
335 in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
336 in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
338 for (i = 0; i < 4; i++) {
339 if (in61->s6_addr32[i] > in62->s6_addr32[i])
340 return (1);
341 else if (in61->s6_addr32[i] < in62->s6_addr32[i])
342 return (-1);
344 return (0);
347 return (1);
351 * IP address filter functions to flag addresses that should not
352 * go out to initiators through discovery.
354 static boolean_t
355 idm_v4_addr_okay(struct in_addr *in_addr)
357 in_addr_t addr = ntohl(in_addr->s_addr);
359 if ((INADDR_NONE == addr) ||
360 (IN_MULTICAST(addr)) ||
361 ((addr >> IN_CLASSA_NSHIFT) == 0) ||
362 ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
363 return (B_FALSE);
365 return (B_TRUE);
368 static boolean_t
369 idm_v6_addr_okay(struct in6_addr *addr6)
372 if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
373 (IN6_IS_ADDR_LOOPBACK(addr6)) ||
374 (IN6_IS_ADDR_MULTICAST(addr6)) ||
375 (IN6_IS_ADDR_V4MAPPED(addr6)) ||
376 (IN6_IS_ADDR_V4COMPAT(addr6)) ||
377 (IN6_IS_ADDR_LINKLOCAL(addr6))) {
378 return (B_FALSE);
380 return (B_TRUE);
384 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
385 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
388 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
390 ksocket_t so4, so6;
391 struct lifnum lifn;
392 struct lifconf lifc;
393 struct lifreq *lp;
394 int rval;
395 int numifs;
396 int bufsize;
397 void *buf;
398 int i, j, n, rc;
399 struct sockaddr_storage ss;
400 struct sockaddr_in *sin;
401 struct sockaddr_in6 *sin6;
402 idm_addr_t *ip;
403 idm_addr_list_t *ipaddr = NULL;
404 int size_ipaddr;
406 *ipaddr_p = NULL;
407 size_ipaddr = 0;
408 buf = NULL;
410 /* create an ipv4 and ipv6 UDP socket */
411 if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
412 return (0);
413 if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
414 idm_sodestroy(so6);
415 return (0);
419 retry_count:
420 /* snapshot the current number of interfaces */
421 lifn.lifn_family = PF_UNSPEC;
422 lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
423 lifn.lifn_count = 0;
424 /* use vp6 for ioctls with unspecified families by default */
425 if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
426 != 0) {
427 goto cleanup;
430 numifs = lifn.lifn_count;
431 if (numifs <= 0) {
432 goto cleanup;
435 /* allocate extra room in case more interfaces appear */
436 numifs += 10;
438 /* get the interface names and ip addresses */
439 bufsize = numifs * sizeof (struct lifreq);
440 buf = kmem_alloc(bufsize, KM_SLEEP);
442 lifc.lifc_family = AF_UNSPEC;
443 lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
444 lifc.lifc_len = bufsize;
445 lifc.lifc_buf = buf;
446 rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
447 if (rc != 0) {
448 goto cleanup;
450 /* if our extra room is used up, try again */
451 if (bufsize <= lifc.lifc_len) {
452 kmem_free(buf, bufsize);
453 buf = NULL;
454 goto retry_count;
456 /* calc actual number of ifconfs */
457 n = lifc.lifc_len / sizeof (struct lifreq);
459 /* get ip address */
460 if (n > 0) {
461 size_ipaddr = sizeof (idm_addr_list_t) +
462 (n - 1) * sizeof (idm_addr_t);
463 ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
464 } else {
465 goto cleanup;
469 * Examine the array of interfaces and filter uninteresting ones
471 for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
474 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
476 ss = lp->lifr_addr;
478 * fetch the flags using the socket of the correct family
480 switch (ss.ss_family) {
481 case AF_INET:
482 rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
483 &rval, CRED());
484 break;
485 case AF_INET6:
486 rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
487 &rval, CRED());
488 break;
489 default:
490 continue;
492 if (rc == 0) {
494 * If we got the flags, skip uninteresting
495 * interfaces based on flags
497 if ((lp->lifr_flags & IFF_UP) != IFF_UP)
498 continue;
499 if (lp->lifr_flags &
500 (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
501 continue;
504 /* save ip address */
505 ip = &ipaddr->al_addrs[j];
506 switch (ss.ss_family) {
507 case AF_INET:
508 sin = (struct sockaddr_in *)&ss;
509 if (!idm_v4_addr_okay(&sin->sin_addr))
510 continue;
511 ip->a_addr.i_addr.in4 = sin->sin_addr;
512 ip->a_addr.i_insize = sizeof (struct in_addr);
513 break;
514 case AF_INET6:
515 sin6 = (struct sockaddr_in6 *)&ss;
516 if (!idm_v6_addr_okay(&sin6->sin6_addr))
517 continue;
518 ip->a_addr.i_addr.in6 = sin6->sin6_addr;
519 ip->a_addr.i_insize = sizeof (struct in6_addr);
520 break;
521 default:
522 continue;
524 j++;
527 if (j == 0) {
528 /* no valid ifaddr */
529 kmem_free(ipaddr, size_ipaddr);
530 size_ipaddr = 0;
531 ipaddr = NULL;
532 } else {
533 ipaddr->al_out_cnt = j;
537 cleanup:
538 idm_sodestroy(so6);
539 idm_sodestroy(so4);
541 if (buf != NULL)
542 kmem_free(buf, bufsize);
544 *ipaddr_p = ipaddr;
545 return (size_ipaddr);
549 idm_sorecv(ksocket_t so, void *msg, size_t len)
551 iovec_t iov;
553 ASSERT(so != NULL);
554 ASSERT(len != 0);
557 * Fill in iovec and receive data
559 iov.iov_base = msg;
560 iov.iov_len = len;
562 return (idm_iov_sorecv(so, &iov, 1, len));
566 * idm_sosendto - Sends a buffered data on a non-connected socket.
568 * This function puts the data provided on the wire by calling sosendmsg.
569 * It will return only when all the data has been sent or if an error
570 * occurs.
572 * Returns 0 for success, the socket errno value if sosendmsg fails, and
573 * -1 if sosendmsg returns success but uio_resid != 0
576 idm_sosendto(ksocket_t so, void *buff, size_t len,
577 struct sockaddr *name, socklen_t namelen)
579 struct msghdr msg;
580 struct iovec iov[1];
581 int error;
582 size_t sent = 0;
584 iov[0].iov_base = buff;
585 iov[0].iov_len = len;
587 /* Initialization of the message header. */
588 bzero(&msg, sizeof (msg));
589 msg.msg_iov = iov;
590 msg.msg_iovlen = 1;
591 msg.msg_name = name;
592 msg.msg_namelen = namelen;
594 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
595 /* Data sent */
596 if (sent == len) {
597 /* All data sent. Success. */
598 return (0);
599 } else {
600 /* Not all data was sent. Failure */
601 return (-1);
605 /* Send failed */
606 return (error);
610 * idm_iov_sosend - Sends an iovec on a connection.
612 * This function puts the data provided on the wire by calling sosendmsg.
613 * It will return only when all the data has been sent or if an error
614 * occurs.
616 * Returns 0 for success, the socket errno value if sosendmsg fails, and
617 * -1 if sosendmsg returns success but uio_resid != 0
620 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
622 struct msghdr msg;
623 int error;
624 size_t sent = 0;
626 ASSERT(iop != NULL);
628 /* Initialization of the message header. */
629 bzero(&msg, sizeof (msg));
630 msg.msg_iov = iop;
631 msg.msg_iovlen = iovlen;
633 if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
634 == 0) {
635 /* Data sent */
636 if (sent == total_len) {
637 /* All data sent. Success. */
638 return (0);
639 } else {
640 /* Not all data was sent. Failure */
641 return (-1);
645 /* Send failed */
646 return (error);
650 * idm_iov_sorecv - Receives an iovec from a connection
652 * This function gets the data asked for from the socket. It will return
653 * only when all the requested data has been retrieved or if an error
654 * occurs.
656 * Returns 0 for success, the socket errno value if sorecvmsg fails, and
657 * -1 if sorecvmsg returns success but uio_resid != 0
660 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
662 struct msghdr msg;
663 int error;
664 size_t recv;
665 int flags;
667 ASSERT(iop != NULL);
669 /* Initialization of the message header. */
670 bzero(&msg, sizeof (msg));
671 msg.msg_iov = iop;
672 msg.msg_iovlen = iovlen;
673 flags = MSG_WAITALL;
675 if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
676 == 0) {
677 /* Received data */
678 if (recv == total_len) {
679 /* All requested data received. Success */
680 return (0);
681 } else {
683 * Not all data was received. The connection has
684 * probably failed.
686 return (-1);
690 /* Receive failed */
691 return (error);
694 static void
695 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
697 int conn_abort = 10000;
698 int conn_notify = 2000;
699 int abort = 30000;
701 /* Pre-connect socket options */
702 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
703 TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
704 CRED());
705 if (boot_conn == B_FALSE) {
706 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
707 TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
708 CRED());
709 (void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
710 TCP_ABORT_THRESHOLD,
711 (char *)&abort, sizeof (int), CRED());
715 static void
716 idm_set_postconnect_options(ksocket_t ks)
718 const int on = 1;
720 /* Set connect options */
721 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
722 (char *)&idm_so_rcvbuf, sizeof (int), CRED());
723 (void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
724 (char *)&idm_so_sndbuf, sizeof (int), CRED());
725 (void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
726 (char *)&on, sizeof (on), CRED());
729 static uint32_t
730 n2h24(const uchar_t *ptr)
732 return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
736 static idm_status_t
737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
739 iscsi_hdr_t *bhs;
740 uint32_t hdr_digest_crc;
741 uint32_t crc_calculated;
742 void *new_hdr;
743 int ahslen = 0;
744 int total_len = 0;
745 int iovlen = 0;
746 struct iovec iov[2];
747 idm_so_conn_t *so_conn;
748 int rc;
750 so_conn = ic->ic_transport_private;
753 * Read BHS
755 bhs = pdu->isp_hdr;
756 rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
757 if (rc != IDM_STATUS_SUCCESS) {
758 return (IDM_STATUS_FAIL);
762 * Check actual AHS length against the amount available in the buffer
764 pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
765 (bhs->hlength * sizeof (uint32_t));
766 pdu->isp_datalen = n2h24(bhs->dlength);
767 if (ic->ic_conn_type == CONN_TYPE_TGT &&
768 pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
769 IDM_CONN_LOG(CE_WARN,
770 "idm_sorecvhdr: exceeded the max data segment length");
771 return (IDM_STATUS_FAIL);
773 if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
774 /* Allocate a new header segment and change the callback */
775 new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
776 bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
777 pdu->isp_hdr = new_hdr;
778 pdu->isp_flags |= IDM_PDU_ADDL_HDR;
781 * This callback will restore the expected values after
782 * the RX PDU has been processed.
784 pdu->isp_callback = idm_sorx_addl_pdu_cb;
788 * Setup receipt of additional header and header digest (if enabled).
790 if (bhs->hlength > 0) {
791 iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
792 ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
793 iov[iovlen].iov_len = ahslen;
794 total_len += iov[iovlen].iov_len;
795 iovlen++;
798 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
799 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
800 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
801 total_len += iov[iovlen].iov_len;
802 iovlen++;
805 if ((iovlen != 0) &&
806 (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
807 total_len) != 0)) {
808 return (IDM_STATUS_FAIL);
812 * Validate header digest if enabled
814 if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
815 crc_calculated = idm_crc32c(pdu->isp_hdr,
816 sizeof (iscsi_hdr_t) + ahslen);
817 if (crc_calculated != hdr_digest_crc) {
818 /* Invalid Header Digest */
819 return (IDM_STATUS_HEADER_DIGEST);
823 return (0);
827 * idm_so_ini_conn_create()
828 * Allocate the sockets transport connection resources.
830 static idm_status_t
831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
833 ksocket_t so;
834 idm_so_conn_t *so_conn;
835 idm_status_t idmrc;
837 so = idm_socreate(cr->cr_domain, cr->cr_type,
838 cr->cr_protocol);
839 if (so == NULL) {
840 return (IDM_STATUS_FAIL);
843 /* Bind the socket if configured to do so */
844 if (cr->cr_bound) {
845 if (ksocket_bind(so, &cr->cr_bound_addr.sin,
846 SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
847 idm_sodestroy(so);
848 return (IDM_STATUS_FAIL);
852 idmrc = idm_so_conn_create_common(ic, so);
853 if (idmrc != IDM_STATUS_SUCCESS) {
854 idm_soshutdown(so);
855 idm_sodestroy(so);
856 return (IDM_STATUS_FAIL);
859 so_conn = ic->ic_transport_private;
860 /* Set up socket options */
861 idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
863 return (IDM_STATUS_SUCCESS);
867 * idm_so_ini_conn_destroy()
868 * Tear down the sockets transport connection resources.
870 static void
871 idm_so_ini_conn_destroy(idm_conn_t *ic)
873 idm_so_conn_destroy_common(ic);
877 * idm_so_ini_conn_connect()
878 * Establish the connection referred to by the handle previously allocated via
879 * idm_so_ini_conn_create().
881 static idm_status_t
882 idm_so_ini_conn_connect(idm_conn_t *ic)
884 idm_so_conn_t *so_conn;
885 struct sonode *node = NULL;
886 int rc;
887 clock_t lbolt, conn_login_max, conn_login_interval;
888 boolean_t nonblock;
890 so_conn = ic->ic_transport_private;
891 nonblock = ic->ic_conn_params.nonblock_socket;
892 conn_login_max = ic->ic_conn_params.conn_login_max;
893 conn_login_interval = ddi_get_lbolt() +
894 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
896 if (nonblock == B_TRUE) {
897 node = ((struct sonode *)(so_conn->ic_so));
898 /* Set to none block socket mode */
899 idm_so_socket_set_nonblock(node);
900 do {
901 rc = ksocket_connect(so_conn->ic_so,
902 &ic->ic_ini_dst_addr.sin,
903 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
904 CRED());
905 if (rc == 0 || rc == EISCONN) {
906 /* socket success or already success */
907 rc = IDM_STATUS_SUCCESS;
908 break;
910 if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
911 (rc == ECONNRESET)) {
912 /* socket connection timeout or refuse */
913 break;
915 lbolt = ddi_get_lbolt();
916 if (lbolt > conn_login_max) {
918 * Connection retry timeout,
919 * failed connect to target.
921 break;
923 if (lbolt < conn_login_interval) {
924 if ((rc == EINPROGRESS) || (rc == EALREADY)) {
925 /* TCP connect still in progress */
926 delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
927 continue;
928 } else {
929 delay(conn_login_interval - lbolt);
932 conn_login_interval = ddi_get_lbolt() +
933 SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
934 } while (rc != 0);
935 /* resume to nonblock mode */
936 if (rc == IDM_STATUS_SUCCESS) {
937 idm_so_socket_set_block(node);
939 } else {
940 rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
941 (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
944 if (rc != 0) {
945 idm_soshutdown(so_conn->ic_so);
946 return (IDM_STATUS_FAIL);
949 idm_so_conn_connect_common(ic);
951 idm_set_postconnect_options(so_conn->ic_so);
953 return (IDM_STATUS_SUCCESS);
956 idm_status_t
957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
959 idm_status_t idmrc;
961 idm_set_postconnect_options(new_so);
962 idmrc = idm_so_conn_create_common(ic, new_so);
964 return (idmrc);
967 static void
968 idm_so_tgt_conn_destroy(idm_conn_t *ic)
970 idm_so_conn_destroy_common(ic);
974 * idm_so_tgt_conn_connect()
975 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
976 * is invoked from the SM as a result of an inbound connection request.
978 static idm_status_t
979 idm_so_tgt_conn_connect(idm_conn_t *ic)
981 idm_so_conn_connect_common(ic);
983 return (IDM_STATUS_SUCCESS);
986 static idm_status_t
987 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
989 idm_so_conn_t *so_conn;
991 so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
992 so_conn->ic_so = new_so;
994 ic->ic_transport_private = so_conn;
995 ic->ic_transport_hdrlen = 0;
997 /* Set the scoreboarding flag on this connection */
998 ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
999 ic->ic_conn_params.max_recv_dataseglen =
1000 ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1001 ic->ic_conn_params.max_xmit_dataseglen =
1002 ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1005 * Initialize tx thread mutex and list
1007 mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1008 cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1009 list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1010 offsetof(idm_pdu_t, idm_tx_link));
1012 return (IDM_STATUS_SUCCESS);
1015 static void
1016 idm_so_conn_destroy_common(idm_conn_t *ic)
1018 idm_so_conn_t *so_conn = ic->ic_transport_private;
1020 ic->ic_transport_private = NULL;
1021 idm_sodestroy(so_conn->ic_so);
1022 list_destroy(&so_conn->ic_tx_list);
1023 mutex_destroy(&so_conn->ic_tx_mutex);
1024 cv_destroy(&so_conn->ic_tx_cv);
1026 kmem_free(so_conn, sizeof (idm_so_conn_t));
1029 static void
1030 idm_so_conn_connect_common(idm_conn_t *ic)
1032 idm_so_conn_t *so_conn;
1033 struct sockaddr_in6 t_addr;
1034 socklen_t t_addrlen = 0;
1036 so_conn = ic->ic_transport_private;
1037 bzero(&t_addr, sizeof (struct sockaddr_in6));
1038 t_addrlen = sizeof (struct sockaddr_in6);
1040 /* Set the local and remote addresses in the idm conn handle */
1041 (void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1042 &t_addrlen, CRED());
1043 bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1044 (void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1045 &t_addrlen, CRED());
1046 bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1048 mutex_enter(&ic->ic_mutex);
1049 so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1050 &p0, TS_RUN, minclsyspri);
1051 so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1052 &p0, TS_RUN, minclsyspri);
1054 while (so_conn->ic_rx_thread_did == 0 ||
1055 so_conn->ic_tx_thread_did == 0)
1056 cv_wait(&ic->ic_cv, &ic->ic_mutex);
1057 mutex_exit(&ic->ic_mutex);
1061 * idm_so_conn_disconnect()
1062 * Shutdown the socket connection and stop the thread
1064 static void
1065 idm_so_conn_disconnect(idm_conn_t *ic)
1067 idm_so_conn_t *so_conn;
1069 so_conn = ic->ic_transport_private;
1071 mutex_enter(&ic->ic_mutex);
1072 so_conn->ic_rx_thread_running = B_FALSE;
1073 so_conn->ic_tx_thread_running = B_FALSE;
1074 /* We need to wakeup the TX thread */
1075 mutex_enter(&so_conn->ic_tx_mutex);
1076 cv_signal(&so_conn->ic_tx_cv);
1077 mutex_exit(&so_conn->ic_tx_mutex);
1078 mutex_exit(&ic->ic_mutex);
1080 /* This should wakeup the RX thread if it is sleeping */
1081 idm_soshutdown(so_conn->ic_so);
1083 thread_join(so_conn->ic_tx_thread_did);
1084 thread_join(so_conn->ic_rx_thread_did);
1088 * idm_so_tgt_svc_create()
1089 * Establish a service on an IP address and port. idm_svc_req_t contains
1090 * the service parameters.
1092 /*ARGSUSED*/
1093 static idm_status_t
1094 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1096 idm_so_svc_t *so_svc;
1098 so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1100 /* Set the new sockets service in svc handle */
1101 is->is_so_svc = (void *)so_svc;
1103 return (IDM_STATUS_SUCCESS);
1107 * idm_so_tgt_svc_destroy()
1108 * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1110 static void
1111 idm_so_tgt_svc_destroy(idm_svc_t *is)
1113 /* the socket will have been torn down; free the service */
1114 kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1118 * idm_so_tgt_svc_online()
1119 * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1122 static idm_status_t
1123 idm_so_tgt_svc_online(idm_svc_t *is)
1125 idm_so_svc_t *so_svc;
1126 idm_svc_req_t *sr = &is->is_svc_req;
1127 struct sockaddr_in6 sin6_ip;
1128 const uint32_t on = 1;
1129 const uint32_t off = 0;
1131 mutex_enter(&is->is_mutex);
1132 so_svc = (idm_so_svc_t *)is->is_so_svc;
1135 * Try creating an IPv6 socket first
1137 if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1138 mutex_exit(&is->is_mutex);
1139 return (IDM_STATUS_FAIL);
1140 } else {
1141 bzero(&sin6_ip, sizeof (sin6_ip));
1142 sin6_ip.sin6_family = AF_INET6;
1143 sin6_ip.sin6_port = htons(sr->sr_port);
1144 sin6_ip.sin6_addr = in6addr_any;
1146 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1147 SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1149 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1151 (void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1152 SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1154 if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1155 sizeof (sin6_ip), CRED()) != 0) {
1156 mutex_exit(&is->is_mutex);
1157 idm_sodestroy(so_svc->is_so);
1158 return (IDM_STATUS_FAIL);
1162 idm_set_postconnect_options(so_svc->is_so);
1164 if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1165 mutex_exit(&is->is_mutex);
1166 idm_soshutdown(so_svc->is_so);
1167 idm_sodestroy(so_svc->is_so);
1168 return (IDM_STATUS_FAIL);
1171 /* Launch a watch thread */
1172 so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1173 is, 0, &p0, TS_RUN, minclsyspri);
1175 if (so_svc->is_thread == NULL) {
1176 /* Failure to launch; teardown the socket */
1177 mutex_exit(&is->is_mutex);
1178 idm_soshutdown(so_svc->is_so);
1179 idm_sodestroy(so_svc->is_so);
1180 return (IDM_STATUS_FAIL);
1182 ksocket_hold(so_svc->is_so);
1183 /* Wait for the port watcher thread to start */
1184 while (!so_svc->is_thread_running)
1185 cv_wait(&is->is_cv, &is->is_mutex);
1186 mutex_exit(&is->is_mutex);
1188 return (IDM_STATUS_SUCCESS);
1192 * idm_so_tgt_svc_offline
1194 * Stop listening on the IP address and port identified by idm_svc_t.
1196 static void
1197 idm_so_tgt_svc_offline(idm_svc_t *is)
1199 idm_so_svc_t *so_svc;
1200 mutex_enter(&is->is_mutex);
1201 so_svc = (idm_so_svc_t *)is->is_so_svc;
1202 so_svc->is_thread_running = B_FALSE;
1203 mutex_exit(&is->is_mutex);
1206 * Teardown socket
1208 idm_sodestroy(so_svc->is_so);
1211 * Now we expect the port watcher thread to terminate
1213 thread_join(so_svc->is_thread_did);
1217 * Watch thread for target service connection establishment.
1219 void
1220 idm_so_svc_port_watcher(void *arg)
1222 idm_svc_t *svc = arg;
1223 ksocket_t new_so;
1224 idm_conn_t *ic;
1225 idm_status_t idmrc;
1226 idm_so_svc_t *so_svc;
1227 int rc;
1228 const uint32_t off = 0;
1229 struct sockaddr_in6 t_addr;
1230 socklen_t t_addrlen;
1232 bzero(&t_addr, sizeof (struct sockaddr_in6));
1233 t_addrlen = sizeof (struct sockaddr_in6);
1234 mutex_enter(&svc->is_mutex);
1236 so_svc = svc->is_so_svc;
1237 so_svc->is_thread_running = B_TRUE;
1238 so_svc->is_thread_did = so_svc->is_thread->t_did;
1240 cv_signal(&svc->is_cv);
1242 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1243 svc->is_svc_req.sr_port);
1245 while (so_svc->is_thread_running) {
1246 mutex_exit(&svc->is_mutex);
1248 if ((rc = ksocket_accept(so_svc->is_so,
1249 (struct sockaddr *)&t_addr, &t_addrlen,
1250 &new_so, CRED())) != 0) {
1251 mutex_enter(&svc->is_mutex);
1252 if (rc == ECONNABORTED)
1253 continue;
1254 /* Connection problem */
1255 break;
1258 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1260 (void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1261 (char *)&off, sizeof (off), CRED());
1263 idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1264 &ic);
1265 if (idmrc != IDM_STATUS_SUCCESS) {
1266 /* Drop connection */
1267 idm_soshutdown(new_so);
1268 idm_sodestroy(new_so);
1269 mutex_enter(&svc->is_mutex);
1270 continue;
1273 idmrc = idm_so_tgt_conn_create(ic, new_so);
1274 if (idmrc != IDM_STATUS_SUCCESS) {
1275 idm_svc_conn_destroy(ic);
1276 idm_soshutdown(new_so);
1277 idm_sodestroy(new_so);
1278 mutex_enter(&svc->is_mutex);
1279 continue;
1283 * Kick the state machine. At CS_S3_XPT_UP the state machine
1284 * will notify the client (target) about the new connection.
1286 idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1288 mutex_enter(&svc->is_mutex);
1290 ksocket_rele(so_svc->is_so);
1291 so_svc->is_thread_running = B_FALSE;
1292 mutex_exit(&svc->is_mutex);
1294 IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1295 svc->is_svc_req.sr_port);
1297 thread_exit();
1301 * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1302 * frees resources associated with the task.
1304 * It's not clear that this should return idm_status_t. What do we do
1305 * if it fails?
1307 static idm_status_t
1308 idm_so_free_task_rsrc(idm_task_t *idt)
1310 idm_buf_t *idb, *next_idb;
1313 * There is nothing to cleanup on initiator connections
1315 if (IDM_CONN_ISINI(idt->idt_ic))
1316 return (IDM_STATUS_SUCCESS);
1319 * If this is a target connection, call idm_buf_rx_from_ini_done for
1320 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1322 * In addition, remove any buffers associated with this task from
1323 * the ic_tx_list. We'll do this by walking the idt_inbufv list, but
1324 * items don't actually get removed from that list (and completion
1325 * routines called) until idm_task_cleanup.
1327 mutex_enter(&idt->idt_mutex);
1329 for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1330 next_idb = list_next(&idt->idt_outbufv, idb);
1331 if (idb->idb_in_transport) {
1333 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1335 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1336 uintptr_t, idb->idb_buf,
1337 uint32_t, idb->idb_bufoffset,
1338 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1339 uint32_t, idb->idb_xfer_len,
1340 int, XFER_BUF_RX_FROM_INI);
1341 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1342 mutex_enter(&idt->idt_mutex);
1346 for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1347 next_idb = list_next(&idt->idt_inbufv, idb);
1349 * We want to remove these items from the tx_list as well,
1350 * but knowing it's in the idt_inbufv list is not a guarantee
1351 * that it's in the tx_list. If it's on the tx list then
1352 * let idm_sotx_thread() clean it up.
1354 if (idb->idb_in_transport && !idb->idb_tx_thread) {
1356 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1358 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1359 uintptr_t, idb->idb_buf,
1360 uint32_t, idb->idb_bufoffset,
1361 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1362 uint32_t, idb->idb_xfer_len,
1363 int, XFER_BUF_TX_TO_INI);
1364 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1365 mutex_enter(&idt->idt_mutex);
1369 mutex_exit(&idt->idt_mutex);
1371 return (IDM_STATUS_SUCCESS);
1375 * idm_so_negotiate_key_values() validates the key values for this connection
1377 /* ARGSUSED */
1378 static kv_status_t
1379 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1380 nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1382 /* All parameters are negotiated at the iscsit level */
1383 return (KV_HANDLED);
1387 * idm_so_notice_key_values() activates the negotiated key values for
1388 * this connection.
1390 static void
1391 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1393 char *nvp_name;
1394 nvpair_t *nvp;
1395 nvpair_t *next_nvp;
1396 int nvrc;
1397 idm_status_t idm_status;
1398 const idm_kv_xlate_t *ikvx;
1399 uint64_t num_val;
1401 for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1402 nvp != NULL; nvp = next_nvp) {
1403 next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1404 nvp_name = nvpair_name(nvp);
1406 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1407 switch (ikvx->ik_key_id) {
1408 case KI_HEADER_DIGEST:
1409 case KI_DATA_DIGEST:
1410 idm_status = idm_so_handle_digest(it, nvp, ikvx);
1411 ASSERT(idm_status == 0);
1413 /* Remove processed item from negotiated_nvl list */
1414 nvrc = nvlist_remove_all(
1415 negotiated_nvl, ikvx->ik_key_name);
1416 ASSERT(nvrc == 0);
1417 break;
1418 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1420 * Just pass the value down to idm layer.
1421 * No need to remove it from negotiated_nvl list here.
1423 nvrc = nvpair_value_uint64(nvp, &num_val);
1424 ASSERT(nvrc == 0);
1425 it->ic_conn_params.max_xmit_dataseglen =
1426 (uint32_t)num_val;
1427 break;
1428 default:
1429 break;
1435 * idm_so_declare_key_values() declares the key values for this connection
1437 /* ARGSUSED */
1438 static kv_status_t
1439 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1440 nvlist_t *outgoing_nvl)
1442 char *nvp_name;
1443 nvpair_t *nvp;
1444 nvpair_t *next_nvp;
1445 kv_status_t kvrc;
1446 int nvrc = 0;
1447 const idm_kv_xlate_t *ikvx;
1448 uint64_t num_val;
1450 for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1451 nvp != NULL && nvrc == 0; nvp = next_nvp) {
1452 next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1453 nvp_name = nvpair_name(nvp);
1455 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1456 switch (ikvx->ik_key_id) {
1457 case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1458 if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1459 break;
1461 if (outgoing_nvl &&
1462 (nvrc = nvlist_add_uint64(outgoing_nvl,
1463 nvp_name, num_val)) != 0) {
1464 break;
1466 it->ic_conn_params.max_recv_dataseglen =
1467 (uint32_t)num_val;
1468 break;
1469 default:
1470 break;
1473 kvrc = idm_nvstat_to_kvstat(nvrc);
1474 return (kvrc);
1477 static idm_status_t
1478 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1479 const idm_kv_xlate_t *ikvx)
1481 int nvrc;
1482 char *digest_choice_string;
1484 nvrc = nvpair_value_string(digest_choice,
1485 &digest_choice_string);
1486 ASSERT(nvrc == 0);
1487 if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1488 switch (ikvx->ik_key_id) {
1489 case KI_HEADER_DIGEST:
1490 it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1491 break;
1492 case KI_DATA_DIGEST:
1493 it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1494 break;
1495 default:
1496 ASSERT(0);
1497 break;
1499 } else if (strcasecmp(digest_choice_string, "none") == 0) {
1500 switch (ikvx->ik_key_id) {
1501 case KI_HEADER_DIGEST:
1502 it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1503 break;
1504 case KI_DATA_DIGEST:
1505 it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1506 break;
1507 default:
1508 ASSERT(0);
1509 break;
1511 } else {
1512 ASSERT(0);
1515 return (IDM_STATUS_SUCCESS);
1520 * idm_so_conn_is_capable() verifies that the passed connection is provided
1521 * for by the sockets interface.
1523 /* ARGSUSED */
1524 static boolean_t
1525 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1527 return (B_TRUE);
1531 * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1532 * idm_sorecv_scsidata() function invoked earlier actually reads the data
1533 * off the socket into the appropriate buffers.
1535 static void
1536 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1538 iscsi_data_hdr_t *bhs;
1539 idm_task_t *idt;
1540 idm_buf_t *idb;
1541 uint32_t datasn;
1542 size_t offset;
1543 iscsi_hdr_t *ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1544 iscsi_data_rsp_hdr_t *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1546 ASSERT(ic != NULL);
1547 ASSERT(pdu != NULL);
1549 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1550 datasn = ntohl(bhs->datasn);
1551 offset = ntohl(bhs->offset);
1553 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1556 * Look up the task corresponding to the initiator task tag
1557 * to get the buffers affiliated with the task.
1559 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1560 if (idt == NULL) {
1561 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1562 idm_pdu_rx_protocol_error(ic, pdu);
1563 return;
1566 idb = pdu->isp_sorx_buf;
1567 if (idb == NULL) {
1568 IDM_CONN_LOG(CE_WARN,
1569 "idm_so_rx_datain: failed to find buffer");
1570 idm_task_rele(idt);
1571 idm_pdu_rx_protocol_error(ic, pdu);
1572 return;
1576 * DataSN values should be sequential and should not have any gaps or
1577 * repetitions. Check the DataSN with the one stored in the task.
1579 if (datasn == idt->idt_exp_datasn) {
1580 idt->idt_exp_datasn++; /* keep track of DataSN received */
1581 } else {
1582 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1583 idm_task_rele(idt);
1584 idm_pdu_rx_protocol_error(ic, pdu);
1585 return;
1589 * PDUs in a sequence should be in continuously increasing
1590 * address offset
1592 if (offset != idb->idb_exp_offset) {
1593 IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1594 idm_task_rele(idt);
1595 idm_pdu_rx_protocol_error(ic, pdu);
1596 return;
1598 /* Expected next relative buffer offset */
1599 idb->idb_exp_offset += n2h24(bhs->dlength);
1600 idt->idt_rx_bytes += n2h24(bhs->dlength);
1602 idm_task_rele(idt);
1605 * For now call scsi_rsp which will process the data rsp
1606 * Revisit, need to provide an explicit client entry point for
1607 * phase collapse completions.
1609 if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1610 (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1611 (*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1614 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1618 * The idm_so_rx_dataout() function is used by the iSCSI target to read
1619 * data from the Data-Out PDU sent by the iSCSI initiator.
1621 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1622 * task to get the buffers associated with the PDU. A PDU might span buffers.
1623 * The data is then read into the respective buffer.
1625 static void
1626 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1629 iscsi_data_hdr_t *bhs;
1630 idm_task_t *idt;
1631 idm_buf_t *idb;
1632 size_t offset;
1634 ASSERT(ic != NULL);
1635 ASSERT(pdu != NULL);
1637 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1638 offset = ntohl(bhs->offset);
1639 ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1642 * Look up the task corresponding to the initiator task tag
1643 * to get the buffers affiliated with the task.
1645 idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1646 if (idt == NULL) {
1647 IDM_CONN_LOG(CE_WARN,
1648 "idm_so_rx_dataout: failed to find task");
1649 idm_pdu_rx_protocol_error(ic, pdu);
1650 return;
1653 idb = pdu->isp_sorx_buf;
1654 if (idb == NULL) {
1655 IDM_CONN_LOG(CE_WARN,
1656 "idm_so_rx_dataout: failed to find buffer");
1657 idm_task_rele(idt);
1658 idm_pdu_rx_protocol_error(ic, pdu);
1659 return;
1662 /* Keep track of data transferred - check data offsets */
1663 if (offset != idb->idb_exp_offset) {
1664 IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1665 "%ld, %d", offset, idb->idb_exp_offset);
1666 idm_task_rele(idt);
1667 idm_pdu_rx_protocol_error(ic, pdu);
1668 return;
1670 /* Expected next relative offset */
1671 idb->idb_exp_offset += ntoh24(bhs->dlength);
1672 idt->idt_rx_bytes += n2h24(bhs->dlength);
1675 * Call the buffer callback when the transfer is complete
1677 * The connection state machine should only abort tasks after
1678 * shutting down the connection so we are assured that there
1679 * won't be a simultaneous attempt to abort this task at the
1680 * same time as we are processing this PDU (due to a connection
1681 * state change).
1683 if (bhs->flags & ISCSI_FLAG_FINAL) {
1685 * We only want to call idm_buf_rx_from_ini_done once
1686 * per transfer. It's possible that this task has
1687 * already been aborted in which case
1688 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1689 * for each buffer with idb_in_transport==B_TRUE. To
1690 * close this window and ensure that this doesn't happen,
1691 * we'll clear idb->idb_in_transport now while holding
1692 * the task mutex. This is only really an issue for
1693 * SCSI task abort -- if tasks were being aborted because
1694 * of a connection state change the state machine would
1695 * have already stopped the receive thread.
1697 mutex_enter(&idt->idt_mutex);
1700 * Release the task hold here (obtained in idm_task_find)
1701 * because the task may complete synchronously during
1702 * idm_buf_rx_from_ini_done. Since we still have an active
1703 * buffer we know there is at least one additional hold on idt.
1705 idm_task_rele(idt);
1708 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1710 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1711 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1712 uint64_t, 0, uint32_t, 0, uint32_t, 0,
1713 uint32_t, idb->idb_xfer_len,
1714 int, XFER_BUF_RX_FROM_INI);
1715 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1716 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 return;
1720 idm_task_rele(idt);
1721 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1725 * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1726 * the R2T PDU sent by the iSCSI target indicating that it is ready to
1727 * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1728 * and looks up the task in the task tree using the itt to get the output
1729 * buffers associated the task. The R2T PDU contains the offset of the
1730 * requested data and the data length. This function then constructs a
1731 * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1732 * PDU is associated with the R2T by the Target Transfer Tag (ttt).
1735 static void
1736 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1738 idm_task_t *idt;
1739 idm_buf_t *idb;
1740 iscsi_rtt_hdr_t *rtt_hdr;
1741 uint32_t data_offset;
1742 uint32_t data_length;
1744 ASSERT(ic != NULL);
1745 ASSERT(pdu != NULL);
1747 rtt_hdr = (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1748 data_offset = ntohl(rtt_hdr->data_offset);
1749 data_length = ntohl(rtt_hdr->data_length);
1750 idt = idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1752 if (idt == NULL) {
1753 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1754 idm_pdu_rx_protocol_error(ic, pdu);
1755 return;
1758 /* Find the buffer bound to the task by the iSCSI initiator */
1759 mutex_enter(&idt->idt_mutex);
1760 idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1761 if (idb == NULL) {
1762 mutex_exit(&idt->idt_mutex);
1763 idm_task_rele(idt);
1764 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1765 idm_pdu_rx_protocol_error(ic, pdu);
1766 return;
1769 /* return buffer contains this data */
1770 if (data_offset + data_length > idb->idb_buflen) {
1771 /* Overflow */
1772 mutex_exit(&idt->idt_mutex);
1773 idm_task_rele(idt);
1774 IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1775 "buffer");
1776 idm_pdu_rx_protocol_error(ic, pdu);
1777 return;
1780 idt->idt_r2t_ttt = rtt_hdr->ttt;
1781 idt->idt_exp_datasn = 0;
1783 idm_so_send_rtt_data(ic, idt, idb, data_offset,
1784 ntohl(rtt_hdr->data_length));
1786 * the idt_mutex is released in idm_so_send_rtt_data
1789 idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1790 idm_task_rele(idt);
1794 idm_status_t
1795 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1797 uint8_t pad[ISCSI_PAD_WORD_LEN];
1798 int pad_len;
1799 uint32_t data_digest_crc;
1800 uint32_t crc_calculated;
1801 int total_len;
1802 idm_so_conn_t *so_conn;
1804 so_conn = ic->ic_transport_private;
1806 pad_len = ((ISCSI_PAD_WORD_LEN -
1807 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1808 (ISCSI_PAD_WORD_LEN - 1));
1810 ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1812 total_len = pdu->isp_datalen;
1814 if (pad_len) {
1815 pdu->isp_iov[pdu->isp_iovlen].iov_base = (char *)&pad;
1816 pdu->isp_iov[pdu->isp_iovlen].iov_len = pad_len;
1817 total_len += pad_len;
1818 pdu->isp_iovlen++;
1821 /* setup data digest */
1822 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1823 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1824 (char *)&data_digest_crc;
1825 pdu->isp_iov[pdu->isp_iovlen].iov_len =
1826 sizeof (data_digest_crc);
1827 total_len += sizeof (data_digest_crc);
1828 pdu->isp_iovlen++;
1831 pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1833 if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1834 pdu->isp_iovlen, total_len) != 0) {
1835 return (IDM_STATUS_IO);
1838 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839 crc_calculated = idm_crc32c(pdu->isp_data,
1840 pdu->isp_datalen);
1841 if (pad_len) {
1842 crc_calculated = idm_crc32c_continued((char *)&pad,
1843 pad_len, crc_calculated);
1845 if (crc_calculated != data_digest_crc) {
1846 IDM_CONN_LOG(CE_WARN,
1847 "idm_sorecvdata: "
1848 "CRC error: actual 0x%x, calc 0x%x",
1849 data_digest_crc, crc_calculated);
1851 /* Invalid Data Digest */
1852 return (IDM_STATUS_DATA_DIGEST);
1856 return (IDM_STATUS_SUCCESS);
1860 * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1861 * Data-type PDU header must be read into the idm_pdu_t structure prior to
1862 * calling this function.
1864 idm_status_t
1865 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1867 iscsi_data_hdr_t *bhs;
1868 idm_task_t *task;
1869 uint32_t offset;
1870 uint8_t opcode;
1871 uint32_t dlength;
1872 list_t *buflst;
1873 uint32_t xfer_bytes;
1874 idm_status_t status;
1876 ASSERT(ic != NULL);
1877 ASSERT(pdu != NULL);
1879 bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1881 offset = ntohl(bhs->offset);
1882 opcode = bhs->opcode;
1883 dlength = n2h24(bhs->dlength);
1885 ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1886 (opcode == ISCSI_OP_SCSI_DATA));
1889 * Successful lookup implicitly gets a "hold" on the task. This
1890 * hold must be released before leaving this function. At one
1891 * point we were caching this task context and retaining the hold
1892 * but it turned out to be very difficult to release the hold properly.
1893 * The task can be aborted and the connection shutdown between this
1894 * call and the subsequent expected call to idm_so_rx_datain/
1895 * idm_so_rx_dataout (in which case those functions are not called).
1896 * Releasing the hold in the PDU callback doesn't work well either
1897 * because the whole task may be completed by then at which point
1898 * it is too late to release the hold -- for better or worse this
1899 * code doesn't wait on the refcnts during normal operation.
1900 * idm_task_find() is very fast and it is not a huge burden if we
1901 * have to do it twice.
1903 task = idm_task_find(ic, bhs->itt, bhs->ttt);
1904 if (task == NULL) {
1905 IDM_CONN_LOG(CE_WARN,
1906 "idm_sorecv_scsidata: could not find task");
1907 return (IDM_STATUS_FAIL);
1910 mutex_enter(&task->idt_mutex);
1911 buflst = (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1912 &task->idt_inbufv : &task->idt_outbufv;
1913 pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1914 mutex_exit(&task->idt_mutex);
1916 if (pdu->isp_sorx_buf == NULL) {
1917 idm_task_rele(task);
1918 IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1919 "buffer for offset %x opcode=%x",
1920 offset, opcode);
1921 return (IDM_STATUS_FAIL);
1924 xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1925 ASSERT(xfer_bytes != 0);
1926 if (xfer_bytes != dlength) {
1927 idm_task_rele(task);
1929 * Buffer overflow, connection error. The PDU data is still
1930 * sitting in the socket so we can't use the connection
1931 * again until that data is drained.
1933 return (IDM_STATUS_FAIL);
1936 status = idm_sorecvdata(ic, pdu);
1938 idm_task_rele(task);
1940 return (status);
1943 static uint32_t
1944 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1946 uint32_t buf_ro = ro - idb->idb_bufoffset;
1947 uint32_t xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1949 ASSERT(ro >= idb->idb_bufoffset);
1951 pdu->isp_iov[pdu->isp_iovlen].iov_base =
1952 (caddr_t)idb->idb_buf + buf_ro;
1953 pdu->isp_iov[pdu->isp_iovlen].iov_len = xfer_len;
1954 pdu->isp_iovlen++;
1956 return (xfer_len);
1960 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1962 pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1963 ASSERT(pdu->isp_data != NULL);
1965 pdu->isp_databuflen = pdu->isp_datalen;
1966 pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1967 pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1968 pdu->isp_iovlen = 1;
1970 * Since we are associating a new data buffer with this received
1971 * PDU we need to set a specific callback to free the data
1972 * after the PDU is processed.
1974 pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1975 pdu->isp_callback = idm_sorx_addl_pdu_cb;
1977 return (idm_sorecvdata(ic, pdu));
1980 void
1981 idm_sorx_thread(void *arg)
1983 boolean_t conn_failure = B_FALSE;
1984 idm_conn_t *ic = (idm_conn_t *)arg;
1985 idm_so_conn_t *so_conn;
1986 idm_pdu_t *pdu;
1987 idm_status_t rc;
1989 idm_conn_hold(ic);
1991 mutex_enter(&ic->ic_mutex);
1993 so_conn = ic->ic_transport_private;
1994 so_conn->ic_rx_thread_running = B_TRUE;
1995 so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1996 cv_signal(&ic->ic_cv);
1998 while (so_conn->ic_rx_thread_running) {
1999 mutex_exit(&ic->ic_mutex);
2002 * Get PDU with default header size (large enough for
2003 * BHS plus any anticipated AHS). PDU from
2004 * the cache will have all values set correctly
2005 * for sockets RX including callback.
2007 pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2008 pdu->isp_ic = ic;
2009 pdu->isp_flags = 0;
2010 pdu->isp_transport_hdrlen = 0;
2012 if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2014 * Call idm_pdu_complete so that we call the callback
2015 * and ensure any memory allocated in idm_sorecvhdr
2016 * gets freed up.
2018 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2021 * If ic_rx_thread_running is still set then
2022 * this is some kind of connection problem
2023 * on the socket. In this case we want to
2024 * generate an event. Otherwise some other
2025 * thread closed the socket due to another
2026 * issue in which case we don't need to
2027 * generate an event.
2029 mutex_enter(&ic->ic_mutex);
2030 if (so_conn->ic_rx_thread_running) {
2031 conn_failure = B_TRUE;
2032 so_conn->ic_rx_thread_running = B_FALSE;
2035 continue;
2039 * Header has been read and validated. Now we need
2040 * to read the PDU data payload (if present). SCSI data
2041 * need to be transferred from the socket directly into
2042 * the associated transfer buffer for the SCSI task.
2044 if (pdu->isp_datalen != 0) {
2045 if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2046 (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2047 rc = idm_sorecv_scsidata(ic, pdu);
2049 * All SCSI errors are fatal to the
2050 * connection right now since we have no
2051 * place to put the data. What we need
2052 * is some kind of sink to dispose of unwanted
2053 * SCSI data. For example an invalid task tag
2054 * should not kill the connection (although
2055 * we may want to drop the connection).
2057 } else {
2059 * Not data PDUs so allocate a buffer for the
2060 * data segment and read the remaining data.
2062 rc = idm_sorecv_nonscsidata(ic, pdu);
2064 if (rc != 0) {
2066 * Call idm_pdu_complete so that we call the
2067 * callback and ensure any memory allocated
2068 * in idm_sorecvhdr gets freed up.
2070 idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2073 * If ic_rx_thread_running is still set then
2074 * this is some kind of connection problem
2075 * on the socket. In this case we want to
2076 * generate an event. Otherwise some other
2077 * thread closed the socket due to another
2078 * issue in which case we don't need to
2079 * generate an event.
2081 mutex_enter(&ic->ic_mutex);
2082 if (so_conn->ic_rx_thread_running) {
2083 conn_failure = B_TRUE;
2084 so_conn->ic_rx_thread_running = B_FALSE;
2086 continue;
2091 * Process RX PDU
2093 idm_pdu_rx(ic, pdu);
2095 mutex_enter(&ic->ic_mutex);
2098 mutex_exit(&ic->ic_mutex);
2101 * If we dropped out of the RX processing loop because of
2102 * a socket problem or other connection failure (including
2103 * digest errors) then we need to generate a state machine
2104 * event to shut the connection down.
2105 * If the state machine is already in, for example, INIT_ERROR, this
2106 * event will get dropped, and the TX thread will never be notified
2107 * to shut down. To be safe, we'll just notify it here.
2109 if (conn_failure) {
2110 if (so_conn->ic_tx_thread_running) {
2111 so_conn->ic_tx_thread_running = B_FALSE;
2112 mutex_enter(&so_conn->ic_tx_mutex);
2113 cv_signal(&so_conn->ic_tx_cv);
2114 mutex_exit(&so_conn->ic_tx_mutex);
2117 idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2120 idm_conn_rele(ic);
2122 thread_exit();
2126 * idm_so_tx
2128 * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2129 * point. By definition, it is supposed to be fast. So, simply queue
2130 * the entry and return. The real work is done by idm_i_so_tx() via
2131 * idm_sotx_thread().
2134 static void
2135 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2137 idm_so_conn_t *so_conn = ic->ic_transport_private;
2139 ASSERT(pdu->isp_ic == ic);
2140 mutex_enter(&so_conn->ic_tx_mutex);
2142 if (!so_conn->ic_tx_thread_running) {
2143 mutex_exit(&so_conn->ic_tx_mutex);
2144 idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2145 return;
2148 list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2149 cv_signal(&so_conn->ic_tx_cv);
2150 mutex_exit(&so_conn->ic_tx_mutex);
2153 static idm_status_t
2154 idm_i_so_tx(idm_pdu_t *pdu)
2156 idm_conn_t *ic = pdu->isp_ic;
2157 idm_status_t status = IDM_STATUS_SUCCESS;
2158 uint8_t pad[ISCSI_PAD_WORD_LEN];
2159 int pad_len;
2160 uint32_t hdr_digest_crc;
2161 uint32_t data_digest_crc = 0;
2162 int total_len = 0;
2163 int iovlen = 0;
2164 struct iovec iov[6];
2165 idm_so_conn_t *so_conn;
2167 so_conn = ic->ic_transport_private;
2169 /* Setup BHS */
2170 iov[iovlen].iov_base = (caddr_t)pdu->isp_hdr;
2171 iov[iovlen].iov_len = pdu->isp_hdrlen;
2172 total_len += iov[iovlen].iov_len;
2173 iovlen++;
2175 /* Setup header digest */
2176 if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2177 (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2178 hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2180 iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
2181 iov[iovlen].iov_len = sizeof (hdr_digest_crc);
2182 total_len += iov[iovlen].iov_len;
2183 iovlen++;
2186 /* Setup the data */
2187 if (pdu->isp_datalen) {
2188 idm_task_t *idt;
2189 idm_buf_t *idb;
2190 iscsi_data_hdr_t *ihp;
2191 ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2192 /* Write of immediate data */
2193 if (ic->ic_ffp &&
2194 (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2195 ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2196 idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2197 if (idt) {
2198 mutex_enter(&idt->idt_mutex);
2199 idb = idm_buf_find(&idt->idt_outbufv, 0);
2200 mutex_exit(&idt->idt_mutex);
2202 * If the initiator call to idm_buf_alloc
2203 * failed then we can get to this point
2204 * without a bound buffer. The associated
2205 * connection failure will clean things up
2206 * later. It would be nice to come up with
2207 * a cleaner way to handle this. In
2208 * particular it seems absurd to look up
2209 * the task and the buffer just to update
2210 * this counter.
2212 if (idb)
2213 idb->idb_xfer_len += pdu->isp_datalen;
2214 idm_task_rele(idt);
2218 iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2219 iov[iovlen].iov_len = pdu->isp_datalen;
2220 total_len += iov[iovlen].iov_len;
2221 iovlen++;
2224 /* Setup the data pad if necessary */
2225 pad_len = ((ISCSI_PAD_WORD_LEN -
2226 (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2227 (ISCSI_PAD_WORD_LEN - 1));
2229 if (pad_len) {
2230 bzero(pad, sizeof (pad));
2231 iov[iovlen].iov_base = (void *)&pad;
2232 iov[iovlen].iov_len = pad_len;
2233 total_len += iov[iovlen].iov_len;
2234 iovlen++;
2238 * Setup the data digest if enabled. Data-digest is not sent
2239 * for login-phase PDUs.
2241 if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2242 ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2243 (pdu->isp_datalen || pad_len)) {
2245 * RFC3720/10.2.3: A zero-length Data Segment also
2246 * implies a zero-length data digest.
2248 if (pdu->isp_datalen) {
2249 data_digest_crc = idm_crc32c(pdu->isp_data,
2250 pdu->isp_datalen);
2252 if (pad_len) {
2253 data_digest_crc = idm_crc32c_continued(&pad,
2254 pad_len, data_digest_crc);
2257 iov[iovlen].iov_base = (caddr_t)&data_digest_crc;
2258 iov[iovlen].iov_len = sizeof (data_digest_crc);
2259 total_len += iov[iovlen].iov_len;
2260 iovlen++;
2263 /* Transmit the PDU */
2264 if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2265 total_len) != 0) {
2266 /* Set error status */
2267 IDM_CONN_LOG(CE_WARN,
2268 "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2269 "data: %p", (void *) so_conn->ic_so, (void *) ic,
2270 (void *) pdu->isp_data);
2271 status = IDM_STATUS_IO;
2275 * Success does not mean that the PDU actually reached the
2276 * remote node since it could get dropped along the way.
2278 idm_pdu_complete(pdu, status);
2280 return (status);
2284 * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2285 * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2286 * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2287 * A target can invoke this function multiple times for a single read command
2288 * (identified by the same ITT) to split the input into several sequences.
2290 * DataSN starts with 0 for the first data PDU of an input command and advances
2291 * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2292 * which is set to 1 for the last data PDU of a sequence.
2293 * If the initiator supports phase collapse, the status bit must be set along
2294 * with the F bit to indicate that the status is shipped together with the last
2295 * Data-In PDU.
2297 * The data PDUs within a sequence will be sent in order with the buffer offset
2298 * in increasing order. i.e. initiator and target must have negotiated the
2299 * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2301 * Caller holds idt->idt_mutex
2303 static idm_status_t
2304 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2306 idm_so_conn_t *so_conn = idb->idb_ic->ic_transport_private;
2307 idm_pdu_t tmppdu;
2309 ASSERT(mutex_owned(&idt->idt_mutex));
2312 * Put the idm_buf_t on the tx queue. It will be transmitted by
2313 * idm_sotx_thread.
2315 mutex_enter(&so_conn->ic_tx_mutex);
2317 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2318 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2319 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2320 uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2322 if (!so_conn->ic_tx_thread_running) {
2323 mutex_exit(&so_conn->ic_tx_mutex);
2325 * Don't release idt->idt_mutex since we're supposed to hold
2326 * in when calling idm_buf_tx_to_ini_done
2328 DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2329 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331 uint32_t, idb->idb_xfer_len,
2332 int, XFER_BUF_TX_TO_INI);
2333 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2334 return (IDM_STATUS_FAIL);
2338 * Build a template for the data PDU headers we will use so that
2339 * the SN values will stay consistent with other PDU's we are
2340 * transmitting like R2T and SCSI status.
2342 bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2343 tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2344 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2345 ISCSI_OP_SCSI_DATA_RSP);
2346 idb->idb_tx_thread = B_TRUE;
2347 list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2348 cv_signal(&so_conn->ic_tx_cv);
2349 mutex_exit(&so_conn->ic_tx_mutex);
2350 mutex_exit(&idt->idt_mutex);
2353 * Returning success here indicates the transfer was successfully
2354 * dispatched -- it does not mean that the transfer completed
2355 * successfully.
2357 return (IDM_STATUS_SUCCESS);
2361 * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2362 * data blocks it is ready to receive from the initiator in response to a WRITE
2363 * SCSI command. The target iSCSI layer passes the information about the desired
2364 * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2365 * offset and datalen are passed via the 'idb' argument.
2367 * Scope for Prototype build:
2368 * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2369 * negotiated the "InitialR2T" to "Yes".
2371 * Caller holds idt->idt_mutex
2373 static idm_status_t
2374 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2376 idm_pdu_t *pdu;
2377 iscsi_rtt_hdr_t *rtt;
2379 ASSERT(mutex_owned(&idt->idt_mutex));
2381 DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2382 uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2383 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2384 uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2386 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2387 pdu->isp_ic = idt->idt_ic;
2388 pdu->isp_flags = IDM_PDU_SET_STATSN;
2389 bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2391 /* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2392 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2394 /* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2395 rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2397 rtt->opcode = ISCSI_OP_RTT_RSP;
2398 rtt->flags = ISCSI_FLAG_FINAL;
2399 rtt->data_offset = htonl(idb->idb_bufoffset);
2400 rtt->data_length = htonl(idb->idb_xfer_len);
2401 rtt->rttsn = htonl(idt->idt_exp_rttsn++);
2403 /* Keep track of buffer offsets */
2404 idb->idb_exp_offset = idb->idb_bufoffset;
2405 mutex_exit(&idt->idt_mutex);
2408 * Transmit the PDU.
2410 idm_pdu_tx(pdu);
2412 return (IDM_STATUS_SUCCESS);
2415 static idm_status_t
2416 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2418 if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2419 idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2420 KM_NOSLEEP);
2421 idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2422 } else {
2423 idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2424 idb->idb_buf_private = NULL;
2427 if (idb->idb_buf == NULL) {
2428 IDM_CONN_LOG(CE_NOTE,
2429 "idm_so_buf_alloc: failed buffer allocation");
2430 return (IDM_STATUS_FAIL);
2433 return (IDM_STATUS_SUCCESS);
2436 /* ARGSUSED */
2437 static idm_status_t
2438 idm_so_buf_setup(idm_buf_t *idb)
2440 /* Ensure bufalloc'd flag is unset */
2441 idb->idb_bufalloc = B_FALSE;
2443 return (IDM_STATUS_SUCCESS);
2446 /* ARGSUSED */
2447 static void
2448 idm_so_buf_teardown(idm_buf_t *idb)
2450 /* nothing to do here */
2453 static void
2454 idm_so_buf_free(idm_buf_t *idb)
2456 if (idb->idb_buf_private == NULL) {
2457 kmem_free(idb->idb_buf, idb->idb_buflen);
2458 } else {
2459 kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2463 static void
2464 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2465 uint32_t offset, uint32_t length)
2467 idm_so_conn_t *so_conn = ic->ic_transport_private;
2468 idm_pdu_t tmppdu;
2469 idm_buf_t *rtt_buf;
2471 ASSERT(mutex_owned(&idt->idt_mutex));
2474 * Allocate a buffer to represent the RTT transfer. We could further
2475 * optimize this by allocating the buffers internally from an rtt
2476 * specific buffer cache since this is socket-specific code but for
2477 * now we will keep it simple.
2479 rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2480 if (rtt_buf == NULL) {
2482 * If we're in FFP then the failure was likely a resource
2483 * allocation issue and we should close the connection by
2484 * sending a CE_TRANSPORT_FAIL event.
2486 * If we're not in FFP then idm_buf_alloc will always
2487 * fail and the state is transitioning to "complete" anyway
2488 * so we won't bother to send an event.
2490 mutex_enter(&ic->ic_state_mutex);
2491 if (ic->ic_ffp)
2492 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2493 NULL, CT_NONE);
2494 mutex_exit(&ic->ic_state_mutex);
2495 mutex_exit(&idt->idt_mutex);
2496 return;
2499 rtt_buf->idb_buf_cb = NULL;
2500 rtt_buf->idb_cb_arg = NULL;
2501 rtt_buf->idb_bufoffset = offset;
2502 rtt_buf->idb_xfer_len = length;
2503 rtt_buf->idb_ic = idt->idt_ic;
2504 rtt_buf->idb_task_binding = idt;
2507 * The new buffer (if any) represents an additional
2508 * reference on the task
2510 idm_task_hold(idt);
2511 mutex_exit(&idt->idt_mutex);
2514 * Put the idm_buf_t on the tx queue. It will be transmitted by
2515 * idm_sotx_thread.
2517 mutex_enter(&so_conn->ic_tx_mutex);
2519 if (!so_conn->ic_tx_thread_running) {
2520 idm_buf_free(rtt_buf);
2521 mutex_exit(&so_conn->ic_tx_mutex);
2522 idm_task_rele(idt);
2523 return;
2527 * Build a template for the data PDU headers we will use so that
2528 * the SN values will stay consistent with other PDU's we are
2529 * transmitting like R2T and SCSI status.
2531 bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2532 tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2533 (*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2534 ISCSI_OP_SCSI_DATA);
2535 rtt_buf->idb_tx_thread = B_TRUE;
2536 rtt_buf->idb_in_transport = B_TRUE;
2537 list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2538 cv_signal(&so_conn->ic_tx_cv);
2539 mutex_exit(&so_conn->ic_tx_mutex);
2542 static void
2543 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2546 * Don't worry about status -- we assume any error handling
2547 * is performed by the caller (idm_sotx_thread).
2549 idb->idb_in_transport = B_FALSE;
2550 idm_task_rele(idt);
2551 idm_buf_free(idb);
2554 static idm_status_t
2555 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2556 uint32_t buf_region_offset, uint32_t buf_region_length)
2558 idm_conn_t *ic;
2559 uint32_t max_dataseglen;
2560 size_t remainder, chunk;
2561 uint32_t data_offset = buf_region_offset;
2562 iscsi_data_hdr_t *bhs;
2563 idm_pdu_t *pdu;
2564 idm_status_t tx_status;
2566 ASSERT(mutex_owned(&idt->idt_mutex));
2568 ic = idt->idt_ic;
2570 max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2571 remainder = buf_region_length;
2573 while (remainder) {
2574 if (idt->idt_state != TASK_ACTIVE) {
2575 ASSERT((idt->idt_state != TASK_IDLE) &&
2576 (idt->idt_state != TASK_COMPLETE));
2577 return (IDM_STATUS_ABORTED);
2580 /* check to see if we need to chunk the data */
2581 if (remainder > max_dataseglen) {
2582 chunk = max_dataseglen;
2583 } else {
2584 chunk = remainder;
2587 /* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2588 pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2589 pdu->isp_ic = ic;
2590 pdu->isp_flags = 0; /* initialize isp_flags */
2593 * We've already built a build a header template
2594 * to use during the transfer. Use this template so that
2595 * the SN values stay consistent with any unrelated PDU's
2596 * being transmitted.
2598 bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2599 sizeof (iscsi_hdr_t));
2602 * Set DataSN, data offset, and flags in BHS
2603 * For the prototype build, A = 0, S = 0, U = 0
2605 bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2607 bhs->datasn = htonl(idt->idt_exp_datasn++);
2609 hton24(bhs->dlength, chunk);
2610 bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2612 /* setup data */
2613 pdu->isp_data = (uint8_t *)idb->idb_buf + data_offset;
2614 pdu->isp_datalen = (uint_t)chunk;
2616 if (chunk == remainder) {
2617 bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2618 /* Piggyback the status with the last data PDU */
2619 if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2620 pdu->isp_flags |= IDM_PDU_SET_STATSN |
2621 IDM_PDU_ADVANCE_STATSN;
2622 (*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2623 (idt, pdu);
2624 idt->idt_flags |=
2625 IDM_TASK_PHASECOLLAPSE_SUCCESS;
2630 remainder -= chunk;
2631 data_offset += chunk;
2633 /* Instrument the data-send DTrace probe. */
2634 if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2635 DTRACE_ISCSI_2(data__send,
2636 idm_conn_t *, idt->idt_ic,
2637 iscsi_data_rsp_hdr_t *,
2638 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2642 * Now that we're done working with idt_exp_datasn,
2643 * idt->idt_state and idb->idb_bufoffset we can release
2644 * the task lock -- don't want to hold it across the
2645 * call to idm_i_so_tx since we could block.
2647 mutex_exit(&idt->idt_mutex);
2650 * Transmit the PDU. Call the internal routine directly
2651 * as there is already implicit ordering.
2653 if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2654 mutex_enter(&idt->idt_mutex);
2655 return (tx_status);
2658 mutex_enter(&idt->idt_mutex);
2659 idt->idt_tx_bytes += chunk;
2662 return (IDM_STATUS_SUCCESS);
2666 * TX PDU cache
2668 /* ARGSUSED */
2670 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2672 idm_pdu_t *pdu = hdl;
2674 bzero(pdu, sizeof (idm_pdu_t));
2675 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2676 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2677 pdu->isp_callback = idm_sotx_cache_pdu_cb;
2678 pdu->isp_magic = IDM_PDU_MAGIC;
2679 bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2681 return (0);
2684 /* ARGSUSED */
2685 void
2686 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2688 /* reset values between use */
2689 pdu->isp_datalen = 0;
2691 kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2695 * RX PDU cache
2697 /* ARGSUSED */
2699 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2701 idm_pdu_t *pdu = hdl;
2703 bzero(pdu, sizeof (idm_pdu_t));
2704 pdu->isp_magic = IDM_PDU_MAGIC;
2705 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2706 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2708 return (0);
2711 /* ARGSUSED */
2712 static void
2713 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2715 pdu->isp_iovlen = 0;
2716 pdu->isp_sorx_buf = 0;
2717 kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2720 static void
2721 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2724 * We had to modify our cached RX PDU with a longer header buffer
2725 * and/or a longer data buffer. Release the new buffers and fix
2726 * the fields back to what we would expect for a cached RX PDU.
2728 if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2729 kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2731 if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2732 kmem_free(pdu->isp_data, pdu->isp_datalen);
2734 pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2735 pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2736 pdu->isp_data = NULL;
2737 pdu->isp_datalen = 0;
2738 pdu->isp_sorx_buf = 0;
2739 pdu->isp_callback = idm_sorx_cache_pdu_cb;
2740 idm_sorx_cache_pdu_cb(pdu, status);
2744 * This thread is only active when I/O is queued for transmit
2745 * because the socket is busy.
2747 void
2748 idm_sotx_thread(void *arg)
2750 idm_conn_t *ic = arg;
2751 idm_tx_obj_t *object, *next;
2752 idm_so_conn_t *so_conn;
2753 idm_status_t status = IDM_STATUS_SUCCESS;
2755 idm_conn_hold(ic);
2757 mutex_enter(&ic->ic_mutex);
2758 so_conn = ic->ic_transport_private;
2759 so_conn->ic_tx_thread_running = B_TRUE;
2760 so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2761 cv_signal(&ic->ic_cv);
2762 mutex_exit(&ic->ic_mutex);
2764 mutex_enter(&so_conn->ic_tx_mutex);
2766 while (so_conn->ic_tx_thread_running) {
2767 while (list_is_empty(&so_conn->ic_tx_list)) {
2768 DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2769 cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2770 DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2772 if (!so_conn->ic_tx_thread_running) {
2773 goto tx_bail;
2777 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2778 list_remove(&so_conn->ic_tx_list, object);
2779 mutex_exit(&so_conn->ic_tx_mutex);
2781 switch (object->idm_tx_obj_magic) {
2782 case IDM_PDU_MAGIC: {
2783 idm_pdu_t *pdu = (idm_pdu_t *)object;
2784 DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2785 idm_pdu_t *, (idm_pdu_t *)object);
2787 if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
2788 /* No IDM task */
2789 (ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
2791 status = idm_i_so_tx((idm_pdu_t *)object);
2792 break;
2794 case IDM_BUF_MAGIC: {
2795 idm_buf_t *idb = (idm_buf_t *)object;
2796 idm_task_t *idt = idb->idb_task_binding;
2798 DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2799 idm_buf_t *, idb);
2801 mutex_enter(&idt->idt_mutex);
2802 status = idm_so_send_buf_region(idt,
2803 idb, 0, idb->idb_xfer_len);
2806 * TX thread owns the buffer so we expect it to
2807 * be "in transport"
2809 ASSERT(idb->idb_in_transport);
2810 if (IDM_CONN_ISTGT(ic)) {
2812 * idm_buf_tx_to_ini_done releases
2813 * idt->idt_mutex
2815 DTRACE_ISCSI_8(xfer__done,
2816 idm_conn_t *, idt->idt_ic,
2817 uintptr_t, idb->idb_buf,
2818 uint32_t, idb->idb_bufoffset,
2819 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2820 uint32_t, idb->idb_xfer_len,
2821 int, XFER_BUF_TX_TO_INI);
2822 idm_buf_tx_to_ini_done(idt, idb, status);
2823 } else {
2824 idm_so_send_rtt_data_done(idt, idb);
2825 mutex_exit(&idt->idt_mutex);
2827 break;
2830 default:
2831 IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2832 "(0x%08x)", object->idm_tx_obj_magic);
2833 status = IDM_STATUS_FAIL;
2836 mutex_enter(&so_conn->ic_tx_mutex);
2838 if (status != IDM_STATUS_SUCCESS) {
2839 so_conn->ic_tx_thread_running = B_FALSE;
2840 idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2845 * Before we leave, we need to abort every item remaining in the
2846 * TX list.
2849 tx_bail:
2850 object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2852 while (object != NULL) {
2853 next = list_next(&so_conn->ic_tx_list, object);
2855 list_remove(&so_conn->ic_tx_list, object);
2856 switch (object->idm_tx_obj_magic) {
2857 case IDM_PDU_MAGIC:
2858 idm_pdu_complete((idm_pdu_t *)object,
2859 IDM_STATUS_ABORTED);
2860 break;
2862 case IDM_BUF_MAGIC: {
2863 idm_buf_t *idb = (idm_buf_t *)object;
2864 idm_task_t *idt = idb->idb_task_binding;
2865 mutex_exit(&so_conn->ic_tx_mutex);
2866 mutex_enter(&idt->idt_mutex);
2868 * TX thread owns the buffer so we expect it to
2869 * be "in transport"
2871 ASSERT(idb->idb_in_transport);
2872 if (IDM_CONN_ISTGT(ic)) {
2874 * idm_buf_tx_to_ini_done releases
2875 * idt->idt_mutex
2877 DTRACE_ISCSI_8(xfer__done,
2878 idm_conn_t *, idt->idt_ic,
2879 uintptr_t, idb->idb_buf,
2880 uint32_t, idb->idb_bufoffset,
2881 uint64_t, 0, uint32_t, 0, uint32_t, 0,
2882 uint32_t, idb->idb_xfer_len,
2883 int, XFER_BUF_TX_TO_INI);
2884 idm_buf_tx_to_ini_done(idt, idb,
2885 IDM_STATUS_ABORTED);
2886 } else {
2887 idm_so_send_rtt_data_done(idt, idb);
2888 mutex_exit(&idt->idt_mutex);
2890 mutex_enter(&so_conn->ic_tx_mutex);
2891 break;
2893 default:
2894 IDM_CONN_LOG(CE_WARN,
2895 "idm_sotx_thread: Unexpected magic "
2896 "(0x%08x)", object->idm_tx_obj_magic);
2899 object = next;
2902 mutex_exit(&so_conn->ic_tx_mutex);
2903 idm_conn_rele(ic);
2904 thread_exit();
2905 /*NOTREACHED*/
2908 static void
2909 idm_so_socket_set_nonblock(struct sonode *node)
2911 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2912 (node->so_state | FNONBLOCK), CRED(), NULL);
2915 static void
2916 idm_so_socket_set_block(struct sonode *node)
2918 (void) VOP_SETFL(node->so_vnode, node->so_flag,
2919 (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2924 * Called by kernel sockets when the connection has been accepted or
2925 * rejected. In early volo, a "disconnect" callback was sent instead of
2926 * "connectfailed", so we check for both.
2928 /* ARGSUSED */
2929 void
2930 idm_so_timed_socket_connect_cb(ksocket_t ks,
2931 ksocket_callback_event_t ev, void *arg, uintptr_t info)
2933 idm_so_timed_socket_t *itp = arg;
2934 ASSERT(itp != NULL);
2935 ASSERT(ev == KSOCKET_EV_CONNECTED ||
2936 ev == KSOCKET_EV_CONNECTFAILED ||
2937 ev == KSOCKET_EV_DISCONNECTED);
2939 mutex_enter(&idm_so_timed_socket_mutex);
2940 itp->it_callback_called = B_TRUE;
2941 if (ev == KSOCKET_EV_CONNECTED) {
2942 itp->it_socket_error_code = 0;
2943 } else {
2944 /* Make sure the error code is non-zero on error */
2945 if (info == 0)
2946 info = ECONNRESET;
2947 itp->it_socket_error_code = (int)info;
2949 cv_signal(&itp->it_cv);
2950 mutex_exit(&idm_so_timed_socket_mutex);
2954 idm_so_timed_socket_connect(ksocket_t ks,
2955 struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2957 clock_t conn_login_max;
2958 int rc, nonblocking, rval;
2959 idm_so_timed_socket_t it;
2960 ksocket_callbacks_t ks_cb;
2962 conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2965 * Set to non-block socket mode, with callback on connect
2966 * Early volo used "disconnected" instead of "connectfailed",
2967 * so set callback to look for both.
2969 bzero(&it, sizeof (it));
2970 ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2971 KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2972 ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2973 ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2974 ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2975 cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2976 rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2977 if (rc != 0)
2978 return (rc);
2980 /* Set to non-blocking mode */
2981 nonblocking = 1;
2982 rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2983 CRED());
2984 if (rc != 0)
2985 goto cleanup;
2987 bzero(&it, sizeof (it));
2988 for (;;) {
2990 * Warning -- in a loopback scenario, the call to
2991 * the connect_cb can occur inside the call to
2992 * ksocket_connect. Do not hold the mutex around the
2993 * call to ksocket_connect.
2995 rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
2996 if (rc == 0 || rc == EISCONN) {
2997 /* socket success or already success */
2998 rc = 0;
2999 break;
3001 if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3002 break;
3005 /* TCP connect still in progress. See if out of time. */
3006 if (ddi_get_lbolt() > conn_login_max) {
3008 * Connection retry timeout,
3009 * failed connect to target.
3011 rc = ETIMEDOUT;
3012 break;
3016 * TCP connect still in progress. Sleep until callback.
3017 * Do NOT go to sleep if the callback already occurred!
3019 mutex_enter(&idm_so_timed_socket_mutex);
3020 if (!it.it_callback_called) {
3021 (void) cv_timedwait(&it.it_cv,
3022 &idm_so_timed_socket_mutex, conn_login_max);
3024 if (it.it_callback_called) {
3025 rc = it.it_socket_error_code;
3026 mutex_exit(&idm_so_timed_socket_mutex);
3027 break;
3029 /* If timer expires, go call ksocket_connect one last time. */
3030 mutex_exit(&idm_so_timed_socket_mutex);
3033 /* resume blocking mode */
3034 nonblocking = 0;
3035 (void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3036 CRED());
3037 cleanup:
3038 (void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3039 cv_destroy(&it.it_cv);
3040 if (rc != 0) {
3041 idm_soshutdown(ks);
3043 return (rc);
3047 void
3048 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3050 int dp_addr_size;
3051 struct sockaddr_in *sin;
3052 struct sockaddr_in6 *sin6;
3054 /* Build sockaddr_storage for this portal (idm_addr_t) */
3055 bzero(sa, sizeof (*sa));
3056 dp_addr_size = dportal->a_addr.i_insize;
3057 if (dp_addr_size == sizeof (struct in_addr)) {
3058 /* IPv4 */
3059 sa->ss_family = AF_INET;
3060 sin = (struct sockaddr_in *)sa;
3061 sin->sin_port = htons(dportal->a_port);
3062 bcopy(&dportal->a_addr.i_addr.in4,
3063 &sin->sin_addr, sizeof (struct in_addr));
3064 } else if (dp_addr_size == sizeof (struct in6_addr)) {
3065 /* IPv6 */
3066 sa->ss_family = AF_INET6;
3067 sin6 = (struct sockaddr_in6 *)sa;
3068 sin6->sin6_port = htons(dportal->a_port);
3069 bcopy(&dportal->a_addr.i_addr.in6,
3070 &sin6->sin6_addr, sizeof (struct in6_addr));
3071 } else {
3072 ASSERT(0);
/*
 * idm_sa_ntop()
 *
 * Return a human-readable form of a sockaddr_storage, in the form
 * [ip-address].port (the port is separated from the address by a dot).
 * This is used in calls to logging functions.  If several calls to
 * idm_sa_ntop are made within the same invocation of a logging function,
 * then each one needs its own buf.
 *
 * sa	- address to format; AF_INET and AF_INET6 are understood
 * buf	- caller-supplied output buffer
 * size	- size of buf in bytes
 *
 * Always returns buf.  If the family is not recognized, the address cannot
 * be converted, or buf is too small for the worst-case port width, buf
 * receives the placeholder "[0].-1" instead.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char bogus_ip[] = "[0].-1";
	char tmp[INET6_ADDRSTRLEN];
	const void *addr;
	in_port_t port;
	int af;

	/* Pick the address and port out of the family-specific layout */
	switch (sa->ss_family) {
	case AF_INET6: {
		const struct sockaddr_in6 *in6 =
		    (const struct sockaddr_in6 *)sa;

		af = AF_INET6;
		addr = &in6->sin6_addr;
		port = in6->sin6_port;
		break;
	}
	case AF_INET: {
		const struct sockaddr_in *in =
		    (const struct sockaddr_in *)sa;

		af = AF_INET;
		addr = &in->sin_addr;
		port = in->sin_port;
		break;
	}
	default:
		goto err;
	}

	if (inet_ntop(af, addr, tmp, sizeof (tmp)) == NULL) {
		goto err;
	}

	/*
	 * sizeof ("[].65535") covers the brackets, the dot, the widest
	 * possible port and the terminating NUL.
	 */
	if (strlen(tmp) + sizeof ("[].65535") > size) {
		goto err;
	}

	/* The port is stored in network byte order */
	(void) snprintf(buf, size, "[%s].%u", tmp,
	    (unsigned int)ntohs(port));
	return (buf);

err:
	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}