2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * TODO: txcmd CREATE state is deferred by txmsgq, need to calculate
36 * a streaming response. See subr_diskiocom()'s diskiodone().
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/kernel.h>
42 #include <sys/systm.h>
43 #include <sys/queue.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/socket.h>
48 #include <sys/vnode.h>
52 #include <sys/thread.h>
53 #include <sys/globaldata.h>
54 #include <sys/limits.h>
58 RB_GENERATE(kdmsg_state_tree
, kdmsg_state
, rbnode
, kdmsg_state_cmp
);
59 RB_GENERATE(kdmsg_circuit_tree
, kdmsg_circuit
, rbnode
, kdmsg_circuit_cmp
);
61 static int kdmsg_msg_receive_handling(kdmsg_msg_t
*msg
);
62 static int kdmsg_circ_msgrx(kdmsg_msg_t
*msg
);
63 static int kdmsg_state_msgrx(kdmsg_msg_t
*msg
);
64 static int kdmsg_state_msgtx(kdmsg_msg_t
*msg
);
65 static void kdmsg_state_cleanuprx(kdmsg_msg_t
*msg
);
66 static void kdmsg_state_cleanuptx(kdmsg_msg_t
*msg
);
67 static void kdmsg_state_abort(kdmsg_state_t
*state
);
68 static void kdmsg_state_free(kdmsg_state_t
*state
);
70 static void kdmsg_iocom_thread_rd(void *arg
);
71 static void kdmsg_iocom_thread_wr(void *arg
);
72 static int kdmsg_autorxmsg(kdmsg_msg_t
*msg
);
73 static void kdmsg_autocirc(kdmsg_msg_t
*msg
);
74 static int kdmsg_autocirc_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
);
76 static struct lwkt_token kdmsg_token
= LWKT_TOKEN_INITIALIZER(kdmsg_token
);
79 kdmsg_circ_hold(kdmsg_circuit_t
*circ
)
81 atomic_add_int(&circ
->refs
, 1);
85 kdmsg_circ_drop(kdmsg_circuit_t
*circ
)
89 if (atomic_fetchadd_int(&circ
->refs
, -1) == 1) {
90 KKASSERT(circ
->span_state
== NULL
&&
91 circ
->circ_state
== NULL
&&
92 circ
->rcirc_state
== NULL
&&
96 kfree(circ
, iocom
->mmsg
);
102 * Initialize the roll-up communications structure for a network
103 * messaging session. This function does not install the socket.
106 kdmsg_iocom_init(kdmsg_iocom_t
*iocom
, void *handle
, uint32_t flags
,
107 struct malloc_type
*mmsg
,
108 int (*rcvmsg
)(kdmsg_msg_t
*msg
))
110 bzero(iocom
, sizeof(*iocom
));
111 iocom
->handle
= handle
;
113 iocom
->rcvmsg
= rcvmsg
;
114 iocom
->flags
= flags
;
115 lockinit(&iocom
->msglk
, "h2msg", 0, 0);
116 TAILQ_INIT(&iocom
->msgq
);
117 RB_INIT(&iocom
->circ_tree
);
118 RB_INIT(&iocom
->staterd_tree
);
119 RB_INIT(&iocom
->statewr_tree
);
123 * [Re]connect using the passed file pointer. The caller must ref the
124 * fp for us. We own that ref now.
127 kdmsg_iocom_reconnect(kdmsg_iocom_t
*iocom
, struct file
*fp
,
128 const char *subsysname
)
131 * Destroy the current connection
133 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
134 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILL
);
135 while (iocom
->msgrd_td
|| iocom
->msgwr_td
) {
136 wakeup(&iocom
->msg_ctl
);
137 lksleep(iocom
, &iocom
->msglk
, 0, "clstrkl", hz
);
141 * Drop communications descriptor
144 fdrop(iocom
->msg_fp
);
145 iocom
->msg_fp
= NULL
;
149 * Setup new communications descriptor
154 iocom
->flags
&= ~KDMSG_IOCOMF_EXITNOACC
;
156 lwkt_create(kdmsg_iocom_thread_rd
, iocom
, &iocom
->msgrd_td
,
157 NULL
, 0, -1, "%s-msgrd", subsysname
);
158 lwkt_create(kdmsg_iocom_thread_wr
, iocom
, &iocom
->msgwr_td
,
159 NULL
, 0, -1, "%s-msgwr", subsysname
);
160 lockmgr(&iocom
->msglk
, LK_RELEASE
);
164 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls
165 * this function to handle the state machine for LNK_CONN and LNK_SPAN.
167 * NOTE: Caller typically also sets the IOCOMF_AUTOCONN, IOCOMF_AUTOSPAN,
168 * and IOCOMF_AUTOCIRC in the kdmsg_iocom_init() call. Clients
169 * typically set IOCOMF_AUTOFORGE to automatically forge circuits
170 * for received SPANs.
172 static int kdmsg_lnk_conn_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
);
173 static int kdmsg_lnk_span_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
);
176 kdmsg_iocom_autoinitiate(kdmsg_iocom_t
*iocom
,
177 void (*auto_callback
)(kdmsg_msg_t
*msg
))
181 iocom
->auto_callback
= auto_callback
;
183 msg
= kdmsg_msg_alloc(iocom
, NULL
,
184 DMSG_LNK_CONN
| DMSGF_CREATE
,
185 kdmsg_lnk_conn_reply
, NULL
);
186 iocom
->auto_lnk_conn
.head
= msg
->any
.head
;
187 msg
->any
.lnk_conn
= iocom
->auto_lnk_conn
;
188 iocom
->conn_state
= msg
->state
;
189 kdmsg_msg_write(msg
);
194 kdmsg_lnk_conn_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
)
196 kdmsg_iocom_t
*iocom
= state
->iocom
;
199 if (msg
->any
.head
.cmd
& DMSGF_CREATE
) {
200 rmsg
= kdmsg_msg_alloc(iocom
, NULL
,
201 DMSG_LNK_SPAN
| DMSGF_CREATE
,
202 kdmsg_lnk_span_reply
, NULL
);
203 iocom
->auto_lnk_span
.head
= rmsg
->any
.head
;
204 rmsg
->any
.lnk_span
= iocom
->auto_lnk_span
;
205 kdmsg_msg_write(rmsg
);
209 * Process shim after the CONN is acknowledged and before the CONN
210 * transaction is deleted. For deletions this gives device drivers
211 * the ability to interlock new operations on the circuit before
212 * it becomes illegal and panics.
214 if (iocom
->auto_callback
)
215 iocom
->auto_callback(msg
);
217 if ((state
->txcmd
& DMSGF_DELETE
) == 0 &&
218 (msg
->any
.head
.cmd
& DMSGF_DELETE
)) {
219 iocom
->conn_state
= NULL
;
220 kdmsg_msg_reply(msg
, 0);
228 kdmsg_lnk_span_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
)
231 * Be sure to process shim before terminating the SPAN
232 * transaction. Gives device drivers the ability to
233 * interlock new operations on the circuit before it
234 * becomes illegal and panics.
236 if (state
->iocom
->auto_callback
)
237 state
->iocom
->auto_callback(msg
);
239 if ((state
->txcmd
& DMSGF_DELETE
) == 0 &&
240 (msg
->any
.head
.cmd
& DMSGF_DELETE
)) {
241 kdmsg_msg_reply(msg
, 0);
247 * Disconnect and clean up
250 kdmsg_iocom_uninit(kdmsg_iocom_t
*iocom
)
252 kdmsg_state_t
*state
;
255 * Ask the cluster controller to go away
257 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
258 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILL
);
260 while (iocom
->msgrd_td
|| iocom
->msgwr_td
) {
261 wakeup(&iocom
->msg_ctl
);
262 lksleep(iocom
, &iocom
->msglk
, 0, "clstrkl", hz
);
268 if ((state
= iocom
->freerd_state
) != NULL
) {
269 iocom
->freerd_state
= NULL
;
270 kdmsg_state_free(state
);
273 if ((state
= iocom
->freewr_state
) != NULL
) {
274 iocom
->freewr_state
= NULL
;
275 kdmsg_state_free(state
);
279 * Drop communications descriptor
282 fdrop(iocom
->msg_fp
);
283 iocom
->msg_fp
= NULL
;
285 lockmgr(&iocom
->msglk
, LK_RELEASE
);
289 * Cluster controller thread. Perform messaging functions. We have one
290 * thread for the reader and one for the writer. The writer handles
291 * shutdown requests (which should break the reader thread).
295 kdmsg_iocom_thread_rd(void *arg
)
297 kdmsg_iocom_t
*iocom
= arg
;
299 kdmsg_msg_t
*msg
= NULL
;
304 while ((iocom
->msg_ctl
& KDMSG_CLUSTERCTL_KILL
) == 0) {
306 * Retrieve the message from the pipe or socket.
308 error
= fp_read(iocom
->msg_fp
, &hdr
, sizeof(hdr
),
309 NULL
, 1, UIO_SYSSPACE
);
312 if (hdr
.magic
!= DMSG_HDR_MAGIC
) {
313 kprintf("kdmsg: bad magic: %04x\n", hdr
.magic
);
317 hbytes
= (hdr
.cmd
& DMSGF_SIZE
) * DMSG_ALIGN
;
318 if (hbytes
< sizeof(hdr
) || hbytes
> DMSG_AUX_MAX
) {
319 kprintf("kdmsg: bad header size %zd\n", hbytes
);
323 /* XXX messy: mask cmd to avoid allocating state */
324 msg
= kdmsg_msg_alloc(iocom
, NULL
,
325 hdr
.cmd
& DMSGF_BASECMDMASK
,
328 msg
->hdr_size
= hbytes
;
329 if (hbytes
> sizeof(hdr
)) {
330 error
= fp_read(iocom
->msg_fp
, &msg
->any
.head
+ 1,
331 hbytes
- sizeof(hdr
),
332 NULL
, 1, UIO_SYSSPACE
);
334 kprintf("kdmsg: short msg received\n");
339 msg
->aux_size
= hdr
.aux_bytes
;
340 if (msg
->aux_size
> DMSG_AUX_MAX
) {
341 kprintf("kdmsg: illegal msg payload size %zd\n",
347 abytes
= DMSG_DOALIGN(msg
->aux_size
);
348 msg
->aux_data
= kmalloc(abytes
, iocom
->mmsg
, M_WAITOK
);
349 msg
->flags
|= KDMSG_FLAG_AUXALLOC
;
350 error
= fp_read(iocom
->msg_fp
, msg
->aux_data
,
351 abytes
, NULL
, 1, UIO_SYSSPACE
);
353 kprintf("kdmsg: short msg payload received\n");
358 (void)kdmsg_circ_msgrx(msg
);
359 error
= kdmsg_msg_receive_handling(msg
);
364 kprintf("kdmsg: read failed error %d\n", error
);
366 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
371 * Shutdown the socket before waiting for the transmit side.
373 * If we are dying due to e.g. a socket disconnect versus being
374 * killed explicitly we have to set KILL in order to kick the tx
375 * side when it might not have any other work to do. KILL might
376 * already be set if we are in an unmount or reconnect.
378 fp_shutdown(iocom
->msg_fp
, SHUT_RDWR
);
380 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILL
);
381 wakeup(&iocom
->msg_ctl
);
384 * Wait for the transmit side to drain remaining messages
385 * before cleaning up the rx state. The transmit side will
386 * set KILLTX and wait for the rx side to completely finish
387 * (set msgrd_td to NULL) before cleaning up any remaining
390 lockmgr(&iocom
->msglk
, LK_RELEASE
);
391 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLRX
);
392 wakeup(&iocom
->msg_ctl
);
393 while ((iocom
->msg_ctl
& KDMSG_CLUSTERCTL_KILLTX
) == 0) {
394 wakeup(&iocom
->msg_ctl
);
395 tsleep(iocom
, 0, "clstrkw", hz
);
398 iocom
->msgrd_td
= NULL
;
401 * iocom can be ripped out from under us at this point but
410 kdmsg_iocom_thread_wr(void *arg
)
412 kdmsg_iocom_t
*iocom
= arg
;
414 kdmsg_state_t
*state
;
424 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
426 while ((iocom
->msg_ctl
& KDMSG_CLUSTERCTL_KILL
) == 0 && error
== 0) {
428 * Sleep if no messages pending. Interlock with flag while
431 if (TAILQ_EMPTY(&iocom
->msgq
)) {
432 atomic_set_int(&iocom
->msg_ctl
,
433 KDMSG_CLUSTERCTL_SLEEPING
);
434 lksleep(&iocom
->msg_ctl
, &iocom
->msglk
, 0, "msgwr", hz
);
435 atomic_clear_int(&iocom
->msg_ctl
,
436 KDMSG_CLUSTERCTL_SLEEPING
);
439 while ((msg
= TAILQ_FIRST(&iocom
->msgq
)) != NULL
) {
441 * Remove msg from the transmit queue and do
442 * persist and half-closed state handling.
444 TAILQ_REMOVE(&iocom
->msgq
, msg
, qentry
);
445 lockmgr(&iocom
->msglk
, LK_RELEASE
);
447 error
= kdmsg_state_msgtx(msg
);
448 if (error
== EALREADY
) {
451 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
456 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
461 * Dump the message to the pipe or socket.
463 * We have to clean up the message as if the transmit
464 * succeeded even if it failed.
466 error
= fp_write(iocom
->msg_fp
, &msg
->any
,
467 msg
->hdr_size
, &res
, UIO_SYSSPACE
);
468 if (error
|| res
!= msg
->hdr_size
) {
471 kdmsg_state_cleanuptx(msg
);
472 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
476 abytes
= DMSG_DOALIGN(msg
->aux_size
);
477 error
= fp_write(iocom
->msg_fp
,
478 msg
->aux_data
, abytes
,
480 if (error
|| res
!= abytes
) {
483 kdmsg_state_cleanuptx(msg
);
484 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
488 kdmsg_state_cleanuptx(msg
);
489 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
494 * Cleanup messages pending transmission and release msgq lock.
497 kprintf("kdmsg: write failed error %d\n", error
);
498 kprintf("thread_wr: Terminating iocom\n");
501 * Shutdown the socket. This will cause the rx thread to get an
502 * EOF and ensure that both threads get to a termination state.
504 fp_shutdown(iocom
->msg_fp
, SHUT_RDWR
);
507 * Set KILLTX (which the rx side waits for), then wait for the RX
508 * side to completely finish before we clean out any remaining
511 lockmgr(&iocom
->msglk
, LK_RELEASE
);
512 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLTX
);
513 wakeup(&iocom
->msg_ctl
);
514 while (iocom
->msgrd_td
) {
515 wakeup(&iocom
->msg_ctl
);
516 tsleep(iocom
, 0, "clstrkw", hz
);
518 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
521 * Simulate received MSGF_DELETE's for any remaining states.
522 * (For remote masters).
524 * Drain the message queue to handle any device initiated writes
525 * due to state callbacks.
528 kdmsg_drain_msgq(iocom
);
529 RB_FOREACH(state
, kdmsg_state_tree
, &iocom
->staterd_tree
) {
530 if ((state
->rxcmd
& DMSGF_DELETE
) == 0) {
531 lockmgr(&iocom
->msglk
, LK_RELEASE
);
532 kdmsg_state_abort(state
);
533 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
539 * Simulate received MSGF_DELETE's for any remaining states.
540 * (For local masters).
543 kdmsg_drain_msgq(iocom
);
544 RB_FOREACH(state
, kdmsg_state_tree
, &iocom
->statewr_tree
) {
545 if ((state
->rxcmd
& DMSGF_DELETE
) == 0) {
546 lockmgr(&iocom
->msglk
, LK_RELEASE
);
547 kdmsg_state_abort(state
);
548 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
554 * Retry until all work is done
557 panic("kdmsg: comm thread shutdown couldn't drain");
558 if (TAILQ_FIRST(&iocom
->msgq
) ||
559 RB_ROOT(&iocom
->staterd_tree
) ||
560 RB_ROOT(&iocom
->statewr_tree
)) {
563 iocom
->flags
|= KDMSG_IOCOMF_EXITNOACC
;
565 lockmgr(&iocom
->msglk
, LK_RELEASE
);
568 * The state trees had better be empty now
570 KKASSERT(RB_EMPTY(&iocom
->staterd_tree
));
571 KKASSERT(RB_EMPTY(&iocom
->statewr_tree
));
572 KKASSERT(iocom
->conn_state
== NULL
);
574 if (iocom
->exit_func
) {
576 * iocom is invalid after we call the exit function.
578 iocom
->msgwr_td
= NULL
;
579 iocom
->exit_func(iocom
);
582 * iocom can be ripped out from under us once msgwr_td is
583 * set to NULL. The wakeup is safe.
585 iocom
->msgwr_td
= NULL
;
592 * This cleans out the pending transmit message queue, adjusting any
593 * persistent states properly in the process.
595 * Caller must hold iocom->msglk
598 kdmsg_drain_msgq(kdmsg_iocom_t
*iocom
)
603 * Clean out our pending transmit queue, executing the
604 * appropriate state adjustments. If this tries to open
605 * any new outgoing transactions we have to loop up and
608 while ((msg
= TAILQ_FIRST(&iocom
->msgq
)) != NULL
) {
609 TAILQ_REMOVE(&iocom
->msgq
, msg
, qentry
);
610 lockmgr(&iocom
->msglk
, LK_RELEASE
);
611 if (kdmsg_state_msgtx(msg
))
614 kdmsg_state_cleanuptx(msg
);
615 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
620 * Do all processing required to handle a freshly received message
621 * after its low level header has been validated.
625 kdmsg_msg_receive_handling(kdmsg_msg_t
*msg
)
627 kdmsg_iocom_t
*iocom
= msg
->iocom
;
631 * State machine tracking, state assignment for msg,
632 * returns error and discard status. Errors are fatal
633 * to the connection except for EALREADY which forces
634 * a discard without execution.
636 error
= kdmsg_state_msgrx(msg
);
639 * Raw protocol or connection error
642 if (error
== EALREADY
)
644 } else if (msg
->state
&& msg
->state
->func
) {
646 * Message related to state which already has a
647 * handling function installed for it.
649 error
= msg
->state
->func(msg
->state
, msg
);
650 kdmsg_state_cleanuprx(msg
);
651 } else if (iocom
->flags
& KDMSG_IOCOMF_AUTOANY
) {
652 error
= kdmsg_autorxmsg(msg
);
653 kdmsg_state_cleanuprx(msg
);
655 error
= iocom
->rcvmsg(msg
);
656 kdmsg_state_cleanuprx(msg
);
662 * Process circuit tracking (NEEDS WORK)
666 kdmsg_circ_msgrx(kdmsg_msg_t
*msg
)
668 kdmsg_circuit_t dummy
;
669 kdmsg_circuit_t
*circ
;
672 if (msg
->any
.head
.circuit
) {
673 dummy
.msgid
= msg
->any
.head
.circuit
;
674 lwkt_gettoken(&kdmsg_token
);
675 circ
= RB_FIND(kdmsg_circuit_tree
, &msg
->iocom
->circ_tree
,
679 kdmsg_circ_hold(circ
);
682 kprintf("KDMSG_CIRC_MSGRX CMD %08x: IOCOM %p "
683 "Bad circuit %016jx\n",
686 (intmax_t)msg
->any
.head
.circuit
);
687 kprintf("KDMSG_CIRC_MSGRX: Avail circuits: ");
688 RB_FOREACH(circ
, kdmsg_circuit_tree
,
689 &msg
->iocom
->circ_tree
) {
690 kprintf(" %016jx", (intmax_t)circ
->msgid
);
695 lwkt_reltoken(&kdmsg_token
);
701 * Process state tracking for a message after reception, prior to
704 * Called with msglk held and the msg dequeued.
706 * All messages are called with dummy state and return actual state.
707 * (One-off messages often just return the same dummy state).
709 * May request that caller discard the message by returning EALREADY.
710 * The returned state is not used in this case and is allowed to be NULL.
714 * These routines handle persistent and command/reply message state via the
715 * CREATE and DELETE flags. The first message in a command or reply sequence
716 * sets CREATE, the last message in a command or reply sequence sets DELETE.
718 * There can be any number of intermediate messages belonging to the same
719 * sequence sent in between the CREATE message and the DELETE message,
720 * which set neither flag. This represents a streaming command or reply.
722 * Any command message received with CREATE set expects a reply sequence to
723 * be returned. Reply sequences work the same as command sequences except the
724 * REPLY bit is also sent. Both the command side and reply side can
725 * degenerate into a single message with both CREATE and DELETE set. Note
726 * that one side can be streaming and the other side not, or neither, or both.
728 * The msgid is unique for the initiator. That is, two sides sending a new
729 * message can use the same msgid without colliding.
733 * ABORT sequences work by setting the ABORT flag along with normal message
734 * state. However, ABORTs can also be sent on half-closed messages, that is
735 * even if the command or reply side has already sent a DELETE, as long as
736 * the message has not been fully closed it can still send an ABORT+DELETE
737 * to terminate the half-closed message state.
739 * Since ABORT+DELETEs can race we silently discard ABORT's for message
740 * state which has already been fully closed. REPLY+ABORT+DELETEs can
741 * also race, and in this situation the other side might have already
742 * initiated a new unrelated command with the same message id. Since
743 * the abort has not set the CREATE flag the situation can be detected
744 * and the message will also be discarded.
746 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
747 * The ABORT request is essentially integrated into the command instead
748 * of being sent later on. In this situation the command implementation
749 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
750 * special-case non-blocking operation for the command.
752 * NOTE! Messages with ABORT set without CREATE or DELETE are considered
753 * to be mid-stream aborts for command/reply sequences. ABORTs on
754 * one-way messages are not supported.
756 * NOTE! If a command sequence does not support aborts the ABORT flag is
761 * One-off messages (no reply expected) are sent with neither CREATE nor DELETE
762 * set. One-off messages cannot be aborted and typically aren't processed
763 * by these routines. The REPLY bit can be used to distinguish whether a
764 * one-off message is a command or reply. For example, one-off replies
765 * will typically just contain status updates.
769 kdmsg_state_msgrx(kdmsg_msg_t
*msg
)
771 kdmsg_iocom_t
*iocom
= msg
->iocom
;
772 kdmsg_state_t
*state
;
776 * Make sure a state structure is ready to go in case we need a new
777 * one. This is the only routine which uses freerd_state so no
778 * races are possible.
780 if ((state
= iocom
->freerd_state
) == NULL
) {
781 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
782 state
->flags
= KDMSG_STATE_DYNAMIC
;
783 iocom
->freerd_state
= state
;
787 * Lock RB tree and locate existing persistent state, if any.
789 * If received msg is a command state is on staterd_tree.
790 * If received msg is a reply state is on statewr_tree.
792 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
794 state
->msgid
= msg
->any
.head
.msgid
;
795 state
->circ
= msg
->circ
;
796 state
->iocom
= iocom
;
797 if (msg
->any
.head
.cmd
& DMSGF_REPLY
)
798 state
= RB_FIND(kdmsg_state_tree
, &iocom
->statewr_tree
, state
);
800 state
= RB_FIND(kdmsg_state_tree
, &iocom
->staterd_tree
, state
);
804 * Short-cut one-off or mid-stream messages (state may be NULL).
806 if ((msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
807 DMSGF_ABORT
)) == 0) {
808 lockmgr(&iocom
->msglk
, LK_RELEASE
);
813 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
814 * inside the case statements.
816 switch(msg
->any
.head
.cmd
& (DMSGF_CREATE
|DMSGF_DELETE
|DMSGF_REPLY
)) {
818 case DMSGF_CREATE
| DMSGF_DELETE
:
820 * New persistent command received.
823 kprintf("kdmsg_state_msgrx: duplicate transaction\n");
827 state
= iocom
->freerd_state
;
828 iocom
->freerd_state
= NULL
;
831 state
->icmd
= msg
->any
.head
.cmd
& DMSGF_BASECMDMASK
;
832 state
->rxcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
833 state
->txcmd
= DMSGF_REPLY
;
834 state
->msgid
= msg
->any
.head
.msgid
;
835 if ((state
->circ
= msg
->circ
) != NULL
)
836 kdmsg_circ_hold(state
->circ
);
837 RB_INSERT(kdmsg_state_tree
, &iocom
->staterd_tree
, state
);
838 state
->flags
|= KDMSG_STATE_INSERTED
;
843 * Persistent state is expected but might not exist if an
844 * ABORT+DELETE races the close.
847 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
850 kprintf("kdmsg_state_msgrx: "
851 "no state for DELETE\n");
858 * Handle another ABORT+DELETE case if the msgid has already
861 if ((state
->rxcmd
& DMSGF_CREATE
) == 0) {
862 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
865 kprintf("kdmsg_state_msgrx: "
866 "state reused for DELETE\n");
875 * Check for mid-stream ABORT command received, otherwise
878 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
880 (state
->rxcmd
& DMSGF_CREATE
) == 0) {
887 case DMSGF_REPLY
| DMSGF_CREATE
:
888 case DMSGF_REPLY
| DMSGF_CREATE
| DMSGF_DELETE
:
890 * When receiving a reply with CREATE set the original
891 * persistent state message should already exist.
894 kprintf("kdmsg_state_msgrx: no state match for "
895 "REPLY cmd=%08x msgid=%016jx\n",
897 (intmax_t)msg
->any
.head
.msgid
);
901 state
->rxcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
904 case DMSGF_REPLY
| DMSGF_DELETE
:
906 * Received REPLY+ABORT+DELETE in case where msgid has
907 * already been fully closed, ignore the message.
910 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
913 kprintf("kdmsg_state_msgrx: no state match "
914 "for REPLY|DELETE\n");
921 * Received REPLY+ABORT+DELETE in case where msgid has
922 * already been reused for an unrelated message,
923 * ignore the message.
925 if ((state
->rxcmd
& DMSGF_CREATE
) == 0) {
926 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
929 kprintf("kdmsg_state_msgrx: state reused "
930 "for REPLY|DELETE\n");
939 * Check for mid-stream ABORT reply received to sent command.
941 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
943 (state
->rxcmd
& DMSGF_CREATE
) == 0) {
951 lockmgr(&iocom
->msglk
, LK_RELEASE
);
956 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
957 * This routine must call iocom->rcvmsg() for anything not automatically
961 kdmsg_autorxmsg(kdmsg_msg_t
*msg
)
963 kdmsg_iocom_t
*iocom
= msg
->iocom
;
964 kdmsg_circuit_t
*circ
;
969 * Process a combination of the transaction command and the message
970 * flags. For the purposes of this routine, the message command is
971 * only relevant when it initiates a transaction (where it is
974 cmd
= (msg
->state
? msg
->state
->icmd
: msg
->any
.head
.cmd
) &
976 cmd
|= msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
| DMSGF_REPLY
);
979 case DMSG_LNK_CONN
| DMSGF_CREATE
:
980 case DMSG_LNK_CONN
| DMSGF_CREATE
| DMSGF_DELETE
:
982 * Received LNK_CONN transaction. Transmit response and
983 * leave transaction open, which allows the other end to
984 * start the SPAN protocol.
986 * Handle shim after acknowledging the CONN.
988 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) == 0) {
989 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCONN
) {
990 kdmsg_msg_result(msg
, 0);
991 if (iocom
->auto_callback
)
992 iocom
->auto_callback(msg
);
994 error
= iocom
->rcvmsg(msg
);
999 case DMSG_LNK_CONN
| DMSGF_DELETE
:
1001 * This message is usually simulated after a link is lost
1002 * to clean up the transaction.
1004 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCONN
) {
1005 if (iocom
->auto_callback
)
1006 iocom
->auto_callback(msg
);
1007 kdmsg_msg_reply(msg
, 0);
1009 error
= iocom
->rcvmsg(msg
);
1012 case DMSG_LNK_SPAN
| DMSGF_CREATE
:
1013 case DMSG_LNK_SPAN
| DMSGF_CREATE
| DMSGF_DELETE
:
1015 * Received LNK_SPAN transaction. We do not have to respond
1016 * but we must leave the transaction open.
1018 * If AUTOCIRC is set automatically initiate a virtual circuit
1019 * to the received span. This will attach a kdmsg_circuit
1020 * to the SPAN state. The circuit is lost when the span is
1023 * Handle shim after acknowledging the SPAN.
1025 if (iocom
->flags
& KDMSG_IOCOMF_AUTOSPAN
) {
1026 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) == 0) {
1027 if (iocom
->flags
& KDMSG_IOCOMF_AUTOFORGE
)
1028 kdmsg_autocirc(msg
);
1029 if (iocom
->auto_callback
)
1030 iocom
->auto_callback(msg
);
1035 error
= iocom
->rcvmsg(msg
);
1039 case DMSG_LNK_SPAN
| DMSGF_DELETE
:
1041 * Process shims (auto_callback) before cleaning up the
1042 * circuit structure and closing the transactions. Device
1043 * driver should ensure that the circuit is not used after
1044 * the auto_callback() returns.
1046 * Handle shim before closing the SPAN transaction.
1048 if (iocom
->flags
& KDMSG_IOCOMF_AUTOSPAN
) {
1049 if (iocom
->auto_callback
)
1050 iocom
->auto_callback(msg
);
1051 if (iocom
->flags
& KDMSG_IOCOMF_AUTOFORGE
)
1052 kdmsg_autocirc(msg
);
1053 kdmsg_msg_reply(msg
, 0);
1055 error
= iocom
->rcvmsg(msg
);
1058 case DMSG_LNK_CIRC
| DMSGF_CREATE
:
1059 case DMSG_LNK_CIRC
| DMSGF_CREATE
| DMSGF_DELETE
:
1061 * Received LNK_CIRC transaction. We must respond and should
1062 * leave the transaction open, allowing the circuit. The
1063 * remote can start issuing commands to us over the circuit
1064 * even before we respond.
1066 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCIRC
) {
1067 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) == 0) {
1068 circ
= kmalloc(sizeof(*circ
), iocom
->mmsg
,
1070 lwkt_gettoken(&kdmsg_token
);
1071 msg
->state
->any
.circ
= circ
;
1072 circ
->iocom
= iocom
;
1073 circ
->rcirc_state
= msg
->state
;
1074 kdmsg_circ_hold(circ
); /* for rcirc_state */
1076 circ
->msgid
= circ
->rcirc_state
->msgid
;
1077 /* XXX no span link for received circuits */
1078 kdmsg_circ_hold(circ
); /* for circ_state */
1080 if (RB_INSERT(kdmsg_circuit_tree
,
1081 &iocom
->circ_tree
, circ
)) {
1082 panic("duplicate circuitid allocated");
1084 lwkt_reltoken(&kdmsg_token
);
1085 kdmsg_msg_result(msg
, 0);
1088 * Handle shim after adding the circuit and
1089 * after acknowledging the CIRC.
1091 if (iocom
->auto_callback
)
1092 iocom
->auto_callback(msg
);
1097 error
= iocom
->rcvmsg(msg
);
1101 case DMSG_LNK_CIRC
| DMSGF_DELETE
:
1102 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCIRC
) {
1103 circ
= msg
->state
->any
.circ
;
1108 * Handle shim before terminating the circuit.
1111 kprintf("KDMSG VC: RECEIVE CIRC DELETE "
1112 "IOCOM %p MSGID %016jx\n",
1113 msg
->iocom
, circ
->msgid
);
1115 if (iocom
->auto_callback
)
1116 iocom
->auto_callback(msg
);
1118 KKASSERT(circ
->rcirc_state
== msg
->state
);
1119 lwkt_gettoken(&kdmsg_token
);
1120 circ
->rcirc_state
= NULL
;
1121 msg
->state
->any
.circ
= NULL
;
1122 RB_REMOVE(kdmsg_circuit_tree
, &iocom
->circ_tree
, circ
);
1123 lwkt_reltoken(&kdmsg_token
);
1124 kdmsg_circ_drop(circ
); /* for rcirc_state */
1125 kdmsg_msg_reply(msg
, 0);
1127 error
= iocom
->rcvmsg(msg
);
1132 * Anything unhandled goes into rcvmsg.
1134 * NOTE: Replies to link-level messages initiated by our side
1135 * are handled by the state callback, they are NOT
1138 error
= iocom
->rcvmsg(msg
);
1145 * Handle automatic forging of virtual circuits based on received SPANs.
1146 * (AUTOFORGE). Note that other code handles tracking received circuit
1147 * transactions (AUTOCIRC).
1149 * We can ignore non-transactions here. Use trans->icmd to test the
1150 * transactional command (once past the CREATE the individual message
1151 * commands are not usually the icmd).
1157 kdmsg_autocirc(kdmsg_msg_t
*msg
)
1159 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1160 kdmsg_circuit_t
*circ
;
1161 kdmsg_msg_t
*xmsg
; /* CIRC */
1163 if (msg
->state
== NULL
)
1167 * Gaining the SPAN, automatically forge a circuit to the target.
1169 * NOTE!! The shim is not executed until we receive an acknowledgement
1170 * to our forged LNK_CIRC (see kdmsg_autocirc_reply()).
1172 if (msg
->state
->icmd
== DMSG_LNK_SPAN
&&
1173 (msg
->any
.head
.cmd
& DMSGF_CREATE
)) {
1174 circ
= kmalloc(sizeof(*circ
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1175 lwkt_gettoken(&kdmsg_token
);
1176 msg
->state
->any
.circ
= circ
;
1177 circ
->iocom
= iocom
;
1178 circ
->span_state
= msg
->state
;
1179 kdmsg_circ_hold(circ
); /* for span_state */
1180 xmsg
= kdmsg_msg_alloc(iocom
, NULL
,
1181 DMSG_LNK_CIRC
| DMSGF_CREATE
,
1182 kdmsg_autocirc_reply
, circ
);
1183 circ
->circ_state
= xmsg
->state
;
1184 circ
->weight
= msg
->any
.lnk_span
.dist
;
1185 circ
->msgid
= circ
->circ_state
->msgid
;
1186 kdmsg_circ_hold(circ
); /* for circ_state */
1188 kprintf("KDMSG VC: CREATE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1189 msg
->iocom
, circ
->msgid
);
1192 if (RB_INSERT(kdmsg_circuit_tree
, &iocom
->circ_tree
, circ
))
1193 panic("duplicate circuitid allocated");
1194 lwkt_reltoken(&kdmsg_token
);
1196 xmsg
->any
.lnk_circ
.target
= msg
->any
.head
.msgid
;
1197 kdmsg_msg_write(xmsg
);
1203 * NOTE: When losing a SPAN, any circuits using the span should be
1204 * deleted by the remote end first. XXX might not be ordered
1205 * on actual loss of connection.
1207 if (msg
->state
->icmd
== DMSG_LNK_SPAN
&&
1208 (msg
->any
.head
.cmd
& DMSGF_DELETE
) &&
1209 msg
->state
->any
.circ
) {
1210 circ
= msg
->state
->any
.circ
;
1211 lwkt_gettoken(&kdmsg_token
);
1212 circ
->span_state
= NULL
;
1213 msg
->state
->any
.circ
= NULL
;
1214 RB_REMOVE(kdmsg_circuit_tree
, &iocom
->circ_tree
, circ
);
1216 kprintf("KDMSG VC: DELETE SPAN->CIRC IOCOM %p MSGID %016jx\n",
1217 msg
->iocom
, (intmax_t)circ
->msgid
);
1219 kdmsg_circ_drop(circ
); /* for span_state */
1220 lwkt_reltoken(&kdmsg_token
);
1226 kdmsg_autocirc_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
)
1228 kdmsg_iocom_t
*iocom
= state
->iocom
;
1229 kdmsg_circuit_t
*circ
= state
->any
.circ
;
1232 * Call shim after receiving an acknowledgement to our forged
1233 * circuit and before processing a received termination.
1235 if (iocom
->auto_callback
)
1236 iocom
->auto_callback(msg
);
1239 * If the remote is terminating the VC we terminate our side
1241 if ((state
->txcmd
& DMSGF_DELETE
) == 0 &&
1242 (msg
->any
.head
.cmd
& DMSGF_DELETE
)) {
1244 kprintf("KDMSG VC: DELETE CIRC FROM REMOTE\n");
1246 lwkt_gettoken(&kdmsg_token
);
1247 circ
->circ_state
= NULL
;
1248 state
->any
.circ
= NULL
;
1249 kdmsg_circ_drop(circ
); /* for circ_state */
1250 lwkt_reltoken(&kdmsg_token
);
1251 kdmsg_msg_reply(msg
, 0);
1257 * Post-receive-handling message and state cleanup. This routine is called
1258 * after the state function handling/callback to properly dispose of the
1259 * message and update or dispose of the state.
1263 kdmsg_state_cleanuprx(kdmsg_msg_t
*msg
)
1265 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1266 kdmsg_state_t
*state
;
1268 if ((state
= msg
->state
) == NULL
) {
1269 kdmsg_msg_free(msg
);
1270 } else if (msg
->any
.head
.cmd
& DMSGF_DELETE
) {
1271 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1272 KKASSERT((state
->rxcmd
& DMSGF_DELETE
) == 0);
1273 state
->rxcmd
|= DMSGF_DELETE
;
1274 if (state
->txcmd
& DMSGF_DELETE
) {
1275 KKASSERT(state
->flags
& KDMSG_STATE_INSERTED
);
1276 if (state
->rxcmd
& DMSGF_REPLY
) {
1277 KKASSERT(msg
->any
.head
.cmd
&
1279 RB_REMOVE(kdmsg_state_tree
,
1280 &iocom
->statewr_tree
, state
);
1282 KKASSERT((msg
->any
.head
.cmd
&
1284 RB_REMOVE(kdmsg_state_tree
,
1285 &iocom
->staterd_tree
, state
);
1287 state
->flags
&= ~KDMSG_STATE_INSERTED
;
1288 if (msg
!= state
->msg
)
1289 kdmsg_msg_free(msg
);
1290 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1291 kdmsg_state_free(state
);
1293 if (msg
!= state
->msg
)
1294 kdmsg_msg_free(msg
);
1295 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1297 } else if (msg
!= state
->msg
) {
1298 kdmsg_msg_free(msg
);
1303 * Simulate receiving a message which terminates an active transaction
1304 * state. Our simulated received message must set DELETE and may also
1305 * have to set CREATE. It must also ensure that all fields are set such
1306 * that the receive handling code can find the state (kdmsg_state_msgrx())
1307 * or an endless loop will ensue.
1309 * This is used when the other end of the link or virtual circuit is dead
1310 * so the device driver gets a completed transaction for all pending states.
1314 kdmsg_state_abort(kdmsg_state_t
*state
)
1316 kdmsg_iocom_t
*iocom
= state
->iocom
;
1320 * Prevent recursive aborts which could otherwise occur if the
1321 * simulated message reception runs state->func which then turns
1322 * around and tries to reply to a broken circuit when then calls
1323 * the state abort code again.
1325 if (state
->flags
& KDMSG_STATE_ABORTING
)
1327 state
->flags
|= KDMSG_STATE_ABORTING
;
1330 * Simulatem essage reception
1332 msg
= kdmsg_msg_alloc(iocom
, state
->circ
,
1335 if ((state
->rxcmd
& DMSGF_CREATE
) == 0)
1336 msg
->any
.head
.cmd
|= DMSGF_CREATE
;
1337 msg
->any
.head
.cmd
|= DMSGF_DELETE
| (state
->rxcmd
& DMSGF_REPLY
);
1338 msg
->any
.head
.error
= DMSG_ERR_LOSTLINK
;
1339 msg
->any
.head
.msgid
= state
->msgid
;
1341 kdmsg_msg_receive_handling(msg
);
1345 * Process state tracking for a message prior to transmission.
1347 * Called with msglk held and the msg dequeued. Returns non-zero if
1348 * the message is bad and should be deleted by the caller.
1350 * One-off messages are usually with dummy state and msg->state may be NULL
1351 * in this situation.
1353 * New transactions (when CREATE is set) will insert the state.
1355 * May request that caller discard the message by setting *discardp to 1.
1356 * A NULL state may be returned in this case.
1360 kdmsg_state_msgtx(kdmsg_msg_t
*msg
)
1362 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1363 kdmsg_state_t
*state
;
1367 * Make sure a state structure is ready to go in case we need a new
1368 * one. This is the only routine which uses freewr_state so no
1369 * races are possible.
1371 if ((state
= iocom
->freewr_state
) == NULL
) {
1372 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1373 state
->flags
= KDMSG_STATE_DYNAMIC
;
1374 state
->iocom
= iocom
;
1375 iocom
->freewr_state
= state
;
1379 * Lock RB tree. If persistent state is present it will have already
1380 * been assigned to msg.
1382 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1386 * Short-cut one-off or mid-stream messages (state may be NULL).
1388 if ((msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
1389 DMSGF_ABORT
)) == 0) {
1390 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1396 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
1397 * inside the case statements.
1399 switch(msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
1402 case DMSGF_CREATE
| DMSGF_DELETE
:
1404 * Insert the new persistent message state and mark
1405 * half-closed if DELETE is set. Since this is a new
1406 * message it isn't possible to transition into the fully
1407 * closed state here.
1409 * XXX state must be assigned and inserted by
1410 * kdmsg_msg_write(). txcmd is assigned by us
1413 KKASSERT(state
!= NULL
);
1414 state
->icmd
= msg
->any
.head
.cmd
& DMSGF_BASECMDMASK
;
1415 state
->txcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
1416 state
->rxcmd
= DMSGF_REPLY
;
1421 * Sent ABORT+DELETE in case where msgid has already
1422 * been fully closed, ignore the message.
1424 if (state
== NULL
) {
1425 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1428 kprintf("kdmsg_state_msgtx: no state match "
1429 "for DELETE cmd=%08x msgid=%016jx\n",
1431 (intmax_t)msg
->any
.head
.msgid
);
1438 * Sent ABORT+DELETE in case where msgid has
1439 * already been reused for an unrelated message,
1440 * ignore the message.
1442 if ((state
->txcmd
& DMSGF_CREATE
) == 0) {
1443 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1446 kprintf("kdmsg_state_msgtx: state reused "
1456 * Check for mid-stream ABORT command sent
1458 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1459 if (state
== NULL
||
1460 (state
->txcmd
& DMSGF_CREATE
) == 0) {
1467 case DMSGF_REPLY
| DMSGF_CREATE
:
1468 case DMSGF_REPLY
| DMSGF_CREATE
| DMSGF_DELETE
:
1470 * When transmitting a reply with CREATE set the original
1471 * persistent state message should already exist.
1473 if (state
== NULL
) {
1474 kprintf("kdmsg_state_msgtx: no state match "
1475 "for REPLY | CREATE\n");
1479 state
->txcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
1482 case DMSGF_REPLY
| DMSGF_DELETE
:
1484 * When transmitting a reply with DELETE set the original
1485 * persistent state message should already exist.
1487 * This is very similar to the REPLY|CREATE|* case except
1488 * txcmd is already stored, so we just add the DELETE flag.
1490 * Sent REPLY+ABORT+DELETE in case where msgid has
1491 * already been fully closed, ignore the message.
1493 if (state
== NULL
) {
1494 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1497 kprintf("kdmsg_state_msgtx: no state match "
1498 "for REPLY | DELETE\n");
1505 * Sent REPLY+ABORT+DELETE in case where msgid has already
1506 * been reused for an unrelated message, ignore the message.
1508 if ((state
->txcmd
& DMSGF_CREATE
) == 0) {
1509 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1512 kprintf("kdmsg_state_msgtx: state reused "
1513 "for REPLY | DELETE\n");
1522 * Check for mid-stream ABORT reply sent.
1524 * One-off REPLY messages are allowed for e.g. status updates.
1526 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1527 if (state
== NULL
||
1528 (state
->txcmd
& DMSGF_CREATE
) == 0) {
1536 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1542 kdmsg_state_cleanuptx(kdmsg_msg_t
*msg
)
1544 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1545 kdmsg_state_t
*state
;
1547 if ((state
= msg
->state
) == NULL
) {
1548 kdmsg_msg_free(msg
);
1549 } else if (msg
->any
.head
.cmd
& DMSGF_DELETE
) {
1550 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1551 KKASSERT((state
->txcmd
& DMSGF_DELETE
) == 0);
1552 state
->txcmd
|= DMSGF_DELETE
;
1553 if (state
->rxcmd
& DMSGF_DELETE
) {
1554 KKASSERT(state
->flags
& KDMSG_STATE_INSERTED
);
1555 if (state
->txcmd
& DMSGF_REPLY
) {
1556 KKASSERT(msg
->any
.head
.cmd
&
1558 RB_REMOVE(kdmsg_state_tree
,
1559 &iocom
->staterd_tree
, state
);
1561 KKASSERT((msg
->any
.head
.cmd
&
1563 RB_REMOVE(kdmsg_state_tree
,
1564 &iocom
->statewr_tree
, state
);
1566 state
->flags
&= ~KDMSG_STATE_INSERTED
;
1567 if (msg
!= state
->msg
)
1568 kdmsg_msg_free(msg
);
1569 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1570 kdmsg_state_free(state
);
1572 if (msg
!= state
->msg
)
1573 kdmsg_msg_free(msg
);
1574 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1576 } else if (msg
!= state
->msg
) {
1577 kdmsg_msg_free(msg
);
1583 kdmsg_state_free(kdmsg_state_t
*state
)
1585 kdmsg_iocom_t
*iocom
= state
->iocom
;
1588 KKASSERT((state
->flags
& KDMSG_STATE_INSERTED
) == 0);
1591 kfree(state
, iocom
->mmsg
);
1594 kdmsg_msg_free(msg
);
1599 kdmsg_msg_alloc(kdmsg_iocom_t
*iocom
, kdmsg_circuit_t
*circ
, uint32_t cmd
,
1600 int (*func
)(kdmsg_state_t
*, kdmsg_msg_t
*), void *data
)
1603 kdmsg_state_t
*state
;
1606 KKASSERT(iocom
!= NULL
);
1607 hbytes
= (cmd
& DMSGF_SIZE
) * DMSG_ALIGN
;
1608 msg
= kmalloc(offsetof(struct kdmsg_msg
, any
) + hbytes
,
1609 iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1610 msg
->hdr_size
= hbytes
;
1612 msg
->any
.head
.magic
= DMSG_HDR_MAGIC
;
1613 msg
->any
.head
.cmd
= cmd
;
1615 kdmsg_circ_hold(circ
);
1617 msg
->any
.head
.circuit
= circ
->msgid
;
1620 if (cmd
& DMSGF_CREATE
) {
1622 * New transaction, requires tracking state and a unique
1623 * msgid to be allocated.
1625 KKASSERT(msg
->state
== NULL
);
1626 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1627 state
->flags
= KDMSG_STATE_DYNAMIC
;
1629 state
->any
.any
= data
;
1631 state
->msgid
= (uint64_t)(uintptr_t)state
;
1633 state
->iocom
= iocom
;
1636 kdmsg_circ_hold(circ
);
1637 /*msg->any.head.msgid = state->msgid;XXX*/
1639 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1640 if (RB_INSERT(kdmsg_state_tree
, &iocom
->statewr_tree
, state
))
1641 panic("duplicate msgid allocated");
1642 state
->flags
|= KDMSG_STATE_INSERTED
;
1643 msg
->any
.head
.msgid
= state
->msgid
;
1644 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1650 kdmsg_msg_alloc_state(kdmsg_state_t
*state
, uint32_t cmd
,
1651 int (*func
)(kdmsg_state_t
*, kdmsg_msg_t
*), void *data
)
1653 kdmsg_iocom_t
*iocom
= state
->iocom
;
1657 KKASSERT(iocom
!= NULL
);
1658 hbytes
= (cmd
& DMSGF_SIZE
) * DMSG_ALIGN
;
1659 msg
= kmalloc(offsetof(struct kdmsg_msg
, any
) + hbytes
,
1660 iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1661 msg
->hdr_size
= hbytes
;
1663 msg
->any
.head
.magic
= DMSG_HDR_MAGIC
;
1664 msg
->any
.head
.cmd
= cmd
;
1667 kdmsg_circ_hold(state
->circ
);
1668 msg
->circ
= state
->circ
;
1669 msg
->any
.head
.circuit
= state
->circ
->msgid
;
1675 kdmsg_msg_free(kdmsg_msg_t
*msg
)
1677 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1679 if ((msg
->flags
& KDMSG_FLAG_AUXALLOC
) &&
1680 msg
->aux_data
&& msg
->aux_size
) {
1681 kfree(msg
->aux_data
, iocom
->mmsg
);
1682 msg
->flags
&= ~KDMSG_FLAG_AUXALLOC
;
1685 kdmsg_circ_drop(msg
->circ
);
1689 if (msg
->state
->msg
== msg
)
1690 msg
->state
->msg
= NULL
;
1693 msg
->aux_data
= NULL
;
1696 kfree(msg
, iocom
->mmsg
);
1700 * Circuits are tracked in a red-black tree by their circuit id (msgid).
1703 kdmsg_circuit_cmp(kdmsg_circuit_t
*circ1
, kdmsg_circuit_t
*circ2
)
1705 if (circ1
->msgid
< circ2
->msgid
)
1707 if (circ1
->msgid
> circ2
->msgid
)
1713 * Indexed messages are stored in a red-black tree indexed by their
1714 * msgid. Only persistent messages are indexed.
1717 kdmsg_state_cmp(kdmsg_state_t
*state1
, kdmsg_state_t
*state2
)
1719 if (state1
->iocom
< state2
->iocom
)
1721 if (state1
->iocom
> state2
->iocom
)
1723 if (state1
->circ
< state2
->circ
)
1725 if (state1
->circ
> state2
->circ
)
1727 if (state1
->msgid
< state2
->msgid
)
1729 if (state1
->msgid
> state2
->msgid
)
1735 * Write a message. All requisit command flags have been set.
1737 * If msg->state is non-NULL the message is written to the existing
1738 * transaction. msgid will be set accordingly.
1740 * If msg->state is NULL and CREATE is set new state is allocated and
1741 * (func, data) is installed. A msgid is assigned.
1743 * If msg->state is NULL and CREATE is not set the message is assumed
1744 * to be a one-way message. The originator must assign the msgid
1745 * (or leave it 0, which is typical.
1747 * This function merely queues the message to the management thread, it
1748 * does not write to the message socket/pipe.
1751 kdmsg_msg_write(kdmsg_msg_t
*msg
)
1753 kdmsg_iocom_t
*iocom
= msg
->iocom
;
1754 kdmsg_state_t
*state
;
1758 * Continuance or termination of existing transaction.
1759 * The transaction could have been initiated by either end.
1761 * (Function callback and aux data for the receive side can
1762 * be replaced or left alone).
1765 msg
->any
.head
.msgid
= state
->msgid
;
1766 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1769 * One-off message (always uses msgid 0 to distinguish
1770 * between a possibly lost in-transaction message due to
1771 * competing aborts and a real one-off message?)
1774 msg
->any
.head
.msgid
= 0;
1775 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1779 * With AUTOCIRC and AUTOFORGE it is possible for the circuit to
1780 * get ripped out in the rxthread while some other thread is
1781 * holding a ref on it inbetween allocating and sending a dmsg.
1783 if (msg
->circ
&& msg
->circ
->rcirc_state
== NULL
&&
1784 (msg
->circ
->span_state
== NULL
|| msg
->circ
->circ_state
== NULL
)) {
1785 kprintf("kdmsg_msg_write: Attempt to write message to "
1786 "terminated circuit: msg %08x\n", msg
->any
.head
.cmd
);
1787 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1788 if (kdmsg_state_msgtx(msg
)) {
1789 if (state
== NULL
|| msg
!= state
->msg
)
1790 kdmsg_msg_free(msg
);
1791 } else if ((msg
->state
->rxcmd
& DMSGF_DELETE
) == 0) {
1792 /* XXX SMP races simulating a response here */
1793 kdmsg_state_t
*state
= msg
->state
;
1794 kdmsg_state_cleanuptx(msg
);
1795 kdmsg_state_abort(state
);
1797 kdmsg_state_cleanuptx(msg
);
1803 * This flag is not set until after the tx thread has drained
1804 * the txmsgq and simulated responses. After that point the
1805 * txthread is dead and can no longer simulate responses.
1807 * Device drivers should never try to send a message once this
1808 * flag is set. They should have detected (through the state
1809 * closures) that the link is in trouble.
1811 if (iocom
->flags
& KDMSG_IOCOMF_EXITNOACC
) {
1812 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1813 panic("kdmsg_msg_write: Attempt to write message to "
1814 "terminated iocom\n");
1818 * Finish up the msg fields. Note that msg->aux_size and the
1819 * aux_bytes stored in the message header represent the unaligned
1820 * (actual) bytes of data, but the buffer is sized to an aligned
1821 * size and the CRC is generated over the aligned length.
1823 msg
->any
.head
.salt
= /* (random << 8) | */ (iocom
->msg_seq
& 255);
1826 if (msg
->aux_data
&& msg
->aux_size
) {
1827 uint32_t abytes
= DMSG_DOALIGN(msg
->aux_size
);
1829 msg
->any
.head
.aux_bytes
= msg
->aux_size
;
1830 msg
->any
.head
.aux_crc
= iscsi_crc32(msg
->aux_data
, abytes
);
1832 msg
->any
.head
.hdr_crc
= 0;
1833 msg
->any
.head
.hdr_crc
= iscsi_crc32(msg
->any
.buf
, msg
->hdr_size
);
1835 TAILQ_INSERT_TAIL(&iocom
->msgq
, msg
, qentry
);
1837 if (iocom
->msg_ctl
& KDMSG_CLUSTERCTL_SLEEPING
) {
1838 atomic_clear_int(&iocom
->msg_ctl
,
1839 KDMSG_CLUSTERCTL_SLEEPING
);
1840 wakeup(&iocom
->msg_ctl
);
1843 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1847 * Reply to a message and terminate our side of the transaction.
1849 * If msg->state is non-NULL we are replying to a one-way message.
1852 kdmsg_msg_reply(kdmsg_msg_t
*msg
, uint32_t error
)
1854 kdmsg_state_t
*state
= msg
->state
;
1859 * Reply with a simple error code and terminate the transaction.
1861 cmd
= DMSG_LNK_ERROR
;
1864 * Check if our direction has even been initiated yet, set CREATE.
1866 * Check what direction this is (command or reply direction). Note
1867 * that txcmd might not have been initiated yet.
1869 * If our direction has already been closed we just return without
1873 if (state
->txcmd
& DMSGF_DELETE
)
1875 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
1876 cmd
|= DMSGF_CREATE
;
1877 if (state
->txcmd
& DMSGF_REPLY
)
1879 cmd
|= DMSGF_DELETE
;
1881 if ((msg
->any
.head
.cmd
& DMSGF_REPLY
) == 0)
1885 /* XXX messy mask cmd to avoid allocating state */
1886 nmsg
= kdmsg_msg_alloc_state(state
, cmd
, NULL
, NULL
);
1887 nmsg
->any
.head
.error
= error
;
1888 kdmsg_msg_write(nmsg
);
1892 * Reply to a message and continue our side of the transaction.
1894 * If msg->state is non-NULL we are replying to a one-way message and this
1895 * function degenerates into the same as kdmsg_msg_reply().
1898 kdmsg_msg_result(kdmsg_msg_t
*msg
, uint32_t error
)
1900 kdmsg_state_t
*state
= msg
->state
;
1905 * Return a simple result code, do NOT terminate the transaction.
1907 cmd
= DMSG_LNK_ERROR
;
1910 * Check if our direction has even been initiated yet, set CREATE.
1912 * Check what direction this is (command or reply direction). Note
1913 * that txcmd might not have been initiated yet.
1915 * If our direction has already been closed we just return without
1919 if (state
->txcmd
& DMSGF_DELETE
)
1921 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
1922 cmd
|= DMSGF_CREATE
;
1923 if (state
->txcmd
& DMSGF_REPLY
)
1925 /* continuing transaction, do not set MSGF_DELETE */
1927 if ((msg
->any
.head
.cmd
& DMSGF_REPLY
) == 0)
1931 /* XXX messy mask cmd to avoid allocating state */
1932 nmsg
= kdmsg_msg_alloc_state(state
, cmd
, NULL
, NULL
);
1933 nmsg
->any
.head
.error
= error
;
1934 kdmsg_msg_write(nmsg
);
1938 * Reply to a message and terminate our side of the transaction.
1940 * If msg->state is non-NULL we are replying to a one-way message.
1943 kdmsg_state_reply(kdmsg_state_t
*state
, uint32_t error
)
1949 * Reply with a simple error code and terminate the transaction.
1951 cmd
= DMSG_LNK_ERROR
;
1954 * Check if our direction has even been initiated yet, set CREATE.
1956 * Check what direction this is (command or reply direction). Note
1957 * that txcmd might not have been initiated yet.
1959 * If our direction has already been closed we just return without
1963 if (state
->txcmd
& DMSGF_DELETE
)
1965 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
1966 cmd
|= DMSGF_CREATE
;
1967 if (state
->txcmd
& DMSGF_REPLY
)
1969 cmd
|= DMSGF_DELETE
;
1971 if ((state
->txcmd
& DMSGF_REPLY
) == 0)
1975 /* XXX messy mask cmd to avoid allocating state */
1976 nmsg
= kdmsg_msg_alloc_state(state
, cmd
, NULL
, NULL
);
1977 nmsg
->any
.head
.error
= error
;
1978 kdmsg_msg_write(nmsg
);
1982 * Reply to a message and continue our side of the transaction.
1984 * If msg->state is non-NULL we are replying to a one-way message and this
1985 * function degenerates into the same as kdmsg_msg_reply().
1988 kdmsg_state_result(kdmsg_state_t
*state
, uint32_t error
)
1994 * Return a simple result code, do NOT terminate the transaction.
1996 cmd
= DMSG_LNK_ERROR
;
1999 * Check if our direction has even been initiated yet, set CREATE.
2001 * Check what direction this is (command or reply direction). Note
2002 * that txcmd might not have been initiated yet.
2004 * If our direction has already been closed we just return without
2008 if (state
->txcmd
& DMSGF_DELETE
)
2010 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
2011 cmd
|= DMSGF_CREATE
;
2012 if (state
->txcmd
& DMSGF_REPLY
)
2014 /* continuing transaction, do not set MSGF_DELETE */
2016 if ((state
->txcmd
& DMSGF_REPLY
) == 0)
2020 /* XXX messy mask cmd to avoid allocating state */
2021 nmsg
= kdmsg_msg_alloc_state(state
, cmd
, NULL
, NULL
);
2022 nmsg
->any
.head
.error
= error
;
2023 kdmsg_msg_write(nmsg
);