2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * TODO: txcmd CREATE state is deferred by tx msgq, need to calculate
36 * a streaming response. See subr_diskiocom()'s diskiodone().
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/kernel.h>
42 #include <sys/systm.h>
43 #include <sys/queue.h>
45 #include <sys/malloc.h>
46 #include <sys/mount.h>
47 #include <sys/socket.h>
48 #include <sys/vnode.h>
49 #include <sys/sysctl.h>
53 #include <sys/thread.h>
54 #include <sys/globaldata.h>
55 #include <sys/limits.h>
59 RB_GENERATE(kdmsg_state_tree
, kdmsg_state
, rbnode
, kdmsg_state_cmp
);
61 SYSCTL_NODE(, OID_AUTO
, kdmsg
, CTLFLAG_RW
, 0, "kdmsg");
62 static int kdmsg_debug
= 1;
63 SYSCTL_INT(_kdmsg
, OID_AUTO
, debug
, CTLFLAG_RW
, &kdmsg_debug
, 0,
64 "Set debug level for kernel dmsg layer");
66 #define kd_printf(level, ctl, ...) \
67 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__)
69 #define kdio_printf(iocom, level, ctl, ...) \
70 if (kdmsg_debug >= (level)) kprintf("kdmsg: " ctl, __VA_ARGS__)
72 static int kdmsg_msg_receive_handling(kdmsg_msg_t
*msg
);
73 static int kdmsg_state_msgrx(kdmsg_msg_t
*msg
);
74 static int kdmsg_state_msgtx(kdmsg_msg_t
*msg
);
75 static void kdmsg_msg_write_locked(kdmsg_iocom_t
*iocom
, kdmsg_msg_t
*msg
);
76 static void kdmsg_state_cleanuprx(kdmsg_msg_t
*msg
);
77 static void kdmsg_state_cleanuptx(kdmsg_msg_t
*msg
);
78 static void kdmsg_subq_delete(kdmsg_state_t
*state
);
79 static void kdmsg_simulate_failure(kdmsg_state_t
*state
, int meto
, int error
);
80 static void kdmsg_state_abort(kdmsg_state_t
*state
);
81 static void kdmsg_state_dying(kdmsg_state_t
*state
);
82 static void kdmsg_state_free(kdmsg_state_t
*state
);
85 #define KDMSG_DEBUG_ARGS , const char *file, int line
86 #define kdmsg_state_hold(state) _kdmsg_state_hold(state, __FILE__, __LINE__)
87 #define kdmsg_state_drop(state) _kdmsg_state_drop(state, __FILE__, __LINE__)
89 #define KDMSG_DEBUG_ARGS
90 #define kdmsg_state_hold(state) _kdmsg_state_hold(state)
91 #define kdmsg_state_drop(state) _kdmsg_state_drop(state)
93 static void _kdmsg_state_hold(kdmsg_state_t
*state KDMSG_DEBUG_ARGS
);
94 static void _kdmsg_state_drop(kdmsg_state_t
*state KDMSG_DEBUG_ARGS
);
96 static void kdmsg_iocom_thread_rd(void *arg
);
97 static void kdmsg_iocom_thread_wr(void *arg
);
98 static int kdmsg_autorxmsg(kdmsg_msg_t
*msg
);
100 /*static struct lwkt_token kdmsg_token = LWKT_TOKEN_INITIALIZER(kdmsg_token);*/
103 * Initialize the roll-up communications structure for a network
104 * messaging session. This function does not install the socket.
/*
 * kdmsg_iocom_init() - prepare an iocom structure for use.
 *
 * iocom:  structure to initialize; it is zeroed first, so any previous
 *         contents are discarded.
 * handle: opaque caller pointer, stored in iocom->handle.
 * flags:  stored verbatim in iocom->flags.
 * mmsg:   malloc type.  Other code in this file allocates from
 *         iocom->mmsg, but the store is not visible in this extract
 *         (NOTE(review): confirm the assignment is present).
 * rcvmsg: receive callback, stored in iocom->rcvmsg.
 *
 * No communications descriptor is touched and no threads are created here.
 */
107 kdmsg_iocom_init(kdmsg_iocom_t
*iocom
, void *handle
, uint32_t flags
,
108 struct malloc_type
*mmsg
,
109 int (*rcvmsg
)(kdmsg_msg_t
*msg
))
/* Start from an all-zero structure. */
111 bzero(iocom
, sizeof(*iocom
));
112 iocom
->handle
= handle
;
114 iocom
->rcvmsg
= rcvmsg
;
115 iocom
->flags
= flags
;
/* msglk is the lock taken around msgq and state-tree manipulation. */
116 lockinit(&iocom
->msglk
, "h2msg", 0, 0);
/* Empty transmit queue and empty read/write state trees. */
117 TAILQ_INIT(&iocom
->msgq
);
118 RB_INIT(&iocom
->staterd_tree
);
119 RB_INIT(&iocom
->statewr_tree
);
/*
 * state0 is embedded in the iocom: it points back at the iocom, is its
 * own parent, and starts with an empty sub-state queue.
 */
121 iocom
->state0
.iocom
= iocom
;
122 iocom
->state0
.parent
= &iocom
->state0
;
123 TAILQ_INIT(&iocom
->state0
.subq
);
127 * [Re]connect using the passed file pointer. The caller must ref the
128 * fp for us. We own that ref now.
131 kdmsg_iocom_reconnect(kdmsg_iocom_t
*iocom
, struct file
*fp
,
132 const char *subsysname
)
135 * Destroy the current connection
137 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
138 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLRX
);
139 while (iocom
->msgrd_td
|| iocom
->msgwr_td
) {
140 wakeup(&iocom
->msg_ctl
);
141 lksleep(iocom
, &iocom
->msglk
, 0, "clstrkl", hz
);
145 * Drop communications descriptor
148 fdrop(iocom
->msg_fp
);
149 iocom
->msg_fp
= NULL
;
153 * Setup new communications descriptor
158 iocom
->flags
&= ~KDMSG_IOCOMF_EXITNOACC
;
160 lwkt_create(kdmsg_iocom_thread_rd
, iocom
, &iocom
->msgrd_td
,
161 NULL
, 0, -1, "%s-msgrd", subsysname
);
162 lwkt_create(kdmsg_iocom_thread_wr
, iocom
, &iocom
->msgwr_td
,
163 NULL
, 0, -1, "%s-msgwr", subsysname
);
164 lockmgr(&iocom
->msglk
, LK_RELEASE
);
168 * Caller sets up iocom->auto_lnk_conn and iocom->auto_lnk_span, then calls
169 * this function to handle the state machine for LNK_CONN and LNK_SPAN.
171 static int kdmsg_lnk_conn_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
);
172 static int kdmsg_lnk_span_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
);
/*
 * kdmsg_iocom_autoinitiate() - start the automatic LNK_CONN transaction.
 *
 * iocom:         communications structure; the caller has already filled in
 *                iocom->auto_lnk_conn (see the comment preceding this
 *                function).
 * auto_callback: stored in iocom->auto_callback; the reply handlers in this
 *                file invoke it when it is non-NULL.
 *
 * Allocates a DMSG_LNK_CONN | DMSGF_CREATE message under state0 with
 * kdmsg_lnk_conn_reply() as its callback, fills it from auto_lnk_conn,
 * remembers the resulting state in iocom->conn_state, and queues the
 * message with kdmsg_msg_write().
 */
175 kdmsg_iocom_autoinitiate(kdmsg_iocom_t
*iocom
,
176 void (*auto_callback
)(kdmsg_msg_t
*msg
))
180 iocom
->auto_callback
= auto_callback
;
182 msg
= kdmsg_msg_alloc(&iocom
->state0
,
183 DMSG_LNK_CONN
| DMSGF_CREATE
,
184 kdmsg_lnk_conn_reply
, NULL
);
/*
 * Copy the freshly allocated message header into the template first, so
 * that the whole-structure copy below does not overwrite the header that
 * kdmsg_msg_alloc() set up.
 */
185 iocom
->auto_lnk_conn
.head
= msg
->any
.head
;
186 msg
->any
.lnk_conn
= iocom
->auto_lnk_conn
;
/*
 * conn_state keeps its own reference on the state; kdmsg_lnk_conn_reply()
 * drops it when it clears conn_state.
 */
187 iocom
->conn_state
= msg
->state
;
188 kdmsg_state_hold(msg
->state
); /* iocom->conn_state */
189 kdmsg_msg_write(msg
);
194 kdmsg_lnk_conn_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
)
196 kdmsg_iocom_t
*iocom
= state
->iocom
;
200 * Upon receipt of the LNK_CONN acknowledgement initiate an
201 * automatic SPAN if we were asked to. Used by e.g. xdisk, but
202 * not used by HAMMER2 which must manage more than one transmitted
205 if ((msg
->any
.head
.cmd
& DMSGF_CREATE
) &&
206 (iocom
->flags
& KDMSG_IOCOMF_AUTOTXSPAN
)) {
207 rmsg
= kdmsg_msg_alloc(&iocom
->state0
,
208 DMSG_LNK_SPAN
| DMSGF_CREATE
,
209 kdmsg_lnk_span_reply
, NULL
);
210 iocom
->auto_lnk_span
.head
= rmsg
->any
.head
;
211 rmsg
->any
.lnk_span
= iocom
->auto_lnk_span
;
212 kdmsg_msg_write(rmsg
);
216 * Process shim after the CONN is acknowledged and before the CONN
217 * transaction is deleted. For deletions this gives device drivers
218 * the ability to interlock new operations on the circuit before
219 * it becomes illegal and panics.
221 if (iocom
->auto_callback
)
222 iocom
->auto_callback(msg
);
224 if ((state
->txcmd
& DMSGF_DELETE
) == 0 &&
225 (msg
->any
.head
.cmd
& DMSGF_DELETE
)) {
227 * iocom->conn_state has a state ref, drop it when clearing.
229 if (iocom
->conn_state
)
230 kdmsg_state_drop(iocom
->conn_state
);
231 iocom
->conn_state
= NULL
;
232 kdmsg_msg_reply(msg
, 0);
240 kdmsg_lnk_span_reply(kdmsg_state_t
*state
, kdmsg_msg_t
*msg
)
243 * Be sure to process shim before terminating the SPAN
244 * transaction. Gives device drivers the ability to
245 * interlock new operations on the circuit before it
246 * becomes illegal and panics.
248 if (state
->iocom
->auto_callback
)
249 state
->iocom
->auto_callback(msg
);
251 if ((state
->txcmd
& DMSGF_DELETE
) == 0 &&
252 (msg
->any
.head
.cmd
& DMSGF_DELETE
)) {
253 kdmsg_msg_reply(msg
, 0);
259 * Disconnect and clean up
262 kdmsg_iocom_uninit(kdmsg_iocom_t
*iocom
)
264 kdmsg_state_t
*state
;
269 * Ask the cluster controller to go away by setting
270 * KILLRX. Send a PING to get a response to unstick reading
273 * After 10 seconds shitcan the pipe and do an unclean shutdown.
275 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
277 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLRX
);
278 msg
= kdmsg_msg_alloc(&iocom
->state0
, DMSG_LNK_PING
, NULL
, NULL
);
279 kdmsg_msg_write_locked(iocom
, msg
);
282 while (iocom
->msgrd_td
|| iocom
->msgwr_td
) {
283 wakeup(&iocom
->msg_ctl
);
284 lksleep(iocom
, &iocom
->msglk
, 0, "clstrkl", hz
);
285 if (--retries
== 0 && iocom
->msg_fp
) {
286 kdio_printf(iocom
, 0, "%s\n",
288 "shitcanning unresponsive pipe");
289 fp_shutdown(iocom
->msg_fp
, SHUT_RDWR
);
290 /* retries allowed to go negative, keep looping */
297 if ((state
= iocom
->freerd_state
) != NULL
) {
298 iocom
->freerd_state
= NULL
;
299 kdmsg_state_drop(state
);
302 if ((state
= iocom
->freewr_state
) != NULL
) {
303 iocom
->freewr_state
= NULL
;
304 kdmsg_state_drop(state
);
308 * Drop communications descriptor
311 fdrop(iocom
->msg_fp
);
312 iocom
->msg_fp
= NULL
;
314 lockmgr(&iocom
->msglk
, LK_RELEASE
);
318 * Cluster controller thread. Perform messaging functions. We have one
319 * thread for the reader and one for the writer. The writer handles
320 * shutdown requests (which should break the reader thread).
324 kdmsg_iocom_thread_rd(void *arg
)
326 kdmsg_iocom_t
*iocom
= arg
;
328 kdmsg_msg_t
*msg
= NULL
;
333 while ((iocom
->msg_ctl
& KDMSG_CLUSTERCTL_KILLRX
) == 0) {
335 * Retrieve the message from the pipe or socket.
337 error
= fp_read(iocom
->msg_fp
, &hdr
, sizeof(hdr
),
338 NULL
, 1, UIO_SYSSPACE
);
341 if (hdr
.magic
!= DMSG_HDR_MAGIC
) {
342 kdio_printf(iocom
, 1, "bad magic: %04x\n", hdr
.magic
);
346 hbytes
= (hdr
.cmd
& DMSGF_SIZE
) * DMSG_ALIGN
;
347 if (hbytes
< sizeof(hdr
) || hbytes
> DMSG_HDR_MAX
) {
348 kdio_printf(iocom
, 1, "bad header size %zd\n", hbytes
);
353 /* XXX messy: mask cmd to avoid allocating state */
354 msg
= kdmsg_msg_alloc(&iocom
->state0
,
355 hdr
.cmd
& DMSGF_BASECMDMASK
,
358 msg
->hdr_size
= hbytes
;
359 if (hbytes
> sizeof(hdr
)) {
360 error
= fp_read(iocom
->msg_fp
, &msg
->any
.head
+ 1,
361 hbytes
- sizeof(hdr
),
362 NULL
, 1, UIO_SYSSPACE
);
364 kdio_printf(iocom
, 1, "%s\n",
365 "short msg received");
370 msg
->aux_size
= hdr
.aux_bytes
;
371 if (msg
->aux_size
> DMSG_AUX_MAX
) {
372 kdio_printf(iocom
, 1,
373 "illegal msg payload size %zd\n",
379 abytes
= DMSG_DOALIGN(msg
->aux_size
);
380 msg
->aux_data
= kmalloc(abytes
, iocom
->mmsg
, M_WAITOK
);
381 msg
->flags
|= KDMSG_FLAG_AUXALLOC
;
382 error
= fp_read(iocom
->msg_fp
, msg
->aux_data
,
383 abytes
, NULL
, 1, UIO_SYSSPACE
);
385 kdio_printf(iocom
, 1, "%s\n",
386 "short msg payload received");
391 error
= kdmsg_msg_receive_handling(msg
);
395 kdio_printf(iocom
, 1, "read thread terminating error=%d\n", error
);
397 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
402 * Shutdown the socket and set KILLRX for consistency in case the
403 * shutdown was not commanded. Signal the transmit side to shutdown
404 * by setting KILLTX and waking it up.
406 fp_shutdown(iocom
->msg_fp
, SHUT_RDWR
);
407 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLRX
|
408 KDMSG_CLUSTERCTL_KILLTX
);
409 iocom
->msgrd_td
= NULL
;
410 lockmgr(&iocom
->msglk
, LK_RELEASE
);
411 wakeup(&iocom
->msg_ctl
);
414 * iocom can be ripped out at any time once the lock is
415 * released with msgrd_td set to NULL. The wakeup()s are safe but
424 kdmsg_iocom_thread_wr(void *arg
)
426 kdmsg_iocom_t
*iocom
= arg
;
438 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
440 while ((iocom
->msg_ctl
& KDMSG_CLUSTERCTL_KILLTX
) == 0 && error
== 0) {
442 * Sleep if no messages pending. Interlock with flag while
445 if (TAILQ_EMPTY(&iocom
->msgq
)) {
446 atomic_set_int(&iocom
->msg_ctl
,
447 KDMSG_CLUSTERCTL_SLEEPING
);
448 lksleep(&iocom
->msg_ctl
, &iocom
->msglk
, 0, "msgwr", hz
);
449 atomic_clear_int(&iocom
->msg_ctl
,
450 KDMSG_CLUSTERCTL_SLEEPING
);
453 while ((msg
= TAILQ_FIRST(&iocom
->msgq
)) != NULL
) {
455 * Remove msg from the transmit queue and do
456 * persist and half-closed state handling.
458 TAILQ_REMOVE(&iocom
->msgq
, msg
, qentry
);
460 error
= kdmsg_state_msgtx(msg
);
461 if (error
== EALREADY
) {
472 * Dump the message to the pipe or socket.
474 * We have to clean up the message as if the transmit
475 * succeeded even if it failed.
477 lockmgr(&iocom
->msglk
, LK_RELEASE
);
478 error
= fp_write(iocom
->msg_fp
, &msg
->any
,
479 msg
->hdr_size
, &res
, UIO_SYSSPACE
);
480 if (error
|| res
!= msg
->hdr_size
) {
483 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
484 kdmsg_state_cleanuptx(msg
);
488 abytes
= DMSG_DOALIGN(msg
->aux_size
);
489 error
= fp_write(iocom
->msg_fp
,
490 msg
->aux_data
, abytes
,
492 if (error
|| res
!= abytes
) {
495 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
496 kdmsg_state_cleanuptx(msg
);
500 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
501 kdmsg_state_cleanuptx(msg
);
505 kdio_printf(iocom
, 1, "write thread terminating error=%d\n", error
);
508 * Shutdown the socket and set KILLTX for consistency in case the
509 * shutdown was not commanded. Signal the receive side to shutdown
510 * by setting KILLRX and waking it up.
512 fp_shutdown(iocom
->msg_fp
, SHUT_RDWR
);
513 atomic_set_int(&iocom
->msg_ctl
, KDMSG_CLUSTERCTL_KILLRX
|
514 KDMSG_CLUSTERCTL_KILLTX
);
515 wakeup(&iocom
->msg_ctl
);
518 * The transmit thread is responsible for final cleanups, wait
519 * for the receive side to terminate to prevent new received
520 * states from interfering with our cleanup.
522 * Do not set msgwr_td to NULL until we actually exit.
524 while (iocom
->msgrd_td
) {
525 wakeup(&iocom
->msg_ctl
);
526 lksleep(iocom
, &iocom
->msglk
, 0, "clstrkt", hz
);
530 * We can no longer receive new messages. We must drain the transmit
531 * message queue and simulate received messages to close any remaining
534 * Loop until all the states are gone and there are no messages
540 while (TAILQ_FIRST(&iocom
->msgq
) ||
541 RB_ROOT(&iocom
->staterd_tree
) ||
542 RB_ROOT(&iocom
->statewr_tree
)) {
544 * Simulate failure for all sub-states of state0.
546 kdmsg_drain_msgq(iocom
);
547 kdio_printf(iocom
, 2, "%s\n",
548 "simulate failure for all substates of state0");
549 kdmsg_simulate_failure(&iocom
->state0
, 0, DMSG_ERR_LOSTLINK
);
551 lksleep(iocom
, &iocom
->msglk
, 0, "clstrtk", hz
/ 2);
553 if ((int)(ticks
- save_ticks
) > hz
*2 && didwarn
== 0) {
555 kdio_printf(iocom
, 0,
556 "Warning, write thread on %p "
557 "still terminating\n",
560 if ((int)(ticks
- save_ticks
) > hz
*15 && didwarn
== 1) {
562 kdio_printf(iocom
, 0,
563 "Warning, write thread on %p "
564 "still terminating\n",
567 if ((int)(ticks
- save_ticks
) > hz
*60) {
568 kdio_printf(iocom
, 0,
569 "Can't terminate: msgq %p "
570 "rd_tree %p wr_tree %p\n",
571 TAILQ_FIRST(&iocom
->msgq
),
572 RB_ROOT(&iocom
->staterd_tree
),
573 RB_ROOT(&iocom
->statewr_tree
));
574 lksleep(iocom
, &iocom
->msglk
, 0, "clstrtk", hz
* 10);
579 * Exit handling is done by the write thread.
581 iocom
->flags
|= KDMSG_IOCOMF_EXITNOACC
;
582 lockmgr(&iocom
->msglk
, LK_RELEASE
);
585 * The state trees had better be empty now
587 KKASSERT(RB_EMPTY(&iocom
->staterd_tree
));
588 KKASSERT(RB_EMPTY(&iocom
->statewr_tree
));
589 KKASSERT(iocom
->conn_state
== NULL
);
591 if (iocom
->exit_func
) {
593 * iocom is invalid after we call the exit function.
595 iocom
->msgwr_td
= NULL
;
596 iocom
->exit_func(iocom
);
599 * iocom can be ripped out from under us once msgwr_td is
600 * set to NULL. The wakeup is safe.
602 iocom
->msgwr_td
= NULL
;
609 * This cleans out the pending transmit message queue, adjusting any
610 * persistent states properly in the process.
612 * Called with iocom locked.
615 kdmsg_drain_msgq(kdmsg_iocom_t
*iocom
)
620 * Clean out our pending transmit queue, executing the
621 * appropriate state adjustments. If this tries to open
622 * any new outgoing transactions we have to loop up and
625 while ((msg
= TAILQ_FIRST(&iocom
->msgq
)) != NULL
) {
626 TAILQ_REMOVE(&iocom
->msgq
, msg
, qentry
);
627 if (kdmsg_state_msgtx(msg
))
630 kdmsg_state_cleanuptx(msg
);
635 * Do all processing required to handle a freshly received message
636 * after its low level header has been validated.
638 * iocom is not locked.
642 kdmsg_msg_receive_handling(kdmsg_msg_t
*msg
)
644 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
648 * State machine tracking, state assignment for msg,
649 * returns error and discard status. Errors are fatal
650 * to the connection except for EALREADY which forces
651 * a discard without execution.
653 error
= kdmsg_state_msgrx(msg
);
654 if (msg
->state
->flags
& KDMSG_STATE_ABORTING
) {
655 kdio_printf(iocom
, 5,
656 "kdmsg_state_abort(b): state %p rxcmd=%08x "
657 "txcmd=%08x msgrx error %d\n",
658 msg
->state
, msg
->state
->rxcmd
,
659 msg
->state
->txcmd
, error
);
663 * Raw protocol or connection error
665 if (msg
->state
->flags
& KDMSG_STATE_ABORTING
)
666 kdio_printf(iocom
, 5,
667 "X1 state %p error %d\n",
670 if (error
== EALREADY
)
672 } else if (msg
->state
&& msg
->state
->func
) {
674 * Message related to state which already has a
675 * handling function installed for it.
677 if (msg
->state
->flags
& KDMSG_STATE_ABORTING
)
678 kdio_printf(iocom
, 5,
679 "X2 state %p func %p\n",
680 msg
->state
, msg
->state
->func
);
681 error
= msg
->state
->func(msg
->state
, msg
);
682 kdmsg_state_cleanuprx(msg
);
683 } else if (iocom
->flags
& KDMSG_IOCOMF_AUTOANY
) {
684 if (msg
->state
->flags
& KDMSG_STATE_ABORTING
)
685 kdio_printf(iocom
, 5,
686 "X3 state %p\n", msg
->state
);
687 error
= kdmsg_autorxmsg(msg
);
688 kdmsg_state_cleanuprx(msg
);
690 if (msg
->state
->flags
& KDMSG_STATE_ABORTING
)
691 kdio_printf(iocom
, 5,
692 "X4 state %p\n", msg
->state
);
693 error
= iocom
->rcvmsg(msg
);
694 kdmsg_state_cleanuprx(msg
);
700 * Process state tracking for a message after reception and dequeueing,
701 * prior to execution of the state callback. The state is updated and
702 * will be removed from the RBTREE if completely closed, but the state->parent
703 * and subq linkage is not cleaned up until after the callback (see
708 * NOTE: A message transaction can consist of several messages in either
711 * NOTE: The msgid is unique to the initiator, not necessarily unique for
712 * us or for any relay or for the return direction for that matter.
713 * That is, two sides sending a new message can use the same msgid
718 * ABORT sequences work by setting the ABORT flag along with normal message
719 * state. However, ABORTs can also be sent on half-closed messages, that is
720 * even if the command or reply side has already sent a DELETE, as long as
721 * the message has not been fully closed it can still send an ABORT+DELETE
722 * to terminate the half-closed message state.
724 * Since ABORT+DELETEs can race we silently discard ABORTs for message
725 * state which has already been fully closed. REPLY+ABORT+DELETEs can
726 * also race, and in this situation the other side might have already
727 * initiated a new unrelated command with the same message id. Since
728 * the abort has not set the CREATE flag the situation can be detected
729 * and the message will also be discarded.
731 * Non-blocking requests can be initiated with ABORT+CREATE[+DELETE].
732 * The ABORT request is essentially integrated into the command instead
733 * of being sent later on. In this situation the command implementation
734 * detects that CREATE and ABORT are both set (vs ABORT alone) and can
735 * special-case non-blocking operation for the command.
737 * NOTE! Messages with ABORT set without CREATE or DELETE are considered
738 * to be mid-stream aborts for command/reply sequences. ABORTs on
739 * one-way messages are not supported.
741 * NOTE! If a command sequence does not support aborts the ABORT flag is
746 * One-off messages (no reply expected) are sent with neither CREATE nor DELETE
747 * set. One-off messages cannot be aborted and typically aren't processed
748 * by these routines. The REPLY bit can be used to distinguish whether a
749 * one-off message is a command or reply. For example, one-off replies
750 * will typically just contain status updates.
754 kdmsg_state_msgrx(kdmsg_msg_t
*msg
)
756 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
757 kdmsg_state_t
*state
;
758 kdmsg_state_t
*pstate
;
759 kdmsg_state_t sdummy
;
763 * Make sure a state structure is ready to go in case we need a new
764 * one. This is the only routine which uses freerd_state so no
765 * races are possible.
767 if ((state
= iocom
->freerd_state
) == NULL
) {
768 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
769 state
->flags
= KDMSG_STATE_DYNAMIC
;
770 state
->iocom
= iocom
;
772 TAILQ_INIT(&state
->subq
);
773 iocom
->freerd_state
= state
;
775 state
= NULL
; /* safety */
778 * Lock RB tree and locate existing persistent state, if any.
780 * If received msg is a command state is on staterd_tree.
781 * If received msg is a reply state is on statewr_tree.
783 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
786 if (msg
->state
== &iocom
->state0
) {
787 sdummy
.msgid
= msg
->any
.head
.msgid
;
788 sdummy
.iocom
= iocom
;
789 if (msg
->any
.head
.cmd
& DMSGF_REVTRANS
) {
790 state
= RB_FIND(kdmsg_state_tree
, &iocom
->statewr_tree
,
793 state
= RB_FIND(kdmsg_state_tree
, &iocom
->staterd_tree
,
798 * Set message state unconditionally. If this is a CREATE
799 * message this state will become the parent state and new
800 * state will be allocated for the message state.
803 state
= &iocom
->state0
;
804 if (state
->flags
& KDMSG_STATE_INTERLOCK
) {
805 state
->flags
|= KDMSG_STATE_SIGNAL
;
806 lksleep(state
, &iocom
->msglk
, 0, "dmrace", hz
);
809 kdmsg_state_hold(state
);
810 kdmsg_state_drop(msg
->state
); /* iocom->state0 */
817 * Short-cut one-off or mid-stream messages.
819 if ((msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
820 DMSGF_ABORT
)) == 0) {
826 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
827 * inside the case statements.
829 switch(msg
->any
.head
.cmd
& (DMSGF_CREATE
|DMSGF_DELETE
|DMSGF_REPLY
)) {
831 case DMSGF_CREATE
| DMSGF_DELETE
:
833 * New persistent command received.
835 if (state
!= &iocom
->state0
) {
836 kdio_printf(iocom
, 1, "%s\n",
837 "duplicate transaction");
843 * Lookup the circuit. The circuit is an open transaction.
844 * The REVCIRC bit in the message tells us which side
845 * initiated the transaction representing the circuit.
847 if (msg
->any
.head
.circuit
) {
848 sdummy
.msgid
= msg
->any
.head
.circuit
;
850 if (msg
->any
.head
.cmd
& DMSGF_REVCIRC
) {
851 pstate
= RB_FIND(kdmsg_state_tree
,
852 &iocom
->statewr_tree
,
855 pstate
= RB_FIND(kdmsg_state_tree
,
856 &iocom
->staterd_tree
,
859 if (pstate
== NULL
) {
860 kdio_printf(iocom
, 1, "%s\n",
867 pstate
= &iocom
->state0
;
871 * Allocate new state.
873 * msg->state becomes the owner of the ref we inherit from
876 kdmsg_state_drop(state
);
877 state
= iocom
->freerd_state
;
878 iocom
->freerd_state
= NULL
;
880 msg
->state
= state
; /* inherits freerd ref */
881 state
->parent
= pstate
;
882 KKASSERT(state
->iocom
== iocom
);
883 state
->flags
|= KDMSG_STATE_RBINSERTED
|
884 KDMSG_STATE_SUBINSERTED
|
885 KDMSG_STATE_OPPOSITE
;
886 if (TAILQ_EMPTY(&pstate
->subq
))
887 kdmsg_state_hold(pstate
);/* states on pstate->subq */
888 kdmsg_state_hold(state
); /* state on pstate->subq */
889 kdmsg_state_hold(state
); /* state on rbtree */
890 state
->icmd
= msg
->any
.head
.cmd
& DMSGF_BASECMDMASK
;
891 state
->rxcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
892 state
->txcmd
= DMSGF_REPLY
;
893 state
->msgid
= msg
->any
.head
.msgid
;
894 state
->flags
&= ~KDMSG_STATE_NEW
;
895 RB_INSERT(kdmsg_state_tree
, &iocom
->staterd_tree
, state
);
896 TAILQ_INSERT_TAIL(&pstate
->subq
, state
, entry
);
901 * Persistent state is expected but might not exist if an
902 * ABORT+DELETE races the close.
904 if (state
== &iocom
->state0
) {
905 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
906 kdio_printf(iocom
, 1, "%s\n",
911 kdio_printf(iocom
, 1, "%s\n",
912 "msgrx: no state for DELETE");
919 * Handle another ABORT+DELETE case if the msgid has already
922 if ((state
->rxcmd
& DMSGF_CREATE
) == 0) {
923 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
924 kdio_printf(iocom
, 1, "%s\n",
925 "msgrx: state already B");
928 kdio_printf(iocom
, 1, "%s\n",
929 "msgrx: state reused for DELETE");
938 * Check for mid-stream ABORT command received, otherwise
941 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
942 if (state
== &iocom
->state0
||
943 (state
->rxcmd
& DMSGF_CREATE
) == 0) {
950 case DMSGF_REPLY
| DMSGF_CREATE
:
951 case DMSGF_REPLY
| DMSGF_CREATE
| DMSGF_DELETE
:
953 * When receiving a reply with CREATE set the original
954 * persistent state message should already exist.
956 if (state
== &iocom
->state0
) {
957 kdio_printf(iocom
, 1,
958 "msgrx: no state match for "
959 "REPLY cmd=%08x msgid=%016jx\n",
961 (intmax_t)msg
->any
.head
.msgid
);
965 state
->rxcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
968 case DMSGF_REPLY
| DMSGF_DELETE
:
970 * Received REPLY+ABORT+DELETE in case where msgid has
971 * already been fully closed, ignore the message.
973 if (state
== &iocom
->state0
) {
974 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
977 kdio_printf(iocom
, 1, "%s\n",
978 "msgrx: no state match "
986 * Received REPLY+ABORT+DELETE in case where msgid has
987 * already been reused for an unrelated message,
988 * ignore the message.
990 if ((state
->rxcmd
& DMSGF_CREATE
) == 0) {
991 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
994 kdio_printf(iocom
, 1, "%s\n",
995 "msgrx: state reused "
1005 * Check for mid-stream ABORT reply received to sent command.
1007 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1008 if (state
== &iocom
->state0
||
1009 (state
->rxcmd
& DMSGF_CREATE
) == 0) {
1019 * Calculate the easy-switch() transactional command. Represents
1020 * the outer-transaction command for any transaction-create or
1021 * transaction-delete, and the inner message command for any
1022 * non-transaction or inside-transaction command. tcmd will be
1023 * set to 0 if the message state is illegal.
1025 * The two can be told apart because outer-transaction commands
1026 * always have a DMSGF_CREATE and/or DMSGF_DELETE flag.
1029 if (msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
)) {
1030 if (state
!= &iocom
->state0
) {
1031 msg
->tcmd
= (msg
->state
->icmd
& DMSGF_BASECMDMASK
) |
1032 (msg
->any
.head
.cmd
& (DMSGF_CREATE
|
1039 msg
->tcmd
= msg
->any
.head
.cmd
& DMSGF_CMDSWMASK
;
1043 * Adjust the state for DELETE handling now, before making the
1044 * callback so we are atomic with other state updates.
1046 * Subq/parent linkages are cleaned up after the callback.
1047 * If an error occurred the message is ignored and state is not
1050 if ((state
= msg
->state
) == NULL
|| error
!= 0) {
1051 kdio_printf(iocom
, 1,
1052 "msgrx: state=%p error %d\n",
1054 } else if (msg
->any
.head
.cmd
& DMSGF_DELETE
) {
1055 KKASSERT((state
->rxcmd
& DMSGF_DELETE
) == 0);
1056 state
->rxcmd
|= DMSGF_DELETE
;
1057 if (state
->txcmd
& DMSGF_DELETE
) {
1058 KKASSERT(state
->flags
& KDMSG_STATE_RBINSERTED
);
1059 if (state
->rxcmd
& DMSGF_REPLY
) {
1060 KKASSERT(msg
->any
.head
.cmd
&
1062 RB_REMOVE(kdmsg_state_tree
,
1063 &iocom
->statewr_tree
, state
);
1065 KKASSERT((msg
->any
.head
.cmd
&
1067 RB_REMOVE(kdmsg_state_tree
,
1068 &iocom
->staterd_tree
, state
);
1070 state
->flags
&= ~KDMSG_STATE_RBINSERTED
;
1071 kdmsg_state_drop(state
); /* state on rbtree */
1074 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1080 * Called instead of iocom->rcvmsg() if any of the AUTO flags are set.
1081 * This routine must call iocom->rcvmsg() for anything not automatically
1085 kdmsg_autorxmsg(kdmsg_msg_t
*msg
)
1087 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
1093 * Main switch processes transaction create/delete sequences only.
1094 * Use icmd (DELETEs use DMSG_LNK_ERROR
1096 * NOTE: If processing in-transaction messages you generally want
1097 * an inner switch on msg->any.head.cmd.
1100 cmd
= (msg
->state
->icmd
& DMSGF_BASECMDMASK
) |
1101 (msg
->any
.head
.cmd
& (DMSGF_CREATE
|
1111 * Received ping, send reply
1113 rep
= kdmsg_msg_alloc(msg
->state
, DMSG_LNK_PING
| DMSGF_REPLY
,
1115 kdmsg_msg_write(rep
);
1117 case DMSG_LNK_PING
| DMSGF_REPLY
:
1118 /* ignore replies */
1120 case DMSG_LNK_CONN
| DMSGF_CREATE
:
1121 case DMSG_LNK_CONN
| DMSGF_CREATE
| DMSGF_DELETE
:
1123 * Received LNK_CONN transaction. Transmit response and
1124 * leave transaction open, which allows the other end to
1125 * start the SPAN protocol.
1127 * Handle shim after acknowledging the CONN.
1129 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) == 0) {
1130 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCONN
) {
1131 kdmsg_msg_result(msg
, 0);
1132 if (iocom
->auto_callback
)
1133 iocom
->auto_callback(msg
);
1135 error
= iocom
->rcvmsg(msg
);
1140 case DMSG_LNK_CONN
| DMSGF_DELETE
:
1142 * This message is usually simulated after a link is lost
1143 * to clean up the transaction.
1145 if (iocom
->flags
& KDMSG_IOCOMF_AUTOCONN
) {
1146 if (iocom
->auto_callback
)
1147 iocom
->auto_callback(msg
);
1148 kdmsg_msg_reply(msg
, 0);
1150 error
= iocom
->rcvmsg(msg
);
1153 case DMSG_LNK_SPAN
| DMSGF_CREATE
:
1154 case DMSG_LNK_SPAN
| DMSGF_CREATE
| DMSGF_DELETE
:
1156 * Received LNK_SPAN transaction. We do not have to respond
1157 * (except on termination), but we must leave the transaction
1160 * Handle shim after acknowledging the SPAN.
1162 if (iocom
->flags
& KDMSG_IOCOMF_AUTORXSPAN
) {
1163 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) == 0) {
1164 if (iocom
->auto_callback
)
1165 iocom
->auto_callback(msg
);
1170 error
= iocom
->rcvmsg(msg
);
1174 case DMSG_LNK_SPAN
| DMSGF_DELETE
:
1176 * Process shims (auto_callback) before cleaning up the
1177 * circuit structure and closing the transactions. Device
1178 * driver should ensure that the circuit is not used after
1179 * the auto_callback() returns.
1181 * Handle shim before closing the SPAN transaction.
1183 if (iocom
->flags
& KDMSG_IOCOMF_AUTORXSPAN
) {
1184 if (iocom
->auto_callback
)
1185 iocom
->auto_callback(msg
);
1186 kdmsg_msg_reply(msg
, 0);
1188 error
= iocom
->rcvmsg(msg
);
1193 * Anything unhandled goes into rcvmsg.
1195 * NOTE: Replies to link-level messages initiated by our side
1196 * are handled by the state callback, they are NOT
1199 error
= iocom
->rcvmsg(msg
);
1206 * Post-receive-handling message and state cleanup. This routine is called
1207 * after the state function handling/callback to properly dispose of the
1208 * message and unlink the state's parent/subq linkage if the state is
1209 * completely closed.
1211 * msglk is not held.
1215 kdmsg_state_cleanuprx(kdmsg_msg_t
*msg
)
1217 kdmsg_state_t
*state
= msg
->state
;
1218 kdmsg_iocom_t
*iocom
= state
->iocom
;
1220 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1221 if (state
!= &iocom
->state0
) {
1223 * When terminating a transaction (in either direction), all
1224 * sub-states are aborted.
1226 if ((msg
->any
.head
.cmd
& DMSGF_DELETE
) &&
1227 TAILQ_FIRST(&msg
->state
->subq
)) {
1228 kdio_printf(iocom
, 2,
1229 "simulate failure for substates of "
1230 "state %p cmd %08x/%08x\n",
1234 kdmsg_simulate_failure(msg
->state
,
1235 0, DMSG_ERR_LOSTLINK
);
1239 * Once the state is fully closed we can (try to) remove it
1240 * from the subq topology.
1242 if ((state
->flags
& KDMSG_STATE_SUBINSERTED
) &&
1243 (state
->rxcmd
& DMSGF_DELETE
) &&
1244 (state
->txcmd
& DMSGF_DELETE
)) {
1246 * Remove parent linkage if state is completely closed.
1248 kdmsg_subq_delete(state
);
1251 kdmsg_msg_free(msg
);
1253 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1257 * Remove state from its parent's subq. This can wind up recursively
1258 * dropping the parent upward.
1260 * NOTE: Once we drop the parent, our pstate pointer may become invalid.
1264 kdmsg_subq_delete(kdmsg_state_t
*state
)
1266 kdmsg_state_t
*pstate
;
1268 if (state
->flags
& KDMSG_STATE_SUBINSERTED
) {
1269 pstate
= state
->parent
;
1271 if (pstate
->scan
== state
)
1272 pstate
->scan
= NULL
;
1273 TAILQ_REMOVE(&pstate
->subq
, state
, entry
);
1274 state
->flags
&= ~KDMSG_STATE_SUBINSERTED
;
1275 state
->parent
= NULL
;
1276 if (TAILQ_EMPTY(&pstate
->subq
)) {
1277 kdmsg_state_drop(pstate
);/* pstate->subq */
1279 pstate
= NULL
; /* safety */
1280 kdmsg_state_drop(state
); /* pstate->subq */
1282 KKASSERT(state
->parent
== NULL
);
1287 * Simulate receiving a message which terminates an active transaction
1288 * state. Our simulated received message must set DELETE and may also
1289 * have to set CREATE. It must also ensure that all fields are set such
1290 * that the receive handling code can find the state (kdmsg_state_msgrx())
1291 * or an endless loop will ensue.
1293 * This is used when the other end of the link is dead so the device driver
1294 * gets a completed transaction for all pending states.
1296 * Called with iocom locked.
1300 kdmsg_simulate_failure(kdmsg_state_t
*state
, int meto
, int error
)
1302 kdmsg_state_t
*substate
;
1304 kdmsg_state_hold(state
); /* aborting */
1307 * Abort parent state first. Parent will not actually disappear
1308 * until children are gone. Device drivers must handle the situation.
1309 * The advantage of this is that device drivers can flag the situation
1310 * as an interlock against new operations on dying states. And since
1311 * device operations are often asynchronous anyway, this sequence of
1312 * events works out better.
1315 kdmsg_state_abort(state
);
1318 * Recurse through any children.
1321 TAILQ_FOREACH(substate
, &state
->subq
, entry
) {
1322 if (substate
->flags
& KDMSG_STATE_ABORTING
)
1324 state
->scan
= substate
;
1325 kdmsg_simulate_failure(substate
, 1, error
);
1326 if (state
->scan
!= substate
)
1329 kdmsg_state_drop(state
); /* aborting */
1334 kdmsg_state_abort(kdmsg_state_t
*state
)
1339 * Set ABORTING and DYING, return if already set. If the state was
1340 * just allocated we defer the abort operation until the related
1341 * message is processed.
1343 KKASSERT((state
->flags
& KDMSG_STATE_ABORTING
) == 0);
1344 if (state
->flags
& KDMSG_STATE_ABORTING
)
1346 state
->flags
|= KDMSG_STATE_ABORTING
;
1347 kdmsg_state_dying(state
);
1348 if (state
->flags
& KDMSG_STATE_NEW
) {
1349 kdio_printf(iocom
, 5,
1350 "kdmsg_state_abort(0): state %p rxcmd %08x "
1351 "txcmd %08x flags %08x - in NEW state\n",
1352 state
, state
->rxcmd
,
1353 state
->txcmd
, state
->flags
);
1358 * NOTE: The DELETE flag might already be set due to an early
1361 * NOTE: Args to kdmsg_msg_alloc() to avoid dynamic state allocation.
1363 * NOTE: We are simulating a received message using our state
1364 * (vs a message generated by the other side using its state),
1365 * so we must invert DMSGF_REVTRANS and DMSGF_REVCIRC.
1367 kdio_printf(iocom
, 5,
1368 "kdmsg_state_abort(1): state %p rxcmd %08x txcmd %08x\n",
1369 state
, state
->rxcmd
, state
->txcmd
);
1370 if ((state
->rxcmd
& DMSGF_DELETE
) == 0) {
1371 msg
= kdmsg_msg_alloc(state
, DMSG_LNK_ERROR
, NULL
, NULL
);
1372 if ((state
->rxcmd
& DMSGF_CREATE
) == 0)
1373 msg
->any
.head
.cmd
|= DMSGF_CREATE
;
1374 msg
->any
.head
.cmd
|= DMSGF_DELETE
|
1375 (state
->rxcmd
& DMSGF_REPLY
);
1376 msg
->any
.head
.cmd
^= (DMSGF_REVTRANS
| DMSGF_REVCIRC
);
1377 msg
->any
.head
.error
= DMSG_ERR_LOSTLINK
;
1378 kdio_printf(iocom
, 5,
1379 "kdmsg_state_abort(a): state %p msgcmd %08x\n",
1380 state
, msg
->any
.head
.cmd
);
1381 /* circuit not initialized */
1382 lockmgr(&state
->iocom
->msglk
, LK_RELEASE
);
1383 kdmsg_msg_receive_handling(msg
);
1384 lockmgr(&state
->iocom
->msglk
, LK_EXCLUSIVE
);
1387 kdio_printf(iocom
, 5,
1388 "kdmsg_state_abort(2): state %p rxcmd %08x txcmd %08x\n",
1389 state
, state
->rxcmd
, state
->txcmd
);
1393 * Recursively sets KDMSG_STATE_DYING on state and all sub-states, preventing
1394 * the transmission of any new messages on these states. This is done
1395 * atomically when parent state is terminating, whereas setting ABORTING is
1396 * not atomic and can leak races.
1400 kdmsg_state_dying(kdmsg_state_t
*state
)
1402 kdmsg_state_t
*scan
;
1404 if ((state
->flags
& KDMSG_STATE_DYING
) == 0) {
1405 state
->flags
|= KDMSG_STATE_DYING
;
1406 TAILQ_FOREACH(scan
, &state
->subq
, entry
)
1407 kdmsg_state_dying(scan
);
1412 * Process state tracking for a message prior to transmission.
1414 * Called with msglk held and the msg dequeued. Returns non-zero if
1415 * the message is bad and should be deleted by the caller.
1417 * One-off messages are usually with dummy state and msg->state may be NULL
1418 * in this situation.
1420 * New transactions (when CREATE is set) will insert the state.
1422 * May request that caller discard the message by setting *discardp to 1.
1423 * A NULL state may be returned in this case.
/*
 * kdmsg_state_msgtx(msg) - transmit-side state tracking for one message.
 *
 * What the visible code does:
 *   - Ensures iocom->freewr_state holds a spare, zeroed state
 *     (KDMSG_STATE_DYNAMIC, subq initialised), allocating one with
 *     M_WAITOK when the slot is empty.
 *   - Messages with none of CREATE/DELETE/ABORT set take a short-cut path.
 *   - Otherwise dispatches on the CREATE/DELETE/REPLY bits of the command:
 *       CREATE[|DELETE]        records icmd, txcmd (DELETE masked off),
 *                              sets rxcmd to DMSGF_REPLY and clears
 *                              KDMSG_STATE_NEW.
 *       DELETE / mid-stream    checked against state0 and against a
 *                              txcmd lacking CREATE; ABORT is tested
 *                              separately from the logged mismatch cases.
 *       REPLY|CREATE[|DELETE]  logs when the state is state0, otherwise
 *                              records txcmd (DELETE masked off).
 *       REPLY|DELETE / REPLY   same state0 / missing-CREATE checks as the
 *                              non-reply cases.
 *   - On the way out, sets KDMSG_STATE_INTERLOCK when state is non-NULL
 *     and error is 0.
 *
 * NOTE(review): the assignments to 'error', the break/return statements
 * and the value the function returns are not visible in this span;
 * confirm them against the full source before relying on specific error
 * codes.
 */
1427 kdmsg_state_msgtx(kdmsg_msg_t
*msg
)
1429 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
1430 kdmsg_state_t
*state
;
1434 * Make sure a state structure is ready to go in case we need a new
1435 * one. This is the only routine which uses freewr_state so no
1436 * races are possible.
1438 if ((state
= iocom
->freewr_state
) == NULL
) {
1439 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1440 state
->flags
= KDMSG_STATE_DYNAMIC
;
1441 state
->iocom
= iocom
;
1443 TAILQ_INIT(&state
->subq
);
1444 iocom
->freewr_state
= state
;
1448 * Lock RB tree. If persistent state is present it will have already
1449 * been assigned to msg.
1454 * Short-cut one-off or mid-stream messages (state may be NULL).
1456 if ((msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
1457 DMSGF_ABORT
)) == 0) {
1463 * Switch on CREATE, DELETE, REPLY, and also handle ABORT from
1464 * inside the case statements.
1466 switch(msg
->any
.head
.cmd
& (DMSGF_CREATE
| DMSGF_DELETE
|
1469 case DMSGF_CREATE
| DMSGF_DELETE
:
1471 * Insert the new persistent message state and mark
1472 * half-closed if DELETE is set. Since this is a new
1473 * message it isn't possible to transition into the fully
1474 * closed state here.
1476 * XXX state must be assigned and inserted by
1477 * kdmsg_msg_write(). txcmd is assigned by us
1480 KKASSERT(state
!= NULL
);
1481 state
->icmd
= msg
->any
.head
.cmd
& DMSGF_BASECMDMASK
;
1482 state
->txcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
1483 state
->rxcmd
= DMSGF_REPLY
;
1484 state
->flags
&= ~KDMSG_STATE_NEW
;
1489 * Sent ABORT+DELETE in case where msgid has already
1490 * been fully closed, ignore the message.
1492 if (state
== &iocom
->state0
) {
1493 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1496 kdio_printf(iocom
, 1,
1497 "msgtx: no state match "
1498 "for DELETE cmd=%08x msgid=%016jx\n",
1500 (intmax_t)msg
->any
.head
.msgid
);
1507 * Sent ABORT+DELETE in case where msgid has
1508 * already been reused for an unrelated message,
1509 * ignore the message.
1511 if ((state
->txcmd
& DMSGF_CREATE
) == 0) {
1512 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1515 kdio_printf(iocom
, 1, "%s\n",
1516 "msgtx: state reused "
1526 * Check for mid-stream ABORT command sent
1528 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1529 if (state
== &state
->iocom
->state0
||
1530 (state
->txcmd
& DMSGF_CREATE
) == 0) {
1537 case DMSGF_REPLY
| DMSGF_CREATE
:
1538 case DMSGF_REPLY
| DMSGF_CREATE
| DMSGF_DELETE
:
1540 * When transmitting a reply with CREATE set the original
1541 * persistent state message should already exist.
1543 if (state
== &state
->iocom
->state0
) {
1544 kdio_printf(iocom
, 1, "%s\n",
1545 "msgtx: no state match "
1546 "for REPLY | CREATE");
1550 state
->txcmd
= msg
->any
.head
.cmd
& ~DMSGF_DELETE
;
1553 case DMSGF_REPLY
| DMSGF_DELETE
:
1555 * When transmitting a reply with DELETE set the original
1556 * persistent state message should already exist.
1558 * This is very similar to the REPLY|CREATE|* case except
1559 * txcmd is already stored, so we just add the DELETE flag.
1561 * Sent REPLY+ABORT+DELETE in case where msgid has
1562 * already been fully closed, ignore the message.
1564 if (state
== &state
->iocom
->state0
) {
1565 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1568 kdio_printf(iocom
, 1, "%s\n",
1569 "msgtx: no state match "
1570 "for REPLY | DELETE");
1577 * Sent REPLY+ABORT+DELETE in case where msgid has already
1578 * been reused for an unrelated message, ignore the message.
1580 if ((state
->txcmd
& DMSGF_CREATE
) == 0) {
1581 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1584 kdio_printf(iocom
, 1, "%s\n",
1585 "msgtx: state reused "
1586 "for REPLY | DELETE");
1595 * Check for mid-stream ABORT reply sent.
1597 * One-off REPLY messages are allowed for e.g. status updates.
1599 if (msg
->any
.head
.cmd
& DMSGF_ABORT
) {
1600 if (state
== &state
->iocom
->state0
||
1601 (state
->txcmd
& DMSGF_CREATE
) == 0) {
1611 * Set interlock (XXX hack) in case the send side blocks and a
1612 * response is returned before kdmsg_state_cleanuptx() can be
1615 if (state
&& error
== 0)
1616 state
->flags
|= KDMSG_STATE_INTERLOCK
;
1622 * Called with iocom locked.
/*
 * kdmsg_state_cleanuptx(msg) - transmit-side epilogue for a message.
 *
 * What the visible code does:
 *   - A message with no state is simply freed.
 *   - Logs when KDMSG_STATE_SIGNAL is set, then clears both
 *     KDMSG_STATE_INTERLOCK and KDMSG_STATE_SIGNAL.
 *   - Holds the state across the rest of the routine; the hold is
 *     released by the final kdmsg_state_drop().
 *   - If the message carries DELETE: asserts txcmd did not already have
 *     DELETE, sets it, and - when rxcmd also has DELETE - asserts the
 *     state is RB-inserted, removes it from staterd_tree or statewr_tree
 *     (selected by DMSGF_REPLY in txcmd), clears KDMSG_STATE_RBINSERTED,
 *     unlinks it from its parent if it has no sub-states, frees the
 *     message and drops the reference held for the rbtree.
 *   - The message is freed on the other visible paths as well.
 *   - Deferred abort: if the state is ABORTING or DYING and the receive
 *     side has not seen DELETE, ABORTING is cleared and
 *     kdmsg_state_abort() is run.
 *
 * NOTE(review): msg->state is dereferenced for 'iocom' before the NULL
 * test on msg->state, so the NULL branch can only matter if that
 * dereference is itself safe - confirm against callers.
 * NOTE(review): the tails of the two KKASSERTs on msg->any.head.cmd, the
 * statements right after the "interlock!" print and the else-branch
 * structure are not visible in this span.
 */
1626 kdmsg_state_cleanuptx(kdmsg_msg_t
*msg
)
1628 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
1629 kdmsg_state_t
*state
;
1631 if ((state
= msg
->state
) == NULL
) {
1632 kdmsg_msg_free(msg
);
1637 * Clear interlock (XXX hack) in case the send side blocks and a
1638 * response is returned in the other thread before
1639 * kdmsg_state_cleanuptx() can be run. We maintain our hold on
1640 * iocom->msglk so we can do this before completing our task.
1642 if (state
->flags
& KDMSG_STATE_SIGNAL
) {
1643 kdio_printf(iocom
, 1, "state %p interlock!\n", state
);
1646 state
->flags
&= ~(KDMSG_STATE_INTERLOCK
| KDMSG_STATE_SIGNAL
);
1647 kdmsg_state_hold(state
);
1649 if (msg
->any
.head
.cmd
& DMSGF_DELETE
) {
1650 KKASSERT((state
->txcmd
& DMSGF_DELETE
) == 0);
1651 state
->txcmd
|= DMSGF_DELETE
;
1652 if (state
->rxcmd
& DMSGF_DELETE
) {
1653 KKASSERT(state
->flags
& KDMSG_STATE_RBINSERTED
);
1654 if (state
->txcmd
& DMSGF_REPLY
) {
1655 KKASSERT(msg
->any
.head
.cmd
&
1657 RB_REMOVE(kdmsg_state_tree
,
1658 &iocom
->staterd_tree
, state
);
1660 KKASSERT((msg
->any
.head
.cmd
&
1662 RB_REMOVE(kdmsg_state_tree
,
1663 &iocom
->statewr_tree
, state
);
1665 state
->flags
&= ~KDMSG_STATE_RBINSERTED
;
1668 * The subq recursion is used for parent linking and
1669 * scanning the topology for aborts, we can only
1670 * remove leafs. The circuit is effectively dead now,
1671 * but topology won't be torn down until all of its
1672 * children have finished/aborted.
1674 * This is particularly important for end-point
1675 * devices which might need to access private data
1676 * in parent states. Out of order disconnects can
1677 * occur if an end-point device is processing a
1678 * message transaction asynchronously because abort
1679 * requests are basically synchronous and it probably
1680 * isn't convenient (or possible) for the end-point
1681 * to abort an asynchronous operation.
1683 if (TAILQ_EMPTY(&state
->subq
))
1684 kdmsg_subq_delete(state
);
1685 kdmsg_msg_free(msg
);
1686 kdmsg_state_drop(state
); /* state on rbtree */
1688 kdmsg_msg_free(msg
);
1691 kdmsg_msg_free(msg
);
1695 * Deferred abort after transmission.
1697 if ((state
->flags
& (KDMSG_STATE_ABORTING
| KDMSG_STATE_DYING
)) &&
1698 (state
->rxcmd
& DMSGF_DELETE
) == 0) {
1699 kdio_printf(iocom
, 5,
1700 "kdmsg_state_cleanuptx: state=%p "
1701 "executing deferred abort\n",
1703 state
->flags
&= ~KDMSG_STATE_ABORTING
;
1704 kdmsg_state_abort(state
);
1706 kdmsg_state_drop(state
);
/*
 * _kdmsg_state_hold(state) - take one reference on a state.
 *
 * Atomically adds 1 to state->refs.  The level-4 kd_printf() reports the
 * state pointer, its reference count and a file/line pair.
 *
 * NOTE(review): 'file' and 'line' are presumably supplied through
 * KDMSG_DEBUG_ARGS, and the print is presumably compiled conditionally;
 * neither the macro nor any preprocessor guard is visible in this span.
 */
1711 _kdmsg_state_hold(kdmsg_state_t
*state KDMSG_DEBUG_ARGS
)
1713 atomic_add_int(&state
->refs
, 1);
1715 kd_printf(4, "state %p +%d\t%s:%d\n", state
, state
->refs
, file
, line
);
/*
 * _kdmsg_state_drop(state) - release one reference on a state.
 *
 * Asserts that the reference count is positive, then atomically
 * decrements it.  When atomic_fetchadd_int() reports 1, i.e. this was
 * the last reference, the state is handed to kdmsg_state_free().
 *
 * NOTE(review): 'file' and 'line' are presumably supplied through
 * KDMSG_DEBUG_ARGS, and the print is presumably compiled conditionally;
 * neither the macro nor any preprocessor guard is visible in this span.
 */
1721 _kdmsg_state_drop(kdmsg_state_t
*state KDMSG_DEBUG_ARGS
)
1723 KKASSERT(state
->refs
> 0);
1725 kd_printf(4, "state %p -%d\t%s:%d\n", state
, state
->refs
, file
, line
);
1727 if (atomic_fetchadd_int(&state
->refs
, -1) == 1)
1728 kdmsg_state_free(state
);
1733 kdmsg_state_free(kdmsg_state_t
*state
)
1735 kdmsg_iocom_t
*iocom
= state
->iocom
;
1737 KKASSERT((state
->flags
& KDMSG_STATE_RBINSERTED
) == 0);
1738 KKASSERT((state
->flags
& KDMSG_STATE_SUBINSERTED
) == 0);
1739 KKASSERT(TAILQ_EMPTY(&state
->subq
));
1741 if (state
!= &state
->iocom
->state0
)
1742 kfree(state
, iocom
->mmsg
);
/*
 * kdmsg_msg_alloc(state, cmd, func, data) - allocate and pre-fill a message.
 *
 * What the visible code does:
 *   - Sizes the header as (cmd & DMSGF_SIZE) * DMSG_ALIGN and allocates a
 *     zeroed kdmsg_msg up to and including that many header bytes
 *     (M_WAITOK); hdr_size records the header byte count.
 *   - When cmd has CREATE set and REPLY clear, a new state is allocated:
 *     its parent is pstate, its msgid is the state's own address, 'data'
 *     is stored in state->any.any, it is inserted into
 *     iocom->statewr_tree under msglk (panicking on a duplicate msgid)
 *     and appended to the parent's subq.  The parent gains a reference
 *     when its subq was empty.  The new state inherits the parent's
 *     KDMSG_STATE_DYING bit and gains three references (subq, rbtree,
 *     msg->state).
 *   - Otherwise the existing state is used: its parent must be non-NULL
 *     and one reference is taken for msg->state.
 *   - DMSGF_REVTRANS / DMSGF_REVCIRC are or'd into cmd when the state /
 *     parent state carries KDMSG_STATE_OPPOSITE.
 *   - The header magic, cmd, msgid (state's) and circuit (parent's msgid)
 *     are filled in.
 *
 * NOTE(review): the declarations of 'msg' and 'hbytes', the assignment
 * that sets 'pstate' on the CREATE path, the remainder of the
 * state->flags initialiser, the installation of 'func', the linkage of
 * msg->state and the return statement are not visible in this span.
 */
1746 kdmsg_msg_alloc(kdmsg_state_t
*state
, uint32_t cmd
,
1747 int (*func
)(kdmsg_state_t
*, kdmsg_msg_t
*), void *data
)
1749 kdmsg_iocom_t
*iocom
= state
->iocom
;
1750 kdmsg_state_t
*pstate
;
1754 KKASSERT(iocom
!= NULL
);
1755 hbytes
= (cmd
& DMSGF_SIZE
) * DMSG_ALIGN
;
1756 msg
= kmalloc(offsetof(struct kdmsg_msg
, any
) + hbytes
,
1757 iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1758 msg
->hdr_size
= hbytes
;
1760 if ((cmd
& (DMSGF_CREATE
| DMSGF_REPLY
)) == DMSGF_CREATE
) {
1762 * New transaction, requires tracking state and a unique
1763 * msgid to be allocated.
1765 * It is possible to race a circuit failure, inherit the
1766 * parent's STATE_DYING flag to trigger an abort sequence
1767 * in the transmit path. By not inheriting ABORTING the
1768 * abort sequence can recurse.
1770 * NOTE: The transactions has not yet been initiated so we
1771 * cannot set DMSGF_CREATE/DELETE bits in txcmd or rxcmd.
1772 * We have to properly setup DMSGF_REPLY, however.
1775 state
= kmalloc(sizeof(*state
), iocom
->mmsg
, M_WAITOK
| M_ZERO
);
1776 TAILQ_INIT(&state
->subq
);
1777 state
->iocom
= iocom
;
1778 state
->parent
= pstate
;
1779 state
->flags
= KDMSG_STATE_DYNAMIC
|
1782 state
->any
.any
= data
;
1783 state
->msgid
= (uint64_t)(uintptr_t)state
;
1784 /*msg->any.head.msgid = state->msgid;XXX*/
1786 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1787 if (RB_INSERT(kdmsg_state_tree
, &iocom
->statewr_tree
, state
))
1788 panic("duplicate msgid allocated");
1789 if (TAILQ_EMPTY(&pstate
->subq
))
1790 kdmsg_state_hold(pstate
);/* pstate->subq */
1791 TAILQ_INSERT_TAIL(&pstate
->subq
, state
, entry
);
1792 state
->flags
|= KDMSG_STATE_RBINSERTED
|
1793 KDMSG_STATE_SUBINSERTED
;
1794 state
->flags
|= pstate
->flags
& KDMSG_STATE_DYING
;
1795 kdmsg_state_hold(state
); /* pstate->subq */
1796 kdmsg_state_hold(state
); /* state on rbtree */
1797 kdmsg_state_hold(state
); /* msg->state */
1798 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1800 pstate
= state
->parent
;
1801 KKASSERT(pstate
!= NULL
);
1802 kdmsg_state_hold(state
); /* msg->state */
1805 if (state
->flags
& KDMSG_STATE_OPPOSITE
)
1806 cmd
|= DMSGF_REVTRANS
;
1807 if (pstate
->flags
& KDMSG_STATE_OPPOSITE
)
1808 cmd
|= DMSGF_REVCIRC
;
1810 msg
->any
.head
.magic
= DMSG_HDR_MAGIC
;
1811 msg
->any
.head
.cmd
= cmd
;
1812 msg
->any
.head
.msgid
= state
->msgid
;
1813 msg
->any
.head
.circuit
= pstate
->msgid
;
1820 kdmsg_msg_free(kdmsg_msg_t
*msg
)
1822 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
1823 kdmsg_state_t
*state
;
1825 if ((msg
->flags
& KDMSG_FLAG_AUXALLOC
) &&
1826 msg
->aux_data
&& msg
->aux_size
) {
1827 kfree(msg
->aux_data
, iocom
->mmsg
);
1828 msg
->flags
&= ~KDMSG_FLAG_AUXALLOC
;
1830 if ((state
= msg
->state
) != NULL
) {
1832 kdmsg_state_drop(state
); /* msg->state */
1834 msg
->aux_data
= NULL
;
1837 kfree(msg
, iocom
->mmsg
);
1841 kdmsg_detach_aux_data(kdmsg_msg_t
*msg
, kdmsg_data_t
*data
)
1843 if (msg
->flags
& KDMSG_FLAG_AUXALLOC
) {
1844 data
->aux_data
= msg
->aux_data
;
1845 data
->aux_size
= msg
->aux_size
;
1846 data
->iocom
= msg
->state
->iocom
;
1847 msg
->flags
&= ~KDMSG_FLAG_AUXALLOC
;
1849 data
->aux_data
= NULL
;
1851 data
->iocom
= msg
->state
->iocom
;
/*
 * kdmsg_free_aux_data(data) - release an auxiliary buffer described by
 * 'data', returning it to the malloc type of the iocom recorded in
 * data->iocom.
 *
 * NOTE(review): the lines immediately before and after the kfree() are
 * not visible in this span, so any guard on data->aux_data or any reset
 * of the fields cannot be confirmed from here.
 */
1856 kdmsg_free_aux_data(kdmsg_data_t
*data
)
1859 kfree(data
->aux_data
, data
->iocom
->mmsg
);
1863 * Indexed messages are stored in a red-black tree indexed by their
1864 * msgid. Only persistent messages are indexed.
1867 kdmsg_state_cmp(kdmsg_state_t
*state1
, kdmsg_state_t
*state2
)
1869 if (state1
->iocom
< state2
->iocom
)
1871 if (state1
->iocom
> state2
->iocom
)
1873 if (state1
->msgid
< state2
->msgid
)
1875 if (state1
->msgid
> state2
->msgid
)
1881 * Write a message. All requisit command flags have been set.
1883 * If msg->state is non-NULL the message is written to the existing
1884 * transaction. msgid will be set accordingly.
1886 * If msg->state is NULL and CREATE is set new state is allocated and
1887 * (func, data) is installed. A msgid is assigned.
1889 * If msg->state is NULL and CREATE is not set the message is assumed
1890 * to be a one-way message. The originator must assign the msgid
1891 * (or leave it 0, which is typical.
1893 * This function merely queues the message to the management thread, it
1894 * does not write to the message socket/pipe.
1897 kdmsg_msg_write(kdmsg_msg_t
*msg
)
1899 kdmsg_iocom_t
*iocom
= msg
->state
->iocom
;
1901 lockmgr(&iocom
->msglk
, LK_EXCLUSIVE
);
1902 kdmsg_msg_write_locked(iocom
, msg
);
1903 lockmgr(&iocom
->msglk
, LK_RELEASE
);
/*
 * kdmsg_msg_write_locked(iocom, msg) - queue a message with msglk held.
 *
 * What the visible code does:
 *   - Sets the header msgid either from the state (continuation or
 *     termination of an existing transaction) or to 0 (one-off message).
 *   - If the iocom has KDMSG_IOCOMF_EXITNOACC set, releases msglk and
 *     panics.
 *   - Asserts the state's transmit side has not already sent DELETE.
 *   - When the state is DYING, or its parent has sent DELETE or is DYING,
 *     logs the condition and runs the message through
 *     kdmsg_state_msgtx() / kdmsg_state_cleanuptx() under a temporary
 *     hold on the state.
 *   - Otherwise finishes the header: salt is the low 8 bits of
 *     iocom->msg_seq; aux_bytes is the unaligned aux size while aux_crc
 *     covers the DMSG_DOALIGN'd length; hdr_crc is computed over
 *     hdr_size bytes with the field zeroed first.
 *   - Appends the message to iocom->msgq and, if the
 *     KDMSG_CLUSTERCTL_SLEEPING bit is set in msg_ctl, clears it and
 *     wakes the sleeper.
 *
 * NOTE(review): the conditions that choose between the msgid
 * assignments, the load of 'state', the conditions wrapped around the
 * DYING tests, and whatever follows the dying-circuit path (return or
 * fall-through) are not visible in this span.
 */
1907 kdmsg_msg_write_locked(kdmsg_iocom_t
*iocom
, kdmsg_msg_t
*msg
)
1909 kdmsg_state_t
*state
;
1913 * Continuance or termination of existing transaction.
1914 * The transaction could have been initiated by either end.
1916 * (Function callback and aux data for the receive side can
1917 * be replaced or left alone).
1920 msg
->any
.head
.msgid
= state
->msgid
;
1923 * One-off message (always uses msgid 0 to distinguish
1924 * between a possibly lost in-transaction message due to
1925 * competing aborts and a real one-off message?)
1928 msg
->any
.head
.msgid
= 0;
1933 * XXX removed - don't make this a panic, allow the state checks
1934 * below to catch the situation.
1936 * This flag is not set until after the tx thread has drained
1937 * the tx msgq and simulated responses. After that point the
1938 * txthread is dead and can no longer simulate responses.
1940 * Device drivers should never try to send a message once this
1941 * flag is set. They should have detected (through the state
1942 * closures) that the link is in trouble.
1944 if (iocom
->flags
& KDMSG_IOCOMF_EXITNOACC
) {
1945 lockmgr(&iocom
->msglk
, LK_RELEASE
);
1946 panic("kdmsg_msg_write: Attempt to write message to "
1947 "terminated iocom\n");
1952 * For stateful messages, if the circuit is dead or dying we have
1953 * to abort the potentially newly-created state and discard the
1956 * - We must discard the message because the other end will not
1957 * be expecting any more messages over the dead or dying circuit
1958 * and might not be able to receive them.
1960 * - We abort the state by simulating a failure to generate a fake
1961 * incoming DELETE. This will trigger the state callback and allow
1962 * the device to clean things up and reply, closing the outgoing
1963 * direction and allowing the state to be freed.
1965 * This situation occurs quite often, particularly as SPANs stabilize.
1966 * End-points must do the right thing.
1969 KKASSERT((state
->txcmd
& DMSGF_DELETE
) == 0);
1970 if (state
->flags
& KDMSG_STATE_DYING
) {
1972 if ((state
->flags
& KDMSG_STATE_DYING
) ||
1973 (state
->parent
->txcmd
& DMSGF_DELETE
) ||
1974 (state
->parent
->flags
& KDMSG_STATE_DYING
)) {
1976 kdio_printf(iocom
, 4,
1977 "kdmsg_msg_write: Write to dying circuit "
1979 "ptxcmd=%08x prxcmd=%08x flags=%08x\n",
1981 state
->parent
->rxcmd
,
1982 state
->parent
->txcmd
,
1983 state
->parent
->flags
);
1984 kdmsg_state_hold(state
);
1985 kdmsg_state_msgtx(msg
);
1986 kdmsg_state_cleanuptx(msg
);
1987 kdmsg_state_drop(state
);
1993 * Finish up the msg fields. Note that msg->aux_size and the
1994 * aux_bytes stored in the message header represent the unaligned
1995 * (actual) bytes of data, but the buffer is sized to an aligned
1996 * size and the CRC is generated over the aligned length.
1998 msg
->any
.head
.salt
= /* (random << 8) | */ (iocom
->msg_seq
& 255);
2001 if (msg
->aux_data
&& msg
->aux_size
) {
2002 uint32_t abytes
= DMSG_DOALIGN(msg
->aux_size
);
2004 msg
->any
.head
.aux_bytes
= msg
->aux_size
;
2005 msg
->any
.head
.aux_crc
= iscsi_crc32(msg
->aux_data
, abytes
);
2007 msg
->any
.head
.hdr_crc
= 0;
2008 msg
->any
.head
.hdr_crc
= iscsi_crc32(msg
->any
.buf
, msg
->hdr_size
);
2010 TAILQ_INSERT_TAIL(&iocom
->msgq
, msg
, qentry
);
2012 if (iocom
->msg_ctl
& KDMSG_CLUSTERCTL_SLEEPING
) {
2013 atomic_clear_int(&iocom
->msg_ctl
,
2014 KDMSG_CLUSTERCTL_SLEEPING
);
2015 wakeup(&iocom
->msg_ctl
);
2020 * Reply to a message and terminate our side of the transaction.
2022 * If msg->state is non-NULL we are replying to a one-way message.
2025 kdmsg_msg_reply(kdmsg_msg_t
*msg
, uint32_t error
)
2027 kdmsg_state_t
*state
= msg
->state
;
2032 * Reply with a simple error code and terminate the transaction.
2034 cmd
= DMSG_LNK_ERROR
;
2037 * Check if our direction has even been initiated yet, set CREATE.
2039 * Check what direction this is (command or reply direction). Note
2040 * that txcmd might not have been initiated yet.
2042 * If our direction has already been closed we just return without
2045 if (state
!= &state
->iocom
->state0
) {
2046 if (state
->txcmd
& DMSGF_DELETE
)
2048 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
2049 cmd
|= DMSGF_CREATE
;
2050 if (state
->txcmd
& DMSGF_REPLY
)
2052 cmd
|= DMSGF_DELETE
;
2054 if ((msg
->any
.head
.cmd
& DMSGF_REPLY
) == 0)
2058 nmsg
= kdmsg_msg_alloc(state
, cmd
, NULL
, NULL
);
2059 nmsg
->any
.head
.error
= error
;
2060 kdmsg_msg_write(nmsg
);
2064 * Reply to a message and continue our side of the transaction.
2066 * If msg->state is non-NULL we are replying to a one-way message and this
2067 * function degenerates into the same as kdmsg_msg_reply().
2070 kdmsg_msg_result(kdmsg_msg_t
*msg
, uint32_t error
)
2072 kdmsg_state_t
*state
= msg
->state
;
2077 * Return a simple result code, do NOT terminate the transaction.
2079 cmd
= DMSG_LNK_ERROR
;
2082 * Check if our direction has even been initiated yet, set CREATE.
2084 * Check what direction this is (command or reply direction). Note
2085 * that txcmd might not have been initiated yet.
2087 * If our direction has already been closed we just return without
2090 if (state
!= &state
->iocom
->state0
) {
2091 if (state
->txcmd
& DMSGF_DELETE
)
2093 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
2094 cmd
|= DMSGF_CREATE
;
2095 if (state
->txcmd
& DMSGF_REPLY
)
2097 /* continuing transaction, do not set MSGF_DELETE */
2099 if ((msg
->any
.head
.cmd
& DMSGF_REPLY
) == 0)
2103 nmsg
= kdmsg_msg_alloc(state
, cmd
, NULL
, NULL
);
2104 nmsg
->any
.head
.error
= error
;
2105 kdmsg_msg_write(nmsg
);
2109 * Reply to a message and terminate our side of the transaction.
2111 * If msg->state is non-NULL we are replying to a one-way message.
2114 kdmsg_state_reply(kdmsg_state_t
*state
, uint32_t error
)
2120 * Reply with a simple error code and terminate the transaction.
2122 cmd
= DMSG_LNK_ERROR
;
2125 * Check if our direction has even been initiated yet, set CREATE.
2127 * Check what direction this is (command or reply direction). Note
2128 * that txcmd might not have been initiated yet.
2130 * If our direction has already been closed we just return without
2134 if (state
->txcmd
& DMSGF_DELETE
)
2136 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
2137 cmd
|= DMSGF_CREATE
;
2138 if (state
->txcmd
& DMSGF_REPLY
)
2140 cmd
|= DMSGF_DELETE
;
2142 nmsg
= kdmsg_msg_alloc(state
, cmd
, NULL
, NULL
);
2143 nmsg
->any
.head
.error
= error
;
2144 kdmsg_msg_write(nmsg
);
2148 * Reply to a message and continue our side of the transaction.
2150 * If msg->state is non-NULL we are replying to a one-way message and this
2151 * function degenerates into the same as kdmsg_msg_reply().
2154 kdmsg_state_result(kdmsg_state_t
*state
, uint32_t error
)
2160 * Return a simple result code, do NOT terminate the transaction.
2162 cmd
= DMSG_LNK_ERROR
;
2165 * Check if our direction has even been initiated yet, set CREATE.
2167 * Check what direction this is (command or reply direction). Note
2168 * that txcmd might not have been initiated yet.
2170 * If our direction has already been closed we just return without
2174 if (state
->txcmd
& DMSGF_DELETE
)
2176 if ((state
->txcmd
& DMSGF_CREATE
) == 0)
2177 cmd
|= DMSGF_CREATE
;
2178 if (state
->txcmd
& DMSGF_REPLY
)
2180 /* continuing transaction, do not set MSGF_DELETE */
2182 nmsg
= kdmsg_msg_alloc(state
, cmd
, NULL
, NULL
);
2183 nmsg
->any
.head
.error
= error
;
2184 kdmsg_msg_write(nmsg
);