2 * Copyright (c) 2006-2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/kern/kern_syslink.c,v 1.10 2007/04/26 02:10:59 dillon Exp $
37 * This module implements the syslink() system call and protocol which
38 * is used to glue clusters together as well as to interface userland
39 * devices and filesystems to the kernel.
41 * We implement the management node concept in this module. A management
42 * node is basically a router node with additional features that take much
43 * of the protocol burden away from connecting terminal nodes.
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
49 #include <sys/malloc.h>
50 #include <sys/alist.h>
55 #include <sys/thread.h>
57 #include <sys/sysctl.h>
58 #include <sys/sysproto.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/socketops.h>
63 #include <sys/syslink.h>
64 #include <sys/syslink_msg.h>
65 #include <netinet/in.h>
67 #include <sys/thread2.h>
69 #include "opt_syslink.h"
72 * Red-Black trees organizing the syslink 'router' nodes and connections
78 RB_HEAD(slrouter_rb_tree
, slrouter
);
79 RB_HEAD(sldata_rb_tree
, sldata
);
80 RB_PROTOTYPE2(slrouter_rb_tree
, slrouter
, rbnode
,
81 rb_slrouter_compare
, sysid_t
);
82 RB_PROTOTYPE2(sldata_rb_tree
, sldata
, rbnode
,
83 rb_sldata_compare
, int);
86 * Fifo used to buffer broadcast packets
90 int bufsize
; /* must be a power of 2 */
91 int bufmask
; /* (bufsize - 1) */
92 int rindex
; /* tail-chasing FIFO indices */
97 * Syslink Router abstraction
100 RB_ENTRY(slrouter
) rbnode
; /* list of routers */
101 struct sldata_rb_tree sldata_rb_root
; /* connections to router */
102 sysid_t sysid
; /* logical sysid of router */
103 int flags
; /* flags passed on create */
104 int bits
; /* accommodate connections */
105 int count
; /* number of connections */
108 struct slbuf bbuf
; /* broadcast buffer */
109 char label
[SYSLINK_LABEL_SIZE
];
113 * Syslink Connection abstraction
116 RB_ENTRY(sldata
) rbnode
;
117 struct slrouter
*router
; /* organizing router */
118 struct file
*xfp
; /* external file pointer */
119 struct lock rlock
; /* synchronizing lock */
120 struct lock wlock
; /* synchronizing lock */
121 struct thread
*rthread
; /* helper thread */
122 struct thread
*wthread
; /* helper thread */
123 struct sockbuf sior
; /* accumulate incoming mbufs */
124 struct sockbuf siow
; /* accumulate outgoing mbufs */
125 struct sockaddr sa
; /* used w/SLIF_SUBNET mode */
126 int bindex
; /* broadcast index */
127 int flags
; /* connection flags */
131 char label
[SYSLINK_LABEL_SIZE
];
134 #define SYSLINK_BBUFSIZE (32*1024)
135 #define SYSLINK_SIOBUFSIZE (128*1024)
137 static int rb_slrouter_compare(struct slrouter
*r1
, struct slrouter
*r2
);
138 static int rb_sldata_compare(struct sldata
*d1
, struct sldata
*d2
);
140 static int syslink_destroy(struct slrouter
*slrouter
);
141 static int syslink_add(struct slrouter
*slrouter
,
142 struct syslink_info
*info
, int *result
);
143 static int syslink_rem(struct slrouter
*slrouter
, struct sldata
*sldata
,
144 struct syslink_info
*info
);
146 static int syslink_read(struct file
*fp
, struct uio
*uio
,
147 struct ucred
*cred
, int flags
);
148 static int syslink_write(struct file
*fp
, struct uio
*uio
,
149 struct ucred
*cred
, int flags
);
150 static int syslink_close(struct file
*fp
);
151 static int syslink_stat(struct file
*fp
, struct stat
*sb
, struct ucred
*cred
);
152 static int syslink_shutdown(struct file
*fp
, int how
);
153 static int syslink_ioctl(struct file
*fp
, u_long cmd
, caddr_t data
,
155 static int syslink_poll(struct file
*fp
, int events
, struct ucred
*cred
);
156 static int syslink_kqfilter(struct file
*fp
, struct knote
*kn
);
158 static void syslink_rthread_so(void *arg
);
159 static void syslink_rthread_fp(void *arg
);
160 static void syslink_wthread_so(void *arg
);
161 static void syslink_wthread_fp(void *arg
);
162 static int syslink_getsubnet(struct sockaddr
*sa
);
163 static struct mbuf
*syslink_parse_stream(struct sockbuf
*sio
);
164 static void syslink_route(struct slrouter
*slrouter
, int linkid
, struct mbuf
*m
);
165 static void slbuf_alloc(struct slbuf
*buf
, int bytes
);
166 static void slbuf_free(struct slbuf
*buf
);
167 static void sldata_rels(struct sldata
*sldata
);
168 static void slrouter_rels(struct slrouter
*slrouter
);
169 static int process_syslink_msg(struct sldata
*sldata
, struct syslink_msg
*head
);
170 static int syslink_validate(struct syslink_msg
*head
, int bytes
);
172 RB_GENERATE2(slrouter_rb_tree
, slrouter
, rbnode
,
173 rb_slrouter_compare
, sysid_t
, sysid
);
174 RB_GENERATE2(sldata_rb_tree
, sldata
, rbnode
,
175 rb_sldata_compare
, int, linkid
);
177 static struct fileops syslinkops
= {
178 .fo_read
= syslink_read
,
179 .fo_write
= syslink_write
,
180 .fo_ioctl
= syslink_ioctl
,
181 .fo_poll
= syslink_poll
,
182 .fo_kqfilter
= syslink_kqfilter
,
183 .fo_stat
= syslink_stat
,
184 .fo_close
= syslink_close
,
185 .fo_shutdown
= syslink_shutdown
188 MALLOC_DEFINE(M_SYSLINK
, "syslink", "syslink manager");
190 static int syslink_enabled
;
191 SYSCTL_INT(_kern
, OID_AUTO
, syslink_enabled
,
192 CTLFLAG_RW
, &syslink_enabled
, 0, "Enable SYSLINK");
195 * Support declarations and compare function for our RB trees
197 static struct slrouter_rb_tree slrouter_rb_root
;
200 rb_slrouter_compare(struct slrouter
*r1
, struct slrouter
*r2
)
202 if (r1
->sysid
< r2
->sysid
)
204 if (r1
->sysid
> r2
->sysid
)
210 rb_sldata_compare(struct sldata
*d1
, struct sldata
*d2
)
212 if (d1
->linkid
< d2
->linkid
)
214 if (d1
->linkid
> d2
->linkid
)
220 * Compare and callback functions for first-sysid and first-linkid searches.
223 syslink_cmd_locate_cmp(struct slrouter
*slrouter
, void *data
)
225 struct syslink_info
*info
= data
;
227 if (slrouter
->sysid
< info
->sysid
)
229 if (slrouter
->sysid
> info
->sysid
)
/*
 * Scan callback paired with syslink_cmd_locate_cmp().  'data' is the
 * caller's struct syslink_info; the visible statements copy the matched
 * router's flags and label into it.  Overwriting info->flags with the
 * router's flags is what removes the SLIF_ERROR marker the caller set
 * before starting the scan.
 *
 * NOTE(review): some lines of this function (including its return) are
 * not visible in this excerpt; only the visible statements are described.
 */
235 syslink_cmd_locate_callback(struct slrouter
*slrouter
, void *data
)
237 struct syslink_info
*info
= data
;
239 info
->flags
= slrouter
->flags
; /* also clears SLIF_ERROR */
240 bcopy(slrouter
->label
, info
->label
, SYSLINK_LABEL_SIZE
);
246 syslink_cmd_find_cmp(struct sldata
*sldata
, void *data
)
248 struct syslink_info
*info
= data
;
250 if (sldata
->linkid
< info
->linkid
)
252 if (sldata
->linkid
> info
->linkid
)
/*
 * Scan callback paired with syslink_cmd_find_cmp().  'data' is the
 * caller's struct syslink_info; the matched connection's linkid, flags
 * and label are copied into it.  Overwriting info->flags with the
 * connection's flags is what removes the SLIF_ERROR marker the caller
 * set before starting the scan.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
258 syslink_cmd_find_callback(struct sldata
*sldata
, void *data
)
260 struct syslink_info
*info
= data
;
262 info
->linkid
= sldata
->linkid
;
263 info
->flags
= sldata
->flags
; /* also clears SLIF_ERROR */
264 bcopy(sldata
->label
, info
->label
, SYSLINK_LABEL_SIZE
);
270 * Primary system call interface - associate a full-duplex stream
271 * (typically a pipe or a connected socket) with a sysid namespace,
272 * or create a direct link.
274 * syslink(int cmd, struct syslink_info *info, size_t bytes)
277 sys_syslink(struct syslink_args
*uap
)
279 struct syslink_info info
;
280 struct slrouter
*slrouter
= NULL
;
281 struct sldata
*sldata
= NULL
;
286 * System call is under construction and disabled by default.
287 * Superuser access is also required.
289 if (syslink_enabled
== 0)
291 error
= suser(curthread
);
296 * Load and validate the info structure. Unloaded bytes are zeroed
297 * out. The label field must always be 0-filled, even if not used
300 bzero(&info
, sizeof(info
));
301 if ((unsigned)uap
->bytes
<= sizeof(info
)) {
303 error
= copyin(uap
->info
, &info
, uap
->bytes
);
310 if (info
.label
[sizeof(info
.label
)-1] != 0)
317 case SYSLINK_CMD_CREATE
:
319 * Create a new syslink router node. Set refs to prevent the
320 * router node from being destroyed. One ref is our temporary
321 * reference while the other is the SLIF_DESTROYED-interlocked
324 if (info
.bits
< 2 || info
.bits
> SYSLINK_ROUTER_MAXBITS
)
326 slrouter
= kmalloc(sizeof(struct slrouter
), M_SYSLINK
,
328 if (slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root
, info
.sysid
)) {
329 kfree(slrouter
, M_SYSLINK
);
333 slrouter
->sysid
= info
.sysid
;
335 slrouter
->bits
= info
.bits
;
336 slrouter
->flags
= info
.flags
& SLIF_USERFLAGS
;
337 slrouter
->bitmap
= alist_create(1 << info
.bits
, M_SYSLINK
);
338 slbuf_alloc(&slrouter
->bbuf
, SYSLINK_BBUFSIZE
);
339 RB_INIT(&slrouter
->sldata_rb_root
);
340 RB_INSERT(slrouter_rb_tree
, &slrouter_rb_root
, slrouter
);
342 case SYSLINK_CMD_DESTROY
:
344 * Destroy a syslink router node. The physical node is
345 * not freed until our temporary reference is removed.
347 slrouter
= slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root
,
351 if ((slrouter
->flags
& SLIF_DESTROYED
) == 0) {
352 slrouter
->flags
|= SLIF_DESTROYED
;
353 /* SLIF_DESTROYED interlock */
354 slrouter_rels(slrouter
);
355 error
= syslink_destroy(slrouter
);
356 /* still holding our private interlock */
360 case SYSLINK_CMD_LOCATE
:
362 * Locate the first syslink router node >= info.sysid
364 info
.flags
|= SLIF_ERROR
;
365 n
= slrouter_rb_tree_RB_SCAN(
367 syslink_cmd_locate_cmp
, syslink_cmd_locate_callback
,
369 if (info
.flags
& SLIF_ERROR
)
372 case SYSLINK_CMD_ADD
:
373 slrouter
= slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root
, info
.sysid
);
375 (info
.bits
< 2 || info
.bits
> SYSLINK_ROUTER_MAXBITS
)) {
377 } else if (slrouter
&& (slrouter
->flags
& SLIF_DESTROYED
)) {
379 * Someone is trying to destroy this route node,
380 * no new adds please!
383 } else if (slrouter
) {
385 error
= syslink_add(slrouter
, &info
,
386 &uap
->sysmsg_result
);
391 case SYSLINK_CMD_REM
:
392 slrouter
= slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root
,
396 sldata
= sldata_rb_tree_RB_LOOKUP(&slrouter
->sldata_rb_root
, info
.linkid
);
399 error
= syslink_rem(slrouter
, sldata
, &info
);
407 case SYSLINK_CMD_FIND
:
408 slrouter
= slrouter_rb_tree_RB_LOOKUP(&slrouter_rb_root
, info
.sysid
);
409 info
.flags
|= SLIF_ERROR
;
412 n
= sldata_rb_tree_RB_SCAN(
413 &slrouter
->sldata_rb_root
,
414 syslink_cmd_find_cmp
, syslink_cmd_find_callback
,
416 if (info
.flags
& SLIF_ERROR
)
433 slrouter_rels(slrouter
);
/*
 * RB_SCAN callback used by syslink_destroy().  For one connection, ask
 * both directions to stop: set SLIF_RQUIT and SLIF_WQUIT (each only if
 * not already set) and wakeup() on the address of the corresponding
 * rthread / wthread field.  The 'data' argument is unused.
 *
 * NOTE(review): closing braces and the return statement are not visible
 * in this excerpt.
 */
439 syslink_destroy_callback(struct sldata
*sldata
, void *data __unused
)
442 if ((sldata
->flags
& SLIF_RQUIT
) == 0) {
443 sldata
->flags
|= SLIF_RQUIT
;
444 wakeup(&sldata
->rthread
);
446 if ((sldata
->flags
& SLIF_WQUIT
) == 0) {
447 sldata
->flags
|= SLIF_WQUIT
;
448 wakeup(&sldata
->wthread
);
455 * Shutdown all the connections going into this syslink.
457 * Try to wait for completion, but return after 1 second
/*
 * Ask every connection attached to 'slrouter' to shut down and poll for
 * completion.  While the router's connection tree is non-empty and
 * 'retries' is non-zero, each connection is signalled through
 * syslink_destroy_callback() and the caller then sleeps for hz / 10
 * ticks before checking again.
 *
 * NOTE(review): the declaration and initial value of 'retries', the
 * statement that changes it, and the statements selecting the return
 * value are not visible in this excerpt.
 */
462 syslink_destroy(struct slrouter
*slrouter
)
466 while (!RB_EMPTY(&slrouter
->sldata_rb_root
) && retries
) {
467 RB_SCAN(sldata_rb_tree
, &slrouter
->sldata_rb_root
, NULL
,
468 syslink_destroy_callback
, slrouter
);
470 tsleep(&retries
, 0, "syslnk", hz
/ 10);
/* final check: did every connection go away? */
472 if (RB_EMPTY(&slrouter
->sldata_rb_root
))
480 syslink_add(struct slrouter
*slrouter
, struct syslink_info
*info
,
483 struct sldata
*sldata
;
491 maxphys
= 1 << slrouter
->bits
;
492 numphys
= info
->bits
? (1 << info
->bits
) : 1;
495 * Create a connection to the route node and allocate a physical ID.
496 * Physical ID 0 is reserved for the route node itself, and an all-1's
497 * ID is reserved as a broadcast address.
499 sldata
= kmalloc(sizeof(struct sldata
), M_SYSLINK
, M_WAITOK
|M_ZERO
);
501 linkid
= alist_alloc(slrouter
->bitmap
, numphys
);
502 if (linkid
== ALIST_BLOCK_NONE
) {
503 kfree(sldata
, M_SYSLINK
);
508 * Insert the node, initializing enough fields to prevent things from
509 * being ripped out from under us before we have a chance to complete
512 sldata
->linkid
= linkid
;
515 if (sldata_rb_tree_RB_LOOKUP(&slrouter
->sldata_rb_root
, linkid
))
516 panic("syslink_add: free linkid wasn't free!");
517 RB_INSERT(sldata_rb_tree
, &slrouter
->sldata_rb_root
, sldata
);
520 * Complete initialization of the physical route node. Setting
521 * sldata->router activates the node.
523 sbinit(&sldata
->sior
, SYSLINK_SIOBUFSIZE
);
524 sbinit(&sldata
->siow
, SYSLINK_SIOBUFSIZE
);
525 sldata
->bindex
= slrouter
->bbuf
.windex
;
526 sldata
->flags
= info
->flags
& SLIF_USERFLAGS
;
527 lockinit(&sldata
->rlock
, "slread", 0, 0);
528 lockinit(&sldata
->wlock
, "slwrite", 0, 0);
529 bcopy(&info
->u
.sa
, &sldata
->sa
, sizeof(sldata
->sa
));
533 * We create a direct syslink descriptor. No helper threads
536 error
= falloc(curproc
, &fp
, &info
->fd
);
538 fp
->f_type
= DTYPE_SYSLINK
;
539 fp
->f_flag
= FREAD
| FWRITE
;
540 fp
->f_ops
= &syslinkops
;
542 /* one ref: the fp descriptor */
544 sldata
->flags
|= SLIF_WQUIT
| SLIF_WDONE
;
545 sldata
->flags
|= SLIF_RQUIT
| SLIF_RDONE
;
546 fsetfd(curproc
, fp
, info
->fd
);
551 sldata
->xfp
= holdfp(curproc
->p_fd
, info
->fd
, -1);
552 if (sldata
->xfp
!= NULL
) {
553 /* two refs: reader thread and writer thread */
555 if (sldata
->xfp
->f_type
== DTYPE_SOCKET
) {
556 lwkt_create(syslink_rthread_so
, sldata
,
557 &sldata
->rthread
, NULL
,
559 lwkt_create(syslink_wthread_so
, sldata
,
560 &sldata
->wthread
, NULL
,
563 lwkt_create(syslink_rthread_fp
, sldata
,
564 &sldata
->rthread
, NULL
,
566 lwkt_create(syslink_wthread_fp
, sldata
,
567 &sldata
->wthread
, NULL
,
574 sldata
->router
= slrouter
;
/*
 * Begin removal of one connection from a router.  'error' starts out as
 * EINPROGRESS.  The reader and writer sides are each asked to stop by
 * setting SLIF_RQUIT / SLIF_WQUIT (only if not already set) and calling
 * wakeup() on the address of the rthread / wthread field.
 *
 * NOTE(review): the lines that may modify 'error' and the return
 * statement are not visible in this excerpt, and neither 'slrouter' nor
 * 'info' is used by the visible statements.
 */
581 syslink_rem(struct slrouter
*slrouter
, struct sldata
*sldata
,
582 struct syslink_info
*info
)
584 int error
= EINPROGRESS
;
586 if ((sldata
->flags
& SLIF_RQUIT
) == 0) {
587 sldata
->flags
|= SLIF_RQUIT
;
588 wakeup(&sldata
->rthread
);
591 if ((sldata
->flags
& SLIF_WQUIT
) == 0) {
592 sldata
->flags
|= SLIF_WQUIT
;
593 wakeup(&sldata
->wthread
);
600 * Read syslink messages from an external socket and route them.
604 syslink_rthread_so(void *arg
)
606 struct sldata
*sldata
= arg
;
615 so
= (void *)sldata
->xfp
->f_data
;
619 * Calculate whether we need to get the peer address or not.
620 * We need to obtain the peer address for packet-mode sockets
621 * representing subnets (rather than single connections).
623 needsa
= (sldata
->bits
&& (sldata
->flags
& SLIF_PACKET
));
625 while ((sldata
->flags
& SLIF_RQUIT
) == 0) {
627 * Read some data. This is easy if the data is packetized,
628 * otherwise we can still obtain an mbuf chain but we have
629 * to parse out the syslink messages.
632 error
= so_pru_soreceive(so
,
633 (needsa
? &sa
: NULL
),
638 * The target is responsible for adjusting the src address
639 * field in the syslink_msg. We may need subnet information
640 * from the sockaddr to accomplish this.
642 * For streams representing subnets the originator is
643 * responsible for tagging its subnet bits in the src
644 * address but we have to renormalize
646 linkid
= sldata
->linkid
;
647 if (sldata
->flags
& SLIF_PACKET
) {
649 linkid
+= syslink_getsubnet(sa
) &
650 ((1 << sldata
->bits
) - 1);
652 if ((m
= sldata
->sior
.sb_mb
) != NULL
) {
653 sbinit(&sldata
->sior
, SYSLINK_SIOBUFSIZE
);
654 syslink_route(sldata
->router
, linkid
, m
);
657 while ((m
= syslink_parse_stream(&sldata
->sior
)) != NULL
) {
658 syslink_route(sldata
->router
, linkid
, m
);
667 if ((sldata
->flags
& SLIF_SUBNET
) && sldata
->bits
&& sa
) {
668 linkid
+= syslink_getsubnet(sa
) &
669 ((1 << sldata
->bits
) - 1);
676 * Note: Incoming syslink messages must have their headers
677 * adjusted to reflect the origination address. This will
678 * be handled by syslink_route.
680 if (sldata
->flags
& SLIF_PACKET
) {
682 * Packetized data can just be directly routed.
684 if ((m
= sldata
->sior
.sb_mb
) != NULL
) {
685 sbinit(&sldata
->sior
, SYSLINK_SIOBUFSIZE
);
686 syslink_route(sldata
->router
, linkid
, m
);
690 * Stream data has to be parsed out.
692 while ((m
= syslink_parse_stream(&sldata
->sior
)) != NULL
) {
693 syslink_route(sldata
->router
, linkid
, m
);
699 * Mark us as done and deref sldata. Tell the writer to terminate as
702 sldata
->flags
|= SLIF_RDONE
;
703 sbflush(&sldata
->sior
);
704 sbflush(&sldata
->siow
);
705 if ((sldata
->flags
& SLIF_WDONE
) == 0) {
706 sldata
->flags
|= SLIF_WQUIT
;
707 wakeup(&sldata
->wthread
);
709 wakeup(&sldata
->rthread
);
710 wakeup(&sldata
->wthread
);
715 * Read syslink messages from an external descriptor and route them. Used
716 * when no socket interface is available.
720 syslink_rthread_fp(void *arg
)
722 struct sldata
*sldata
= arg
;
726 * Loop until told otherwise
728 while ((sldata
->flags
& SLIF_RQUIT
) == 0) {
729 error
= fp_read(slink
->xfp
,
731 (slbuf
->windex
& slbuf
->bufmask
733 count
, &count
, 0, UIO_SYSSPACE
);
738 * Mark us as done and deref sldata. Tell the writer to terminate as
741 sldata
->flags
|= SLIF_RDONE
;
742 sbflush(&sldata
->sior
);
743 sbflush(&sldata
->siow
);
744 if ((sldata
->flags
& SLIF_WDONE
) == 0) {
745 sldata
->flags
|= SLIF_WQUIT
;
746 wakeup(&sldata
->wthread
);
748 wakeup(&sldata
->rthread
);
749 wakeup(&sldata
->wthread
);
755 syslink_parse_stream(struct sockbuf
*sio
)
762 syslink_route(struct slrouter
*slrouter
, int linkid
, struct mbuf
*m
)
775 * Calculate contiguous space available to read and read as
778 * If the entire buffer is used there's probably a format
779 * error of some sort and we terminate the link.
781 used
= slbuf
->windex
- slbuf
->rindex
;
785 * Read some data, terminate the link if an error occurs or
786 * if EOF is encountered. xfp can be NULL, indicating that
787 * the data was injected by other means.
790 count
= slbuf
->bufsize
-
791 (slbuf
->windex
& slbuf
->bufmask
);
792 if (count
> slbuf
->bufsize
- used
)
793 count
= slbuf
->bufsize
- used
;
796 error
= fp_read(sldata
->xfp
,
798 (slbuf
->windex
& slbuf
->bufmask
),
799 count
, &count
, 0, UIO_SYSSPACE
);
804 slbuf
->windex
+= count
;
807 tsleep(slbuf
, 0, "fiford", 0);
811 * Process as many syslink messages as we can. The record
812 * length must be at least a minimal PAD record (8 bytes).
814 while (slbuf
->windex
- slbuf
->rindex
>= min_msg_size
) {
817 head
= (void *)(slbuf
->buf
+
818 (slbuf
->rindex
& slbuf
->bufmask
));
819 if (head
->sm_bytes
< min_msg_size
) {
823 aligned_reclen
= SLMSG_ALIGN(head
->sm_bytes
);
828 if ((slbuf
->rindex
& slbuf
->bufmask
) >
829 ((slbuf
->rindex
+ aligned_reclen
) & slbuf
->bufmask
)
836 * Insufficient data read
838 if (slbuf
->windex
- slbuf
->rindex
< aligned_reclen
)
842 * Process non-pad messages. Non-pad messages have
843 * to be at least the size of the syslink_msg
846 * A PAD message's sm_cmd field contains 0.
849 if (head
->sm_bytes
< sizeof(*head
)) {
853 error
= process_syslink_msg(sldata
, head
);
858 slbuf
->rindex
+= aligned_reclen
;
869 * This thread takes outgoing syslink messages queued to wbuf and writes them
870 * to the descriptor. PAD is stripped. PAD is also added as required to
871 * conform to the outgoing descriptor's buffering requirements.
875 syslink_wthread_so(void *arg
)
877 struct sldata
*sldata
= arg
;
878 struct slrouter
*slrouter
;
879 struct syslink_msg
*head
;
889 so
= (void *)sldata
->xfp
->f_data
;
890 slrouter
= sldata
->router
;
892 while ((sldata
->flags
& SLIF_WQUIT
) == 0) {
894 * Deal with any broadcast data sitting in the route node's
895 * broadcast buffer. If we have fallen too far behind the
896 * data may no longer be valid.
898 * avail -- available data in broadcast buffer and
899 * bytes -- available contiguous data in broadcast buffer
901 if (slrouter
->bbuf
.rindex
- sldata
->bindex
> 0)
902 sldata
->bindex
= slrouter
->bbuf
.rindex
;
903 if ((avail
= slrouter
->bbuf
.windex
- sldata
->bindex
) > 0) {
904 bytes
= slrouter
->bbuf
.bufsize
-
905 (sldata
->bindex
& slrouter
->bbuf
.bufmask
);
908 head
= (void *)(slrouter
->bbuf
.buf
+
909 (sldata
->bindex
& slrouter
->bbuf
.bufmask
));
911 * Break into packets if necessary, else just write
912 * it all in one fell swoop.
914 aiov
.iov_base
= (void *)head
;
915 aiov
.iov_len
= bytes
;
916 auio
.uio_iov
= &aiov
;
919 auio
.uio_resid
= bytes
;
920 auio
.uio_segflg
= UIO_SYSSPACE
;
921 auio
.uio_rw
= UIO_WRITE
;
922 auio
.uio_td
= curthread
;
923 if (sldata
->flags
& SLIF_PACKET
) {
924 if (head
->sm_bytes
< SL_MIN_MESSAGE_SIZE
) {
925 kprintf("syslink_msg too small, terminating\n");
928 if (head
->sm_bytes
> bytes
) {
929 kprintf("syslink_msg not FIFO aligned, terminating\n");
932 bytes
= SLMSG_ALIGN(head
->sm_bytes
);
933 so_pru_sosend(so
, sa
, &auio
, NULL
, NULL
, 0, curthread
);
935 so_pru_sosend(so
, sa
, &auio
, NULL
, NULL
, 0, curthread
);
941 * Deal with mbuf records waiting to be output
943 if (sldata
->siow
.sb_mb
!= NULL
) {
948 * Block waiting for something to do.
950 tsleep(&sldata
->wthread
, 0, "wait", 0);
960 used
= slbuf
->windex
- slbuf
->rindex
;
961 if (used
< SL_MIN_MESSAGE_SIZE
)
964 head
= (void *)(slbuf
->buf
+
965 (slbuf
->rindex
& slbuf
->bufmask
));
966 if (head
->sm_bytes
< SL_MIN_MESSAGE_SIZE
) {
970 aligned_reclen
= SLMSG_ALIGN(head
->sm_bytes
);
975 if ((slbuf
->rindex
& slbuf
->bufmask
) >
976 ((slbuf
->rindex
+ aligned_reclen
) & slbuf
->bufmask
)
983 * Insufficient data read
985 if (used
< aligned_reclen
)
989 * Write it out whether it is PAD or not.
990 * XXX re-PAD for output here.
992 error
= fp_write(sldata
->xfp
, head
,
996 if (error
&& error
!= ENOBUFS
)
998 if (count
!= aligned_reclen
) {
1002 slbuf
->rindex
+= aligned_reclen
;
1006 tsleep(slbuf
, 0, "fifowt", 0);
1009 sldata
->flags
|= SLIF_WDONE
;
1010 sldata_rels(sldata
);
1015 syslink_wthread_fp(void *arg
)
1017 struct sldata
*sldata
= arg
;
1019 sldata
->flags
|= SLIF_WDONE
;
1020 sldata_rels(sldata
);
/*
 * Initialize a FIFO descriptor.  The slbuf structure is zeroed first (so
 * any index fields start at 0), then a backing buffer of 'bytes' bytes
 * is allocated from M_SYSLINK with M_WAITOK and bufsize / bufmask are
 * recorded.
 *
 * bufmask is computed as bytes - 1, so it is only usable as an index
 * mask when 'bytes' is a power of 2 (the struct slbuf field comments
 * state that requirement); this function does not check it.
 */
1025 slbuf_alloc(struct slbuf
*slbuf
, int bytes
)
1027 bzero(slbuf
, sizeof(*slbuf
));
1028 slbuf
->buf
= kmalloc(bytes
, M_SYSLINK
, M_WAITOK
);
1029 slbuf
->bufsize
= bytes
;
1030 slbuf
->bufmask
= bytes
- 1;
/*
 * Release the backing buffer of a FIFO descriptor back to M_SYSLINK.
 * The slbuf structure itself is not freed by the visible statement.
 *
 * NOTE(review): any statements following the kfree() are not visible in
 * this excerpt.
 */
1035 slbuf_free(struct slbuf
*slbuf
)
1037 kfree(slbuf
->buf
, M_SYSLINK
);
/*
 * Drop one reference on a connection.  When the count reaches zero the
 * connection is removed from its router's connection tree, freed, and
 * slrouter_rels() is then called on the router it was attached to.
 *
 * The router pointer is saved in a local first because 'sldata' has
 * already been freed by the time slrouter_rels() is called.
 *
 * NOTE(review): at least one line inside the if-body is not visible in
 * this excerpt.
 */
1043 sldata_rels(struct sldata
*sldata
)
1045 struct slrouter
*slrouter
;
1047 if (--sldata
->refs
== 0) {
1048 slrouter
= sldata
->router
;
/* a connection being released must still be attached to a router */
1049 KKASSERT(slrouter
!= NULL
);
1051 RB_REMOVE(sldata_rb_tree
,
1052 &sldata
->router
->sldata_rb_root
, sldata
);
1053 sldata
->router
= NULL
;
1054 kfree(sldata
, M_SYSLINK
);
1055 slrouter_rels(slrouter
);
/*
 * Drop one reference on a router.  The router is torn down only when
 * the count reaches zero AND its connection tree is empty: it is then
 * removed from the global router tree, its allocation bitmap and
 * broadcast buffer are released, and the structure is freed.
 *
 * The KKASSERT documents the expectation that a router only reaches
 * this state after SLIF_DESTROYED has been set on it.
 */
1061 slrouter_rels(struct slrouter
*slrouter
)
1063 if (--slrouter
->refs
== 0 && RB_EMPTY(&slrouter
->sldata_rb_root
)) {
1064 KKASSERT(slrouter
->flags
& SLIF_DESTROYED
);
1065 RB_REMOVE(slrouter_rb_tree
, &slrouter_rb_root
, slrouter
);
1066 alist_destroy(slrouter
->bitmap
, M_SYSLINK
);
1067 slrouter
->bitmap
= NULL
;
1068 slbuf_free(&slrouter
->bbuf
);
1069 kfree(slrouter
, M_SYSLINK
);
1074 * A switched ethernet socket connected to a syslink router node may
1075 * represent an entire subnet. We need to generate a subnet id from
1076 * the originating IP address which the caller can then incorporate into
1077 * the base linkid assigned to the connection to form the actual linkid
1078 * originating the message.
/*
 * Compute a subnet id from a socket address, dispatching on
 * sa->sa_family.  The branch that treats 'sa' as a sockaddr_in converts
 * the 32-bit address to host byte order.  The branch that treats it as
 * a sockaddr_in6 does the same using only the first 32-bit word of the
 * address, which the original author flagged with XXX.
 *
 * NOTE(review): the case labels, any default branch, the remaining
 * local declarations and the return statement are not visible in this
 * excerpt.
 */
1082 syslink_getsubnet(struct sockaddr
*sa
)
1085 struct in6_addr
*i6
;
1088 switch(sa
->sa_family
) {
1090 i4
= &((struct sockaddr_in
*)sa
)->sin_addr
;
1091 linkid
= (int)ntohl(i4
->s_addr
);
1094 i6
= &((struct sockaddr_in6
*)sa
)->sin6_addr
;
1095 linkid
= (int)ntohl(i6
->s6_addr32
[0]); /* XXX */
1105 * fileops for an established syslink when the kernel is asked to create a
1106 * descriptor (versus one being handed to it). No threads are created in
1111 * Transfer zero or more messages from the kernel to userland. Only complete
1112 * messages are returned. If the uio has insufficient space then EMSGSIZE
1113 * is returned. The kernel feeds messages to wbuf so we use wlock (structures
1114 * are relative to the kernel).
1118 syslink_read(struct file
*fp
, struct uio
*uio
, struct ucred
*cred
, int flags
)
1120 struct sldata
*sldata
= fp
->f_data
;
1122 struct syslink_msg
*head
;
1129 if (flags
& O_FBLOCKING
)
1131 else if (flags
& O_FNONBLOCKING
)
1133 else if (fp
->f_flag
& O_NONBLOCK
)
1138 lockmgr(&sldata
->wlock
, LK_EXCLUSIVE
| LK_RETRY
);
1143 * Calculate the number of bytes we can transfer in one shot. Transfers
1144 * do not wrap the FIFO.
1146 contig
= slbuf
->bufsize
- (slbuf
->rindex
& slbuf
->bufmask
);
1148 bytes
= slbuf
->windex
- slbuf
->rindex
;
1151 if (sldata
->flags
& SLIF_RDONE
) {
1159 tsleep(slbuf
, 0, "fiford", 0);
1165 * The uio must be able to accommodate the transfer.
1167 if (uio
->uio_resid
< bytes
) {
1173 * Copy the data to userland and update rindex.
1175 head
= (void *)(slbuf
->buf
+ (slbuf
->rindex
& slbuf
->bufmask
));
1176 error
= uiomove((caddr_t
)head
, bytes
, uio
);
1178 slbuf
->rindex
+= bytes
;
1185 lockmgr(&sldata
->wlock
, LK_RELEASE
);
1190 * Transfer zero or more messages from userland to the kernel. Only complete
1191 * messages may be written. The kernel processes from rbuf so that is where
1192 * we have to copy the messages.
1196 syslink_write (struct file
*fp
, struct uio
*uio
, struct ucred
*cred
, int flags
)
1198 struct sldata
*sldata
= fp
->f_data
;
1200 struct slbuf
*slbuf
= &sldata
->rbuf
;
1201 struct syslink_msg
*head
;
1208 if (flags
& O_FBLOCKING
)
1210 else if (flags
& O_FNONBLOCKING
)
1212 else if (fp
->f_flag
& O_NONBLOCK
)
1217 lockmgr(&sldata
->rlock
, LK_EXCLUSIVE
| LK_RETRY
);
1222 * Calculate the maximum number of contiguous bytes that may be
1223 * available. Caller is required to not wrap our FIFO.
1225 contig
= slbuf
->bufsize
- (slbuf
->windex
& slbuf
->bufmask
);
1226 if (uio
->uio_resid
> contig
) {
1232 * Truncate based on actual unused space available in the FIFO. If
1233 * the uio does not fit, block and loop.
1236 bytes
= slbuf
->bufsize
- (slbuf
->windex
- slbuf
->rindex
);
1239 if (uio
->uio_resid
<= bytes
)
1241 if (sldata
->flags
& SLIF_RDONE
) {
1249 tsleep(slbuf
, 0, "fifowr", 0);
1251 bytes
= uio
->uio_resid
;
1252 head
= (void *)(slbuf
->buf
+ (slbuf
->windex
& slbuf
->bufmask
));
1253 error
= uiomove((caddr_t
)head
, bytes
, uio
);
1255 error
= syslink_validate(head
, bytes
);
1257 slbuf
->windex
+= bytes
;
1262 lockmgr(&sldata
->rlock
, LK_RELEASE
);
/*
 * fo_close handler installed in syslinkops.  The connection is taken
 * from fp->f_data.  SLIF_RQUIT and SLIF_WQUIT are set (each only if not
 * already set) with a wakeup() on the address of the matching rthread /
 * wthread field, and then one reference is dropped via sldata_rels().
 *
 * NOTE(review): the return statement and any other lines between the
 * visible statements are not shown in this excerpt.
 */
1268 syslink_close (struct file
*fp
)
1270 struct sldata
*sldata
;
1272 sldata
= fp
->f_data
;
1273 if ((sldata
->flags
& SLIF_RQUIT
) == 0) {
1274 sldata
->flags
|= SLIF_RQUIT
;
1275 wakeup(&sldata
->rthread
);
1277 if ((sldata
->flags
& SLIF_WQUIT
) == 0) {
1278 sldata
->flags
|= SLIF_WQUIT
;
1279 wakeup(&sldata
->wthread
);
1282 sldata_rels(sldata
);
1288 syslink_stat (struct file
*fp
, struct stat
*sb
, struct ucred
*cred
)
1295 syslink_shutdown (struct file
*fp
, int how
)
1302 syslink_ioctl (struct file
*fp
, u_long cmd
, caddr_t data
, struct ucred
*cred
)
1309 syslink_poll (struct file
*fp
, int events
, struct ucred
*cred
)
1316 syslink_kqfilter(struct file
*fp
, struct knote
*kn
)
1322 * This routine is called from a route node's reader thread to process a
1323 * syslink message once it has been completely read and its size validated.
/*
 * Handle one syslink message.  The only visible statement prints the
 * message's sm_cmd field as 8 hex digits via kprintf(); 'sldata' is not
 * used by it.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 */
1327 process_syslink_msg(struct sldata
*sldata
, struct syslink_msg
*head
)
1329 kprintf("process syslink msg %08x\n", head
->sm_cmd
);
1334 * Validate that the syslink message header(s) are correctly sized.
1338 syslink_validate(struct syslink_msg
*head
, int bytes
)
1340 const int min_msg_size
= SL_MIN_MESSAGE_SIZE
;
1345 * Message size and alignment
1347 if (bytes
< min_msg_size
)
1349 if (bytes
& SL_ALIGNMASK
)
1351 if (head
->sm_cmd
&& bytes
< sizeof(struct syslink_msg
))
1355 * Buffer must contain entire record
1357 aligned_reclen
= SLMSG_ALIGN(head
->sm_bytes
);
1358 if (bytes
< aligned_reclen
)
1360 bytes
-= aligned_reclen
;
1361 head
= (void *)((char *)head
+ aligned_reclen
);