/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
 * All rights reserved.
 * Copyright 2020 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 2020 Joyent, Inc.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/queue.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disk.h>
#include <sys/limits.h>
#include <sys/uio.h>
#ifndef __FreeBSD__
#include <sys/dkio.h>
#endif

#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <pthread_np.h>
#include <signal.h>
#include <sysexits.h>
#include <unistd.h>

#include <machine/atomic.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "mevent.h"
#include "pci_emul.h"
#include "block_if.h"
#define BLOCKIF_SIG	0xb109b109

#ifdef __FreeBSD__
#define BLOCKIF_NUMTHR	8
#else
/* Enlarge to keep pace with the virtio-block ring size */
#define BLOCKIF_NUMTHR	16
#endif

#define BLOCKIF_MAXREQ	(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)
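/*
 * Note: BLOCKIF_MAXREQ sizes the request-element pool so that the guest can
 * keep a full ring (BLOCKIF_RING_MAX entries) queued while each of the
 * BLOCKIF_NUMTHR worker threads holds one element in flight.
 */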
enum blockop {
	BOP_READ,
	BOP_WRITE,
	BOP_FLUSH,
	BOP_DELETE
};

enum blockstat {
	BST_FREE,
	BST_BLOCK,
	BST_PEND,
	BST_BUSY,
	BST_DONE
};

struct blockif_elem {
	TAILQ_ENTRY(blockif_elem) be_link;
	struct blockif_req	*be_req;
	enum blockop		be_op;
	enum blockstat		be_status;
	pthread_t		be_tid;
	off_t			be_block;
};
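/*
 * Request elements move BST_FREE -> BST_PEND -> BST_BUSY -> BST_FREE in the
 * common case.  A request whose offset collides with one already pending or
 * busy is parked as BST_BLOCK and promoted back to BST_PEND by
 * blockif_complete() when the conflicting request finishes.
 */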
#ifndef __FreeBSD__
enum blockif_wce {
	WCE_NONE = 0,
	WCE_IOCTL,
	WCE_FCNTL
};
#endif

struct blockif_ctxt {
	unsigned int		bc_magic;
	int			bc_fd;
	int			bc_ischr;
	int			bc_isgeom;
	int			bc_candelete;
	int			bc_rdonly;
	off_t			bc_size;
	int			bc_sectsz;
	int			bc_psectsz;
	int			bc_psectoff;
	int			bc_closing;
#ifndef __FreeBSD__
	enum blockif_wce	bc_wce;
#endif
	pthread_t		bc_btid[BLOCKIF_NUMTHR];
	pthread_mutex_t		bc_mtx;
	pthread_cond_t		bc_cond;
	blockif_resize_cb	*bc_resize_cb;
	void			*bc_resize_cb_arg;
	struct mevent		*bc_resize_event;

	/* Request elements and free/pending/busy queues */
	TAILQ_HEAD(, blockif_elem) bc_freeq;
	TAILQ_HEAD(, blockif_elem) bc_pendq;
	TAILQ_HEAD(, blockif_elem) bc_busyq;
	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
};
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;
struct blockif_sig_elem {
	pthread_mutex_t		bse_mtx;
	pthread_cond_t		bse_cond;
	int			bse_pending;
	struct blockif_sig_elem	*bse_next;
};
static struct blockif_sig_elem *blockif_bse_head;
static int
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
		enum blockop op)
{
	struct blockif_elem *be, *tbe;
	off_t off;
	int i;

	be = TAILQ_FIRST(&bc->bc_freeq);
	assert(be != NULL);
	assert(be->be_status == BST_FREE);
	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
	be->be_req = breq;
	be->be_op = op;
	switch (op) {
	case BOP_READ:
	case BOP_WRITE:
	case BOP_DELETE:
		off = breq->br_offset;
		for (i = 0; i < breq->br_iovcnt; i++)
			off += breq->br_iov[i].iov_len;
		break;
	default:
		off = OFF_MAX;
	}
	be->be_block = off;
	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
		if (tbe->be_block == breq->br_offset)
			break;
	}
	if (tbe == NULL) {
		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
			if (tbe->be_block == breq->br_offset)
				break;
		}
	}
	if (tbe == NULL)
		be->be_status = BST_PEND;
	else
		be->be_status = BST_BLOCK;
	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
	return (be->be_status == BST_PEND);
}
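/*
 * Pull the next runnable (BST_PEND) element off the pending queue for worker
 * thread 't' and mark it busy.  Returns non-zero when an element was handed
 * out, zero when nothing is runnable.
 */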
static int
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
{
	struct blockif_elem *be;

	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
		if (be->be_status == BST_PEND)
			break;
		assert(be->be_status == BST_BLOCK);
	}
	if (be == NULL)
		return (0);
	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
	be->be_status = BST_BUSY;
	be->be_tid = t;
	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
	*bep = be;
	return (1);
}
static void
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
{
	struct blockif_elem *tbe;

	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
	else
		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
		if (tbe->be_req->br_offset == be->be_block)
			tbe->be_status = BST_PEND;
	}
	be->be_tid = 0;
	be->be_status = BST_FREE;
	be->be_req = NULL;
	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
static int
blockif_flush_bc(struct blockif_ctxt *bc)
{
#ifdef	__FreeBSD__
	if (bc->bc_ischr) {
		if (ioctl(bc->bc_fd, DIOCGFLUSH))
			return (errno);
	} else if (fsync(bc->bc_fd))
		return (errno);
#else
	/*
	 * This fsync() should be adequate to flush the cache of a file
	 * or device.  In VFS, the VOP_SYNC operation is converted to
	 * the appropriate ioctl in both sdev (for real devices) and
	 * zfs (for zvols).
	 */
	if (fsync(bc->bc_fd))
		return (errno);
#endif
	return (0);
}
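/*
 * Execute one queued request (read/write/flush/delete) against the backing
 * store.  'buf' is a MAXPHYS-sized bounce buffer used to linearize
 * multi-segment requests for backends that need single contiguous transfers
 * (GEOM devices); when it is NULL, or the request has at most one segment,
 * the I/O is issued directly with preadv()/pwritev().
 */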
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
#ifdef	__FreeBSD__
	struct spacectl_range range;
#endif
	struct blockif_req *br;
#ifdef	__FreeBSD__
	off_t arg[2];
#endif
	ssize_t n;
	size_t clen, len, off, boff, voff;
	int i, err;

	br = be->be_req;
	assert(br->br_resid >= 0);
	if (br->br_iovcnt <= 1)
		buf = NULL;
	err = 0;
	switch (be->be_op) {
	case BOP_READ:
		if (buf == NULL) {
			if ((n = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
			    br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= n;
			break;
		}
		i = 0;
		off = voff = 0;
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			n = pread(bc->bc_fd, buf, len, br->br_offset + off);
			if (n < 0) {
				err = errno;
				break;
			}
			len = (size_t)n;
			boff = 0;
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy((uint8_t *)br->br_iov[i].iov_base + voff,
				    buf + boff, clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);
			off += len;
			br->br_resid -= len;
		}
		break;
	case BOP_WRITE:
		if (bc->bc_rdonly) {
			err = EROFS;
			break;
		}
		if (buf == NULL) {
			if ((n = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
			    br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= n;
			break;
		}
		i = 0;
		off = voff = 0;
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			boff = 0;
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy(buf + boff,
				    (uint8_t *)br->br_iov[i].iov_base + voff,
				    clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);

			n = pwrite(bc->bc_fd, buf, len, br->br_offset + off);
			if (n < 0) {
				err = errno;
				break;
			}
			off += n;
			br->br_resid -= n;
		}
		break;
	case BOP_FLUSH:
		err = blockif_flush_bc(bc);
		break;
	case BOP_DELETE:
		if (!bc->bc_candelete)
			err = EOPNOTSUPP;
		else if (bc->bc_rdonly)
			err = EROFS;
#ifdef	__FreeBSD__
		else if (bc->bc_ischr) {
			arg[0] = br->br_offset;
			arg[1] = br->br_resid;
			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
				err = errno;
			else
				br->br_resid = 0;
		} else {
			range.r_offset = br->br_offset;
			range.r_len = br->br_resid;

			while (range.r_len > 0) {
				if (fspacectl(bc->bc_fd, SPACECTL_DEALLOC,
				    &range, 0, &range) != 0) {
					err = errno;
					break;
				}
			}
			if (err == 0)
				br->br_resid = 0;
		}
#else
		else if (bc->bc_ischr) {
			dkioc_free_list_t dfl = {
				.dfl_num_exts = 1,
				.dfl_offset = 0,
				.dfl_flags = 0,
				.dfl_exts = {
					{
						.dfle_start = br->br_offset,
						.dfle_length = br->br_resid
					}
				}
			};

			if (ioctl(bc->bc_fd, DKIOCFREE, &dfl))
				err = errno;
			else
				br->br_resid = 0;
		} else {
			struct flock fl = {
				.l_whence = 0,
				.l_type = F_WRLCK,
				.l_start = br->br_offset,
				.l_len = br->br_resid
			};

			if (fcntl(bc->bc_fd, F_FREESP, &fl))
				err = errno;
			else
				br->br_resid = 0;
		}
#endif
		break;
	default:
		err = EINVAL;
		break;
	}

	be->be_status = BST_DONE;

	(*br->br_callback)(br, err);
}
static int
blockif_empty(const struct blockif_ctxt *bc)
{
	return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq));
}
static void *
blockif_thr(void *arg)
{
	struct blockif_ctxt *bc;
	struct blockif_elem *be;
	pthread_t t;
	uint8_t *buf;

	bc = arg;
	if (bc->bc_isgeom)
		buf = malloc(MAXPHYS);
	else
		buf = NULL;
	t = pthread_self();

	pthread_mutex_lock(&bc->bc_mtx);
	for (;;) {
		while (blockif_dequeue(bc, t, &be)) {
			pthread_mutex_unlock(&bc->bc_mtx);
			blockif_proc(bc, be, buf);
			pthread_mutex_lock(&bc->bc_mtx);
			blockif_complete(bc, be);
		}
		/* Check ctxt status here to see if exit requested */
		if (bc->bc_closing)
			break;
		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
	}
	pthread_mutex_unlock(&bc->bc_mtx);

	if (buf)
		free(buf);
	pthread_exit(NULL);
	return (NULL);
}
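/*
 * SIGCONT is used to interrupt a worker thread that is blocked in a slow
 * system call so that a cancellation can make progress.  The handler drains
 * the global lock-free list of blockif_sig_elem records, clearing each
 * record's pending flag and waking its waiter.
 */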
#ifdef	__FreeBSD__
static void
blockif_sigcont_handler(int signal __unused, enum ev_type type __unused,
    void *arg __unused)
#else
static void
blockif_sigcont_handler(int signal __unused)
#endif
{
	struct blockif_sig_elem *bse;

	for (;;) {
		/*
		 * Process the entire list even if not intended for
		 * this thread.
		 */
		do {
			bse = blockif_bse_head;
			if (bse == NULL)
				return;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
					    (uintptr_t)bse,
					    (uintptr_t)bse->bse_next));

		pthread_mutex_lock(&bse->bse_mtx);
		bse->bse_pending = 0;
		pthread_cond_signal(&bse->bse_cond);
		pthread_mutex_unlock(&bse->bse_mtx);
	}
}
static void
blockif_init(void)
{
#ifdef	__FreeBSD__
	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
	(void) signal(SIGCONT, SIG_IGN);
#else
	(void) sigset(SIGCONT, blockif_sigcont_handler);
#endif
}
int
blockif_legacy_config(nvlist_t *nvl, const char *opts)
{
	char *cp, *path;

	if (opts == NULL)
		return (0);

	cp = strchr(opts, ',');
	if (cp == NULL) {
		set_config_value_node(nvl, "path", opts);
		return (0);
	}
	path = strndup(opts, cp - opts);
	set_config_value_node(nvl, "path", path);
	free(path);
	return (pci_parse_legacy_config(nvl, cp + 1));
}
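/*
 * Illustrative example (an assumption about typical option strings, not
 * from this file): a legacy string such as
 * "/dev/zvol/rdsk/tank/vm0,nocache,sectorsize=512/4096" stores the leading
 * path under the "path" node and hands the remaining comma-separated
 * options to pci_parse_legacy_config().
 */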
struct blockif_ctxt *
blockif_open(nvlist_t *nvl, const char *ident)
{
	char tname[MAXCOMLEN + 1];
#ifdef	__FreeBSD__
	char name[MAXPATHLEN];
#endif
	const char *path, *pssval, *ssval;
	char *cp;
	struct blockif_ctxt *bc;
	struct stat sbuf;
#ifdef	__FreeBSD__
	struct diocgattr_arg arg;
#else
	enum blockif_wce wce = WCE_NONE;
#endif
	off_t size, psectsz, psectoff;
	int extra, fd, i, sectsz;
	int ro, candelete, geom, ssopt, pssopt;
	int nodelete;

#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
	cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE, DIOCGMEDIASIZE };
#endif

	pthread_once(&blockif_once, blockif_init);

	fd = -1;
	extra = 0;
	ssopt = 0;
	ro = 0;
	nodelete = 0;

	if (get_config_bool_node_default(nvl, "nocache", false))
		extra |= O_DIRECT;
	if (get_config_bool_node_default(nvl, "nodelete", false))
		nodelete = 1;
	if (get_config_bool_node_default(nvl, "sync", false) ||
	    get_config_bool_node_default(nvl, "direct", false))
		extra |= O_SYNC;
	if (get_config_bool_node_default(nvl, "ro", false))
		ro = 1;
	ssval = get_config_value_node(nvl, "sectorsize");
	if (ssval != NULL) {
		ssopt = strtol(ssval, &cp, 10);
		if (cp == ssval) {
			EPRINTLN("Invalid sector size \"%s\"", ssval);
			goto err;
		}
		if (*cp == '\0') {
			pssopt = ssopt;
		} else if (*cp == '/') {
			pssval = cp + 1;
			pssopt = strtol(pssval, &cp, 10);
			if (cp == pssval || *cp != '\0') {
				EPRINTLN("Invalid sector size \"%s\"", ssval);
				goto err;
			}
		} else {
			EPRINTLN("Invalid sector size \"%s\"", ssval);
			goto err;
		}
	}

	path = get_config_value_node(nvl, "path");
	if (path == NULL) {
		EPRINTLN("Missing \"path\" for block device.");
		goto err;
	}

	fd = open(path, (ro ? O_RDONLY : O_RDWR) | extra);
	if (fd < 0 && !ro) {
		/* The r/w open failed: fall back to a r/o open */
		fd = open(path, O_RDONLY | extra);
		ro = 1;
	}

	if (fd < 0) {
		warn("Could not open backing file: %s", path);
		goto err;
	}

	if (fstat(fd, &sbuf) < 0) {
		warn("Could not stat backing file %s", path);
		goto err;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
	    CAP_WRITE, CAP_FSTAT, CAP_EVENT, CAP_FPATHCONF);
	if (ro)
		cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE);

	if (caph_rights_limit(fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Deal with raw devices
	 */
	size = sbuf.st_size;
	sectsz = DEV_BSIZE;
	psectsz = psectoff = 0;
	candelete = geom = 0;
#ifdef	__FreeBSD__
	if (S_ISCHR(sbuf.st_mode)) {
		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
			perror("Could not fetch dev blk/sector size");
			goto err;
		}
		assert(size != 0);
		assert(sectsz != 0);
		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0)
			candelete = arg.value.i;
		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
			geom = 1;
	} else {
		psectsz = sbuf.st_blksize;
		/* Avoid fallback implementation */
		candelete = fpathconf(fd, _PC_DEALLOC_PRESENT) == 1;
	}
#else
	psectsz = sbuf.st_blksize;
	if (S_ISCHR(sbuf.st_mode)) {
		struct dk_minfo_ext dkmext;
		int wce_val;

		/* Look for a more accurate physical block/media size */
		if (ioctl(fd, DKIOCGMEDIAINFOEXT, &dkmext) == 0) {
			psectsz = dkmext.dki_pbsize;
			size = dkmext.dki_lbsize * dkmext.dki_capacity;
		}
		/* See if a configurable write cache is present and working */
		if (ioctl(fd, DKIOCGETWCE, &wce_val) == 0) {
			/*
			 * If WCE is already active, disable it until the
			 * specific device driver calls for its return.  If it
			 * is not active, toggle it on and off to verify that
			 * such actions are possible.
			 */
			if (wce_val != 0) {
				wce_val = 0;
				/*
				 * Inability to disable the cache is a threat
				 * to data durability.
				 */
				assert(ioctl(fd, DKIOCSETWCE, &wce_val) == 0);
				wce = WCE_IOCTL;
			} else {
				int r1, r2;

				wce_val = 1;
				r1 = ioctl(fd, DKIOCSETWCE, &wce_val);
				wce_val = 0;
				r2 = ioctl(fd, DKIOCSETWCE, &wce_val);

				if (r1 == 0 && r2 == 0) {
					wce = WCE_IOCTL;
				} else {
					/*
					 * If the cache toggle was not
					 * successful, ensure that the cache
					 * was not left enabled.
					 */
					assert(r1 != 0);
				}
			}
		}

		if (nodelete == 0 && ioctl(fd, DKIOC_CANFREE, &candelete))
			candelete = 0;
	} else {
		int flags;

		if ((flags = fcntl(fd, F_GETFL)) >= 0) {
			flags |= O_DSYNC;
			if (fcntl(fd, F_SETFL, flags) != -1) {
				wce = WCE_FCNTL;
			}
		}

		/*
		 * We don't have a way to discover if a file supports the
		 * FREESP fcntl cmd (other than trying it).  However,
		 * zfs, ufs, tmpfs, and udfs all support the FREESP fcntl cmd.
		 * NFS and NFSv4 also forward the FREESP request
		 * to the server, so we always enable it for file based
		 * volumes.  Anyone trying to run volumes on an unsupported
		 * configuration is on their own, and should be prepared
		 * for the requests to fail.
		 */
		if (nodelete == 0)
			candelete = 1;
	}
#endif

#ifndef WITHOUT_CAPSICUM
	if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	if (ssopt != 0) {
		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
		    ssopt > pssopt) {
			EPRINTLN("Invalid sector size %d/%d",
			    ssopt, pssopt);
			goto err;
		}

		/*
		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
		 * size be a multiple of the device's sector size.
		 *
		 * Validate that the emulated sector size complies with this
		 * requirement.
		 */
		if (S_ISCHR(sbuf.st_mode)) {
			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
				EPRINTLN("Sector size %d incompatible "
				    "with underlying device sector size %d",
				    ssopt, sectsz);
				goto err;
			}
		}

		sectsz = ssopt;
		psectsz = pssopt;
		psectoff = 0;
	}

	bc = calloc(1, sizeof(struct blockif_ctxt));
	if (bc == NULL) {
		perror("calloc");
		goto err;
	}

	bc->bc_magic = BLOCKIF_SIG;
	bc->bc_fd = fd;
	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
	bc->bc_isgeom = geom;
	bc->bc_candelete = candelete;
	bc->bc_rdonly = ro;
	bc->bc_size = size;
	bc->bc_sectsz = sectsz;
	bc->bc_psectsz = psectsz;
	bc->bc_psectoff = psectoff;
#ifndef __FreeBSD__
	bc->bc_wce = wce;
#endif
	pthread_mutex_init(&bc->bc_mtx, NULL);
	pthread_cond_init(&bc->bc_cond, NULL);
	TAILQ_INIT(&bc->bc_freeq);
	TAILQ_INIT(&bc->bc_pendq);
	TAILQ_INIT(&bc->bc_busyq);
	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
		bc->bc_reqs[i].be_status = BST_FREE;
		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
	}

	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
		pthread_set_name_np(bc->bc_btid[i], tname);
	}

	return (bc);
err:
	if (fd >= 0)
		close(fd);
	return (NULL);
}
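#if 0
/*
 * Illustrative sketch (not part of the original file): how a device model
 * might open a backing store and submit an asynchronous read.  The callback,
 * the "0:0" ident, and the request setup below are assumptions based on the
 * blockif_req layout in block_if.h; errors and teardown are simplified.
 */
static void
example_done(struct blockif_req *br, int err)
{
	/* Called from a worker thread; 'err' is 0 or an errno value. */
}

static void
example_submit(nvlist_t *nvl)
{
	static struct blockif_req req;
	static char sector[512];
	struct blockif_ctxt *bc;

	bc = blockif_open(nvl, "0:0");	/* ident names the worker threads */
	if (bc == NULL)
		return;

	req.br_iov[0].iov_base = sector;
	req.br_iov[0].iov_len = sizeof(sector);
	req.br_iovcnt = 1;
	req.br_offset = 0;
	req.br_resid = sizeof(sector);
	req.br_callback = example_done;

	if (blockif_read(bc, &req) != 0)	/* E2BIG if the queue is full */
		blockif_close(bc);
}
#endif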
static void
blockif_resized(int fd, enum ev_type type __unused, void *arg)
{
	struct blockif_ctxt *bc;
	struct stat sb;
	off_t mediasize;

	if (fstat(fd, &sb) != 0)
		return;

#ifdef	__FreeBSD__
	if (S_ISCHR(sb.st_mode)) {
		if (ioctl(fd, DIOCGMEDIASIZE, &mediasize) < 0) {
			EPRINTLN("blockif_resized: get mediasize failed: %s",
			    strerror(errno));
			return;
		}
	} else
		mediasize = sb.st_size;
#else
	mediasize = sb.st_size;
	if (S_ISCHR(sb.st_mode)) {
		struct dk_minfo dkm;

		if (ioctl(fd, DKIOCGMEDIAINFO, &dkm) == 0)
			mediasize = dkm.dki_lbsize * dkm.dki_capacity;
	}
#endif

	bc = arg;
	pthread_mutex_lock(&bc->bc_mtx);
	if (mediasize != bc->bc_size) {
		bc->bc_size = mediasize;
		bc->bc_resize_cb(bc, bc->bc_resize_cb_arg, bc->bc_size);
	}
	pthread_mutex_unlock(&bc->bc_mtx);
}
int
blockif_register_resize_callback(struct blockif_ctxt *bc, blockif_resize_cb *cb,
    void *cb_arg)
{
	struct stat sb;
	int err;

	if (cb == NULL)
		return (EINVAL);

	err = 0;

	pthread_mutex_lock(&bc->bc_mtx);
	if (bc->bc_resize_cb != NULL) {
		err = EBUSY;
		goto out;
	}

	assert(bc->bc_closing == 0);

	if (fstat(bc->bc_fd, &sb) != 0) {
		err = errno;
		goto out;
	}

	bc->bc_resize_event = mevent_add_flags(bc->bc_fd, EVF_VNODE,
	    EVFF_ATTRIB, blockif_resized, bc);
	if (bc->bc_resize_event == NULL) {
		err = ENXIO;
		goto out;
	}

	bc->bc_resize_cb = cb;
	bc->bc_resize_cb_arg = cb_arg;
out:
	pthread_mutex_unlock(&bc->bc_mtx);

	return (err);
}
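/*
 * Resize notification rides on the mevent EVF_VNODE/EVFF_ATTRIB watch set up
 * above: an attribute change on the backing vnode fires blockif_resized(),
 * which re-queries the media size and invokes the registered callback only
 * when the size actually changed.
 */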
static int
blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
		enum blockop op)
{
	int err;

	err = 0;

	pthread_mutex_lock(&bc->bc_mtx);
	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
		/*
		 * Enqueue and inform the block i/o thread
		 * that there is work available
		 */
		if (blockif_enqueue(bc, breq, op))
			pthread_cond_signal(&bc->bc_cond);
	} else {
		/*
		 * Callers are not allowed to enqueue more than
		 * the specified blockif queue limit. Return an
		 * error to indicate that the queue length has been
		 * exceeded.
		 */
		err = E2BIG;
	}
	pthread_mutex_unlock(&bc->bc_mtx);

	return (err);
}
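/*
 * Callers should bound their outstanding requests by blockif_queuesz();
 * once the free queue is exhausted, blockif_request() fails immediately
 * with E2BIG rather than blocking.
 */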
int
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_READ));
}

int
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_WRITE));
}

int
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_FLUSH));
}

int
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_DELETE));
}
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	struct blockif_elem *be;

	assert(bc->bc_magic == BLOCKIF_SIG);

	pthread_mutex_lock(&bc->bc_mtx);
	/*
	 * Check pending requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be != NULL) {
		/*
		 * Found it.
		 */
		blockif_complete(bc, be);
		pthread_mutex_unlock(&bc->bc_mtx);

		return (0);
	}

	/*
	 * Check in-flight requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be == NULL) {
		/*
		 * Didn't find it.
		 */
		pthread_mutex_unlock(&bc->bc_mtx);
		return (EINVAL);
	}

	/*
	 * Interrupt the processing thread to force it return
	 * prematurely via its normal callback path.
	 */
	while (be->be_status == BST_BUSY) {
		struct blockif_sig_elem bse, *old_head;

		pthread_mutex_init(&bse.bse_mtx, NULL);
		pthread_cond_init(&bse.bse_cond, NULL);

		bse.bse_pending = 1;

		do {
			old_head = blockif_bse_head;
			bse.bse_next = old_head;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
					    (uintptr_t)old_head,
					    (uintptr_t)&bse));

		pthread_kill(be->be_tid, SIGCONT);

		pthread_mutex_lock(&bse.bse_mtx);
		while (bse.bse_pending)
			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
		pthread_mutex_unlock(&bse.bse_mtx);
	}

	pthread_mutex_unlock(&bc->bc_mtx);

	/*
	 * The processing thread has been interrupted.  Since it's not
	 * clear if the callback has been invoked yet, return EBUSY.
	 */
	return (EBUSY);
}
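/*
 * Note on the handshake above: the cancelling thread pushes a stack-local
 * blockif_sig_elem onto the global lock-free list, sends SIGCONT to the
 * worker to interrupt any blocking system call, and then sleeps on bse_cond
 * until blockif_sigcont_handler() clears bse_pending.
 */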
int
blockif_close(struct blockif_ctxt *bc)
{
	void *jval;
	int i;

	assert(bc->bc_magic == BLOCKIF_SIG);

	/*
	 * Stop the block i/o thread
	 */
	pthread_mutex_lock(&bc->bc_mtx);
	bc->bc_closing = 1;
	if (bc->bc_resize_event != NULL)
		mevent_disable(bc->bc_resize_event);
	pthread_mutex_unlock(&bc->bc_mtx);
	pthread_cond_broadcast(&bc->bc_cond);
	for (i = 0; i < BLOCKIF_NUMTHR; i++)
		pthread_join(bc->bc_btid[i], &jval);

	/* XXX Cancel queued i/o's ??? */

	/*
	 * Release resources
	 */
	bc->bc_magic = 0;
	close(bc->bc_fd);
	free(bc);

	return (0);
}
/*
 * Return virtual C/H/S values for a given block. Use the algorithm
 * outlined in the VHD specification to calculate values.
 */
void
blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
{
	off_t sectors;		/* total sectors of the block dev */
	off_t hcyl;		/* cylinders times heads */
	uint16_t secpt;		/* sectors per track */
	uint8_t heads;

	assert(bc->bc_magic == BLOCKIF_SIG);

	sectors = bc->bc_size / bc->bc_sectsz;

	/* Clamp the size to the largest possible with CHS */
	if (sectors > 65535L * 16 * 255)
		sectors = 65535L * 16 * 255;

	if (sectors >= 65536L * 16 * 63) {
		secpt = 255;
		heads = 16;
		hcyl = sectors / secpt;
	} else {
		secpt = 17;
		hcyl = sectors / secpt;
		heads = (hcyl + 1023) / 1024;

		if (heads < 4)
			heads = 4;

		if (hcyl >= (heads * 1024) || heads > 16) {
			secpt = 31;
			heads = 16;
			hcyl = sectors / secpt;
		}
		if (hcyl >= (heads * 1024)) {
			secpt = 63;
			heads = 16;
			hcyl = sectors / secpt;
		}
	}

	*c = hcyl / heads;
	*h = heads;
	*s = secpt;
}
off_t
blockif_size(struct blockif_ctxt *bc)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_size);
}

int
blockif_sectsz(struct blockif_ctxt *bc)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_sectsz);
}

void
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	*size = bc->bc_psectsz;
	*off = bc->bc_psectoff;
}

int
blockif_queuesz(struct blockif_ctxt *bc)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (BLOCKIF_MAXREQ - 1);
}

int
blockif_is_ro(struct blockif_ctxt *bc)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_rdonly);
}

int
blockif_candelete(struct blockif_ctxt *bc)
{
	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_candelete);
}
#ifndef __FreeBSD__
int
blockif_set_wce(struct blockif_ctxt *bc, int wc_enable)
{
	int res = 0, flags;
	int clean_val = (wc_enable != 0) ? 1 : 0;

	(void) pthread_mutex_lock(&bc->bc_mtx);
	switch (bc->bc_wce) {
	case WCE_IOCTL:
		res = ioctl(bc->bc_fd, DKIOCSETWCE, &clean_val);
		break;
	case WCE_FCNTL:
		if ((flags = fcntl(bc->bc_fd, F_GETFL)) >= 0) {
			if (wc_enable == 0) {
				flags |= O_DSYNC;
			} else {
				flags &= ~O_DSYNC;
			}
			if (fcntl(bc->bc_fd, F_SETFL, flags) == -1) {
				res = -1;
			}
		} else {
			res = -1;
		}
		break;
	default:
		break;
	}

	/*
	 * After a successful disable of the write cache, ensure that any
	 * lingering data in the cache is synced out.
	 */
	if (res == 0 && wc_enable == 0) {
		res = fsync(bc->bc_fd);
	}
	(void) pthread_mutex_unlock(&bc->bc_mtx);

	return (res);
}
#endif /* __FreeBSD__ */