/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */
/* Driver for VirtIO block devices. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/buf2.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>

#include "virtio_blk.h"
#include "virtio_if.h"
51 struct vtblk_request
{
52 struct virtio_blk_outhdr vbr_hdr
__aligned(16);
56 SLIST_ENTRY(vtblk_request
) vbr_link
;
/*
 * Write-cache policy exposed via the writecache_mode tunable/sysctl.
 * VTBLK_CACHE_MAX is the exclusive upper bound used for range checks
 * (restored: referenced by the sysctl handler and write_cache_enabled).
 */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};
67 struct lwkt_serialize vtblk_slz
;
68 uint64_t vtblk_features
;
70 #define VTBLK_FLAG_INDIRECT 0x0001
71 #define VTBLK_FLAG_READONLY 0x0002
72 #define VTBLK_FLAG_DETACH 0x0004
73 #define VTBLK_FLAG_SUSPEND 0x0008
74 #define VTBLK_FLAG_DUMPING 0x0010
75 #define VTBLK_FLAG_WC_CONFIG 0x0020
77 struct virtqueue
*vtblk_vq
;
78 struct sglist
*vtblk_sglist
;
79 struct disk vtblk_disk
;
83 struct bio_queue_head vtblk_bioq
;
84 SLIST_HEAD(, vtblk_request
)
87 int vtblk_sector_size
;
89 int vtblk_request_count
;
90 enum vtblk_cache_mode vtblk_write_cache
;
92 struct vtblk_request vtblk_dump_request
;
95 static struct virtio_feature_desc vtblk_feature_desc
[] = {
96 { VIRTIO_BLK_F_BARRIER
, "HostBarrier" },
97 { VIRTIO_BLK_F_SIZE_MAX
, "MaxSegSize" },
98 { VIRTIO_BLK_F_SEG_MAX
, "MaxNumSegs" },
99 { VIRTIO_BLK_F_GEOMETRY
, "DiskGeometry" },
100 { VIRTIO_BLK_F_RO
, "ReadOnly" },
101 { VIRTIO_BLK_F_BLK_SIZE
, "BlockSize" },
102 { VIRTIO_BLK_F_SCSI
, "SCSICmds" },
103 { VIRTIO_BLK_F_WCE
, "WriteCache" },
104 { VIRTIO_BLK_F_TOPOLOGY
, "Topology" },
105 { VIRTIO_BLK_F_CONFIG_WCE
, "ConfigWCE" },
/* New-bus method implementations. */
static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

/* Setup helpers. */
static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
128 * Interface to the device switch.
130 static d_open_t vtblk_open
;
131 static d_strategy_t vtblk_strategy
;
132 static d_dump_t vtblk_dump
;
134 static struct dev_ops vbd_disk_ops
= {
135 { "vbd", 200, D_DISK
| D_MPSAFE
},
136 .d_open
= vtblk_open
,
137 .d_close
= nullclose
,
139 .d_write
= physwrite
,
140 .d_strategy
= vtblk_strategy
,
141 .d_dump
= vtblk_dump
,
144 static void vtblk_startio(struct vtblk_softc
*);
145 static struct vtblk_request
* vtblk_bio_request(struct vtblk_softc
*);
146 static int vtblk_execute_request(struct vtblk_softc
*,
147 struct vtblk_request
*);
149 static int vtblk_vq_intr(void *);
150 static void vtblk_complete(void *);
152 static void vtblk_stop(struct vtblk_softc
*);
154 static void vtblk_prepare_dump(struct vtblk_softc
*);
155 static int vtblk_write_dump(struct vtblk_softc
*, void *, off_t
, size_t);
156 static int vtblk_flush_dump(struct vtblk_softc
*);
157 static int vtblk_poll_request(struct vtblk_softc
*,
158 struct vtblk_request
*);
160 static void vtblk_drain_vq(struct vtblk_softc
*, int);
161 static void vtblk_drain(struct vtblk_softc
*);
163 static int vtblk_alloc_requests(struct vtblk_softc
*);
164 static void vtblk_free_requests(struct vtblk_softc
*);
165 static struct vtblk_request
* vtblk_dequeue_request(struct vtblk_softc
*);
166 static void vtblk_enqueue_request(struct vtblk_softc
*,
167 struct vtblk_request
*);
169 static int vtblk_request_error(struct vtblk_request
*);
170 static void vtblk_finish_bio(struct bio
*, int);
172 static void vtblk_setup_sysctl(struct vtblk_softc
*);
173 static int vtblk_tunable_int(struct vtblk_softc
*, const char *, int);
176 static int vtblk_writecache_mode
= -1;
177 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode
);
/* Features desired/implemented by this driver. */
/*
 * The RO and WCE entries are restored: the original numbering gaps at
 * lines 184/186 show they were dropped in extraction, and both features
 * are tested with virtio_with_feature() later in this file.
 */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
196 static device_method_t vtblk_methods
[] = {
197 /* Device methods. */
198 DEVMETHOD(device_probe
, vtblk_probe
),
199 DEVMETHOD(device_attach
, vtblk_attach
),
200 DEVMETHOD(device_detach
, vtblk_detach
),
201 DEVMETHOD(device_suspend
, vtblk_suspend
),
202 DEVMETHOD(device_resume
, vtblk_resume
),
203 DEVMETHOD(device_shutdown
, vtblk_shutdown
),
208 static driver_t vtblk_driver
= {
211 sizeof(struct vtblk_softc
)
213 static devclass_t vtblk_devclass
;
215 DRIVER_MODULE(virtio_blk
, virtio_pci
, vtblk_driver
, vtblk_devclass
, NULL
, NULL
);
216 MODULE_VERSION(virtio_blk
, 1);
217 MODULE_DEPEND(virtio_blk
, virtio
, 1, 1, 1);
220 vtblk_probe(device_t dev
)
223 if (virtio_get_device_type(dev
) != VIRTIO_ID_BLOCK
)
226 device_set_desc(dev
, "VirtIO Block Adapter");
228 return (BUS_PROBE_DEFAULT
);
232 vtblk_attach(device_t dev
)
234 struct vtblk_softc
*sc
;
235 struct virtio_blk_config blkcfg
;
238 sc
= device_get_softc(dev
);
241 lwkt_serialize_init(&sc
->vtblk_slz
);
243 bioq_init(&sc
->vtblk_bioq
);
244 SLIST_INIT(&sc
->vtblk_req_free
);
246 virtio_set_feature_desc(dev
, vtblk_feature_desc
);
247 vtblk_negotiate_features(sc
);
249 if (virtio_with_feature(dev
, VIRTIO_RING_F_INDIRECT_DESC
))
250 sc
->vtblk_flags
|= VTBLK_FLAG_INDIRECT
;
251 if (virtio_with_feature(dev
, VIRTIO_BLK_F_RO
))
252 sc
->vtblk_flags
|= VTBLK_FLAG_READONLY
;
253 if (virtio_with_feature(dev
, VIRTIO_BLK_F_CONFIG_WCE
))
254 sc
->vtblk_flags
|= VTBLK_FLAG_WC_CONFIG
;
256 vtblk_setup_sysctl(sc
);
258 /* Get local copy of config. */
259 virtio_read_device_config(dev
, 0, &blkcfg
,
260 sizeof(struct virtio_blk_config
));
263 * With the current sglist(9) implementation, it is not easy
264 * for us to support a maximum segment size as adjacent
265 * segments are coalesced. For now, just make sure it's larger
266 * than the maximum supported transfer size.
268 if (virtio_with_feature(dev
, VIRTIO_BLK_F_SIZE_MAX
)) {
269 if (blkcfg
.size_max
< MAXPHYS
) {
271 device_printf(dev
, "host requires unsupported "
272 "maximum segment size feature\n");
277 sc
->vtblk_max_nsegs
= vtblk_maximum_segments(sc
, &blkcfg
);
278 if (sc
->vtblk_max_nsegs
<= VTBLK_MIN_SEGMENTS
) {
280 device_printf(dev
, "fewer than minimum number of segments "
281 "allowed: %d\n", sc
->vtblk_max_nsegs
);
286 * Allocate working sglist. The number of segments may be too
287 * large to safely store on the stack.
289 sc
->vtblk_sglist
= sglist_alloc(sc
->vtblk_max_nsegs
, M_INTWAIT
);
290 if (sc
->vtblk_sglist
== NULL
) {
292 device_printf(dev
, "cannot allocate sglist\n");
296 error
= vtblk_alloc_virtqueue(sc
);
298 device_printf(dev
, "cannot allocate virtqueue\n");
302 error
= vtblk_alloc_requests(sc
);
304 device_printf(dev
, "cannot preallocate requests\n");
308 error
= virtio_setup_intr(dev
, &sc
->vtblk_slz
);
310 device_printf(dev
, "cannot setup virtqueue interrupt\n");
314 virtqueue_enable_intr(sc
->vtblk_vq
);
316 vtblk_alloc_disk(sc
, &blkcfg
);
326 vtblk_detach(device_t dev
)
328 struct vtblk_softc
*sc
;
330 sc
= device_get_softc(dev
);
332 lwkt_serialize_enter(&sc
->vtblk_slz
);
333 sc
->vtblk_flags
|= VTBLK_FLAG_DETACH
;
334 if (device_is_attached(dev
))
336 lwkt_serialize_exit(&sc
->vtblk_slz
);
340 if (sc
->cdev
!= NULL
) {
341 disk_destroy(&sc
->vtblk_disk
);
345 if (sc
->vtblk_sglist
!= NULL
) {
346 sglist_free(sc
->vtblk_sglist
);
347 sc
->vtblk_sglist
= NULL
;
354 vtblk_suspend(device_t dev
)
356 struct vtblk_softc
*sc
;
358 sc
= device_get_softc(dev
);
360 lwkt_serialize_enter(&sc
->vtblk_slz
);
361 sc
->vtblk_flags
|= VTBLK_FLAG_SUSPEND
;
362 /* XXX BMV: virtio_stop(), etc needed here? */
363 lwkt_serialize_exit(&sc
->vtblk_slz
);
369 vtblk_resume(device_t dev
)
371 struct vtblk_softc
*sc
;
373 sc
= device_get_softc(dev
);
375 lwkt_serialize_enter(&sc
->vtblk_slz
);
376 /* XXX BMV: virtio_reinit(), etc needed here? */
377 sc
->vtblk_flags
&= ~VTBLK_FLAG_SUSPEND
;
378 #if 0 /* XXX Resume IO? */
381 lwkt_serialize_exit(&sc
->vtblk_slz
);
387 vtblk_shutdown(device_t dev
)
394 vtblk_open(struct dev_open_args
*ap
)
396 struct vtblk_softc
*sc
;
397 cdev_t dev
= ap
->a_head
.a_dev
;
402 return (sc
->vtblk_flags
& VTBLK_FLAG_DETACH
? ENXIO
: 0);
406 vtblk_dump(struct dev_dump_args
*ap
)
408 struct vtblk_softc
*sc
;
409 cdev_t dev
= ap
->a_head
.a_dev
;
410 uint64_t buf_start
, buf_len
;
417 buf_start
= ap
->a_offset
;
418 buf_len
= ap
->a_length
;
420 // lwkt_serialize_enter(&sc->vtblk_slz);
422 if ((sc
->vtblk_flags
& VTBLK_FLAG_DUMPING
) == 0) {
423 vtblk_prepare_dump(sc
);
424 sc
->vtblk_flags
|= VTBLK_FLAG_DUMPING
;
428 error
= vtblk_write_dump(sc
, ap
->a_virtual
, buf_start
,
430 else if (buf_len
== 0)
431 error
= vtblk_flush_dump(sc
);
434 sc
->vtblk_flags
&= ~VTBLK_FLAG_DUMPING
;
437 // lwkt_serialize_exit(&sc->vtblk_slz);
443 vtblk_strategy(struct dev_strategy_args
*ap
)
445 struct vtblk_softc
*sc
;
446 cdev_t dev
= ap
->a_head
.a_dev
;
448 struct bio
*bio
= ap
->a_bio
;
449 struct buf
*bp
= bio
->bio_buf
;
452 vtblk_finish_bio(bio
, EINVAL
);
457 * Fail any write if RO. Unfortunately, there does not seem to
458 * be a better way to report our readonly'ness to GEOM above.
460 * XXX: Is that true in DFly?
462 if (sc
->vtblk_flags
& VTBLK_FLAG_READONLY
&&
463 (bp
->b_cmd
== BUF_CMD_READ
|| bp
->b_cmd
== BUF_CMD_FLUSH
)) {
464 vtblk_finish_bio(bio
, EROFS
);
468 lwkt_serialize_enter(&sc
->vtblk_slz
);
469 if ((sc
->vtblk_flags
& VTBLK_FLAG_DETACH
) == 0) {
470 bioqdisksort(&sc
->vtblk_bioq
, bio
);
472 lwkt_serialize_exit(&sc
->vtblk_slz
);
474 lwkt_serialize_exit(&sc
->vtblk_slz
);
475 vtblk_finish_bio(bio
, ENXIO
);
481 vtblk_negotiate_features(struct vtblk_softc
*sc
)
487 features
= VTBLK_FEATURES
;
489 sc
->vtblk_features
= virtio_negotiate_features(dev
, features
);
493 * Calculate the maximum number of DMA segment supported. Note
494 * that the in/out header is encoded in the segment list. We
495 * assume that VTBLK_MIN_SEGMENTS covers that part of it so
496 * we add it into the desired total. If the SEG_MAX feature
497 * is not specified we have to just assume that the host can
498 * handle the maximum number of segments required for a MAXPHYS
501 * The additional + 1 is in case a MAXPHYS-sized buffer crosses
505 vtblk_maximum_segments(struct vtblk_softc
*sc
,
506 struct virtio_blk_config
*blkcfg
)
512 nsegs
= VTBLK_MIN_SEGMENTS
;
514 if (virtio_with_feature(dev
, VIRTIO_BLK_F_SEG_MAX
)) {
515 nsegs
= MIN(blkcfg
->seg_max
, MAXPHYS
/ PAGE_SIZE
+ 1 + nsegs
);
517 nsegs
= MAXPHYS
/ PAGE_SIZE
+ 1 + nsegs
;
519 if (sc
->vtblk_flags
& VTBLK_FLAG_INDIRECT
)
520 nsegs
= MIN(nsegs
, VIRTIO_MAX_INDIRECT
);
526 vtblk_alloc_virtqueue(struct vtblk_softc
*sc
)
529 struct vq_alloc_info vq_info
;
533 VQ_ALLOC_INFO_INIT(&vq_info
, sc
->vtblk_max_nsegs
,
534 vtblk_vq_intr
, sc
, &sc
->vtblk_vq
,
535 "%s request", device_get_nameunit(dev
));
537 return (virtio_alloc_virtqueues(dev
, 0, 1, &vq_info
));
541 vtblk_set_write_cache(struct vtblk_softc
*sc
, int wc
)
544 /* Set either writeback (1) or writethrough (0) mode. */
545 virtio_write_dev_config_1(sc
->vtblk_dev
,
546 offsetof(struct virtio_blk_config
, writeback
), wc
);
550 vtblk_write_cache_enabled(struct vtblk_softc
*sc
,
551 struct virtio_blk_config
*blkcfg
)
555 if (sc
->vtblk_flags
& VTBLK_FLAG_WC_CONFIG
) {
556 wc
= vtblk_tunable_int(sc
, "writecache_mode",
557 vtblk_writecache_mode
);
558 if (wc
>= 0 && wc
< VTBLK_CACHE_MAX
)
559 vtblk_set_write_cache(sc
, wc
);
561 wc
= blkcfg
->writeback
;
563 wc
= virtio_with_feature(sc
->vtblk_dev
, VIRTIO_BLK_F_WCE
);
569 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS
)
571 struct vtblk_softc
*sc
;
575 wc
= sc
->vtblk_write_cache
;
577 error
= sysctl_handle_int(oidp
, &wc
, 0, req
);
578 if (error
|| req
->newptr
== NULL
)
580 if ((sc
->vtblk_flags
& VTBLK_FLAG_WC_CONFIG
) == 0)
582 if (wc
< 0 || wc
>= VTBLK_CACHE_MAX
)
585 lwkt_serialize_enter(&sc
->vtblk_slz
);
586 sc
->vtblk_write_cache
= wc
;
587 vtblk_set_write_cache(sc
, sc
->vtblk_write_cache
);
588 lwkt_serialize_exit(&sc
->vtblk_slz
);
594 vtblk_alloc_disk(struct vtblk_softc
*sc
, struct virtio_blk_config
*blkcfg
)
597 struct disk_info info
;
599 /* construct the disk_info */
600 bzero(&info
, sizeof(info
));
602 if (virtio_with_feature(sc
->vtblk_dev
, VIRTIO_BLK_F_BLK_SIZE
))
603 sc
->vtblk_sector_size
= blkcfg
->blk_size
;
605 sc
->vtblk_sector_size
= DEV_BSIZE
;
607 info
.d_media_blksize
= sc
->vtblk_sector_size
;
608 info
.d_media_blocks
= blkcfg
->capacity
;
610 info
.d_ncylinders
= blkcfg
->geometry
.cylinders
;
611 info
.d_nheads
= blkcfg
->geometry
.heads
;
612 info
.d_secpertrack
= blkcfg
->geometry
.sectors
;
614 info
.d_secpercyl
= info
.d_secpertrack
* info
.d_nheads
;
616 if (vtblk_write_cache_enabled(sc
, blkcfg
) != 0)
617 sc
->vtblk_write_cache
= VTBLK_CACHE_WRITEBACK
;
619 sc
->vtblk_write_cache
= VTBLK_CACHE_WRITETHROUGH
;
621 devstat_add_entry(&sc
->stats
, "vbd", device_get_unit(sc
->vtblk_dev
),
622 DEV_BSIZE
, DEVSTAT_ALL_SUPPORTED
,
623 DEVSTAT_TYPE_DIRECT
| DEVSTAT_TYPE_IF_OTHER
,
624 DEVSTAT_PRIORITY_DISK
);
626 /* attach a generic disk device to ourselves */
627 sc
->cdev
= disk_create(device_get_unit(sc
->vtblk_dev
), &sc
->vtblk_disk
,
630 sc
->cdev
->si_drv1
= sc
;
631 sc
->cdev
->si_iosize_max
= MAXPHYS
;
632 disk_setdiskinfo(&sc
->vtblk_disk
, &info
);
636 vtblk_startio(struct vtblk_softc
*sc
)
638 struct virtqueue
*vq
;
639 struct vtblk_request
*req
;
645 ASSERT_SERIALIZED(&sc
->vtblk_slz
);
647 if (sc
->vtblk_flags
& VTBLK_FLAG_SUSPEND
)
650 while (!virtqueue_full(vq
)) {
651 req
= vtblk_bio_request(sc
);
655 if (vtblk_execute_request(sc
, req
) != 0) {
656 bioqdisksort(&sc
->vtblk_bioq
, req
->vbr_bio
);
657 vtblk_enqueue_request(sc
, req
);
660 devstat_start_transaction(&sc
->stats
);
666 virtqueue_notify(vq
, &sc
->vtblk_slz
);
669 static struct vtblk_request
*
670 vtblk_bio_request(struct vtblk_softc
*sc
)
672 struct bio_queue_head
*bioq
;
673 struct vtblk_request
*req
;
677 bioq
= &sc
->vtblk_bioq
;
679 if (bioq_first(bioq
) == NULL
)
682 req
= vtblk_dequeue_request(sc
);
686 bio
= bioq_takefirst(bioq
);
689 req
->vbr_hdr
.ioprio
= 1;
694 req
->vbr_hdr
.type
= VIRTIO_BLK_T_FLUSH
;
697 req
->vbr_hdr
.type
= VIRTIO_BLK_T_IN
;
698 req
->vbr_hdr
.sector
= bio
->bio_offset
/ DEV_BSIZE
;
701 req
->vbr_hdr
.type
= VIRTIO_BLK_T_OUT
;
702 req
->vbr_hdr
.sector
= bio
->bio_offset
/ DEV_BSIZE
;
705 KASSERT(0, ("bio with unhandled cmd: %d", bp
->b_cmd
));
706 req
->vbr_hdr
.type
= -1;
714 vtblk_execute_request(struct vtblk_softc
*sc
, struct vtblk_request
*req
)
721 sg
= sc
->vtblk_sglist
;
727 * sglist is live throughout this subroutine.
731 error
= sglist_append(sg
, &req
->vbr_hdr
,
732 sizeof(struct virtio_blk_outhdr
));
733 KASSERT(error
== 0, ("error adding header to sglist"));
734 KASSERT(sg
->sg_nseg
== 1,
735 ("header spanned multiple segments: %d", sg
->sg_nseg
));
737 if (bp
->b_cmd
== BUF_CMD_READ
|| bp
->b_cmd
== BUF_CMD_WRITE
) {
738 error
= sglist_append(sg
, bp
->b_data
, bp
->b_bcount
);
739 KASSERT(error
== 0, ("error adding buffer to sglist"));
741 /* BUF_CMD_READ means the host writes into our buffer. */
742 if (bp
->b_cmd
== BUF_CMD_READ
)
743 writable
+= sg
->sg_nseg
- 1;
746 error
= sglist_append(sg
, &req
->vbr_ack
, sizeof(uint8_t));
747 KASSERT(error
== 0, ("error adding ack to sglist"));
750 KASSERT(sg
->sg_nseg
>= VTBLK_MIN_SEGMENTS
,
751 ("fewer than min segments: %d", sg
->sg_nseg
));
753 error
= virtqueue_enqueue(sc
->vtblk_vq
, req
, sg
,
754 sg
->sg_nseg
- writable
, writable
);
/*
 * Virtqueue interrupt handler: hand off to the completion routine.
 * NOTE(review): body restored — the extraction dropped it; confirm
 * against the repository copy (it may defer via a handler instead).
 */
static int
vtblk_vq_intr(void *xsc)
{
	vtblk_complete(xsc);

	return (1);
}
770 vtblk_complete(void *arg
)
772 struct vtblk_softc
*sc
;
773 struct vtblk_request
*req
;
774 struct virtqueue
*vq
;
781 lwkt_serialize_handler_disable(&sc
->vtblk_slz
);
782 virtqueue_disable_intr(sc
->vtblk_vq
);
783 ASSERT_SERIALIZED(&sc
->vtblk_slz
);
786 if (sc
->vtblk_flags
& VTBLK_FLAG_DETACH
)
789 while ((req
= virtqueue_dequeue(vq
, NULL
)) != NULL
) {
793 if (req
->vbr_ack
== VIRTIO_BLK_S_OK
)
796 bp
->b_flags
|= B_ERROR
;
797 if (req
->vbr_ack
== VIRTIO_BLK_S_UNSUPP
) {
798 bp
->b_error
= ENOTSUP
;
804 devstat_end_transaction_buf(&sc
->stats
, bio
->bio_buf
);
806 lwkt_serialize_exit(&sc
->vtblk_slz
);
808 * Unlocking the controller around biodone() does not allow
809 * processing further device interrupts; when we queued
810 * vtblk_complete, we disabled interrupts. It will allow
811 * concurrent vtblk_strategy/_startio command dispatches.
814 lwkt_serialize_enter(&sc
->vtblk_slz
);
816 vtblk_enqueue_request(sc
, req
);
821 if (virtqueue_enable_intr(vq
) != 0) {
823 * If new virtqueue entries appeared immediately after
824 * enabling interrupts, process them now. Release and
825 * retake softcontroller lock to try to avoid blocking
826 * I/O dispatch for too long.
828 virtqueue_disable_intr(vq
);
831 lwkt_serialize_handler_enable(&sc
->vtblk_slz
);
835 vtblk_stop(struct vtblk_softc
*sc
)
838 virtqueue_disable_intr(sc
->vtblk_vq
);
839 virtio_stop(sc
->vtblk_dev
);
843 vtblk_prepare_dump(struct vtblk_softc
*sc
)
846 struct virtqueue
*vq
;
854 * Drain all requests caught in-flight in the virtqueue,
855 * skipping biodone(). When dumping, only one request is
856 * outstanding at a time, and we just poll the virtqueue
859 vtblk_drain_vq(sc
, 1);
861 if (virtio_reinit(dev
, sc
->vtblk_features
) != 0) {
862 panic("%s: cannot reinit VirtIO block device during dump",
863 device_get_nameunit(dev
));
866 virtqueue_disable_intr(vq
);
867 virtio_reinit_complete(dev
);
871 vtblk_write_dump(struct vtblk_softc
*sc
, void *virtual, off_t offset
,
876 struct vtblk_request
*req
;
878 req
= &sc
->vtblk_dump_request
;
880 req
->vbr_hdr
.type
= VIRTIO_BLK_T_OUT
;
881 req
->vbr_hdr
.ioprio
= 1;
882 req
->vbr_hdr
.sector
= offset
/ 512;
885 bzero(&bio
, sizeof(struct bio
));
886 bzero(&buf
, sizeof(struct buf
));
889 bp
.b_cmd
= BUF_CMD_WRITE
;
891 bp
.b_bcount
= length
;
893 return (vtblk_poll_request(sc
, req
));
897 vtblk_flush_dump(struct vtblk_softc
*sc
)
901 struct vtblk_request
*req
;
903 req
= &sc
->vtblk_dump_request
;
905 req
->vbr_hdr
.type
= VIRTIO_BLK_T_FLUSH
;
906 req
->vbr_hdr
.ioprio
= 1;
907 req
->vbr_hdr
.sector
= 0;
910 bzero(&bio
, sizeof(struct bio
));
911 bzero(&bp
, sizeof(struct buf
));
914 bp
.b_cmd
= BUF_CMD_FLUSH
;
916 return (vtblk_poll_request(sc
, req
));
920 vtblk_poll_request(struct vtblk_softc
*sc
, struct vtblk_request
*req
)
922 struct virtqueue
*vq
;
927 if (!virtqueue_empty(vq
))
930 error
= vtblk_execute_request(sc
, req
);
934 virtqueue_notify(vq
, NULL
);
935 virtqueue_poll(vq
, NULL
);
937 error
= vtblk_request_error(req
);
938 if (error
&& bootverbose
) {
939 device_printf(sc
->vtblk_dev
,
940 "%s: IO error: %d\n", __func__
, error
);
947 vtblk_drain_vq(struct vtblk_softc
*sc
, int skip_done
)
949 struct virtqueue
*vq
;
950 struct vtblk_request
*req
;
956 while ((req
= virtqueue_drain(vq
, &last
)) != NULL
) {
958 vtblk_finish_bio(req
->vbr_bio
, ENXIO
);
960 vtblk_enqueue_request(sc
, req
);
963 KASSERT(virtqueue_empty(vq
), ("virtqueue not empty"));
967 vtblk_drain(struct vtblk_softc
*sc
)
969 struct bio_queue_head
*bioq
;
972 bioq
= &sc
->vtblk_bioq
;
974 if (sc
->vtblk_vq
!= NULL
)
975 vtblk_drain_vq(sc
, 0);
977 while (bioq_first(bioq
) != NULL
) {
978 bio
= bioq_takefirst(bioq
);
979 vtblk_finish_bio(bio
, ENXIO
);
982 vtblk_free_requests(sc
);
986 vtblk_alloc_requests(struct vtblk_softc
*sc
)
988 struct vtblk_request
*req
;
991 nreqs
= virtqueue_size(sc
->vtblk_vq
);
994 * Preallocate sufficient requests to keep the virtqueue full. Each
995 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
996 * the number allocated when indirect descriptors are not available.
998 if ((sc
->vtblk_flags
& VTBLK_FLAG_INDIRECT
) == 0)
999 nreqs
/= VTBLK_MIN_SEGMENTS
;
1001 for (i
= 0; i
< nreqs
; i
++) {
1002 req
= contigmalloc(sizeof(struct vtblk_request
), M_DEVBUF
,
1003 M_WAITOK
, 0, BUS_SPACE_MAXADDR
, 16, 0);
1007 KKASSERT(sglist_count(&req
->vbr_hdr
, sizeof(req
->vbr_hdr
))
1009 KKASSERT(sglist_count(&req
->vbr_ack
, sizeof(req
->vbr_ack
))
1012 sc
->vtblk_request_count
++;
1013 vtblk_enqueue_request(sc
, req
);
1020 vtblk_free_requests(struct vtblk_softc
*sc
)
1022 struct vtblk_request
*req
;
1024 while ((req
= vtblk_dequeue_request(sc
)) != NULL
) {
1025 sc
->vtblk_request_count
--;
1026 contigfree(req
, sizeof(struct vtblk_request
), M_DEVBUF
);
1029 KASSERT(sc
->vtblk_request_count
== 0, ("leaked requests"));
1032 static struct vtblk_request
*
1033 vtblk_dequeue_request(struct vtblk_softc
*sc
)
1035 struct vtblk_request
*req
;
1037 req
= SLIST_FIRST(&sc
->vtblk_req_free
);
1039 SLIST_REMOVE_HEAD(&sc
->vtblk_req_free
, vbr_link
);
1045 vtblk_enqueue_request(struct vtblk_softc
*sc
, struct vtblk_request
*req
)
1048 bzero(req
, sizeof(struct vtblk_request
));
1049 SLIST_INSERT_HEAD(&sc
->vtblk_req_free
, req
, vbr_link
);
1053 vtblk_request_error(struct vtblk_request
*req
)
1057 switch (req
->vbr_ack
) {
1058 case VIRTIO_BLK_S_OK
:
1061 case VIRTIO_BLK_S_UNSUPP
:
1073 vtblk_finish_bio(struct bio
*bio
, int error
)
1080 vtblk_setup_sysctl(struct vtblk_softc
*sc
)
1083 struct sysctl_ctx_list
*ctx
;
1084 struct sysctl_oid
*tree
;
1085 struct sysctl_oid_list
*child
;
1087 dev
= sc
->vtblk_dev
;
1088 ctx
= device_get_sysctl_ctx(dev
);
1089 tree
= device_get_sysctl_tree(dev
);
1090 child
= SYSCTL_CHILDREN(tree
);
1092 SYSCTL_ADD_PROC(ctx
, child
, OID_AUTO
, "writecache_mode",
1093 CTLTYPE_INT
| CTLFLAG_RW
, sc
, 0, vtblk_write_cache_sysctl
,
1094 "I", "Write cache mode (writethrough (0) or writeback (1))");
1098 vtblk_tunable_int(struct vtblk_softc
*sc
, const char *knob
, int def
)
1102 ksnprintf(path
, sizeof(path
),
1103 "hw.vtblk.%d.%s", device_get_unit(sc
->vtblk_dev
), knob
);
1104 TUNABLE_INT_FETCH(path
, &def
);