virtio_blk - No need to negotiate unused (and legacy) VIRTIO_BLK_F_BARRIER.
[dragonfly.git] / sys/dev/virtual/virtio/block/virtio_blk.c
/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/dev/virtio/block/virtio_blk.c 252707 2013-07-04 17:57:26Z bryanv $
 */

/* Driver for VirtIO block devices. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/serialize.h>
#include <sys/buf2.h>
#include <sys/rman.h>
#include <sys/disk.h>
#include <sys/devicestat.h>

#include <dev/virtual/virtio/virtio/virtio.h>
#include <dev/virtual/virtio/virtio/virtqueue.h>

#include "virtio_blk.h"
#include "virtio_if.h"
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr __aligned(16);
	struct bio			*vbr_bio;
	uint8_t				 vbr_ack;

	SLIST_ENTRY(vtblk_request)	 vbr_link;
};
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};
struct vtblk_softc {
	device_t		 vtblk_dev;
	struct lwkt_serialize	 vtblk_slz;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_WC_CONFIG	0x0020

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		 vtblk_disk;
	cdev_t			 cdev;
	struct devstat		 stats;

	struct bio_queue_head	 vtblk_bioq;
	SLIST_HEAD(, vtblk_request)
				 vtblk_req_free;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};
static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);

/*
 * Interface to the device switch.
 */
static d_open_t		vtblk_open;
static d_strategy_t	vtblk_strategy;
static d_dump_t		vtblk_dump;

static struct dev_ops vbd_disk_ops = {
	{ "vbd", 200, D_DISK | D_MPSAFE },
	.d_open		= vtblk_open,
	.d_close	= nullclose,
	.d_read		= physread,
	.d_write	= physwrite,
	.d_strategy	= vtblk_strategy,
	.d_dump		= vtblk_dump,
};

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_complete(void *);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_request_error(struct vtblk_request *);
static void	vtblk_finish_bio(struct bio *, int);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
/* Tunables. */
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};

static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, NULL, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	lwkt_serialize_init(&sc->vtblk_slz);

	bioq_init(&sc->vtblk_bioq);
	SLIST_INIT(&sc->vtblk_req_free);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;

	vtblk_setup_sysctl(sc);

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_INTWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, &sc->vtblk_slz);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

	vtblk_alloc_disk(sc, &blkcfg);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	lwkt_serialize_exit(&sc->vtblk_slz);

	vtblk_drain(sc);

	if (sc->cdev != NULL) {
		disk_destroy(&sc->vtblk_disk);
		sc->cdev = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	return (0);
}
static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}
static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	lwkt_serialize_enter(&sc->vtblk_slz);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
#if 0 /* XXX Resume IO? */
	vtblk_startio(sc);
#endif
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}
static int
vtblk_shutdown(device_t dev)
{

	return (0);
}
static int
vtblk_open(struct dev_open_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}
static int
vtblk_dump(struct dev_dump_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	uint64_t buf_start, buf_len;
	int error;

	sc = dev->si_drv1;
	if (sc == NULL)
		return (ENXIO);

	buf_start = ap->a_offset;
	buf_len = ap->a_length;

//	lwkt_serialize_enter(&sc->vtblk_slz);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (buf_len > 0)
		error = vtblk_write_dump(sc, ap->a_virtual, buf_start,
		    buf_len);
	else if (buf_len == 0)
		error = vtblk_flush_dump(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

//	lwkt_serialize_exit(&sc->vtblk_slz);

	return (error);
}
static int
vtblk_strategy(struct dev_strategy_args *ap)
{
	struct vtblk_softc *sc;
	cdev_t dev = ap->a_head.a_dev;
	sc = dev->si_drv1;
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;

	if (sc == NULL) {
		vtblk_finish_bio(bio, EINVAL);
		return EINVAL;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our readonly'ness to GEOM above.
	 *
	 * XXX: Is that true in DFly?
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->b_cmd == BUF_CMD_WRITE || bp->b_cmd == BUF_CMD_FLUSH)) {
		vtblk_finish_bio(bio, EROFS);
		return (EINVAL);
	}

	lwkt_serialize_enter(&sc->vtblk_slz);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACH) == 0) {
		bioqdisksort(&sc->vtblk_bioq, bio);
		vtblk_startio(sc);
		lwkt_serialize_exit(&sc->vtblk_slz);
	} else {
		lwkt_serialize_exit(&sc->vtblk_slz);
		vtblk_finish_bio(bio, ENXIO);
	}
	return 0;
}
static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}
/*
 * Calculate the maximum number of DMA segments supported. Note
 * that the in/out header is encoded in the segment list. We
 * assume that VTBLK_MIN_SEGMENTS covers that part of it so
 * we add it into the desired total. If the SEG_MAX feature
 * is not specified we have to just assume that the host can
 * handle the maximum number of segments required for a MAXPHYS
 * sized request.
 *
 * The additional + 1 is in case a MAXPHYS-sized buffer crosses
 * a page boundary.
 */
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs = MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1 + nsegs);
	} else {
		nsegs = MAXPHYS / PAGE_SIZE + 1 + nsegs;
	}
	if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
		nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);

	return (nsegs);
}
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}
static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}
static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}
static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	lwkt_serialize_enter(&sc->vtblk_slz);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	lwkt_serialize_exit(&sc->vtblk_slz);

	return (0);
}
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	struct disk_info info;

	/* construct the disk_info */
	bzero(&info, sizeof(info));

	if (virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = DEV_BSIZE;

	info.d_media_blksize = sc->vtblk_sector_size;
	info.d_media_blocks = blkcfg->capacity;

	info.d_ncylinders = blkcfg->geometry.cylinders;
	info.d_nheads = blkcfg->geometry.heads;
	info.d_secpertrack = blkcfg->geometry.sectors;
	info.d_secpercyl = info.d_secpertrack * info.d_nheads;

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;

	devstat_add_entry(&sc->stats, "vbd", device_get_unit(sc->vtblk_dev),
	    DEV_BSIZE, DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER,
	    DEVSTAT_PRIORITY_DISK);

	/* attach a generic disk device to ourselves */
	sc->cdev = disk_create(device_get_unit(sc->vtblk_dev), &sc->vtblk_disk,
	    &vbd_disk_ops);

	sc->cdev->si_drv1 = sc;
	sc->cdev->si_iosize_max = MAXPHYS;
	disk_setdiskinfo(&sc->vtblk_disk, &info);
}
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	ASSERT_SERIALIZED(&sc->vtblk_slz);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			bioqdisksort(&sc->vtblk_bioq, req->vbr_bio);
			vtblk_enqueue_request(sc, req);
			break;
		}
		devstat_start_transaction(&sc->stats);

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq, &sc->vtblk_slz);
}
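
/*
 * Take the next bio off the head of the queue and translate it into a
 * preallocated vtblk_request. The virtio block header's sector field is
 * always expressed in 512-byte (DEV_BSIZE) units, regardless of the
 * block size advertised by the host.
 */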
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bio;
	struct buf *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bio = bioq_takefirst(bioq);
	req->vbr_bio = bio;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;
	bp = bio->bio_buf;

	switch (bp->b_cmd) {
	case BUF_CMD_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BUF_CMD_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	case BUF_CMD_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bio->bio_offset / DEV_BSIZE;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->b_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	return (req);
}
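
/*
 * Map a request onto the shared sglist and enqueue it on the virtqueue:
 * one device-readable segment for the out header, the data buffer (if
 * any), and a final device-writable segment for the one-byte ack/status.
 */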
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bio;
	struct buf *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bio = req->vbr_bio;
	bp = bio->bio_buf;
	writable = 0;

	/*
	 * sglist is live throughout this subroutine.
	 */
	sglist_reset(sg);

	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->b_cmd == BUF_CMD_READ || bp->b_cmd == BUF_CMD_WRITE) {
		error = sglist_append(sg, bp->b_data, bp->b_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BUF_CMD_READ means the host writes into our buffer. */
		if (bp->b_cmd == BUF_CMD_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);

	sglist_reset(sg);

	return (error);
}
static int
vtblk_vq_intr(void *xsc)
{
	vtblk_complete(xsc);

	return (1);
}
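
/*
 * Completion side, called from the virtqueue interrupt with the softc
 * serializer held. Virtqueue interrupts stay disabled while finished
 * requests are dequeued and their bios completed; the serializer is
 * dropped around biodone() so strategy/startio may run concurrently.
 */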
static void
vtblk_complete(void *arg)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bio;
	struct buf *bp;

	sc = arg;
	vq = sc->vtblk_vq;

	lwkt_serialize_handler_disable(&sc->vtblk_slz);
	virtqueue_disable_intr(sc->vtblk_vq);
	ASSERT_SERIALIZED(&sc->vtblk_slz);

retry:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		return;

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bio = req->vbr_bio;
		bp = bio->bio_buf;

		if (req->vbr_ack == VIRTIO_BLK_S_OK)
			bp->b_resid = 0;
		else {
			bp->b_flags |= B_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP) {
				bp->b_error = ENOTSUP;
			} else {
				bp->b_error = EIO;
			}
		}

		devstat_end_transaction_buf(&sc->stats, bio->bio_buf);

		lwkt_serialize_exit(&sc->vtblk_slz);
		/*
		 * Unlocking the controller around biodone() does not allow
		 * processing further device interrupts; when we queued
		 * vtblk_complete, we disabled interrupts. It will allow
		 * concurrent vtblk_strategy/_startio command dispatches.
		 */
		biodone(bio);
		lwkt_serialize_enter(&sc->vtblk_slz);

		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		/*
		 * If new virtqueue entries appeared immediately after
		 * enabling interrupts, process them now. Release and
		 * retake softcontroller lock to try to avoid blocking
		 * I/O dispatch for too long.
		 */
		virtqueue_disable_intr(vq);
		goto retry;
	}

	lwkt_serialize_handler_enable(&sc->vtblk_slz);
}
static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}
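
/*
 * Kernel core dump support. The device is stopped and reinitialized so
 * the dump path can issue one request at a time with interrupts
 * disabled, polling the virtqueue for each completion.
 */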
static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}
static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_WRITE;
	bp.b_data = virtual;
	bp.b_bcount = length;

	return (vtblk_poll_request(sc, req));
}
static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio bio;
	struct buf bp;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bio = &bio;
	bzero(&bio, sizeof(struct bio));
	bzero(&bp, sizeof(struct buf));

	bio.bio_buf = &bp;
	bp.b_cmd = BUF_CMD_FLUSH;

	return (vtblk_poll_request(sc, req));
}
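
/*
 * Issue a single request and spin on the virtqueue until the host
 * completes it; only valid when no other requests are outstanding,
 * as in the dump path.
 */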
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq, NULL);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}
static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_finish_bio(req->vbr_bio, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}
static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct bio *bio;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while (bioq_first(bioq) != NULL) {
		bio = bioq_takefirst(bioq);
		vtblk_finish_bio(bio, ENXIO);
	}

	vtblk_free_requests(sc);
}
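
/*
 * Request pool management. Requests are contigmalloc'ed with 16-byte
 * alignment so that the out header and the ack byte each fit within a
 * single scatter/gather segment, as the KKASSERTs below verify.
 */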
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = contigmalloc(sizeof(struct vtblk_request), M_DEVBUF,
		    M_WAITOK, 0, BUS_SPACE_MAXADDR, 16, 0);
		if (req == NULL)
			return (ENOMEM);

		KKASSERT(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr))
		    == 1);
		KKASSERT(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack))
		    == 1);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}
static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		contigfree(req, sizeof(struct vtblk_request), M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}
static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = SLIST_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		SLIST_REMOVE_HEAD(&sc->vtblk_req_free, vbr_link);

	return (req);
}
static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	SLIST_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}
static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}
static void
vtblk_finish_bio(struct bio *bio, int error)
{

	biodone(bio);
}
static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}
static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}