From 1b8ca4cb1e4b29c7870b6fd2908e420c851870de Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 25 Sep 2008 01:44:57 +0000 Subject: [PATCH] MFC numerous features from HEAD. * Bounce buffer fixes for physio. * Disk flush support in scsi and nata subsystems. * Dead bio handling --- sys/dev/disk/nata/ata-disk.c | 4 +- sys/dev/disk/nata/ata-raid.c | 62 +++++++++++++++++++++++++++++- sys/dev/disk/nata/atapi-fd.c | 24 +++++++----- sys/dev/disk/nata/atapi-tape.c | 18 +++++---- sys/dev/raid/amr/amr.c | 18 +++++++-- sys/kern/kern_physio.c | 87 +++++++++++++++++++++++++++++------------- sys/kern/subr_diskslice.c | 4 +- sys/kern/vfs_bio.c | 15 +------- sys/sys/buf.h | 11 +++++- 9 files changed, 176 insertions(+), 67 deletions(-) diff --git a/sys/dev/disk/nata/ata-disk.c b/sys/dev/disk/nata/ata-disk.c index 3d2d7482af..f42e7f57ad 100644 --- a/sys/dev/disk/nata/ata-disk.c +++ b/sys/dev/disk/nata/ata-disk.c @@ -24,7 +24,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/dev/ata/ata-disk.c,v 1.199 2006/09/14 19:12:29 sos Exp $ - * $DragonFly: src/sys/dev/disk/nata/ata-disk.c,v 1.8 2008/06/27 01:24:46 dillon Exp $ + * $DragonFly: src/sys/dev/disk/nata/ata-disk.c,v 1.8.2.1 2008/09/25 01:44:55 dillon Exp $ */ #include "opt_ata.h" @@ -330,7 +330,6 @@ ad_strategy(struct dev_strategy_args *ap) else request->u.ata.command = ATA_WRITE; break; -#if 0 /* NOT YET */ case BUF_CMD_FLUSH: request->u.ata.lba = 0; request->u.ata.count = 0; @@ -340,7 +339,6 @@ ad_strategy(struct dev_strategy_args *ap) request->flags = ATA_R_CONTROL; request->u.ata.command = ATA_FLUSHCACHE; break; -#endif default: device_printf(dev, "FAILURE - unknown BUF operation\n"); ata_free_request(request); diff --git a/sys/dev/disk/nata/ata-raid.c b/sys/dev/disk/nata/ata-raid.c index 603e7a8048..87e177e8ca 100644 --- a/sys/dev/disk/nata/ata-raid.c +++ b/sys/dev/disk/nata/ata-raid.c @@ -24,7 +24,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/ata/ata-raid.c,v 1.120 2006/04/15 10:27:41 maxim Exp $ - * $DragonFly: src/sys/dev/disk/nata/ata-raid.c,v 1.10 2008/06/27 01:24:46 dillon Exp $ + * $DragonFly: src/sys/dev/disk/nata/ata-raid.c,v 1.10.2.1 2008/09/25 01:44:55 dillon Exp $ */ #include "opt_ata.h" @@ -253,6 +253,38 @@ ata_raid_ioctl(u_long cmd, caddr_t data) return error; } +static int +ata_raid_flush(struct ar_softc *rdp, struct bio *bp) +{ + struct ata_request *request; + device_t dev; + int disk, error; + + error = 0; + bp->bio_driver_info = (void *)0; + + for (disk = 0; disk < rdp->total_disks; disk++) { + if ((dev = rdp->disks[disk].dev) != NULL) + bp->bio_driver_info = (void *)((intptr_t)bp->bio_driver_info + 1); + } + for (disk = 0; disk < rdp->total_disks; disk++) { + if ((dev = rdp->disks[disk].dev) == NULL) + continue; + if (!(request = ata_raid_init_request(rdp, bp))) + return ENOMEM; + request->dev = dev; + request->u.ata.command = ATA_FLUSHCACHE; + request->u.ata.lba = 0; + request->u.ata.count = 0; + request->u.ata.feature = 0; + request->timeout = 1; + request->retries = 0; + request->flags |= ATA_R_ORDERED | ATA_R_DIRECT; + ata_queue_request(request); + } + return 0; +} + /* * XXX TGEN there are a lot of offset -> block number conversions going on * here, which is suboptimal. 
@@ -268,6 +300,18 @@ ata_raid_strategy(struct dev_strategy_args *ap) u_int64_t blkno, lba, blk = 0; int count, chunk, drv, par = 0, change = 0; + if (bbp->b_cmd == BUF_CMD_FLUSH) { + int error; + + error = ata_raid_flush(rdp, bp); + if (error != 0) { + bbp->b_flags |= B_ERROR; + bbp->b_error = error; + biodone(bp); + } + return(0); + } + if (!(rdp->status & AR_S_READY) || (bbp->b_cmd != BUF_CMD_READ && bbp->b_cmd != BUF_CMD_WRITE)) { bbp->b_flags |= B_ERROR; @@ -602,6 +646,19 @@ ata_raid_done(struct ata_request *request) struct buf *bbp = bp->bio_buf; int i, mirror, finished = 0; + if (bbp->b_cmd == BUF_CMD_FLUSH) { + if (bbp->b_error == 0) + bbp->b_error = request->result; + ata_free_request(request); + bp->bio_driver_info = (void *)((intptr_t)bp->bio_driver_info - 1); + if ((intptr_t)bp->bio_driver_info == 0) { + if (bbp->b_error) + bbp->b_flags |= B_ERROR; + biodone(bp); + } + return; + } + switch (rdp->type) { case AR_T_JBOD: case AR_T_SPAN: @@ -3966,6 +4023,9 @@ ata_raid_init_request(struct ar_softc *rdp, struct bio *bio) case BUF_CMD_WRITE: request->flags = ATA_R_WRITE; break; + case BUF_CMD_FLUSH: + request->flags = ATA_R_CONTROL; + break; default: kprintf("ar%d: FAILURE - unknown BUF operation\n", rdp->lun); ata_free_request(request); diff --git a/sys/dev/disk/nata/atapi-fd.c b/sys/dev/disk/nata/atapi-fd.c index 254c290366..b85bef1925 100644 --- a/sys/dev/disk/nata/atapi-fd.c +++ b/sys/dev/disk/nata/atapi-fd.c @@ -24,7 +24,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/ata/atapi-fd.c,v 1.109 2006/03/30 05:29:57 marcel Exp $ - * $DragonFly: src/sys/dev/disk/nata/atapi-fd.c,v 1.4 2007/06/03 04:48:29 dillon Exp $ + * $DragonFly: src/sys/dev/disk/nata/atapi-fd.c,v 1.4.6.1 2008/09/25 01:44:55 dillon Exp $ */ #include @@ -257,10 +257,20 @@ afd_strategy(struct dev_strategy_args *ap) bzero(ccb, sizeof(ccb)); - if (bbp->b_cmd == BUF_CMD_READ) + switch(bbp->b_cmd) { + case BUF_CMD_READ: ccb[0] = ATAPI_READ_BIG; - else + break; + case BUF_CMD_WRITE: ccb[0] = ATAPI_WRITE_BIG; + break; + default: + device_printf(dev, "unknown BUF operation\n"); + bbp->b_flags |= B_ERROR; + bbp->b_error = EIO; + biodone(bp); + return 0; + } ccb[2] = lba >> 24; ccb[3] = lba >> 16; @@ -286,6 +296,7 @@ afd_strategy(struct dev_strategy_args *ap) request->timeout = (ccb[0] == ATAPI_WRITE_BIG) ? 60 : 30; request->retries = 2; request->callback = afd_done; + switch (bbp->b_cmd) { case BUF_CMD_READ: request->flags = (ATA_R_ATAPI | ATA_R_READ); @@ -294,12 +305,7 @@ afd_strategy(struct dev_strategy_args *ap) request->flags = (ATA_R_ATAPI | ATA_R_WRITE); break; default: - device_printf(dev, "unknown BUF operation\n"); - ata_free_request(request); - bbp->b_flags |= B_ERROR; - bbp->b_error = EIO; - biodone(bp); - return 0; + panic("bbp->b_cmd"); } if (atadev->mode >= ATA_DMA) request->flags |= ATA_R_DMA; diff --git a/sys/dev/disk/nata/atapi-tape.c b/sys/dev/disk/nata/atapi-tape.c index 43fc81a434..d3be74d28d 100644 --- a/sys/dev/disk/nata/atapi-tape.c +++ b/sys/dev/disk/nata/atapi-tape.c @@ -24,7 +24,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/ata/atapi-tape.c,v 1.101 2006/01/05 21:27:19 sos Exp $ - * $DragonFly: src/sys/dev/disk/nata/atapi-tape.c,v 1.3 2007/06/03 04:48:29 dillon Exp $ + * $DragonFly: src/sys/dev/disk/nata/atapi-tape.c,v 1.3.6.1 2008/09/25 01:44:55 dillon Exp $ */ #include "opt_ata.h" @@ -398,12 +398,18 @@ ast_strategy(struct dev_strategy_args *ap) biodone(bp); return 0; } - if (!(bbp->b_cmd == BUF_CMD_READ) && stp->flags & F_WRITEPROTECT) { + if (!(bbp->b_cmd == BUF_CMD_READ) && (stp->flags & F_WRITEPROTECT)) { bbp->b_flags |= B_ERROR; bbp->b_error = EPERM; biodone(bp); return 0; } + if (bbp->b_cmd != BUF_CMD_READ && bbp->b_cmd != BUF_CMD_WRITE) { + bbp->b_flags |= B_ERROR; + bbp->b_error = EIO; + biodone(bp); + return 0; + } /* check for != blocksize requests */ if (bbp->b_bcount % stp->blksize) { @@ -454,6 +460,7 @@ ast_strategy(struct dev_strategy_args *ap) request->timeout = (ccb[0] == ATAPI_WRITE_BIG) ? 180 : 120; request->retries = 2; request->callback = ast_done; + switch (bbp->b_cmd) { case BUF_CMD_READ: request->flags |= (ATA_R_ATAPI | ATA_R_READ); @@ -462,12 +469,7 @@ ast_strategy(struct dev_strategy_args *ap) request->flags |= (ATA_R_ATAPI | ATA_R_WRITE); break; default: - device_printf(dev, "unknown BUF operation\n"); - ata_free_request(request); - bbp->b_flags |= B_ERROR; - bbp->b_error = EIO; - biodone(bp); - return 0; + panic("bbp->b_cmd"); } devstat_start_transaction(&stp->stats); ata_queue_request(request); diff --git a/sys/dev/raid/amr/amr.c b/sys/dev/raid/amr/amr.c index d675af4220..c5e4794cc4 100644 --- a/sys/dev/raid/amr/amr.c +++ b/sys/dev/raid/amr/amr.c @@ -53,7 +53,7 @@ * SUCH DAMAGE. 
* * $FreeBSD: src/sys/dev/amr/amr.c,v 1.7.2.13 2003/01/15 13:41:18 emoore Exp $ - * $DragonFly: src/sys/dev/raid/amr/amr.c,v 1.25.4.2 2008/09/16 12:51:21 swildner Exp $ + * $DragonFly: src/sys/dev/raid/amr/amr.c,v 1.25.4.3 2008/09/25 01:44:57 dillon Exp $ */ /* @@ -867,16 +867,28 @@ amr_bio_command(struct amr_softc *sc, struct amr_command **acp) ac->ac_bio = bio; ac->ac_data = bio->bio_buf->b_data; ac->ac_length = bio->bio_buf->b_bcount; - if (bio->bio_buf->b_cmd == BUF_CMD_READ) { + + switch (bio->bio_buf->b_cmd) { + case BUF_CMD_READ: ac->ac_flags |= AMR_CMD_DATAIN; cmd = AMR_CMD_LREAD; - } else { + break; + case BUF_CMD_WRITE: ac->ac_flags |= AMR_CMD_DATAOUT; cmd = AMR_CMD_LWRITE; + break; + case BUF_CMD_FLUSH: + ac->ac_flags |= AMR_CMD_PRIORITY | AMR_CMD_DATAOUT; + cmd = AMR_CMD_FLUSH; + break; + default: + cmd = 0; + break; } amrd = (struct amrd_softc *)bio->bio_driver_info; driveno = amrd->amrd_drive - sc->amr_drive; blkcount = (bio->bio_buf->b_bcount + AMR_BLKSIZE - 1) / AMR_BLKSIZE; + lba = bio->bio_offset / AMR_BLKSIZE; KKASSERT(lba < 0x100000000ULL); diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index d8aed754e8..cdea0d9b4e 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -17,7 +17,7 @@ * are met. 
* * $FreeBSD: src/sys/kern/kern_physio.c,v 1.46.2.4 2003/11/14 09:51:47 simokawa Exp $ - * $DragonFly: src/sys/kern/kern_physio.c,v 1.25 2007/08/21 17:26:45 dillon Exp $ + * $DragonFly: src/sys/kern/kern_physio.c,v 1.25.4.1 2008/09/25 01:44:52 dillon Exp $ */ #include @@ -44,10 +44,11 @@ physio(cdev_t dev, struct uio *uio, int ioflag) { int i; int error; - int chk_blockno; int saflags; int iolen; int bcount; + int bounceit; + caddr_t ubase; struct buf *bp; bp = getpbuf(NULL); @@ -55,7 +56,7 @@ physio(cdev_t dev, struct uio *uio, int ioflag) error = 0; /* XXX: sanity check */ - if(dev->si_iosize_max < PAGE_SIZE) { + if (dev->si_iosize_max < PAGE_SIZE) { kprintf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n", devtoname(dev), dev->si_iosize_max); dev->si_iosize_max = DFLTPHYS; @@ -64,12 +65,6 @@ physio(cdev_t dev, struct uio *uio, int ioflag) /* Must be a real uio */ KKASSERT(uio->uio_segflg != UIO_NOCOPY); - /* Don't check block number overflow for D_MEM */ - if ((dev_dflags(dev) & D_TYPEMASK) == D_MEM) - chk_blockno = 0; - else - chk_blockno = 1; - for (i = 0; i < uio->uio_iovcnt; i++) { while (uio->uio_iov[i].iov_len) { if (uio->uio_rw == UIO_READ) @@ -83,25 +78,53 @@ physio(cdev_t dev, struct uio *uio, int ioflag) bp->b_bio1.bio_offset = uio->uio_offset; bp->b_bio1.bio_done = physwakeup; - /* Don't exceed drivers iosize limit */ + /* + * Setup for mapping the request into kernel memory. + * + * We can only write as much as fits in a pbuf, + * which is MAXPHYS, and no larger than the device's + * ability. + * + * If not using bounce pages the base address of the + * user mapping into the pbuf may be offset, further + * reducing how much will actually fit in the pbuf. + */ if (bcount > dev->si_iosize_max) bcount = dev->si_iosize_max; - /* - * Make sure the pbuf can map the request - * XXX: The pbuf has kvasize = MAXPHYS so a request - * XXX: larger than MAXPHYS - PAGE_SIZE must be - * XXX: page aligned or it will be fragmented. 
- */ - iolen = ((vm_offset_t) uio->uio_iov[i].iov_base) & - PAGE_MASK; - if ((bcount + iolen) > bp->b_kvasize) { - bcount = bp->b_kvasize; - if (iolen != 0) - bcount -= PAGE_SIZE; + ubase = uio->uio_iov[i].iov_base; + bounceit = (int)(((vm_offset_t)ubase) & 15); + iolen = ((vm_offset_t)ubase) & PAGE_MASK; + if (bounceit) { + if (bcount > bp->b_kvasize) + bcount = bp->b_kvasize; + } else { + if ((bcount + iolen) > bp->b_kvasize) { + bcount = bp->b_kvasize; + if (iolen != 0) + bcount -= PAGE_SIZE; + } } + + /* + * If we have to use a bounce buffer allocate kernel + * memory and copyin/copyout. Otherwise map the + * user buffer directly into kernel memory without + * copying. + */ if (uio->uio_segflg == UIO_USERSPACE) { - if (vmapbuf(bp, uio->uio_iov[i].iov_base, bcount) < 0) { + if (bounceit) { + bp->b_data = bp->b_kvabase; + bp->b_bcount = bcount; + vm_hold_load_pages(bp, (vm_offset_t)bp->b_data, (vm_offset_t)bp->b_data + bcount); + if (uio->uio_rw == UIO_WRITE) { + error = copyin(ubase, bp->b_data, bcount); + if (error) { + vm_hold_free_pages(bp, (vm_offset_t)bp->b_data, (vm_offset_t)bp->b_data + bcount); + goto doerror; + } + } + } else if (vmapbuf(bp, ubase, bcount) < 0) { error = EFAULT; goto doerror; } @@ -115,16 +138,28 @@ physio(cdev_t dev, struct uio *uio, int ioflag) tsleep(&bp->b_bio1, 0, "physstr", 0); crit_exit(); - if (uio->uio_segflg == UIO_USERSPACE) - vunmapbuf(bp); iolen = bp->b_bcount - bp->b_resid; + if (uio->uio_segflg == UIO_USERSPACE) { + if (bounceit) { + if (uio->uio_rw == UIO_READ && iolen) { + error = copyout(bp->b_data, ubase, iolen); + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = error; + } + } + vm_hold_free_pages(bp, (vm_offset_t)bp->b_data, (vm_offset_t)bp->b_data + bcount); + } else { + vunmapbuf(bp); + } + } if (iolen == 0 && !(bp->b_flags & B_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base += iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; - if( bp->b_flags & B_ERROR) { 
+ if (bp->b_flags & B_ERROR) { error = bp->b_error; goto doerror; } diff --git a/sys/kern/subr_diskslice.c b/sys/kern/subr_diskslice.c index 9500b44667..227069bbe1 100644 --- a/sys/kern/subr_diskslice.c +++ b/sys/kern/subr_diskslice.c @@ -44,7 +44,7 @@ * from: @(#)ufs_disksubr.c 7.16 (Berkeley) 5/4/91 * from: ufs_disksubr.c,v 1.8 1994/06/07 01:21:39 phk Exp $ * $FreeBSD: src/sys/kern/subr_diskslice.c,v 1.82.2.6 2001/07/24 09:49:41 dd Exp $ - * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.50 2008/04/20 13:44:25 swildner Exp $ + * $DragonFly: src/sys/kern/subr_diskslice.c,v 1.50.2.1 2008/09/25 01:44:52 dillon Exp $ */ #include @@ -222,7 +222,7 @@ doshift: * Disallow writes to reserved areas unless ds_wlabel allows it. */ if (slicerel_secno < sp->ds_reserved && nsec && - bp->b_cmd != BUF_CMD_READ && sp->ds_wlabel == 0) { + bp->b_cmd == BUF_CMD_WRITE && sp->ds_wlabel == 0) { bp->b_error = EROFS; goto error; } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 3332e50f42..62ba732515 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -12,7 +12,7 @@ * John S. Dyson. 
* * $FreeBSD: src/sys/kern/vfs_bio.c,v 1.242.2.20 2003/05/28 18:38:10 alc Exp $ - * $DragonFly: src/sys/kern/vfs_bio.c,v 1.112.2.1 2008/07/18 00:02:10 dillon Exp $ + * $DragonFly: src/sys/kern/vfs_bio.c,v 1.112.2.2 2008/09/25 01:44:52 dillon Exp $ */ /* @@ -90,10 +90,6 @@ static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); struct buf *buf; /* buffer header pool */ -static void vm_hold_free_pages(struct buf *bp, vm_offset_t from, - vm_offset_t to); -static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, - vm_offset_t to); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m); static void vfs_clean_pages(struct buf *bp); @@ -646,10 +642,7 @@ bfreekva(struct buf *bp) void bremfree(struct buf *bp) { - int old_qindex; - crit_enter(); - old_qindex = bp->b_qindex; if (bp->b_qindex != BQUEUE_NONE) { KASSERT(BUF_REFCNTNB(bp) == 1, @@ -3545,12 +3538,8 @@ vfs_clean_pages(struct buf *bp) for (i = 0; i < bp->b_xio.xio_npages; i++) { vm_page_t m = bp->b_xio.xio_pages[i]; vm_ooffset_t noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; - vm_ooffset_t eoff = noff; - if (eoff > bp->b_loffset + bp->b_bufsize) - eoff = bp->b_loffset + bp->b_bufsize; vfs_page_set_valid(bp, foff, i, m); - /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } } @@ -3626,7 +3615,7 @@ vfs_bio_clrbuf(struct buf *bp) return; } } - ea = sa = bp->b_data; + sa = bp->b_data; for(i=0;i<bp->b_xio.xio_npages;i++,sa=ea) { int j = ((vm_offset_t)sa & PAGE_MASK) / DEV_BSIZE; ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index a793514b76..e1dcec4a44 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -37,7 +37,7 @@ * * @(#)buf.h 8.9 (Berkeley) 3/30/95 * $FreeBSD: src/sys/sys/buf.h,v 1.88.2.10 2003/01/25 19:02:23 dillon Exp $ - * $DragonFly: src/sys/sys/buf.h,v 1.51 2008/07/14 03:08:58 dillon Exp $ + * $DragonFly: src/sys/sys/buf.h,v 1.51.2.1 2008/09/25 01:44:54 dillon Exp $ */ #ifndef _SYS_BUF_H_ @@ -93,7 
+93,8 @@ typedef enum buf_cmd { BUF_CMD_READ, BUF_CMD_WRITE, BUF_CMD_FREEBLKS, - BUF_CMD_FORMAT + BUF_CMD_FORMAT, + BUF_CMD_FLUSH } buf_cmd_t; #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) @@ -271,6 +272,10 @@ struct buf { * Setting B_AGE on a dirty buffer will not cause it * to be flushed more quickly but will cause it to be * reallocated more quickly after having been flushed. + * + * B_NOCACHE Request that the buffer and backing store be + * destroyed on completion. If B_DELWRI is set and the + * write fails, the buffer remains intact. */ #define B_AGE 0x00000001 /* Reuse more quickly */ @@ -428,6 +433,8 @@ int scan_all_buffers (int (*)(struct buf *, void *), void *); void reassignbuf (struct buf *); struct buf *trypbuf (int *); void bio_ops_sync(struct mount *mp); +void vm_hold_free_pages(struct buf *bp, vm_offset_t from, vm_offset_t to); +void vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to); #endif /* _KERNEL */ #endif /* _KERNEL || _KERNEL_STRUCTURES */ -- 2.11.4.GIT