From f3b0f38263552a134796e57046c5d0aed1d505c2 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Fri, 11 Jan 2008 01:41:34 +0000 Subject: [PATCH] HAMMER 18/many: Stabilization pass * Properly flag the inode when dirty buffer cache buffers are present so fsync syncs them. Fixes a umount panic. * Fix A-list bugs introduced when I added the '10' bit code to record an all-allocated/initialized state vs '00' (all-allocated uninitialized). * Fix an A-list bug in *alloc_rev. A comparison was off and could result in a near full A-list from incorrectly believing it was completely full. * When generating a spike also allocate a spike record for the recovery code to find. * Generate the initial free blocks for a hammer buffer via its cluster A-list instead of directly so the cluster A-list's meta-data is properly synchronized. --- sys/vfs/hammer/hammer.h | 5 +++-- sys/vfs/hammer/hammer_alist.c | 49 +++++++++++++++++++++++++++--------------- sys/vfs/hammer/hammer_disk.h | 14 +++++++++++- sys/vfs/hammer/hammer_inode.c | 12 +++++++---- sys/vfs/hammer/hammer_io.c | 10 ++++++--- sys/vfs/hammer/hammer_ondisk.c | 36 ++++++++++++++++++++++++------- sys/vfs/hammer/hammer_spike.c | 14 +++++++++++- sys/vfs/hammer/hammer_vnops.c | 4 ++-- 8 files changed, 106 insertions(+), 38 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index a75e8cb123..86b9e59b6f 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.22 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.23 2008/01/11 01:41:33 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -172,9 +172,10 @@ typedef struct hammer_inode *hammer_inode_t; #define HAMMER_INODE_RO 0x0200 /* read-only (because of as-of) */ #define HAMMER_INODE_GONE 0x0400 /* delete flushed out */ #define HAMMER_INODE_DONDISK 0x0800 /* data records may be on disk */ +#define HAMMER_INODE_BUFS 0x1000 /* dirty high level bps present */ #define HAMMER_INODE_MODMASK (HAMMER_INODE_DDIRTY|HAMMER_INODE_RDIRTY| \ - HAMMER_INODE_XDIRTY|\ + HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS| \ HAMMER_INODE_ITIMES|HAMMER_INODE_DELETED) #define HAMMER_MAX_INODE_CURSORS 4 diff --git a/sys/vfs/hammer/hammer_alist.c b/sys/vfs/hammer/hammer_alist.c index 9d9372b90e..f7698cf4d4 100644 --- a/sys/vfs/hammer/hammer_alist.c +++ b/sys/vfs/hammer/hammer_alist.c @@ -38,7 +38,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.c,v 1.7 2008/01/09 00:46:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/Attic/hammer_alist.c,v 1.8 2008/01/11 01:41:33 dillon Exp $ */ /* * This module implements a generic allocator through the use of a hinted @@ -843,8 +843,10 @@ hammer_alst_meta_alloc_fwd(hammer_alist_t live, hammer_almeta_t scan, radix, count, atblk, &full); if (r != HAMMER_ALIST_BLOCK_NONE) { - if (full) + if (full) { scan->bm_bitmap &= ~mask; + scan->bm_bitmap |= pmask << 1; + } return(r); } } @@ -922,6 +924,8 @@ failed: /* * This version allocates blocks in the reverse direction. 
+ * + * This is really nasty code */ static int32_t hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, @@ -984,13 +988,11 @@ hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, } /* - * If the count is too big we couldn't allocate anything from a - * recursion even if the sub-tree were entirely free. + * NOTE: saveblk must represent the entire layer, not the base blk + * of the last element. Otherwise an atblk that is inside the + * last element can cause bighint to be updated for a failed + * allocation when we didn't actually test all available blocks. */ - if (count > radix) { - saveblk = atblk; /* make it work for the conditional */ - goto failed; /* at the failed label */ - } if (skip == 1) { /* @@ -999,8 +1001,10 @@ hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, */ mask = 0xC0000000; pmask = 0x40000000; - blk += radix * HAMMER_ALIST_META_RADIX - radix; + blk += radix * HAMMER_ALIST_META_RADIX; + saveblk = blk; + blk -= radix; for (i = 0; i < (int)HAMMER_ALIST_META_RADIX; ++i) { /* @@ -1029,8 +1033,10 @@ hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, radix, count, atblk, &full); if (r != HAMMER_ALIST_BLOCK_NONE) { - if (full) + if (full) { scan->bm_bitmap &= ~mask; + scan->bm_bitmap |= pmask << 1; + } return(r); } } @@ -1057,12 +1063,13 @@ hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, blk += radix; j += 2; } + + saveblk = blk; blk -= radix; j -= 2; mask = 0x00000003 << j; pmask = 0x00000001 << j; i -= next_skip; - saveblk = blk; while (i >= 1) { /* @@ -1109,7 +1116,6 @@ hammer_alst_meta_alloc_rev(hammer_alist_t live, hammer_almeta_t scan, } } -failed: /* * We couldn't allocate count in this subtree, update bighint. * Since we are restricted to powers of 2, the next highest count @@ -1341,6 +1347,7 @@ hammer_alst_meta_free(hammer_alist_t live, hammer_almeta_t scan, bl->bl_radix_destroy(live->info, blk, radix); /* XXX bighint not being set properly */ } else { + scan->bm_bitmap &= ~mask; scan->bm_bitmap |= pmask; if (scan->bm_bighint < radix / 2) scan->bm_bighint = radix / 2; @@ -1362,6 +1369,7 @@ hammer_alst_meta_free(hammer_alist_t live, hammer_almeta_t scan, int32_t v; KKASSERT(mask != 0); + KKASSERT(count != 0); v = blk + radix - freeBlk; if (v > count) @@ -1389,12 +1397,17 @@ hammer_alst_meta_free(hammer_alist_t live, hammer_almeta_t scan, radix, next_skip, blk); } - if (scan[i].bm_bitmap == (u_int32_t)-1) + if (scan[i].bm_bitmap == (u_int32_t)-1) { scan->bm_bitmap |= mask; - else + /* XXX bighint not set properly */ + scan->bm_bighint = radix * HAMMER_ALIST_META_RADIX; + } else { + scan->bm_bitmap &= ~mask; scan->bm_bitmap |= pmask; - if (scan->bm_bighint < scan[i].bm_bighint) - scan->bm_bighint = scan[i].bm_bighint; + /* XXX bighint not set properly */ + if (scan->bm_bighint < scan[i].bm_bighint) + scan->bm_bighint = scan[i].bm_bighint; + } } mask <<= 2; pmask <<= 2; @@ -1488,8 +1501,10 @@ hammer_alst_radix_init(hammer_almeta_t scan, int32_t radix, /* * Mark as partially allocated */ - if (scan) + if (scan) { + scan->bm_bitmap &= ~mask; scan->bm_bitmap |= pmask; + } } else { /* * Add terminator and break out. The terminal diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 35af10c787..1d5d75fd56 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.15 2008/01/09 00:46:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.16 2008/01/11 01:41:33 dillon Exp $ */ #ifndef _SYS_UUID_H_ @@ -584,11 +584,23 @@ struct hammer_entry_record { }; /* + * Spike record + */ +struct hammer_spike_record { + struct hammer_base_record base; + int32_t clu_no; + int32_t vol_no; + hammer_tid_t clu_id; + char reserved[16]; +}; + +/* * Hammer rollup record */ union hammer_record_ondisk { struct hammer_base_record base; struct hammer_generic_record generic; + struct hammer_spike_record spike; struct hammer_inode_record inode; struct hammer_data_record data; struct hammer_entry_record entry; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 75bc1aeddf..4fa41d02f6 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.18 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.19 2008/01/11 01:41:33 dillon Exp $ */ #include "hammer.h" @@ -661,12 +661,16 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete) } /* - * Sync the buffer cache + * Sync the buffer cache. */ - if (ip->vp != NULL) + if (ip->vp != NULL) { error = vfsync(ip->vp, waitfor, 1, NULL, NULL); - else + if (RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL) + ip->flags &= ~HAMMER_INODE_BUFS; + } else { error = 0; + } + /* * Now sync related records diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 3a59d307bd..b50267e4fb 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.14 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.15 2008/01/11 01:41:33 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -420,7 +420,9 @@ hammer_modify_buffer(hammer_buffer_t buffer) cluster->io.released = 1; cluster->io.running = 1; bawrite(cluster->io.bp); - kprintf("OPEN CLUSTER %d\n", cluster->clu_no); + kprintf("OPEN CLUSTER %d:%d\n", + cluster->volume->vol_no, + cluster->clu_no); } hammer_unlock(&cluster->io.lock); } @@ -624,7 +626,9 @@ hammer_io_checkwrite(struct buf *bp) if (TAILQ_EMPTY(&cluster->io.deplist)) { cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN; - kprintf("CLOSE CLUSTER %d\n", cluster->clu_no); + kprintf("CLOSE CLUSTER %d:%d\n", + cluster->volume->vol_no, + cluster->clu_no); } } return(0); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 21af44f2a0..33304bbccd 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.18 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.19 2008/01/11 01:41:33 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -1585,7 +1585,7 @@ hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint, * Acquire the cluster. On success this will force *errorp to 0. 
*/ if (clu_no != HAMMER_ALIST_BLOCK_NONE) { - kprintf("ALLOC CLUSTER %d\n", clu_no); + kprintf("ALLOC CLUSTER %d:%d\n", volume->vol_no, clu_no); cluster = hammer_get_cluster(volume, clu_no, errorp, HAMMER_ASTATE_FREE); volume->clu_iterator = clu_no; @@ -1938,6 +1938,7 @@ alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live, { hammer_buffer_t buffer; int32_t buf_no; + int32_t base_blk; int isfwd; if (*bufferp) @@ -1976,9 +1977,21 @@ alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live, * Free the buffer to the appropriate slave list so the * cluster-based allocator sees it. */ - hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS, - HAMMER_FSBUF_MAXBLKS); + /*hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS, + HAMMER_FSBUF_MAXBLKS);*/ + base_blk = buf_no * HAMMER_FSBUF_MAXBLKS; + switch(type) { + case HAMMER_FSBUF_BTREE: + hammer_alist_free(live, base_blk, HAMMER_BTREE_NODES); + break; + case HAMMER_FSBUF_DATA: + hammer_alist_free(live, base_blk, HAMMER_DATA_NODES); + break; + case HAMMER_FSBUF_RECORDS: + hammer_alist_free(live, base_blk, HAMMER_RECORD_NODES); + break; + } } /* @@ -2092,7 +2105,11 @@ hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused) } /* - * Generic buffer initialization + * Generic buffer initialization. Initialize the A-list into an all-allocated + * state with the free block limit properly set. + * + * Note that alloc_new_buffer() will free the appropriate block range via + * the appropriate cluster alist, so the free count is properly propogated. */ void hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type) @@ -2101,13 +2118,16 @@ hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type) switch(type) { case HAMMER_FSBUF_BTREE: - hammer_alist_init(live, 0, HAMMER_BTREE_NODES, HAMMER_ASTATE_FREE); + hammer_alist_init(live, 0, HAMMER_BTREE_NODES, + HAMMER_ASTATE_ALLOC); break; case HAMMER_FSBUF_DATA: - hammer_alist_init(live, 0, HAMMER_DATA_NODES, HAMMER_ASTATE_FREE); + hammer_alist_init(live, 0, HAMMER_DATA_NODES, + HAMMER_ASTATE_ALLOC); break; case HAMMER_FSBUF_RECORDS: - hammer_alist_init(live, 0, HAMMER_RECORD_NODES, HAMMER_ASTATE_FREE); + hammer_alist_init(live, 0, HAMMER_RECORD_NODES, + HAMMER_ASTATE_ALLOC); break; default: hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC); diff --git a/sys/vfs/hammer/hammer_spike.c b/sys/vfs/hammer/hammer_spike.c index 6e808da3bc..d26eec0a92 100644 --- a/sys/vfs/hammer/hammer_spike.c +++ b/sys/vfs/hammer/hammer_spike.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.5 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.6 2008/01/11 01:41:34 dillon Exp $ */ #include "hammer.h" @@ -87,6 +87,7 @@ hammer_spike(struct hammer_cursor **spikep) hammer_cluster_t ocluster; hammer_cluster_t ncluster; hammer_node_t onode; + hammer_record_ondisk_t rec; int error; kprintf("hammer_spike: ENOSPC in cluster, spiking\n"); @@ -202,6 +203,17 @@ hammer_spike(struct hammer_cursor **spikep) onode->flags |= HAMMER_NODE_DELETED; /* + * Add a record representing the spike using space freed up by the + * above deletions. 
+ */ + rec = hammer_alloc_record(ocluster, &error, &spike->record_buffer); + KKASSERT(error == 0); + rec->spike.base.base.rec_type = HAMMER_RECTYPE_CLUSTER; + rec->spike.clu_no = ncluster->clu_no; + rec->spike.vol_no = ncluster->volume->vol_no; + rec->spike.clu_id = 0; + + /* * XXX I/O dependancy - new cluster must be flushed before current * cluster can be flushed. */ diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 5eef2aecfa..4322c50f7f 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.17 2008/01/10 07:41:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.18 2008/01/11 01:41:34 dillon Exp $ */ #include @@ -348,7 +348,7 @@ hammer_vop_write(struct vop_write_args *ap) flags = 0; } ip->ino_rec.ino_mtime = trans.tid; - flags |= HAMMER_INODE_ITIMES; + flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS; hammer_modify_inode(&trans, ip, flags); if (ap->a_ioflag & IO_SYNC) { bwrite(bp); -- 2.11.4.GIT
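
For readers following the A-list hunks above, the sketch below illustrates the
two-bit per-child encoding in a meta-node bitmap that those hunks manipulate.
It is a standalone simplification, not HAMMER source: the enum, helper names,
and signatures are invented for illustration.  The state values are taken from
the commit log ('10' all-allocated/initialized, '00' all-allocated/uninitialized)
and inferred from the mask/pmask usage in the hunks ('11' child entirely free,
'01' child partially allocated).

/*
 * Illustrative sketch only -- not the HAMMER sources.  A meta node packs a
 * two-bit state per child into a 32-bit bitmap (16 children per word).
 */
#include <stdint.h>

enum child_state {
	CHILD_ALLOC_UNINIT = 0x0,	/* 00: all-allocated, uninitialized */
	CHILD_PARTIAL      = 0x1,	/* 01: partially allocated */
	CHILD_ALLOC_INIT   = 0x2,	/* 10: all-allocated, initialized */
	CHILD_FREE         = 0x3	/* 11: entirely free */
};

/*
 * Replace the two-bit field for child i.  The pair must be cleared before
 * the new state is OR'd in; OR'ing alone (as the pre-patch free path did)
 * can turn a '10' child into '11' and make an allocated subtree look free.
 * The "scan->bm_bitmap &= ~mask;" lines added by the patch are this clear
 * step, and "pmask << 1" installs the '10' state on a full allocation.
 */
static inline uint32_t
set_child_state(uint32_t bitmap, int i, enum child_state st)
{
	uint32_t mask  = 0x3U << (i * 2);	/* the two-bit pair */
	uint32_t pmask = 0x1U << (i * 2);	/* low bit of the pair */

	bitmap &= ~mask;			/* clear the old state */
	bitmap |= (uint32_t)st * pmask;		/* install the new state */
	return(bitmap);
}

static inline enum child_state
get_child_state(uint32_t bitmap, int i)
{
	return((enum child_state)((bitmap >> (i * 2)) & 0x3U));
}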