From cebe949331f2cd2feb05aaab0a1282926277b696 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 10 Jun 2008 00:40:31 +0000 Subject: [PATCH] HAMMER 53D/Many: Stabilization * Fix an overwrite bug with direct write which could result in file corruption. * Reserve just-freed big blocks for two flush cycles to prevent HAMMER from overwriting destroyed data so it does not become corrupt if the system crashes. This is needed because the recover code does not record UNDOs for data (nor do we want it to). * More I/O subsystem work. There may still be an elusive panic related to calls to regetblk(). --- sys/vfs/hammer/hammer.h | 38 ++++++++++---- sys/vfs/hammer/hammer_blockmap.c | 3 +- sys/vfs/hammer/hammer_flusher.c | 25 ++++++++- sys/vfs/hammer/hammer_freemap.c | 65 +++++++++++++++++------ sys/vfs/hammer/hammer_io.c | 109 ++++++++++++++++++++------------------- sys/vfs/hammer/hammer_object.c | 50 ++++++++++++++---- sys/vfs/hammer/hammer_ondisk.c | 22 +++++--- sys/vfs/hammer/hammer_prune.c | 4 +- sys/vfs/hammer/hammer_vfsops.c | 8 ++- sys/vfs/hammer/hammer_vnops.c | 4 +- 10 files changed, 224 insertions(+), 104 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 3701feb342..81e67eadf5 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.76 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.77 2008/06/10 00:40:31 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -402,6 +402,7 @@ struct hammer_io { u_int waitdep : 1; /* flush waits for dependancies */ u_int recovered : 1; /* has recovery ref */ u_int waitmod : 1; /* waiting for modify_refs */ + u_int reclaim : 1; /* reclaim requested */ }; typedef struct hammer_io *hammer_io_t; @@ -436,6 +437,7 @@ struct hammer_buffer { struct hammer_volume *volume; hammer_off_t zoneX_offset; hammer_off_t zone2_offset; + struct hammer_reserve *resv; struct hammer_node_list clist; }; @@ -497,10 +499,13 @@ union hammer_io_structure { typedef union hammer_io_structure *hammer_io_structure_t; /* - * Allocation holes are recorded for a short period of time in an attempt - * to use up the space. + * Allocation holes are recorded when an allocation does not fit within a + * buffer. Later allocations which might fit may then be satisfied from + * a recorded hole. The resv reference prevents the big block from being + * allocated out of via the normal blockmap mechanism. + * + * This is strictly a heuristic. */ - #define HAMMER_MAX_HOLES 8 struct hammer_hole; @@ -521,10 +526,21 @@ struct hammer_hole { typedef struct hammer_hole *hammer_hole_t; +/* + * The reserve structure prevents the blockmap from allocating + * out of a reserved bigblock. Such reservations are used by + * the direct-write mechanism. + * + * The structure is also used to hold off on reallocations of + * big blocks from the freemap until flush dependancies have + * been dealt with. 
+ */ struct hammer_reserve { RB_ENTRY(hammer_reserve) rb_node; - hammer_off_t zone_offset; + TAILQ_ENTRY(hammer_reserve) delay_entry; + int flush_group; int refs; + hammer_off_t zone_offset; }; typedef struct hammer_reserve *hammer_reserve_t; @@ -532,7 +548,10 @@ typedef struct hammer_reserve *hammer_reserve_t; #include "hammer_cursor.h" /* - * Undo history tracking + * The undo structure tracks recent undos to avoid laying down duplicate + * undos within a flush group, saving us a significant amount of overhead. + * + * This is strictly a heuristic. */ #define HAMMER_MAX_UNDOS 256 @@ -608,6 +627,7 @@ struct hammer_mount { int undo_alloc; TAILQ_HEAD(, hammer_undo) undo_lru_list; TAILQ_HEAD(, hammer_inode) flush_list; + TAILQ_HEAD(, hammer_reserve) delay_list; TAILQ_HEAD(, hammer_objid_cache) objid_cache_list; }; @@ -638,6 +658,7 @@ extern int hammer_debug_btree; extern int hammer_debug_tid; extern int hammer_debug_recover; extern int hammer_debug_recover_faults; +extern int hammer_debug_write_release; extern int hammer_count_inodes; extern int hammer_count_reclaiming; extern int hammer_count_records; @@ -876,8 +897,6 @@ int hammer_ip_delete_range_all(hammer_cursor_t cursor, hammer_inode_t ip, int *countp); int hammer_ip_sync_data(hammer_cursor_t cursor, hammer_inode_t ip, int64_t offset, void *data, int bytes); -int hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, - off_t file_offset, void *data, int bytes); int hammer_ip_sync_record(hammer_transaction_t trans, hammer_record_t rec); int hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t rec); @@ -892,7 +911,6 @@ int hammer_io_new(struct vnode *devvp, struct hammer_io *io); void hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset); void hammer_io_release(struct hammer_io *io, int flush); void hammer_io_flush(struct hammer_io *io); -void hammer_io_clear_modify(struct hammer_io *io); void hammer_io_waitdep(struct hammer_io *io); int hammer_io_direct_read(hammer_mount_t hmp, 
hammer_btree_leaf_elm_t leaf, struct bio *bio); @@ -900,6 +918,8 @@ int hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, struct bio *bio); void hammer_io_write_interlock(hammer_io_t io); void hammer_io_done_interlock(hammer_io_t io); +void hammer_io_clear_modify(struct hammer_io *io); +void hammer_io_clear_modlist(struct hammer_io *io); void hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume, void *base, int len); void hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer, diff --git a/sys/vfs/hammer/hammer_blockmap.c b/sys/vfs/hammer/hammer_blockmap.c index a3b70fb2a4..38e5d02cbf 100644 --- a/sys/vfs/hammer/hammer_blockmap.c +++ b/sys/vfs/hammer/hammer_blockmap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.16 2008/06/08 18:16:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.17 2008/06/10 00:40:31 dillon Exp $ */ /* @@ -655,6 +655,7 @@ hammer_blockmap_free(hammer_transaction_t trans, KKASSERT(((bmap_off ^ (bmap_off + (bytes - 1))) & ~HAMMER_LARGEBLOCK_MASK64) == 0); } else { + bytes = -((-bytes + 15) & ~15); KKASSERT(bytes >= -HAMMER_BUFSIZE); } zone = HAMMER_ZONE_DECODE(bmap_off); diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index 130dedc456..845fb5b510 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.20 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.21 2008/06/10 00:40:31 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -157,14 +157,18 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp) } /* - * Flush all inodes in the current flush group + * Flush all inodes in the current flush group. 
*/ static void hammer_flusher_flush(hammer_mount_t hmp) { struct hammer_transaction trans; hammer_inode_t ip; + hammer_reserve_t resv; + /* + * Flush the inodes + */ hammer_start_transaction_fls(&trans, hmp); while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) { if (ip->flush_group != hmp->flusher_act) @@ -174,6 +178,21 @@ hammer_flusher_flush(hammer_mount_t hmp) } hammer_flusher_finalize(&trans, 1); hmp->flusher_tid = trans.tid; + + /* + * Clean up any freed big-blocks (typically zone-2). + * resv->flush_group is typically set several flush groups ahead + * of the free to ensure that the freed block is not reused until + * it can no longer be reused. + */ + while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) { + if (resv->flush_group != hmp->flusher_act) + break; + TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry); + hammer_blockmap_reserve_complete(hmp, resv); + } + + hammer_done_transaction(&trans); } @@ -186,8 +205,10 @@ hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans) { hammer_mount_t hmp = ip->hmp; + /*hammer_lock_ex(&ip->lock);*/ ip->error = hammer_sync_inode(ip); hammer_flush_inode_done(ip); + /*hammer_unlock(&ip->lock);*/ if (hammer_must_finalize_undo(hmp)) { kprintf("HAMMER: Warning: UNDO area too small!"); diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c index bf89d4a592..1bb77cc8d1 100644 --- a/sys/vfs/hammer/hammer_freemap.c +++ b/sys/vfs/hammer/hammer_freemap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.14 2008/06/08 18:16:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.15 2008/06/10 00:40:31 dillon Exp $ */ /* @@ -46,6 +46,8 @@ #include "hammer.h" +static int hammer_freemap_reserved(hammer_mount_t hmp, hammer_off_t zone2_base); + /* * Backend big-block allocation */ @@ -53,6 +55,7 @@ hammer_off_t hammer_freemap_alloc(hammer_transaction_t trans, hammer_off_t owner, int *errorp) { + hammer_mount_t hmp; hammer_volume_ondisk_t ondisk; hammer_off_t layer1_offset; hammer_off_t layer2_offset; @@ -65,26 +68,27 @@ hammer_freemap_alloc(hammer_transaction_t trans, hammer_off_t owner, int vol_no; int loops = 0; + hmp = trans->hmp; *errorp = 0; ondisk = trans->rootvol->ondisk; - hammer_lock_ex(&trans->hmp->free_lock); + hammer_lock_ex(&hmp->free_lock); - blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; + blockmap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; result_offset = blockmap->next_offset; vol_no = HAMMER_VOL_DECODE(result_offset); for (;;) { layer1_offset = blockmap->phys_offset + HAMMER_BLOCKMAP_LAYER1_OFFSET(result_offset); - layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1); + layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1); if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) { /* * End-of-volume, try next volume. 
*/ new_volume: ++vol_no; - if (vol_no >= trans->hmp->nvolumes) + if (vol_no >= hmp->nvolumes) vol_no = 0; result_offset = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0); if (vol_no == 0 && ++loops == 2) { @@ -95,9 +99,11 @@ new_volume: } else { layer2_offset = layer1->phys_offset + HAMMER_BLOCKMAP_LAYER2_OFFSET(result_offset); - layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, + layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2); - if (layer2->u.owner == HAMMER_BLOCKMAP_FREE) { + + if (layer2->u.owner == HAMMER_BLOCKMAP_FREE && + !hammer_freemap_reserved(hmp, result_offset)) { hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2)); layer2->u.owner = owner & @@ -111,7 +117,7 @@ new_volume: trans->rootvol, vol0_stat_freebigblocks); --ondisk->vol0_stat_freebigblocks; - trans->hmp->copy_stat_freebigblocks = + hmp->copy_stat_freebigblocks = ondisk->vol0_stat_freebigblocks; hammer_modify_volume_done(trans->rootvol); break; @@ -136,7 +142,7 @@ new_volume: blockmap->next_offset = result_offset + HAMMER_LARGEBLOCK_SIZE; hammer_modify_volume_done(trans->rootvol); done: - hammer_unlock(&trans->hmp->free_lock); + hammer_unlock(&hmp->free_lock); if (buffer1) hammer_rel_buffer(buffer1, 0); if (buffer2) @@ -151,6 +157,7 @@ void hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, hammer_off_t owner, int *errorp) { + hammer_mount_t hmp; hammer_volume_ondisk_t ondisk; hammer_off_t layer1_offset; hammer_off_t layer2_offset; @@ -159,24 +166,39 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, hammer_buffer_t buffer2 = NULL; struct hammer_blockmap_layer1 *layer1; struct hammer_blockmap_layer2 *layer2; + hammer_reserve_t resv; + + hmp = trans->hmp; KKASSERT((phys_offset & HAMMER_LARGEBLOCK_MASK64) == 0); + KKASSERT(hammer_freemap_reserved(hmp, phys_offset) == 0); + + /* + * Create a reservation + */ + resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO); + resv->refs = 1; + resv->zone_offset = phys_offset; + 
resv->flush_group = hmp->flusher_next + 1; + RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv); + TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry); + ++hammer_count_reservations; + + hammer_lock_ex(&hmp->free_lock); *errorp = 0; ondisk = trans->rootvol->ondisk; - hammer_lock_ex(&trans->hmp->free_lock); - - blockmap = &trans->hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; + blockmap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX]; layer1_offset = blockmap->phys_offset + HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset); - layer1 = hammer_bread(trans->hmp, layer1_offset, errorp, &buffer1); + layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1); KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL); layer2_offset = layer1->phys_offset + HAMMER_BLOCKMAP_LAYER2_OFFSET(phys_offset); - layer2 = hammer_bread(trans->hmp, layer2_offset, errorp, &buffer2); + layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2); KKASSERT(layer2->u.owner == (owner & ~HAMMER_LARGEBLOCK_MASK64)); hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1)); @@ -190,9 +212,9 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, vol0_stat_freebigblocks); ++ondisk->vol0_stat_freebigblocks; hammer_modify_volume_done(trans->rootvol); - trans->hmp->copy_stat_freebigblocks = ondisk->vol0_stat_freebigblocks; + hmp->copy_stat_freebigblocks = ondisk->vol0_stat_freebigblocks; - hammer_unlock(&trans->hmp->free_lock); + hammer_unlock(&hmp->free_lock); if (buffer1) hammer_rel_buffer(buffer1, 0); @@ -201,6 +223,17 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, } /* + * Check whether a free block has been reserved in zone-2. 
+ */ +static int +hammer_freemap_reserved(hammer_mount_t hmp, hammer_off_t zone2_base) +{ + if (RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, zone2_base)) + return(1); + return(0); +} + +/* * Check space availability */ int diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index d43eae30ce..e0375910f3 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.36 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.37 2008/06/10 00:40:31 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -88,10 +88,8 @@ hammer_io_disassociate(hammer_io_structure_t iou, int elseit) /* * If the buffer was locked someone wanted to get rid of it. */ - if (bp->b_flags & B_LOCKED) { + if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_LOCKED; - bp->b_flags |= B_RELBUF; - } /* * elseit is 0 when called from the kernel path, the caller is @@ -100,10 +98,13 @@ hammer_io_disassociate(hammer_io_structure_t iou, int elseit) if (elseit) { KKASSERT(iou->io.released == 0); iou->io.released = 1; + if (iou->io.reclaim) + bp->b_flags |= B_NOCACHE|B_RELBUF; bqrelse(bp); } else { KKASSERT(iou->io.released); } + iou->io.reclaim = 0; switch(iou->io.type) { case HAMMER_STRUCTURE_VOLUME: @@ -224,6 +225,7 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io) void hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) { + hammer_io_structure_t iou; hammer_off_t phys_offset; struct buf *bp; @@ -231,10 +233,15 @@ hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset) (zone2_offset & HAMMER_OFF_SHORT_MASK); if (findblk(volume->devvp, phys_offset)) { bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0); - if (LIST_FIRST(&bp->b_dep) != NULL) { + if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) { + hammer_io_clear_modify(&iou->io); + 
bundirty(bp); + iou->io.reclaim = 1; hammer_io_deallocate(bp); } else { - bp->b_flags |= B_RELBUF; + KKASSERT((bp->b_flags & B_LOCKED) == 0); + bundirty(bp); + bp->b_flags |= B_NOCACHE|B_RELBUF; brelse(bp); } } @@ -293,7 +300,7 @@ hammer_io_release(struct hammer_io *io, int flush) * that our bioops can override kernel decisions with regards to * the buffer). */ - if (flush && io->modified == 0 && io->running == 0) { + if ((flush || io->reclaim) && io->modified == 0 && io->running == 0) { /* * Always disassociate the bp if an explicit flush * was requested and the IO completed with no error @@ -332,8 +339,12 @@ hammer_io_release(struct hammer_io *io, int flush) if (bp->b_flags & B_LOCKED) { hammer_io_disassociate(iou, 1); } else { - io->released = 1; - bqrelse(bp); + if (io->reclaim) { + hammer_io_disassociate(iou, 1); + } else { + io->released = 1; + bqrelse(bp); + } } } else { /* @@ -345,7 +356,7 @@ hammer_io_release(struct hammer_io *io, int flush) crit_enter(); if (io->running == 0 && (bp->b_flags & B_LOCKED)) { regetblk(bp); - if (bp->b_flags & B_LOCKED) { + if ((bp->b_flags & B_LOCKED) || io->reclaim) { io->released = 0; hammer_io_disassociate(iou, 1); } else { @@ -409,15 +420,7 @@ hammer_io_flush(struct hammer_io *io) * Do this before potentially blocking so any attempt to modify the * ondisk while we are blocked blocks waiting for us. */ - KKASSERT(io->mod_list != NULL); - if (io->mod_list == &io->hmp->volu_list || - io->mod_list == &io->hmp->meta_list) { - --io->hmp->locked_dirty_count; - --hammer_count_dirtybufs; - } - TAILQ_REMOVE(io->mod_list, io, mod_entry); - io->mod_list = NULL; - io->modified = 0; + hammer_io_clear_modify(io); /* * Transfer ownership to the kernel and initiate I/O. @@ -589,34 +592,37 @@ hammer_modify_buffer_done(hammer_buffer_t buffer) } /* - * Mark an entity as not being dirty any more -- this usually occurs when - * the governing a-list has freed the entire entity. - * - * XXX + * Mark an entity as not being dirty any more. 
*/ void hammer_io_clear_modify(struct hammer_io *io) { -#if 0 - struct buf *bp; - - io->modified = 0; - XXX mod_list/entry - if ((bp = io->bp) != NULL) { - if (io->released) { - regetblk(bp); - /* BUF_KERNPROC(io->bp); */ - } else { - io->released = 1; - } - if (io->modified == 0) { - bundirty(bp); - bqrelse(bp); - } else { - bdwrite(bp); + if (io->modified) { + KKASSERT(io->mod_list != NULL); + if (io->mod_list == &io->hmp->volu_list || + io->mod_list == &io->hmp->meta_list) { + --io->hmp->locked_dirty_count; + --hammer_count_dirtybufs; } + TAILQ_REMOVE(io->mod_list, io, mod_entry); + io->mod_list = NULL; + io->modified = 0; + } +} + +/* + * Clear the IO's modify list. Even though the IO is no longer modified + * it may still be on the lose_list. This routine is called just before + * the governing hammer_buffer is destroyed. + */ +void +hammer_io_clear_modlist(struct hammer_io *io) +{ + if (io->mod_list) { + KKASSERT(io->mod_list == &io->hmp->lose_list); + TAILQ_REMOVE(io->mod_list, io, mod_entry); + io->mod_list = NULL; } -#endif } /************************************************************************ @@ -776,17 +782,8 @@ hammer_io_checkwrite(struct buf *bp) * We can only clear the modified bit if the IO is not currently * undergoing modification. Otherwise we may miss changes. */ - if (io->modify_refs == 0 && io->modified) { - KKASSERT(io->mod_list != NULL); - if (io->mod_list == &io->hmp->volu_list || - io->mod_list == &io->hmp->meta_list) { - --io->hmp->locked_dirty_count; - --hammer_count_dirtybufs; - } - TAILQ_REMOVE(io->mod_list, io, mod_entry); - io->mod_list = NULL; - io->modified = 0; - } + if (io->modify_refs == 0 && io->modified) + hammer_io_clear_modify(io); /* * The kernel is going to start the IO, set io->running. 
@@ -860,6 +857,8 @@ hammer_io_direct_read(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, hammer_rel_volume(volume, 0); } if (error) { + kprintf("hammer_direct_read: failed @ %016llx\n", + leaf->data_offset); bp = bio->bio_buf; bp->b_error = error; bp->b_flags |= B_ERROR; @@ -914,6 +913,8 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, nbio = push_bio(bio); nbio->bio_offset = volume->ondisk->vol_buf_beg + zone2_offset; + if (hammer_debug_write_release & 1) + nbio->bio_buf->b_flags |= B_RELBUF|B_NOCACHE; vn_strategy(volume->devvp, nbio); } hammer_rel_volume(volume, 0); @@ -926,12 +927,14 @@ hammer_io_direct_write(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf, hammer_io_modify(&buffer->io, 1); bcopy(bp->b_data, ptr, leaf->data_len); hammer_io_modify_done(&buffer->io); - hammer_rel_buffer(buffer, 0); + hammer_rel_buffer(buffer, (hammer_debug_write_release & 2)); bp->b_resid = 0; biodone(bio); } } if (error) { + kprintf("hammer_direct_write: failed @ %016llx\n", + leaf->data_offset); bp = bio->bio_buf; bp->b_resid = 0; bp->b_error = EIO; diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index cf2b4724e6..7c8c417784 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.63 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.64 2008/06/10 00:40:31 dillon Exp $ */ #include "hammer.h" @@ -63,6 +63,11 @@ hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2) if (rec1->leaf.base.key > rec2->leaf.base.key) return(1); +#if 0 + /* + * XXX create_tid is set during sync, memory records are always + * current. Do not match against create_tid. 
+ */ if (rec1->leaf.base.create_tid == 0) { if (rec2->leaf.base.create_tid == 0) return(0); @@ -75,6 +80,7 @@ hammer_rec_rb_compare(hammer_record_t rec1, hammer_record_t rec2) return(-1); if (rec1->leaf.base.create_tid > rec2->leaf.base.create_tid) return(1); +#endif /* * Never match against an item deleted by the front-end. @@ -103,6 +109,11 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec) if (elm->key > rec->leaf.base.key) return(2); +#if 0 + /* + * XXX create_tid is set during sync, memory records are always + * current. Do not match against create_tid. + */ if (elm->create_tid == 0) { if (rec->leaf.base.create_tid == 0) return(0); @@ -114,6 +125,12 @@ hammer_rec_cmp(hammer_base_elm_t elm, hammer_record_t rec) return(-1); if (elm->create_tid > rec->leaf.base.create_tid) return(1); +#endif + /* + * Never match against an item deleted by the front-end. + */ + if (rec->flags & HAMMER_RECF_DELETED_FE) + return(1); return(0); } @@ -134,8 +151,10 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec) return(3); if (leaf->base.rec_type == HAMMER_RECTYPE_DATA) { + /* leaf_end <= rec_beg */ if (leaf->base.key <= rec->leaf.base.key - rec->leaf.data_len) return(-2); + /* leaf_beg >= rec_end */ if (leaf->base.key - leaf->data_len >= rec->leaf.base.key) return(2); } else { @@ -145,6 +164,7 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec) return(2); } +#if 0 if (leaf->base.create_tid == 0) { if (rec->leaf.base.create_tid == 0) return(0); @@ -156,6 +176,12 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec) return(-1); if (leaf->base.create_tid > rec->leaf.base.create_tid) return(1); +#endif + /* + * Never match against an item deleted by the front-end. + */ + if (rec->flags & HAMMER_RECF_DELETED_FE) + return(1); return(0); } @@ -164,9 +190,6 @@ hammer_rec_overlap_compare(hammer_btree_leaf_elm_t leaf, hammer_record_t rec) * is reversed so the comparison result has to be negated. 
key_beg and * key_end are both range-inclusive. * - * The creation timestamp can cause hammer_rec_cmp() to return -1 or +1. - * These do not stop the scan. - * * Localized deletions are not cached in-memory. */ static @@ -825,6 +848,7 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes, &record->leaf.data_offset, errorp); if (record->resv == NULL) { + kprintf("hammer_ip_add_bulk: reservation failed\n"); hammer_rel_mem_record(record); return(NULL); } @@ -839,6 +863,7 @@ hammer_ip_add_bulk(hammer_inode_t ip, off_t file_offset, void *data, int bytes, hammer_ref(&record->lock); /* mem_add eats a reference */ *errorp = hammer_mem_add(record); + KKASSERT(*errorp == 0); return (record); } @@ -1011,6 +1036,8 @@ done: return(error); } +#if 0 + /* * Backend code which actually performs the write to the media. This * routine is typically called from the flusher. The bio will be disposed @@ -1076,6 +1103,7 @@ hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, return(error); } +#endif /* * Backend code. Sync a record to the media. @@ -1084,6 +1112,7 @@ int hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) { hammer_transaction_t trans = cursor->trans; + int64_t file_offset; void *bdata; int error; @@ -1108,11 +1137,12 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) * It is ok for the lookup to return ENOENT. 
*/ if (record->type == HAMMER_MEM_RECORD_DATA) { - KKASSERT(((record->leaf.base.key - record->leaf.data_len) & HAMMER_BUFMASK) == 0); + file_offset = record->leaf.base.key - record->leaf.data_len; + KKASSERT((file_offset & HAMMER_BUFMASK) == 0); error = hammer_ip_delete_range( cursor, record->ip, - record->leaf.base.key - record->leaf.data_len, - HAMMER_BUFSIZE - 1, 1); + file_offset, file_offset + HAMMER_BUFSIZE - 1, + 1); if (error && error != ENOENT) goto done; } @@ -1547,9 +1577,10 @@ next_memory: int64_t base1 = elm->leaf.base.key - elm->leaf.data_len; int64_t base2 = cursor->iprec->leaf.base.key - cursor->iprec->leaf.data_len; - if (base1 == base2) + if (base1 == base2) { + kprintf("G"); r = 0; - kprintf("G"); + } } if (r < 0) { @@ -1909,6 +1940,7 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_inode_t ip, int dodelete; KKASSERT(cursor->flags & HAMMER_CURSOR_BACKEND); + KKASSERT(tid != 0); /* * In-memory (unsynchronized) records can simply be freed. This diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index c8e5729693..d9b4c807f6 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.51 2008/06/08 18:16:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.52 2008/06/10 00:40:31 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. These routines are primarily @@ -309,6 +309,7 @@ hammer_unload_volume(hammer_volume_t volume, void *data __unused) */ volume->io.waitdep = 1; hammer_io_release(&volume->io, 1); + hammer_io_clear_modlist(&volume->io); /* * There should be no references on the volume, no clusters, and @@ -622,8 +623,11 @@ found: /* * Destroy all buffers covering the specified zoneX offset range. This - * is called when the related blockmap layer2 entry is freed. The buffers - * must not be in use or modified. 
+ * is called when the related blockmap layer2 entry is freed or when + * a direct write bypasses our buffer/buffer-cache subsystem. + * + * The buffers may be referenced by the caller itself. Setting reclaim + * will cause the buffer to be destroyed when it's ref count reaches zero. */ void hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset, @@ -642,13 +646,15 @@ hammer_del_buffers(hammer_mount_t hmp, hammer_off_t base_offset, buffer = RB_LOOKUP(hammer_buf_rb_tree, &hmp->rb_bufs_root, base_offset); if (buffer) { - KKASSERT(buffer->io.lock.refs == 0); - KKASSERT(buffer->io.modified == 0); KKASSERT(buffer->zone2_offset == zone2_offset); + hammer_io_clear_modify(&buffer->io); + buffer->io.reclaim = 1; KKASSERT(buffer->volume == volume); - hammer_unload_buffer(buffer, NULL); + if (buffer->io.lock.refs == 0) + hammer_unload_buffer(buffer, NULL); + } else { + hammer_io_inval(volume, zone2_offset); } - hammer_io_inval(volume, zone2_offset); base_offset += HAMMER_BUFSIZE; zone2_offset += HAMMER_BUFSIZE; bytes -= HAMMER_BUFSIZE; @@ -775,6 +781,7 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush) volume = buffer->volume; buffer->volume = NULL; /* sanity */ hammer_rel_volume(volume, 0); + hammer_io_clear_modlist(&buffer->io); freeme = 1; } } @@ -784,7 +791,6 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush) hammer_unref(&buffer->io.lock); crit_exit(); if (freeme) { - KKASSERT(buffer->io.mod_list == NULL); --hammer_count_buffers; kfree(buffer, M_HAMMER); } diff --git a/sys/vfs/hammer/hammer_prune.c b/sys/vfs/hammer/hammer_prune.c index b44ab45657..fb5ccf8eeb 100644 --- a/sys/vfs/hammer/hammer_prune.c +++ b/sys/vfs/hammer/hammer_prune.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.5 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.6 2008/06/10 00:40:31 dillon Exp $ */ #include "hammer.h" @@ -122,8 +122,8 @@ retry: */ cursor.flags |= HAMMER_CURSOR_PRUNING; - error = hammer_btree_last(&cursor); hammer_sync_lock_sh(trans); + error = hammer_btree_last(&cursor); while (error == 0) { /* diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 6d1d4ffab6..af48bbddbe 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.41 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.42 2008/06/10 00:40:31 dillon Exp $ */ #include @@ -54,8 +54,9 @@ int hammer_debug_inode; int hammer_debug_locks; int hammer_debug_btree; int hammer_debug_tid; -int hammer_debug_recover; /* -1 will disable, +1 will force */ +int hammer_debug_recover; /* -1 will disable, +1 will force */ int hammer_debug_recover_faults; +int hammer_debug_write_release; /* if 1 release buffer on strategy */ int hammer_count_inodes; int hammer_count_reclaiming; int hammer_count_records; @@ -93,6 +94,8 @@ SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover, CTLFLAG_RW, &hammer_debug_recover, 0, ""); SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_recover_faults, CTLFLAG_RW, &hammer_debug_recover_faults, 0, ""); +SYSCTL_INT(_vfs_hammer, OID_AUTO, debug_write_release, CTLFLAG_RW, + &hammer_debug_write_release, 0, ""); SYSCTL_INT(_vfs_hammer, OID_AUTO, limit_dirtybufs, CTLFLAG_RW, &hammer_limit_dirtybufs, 0, ""); @@ -237,6 +240,7 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, hmp->free_lock.refs = 1; TAILQ_INIT(&hmp->flush_list); + TAILQ_INIT(&hmp->delay_list); TAILQ_INIT(&hmp->objid_cache_list); TAILQ_INIT(&hmp->undo_lru_list); diff --git 
a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index 98713d14b5..01184e89a8 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.61 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.62 2008/06/10 00:40:31 dillon Exp $ */ #include @@ -1984,7 +1984,7 @@ hammer_vop_strategy_read(struct vop_strategy_args *ap) * * WARNING: If we hit the else clause. */ - if (roff == 0 && n == bp->b_bufsize && + if (roff == 0 && boff == 0 && n == bp->b_bufsize && (rec_offset & HAMMER_BUFMASK) == 0) { error = hammer_io_direct_read(trans.hmp, cursor.leaf, bio); -- 2.11.4.GIT