From 055f5ff8b5fd10006ff65698f619a74ce61e5fa8 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 10 Jan 2008 07:41:03 +0000 Subject: [PATCH] HAMMER 17/many: Refactor IO backend, clean up buffer cache deadlocks. Rewrite HAMMER's IO backend, sans locking which will occur in another patch. The new backend is far less confusing though I wouldn't exactly call it simple. The new backend keeps track of dependancies with a structure->structure dependancy list, plus implements the special case of opening and closing a cluster header. Buffers are synchronized first, then cluster headers, then volume headers. The new backend also removes a number of potential deadlocks. --- sys/vfs/hammer/hammer.h | 105 ++---- sys/vfs/hammer/hammer_btree.c | 24 +- sys/vfs/hammer/hammer_cursor.c | 7 +- sys/vfs/hammer/hammer_inode.c | 5 +- sys/vfs/hammer/hammer_io.c | 672 ++++++++++++++++++------------------ sys/vfs/hammer/hammer_object.c | 10 +- sys/vfs/hammer/hammer_ondisk.c | 461 ++++++++++--------------- sys/vfs/hammer/hammer_recover.c | 6 +- sys/vfs/hammer/hammer_spike.c | 4 +- sys/vfs/hammer/hammer_transaction.c | 4 +- sys/vfs/hammer/hammer_vnops.c | 9 +- 11 files changed, 545 insertions(+), 762 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index c5ab479795..a75e8cb123 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.21 2008/01/09 04:05:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.22 2008/01/10 07:41:03 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -88,7 +88,6 @@ typedef struct hammer_transaction *hammer_transaction_t; */ struct hammer_lock { int refs; /* active references delay writes */ - int modifying; /* indicates buffer being modified */ int lockcount; /* lock count for exclusive/shared access */ int wanted; struct thread *locktd; @@ -241,19 +240,27 @@ enum hammer_io_type { HAMMER_STRUCTURE_VOLUME, HAMMER_STRUCTURE_BUFFER }; union hammer_io_structure; +struct hammer_io; struct worklist { LIST_ENTRY(worklist) node; }; +TAILQ_HEAD(hammer_io_list, hammer_io); + struct hammer_io { struct worklist worklist; struct hammer_lock lock; enum hammer_io_type type; struct buf *bp; int64_t offset; + TAILQ_ENTRY(hammer_io) entry; /* based on modified flag */ + struct hammer_io_list *entry_list; + struct hammer_io_list deplist; u_int modified : 1; /* bp's data was modified */ u_int released : 1; /* bp released (w/ B_LOCKED set) */ + u_int running : 1; /* bp write IO in progress */ + u_int waiting : 1; /* someone is waiting on us */ }; typedef struct hammer_io *hammer_io_t; @@ -295,12 +302,6 @@ struct hammer_supercl { typedef struct hammer_supercl *hammer_supercl_t; -enum hammer_cluster_state { - HAMMER_CLUSTER_IDLE, - HAMMER_CLUSTER_ASYNC, - HAMMER_CLUSTER_OPEN -}; - /* * In-memory cluster representing on-disk buffer * @@ -321,7 +322,6 @@ struct hammer_cluster { struct hammer_base_elm clu_btree_beg; /* copy of on-disk info */ struct hammer_base_elm clu_btree_end; /* copy of on-disk info */ int32_t clu_no; - enum hammer_cluster_state state; }; typedef struct hammer_cluster *hammer_cluster_t; @@ -340,7 +340,6 @@ struct hammer_buffer { u_int64_t buf_type; struct hammer_alist_live alist; struct hammer_node_list clist; - struct hammer_node *save_scan; }; typedef struct hammer_buffer *hammer_buffer_t; @@ -389,6 +388,8 @@ union hammer_io_structure { struct 
hammer_buffer buffer; }; +typedef union hammer_io_structure *hammer_io_structure_t; + #define HAMFS_CLUSTER_DIRTY 0x0001 #include "hammer_cursor.h" @@ -548,7 +549,6 @@ int hammer_ref_cluster(hammer_cluster_t cluster); int hammer_ref_buffer(hammer_buffer_t buffer); void hammer_flush_buffer_nodes(hammer_buffer_t buffer); - void hammer_rel_volume(hammer_volume_t volume, int flush); void hammer_rel_supercl(hammer_supercl_t supercl, int flush); void hammer_rel_cluster(hammer_cluster_t cluster, int flush); @@ -557,6 +557,8 @@ void hammer_rel_buffer(hammer_buffer_t buffer, int flush); hammer_node_t hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp); int hammer_ref_node(hammer_node_t node); +hammer_node_t hammer_ref_node_safe(struct hammer_mount *hmp, + struct hammer_node **cache, int *errorp); void hammer_rel_node(hammer_node_t node); void hammer_cache_node(hammer_node_t node, struct hammer_node **cache); @@ -635,81 +637,22 @@ void hammer_load_spike(hammer_cursor_t cursor, struct hammer_cursor **spikep); int hammer_spike(struct hammer_cursor **spikep); int hammer_recover(struct hammer_cluster *cluster); +void hammer_io_init(hammer_io_t io, enum hammer_io_type type); int hammer_io_read(struct vnode *devvp, struct hammer_io *io); int hammer_io_new(struct vnode *devvp, struct hammer_io *io); void hammer_io_release(struct hammer_io *io, int flush); +void hammer_io_flush(struct hammer_io *io); int hammer_io_checkflush(hammer_io_t io); void hammer_io_notify_cluster(hammer_cluster_t cluster); -void hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info); -void hammer_io_intend_modify(struct hammer_io *io); -void hammer_io_modify_done(struct hammer_io *io); void hammer_io_clear_modify(struct hammer_io *io); +void hammer_io_waitdep(struct hammer_io *io); -#endif - -/* - * Inline support functions (not kernel specific) - */ -static __inline void -hammer_modify_volume(struct hammer_volume *volume) -{ - volume->io.modified = 1; - ++volume->io.lock.modifying; - if (volume->io.released) - hammer_io_intend_modify(&volume->io); -} - -static __inline void -hammer_modify_volume_done(struct hammer_volume *volume) -{ - hammer_io_modify_done(&volume->io); -} - -static __inline void -hammer_modify_supercl(struct hammer_supercl *supercl) -{ - supercl->io.modified = 1; - ++supercl->io.lock.modifying; - if (supercl->io.released) - hammer_io_intend_modify(&supercl->io); -} - -static __inline void -hammer_modify_supercl_done(struct hammer_supercl *supercl) -{ - hammer_io_modify_done(&supercl->io); -} - -static __inline void -hammer_modify_cluster(struct hammer_cluster *cluster) -{ - cluster->io.modified = 1; - ++cluster->io.lock.modifying; - if (cluster->io.released) - hammer_io_intend_modify(&cluster->io); -} - -static __inline void -hammer_modify_cluster_done(struct hammer_cluster *cluster) -{ - hammer_io_modify_done(&cluster->io); -} - -static __inline void -hammer_modify_buffer(struct hammer_buffer *buffer) -{ - hammer_io_notify_cluster(buffer->cluster); - buffer->io.modified = 1; - ++buffer->io.lock.modifying; - if (buffer->io.released) - hammer_io_intend_modify(&buffer->io); -} +void hammer_modify_volume(hammer_volume_t volume); +void hammer_modify_supercl(hammer_supercl_t supercl); +void hammer_modify_cluster(hammer_cluster_t cluster); +void hammer_modify_buffer(hammer_buffer_t buffer); -static __inline void -hammer_modify_buffer_done(struct hammer_buffer *buffer) -{ - hammer_io_modify_done(&buffer->io); -} +#endif static __inline void hammer_modify_node(struct hammer_node 
*node) @@ -717,12 +660,6 @@ hammer_modify_node(struct hammer_node *node) hammer_modify_buffer(node->buffer); } -static __inline void -hammer_modify_node_done(struct hammer_node *node) -{ - hammer_modify_buffer_done(node->buffer); -} - /* * Return the cluster-relative byte offset of an element within a buffer */ diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index eea29c12e5..721d7c6cd9 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.16 2008/01/03 06:48:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.17 2008/01/10 07:41:03 dillon Exp $ */ /* @@ -460,7 +460,6 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) } node->elms[i] = *elm; ++node->count; - hammer_modify_node_done(cursor->node); KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->leaf.base) <= 0); KKASSERT(hammer_btree_cmp(cursor->right_bound, &elm->leaf.base) > 0); @@ -478,7 +477,6 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_elm_t elm) parent = cursor->parent->ondisk; i = cursor->parent_index; ++parent->elms[i].internal.subtree_count; - hammer_modify_node_done(cursor->parent); KKASSERT(parent->elms[i].internal.subtree_count <= node->count); } return(0); @@ -535,7 +533,6 @@ hammer_btree_delete(hammer_cursor_t cursor) (ondisk->count - i - 1) * sizeof(ondisk->elms[0])); } --ondisk->count; - hammer_modify_node_done(node); if (cursor->parent != NULL) { /* * Adjust parent's notion of the leaf's count. subtree_count @@ -548,7 +545,6 @@ hammer_btree_delete(hammer_cursor_t cursor) elm = &parent->ondisk->elms[cursor->parent_index]; if (elm->internal.subtree_count) --elm->internal.subtree_count; - hammer_modify_node_done(parent); KKASSERT(elm->internal.subtree_count <= ondisk->count); } @@ -800,7 +796,6 @@ btree_search(hammer_cursor_t cursor, int flags) save = node->elms[0].subtree_type; node->elms[0].base = *cursor->left_bound; node->elms[0].subtree_type = save; - hammer_modify_node_done(cursor->node); } else if (i == node->count) { /* * Terminate early if not inserting and the key is @@ -835,7 +830,6 @@ btree_search(hammer_cursor_t cursor, int flags) cursor->right_bound) != 0) { hammer_modify_node(cursor->node); elm->base = *cursor->right_bound; - hammer_modify_node_done(cursor->node); } --i; } else { @@ -1119,7 +1113,6 @@ btree_split_internal(hammer_cursor_t cursor) ondisk->elms[1].base = node->cluster->clu_btree_end; made_root = 1; parent_index = 0; /* index of current node in parent */ - hammer_modify_node_done(parent); } else { made_root = 0; parent = cursor->parent; @@ -1198,7 +1191,6 @@ btree_split_internal(hammer_cursor_t cursor) parent_elm->internal.subtree_vol_no = 0; parent_elm->internal.rec_offset = 0; ++ondisk->count; - hammer_modify_node_done(parent); /* * The children of new_node need their parent pointer set to new_node. 
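The hammer.h and hammer_btree.c hunks above drop the paired hammer_modify_*_done() calls: previously every on-disk edit had to be bracketed so a per-structure "modifying" count could hold off flushes, whereas the reworked API is a single hammer_modify_*() call and the IO layer tracks write ordering separately. A minimal userspace sketch of the calling-convention change, using illustrative structures rather than the kernel's:

    #include <assert.h>

    /* Simplified stand-ins: the old scheme counted open modify brackets,
     * the new scheme only latches a dirty bit. */
    struct old_io { int modified; int modifying; };
    struct new_io { int modified; };

    /* Old convention: bracket every edit; a flush had to wait for
     * modifying to reach zero. */
    static void old_modify(struct old_io *io)      { io->modified = 1; ++io->modifying; }
    static void old_modify_done(struct old_io *io) { assert(io->modifying > 0); --io->modifying; }

    /* New convention: one call marks the structure dirty, no done() pair. */
    static void new_modify(struct new_io *io)      { io->modified = 1; }

    int
    main(void)
    {
        struct old_io a = { 0, 0 };
        struct new_io b = { 0 };

        old_modify(&a);  /* ...edit ondisk data... */  old_modify_done(&a);
        new_modify(&b);  /* ...edit ondisk data...     no matching done() */

        assert(a.modifying == 0 && a.modified && b.modified);
        return 0;
    }

This is why much of the remainder of the patch consists of one-line removals of the *_done() calls.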
@@ -1217,7 +1209,6 @@ btree_split_internal(hammer_cursor_t cursor) if (made_root) { hammer_modify_cluster(node->cluster); node->cluster->ondisk->clu_btree_root = parent->node_offset; - hammer_modify_cluster_done(node->cluster); node->ondisk->parent = parent->node_offset; if (cursor->parent) { hammer_unlock(&cursor->parent->lock); @@ -1225,8 +1216,6 @@ btree_split_internal(hammer_cursor_t cursor) } cursor->parent = parent; /* lock'd and ref'd */ } - hammer_modify_node_done(new_node); - hammer_modify_node_done(node); /* @@ -1319,7 +1308,6 @@ btree_split_leaf(hammer_cursor_t cursor) ondisk->elms[0].internal.subtree_type = leaf->ondisk->type; ondisk->elms[0].internal.subtree_offset = leaf->node_offset; ondisk->elms[1].base = leaf->cluster->clu_btree_end; - hammer_modify_node_done(parent); made_root = 1; parent_index = 0; /* insertion point in parent */ } else { @@ -1397,7 +1385,6 @@ btree_split_leaf(hammer_cursor_t cursor) parent_elm->internal.rec_offset = 0; mid_boundary = &parent_elm->base; ++ondisk->count; - hammer_modify_node_done(parent); /* * The cluster's root pointer may have to be updated. @@ -1405,7 +1392,6 @@ btree_split_leaf(hammer_cursor_t cursor) if (made_root) { hammer_modify_cluster(leaf->cluster); leaf->cluster->ondisk->clu_btree_root = parent->node_offset; - hammer_modify_cluster_done(leaf->cluster); leaf->ondisk->parent = parent->node_offset; if (cursor->parent) { hammer_unlock(&cursor->parent->lock); @@ -1413,8 +1399,6 @@ btree_split_leaf(hammer_cursor_t cursor) } cursor->parent = parent; /* lock'd and ref'd */ } - hammer_modify_node_done(leaf); - hammer_modify_node_done(new_leaf); /* * Ok, now adjust the cursor depending on which element the original @@ -1490,7 +1474,6 @@ btree_remove(hammer_cursor_t cursor) ondisk->type = HAMMER_BTREE_TYPE_LEAF; ondisk->count = 0; cursor->index = 0; - hammer_modify_node_done(cursor->node); kprintf("EMPTY ROOT OF ROOT CLUSTER -> LEAF\n"); return(0); } @@ -1575,7 +1558,6 @@ btree_remove(hammer_cursor_t cursor) bcopy(&ondisk->elms[i+1], &ondisk->elms[i], (ondisk->count - i) * sizeof(ondisk->elms[0])); --ondisk->count; - hammer_modify_node_done(node); /* * Adjust the parent-parent's (now parent) reference to the parent @@ -1586,13 +1568,11 @@ btree_remove(hammer_cursor_t cursor) if (elm->internal.subtree_count != ondisk->count) { hammer_modify_node(parent); elm->internal.subtree_count = ondisk->count; - hammer_modify_node_done(parent); } if (elm->subtree_type != HAMMER_BTREE_TYPE_CLUSTER && elm->subtree_type != ondisk->type) { hammer_modify_node(parent); elm->subtree_type = ondisk->type; - hammer_modify_node_done(parent); } } @@ -1636,7 +1616,6 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm) hammer_lock_ex(&child->lock); child->ondisk->parent = node->node_offset; hammer_unlock(&child->lock); - hammer_modify_node_done(child); hammer_rel_node(child); } break; @@ -1655,7 +1634,6 @@ btree_set_parent(hammer_node_t node, hammer_btree_elm_t elm) hammer_lock_ex(&cluster->io.lock); cluster->ondisk->clu_btree_parent_offset = node->node_offset; hammer_unlock(&cluster->io.lock); - hammer_modify_cluster_done(cluster); KKASSERT(cluster->ondisk->clu_btree_parent_clu_no == node->cluster->clu_no); KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 36d161d4c0..d36d1530f1 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.10 2008/01/03 06:48:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.11 2008/01/10 07:41:03 dillon Exp $ */ /* @@ -62,8 +62,7 @@ hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache, * Step 1 - acquire a locked node from the cache if possible */ if (cache && *cache) { - node = *cache; - error = hammer_ref_node(node); + node = hammer_ref_node_safe(hmp, cache, &error); if (error == 0) { hammer_lock_ex(&node->lock); if (node->flags & HAMMER_NODE_DELETED) { @@ -71,8 +70,6 @@ hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache, hammer_rel_node(node); node = NULL; } - } else { - node = NULL; } } else { node = NULL; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index abee226064..75bc1aeddf 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.17 2008/01/03 06:48:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.18 2008/01/10 07:41:03 dillon Exp $ */ #include "hammer.h" @@ -448,7 +448,6 @@ retry: if ((ip->flags & HAMMER_INODE_ONDISK) == 0) { hammer_modify_volume(ip->hmp->rootvol); ++ip->hmp->rootvol->ondisk->vol0_stat_inodes; - hammer_modify_volume_done(ip->hmp->rootvol); ip->flags |= HAMMER_INODE_ONDISK; } } @@ -486,7 +485,6 @@ hammer_update_itimes(hammer_inode_t ip) hammer_modify_buffer(cursor.record_buffer); rec->ino_atime = ip->ino_rec.ino_atime; rec->ino_mtime = ip->ino_rec.ino_mtime; - hammer_modify_buffer_done(cursor.record_buffer); ip->flags &= ~HAMMER_INODE_ITIMES; /* XXX recalculate crc */ } @@ -660,7 +658,6 @@ hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete) hammer_modify_inode(&trans, ip, HAMMER_INODE_DELETED); hammer_modify_volume(ip->hmp->rootvol); --ip->hmp->rootvol->ondisk->vol0_stat_inodes; - hammer_modify_volume_done(ip->hmp->rootvol); } /* diff --git a/sys/vfs/hammer/hammer_io.c b/sys/vfs/hammer/hammer_io.c index 7e5dd1e606..3a59d307bd 100644 --- a/sys/vfs/hammer/hammer_io.c +++ b/sys/vfs/hammer/hammer_io.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.13 2008/01/09 00:46:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.14 2008/01/10 07:41:03 dillon Exp $ */ /* * IO Primitives and buffer cache management @@ -52,81 +52,103 @@ #include #include +static void hammer_io_deallocate(struct buf *bp); +static int hammer_io_checkwrite(struct buf *bp); + +/* + * Initialize an already-zero'd hammer_io structure + */ +void +hammer_io_init(hammer_io_t io, enum hammer_io_type type) +{ + io->type = type; + TAILQ_INIT(&io->deplist); +} + /* * Helper routine to disassociate a buffer cache buffer from an I/O - * structure. + * structure. Called with the io structure exclusively locked. + * + * The io may have 0 or 1 references depending on who called us. The + * caller is responsible for dealing with the refs. + * + * This call can only be made when no action is required on the buffer. + * HAMMER must own the buffer (released == 0) since mess around with it. 
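The struct hammer_io changes in the hammer.h hunk, together with hammer_io_init() above, are the core of the new backend: each IO carries a deplist of IOs that must be written before it, an entry/entry_list pair recording which parent list it sits on while modified, and running/waiting bits for in-flight writes. A compilable sketch of those relationships using <sys/queue.h>; the type and field names are simplified stand-ins, not the kernel definitions:

    #include <sys/queue.h>

    struct hio;
    TAILQ_HEAD(hio_list, hio);

    /* Cut-down model of the reworked struct hammer_io bookkeeping. */
    struct hio {
        TAILQ_ENTRY(hio)  entry;       /* linkage while on a parent's deplist */
        struct hio_list  *entry_list;  /* which deplist we are queued on */
        struct hio_list   deplist;     /* IOs whose writes must finish before ours */
        unsigned modified : 1;         /* dirty data not yet written */
        unsigned released : 1;         /* bp handed back to the buffer cache */
        unsigned running  : 1;         /* write IO in flight */
        unsigned waiting  : 1;         /* a thread is sleeping on completion */
    };

    /* Analogue of hammer_io_init(): the structure is assumed pre-zeroed. */
    static void
    hio_init(struct hio *io)
    {
        TAILQ_INIT(&io->deplist);
    }

    int
    main(void)
    {
        static struct hio buffer, cluster;   /* zero-initialized */

        hio_init(&buffer);
        hio_init(&cluster);

        /* First modification of the buffer queues it on the cluster's deplist,
         * so the cluster header cannot be written until the buffer is. */
        buffer.modified = 1;
        buffer.entry_list = &cluster.deplist;
        TAILQ_INSERT_TAIL(&cluster.deplist, &buffer, entry);
        return TAILQ_EMPTY(&cluster.deplist);   /* 0 */
    }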
*/ static void -hammer_io_disassociate(union hammer_io_structure *io) +hammer_io_disassociate(hammer_io_structure_t iou, int elseit) { - struct buf *bp = io->io.bp; + struct buf *bp = iou->io.bp; - KKASSERT(io->io.released && io->io.modified == 0); + KKASSERT(TAILQ_EMPTY(&iou->io.deplist) && iou->io.modified == 0); buf_dep_init(bp); - io->io.bp = NULL; - bp->b_flags &= ~B_LOCKED; + iou->io.bp = NULL; + if (elseit) { + KKASSERT(iou->io.released == 0); + iou->io.released = 1; + bqrelse(bp); + } else { + KKASSERT(iou->io.released); + } - switch(io->io.type) { + switch(iou->io.type) { case HAMMER_STRUCTURE_VOLUME: - io->volume.ondisk = NULL; - io->volume.alist.meta = NULL; + iou->volume.ondisk = NULL; + iou->volume.alist.meta = NULL; break; case HAMMER_STRUCTURE_SUPERCL: - io->supercl.ondisk = NULL; - io->supercl.alist.meta = NULL; + iou->supercl.ondisk = NULL; + iou->supercl.alist.meta = NULL; break; case HAMMER_STRUCTURE_CLUSTER: - /*KKASSERT((io->cluster.ondisk->clu_flags & HAMMER_CLUF_OPEN) == 0);*/ - io->cluster.ondisk = NULL; - io->cluster.alist_master.meta = NULL; - io->cluster.alist_btree.meta = NULL; - io->cluster.alist_record.meta = NULL; - io->cluster.alist_mdata.meta = NULL; + iou->cluster.ondisk = NULL; + iou->cluster.alist_master.meta = NULL; + iou->cluster.alist_btree.meta = NULL; + iou->cluster.alist_record.meta = NULL; + iou->cluster.alist_mdata.meta = NULL; break; case HAMMER_STRUCTURE_BUFFER: - io->buffer.ondisk = NULL; - io->buffer.alist.meta = NULL; + iou->buffer.ondisk = NULL; + iou->buffer.alist.meta = NULL; break; } } /* - * Mark a cluster as being closed. This is done as late as possible, - * only when we are asked to flush the cluster + * Wait for any physical IO to complete */ static void -hammer_close_cluster(hammer_cluster_t cluster) +hammer_io_wait(hammer_io_t io) { - while (cluster->state == HAMMER_CLUSTER_ASYNC) - tsleep(cluster, 0, "hmrdep", 0); - if (cluster->state == HAMMER_CLUSTER_OPEN) { - cluster->state = HAMMER_CLUSTER_IDLE; - hammer_modify_cluster(cluster); - cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN; - hammer_modify_cluster_done(cluster); - kprintf("CLOSE CLUSTER\n"); + if (io->running) { + crit_enter(); + tsleep_interlock(io); + io->waiting = 1; + for (;;) { + tsleep(io, 0, "hmrflw", 0); + if (io->running == 0) + break; + tsleep_interlock(io); + io->waiting = 1; + if (io->running == 0) + break; + } + crit_exit(); } } -/* - * Hack XXX - called from kernel syncer via hammer_io_checkwrite() when it - * wants to flush buffer. Because we disassociate after this call and - * because the kernel is already intending to write out the buffer, don't - * set the io.modified bit. - */ -static void -hammer_close_cluster_quick(hammer_cluster_t cluster) +void +hammer_io_waitdep(hammer_io_t io) { - if (cluster->state == HAMMER_CLUSTER_OPEN) { - cluster->state = HAMMER_CLUSTER_IDLE; - cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN; - kprintf("CLOSE CLUSTER ON KERNEL WRITE\n"); + while (TAILQ_FIRST(&io->deplist)) { + kprintf("waitdep %p\n", io); + tsleep(io, 0, "hmrdep", hz); } } - /* - * Load bp for a HAMMER structure. + * Load bp for a HAMMER structure. The io is exclusively locked by the + * caller. 
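hammer_io_wait() above registers on the wait channel with tsleep_interlock(), sets io->waiting, and re-tests io->running after every wakeup because a wakeup can race a fresh write; hammer_io_waitdep() simply re-polls until the dependency list drains. The same re-check-after-wakeup pattern expressed with a pthread mutex and condition variable, purely as a userspace analogy (the kernel primitives are not pthreads):

    #include <pthread.h>

    /* Minimal model of an IO whose write-in-progress bit is awaited. */
    struct io {
        pthread_mutex_t mtx;
        pthread_cond_t  cv;
        int             running;   /* write IO in progress */
        int             waiting;   /* someone is sleeping on us */
    };

    /* Analogue of hammer_io_wait(): keep re-checking after every wakeup. */
    void
    io_wait(struct io *io)
    {
        pthread_mutex_lock(&io->mtx);
        while (io->running) {
            io->waiting = 1;
            pthread_cond_wait(&io->cv, &io->mtx);  /* like tsleep on the io */
        }
        pthread_mutex_unlock(&io->mtx);
    }

    /* Analogue of the wakeup() issued from the write-completion callback. */
    void
    io_write_done(struct io *io)
    {
        pthread_mutex_lock(&io->mtx);
        io->running = 0;
        if (io->waiting) {
            io->waiting = 0;
            pthread_cond_broadcast(&io->cv);
        }
        pthread_mutex_unlock(&io->mtx);
    }

    int
    main(void)
    {
        struct io io = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 };

        io_wait(&io);        /* returns immediately: nothing running */
        io_write_done(&io);  /* harmless no-op wakeup */
        return 0;
    }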
*/ int hammer_io_read(struct vnode *devvp, struct hammer_io *io) @@ -144,6 +166,8 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io) } io->modified = 0; /* no new modifications yet */ io->released = 0; /* we hold an active lock on bp */ + io->running = 0; + io->waiting = 0; } else { error = 0; } @@ -154,6 +178,9 @@ hammer_io_read(struct vnode *devvp, struct hammer_io *io) * Similar to hammer_io_read() but returns a zero'd out buffer instead. * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background * I/O so we can call it. + * + * The caller is responsible for calling hammer_modify_*() on the appropriate + * HAMMER structure. */ int hammer_io_new(struct vnode *devvp, struct hammer_io *io) @@ -165,7 +192,10 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io) bp = io->bp; bp->b_ops = &hammer_bioops; LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node); - io->released = 0; /* we hold an active lock on bp */ + io->modified = 0; + io->released = 0; + io->running = 0; + io->waiting = 0; BUF_KERNPROC(bp); } else { if (io->released) { @@ -174,214 +204,170 @@ hammer_io_new(struct vnode *devvp, struct hammer_io *io) io->released = 0; } } - io->modified = 1; vfs_bio_clrbuf(bp); return(0); } /* - * This routine is called when a buffer within a cluster is modified. We - * mark the cluster open and immediately initiate asynchronous I/O. Any - * related hammer_buffer write I/O blocks until our async write completes. - * This guarentees (inasmuch as the OS can) that the cluster recovery code - * will see a cluster marked open if a crash occured while the filesystem - * still had dirty buffers associated with that cluster. - * - * XXX - */ -void -hammer_io_notify_cluster(hammer_cluster_t cluster) -{ - struct hammer_io *io = &cluster->io; - - if (cluster->state == HAMMER_CLUSTER_IDLE) { - hammer_lock_ex(&cluster->io.lock); - if (cluster->state == HAMMER_CLUSTER_IDLE) { - if (io->released) - regetblk(io->bp); - else - io->released = 1; - kprintf("MARK CLUSTER OPEN\n"); - cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN; - cluster->state = HAMMER_CLUSTER_ASYNC; - cluster->io.modified = 1; - bawrite(io->bp); - } - hammer_unlock(&cluster->io.lock); - } -} - -/* * This routine is called on the last reference to a hammer structure. - * Regardless of the state io->modified must be cleared when we return. + * The io is usually locked exclusively (but may not be during unmount). * - * If flush is non-zero we have to completely disassociate the bp from the - * structure (which may involve blocking). Otherwise we can leave the bp - * passively associated with the structure. + * If flush is 1, or B_LOCKED was set indicating that the kernel + * wanted to recycle the buffer, and there are no dependancies, this + * function will issue an asynchronous write. * - * The caller is holding io->lock exclusively. + * If flush is 2 this function waits until all I/O has completed and + * disassociates the bp from the IO before returning, unless there + * are still other references. */ void hammer_io_release(struct hammer_io *io, int flush) { - union hammer_io_structure *iou = (void *)io; - hammer_cluster_t cluster; struct buf *bp; - int modified; - if ((bp = io->bp) != NULL) { - /* - * If neither we nor the kernel want to flush the bp, we can - * stop here. Make sure the bp is passively released - * before returning. Even though we are still holding it, - * we want to be notified when the kernel wishes to flush - * it out so make sure B_DELWRI is properly set if we had - * made modifications. 
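The new hammer_io_release() comment above distinguishes three behaviours by the flush argument: 0 leaves the buffer passively associated, 1 (or a kernel B_LOCKED hint) pushes dirty data out asynchronously, and 2 additionally waits for the write and disassociates the bp. A reduced decision-ladder sketch; the enum and helper names are illustrative, and the reference counting and B_LOCKED handling of the real routine are omitted:

    enum { RELEASE_PASSIVE = 0, RELEASE_FLUSH = 1, RELEASE_FLUSH_WAIT = 2 };

    /* Reduced model of the state hammer_io_release() looks at. */
    struct io {
        int has_bp;        /* buffer cache buffer still attached */
        int modified;      /* dirty data not yet written */
        int running;       /* async write already in flight */
        int kernel_wants;  /* stands in for the B_LOCKED hint */
    };

    /* Illustrative stand-ins for the real actions, not the kernel API. */
    static void start_async_write(struct io *io) { io->modified = 0; io->running = 1; }
    static void wait_for_write(struct io *io)    { io->running = 0; }
    static void disassociate(struct io *io)      { io->has_bp = 0; }

    static void
    io_release(struct io *io, int flush)
    {
        if (!io->has_bp)
            return;
        /* Dirty data is pushed out if the caller or the kernel asked for it. */
        if (io->modified && (flush || io->kernel_wants))
            start_async_write(io);
        /* Level 2 additionally waits for the write and drops the buffer. */
        if (flush == RELEASE_FLUSH_WAIT) {
            if (io->running)
                wait_for_write(io);
            if (!io->modified && !io->running)
                disassociate(io);
        }
    }

    int
    main(void)
    {
        struct io io = { 1, 1, 0, 0 };

        io_release(&io, RELEASE_FLUSH_WAIT);  /* write, wait, then detach bp */
        return io.has_bp;                     /* 0 on the expected path */
    }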
- */ - if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) { - if ((bp->b_flags & B_DELWRI) == 0 && io->modified) { - if (io->released) - regetblk(bp); - else - io->released = 1; - io->modified = 0; - bdwrite(bp); - } else if (io->released == 0) { - /* buffer write state already synchronized */ - io->modified = 0; - io->released = 1; - bqrelse(bp); - } else { - /* buffer write state already synchronized */ - io->modified = 0; - } - return; - } + if ((bp = io->bp) == NULL) + return; - /* - * Either we want to flush the buffer or the kernel tried to - * flush the buffer. - * - * If this is a hammer_buffer we may have to wait for the - * cluster header write to complete. - */ - if (iou->io.type == HAMMER_STRUCTURE_BUFFER && - (io->modified || (bp->b_flags & B_DELWRI))) { - cluster = iou->buffer.cluster; - while (cluster->state == HAMMER_CLUSTER_ASYNC) - tsleep(iou->buffer.cluster, 0, "hmrdep", 0); - } +#if 0 + /* + * If flush is 2 wait for dependancies + */ + while (flush == 2 && TAILQ_FIRST(&io->deplist)) { + hammer_io_wait(TAILQ_FIRST(&io->deplist)); + } +#endif - /* - * If we have an open cluster header, close it - */ - if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) { - hammer_close_cluster(&iou->cluster); - } + /* + * Try to flush a dirty IO to disk if asked to by the caller + * or if the kernel tried to flush the buffer in the past. + * + * The flush will fail if any dependancies are present. + */ + if (io->modified && (flush || bp->b_flags & B_LOCKED)) + hammer_io_flush(io); - /* - * Gain ownership of the buffer. Nothing can take it away - * from the io structure while we have it locked, so we - * can safely reget. - * - * Once our thread owns the buffer we can disassociate it - * from the io structure. - */ - if (io->released) + /* + * If flush is 2 we wait for the IO to complete. + */ + if (flush == 2 && io->running) { + hammer_io_wait(io); + } + + /* + * Actively or passively release the buffer. Modified IOs with + * dependancies cannot be released. + */ + if (flush && io->modified == 0 && io->running == 0) { + KKASSERT(TAILQ_EMPTY(&io->deplist)); + if (io->released) { regetblk(bp); - else + io->released = 0; + } + hammer_io_disassociate((hammer_io_structure_t)io, 1); + } else if (io->modified) { + if (io->released == 0 && TAILQ_EMPTY(&io->deplist)) { io->released = 1; - modified = io->modified; - io->modified = 0; - hammer_io_disassociate(iou); - - /* - * Now dispose of the buffer. Someone tried to flush, so - * issue the I/O immediately. - */ - if (modified || (bp->b_flags & B_DELWRI)) - bawrite(bp); - else - bqrelse(bp); + bdwrite(bp); + } + } else if (io->released == 0) { + io->released = 1; + bqrelse(bp); } } /* - * Flush dirty data, if any. + * This routine is called with a locked IO when a flush is desired. */ void -hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info) +hammer_io_flush(struct hammer_io *io) { struct buf *bp; - int error; -again: - if ((bp = io->bp) == NULL) + /* + * Can't flush if the IO isn't modified or if it has dependancies. + */ + if (io->modified == 0) return; - if (bp->b_flags & B_DELWRI) - io->modified = 1; + if (TAILQ_FIRST(&io->deplist)) + return; + + KKASSERT(io->bp); + + bp = io->bp; /* - * We can't initiate a write while the buffer is being modified - * by someone. + * If we are trying to flush a buffer we have to wait until the + * cluster header for the mark-OPEN has completed its I/O. 
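The commit message's ordering guarantee, buffers first, then cluster headers, then volume headers, falls out of two rules visible in hammer_io_flush() above: an IO whose deplist is non-empty refuses to flush, and a dependent IO is only unhooked from its parent's list when its own write completes. A small simulation of that ordering; list handling uses <sys/queue.h> and the write and its completion are collapsed into one step for brevity:

    #include <sys/queue.h>
    #include <stdio.h>

    struct io;
    TAILQ_HEAD(io_list, io);

    struct io {
        const char      *name;
        TAILQ_ENTRY(io)  entry;
        struct io_list  *entry_list;  /* parent deplist we sit on while dirty */
        struct io_list   deplist;     /* children that must be written first */
        int              modified;
    };

    /* A write "completes" immediately in this model: clear the dirty bit and
     * unhook from the parent's dependency list (what hammer_io_complete does). */
    static int
    io_flush(struct io *io)
    {
        if (!io->modified)
            return (1);
        if (!TAILQ_EMPTY(&io->deplist))      /* children still dirty: refuse */
            return (0);
        printf("write %s\n", io->name);
        io->modified = 0;
        if (io->entry_list) {
            TAILQ_REMOVE(io->entry_list, io, entry);
            io->entry_list = NULL;
        }
        return (1);
    }

    static void
    mark_dirty(struct io *io, struct io *parent)
    {
        io->modified = 1;
        io->entry_list = &parent->deplist;
        TAILQ_INSERT_TAIL(&parent->deplist, io, entry);
    }

    int
    main(void)
    {
        struct io vol = { "volume" }, clu = { "cluster" }, buf = { "buffer" };

        TAILQ_INIT(&vol.deplist); TAILQ_INIT(&clu.deplist); TAILQ_INIT(&buf.deplist);
        vol.modified = 1;            /* volume header dirtied directly */
        mark_dirty(&clu, &vol);      /* volume waits on the cluster header */
        mark_dirty(&buf, &clu);      /* cluster header waits on the buffer */

        io_flush(&vol);              /* refused: cluster still dirty */
        io_flush(&clu);              /* refused: buffer still dirty */
        io_flush(&buf);              /* buffer -> cluster -> volume order */
        io_flush(&clu);
        io_flush(&vol);
        return 0;
    }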
*/ - while (io->lock.modifying) { - io->lock.wanted = 1; - kprintf("DELAYING IO FLUSH BP %p TYPE %d REFS %d modifying %d\n", - bp, io->type, io->lock.refs, io->lock.modifying); - tsleep(&io->lock, 0, "hmrfls", 0); + if (io->type == HAMMER_STRUCTURE_BUFFER) { + hammer_io_structure_t iou = (void *)io; + hammer_cluster_t cluster = iou->buffer.cluster; + + if (cluster->io.running) { + kprintf("WAIT CLUSTER OPEN %d\n", cluster->clu_no); + hammer_io_wait(&cluster->io); + kprintf("WAIT CLUSTER OPEN OK\n"); + } } - hammer_lock_ex(&io->lock); - if (io->lock.modifying || io->bp == NULL) { - hammer_unlock(&io->lock); - goto again; + if (io->type == HAMMER_STRUCTURE_CLUSTER) { + /* + * Mark the cluster closed if we can + */ + hammer_io_checkwrite(io->bp); } - - /* - * Acquire ownership of the buffer cache buffer so we can flush it - * out. - */ if (io->released) { - if (io->modified == 0) - goto done; regetblk(bp); - } else { - io->released = 1; - } - - /* - * Return the bp to the system, issuing I/O if necessary. The - * system will issue a callback to us when it actually wants to - * throw the bp away. - */ - if (io->modified == 0) { - bqrelse(bp); - } else if (info->waitfor & MNT_WAIT) { - io->modified = 0; - error = bwrite(bp); - if (error) - info->error = error; - } else { - io->modified = 0; - bawrite(bp); + /* BUF_KERNPROC(io->bp); */ + io->released = 0; } -done: - hammer_unlock(&io->lock); + io->released = 1; + io->running = 1; + bawrite(bp); } -/* - * Called prior to any modifications being made to ondisk data. This - * forces the caller to wait for any writes to complete. We explicitly - * avoid the write-modify race. +/************************************************************************ + * BUFFER DIRTYING * + ************************************************************************ + * + * These routines deal with dependancies created when IO buffers get + * modified. The caller must call hammer_modify_*() on a referenced + * HAMMER structure prior to modifying its on-disk data. * - * This routine is only called on hammer structures which are already - * actively referenced. + * Any intent to modify an IO buffer acquires the related bp and imposes + * various write ordering dependancies. */ -void -hammer_io_intend_modify(struct hammer_io *io) + +/* + * Ensure that the bp is acquired and return non-zero on a 0->1 transition + * of the modified bit. + */ +static __inline +int +hammer_io_modify(hammer_io_t io, struct hammer_io_list *list) { + int r = 0; + KKASSERT(io->lock.refs != 0 && io->bp != NULL); - if (io->released) { + if (io->modified == 0) { + hammer_lock_ex(&io->lock); + if (io->modified == 0) { + if (io->released) { + regetblk(io->bp); + BUF_KERNPROC(io->bp); + io->released = 0; + } + io->modified = 1; + io->entry_list = list; + if (list) + TAILQ_INSERT_TAIL(list, io, entry); + r = 1; + } + hammer_unlock(&io->lock); + } else if (io->released) { + /* + * Make sure no IO is occuring while we modify the contents + * of the buffer. XXX should be able to avoid doing this. + */ hammer_lock_ex(&io->lock); if (io->released) { regetblk(io->bp); @@ -390,34 +376,77 @@ hammer_io_intend_modify(struct hammer_io *io) } hammer_unlock(&io->lock); } + return(r); +} + +void +hammer_modify_volume(hammer_volume_t volume) +{ + hammer_io_modify(&volume->io, NULL); +} + +void +hammer_modify_supercl(hammer_supercl_t supercl) +{ + hammer_io_modify(&supercl->io, &supercl->volume->io.deplist); +} + +/* + * Caller intends to modify a cluster's ondisk structure. 
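hammer_io_modify() above uses a double-checked pattern: test io->modified without the lock, take the lock, re-test, re-acquire the bp if it had been released, and report whether this was the 0->1 transition so the caller can do one-time work (hammer_modify_buffer uses that return value to open the cluster). A sketch of just that latch, with a pthread mutex standing in for hammer_lock_ex()/hammer_unlock():

    #include <pthread.h>
    #include <stdio.h>

    /* Reduced model: the interesting part is the double-checked transition. */
    struct io {
        pthread_mutex_t lock;      /* stands in for the hammer_lock */
        int             modified;
        int             released;  /* bp handed back to the buffer cache */
    };

    /* Returns non-zero only for the first modification, so the caller can do
     * one-time work such as marking the governing cluster open. */
    static int
    io_modify(struct io *io)
    {
        int first = 0;

        if (io->modified == 0) {               /* cheap unlocked test */
            pthread_mutex_lock(&io->lock);
            if (io->modified == 0) {           /* re-test under the lock */
                if (io->released)
                    io->released = 0;          /* re-acquire the bp (regetblk) */
                io->modified = 1;
                first = 1;
            }
            pthread_mutex_unlock(&io->lock);
        }
        return (first);
    }

    int
    main(void)
    {
        struct io io = { PTHREAD_MUTEX_INITIALIZER, 0, 1 };

        printf("first=%d\n", io_modify(&io));   /* 1: one-time work happens */
        printf("first=%d\n", io_modify(&io));   /* 0: already dirty */
        return 0;
    }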
+ */ +void +hammer_modify_cluster(hammer_cluster_t cluster) +{ + hammer_io_modify(&cluster->io, &cluster->volume->io.deplist); } +/* + * Caller intends to modify a buffer's ondisk structure. The related + * cluster must be marked open prior to being able to flush the modified + * buffer so get that I/O going now. + */ void -hammer_io_modify_done(struct hammer_io *io) +hammer_modify_buffer(hammer_buffer_t buffer) { - KKASSERT(io->lock.modifying > 0); - --io->lock.modifying; - if (io->lock.wanted && io->lock.modifying == 0) { - io->lock.wanted = 0; - wakeup(&io->lock); + hammer_cluster_t cluster = buffer->cluster; + + if (hammer_io_modify(&buffer->io, &cluster->io.deplist)) { + hammer_modify_cluster(cluster); + if ((cluster->ondisk->clu_flags & HAMMER_CLUF_OPEN) == 0) { + hammer_lock_ex(&cluster->io.lock); + if ((cluster->ondisk->clu_flags & HAMMER_CLUF_OPEN) == 0) { + KKASSERT(cluster->io.released == 0); + cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN; + cluster->io.released = 1; + cluster->io.running = 1; + bawrite(cluster->io.bp); + kprintf("OPEN CLUSTER %d\n", cluster->clu_no); + } + hammer_unlock(&cluster->io.lock); + } } } /* - * Mark an entity as not being dirty any more -- usually occurs when + * Mark an entity as not being dirty any more -- this usually occurs when * the governing a-list has freed the entire entity. + * + * XXX */ void hammer_io_clear_modify(struct hammer_io *io) { +#if 0 struct buf *bp; io->modified = 0; if ((bp = io->bp) != NULL) { - if (io->released) + if (io->released) { regetblk(bp); - else + /* BUF_KERNPROC(io->bp); */ + } else { io->released = 1; + } if (io->modified == 0) { kprintf("hammer_io_clear_modify: cleared %p\n", io); bundirty(bp); @@ -426,51 +455,85 @@ hammer_io_clear_modify(struct hammer_io *io) bdwrite(bp); } } +#endif } -/* - * HAMMER_BIOOPS +/************************************************************************ + * HAMMER_BIOOPS * + ************************************************************************ + * */ /* - * Pre and post I/O callbacks. + * Pre-IO initiation kernel callback - cluster build only */ -static void hammer_io_deallocate(struct buf *bp); - static void hammer_io_start(struct buf *bp) { -#if 0 - union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep); - - if (io->io.type == HAMMER_STRUCTURE_BUFFER) { - while (io->buffer.cluster->io_in_progress) { - kprintf("hammer_io_start: wait for cluster\n"); - tsleep(io->buffer.cluster, 0, "hmrdep", 0); - kprintf("hammer_io_start: wait for cluster done\n"); - } - } -#endif } +/* + * Post-IO completion kernel callback + */ static void hammer_io_complete(struct buf *bp) { - union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep); + union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep); + + KKASSERT(iou->io.released == 1); - if (io->io.type == HAMMER_STRUCTURE_CLUSTER) { - if (io->cluster.state == HAMMER_CLUSTER_ASYNC) { - io->cluster.state = HAMMER_CLUSTER_OPEN; - wakeup(&io->cluster); + if (iou->io.modified == 0) + return; + + /* + * If we were writing the cluster header out and CLUF_OPEN is set, + * do NOT clear the modify bit. Just clear the IO running bit + * and do a wakeup. + */ + if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) { + if (iou->cluster.ondisk->clu_flags & HAMMER_CLUF_OPEN) { + iou->io.running = 0; + if (iou->io.waiting) { + iou->io.waiting = 0; + wakeup(iou); + } + return; } } + + + /* + * If this was a write then clear the modified status and remove us + * from the dependancy list. 
+ * + * If no lock references remain and we can acquire the IO lock and + * someone at some point wanted us to flush (B_LOCKED test), then + * try to dispose of the IO. + */ + iou->io.modified = 0; + if (iou->io.entry_list) { + TAILQ_REMOVE(iou->io.entry_list, &iou->io, entry); + iou->io.entry_list = NULL; + } + iou->io.running = 0; + if (iou->io.waiting) { + iou->io.waiting = 0; + wakeup(iou); + } + + /* + * Someone wanted us to flush, try to clean out the buffer. + */ + if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) { + hammer_io_deallocate(bp); + /* structure may be dead now */ + } } /* * Callback from kernel when it wishes to deallocate a passively - * associated structure. This can only occur if the buffer is - * passively associated with the structure. The kernel has locked - * the buffer. + * associated structure. This case can only occur with read-only + * bp's. * * If we cannot disassociate we set B_LOCKED to prevent the buffer * from getting reused. @@ -478,82 +541,35 @@ hammer_io_complete(struct buf *bp) static void hammer_io_deallocate(struct buf *bp) { - union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep); - - /* XXX memory interlock, spinlock to sync cpus */ + hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep); - /* - * Since the kernel is passing us a locked buffer, the HAMMER - * structure had better not believe it has a lock on the buffer. - */ - KKASSERT(io->io.released); - crit_enter(); - - /* - * First, ref the structure to prevent either the buffer or the - * structure from going away or being unexpectedly flushed. - */ - hammer_ref(&io->io.lock); - - /* - * Buffers can have active references from cached hammer_node's, - * even if those nodes are themselves passively cached. Attempt - * to clean them out. This may not succeed. - * - * We have to do some magic with io.released because - * hammer_io_intend_modify() can be called indirectly from the - * flush code, otherwise we might panic with a recursive bp lock. - */ - if (io->io.type == HAMMER_STRUCTURE_BUFFER && - hammer_lock_ex_try(&io->io.lock) == 0) { - io->io.released = 0; - hammer_flush_buffer_nodes(&io->buffer); - KKASSERT(io->io.released == 0); - io->io.released = 1; - hammer_unlock(&io->io.lock); + KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0); + if (iou->io.modified) { + bp->b_flags |= B_LOCKED; + return; } + hammer_ref(&iou->io.lock); + if (iou->io.lock.refs > 1 || iou->io.modified) { + hammer_unref(&iou->io.lock); + bp->b_flags |= B_LOCKED; + } else { + hammer_io_disassociate(iou, 0); - if (hammer_islastref(&io->io.lock)) { - /* - * If we are the only ref left we can disassociate the I/O. - * It had better still be in a released state because the - * kernel is holding a lock on the buffer. Any passive - * modifications should have already been synchronized with - * the buffer. - */ - KKASSERT(io->io.modified == 0); - hammer_io_disassociate(io); - - /* - * Perform final rights on the structure. This can cause - * a chain reaction - e.g. last buffer -> last cluster -> - * last supercluster -> last volume. 
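hammer_io_complete() above is the write-completion side of the dependency machinery: an ordinary write clears modified, unhooks the IO from its parent's list, clears running and wakes any waiter, while a cluster header written with CLUF_OPEN still set deliberately stays modified so the header will be written again when the cluster is closed. A compact model of that branch structure; the real code removes the IO from a TAILQ where this model only clears a flag:

    #include <stdio.h>

    struct io {
        int cluster_header;    /* this IO is a cluster header */
        int header_open;       /* the OPEN flag is still set on disk */
        int on_parent_list;    /* queued on the parent's dependency list */
        int modified, running, waiting;
    };

    static void
    io_complete(struct io *io)
    {
        if (io->modified == 0)
            return;

        if (io->cluster_header && io->header_open) {
            /* Header written for the mark-OPEN: stays dirty, just stop the
             * in-flight accounting and wake anyone waiting on it. */
            io->running = 0;
            if (io->waiting) { io->waiting = 0; printf("wakeup\n"); }
            return;
        }

        /* Ordinary completion: now clean, drop off the dependency list. */
        io->modified = 0;
        io->on_parent_list = 0;
        io->running = 0;
        if (io->waiting) { io->waiting = 0; printf("wakeup\n"); }
    }

    int
    main(void)
    {
        struct io hdr = { 1, 1, 1, 1, 1, 1 };   /* open cluster header write */
        struct io buf = { 0, 0, 1, 1, 1, 1 };   /* ordinary dirty buffer */

        io_complete(&hdr);   /* remains modified */
        io_complete(&buf);   /* clean, off the list */
        printf("hdr modified=%d buf modified=%d\n", hdr.modified, buf.modified);
        return 0;
    }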
- */ - switch(io->io.type) { + switch(iou->io.type) { case HAMMER_STRUCTURE_VOLUME: - hammer_rel_volume(&io->volume, 1); + hammer_rel_volume(&iou->volume, 1); break; case HAMMER_STRUCTURE_SUPERCL: - hammer_rel_supercl(&io->supercl, 1); + hammer_rel_supercl(&iou->supercl, 1); break; case HAMMER_STRUCTURE_CLUSTER: - hammer_rel_cluster(&io->cluster, 1); + hammer_rel_cluster(&iou->cluster, 1); break; case HAMMER_STRUCTURE_BUFFER: - hammer_rel_buffer(&io->buffer, 1); + hammer_rel_buffer(&iou->buffer, 1); break; } - } else { - /* - * Otherwise tell the kernel not to destroy the buffer. - * - * We have to unref the structure without performing any - * final rights to it to avoid a deadlock. - */ - bp->b_flags |= B_LOCKED; - hammer_unref(&io->io.lock); } - crit_exit(); } static int @@ -600,35 +616,18 @@ hammer_io_checkwrite(struct buf *bp) { union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep); - if (iou->io.type == HAMMER_STRUCTURE_BUFFER && - iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) { - /* - * Cannot write out a cluster buffer if the cluster header - * I/O opening the cluster has not completed. - */ - bp->b_flags |= B_LOCKED; - return(-1); - } else if (iou->io.lock.refs) { - /* - * Cannot write out a bp if its associated buffer has active - * references. - */ - bp->b_flags |= B_LOCKED; - return(-1); - } else { - /* - * We're good, but before we can let the kernel proceed we - * may have to make some adjustments. - * - * Since there are no refs on the io structure, HAMMER must - * have already synchronized its modify state with the bp - * so iou->io.modified should be 0. - */ - if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) - hammer_close_cluster_quick(&iou->cluster); - hammer_io_disassociate(iou); - return(0); + /* + * A modified cluster with no dependancies can be closed. + */ + if (iou->io.type == HAMMER_STRUCTURE_CLUSTER && iou->io.modified) { + hammer_cluster_t cluster = &iou->cluster; + + if (TAILQ_EMPTY(&cluster->io.deplist)) { + cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN; + kprintf("CLOSE CLUSTER %d\n", cluster->clu_no); + } } + return(0); } /* @@ -638,8 +637,9 @@ hammer_io_checkwrite(struct buf *bp) int hammer_io_checkflush(struct hammer_io *io) { - if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) + if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) { return(1); + } return(0); } diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index 9c12c1987b..f1b3d22a6a 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
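hammer_io_checkwrite() above is the closing half of the protocol opened by hammer_modify_buffer() earlier: the first buffer dirtied under a closed cluster marks the header OPEN and writes it ahead of any buffer data, and once the kernel writes the header with no dependent buffers left on its deplist the OPEN flag is cleared again, so that cluster will not need recovery after a clean shutdown. A counter-based sketch of the lifecycle (the real code checks the TAILQ deplist rather than a count, and the flag name here is a local stand-in):

    #include <stdio.h>

    #define CLUF_OPEN 0x0001   /* models the on-disk open flag */

    struct cluster {
        unsigned flags;
        int      ndirty;    /* buffers still depending on the header */
    };

    /* First buffer dirtied under a closed cluster opens the header; the
     * header write is pushed out ahead of the buffer data. */
    static void
    buffer_dirtied(struct cluster *cl)
    {
        ++cl->ndirty;
        if ((cl->flags & CLUF_OPEN) == 0) {
            cl->flags |= CLUF_OPEN;
            printf("OPEN cluster: header written before any buffer data\n");
        }
    }

    static void
    buffer_written(struct cluster *cl)
    {
        --cl->ndirty;
    }

    /* checkwrite: a header write with nothing depending on it any more
     * clears the OPEN flag. */
    static void
    cluster_checkwrite(struct cluster *cl)
    {
        if (cl->ndirty == 0 && (cl->flags & CLUF_OPEN)) {
            cl->flags &= ~CLUF_OPEN;
            printf("CLOSE cluster\n");
        }
    }

    int
    main(void)
    {
        struct cluster cl = { 0, 0 };

        buffer_dirtied(&cl);
        buffer_written(&cl);
        cluster_checkwrite(&cl);
        return cl.flags;   /* 0: closed again */
    }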
* - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.17 2008/01/09 04:05:37 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.18 2008/01/10 07:41:03 dillon Exp $ */ #include "hammer.h" @@ -482,11 +482,9 @@ hammer_ip_sync_data(hammer_transaction_t trans, hammer_inode_t ip, rec->base.rec_id = 0; /* XXX */ rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer, bdata); rec->base.data_len = bytes; - hammer_modify_buffer_done(cursor.record_buffer); hammer_modify_buffer(cursor.data_buffer); bcopy(data, bdata, bytes); - hammer_modify_buffer_done(cursor.data_buffer); elm.leaf.base = cursor.key_beg; elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); @@ -635,11 +633,9 @@ again: rec->base.data_offset = hammer_bclu_offset(cursor.data_buffer,bdata); hammer_modify_buffer(cursor.data_buffer); bcopy(record->data, bdata, rec->base.data_len); - hammer_modify_buffer_done(cursor.data_buffer); } } rec->base.rec_id = 0; /* XXX */ - hammer_modify_buffer_done(cursor.record_buffer); elm.leaf.base = cursor.key_beg; elm.leaf.rec_offset = hammer_bclu_offset(cursor.record_buffer, rec); @@ -762,11 +758,9 @@ hammer_write_record(hammer_cursor_t cursor, hammer_record_ondisk_t orec, nrec->base.data_offset = hammer_bclu_offset(cursor->data_buffer, bdata); hammer_modify_buffer(cursor->data_buffer); bcopy(data, bdata, nrec->base.data_len); - hammer_modify_buffer_done(cursor->data_buffer); } } nrec->base.rec_id = 0; /* XXX */ - hammer_modify_buffer_done(cursor->record_buffer); elm.leaf.base = nrec->base.base; elm.leaf.rec_offset = hammer_bclu_offset(cursor->record_buffer, nrec); @@ -1283,11 +1277,9 @@ hammer_ip_delete_record(hammer_cursor_t cursor, hammer_tid_t tid) hammer_modify_buffer(cursor->record_buffer); cursor->record->base.base.delete_tid = tid; - hammer_modify_buffer_done(cursor->record_buffer); hammer_modify_node(cursor->node); elm = &cursor->node->ondisk->elms[cursor->index]; elm->leaf.base.delete_tid = tid; - hammer_modify_node_done(cursor->node); hammer_update_syncid(cursor->record_buffer->cluster, tid); } diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index 3b5ec286db..21af44f2a0 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.17 2008/01/09 00:46:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.18 2008/01/10 07:41:03 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. 
These routines are primarily @@ -52,8 +52,7 @@ static int hammer_load_supercl(hammer_supercl_t supercl, static int hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew); static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type); -static void hammer_remove_node_clist(hammer_buffer_t buffer, - hammer_node_t node); +static int hammer_load_node(hammer_node_t node); static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live, int32_t start, int *errorp, @@ -209,7 +208,7 @@ hammer_install_volume(struct hammer_mount *hmp, const char *volname) volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO); volume->vol_name = kstrdup(volname, M_HAMMER); volume->hmp = hmp; - volume->io.type = HAMMER_STRUCTURE_VOLUME; + hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME); volume->io.offset = 0LL; /* @@ -338,11 +337,12 @@ hammer_unload_volume(hammer_volume_t volume, void *data __unused) hammer_unload_cluster, NULL); RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL, hammer_unload_supercl, NULL); + hammer_io_waitdep(&volume->io); /* * Release our buffer and flush anything left in the buffer cache. */ - hammer_io_release(&volume->io, 1); + hammer_io_release(&volume->io, 2); /* * There should be no references on the volume, no clusters, and @@ -520,6 +520,8 @@ hammer_rel_volume(hammer_volume_t volume, int flush) if (volume->io.lock.refs == 1) { volume->ondisk = NULL; hammer_io_release(&volume->io, flush); + } else if (flush) { + hammer_io_flush(&volume->io); } hammer_unlock(&volume->io.lock); } @@ -559,7 +561,7 @@ again: supercl->scl_no = scl_no; supercl->volume = volume; supercl->io.offset = calculate_supercl_offset(volume, scl_no); - supercl->io.type = HAMMER_STRUCTURE_SUPERCL; + hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL); hammer_ref(&supercl->io.lock); /* @@ -628,6 +630,8 @@ hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew) */ struct hammer_alist_live dummy; + hammer_modify_supercl(supercl); + ondisk = supercl->ondisk; dummy.config = &Buf_alist_config; dummy.meta = ondisk->head.buf_almeta; @@ -654,7 +658,7 @@ hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused) { KKASSERT(supercl->io.lock.refs == 0); hammer_ref(&supercl->io.lock); - hammer_rel_supercl(supercl, 1); + hammer_rel_supercl(supercl, 2); return(0); } @@ -686,6 +690,8 @@ hammer_rel_supercl(hammer_supercl_t supercl, int flush) hammer_rel_volume(volume, 0); return; } + } else if (flush) { + hammer_io_flush(&supercl->io); } hammer_unlock(&supercl->io.lock); } @@ -710,11 +716,10 @@ again: cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO); cluster->clu_no = clu_no; cluster->volume = volume; - cluster->io.offset = calculate_cluster_offset(volume, clu_no); - cluster->state = HAMMER_CLUSTER_IDLE; RB_INIT(&cluster->rb_bufs_root); RB_INIT(&cluster->rb_nods_root); - cluster->io.type = HAMMER_STRUCTURE_CLUSTER; + hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER); + cluster->io.offset = calculate_cluster_offset(volume, clu_no); hammer_ref(&cluster->io.lock); /* @@ -888,7 +893,6 @@ hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew) ondisk->clu_btree_parent_clu_no = -2; ondisk->clu_btree_parent_offset = -2; ondisk->clu_btree_parent_clu_gen = -2; - hammer_modify_cluster_done(cluster); croot = hammer_alloc_btree(cluster, &error); if (error == 0) { @@ -896,10 +900,8 @@ hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew) bzero(croot->ondisk, sizeof(*croot->ondisk)); 
croot->ondisk->count = 0; croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF; - hammer_modify_node_done(croot); hammer_modify_cluster(cluster); ondisk->clu_btree_root = croot->node_offset; - hammer_modify_cluster_done(cluster); hammer_rel_node(croot); } } @@ -916,8 +918,9 @@ hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused) hammer_ref(&cluster->io.lock); RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL, hammer_unload_buffer, NULL); + hammer_io_waitdep(&cluster->io); KKASSERT(cluster->io.lock.refs == 1); - hammer_rel_cluster(cluster, 1); + hammer_rel_cluster(cluster, 2); return(0); } @@ -932,7 +935,6 @@ hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid) hammer_modify_cluster(cluster); if (cluster->ondisk->synchronized_tid < tid) cluster->ondisk->synchronized_tid = tid; - hammer_modify_cluster_done(cluster); } /* @@ -964,14 +966,12 @@ hammer_ref_cluster(hammer_cluster_t cluster) * Release a cluster. We have to deal with several places where * another thread can ref the cluster. * - * Only destroy the structure itself if the related buffer cache buffer - * was disassociated from it. This ties the management of the structure - * to the buffer cache subsystem. + * Only destroy the structure itself if we no longer have an IO or any + * hammer buffers associated with the structure. */ void hammer_rel_cluster(hammer_cluster_t cluster, int flush) { - hammer_node_t node; hammer_volume_t volume; if (cluster->io.lock.refs == 1) { @@ -986,33 +986,13 @@ hammer_rel_cluster(hammer_cluster_t cluster, int flush) hammer_io_release(&cluster->io, flush); /* - * The B-Tree node cache is not counted in the - * cluster's reference count. Clean out the - * cache. - * - * If the cluster acquires a new reference while we - * are trying to clean it out, abort the cleaning. - * - * Any actively referenced nodes will reference the - * related buffer and cluster, so a ref count check - * should be sufficient. 
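The hammer_ondisk.c hunks above and below repeat one release convention for volumes, superclusters, clusters and buffers: the final reference hands the IO to hammer_io_release() with the caller's flush level (the unload paths pass 2 after hammer_io_waitdep()), while a non-final release that still wants the data written only calls hammer_io_flush(). A bare-bones sketch of that split, with stand-in helpers rather than the kernel functions:

    struct obj {
        int refs;
        int flush_requested;   /* records what happened, for the example */
    };

    static void io_release(struct obj *o, int flush) { o->flush_requested = flush; }
    static void io_flush(struct obj *o)              { o->flush_requested = 1; }

    static void
    rel_obj(struct obj *o, int flush)
    {
        if (o->refs == 1) {
            /* Last reference: release the backing buffer with the caller's
             * flush level (2 waits and detaches) and possibly tear down. */
            io_release(o, flush);
        } else if (flush) {
            /* Others still hold it: just push the dirty data out. */
            io_flush(o);
        }
        --o->refs;
    }

    int
    main(void)
    {
        struct obj o = { 2, 0 };

        rel_obj(&o, 1);   /* not last: flush only */
        rel_obj(&o, 2);   /* last: unload-style full release */
        return 0;
    }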
- */ - while (cluster->io.bp == NULL && - cluster->io.lock.refs == 1 && - (node = RB_ROOT(&cluster->rb_nods_root)) != NULL - ) { - KKASSERT(node->lock.refs == 0); - hammer_flush_node(node); - } - - /* * Final cleanup */ if (cluster != cluster->volume->hmp->rootcl && cluster->io.bp == NULL && cluster->io.lock.refs == 1 && - RB_EMPTY(&cluster->rb_nods_root)) { - KKASSERT(RB_EMPTY(&cluster->rb_bufs_root)); + RB_EMPTY(&cluster->rb_bufs_root)) { + KKASSERT(RB_EMPTY(&cluster->rb_nods_root)); volume = cluster->volume; RB_REMOVE(hammer_clu_rb_tree, &volume->rb_clus_root, cluster); @@ -1022,6 +1002,8 @@ hammer_rel_cluster(hammer_cluster_t cluster, int flush) hammer_rel_volume(volume, 0); return; } + } else if (flush) { + hammer_io_flush(&cluster->io); } hammer_unlock(&cluster->io.lock); } @@ -1062,9 +1044,9 @@ again: buffer->buf_no = buf_no; buffer->cluster = cluster; buffer->volume = cluster->volume; + hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER); buffer->io.offset = cluster->io.offset + (buf_no * HAMMER_BUFSIZE); - buffer->io.type = HAMMER_STRUCTURE_BUFFER; TAILQ_INIT(&buffer->clist); hammer_ref(&buffer->io.lock); @@ -1130,6 +1112,7 @@ hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type) error = 0; } if (error == 0 && buf_type) { + hammer_modify_buffer(buffer); ondisk = buffer->ondisk; hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type); buffer->buf_type = ondisk->head.buf_type; @@ -1147,7 +1130,7 @@ hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused) hammer_ref(&buffer->io.lock); hammer_flush_buffer_nodes(buffer); KKASSERT(buffer->io.lock.refs == 1); - hammer_rel_buffer(buffer, 1); + hammer_rel_buffer(buffer, 2); return(0); } @@ -1192,29 +1175,16 @@ void hammer_rel_buffer(hammer_buffer_t buffer, int flush) { hammer_cluster_t cluster; - hammer_node_t node; if (buffer->io.lock.refs == 1) { hammer_lock_ex(&buffer->io.lock); if (buffer->io.lock.refs == 1) { hammer_io_release(&buffer->io, flush); - /* - * Clean out the B-Tree node cache, if any, then - * clean up the cluster ref and free the buffer. - * - * If the buffer acquires a new reference while we - * are trying to clean it out, abort the cleaning. - */ - while (buffer->io.bp == NULL && - buffer->io.lock.refs == 1 && - (node = TAILQ_FIRST(&buffer->clist)) != NULL - ) { - KKASSERT(node->lock.refs == 0); - hammer_flush_node(node); - } if (buffer->io.bp == NULL && - hammer_islastref(&buffer->io.lock)) { + buffer->io.lock.refs == 1) { + hammer_flush_buffer_nodes(buffer); + KKASSERT(TAILQ_EMPTY(&buffer->clist)); cluster = buffer->cluster; RB_REMOVE(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer); @@ -1224,34 +1194,14 @@ hammer_rel_buffer(hammer_buffer_t buffer, int flush) hammer_rel_cluster(cluster, 0); return; } + } else if (flush) { + hammer_io_flush(&buffer->io); } hammer_unlock(&buffer->io.lock); } hammer_unref(&buffer->io.lock); } -/* - * Flush passively cached B-Tree nodes associated with this buffer. - * - * NOTE: The buffer is referenced and locked. 
- */ -void -hammer_flush_buffer_nodes(hammer_buffer_t buffer) -{ - hammer_node_t node; - - node = TAILQ_FIRST(&buffer->clist); - while (node) { - buffer->save_scan = TAILQ_NEXT(node, entry); - if (node->lock.refs == 0) { - hammer_ref(&node->lock); - node->flags |= HAMMER_NODE_FLUSH; - hammer_rel_node(node); - } - node = buffer->save_scan; - } -} - /************************************************************************ * NODES * ************************************************************************ @@ -1260,18 +1210,15 @@ hammer_flush_buffer_nodes(hammer_buffer_t buffer) * method used by the HAMMER filesystem. * * Unlike other HAMMER structures, a hammer_node can be PASSIVELY - * associated with its buffer. It can have an active buffer reference - * even when the node itself has no references. The node also passively - * associates itself with its cluster without holding any cluster refs. - * The cluster ref is indirectly maintained by the active buffer ref when - * a node is acquired. + * associated with its buffer, and will only referenced the buffer while + * the node itself is referenced. * * A hammer_node can also be passively associated with other HAMMER * structures, such as inodes, while retaining 0 references. These * associations can be cleared backwards using a pointer-to-pointer in * the hammer_node. * - * This allows the HAMMER implementation to cache hammer_node's long-term + * This allows the HAMMER implementation to cache hammer_nodes long-term * and short-cut a great deal of the infrastructure's complexity. In * most cases a cached node can be reacquired without having to dip into * either the buffer or cluster management code. @@ -1303,180 +1250,166 @@ again: goto again; } } - *errorp = hammer_ref_node(node); + hammer_ref(&node->lock); + *errorp = hammer_load_node(node); if (*errorp) { - /* - * NOTE: The node pointer may be stale on error return. - * In fact, its probably been destroyed. - */ + hammer_rel_node(node); node = NULL; } return(node); } /* - * Reference the node to prevent disassociations, then associate and - * load the related buffer. This routine can also be called to reference - * a node from a cache pointer. - * - * NOTE: Because the caller does not have a ref on the node, the caller's - * node pointer will be stale if an error is returned. We may also wind - * up clearing the related cache pointers. - * - * NOTE: The cluster is indirectly referenced by our buffer ref. + * Reference an already-referenced node. */ int hammer_ref_node(hammer_node_t node) { + int error; + + KKASSERT(node->lock.refs > 0); + hammer_ref(&node->lock); + if ((error = hammer_load_node(node)) != 0) + hammer_rel_node(node); + return(error); +} + +/* + * Load a node's on-disk data reference. + */ +static int +hammer_load_node(hammer_node_t node) +{ hammer_buffer_t buffer; int32_t buf_no; int error; - hammer_ref(&node->lock); + if (node->ondisk) + return(0); error = 0; + hammer_lock_ex(&node->lock); if (node->ondisk == NULL) { - hammer_lock_ex(&node->lock); - if (node->ondisk == NULL) { - /* - * This is a little confusing but the jist is that - * node->buffer determines whether the node is on - * the buffer's clist and node->ondisk determines - * whether the buffer is referenced. 
- */ - if ((buffer = node->buffer) != NULL) { - error = hammer_ref_buffer(buffer); - } else { - buf_no = node->node_offset / HAMMER_BUFSIZE; - buffer = hammer_get_buffer(node->cluster, - buf_no, 0, &error); - if (buffer) { - KKASSERT(error == 0); - TAILQ_INSERT_TAIL(&buffer->clist, - node, entry); - node->buffer = buffer; - } - } - if (error == 0) { - node->ondisk = (void *)((char *)buffer->ondisk + - (node->node_offset & HAMMER_BUFMASK)); + /* + * This is a little confusing but the jist is that + * node->buffer determines whether the node is on + * the buffer's clist and node->ondisk determines + * whether the buffer is referenced. + */ + if ((buffer = node->buffer) != NULL) { + error = hammer_ref_buffer(buffer); + } else { + buf_no = node->node_offset / HAMMER_BUFSIZE; + buffer = hammer_get_buffer(node->cluster, + buf_no, 0, &error); + if (buffer) { + KKASSERT(error == 0); + TAILQ_INSERT_TAIL(&buffer->clist, + node, entry); + node->buffer = buffer; } } - hammer_unlock(&node->lock); + if (error == 0) { + node->ondisk = (void *)((char *)buffer->ondisk + + (node->node_offset & HAMMER_BUFMASK)); + } } - if (error) - hammer_rel_node(node); + hammer_unlock(&node->lock); return (error); } /* - * Release a hammer_node. The node retains a passive association with - * its cluster, buffer and caches. - * - * However, to avoid cluttering up kernel memory with tons of B-Tree - * node cache structures we destroy the node if no passive cache or - * (instantiated) buffer references exist. + * Safely reference a node, interlock against flushes via the IO subsystem. + */ +hammer_node_t +hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache, + int *errorp) +{ + hammer_node_t node; + + if ((node = *cache) != NULL) + hammer_ref(&node->lock); + if (node) { + *errorp = hammer_load_node(node); + if (*errorp) { + hammer_rel_node(node); + node = NULL; + } + } else { + *errorp = ENOENT; + } + return(node); +} + +/* + * Release a hammer_node. On the last release the node dereferences + * its underlying buffer and may or may not be destroyed. */ void hammer_rel_node(hammer_node_t node) { hammer_cluster_t cluster; hammer_buffer_t buffer; + int32_t node_offset; + int flags; - if (hammer_islastref(&node->lock)) { - cluster = node->cluster; - - /* - * Destroy the node if it is being deleted. Free the node - * in the bitmap after we have unhooked it. - */ - hammer_ref_cluster(cluster); - if (node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) { - hammer_flush_node(node); - RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root, - node); - if ((buffer = node->buffer) != NULL) { - node->buffer = NULL; - hammer_remove_node_clist(buffer, node); - if (node->ondisk) { - node->ondisk = NULL; - hammer_rel_buffer(buffer, 0); - } - } - if (node->flags & HAMMER_NODE_DELETED) { - hammer_free_btree(node->cluster, - node->node_offset); - if (node->node_offset == - cluster->ondisk->clu_btree_root) { - kprintf("FREE CLUSTER %d\n", cluster->clu_no); - hammer_free_cluster(cluster); - /*hammer_io_undirty(&cluster->io);*/ - } - } - hammer_rel_cluster(cluster, 0); - --hammer_count_nodes; - kfree(node, M_HAMMER); - return; - } + /* + * If this isn't the last ref just decrement the ref count and + * return. + */ + if (node->lock.refs > 1) { + hammer_unref(&node->lock); + return; + } - /* - * node->ondisk determines whether we have a buffer reference - * to get rid of or not. Only get rid of the reference if - * the kernel tried to flush the buffer. 
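hammer_load_node() above resolves a node's backing store on demand: if the node already points at a buffer it just references it, otherwise the buffer number is the node offset divided by the buffer size and the ondisk pointer is the buffer contents plus the offset masked to within the buffer. A tiny standalone illustration of that arithmetic, assuming a 16KB buffer size purely for the example:

    #include <stdio.h>
    #include <stdint.h>

    #define BUFSIZE  16384            /* assumed buffer size for illustration */
    #define BUFMASK  (BUFSIZE - 1)

    struct buffer {
        char ondisk[BUFSIZE];         /* mapped contents of one buffer */
    };

    int
    main(void)
    {
        struct buffer buf;
        int32_t node_offset = 3 * BUFSIZE + 0x400;   /* cluster-relative */

        int32_t buf_no = node_offset / BUFSIZE;               /* which buffer */
        void *ondisk   = buf.ondisk + (node_offset & BUFMASK); /* where inside */

        printf("buf_no=%d intra-buffer offset=%#x\n",
               (int)buf_no, (int)(node_offset & BUFMASK));
        (void)ondisk;
        return 0;
    }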
- * - * NOTE: Once unref'd the node can be physically destroyed, - * so our node is stale afterwords. - * - * This case occurs if the node still has cache references. - * We could remove the references and free the structure - * but for now we allow them (and the node structure) to - * remain intact. - */ - if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) { - hammer_flush_node(node); - buffer = node->buffer; - node->buffer = NULL; - node->ondisk = NULL; - hammer_remove_node_clist(buffer, node); - hammer_rel_buffer(buffer, 0); - } + /* + * If there is no ondisk info or no buffer, the node failed to load; + * remove the last reference and destroy the node. + */ + if (node->ondisk == NULL) { + hammer_unref(&node->lock); + hammer_flush_node(node); + /* node is stale now */ + return; + } - /* - * Clutter control, this case only occurs after a failed - * load since otherwise ondisk will be non-NULL. - */ - if (node->cache1 == NULL && node->cache2 == NULL && - node->ondisk == NULL) { - RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root, - node); - if ((buffer = node->buffer) != NULL) { - node->buffer = NULL; /* sanity */ - node->ondisk = NULL; /* sanity */ - hammer_remove_node_clist(buffer, node); - } - --hammer_count_nodes; - node->lock.refs = -1; /* sanity */ - kfree(node, M_HAMMER); - } else { - hammer_unref(&node->lock); - } + /* + * Do final cleanups and then either destroy the node or leave it + * passively cached. The buffer reference is removed regardless. + */ + buffer = node->buffer; + node->ondisk = NULL; - /* - * We have to do this last, after the node has been removed - * from the cluster's RB tree or we risk a deadlock due to - * hammer_rel_buffer->hammer_rel_cluster->(node deadlock) - */ - hammer_rel_cluster(cluster, 0); - } else { + if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) { hammer_unref(&node->lock); + hammer_rel_buffer(buffer, 0); + return; } + + /* + * Destroy the node. Record pertinent data because the node + * becomes stale the instant we flush it. + */ + flags = node->flags; + node_offset = node->node_offset; + hammer_unref(&node->lock); + hammer_flush_node(node); + /* node is stale */ + + cluster = buffer->cluster; + if (flags & HAMMER_NODE_DELETED) { + hammer_free_btree(cluster, node_offset); + if (node_offset == cluster->ondisk->clu_btree_root) { + kprintf("FREE CLUSTER %d\n", cluster->clu_no); + hammer_free_cluster(cluster); + /*hammer_io_undirty(&cluster->io);*/ + } + } + hammer_rel_buffer(buffer, 0); } /* - * Cache-and-release a hammer_node. Kinda like catching and releasing a - * fish, but keeping an eye on him. The node is passively cached in *cache. - * - * NOTE! HAMMER may NULL *cache at any time, even after you have - * referenced the node! + * Passively cache a referenced hammer_node in *cache. The caller may + * release the node on return. */ void hammer_cache_node(hammer_node_t node, struct hammer_node **cache) @@ -1553,7 +1486,7 @@ hammer_flush_node(hammer_node_t node) node); if ((buffer = node->buffer) != NULL) { node->buffer = NULL; - hammer_remove_node_clist(buffer, node); + TAILQ_REMOVE(&buffer->clist, node, entry); /* buffer is unreferenced because ondisk is NULL */ } --hammer_count_nodes; @@ -1562,17 +1495,21 @@ } /* - * Remove a node from the buffer's clist. Adjust save_scan as appropriate. - * This is in its own little routine to properly handle interactions with - * save_scan, so it is possible to block while scanning a buffer's node list.
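The pointer-to-pointer passive cache mentioned in the comments (node->cache1/cache2 remembering the addresses of the cache slots so the association can be cleared backwards) can be sketched in isolation as follows. The znode names are hypothetical and the bookkeeping is deliberately simplified; it is not HAMMER's actual implementation, just the shape of the idea.

#include <stddef.h>
#include <stdio.h>

struct znode {
	struct znode **cache1;		/* back-pointers to passive cache slots */
	struct znode **cache2;
};

/*
 * Publish the node in *cache and remember the slot so the node can
 * clear it "backwards" later.
 */
static void
znode_cache(struct znode *node, struct znode **cache)
{
	if (node->cache1 != cache && node->cache2 != cache) {
		if (node->cache1 == NULL) {
			node->cache1 = cache;
		} else {
			if (node->cache2)
				*node->cache2 = NULL;	/* evict the older slot */
			node->cache2 = cache;
		}
	}
	*cache = node;
}

/*
 * The clear-backwards side, used before the node is torn down: every
 * passive cache slot still pointing at the node is NULLed out.
 */
static void
znode_flush(struct znode *node)
{
	if (node->cache1) {
		*node->cache1 = NULL;
		node->cache1 = NULL;
	}
	if (node->cache2) {
		*node->cache2 = NULL;
		node->cache2 = NULL;
	}
}

int
main(void)
{
	struct znode node = { NULL, NULL };
	struct znode *ip_cache = NULL;	/* e.g. a per-inode cache slot */

	znode_cache(&node, &ip_cache);
	printf("cached:      %p\n", (void *)ip_cache);
	znode_flush(&node);
	printf("after flush: %p\n", (void *)ip_cache);
	return (0);
}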
+ * Flush passively cached B-Tree nodes associated with this buffer. + * This is only called when the buffer is about to be destroyed, so + * none of the nodes should have any references. */ -static void -hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node) +hammer_flush_buffer_nodes(hammer_buffer_t buffer) { - if (buffer->save_scan == node) - buffer->save_scan = TAILQ_NEXT(node, entry); - TAILQ_REMOVE(&buffer->clist, node, entry); + hammer_node_t node; + + while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) { + KKASSERT(node->lock.refs == 0 && node->ondisk == NULL); + hammer_ref(&node->lock); + node->flags |= HAMMER_NODE_FLUSH; + hammer_rel_node(node); + } } /************************************************************************ @@ -1635,7 +1572,6 @@ hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint, 1, clu_hint); } } - hammer_modify_volume_done(volume); if (clu_no != HAMMER_ALIST_BLOCK_NONE) break; hammer_rel_volume(volume, 0); @@ -1673,7 +1609,6 @@ hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound, ondisk->clu_btree_end = *right_bound; cluster->clu_btree_beg = ondisk->clu_btree_beg; cluster->clu_btree_end = ondisk->clu_btree_end; - hammer_modify_cluster_done(cluster); } /* @@ -1684,7 +1619,6 @@ hammer_free_cluster(hammer_cluster_t cluster) { hammer_modify_cluster(cluster); hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1); - hammer_modify_cluster_done(cluster); } /* @@ -1728,7 +1662,6 @@ hammer_alloc_btree(hammer_cluster_t cluster, int *errorp) *errorp = ENOSPC; if (buffer) hammer_rel_buffer(buffer, 0); - hammer_modify_cluster_done(cluster); return(NULL); } } @@ -1746,13 +1679,11 @@ hammer_alloc_btree(hammer_cluster_t cluster, int *errorp) if (node) { hammer_modify_node(node); bzero(node->ondisk, sizeof(*node->ondisk)); - hammer_modify_node_done(node); } else { hammer_alist_free(live, elm_no, 1); hammer_rel_node(node); node = NULL; } - hammer_modify_cluster_done(cluster); if (buffer) hammer_rel_buffer(buffer, 0); return(node); @@ -1783,12 +1714,10 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, cluster->ondisk->idx_ldata, 1); if (buf_no == HAMMER_ALIST_BLOCK_NONE) { *errorp = ENOSPC; - hammer_modify_cluster_done(cluster); return(NULL); } hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks); cluster->ondisk->idx_ldata = buf_no; - hammer_modify_cluster_done(cluster); buffer = *bufferp; *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp); if (buffer) @@ -1813,12 +1742,10 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, elm_no = hammer_alist_alloc(live, nblks); if (elm_no == HAMMER_ALIST_BLOCK_NONE) { *errorp = ENOSPC; - hammer_modify_cluster_done(cluster); return(NULL); } } cluster->ondisk->idx_index = elm_no; - hammer_modify_cluster_done(cluster); /* * Load and return the B-Tree element @@ -1837,7 +1764,6 @@ hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes, hammer_modify_buffer(buffer); item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK]; bzero(item, nblks * HAMMER_DATA_BLKSIZE); - hammer_modify_buffer_done(buffer); *errorp = 0; return(item); } @@ -1867,12 +1793,10 @@ hammer_alloc_record(hammer_cluster_t cluster, kprintf("hammer_alloc_record elm again %08x\n", elm_no); if (elm_no == HAMMER_ALIST_BLOCK_NONE) { *errorp = ENOSPC; - hammer_modify_cluster_done(cluster); return(NULL); } } cluster->ondisk->idx_record = elm_no; - hammer_modify_cluster_done(cluster); /* * Load and return the record element @@ -1891,7 +1815,6 @@ hammer_alloc_record(hammer_cluster_t 
cluster, hammer_modify_buffer(buffer); item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK]; bzero(item, sizeof(union hammer_record_ondisk)); - hammer_modify_buffer_done(buffer); *errorp = 0; return(item); } @@ -1910,7 +1833,6 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes) hammer_alist_free(&buffer->cluster->alist_master, buffer->buf_no, nblks); hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks); - hammer_modify_cluster_done(buffer->cluster); return; } @@ -1922,7 +1844,6 @@ hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes) nblks /= HAMMER_DATA_BLKSIZE; live = &buffer->cluster->alist_mdata; hammer_alist_free(live, elm_no, nblks); - hammer_modify_cluster_done(buffer->cluster); } void @@ -1937,7 +1858,6 @@ hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec) elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS; live = &buffer->cluster->alist_record; hammer_alist_free(live, elm_no, 1); - hammer_modify_cluster_done(buffer->cluster); } void @@ -1955,7 +1875,6 @@ hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, 1); - hammer_modify_cluster_done(cluster); } void @@ -1975,7 +1894,6 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes) buf_no = bclu_offset / HAMMER_BUFSIZE; hammer_alist_free(&cluster->alist_master, buf_no, nblks); hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks); - hammer_modify_cluster_done(cluster); return; } @@ -1987,7 +1905,6 @@ hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, nblks); - hammer_modify_cluster_done(cluster); } void @@ -2005,7 +1922,6 @@ hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset) KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0); elm_no += fsbuf_offset / blksize; hammer_alist_free(live, elm_no, 1); - hammer_modify_cluster_done(cluster); } @@ -2054,7 +1970,6 @@ alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live, buf_no, type, nelements); #endif hammer_modify_buffer(buffer); /*XXX*/ - hammer_modify_buffer_done(buffer); /*XXX*/ hammer_adjust_stats(cluster, type, 1); /* @@ -2078,7 +1993,6 @@ alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live, hammer_modify_cluster(cluster); cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |= (1 << (buf_no & 31)); - hammer_modify_cluster_done(cluster); } } @@ -2147,10 +2061,8 @@ hammer_sync_volume(hammer_volume_t volume, void *data) RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL, hammer_sync_cluster, info); - if (hammer_ref_volume(volume) == 0) { - hammer_io_flush(&volume->io, info); - hammer_rel_volume(volume, 0); - } + if (hammer_ref_volume(volume) == 0) + hammer_rel_volume(volume, 1); return(0); } @@ -2161,31 +2073,20 @@ hammer_sync_cluster(hammer_cluster_t cluster, void *data) RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL, hammer_sync_buffer, info); - switch(cluster->state) { - case HAMMER_CLUSTER_OPEN: - case HAMMER_CLUSTER_IDLE: - if (hammer_ref_cluster(cluster) == 0) { - hammer_io_flush(&cluster->io, info); - hammer_rel_cluster(cluster, 0); - } - break; - default: - break; - } + /*hammer_io_waitdep(&cluster->io);*/ + if (hammer_ref_cluster(cluster) == 0) + hammer_rel_cluster(cluster, 1); return(0); } int 
-hammer_sync_buffer(hammer_buffer_t buffer, void *data) +hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused) { - struct hammer_sync_info *info = data; - if (hammer_ref_buffer(buffer) == 0) { hammer_lock_ex(&buffer->io.lock); hammer_flush_buffer_nodes(buffer); hammer_unlock(&buffer->io.lock); - hammer_io_flush(&buffer->io, info); - hammer_rel_buffer(buffer, 0); + hammer_rel_buffer(buffer, 1); } return(0); } @@ -2304,7 +2205,6 @@ hammer_alloc_master(hammer_cluster_t cluster, int nblks, nblks, HAMMER_ALIST_BLOCK_MAX); } } - hammer_modify_cluster_done(cluster); /* * Recover space from empty record, b-tree, and data a-lists. @@ -2340,9 +2240,6 @@ hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks) cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks; break; } - hammer_modify_cluster_done(cluster); - hammer_modify_volume_done(cluster->volume); - hammer_modify_volume_done(cluster->volume->hmp->rootvol); } /* @@ -2388,7 +2285,6 @@ buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count) hammer_modify_buffer(buffer); error = hammer_alist_recover(&buffer->alist, blk, 0, count); /* free block count is returned if >= 0 */ - hammer_modify_buffer_done(buffer); hammer_rel_buffer(buffer, 0); } else { error = -error; @@ -2434,7 +2330,6 @@ buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk); if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; - hammer_modify_buffer_done(buffer); *fullp = hammer_alist_isfull(&buffer->alist); hammer_rel_buffer(buffer, 0); } else { @@ -2461,7 +2356,6 @@ buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix, r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk); if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; - hammer_modify_buffer_done(buffer); *fullp = hammer_alist_isfull(&buffer->alist); hammer_rel_buffer(buffer, 0); } else { @@ -2486,7 +2380,6 @@ buffer_alist_free(void *info, int32_t blk, int32_t radix, KKASSERT(buffer->ondisk->head.buf_type != 0); hammer_modify_buffer(buffer); hammer_alist_free(&buffer->alist, base_blk, count); - hammer_modify_buffer_done(buffer); *emptyp = hammer_alist_isempty(&buffer->alist); /* XXX don't bother updating the buffer is completely empty? 
*/ hammer_rel_buffer(buffer, 0); @@ -2553,7 +2446,6 @@ super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count) hammer_modify_supercl(supercl); error = hammer_alist_recover(&supercl->alist, blk, 0, count); /* free block count is returned if >= 0 */ - hammer_modify_supercl_done(supercl); hammer_rel_supercl(supercl, 0); } else { error = -error; @@ -2609,7 +2501,6 @@ super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix, r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk); if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; - hammer_modify_supercl_done(supercl); *fullp = hammer_alist_isfull(&supercl->alist); hammer_rel_supercl(supercl, 0); } else { @@ -2636,7 +2527,6 @@ super_alist_alloc_rev(void *info, int32_t blk, int32_t radix, r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk); if (r != HAMMER_ALIST_BLOCK_NONE) r += blk; - hammer_modify_supercl_done(supercl); *fullp = hammer_alist_isfull(&supercl->alist); hammer_rel_supercl(supercl, 0); } else { @@ -2660,7 +2550,6 @@ super_alist_free(void *info, int32_t blk, int32_t radix, if (supercl) { hammer_modify_supercl(supercl); hammer_alist_free(&supercl->alist, base_blk, count); - hammer_modify_supercl_done(supercl); *emptyp = hammer_alist_isempty(&supercl->alist); hammer_rel_supercl(supercl, 0); } else { diff --git a/sys/vfs/hammer/hammer_recover.c b/sys/vfs/hammer/hammer_recover.c index 34fd223319..509ac56a4d 100644 --- a/sys/vfs/hammer/hammer_recover.c +++ b/sys/vfs/hammer/hammer_recover.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.1 2008/01/09 00:46:22 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_recover.c,v 1.2 2008/01/10 07:41:03 dillon Exp $ */ #include "hammer.h" @@ -142,7 +142,6 @@ hammer_recover(hammer_cluster_t cluster) bzero(croot->ondisk, sizeof(*croot->ondisk)); croot->ondisk->count = 0; croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF; - hammer_modify_node_done(croot); cluster->ondisk->clu_btree_root = croot->node_offset; } } @@ -163,7 +162,6 @@ hammer_recover(hammer_cluster_t cluster) continue; hammer_recover_buffer_stage2(cluster, buf_no); } - hammer_modify_cluster_done(cluster); /* * Validate the parent cluster pointer. XXX @@ -218,7 +216,6 @@ hammer_recover_buffer_stage1(hammer_cluster_t cluster, int32_t buf_no) hammer_alist_free(&buffer->alist, rec_no, 1); } } - hammer_modify_buffer_done(buffer); hammer_rel_buffer(buffer, 0); } @@ -356,7 +353,6 @@ hammer_recover_record(hammer_cluster_t cluster, hammer_buffer_t buffer, &dbuf->ondisk->head, HAMMER_FSBUF_DATA); dbuf->buf_type = HAMMER_FSBUF_DATA; - hammer_modify_buffer_done(dbuf); } } else { /* diff --git a/sys/vfs/hammer/hammer_spike.c b/sys/vfs/hammer/hammer_spike.c index 29f85cde03..6e808da3bc 100644 --- a/sys/vfs/hammer/hammer_spike.c +++ b/sys/vfs/hammer/hammer_spike.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.4 2007/12/31 05:33:12 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/Attic/hammer_spike.c,v 1.5 2008/01/10 07:41:03 dillon Exp $ */ #include "hammer.h" @@ -164,7 +164,6 @@ hammer_spike(struct hammer_cursor **spikep) elm->internal.subtree_clu_no = ncluster->clu_no; elm->internal.subtree_vol_no = ncluster->volume->vol_no; elm->internal.subtree_count = onode->ondisk->count; /*XXX*/ - hammer_modify_node_done(spike->parent); onode->flags |= HAMMER_NODE_MODIFIED; hammer_flush_node(onode); } @@ -177,7 +176,6 @@ hammer_spike(struct hammer_cursor **spikep) ondisk->clu_btree_parent_clu_no = ocluster->clu_no; ondisk->clu_btree_parent_offset = spike->parent->node_offset; ondisk->clu_btree_parent_clu_gen = ocluster->ondisk->clu_gen; - hammer_modify_cluster_done(ncluster); } /* diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index c76fe7489e..a2e3433ada 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.6 2008/01/01 01:00:03 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.7 2008/01/10 07:41:03 dillon Exp $ */ #include "hammer.h" @@ -97,7 +97,6 @@ hammer_alloc_tid(hammer_transaction_t trans) tid, (int)(tid / 1000000000LL)); } ondisk->vol0_nexttid = tid + 2; - hammer_modify_volume_done(trans->rootvol); return(tid); } @@ -111,6 +110,5 @@ hammer_alloc_recid(hammer_transaction_t trans) hammer_modify_volume(trans->rootvol); ondisk = trans->rootvol->ondisk; recid = ++ondisk->vol0_recid; - hammer_modify_volume_done(trans->rootvol); return(recid); } diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index e20f2ff313..5eef2aecfa 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.16 2008/01/03 06:48:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.17 2008/01/10 07:41:03 dillon Exp $ */ #include @@ -294,7 +294,7 @@ hammer_vop_write(struct vop_write_args *ap) * This case is used by vop_stdputpages(). */ bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, - 0, 0); + GETBLK_BHEAVY, 0); if ((bp->b_flags & B_CACHE) == 0) { bqrelse(bp); error = bread(ap->a_vp, @@ -310,13 +310,13 @@ hammer_vop_write(struct vop_write_args *ap) * entirely overwrite the buffer */ bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, - 0, 0); + GETBLK_BHEAVY, 0); } else if (offset == 0 && uio->uio_offset >= ip->ino_rec.ino_size) { /* * XXX */ bp = getblk(ap->a_vp, uio->uio_offset, HAMMER_BUFSIZE, - 0, 0); + GETBLK_BHEAVY, 0); vfs_bio_clrbuf(bp); } else { /* @@ -329,6 +329,7 @@ hammer_vop_write(struct vop_write_args *ap) brelse(bp); break; } + bheavy(bp); } n = HAMMER_BUFSIZE - offset; if (n > uio->uio_resid) -- 2.11.4.GIT
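One convention from the sync changes earlier in this patch is worth illustrating: hammer_sync_buffer(), hammer_sync_cluster() and hammer_sync_volume() no longer call hammer_io_flush() themselves; they take a reference and release it with the flush argument set, so the write-out happens on the final release. Below is a toy, self-contained model of that flush-on-last-release convention. The names (wobj, wobj_rel, wobj_sync) are invented for illustration and the "write" is just a printf; it is a sketch of the calling pattern, not of HAMMER's IO backend.

#include <stdio.h>

struct wobj {
	int refs;
	int modified;
};

static void
wobj_write(struct wobj *obj)
{
	printf("writing object out\n");
	obj->modified = 0;
}

/*
 * Release a reference; on the final release with flush requested and
 * the object still dirty, write it out.
 */
static void
wobj_rel(struct wobj *obj, int flush)
{
	if (--obj->refs == 0 && flush && obj->modified)
		wobj_write(obj);
}

static int
wobj_ref(struct wobj *obj)
{
	++obj->refs;
	return (0);			/* mimic the 0-on-success convention */
}

/* the sync scan just takes a reference and releases it with flush set */
static void
wobj_sync(struct wobj *obj)
{
	if (wobj_ref(obj) == 0)
		wobj_rel(obj, 1);
}

int
main(void)
{
	struct wobj obj = { 0, 1 };

	wobj_sync(&obj);		/* last release with flush set: writes */
	return (0);
}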