From 668b649690a2639256562c0624f488c0578db351 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Tue, 10 Jun 2008 08:51:02 +0000 Subject: [PATCH] HAMMER 53G/Many: Performance tuning. * Implement multiple flusher threads. * Move the call to hammer_inode_waitreclaims() yet again. Call it from hammer_vop_reclaim(). --- sys/vfs/hammer/hammer.h | 46 +++++++-- sys/vfs/hammer/hammer_flusher.c | 193 ++++++++++++++++++++++++++---------- sys/vfs/hammer/hammer_freemap.c | 4 +- sys/vfs/hammer/hammer_inode.c | 40 +++++--- sys/vfs/hammer/hammer_ioctl.c | 10 +- sys/vfs/hammer/hammer_transaction.c | 4 +- 6 files changed, 213 insertions(+), 84 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index a7f477055c..8b9379428f 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.78 2008/06/10 05:06:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.79 2008/06/10 08:51:01 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -554,7 +554,8 @@ typedef struct hammer_reserve *hammer_reserve_t; * * This is strictly a heuristic. */ -#define HAMMER_MAX_UNDOS 256 +#define HAMMER_MAX_UNDOS 1024 +#define HAMMER_MAX_FLUSHERS 4 struct hammer_undo { RB_ENTRY(hammer_undo) rb_node; @@ -566,6 +567,35 @@ struct hammer_undo { typedef struct hammer_undo *hammer_undo_t; /* + * Support structures for the flusher threads. + */ +struct hammer_flusher_info { + struct hammer_mount *hmp; + TAILQ_HEAD(, hammer_inode) work_list; + thread_t td; + int running; +}; + +typedef struct hammer_flusher_info *hammer_flusher_info_t; + +struct hammer_flusher { + int signal; /* flusher thread sequencer */ + int act; /* currently active flush group */ + int done; /* set to act when complete */ + int next; /* next flush group */ + int group_lock; /* lock sequencing of the next flush */ + int exiting; /* request master exit */ + int count; /* number of slave flushers */ + int running; /* number of slave flushers running */ + thread_t td; /* master flusher thread */ + hammer_tid_t tid; /* last flushed transaction id */ + int finalize_want; /* serialize finalization */ + struct hammer_lock finalize_lock; /* serialize finalization */ + struct hammer_transaction trans; /* shared transaction */ + struct hammer_flusher_info *info[HAMMER_MAX_FLUSHERS]; +}; + +/* * Internal hammer mount data structure */ struct hammer_mount { @@ -590,16 +620,12 @@ struct hammer_mount { int rsv_databufs; /* reserved space due to dirty buffers */ int rsv_databytes; /* reserved space due to record data */ int rsv_recs; /* reserved space due to dirty records */ - int flusher_signal; /* flusher thread sequencer */ - int flusher_act; /* currently active flush group */ - int flusher_done; /* set to act when complete */ - int flusher_next; /* next flush group */ - int flusher_lock; /* lock sequencing of the next flush */ - int flusher_exiting; + int inode_reclaims; /* inodes pending reclaim by flusher */ int count_inodes; /* total number of inodes */ - hammer_tid_t flusher_tid; /* last flushed transaction id */ - thread_t flusher_td; + + struct hammer_flusher flusher; + u_int check_interrupt; uuid_t fsid; udev_t fsid_udev; diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index bdfd2bd777..fda28b6366 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.22 2008/06/10 05:06:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.23 2008/06/10 08:51:01 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -42,7 +42,8 @@ #include "hammer.h" -static void hammer_flusher_thread(void *arg); +static void hammer_flusher_master_thread(void *arg); +static void hammer_flusher_slave_thread(void *arg); static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp); static void hammer_flusher_flush(hammer_mount_t hmp); static void hammer_flusher_flush_inode(hammer_inode_t ip, @@ -57,82 +58,150 @@ hammer_flusher_sync(hammer_mount_t hmp) { int seq; - if (hmp->flusher_td) { - seq = hmp->flusher_next; - if (hmp->flusher_signal++ == 0) - wakeup(&hmp->flusher_signal); - while ((int)(seq - hmp->flusher_done) > 0) - tsleep(&hmp->flusher_done, 0, "hmrfls", 0); + if (hmp->flusher.td) { + seq = hmp->flusher.next; + if (hmp->flusher.signal++ == 0) + wakeup(&hmp->flusher.signal); + while ((int)(seq - hmp->flusher.done) > 0) + tsleep(&hmp->flusher.done, 0, "hmrfls", 0); } } void hammer_flusher_async(hammer_mount_t hmp) { - if (hmp->flusher_td) { - if (hmp->flusher_signal++ == 0) - wakeup(&hmp->flusher_signal); + if (hmp->flusher.td) { + if (hmp->flusher.signal++ == 0) + wakeup(&hmp->flusher.signal); } } void hammer_flusher_create(hammer_mount_t hmp) { - hmp->flusher_signal = 0; - hmp->flusher_act = 0; - hmp->flusher_done = 0; - hmp->flusher_next = 1; - lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL, - 0, -1, "hammer"); + hammer_flusher_info_t info; + int i; + + hmp->flusher.signal = 0; + hmp->flusher.act = 0; + hmp->flusher.done = 0; + hmp->flusher.next = 1; + hmp->flusher.count = 0; + hammer_ref(&hmp->flusher.finalize_lock); + + lwkt_create(hammer_flusher_master_thread, hmp, + &hmp->flusher.td, NULL, 0, -1, "hammer-M"); + for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) { + info = kmalloc(sizeof(*info), M_HAMMER, M_WAITOK|M_ZERO); + info->hmp = hmp; + TAILQ_INIT(&info->work_list); + ++hmp->flusher.count; + hmp->flusher.info[i] = info; + lwkt_create(hammer_flusher_slave_thread, info, + &info->td, NULL, 0, -1, "hammer-S%d", i); + } } void hammer_flusher_destroy(hammer_mount_t hmp) { - if (hmp->flusher_td) { - hmp->flusher_exiting = 1; - while (hmp->flusher_td) { - ++hmp->flusher_signal; - wakeup(&hmp->flusher_signal); - tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0); + hammer_flusher_info_t info; + int i; + + /* + * Kill the master + */ + hmp->flusher.exiting = 1; + while (hmp->flusher.td) { + ++hmp->flusher.signal; + wakeup(&hmp->flusher.signal); + tsleep(&hmp->flusher.exiting, 0, "hmrwex", hz); + } + + /* + * Kill the slaves + */ + for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) { + if ((info = hmp->flusher.info[i]) != NULL) { + KKASSERT(info->running == 0); + info->running = -1; + wakeup(&info->running); + while (info->td) { + tsleep(&info->td, 0, "hmrwwc", 0); + } + hmp->flusher.info[i] = NULL; + kfree(info, M_HAMMER); + --hmp->flusher.count; } } + KKASSERT(hmp->flusher.count == 0); } static void -hammer_flusher_thread(void *arg) +hammer_flusher_master_thread(void *arg) { hammer_mount_t hmp = arg; for (;;) { - while (hmp->flusher_lock) - tsleep(&hmp->flusher_lock, 0, "hmrhld", 0); + while (hmp->flusher.group_lock) + tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0); kprintf("S"); - hmp->flusher_act = hmp->flusher_next; - ++hmp->flusher_next; + hmp->flusher.act = hmp->flusher.next; + ++hmp->flusher.next; hammer_flusher_clean_loose_ios(hmp); hammer_flusher_flush(hmp); hammer_flusher_clean_loose_ios(hmp); - hmp->flusher_done = hmp->flusher_act; - - wakeup(&hmp->flusher_done); + hmp->flusher.done = hmp->flusher.act; + wakeup(&hmp->flusher.done); /* * Wait for activity. */ - if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list)) + if (hmp->flusher.exiting && TAILQ_EMPTY(&hmp->flush_list)) break; /* * This is a hack until we can dispose of frontend buffer * cache buffers on the frontend. */ - while (hmp->flusher_signal == 0) - tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0); - hmp->flusher_signal = 0; + while (hmp->flusher.signal == 0) + tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0); + hmp->flusher.signal = 0; + } + + /* + * And we are done. + */ + hmp->flusher.td = NULL; + wakeup(&hmp->flusher.exiting); + lwkt_exit(); +} + +static void +hammer_flusher_slave_thread(void *arg) +{ + hammer_flusher_info_t info; + hammer_mount_t hmp; + hammer_inode_t ip; + + info = arg; + hmp = info->hmp; + + for (;;) { + while (info->running == 0) + tsleep(&info->running, 0, "hmrssw", 0); + if (info->running < 0) + break; + while ((ip = TAILQ_FIRST(&info->work_list)) != NULL) { + TAILQ_REMOVE(&info->work_list, ip, flush_entry); + hammer_flusher_flush_inode(ip, &hmp->flusher.trans); + } + info->running = 0; + if (--hmp->flusher.running == 0) + wakeup(&hmp->flusher.running); } - hmp->flusher_td = NULL; - wakeup(&hmp->flusher_exiting); + info->td = NULL; + wakeup(&info->td); lwkt_exit(); } @@ -163,22 +232,37 @@ hammer_flusher_clean_loose_ios(hammer_mount_t hmp) static void hammer_flusher_flush(hammer_mount_t hmp) { - struct hammer_transaction trans; + hammer_flusher_info_t info; hammer_inode_t ip; hammer_reserve_t resv; + int i; /* * Flush the inodes */ - hammer_start_transaction_fls(&trans, hmp); + hammer_start_transaction_fls(&hmp->flusher.trans, hmp); + i = 0; while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) { - if (ip->flush_group != hmp->flusher_act) + if (ip->flush_group != hmp->flusher.act) break; TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry); - hammer_flusher_flush_inode(ip, &trans); + info = hmp->flusher.info[i]; + TAILQ_INSERT_TAIL(&info->work_list, ip, flush_entry); + if (info->running == 0) { + ++hmp->flusher.running; + info->running = 1; + wakeup(&info->running); + } + /*hammer_flusher_flush_inode(ip, &trans);*/ + ++i; + if (i == HAMMER_MAX_FLUSHERS || hmp->flusher.info[i] == NULL) + i = 0; } - hammer_flusher_finalize(&trans, 1); - hmp->flusher_tid = trans.tid; + while (hmp->flusher.running) + tsleep(&hmp->flusher.running, 0, "hmrfcc", 0); + + hammer_flusher_finalize(&hmp->flusher.trans, 1); + hmp->flusher.tid = hmp->flusher.trans.tid; /* * Clean up any freed big-blocks (typically zone-2). @@ -187,14 +271,12 @@ hammer_flusher_flush(hammer_mount_t hmp) * it can no longer be reused. */ while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) { - if (resv->flush_group != hmp->flusher_act) + if (resv->flush_group != hmp->flusher.act) break; TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry); hammer_blockmap_reserve_complete(hmp, resv); } - - - hammer_done_transaction(&trans); + hammer_done_transaction(&hmp->flusher.trans); } /* @@ -206,18 +288,29 @@ hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans) { hammer_mount_t hmp = ip->hmp; - /*hammer_lock_ex(&ip->lock);*/ + hammer_lock_sh(&hmp->flusher.finalize_lock); ip->error = hammer_sync_inode(ip); hammer_flush_inode_done(ip); - /*hammer_unlock(&ip->lock);*/ - + hammer_unlock(&hmp->flusher.finalize_lock); + while (hmp->flusher.finalize_want) + tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0); if (hammer_must_finalize_undo(hmp)) { + hmp->flusher.finalize_want = 1; + hammer_lock_ex(&hmp->flusher.finalize_lock); kprintf("HAMMER: Warning: UNDO area too small!"); hammer_flusher_finalize(trans, 1); + hammer_unlock(&hmp->flusher.finalize_lock); + hmp->flusher.finalize_want = 0; + wakeup(&hmp->flusher.finalize_want); } else if (trans->hmp->locked_dirty_count + trans->hmp->io_running_count > hammer_limit_dirtybufs) { + hmp->flusher.finalize_want = 1; + hammer_lock_ex(&hmp->flusher.finalize_lock); kprintf("t"); hammer_flusher_finalize(trans, 0); + hammer_unlock(&hmp->flusher.finalize_lock); + hmp->flusher.finalize_want = 0; + wakeup(&hmp->flusher.finalize_want); } } diff --git a/sys/vfs/hammer/hammer_freemap.c b/sys/vfs/hammer/hammer_freemap.c index 1bb77cc8d1..178b4de704 100644 --- a/sys/vfs/hammer/hammer_freemap.c +++ b/sys/vfs/hammer/hammer_freemap.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.15 2008/06/10 00:40:31 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_freemap.c,v 1.16 2008/06/10 08:51:01 dillon Exp $ */ /* @@ -179,7 +179,7 @@ hammer_freemap_free(hammer_transaction_t trans, hammer_off_t phys_offset, resv = kmalloc(sizeof(*resv), M_HAMMER, M_WAITOK|M_ZERO); resv->refs = 1; resv->zone_offset = phys_offset; - resv->flush_group = hmp->flusher_next + 1; + resv->flush_group = hmp->flusher.next + 1; RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv); TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry); ++hammer_count_reservations; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 8a6d6fea16..25000d5146 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.68 2008/06/10 05:06:20 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.69 2008/06/10 08:51:01 dillon Exp $ */ #include "hammer.h" @@ -96,20 +96,28 @@ hammer_vop_inactive(struct vop_inactive_args *ap) int hammer_vop_reclaim(struct vop_reclaim_args *ap) { + hammer_mount_t hmp; struct hammer_inode *ip; struct vnode *vp; vp = ap->a_vp; if ((ip = vp->v_data) != NULL) { + hmp = ip->hmp; vp->v_data = NULL; ip->vp = NULL; if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) { ++hammer_count_reclaiming; - ++ip->hmp->inode_reclaims; + ++hmp->inode_reclaims; ip->flags |= HAMMER_INODE_RECLAIM; } hammer_rel_inode(ip, 1); + + /* + * Do not let too many reclaimed inodes build up. + * + */ + hammer_inode_waitreclaims(hmp); } return(0); } @@ -237,14 +245,16 @@ loop: return(ip); } +#if 0 /* * Impose a slow-down if HAMMER is heavily backlogged on cleaning * out reclaimed inodes. */ if (hmp->inode_reclaims > HAMMER_RECLAIM_MIN && - curthread != hmp->flusher_td) { + trans->type != HAMMER_TRANS_FLS) { hammer_inode_waitreclaims(hmp); - } + } +#endif /* * Allocate a new inode structure and deal with races later. @@ -911,7 +921,7 @@ hammer_setup_parent_inodes(hammer_record_t record) * allow the operation yet anyway (the second return -1). */ if (record->flush_state == HAMMER_FST_FLUSH) { - if (record->flush_group != hmp->flusher_next) { + if (record->flush_group != hmp->flusher.next) { ip->flags |= HAMMER_INODE_REFLUSH; return(-1); } @@ -973,7 +983,7 @@ hammer_setup_parent_inodes(hammer_record_t record) return(-1); } else #endif - if (ip->flush_group == ip->hmp->flusher_next) { + if (ip->flush_group == ip->hmp->flusher.next) { /* * This is the record we wanted to synchronize. */ @@ -1015,8 +1025,8 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) if (ip->flush_state == HAMMER_FST_IDLE) hammer_ref(&ip->lock); ip->flush_state = HAMMER_FST_FLUSH; - ip->flush_group = ip->hmp->flusher_next; - ++ip->hmp->flusher_lock; + ip->flush_group = ip->hmp->flusher.next; + ++ip->hmp->flusher.group_lock; /* * We need to be able to vfsync/truncate from the backend. @@ -1056,8 +1066,8 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) ip->flags |= HAMMER_INODE_RESIGNAL; hammer_flusher_async(ip->hmp); } - if (--ip->hmp->flusher_lock == 0) - wakeup(&ip->hmp->flusher_lock); + if (--ip->hmp->flusher.group_lock == 0) + wakeup(&ip->hmp->flusher.group_lock); return; } } @@ -1087,8 +1097,8 @@ hammer_flush_inode_core(hammer_inode_t ip, int flags) * The flusher list inherits our inode and reference. */ TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry); - if (--ip->hmp->flusher_lock == 0) - wakeup(&ip->hmp->flusher_lock); + if (--ip->hmp->flusher.group_lock == 0) + wakeup(&ip->hmp->flusher.group_lock); if (flags & HAMMER_FLUSH_SIGNAL) { hammer_flusher_async(ip->hmp); @@ -1501,7 +1511,7 @@ hammer_sync_inode(hammer_inode_t ip) while ((depend = next) != NULL) { next = TAILQ_NEXT(depend, target_entry); if (depend->flush_state == HAMMER_FST_FLUSH && - depend->flush_group == ip->hmp->flusher_act) { + depend->flush_group == ip->hmp->flusher.act) { /* * If this is an ADD that was deleted by the frontend * the frontend nlinks count will have already been @@ -1856,7 +1866,9 @@ hammer_inode_waitreclaims(hammer_mount_t hmp) int maxpt; while (hmp->inode_reclaims > HAMMER_RECLAIM_MIN) { - count = hmp->count_inodes; + count = hmp->count_inodes - hmp->inode_reclaims; + if (count < 100) + count = 100; minpt = count * HAMMER_RECLAIM_SLOPCT / 100; maxpt = count * HAMMER_RECLAIM_MAXPCT / 100; diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c index 8efccbde7e..156bbcdc07 100644 --- a/sys/vfs/hammer/hammer_ioctl.c +++ b/sys/vfs/hammer/hammer_ioctl.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.19 2008/05/18 01:48:50 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.20 2008/06/10 08:51:01 dillon Exp $ */ #include "hammer.h" @@ -303,22 +303,22 @@ hammer_ioc_synctid(hammer_transaction_t trans, hammer_inode_t ip, switch(std->op) { case HAMMER_SYNCTID_NONE: - std->tid = hmp->flusher_tid; /* inaccurate */ + std->tid = hmp->flusher.tid; /* inaccurate */ break; case HAMMER_SYNCTID_ASYNC: hammer_queue_inodes_flusher(hmp, MNT_NOWAIT); - std->tid = hmp->flusher_tid; /* inaccurate */ + std->tid = hmp->flusher.tid; /* inaccurate */ hammer_flusher_async(hmp); break; case HAMMER_SYNCTID_SYNC1: hammer_queue_inodes_flusher(hmp, MNT_WAIT); hammer_flusher_sync(hmp); - std->tid = hmp->flusher_tid; + std->tid = hmp->flusher.tid; break; case HAMMER_SYNCTID_SYNC2: hammer_queue_inodes_flusher(hmp, MNT_WAIT); hammer_flusher_sync(hmp); - std->tid = hmp->flusher_tid; + std->tid = hmp->flusher.tid; hammer_flusher_sync(hmp); break; default: diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index 21e5487446..7ea41cff1b 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.16 2008/06/09 04:19:10 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.17 2008/06/10 08:51:02 dillon Exp $ */ #include "hammer.h" @@ -89,8 +89,6 @@ hammer_start_transaction_fls(struct hammer_transaction *trans, { int error; - KKASSERT(curthread == hmp->flusher_td); - bzero(trans, sizeof(*trans)); trans->type = HAMMER_TRANS_FLS; -- 2.11.4.GIT