From 4889cbd4a112d640408111b521aee58156f8a202 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 31 Jul 2008 04:42:04 +0000 Subject: [PATCH] HAMMER: Mirroring, misc bug fixes * Adjust hammer_flusher_async() to queue an extra flush if called twice in quick succession. This fixes the 'sync' command to properly sync the entire filesystem. Previously two syncs were needed. * Fix a bug where a user application could get stuck due to HAMMER losing track of an inode flush. * Mirroring masters now use the most recent fully committed transaction id instead of the last flushed (but still subject to rollback) tid. This fixes an issue where a mirror could pass information still subject to crash recovery rollback to the slave. Now only fully committed information is passed to the slave. * Fix a transitory bug where the mirroring code would sometimes not sync the correct delete state to the slave. The slave would always be corrected in the next pass, however. Now the slave is correct at all times. * Fix a bug in hammer_mirror_write() where a delete-to operation could livelock. * Add a new HAMMER ioctl which waits for the committed data transaction id to change. This will be used by the mirroring code to implement continuous streaming operation. 
Reported-by: Francois Tigeot , Michael Neumann (the user application freeze bug) --- sys/vfs/hammer/hammer.h | 8 ++++-- sys/vfs/hammer/hammer_disk.h | 4 +-- sys/vfs/hammer/hammer_flusher.c | 30 ++++++++++++++++++++-- sys/vfs/hammer/hammer_inode.c | 31 ++++++++++++++++------- sys/vfs/hammer/hammer_ioctl.c | 8 +++++- sys/vfs/hammer/hammer_ioctl.h | 4 ++- sys/vfs/hammer/hammer_mirror.c | 7 +++--- sys/vfs/hammer/hammer_ondisk.c | 3 ++- sys/vfs/hammer/hammer_pfs.c | 55 +++++++++++++++++++++++++++++++++++++++-- sys/vfs/hammer/hammer_vfsops.c | 6 ++--- 10 files changed, 129 insertions(+), 27 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 49d44ec68a..305993c5b6 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.123 2008/07/27 23:01:25 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.124 2008/07/31 04:42:04 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -720,7 +720,9 @@ struct hammer_mount { int error; /* critical I/O error */ struct krate krate; /* rate limited kprintf */ hammer_tid_t asof; /* snapshot mount */ - hammer_off_t next_tid; + hammer_tid_t next_tid; + hammer_tid_t flush_tid1; /* flusher tid sequencing */ + hammer_tid_t flush_tid2; /* flusher tid sequencing */ int64_t copy_stat_freebigblocks; /* number of free bigblocks */ u_int32_t namekey_iterator; @@ -1140,6 +1142,8 @@ int hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_pseudofs_rw *pfs); int hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_pseudofs_rw *pfs); +int hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_pseudofs_rw *pfs); int hammer_signal_check(hammer_mount_t hmp); diff --git a/sys/vfs/hammer/hammer_disk.h 
b/sys/vfs/hammer/hammer_disk.h index 08c83ccc44..43e9996dee 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.51 2008/07/19 18:44:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.52 2008/07/31 04:42:04 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -498,7 +498,7 @@ struct hammer_volume_ondisk { int64_t vol0_stat_inodes; /* for statfs only */ int64_t vol0_stat_records; /* total records in filesystem */ hammer_off_t vol0_btree_root; /* B-Tree root */ - hammer_tid_t vol0_next_tid; /* highest synchronized TID */ + hammer_tid_t vol0_next_tid; /* highest partially synchronized TID */ hammer_off_t vol0_unused03; /* diff --git a/sys/vfs/hammer/hammer_flusher.c b/sys/vfs/hammer/hammer_flusher.c index 7209e5c61f..c7fdfcf470 100644 --- a/sys/vfs/hammer/hammer_flusher.c +++ b/sys/vfs/hammer/hammer_flusher.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.44 2008/07/19 04:49:39 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.45 2008/07/31 04:42:04 dillon Exp $ */ /* * HAMMER dependancy flusher thread @@ -231,7 +231,13 @@ hammer_flusher_master_thread(void *arg) break; while (hmp->flusher.signal == 0) tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0); - hmp->flusher.signal = 0; + + /* + * Flush for each count on signal but only allow one extra + * flush request to build up. + */ + if (--hmp->flusher.signal != 0) + hmp->flusher.signal = 1; } /* @@ -665,6 +671,13 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) hammer_modify_volume_done(root_volume); } + /* + * vol0_next_tid is used for TID selection and is updated without + * an UNDO so we do not reuse a TID that may have been rolled-back. 
+ * + * vol0_last_tid is the highest fully-synchronized TID. It is + * set-up when the UNDO fifo is fully synced, later on (not here). + */ if (root_volume->io.modified) { hammer_modify_volume(NULL, root_volume, NULL, 0); if (root_volume->ondisk->vol0_next_tid < trans->tid) @@ -722,6 +735,18 @@ hammer_flusher_finalize(hammer_transaction_t trans, int final) hmp->hflags |= HMNT_UNDO_DIRTY; } hammer_clear_undo_history(hmp); + + /* + * Flush tid sequencing. flush_tid1 is fully synchronized, + * meaning a crash will not roll it back. flush_tid2 has + * been written out asynchronously and a crash will roll + * it back. flush_tid1 is used for all mirroring masters. + */ + if (hmp->flush_tid1 != hmp->flush_tid2) { + hmp->flush_tid1 = hmp->flush_tid2; + wakeup(&hmp->flush_tid1); + } + hmp->flush_tid2 = trans->tid; } /* @@ -738,6 +763,7 @@ failed: done: hammer_unlock(&hmp->flusher.finalize_lock); + if (--hmp->flusher.finalize_want == 0) wakeup(&hmp->flusher.finalize_want); hammer_stats_commits += final; diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 91bb2bc142..48949de9fc 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.106 2008/07/27 23:01:25 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.107 2008/07/31 04:42:04 dillon Exp $ */ #include "hammer.h" @@ -1360,8 +1360,9 @@ hammer_modify_inode(hammer_inode_t ip, int flags) * place the inode in a flushing state if it is currently idle and flag it * to reflush if it is currently flushing. * - * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to - * flush the indoe synchronously using the caller's context. + * Upon return if the inode could not be flushed due to a setup + * dependancy, then it will be automatically flushed when the dependancy + * is satisfied. 
*/ void hammer_flush_inode(hammer_inode_t ip, int flags) @@ -1440,10 +1441,14 @@ hammer_flush_inode(hammer_inode_t ip, int flags) hammer_flush_inode_core(ip, flg, flags); } else { /* - * parent has no connectivity, tell it to flush + * Parent has no connectivity, tell it to flush * us as soon as it does. + * + * The REFLUSH flag is also needed to trigger + * dependancy wakeups. */ - ip->flags |= HAMMER_INODE_CONN_DOWN; + ip->flags |= HAMMER_INODE_CONN_DOWN | + HAMMER_INODE_REFLUSH; if (flags & HAMMER_FLUSH_SIGNAL) { ip->flags |= HAMMER_INODE_RESIGNAL; hammer_flusher_async(ip->hmp, flg); @@ -1454,6 +1459,9 @@ hammer_flush_inode(hammer_inode_t ip, int flags) /* * We are already flushing, flag the inode to reflush * if needed after it completes its current flush. + * + * The REFLUSH flag is also needed to trigger + * dependancy wakeups. */ if ((ip->flags & HAMMER_INODE_REFLUSH) == 0) ip->flags |= HAMMER_INODE_REFLUSH; @@ -1706,17 +1714,22 @@ hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags) */ if (go_count == 0) { if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) { - ip->flags |= HAMMER_INODE_REFLUSH; - --ip->hmp->count_iqueued; --hammer_count_iqueued; + --flg->total_count; ip->flush_state = HAMMER_FST_SETUP; ip->flush_group = NULL; if (ip->flags & HAMMER_INODE_VHELD) { ip->flags &= ~HAMMER_INODE_VHELD; vrele(ip->vp); } + + /* + * REFLUSH is needed to trigger dependancy wakeups + * when an inode is in SETUP. + */ + ip->flags |= HAMMER_INODE_REFLUSH; if (flags & HAMMER_FLUSH_SIGNAL) { ip->flags |= HAMMER_INODE_RESIGNAL; hammer_flusher_async(ip->hmp, flg); @@ -1909,8 +1922,8 @@ hammer_setup_child_callback(hammer_record_t rec, void *data) * flush groups before it can be completely * flushed. 
*/ - ip->flags |= HAMMER_INODE_REFLUSH; - ip->flags |= HAMMER_INODE_RESIGNAL; + ip->flags |= HAMMER_INODE_RESIGNAL | + HAMMER_INODE_REFLUSH; r = -1; } else if (rec->type == HAMMER_MEM_RECORD_ADD) { /* diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c index 364dc18332..b7f29e35a3 100644 --- a/sys/vfs/hammer/hammer_ioctl.c +++ b/sys/vfs/hammer/hammer_ioctl.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.29 2008/07/16 18:30:59 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.30 2008/07/31 04:42:04 dillon Exp $ */ #include "hammer.h" @@ -101,6 +101,12 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, (struct hammer_ioc_pseudofs_rw *)data); } break; + case HAMMERIOC_WAI_PSEUDOFS: + if (error == 0) { + error = hammer_ioc_wait_pseudofs(&trans, ip, + (struct hammer_ioc_pseudofs_rw *)data); + } + break; case HAMMERIOC_MIRROR_READ: if (error == 0) { error = hammer_ioc_mirror_read(&trans, ip, diff --git a/sys/vfs/hammer/hammer_ioctl.h b/sys/vfs/hammer/hammer_ioctl.h index 2c31b8ece4..80d783fde4 100644 --- a/sys/vfs/hammer/hammer_ioctl.h +++ b/sys/vfs/hammer/hammer_ioctl.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.21 2008/07/12 23:04:50 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.22 2008/07/31 04:42:04 dillon Exp $ */ /* * HAMMER ioctl's. 
This file can be #included from userland @@ -300,6 +300,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t; #define HAMMER_MREC_TYPE_SKIP 5 /* skip-range */ #define HAMMER_MREC_TYPE_PASS 6 /* record for cmp only (pass) */ #define HAMMER_MREC_TYPE_TERM 7 /* (userland only) */ +#define HAMMER_MREC_TYPE_IDLE 8 /* (userland only) */ #define HAMMER_MREC_CRCOFF (offsetof(struct hammer_ioc_mrecord_head, rec_size)) #define HAMMER_MREC_HEADSIZE sizeof(struct hammer_ioc_mrecord_head) @@ -322,6 +323,7 @@ typedef union hammer_ioc_mrecord_any *hammer_ioc_mrecord_any_t; #define HAMMERIOC_UPG_PSEUDOFS _IOWR('h',9,struct hammer_ioc_pseudofs_rw) #define HAMMERIOC_DGD_PSEUDOFS _IOWR('h',10,struct hammer_ioc_pseudofs_rw) #define HAMMERIOC_RMR_PSEUDOFS _IOWR('h',11,struct hammer_ioc_pseudofs_rw) +#define HAMMERIOC_WAI_PSEUDOFS _IOWR('h',12,struct hammer_ioc_pseudofs_rw) #endif diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index 34c63c317e..f752db2115 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.15 2008/07/13 01:12:41 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.16 2008/07/31 04:42:04 dillon Exp $ */ /* * HAMMER mirroring ioctls - serialize and deserialize modifications made @@ -246,7 +246,7 @@ retry: mrec.head.type = HAMMER_MREC_TYPE_REC; mrec.head.rec_size = bytes; mrec.rec.leaf = *elm; - if (elm->base.delete_tid >= mirror->tid_end) + if (elm->base.delete_tid > mirror->tid_end) mrec.rec.leaf.base.delete_tid = 0; rec_crc = crc32(&mrec.head.rec_size, sizeof(mrec.rec) - crc_start); @@ -668,14 +668,13 @@ hammer_mirror_delete_to(hammer_cursor_t cursor, while (error == 0) { elm = &cursor->node->ondisk->elms[cursor->index].leaf; KKASSERT(elm->base.btype == HAMMER_BTREE_TYPE_RECORD); + cursor->flags |= HAMMER_CURSOR_ATEDISK; if (elm->base.delete_tid == 0) { error = hammer_delete_at_cursor(cursor, HAMMER_DELETE_ADJUST, mirror->tid_end, time_second, 1, NULL); - if (error == 0) - cursor->flags |= HAMMER_CURSOR_ATEDISK; } if (error == 0) error = hammer_btree_iterate(cursor); diff --git a/sys/vfs/hammer/hammer_ondisk.c b/sys/vfs/hammer/hammer_ondisk.c index f462c4aa5b..44f267eec8 100644 --- a/sys/vfs/hammer/hammer_ondisk.c +++ b/sys/vfs/hammer/hammer_ondisk.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.72 2008/07/27 21:34:04 mneumann Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.73 2008/07/31 04:42:04 dillon Exp $ */ /* * Manage HAMMER's on-disk structures. 
These routines are primarily @@ -1478,6 +1478,7 @@ hammer_sync_hmp(hammer_mount_t hmp, int waitfor) hammer_flusher_sync(hmp); } else { hammer_flusher_async(hmp, NULL); + hammer_flusher_async(hmp, NULL); } return(info.error); } diff --git a/sys/vfs/hammer/hammer_pfs.c b/sys/vfs/hammer/hammer_pfs.c index 639c53a4d0..e92b05f2ef 100644 --- a/sys/vfs/hammer/hammer_pfs.c +++ b/sys/vfs/hammer/hammer_pfs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.4 2008/07/19 18:44:49 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $ */ /* * HAMMER PFS ioctls - Manage pseudo-fs configurations @@ -76,9 +76,13 @@ hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, * If the PFS is a master the sync tid is set by normal operation * rather then the mirroring code, and will always track the * real HAMMER filesystem. + * + * We use flush_tid1, which is the highest fully committed TID. + * flush_tid2 is the TID most recently flushed, but the UNDO hasn't + * caught up to it yet so a crash will roll us back to flush_tid1. */ if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) - pfsm->pfsd.sync_end_tid = trans->rootvol->ondisk->vol0_next_tid; + pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; /* * Copy out to userland. 
@@ -126,6 +130,11 @@ hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, error = hammer_mkroot_pseudofs(trans, cred, pfsm); if (error == 0) error = hammer_save_pseudofs(trans, pfsm); + + /* + * Wakeup anyone waiting for a TID update for this PFS + */ + wakeup(&pfsm->pfsd.sync_end_tid); hammer_rel_pseudofs(trans->hmp, pfsm); } return(error); @@ -256,6 +265,48 @@ hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, } /* + * Wait for the PFS to sync past the specified TID + */ +int +hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_pseudofs_rw *pfs) +{ + hammer_pseudofs_inmem_t pfsm; + struct hammer_pseudofs_data pfsd; + u_int32_t localization; + hammer_tid_t tid; + void *waitp; + int error; + + if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) + return(error); + localization = (u_int32_t)pfs->pfs_id << 16; + + if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) + return(error); + + pfsm = hammer_load_pseudofs(trans, localization, &error); + if (error == 0) { + if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { + tid = pfsm->pfsd.sync_end_tid; + waitp = &pfsm->pfsd.sync_end_tid; + } else { + tid = trans->hmp->flush_tid1; + waitp = &trans->hmp->flush_tid1; + } + if (tid <= pfsd.sync_end_tid) + tsleep(waitp, PCATCH, "hmrmwt", 0); + } + hammer_rel_pseudofs(trans->hmp, pfsm); + if (error == EINTR) { + pfs->head.flags |= HAMMER_IOC_HEAD_INTR; + error = 0; + } + return(error); +} + + +/* * Auto-detect the pseudofs and do basic bounds checking. */ static diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 995149872f..286fd5ad20 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.69 2008/07/27 23:01:25 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.70 2008/07/31 04:42:04 dillon Exp $ */ #include @@ -583,6 +583,8 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, * on-disk first_offset represents the LAST flush cycle. */ hmp->next_tid = rootvol->ondisk->vol0_next_tid; + hmp->flush_tid1 = hmp->next_tid; + hmp->flush_tid2 = hmp->next_tid; bcopy(rootvol->ondisk->vol0_blockmap, hmp->blockmap, sizeof(hmp->blockmap)); hmp->copy_stat_freebigblocks = rootvol->ondisk->vol0_stat_freebigblocks; @@ -872,8 +874,6 @@ hammer_vfs_sync(struct mount *mp, int waitfor) if (panicstr == NULL) { error = hammer_sync_hmp(hmp, waitfor); - if (error == 0) - error = hammer_sync_hmp(hmp, waitfor); } else { error = EIO; } -- 2.11.4.GIT