From 5de0c0e50ad44269de9c756b21c5106f503d6df7 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Mon, 23 Jun 2008 21:42:48 +0000 Subject: [PATCH] HAMMER 58A/Many: Mirroring support part 1 * Implement mastership domains for transaction ids to support multi-master mirroring and implement mastership selection at mount time. Mastership domains work by having the low 4 bits of the transaction id specify the mastership id (0-15). This allows the mirroring code to distinguish between changes originating on a particular node and changes mirrored from another node. This also ensures that filesystem objects can be created on the mirrors in parallel without resulting in conflicting object ids. * Eliminate time-based TID generation. Just increment the TID as appropriate. NOTE: Portions of this change may be reverted at a later time depending on how the mirroring implementation proceeds. * Minor code cleanups. --- sys/vfs/hammer/hammer.h | 6 ++-- sys/vfs/hammer/hammer_btree.h | 8 ++--- sys/vfs/hammer/hammer_disk.h | 8 ++++- sys/vfs/hammer/hammer_inode.c | 4 +-- sys/vfs/hammer/hammer_ioctl.h | 3 +- sys/vfs/hammer/hammer_mount.h | 9 ++--- sys/vfs/hammer/hammer_subs.c | 19 ++-------- sys/vfs/hammer/hammer_transaction.c | 72 ++++++++++++++++++------------------- sys/vfs/hammer/hammer_vfsops.c | 10 +++++- 9 files changed, 70 insertions(+), 69 deletions(-) diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 9d34b00d81..f01e9b447b 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.90 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.91 2008/06/23 21:42:48 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -630,6 +630,7 @@ struct hammer_mount { int ronly; int nvolumes; int volume_iterator; + int masterid; /* -1 or 0-15 - clustering and mirroring */ int rsv_inodes; /* reserved space due to dirty inodes */ int rsv_databufs; /* reserved space due to dirty buffers */ int rsv_databytes; /* reserved space due to record data */ @@ -801,9 +802,8 @@ u_int32_t hammer_to_unix_xid(uuid_t *uuid); void hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid); void hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts); u_int64_t hammer_timespec_to_time(struct timespec *ts); -hammer_tid_t hammer_now_tid(void); hammer_tid_t hammer_str_to_tid(const char *str); -hammer_tid_t hammer_alloc_objid(hammer_transaction_t trans, hammer_inode_t dip); +hammer_tid_t hammer_alloc_objid(hammer_mount_t hmp, hammer_inode_t dip); void hammer_clear_objid(hammer_inode_t dip); void hammer_destroy_objid_cache(hammer_mount_t hmp); diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h index a8b42771ab..a264b1f69a 100644 --- a/sys/vfs/hammer/hammer_btree.h +++ b/sys/vfs/hammer/hammer_btree.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.21 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.22 2008/06/23 21:42:48 dillon Exp $ */ /* @@ -139,7 +139,7 @@ typedef struct hammer_base_elm *hammer_base_elm_t; */ struct hammer_btree_internal_elm { struct hammer_base_elm base; - hammer_off_t serialno; /* mirroring support */ + hammer_tid_t mirror_tid; /* mirroring support */ hammer_off_t subtree_offset; int32_t unused02; int32_t unused03; @@ -152,7 +152,7 @@ struct hammer_btree_internal_elm { */ struct hammer_btree_leaf_elm { struct hammer_base_elm base; - hammer_off_t serialno; /* mirroring support */ + hammer_tid_t unused01; hammer_off_t data_offset; int32_t data_len; hammer_crc_t data_crc; @@ -226,7 +226,7 @@ struct hammer_node_ondisk { hammer_off_t reserved04; /* future link_right */ hammer_off_t reserved05; hammer_off_t reserved06; - hammer_off_t serialno; /* mirroring support (aggregator) */ + hammer_tid_t mirror_tid; /* mirroring support (aggregator) */ /* * Element array. Internal nodes have one less logical element diff --git a/sys/vfs/hammer/hammer_disk.h b/sys/vfs/hammer/hammer_disk.h index 369b11064e..84be158942 100644 --- a/sys/vfs/hammer/hammer_disk.h +++ b/sys/vfs/hammer/hammer_disk.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.41 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.42 2008/06/23 21:42:48 dillon Exp $ */ #ifndef VFS_HAMMER_DISK_H_ @@ -211,6 +211,12 @@ typedef u_int32_t hammer_crc_t; ((hammer_off_t)HAMMER_BUFFERS_PER_LARGEBLOCK_MASK) /* + * Maximum number of mirrors operating in master mode (multi-master + * clustering and mirroring). + */ +#define HAMMER_MAX_MASTERS 16 + +/* * The blockmap is somewhat of a degenerate structure. HAMMER only actually * uses it in its original incarnation to implement the free-map. 
* diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 194ffb9ad9..254f4271b1 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.82 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.83 2008/06/23 21:42:48 dillon Exp $ */ #include "hammer.h" @@ -491,7 +491,7 @@ hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, if (pseudofs) ip->obj_id = HAMMER_OBJID_ROOT; else - ip->obj_id = hammer_alloc_objid(trans, dip); + ip->obj_id = hammer_alloc_objid(hmp, dip); ip->obj_localization = localization; KKASSERT(ip->obj_id != 0); diff --git a/sys/vfs/hammer/hammer_ioctl.h b/sys/vfs/hammer/hammer_ioctl.h index aaa55fbe47..2b2250b9b1 100644 --- a/sys/vfs/hammer/hammer_ioctl.h +++ b/sys/vfs/hammer/hammer_ioctl.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.12 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.13 2008/06/23 21:42:48 dillon Exp $ */ /* * HAMMER ioctl's. This file can be #included from userland @@ -210,6 +210,7 @@ struct hammer_ioc_make_pseudofs { struct hammer_ioc_get_pseudofs { struct hammer_ioc_head head; u_int32_t pseudoid; + int masterid; }; diff --git a/sys/vfs/hammer/hammer_mount.h b/sys/vfs/hammer/hammer_mount.h index 2792af963d..12b5f87302 100644 --- a/sys/vfs/hammer/hammer_mount.h +++ b/sys/vfs/hammer/hammer_mount.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.5 2008/06/03 18:47:25 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.6 2008/06/23 21:42:48 dillon Exp $ */ #ifndef _SYS_TYPES_H_ @@ -49,14 +49,15 @@ struct hammer_mount_info { const char **volumes; /* array of pointers to device names */ int nvolumes; /* number of devices */ int hflags; /* extended hammer mount flags */ - int unused01; + int masterid; u_int64_t asof; /* asof - HAMMER_MAX_TID is current */ struct export_args export; /* export arguments */ u_int64_t reserved[15]; }; #define HMNT_NOHISTORY 0x00000001 -#define HMNT_EXPORTREQ 0x00000002 +#define HMNT_MASTERID 0x00000002 /* masterid field set */ +#define HMNT_EXPORTREQ 0x00000004 -#define HMNT_USERFLAGS (HMNT_NOHISTORY) +#define HMNT_USERFLAGS (HMNT_NOHISTORY | HMNT_MASTERID) diff --git a/sys/vfs/hammer/hammer_subs.c b/sys/vfs/hammer/hammer_subs.c index 79b74503a8..6ef57bef35 100644 --- a/sys/vfs/hammer/hammer_subs.c +++ b/sys/vfs/hammer/hammer_subs.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.27 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.28 2008/06/23 21:42:48 dillon Exp $ */ /* * HAMMER structural locking @@ -430,26 +430,11 @@ hammer_directory_namekey(const void *name, int len) } hammer_tid_t -hammer_now_tid(void) -{ - struct timespec ts; - hammer_tid_t tid; - - getnanotime(&ts); - tid = ts.tv_sec * 1000000000LL + ts.tv_nsec; - return(tid); -} - -hammer_tid_t hammer_str_to_tid(const char *str) { hammer_tid_t tid; - int len = strlen(str); - if (len > 10) - tid = strtouq(str, NULL, 0); /* full TID */ - else - tid = strtouq(str, NULL, 0) * 1000000000LL; /* time_t */ + tid = strtouq(str, NULL, 0); /* full TID */ return(tid); } diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index 3c2837973e..5639da19ce 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,12 +31,12 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.19 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.20 2008/06/23 21:42:48 dillon Exp $ */ #include "hammer.h" -static hammer_tid_t hammer_alloc_tid(hammer_transaction_t trans, int count); +static hammer_tid_t hammer_alloc_tid(hammer_mount_t hmp, int count); /* @@ -104,7 +104,7 @@ hammer_start_transaction_fls(struct hammer_transaction *trans, trans->hmp = hmp; trans->rootvol = hammer_get_root_volume(hmp, &error); KKASSERT(error == 0); - trans->tid = hammer_alloc_tid(trans, 1); + trans->tid = hammer_alloc_tid(hmp, 1); trans->sync_lock_refs = 1; getmicrotime(&tv); @@ -125,34 +125,30 @@ hammer_done_transaction(struct hammer_transaction *trans) } /* - * Note: Successive transaction ids must be at least 2 apart so the - * B-Tree code can make a separator that does not match either the - * left or right hand sides. 
+ * Allocate (count) TIDs. If running in multi-master mode the returned + * base will be aligned to a 16-count plus the master id (0-15). + * Multi-master mode allows non-conflicting transactions to run and new objects to be + * created on multiple masters in parallel. The transaction id identifies + * the original master. The object_id is also subject to this rule in + * order to allow objects to be created on multiple masters in parallel. + * + * Directories may pre-allocate a large number of object ids (100,000). + * + * NOTE: There is no longer a requirement that successive transaction + * ids be 2 apart for separator generation. */ static hammer_tid_t -hammer_alloc_tid(hammer_transaction_t trans, int count) +hammer_alloc_tid(hammer_mount_t hmp, int count) { -#if 0 - struct timespec ts; -#endif hammer_tid_t tid; + int multiplier = (hmp->masterid < 0) ? 1 : HAMMER_MAX_MASTERS; -#if 0 - getnanotime(&ts); -#endif - tid = time_second * 1000000000LL; -#if 0 - tid = ts.tv_sec * 1000000000LL + ts.tv_nsec; -#endif - if (tid < trans->hmp->next_tid) - tid = trans->hmp->next_tid; + tid = (hmp->next_tid + multiplier) & ~(hammer_tid_t)(multiplier - 1); if (tid >= 0xFFFFFFFFFFFFF000ULL) panic("hammer_start_transaction: Ran out of TIDs!"); - trans->hmp->next_tid = tid + count * 2; - if (hammer_debug_tid) { - kprintf("alloc_tid %016llx (0x%08x)\n", - tid, (int)(tid / 1000000000LL)); - } + hmp->next_tid = tid + count * multiplier; + if (hammer_debug_tid) + kprintf("alloc_tid %016llx\n", tid); return(tid); } @@ -160,43 +156,47 @@ hammer_alloc_tid(hammer_transaction_t trans, int count) * Allocate an object id */ hammer_tid_t -hammer_alloc_objid(hammer_transaction_t trans, hammer_inode_t dip) +hammer_alloc_objid(hammer_mount_t hmp, hammer_inode_t dip) { hammer_objid_cache_t ocp; hammer_tid_t tid; while ((ocp = dip->objid_cache) == NULL) { - if (trans->hmp->objid_cache_count < OBJID_CACHE_SIZE) { + if (hmp->objid_cache_count < OBJID_CACHE_SIZE) { ocp = kmalloc(sizeof(*ocp), M_HAMMER, 
M_WAITOK|M_ZERO); - ocp->next_tid = hammer_alloc_tid(trans, - OBJID_CACHE_BULK); + ocp->next_tid = hammer_alloc_tid(hmp, OBJID_CACHE_BULK); ocp->count = OBJID_CACHE_BULK; - TAILQ_INSERT_HEAD(&trans->hmp->objid_cache_list, ocp, - entry); - ++trans->hmp->objid_cache_count; + TAILQ_INSERT_HEAD(&hmp->objid_cache_list, ocp, entry); + ++hmp->objid_cache_count; /* may have blocked, recheck */ if (dip->objid_cache == NULL) { dip->objid_cache = ocp; ocp->dip = dip; } } else { - ocp = TAILQ_FIRST(&trans->hmp->objid_cache_list); + ocp = TAILQ_FIRST(&hmp->objid_cache_list); if (ocp->dip) ocp->dip->objid_cache = NULL; dip->objid_cache = ocp; ocp->dip = dip; } } - TAILQ_REMOVE(&trans->hmp->objid_cache_list, ocp, entry); + TAILQ_REMOVE(&hmp->objid_cache_list, ocp, entry); + + /* + * The TID is incremented by 1 or by 16 depending what mode the + * mount is operating in. + */ tid = ocp->next_tid; - ocp->next_tid += 2; + ocp->next_tid += (hmp->masterid < 0) ? 1 : HAMMER_MAX_MASTERS; + if (--ocp->count == 0) { dip->objid_cache = NULL; - --trans->hmp->objid_cache_count; + --hmp->objid_cache_count; ocp->dip = NULL; kfree(ocp, M_HAMMER); } else { - TAILQ_INSERT_TAIL(&trans->hmp->objid_cache_list, ocp, entry); + TAILQ_INSERT_TAIL(&hmp->objid_cache_list, ocp, entry); } return(tid); } diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index 95d78d36fe..470001aa86 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.51 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.52 2008/06/23 21:42:48 dillon Exp $ */ #include @@ -249,6 +249,10 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, if (info.nvolumes <= 0 || info.nvolumes >= 32768) return (EINVAL); } + if ((info.hflags & HMNT_MASTERID) && + (info.masterid < -1 || info.masterid >= HAMMER_MAX_MASTERS)) { + return (EINVAL); + } /* * Interal mount data structure @@ -292,6 +296,10 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, } hmp->hflags &= ~HMNT_USERFLAGS; hmp->hflags |= info.hflags & HMNT_USERFLAGS; + if (info.hflags & HMNT_MASTERID) + hmp->masterid = info.masterid; /* flag set: use the id range-checked above */ + else + hmp->masterid = -1; /* flag clear: no master id supplied */ if (info.asof) { kprintf("ASOF\n"); mp->mnt_flag |= MNT_RDONLY; -- 2.11.4.GIT