From c82af9046364dc3a6599ca61f70341882ae2c4a5 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 26 Jun 2008 04:06:23 +0000 Subject: [PATCH] HAMMER 59A/Many: Mirroring related work (and one bug fix). * BUG FIX: Fix a bug in directory hashkey generation. The iterator could sometimes conflict with a key already on-disk and interfere with a pending deletion. The chance of this occurring was minuscule but not 0. Now fixed. The fix also revamps the directory iterator code, moving it all to one place and removing it from two other places. * PRUNING CHANGE: The pruning code no longer shifts the create_tid and delete_tid of adjacent records to fill gaps. This means that historical queries must either use snapshot softlinks or use a fine-grained transaction id greater than the most recent snapshot softlink. Fine-grained historical access still works up to the first snapshot softlink. * Clean up the cursor code responsible for acquiring the parent node. * Add the core mirror ioctl read/write infrastructure. This work is still in progress. - ioctl commands - pseudofs enhancements, including st_dev munging. - mount options - transaction id and object id conflictless allocation - initial mirror_tid recursion up the B-Tree (not finished) - B-Tree mirror scan optimizations to skip sub-hierarchies that do not need to be scanned (requires mirror_tid recursion to be 100% working). 
--- sys/conf/files | 3 +- sys/vfs/hammer/Makefile | 4 +- sys/vfs/hammer/hammer.h | 13 +- sys/vfs/hammer/hammer_btree.c | 176 +++++++++++++++++- sys/vfs/hammer/hammer_btree.h | 4 +- sys/vfs/hammer/hammer_cursor.c | 53 +----- sys/vfs/hammer/hammer_cursor.h | 5 +- sys/vfs/hammer/hammer_inode.c | 4 +- sys/vfs/hammer/hammer_ioctl.c | 10 +- sys/vfs/hammer/hammer_ioctl.h | 11 +- sys/vfs/hammer/hammer_mirror.c | 360 ++++++++++++++++++++++++++++++++++-- sys/vfs/hammer/hammer_mount.h | 3 +- sys/vfs/hammer/hammer_object.c | 77 ++++---- sys/vfs/hammer/hammer_prune.c | 43 ++--- sys/vfs/hammer/hammer_transaction.c | 16 +- sys/vfs/hammer/hammer_vfsops.c | 6 +- sys/vfs/hammer/hammer_vnops.c | 8 +- 17 files changed, 650 insertions(+), 146 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index 6215814ddb..a7af7646c8 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.217 2008/06/10 05:06:18 dillon Exp $ +# $DragonFly: src/sys/conf/files,v 1.218 2008/06/26 04:06:21 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -1143,6 +1143,7 @@ vfs/hammer/hammer_freemap.c optional hammer vfs/hammer/hammer_inode.c optional hammer vfs/hammer/hammer_io.c optional hammer vfs/hammer/hammer_ioctl.c optional hammer +vfs/hammer/hammer_mirror.c optional hammer vfs/hammer/hammer_object.c optional hammer vfs/hammer/hammer_ondisk.c optional hammer vfs/hammer/hammer_prune.c optional hammer diff --git a/sys/vfs/hammer/Makefile b/sys/vfs/hammer/Makefile index 1bf6d435d2..82f7605c07 100644 --- a/sys/vfs/hammer/Makefile +++ b/sys/vfs/hammer/Makefile @@ -1,5 +1,5 @@ # -# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.11 2008/04/22 19:00:14 dillon Exp $ +# $DragonFly: src/sys/vfs/hammer/Makefile,v 1.12 2008/06/26 04:06:22 dillon Exp $ KMOD= hammer SRCS= hammer_vfsops.c 
hammer_vnops.c hammer_inode.c \ @@ -7,7 +7,7 @@ SRCS= hammer_vfsops.c hammer_vnops.c hammer_inode.c \ hammer_cursor.c hammer_btree.c hammer_transaction.c \ hammer_object.c hammer_recover.c hammer_ioctl.c \ hammer_blockmap.c hammer_freemap.c hammer_undo.c \ - hammer_reblock.c hammer_flusher.c + hammer_reblock.c hammer_flusher.c hammer_mirror.c NOMAN= diff --git a/sys/vfs/hammer/hammer.h b/sys/vfs/hammer/hammer.h index 3291f6e79c..40e5785bbb 100644 --- a/sys/vfs/hammer/hammer.h +++ b/sys/vfs/hammer/hammer.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.92 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer.h,v 1.93 2008/06/26 04:06:22 dillon Exp $ */ /* * This header file contains structures used internally by the HAMMERFS @@ -523,6 +523,7 @@ struct hammer_node { #define HAMMER_NODE_FLUSH 0x0002 #define HAMMER_NODE_CRCGOOD 0x0004 #define HAMMER_NODE_NEEDSCRC 0x0008 +#define HAMMER_NODE_NEEDSMIRROR 0x0010 typedef struct hammer_node *hammer_node_t; @@ -833,6 +834,10 @@ int hammer_btree_iterate_reverse(hammer_cursor_t cursor); int hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_leaf_elm_t elm); int hammer_btree_delete(hammer_cursor_t cursor); +int hammer_btree_mirror_propagate(hammer_transaction_t trans, + hammer_node_t node, int index, + hammer_tid_t mirror_tid); + int hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2); int hammer_btree_chkts(hammer_tid_t ts, hammer_base_elm_t key); int hammer_btree_correct_rhb(hammer_cursor_t cursor, hammer_tid_t tid); @@ -844,6 +849,8 @@ int hammer_btree_lock_children(hammer_cursor_t cursor, struct hammer_node_locklist **locklistp); void hammer_btree_unlock_children(struct hammer_node_locklist **locklistp); int hammer_btree_search_node(hammer_base_elm_t elm, hammer_node_ondisk_t node); +hammer_node_t hammer_btree_get_parent(hammer_node_t node, int *parent_indexp, + int *errorp, int 
try_exclusive); void hammer_print_btree_node(hammer_node_ondisk_t ondisk); void hammer_print_btree_elm(hammer_btree_elm_t elm, u_int8_t type, int i); @@ -1005,6 +1012,10 @@ int hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_reblock *reblock); int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_prune *prune); +int hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_mirror_rw *mirror); +int hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_mirror_rw *mirror); int hammer_signal_check(hammer_mount_t hmp); diff --git a/sys/vfs/hammer/hammer_btree.c b/sys/vfs/hammer/hammer_btree.c index 881bc20a03..b097493075 100644 --- a/sys/vfs/hammer/hammer_btree.c +++ b/sys/vfs/hammer/hammer_btree.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.57 2008/06/20 21:24:53 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.c,v 1.58 2008/06/26 04:06:22 dillon Exp $ */ /* @@ -184,6 +184,7 @@ hammer_btree_iterate(hammer_cursor_t cursor) */ if (node->type == HAMMER_BTREE_TYPE_INTERNAL) { elm = &node->elms[cursor->index]; + r = hammer_btree_cmp(&cursor->key_end, &elm[0].base); s = hammer_btree_cmp(&cursor->key_beg, &elm[1].base); if (hammer_debug_btree) { @@ -224,6 +225,18 @@ hammer_btree_iterate(hammer_cursor_t cursor) */ KKASSERT(elm->internal.subtree_offset != 0); + /* + * If running the mirror filter see if we can skip + * the entire sub-tree. 
+ */ + if (cursor->flags & HAMMER_CURSOR_MIRROR_FILTERED) { + if (elm->internal.mirror_tid < + cursor->mirror_tid) { + ++cursor->index; + continue; + } + } + error = hammer_cursor_down(cursor); if (error) break; @@ -682,9 +695,30 @@ hammer_btree_insert(hammer_cursor_t cursor, hammer_btree_leaf_elm_t elm) } node->elms[i].leaf = *elm; ++node->count; + + /* + * Update the leaf node's aggregate mirror_tid for mirroring + * support. + */ + if (node->mirror_tid < elm->base.delete_tid) + node->mirror_tid = elm->base.delete_tid; + if (node->mirror_tid < elm->base.create_tid) + node->mirror_tid = elm->base.create_tid; hammer_modify_node_done(cursor->node); /* + * What we really want to do is propogate mirror_tid all the way + * up the parent chain to the B-Tree root. That would be + * ultra-expensive, though. + */ + if (cursor->parent && + (cursor->trans->hmp->hflags & (HMNT_MASTERID|HMNT_SLAVE))) { + hammer_btree_mirror_propagate(cursor->trans, cursor->parent, + cursor->parent_index, + node->mirror_tid); + } + + /* * Debugging sanity checks. */ KKASSERT(hammer_btree_cmp(cursor->left_bound, &elm->base) <= 0); @@ -1966,6 +2000,11 @@ hammer_btree_correct_lhb(hammer_cursor_t cursor, hammer_tid_t tid) * This routine is always called with an empty, locked leaf but may recurse * into want-to-be-empty parents as part of its operation. * + * It should also be noted that when removing empty leaves we must be sure + * to test and update mirror_tid because another thread may have deadlocked + * against us (or someone) trying to propogate it up and cannot retry once + * the node has been deleted. + * * On return the cursor may end up pointing to an internal node, suitable * for further iteration but not for an immediate insertion or deletion. */ @@ -1985,10 +2024,11 @@ btree_remove(hammer_cursor_t cursor) * When deleting the root of the filesystem convert it to * an empty leaf node. Internal nodes cannot be empty. 
*/ - if (node->ondisk->parent == 0) { + ondisk = node->ondisk; + if (ondisk->parent == 0) { KKASSERT(cursor->parent == NULL); hammer_modify_node_all(cursor->trans, node); - ondisk = node->ondisk; + KKASSERT(ondisk == node->ondisk); ondisk->type = HAMMER_BTREE_TYPE_LEAF; ondisk->count = 0; hammer_modify_node_done(node); @@ -1996,13 +2036,26 @@ btree_remove(hammer_cursor_t cursor) return(0); } + parent = cursor->parent; + + /* + * If another thread deadlocked trying to propogate mirror_tid up + * we have to finish the job before deleting node. XXX + */ + if (parent->ondisk->mirror_tid < node->ondisk->mirror_tid && + (cursor->trans->hmp->hflags & (HMNT_MASTERID|HMNT_SLAVE))) { + hammer_btree_mirror_propagate(cursor->trans, + parent, + cursor->parent_index, + node->ondisk->mirror_tid); + + } + /* * Attempt to remove the parent's reference to the child. If the * parent would become empty we have to recurse. If we fail we * leave the parent pointing to an empty leaf node. */ - parent = cursor->parent; - if (parent->ondisk->count == 1) { /* * This special cursor_up_locked() call leaves the original @@ -2042,6 +2095,7 @@ btree_remove(hammer_cursor_t cursor) hammer_modify_node_all(cursor->trans, parent); ondisk = parent->ondisk; KKASSERT(ondisk->type == HAMMER_BTREE_TYPE_INTERNAL); + elm = &ondisk->elms[cursor->parent_index]; KKASSERT(elm->internal.subtree_offset == node->node_offset); KKASSERT(ondisk->count > 0); @@ -2062,6 +2116,118 @@ btree_remove(hammer_cursor_t cursor) } /* + * Propagate a mirror TID update upwards through the B-Tree to the root. + * + * A locked internal node must be passed in. The node will remain locked + * on return. + * + * This function syncs mirror_tid at the specified internal node's element, + * adjusts the node's aggregation mirror_tid, and then recurses upwards. 
+ */ +int +hammer_btree_mirror_propagate(hammer_transaction_t trans, hammer_node_t node, + int index, hammer_tid_t mirror_tid) +{ + hammer_btree_internal_elm_t elm; + hammer_node_t parent; + int parent_index; + int error; + + KKASSERT (node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL); + + /* + * Adjust the node's element + */ + elm = &node->ondisk->elms[index].internal; + if (elm->mirror_tid >= mirror_tid) + return(0); + hammer_modify_node(trans, node, &elm->mirror_tid, + sizeof(elm->mirror_tid)); + elm->mirror_tid = mirror_tid; + hammer_modify_node_done(node); + + /* + * Adjust the node's mirror_tid aggragator + */ + if (node->ondisk->mirror_tid >= mirror_tid) + return(0); + hammer_modify_node_field(trans, node, mirror_tid); + node->ondisk->mirror_tid = mirror_tid; + hammer_modify_node_done(node); + + error = 0; + error = 0; + if (node->ondisk->parent && + (trans->hmp->hflags & (HMNT_MASTERID|HMNT_SLAVE))) { + parent = hammer_btree_get_parent(node, &parent_index, + &error, 1); + if (parent) { + hammer_btree_mirror_propagate(trans, parent, + parent_index, mirror_tid); + hammer_unlock(&parent->lock); + hammer_rel_node(parent); + } + } + return(error); +} + +hammer_node_t +hammer_btree_get_parent(hammer_node_t node, int *parent_indexp, int *errorp, + int try_exclusive) +{ + hammer_node_t parent; + hammer_btree_elm_t elm; + int i; + + /* + * Get the node + */ + parent = hammer_get_node(node->hmp, node->ondisk->parent, 0, errorp); + if (*errorp) { + KKASSERT(parent == NULL); + return(NULL); + } + KKASSERT ((parent->flags & HAMMER_NODE_DELETED) == 0); + + /* + * Lock the node + */ + if (try_exclusive) { + if (hammer_lock_ex_try(&parent->lock)) { + hammer_rel_node(parent); + *errorp = EDEADLK; + return(NULL); + } + } else { + hammer_lock_sh(&parent->lock); + } + + /* + * Figure out which element in the parent is pointing to the + * child. 
+ */ + if (node->ondisk->count) { + i = hammer_btree_search_node(&node->ondisk->elms[0].base, + parent->ondisk); + } else { + i = 0; + } + while (i < parent->ondisk->count) { + elm = &parent->ondisk->elms[i]; + if (elm->internal.subtree_offset == node->node_offset) + break; + ++i; + } + if (i == parent->ondisk->count) { + hammer_unlock(&parent->lock); + panic("Bad B-Tree link: parent %p node %p\n", parent, node); + } + *parent_indexp = i; + KKASSERT(*errorp == 0); + return(parent); +} + +/* * The element (elm) has been moved to a new internal node (node). * * If the element represents a pointer to an internal node that node's diff --git a/sys/vfs/hammer/hammer_btree.h b/sys/vfs/hammer/hammer_btree.h index ac5d266635..43fccb998c 100644 --- a/sys/vfs/hammer/hammer_btree.h +++ b/sys/vfs/hammer/hammer_btree.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.23 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_btree.h,v 1.24 2008/06/26 04:06:22 dillon Exp $ */ /* @@ -145,6 +145,8 @@ struct hammer_btree_internal_elm { int32_t unused03; }; +typedef struct hammer_btree_internal_elm *hammer_btree_internal_elm_t; + /* * Leaf B-Tree element (40 + 24 = 64 bytes). * diff --git a/sys/vfs/hammer/hammer_cursor.c b/sys/vfs/hammer/hammer_cursor.c index 62e0a5e08a..d575e38892 100644 --- a/sys/vfs/hammer/hammer_cursor.c +++ b/sys/vfs/hammer/hammer_cursor.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.32 2008/06/20 05:38:26 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.33 2008/06/26 04:06:22 dillon Exp $ */ /* @@ -308,54 +308,21 @@ hammer_load_cursor_parent(hammer_cursor_t cursor, int try_exclusive) hammer_node_t node; hammer_btree_elm_t elm; int error; - int i; + int parent_index; hmp = cursor->trans->hmp; if (cursor->node->ondisk->parent) { node = cursor->node; - parent = hammer_get_node(hmp, node->ondisk->parent, 0, &error); - if (error) - return(error); - if (try_exclusive) { - if (hammer_lock_ex_try(&parent->lock)) { - hammer_rel_node(parent); - return(EDEADLK); - } - } else { - hammer_lock_sh(&parent->lock); - } - KKASSERT ((parent->flags & HAMMER_NODE_DELETED) == 0); - elm = NULL; - - /* - * Locate the parent index to the child node as quickly - * as possible. - */ - if (node->ondisk->count) { - i = hammer_btree_search_node( - &node->ondisk->elms[0].base, node->ondisk); - } else { - i = 0; - } - while (i < parent->ondisk->count) { - elm = &parent->ondisk->elms[i]; - if (parent->ondisk->elms[i].internal.subtree_offset == - node->node_offset) { - break; - } - ++i; - } - if (i == parent->ondisk->count) { - hammer_unlock(&parent->lock); - panic("Bad B-Tree link: parent %p node %p\n", parent, node); + parent = hammer_btree_get_parent(node, &parent_index, + &error, try_exclusive); + if (error == 0) { + elm = &parent->ondisk->elms[parent_index]; + cursor->parent = parent; + cursor->parent_index = parent_index; + cursor->left_bound = &elm[0].internal.base; + cursor->right_bound = &elm[1].internal.base; } - KKASSERT(i != parent->ondisk->count); - cursor->parent = parent; - cursor->parent_index = i; - cursor->left_bound = &elm[0].internal.base; - cursor->right_bound = &elm[1].internal.base; - return(error); } else { cursor->parent = NULL; cursor->parent_index = 0; diff --git a/sys/vfs/hammer/hammer_cursor.h b/sys/vfs/hammer/hammer_cursor.h index c6829aff4a..3bc2d7e2be 100644 --- 
a/sys/vfs/hammer/hammer_cursor.h +++ b/sys/vfs/hammer/hammer_cursor.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.21 2008/06/14 01:42:13 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_cursor.h,v 1.22 2008/06/26 04:06:23 dillon Exp $ */ /* @@ -88,6 +88,7 @@ struct hammer_cursor { struct hammer_base_elm key_beg; struct hammer_base_elm key_end; hammer_tid_t asof; + hammer_tid_t mirror_tid; /* * Related data and record references. Note that the related buffers @@ -126,7 +127,7 @@ typedef struct hammer_cursor *hammer_cursor_t; #define HAMMER_CURSOR_DISKEOF 0x0400 #define HAMMER_CURSOR_MEMEOF 0x0800 #define HAMMER_CURSOR_DELBTREE 0x1000 /* ip_delete from b-tree */ -#define HAMMER_CURSOR_UNUSED2000 0x2000 +#define HAMMER_CURSOR_MIRROR_FILTERED 0x2000 /* mirror_tid filter */ #define HAMMER_CURSOR_ASOF 0x4000 /* as-of lookup */ #define HAMMER_CURSOR_CREATE_CHECK 0x8000 /* as-of lookup */ diff --git a/sys/vfs/hammer/hammer_inode.c b/sys/vfs/hammer/hammer_inode.c index 2abfd5c509..565faa12d0 100644 --- a/sys/vfs/hammer/hammer_inode.c +++ b/sys/vfs/hammer/hammer_inode.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.84 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.85 2008/06/26 04:06:23 dillon Exp $ */ #include "hammer.h" @@ -344,7 +344,7 @@ loop: ip->flags = flags & HAMMER_INODE_RO; ip->cache[0].ip = ip; ip->cache[1].ip = ip; - if (hmp->ronly) + if (hmp->ronly || (hmp->hflags & HMNT_SLAVE)) ip->flags |= HAMMER_INODE_RO; ip->sync_trunc_off = ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL; RB_INIT(&ip->rec_tree); diff --git a/sys/vfs/hammer/hammer_ioctl.c b/sys/vfs/hammer/hammer_ioctl.c index 563644d036..cf25cbbf29 100644 --- a/sys/vfs/hammer/hammer_ioctl.c +++ b/sys/vfs/hammer/hammer_ioctl.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.23 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.c,v 1.24 2008/06/26 04:06:23 dillon Exp $ */ #include "hammer.h" @@ -78,6 +78,14 @@ hammer_ioctl(hammer_inode_t ip, u_long com, caddr_t data, int fflag, error = hammer_ioc_get_pseudofs(&trans, ip, (struct hammer_ioc_get_pseudofs *)data); break; + case HAMMERIOC_MIRROR_READ: + error = hammer_ioc_mirror_read(&trans, ip, + (struct hammer_ioc_mirror_rw *)data); + break; + case HAMMERIOC_MIRROR_WRITE: + error = hammer_ioc_mirror_write(&trans, ip, + (struct hammer_ioc_mirror_rw *)data); + break; default: error = EOPNOTSUPP; break; diff --git a/sys/vfs/hammer/hammer_ioctl.h b/sys/vfs/hammer/hammer_ioctl.h index f0e1e3c1e8..b816d33159 100644 --- a/sys/vfs/hammer/hammer_ioctl.h +++ b/sys/vfs/hammer/hammer_ioctl.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.14 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_ioctl.h,v 1.15 2008/06/26 04:06:23 dillon Exp $ */ /* * HAMMER ioctl's. 
This file can be #included from userland @@ -219,13 +219,22 @@ struct hammer_ioc_mirror_rw { int size; /* max size */ }; +/* + * NOTE: crc is for the data block starting at rec_size, not including the + * data[] array. + */ typedef struct hammer_ioc_mrecord { u_int32_t signature; /* signature for byte order */ + u_int32_t rec_crc; u_int32_t rec_size; + u_int32_t unused01; struct hammer_btree_leaf_elm leaf; char data[8]; /* extended */ } *hammer_ioc_mrecord_t; +#define HAMMER_MREC_CRCOFF (offsetof(struct hammer_ioc_mrecord, rec_size)) +#define HAMMER_MREC_HEADSIZE (offsetof(struct hammer_ioc_mrecord, data[0])) + #define HAMMER_IOC_MIRROR_SIGNATURE 0x4dd97272U #define HAMMER_IOC_MIRROR_SIGNATURE_REV 0x7272d94dU diff --git a/sys/vfs/hammer/hammer_mirror.c b/sys/vfs/hammer/hammer_mirror.c index 76815a1b7c..af09097096 100644 --- a/sys/vfs/hammer/hammer_mirror.c +++ b/sys/vfs/hammer/hammer_mirror.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.1 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mirror.c,v 1.2 2008/06/26 04:06:23 dillon Exp $ */ /* * HAMMER mirroring ioctls - serialize and deserialize modifications made @@ -40,13 +40,35 @@ #include "hammer.h" +static int hammer_mirror_check(hammer_cursor_t cursor, + struct hammer_ioc_mrecord *mrec); +static int hammer_mirror_update(hammer_cursor_t cursor, + struct hammer_ioc_mrecord *mrec); +static int hammer_mirror_write(hammer_cursor_t cursor, + struct hammer_ioc_mrecord *mrec, + char *udata); +static int hammer_mirror_localize_data(hammer_data_ondisk_t data, + hammer_btree_leaf_elm_t leaf); + +/* + * All B-Tree records within the specified key range which also conform + * to the transaction id range are returned. Mirroring code keeps track + * of the last transaction id fully scanned and can efficiently pick up + * where it left off if interrupted. 
+ */ int hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_mirror_rw *mirror) { struct hammer_cursor cursor; - hammer_btree_elm_t elm; + struct hammer_ioc_mrecord mrec; + hammer_btree_leaf_elm_t elm; + const int head_size = HAMMER_MREC_HEADSIZE; + const int crc_start = HAMMER_MREC_CRCOFF; + char *uptr; int error; + int data_len; + int bytes; if ((mirror->key_beg.localization | mirror->key_end.localization) & HAMMER_LOCALIZE_PSEUDOFS_MASK) { @@ -57,6 +79,7 @@ hammer_ioc_mirror_read(hammer_transaction_t trans, hammer_inode_t ip, mirror->key_cur = mirror->key_beg; mirror->key_cur.localization += ip->obj_localization; + bzero(&mrec, sizeof(mrec)); retry: error = hammer_init_cursor(trans, &cursor, NULL, NULL); @@ -72,20 +95,31 @@ retry: cursor.flags |= HAMMER_CURSOR_BACKEND; /* - * This flag allows the btree scan code to return internal nodes - * at every index, giving the mirroring code the ability to skip - * whole sub-trees based on mirror_tid. + * This flag filters the search to only return elements whos create + * or delete TID is >= mirror_tid. The B-Tree uses the mirror_tid + * field stored with internal and leaf nodes to shortcut the scan. */ - cursor.flags |= HAMMER_CURSOR_MIRRORING; + cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; + cursor.mirror_tid = mirror->tid_beg; error = hammer_btree_first(&cursor); while (error == 0) { /* - * Internal or Leaf node + * Leaf node. Only return elements modified in the range + * requested by userland. 
*/ - elm = &cursor.node->ondisk->elms[cursor.index]; - reblock->key_cur.obj_id = elm->base.obj_id; - reblock->key_cur.localization = elm->base.localization; + KKASSERT(cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF); + elm = &cursor.node->ondisk->elms[cursor.index].leaf; + + if (elm->base.create_tid < mirror->tid_beg || + elm->base.create_tid >= mirror->tid_end) { + if (elm->base.delete_tid < mirror->tid_beg || + elm->base.delete_tid >= mirror->tid_end) { + goto skip; + } + } + + mirror->key_cur = elm->base; /* * Yield to more important tasks @@ -101,29 +135,59 @@ retry: tsleep(trans, 0, "hmrslo", hz / 10); } -#if 0 /* - * Acquiring the sync_lock prevents the operation from - * crossing a synchronization boundary. + * The core code exports the data to userland. + */ + data_len = (elm->data_offset) ? elm->data_len : 0; + if (data_len) { + error = hammer_btree_extract(&cursor, + HAMMER_CURSOR_GET_DATA); + if (error) + break; + } + bytes = offsetof(struct hammer_ioc_mrecord, data[data_len]); + bytes = (bytes + HAMMER_HEAD_ALIGN_MASK) & + ~HAMMER_HEAD_ALIGN_MASK; + if (mirror->count + bytes > mirror->size) + break; + + /* + * Construct the record for userland and copyout. * - * NOTE: cursor.node may have changed on return. + * The user is asking for a snapshot, if the record was + * deleted beyond the user-requested ending tid, the record + * is not considered deleted from the point of view of + * userland and delete_tid is cleared. 
*/ - hammer_sync_lock_sh(trans); - error = hammer_reblock_helper(reblock, &cursor, elm); - hammer_sync_unlock(trans); -#endif + mrec.signature = HAMMER_IOC_MIRROR_SIGNATURE; + mrec.rec_size = bytes; + mrec.leaf = *elm; + if (elm->base.delete_tid >= mirror->tid_end) + mrec.leaf.base.delete_tid = 0; + mrec.rec_crc = crc32(&mrec.rec_size, head_size - crc_start); + uptr = (char *)mirror->ubuf + mirror->count; + error = copyout(&mrec, uptr, head_size); + if (data_len && error == 0) { + error = copyout(cursor.data, uptr + head_size, + data_len); + } + if (error == 0) + mirror->count += bytes; +skip: if (error == 0) { cursor.flags |= HAMMER_CURSOR_ATEDISK; error = hammer_btree_iterate(&cursor); } } - if (error == ENOENT) + if (error == ENOENT) { + mirror->key_cur = mirror->key_end; error = 0; + } hammer_done_cursor(&cursor); if (error == EDEADLK) goto retry; if (error == EINTR) { - reblock->head.flags |= HAMMER_IOC_HEAD_INTR; + mirror->head.flags |= HAMMER_IOC_HEAD_INTR; error = 0; } failed: @@ -131,3 +195,259 @@ failed: return(error); } +/* + * Copy records from userland to the target mirror. Records which already + * exist may only have their delete_tid updated. 
+ */ +int +hammer_ioc_mirror_write(hammer_transaction_t trans, hammer_inode_t ip, + struct hammer_ioc_mirror_rw *mirror) +{ + struct hammer_cursor cursor; + struct hammer_ioc_mrecord mrec; + const int head_size = HAMMER_MREC_HEADSIZE; + const int crc_start = HAMMER_MREC_CRCOFF; + u_int32_t rec_crc; + int error; + char *uptr; + + if (mirror->size < 0 || mirror->size > 0x70000000) + return(EINVAL); + + error = hammer_init_cursor(trans, &cursor, NULL, NULL); +retry: + hammer_normalize_cursor(&cursor); + + while (error == 0 && mirror->count + head_size <= mirror->size) { + /* + * Acquire and validate header + */ + uptr = (char *)mirror->ubuf + mirror->count; + error = copyin(uptr, &mrec, head_size); + if (error) + break; + rec_crc = crc32(&mrec.rec_size, head_size - crc_start); + if (mrec.signature != HAMMER_IOC_MIRROR_SIGNATURE) { + error = EINVAL; + break; + } + if (rec_crc != mrec.rec_crc) { + error = EINVAL; + break; + } + if (mrec.rec_size < head_size || + mrec.rec_size > head_size + HAMMER_XBUFSIZE + 16 || + mirror->count + mrec.rec_size > mirror->size) { + error = EINVAL; + break; + } + if (mrec.leaf.data_len < 0 || + mrec.leaf.data_len > HAMMER_XBUFSIZE || + offsetof(struct hammer_ioc_mrecord, data[mrec.leaf.data_len]) > mrec.rec_size) { + error = EINVAL; + } + + /* + * Re-localize for target. relocalization of data is handled + * by hammer_mirror_write(). + */ + mrec.leaf.base.localization &= HAMMER_LOCALIZE_MASK; + mrec.leaf.base.localization += ip->obj_localization; + + /* + * Locate the record. + * + * If the record exists only the delete_tid may be updated. + * + * If the record does not exist we create it. For now we + * ignore records with a non-zero delete_tid. Note that + * mirror operations are effective an as-of operation and + * delete_tid can be 0 for mirroring purposes even if it is + * not actually 0 at the originator. 
+ */ + hammer_normalize_cursor(&cursor); + cursor.key_beg = mrec.leaf.base; + cursor.flags |= HAMMER_CURSOR_BACKEND; + cursor.flags &= ~HAMMER_CURSOR_INSERT; + error = hammer_btree_lookup(&cursor); + + if (error == 0 && hammer_mirror_check(&cursor, &mrec)) { + hammer_sync_lock_sh(trans); + error = hammer_mirror_update(&cursor, &mrec); + hammer_sync_unlock(trans); + } else if (error == ENOENT && mrec.leaf.base.delete_tid == 0) { + hammer_sync_lock_sh(trans); + error = hammer_mirror_write(&cursor, &mrec, + uptr + head_size); + hammer_sync_unlock(trans); + } + + /* + * Setup for loop + */ + if (error == EDEADLK) { + hammer_done_cursor(&cursor); + error = hammer_init_cursor(trans, &cursor, NULL, NULL); + goto retry; + } + if (error == 0) { + mirror->count += mrec.rec_size; + } + } + hammer_done_cursor(&cursor); + return(0); +} + +/* + * Check whether an update is needed in the case where a match already + * exists on the target. The only type of update allowed in this case + * is an update of the delete_tid. + * + * Return non-zero if the update should proceed. + */ +static +int +hammer_mirror_check(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec) +{ + hammer_btree_leaf_elm_t leaf = cursor->leaf; + + if (leaf->base.delete_tid != mrec->leaf.base.delete_tid) { + if (leaf->base.delete_tid != 0) + return(1); + } + return(0); +} + +/* + * Update a record in-place. Only the delete_tid can change. + */ +static +int +hammer_mirror_update(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec) +{ + hammer_btree_leaf_elm_t elm; + + elm = cursor->leaf; + KKASSERT(elm->base.create_tid < mrec->leaf.base.delete_tid); + hammer_modify_node(cursor->trans, cursor->node, elm, sizeof(*elm)); + elm->base.delete_tid = mrec->leaf.base.delete_tid; + elm->delete_ts = mrec->leaf.delete_ts; + hammer_modify_node_done(cursor->node); + return(0); +} + +/* + * Write out a new record. + * + * XXX this is messy. 
+ */ +static +int +hammer_mirror_write(hammer_cursor_t cursor, struct hammer_ioc_mrecord *mrec, + char *udata) +{ + hammer_buffer_t data_buffer = NULL; + hammer_off_t ndata_offset; + void *ndata; + int error; + int wanted_skip = 0; + + if (mrec->leaf.data_len && mrec->leaf.data_offset) { + ndata = hammer_alloc_data(cursor->trans, mrec->leaf.data_len, + mrec->leaf.base.rec_type, + &ndata_offset, &data_buffer, &error); + if (ndata == NULL) + return(error); + mrec->leaf.data_offset = ndata_offset; + hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0); + error = copyin(udata, ndata, mrec->leaf.data_len); + if (error == 0) { + if (hammer_crc_test_leaf(ndata, &mrec->leaf) == 0) { + kprintf("data crc mismatch on pipe\n"); + error = EINVAL; + } else { + error = hammer_mirror_localize_data( + ndata, &mrec->leaf); + if (error) + wanted_skip = 1; + } + } + hammer_modify_buffer_done(data_buffer); + } else { + mrec->leaf.data_offset = 0; + error = 0; + ndata = NULL; + } + if (error) + goto failed; + cursor->flags |= HAMMER_CURSOR_INSERT; + error = hammer_btree_lookup(cursor); + if (error != ENOENT) { + if (error == 0) + error = EALREADY; + goto failed; + } + error = 0; + + /* + * Physical insertion + */ + error = hammer_btree_insert(cursor, &mrec->leaf); + +failed: + /* + * Cleanup + */ + if (error && mrec->leaf.data_offset) { + hammer_blockmap_free(cursor->trans, + mrec->leaf.data_offset, + mrec->leaf.data_len); + } + if (data_buffer) + hammer_rel_buffer(data_buffer, 0); + if (wanted_skip) + error = 0; + return(error); +} + +/* + * Localize the data payload. Directory entries may need their + * localization adjusted. + * + * Pseudo-fs directory entries must be skipped entirely (EBADF). + * + * The root inode must be skipped, it will exist on the target with a + * different create_tid so updating it would result in a duplicate. This + * also prevents inode updates on the root directory (aka mtime, ctime, etc) + * from mirroring, which is ok. 
+ * + * XXX Root directory inode updates - parent_obj_localization is broken. + */ +static +int +hammer_mirror_localize_data(hammer_data_ondisk_t data, + hammer_btree_leaf_elm_t leaf) +{ + int modified = 0; + int error = 0; + u_int32_t localization; + + if (leaf->base.rec_type == HAMMER_RECTYPE_DIRENTRY) { + localization = leaf->base.localization & + HAMMER_LOCALIZE_PSEUDOFS_MASK; + if (data->entry.localization != localization) { + data->entry.localization = localization; + modified = 1; + } + if (data->entry.obj_id == 1) + error = EBADF; + } + if (leaf->base.rec_type == HAMMER_RECTYPE_INODE) { + if (leaf->base.obj_id == HAMMER_OBJID_ROOT) + error = EBADF; + } + if (modified) + hammer_crc_set_leaf(data, leaf); + return(error); +} + diff --git a/sys/vfs/hammer/hammer_mount.h b/sys/vfs/hammer/hammer_mount.h index 12b5f87302..6d0883bcbd 100644 --- a/sys/vfs/hammer/hammer_mount.h +++ b/sys/vfs/hammer/hammer_mount.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.6 2008/06/23 21:42:48 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_mount.h,v 1.7 2008/06/26 04:06:23 dillon Exp $ */ #ifndef _SYS_TYPES_H_ @@ -58,6 +58,7 @@ struct hammer_mount_info { #define HMNT_NOHISTORY 0x00000001 #define HMNT_MASTERID 0x00000002 /* masterid field set */ #define HMNT_EXPORTREQ 0x00000004 +#define HMNT_SLAVE 0x00000008 /* slave mode (RW mount, RO access) */ #define HMNT_USERFLAGS (HMNT_NOHISTORY | HMNT_MASTERID) diff --git a/sys/vfs/hammer/hammer_object.c b/sys/vfs/hammer/hammer_object.c index ca071ee887..66f82064a9 100644 --- a/sys/vfs/hammer/hammer_object.c +++ b/sys/vfs/hammer/hammer_object.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.75 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_object.c,v 1.76 2008/06/26 04:06:23 dillon Exp $ */ #include "hammer.h" @@ -590,8 +590,11 @@ hammer_ip_add_directory(struct hammer_transaction *trans, struct hammer_inode *dip, const char *name, int bytes, struct hammer_inode *ip) { + struct hammer_cursor cursor; hammer_record_t record; int error; + int count; + u_int32_t iterator; record = hammer_alloc_mem_record(dip, HAMMER_ENTRY_SIZE(bytes)); if (++trans->hmp->namekey_iterator == 0) @@ -613,6 +616,31 @@ hammer_ip_add_directory(struct hammer_transaction *trans, hammer_modify_inode(ip, HAMMER_INODE_DDIRTY); /* + * Find an unused namekey. Both the in-memory record tree and + * the B-Tree are checked. Exact matches also match create_tid + * so use an ASOF search to (mostly) ignore it. + */ + hammer_init_cursor(trans, &cursor, &dip->cache[1], dip); + cursor.key_beg = record->leaf.base; + cursor.flags |= HAMMER_CURSOR_ASOF; + cursor.asof = ip->obj_asof; + + count = 0; + while (hammer_ip_lookup(&cursor) == 0) { + iterator = (u_int32_t)record->leaf.base.key + 1; + if (iterator == 0) + iterator = 1; + record->leaf.base.key &= ~0xFFFFFFFFLL; + record->leaf.base.key |= iterator; + cursor.key_beg.key = record->leaf.base.key; + if (++count == 1000000000) { + hammer_rel_mem_record(record); + error = ENOSPC; + goto failed; + } + } + + /* * The target inode and the directory entry are bound together. 
*/ record->target_ip = ip; @@ -628,6 +656,8 @@ hammer_ip_add_directory(struct hammer_transaction *trans, ip->flush_state = HAMMER_FST_SETUP; } error = hammer_mem_add(record); +failed: + hammer_done_cursor(&cursor); return(error); } @@ -1038,24 +1068,14 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) */ cursor->flags |= HAMMER_CURSOR_INSERT; - for (;;) { - error = hammer_btree_lookup(cursor); - if (hammer_debug_inode) - kprintf("DOINSERT LOOKUP %d\n", error); - if (error) - break; - if (record->leaf.base.rec_type != HAMMER_RECTYPE_DIRENTRY) { - kprintf("hammer_ip_sync_record: duplicate rec " - "at (%016llx)\n", record->leaf.base.key); - Debugger("duplicate record1"); - error = EIO; - break; - } - if (++trans->hmp->namekey_iterator == 0) - ++trans->hmp->namekey_iterator; - record->leaf.base.key &= ~(0xFFFFFFFFLL); - record->leaf.base.key |= trans->hmp->namekey_iterator; - cursor->key_beg.key = record->leaf.base.key; + error = hammer_btree_lookup(cursor); + if (hammer_debug_inode) + kprintf("DOINSERT LOOKUP %d\n", error); + if (error == 0) { + kprintf("hammer_ip_sync_record: duplicate rec " + "at (%016llx)\n", record->leaf.base.key); + Debugger("duplicate record1"); + error = EIO; } #if 0 if (record->type == HAMMER_MEM_RECORD_DATA) @@ -1063,7 +1083,6 @@ hammer_ip_sync_record_cursor(hammer_cursor_t cursor, hammer_record_t record) record->leaf.base.key - record->leaf.data_len, record->leaf.data_offset, error); #endif - if (error != ENOENT) goto done; @@ -1166,19 +1185,13 @@ hammer_mem_add(hammer_record_t record) KKASSERT(record->flags & HAMMER_RECF_ALLOCDATA); /* - * Insert into the RB tree, find an unused iterator if this is - * a directory entry. + * Insert into the RB tree. A unique key should have already + * been selected if this is a directory entry. 
*/ - while (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) { - if (record->leaf.base.rec_type != HAMMER_RECTYPE_DIRENTRY){ - record->flags |= HAMMER_RECF_DELETED_FE; - hammer_rel_mem_record(record); - return (EEXIST); - } - if (++hmp->namekey_iterator == 0) - ++hmp->namekey_iterator; - record->leaf.base.key &= ~(0xFFFFFFFFLL); - record->leaf.base.key |= hmp->namekey_iterator; + if (RB_INSERT(hammer_rec_rb_tree, &record->ip->rec_tree, record)) { + record->flags |= HAMMER_RECF_DELETED_FE; + hammer_rel_mem_record(record); + return (EEXIST); } ++hmp->count_newrecords; ++hmp->rsv_recs; diff --git a/sys/vfs/hammer/hammer_prune.c b/sys/vfs/hammer/hammer_prune.c index 469e447e7d..0940ab4f51 100644 --- a/sys/vfs/hammer/hammer_prune.c +++ b/sys/vfs/hammer/hammer_prune.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.7 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_prune.c,v 1.8 2008/06/26 04:06:23 dillon Exp $ */ #include "hammer.h" @@ -44,10 +44,7 @@ * created during the iteration due to alignments. This also allows us * to adjust alignments without blowing up the B-Tree. 
*/ -static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm, - int *realign_cre, int *realign_del); -static int realign_prune(struct hammer_ioc_prune *prune, hammer_cursor_t cursor, - int realign_cre, int realign_del); +static int check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm); int hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, @@ -59,8 +56,6 @@ hammer_ioc_prune(hammer_transaction_t trans, hammer_inode_t ip, struct hammer_ioc_prune_elm *user_elms; int error; int isdir; - int realign_cre; - int realign_del; int elm_array_size; if (prune->nelms < 0 || prune->nelms > HAMMER_MAX_PRUNE_ELMS) @@ -167,7 +162,7 @@ retry: elm->base.delete_tid); } - if (check_prune(prune, elm, &realign_cre, &realign_del) == 0) { + if (check_prune(prune, elm) == 0) { if (hammer_debug_general & 0x0200) { kprintf("check %016llx %016llx: DELETE\n", elm->base.obj_id, elm->base.key); @@ -198,18 +193,6 @@ retry: * to skip it (since we are iterating backwards). */ cursor.flags |= HAMMER_CURSOR_ATEDISK; - } else if (realign_cre >= 0 || realign_del >= 0) { - error = realign_prune(prune, &cursor, - realign_cre, realign_del); - if (error == 0) { - cursor.flags |= HAMMER_CURSOR_ATEDISK; - if (hammer_debug_general & 0x0200) { - kprintf("check %016llx %016llx: " - "REALIGN\n", - elm->base.obj_id, - elm->base.key); - } - } } else { cursor.flags |= HAMMER_CURSOR_ATEDISK; if (hammer_debug_general & 0x0100) { @@ -241,15 +224,11 @@ failed: * Check pruning list. The list must be sorted in descending order. */ static int -check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm, - int *realign_cre, int *realign_del) +check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm) { struct hammer_ioc_prune_elm *scan; int i; - *realign_cre = -1; - *realign_del = -1; - /* * If pruning everything remove all records with a non-zero * delete_tid. 
@@ -263,6 +242,7 @@ check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm, for (i = 0; i < prune->nelms; ++i) { scan = &prune->elms[i]; +#if 0 /* * Locate the scan index covering the create and delete TIDs. */ @@ -276,6 +256,7 @@ check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm, elm->base.delete_tid <= scan->end_tid) { *realign_del = i; } +#endif /* * Now check for loop termination. @@ -299,7 +280,18 @@ check_prune(struct hammer_ioc_prune *prune, hammer_btree_elm_t elm, return(-1); } +#if 0 + /* + * NOTE: THIS CODE HAS BEEN REMOVED! Pruning no longer attempts to realign + * adjacent records because it seriously interferes with every + * mirroring algorithm I could come up with. + * + * This means that historical accesses beyond the first snapshot + * softlink should be on snapshot boundaries only. Historical + * accesses from "now" to the first snapshot softlink continue to + * be fine-grained. + * * Align the record to cover any gaps created through the deletion of * records within the pruning space. If we were to just delete the records * there would be gaps which in turn would cause a snapshot that is NOT on @@ -392,3 +384,4 @@ realign_prune(struct hammer_ioc_prune *prune, return (error); } +#endif diff --git a/sys/vfs/hammer/hammer_transaction.c b/sys/vfs/hammer/hammer_transaction.c index d0f1d1a0c9..ef4e1ed513 100644 --- a/sys/vfs/hammer/hammer_transaction.c +++ b/sys/vfs/hammer/hammer_transaction.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.21 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_transaction.c,v 1.22 2008/06/26 04:06:23 dillon Exp $ */ #include "hammer.h" @@ -141,12 +141,18 @@ static hammer_tid_t hammer_alloc_tid(hammer_mount_t hmp, int count) { hammer_tid_t tid; - int multiplier = (hmp->masterid < 0) ? 
1 : HAMMER_MAX_MASTERS; - tid = (hmp->next_tid + multiplier) & ~(hammer_tid_t)(multiplier - 1); - if (tid >= 0xFFFFFFFFFFFFF000ULL) + if (hmp->masterid < 0) { + tid = hmp->next_tid + 1; + hmp->next_tid = tid + count; + } else { + tid = (hmp->next_tid + HAMMER_MAX_MASTERS) & + ~(hammer_tid_t)(HAMMER_MAX_MASTERS - 1); + hmp->next_tid = tid + count * HAMMER_MAX_MASTERS; + tid |= hmp->masterid; + } + if (tid >= 0xFFFFFFFFFF000000ULL) panic("hammer_start_transaction: Ran out of TIDs!"); - hmp->next_tid = tid + count * multiplier; if (hammer_debug_tid) kprintf("alloc_tid %016llx\n", tid); return(tid); diff --git a/sys/vfs/hammer/hammer_vfsops.c b/sys/vfs/hammer/hammer_vfsops.c index de70dc65b9..49da5503f1 100644 --- a/sys/vfs/hammer/hammer_vfsops.c +++ b/sys/vfs/hammer/hammer_vfsops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.53 2008/06/24 17:38:17 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vfsops.c,v 1.54 2008/06/26 04:06:23 dillon Exp $ */ #include @@ -297,9 +297,9 @@ hammer_vfs_mount(struct mount *mp, char *mntpt, caddr_t data, hmp->hflags &= ~HMNT_USERFLAGS; hmp->hflags |= info.hflags & HMNT_USERFLAGS; if (info.hflags & HMNT_MASTERID) - hmp->masterid = -1; - else hmp->masterid = info.masterid; + else + hmp->masterid = -1; if (info.asof) { kprintf("ASOF\n"); mp->mnt_flag |= MNT_RDONLY; diff --git a/sys/vfs/hammer/hammer_vnops.c b/sys/vfs/hammer/hammer_vnops.c index f04d0a2bda..4779b294b1 100644 --- a/sys/vfs/hammer/hammer_vnops.c +++ b/sys/vfs/hammer/hammer_vnops.c @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.76 2008/06/23 07:31:14 dillon Exp $ + * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.77 2008/06/26 04:06:23 dillon Exp $ */ #include @@ -699,6 +699,12 @@ hammer_vop_getattr(struct vop_getattr_args *ap) struct vattr *vap = ap->a_vap; vap->va_fsid = ip->hmp->fsid_udev; + /* + * XXX munge the device if we are in a pseudo-fs, so user utilities + * do not think its the same 'filesystem'. + */ + if (ip->obj_localization) + vap->va_fsid += ip->obj_localization; vap->va_fileid = ip->ino_leaf.base.obj_id; vap->va_mode = ip->ino_data.mode; vap->va_nlink = ip->ino_data.nlinks; -- 2.11.4.GIT