From 6d9d83d8169ddea0b9680b376a850934a456bb55 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sun, 23 Jul 2017 00:57:20 -0700 Subject: [PATCH] hammer2 - Adjust blockref to create an embedded area, start dirent work * Create a type-specific embedded area in the blockref structure. Move data_count and inode_count into the new area. The blockref structure size does not change. * Adjust code to access data_count and inode_count conditionally for BREF_TYPE_INODE, DATA, and INDIRECT types only. * Now that we have abandoned embedding inodes directly in directories for normal operation, start working on removing HAMMER2_OBJTYPE_HARDLINK and creating a real directory entry abstraction. The real directory entry abstraction will allow directory entries to be directly embedded in blockref structures, without requiring a data reference for any filename <= 64 bytes. This will be accomplished by using the new embedded area in the blockref for the directory entry header and the check area for the filename (up to 64 bytes). This will significantly improve directory compactness and I/O efficiency by reducing the directory entry overhead from 1152 bytes (1024 + 128) to just 128 bytes and guaranteeing locality since the blockrefs are collected together in indirect blocks. Another nice facet is that since inodes can embed up to four direct blockrefs, any directory with <= 4 entries in it can embed those entries in the directory inode itself. So small directories will wind up being VERY compact. We haven't entirely abandoned embedding inodes in directories as directory entries. In fact, the feature is still used for superroot entries, and may be allowed in the future mixed into normal directories for 'special' non-hardlinkable directory inodes for quota control, subdirectory snapshot, and (NFS) export purposes. 
--- sbin/newfs_hammer2/newfs_hammer2.8 | 26 +++++++++++-------- sys/vfs/hammer2/hammer2_chain.c | 52 +++++++++++++++++++++++++++----------- sys/vfs/hammer2/hammer2_disk.h | 44 ++++++++++++++++++++++++++++++-- sys/vfs/hammer2/hammer2_ioctl.c | 14 +++++++--- sys/vfs/hammer2/hammer2_vfsops.c | 18 ++----------- sys/vfs/hammer2/hammer2_vnops.c | 7 +++-- 6 files changed, 112 insertions(+), 49 deletions(-) diff --git a/sbin/newfs_hammer2/newfs_hammer2.8 b/sbin/newfs_hammer2/newfs_hammer2.8 index b7c5cb8729..e071a491a2 100644 --- a/sbin/newfs_hammer2/newfs_hammer2.8 +++ b/sbin/newfs_hammer2/newfs_hammer2.8 @@ -91,16 +91,16 @@ to create any PFSs other than "LOCAL". Generally speaking this command is not used to create clusters. It is used to format volumes which are then made available for additional clustering commands. -Once formatted the device@LOCAL volume can be mounted in order to make the -block device available to the cluster. -You can then use +Once formatted the device@LOCAL volume can be mounted in order to make it +and all other PFSs created on the volume available to the cluster. +.Pp +You can use .Xr hammer2 8 directives to construct your cluster, including the creation of additional PFSs within various volumes. If you are managing several machines, or a large number of machines, .Fl L Ar none -is typically specified and each machine's ROOT is individually named, -for example, "FUBAR-ROOT". +is typically specified to reduce clutter. .Pp .Nm HAMMER2 file systems are sector-size agnostic, however the @@ -184,12 +184,18 @@ you may specify the version with this option. .It Fl L Ar label By default .Nm -creates three local master PFSs on the new volume: "LOCAL", "BOOT", and "ROOT". -If you specify one or more label options +always creates a local master PFS on the new volume called "LOCAL", +and will conditionally also create "BOOT", "ROOT", or "DATA" depending +on the partition ('b' creates "BOOT", 'd' creates "ROOT", and any other +partition creates "DATA"). 
+.Pp +If you specify one or more label options to create your own named local +PFSs, .Nm -will not automatically create "BOOT" or "ROOT". -"LOCAL" is always created and should never be specified. -If you don't want any extra PFSs to be created, use +will not create any conditional PFSs. +However, "LOCAL" is still always created and should not be +specified with this option. +If you don't want any PFSs to be created (other than "LOCAL"), use .Fl L Ar none . .Pp Typically simple HAMMER2 filesystems just use the defaults and diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index d9e33a8d6f..f3461c78f4 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -4527,6 +4527,7 @@ hammer2_base_delete(hammer2_chain_t *parent, int *cache_indexp, hammer2_chain_t *chain) { hammer2_blockref_t *elm = &chain->bref; + hammer2_blockref_t *scan; hammer2_key_t key_next; int i; @@ -4539,10 +4540,11 @@ hammer2_base_delete(hammer2_chain_t *parent, key_next = 0; /* max range */ i = hammer2_base_find(parent, base, count, cache_indexp, &key_next, elm->key, elm->key); - if (i == count || base[i].type == 0 || - base[i].key != elm->key || + scan = &base[i]; + if (i == count || scan->type == 0 || + scan->key != elm->key || ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && - base[i].keybits != elm->keybits)) { + scan->keybits != elm->keybits)) { hammer2_spin_unex(&parent->core.spin); panic("delete base %p element not found at %d/%d elm %p\n", base, i, count, elm); @@ -4552,14 +4554,24 @@ hammer2_base_delete(hammer2_chain_t *parent, /* * Update stats and zero the entry */ - parent->bref.data_count -= base[i].data_count; - parent->bref.data_count -= (hammer2_off_t)1 << - (int)(base[i].data_off & HAMMER2_OFF_MASK_RADIX); - parent->bref.inode_count -= base[i].inode_count; - if (base[i].type == HAMMER2_BREF_TYPE_INODE) - parent->bref.inode_count -= 1; + parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << + (int)(scan->data_off & 
HAMMER2_OFF_MASK_RADIX); + switch(scan->type) { + case HAMMER2_BREF_TYPE_INODE: + parent->bref.embed.stats.inode_count -= 1; + /* fall through */ + case HAMMER2_BREF_TYPE_DATA: + case HAMMER2_BREF_TYPE_INDIRECT: + parent->bref.embed.stats.data_count -= + scan->embed.stats.data_count; + parent->bref.embed.stats.inode_count -= + scan->embed.stats.inode_count; + break; + default: + break; + } - bzero(&base[i], sizeof(*base)); + bzero(scan, sizeof(*scan)); /* * We can only optimize parent->core.live_zero for live chains. @@ -4625,12 +4637,22 @@ hammer2_base_insert(hammer2_chain_t *parent, /* * Update stats and zero the entry */ - parent->bref.data_count += elm->data_count; - parent->bref.data_count += (hammer2_off_t)1 << + parent->bref.embed.stats.data_count += (hammer2_off_t)1 << (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); - parent->bref.inode_count += elm->inode_count; - if (elm->type == HAMMER2_BREF_TYPE_INODE) - parent->bref.inode_count += 1; + switch(elm->type) { + case HAMMER2_BREF_TYPE_INODE: + parent->bref.embed.stats.inode_count += 1; + /* fall through */ + case HAMMER2_BREF_TYPE_DATA: + case HAMMER2_BREF_TYPE_INDIRECT: + parent->bref.embed.stats.data_count += + elm->embed.stats.data_count; + parent->bref.embed.stats.inode_count += + elm->embed.stats.inode_count; + break; + default: + break; + } /* diff --git a/sys/vfs/hammer2/hammer2_disk.h b/sys/vfs/hammer2/hammer2_disk.h index 2a9b4824f7..5c9d9113e1 100644 --- a/sys/vfs/hammer2/hammer2_disk.h +++ b/sys/vfs/hammer2/hammer2_disk.h @@ -525,6 +525,22 @@ typedef struct dmsg_lnk_hammer2_volconf dmsg_lnk_hammer2_volconf_t; #define H2_LNK_VOLCONF(msg) ((dmsg_lnk_hammer2_volconf_t *)(msg)->any.buf) +#if 0 +/* + * HAMMER2 directory entry header (embedded in blockref) exactly 16 bytes + */ +struct hammer2_dirent_head { + hammer2_tid_t inum; /* inode number */ + uint16_t namlen; /* name length */ + uint8_t type; /* OBJTYPE_* */ + uint8_t unused0B; + uint8_t unused0C[4]; +} __packed; + +typedef struct 
hammer2_dirent_head hammer2_dirent_head_t; + +#endif + /* * The media block reference structure. This forms the core of the HAMMER2 * media topology recursion. This 128-byte data structure is embedded in the @@ -589,9 +605,33 @@ struct hammer2_blockref { /* MUST BE EXACTLY 64 BYTES */ hammer2_tid_t mirror_tid; /* media flush topology & freemap */ hammer2_tid_t modify_tid; /* clc modify (not propagated) */ hammer2_off_t data_off; /* low 6 bits is phys size (radix)*/ - hammer2_key_t data_count; /* statistics aggregation */ - hammer2_key_t inode_count; /* statistics aggregation */ hammer2_tid_t update_tid; /* clc modify (propagated upward) */ + union { + char buf[16]; +#if 0 + /* + * Directory entry header (BREF_TYPE_DIRENT) + * + * NOTE: check.buf contains filename if <= 64 bytes. Longer + * filenames are stored in a data reference of size + * HAMMER2_ALLOC_MIN (at least 256, typically 1024). + * + * NOTE: inode structure may contain a copy of a recently + * associated filename, for recovery purposes. + * + * NOTE: Superroot entries are INODEs, not DIRENTs. Code + * allows both cases. 
+ */ + hammer2_dirent_head_t dirent; +#endif + /* + * Statistics aggregation (BREF_TYPE_INODE, BREF_TYPE_DATA, BREF_TYPE_INDIRECT) + */ + struct { + hammer2_key_t data_count; + hammer2_key_t inode_count; + } stats; + } embed; union { /* check info */ char buf[64]; struct { diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c index 4f1c102d8f..c93bd1505d 100644 --- a/sys/vfs/hammer2/hammer2_ioctl.c +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -830,10 +830,16 @@ hammer2_ioctl_inode_get(hammer2_inode_t *ip, void *data) ino->inode_count = 0; for (i = 0; i < ip->cluster.nchains; ++i) { if ((chain = ip->cluster.array[i].chain) != NULL) { - if (ino->data_count < chain->bref.data_count) - ino->data_count = chain->bref.data_count; - if (ino->inode_count < chain->bref.inode_count) - ino->inode_count = chain->bref.inode_count; + if (ino->data_count < + chain->bref.embed.stats.data_count) { + ino->data_count = + chain->bref.embed.stats.data_count; + } + if (ino->inode_count < + chain->bref.embed.stats.inode_count) { + ino->inode_count = + chain->bref.embed.stats.inode_count; + } } } bzero(&ino->ip_data, sizeof(ino->ip_data)); diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 37bc39f81c..1389fae2b2 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -1839,15 +1839,8 @@ hammer2_vfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) else bzero(&bref, sizeof(bref)); - mp->mnt_stat.f_files = bref.inode_count; + mp->mnt_stat.f_files = bref.embed.stats.inode_count; mp->mnt_stat.f_ffree = 0; -#if 0 - mp->mnt_stat.f_blocks = (bref.data_count + - hmp->voldata.allocator_free) / - mp->mnt_vstat.f_bsize; - mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / - mp->mnt_vstat.f_bsize; -#endif mp->mnt_stat.f_blocks = hmp->voldata.allocator_size / mp->mnt_vstat.f_bsize; mp->mnt_stat.f_bfree = hmp->voldata.allocator_free / @@ -1890,15 +1883,8 @@ hammer2_vfs_statvfs(struct mount *mp, struct statvfs *sbp, 
struct ucred *cred) bzero(&bref, sizeof(bref)); mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; - mp->mnt_vstat.f_files = bref.inode_count; + mp->mnt_vstat.f_files = bref.embed.stats.inode_count; mp->mnt_vstat.f_ffree = 0; -#if 0 - mp->mnt_vstat.f_blocks = (bref.data_count + - hmp->voldata.allocator_free) / - mp->mnt_vstat.f_bsize; - mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / - mp->mnt_vstat.f_bsize; -#endif mp->mnt_vstat.f_blocks = hmp->voldata.allocator_size / mp->mnt_vstat.f_bsize; mp->mnt_vstat.f_bfree = hmp->voldata.allocator_free / diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index d5affc971b..5dcfc85f7a 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -318,8 +318,11 @@ hammer2_vop_getattr(struct vop_getattr_args *ap) } else { for (i = 0; i < ip->cluster.nchains; ++i) { if ((chain = ip->cluster.array[i].chain) != NULL) { - if (vap->va_bytes < chain->bref.data_count) - vap->va_bytes = chain->bref.data_count; + if (vap->va_bytes < + chain->bref.embed.stats.data_count) { + vap->va_bytes = + chain->bref.embed.stats.data_count; + } } } } -- 2.11.4.GIT