/*
 * Copyright (c) 2011-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

#define INODE_DEBUG	0
RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
	     hammer2_tid_t, meta.inum);
int
hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
{
	if (ip1->meta.inum < ip2->meta.inum)
		return(-1);
	if (ip1->meta.inum > ip2->meta.inum)
		return(1);
	return(0);
}
void
hammer2_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}
void
hammer2_inode_delayed_sideq(hammer2_inode_t *ip)
{
	hammer2_inode_sideq_t *ipul;
	hammer2_pfs_t *pmp = ip->pmp;

	if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
		ipul = kmalloc(sizeof(*ipul), pmp->minode,
			       M_WAITOK | M_ZERO);
		ipul->ip = ip;
		hammer2_spin_ex(&pmp->list_spin);
		if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
			hammer2_inode_ref(ip);
			atomic_set_int(&ip->flags,
				       HAMMER2_INODE_ONSIDEQ);
			TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
			++pmp->sideq_count;
			hammer2_spin_unex(&pmp->list_spin);
		} else {
			hammer2_spin_unex(&pmp->list_spin);
			kfree(ipul, pmp->minode);
		}
	}
}
/*
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *	- pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *	  inode locking function will automatically set the RDONLY flag.
 *
 *	- pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *	  Most front-end inode locks do.
 *
 *	- pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *	  the inode data be resolved.  This is used by the syncthr because
 *	  it can run on an unresolved/out-of-sync cluster, and also by the
 *	  vnode reclamation code to avoid unnecessary I/O (particularly when
 *	  disposing of hundreds of thousands of cached vnodes).
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 *
 * NOTE: In-memory inodes always point to hardlink targets (the actual file),
 *	 and never point to a hardlink pointer.
 *
 * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
 *	 will feel free to reduce the chain set in the cluster as an
 *	 optimization.  It will still be validated against the quorum if
 *	 appropriate, but the optimization might be able to reduce data
 *	 accesses to one node.  This flag is automatically set if the inode
 *	 is locked with HAMMER2_RESOLVE_SHARED.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	hammer2_inode_ref(ip);

	/*
	 * Inode structure mutex
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		/*how |= HAMMER2_RESOLVE_RDONLY; not used */
		hammer2_mtx_sh(&ip->lock);
	} else {
		hammer2_mtx_ex(&ip->lock);
	}
}
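
/*
 * Illustrative usage sketch (not from the original source): a typical
 * front-end path takes a shared lock with resolved meta-data and puts it
 * away with hammer2_inode_unlock(), which also drops the ref acquired by
 * hammer2_inode_lock():
 *
 *	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED |
 *			       HAMMER2_RESOLVE_ALWAYS);
 *	... read ip->meta ...
 *	hammer2_inode_unlock(ip);
 */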
/*
 * Select a chain out of an inode's cluster and lock it.
 *
 * The inode does not have to be locked.
 */
hammer2_chain_t *
hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how)
{
	hammer2_chain_t *chain;
	hammer2_cluster_t *cluster;

	hammer2_spin_sh(&ip->cluster_spin);
	cluster = &ip->cluster;
	if (clindex >= cluster->nchains)
		chain = NULL;
	else
		chain = cluster->array[clindex].chain;
	if (chain) {
		hammer2_chain_ref(chain);
		hammer2_spin_unsh(&ip->cluster_spin);
		hammer2_chain_lock(chain, how);
	} else {
		hammer2_spin_unsh(&ip->cluster_spin);
	}
	return chain;
}
hammer2_chain_t *
hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex,
			       hammer2_chain_t **parentp, int how)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;

	for (;;) {
		hammer2_spin_sh(&ip->cluster_spin);
		if (clindex >= ip->cluster.nchains)
			chain = NULL;
		else
			chain = ip->cluster.array[clindex].chain;
		if (chain) {
			hammer2_chain_ref(chain);
			hammer2_spin_unsh(&ip->cluster_spin);
			hammer2_chain_lock(chain, how);
		} else {
			hammer2_spin_unsh(&ip->cluster_spin);
		}

		/*
		 * Get parent, lock order must be (parent, chain).
		 */
		parent = chain->parent;
		if (parent) {
			hammer2_chain_ref(parent);
			hammer2_chain_unlock(chain);
			hammer2_chain_lock(parent, how);
			hammer2_chain_lock(chain, how);
		}
		if (ip->cluster.array[clindex].chain == chain &&
		    chain->parent == parent) {
			break;
		}

		/*
		 * Retry
		 */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		if (parent) {
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
		}
	}
	*parentp = parent;

	return chain;
}
void
hammer2_inode_unlock(hammer2_inode_t *ip)
{
	hammer2_mtx_unlock(&ip->lock);
	hammer2_inode_drop(ip);
}
/*
 * Temporarily release a lock held shared or exclusive.  Caller must
 * hold the lock shared or exclusive on call and lock will be released
 * on return.
 *
 * Restore a lock that was temporarily released.
 */
hammer2_mtx_state_t
hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
{
	return hammer2_mtx_temp_release(&ip->lock);
}
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	hammer2_mtx_temp_restore(&ip->lock, ostate);
}
/*
 * Upgrade a shared inode lock to exclusive and return.  If the inode lock
 * is already held exclusively this is a NOP.
 *
 * The caller MUST hold the inode lock either shared or exclusive on call
 * and will own the lock exclusively on return.
 *
 * Returns non-zero if the lock was already exclusive prior to the upgrade.
 */
int
hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
{
	int wasexclusive;

	if (mtx_islocked_ex(&ip->lock)) {
		wasexclusive = 1;
	} else {
		hammer2_mtx_unlock(&ip->lock);
		hammer2_mtx_ex(&ip->lock);
		wasexclusive = 0;
	}
	return wasexclusive;
}
/*
 * Downgrade an inode lock from exclusive to shared only if the inode
 * lock was previously shared.  If the inode lock was previously exclusive,
 * this is a NOP.
 */
void
hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
{
	if (wasexclusive == 0)
		mtx_downgrade(&ip->lock);
}
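
/*
 * Illustrative sketch (not from the original source): the upgrade and
 * downgrade helpers are designed to be paired, allowing a shared-lock
 * holder to temporarily gain exclusive access and then restore whatever
 * lock state it started with:
 *
 *	int wasexclusive;
 *
 *	wasexclusive = hammer2_inode_lock_upgrade(ip);
 *	... modify state requiring the exclusive lock ...
 *	hammer2_inode_lock_downgrade(ip, wasexclusive);
 */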
/*
 * Lookup an inode by inode number
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	KKASSERT(pmp);
	if (pmp->spmp_hmp) {
		ip = NULL;
	} else {
		hammer2_spin_ex(&pmp->inum_spin);
		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
		if (ip)
			hammer2_inode_ref(ip);
		hammer2_spin_unex(&pmp->inum_spin);
	}
	return (ip);
}
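
/*
 * Illustrative sketch (not from the original source): a successful lookup
 * returns a referenced but unlocked inode, so the caller owes a matching
 * drop:
 *
 *	hammer2_inode_t *ip;
 *
 *	ip = hammer2_inode_lookup(pmp, inum);
 *	if (ip) {
 *		... lock ip if meta-data access is required ...
 *		hammer2_inode_drop(ip);
 *	}
 */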
/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
	if (hammer2_debug & 0x80000) {
		kprintf("INODE+1 %p (%d->%d)\n", ip, ip->refs - 1, ip->refs);
		print_backtrace(8);
	}
}
/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	u_int refs;

	while (ip) {
		if (hammer2_debug & 0x80000) {
			kprintf("INODE-1 %p (%d->%d)\n",
				ip, ip->refs, ip->refs - 1);
			print_backtrace(8);
		}
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
					--pmp->inum_count;
				}
				hammer2_spin_unex(&pmp->inum_spin);

				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = NULL;	/* will terminate loop */
			} else {
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * *errorp is set to a UNIX error, not a HAMMER2 error.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, int *errorp)
{
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		switch (ip->meta.type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			/*
			 * Regular file must use buffer cache I/O
			 * (VKVABIO cpu sync semantics supported)
			 */
			vp->v_type = VREG;
			vsetflags(vp, VKVABIO);
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 *
			 * (VKVABIO cpu sync semantics supported)
			 */
			vp->v_type = VLNK;
			vsetflags(vp, VKVABIO);
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ip->meta.type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp, ip->meta.rmajor, ip->meta.rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		case HAMMER2_OBJTYPE_SOCKET:
			vp->v_type = VSOCK;
			break;
		default:
			panic("hammer2: unhandled objtype %d",
			      ip->meta.type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}
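
/*
 * Illustrative sketch (not from the original source): VNOPS typically call
 * hammer2_igetv() with the inode locked, per the comment above, and check
 * the UNIX errno on failure:
 *
 *	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
 *	vp = hammer2_igetv(ip, &error);
 *	hammer2_inode_unlock(ip);
 *	if (vp == NULL)
 *		return (error);
 */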
/*
 * Returns the inode associated with the passed-in cluster, creating the
 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
 * When synchronizing, if idx >= 0, only cluster index (idx) is synchronized.
 * Otherwise the whole cluster is synchronized.
 *
 * The passed-in cluster must be locked and will remain locked on return.
 * The returned inode will be locked and the caller may dispose of both
 * via hammer2_inode_unlock() + hammer2_inode_drop().  However, if the caller
 * needs to resolve a hardlink it must ref/unlock/relock/drop the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * On return the inode is locked with the supplied cluster.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_xop_head_t *xop, int idx)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(xop == NULL ||
		 hammer2_cluster_type(&xop->cluster) ==
		 HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	while (xop) {
		iptmp = &hammer2_xop_gdata(xop)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
		hammer2_xop_pdata(xop);
		if (nip == NULL)
			break;

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		if (idx >= 0)
			hammer2_inode_repoint_one(nip, &xop->cluster, idx);
		else
			hammer2_inode_repoint(nip, NULL, &xop->cluster);

		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	spin_init(&nip->cluster_spin, "h2clspin");
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (xop) {
		nipdata = &hammer2_xop_gdata(xop)->ipdata;
		nip->meta = nipdata->meta;
		hammer2_xop_pdata(xop);
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
		hammer2_inode_repoint(nip, NULL, &xop->cluster);
	} else {
		nip->meta.inum = 1;		/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);	/*XXX*/
	}

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		++pmp->inum_count;
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}
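
/*
 * Illustrative sketch (not from the original source): backend callers hand
 * a locked xop cluster to hammer2_inode_get() and receive a locked,
 * referenced inode synchronized to it; an idx of -1 requests whole-cluster
 * synchronization:
 *
 *	nip = hammer2_inode_get(pmp, dip, &xop->head, -1);
 *	... nip is locked, xop->head.cluster remains locked ...
 */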
/*
 * Create a new inode using the vattr to figure out the type.  A non-zero
 * type field overrides vattr.  We need the directory to set iparent or to
 * use when the inode is directly embedded in a directory (typically
 * super-root entries), but note that this really only applies to
 * OBJTYPE_DIRECTORY as non-directory inodes can be hardlinked.
 *
 * If no error occurs the new inode is returned, otherwise NULL is returned.
 * It is possible for an error to create a junk inode and then fail later.
 * It will attempt to delete the junk inode and return NULL in this situation.
 *
 * If vap and/or cred are NULL the related fields are not set and the
 * inode type defaults to a directory.  This is used when creating PFSs
 * under the super-root, so the inode number is set to 1 in this case.
 *
 * dip is not locked on entry.
 *
 * NOTE: This function is used to create all manners of inodes, including
 *	 super-root entries for snapshots and PFSs.  When used to create a
 *	 snapshot the inode will be temporarily associated with the spmp.
 *
 * NOTE: When creating a normal file or directory the name/name_len/lhc
 *	 is optional, but is typically specified to make debugging and
 *	 recovery easier.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_inode_t *dip, hammer2_inode_t *pip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len, hammer2_key_t lhc,
		     hammer2_key_t inum,
		     uint8_t type, uint8_t target_type,
		     int flags, int *errorp)
{
	hammer2_xop_create_t *xop;
	hammer2_inode_t *nip;
	int error;
	uid_t xuid;
	uuid_t pip_uid;
	uuid_t pip_gid;
	uint32_t pip_mode;
	uint8_t pip_comp_algo;
	uint8_t pip_check_algo;
	hammer2_tid_t pip_inum;

	if (name)
		lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;
	nip = NULL;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * Lock the directory exclusively for now to guarantee that
	 * we can find an unused lhc for the name.  Due to collisions,
	 * two different creates can end up with the same lhc so we
	 * cannot depend on the OS to prevent the collision.
	 */
	hammer2_inode_lock(dip, 0);

	pip_uid = pip->meta.uid;
	pip_gid = pip->meta.gid;
	pip_mode = pip->meta.mode;
	pip_comp_algo = pip->meta.comp_algo;
	pip_check_algo = pip->meta.check_algo;
	pip_inum = (pip == pip->pmp->iroot) ? 1 : pip->meta.inum;

	/*
	 * If name specified, locate an unused key in the collision space.
	 * Otherwise use the passed-in lhc directly.
	 */
	if (name) {
		hammer2_xop_scanlhc_t *sxop;
		hammer2_key_t lhcbase;

		lhcbase = lhc;
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, &hammer2_scanlhc_desc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != HAMMER2_ERROR_ENOENT) {
				*errorp = error;
				goto done2;
			}
			++lhc;
			error = 0;
		}
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = HAMMER2_ERROR_ENOSPC;
			*errorp = error;
			goto done2;
		}
	}

	/*
	 * Create the inode with the lhc as the key.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	xop->lhc = lhc;
	xop->flags = flags;
	bzero(&xop->meta, sizeof(xop->meta));

	if (vap) {
		xop->meta.type = hammer2_get_obj_type(vap->va_type);

		switch (xop->meta.type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			xop->meta.rmajor = vap->va_rmajor;
			xop->meta.rminor = vap->va_rminor;
			break;
		default:
			break;
		}
		type = xop->meta.type;
	} else {
		xop->meta.type = type;
		xop->meta.target_type = target_type;
	}
	xop->meta.inum = inum;
	xop->meta.iparent = pip_inum;

	/* Inherit parent's inode compression mode. */
	xop->meta.comp_algo = pip_comp_algo;
	xop->meta.check_algo = pip_check_algo;
	xop->meta.version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&xop->meta.ctime);
	xop->meta.mtime = xop->meta.ctime;
	if (vap)
		xop->meta.mode = vap->va_mode;
	xop->meta.nlinks = 1;
	if (vap) {
		if (dip->pmp) {
			xuid = hammer2_to_unix_xid(&pip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     pip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}

		if (vap->va_vaflags & VA_UID_UUID_VALID)
			xop->meta.uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&xop->meta.uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			xop->meta.gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.gid, vap->va_gid);
		else
			xop->meta.gid = pip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (xop->meta.type == HAMMER2_OBJTYPE_REGFILE ||
	    xop->meta.type == HAMMER2_OBJTYPE_SOFTLINK) {
		xop->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}
	if (name) {
		hammer2_xop_setname(&xop->head, name, name_len);
	} else {
		name_len = hammer2_xop_setname_inum(&xop->head, inum);
		KKASSERT(lhc == inum);
	}
	xop->meta.name_len = name_len;
	xop->meta.name_key = lhc;
	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);

	hammer2_xop_start(&xop->head, &hammer2_inode_create_desc);

	error = hammer2_xop_collect(&xop->head, 0);
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s\n",
		(int)name_len, (int)name_len, name);
#endif

	if (error) {
		*errorp = error;
		goto done;
	}

	/*
	 * Set up the new inode if not a hardlink pointer.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction.  If the need arises we can adjust
	 *	 hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	nip = hammer2_inode_get(dip->pmp, dip, &xop->head, -1);
	nip->comp_heuristic = 0;
done:
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
done2:
	hammer2_inode_unlock(dip);

	return (nip);
}
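
/*
 * Illustrative sketch (not from the original source; mirrors the VOP
 * create paths): a new file inode is allocated inside a transaction with
 * a fresh inode number, the zero arguments letting vap determine the type:
 *
 *	inum = hammer2_trans_newinum(dip->pmp);
 *	nip = hammer2_inode_create(dip, dip, vap, cred,
 *				   name, name_len, 0,
 *				   inum, 0, 0, 0, &error);
 *	if (nip == NULL)
 *		return (hammer2_error_to_errno(error));
 */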
/*
 * Create a directory entry under dip with the specified name, inode number,
 * and OBJTYPE (type).
 *
 * This returns a UNIX errno code, not a HAMMER2_ERROR_* code.
 */
int
hammer2_dirent_create(hammer2_inode_t *dip, const char *name, size_t name_len,
		      hammer2_key_t inum, uint8_t type)
{
	hammer2_xop_mkdirent_t *xop;
	hammer2_key_t lhc;
	int error;

	lhc = 0;
	error = 0;

	KKASSERT(name != NULL);
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * Lock the directory exclusively for now to guarantee that
	 * we can find an unused lhc for the name.  Due to collisions,
	 * two different creates can end up with the same lhc so we
	 * cannot depend on the OS to prevent the collision.
	 */
	hammer2_inode_lock(dip, 0);

	/*
	 * If name specified, locate an unused key in the collision space.
	 * Otherwise use the passed-in lhc directly.
	 */
	{
		hammer2_xop_scanlhc_t *sxop;
		hammer2_key_t lhcbase;

		lhcbase = lhc;
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, &hammer2_scanlhc_desc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != HAMMER2_ERROR_ENOENT)
				goto done2;
			++lhc;
			error = 0;
		}
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = HAMMER2_ERROR_ENOSPC;
			goto done2;
		}
	}

	/*
	 * Create the directory entry with the lhc as the key.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	xop->lhc = lhc;
	bzero(&xop->dirent, sizeof(xop->dirent));
	xop->dirent.inum = inum;
	xop->dirent.type = type;
	xop->dirent.namlen = name_len;

	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
	hammer2_xop_setname(&xop->head, name, name_len);

	hammer2_xop_start(&xop->head, &hammer2_inode_mkdirent_desc);

	error = hammer2_xop_collect(&xop->head, 0);

	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
done2:
	error = hammer2_error_to_errno(error);
	hammer2_inode_unlock(dip);

	return error;
}
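
/*
 * Illustrative sketch (not from the original source): pairing the two
 * creation helpers, the directory entry records the new inode's number
 * and type:
 *
 *	error = hammer2_dirent_create(dip, name, name_len,
 *				      nip->meta.inum, nip->meta.type);
 */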
/*
 * Repoint ip->cluster's chains to cluster's chains and fixup the default
 * focus.  All items, valid or invalid, are repointed.  hammer2_xop_start()
 * filters out invalid or non-matching elements.
 *
 * Caller must hold the inode exclusively locked; the cluster, if not NULL,
 * must also be locked.
 *
 * Cluster may be NULL to clean out any chains in ip->cluster.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	int i;

	bzero(dropch, sizeof(dropch));

	/*
	 * Replace chains in ip->cluster with chains from cluster and
	 * adjust the focus if necessary.
	 *
	 * NOTE: nchain and/or ochain can be NULL due to gaps
	 *	 in the cluster arrays.
	 */
	hammer2_spin_ex(&ip->cluster_spin);
	for (i = 0; cluster && i < cluster->nchains; ++i) {
		/*
		 * Do not replace elements which are the same.  Also handle
		 * element count discrepancies.
		 */
		nchain = cluster->array[i].chain;
		if (i < ip->cluster.nchains) {
			ochain = ip->cluster.array[i].chain;
			if (ochain == nchain)
				continue;
		} else {
			ochain = NULL;
		}

		/*
		 * Make adjustments
		 */
		ip->cluster.array[i].chain = nchain;
		ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[i].flags |= cluster->array[i].flags &
					      HAMMER2_CITEM_INVALID;
		if (nchain)
			hammer2_chain_ref(nchain);
		dropch[i] = ochain;
	}

	/*
	 * Release any left-over chains in ip->cluster.
	 */
	while (i < ip->cluster.nchains) {
		nchain = ip->cluster.array[i].chain;
		if (nchain) {
			ip->cluster.array[i].chain = NULL;
			ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
		}
		dropch[i] = nchain;
		++i;
	}

	/*
	 * Fixup fields.  Note that the inode-embedded cluster is never
	 * directly locked.
	 */
	if (cluster) {
		ip->cluster.nchains = cluster->nchains;
		ip->cluster.focus = cluster->focus;
		ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED;
	} else {
		ip->cluster.nchains = 0;
		ip->cluster.focus = NULL;
		ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS;
	}

	hammer2_spin_unex(&ip->cluster_spin);

	/*
	 * Cleanup outside of spinlock
	 */
	while (--i >= 0) {
		if (dropch[i])
			hammer2_chain_drop(dropch[i]);
	}
}
/*
 * Repoint a single element from the cluster to the ip.  Used by the
 * synchronization threads to piecemeal update inodes.  Does not change
 * focus and requires inode to be re-locked to clean-up flags (XXX).
 */
void
hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster,
			  int idx)
{
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	int i;

	hammer2_spin_ex(&ip->cluster_spin);
	KKASSERT(idx < cluster->nchains);
	if (idx < ip->cluster.nchains) {
		ochain = ip->cluster.array[idx].chain;
		nchain = cluster->array[idx].chain;
	} else {
		ochain = NULL;
		nchain = cluster->array[idx].chain;
		for (i = ip->cluster.nchains; i <= idx; ++i) {
			bzero(&ip->cluster.array[i],
			      sizeof(ip->cluster.array[i]));
			ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
		}
		ip->cluster.nchains = idx + 1;
	}
	if (ochain != nchain) {
		/*
		 * Make adjustments.
		 */
		ip->cluster.array[idx].chain = nchain;
		ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[idx].flags |= cluster->array[idx].flags &
						HAMMER2_CITEM_INVALID;
	}
	hammer2_spin_unex(&ip->cluster_spin);
	if (ochain != nchain) {
		if (nchain)
			hammer2_chain_ref(nchain);
		if (ochain)
			hammer2_chain_drop(ochain);
	}
}
/*
 * Called with a locked inode to finish unlinking an inode after xop_unlink
 * had been run.  This function is responsible for decrementing nlinks.
 *
 * We don't bother decrementing nlinks if the file is not open and this was
 * the last link.
 *
 * If the inode is a hardlink target its chain has not yet been deleted,
 * otherwise its chain has been deleted.
 *
 * If isopen then any prior deletion was not permanent and the inode is
 * left intact with nlinks == 0.
 */
int
hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen)
{
	int error;

	/*
	 * Decrement nlinks.  If this is the last link and the file is
	 * not open we can just delete the inode and not bother dropping
	 * nlinks to 0 (avoiding unnecessary block updates).
	 */
	if (ip->meta.nlinks == 1) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_ISUNLINKED);
		if (isopen == 0)
			goto killit;
	}

	hammer2_inode_modify(ip);
	--ip->meta.nlinks;
	if ((int64_t)ip->meta.nlinks < 0)
		ip->meta.nlinks = 0;	/* safety */

	/*
	 * If nlinks is not zero we are done.  However, this should only be
	 * possible with a hardlink target.  If the inode is an embedded
	 * hardlink nlinks should have dropped to zero, warn and proceed
	 * with the next step.
	 */
	if (ip->meta.nlinks) {
		if ((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0)
			return 0;
		kprintf("hammer2_inode_unlink: nlinks was not 0 (%jd)\n",
			(intmax_t)ip->meta.nlinks);
		return 0;
	}

	if (ip->vp)
		hammer2_knote(ip->vp, NOTE_DELETE);

	/*
	 * nlinks is now an implied zero, delete the inode if not open.
	 * We avoid unnecessary media updates by not bothering to actually
	 * decrement nlinks for the 1->0 transition.
	 *
	 * Put the inode on the sideq to ensure that any disconnected chains
	 * get properly flushed (so they can be freed).
	 */
	{
		hammer2_xop_destroy_t *xop;

killit:
		hammer2_inode_delayed_sideq(ip);
		atomic_set_int(&ip->flags, HAMMER2_INODE_ISDELETED);
		xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		hammer2_xop_start(&xop->head, &hammer2_inode_destroy_desc);
		error = hammer2_xop_collect(&xop->head, 0);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}
	error = 0;	/* XXX */

	return error;
}
/*
 * Mark an inode as being modified, meaning that the caller will modify
 * ip->meta.
 *
 * If a vnode is present we set the vnode dirty and the nominal filesystem
 * sync will also handle synchronizing the inode meta-data.  If no vnode
 * is present we must ensure that the inode is on pmp->sideq.
 *
 * NOTE: No mtid (modify_tid) is passed into this routine.  The caller is
 *	 only modifying the in-memory inode.  A modify_tid is synchronized
 *	 later when the inode gets flushed.
 *
 * NOTE: As an exception to the general rule, the inode MAY be locked
 *	 shared for this particular call.
 */
void
hammer2_inode_modify(hammer2_inode_t *ip)
{
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (ip->vp) {
		vsetisdirty(ip->vp);
	} else if (ip->pmp && (ip->flags & HAMMER2_INODE_NOSIDEQ) == 0) {
		hammer2_inode_delayed_sideq(ip);
	}
}
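
/*
 * Illustrative sketch (not from the original source): callers bracket
 * in-memory meta-data updates with hammer2_inode_modify() while holding
 * the inode lock:
 *
 *	hammer2_inode_lock(ip, 0);
 *	hammer2_inode_modify(ip);
 *	ip->meta.mtime = mtime;
 *	hammer2_inode_unlock(ip);
 */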
/*
 * Synchronize the inode's frontend state with the chain state prior
 * to any explicit flush of the inode or any strategy write call.  This
 * does not flush the inode's chain or its sub-topology to media (higher
 * level layers are responsible for doing that).
 *
 * Called with a locked inode inside a normal transaction.
 *
 * inode must be locked.
 */
int
hammer2_inode_chain_sync(hammer2_inode_t *ip)
{
	int error;

	error = 0;
	if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MODIFIED)) {
		hammer2_xop_fsync_t *xop;

		xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		xop->clear_directdata = 0;
		if (ip->flags & HAMMER2_INODE_RESIZED) {
			if ((ip->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
			    ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
				ip->meta.op_flags &=
				    ~HAMMER2_OPFLAG_DIRECTDATA;
				xop->clear_directdata = 1;
			}
			xop->osize = ip->osize;
		} else {
			xop->osize = ip->meta.size;	/* safety */
		}
		xop->ipflags = ip->flags;
		xop->meta = ip->meta;

		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED |
					     HAMMER2_INODE_MODIFIED);
		hammer2_xop_start(&xop->head, &hammer2_inode_chain_sync_desc);
		error = hammer2_xop_collect(&xop->head, 0);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (error == HAMMER2_ERROR_ENOENT)
			error = 0;
		if (error) {
			kprintf("hammer2: unable to fsync inode %p\n", ip);
			/*
			atomic_set_int(&ip->flags,
				       xop->ipflags & (HAMMER2_INODE_RESIZED |
						       HAMMER2_INODE_MODIFIED));
			*/
			/* XXX return error somehow? */
		}
	}
	return error;
}
/*
 * Flushes the inode's chain and its sub-topology to media.  Interlocks
 * HAMMER2_INODE_DIRTYDATA by clearing it prior to the flush.  Any strategy
 * function creating or modifying a chain under this inode will re-set the
 * flag.
 *
 * inode must be locked.
 */
int
hammer2_inode_chain_flush(hammer2_inode_t *ip)
{
	hammer2_xop_fsync_t *xop;
	int error;

	atomic_clear_int(&ip->flags, HAMMER2_INODE_DIRTYDATA);
	xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING |
				    HAMMER2_XOP_INODE_STOP);
	hammer2_xop_start(&xop->head, &hammer2_inode_flush_desc);
	error = hammer2_xop_collect(&xop->head, HAMMER2_XOP_COLLECT_WAITALL);
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (error == HAMMER2_ERROR_ENOENT)
		error = 0;

	return error;
}
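
/*
 * Illustrative sketch (not from the original source): the two stages are
 * ordered, first synchronize ip->meta into the chain, then flush the
 * chain topology, exactly as hammer2_inode_run_sideq() does below:
 *
 *	hammer2_inode_chain_sync(ip);
 *	hammer2_inode_chain_flush(ip);
 */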
/*
 * The normal filesystem sync no longer has visibility to an inode structure
 * after its vnode has been reclaimed.  In this situation a dirty inode may
 * require additional processing to synchronize ip->meta to its underlying
 * cluster nodes.
 *
 * In particular, reclaims can occur in almost any state (for example, when
 * doing operations on unrelated vnodes) and flushing the reclaimed inode
 * in the reclaim path itself is a non-starter.
 *
 * Caller must be in a transaction.
 */
void
hammer2_inode_run_sideq(hammer2_pfs_t *pmp, int doall)
{
	hammer2_xop_destroy_t *xop;
	hammer2_inode_sideq_t *ipul;
	hammer2_inode_t *ip;
	int error;

	/*
	 * Nothing to do if sideq is empty or (if doall == 0) there just
	 * aren't very many sideq entries.
	 */
	if (TAILQ_EMPTY(&pmp->sideq))
		return;
	if (doall == 0) {
		if (pmp->sideq_count > (pmp->inum_count >> 3)) {
			if (hammer2_debug & 0x0001) {
				kprintf("hammer2: flush sideq %ld/%ld\n",
					pmp->sideq_count, pmp->inum_count);
			}
		}
	}
	if (doall == 0 && pmp->sideq_count <= (pmp->inum_count >> 3))
		return;

	hammer2_spin_ex(&pmp->list_spin);
	while ((ipul = TAILQ_FIRST(&pmp->sideq)) != NULL) {
		TAILQ_REMOVE(&pmp->sideq, ipul, entry);
		--pmp->sideq_count;
		ip = ipul->ip;
		KKASSERT(ip->flags & HAMMER2_INODE_ONSIDEQ);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
		hammer2_spin_unex(&pmp->list_spin);
		kfree(ipul, pmp->minode);

		hammer2_inode_lock(ip, 0);
		if (ip->flags & HAMMER2_INODE_ISDELETED) {
			/*
			 * The inode has already been deleted.  This is a
			 * fairly rare circumstance.  For now we don't rock
			 * the boat and synchronize it normally.
			 */
			hammer2_inode_chain_sync(ip);
			hammer2_inode_chain_flush(ip);
		} else if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
			/*
			 * The inode was unlinked while open.  The inode must
			 * be deleted and destroyed.
			 */
			xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
			hammer2_xop_start(&xop->head,
					  &hammer2_inode_destroy_desc);
			error = hammer2_xop_collect(&xop->head, 0);
			/* XXX error handling */
			hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		} else {
			/*
			 * The inode was dirty as-of the reclaim, requiring
			 * synchronization of ip->meta with its underlying
			 * chains.
			 */
			hammer2_inode_chain_sync(ip);
			hammer2_inode_chain_flush(ip);
		}

		hammer2_inode_unlock(ip);
		hammer2_inode_drop(ip);			/* ipul ref */

		hammer2_spin_ex(&pmp->list_spin);

		/*
		 * If doall is 0 the original sideq_count was greater than
		 * 1/8 the inode count.  Add some hysteresis in the loop,
		 * don't stop flushing until sideq_count drops below 1/16.
		 */
		if (doall == 0 && pmp->sideq_count <= (pmp->inum_count >> 4)) {
			if (hammer2_debug & 0x0001) {
				kprintf("hammer2: flush sideq %ld/%ld (end)\n",
					pmp->sideq_count, pmp->inum_count);
			}
			break;
		}
	}
	hammer2_spin_unex(&pmp->list_spin);
}
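
/*
 * Illustrative sketch (not from the original source; assumes the
 * hammer2_trans_init()/hammer2_trans_done() transaction API): the
 * filesystem sync path runs the sideq to completion inside a flush
 * transaction:
 *
 *	hammer2_trans_init(pmp, HAMMER2_TRANS_ISFLUSH);
 *	hammer2_inode_run_sideq(pmp, 1);
 *	hammer2_trans_done(pmp);
 */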