2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.60 2008/05/18 01:48:50 dillon Exp $
38 #include <vm/vm_extern.h>
42 static int hammer_unload_inode(struct hammer_inode
*ip
);
43 static void hammer_flush_inode_core(hammer_inode_t ip
, int flags
);
44 static int hammer_setup_child_callback(hammer_record_t rec
, void *data
);
45 static int hammer_setup_parent_inodes(hammer_record_t record
);
48 * The kernel is not actively referencing this vnode but is still holding
51 * This is called from the frontend.
54 hammer_vop_inactive(struct vop_inactive_args
*ap
)
56 struct hammer_inode
*ip
= VTOI(ap
->a_vp
);
67 * If the inode no longer has visibility in the filesystem and is
68 * fairly clean, try to recycle it immediately. This can deadlock
69 * in vfsync() if we aren't careful.
71 * Do not queue the inode to the flusher if we still have visibility,
72 * otherwise namespace calls such as chmod will unnecessarily generate
73 * multiple inode updates.
75 hammer_inode_unloadable_check(ip
, 0);
76 if (ip
->ino_data
.nlinks
== 0) {
77 if (ip
->flags
& HAMMER_INODE_MODMASK
)
78 hammer_flush_inode(ip
, 0);
86 * Release the vnode association. This is typically (but not always)
87 * the last reference on the inode.
89 * Once the association is lost we are on our own with regards to
93 hammer_vop_reclaim(struct vop_reclaim_args
*ap
)
95 struct hammer_inode
*ip
;
100 if ((ip
= vp
->v_data
) != NULL
) {
103 hammer_rel_inode(ip
, 1);
109 * Return a locked vnode for the specified inode. The inode must be
110 * referenced but NOT LOCKED on entry and will remain referenced on
113 * Called from the frontend.
116 hammer_get_vnode(struct hammer_inode
*ip
, struct vnode
**vpp
)
122 if ((vp
= ip
->vp
) == NULL
) {
123 error
= getnewvnode(VT_HAMMER
, ip
->hmp
->mp
, vpp
, 0, 0);
126 hammer_lock_ex(&ip
->lock
);
127 if (ip
->vp
!= NULL
) {
128 hammer_unlock(&ip
->lock
);
133 hammer_ref(&ip
->lock
);
137 hammer_get_vnode_type(ip
->ino_data
.obj_type
);
139 switch(ip
->ino_data
.obj_type
) {
140 case HAMMER_OBJTYPE_CDEV
:
141 case HAMMER_OBJTYPE_BDEV
:
142 vp
->v_ops
= &ip
->hmp
->mp
->mnt_vn_spec_ops
;
143 addaliasu(vp
, ip
->ino_data
.rmajor
,
144 ip
->ino_data
.rminor
);
146 case HAMMER_OBJTYPE_FIFO
:
147 vp
->v_ops
= &ip
->hmp
->mp
->mnt_vn_fifo_ops
;
154 * Only mark as the root vnode if the ip is not
155 * historical, otherwise the VFS cache will get
156 * confused. The other half of the special handling
157 * is in hammer_vop_nlookupdotdot().
159 if (ip
->obj_id
== HAMMER_OBJID_ROOT
&&
160 ip
->obj_asof
== ip
->hmp
->asof
) {
164 vp
->v_data
= (void *)ip
;
165 /* vnode locked by getnewvnode() */
166 /* make related vnode dirty if inode dirty? */
167 hammer_unlock(&ip
->lock
);
168 if (vp
->v_type
== VREG
)
169 vinitvmio(vp
, ip
->ino_data
.size
);
174 * loop if the vget fails (aka races), or if the vp
175 * no longer matches ip->vp.
177 if (vget(vp
, LK_EXCLUSIVE
) == 0) {
188 * Acquire a HAMMER inode. The returned inode is not locked. These functions
189 * do not attach or detach the related vnode (use hammer_get_vnode() for
192 * The flags argument is only applied for newly created inodes, and only
193 * certain flags are inherited.
195 * Called from the frontend.
197 struct hammer_inode
*
198 hammer_get_inode(hammer_transaction_t trans
, struct hammer_node
**cache
,
199 u_int64_t obj_id
, hammer_tid_t asof
, int flags
, int *errorp
)
201 hammer_mount_t hmp
= trans
->hmp
;
202 struct hammer_inode_info iinfo
;
203 struct hammer_cursor cursor
;
204 struct hammer_inode
*ip
;
207 * Determine if we already have an inode cached. If we do then
210 iinfo
.obj_id
= obj_id
;
211 iinfo
.obj_asof
= asof
;
213 ip
= hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp
->rb_inos_root
, &iinfo
);
215 hammer_ref(&ip
->lock
);
220 ip
= kmalloc(sizeof(*ip
), M_HAMMER
, M_WAITOK
|M_ZERO
);
221 ++hammer_count_inodes
;
223 ip
->obj_asof
= iinfo
.obj_asof
;
225 ip
->flags
= flags
& HAMMER_INODE_RO
;
227 ip
->flags
|= HAMMER_INODE_RO
;
228 ip
->trunc_off
= 0x7FFFFFFFFFFFFFFFLL
;
229 RB_INIT(&ip
->rec_tree
);
230 TAILQ_INIT(&ip
->bio_list
);
231 TAILQ_INIT(&ip
->bio_alt_list
);
232 TAILQ_INIT(&ip
->target_list
);
235 * Locate the on-disk inode.
238 hammer_init_cursor(trans
, &cursor
, cache
, NULL
);
239 cursor
.key_beg
.localization
= HAMMER_LOCALIZE_INODE
;
240 cursor
.key_beg
.obj_id
= ip
->obj_id
;
241 cursor
.key_beg
.key
= 0;
242 cursor
.key_beg
.create_tid
= 0;
243 cursor
.key_beg
.delete_tid
= 0;
244 cursor
.key_beg
.rec_type
= HAMMER_RECTYPE_INODE
;
245 cursor
.key_beg
.obj_type
= 0;
246 cursor
.asof
= iinfo
.obj_asof
;
247 cursor
.flags
= HAMMER_CURSOR_GET_LEAF
| HAMMER_CURSOR_GET_DATA
|
250 *errorp
= hammer_btree_lookup(&cursor
);
251 if (*errorp
== EDEADLK
) {
252 hammer_done_cursor(&cursor
);
257 * On success the B-Tree lookup will hold the appropriate
258 * buffer cache buffers and provide a pointer to the requested
259 * information. Copy the information to the in-memory inode
260 * and cache the B-Tree node to improve future operations.
263 ip
->ino_leaf
= cursor
.node
->ondisk
->elms
[cursor
.index
].leaf
;
264 ip
->ino_data
= cursor
.data
->inode
;
265 hammer_cache_node(cursor
.node
, &ip
->cache
[0]);
267 hammer_cache_node(cursor
.node
, cache
);
271 * On success load the inode's record and data and insert the
272 * inode into the B-Tree. It is possible to race another lookup
273 * insertion of the same inode so deal with that condition too.
275 * The cursor's locked node interlocks against others creating and
276 * destroying ip while we were blocked.
279 hammer_ref(&ip
->lock
);
280 if (RB_INSERT(hammer_ino_rb_tree
, &hmp
->rb_inos_root
, ip
)) {
281 hammer_uncache_node(&ip
->cache
[0]);
282 hammer_uncache_node(&ip
->cache
[1]);
283 KKASSERT(ip
->lock
.refs
== 1);
284 --hammer_count_inodes
;
286 hammer_done_cursor(&cursor
);
289 ip
->flags
|= HAMMER_INODE_ONDISK
;
292 * Do not panic on read-only accesses which fail, particularly
293 * historical accesses where the snapshot might not have
294 * complete connectivity.
296 if ((flags
& HAMMER_INODE_RO
) == 0) {
297 kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
298 ip
, ip
->obj_id
, &cursor
, *errorp
);
301 --hammer_count_inodes
;
305 hammer_done_cursor(&cursor
);
310 * Create a new filesystem object, returning the inode in *ipp. The
311 * returned inode will be referenced.
313 * The inode is created in-memory.
316 hammer_create_inode(hammer_transaction_t trans
, struct vattr
*vap
,
317 struct ucred
*cred
, hammer_inode_t dip
,
318 struct hammer_inode
**ipp
)
325 ip
= kmalloc(sizeof(*ip
), M_HAMMER
, M_WAITOK
|M_ZERO
);
326 ++hammer_count_inodes
;
327 ip
->obj_id
= hammer_alloc_objid(trans
, dip
);
328 KKASSERT(ip
->obj_id
!= 0);
329 ip
->obj_asof
= hmp
->asof
;
331 ip
->flush_state
= HAMMER_FST_IDLE
;
332 ip
->flags
= HAMMER_INODE_DDIRTY
| HAMMER_INODE_ITIMES
;
334 ip
->trunc_off
= 0x7FFFFFFFFFFFFFFFLL
;
335 RB_INIT(&ip
->rec_tree
);
336 TAILQ_INIT(&ip
->bio_list
);
337 TAILQ_INIT(&ip
->bio_alt_list
);
338 TAILQ_INIT(&ip
->target_list
);
340 ip
->ino_leaf
.atime
= trans
->time
;
341 ip
->ino_data
.mtime
= trans
->time
;
342 ip
->ino_data
.size
= 0;
343 ip
->ino_data
.nlinks
= 0;
345 ip
->ino_leaf
.base
.btype
= HAMMER_BTREE_TYPE_RECORD
;
346 ip
->ino_leaf
.base
.localization
= HAMMER_LOCALIZE_INODE
;
347 ip
->ino_leaf
.base
.obj_id
= ip
->obj_id
;
348 ip
->ino_leaf
.base
.key
= 0;
349 ip
->ino_leaf
.base
.create_tid
= 0;
350 ip
->ino_leaf
.base
.delete_tid
= 0;
351 ip
->ino_leaf
.base
.rec_type
= HAMMER_RECTYPE_INODE
;
352 ip
->ino_leaf
.base
.obj_type
= hammer_get_obj_type(vap
->va_type
);
354 ip
->ino_data
.obj_type
= ip
->ino_leaf
.base
.obj_type
;
355 ip
->ino_data
.version
= HAMMER_INODE_DATA_VERSION
;
356 ip
->ino_data
.mode
= vap
->va_mode
;
357 ip
->ino_data
.ctime
= trans
->time
;
358 ip
->ino_data
.parent_obj_id
= (dip
) ? dip
->ino_leaf
.base
.obj_id
: 0;
360 switch(ip
->ino_leaf
.base
.obj_type
) {
361 case HAMMER_OBJTYPE_CDEV
:
362 case HAMMER_OBJTYPE_BDEV
:
363 ip
->ino_data
.rmajor
= vap
->va_rmajor
;
364 ip
->ino_data
.rminor
= vap
->va_rminor
;
371 * Calculate default uid/gid and overwrite with information from
374 xuid
= hammer_to_unix_xid(&dip
->ino_data
.uid
);
375 ip
->ino_data
.gid
= dip
->ino_data
.gid
;
376 xuid
= vop_helper_create_uid(hmp
->mp
, dip
->ino_data
.mode
, xuid
, cred
,
378 ip
->ino_data
.mode
= vap
->va_mode
;
380 if (vap
->va_vaflags
& VA_UID_UUID_VALID
)
381 ip
->ino_data
.uid
= vap
->va_uid_uuid
;
382 else if (vap
->va_uid
!= (uid_t
)VNOVAL
)
383 hammer_guid_to_uuid(&ip
->ino_data
.uid
, xuid
);
384 if (vap
->va_vaflags
& VA_GID_UUID_VALID
)
385 ip
->ino_data
.gid
= vap
->va_gid_uuid
;
386 else if (vap
->va_gid
!= (gid_t
)VNOVAL
)
387 hammer_guid_to_uuid(&ip
->ino_data
.gid
, vap
->va_gid
);
389 hammer_ref(&ip
->lock
);
390 if (RB_INSERT(hammer_ino_rb_tree
, &hmp
->rb_inos_root
, ip
)) {
391 hammer_unref(&ip
->lock
);
392 panic("hammer_create_inode: duplicate obj_id %llx", ip
->obj_id
);
399 * Called by hammer_sync_inode().
402 hammer_update_inode(hammer_cursor_t cursor
, hammer_inode_t ip
)
404 hammer_transaction_t trans
= cursor
->trans
;
405 hammer_record_t record
;
412 * If the inode has a presence on-disk then locate it and mark
413 * it deleted, setting DELONDISK.
415 * The record may or may not be physically deleted, depending on
416 * the retention policy.
418 if ((ip
->flags
& (HAMMER_INODE_ONDISK
|HAMMER_INODE_DELONDISK
)) ==
419 HAMMER_INODE_ONDISK
) {
420 hammer_normalize_cursor(cursor
);
421 cursor
->key_beg
.localization
= HAMMER_LOCALIZE_INODE
;
422 cursor
->key_beg
.obj_id
= ip
->obj_id
;
423 cursor
->key_beg
.key
= 0;
424 cursor
->key_beg
.create_tid
= 0;
425 cursor
->key_beg
.delete_tid
= 0;
426 cursor
->key_beg
.rec_type
= HAMMER_RECTYPE_INODE
;
427 cursor
->key_beg
.obj_type
= 0;
428 cursor
->asof
= ip
->obj_asof
;
429 cursor
->flags
&= ~HAMMER_CURSOR_INITMASK
;
430 cursor
->flags
|= HAMMER_CURSOR_GET_LEAF
| HAMMER_CURSOR_ASOF
;
431 cursor
->flags
|= HAMMER_CURSOR_BACKEND
;
433 error
= hammer_btree_lookup(cursor
);
434 if (hammer_debug_inode
)
435 kprintf("IPDEL %p %08x %d", ip
, ip
->flags
, error
);
437 kprintf("error %d\n", error
);
438 Debugger("hammer_update_inode");
442 error
= hammer_ip_delete_record(cursor
, trans
->tid
);
443 if (hammer_debug_inode
)
444 kprintf(" error %d\n", error
);
445 if (error
&& error
!= EDEADLK
) {
446 kprintf("error %d\n", error
);
447 Debugger("hammer_update_inode2");
450 ip
->flags
|= HAMMER_INODE_DELONDISK
;
453 hammer_cache_node(cursor
->node
, &ip
->cache
[0]);
455 if (error
== EDEADLK
) {
456 hammer_done_cursor(cursor
);
457 error
= hammer_init_cursor(trans
, cursor
,
459 if (hammer_debug_inode
)
460 kprintf("IPDED %p %d\n", ip
, error
);
467 * Ok, write out the initial record or a new record (after deleting
468 * the old one), unless the DELETED flag is set. This routine will
469 * clear DELONDISK if it writes out a record.
471 * Update our inode statistics if this is the first application of
474 if (error
== 0 && (ip
->flags
& HAMMER_INODE_DELETED
) == 0) {
476 * Generate a record and write it to the media
478 record
= hammer_alloc_mem_record(ip
, 0);
479 record
->type
= HAMMER_MEM_RECORD_INODE
;
480 record
->flush_state
= HAMMER_FST_FLUSH
;
481 record
->leaf
= ip
->sync_ino_leaf
;
482 record
->leaf
.base
.create_tid
= trans
->tid
;
483 record
->leaf
.data_len
= sizeof(ip
->sync_ino_data
);
484 record
->data
= (void *)&ip
->sync_ino_data
;
485 record
->flags
|= HAMMER_RECF_INTERLOCK_BE
;
487 error
= hammer_ip_sync_record_cursor(cursor
, record
);
488 if (hammer_debug_inode
)
489 kprintf("GENREC %p rec %08x %d\n",
490 ip
, record
->flags
, error
);
491 if (error
!= EDEADLK
)
493 hammer_done_cursor(cursor
);
494 error
= hammer_init_cursor(trans
, cursor
,
496 if (hammer_debug_inode
)
497 kprintf("GENREC reinit %d\n", error
);
502 kprintf("error %d\n", error
);
503 Debugger("hammer_update_inode3");
507 * The record isn't managed by the inode's record tree,
508 * destroy it whether we succeed or fail.
510 record
->flags
&= ~HAMMER_RECF_INTERLOCK_BE
;
511 record
->flags
|= HAMMER_RECF_DELETED_FE
;
512 record
->flush_state
= HAMMER_FST_IDLE
;
513 hammer_rel_mem_record(record
);
519 if (hammer_debug_inode
)
520 kprintf("CLEANDELOND %p %08x\n", ip
, ip
->flags
);
521 ip
->sync_flags
&= ~(HAMMER_INODE_DDIRTY
|
522 HAMMER_INODE_ITIMES
);
523 ip
->flags
&= ~HAMMER_INODE_DELONDISK
;
526 * Root volume count of inodes
528 if ((ip
->flags
& HAMMER_INODE_ONDISK
) == 0) {
529 hammer_modify_volume_field(trans
,
532 ++ip
->hmp
->rootvol
->ondisk
->vol0_stat_inodes
;
533 hammer_modify_volume_done(trans
->rootvol
);
534 ip
->flags
|= HAMMER_INODE_ONDISK
;
535 if (hammer_debug_inode
)
536 kprintf("NOWONDISK %p\n", ip
);
542 * If the inode has been destroyed, clean out any left-over flags
543 * that may have been set by the frontend.
545 if (error
== 0 && (ip
->flags
& HAMMER_INODE_DELETED
)) {
546 ip
->sync_flags
&= ~(HAMMER_INODE_DDIRTY
|
547 HAMMER_INODE_ITIMES
);
553 * Update only the itimes fields. This is done no-historically. The
554 * record is updated in-place on the disk.
557 hammer_update_itimes(hammer_cursor_t cursor
, hammer_inode_t ip
)
559 hammer_transaction_t trans
= cursor
->trans
;
560 struct hammer_btree_leaf_elm
*leaf
;
565 if ((ip
->flags
& (HAMMER_INODE_ONDISK
|HAMMER_INODE_DELONDISK
)) ==
566 HAMMER_INODE_ONDISK
) {
567 hammer_normalize_cursor(cursor
);
568 cursor
->key_beg
.localization
= HAMMER_LOCALIZE_INODE
;
569 cursor
->key_beg
.obj_id
= ip
->obj_id
;
570 cursor
->key_beg
.key
= 0;
571 cursor
->key_beg
.create_tid
= 0;
572 cursor
->key_beg
.delete_tid
= 0;
573 cursor
->key_beg
.rec_type
= HAMMER_RECTYPE_INODE
;
574 cursor
->key_beg
.obj_type
= 0;
575 cursor
->asof
= ip
->obj_asof
;
576 cursor
->flags
&= ~HAMMER_CURSOR_INITMASK
;
577 cursor
->flags
|= HAMMER_CURSOR_GET_LEAF
| HAMMER_CURSOR_ASOF
;
578 cursor
->flags
|= HAMMER_CURSOR_BACKEND
;
580 error
= hammer_btree_lookup(cursor
);
582 kprintf("error %d\n", error
);
583 Debugger("hammer_update_itimes1");
587 * Do not generate UNDO records for atime updates.
590 hammer_modify_node(trans
, cursor
->node
,
591 &leaf
->atime
, sizeof(leaf
->atime
));
592 leaf
->atime
= ip
->sync_ino_leaf
.atime
;
593 hammer_modify_node_done(cursor
->node
);
594 /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
595 ip
->sync_flags
&= ~HAMMER_INODE_ITIMES
;
596 /* XXX recalculate crc */
597 hammer_cache_node(cursor
->node
, &ip
->cache
[0]);
599 if (error
== EDEADLK
) {
600 hammer_done_cursor(cursor
);
601 error
= hammer_init_cursor(trans
, cursor
,
611 * Release a reference on an inode, flush as requested.
613 * On the last reference we queue the inode to the flusher for its final
617 hammer_rel_inode(struct hammer_inode
*ip
, int flush
)
619 hammer_mount_t hmp
= ip
->hmp
;
622 * Handle disposition when dropping the last ref.
625 if (ip
->lock
.refs
== 1) {
627 * Determine whether on-disk action is needed for
628 * the inode's final disposition.
630 KKASSERT(ip
->vp
== NULL
);
631 hammer_inode_unloadable_check(ip
, 0);
632 if (ip
->flags
& HAMMER_INODE_MODMASK
) {
633 hammer_flush_inode(ip
, 0);
634 } else if (ip
->lock
.refs
== 1) {
635 hammer_unload_inode(ip
);
640 hammer_flush_inode(ip
, 0);
643 * The inode still has multiple refs, try to drop
646 KKASSERT(ip
->lock
.refs
>= 1);
647 if (ip
->lock
.refs
> 1) {
648 hammer_unref(&ip
->lock
);
655 * XXX bad hack until I add code to track inodes in SETUP. We
656 * can queue a lot of inodes to the syncer but if we don't wake
657 * it up the undo sets will be too large or too many unflushed
658 * records will build up and blow our malloc limit.
660 if (++hmp
->reclaim_count
> 256) {
661 hmp
->reclaim_count
= 0;
662 hammer_flusher_async(hmp
);
667 * Unload and destroy the specified inode. Must be called with one remaining
668 * reference. The reference is disposed of.
670 * This can only be called in the context of the flusher.
673 hammer_unload_inode(struct hammer_inode
*ip
)
675 KASSERT(ip
->lock
.refs
== 1,
676 ("hammer_unload_inode: %d refs\n", ip
->lock
.refs
));
677 KKASSERT(ip
->vp
== NULL
);
678 KKASSERT(ip
->flush_state
== HAMMER_FST_IDLE
);
679 KKASSERT(ip
->cursor_ip_refs
== 0);
680 KKASSERT(ip
->lock
.lockcount
== 0);
681 KKASSERT((ip
->flags
& HAMMER_INODE_MODMASK
) == 0);
683 KKASSERT(RB_EMPTY(&ip
->rec_tree
));
684 KKASSERT(TAILQ_EMPTY(&ip
->target_list
));
685 KKASSERT(TAILQ_EMPTY(&ip
->bio_list
));
686 KKASSERT(TAILQ_EMPTY(&ip
->bio_alt_list
));
688 RB_REMOVE(hammer_ino_rb_tree
, &ip
->hmp
->rb_inos_root
, ip
);
690 hammer_uncache_node(&ip
->cache
[0]);
691 hammer_uncache_node(&ip
->cache
[1]);
693 hammer_clear_objid(ip
);
694 --hammer_count_inodes
;
701 * A transaction has modified an inode, requiring updates as specified by
704 * HAMMER_INODE_DDIRTY: Inode data has been updated
705 * HAMMER_INODE_XDIRTY: Dirty in-memory records
706 * HAMMER_INODE_BUFS: Dirty buffer cache buffers
707 * HAMMER_INODE_DELETED: Inode record/data must be deleted
708 * HAMMER_INODE_ITIMES: mtime/atime has been updated
711 hammer_modify_inode(hammer_transaction_t trans
, hammer_inode_t ip
, int flags
)
713 KKASSERT ((ip
->flags
& HAMMER_INODE_RO
) == 0 ||
714 (flags
& (HAMMER_INODE_DDIRTY
|
715 HAMMER_INODE_XDIRTY
| HAMMER_INODE_BUFS
|
716 HAMMER_INODE_DELETED
| HAMMER_INODE_ITIMES
)) == 0);
722 * Request that an inode be flushed. This whole mess cannot block and may
723 * recurse. Once requested HAMMER will attempt to actively flush it until
724 * the flush can be done.
726 * The inode may already be flushing, or may be in a setup state. We can
727 * place the inode in a flushing state if it is currently idle and flag it
728 * to reflush if it is currently flushing.
731 hammer_flush_inode(hammer_inode_t ip
, int flags
)
733 hammer_record_t depend
;
737 * Trivial 'nothing to flush' case. If the inode is ina SETUP
738 * state we have to put it back into an IDLE state so we can
739 * drop the extra ref.
741 if ((ip
->flags
& HAMMER_INODE_MODMASK
) == 0) {
742 if (ip
->flush_state
== HAMMER_FST_SETUP
) {
743 ip
->flush_state
= HAMMER_FST_IDLE
;
744 hammer_rel_inode(ip
, 0);
750 * Our flush action will depend on the current state.
752 switch(ip
->flush_state
) {
753 case HAMMER_FST_IDLE
:
755 * We have no dependancies and can flush immediately. Some
756 * our children may not be flushable so we have to re-test
757 * with that additional knowledge.
759 hammer_flush_inode_core(ip
, flags
);
761 case HAMMER_FST_SETUP
:
763 * Recurse upwards through dependancies via target_list
764 * and start their flusher actions going if possible.
766 * 'good' is our connectivity. -1 means we have none and
767 * can't flush, 0 means there weren't any dependancies, and
768 * 1 means we have good connectivity.
771 TAILQ_FOREACH(depend
, &ip
->target_list
, target_entry
) {
772 r
= hammer_setup_parent_inodes(depend
);
773 if (r
< 0 && good
== 0)
780 * We can continue if good >= 0. Determine how many records
781 * under our inode can be flushed (and mark them).
784 hammer_flush_inode_core(ip
, flags
);
786 ip
->flags
|= HAMMER_INODE_REFLUSH
;
787 if (flags
& HAMMER_FLUSH_SIGNAL
) {
788 ip
->flags
|= HAMMER_INODE_RESIGNAL
;
789 hammer_flusher_async(ip
->hmp
);
795 * We are already flushing, flag the inode to reflush
796 * if needed after it completes its current flush.
798 if ((ip
->flags
& HAMMER_INODE_REFLUSH
) == 0)
799 ip
->flags
|= HAMMER_INODE_REFLUSH
;
800 if (flags
& HAMMER_FLUSH_SIGNAL
) {
801 ip
->flags
|= HAMMER_INODE_RESIGNAL
;
802 hammer_flusher_async(ip
->hmp
);
809 * We are asked to recurse upwards and convert the record from SETUP
810 * to FLUSH if possible. record->ip is a parent of the caller's inode,
811 * and record->target_ip is the caller's inode.
813 * Return 1 if the record gives us connectivity
815 * Return 0 if the record is not relevant
817 * Return -1 if we can't resolve the dependancy and there is no connectivity.
820 hammer_setup_parent_inodes(hammer_record_t record
)
822 hammer_mount_t hmp
= record
->ip
->hmp
;
823 hammer_record_t depend
;
827 KKASSERT(record
->flush_state
!= HAMMER_FST_IDLE
);
831 * If the record is already flushing, is it in our flush group?
833 * If it is in our flush group but it is a general record or a
834 * delete-on-disk, it does not improve our connectivity (return 0),
835 * and if the target inode is not trying to destroy itself we can't
836 * allow the operation yet anyway (the second return -1).
838 if (record
->flush_state
== HAMMER_FST_FLUSH
) {
839 if (record
->flush_group
!= hmp
->flusher_next
) {
840 ip
->flags
|= HAMMER_INODE_REFLUSH
;
843 if (record
->type
== HAMMER_MEM_RECORD_ADD
)
850 * It must be a setup record. Try to resolve the setup dependancies
851 * by recursing upwards so we can place ip on the flush list.
853 KKASSERT(record
->flush_state
== HAMMER_FST_SETUP
);
856 TAILQ_FOREACH(depend
, &ip
->target_list
, target_entry
) {
857 r
= hammer_setup_parent_inodes(depend
);
858 if (r
< 0 && good
== 0)
865 * We can't flush ip because it has no connectivity (XXX also check
866 * nlinks for pre-existing connectivity!). Flag it so any resolution
867 * recurses back down.
870 ip
->flags
|= HAMMER_INODE_REFLUSH
;
875 * We are go, place the parent inode in a flushing state so we can
876 * place its record in a flushing state. Note that the parent
877 * may already be flushing. The record must be in the same flush
878 * group as the parent.
880 if (ip
->flush_state
!= HAMMER_FST_FLUSH
)
881 hammer_flush_inode_core(ip
, HAMMER_FLUSH_RECURSION
);
882 KKASSERT(ip
->flush_state
== HAMMER_FST_FLUSH
);
883 KKASSERT(record
->flush_state
== HAMMER_FST_SETUP
);
886 if (record
->type
== HAMMER_MEM_RECORD_DEL
&&
887 (record
->target_ip
->flags
& (HAMMER_INODE_DELETED
|HAMMER_INODE_DELONDISK
)) == 0) {
889 * Regardless of flushing state we cannot sync this path if the
890 * record represents a delete-on-disk but the target inode
891 * is not ready to sync its own deletion.
893 * XXX need to count effective nlinks to determine whether
894 * the flush is ok, otherwise removing a hardlink will
895 * just leave the DEL record to rot.
897 record
->target_ip
->flags
|= HAMMER_INODE_REFLUSH
;
901 if (ip
->flush_group
== ip
->hmp
->flusher_next
) {
903 * This is the record we wanted to synchronize.
905 record
->flush_state
= HAMMER_FST_FLUSH
;
906 record
->flush_group
= ip
->flush_group
;
907 hammer_ref(&record
->lock
);
908 if (record
->type
== HAMMER_MEM_RECORD_ADD
)
912 * A general or delete-on-disk record does not contribute
913 * to our visibility. We can still flush it, however.
918 * We couldn't resolve the dependancies, request that the
919 * inode be flushed when the dependancies can be resolved.
921 ip
->flags
|= HAMMER_INODE_REFLUSH
;
927 * This is the core routine placing an inode into the FST_FLUSH state.
930 hammer_flush_inode_core(hammer_inode_t ip
, int flags
)
935 * Set flush state and prevent the flusher from cycling into
936 * the next flush group. Do not place the ip on the list yet.
937 * Inodes not in the idle state get an extra reference.
939 KKASSERT(ip
->flush_state
!= HAMMER_FST_FLUSH
);
940 if (ip
->flush_state
== HAMMER_FST_IDLE
)
941 hammer_ref(&ip
->lock
);
942 ip
->flush_state
= HAMMER_FST_FLUSH
;
943 ip
->flush_group
= ip
->hmp
->flusher_next
;
944 ++ip
->hmp
->flusher_lock
;
947 * We need to be able to vfsync/truncate from the backend.
949 KKASSERT((ip
->flags
& HAMMER_INODE_VHELD
) == 0);
950 if (ip
->vp
&& (ip
->vp
->v_flag
& VINACTIVE
) == 0) {
951 ip
->flags
|= HAMMER_INODE_VHELD
;
956 * Figure out how many in-memory records we can actually flush
957 * (not including inode meta-data, buffers, etc).
959 if (flags
& HAMMER_FLUSH_RECURSION
) {
962 go_count
= RB_SCAN(hammer_rec_rb_tree
, &ip
->rec_tree
, NULL
,
963 hammer_setup_child_callback
, NULL
);
967 * This is a more involved test that includes go_count. If we
968 * can't flush, flag the inode and return. If go_count is 0 we
969 * were are unable to flush any records in our rec_tree and
970 * must ignore the XDIRTY flag.
973 if ((ip
->flags
& HAMMER_INODE_MODMASK_NOXDIRTY
) == 0) {
974 ip
->flags
|= HAMMER_INODE_REFLUSH
;
975 ip
->flush_state
= HAMMER_FST_SETUP
;
976 if (ip
->flags
& HAMMER_INODE_VHELD
) {
977 ip
->flags
&= ~HAMMER_INODE_VHELD
;
980 if (flags
& HAMMER_FLUSH_SIGNAL
) {
981 ip
->flags
|= HAMMER_INODE_RESIGNAL
;
982 hammer_flusher_async(ip
->hmp
);
984 if (--ip
->hmp
->flusher_lock
== 0)
985 wakeup(&ip
->hmp
->flusher_lock
);
991 * Snapshot the state of the inode for the backend flusher.
993 * The truncation must be retained in the frontend until after
994 * we've actually performed the record deletion.
996 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
997 * and stays in ip->flags. Once set, it stays set until the
998 * inode is destroyed.
1000 ip
->sync_flags
= (ip
->flags
& HAMMER_INODE_MODMASK
);
1001 ip
->sync_trunc_off
= ip
->trunc_off
;
1002 ip
->sync_ino_leaf
= ip
->ino_leaf
;
1003 ip
->sync_ino_data
= ip
->ino_data
;
1004 ip
->flags
&= ~HAMMER_INODE_MODMASK
| HAMMER_INODE_TRUNCATED
;
1007 * The flusher list inherits our inode and reference.
1009 TAILQ_INSERT_TAIL(&ip
->hmp
->flush_list
, ip
, flush_entry
);
1010 if (--ip
->hmp
->flusher_lock
== 0)
1011 wakeup(&ip
->hmp
->flusher_lock
);
1013 if (flags
& HAMMER_FLUSH_SIGNAL
)
1014 hammer_flusher_async(ip
->hmp
);
1018 * Callback for scan of ip->rec_tree. Try to include each record in our
1019 * flush. ip->flush_group has been set but the inode has not yet been
1020 * moved into a flushing state.
1022 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1025 * We return 1 for any record placed or found in FST_FLUSH, which prevents
1026 * the caller from shortcutting the flush.
1029 hammer_setup_child_callback(hammer_record_t rec
, void *data
)
1031 hammer_inode_t target_ip
;
1036 * If the record has been deleted by the backend (it's being held
1037 * by the frontend in a race), just ignore it.
1039 if (rec
->flags
& HAMMER_RECF_DELETED_BE
)
1043 * If the record is in an idle state it has no dependancies and
1049 switch(rec
->flush_state
) {
1050 case HAMMER_FST_IDLE
:
1052 * Record has no setup dependancy, we can flush it.
1054 KKASSERT(rec
->target_ip
== NULL
);
1055 rec
->flush_state
= HAMMER_FST_FLUSH
;
1056 rec
->flush_group
= ip
->flush_group
;
1057 hammer_ref(&rec
->lock
);
1060 case HAMMER_FST_SETUP
:
1062 * Record has a setup dependancy. Try to include the
1063 * target ip in the flush.
1065 * We have to be careful here, if we do not do the right
1066 * thing we can lose track of dirty inodes and the system
1067 * will lockup trying to allocate buffers.
1069 target_ip
= rec
->target_ip
;
1070 KKASSERT(target_ip
!= NULL
);
1071 KKASSERT(target_ip
->flush_state
!= HAMMER_FST_IDLE
);
1072 if (target_ip
->flush_state
== HAMMER_FST_FLUSH
) {
1074 * If the target IP is already flushing in our group
1075 * we are golden, otherwise make sure the target
1078 if (target_ip
->flush_group
== ip
->flush_group
) {
1079 rec
->flush_state
= HAMMER_FST_FLUSH
;
1080 rec
->flush_group
= ip
->flush_group
;
1081 hammer_ref(&rec
->lock
);
1084 target_ip
->flags
|= HAMMER_INODE_REFLUSH
;
1086 } else if (rec
->type
== HAMMER_MEM_RECORD_ADD
) {
1088 * If the target IP is not flushing we can force
1089 * it to flush, even if it is unable to write out
1090 * any of its own records we have at least one in
1091 * hand that we CAN deal with.
1093 rec
->flush_state
= HAMMER_FST_FLUSH
;
1094 rec
->flush_group
= ip
->flush_group
;
1095 hammer_ref(&rec
->lock
);
1096 hammer_flush_inode_core(target_ip
,
1097 HAMMER_FLUSH_RECURSION
);
1101 * General or delete-on-disk record.
1103 * XXX this needs help. If a delete-on-disk we could
1104 * disconnect the target. If the target has its own
1105 * dependancies they really need to be flushed.
1109 rec
->flush_state
= HAMMER_FST_FLUSH
;
1110 rec
->flush_group
= ip
->flush_group
;
1111 hammer_ref(&rec
->lock
);
1112 hammer_flush_inode_core(target_ip
,
1113 HAMMER_FLUSH_RECURSION
);
1117 case HAMMER_FST_FLUSH
:
1119 * Record already associated with a flush group. It had
1122 KKASSERT(rec
->flush_group
== ip
->flush_group
);
1130 * Wait for a previously queued flush to complete
1133 hammer_wait_inode(hammer_inode_t ip
)
1135 while (ip
->flush_state
!= HAMMER_FST_IDLE
) {
1136 ip
->flags
|= HAMMER_INODE_FLUSHW
;
1137 tsleep(&ip
->flags
, 0, "hmrwin", 0);
1142 * Called by the backend code when a flush has been completed.
/*
 * hammer_flush_inode_done(ip) - backend completion hook run after the
 * flusher has finished (or aborted) syncing an inode.  Merges the backend
 * sync_* state back into the frontend fields, requeues BIOs that were
 * diverted to the alt list while the flush ran, downgrades flush_state,
 * and restarts another flush if the frontend requested one.
 *
 * NOTE(review): this chunk is a garbled extraction of the original file --
 * comment delimiters, braces and several code lines (e.g. the local
 * "struct bio *bio;" declaration, the else-branch braces, the vrele() and
 * wakeup() calls) have been lost.  Only comment text was added/repaired;
 * the surviving code tokens are untouched.
 */
1143 * The inode has already been removed from the flush list.
1145 * A pipelined flush can occur, in which case we must re-enter the
1146 * inode on the list and re-copy its fields.
1149 hammer_flush_inode_done(hammer_inode_t ip
)
/* Caller contract: only legal while the inode is in the FLUSH state. */
1154 KKASSERT(ip
->flush_state
== HAMMER_FST_FLUSH
);
1157 * Allow BIOs to queue to the inode's primary bioq again.
1159 ip
->flags
&= ~HAMMER_INODE_WRITE_ALT
;
1162 * Merge left-over flags back into the frontend and fix the state.
1164 ip
->flags
|= ip
->sync_flags
;
1167 * The backend may have adjusted nlinks, so if the adjusted nlinks
1168 * does not match the frontend set the frontend's RDIRTY flag again.
1170 if (ip
->ino_data
.nlinks
!= ip
->sync_ino_data
.nlinks
)
1171 ip
->flags
|= HAMMER_INODE_DDIRTY
;
1174 * Reflush any BIOs that wound up in the alt list. Our inode will
1175 * also wind up at the end of the flusher's list.
1177 while ((bio
= TAILQ_FIRST(&ip
->bio_alt_list
)) != NULL
) {
1178 TAILQ_REMOVE(&ip
->bio_alt_list
, bio
, bio_act
);
1179 TAILQ_INSERT_TAIL(&ip
->bio_list
, bio
, bio_act
);
1182 * Fix up the dirty buffer status.
/* Dirty if BIOs remain queued or the vnode still has dirty buffers. */
1184 if (TAILQ_FIRST(&ip
->bio_list
) ||
1185 (ip
->vp
&& RB_ROOT(&ip
->vp
->v_rbdirty_tree
))) {
1186 ip
->flags
|= HAMMER_INODE_BUFS
;
1190 * Re-set the XDIRTY flag if some of the inode's in-memory records
1191 * could not be flushed.
1193 if (RB_ROOT(&ip
->rec_tree
))
1194 ip
->flags
|= HAMMER_INODE_XDIRTY
;
1197 * Do not lose track of inodes which no longer have vnode
1198 * associations, otherwise they may never get flushed again.
1200 if ((ip
->flags
& HAMMER_INODE_MODMASK
) && ip
->vp
== NULL
)
1201 ip
->flags
|= HAMMER_INODE_REFLUSH
;
1204 * Adjust flush_state. The target state (idle or setup) shouldn't
1205 * be terribly important since we will reflush if we really need
1206 * to do anything. XXX
/* IDLE when nothing depends on us and no in-memory records remain. */
1208 if (TAILQ_EMPTY(&ip
->target_list
) && RB_EMPTY(&ip
->rec_tree
)) {
1209 ip
->flush_state
= HAMMER_FST_IDLE
;
/* else-branch (brace lost in extraction): dependencies remain -> SETUP. */
1212 ip
->flush_state
= HAMMER_FST_SETUP
;
1216 * Clean up the vnode ref
/* NOTE(review): the vnode-release call that paired with clearing
 * VHELD appears to have been lost in extraction -- verify against
 * the original file. */
1218 if (ip
->flags
& HAMMER_INODE_VHELD
) {
1219 ip
->flags
&= ~HAMMER_INODE_VHELD
;
1224 * If the frontend made more changes and requested another flush,
1225 * then try to get it running.
1227 if (ip
->flags
& HAMMER_INODE_REFLUSH
) {
1228 ip
->flags
&= ~HAMMER_INODE_REFLUSH
;
1229 if (ip
->flags
& HAMMER_INODE_RESIGNAL
) {
1230 ip
->flags
&= ~HAMMER_INODE_RESIGNAL
;
1231 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
/* else: re-flush without signaling the flusher thread. */
1233 hammer_flush_inode(ip
, 0);
1238 * Finally, if the frontend is waiting for a flush to complete,
/* ...wake it up.  NOTE(review): the wakeup() call itself is missing
 * from this extraction. */
1241 if (ip
->flush_state
!= HAMMER_FST_FLUSH
) {
1242 if (ip
->flags
& HAMMER_INODE_FLUSHW
) {
1243 ip
->flags
&= ~HAMMER_INODE_FLUSHW
;
/* Drop the reference the flush setup code took on the inode. */
1248 hammer_rel_inode(ip
, 0);
/*
 * hammer_sync_record_callback(record, data) - RB_SCAN callback invoked by
 * hammer_sync_inode() for each in-memory record; "data" is the backend
 * hammer_cursor_t.  Interlocks the record with the BE flag, then syncs it
 * to media (retrying with a fresh cursor on EDEADLK).
 *
 * NOTE(review): garbled extraction -- the "int error;" declaration, early
 * "return" statements and some braces were lost.  Only comment text was
 * added/repaired; surviving code tokens are untouched.
 */
1252 * Called from hammer_sync_inode() to synchronize in-memory records
1256 hammer_sync_record_callback(hammer_record_t record
, void *data
)
1258 hammer_cursor_t cursor
= data
;
1259 hammer_transaction_t trans
= cursor
->trans
;
1263 * Skip records that do not belong to the current flush.
1265 if (record
->flush_state
!= HAMMER_FST_FLUSH
)
/* Records in the flush must not have been deleted by the backend yet. */
1267 KKASSERT((record
->flags
& HAMMER_RECF_DELETED_BE
) == 0);
1269 if (record
->flush_group
!= record
->ip
->flush_group
) {
1270 kprintf("sync_record %p ip %p bad flush group %d %d\n", record
, record
->ip
, record
->flush_group
,record
->ip
->flush_group
);
1275 KKASSERT(record
->flush_group
== record
->ip
->flush_group
);
1278 * Interlock the record using the BE flag. Once BE is set the
1279 * frontend cannot change the state of FE.
1281 * NOTE: If FE is set prior to us setting BE we still sync the
1282 * record out, but the flush completion code converts it to
1283 * a delete-on-disk record instead of destroying it.
1285 KKASSERT((record
->flags
& HAMMER_RECF_INTERLOCK_BE
) == 0);
1286 record
->flags
|= HAMMER_RECF_INTERLOCK_BE
;
1289 * If the whole inode is being deleted all on-disk records will
1290 * be deleted very soon, we can't sync any new records to disk
1291 * because they will be deleted in the same transaction they were
1292 * created in (delete_tid == create_tid), which will assert.
1294 * XXX There may be a case with RECORD_ADD with DELETED_FE set
1295 * that we currently panic on.
1297 if (record
->ip
->sync_flags
& HAMMER_INODE_DELETING
) {
1298 switch(record
->type
) {
1299 case HAMMER_MEM_RECORD_GENERAL
:
/* General records are simply discarded on both sides. */
1300 record
->flags
|= HAMMER_RECF_DELETED_FE
;
1301 record
->flags
|= HAMMER_RECF_DELETED_BE
;
1304 case HAMMER_MEM_RECORD_ADD
:
1305 panic("hammer_sync_record_callback: illegal add "
1306 "during inode deletion record %p", record
);
1307 break; /* NOT REACHED */
1308 case HAMMER_MEM_RECORD_INODE
:
1309 panic("hammer_sync_record_callback: attempt to "
1310 "sync inode record %p?", record
);
1311 break; /* NOT REACHED */
1312 case HAMMER_MEM_RECORD_DEL
:
1314 * Follow through and issue the on-disk deletion
1321 * If DELETED_FE is set we may have already sent dependent pieces
1322 * to the disk and we must flush the record as if it hadn't been
1323 * deleted. This creates a bit of a mess because we have to
1324 * have ip_sync_record convert the record to MEM_RECORD_DEL before
1325 * it inserts the B-Tree record. Otherwise the media sync might
1326 * be visible to the frontend.
1328 if (record
->flags
& HAMMER_RECF_DELETED_FE
) {
1329 if (record
->type
== HAMMER_MEM_RECORD_ADD
) {
1330 record
->flags
|= HAMMER_RECF_CONVERT_DELETE
;
/* else: an FE-deleted record here must never be a MEM_RECORD_DEL. */
1332 KKASSERT(record
->type
!= HAMMER_MEM_RECORD_DEL
);
1338 * Assign the create_tid for new records. Deletions already
1339 * have the record's entire key properly set up.
1341 if (record
->type
!= HAMMER_MEM_RECORD_DEL
)
1342 record
->leaf
.base
.create_tid
= trans
->tid
;
1344 error
= hammer_ip_sync_record_cursor(cursor
, record
);
/* EDEADLK: tear the cursor down and rebuild it, then retry the sync. */
1345 if (error
!= EDEADLK
)
1347 hammer_done_cursor(cursor
);
1348 error
= hammer_init_cursor(trans
, cursor
, &record
->ip
->cache
[0],
1353 record
->flags
&= ~HAMMER_RECF_CONVERT_DELETE
;
/* NOTE(review): kernel errnos are conventionally positive; comparing
 * against "-ENOSPC" looks suspicious -- verify the sign convention of
 * hammer_ip_sync_record_cursor() before relying on this branch. */
1357 if (error
!= -ENOSPC
) {
1358 kprintf("hammer_sync_record_callback: sync failed rec "
1359 "%p, error %d\n", record
, error
);
1360 Debugger("sync failed rec");
/* Release the record's flush state; passes the final error through. */
1364 hammer_flush_record_done(record
, error
);
/*
 * hammer_sync_inode(ip) - backend worker that syncs a single inode to
 * media inside its own flush transaction: adjusts nlinks against pending
 * directory-entry dependencies, queues dirty buffers, performs any queued
 * truncation, syncs in-memory records via hammer_sync_record_callback(),
 * physically destroys the inode when appropriate, writes queued BIOs and
 * finally updates the on-disk inode record.
 *
 * NOTE(review): garbled extraction -- local declarations ("int64_t
 * nlinks;", "struct bio *bio;", "int count1;", "off_t trunc_off;"),
 * error-check lines, "goto done" paths, several case bodies and closing
 * braces have been lost.  Only comment text was added/repaired; the
 * surviving code tokens are untouched.
 */
1369 * XXX error handling
1372 hammer_sync_inode(hammer_inode_t ip
)
1374 struct hammer_transaction trans
;
1375 struct hammer_cursor cursor
;
1377 hammer_record_t depend
;
1378 hammer_record_t next
;
1379 int error
, tmp_error
;
/* Nothing to do if the backend copy carries no modified-state flags. */
1382 if ((ip
->sync_flags
& HAMMER_INODE_MODMASK
) == 0)
1385 hammer_start_transaction_fls(&trans
, ip
->hmp
);
1386 error
= hammer_init_cursor(&trans
, &cursor
, &ip
->cache
[0], ip
);
1391 * Any directory records referencing this inode which are not in
1392 * our current flush group must adjust our nlink count for the
1393 * purposes of synchronization to disk.
1395 * Records which are in our flush group can be unlinked from our
1396 * inode now, potentially allowing the inode to be physically
1399 nlinks
= ip
->ino_data
.nlinks
;
1400 next
= TAILQ_FIRST(&ip
->target_list
);
1401 while ((depend
= next
) != NULL
) {
1402 next
= TAILQ_NEXT(depend
, target_entry
);
1403 if (depend
->flush_state
== HAMMER_FST_FLUSH
&&
1404 depend
->flush_group
== ip
->hmp
->flusher_act
) {
1406 * If this is an ADD that was deleted by the frontend
1407 * the frontend nlinks count will have already been
1408 * decremented, but the backend is going to sync its
1409 * directory entry and must account for it. The
1410 * record will be converted to a delete-on-disk when
1413 * If the ADD was not deleted by the frontend we
1414 * can remove the dependency from our target_list.
1416 if (depend
->flags
& HAMMER_RECF_DELETED_FE
) {
/* else-branch: unlink the dependency from our target_list. */
1419 TAILQ_REMOVE(&ip
->target_list
, depend
,
1421 depend
->target_ip
= NULL
;
1423 } else if ((depend
->flags
& HAMMER_RECF_DELETED_FE
) == 0) {
1425 * Not part of our flush group
/* NOTE(review): the nlinks adjustments inside these cases (++/--)
 * were lost in extraction -- verify against the original file. */
1427 KKASSERT((depend
->flags
& HAMMER_RECF_DELETED_BE
) == 0);
1428 switch(depend
->type
) {
1429 case HAMMER_MEM_RECORD_ADD
:
1432 case HAMMER_MEM_RECORD_DEL
:
1442 * Set dirty if we had to modify the link count.
1444 if (ip
->sync_ino_data
.nlinks
!= nlinks
) {
1445 KKASSERT((int64_t)nlinks
>= 0);
1446 ip
->sync_ino_data
.nlinks
= nlinks
;
1447 ip
->sync_flags
|= HAMMER_INODE_DDIRTY
;
1451 * Queue up as many dirty buffers as we can then set a flag to
1452 * cause any further BIOs to go to the alternative queue.
1454 if (ip
->flags
& HAMMER_INODE_VHELD
)
1455 error
= vfsync(ip
->vp
, MNT_NOWAIT
, 1, NULL
, NULL
);
1456 ip
->flags
|= HAMMER_INODE_WRITE_ALT
;
1459 * The buffer cache may contain dirty buffers beyond the inode
1460 * state we copied from the frontend to the backend. Because
1461 * we are syncing our buffer cache on the backend, resync
1462 * the truncation point and the file size so we don't wipe out
1465 * Syncing the buffer cache on the frontend has serious problems
1466 * because it prevents us from passively queueing dirty inodes
1467 * to the backend (the BIO's could stall indefinitely).
1469 if (ip
->flags
& HAMMER_INODE_TRUNCATED
) {
1470 ip
->sync_trunc_off
= ip
->trunc_off
;
1471 ip
->sync_flags
|= HAMMER_INODE_TRUNCATED
;
1473 if (ip
->sync_ino_data
.size
!= ip
->ino_data
.size
) {
1474 ip
->sync_ino_data
.size
= ip
->ino_data
.size
;
1475 ip
->sync_flags
|= HAMMER_INODE_DDIRTY
;
1479 * If there is a truncation queued destroy any data past the (aligned)
1480 * truncation point. Userland will have dealt with the buffer
1481 * containing the truncation point for us.
1483 * We don't flush pending frontend data buffers until after we've
1484 * dealt with the truncation.
1486 * Don't bother if the inode is or has been deleted.
1488 if (ip
->sync_flags
& HAMMER_INODE_TRUNCATED
) {
1490 * Interlock trunc_off. The VOP front-end may continue to
1491 * make adjustments to it while we are blocked.
1494 off_t aligned_trunc_off
;
1496 trunc_off
= ip
->sync_trunc_off
;
/* Round the truncation point up to a HAMMER buffer boundary. */
1497 aligned_trunc_off
= (trunc_off
+ HAMMER_BUFMASK
) &
1501 * Delete any whole blocks on-media. The front-end has
1502 * already cleaned out any partial block and made it
1503 * pending. The front-end may have updated trunc_off
1504 * while we were blocked so do not just unconditionally
1505 * set it to the maximum offset.
1507 error
= hammer_ip_delete_range(&cursor
, ip
,
1509 0x7FFFFFFFFFFFFFFFLL
);
1511 Debugger("hammer_ip_delete_range errored");
1512 ip
->sync_flags
&= ~HAMMER_INODE_TRUNCATED
;
/* Only clear the frontend flag if no newer truncation was queued. */
1513 if (ip
->trunc_off
>= trunc_off
) {
1514 ip
->trunc_off
= 0x7FFFFFFFFFFFFFFFLL
;
1515 ip
->flags
&= ~HAMMER_INODE_TRUNCATED
;
1522 * Now sync related records. These will typically be directory
1523 * entries or delete-on-disk records.
1525 * Not all records will be flushed, but clear XDIRTY anyway. We
1526 * will set it again in the frontend hammer_flush_inode_done()
1527 * if records remain.
1530 tmp_error
= RB_SCAN(hammer_rec_rb_tree
, &ip
->rec_tree
, NULL
,
1531 hammer_sync_record_callback
, &cursor
);
1536 if (RB_EMPTY(&ip
->rec_tree
))
1537 ip
->sync_flags
&= ~HAMMER_INODE_XDIRTY
;
1541 * If we are deleting the inode the frontend had better not have
1542 * any active references on elements making up the inode.
1544 if (error
== 0 && ip
->sync_ino_data
.nlinks
== 0 &&
1545 RB_EMPTY(&ip
->rec_tree
) &&
1546 (ip
->sync_flags
& HAMMER_INODE_DELETING
) &&
1547 (ip
->flags
& HAMMER_INODE_DELETED
) == 0) {
/* Physically destroy the inode's remaining on-media ranges. */
1551 ip
->flags
|= HAMMER_INODE_DELETED
;
1552 error
= hammer_ip_delete_range_all(&cursor
, ip
, &count1
);
1554 ip
->sync_flags
&= ~HAMMER_INODE_DELETING
;
1555 ip
->sync_flags
&= ~HAMMER_INODE_TRUNCATED
;
1556 KKASSERT(RB_EMPTY(&ip
->rec_tree
));
1559 * Set delete_tid in both the frontend and backend
1560 * copy of the inode record. The DELETED flag handles
1561 * this, do not set RDIRTY.
1563 ip
->ino_leaf
.base
.delete_tid
= trans
.tid
;
1564 ip
->sync_ino_leaf
.base
.delete_tid
= trans
.tid
;
1567 * Adjust the inode count in the volume header
1569 if (ip
->flags
& HAMMER_INODE_ONDISK
) {
1570 hammer_modify_volume_field(&trans
,
1573 --ip
->hmp
->rootvol
->ondisk
->vol0_stat_inodes
;
1574 hammer_modify_volume_done(trans
.rootvol
);
/* Error path (braces lost in extraction): undo the DELETED flag. */
1577 ip
->flags
&= ~HAMMER_INODE_DELETED
;
1578 Debugger("hammer_ip_delete_range_all errored");
1583 * Flush any queued BIOs. These will just biodone() the IO's if
1584 * the inode has been deleted.
1586 while ((bio
= TAILQ_FIRST(&ip
->bio_list
)) != NULL
) {
1587 TAILQ_REMOVE(&ip
->bio_list
, bio
, bio_act
);
1588 tmp_error
= hammer_dowrite(&cursor
, ip
, bio
);
1592 ip
->sync_flags
&= ~HAMMER_INODE_BUFS
;
1595 Debugger("RB_SCAN errored");
1598 * Now update the inode's on-disk inode-data and/or on-disk record.
1599 * DELETED and ONDISK are managed only in ip->flags.
1601 switch(ip
->flags
& (HAMMER_INODE_DELETED
| HAMMER_INODE_ONDISK
)) {
1602 case HAMMER_INODE_DELETED
|HAMMER_INODE_ONDISK
:
1604 * If deleted and on-disk, don't set any additional flags.
1605 * the delete flag takes care of things.
1607 * Clear flags which may have been set by the frontend.
1609 ip
->sync_flags
&= ~(HAMMER_INODE_DDIRTY
|
1610 HAMMER_INODE_XDIRTY
|HAMMER_INODE_ITIMES
|
1611 HAMMER_INODE_DELETING
);
1613 case HAMMER_INODE_DELETED
:
1615 * Take care of the case where a deleted inode was never
1616 * flushed to the disk in the first place.
1618 * Clear flags which may have been set by the frontend.
1620 ip
->sync_flags
&= ~(HAMMER_INODE_DDIRTY
|
1621 HAMMER_INODE_XDIRTY
|HAMMER_INODE_ITIMES
|
1622 HAMMER_INODE_DELETING
);
/* Destroy every remaining in-memory record; each should hold
 * exactly one reference (ours) at this point. */
1623 while (RB_ROOT(&ip
->rec_tree
)) {
1624 hammer_record_t record
= RB_ROOT(&ip
->rec_tree
);
1625 hammer_ref(&record
->lock
);
1626 KKASSERT(record
->lock
.refs
== 1);
1627 record
->flags
|= HAMMER_RECF_DELETED_FE
;
1628 record
->flags
|= HAMMER_RECF_DELETED_BE
;
1629 hammer_rel_mem_record(record
);
1632 case HAMMER_INODE_ONDISK
:
1634 * If already on-disk, do not set any additional flags.
/* default case: inode has never been written to media. */
1639 * If not on-disk and not deleted, set both dirty flags
1640 * to force an initial record to be written. Also set
1641 * the create_tid for the inode.
1643 * Set create_tid in both the frontend and backend
1644 * copy of the inode record.
1646 ip
->ino_leaf
.base
.create_tid
= trans
.tid
;
1647 ip
->sync_ino_leaf
.base
.create_tid
= trans
.tid
;
1648 ip
->sync_flags
|= HAMMER_INODE_DDIRTY
;
1653 * If RDIRTY or DDIRTY is set, write out a new record. If the inode
1654 * is already on-disk the old record is marked as deleted.
1656 * If DELETED is set hammer_update_inode() will delete the existing
1657 * record without writing out a new one.
1659 * If *ONLY* the ITIMES flag is set we can update the record in-place.
1661 if (ip
->flags
& HAMMER_INODE_DELETED
) {
1662 error
= hammer_update_inode(&cursor
, ip
);
1664 if ((ip
->sync_flags
& (HAMMER_INODE_DDIRTY
| HAMMER_INODE_ITIMES
)) ==
1665 HAMMER_INODE_ITIMES
) {
1666 error
= hammer_update_itimes(&cursor
, ip
);
1668 if (ip
->sync_flags
& (HAMMER_INODE_DDIRTY
| HAMMER_INODE_ITIMES
)) {
1669 error
= hammer_update_inode(&cursor
, ip
);
1672 Debugger("hammer_update_itimes/inode errored");
1675 * Save the TID we used to sync the inode with to make sure we
1676 * do not improperly reuse it.
1678 hammer_done_cursor(&cursor
);
1679 hammer_done_transaction(&trans
);
/*
 * hammer_inode_unloadable_check(ip, getvp) - decide whether an inode the
 * OS is no longer actively using should be scheduled for destruction.
 * When nlinks reaches zero, flags the inode DELETING|TRUNCATED and wipes
 * its buffer cache / pager size down to zero.
 *
 * NOTE(review): garbled extraction -- the "struct vnode *vp;" declaration
 * and the getvp acquire/release logic around hammer_get_vnode() (and its
 * paired vput()) were lost.  Only comment text was added; the surviving
 * code tokens are untouched.
 */
1684 * This routine is called when the OS is no longer actively referencing
1685 * the inode (but might still be keeping it cached), or when releasing
1686 * the last reference to an inode.
1688 * At this point if the inode's nlinks count is zero we want to destroy
1689 * it, which may mean destroying it on-media too.
1692 hammer_inode_unloadable_check(hammer_inode_t ip
, int getvp
)
1697 * Set the DELETING flag when the link count drops to 0 and the
1698 * OS no longer has any opens on the inode.
1700 * The backend will clear DELETING (a mod flag) and set DELETED
1701 * (a state flag) when it is actually able to perform the
1704 if (ip
->ino_data
.nlinks
== 0 &&
1705 (ip
->flags
& (HAMMER_INODE_DELETING
|HAMMER_INODE_DELETED
)) == 0) {
1706 ip
->flags
|= HAMMER_INODE_DELETING
;
1707 ip
->flags
|= HAMMER_INODE_TRUNCATED
;
/* getvp path: bail out if a vnode cannot be obtained. */
1711 if (hammer_get_vnode(ip
, &vp
) != 0)
/* Discard cached data and shrink the pager to zero length. */
1715 vtruncbuf(ip
->vp
, 0, HAMMER_BUFSIZE
);
1716 vnode_pager_setsize(ip
->vp
, 0);
1725 * Re-test an inode when a dependancy had gone away to see if we
1726 * can chain flush it.
1729 hammer_test_inode(hammer_inode_t ip
)
1731 if (ip
->flags
& HAMMER_INODE_REFLUSH
) {
1732 ip
->flags
&= ~HAMMER_INODE_REFLUSH
;
1733 hammer_ref(&ip
->lock
);
1734 if (ip
->flags
& HAMMER_INODE_RESIGNAL
) {
1735 ip
->flags
&= ~HAMMER_INODE_RESIGNAL
;
1736 hammer_flush_inode(ip
, HAMMER_FLUSH_SIGNAL
);
1738 hammer_flush_inode(ip
, 0);
1740 hammer_rel_inode(ip
, 0);