/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.42 2008/04/27 21:07:15 dillon Exp $
 */

#include "hammer.h"

static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_flush_inode_copysync(hammer_inode_t ip);
static int hammer_mark_record_callback(hammer_record_t rec, void *data);
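
/*
 * Overview: the routines in this file are split between the "frontend"
 * (VOPs and other code running in the context of user processes) and the
 * "backend" (the flusher).  The frontend manipulates ip->flags, ip->ino_rec
 * and ip->ino_data; the backend works from the sync_* copies snapshotted
 * by hammer_flush_inode_copysync().
 */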

/*
 * The kernel is not actively referencing this vnode but is still holding
 * it cached.
 *
 * This is called from the frontend.
 */
int
hammer_vop_inactive(struct vop_inactive_args *ap)
{
        struct hammer_inode *ip = VTOI(ap->a_vp);

        /*
         * Degenerate case
         */
        if (ip == NULL) {
                vrecycle(ap->a_vp);
                return(0);
        }

        /*
         * If the inode no longer has any references we recover its
         * in-memory resources immediately.
         *
         * NOTE: called from frontend, use ino_rec instead of sync_ino_rec.
         */
        if (ip->ino_rec.ino_nlinks == 0)
                vrecycle(ap->a_vp);
        return(0);
}

/*
 * Release the vnode association.  This is typically (but not always)
 * the last reference on the inode and will flush the inode to the
 * media.
 *
 * XXX Currently our sync code only runs through inodes with vnode
 * associations, so we depend on hammer_rel_inode() to sync any inode
 * record data to the block device prior to losing the association.
 * Otherwise transactions that the user expected to be distinct by
 * doing a manual sync may be merged.
 */
int
hammer_vop_reclaim(struct vop_reclaim_args *ap)
{
        struct hammer_inode *ip;
        struct vnode *vp;

        vp = ap->a_vp;

        if ((ip = vp->v_data) != NULL) {
                hammer_lock_ex(&ip->lock);
                vp->v_data = NULL;
                ip->vp = NULL;
                hammer_unlock(&ip->lock);

                /*
                 * Don't let too many dependencies build up on unreferenced
                 * inodes or we could run ourselves out of memory.
                 */
                if (TAILQ_FIRST(&ip->depend_list)) {
                        ip->hmp->reclaim_count += ip->depend_count;
                        if (ip->hmp->reclaim_count > 256) {
                                ip->hmp->reclaim_count = 0;
                                hammer_flusher_async(ip->hmp);
                        }
                }
                hammer_rel_inode(ip, 1);
        }
        return(0);
}

/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 *
 * Called from the frontend.
 */
int
hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
{
        struct vnode *vp;
        int error = 0;

        for (;;) {
                if ((vp = ip->vp) == NULL) {
                        error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
                        if (error)
                                break;
                        hammer_lock_ex(&ip->lock);
                        if (ip->vp != NULL) {
                                hammer_unlock(&ip->lock);
                                vp = *vpp;
                                vp->v_type = VBAD;
                                vx_put(vp);
                                continue;
                        }
                        hammer_ref(&ip->lock);
                        vp = *vpp;
                        ip->vp = vp;
                        vp->v_type = hammer_get_vnode_type(
                                        ip->ino_rec.base.base.obj_type);

                        switch(ip->ino_rec.base.base.obj_type) {
                        case HAMMER_OBJTYPE_CDEV:
                        case HAMMER_OBJTYPE_BDEV:
                                vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
                                addaliasu(vp, ip->ino_data.rmajor,
                                          ip->ino_data.rminor);
                                break;
                        case HAMMER_OBJTYPE_FIFO:
                                vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
                                break;
                        default:
                                break;
                        }

                        /*
                         * Only mark as the root vnode if the ip is not
                         * historical, otherwise the VFS cache will get
                         * confused.  The other half of the special handling
                         * is in hammer_vop_nlookupdotdot().
                         */
                        if (ip->obj_id == HAMMER_OBJID_ROOT &&
                            ip->obj_asof == ip->hmp->asof) {
                                vp->v_flag |= VROOT;
                        }

                        vp->v_data = (void *)ip;
                        /* vnode locked by getnewvnode() */
                        /* make related vnode dirty if inode dirty? */
                        hammer_unlock(&ip->lock);
                        if (vp->v_type == VREG)
                                vinitvmio(vp, ip->ino_rec.ino_size);
                        break;
                }

                /*
                 * loop if the vget fails (aka races), or if the vp
                 * no longer matches ip->vp.
                 */
                if (vget(vp, LK_EXCLUSIVE) == 0) {
                        if (vp == ip->vp)
                                break;
                        vput(vp);
                }
        }
        *vpp = vp;
        return(error);
}
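
/*
 * Hypothetical usage sketch (not part of this file): a VOP that needs a
 * locked vnode for an inode it already holds a reference on would do
 * something like:
 *
 *      struct vnode *vp;
 *
 *      error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
 *      if (error == 0) {
 *              ... operate on the locked vnode ...
 *              vput(vp);
 *      }
 *
 * The inode reference is retained across the call.
 */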

/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These
 * functions do not attach or detach the related vnode (use
 * hammer_get_vnode() for that).
 *
 * The flags argument is only applied for newly created inodes, and only
 * certain flags are inherited.
 *
 * Called from the frontend.
 */
struct hammer_inode *
hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
                 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
{
        hammer_mount_t hmp = trans->hmp;
        struct hammer_inode_info iinfo;
        struct hammer_cursor cursor;
        struct hammer_inode *ip;

        /*
         * Determine if we already have an inode cached.  If we do then
         * we are golden.
         */
        iinfo.obj_id = obj_id;
        iinfo.obj_asof = asof;
retry:
        ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
        if (ip) {
                hammer_ref(&ip->lock);
                *errorp = 0;
                return(ip);
        }

        ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
        ++hammer_count_inodes;
        ip->obj_id = obj_id;
        ip->obj_asof = iinfo.obj_asof;
        ip->hmp = hmp;
        ip->flags = flags & HAMMER_INODE_RO;
        if (hmp->ronly)
                ip->flags |= HAMMER_INODE_RO;
        ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
        RB_INIT(&ip->rec_tree);
        TAILQ_INIT(&ip->bio_list);
        TAILQ_INIT(&ip->bio_alt_list);
        TAILQ_INIT(&ip->depend_list);

        /*
         * Locate the on-disk inode.
         */
        hammer_init_cursor(trans, &cursor, cache);
        cursor.key_beg.obj_id = ip->obj_id;
        cursor.key_beg.key = 0;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
        cursor.key_beg.obj_type = 0;
        cursor.asof = iinfo.obj_asof;
        cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
                       HAMMER_CURSOR_ASOF;

        *errorp = hammer_btree_lookup(&cursor);
        if (*errorp == EDEADLK) {
                hammer_done_cursor(&cursor);
                goto retry;
        }

        /*
         * On success the B-Tree lookup will hold the appropriate
         * buffer cache buffers and provide a pointer to the requested
         * information.  Copy the information to the in-memory inode
         * and cache the B-Tree node to improve future operations.
         */
        if (*errorp == 0) {
                ip->ino_rec = cursor.record->inode;
                ip->ino_data = cursor.data->inode;
                hammer_cache_node(cursor.node, &ip->cache[0]);
                if (cache)
                        hammer_cache_node(cursor.node, cache);
        }

        /*
         * On success load the inode's record and data and insert the
         * inode into the B-Tree.  It is possible to race another lookup
         * insertion of the same inode so deal with that condition too.
         *
         * The cursor's locked node interlocks against others creating and
         * destroying ip while we were blocked.
         */
        if (*errorp == 0) {
                hammer_ref(&ip->lock);
                if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
                        hammer_uncache_node(&ip->cache[0]);
                        hammer_uncache_node(&ip->cache[1]);
                        KKASSERT(ip->lock.refs == 1);
                        --hammer_count_inodes;
                        kfree(ip, M_HAMMER);
                        hammer_done_cursor(&cursor);
                        goto retry;
                }
                ip->flags |= HAMMER_INODE_ONDISK;
        } else {
                --hammer_count_inodes;
                kfree(ip, M_HAMMER);
                ip = NULL;
        }
        hammer_done_cursor(&cursor);
        return(ip);
}
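
/*
 * Hypothetical usage sketch (not part of this file): looking up an inode
 * as-of the mount's transaction id.  obj_id and the error variable are
 * assumed to be supplied by the caller:
 *
 *      int error;
 *      struct hammer_inode *ip;
 *
 *      ip = hammer_get_inode(trans, NULL, obj_id, trans->hmp->asof,
 *                            0, &error);
 *      if (ip) {
 *              ... use the referenced (but unlocked) inode ...
 *              hammer_rel_inode(ip, 0);
 *      }
 */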

/*
 * Create a new filesystem object, returning the inode in *ipp.  The
 * returned inode will be referenced and also marked HAMMER_INODE_NEW,
 * preventing it from being synchronized too early.  The caller must
 * call hammer_finalize_inode() to make it available for media sync.
 *
 * The inode is created in-memory.
 */
int
hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
                    struct ucred *cred, hammer_inode_t dip,
                    struct hammer_inode **ipp)
{
        hammer_mount_t hmp;
        hammer_inode_t ip;
        uid_t xuid;

        hmp = trans->hmp;
        ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
        ++hammer_count_inodes;
        ip->obj_id = hammer_alloc_tid(trans);
        KKASSERT(ip->obj_id != 0);
        ip->obj_asof = hmp->asof;
        ip->hmp = hmp;
        ip->flush_state = HAMMER_FST_IDLE;
        ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
                    HAMMER_INODE_ITIMES;
        ip->flags |= HAMMER_INODE_NEW;

        ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
        RB_INIT(&ip->rec_tree);
        TAILQ_INIT(&ip->bio_list);
        TAILQ_INIT(&ip->bio_alt_list);
        TAILQ_INIT(&ip->depend_list);

        ip->ino_rec.ino_atime = trans->time;
        ip->ino_rec.ino_mtime = trans->time;
        ip->ino_rec.ino_size = 0;
        ip->ino_rec.ino_nlinks = 0;
        ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
        ip->ino_rec.base.base.obj_id = ip->obj_id;
        ip->ino_rec.base.base.key = 0;
        ip->ino_rec.base.base.create_tid = 0;
        ip->ino_rec.base.base.delete_tid = 0;
        ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
        ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);

        ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
        ip->ino_data.mode = vap->va_mode;
        ip->ino_data.ctime = trans->time;
        ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;

        switch(ip->ino_rec.base.base.obj_type) {
        case HAMMER_OBJTYPE_CDEV:
        case HAMMER_OBJTYPE_BDEV:
                ip->ino_data.rmajor = vap->va_rmajor;
                ip->ino_data.rminor = vap->va_rminor;
                break;
        default:
                break;
        }

        /*
         * Calculate default uid/gid and overwrite with information from
         * the vap.
         */
        xuid = hammer_to_unix_xid(&dip->ino_data.uid);
        ip->ino_data.gid = dip->ino_data.gid;
        xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
                                     &vap->va_mode);
        ip->ino_data.mode = vap->va_mode;

        if (vap->va_vaflags & VA_UID_UUID_VALID)
                ip->ino_data.uid = vap->va_uid_uuid;
        else if (vap->va_uid != (uid_t)VNOVAL)
                hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
        if (vap->va_vaflags & VA_GID_UUID_VALID)
                ip->ino_data.gid = vap->va_gid_uuid;
        else if (vap->va_gid != (gid_t)VNOVAL)
                hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);

        hammer_ref(&ip->lock);
        if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
                hammer_unref(&ip->lock);
                panic("hammer_create_inode: duplicate obj_id %llx",
                      ip->obj_id);
        }
        *ipp = ip;
        return(0);
}

/*
 * Finalize a newly created inode, allowing it to be synchronized to the
 * media.  If an error occurred make sure the inode has been cleaned up and
 * will not be synchronized to the media.
 */
void
hammer_finalize_inode(hammer_transaction_t trans, hammer_inode_t ip, int error)
{
        if (error) {
                ip->flags &= ~HAMMER_INODE_MODMASK;

                KASSERT(ip->lock.refs == 1,
                        ("hammer_unload_inode: %d refs\n", ip->lock.refs));
                KKASSERT(ip->vp == NULL);
                KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
                KKASSERT(ip->cursor_ip_refs == 0);
                KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

                KKASSERT(RB_EMPTY(&ip->rec_tree));
                KKASSERT(TAILQ_EMPTY(&ip->bio_list));
                KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));
        }
        ip->flags &= ~HAMMER_INODE_NEW;
}
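
/*
 * Hypothetical usage sketch (not part of this file): creation is a
 * two-step sequence so that a half-constructed inode is never picked up
 * by the flusher:
 *
 *      error = hammer_create_inode(trans, vap, cred, dip, &nip);
 *      ... create the related directory entry, etc ...
 *      hammer_finalize_inode(trans, nip, error);
 *
 * hammer_finalize_inode() clears HAMMER_INODE_NEW; on error it also
 * clears the MODMASK bits so the aborted inode is never synchronized.
 */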

/*
 * Called by hammer_sync_inode().
 */
static int
hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
{
        struct hammer_cursor cursor;
        hammer_record_t record;
        int error;

        /*
         * Locate the record on-disk and mark it as deleted.  Both the B-Tree
         * node and the record must be marked deleted.  The record may or
         * may not be physically deleted, depending on the retention policy.
         *
         * If the inode has already been deleted on-disk we have nothing
         * to do.
         *
         * XXX Update the inode record and data in-place if the retention
         * policy allows it.
         */
retry:
        error = 0;

        if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
            HAMMER_INODE_ONDISK) {
                hammer_init_cursor(trans, &cursor, &ip->cache[0]);
                cursor.key_beg.obj_id = ip->obj_id;
                cursor.key_beg.key = 0;
                cursor.key_beg.create_tid = 0;
                cursor.key_beg.delete_tid = 0;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
                cursor.key_beg.obj_type = 0;
                cursor.asof = ip->obj_asof;
                cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
                cursor.flags |= HAMMER_CURSOR_BACKEND;

                error = hammer_btree_lookup(&cursor);
                if (error) {
                        kprintf("error %d\n", error);
                        Debugger("hammer_update_inode");
                }

                if (error == 0) {
                        error = hammer_ip_delete_record(&cursor, trans->tid);
                        if (error && error != EDEADLK) {
                                kprintf("error %d\n", error);
                                Debugger("hammer_update_inode2");
                        }
                        if (error == 0)
                                ip->flags |= HAMMER_INODE_DELONDISK;
                        hammer_cache_node(cursor.node, &ip->cache[0]);
                }
                hammer_done_cursor(&cursor);
                if (error == EDEADLK)
                        goto retry;
        }

        /*
         * Write out a new record if the in-memory inode is not marked
         * as having been deleted.  Update our inode statistics if this
         * is the first application of the inode on-disk.
         *
         * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
         * will remain set and prevent further updates.
         */
        if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
                record = hammer_alloc_mem_record(ip);
                record->state = HAMMER_FST_FLUSH;
                record->rec.inode = ip->sync_ino_rec;
                record->rec.inode.base.base.create_tid = trans->tid;
                record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
                record->data = (void *)&ip->sync_ino_data;
                record->flags |= HAMMER_RECF_INTERLOCK_BE;
                error = hammer_ip_sync_record(trans, record);
                if (error) {
                        kprintf("error %d\n", error);
                        Debugger("hammer_update_inode3");
                }

                /*
                 * The record isn't managed by the inode's record tree,
                 * destroy it whether we succeed or fail.
                 */
                record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
                record->flags |= HAMMER_RECF_DELETED_FE;
                record->state = HAMMER_FST_IDLE;
                KKASSERT(TAILQ_FIRST(&record->depend_list) == NULL);
                hammer_rel_mem_record(record);

                ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
                                    HAMMER_INODE_DDIRTY |
                                    HAMMER_INODE_ITIMES);
                ip->flags &= ~HAMMER_INODE_DELONDISK;
                if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
                        hammer_modify_volume(trans, trans->rootvol,
                                             NULL, 0);
                        ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
                        hammer_modify_volume_done(trans->rootvol);
                        ip->flags |= HAMMER_INODE_ONDISK;
                }
        }
        if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
                /*
                 * Clean out any left-over flags if the inode has been
                 * deleted.
                 */
                ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
                                    HAMMER_INODE_DDIRTY |
                                    HAMMER_INODE_ITIMES);
        }
        return(error);
}
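
/*
 * The update above is historical: rather than overwriting the on-disk
 * inode record, the existing record is marked deleted as of trans->tid
 * and a new record is written with create_tid = trans->tid.  A sketch of
 * the resulting B-Tree state after two updates (tids are illustrative):
 *
 *      record A: create_tid=100, delete_tid=200       (historical)
 *      record B: create_tid=200, delete_tid=0         (current)
 *
 * An as-of lookup with asof >= 200 sees record B; an as-of lookup in
 * [100,200) sees record A.
 */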

/*
 * Update only the itimes fields.  This is done non-historically.  The
 * record is updated in-place on the disk.
 */
static int
hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip)
{
        struct hammer_cursor cursor;
        struct hammer_inode_record *rec;
        int error;

retry:
        error = 0;
        if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
            HAMMER_INODE_ONDISK) {
                hammer_init_cursor(trans, &cursor, &ip->cache[0]);
                cursor.key_beg.obj_id = ip->obj_id;
                cursor.key_beg.key = 0;
                cursor.key_beg.create_tid = 0;
                cursor.key_beg.delete_tid = 0;
                cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
                cursor.key_beg.obj_type = 0;
                cursor.asof = ip->obj_asof;
                cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
                cursor.flags |= HAMMER_CURSOR_BACKEND;

                error = hammer_btree_lookup(&cursor);
                if (error) {
                        kprintf("error %d\n", error);
                        Debugger("hammer_update_itimes1");
                }
                if (error == 0) {
                        /*
                         * Do not generate UNDO records for atime/mtime
                         * updates.
                         */
                        rec = &cursor.record->inode;
                        hammer_modify_buffer(cursor.trans,
                                             cursor.record_buffer, NULL, 0);
                        rec->ino_atime = ip->sync_ino_rec.ino_atime;
                        rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
                        hammer_modify_buffer_done(cursor.record_buffer);
                        ip->sync_flags &= ~HAMMER_INODE_ITIMES;
                        /* XXX recalculate crc */
                        hammer_cache_node(cursor.node, &ip->cache[0]);
                }
                hammer_done_cursor(&cursor);
                if (error == EDEADLK)
                        goto retry;
        }
        return(error);
}
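
/*
 * hammer_update_itimes() is the non-historical fast path.  The backend
 * only takes it when atime/mtime are the sole changes, i.e. when the
 * test in hammer_sync_inode() reduces to:
 *
 *      (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
 *                         HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES
 *
 * Any other dirty bit forces the full delete-and-reinsert performed by
 * hammer_update_inode().
 */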

/*
 * Release a reference on an inode.  If asked to flush the last release
 * will flush the inode.
 *
 * On the last reference we queue the inode to the flusher for its final
 * disposition.
 */
void
hammer_rel_inode(struct hammer_inode *ip, int flush)
{
        /*
         * Handle disposition when dropping the last ref.
         */
        while (ip->lock.refs == 1) {
                if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
                        hammer_unload_inode(ip);
                        return;
                }

                /*
                 * Hand the inode over to the flusher, which will
                 * add another ref to it.
                 */
                if (++ip->hmp->reclaim_count > 256) {
                        ip->hmp->reclaim_count = 0;
                        hammer_flush_inode(ip, HAMMER_FLUSH_FORCE |
                                               HAMMER_FLUSH_SIGNAL);
                } else {
                        hammer_flush_inode(ip, HAMMER_FLUSH_FORCE);
                }
        }

        /*
         * The inode still has multiple refs, drop one ref.  If a flush was
         * requested make sure the flusher sees it.  New inodes which have
         * not been finalized cannot be flushed.
         */
        if (flush && ip->flush_state == HAMMER_FST_IDLE &&
            (ip->flags & HAMMER_INODE_NEW) == 0) {
                hammer_flush_inode(ip, HAMMER_FLUSH_RELEASE);
        } else {
                hammer_unref(&ip->lock);
        }
}
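
/*
 * Last-reference disposition summary: a clean inode (no MODMASK bits) is
 * destroyed immediately via hammer_unload_inode(); a dirty inode is handed
 * to the flusher, which takes its own reference and calls
 * hammer_flush_inode_done() -> hammer_rel_inode() when it is finished.
 */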

/*
 * Unload and destroy the specified inode.  Must be called with one remaining
 * reference.  The reference is disposed of.
 *
 * This can only be called in the context of the flusher.
 */
static int
hammer_unload_inode(struct hammer_inode *ip)
{
        KASSERT(ip->lock.refs == 1,
                ("hammer_unload_inode: %d refs\n", ip->lock.refs));
        KKASSERT(ip->vp == NULL);
        KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
        KKASSERT(ip->cursor_ip_refs == 0);
        KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

        KKASSERT(RB_EMPTY(&ip->rec_tree));
        KKASSERT(TAILQ_EMPTY(&ip->bio_list));
        KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));

        RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);

        hammer_uncache_node(&ip->cache[0]);
        hammer_uncache_node(&ip->cache[1]);
        --hammer_count_inodes;
        kfree(ip, M_HAMMER);

        return(0);
}

/*
 * A transaction has modified an inode, requiring updates as specified by
 * the passed flags.
 *
 * HAMMER_INODE_RDIRTY: Inode record has been updated
 * HAMMER_INODE_DDIRTY: Inode data has been updated
 * HAMMER_INODE_XDIRTY: Dirty frontend buffer cache buffer strategized
 * HAMMER_INODE_DELETED: Inode record/data must be deleted
 * HAMMER_INODE_ITIMES: mtime/atime has been updated
 */
void
hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
{
        KKASSERT((ip->flags & HAMMER_INODE_RO) == 0 ||
                 (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
                  HAMMER_INODE_XDIRTY|
                  HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);

        ip->flags |= flags;
}

/*
 * Flush an inode.  If the inode is already being flushed wait for
 * it to complete, then flush it again.  The interlock is against
 * front-end transactions, the backend flusher does not hold the lock.
 *
 * The flusher must distinguish between the records that are part of the
 * flush and any new records created in parallel with the flush.  The
 * inode data and truncation fields are also copied.  BIOs are a bit more
 * troublesome because some dirty buffers may not have been queued yet.
 */
void
hammer_flush_inode(hammer_inode_t ip, int flags)
{
        KKASSERT((ip->flags & HAMMER_INODE_NEW) == 0);
        if (ip->flush_state != HAMMER_FST_IDLE &&
            (ip->flags & HAMMER_INODE_MODMASK)) {
                ip->flags |= HAMMER_INODE_REFLUSH;
                if (flags & HAMMER_FLUSH_RELEASE) {
                        hammer_unref(&ip->lock);
                        KKASSERT(ip->lock.refs > 0);
                }
                return;
        }
        if (ip->flush_state == HAMMER_FST_IDLE) {
                if ((ip->flags & HAMMER_INODE_MODMASK) ||
                    (flags & HAMMER_FLUSH_FORCE)) {
                        /*
                         * Add a reference to represent the inode being queued
                         * to the flusher.  If the caller wants us to
                         * release a reference the two cancel each other out.
                         */
                        if ((flags & HAMMER_FLUSH_RELEASE) == 0)
                                hammer_ref(&ip->lock);

                        hammer_flush_inode_copysync(ip);

                        /*
                         * Move the inode to the flush list and add a ref to
                         * it representing it on the list.
                         */
                        TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip,
                                          flush_entry);
                        if (flags & HAMMER_FLUSH_SIGNAL)
                                hammer_flusher_async(ip->hmp);
                }
        }
}
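
/*
 * Flush state machine as implemented above and in
 * hammer_flush_inode_copysync() / hammer_flush_inode_done():
 *
 *      HAMMER_FST_IDLE  --(hammer_flush_inode)-->  HAMMER_FST_SETUP
 *      HAMMER_FST_SETUP --(copysync complete)--->  HAMMER_FST_FLUSH
 *      HAMMER_FST_FLUSH --(flush completes)----->  HAMMER_FST_IDLE
 *
 * A flush requested while the inode is already in FST_FLUSH only sets
 * HAMMER_INODE_REFLUSH; the completion code re-queues the inode.
 */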

/*
 * Helper routine to copy the frontend synchronization state to the backend.
 * This routine may be called by either the frontend or the backend.
 */
static void
hammer_flush_inode_copysync(hammer_inode_t ip)
{
        int error;
        int count;

        /*
         * Prevent anyone else from trying to do the same thing.
         */
        ip->flush_state = HAMMER_FST_SETUP;

        /*
         * Sync the buffer cache.  This will queue the BIOs.  If called
         * from the context of the flusher the BIOs are thrown into bio_list
         * regardless of ip->flush_state.
         */
        if (ip->vp != NULL)
                error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
        else
                error = 0;

        /*
         * This freezes strategy writes, any further BIOs will be
         * queued to alt_bio (unless we are the flusher).
         */
        ip->flush_state = HAMMER_FST_FLUSH;

        /*
         * Snapshot the state of the inode for the backend flusher.
         *
         * The truncation must be retained in the frontend until after
         * we've actually performed the record deletion.
         */
        ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
        ip->sync_trunc_off = ip->trunc_off;
        ip->sync_ino_rec = ip->ino_rec;
        ip->sync_ino_data = ip->ino_data;
        ip->flags &= ~HAMMER_INODE_MODMASK |
                     HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS;

        /*
         * Fix up the dirty buffer status.
         */
        if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL)
                ip->flags &= ~HAMMER_INODE_BUFS;
        if (TAILQ_FIRST(&ip->bio_list))
                ip->sync_flags |= HAMMER_INODE_BUFS;
        else
                ip->sync_flags &= ~HAMMER_INODE_BUFS;

        /*
         * Set the state for the inode's in-memory records.  If some records
         * could not be marked for backend flush (i.e. deleted records),
         * re-set the XDIRTY flag.
         */
        count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
                        hammer_mark_record_callback, NULL);
        if (count)
                ip->flags |= HAMMER_INODE_XDIRTY;
}
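
/*
 * Field pairing used by the snapshot above (frontend -> backend):
 *
 *      ip->flags     -> ip->sync_flags     (MODMASK bits only)
 *      ip->trunc_off -> ip->sync_trunc_off
 *      ip->ino_rec   -> ip->sync_ino_rec
 *      ip->ino_data  -> ip->sync_ino_data
 *
 * The backend operates exclusively on the sync_* copies while the
 * frontend continues to modify the originals.
 */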

/*
 * Mark records for backend flush, accumulate a count of the number of
 * records which could not be marked.  Records marked for deletion
 * by the frontend never make it to the media.  It is possible for
 * a record queued to the backend to wind up with FE set after the
 * fact, as long as BE has not yet been set.  The backend deals with
 * this race by syncing the record as if FE had not been set, and
 * then converting the record to a delete-on-disk record.
 */
static int
hammer_mark_record_callback(hammer_record_t rec, void *data)
{
        if (rec->state == HAMMER_FST_FLUSH) {
                return(0);
        } else if ((rec->flags & HAMMER_RECF_DELETED_FE) == 0) {
                rec->state = HAMMER_FST_FLUSH;
                hammer_ref(&rec->lock);
                return(0);
        } else {
                return(1);
        }
}

/*
 * Wait for a previously queued flush to complete.
 */
void
hammer_wait_inode(hammer_inode_t ip)
{
        while (ip->flush_state == HAMMER_FST_FLUSH) {
                ip->flags |= HAMMER_INODE_FLUSHW;
                tsleep(&ip->flags, 0, "hmrwin", 0);
        }
}
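
/*
 * Hypothetical usage sketch (not part of this file): a synchronous flush
 * pairs a signalled flush with a wait:
 *
 *      hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
 *      hammer_wait_inode(ip);
 *
 * The wakeup side is in hammer_flush_inode_done(), keyed on &ip->flags.
 */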

/*
 * Called by the backend code when a flush has been completed.
 * The inode has already been removed from the flush list.
 *
 * A pipelined flush can occur, in which case we must re-enter the
 * inode on the list and re-copy its fields.
 */
void
hammer_flush_inode_done(hammer_inode_t ip)
{
        struct bio *bio;

        KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);

        if (ip->sync_flags)
                kprintf("ip %p leftover sync_flags %08x\n",
                        ip, ip->sync_flags);
        ip->flags |= ip->sync_flags;
        ip->flush_state = HAMMER_FST_IDLE;

        /*
         * Reflush any BIOs that wound up in the alt list.  Our inode will
         * also wind up at the end of the flusher's list.
         */
        while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
                TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
                TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
                ip->flags |= HAMMER_INODE_XDIRTY;
                ip->flags |= HAMMER_INODE_REFLUSH;
                kprintf("rebio %p ip %p @%016llx,%d\n", bio, ip,
                        bio->bio_offset, bio->bio_buf->b_bufsize);
        }

        /*
         * If the frontend made more changes and requested another flush,
         * do it.
         */
        if (ip->flags & HAMMER_INODE_REFLUSH) {
                ip->flags &= ~HAMMER_INODE_REFLUSH;
                hammer_flush_inode(ip, 0);
        }
        if (ip->flags & HAMMER_INODE_FLUSHW) {
                ip->flags &= ~HAMMER_INODE_FLUSHW;
                wakeup(&ip->flags);
        }
        hammer_rel_inode(ip, 0);
}

/*
 * Called from hammer_sync_inode() to synchronize in-memory records
 * to the media.
 */
static int
hammer_sync_record_callback(hammer_record_t record, void *data)
{
        hammer_transaction_t trans = data;
        int error;

        /*
         * Skip records that do not belong to the current flush.  Records
         * belonging to the flush will have been referenced for us.
         */
        if (record->state != HAMMER_FST_FLUSH)
                return(0);

        /*
         * Interlock the record using the BE flag.  Once BE is set the
         * frontend cannot change the state of FE.
         *
         * NOTE: If FE is set prior to us setting BE we still sync the
         *       record out, but the flush completion code converts it to
         *       a delete-on-disk record instead of destroying it.
         */
        hammer_lock_ex(&record->lock);
        if (record->flags & HAMMER_RECF_INTERLOCK_BE) {
                hammer_unlock(&record->lock);
                return(0);
        }
        record->flags |= HAMMER_RECF_INTERLOCK_BE;

        /*
         * If DELETED_FE is set we may have already sent dependent pieces
         * to the disk and we must flush the record as if it hadn't been
         * deleted.  This creates a bit of a mess because we have to
         * have ip_sync_record convert the record to DELETE_ONDISK before
         * it inserts the B-Tree record.  Otherwise the media sync might
         * be visible to the frontend.
         */
        if (record->flags & HAMMER_RECF_DELETED_FE)
                record->flags |= HAMMER_RECF_CONVERT_DELETE_ONDISK;

        /*
         * Assign the create_tid for new records.  Deletions already
         * have the record's entire key properly set up.
         */
        if ((record->flags & HAMMER_RECF_DELETE_ONDISK) == 0)
                record->rec.inode.base.base.create_tid = trans->tid;
        error = hammer_ip_sync_record(trans, record);

        if (error) {
                if (error != -ENOSPC) {
                        kprintf("hammer_sync_record_callback: sync failed rec "
                                "%p, error %d\n", record, error);
                        Debugger("sync failed rec");
                }
        }
        hammer_flush_record_done(record, error);
        return(error);
}
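
/*
 * FE/BE interlock summary for the callback above.  Once the backend sets
 * HAMMER_RECF_INTERLOCK_BE the frontend may no longer change FE state:
 *
 *      FE clear, BE set:  record syncs normally.
 *      FE set before BE:  record still syncs, but is converted to a
 *                         delete-on-disk record (CONVERT_DELETE_ONDISK).
 *      FE set after BE:   prevented by the interlock.
 */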

int
hammer_sync_inode(hammer_inode_t ip, int handle_delete)
{
        struct hammer_transaction trans;
        struct bio *bio;
        hammer_depend_t depend;
        int error, tmp_error;

        if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0 &&
            handle_delete == 0) {
                return(0);
        }

        hammer_start_transaction_fls(&trans, ip->hmp);

        /*
         * Any (directory) records this inode depends on must also be
         * synchronized.  The directory itself only needs to be flushed
         * if its inode is not already on-disk.
         */
        while ((depend = TAILQ_FIRST(&ip->depend_list)) != NULL) {
                hammer_record_t record;

                record = depend->record;
                TAILQ_REMOVE(&depend->record->depend_list, depend, rec_entry);
                TAILQ_REMOVE(&ip->depend_list, depend, ip_entry);
                --ip->depend_count;
                if (record->state != HAMMER_FST_FLUSH) {
                        record->state = HAMMER_FST_FLUSH;
                        /* add ref (steal ref from dependency) */
                } else {
                        /* remove ref related to dependency */
                        /* record still has at least one ref from state */
                        hammer_unref(&record->lock);
                        KKASSERT(record->lock.refs > 0);
                }
                if (record->ip->flags & HAMMER_INODE_ONDISK) {
                        hammer_sync_record_callback(record, &trans);
                } else {
                        KKASSERT((record->ip->flags & HAMMER_INODE_NEW) == 0);
                        hammer_flush_inode(record->ip, 0);
                }
                hammer_unref(&ip->lock);
                KKASSERT(ip->lock.refs > 0);
                kfree(depend, M_HAMMER);
        }

        /*
         * Sync inode deletions and truncations.
         */
        if (ip->sync_ino_rec.ino_nlinks == 0 && handle_delete &&
            (ip->flags & HAMMER_INODE_GONE) == 0) {
                /*
                 * Handle the case where the inode has been completely deleted
                 * and is no longer referenceable from the filesystem
                 * namespace.
                 *
                 * NOTE: We do not set the RDIRTY flag when updating the
                 *       delete_tid, setting HAMMER_INODE_DELETED takes care
                 *       of it.
                 */
                ip->flags |= HAMMER_INODE_GONE | HAMMER_INODE_DELETED;
                ip->flags &= ~HAMMER_INODE_TRUNCATED;
                ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
                if (ip->vp)
                        vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
                error = hammer_ip_delete_range_all(&trans, ip);
                if (error)
                        Debugger("hammer_ip_delete_range_all errored");

                /*
                 * Sanity check.  The only records that remain should be
                 * marked for back-end deletion.
                 */
                {
                        hammer_record_t rec;

                        RB_FOREACH(rec, hammer_rec_rb_tree, &ip->rec_tree) {
                                KKASSERT(rec->state == HAMMER_FST_FLUSH);
                        }
                }

                /*
                 * Set delete_tid in both the frontend and backend
                 * copy of the inode record.
                 */
                ip->ino_rec.base.base.delete_tid = trans.tid;
                ip->sync_ino_rec.base.base.delete_tid = trans.tid;

                /*
                 * Indicate that the inode has/is-being deleted.
                 */
                ip->flags |= HAMMER_INODE_DELETED;
                hammer_modify_inode(&trans, ip, HAMMER_INODE_RDIRTY);
                hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
                --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
                hammer_modify_volume_done(trans.rootvol);
        } else if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
                /*
                 * Interlock trunc_off.  The VOP front-end may continue to
                 * make adjustments to it while we are blocked.
                 */
                off_t trunc_off;
                off_t aligned_trunc_off;

                trunc_off = ip->sync_trunc_off;
                aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
                                    ~(int64_t)HAMMER_BUFMASK;

                /*
                 * Delete any whole blocks on-media.  The front-end has
                 * already cleaned out any partial block and made it
                 * pending.  The front-end may have updated trunc_off
                 * while we were blocked so do not just unconditionally
                 * set it to the maximum offset.
                 */
                kprintf("sync truncation range @ %016llx\n",
                        aligned_trunc_off);
                error = hammer_ip_delete_range(&trans, ip,
                                               aligned_trunc_off,
                                               0x7FFFFFFFFFFFFFFFLL);
                if (error)
                        Debugger("hammer_ip_delete_range errored");
                ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
                if (ip->trunc_off >= trunc_off) {
                        ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
                        ip->flags &= ~HAMMER_INODE_TRUNCATED;
                }
        } else {
                error = 0; /* XXX vfsync used to be here */
        }

        /*
         * Flush any queued BIOs.
         */
        while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
                TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
                kprintf("dowrite %016llx ip %p bio %p @ %016llx\n",
                        trans.tid, ip, bio, bio->bio_offset);
                tmp_error = hammer_dowrite(&trans, ip, bio);
                if (tmp_error)
                        error = tmp_error;
        }
        ip->sync_flags &= ~HAMMER_INODE_BUFS;

        /*
         * Now sync related records.
         */
        tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
                            hammer_sync_record_callback, &trans);
        KKASSERT(error <= 0);
        if (tmp_error < 0)
                tmp_error = -tmp_error;
        if (tmp_error)
                error = tmp_error;

        /*
         * XDIRTY represents rec_tree and bio_list.  However, rec_tree may
         * contain new front-end records so short of scanning it we can't
         * just test whether it is empty or not.
         *
         * If no error occurred assume we succeeded.
         */
        if (error == 0)
                ip->sync_flags &= ~HAMMER_INODE_XDIRTY;

        if (error)
                Debugger("RB_SCAN errored");

        /*
         * Now update the inode's on-disk inode-data and/or on-disk record.
         * DELETED and ONDISK are managed only in ip->flags.
         */
        switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
        case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
                /*
                 * If deleted and on-disk, don't set any additional flags.
                 * The delete flag takes care of things.
                 */
                break;
        case HAMMER_INODE_DELETED:
                /*
                 * Take care of the case where a deleted inode was never
                 * flushed to the disk in the first place.
                 */
                ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
                                    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
                while (RB_ROOT(&ip->rec_tree)) {
                        hammer_record_t record = RB_ROOT(&ip->rec_tree);
                        hammer_ref(&record->lock);
                        KKASSERT(record->lock.refs == 1);
                        record->flags |= HAMMER_RECF_DELETED_FE;
                        record->flags |= HAMMER_RECF_DELETED_BE;
                        hammer_cleardep_mem_record(record);
                        hammer_rel_mem_record(record);
                }
                break;
        case HAMMER_INODE_ONDISK:
                /*
                 * If already on-disk, do not set any additional flags.
                 */
                break;
        default:
                /*
                 * If not on-disk and not deleted, set both dirty flags
                 * to force an initial record to be written.  Also set
                 * the create_tid for the inode.
                 *
                 * Set create_tid in both the frontend and backend
                 * copy of the inode record.
                 */
                ip->ino_rec.base.base.create_tid = trans.tid;
                ip->sync_ino_rec.base.base.create_tid = trans.tid;
                ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
                break;
        }

        /*
         * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
         * is already on-disk the old record is marked as deleted.
         *
         * If DELETED is set hammer_update_inode() will delete the existing
         * record without writing out a new one.
         *
         * If *ONLY* the ITIMES flag is set we can update the record in-place.
         */
        if (ip->flags & HAMMER_INODE_DELETED) {
                error = hammer_update_inode(&trans, ip);
        } else if ((ip->sync_flags & (HAMMER_INODE_RDIRTY |
                    HAMMER_INODE_DDIRTY |
                    HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
                error = hammer_update_itimes(&trans, ip);
        } else if (ip->sync_flags & (HAMMER_INODE_RDIRTY |
                   HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
                error = hammer_update_inode(&trans, ip);
        }
        if (error)
                Debugger("hammer_update_itimes/inode errored");

        /*
         * Save the TID we used to sync the inode with to make sure we
         * do not improperly reuse it.
         */
        hammer_done_transaction(&trans);
        return(error);
}
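
/*
 * Backend sync order implemented by hammer_sync_inode(), for reference:
 *
 *      1. Flush directory records this inode depends on.
 *      2. Apply deletions/truncations (hammer_ip_delete_range*()).
 *      3. Write out queued BIOs (hammer_dowrite()).
 *      4. Sync in-memory records (hammer_sync_record_callback()).
 *      5. Update the on-disk inode record itself, either in-place
 *         (itimes only) or via delete-and-reinsert.
 */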