/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.39 2008/04/26 02:54:00 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_buffer(hammer_buffer_t buffer, int isnew);
static int hammer_load_node(hammer_node_t node);
/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}
static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}
static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->zone2_offset < buf2->zone2_offset)
		return(-1);
	if (buf1->zone2_offset > buf2->zone2_offset)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}
/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_buf_rb_tree_RB_LOOKUP(root, zone2_offset).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, hammer_off_t, zone2_offset);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, hammer_off_t, node_offset);
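
/*
 * Illustrative sketch (not part of the original source): how the
 * generated lookup functions are invoked.  RB_LOOKUP() resolves to the
 * keyed lookup emitted by RB_GENERATE2() above; the INFO variant emitted
 * by RB_GENERATE_XLOOKUP() takes a hammer_inode_info_t instead of a key.
 */
#if 0
static hammer_volume_t
example_vol_lookup(hammer_mount_t hmp, int32_t vol_no)
{
	/* resolves to hammer_vol_rb_tree_RB_LOOKUP(root, vol_no) */
	return(RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no));
}
#endif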
/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time, get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded at mount time.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;
	int setmp = 0;

	mp = hmp->mp;
	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	hammer_io_init(&volume->io, hmp, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		if (vn_isdisk(volume->devvp, &error)) {
			error = vfs_mountedon(volume->devvp);
		}
	}
	if (error == 0 &&
	    count_udev(volume->devvp->v_umajor, volume->devvp->v_uminor) > 0) {
		error = EBUSY;
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = vinvalbuf(volume->devvp, V_SAVE, 0, 0);
		if (error == 0) {
			error = VOP_OPEN(volume->devvp,
					 (ronly ? FREAD : FREAD|FWRITE),
					 FSCRED, NULL);
		}
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}
	volume->devvp->v_rdev->si_mountpoint = mp;
	setmp = 1;

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->buffer_base = ondisk->vol_buf_beg;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	volume->maxbuf_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
				    ondisk->vol_buf_end - ondisk->vol_buf_beg);
	RB_INIT(&volume->rb_bufs_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume.  HAMMER special-cases the rootvol structure.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		if (setmp)
			volume->devvp->v_rdev->si_mountpoint = NULL;
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}
/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue scan
 * so returns -1 on failure.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->io.hmp;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */
	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root volume pointer, which is held unlocked in hmp.
	 */
	if (hmp->rootvol == volume)
		hmp->rootvol = NULL;

	/*
	 * Unload buffers.
	 */
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	volume->io.flush = 1;
	volume->io.waitdep = 1;
	hammer_io_release(&volume->io);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no buffers remaining in the RB tree.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_bufs_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (volume->devvp->v_rdev &&
		    volume->devvp->v_rdev->si_mountpoint == hmp->mp) {
			volume->devvp->v_rdev->si_mountpoint = NULL;
		}
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}
static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}
/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}
int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}
hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}
/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	int error;

	++volume->io.loading;
	hammer_lock_ex(&volume->io.lock);

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error == 0)
			volume->ondisk = (void *)volume->io.bp->b_data;
	} else {
		error = 0;
	}
	--volume->io.loading;
	hammer_unlock(&volume->io.lock);
	return(error);
}
/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (flush)
		volume->io.flush = 1;
	if (volume->io.lock.refs == 1) {
		++volume->io.loading;
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io);
		}
		--volume->io.loading;
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}
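
/*
 * Illustrative sketch (not part of the original source): the typical
 * get/use/release cycle for a volume, mirroring the callers elsewhere
 * in this file.  hammer_get_volume() returns a referenced, loaded
 * volume or NULL with *errorp set.
 */
#if 0
static int
example_volume_cycle(hammer_mount_t hmp, int32_t vol_no)
{
	hammer_volume_t volume;
	int error;

	volume = hammer_get_volume(hmp, vol_no, &error);
	if (volume == NULL)
		return(error);
	/* ... access volume->ondisk fields ... */
	hammer_rel_volume(volume, 0);
	return(0);
}
#endif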
/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Currently all blockmap-backed zones are translated
 * to zone-2 buffer offsets.
 */
hammer_buffer_t
hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
		  int isnew, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_volume_t volume;
	hammer_off_t zoneX_offset;
	hammer_io_type_t iotype;
	int vol_no;
	int zone;

	zoneX_offset = buf_offset;
	zone = HAMMER_ZONE_DECODE(buf_offset);

	/*
	 * What is the buffer class?
	 */
	switch(zone) {
	case HAMMER_ZONE_LARGE_DATA_INDEX:
	case HAMMER_ZONE_SMALL_DATA_INDEX:
		iotype = HAMMER_STRUCTURE_DATA_BUFFER;
		break;
	case HAMMER_ZONE_UNDO_INDEX:
		iotype = HAMMER_STRUCTURE_UNDO_BUFFER;
		break;
	default:
		iotype = HAMMER_STRUCTURE_META_BUFFER;
		break;
	}

	/*
	 * Handle blockmap offset translations
	 */
	if (zone >= HAMMER_ZONE_BTREE_INDEX) {
		buf_offset = hammer_blockmap_lookup(hmp, buf_offset, errorp);
		KKASSERT(*errorp == 0);
	} else if (zone == HAMMER_ZONE_UNDO_INDEX) {
		buf_offset = hammer_undo_lookup(hmp, buf_offset, errorp);
		KKASSERT(*errorp == 0);
	}

	/*
	 * Locate the buffer given its zone-2 offset.
	 */
	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, errorp);
	if (volume == NULL)
		return(NULL);

	/*
	 * NOTE: buf_offset and maxbuf_off are both full offset
	 * specifications.
	 */
	KKASSERT(buf_offset < volume->maxbuf_off);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->zone2_offset = buf_offset;
		buffer->volume = volume;

		hammer_io_init(&buffer->io, hmp, iotype);
		buffer->io.offset = volume->ondisk->vol_buf_beg +
				    (buf_offset & HAMMER_OFF_SHORT_MASK);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &volume->rb_bufs_root,
			      buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);

		/*
		 * The buffer is no longer loose if it has a ref.
		 */
		if (buffer->io.mod_list == &hmp->lose_list) {
			TAILQ_REMOVE(buffer->io.mod_list, &buffer->io,
				     mod_entry);
			buffer->io.mod_list = NULL;
		}
		if (buffer->io.lock.refs == 1)
			hammer_io_reinit(&buffer->io, iotype);
		else
			KKASSERT(buffer->io.type == iotype);
	}

	/*
	 * Cache the blockmap translation
	 */
	if ((zoneX_offset & HAMMER_ZONE_RAW_BUFFER) != HAMMER_ZONE_RAW_BUFFER)
		buffer->zoneX_offset = zoneX_offset;

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, isnew);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	hammer_rel_volume(volume, 0);
	return(buffer);
}
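
/*
 * Illustrative sketch (not part of the original source): decoding the
 * fields hammer_get_buffer() works with.  HAMMER_ZONE_DECODE() extracts
 * the zone index from a hammer_off_t; HAMMER_VOL_DECODE() extracts the
 * volume number once the offset is in zone-2 (raw buffer) form.
 */
#if 0
static void
example_decode(hammer_off_t zone2_offset)
{
	int zone = HAMMER_ZONE_DECODE(zone2_offset);
	int vol_no = HAMMER_VOL_DECODE(zone2_offset);

	kprintf("zone=%d vol_no=%d\n", zone, vol_no);
}
#endif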
static int
hammer_load_buffer(hammer_buffer_t buffer, int isnew)
{
	hammer_volume_t volume;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	++buffer->io.loading;
	hammer_lock_ex(&buffer->io.lock);

	if (buffer->ondisk == NULL) {
		if (isnew) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error == 0)
			buffer->ondisk = (void *)buffer->io.bp->b_data;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	--buffer->io.loading;
	hammer_unlock(&buffer->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}
/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);

	/*
	 * The buffer is no longer loose if it has a ref.
	 */
	if (buffer->io.mod_list == &buffer->io.hmp->lose_list) {
		TAILQ_REMOVE(buffer->io.mod_list, &buffer->io, mod_entry);
		buffer->io.mod_list = NULL;
	}

	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		}
	} else {
		error = 0;
	}
	return(error);
}
/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_volume_t volume;
	int freeme = 0;

	if (flush)
		buffer->io.flush = 1;
	if (buffer->io.lock.refs == 1) {
		++buffer->io.loading;	/* force interlock check */
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io);
			hammer_flush_buffer_nodes(buffer);
			KKASSERT(TAILQ_EMPTY(&buffer->clist));

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				/*
				 * Final cleanup, delink from the volume
				 * and free the structure below.
				 */
				volume = buffer->volume;
				RB_REMOVE(hammer_buf_rb_tree,
					  &volume->rb_bufs_root, buffer);
				buffer->volume = NULL; /* sanity */
				hammer_rel_volume(volume, 0);
				freeme = 1;
			}
		}
		--buffer->io.loading;
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
	if (freeme) {
		KKASSERT(buffer->io.mod_list == NULL);
		--hammer_count_buffers;
		kfree(buffer, M_HAMMER);
	}
}
/*
 * Remove the zoneX translation cache for a buffer given its zone-2 offset.
 */
void
hammer_uncache_buffer(hammer_mount_t hmp, hammer_off_t buf_offset)
{
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	int vol_no;
	int error;

	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_ZONE_RAW_BUFFER) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, &error);
	KKASSERT(volume != NULL);
	KKASSERT(buf_offset < volume->maxbuf_off);

	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer)
		buffer->zoneX_offset = 0;
	hammer_rel_volume(volume, 0);
}
/*
 * Access the filesystem buffer containing the specified hammer offset.
 * buf_offset is a conglomeration of the volume number and vol_buf_beg
 * relative buffer offset.  It must also have bit 55 set to be valid.
 * (see hammer_off_t in hammer_disk.h).
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bread(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	     struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) != 0);

	buffer = *bufferp;
	if (buffer == NULL || (buffer->zone2_offset != buf_offset &&
			       buffer->zoneX_offset != buf_offset)) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 0, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}
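
/*
 * Illustrative sketch (not part of the original source): callers hold a
 * struct hammer_buffer pointer across consecutive hammer_bread() calls so
 * repeated accesses to the same large buffer reuse the cached translation,
 * then release the buffer once when done.
 */
#if 0
static int
example_bread_cycle(hammer_mount_t hmp, hammer_off_t off1, hammer_off_t off2)
{
	struct hammer_buffer *buffer = NULL;
	void *ptr;
	int error;

	ptr = hammer_bread(hmp, off1, &error, &buffer);
	if (ptr)
		ptr = hammer_bread(hmp, off2, &error, &buffer);
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
#endif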
/*
 * Access the filesystem buffer containing the specified hammer offset.
 * No disk read operation occurs.  The result buffer may contain garbage.
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 *
 * This function marks the buffer dirty but does not increment its
 * modify_refs count.
 */
void *
hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	    struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || (buffer->zone2_offset != buf_offset &&
			       buffer->zoneX_offset != buf_offset)) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 1, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}
/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the referencing structure.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp)
{
	hammer_node_t node;

	KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_BTREE);

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &hmp->rb_nods_root, node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->hmp = hmp;
		if (RB_INSERT(hammer_nod_rb_tree, &hmp->rb_nods_root, node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	if (node->ondisk)
		*errorp = 0;
	else
		*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}
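
/*
 * Illustrative sketch (not part of the original source): acquiring and
 * releasing a B-Tree node by its zone-encoded offset.
 */
#if 0
static int
example_node_cycle(hammer_mount_t hmp, hammer_off_t node_offset)
{
	hammer_node_t node;
	int error;

	node = hammer_get_node(hmp, node_offset, &error);
	if (node == NULL)
		return(error);
	/* ... examine node->ondisk ... */
	hammer_rel_node(node);
	return(0);
}
#endif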
/*
 * Reference an already-referenced node.
 */
void
hammer_ref_node(hammer_node_t node)
{
	KKASSERT(node->lock.refs > 0 && node->ondisk != NULL);
	hammer_ref(&node->lock);
}
/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int error;

	error = 0;
	++node->loading;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 *
		 * We could be racing a buffer release, in which case
		 * node->buffer may become NULL while we are blocked
		 * referencing the buffer.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
			if (error == 0 && node->buffer == NULL) {
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		} else {
			buffer = hammer_get_buffer(node->hmp,
						   node->node_offset, 0,
						   &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
			       (node->node_offset & HAMMER_BUFMASK));
		}
	}
	--node->loading;
	hammer_unlock(&node->lock);
	return (error);
}
/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	node = *cache;
	if (node != NULL) {
		hammer_ref(&node->lock);
		if (node->ondisk)
			*errorp = 0;
		else
			*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}
/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & HAMMER_NODE_FLUSH) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node.
	 */
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */
	hammer_rel_buffer(buffer, 0);
}
/*
 * Delete a node, freeing its on-media storage.
 */
void
hammer_delete_node(hammer_transaction_t trans, hammer_node_t node)
{
	node->flags |= HAMMER_NODE_DELETED;
	hammer_blockmap_free(trans, node->node_offset, sizeof(*node->ondisk));
}
/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;

			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}
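
/*
 * Illustrative sketch (not part of the original source): a structure
 * embedding a passive node cache pairs hammer_cache_node() with
 * hammer_uncache_node() on teardown.  The cached pointer may be cleared
 * behind the holder's back when the node is flushed, which is why the
 * cache is registered with the node as a pointer-to-pointer.
 */
#if 0
struct example_holder {
	struct hammer_node *cache;	/* passive cache, may go NULL */
};

static void
example_remember(struct example_holder *eh, hammer_node_t node)
{
	hammer_cache_node(node, &eh->cache);	/* node must be referenced */
}

static void
example_forget(struct example_holder *eh)
{
	hammer_uncache_node(&eh->cache);	/* tolerates a NULL *cache */
}
#endif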
void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}
/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->hmp->rb_nods_root, node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}
/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.  The buffer is locked.
 *
 * We may be interlocked with the buffer.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->ondisk == NULL);

		if (node->lock.refs == 0) {
			hammer_ref(&node->lock);
			node->flags |= HAMMER_NODE_FLUSH;
			hammer_rel_node(node);
		} else {
			KKASSERT(node->loading != 0);
			KKASSERT(node->buffer != NULL);
			buffer = node->buffer;
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
	}
}
/************************************************************************
 *				ALLOCATORS				*
 ************************************************************************/

/*
 * Allocate a B-Tree node.
 */
hammer_node_t
hammer_alloc_btree(hammer_transaction_t trans, int *errorp)
{
	hammer_buffer_t buffer = NULL;
	hammer_node_t node = NULL;
	hammer_off_t node_offset;

	node_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_BTREE_INDEX,
					    sizeof(struct hammer_node_ondisk),
					    errorp);
	if (*errorp == 0) {
		node = hammer_get_node(trans->hmp, node_offset, errorp);
		hammer_modify_node_noundo(trans, node);
		bzero(node->ondisk, sizeof(*node->ondisk));
		hammer_modify_node_done(node);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}
/*
 * The returned buffers are already appropriately marked as being modified.
 * If the caller marks them again unnecessary undo records may be generated.
 *
 * In-band data is indicated by data_bufferp == NULL.  Pass a data_len of 0
 * for zero-fill (caller modifies data_len afterwards).
 *
 * The caller is responsible for calling hammer_modify_*() prior to making
 * any additional modifications to either the returned record buffer or the
 * returned data buffer.
 */
hammer_record_ondisk_t
hammer_alloc_record(hammer_transaction_t trans,
		    hammer_off_t *rec_offp, u_int16_t rec_type,
		    struct hammer_buffer **rec_bufferp,
		    int32_t data_len, void **datap,
		    struct hammer_buffer **data_bufferp, int *errorp)
{
	hammer_record_ondisk_t rec;
	hammer_off_t rec_offset;
	hammer_off_t data_offset;
	int32_t reclen = 0;

	/*
	 * Allocate the record
	 */
	rec_offset = hammer_blockmap_alloc(trans, HAMMER_ZONE_RECORD_INDEX,
					   HAMMER_RECORD_SIZE, errorp);
	if (*errorp)
		return(NULL);

	/*
	 * Allocate data
	 */
	if (data_len) {
		if (data_bufferp == NULL) {
			switch(rec_type) {
			case HAMMER_RECTYPE_DATA:
				reclen = offsetof(struct hammer_data_record,
						  data[0]);
				break;
			case HAMMER_RECTYPE_DIRENTRY:
				reclen = offsetof(struct hammer_entry_record,
						  name[0]);
				break;
			default:
				panic("hammer_alloc_record: illegal "
				      "in-band data");
				/* NOT REACHED */
				break;
			}
			KKASSERT(reclen + data_len <= HAMMER_RECORD_SIZE);
			data_offset = rec_offset + reclen;
		} else if (data_len < HAMMER_BUFSIZE) {
			data_offset = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_SMALL_DATA_INDEX,
						data_len, errorp);
		} else {
			data_offset = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_LARGE_DATA_INDEX,
						data_len, errorp);
		}
	} else {
		data_offset = 0;
	}
	if (*errorp) {
		hammer_blockmap_free(trans, rec_offset, HAMMER_RECORD_SIZE);
		return(NULL);
	}

	/*
	 * Basic return values.
	 *
	 * Note that because this is a 'new' buffer, there is no need to
	 * generate UNDO records for it.
	 */
	*rec_offp = rec_offset;
	rec = hammer_bread(trans->hmp, rec_offset, errorp, rec_bufferp);
	hammer_modify_buffer(trans, *rec_bufferp, NULL, 0);
	bzero(rec, sizeof(*rec));
	KKASSERT(*errorp == 0);
	rec->base.data_off = data_offset;
	rec->base.data_len = data_len;
	hammer_modify_buffer_done(*rec_bufferp);

	if (data_bufferp) {
		if (data_len) {
			*datap = hammer_bread(trans->hmp, data_offset, errorp,
					      data_bufferp);
			KKASSERT(*errorp == 0);
		} else {
			*datap = NULL;
		}
	} else if (data_len) {
		KKASSERT(data_offset + data_len - rec_offset <=
			 HAMMER_RECORD_SIZE);
		if (datap) {
			*datap = (void *)((char *)rec +
				  (int32_t)(data_offset - rec_offset));
		}
	} else {
		KKASSERT(datap == NULL);
	}
	KKASSERT(*errorp == 0);
	return(rec);
}
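
/*
 * Illustrative sketch (not part of the original source): allocating a
 * record with out-of-band data.  Both buffer pointers start NULL and the
 * caller releases whatever buffers come back, as described above.
 */
#if 0
static int
example_alloc_record(hammer_transaction_t trans, int32_t data_len)
{
	struct hammer_buffer *rec_buffer = NULL;
	struct hammer_buffer *data_buffer = NULL;
	hammer_record_ondisk_t rec;
	hammer_off_t rec_offset;
	void *data;
	int error;

	rec = hammer_alloc_record(trans, &rec_offset, HAMMER_RECTYPE_DATA,
				  &rec_buffer, data_len, &data,
				  &data_buffer, &error);
	if (rec) {
		/* ... fill in the record and data ... */
	}
	if (rec_buffer)
		hammer_rel_buffer(rec_buffer, 0);
	if (data_buffer)
		hammer_rel_buffer(data_buffer, 0);
	return(error);
}
#endif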
/*
 * Allocate data.  If the address of a data buffer is supplied then
 * any prior non-NULL *data_bufferp will be released and *data_bufferp
 * will be set to the related buffer.  The caller must release it when
 * finally done.  The initial *data_bufferp should be set to NULL by
 * the caller.
 *
 * The caller is responsible for making hammer_modify*() calls on the
 * *data_bufferp.
 */
void *
hammer_alloc_data(hammer_transaction_t trans, int32_t data_len,
		  hammer_off_t *data_offsetp,
		  struct hammer_buffer **data_bufferp, int *errorp)
{
	void *data = NULL;

	/*
	 * Allocate data
	 */
	*errorp = 0;
	if (data_len) {
		if (data_len < HAMMER_BUFSIZE) {
			*data_offsetp = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_SMALL_DATA_INDEX,
						data_len, errorp);
		} else {
			*data_offsetp = hammer_blockmap_alloc(trans,
						HAMMER_ZONE_LARGE_DATA_INDEX,
						data_len, errorp);
		}
	} else {
		*data_offsetp = 0;
	}
	if (*errorp == 0 && data_bufferp) {
		if (data_len) {
			data = hammer_bread(trans->hmp, *data_offsetp, errorp,
					    data_bufferp);
			KKASSERT(*errorp == 0);
		} else {
			data = NULL;
		}
	}
	KKASSERT(*errorp == 0);
	return (data);
}
/*
 * Sync dirty buffers to the media and clean-up any loose ends.
 */
static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);
	if (waitfor == MNT_WAIT)
		hammer_flusher_sync(hmp);
	else
		hammer_flusher_async(hmp);

	return(info.error);
}

static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	error = VOP_FSYNC(vp, info->waitfor);
	if (error)
		info->error = error;
	return(0);
}