/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.18 2008/01/10 07:41:03 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */
#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>
static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_supercl(hammer_supercl_t supercl,
			hammer_alloc_state_t isnew);
static int hammer_load_cluster(hammer_cluster_t cluster,
			hammer_alloc_state_t isnew);
static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
static int hammer_load_node(hammer_node_t node);
static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
			hammer_alist_t live,
			int32_t start, int *errorp,
			struct hammer_buffer **bufferp);
static void readhammerbuf(hammer_volume_t vol, void *data,
			int64_t offset);
static void writehammerbuf(hammer_volume_t vol, const void *data,
			int64_t offset);
static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
			int32_t start, int isfwd);
static void hammer_adjust_stats(hammer_cluster_t cluster,
			u_int64_t buf_type, int nblks);
struct hammer_alist_config Buf_alist_config;
struct hammer_alist_config Vol_normal_alist_config;
struct hammer_alist_config Vol_super_alist_config;
struct hammer_alist_config Supercl_alist_config;
struct hammer_alist_config Clu_master_alist_config;
struct hammer_alist_config Clu_slave_alist_config;
/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
{
	if (cl1->scl_no < cl2->scl_no)
		return(-1);
	if (cl1->scl_no > cl2->scl_no)
		return(1);
	return(0);
}

static int
hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
{
	if (cl1->clu_no < cl2->clu_no)
		return(-1);
	if (cl1->clu_no > cl2->clu_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_no < buf2->buf_no)
		return(-1);
	if (buf1->buf_no > buf2->buf_no)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}
/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
	     hammer_scl_rb_compare, int32_t, scl_no);
RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
	     hammer_clu_rb_compare, int32_t, clu_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, int32_t, buf_no);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, int32_t, node_offset);
/************************************************************************
 *				VOLUMES					*
 ************************************************************************
 *
 * Load a HAMMER volume by name.  Returns 0 on success or a positive error
 * code on failure.  Volumes must be loaded at mount time; get_volume() will
 * not load a new volume.
 *
 * Calls made to hammer_load_volume() are single-threaded at mount time.
 */
int
hammer_install_volume(struct hammer_mount *hmp, const char *volname)
{
	struct mount *mp = hmp->mp;
	hammer_volume_t volume;
	struct hammer_volume_ondisk *ondisk;
	struct nlookupdata nd;
	struct buf *bp = NULL;
	int error;
	int ronly;

	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Allocate a volume structure
	 */
	++hammer_count_volumes;
	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
	volume->vol_name = kstrdup(volname, M_HAMMER);
	volume->hmp = hmp;
	hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
	volume->io.offset = 0LL;

	/*
	 * Get the device vnode
	 */
	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
	nlookup_done(&nd);
	if (error == 0) {
		vn_isdisk(volume->devvp, &error);
	}
	if (error == 0) {
		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
				 FSCRED, NULL);
		vn_unlock(volume->devvp);
	}
	if (error) {
		hammer_free_volume(volume);
		return(error);
	}

	/*
	 * Extract the volume number from the volume header and do various
	 * sanity checks.
	 */
	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
	if (error)
		goto late_failure;
	ondisk = (void *)bp->b_data;
	if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
		kprintf("hammer_mount: volume %s has an invalid header\n",
			volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}
	volume->vol_no = ondisk->vol_no;
	volume->cluster_base = ondisk->vol_clo_beg;
	volume->vol_clsize = ondisk->vol_clsize;
	volume->vol_flags = ondisk->vol_flags;
	volume->nblocks = ondisk->vol_nblocks;
	RB_INIT(&volume->rb_clus_root);
	RB_INIT(&volume->rb_scls_root);

	hmp->mp->mnt_stat.f_blocks += volume->nblocks;

	if (RB_EMPTY(&hmp->rb_vols_root)) {
		hmp->fsid = ondisk->vol_fsid;
	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
		kprintf("hammer_mount: volume %s's fsid does not match "
			"other volumes\n", volume->vol_name);
		error = EFTYPE;
		goto late_failure;
	}

	/*
	 * Insert the volume structure into the red-black tree.
	 */
	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
			volume->vol_name, volume->vol_no);
		error = EEXIST;
	}

	/*
	 * Set the root volume and load the root cluster.  HAMMER special
	 * cases rootvol and rootcl and will not deallocate the structures.
	 * We do not hold a ref because this would prevent related I/O
	 * from being flushed.
	 */
	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
		hmp->rootvol = volume;
		hmp->rootcl = hammer_get_cluster(volume,
						 ondisk->vol0_root_clu_no,
						 &error, 0);
		hammer_rel_cluster(hmp->rootcl, 0);
		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
	}
late_failure:
	if (bp)
		brelse(bp);
	if (error) {
		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
		hammer_free_volume(volume);
	}
	return (error);
}
/*
 * Unload and free a HAMMER volume.  Must return >= 0 to continue the
 * scan; -1 is returned on failure (aborting the scan).
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	hammer_cluster_t rootcl;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */

	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root cluster, which is held unlocked in the root
	 * volume.
	 */
	if (hmp->rootvol == volume) {
		if ((rootcl = hmp->rootcl) != NULL)
			hmp->rootcl = NULL;
		hmp->rootvol = NULL;
	}

	/*
	 * Unload clusters and super-clusters.  Unloading a super-cluster
	 * also unloads related clusters, but the filesystem may not be
	 * using super-clusters so unload clusters anyway.
	 */
	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
			hammer_unload_cluster, NULL);
	RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
			hammer_unload_supercl, NULL);
	hammer_io_waitdep(&volume->io);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 2);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_clus_root));
	KKASSERT(RB_EMPTY(&volume->rb_scls_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}
static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}
/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}
int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}
hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}
/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple get's.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			hammer_unlock(&volume->io.lock);
			return (error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;

		/*
		 * Configure the volume's A-lists.  These are used to
		 * allocate clusters.
		 */
		if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
			volume->alist.config = &Vol_super_alist_config;
			volume->alist.meta = ondisk->vol_almeta.super;
			volume->alist.info = volume;
		} else {
			volume->alist.config = &Vol_normal_alist_config;
			volume->alist.meta = ondisk->vol_almeta.normal;
			volume->alist.info = NULL;
		}
	} else {
		error = 0;
	}
	hammer_unlock(&volume->io.lock);
	return(0);
}
/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock to be held.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		} else if (flush) {
			hammer_io_flush(&volume->io);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}
/************************************************************************
 *				SUPER-CLUSTERS				*
 ************************************************************************
 *
 * Manage super-clusters.  Note that a supercl holds a reference to its
 * associated volume.
 */
int
hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
{
	if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
		return(1);
	return(0);
}
hammer_supercl_t
hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
		   int *errorp, hammer_alloc_state_t isnew)
{
	hammer_supercl_t supercl;

	/*
	 * Locate and lock the super-cluster structure, creating one
	 * if necessary.
	 */
again:
	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
	if (supercl == NULL) {
		++hammer_count_supercls;
		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
		supercl->scl_no = scl_no;
		supercl->volume = volume;
		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
		hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
		hammer_ref(&supercl->io.lock);

		/*
		 * Insert the super-cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
			hammer_unref(&supercl->io.lock);
			--hammer_count_supercls;
			kfree(supercl, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&supercl->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (supercl->ondisk == NULL || isnew) {
		*errorp = hammer_load_supercl(supercl, isnew);
		if (*errorp) {
			hammer_rel_supercl(supercl, 1);
			supercl = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(supercl);
}
static int
hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
{
	struct hammer_supercl_ondisk *ondisk;
	hammer_volume_t volume = supercl->volume;
	int64_t nclusters;
	int error;

	hammer_lock_ex(&supercl->io.lock);
	if (supercl->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &supercl->io);
		else
			error = hammer_io_read(volume->devvp, &supercl->io);
		if (error) {
			hammer_unlock(&supercl->io.lock);
			return (error);
		}
		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;

		supercl->alist.config = &Supercl_alist_config;
		supercl->alist.meta = ondisk->scl_meta;
		supercl->alist.info = NULL;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &supercl->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new super-cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;

		hammer_modify_supercl(supercl);

		ondisk = supercl->ondisk;
		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);

		nclusters = volume->ondisk->vol_nclusters -
			    ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
		KKASSERT(nclusters > 0);
		if (nclusters > HAMMER_SCL_MAXCLUSTERS)
			nclusters = HAMMER_SCL_MAXCLUSTERS;
		hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
				  isnew);
	}
	hammer_unlock(&supercl->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
{
	KKASSERT(supercl->io.lock.refs == 0);
	hammer_ref(&supercl->io.lock);
	hammer_rel_supercl(supercl, 2);
	return(0);
}
/*
 * Release a super-cluster.  We have to deal with several places where
 * another thread can ref the super-cluster.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.
 */
void
hammer_rel_supercl(hammer_supercl_t supercl, int flush)
{
	hammer_volume_t volume;

	if (supercl->io.lock.refs == 1) {
		hammer_lock_ex(&supercl->io.lock);
		if (supercl->io.lock.refs == 1) {
			hammer_io_release(&supercl->io, flush);
			if (supercl->io.bp == NULL &&
			    supercl->io.lock.refs == 1) {
				volume = supercl->volume;
				RB_REMOVE(hammer_scl_rb_tree,
					  &volume->rb_scls_root, supercl);
				supercl->volume = NULL;	/* sanity */
				--hammer_count_supercls;
				kfree(supercl, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&supercl->io);
		}
		hammer_unlock(&supercl->io.lock);
	}
	hammer_unref(&supercl->io.lock);
}
/************************************************************************
 *				CLUSTERS				*
 ************************************************************************
 */
hammer_cluster_t
hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
		   int *errorp, hammer_alloc_state_t isnew)
{
	hammer_cluster_t cluster;

again:
	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
	if (cluster == NULL) {
		++hammer_count_clusters;
		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
		cluster->clu_no = clu_no;
		cluster->volume = volume;
		RB_INIT(&cluster->rb_bufs_root);
		RB_INIT(&cluster->rb_nods_root);
		hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
		hammer_ref(&cluster->io.lock);

		/*
		 * Insert the cluster into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
			hammer_unref(&cluster->io.lock);
			--hammer_count_clusters;
			kfree(cluster, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&cluster->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL || isnew) {
		*errorp = hammer_load_cluster(cluster, isnew);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}
hammer_cluster_t
hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
{
	hammer_cluster_t cluster;

	cluster = hmp->rootcl;
	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL) {
		*errorp = hammer_load_cluster(cluster, 0);
		if (*errorp) {
			hammer_rel_cluster(cluster, 1);
			cluster = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (cluster);
}
static
int
hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew)
{
	hammer_volume_t volume = cluster->volume;
	struct hammer_cluster_ondisk *ondisk;
	int error;

	/*
	 * Load the cluster's on-disk info
	 */
	hammer_lock_ex(&cluster->io.lock);
	if (cluster->ondisk == NULL) {
		if (isnew)
			error = hammer_io_new(volume->devvp, &cluster->io);
		else
			error = hammer_io_read(volume->devvp, &cluster->io);
		if (error) {
			hammer_unlock(&cluster->io.lock);
			return (error);
		}
		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;

		cluster->alist_master.config = &Clu_master_alist_config;
		cluster->alist_master.meta = ondisk->clu_master_meta;
		cluster->alist_btree.config = &Clu_slave_alist_config;
		cluster->alist_btree.meta = ondisk->clu_btree_meta;
		cluster->alist_btree.info = cluster;
		cluster->alist_record.config = &Clu_slave_alist_config;
		cluster->alist_record.meta = ondisk->clu_record_meta;
		cluster->alist_record.info = cluster;
		cluster->alist_mdata.config = &Clu_slave_alist_config;
		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
		cluster->alist_mdata.info = cluster;

		if (isnew == 0) {
			/*
			 * Recover a cluster that was marked open.  This
			 * can be rather involved and block for a hefty
			 * chunk of time.
			 */
			if (ondisk->clu_flags & HAMMER_CLUF_OPEN)
				hammer_recover(cluster);

			cluster->clu_btree_beg = ondisk->clu_btree_beg;
			cluster->clu_btree_end = ondisk->clu_btree_end;
		}
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &cluster->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		/*
		 * If this is a new cluster we have to initialize
		 * various ondisk structural elements.  The caller is
		 * responsible for the remainder.
		 */
		struct hammer_alist_live dummy;
		hammer_node_t croot;
		hammer_volume_ondisk_t voldisk;
		int32_t nbuffers;

		hammer_modify_cluster(cluster);
		ondisk = cluster->ondisk;
		voldisk = volume->ondisk;

		dummy.config = &Buf_alist_config;
		dummy.meta = ondisk->head.buf_almeta;
		dummy.info = NULL;
		hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);

		ondisk->vol_fsid = voldisk->vol_fsid;
		ondisk->vol_fstype = voldisk->vol_fstype;
		ondisk->clu_id = 0;	/* XXX */
		ondisk->clu_no = cluster->clu_no;
		ondisk->clu_flags = 0;
		ondisk->clu_start = HAMMER_BUFSIZE;
		KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
		if (voldisk->vol_clo_end - cluster->io.offset >
		    voldisk->vol_clsize) {
			ondisk->clu_limit = voldisk->vol_clsize;
		} else {
			ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
						      cluster->io.offset);
		}
		nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
		KKASSERT(isnew == HAMMER_ASTATE_FREE);
		hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
				  isnew);
		hammer_alist_init(&cluster->alist_btree,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_record,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);
		hammer_alist_init(&cluster->alist_mdata,
				  HAMMER_FSBUF_MAXBLKS,
				  (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_ASTATE_ALLOC);

		ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
		ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;

		/*
		 * Initialize the B-Tree.  We don't know what the caller
		 * intends to do with the cluster so make sure it causes
		 * an assertion if the caller makes no changes.
		 */
		ondisk->clu_btree_parent_vol_no = -2;
		ondisk->clu_btree_parent_clu_no = -2;
		ondisk->clu_btree_parent_offset = -2;
		ondisk->clu_btree_parent_clu_gen = -2;

		croot = hammer_alloc_btree(cluster, &error);
		if (error == 0) {
			hammer_modify_node(croot);
			bzero(croot->ondisk, sizeof(*croot->ondisk));
			croot->ondisk->count = 0;
			croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
			hammer_modify_cluster(cluster);
			ondisk->clu_btree_root = croot->node_offset;
			hammer_rel_node(croot);
		}
	}
	hammer_unlock(&cluster->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
{
	hammer_ref(&cluster->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_unload_buffer, NULL);
	hammer_io_waitdep(&cluster->io);
	KKASSERT(cluster->io.lock.refs == 1);
	hammer_rel_cluster(cluster, 2);
	return(0);
}
/*
 * Update the cluster's synchronization TID, which is used during cluster
 * recovery.  NOTE: The cluster header is not written out until all related
 * records have been written out.
 */
void
hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
{
	hammer_modify_cluster(cluster);
	if (cluster->ondisk->synchronized_tid < tid)
		cluster->ondisk->synchronized_tid = tid;
}
/*
 * Reference a cluster that is either already referenced or via a specially
 * handled pointer (aka rootcl).
 */
int
hammer_ref_cluster(hammer_cluster_t cluster)
{
	int error;

	KKASSERT(cluster != NULL);
	hammer_ref(&cluster->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (cluster->ondisk == NULL) {
		error = hammer_load_cluster(cluster, 0);
		if (error)
			hammer_rel_cluster(cluster, 1);
	} else {
		error = 0;
	}
	return(error);
}
/*
 * Release a cluster.  We have to deal with several places where
 * another thread can ref the cluster.
 *
 * Only destroy the structure itself if we no longer have an IO or any
 * hammer buffers associated with the structure.
 */
void
hammer_rel_cluster(hammer_cluster_t cluster, int flush)
{
	hammer_volume_t volume;

	if (cluster->io.lock.refs == 1) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->io.lock.refs == 1) {
			/*
			 * Release the I/O.  If we or the kernel wants to
			 * flush, this will release the bp.  Otherwise the
			 * bp may be written and flushed passively by the
			 * kernel later on.
			 */
			hammer_io_release(&cluster->io, flush);

			/*
			 * Final cleanup
			 */
			if (cluster != cluster->volume->hmp->rootcl &&
			    cluster->io.bp == NULL &&
			    cluster->io.lock.refs == 1 &&
			    RB_EMPTY(&cluster->rb_bufs_root)) {
				KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
				volume = cluster->volume;
				RB_REMOVE(hammer_clu_rb_tree,
					  &volume->rb_clus_root, cluster);
				cluster->volume = NULL;	/* sanity */
				--hammer_count_clusters;
				kfree(cluster, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&cluster->io);
		}
		hammer_unlock(&cluster->io.lock);
	}
	hammer_unref(&cluster->io.lock);
}
/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 *
 * A non-zero buf_type indicates that a new buffer should be created and
 * initialized.
 */
hammer_buffer_t
hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
		  u_int64_t buf_type, int *errorp)
{
	hammer_buffer_t buffer;

	/*
	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
	 * header and should never be requested.
	 */
	KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
		 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_no = buf_no;
		buffer->cluster = cluster;
		buffer->volume = cluster->volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = cluster->io.offset +
				    (buf_no * HAMMER_BUFSIZE);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&cluster->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buf_type) {
		*errorp = hammer_load_buffer(buffer, buf_type);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(buffer);
}
static int
hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
{
	hammer_volume_t volume;
	hammer_fsbuf_ondisk_t ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	if (buffer->ondisk == NULL) {
		if (buf_type)
			error = hammer_io_new(volume->devvp, &buffer->io);
		else
			error = hammer_io_read(volume->devvp, &buffer->io);
		if (error) {
			hammer_unlock(&buffer->io.lock);
			return (error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
		buffer->alist.config = &Buf_alist_config;
		buffer->alist.meta = ondisk->head.buf_almeta;
		buffer->buf_type = ondisk->head.buf_type;
	} else if (buf_type) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && buf_type) {
		hammer_modify_buffer(buffer);
		ondisk = buffer->ondisk;
		hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
		buffer->buf_type = ondisk->head.buf_type;
	}
	hammer_unlock(&buffer->io.lock);
	return (error);
}
/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}
/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		}
	} else {
		KKASSERT(buffer->buf_type ==
			 buffer->ondisk->head.buf_type);
		error = 0;
	}
	return(error);
}
/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_cluster_t cluster;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				hammer_flush_buffer_nodes(buffer);
				KKASSERT(TAILQ_EMPTY(&buffer->clist));
				cluster = buffer->cluster;
				RB_REMOVE(hammer_buf_rb_tree,
					  &cluster->rb_bufs_root, buffer);
				buffer->cluster = NULL;	/* sanity */
				--hammer_count_buffers;
				kfree(buffer, M_HAMMER);
				hammer_rel_cluster(cluster, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&buffer->io);
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}
/************************************************************************
 *				B-TREE NODES				*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the other structure.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->cluster = cluster;
		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
			      node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	return(node);
}
/*
 * Reference an already-referenced node.
 */
int
hammer_ref_node(hammer_node_t node)
{
	int error;

	KKASSERT(node->lock.refs > 0);
	hammer_ref(&node->lock);
	if ((error = hammer_load_node(node)) != 0)
		hammer_rel_node(node);
	return(error);
}
/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error;

	error = 0;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
		} else {
			buf_no = node->node_offset / HAMMER_BUFSIZE;
			buffer = hammer_get_buffer(node->cluster,
						   buf_no, 0, &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
				       (node->node_offset & HAMMER_BUFMASK));
		}
	}
	hammer_unlock(&node->lock);
	return (error);
}
/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		hammer_ref(&node->lock);
		*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}
/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_cluster_t cluster;
	hammer_buffer_t buffer;
	int32_t node_offset;
	int flags;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node.  Record pertinent data because the node
	 * becomes stale the instant we flush it.
	 */
	flags = node->flags;
	node_offset = node->node_offset;
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */

	cluster = buffer->cluster;
	if (flags & HAMMER_NODE_DELETED) {
		hammer_free_btree(cluster, node_offset);
		if (node_offset == cluster->ondisk->clu_btree_root) {
			kprintf("FREE CLUSTER %d\n", cluster->clu_no);
			hammer_free_cluster(cluster);
			/*hammer_io_undirty(&cluster->io);*/
		}
	}
	hammer_rel_buffer(buffer, 0);
}
/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;

			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}
void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}
/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
			  node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}
/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
		hammer_ref(&node->lock);
		node->flags |= HAMMER_NODE_FLUSH;
		hammer_rel_node(node);
	}
}
/************************************************************************
 *				A-LIST ALLOCATORS			*
 ************************************************************************/

/*
 * Allocate HAMMER clusters
 */
hammer_cluster_t
hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
		     int *errorp)
{
	hammer_volume_t volume;
	hammer_cluster_t cluster;
	int32_t clu_no;
	int32_t clu_hint;
	int32_t vol_beg;
	int32_t vol_no;

	/*
	 * Figure out our starting volume and hint.
	 */
	if (cluster_hint) {
		vol_beg = cluster_hint->volume->vol_no;
		clu_hint = cluster_hint->clu_no;
	} else {
		vol_beg = hmp->volume_iterator;
		clu_hint = -1;
	}

	/*
	 * Loop through volumes looking for a free cluster.  If allocating
	 * a new cluster relative to an existing cluster try to find a free
	 * cluster on either side (clu_hint >= 0), otherwise just do a
	 * forwards iteration.
	 */
	vol_no = vol_beg;
	do {
		volume = hammer_get_volume(hmp, vol_no, errorp);
		kprintf("VOLUME %p %d\n", volume, vol_no);
		if (*errorp) {
			clu_no = HAMMER_ALIST_BLOCK_NONE;
			break;
		}
		hammer_modify_volume(volume);
		if (clu_hint == -1) {
			clu_hint = volume->clu_iterator;
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_fwd(&volume->alist,
								1, 0);
			}
		} else {
			clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
							clu_hint);
			if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
				clu_no = hammer_alist_alloc_rev(&volume->alist,
								1, clu_hint);
			}
		}
		if (clu_no != HAMMER_ALIST_BLOCK_NONE)
			break;
		hammer_rel_volume(volume, 0);
		vol_no = (vol_no + 1) % hmp->nvolumes;
		clu_hint = -1;
	} while (vol_no != vol_beg);

	/*
	 * Acquire the cluster.  On success this will force *errorp to 0.
	 */
	if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
		kprintf("ALLOC CLUSTER %d\n", clu_no);
		cluster = hammer_get_cluster(volume, clu_no, errorp,
					     HAMMER_ASTATE_FREE);
		volume->clu_iterator = clu_no;
		hammer_rel_volume(volume, 0);
	} else {
		cluster = NULL;
		if (*errorp == 0)
			*errorp = ENOSPC;
	}
	if (cluster)
		hammer_lock_ex(&cluster->io.lock);
	return(cluster);
}
void
hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound,
		    hammer_base_elm_t right_bound)
{
	hammer_cluster_ondisk_t ondisk = cluster->ondisk;

	hammer_modify_cluster(cluster);
	ondisk->clu_btree_beg = *left_bound;
	ondisk->clu_btree_end = *right_bound;
	cluster->clu_btree_beg = ondisk->clu_btree_beg;
	cluster->clu_btree_end = ondisk->clu_btree_end;
}
/*
 * Deallocate a cluster
 */
void
hammer_free_cluster(hammer_cluster_t cluster)
{
	hammer_modify_cluster(cluster);
	hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
}
/*
 * Allocate HAMMER elements - btree nodes, data storage, and record elements
 *
 * The passed *bufferp should be initialized to NULL.  On successive calls
 * *bufferp caches the most recent buffer used until put away by the caller.
 * Note that previously returned pointers using the cached buffer become
 * invalid on successive calls which reuse *bufferp.
 *
 * All allocations first attempt to use the block found at the specified
 * iterator.  If that fails the first available block is used.  If that
 * fails a new buffer is allocated and associated with the buffer type
 * A-list and the element is allocated out of the new buffer.
 */
hammer_node_t
hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	hammer_node_t node;
	int32_t elm_no;
	int32_t buf_no;
	int32_t node_offset;

	/*
	 * Allocate a B-Tree element
	 */
	hammer_modify_cluster(cluster);
	buffer = NULL;
	live = &cluster->alist_btree;
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
				 cluster->ondisk->idx_index, errorp, &buffer);
		elm_no = hammer_alist_alloc(live, 1);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			if (buffer)
				hammer_rel_buffer(buffer, 0);
			return(NULL);
		}
	}
	cluster->ondisk->idx_index = elm_no;
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);

	/*
	 * Load and return the B-Tree element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	node_offset = buf_no * HAMMER_BUFSIZE +
		      offsetof(union hammer_fsbuf_ondisk,
			       btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
	node = hammer_get_node(cluster, node_offset, errorp);
	if (node) {
		hammer_modify_node(node);
		bzero(node->ondisk, sizeof(*node->ondisk));
	} else {
		hammer_alist_free(live, elm_no, 1);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}
void *
hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
		  int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;
	void *item;

	/*
	 * Deal with large data blocks.  The blocksize is HAMMER_BUFSIZE
	 * for these allocations.
	 */
	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		/* only one block allowed for now (so buffer can hold it) */
		KKASSERT(nblks == 1);

		buf_no = hammer_alloc_master(cluster, nblks,
					     cluster->ondisk->idx_ldata, 1);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
		cluster->ondisk->idx_ldata = buf_no;
		buffer = *bufferp;
		*bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = *bufferp;
		return(buffer->ondisk);
	}

	/*
	 * Allocate a data element.  The block size is HAMMER_DATA_BLKSIZE
	 * (64 bytes) for these allocations.
	 */
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &cluster->alist_mdata;
	elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
				 cluster->ondisk->idx_data, errorp, bufferp);
		elm_no = hammer_alist_alloc(live, nblks);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_data = elm_no;

	/*
	 * Load and return the data element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, nblks * HAMMER_DATA_BLKSIZE);
	*errorp = 0;
	return(item);
}
void *
hammer_alloc_record(hammer_cluster_t cluster,
		    int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	void *item;

	/*
	 * Allocate a record element
	 */
	hammer_modify_cluster(cluster);
	live = &cluster->alist_record;
	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
				 cluster->ondisk->idx_record, errorp, bufferp);
		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
		kprintf("hammer_alloc_record elm again %08x\n", elm_no);
		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
			*errorp = ENOSPC;
			return(NULL);
		}
	}
	cluster->ondisk->idx_record = elm_no;

	/*
	 * Load and return the record element
	 */
	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
	buffer = *bufferp;
	if (buffer == NULL || buffer->cluster != cluster ||
	    buffer->buf_no != buf_no) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
		*bufferp = buffer;
	}
	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
	KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
	hammer_modify_buffer(buffer);
	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
	bzero(item, sizeof(union hammer_record_ondisk));
	*errorp = 0;
	return(item);
}
void
hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
{
	int32_t elm_no;
	int32_t nblks;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
		hammer_alist_free(&buffer->cluster->alist_master,
				  buffer->buf_no, nblks);
		hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
		 HAMMER_DATA_BLKSIZE;
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	live = &buffer->cluster->alist_mdata;
	hammer_alist_free(live, elm_no, nblks);
}
void
hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
{
	int32_t elm_no;
	hammer_alist_t live;

	hammer_modify_cluster(buffer->cluster);
	elm_no = rec - &buffer->ondisk->record.recs[0];
	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
	live = &buffer->cluster->alist_record;
	hammer_alist_free(live, elm_no, 1);
}
void
hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(struct hammer_node_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
	live = &cluster->alist_btree;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
}
void
hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
{
	const int32_t blksize = HAMMER_DATA_BLKSIZE;
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;
	int32_t buf_no;
	int32_t nblks;

	hammer_modify_cluster(cluster);
	if ((bytes & HAMMER_BUFMASK) == 0) {
		nblks = bytes / HAMMER_BUFSIZE;
		KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
		buf_no = bclu_offset / HAMMER_BUFSIZE;
		hammer_alist_free(&cluster->alist_master, buf_no, nblks);
		hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
		return;
	}

	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
	live = &cluster->alist_mdata;
	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
	nblks /= HAMMER_DATA_BLKSIZE;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, nblks);
}
void
hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
{
	const int32_t blksize = sizeof(union hammer_record_ondisk);
	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
	hammer_alist_t live;
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
	live = &cluster->alist_record;
	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
	elm_no += fsbuf_offset / blksize;
	hammer_alist_free(live, elm_no, 1);
}
/*
 * Allocate a new filesystem buffer and assign it to the specified
 * filesystem buffer type.  The new buffer will be added to the
 * type-specific A-list and initialized.
 *
 * Buffers used for records will also be added to the clu_record_buf_bitmap.
 */
static void
alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
		 int32_t start, int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t nelements;
	int isfwd;

	if (*bufferp)
		hammer_rel_buffer(*bufferp, 0);
	*bufferp = NULL;

	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */
	isfwd = (type != HAMMER_FSBUF_RECORDS);
	buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
	if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
		*errorp = ENOSPC;
		return;
	}

	/*
	 * The new buffer must be initialized (type != 0) regardless of
	 * whether we already have it cached or not, so don't try to
	 * optimize the cached buffer check.  Just call hammer_get_buffer().
	 */
	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
	*bufferp = buffer;

	/*
	 * Do a meta-free of the buffer's elements into the type-specific
	 * A-list and update our statistics to reflect the allocation.
	 */
	if (buffer) {
		nelements = HAMMER_FSBUF_MAXBLKS; /* slave elements per buffer
						   * (assumed for the debug
						   * printf) */
		kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
			buf_no, type, nelements);

		hammer_modify_buffer(buffer);  /*XXX*/
		hammer_adjust_stats(cluster, type, 1);

		/*
		 * Free the buffer to the appropriate slave list so the
		 * cluster-based allocator sees it.
		 */
		hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
				  HAMMER_FSBUF_MAXBLKS);
	}

	/*
	 * And, finally, update clu_record_buf_bitmap for record buffers.
	 * Since buffers are synced to disk before their associated cluster
	 * header, a recovery operation will only see synced record buffers
	 * in the bitmap.  XXX We can't use alist_record for recovery due
	 * to the way we currently manage it.
	 */
	if (buffer && type == HAMMER_FSBUF_RECORDS) {
		KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
		hammer_modify_cluster(cluster);
		cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
			(1 << (buf_no & 31));
	}
}
/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	kprintf("hammer_sync\n");
	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}
static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}
static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	if (vp->v_type != VCHR) {
		error = VOP_FSYNC(vp, info->waitfor);
		if (error)
			info->error = error;
	}
	return(0);
}
int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	if (hammer_ref_volume(volume) == 0)
		hammer_rel_volume(volume, 1);
	return(0);
}
int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	/*hammer_io_waitdep(&cluster->io);*/
	if (hammer_ref_cluster(cluster) == 0)
		hammer_rel_cluster(cluster, 1);
	return(0);
}
int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	if (hammer_ref_buffer(buffer) == 0) {
		hammer_lock_ex(&buffer->io.lock);
		hammer_flush_buffer_nodes(buffer);
		hammer_unlock(&buffer->io.lock);
		hammer_rel_buffer(buffer, 1);
	}
	return(0);
}
/*
 * Generic buffer initialization
 */
static void
hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;

	switch(type) {
	case HAMMER_FSBUF_BTREE:
		hammer_alist_init(live, 0, HAMMER_BTREE_NODES, HAMMER_ASTATE_FREE);
		break;
	case HAMMER_FSBUF_DATA:
		hammer_alist_init(live, 0, HAMMER_DATA_NODES, HAMMER_ASTATE_FREE);
		break;
	case HAMMER_FSBUF_RECORDS:
		hammer_alist_init(live, 0, HAMMER_RECORD_NODES, HAMMER_ASTATE_FREE);
		break;
	default:
		hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
		break;
	}
}
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;

		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP))
		      * volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	KKASSERT(volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
	if (scl_group) {
		scl_group_size =
			    ((int64_t)HAMMER_BUFSIZE *
			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base + (scl_group * scl_group_size) +
		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
	} else {
		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
	}
	return(off);
}
/*
 * Allocate nblks buffers from the cluster's master alist.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
		    int32_t start, int isfwd)
{
	int32_t buf_no;

	hammer_modify_cluster(cluster);
	if (isfwd) {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, 0);
		}
	} else {
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, HAMMER_ALIST_BLOCK_MAX);
		}
	}

	/*
	 * Recover space from empty record, b-tree, and data a-lists.
	 */

	return(buf_no);
}
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
	hammer_modify_cluster(cluster);
	hammer_modify_volume(cluster->volume);
	hammer_modify_volume(cluster->volume->hmp->rootvol);

	switch(buf_type) {
	case HAMMER_FSBUF_BTREE:
		cluster->ondisk->stat_idx_bufs += nblks;
		cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs +=
			nblks;
		break;
	case HAMMER_FSBUF_DATA:
		cluster->ondisk->stat_data_bufs += nblks;
		cluster->volume->ondisk->vol_stat_data_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs +=
			nblks;
		break;
	case HAMMER_FSBUF_RECORDS:
		cluster->ondisk->stat_rec_bufs += nblks;
		cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs +=
			nblks;
		break;
	}
}
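/*
 * Editor's sketch: the same delta is applied at all three bookkeeping
 * levels, so e.g. a hypothetical call
 *
 *	hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, 1);
 *
 * bumps the data-buffer count in the cluster header, in that cluster's
 * volume header, and in the root volume's filesystem-wide vol0
 * statistics; a negative nblks subtracts symmetrically.
 */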
/*
 * Setup the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */

/*
 * A-LIST - cluster recursion into a filesystem buffer
 *
 * In the init case the buffer has already been initialized by
 * alloc_new_buffer() when it allocated the buffer out of the master
 * alist and marked it as free in the slave alist.
 *
 * Because we use a somewhat odd mechanism to assign buffers to slave
 * pools we can't actually free the buffer back to the master alist in
 * buffer_alist_destroy(), but instead must deal with that logic
 * somewhere else.
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix,
		  hammer_alloc_state_t state)
{
	return(0);
}
static int
buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_modify_buffer(buffer);
		error = hammer_alist_recover(&buffer->alist, blk, 0, count);
		/* free block count is returned if >= 0 */
		hammer_rel_buffer(buffer, 0);
	} else {
		error = -error;
	}
	return(error);
}
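/*
 * Editor's note: per the inline comment above, hammer_alist_recover()
 * hands back the recovered free-block count on success, so a
 * non-negative return from this callback is a count while a failed
 * buffer fetch comes back as the negated errno (error = -error).  The
 * caller can therefore distinguish the two cases from the sign alone.
 */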
/*
 * Note: This routine is only called when freeing the last elements of
 * an initialized buffer.  Freeing all elements of the buffer when the
 * buffer was not previously initialized does not call this routine.
 */
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_cluster_t cluster = info;
	int32_t buf_no;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no,
		cluster->clu_no, buf_no);
	return(0);
}
/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);

		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;	/* convert to absolute block number */
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;	/* convert to absolute block number */
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
		  int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		hammer_alist_free(&buffer->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&buffer->alist);
		/* XXX don't bother updating the buffer if it is
		 * completely empty? */
		hammer_rel_buffer(buffer, 0);
	} else {
		*emptyp = 0;
	}
}
static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-List's are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered A-List
 * as a single entity.
 */

/*
 * This occurs when allocating a cluster via the volume a-list and the
 * entry in the volume a-list indicated all-free.  The underlying supercl
 * has not yet been initialized.
 */
static int
super_alist_init(void *info, int32_t blk, int32_t radix,
		 hammer_alloc_state_t state)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, state);
	if (supercl)
		hammer_rel_supercl(supercl, 0);
	return(error);
}
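/*
 * Editor's sketch: Vol_super_alist_config (see hammer_init_alist_config()
 * below) gives the volume-level a-list a leaf radix of
 * HAMMER_SCL_MAXCLUSTERS, so an operation on cluster block blk recurses
 * into supercluster blk / HAMMER_SCL_MAXCLUSTERS through these
 * callbacks.  The two on-disk layers are thereby driven as one logical
 * free-map.
 */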
static int
super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error,
				     HAMMER_ASTATE_NONE);
	if (supercl) {
		hammer_modify_supercl(supercl);
		error = hammer_alist_recover(&supercl->alist, blk, 0, count);
		/* free block count is returned if >= 0 */
		hammer_rel_supercl(supercl, 0);
	} else {
		error = -error;
	}
	return(error);
}
/*
 * This occurs when freeing a cluster via the volume a-list and the
 * supercl is now 100% free.  We can destroy the supercl.
 *
 * What we actually do is just unset the modify bit so it doesn't get
 * written out.
 */
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	if (hammer_find_supercl(volume, scl_no)) {
		supercl = hammer_get_supercl(volume, scl_no, &error,
					     HAMMER_ASTATE_FREE);
		if (supercl) {
			hammer_io_clear_modify(&supercl->io);
			hammer_rel_supercl(supercl, 0);
		}
	}
	return(error);
}
static int
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_fwd(&supercl->alist, count,
					   atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;	/* convert to absolute block number */
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_rev(&supercl->alist, count,
					   atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;	/* convert to absolute block number */
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
super_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		hammer_alist_free(&supercl->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		*emptyp = 0;
	}
}
static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
void
hammer_init_alist_config(void)
{
	hammer_alist_config_t config;

	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR);
	hammer_alist_template(&Vol_super_alist_config,
			  HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);

	config = &Vol_super_alist_config;
	config->bl_radix_init = super_alist_init;
	config->bl_radix_recover = super_alist_recover;
	config->bl_radix_destroy = super_alist_destroy;
	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
	config->bl_radix_alloc_rev = super_alist_alloc_rev;
	config->bl_radix_free = super_alist_free;
	config->bl_radix_print = super_alist_print;

	config = &Clu_slave_alist_config;
	config->bl_radix_init = buffer_alist_init;
	config->bl_radix_recover = buffer_alist_recover;
	config->bl_radix_destroy = buffer_alist_destroy;
	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
	config->bl_radix_free = buffer_alist_free;
	config->bl_radix_print = buffer_alist_print;
}
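/*
 * Editor's note: the two chained configurations mirror each other.  The
 * volume's two-layer a-list spans HAMMER_VOL_MAXSUPERCLUSTERS *
 * HAMMER_SCL_MAXCLUSTERS clusters with one supercluster per leaf, and
 * the cluster's slave a-list spans HAMMER_CLU_MAXBUFFERS *
 * HAMMER_FSBUF_MAXBLKS blocks with one filesystem buffer per
 * HAMMER_FSBUF_MAXBLKS-block leaf; the callbacks installed above supply
 * the recursion at each leaf boundary.
 */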