HAMMER 17/many: Refactor IO backend, clean up buffer cache deadlocks.
[dragonfly.git] / sys/vfs/hammer/hammer_ondisk.c
blob 21af44f2a0786444a193758d5461951252eb1da0
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.18 2008/01/10 07:41:03 dillon Exp $
37 * Manage HAMMER's on-disk structures. These routines are primarily
38 * responsible for interfacing with the kernel's I/O subsystem and for
39 * managing in-memory structures.
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl,
51 hammer_alloc_state_t isnew);
52 static int hammer_load_cluster(hammer_cluster_t cluster,
53 hammer_alloc_state_t isnew);
54 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
55 static int hammer_load_node(hammer_node_t node);
56 static void alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type,
57 hammer_alist_t live,
58 int32_t start, int *errorp,
59 struct hammer_buffer **bufferp);
60 #if 0
61 static void readhammerbuf(hammer_volume_t vol, void *data,
62 int64_t offset);
63 static void writehammerbuf(hammer_volume_t vol, const void *data,
64 int64_t offset);
65 #endif
66 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
67 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
68 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
69 int32_t start, int isfwd);
70 static void hammer_adjust_stats(hammer_cluster_t cluster,
71 u_int64_t buf_type, int nblks);
73 struct hammer_alist_config Buf_alist_config;
74 struct hammer_alist_config Vol_normal_alist_config;
75 struct hammer_alist_config Vol_super_alist_config;
76 struct hammer_alist_config Supercl_alist_config;
77 struct hammer_alist_config Clu_master_alist_config;
78 struct hammer_alist_config Clu_slave_alist_config;
81 * Red-Black tree support for various structures
83 static int
84 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
86 if (ip1->obj_id < ip2->obj_id)
87 return(-1);
88 if (ip1->obj_id > ip2->obj_id)
89 return(1);
90 if (ip1->obj_asof < ip2->obj_asof)
91 return(-1);
92 if (ip1->obj_asof > ip2->obj_asof)
93 return(1);
94 return(0);
97 static int
98 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
100 if (info->obj_id < ip->obj_id)
101 return(-1);
102 if (info->obj_id > ip->obj_id)
103 return(1);
104 if (info->obj_asof < ip->obj_asof)
105 return(-1);
106 if (info->obj_asof > ip->obj_asof)
107 return(1);
108 return(0);
111 static int
112 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
114 if (vol1->vol_no < vol2->vol_no)
115 return(-1);
116 if (vol1->vol_no > vol2->vol_no)
117 return(1);
118 return(0);
121 static int
122 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
124 if (cl1->scl_no < cl2->scl_no)
125 return(-1);
126 if (cl1->scl_no > cl2->scl_no)
127 return(1);
128 return(0);
131 static int
132 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
134 if (cl1->clu_no < cl2->clu_no)
135 return(-1);
136 if (cl1->clu_no > cl2->clu_no)
137 return(1);
138 return(0);
141 static int
142 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
144 if (buf1->buf_no < buf2->buf_no)
145 return(-1);
146 if (buf1->buf_no > buf2->buf_no)
147 return(1);
148 return(0);
151 static int
152 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
154 if (node1->node_offset < node2->node_offset)
155 return(-1);
156 if (node1->node_offset > node2->node_offset)
157 return(1);
158 return(0);
162 * Note: The lookup function for hammer_ino_rb_tree winds up being named
163 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info). The other lookup
164 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
166 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
167 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
168 hammer_inode_info_cmp, hammer_inode_info_t);
169 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
170 hammer_vol_rb_compare, int32_t, vol_no);
171 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
172 hammer_scl_rb_compare, int32_t, scl_no);
173 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
174 hammer_clu_rb_compare, int32_t, clu_no);
175 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
176 hammer_buf_rb_compare, int32_t, buf_no);
177 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
178 hammer_nod_rb_compare, int32_t, node_offset);
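/*
 * Editor's note (illustrative only, not part of the original revision):
 * the generated lookups are used roughly as follows.  The inode tree
 * root field name is assumed here purely for the sake of the example.
 *
 *	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
 */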
180 /************************************************************************
181 * VOLUMES *
182 ************************************************************************
184 * Load a HAMMER volume by name. Returns 0 on success or a positive error
185 * code on failure. Volumes must be loaded at mount time, get_volume() will
186 * not load a new volume.
188 * Calls made to hammer_load_volume() are single-threaded.
191 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
193 struct mount *mp;
194 hammer_volume_t volume;
195 struct hammer_volume_ondisk *ondisk;
196 struct nlookupdata nd;
197 struct buf *bp = NULL;
198 int error;
199 int ronly;
201 mp = hmp->mp;
202 ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
205 * Allocate a volume structure
207 ++hammer_count_volumes;
208 volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
209 volume->vol_name = kstrdup(volname, M_HAMMER);
210 volume->hmp = hmp;
211 hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME);
212 volume->io.offset = 0LL;
215 * Get the device vnode
217 error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
218 if (error == 0)
219 error = nlookup(&nd);
220 if (error == 0)
221 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
222 nlookup_done(&nd);
223 if (error == 0) {
224 vn_isdisk(volume->devvp, &error);
226 if (error == 0) {
227 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
228 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
229 FSCRED, NULL);
230 vn_unlock(volume->devvp);
232 if (error) {
233 hammer_free_volume(volume);
234 return(error);
238 * Extract the volume number from the volume header and do various
239 * sanity checks.
241 error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
242 if (error)
243 goto late_failure;
244 ondisk = (void *)bp->b_data;
245 if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
246 kprintf("hammer_mount: volume %s has an invalid header\n",
247 volume->vol_name);
248 error = EFTYPE;
249 goto late_failure;
251 volume->vol_no = ondisk->vol_no;
252 volume->cluster_base = ondisk->vol_clo_beg;
253 volume->vol_clsize = ondisk->vol_clsize;
254 volume->vol_flags = ondisk->vol_flags;
255 volume->nblocks = ondisk->vol_nblocks;
256 RB_INIT(&volume->rb_clus_root);
257 RB_INIT(&volume->rb_scls_root);
259 hmp->mp->mnt_stat.f_blocks += volume->nblocks;
261 if (RB_EMPTY(&hmp->rb_vols_root)) {
262 hmp->fsid = ondisk->vol_fsid;
263 } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
264 kprintf("hammer_mount: volume %s's fsid does not match "
265 "other volumes\n", volume->vol_name);
266 error = EFTYPE;
267 goto late_failure;
271 * Insert the volume structure into the red-black tree.
273 if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
274 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
275 volume->vol_name, volume->vol_no);
276 error = EEXIST;
280 * Set the root volume and load the root cluster. HAMMER special
281 * cases rootvol and rootcl and will not deallocate the structures.
282 * We do not hold a ref because this would prevent related I/O
283 * from being flushed.
285 if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
286 hmp->rootvol = volume;
287 hmp->rootcl = hammer_get_cluster(volume,
288 ondisk->vol0_root_clu_no,
289 &error, 0);
290 hammer_rel_cluster(hmp->rootcl, 0);
291 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
293 late_failure:
294 if (bp)
295 brelse(bp);
296 if (error) {
297 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
298 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
299 hammer_free_volume(volume);
301 return (error);
305 * Unload and free a HAMMER volume. Must return >= 0 for the RB_SCAN to
306 * continue; -1 is returned only on failure.
309 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
311 struct hammer_mount *hmp = volume->hmp;
312 hammer_cluster_t rootcl;
313 int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
316 * Sync clusters, sync volume
319 hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
322 * Clean up the root cluster, which is held unlocked in the root
323 * volume.
325 if (hmp->rootvol == volume) {
326 if ((rootcl = hmp->rootcl) != NULL)
327 hmp->rootcl = NULL;
328 hmp->rootvol = NULL;
332 * Unload clusters and super-clusters. Unloading a super-cluster
333 * also unloads related clusters, but the filesystem may not be
334 * using super-clusters so unload clusters anyway.
336 RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
337 hammer_unload_cluster, NULL);
338 RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
339 hammer_unload_supercl, NULL);
340 hammer_io_waitdep(&volume->io);
343 * Release our buffer and flush anything left in the buffer cache.
345 hammer_io_release(&volume->io, 2);
348 * There should be no references on the volume, no clusters, and
349 * no super-clusters.
351 KKASSERT(volume->io.lock.refs == 0);
352 KKASSERT(RB_EMPTY(&volume->rb_clus_root));
353 KKASSERT(RB_EMPTY(&volume->rb_scls_root));
355 volume->ondisk = NULL;
356 if (volume->devvp) {
357 if (ronly) {
358 vinvalbuf(volume->devvp, 0, 0, 0);
359 VOP_CLOSE(volume->devvp, FREAD);
360 } else {
361 vinvalbuf(volume->devvp, V_SAVE, 0, 0);
362 VOP_CLOSE(volume->devvp, FREAD|FWRITE);
367 * Destroy the structure
369 RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
370 hammer_free_volume(volume);
371 return(0);
374 static
375 void
376 hammer_free_volume(hammer_volume_t volume)
378 if (volume->vol_name) {
379 kfree(volume->vol_name, M_HAMMER);
380 volume->vol_name = NULL;
382 if (volume->devvp) {
383 vrele(volume->devvp);
384 volume->devvp = NULL;
386 --hammer_count_volumes;
387 kfree(volume, M_HAMMER);
391 * Get a HAMMER volume. The volume must already exist.
393 hammer_volume_t
394 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
396 struct hammer_volume *volume;
399 * Locate the volume structure
401 volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
402 if (volume == NULL) {
403 *errorp = ENOENT;
404 return(NULL);
406 hammer_ref(&volume->io.lock);
409 * Deal with on-disk info
411 if (volume->ondisk == NULL) {
412 *errorp = hammer_load_volume(volume);
413 if (*errorp) {
414 hammer_rel_volume(volume, 1);
415 volume = NULL;
417 } else {
418 *errorp = 0;
420 return(volume);
424 hammer_ref_volume(hammer_volume_t volume)
426 int error;
428 hammer_ref(&volume->io.lock);
431 * Deal with on-disk info
433 if (volume->ondisk == NULL) {
434 error = hammer_load_volume(volume);
435 if (error)
436 hammer_rel_volume(volume, 1);
437 } else {
438 error = 0;
440 return (error);
443 hammer_volume_t
444 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
446 hammer_volume_t volume;
448 volume = hmp->rootvol;
449 KKASSERT(volume != NULL);
450 hammer_ref(&volume->io.lock);
453 * Deal with on-disk info
455 if (volume->ondisk == NULL) {
456 *errorp = hammer_load_volume(volume);
457 if (*errorp) {
458 hammer_rel_volume(volume, 1);
459 volume = NULL;
461 } else {
462 *errorp = 0;
464 return (volume);
468 * Load a volume's on-disk information. The volume must be referenced and
469 * not locked. We temporarily acquire an exclusive lock to interlock
470 * against releases or multiple gets.
472 static int
473 hammer_load_volume(hammer_volume_t volume)
475 struct hammer_volume_ondisk *ondisk;
476 int error;
478 hammer_lock_ex(&volume->io.lock);
479 if (volume->ondisk == NULL) {
480 error = hammer_io_read(volume->devvp, &volume->io);
481 if (error) {
482 hammer_unlock(&volume->io.lock);
483 return (error);
485 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
488 * Configure the volume's A-lists. These are used to
489 * allocate clusters.
491 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
492 volume->alist.config = &Vol_super_alist_config;
493 volume->alist.meta = ondisk->vol_almeta.super;
494 volume->alist.info = volume;
495 } else {
496 volume->alist.config = &Vol_normal_alist_config;
497 volume->alist.meta = ondisk->vol_almeta.normal;
498 volume->alist.info = NULL;
500 } else {
501 error = 0;
503 hammer_unlock(&volume->io.lock);
504 return(0);
508 * Release a volume. Call hammer_io_release on the last reference. We have
509 * to acquire an exclusive lock to interlock against volume->ondisk tests
510 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
511 * lock to be held.
513 * Volumes are not unloaded from memory during normal operation.
515 void
516 hammer_rel_volume(hammer_volume_t volume, int flush)
518 if (volume->io.lock.refs == 1) {
519 hammer_lock_ex(&volume->io.lock);
520 if (volume->io.lock.refs == 1) {
521 volume->ondisk = NULL;
522 hammer_io_release(&volume->io, flush);
523 } else if (flush) {
524 hammer_io_flush(&volume->io);
526 hammer_unlock(&volume->io.lock);
528 hammer_unref(&volume->io.lock);
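/*
 * Editor's note: a minimal usage sketch of the volume reference cycle
 * implemented above (not part of the original source).
 *
 *	hammer_volume_t volume;
 *	int error;
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		... access volume->ondisk ...
 *		hammer_rel_volume(volume, 0);
 *	}
 */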
531 /************************************************************************
532 * SUPER-CLUSTERS *
533 ************************************************************************
535 * Manage super-clusters. Note that a supercl holds a reference to its
536 * associated volume.
538 static int
539 hammer_find_supercl(hammer_volume_t volume, int32_t scl_no)
541 if (RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no))
542 return(1);
543 return(0);
546 hammer_supercl_t
547 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
548 int *errorp, hammer_alloc_state_t isnew)
550 hammer_supercl_t supercl;
553 * Locate and lock the super-cluster structure, creating one
554 * if necessary.
556 again:
557 supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
558 if (supercl == NULL) {
559 ++hammer_count_supercls;
560 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
561 supercl->scl_no = scl_no;
562 supercl->volume = volume;
563 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
564 hammer_io_init(&supercl->io, HAMMER_STRUCTURE_SUPERCL);
565 hammer_ref(&supercl->io.lock);
568 * Insert the cluster into the RB tree and handle late
569 * collisions.
571 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
572 hammer_unref(&supercl->io.lock);
573 --hammer_count_supercls;
574 kfree(supercl, M_HAMMER);
575 goto again;
577 hammer_ref(&volume->io.lock);
578 } else {
579 hammer_ref(&supercl->io.lock);
583 * Deal with on-disk info
585 if (supercl->ondisk == NULL || isnew) {
586 *errorp = hammer_load_supercl(supercl, isnew);
587 if (*errorp) {
588 hammer_rel_supercl(supercl, 1);
589 supercl = NULL;
591 } else {
592 *errorp = 0;
594 return(supercl);
597 static int
598 hammer_load_supercl(hammer_supercl_t supercl, hammer_alloc_state_t isnew)
600 struct hammer_supercl_ondisk *ondisk;
601 hammer_volume_t volume = supercl->volume;
602 int error;
603 int64_t nclusters;
605 hammer_lock_ex(&supercl->io.lock);
606 if (supercl->ondisk == NULL) {
607 if (isnew)
608 error = hammer_io_new(volume->devvp, &supercl->io);
609 else
610 error = hammer_io_read(volume->devvp, &supercl->io);
611 if (error) {
612 hammer_unlock(&supercl->io.lock);
613 return (error);
615 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
617 supercl->alist.config = &Supercl_alist_config;
618 supercl->alist.meta = ondisk->scl_meta;
619 supercl->alist.info = NULL;
620 } else if (isnew) {
621 error = hammer_io_new(volume->devvp, &supercl->io);
622 } else {
623 error = 0;
625 if (error == 0 && isnew) {
627 * If this is a new super-cluster we have to initialize
628 * various ondisk structural elements. The caller is
629 * responsible for the remainder.
631 struct hammer_alist_live dummy;
633 hammer_modify_supercl(supercl);
635 ondisk = supercl->ondisk;
636 dummy.config = &Buf_alist_config;
637 dummy.meta = ondisk->head.buf_almeta;
638 dummy.info = NULL;
639 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
641 nclusters = volume->ondisk->vol_nclusters -
642 ((int64_t)supercl->scl_no * HAMMER_SCL_MAXCLUSTERS);
643 KKASSERT(nclusters > 0);
644 if (nclusters > HAMMER_SCL_MAXCLUSTERS)
645 nclusters = HAMMER_SCL_MAXCLUSTERS;
646 hammer_alist_init(&supercl->alist, 0, (int32_t)nclusters,
647 isnew);
649 hammer_unlock(&supercl->io.lock);
650 return (error);
654 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
657 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
659 KKASSERT(supercl->io.lock.refs == 0);
660 hammer_ref(&supercl->io.lock);
661 hammer_rel_supercl(supercl, 2);
662 return(0);
666 * Release a super-cluster. We have to deal with several places where
667 * another thread can ref the super-cluster.
669 * Only destroy the structure itself if the related buffer cache buffer
670 * was disassociated from it. This ties the management of the structure
671 * to the buffer cache subsystem.
673 void
674 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
676 hammer_volume_t volume;
678 if (supercl->io.lock.refs == 1) {
679 hammer_lock_ex(&supercl->io.lock);
680 if (supercl->io.lock.refs == 1) {
681 hammer_io_release(&supercl->io, flush);
682 if (supercl->io.bp == NULL &&
683 supercl->io.lock.refs == 1) {
684 volume = supercl->volume;
685 RB_REMOVE(hammer_scl_rb_tree,
686 &volume->rb_scls_root, supercl);
687 supercl->volume = NULL; /* sanity */
688 --hammer_count_supercls;
689 kfree(supercl, M_HAMMER);
690 hammer_rel_volume(volume, 0);
691 return;
693 } else if (flush) {
694 hammer_io_flush(&supercl->io);
696 hammer_unlock(&supercl->io.lock);
698 hammer_unref(&supercl->io.lock);
701 /************************************************************************
702 * CLUSTERS *
703 ************************************************************************
706 hammer_cluster_t
707 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
708 int *errorp, hammer_alloc_state_t isnew)
710 hammer_cluster_t cluster;
712 again:
713 cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
714 if (cluster == NULL) {
715 ++hammer_count_clusters;
716 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
717 cluster->clu_no = clu_no;
718 cluster->volume = volume;
719 RB_INIT(&cluster->rb_bufs_root);
720 RB_INIT(&cluster->rb_nods_root);
721 hammer_io_init(&cluster->io, HAMMER_STRUCTURE_CLUSTER);
722 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
723 hammer_ref(&cluster->io.lock);
726 * Insert the cluster into the RB tree and handle late
727 * collisions.
729 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
730 hammer_unref(&cluster->io.lock);
731 --hammer_count_clusters;
732 kfree(cluster, M_HAMMER);
733 goto again;
735 hammer_ref(&volume->io.lock);
736 } else {
737 hammer_ref(&cluster->io.lock);
741 * Deal with on-disk info
743 if (cluster->ondisk == NULL || isnew) {
744 *errorp = hammer_load_cluster(cluster, isnew);
745 if (*errorp) {
746 hammer_rel_cluster(cluster, 1);
747 cluster = NULL;
749 } else {
750 *errorp = 0;
752 return (cluster);
755 hammer_cluster_t
756 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
758 hammer_cluster_t cluster;
760 cluster = hmp->rootcl;
761 KKASSERT(cluster != NULL);
762 hammer_ref(&cluster->io.lock);
765 * Deal with on-disk info
767 if (cluster->ondisk == NULL) {
768 *errorp = hammer_load_cluster(cluster, 0);
769 if (*errorp) {
770 hammer_rel_cluster(cluster, 1);
771 cluster = NULL;
773 } else {
774 *errorp = 0;
776 return (cluster);
779 static
781 hammer_load_cluster(hammer_cluster_t cluster, hammer_alloc_state_t isnew)
783 hammer_volume_t volume = cluster->volume;
784 struct hammer_cluster_ondisk *ondisk;
785 int error;
788 * Load the cluster's on-disk info
790 hammer_lock_ex(&cluster->io.lock);
791 if (cluster->ondisk == NULL) {
792 if (isnew)
793 error = hammer_io_new(volume->devvp, &cluster->io);
794 else
795 error = hammer_io_read(volume->devvp, &cluster->io);
796 if (error) {
797 hammer_unlock(&cluster->io.lock);
798 return (error);
800 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
802 cluster->alist_master.config = &Clu_master_alist_config;
803 cluster->alist_master.meta = ondisk->clu_master_meta;
804 cluster->alist_btree.config = &Clu_slave_alist_config;
805 cluster->alist_btree.meta = ondisk->clu_btree_meta;
806 cluster->alist_btree.info = cluster;
807 cluster->alist_record.config = &Clu_slave_alist_config;
808 cluster->alist_record.meta = ondisk->clu_record_meta;
809 cluster->alist_record.info = cluster;
810 cluster->alist_mdata.config = &Clu_slave_alist_config;
811 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
812 cluster->alist_mdata.info = cluster;
814 if (isnew == 0) {
816 * Recover a cluster that was marked open. This
817 * can be rather involved and block for a hefty
818 * chunk of time.
820 if (ondisk->clu_flags & HAMMER_CLUF_OPEN)
821 hammer_recover(cluster);
823 cluster->clu_btree_beg = ondisk->clu_btree_beg;
824 cluster->clu_btree_end = ondisk->clu_btree_end;
826 } else if (isnew) {
827 error = hammer_io_new(volume->devvp, &cluster->io);
828 } else {
829 error = 0;
831 if (error == 0 && isnew) {
833 * If this is a new cluster we have to initialize
834 * various ondisk structural elements. The caller is
835 * responsible for the remainder.
837 struct hammer_alist_live dummy;
838 hammer_node_t croot;
839 hammer_volume_ondisk_t voldisk;
840 int32_t nbuffers;
842 hammer_modify_cluster(cluster);
843 ondisk = cluster->ondisk;
844 voldisk = volume->ondisk;
846 dummy.config = &Buf_alist_config;
847 dummy.meta = ondisk->head.buf_almeta;
848 dummy.info = NULL;
849 hammer_initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
851 ondisk->vol_fsid = voldisk->vol_fsid;
852 ondisk->vol_fstype = voldisk->vol_fstype;
853 ondisk->clu_gen = 1;
854 ondisk->clu_id = 0; /* XXX */
855 ondisk->clu_no = cluster->clu_no;
856 ondisk->clu_flags = 0;
857 ondisk->clu_start = HAMMER_BUFSIZE;
858 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
859 if (voldisk->vol_clo_end - cluster->io.offset >
860 voldisk->vol_clsize) {
861 ondisk->clu_limit = voldisk->vol_clsize;
862 } else {
863 ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
864 cluster->io.offset);
866 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
867 KKASSERT(isnew == HAMMER_ASTATE_FREE);
868 hammer_alist_init(&cluster->alist_master, 1, nbuffers - 1,
869 HAMMER_ASTATE_FREE);
870 hammer_alist_init(&cluster->alist_btree,
871 HAMMER_FSBUF_MAXBLKS,
872 (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
873 HAMMER_ASTATE_ALLOC);
874 hammer_alist_init(&cluster->alist_record,
875 HAMMER_FSBUF_MAXBLKS,
876 (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
877 HAMMER_ASTATE_ALLOC);
878 hammer_alist_init(&cluster->alist_mdata,
879 HAMMER_FSBUF_MAXBLKS,
880 (nbuffers - 1) * HAMMER_FSBUF_MAXBLKS,
881 HAMMER_ASTATE_ALLOC);
883 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
884 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
885 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
888 * Initialize the B-Tree. We don't know what the caller
889 * intends to do with the cluster so make sure it causes
890 * an assertion if the caller makes no changes.
892 ondisk->clu_btree_parent_vol_no = -2;
893 ondisk->clu_btree_parent_clu_no = -2;
894 ondisk->clu_btree_parent_offset = -2;
895 ondisk->clu_btree_parent_clu_gen = -2;
897 croot = hammer_alloc_btree(cluster, &error);
898 if (error == 0) {
899 hammer_modify_node(croot);
900 bzero(croot->ondisk, sizeof(*croot->ondisk));
901 croot->ondisk->count = 0;
902 croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
903 hammer_modify_cluster(cluster);
904 ondisk->clu_btree_root = croot->node_offset;
905 hammer_rel_node(croot);
908 hammer_unlock(&cluster->io.lock);
909 return (error);
913 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
916 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
918 hammer_ref(&cluster->io.lock);
919 RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
920 hammer_unload_buffer, NULL);
921 hammer_io_waitdep(&cluster->io);
922 KKASSERT(cluster->io.lock.refs == 1);
923 hammer_rel_cluster(cluster, 2);
924 return(0);
928 * Update the cluster's synchronization TID, which is used during cluster
929 * recovery. NOTE: The cluster header is not written out until all related
930 * records have been written out.
932 void
933 hammer_update_syncid(hammer_cluster_t cluster, hammer_tid_t tid)
935 hammer_modify_cluster(cluster);
936 if (cluster->ondisk->synchronized_tid < tid)
937 cluster->ondisk->synchronized_tid = tid;
941 * Reference a cluster that is either already referenced or via a specially
942 * handled pointer (aka rootcl).
945 hammer_ref_cluster(hammer_cluster_t cluster)
947 int error;
949 KKASSERT(cluster != NULL);
950 hammer_ref(&cluster->io.lock);
953 * Deal with on-disk info
955 if (cluster->ondisk == NULL) {
956 error = hammer_load_cluster(cluster, 0);
957 if (error)
958 hammer_rel_cluster(cluster, 1);
959 } else {
960 error = 0;
962 return(error);
966 * Release a cluster. We have to deal with several places where
967 * another thread can ref the cluster.
969 * Only destroy the structure itself if we no longer have an IO or any
970 * hammer buffers associated with the structure.
972 void
973 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
975 hammer_volume_t volume;
977 if (cluster->io.lock.refs == 1) {
978 hammer_lock_ex(&cluster->io.lock);
979 if (cluster->io.lock.refs == 1) {
981 * Release the I/O. If we or the kernel wants to
982 * flush, this will release the bp. Otherwise the
983 * bp may be written and flushed passively by the
984 * kernel later on.
986 hammer_io_release(&cluster->io, flush);
989 * Final cleanup
991 if (cluster != cluster->volume->hmp->rootcl &&
992 cluster->io.bp == NULL &&
993 cluster->io.lock.refs == 1 &&
994 RB_EMPTY(&cluster->rb_bufs_root)) {
995 KKASSERT(RB_EMPTY(&cluster->rb_nods_root));
996 volume = cluster->volume;
997 RB_REMOVE(hammer_clu_rb_tree,
998 &volume->rb_clus_root, cluster);
999 cluster->volume = NULL; /* sanity */
1000 --hammer_count_clusters;
1001 kfree(cluster, M_HAMMER);
1002 hammer_rel_volume(volume, 0);
1003 return;
1005 } else if (flush) {
1006 hammer_io_flush(&cluster->io);
1008 hammer_unlock(&cluster->io.lock);
1010 hammer_unref(&cluster->io.lock);
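/*
 * Editor's note (illustrative sketch, not original source): ordinary
 * clusters follow the same get/rel pattern as volumes, while the
 * specially handled rootcl pointer is re-referenced in place:
 *
 *	cluster = hammer_get_cluster(volume, clu_no, &error, 0);
 *	...
 *	hammer_rel_cluster(cluster, 0);
 *
 *	if (hammer_ref_cluster(hmp->rootcl) == 0) {
 *		...
 *		hammer_rel_cluster(hmp->rootcl, 0);
 *	}
 */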
1013 /************************************************************************
1014 * BUFFERS *
1015 ************************************************************************
1017 * Manage buffers. Note that a buffer holds a reference to its associated
1018 * cluster, and its cluster will hold a reference to the cluster's volume.
1020 * A non-zero buf_type indicates that a new buffer should be created and
1021 * zero'd.
1023 hammer_buffer_t
1024 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
1025 u_int64_t buf_type, int *errorp)
1027 hammer_buffer_t buffer;
1030 * Find the buffer. Note that buffer 0 corresponds to the cluster
1031 * header and should never be requested.
1033 KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1034 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1037 * Locate and lock the buffer structure, creating one if necessary.
1039 again:
1040 buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1041 if (buffer == NULL) {
1042 ++hammer_count_buffers;
1043 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1044 buffer->buf_no = buf_no;
1045 buffer->cluster = cluster;
1046 buffer->volume = cluster->volume;
1047 hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
1048 buffer->io.offset = cluster->io.offset +
1049 (buf_no * HAMMER_BUFSIZE);
1050 TAILQ_INIT(&buffer->clist);
1051 hammer_ref(&buffer->io.lock);
1054 * Insert the cluster into the RB tree and handle late
1055 * collisions.
1057 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1058 hammer_unref(&buffer->io.lock);
1059 --hammer_count_buffers;
1060 kfree(buffer, M_HAMMER);
1061 goto again;
1063 hammer_ref(&cluster->io.lock);
1064 } else {
1065 hammer_ref(&buffer->io.lock);
1069 * Deal with on-disk info
1071 if (buffer->ondisk == NULL || buf_type) {
1072 *errorp = hammer_load_buffer(buffer, buf_type);
1073 if (*errorp) {
1074 hammer_rel_buffer(buffer, 1);
1075 buffer = NULL;
1077 } else {
1078 *errorp = 0;
1080 return(buffer);
1083 static int
1084 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1086 hammer_volume_t volume;
1087 hammer_fsbuf_ondisk_t ondisk;
1088 int error;
1091 * Load the buffer's on-disk info
1093 volume = buffer->volume;
1094 hammer_lock_ex(&buffer->io.lock);
1095 if (buffer->ondisk == NULL) {
1096 if (buf_type) {
1097 error = hammer_io_new(volume->devvp, &buffer->io);
1098 } else {
1099 error = hammer_io_read(volume->devvp, &buffer->io);
1101 if (error) {
1102 hammer_unlock(&buffer->io.lock);
1103 return (error);
1105 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1106 buffer->alist.config = &Buf_alist_config;
1107 buffer->alist.meta = ondisk->head.buf_almeta;
1108 buffer->buf_type = ondisk->head.buf_type;
1109 } else if (buf_type) {
1110 error = hammer_io_new(volume->devvp, &buffer->io);
1111 } else {
1112 error = 0;
1114 if (error == 0 && buf_type) {
1115 hammer_modify_buffer(buffer);
1116 ondisk = buffer->ondisk;
1117 hammer_initbuffer(&buffer->alist, &ondisk->head, buf_type);
1118 buffer->buf_type = ondisk->head.buf_type;
1120 hammer_unlock(&buffer->io.lock);
1121 return (error);
1125 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1128 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1130 hammer_ref(&buffer->io.lock);
1131 hammer_flush_buffer_nodes(buffer);
1132 KKASSERT(buffer->io.lock.refs == 1);
1133 hammer_rel_buffer(buffer, 2);
1134 return(0);
1138 * Reference a buffer that is either already referenced or via a specially
1139 * handled pointer (aka cursor->buffer).
1142 hammer_ref_buffer(hammer_buffer_t buffer)
1144 int error;
1146 hammer_ref(&buffer->io.lock);
1147 if (buffer->ondisk == NULL) {
1148 error = hammer_load_buffer(buffer, 0);
1149 if (error) {
1150 hammer_rel_buffer(buffer, 1);
1152 * NOTE: buffer pointer can become stale after
1153 * the above release.
1155 } else {
1156 KKASSERT(buffer->buf_type ==
1157 buffer->ondisk->head.buf_type);
1159 } else {
1160 error = 0;
1162 return(error);
1166 * Release a buffer. We have to deal with several places where
1167 * another thread can ref the buffer.
1169 * Only destroy the structure itself if the related buffer cache buffer
1170 * was disassociated from it. This ties the management of the structure
1171 * to the buffer cache subsystem. buffer->ondisk determines whether the
1172 * embedded io is referenced or not.
1174 void
1175 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1177 hammer_cluster_t cluster;
1179 if (buffer->io.lock.refs == 1) {
1180 hammer_lock_ex(&buffer->io.lock);
1181 if (buffer->io.lock.refs == 1) {
1182 hammer_io_release(&buffer->io, flush);
1184 if (buffer->io.bp == NULL &&
1185 buffer->io.lock.refs == 1) {
1186 hammer_flush_buffer_nodes(buffer);
1187 KKASSERT(TAILQ_EMPTY(&buffer->clist));
1188 cluster = buffer->cluster;
1189 RB_REMOVE(hammer_buf_rb_tree,
1190 &cluster->rb_bufs_root, buffer);
1191 buffer->cluster = NULL; /* sanity */
1192 --hammer_count_buffers;
1193 kfree(buffer, M_HAMMER);
1194 hammer_rel_cluster(cluster, 0);
1195 return;
1197 } else if (flush) {
1198 hammer_io_flush(&buffer->io);
1200 hammer_unlock(&buffer->io.lock);
1202 hammer_unref(&buffer->io.lock);
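/*
 * Editor's note (illustrative sketch, not original source): a caller
 * accessing an existing filesystem buffer passes a zero buf_type so the
 * buffer is read rather than created and zero'd:
 *
 *	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
 *	if (buffer) {
 *		... access buffer->ondisk ...
 *		hammer_rel_buffer(buffer, 0);
 *	}
 */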
1205 /************************************************************************
1206 * NODES *
1207 ************************************************************************
1209 * Manage B-Tree nodes. B-Tree nodes represent the primary indexing
1210 * method used by the HAMMER filesystem.
1212 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1213 * associated with its buffer, and will only reference the buffer while
1214 * the node itself is referenced.
1216 * A hammer_node can also be passively associated with other HAMMER
1217 * structures, such as inodes, while retaining 0 references. These
1218 * associations can be cleared backwards using a pointer-to-pointer in
1219 * the hammer_node.
1221 * This allows the HAMMER implementation to cache hammer_nodes long-term
1222 * and short-cut a great deal of the infrastructure's complexity. In
1223 * most cases a cached node can be reacquired without having to dip into
1224 * either the buffer or cluster management code.
1226 * The caller must pass a referenced cluster on call and will retain
1227 * ownership of the reference on return. The node will acquire its own
1228 * additional references, if necessary.
1230 hammer_node_t
1231 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1233 hammer_node_t node;
1236 * Locate the structure, allocating one if necessary.
1238 again:
1239 node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1240 node_offset);
1241 if (node == NULL) {
1242 ++hammer_count_nodes;
1243 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1244 node->node_offset = node_offset;
1245 node->cluster = cluster;
1246 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1247 node)) {
1248 --hammer_count_nodes;
1249 kfree(node, M_HAMMER);
1250 goto again;
1253 hammer_ref(&node->lock);
1254 *errorp = hammer_load_node(node);
1255 if (*errorp) {
1256 hammer_rel_node(node);
1257 node = NULL;
1259 return(node);
1263 * Reference an already-referenced node.
1266 hammer_ref_node(hammer_node_t node)
1268 int error;
1270 KKASSERT(node->lock.refs > 0);
1271 hammer_ref(&node->lock);
1272 if ((error = hammer_load_node(node)) != 0)
1273 hammer_rel_node(node);
1274 return(error);
1278 * Load a node's on-disk data reference.
1280 static int
1281 hammer_load_node(hammer_node_t node)
1283 hammer_buffer_t buffer;
1284 int32_t buf_no;
1285 int error;
1287 if (node->ondisk)
1288 return(0);
1289 error = 0;
1290 hammer_lock_ex(&node->lock);
1291 if (node->ondisk == NULL) {
1293 * This is a little confusing but the gist is that
1294 * node->buffer determines whether the node is on
1295 * the buffer's clist and node->ondisk determines
1296 * whether the buffer is referenced.
1298 if ((buffer = node->buffer) != NULL) {
1299 error = hammer_ref_buffer(buffer);
1300 } else {
1301 buf_no = node->node_offset / HAMMER_BUFSIZE;
1302 buffer = hammer_get_buffer(node->cluster,
1303 buf_no, 0, &error);
1304 if (buffer) {
1305 KKASSERT(error == 0);
1306 TAILQ_INSERT_TAIL(&buffer->clist,
1307 node, entry);
1308 node->buffer = buffer;
1311 if (error == 0) {
1312 node->ondisk = (void *)((char *)buffer->ondisk +
1313 (node->node_offset & HAMMER_BUFMASK));
1316 hammer_unlock(&node->lock);
1317 return (error);
1321 * Safely reference a node, interlock against flushes via the IO subsystem.
1323 hammer_node_t
1324 hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
1325 int *errorp)
1327 hammer_node_t node;
1329 if ((node = *cache) != NULL)
1330 hammer_ref(&node->lock);
1331 if (node) {
1332 *errorp = hammer_load_node(node);
1333 if (*errorp) {
1334 hammer_rel_node(node);
1335 node = NULL;
1337 } else {
1338 *errorp = ENOENT;
1340 return(node);
1344 * Release a hammer_node. On the last release the node dereferences
1345 * its underlying buffer and may or may not be destroyed.
1347 void
1348 hammer_rel_node(hammer_node_t node)
1350 hammer_cluster_t cluster;
1351 hammer_buffer_t buffer;
1352 int32_t node_offset;
1353 int flags;
1356 * If this isn't the last ref just decrement the ref count and
1357 * return.
1359 if (node->lock.refs > 1) {
1360 hammer_unref(&node->lock);
1361 return;
1365 * If there is no ondisk info or no buffer the node failed to load,
1366 * remove the last reference and destroy the node.
1368 if (node->ondisk == NULL) {
1369 hammer_unref(&node->lock);
1370 hammer_flush_node(node);
1371 /* node is stale now */
1372 return;
1376 * Do final cleanups and then either destroy the node or leave it
1377 * passively cached. The buffer reference is removed regardless.
1379 buffer = node->buffer;
1380 node->ondisk = NULL;
1382 if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
1383 hammer_unref(&node->lock);
1384 hammer_rel_buffer(buffer, 0);
1385 return;
1389 * Destroy the node. Record pertinent data because the node
1390 * becomes stale the instant we flush it.
1392 flags = node->flags;
1393 node_offset = node->node_offset;
1394 hammer_unref(&node->lock);
1395 hammer_flush_node(node);
1396 /* node is stale */
1398 cluster = buffer->cluster;
1399 if (flags & HAMMER_NODE_DELETED) {
1400 hammer_free_btree(cluster, node_offset);
1401 if (node_offset == cluster->ondisk->clu_btree_root) {
1402 kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1403 hammer_free_cluster(cluster);
1404 /*hammer_io_undirty(&cluster->io);*/
1407 hammer_rel_buffer(buffer, 0);
1411 * Passively cache a referenced hammer_node in *cache. The caller may
1412 * release the node on return.
1414 void
1415 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1417 hammer_node_t old;
1420 * If the node is being deleted, don't cache it!
1422 if (node->flags & HAMMER_NODE_DELETED)
1423 return;
1426 * Cache the node. If we previously cached a different node we
1427 * have to give HAMMER a chance to destroy it.
1429 again:
1430 if (node->cache1 != cache) {
1431 if (node->cache2 != cache) {
1432 if ((old = *cache) != NULL) {
1433 KKASSERT(node->lock.refs != 0);
1434 hammer_uncache_node(cache);
1435 goto again;
1437 if (node->cache2)
1438 *node->cache2 = NULL;
1439 node->cache2 = node->cache1;
1440 node->cache1 = cache;
1441 *cache = node;
1442 } else {
1443 struct hammer_node **tmp;
1444 tmp = node->cache1;
1445 node->cache1 = node->cache2;
1446 node->cache2 = tmp;
1451 void
1452 hammer_uncache_node(struct hammer_node **cache)
1454 hammer_node_t node;
1456 if ((node = *cache) != NULL) {
1457 *cache = NULL;
1458 if (node->cache1 == cache) {
1459 node->cache1 = node->cache2;
1460 node->cache2 = NULL;
1461 } else if (node->cache2 == cache) {
1462 node->cache2 = NULL;
1463 } else {
1464 panic("hammer_uncache_node: missing cache linkage");
1466 if (node->cache1 == NULL && node->cache2 == NULL)
1467 hammer_flush_node(node);
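/*
 * Editor's note (illustrative sketch, not original source): the passive
 * cache protocol above is typically used as follows.  The **cache slot
 * shown here is hypothetical; in practice it is embedded in a longer
 * lived structure such as an inode.
 *
 *	struct hammer_node *cache = NULL;
 *
 *	node = hammer_get_node(cluster, node_offset, &error);
 *	if (node) {
 *		hammer_cache_node(node, &cache);
 *		hammer_rel_node(node);	(node stays passively cached)
 *	}
 *	...
 *	node = hammer_ref_node_safe(hmp, &cache, &error);
 */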
1472 * Remove a node's cache references and destroy the node if it has no
1473 * other references or backing store.
1475 void
1476 hammer_flush_node(hammer_node_t node)
1478 hammer_buffer_t buffer;
1480 if (node->cache1)
1481 *node->cache1 = NULL;
1482 if (node->cache2)
1483 *node->cache2 = NULL;
1484 if (node->lock.refs == 0 && node->ondisk == NULL) {
1485 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1486 node);
1487 if ((buffer = node->buffer) != NULL) {
1488 node->buffer = NULL;
1489 TAILQ_REMOVE(&buffer->clist, node, entry);
1490 /* buffer is unreferenced because ondisk is NULL */
1492 --hammer_count_nodes;
1493 kfree(node, M_HAMMER);
1498 * Flush passively cached B-Tree nodes associated with this buffer.
1499 * This is only called when the buffer is about to be destroyed, so
1500 * none of the nodes should have any references.
1502 void
1503 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1505 hammer_node_t node;
1507 while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
1508 KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
1509 hammer_ref(&node->lock);
1510 node->flags |= HAMMER_NODE_FLUSH;
1511 hammer_rel_node(node);
1515 /************************************************************************
1516 * A-LIST ALLOCATORS *
1517 ************************************************************************/
1520 * Allocate HAMMER clusters
1522 hammer_cluster_t
1523 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1524 int *errorp)
1526 hammer_volume_t volume;
1527 hammer_cluster_t cluster;
1528 int32_t clu_no;
1529 int32_t clu_hint;
1530 int32_t vol_beg;
1531 int32_t vol_no;
1534 * Figure out our starting volume and hint.
1536 if (cluster_hint) {
1537 vol_beg = cluster_hint->volume->vol_no;
1538 clu_hint = cluster_hint->clu_no;
1539 } else {
1540 vol_beg = hmp->volume_iterator;
1541 clu_hint = -1;
1545 * Loop through volumes looking for a free cluster. If allocating
1546 * a new cluster relative to an existing cluster try to find a free
1547 * cluster on either side (clu_hint >= 0), otherwise just do a
1548 * forwards iteration.
1550 vol_no = vol_beg;
1551 do {
1552 volume = hammer_get_volume(hmp, vol_no, errorp);
1553 kprintf("VOLUME %p %d\n", volume, vol_no);
1554 if (*errorp) {
1555 clu_no = HAMMER_ALIST_BLOCK_NONE;
1556 break;
1558 hammer_modify_volume(volume);
1559 if (clu_hint == -1) {
1560 clu_hint = volume->clu_iterator;
1561 clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1562 clu_hint);
1563 if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1564 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1565 1, 0);
1567 } else {
1568 clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1569 clu_hint);
1570 if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1571 clu_no = hammer_alist_alloc_rev(&volume->alist,
1572 1, clu_hint);
1575 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1576 break;
1577 hammer_rel_volume(volume, 0);
1578 volume = NULL;
1579 *errorp = ENOSPC;
1580 vol_no = (vol_no + 1) % hmp->nvolumes;
1581 clu_hint = -1;
1582 } while (vol_no != vol_beg);
1585 * Acquire the cluster. On success this will force *errorp to 0.
1587 if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1588 kprintf("ALLOC CLUSTER %d\n", clu_no);
1589 cluster = hammer_get_cluster(volume, clu_no, errorp,
1590 HAMMER_ASTATE_FREE);
1591 volume->clu_iterator = clu_no;
1592 hammer_rel_volume(volume, 0);
1593 } else {
1594 cluster = NULL;
1596 if (cluster)
1597 hammer_lock_ex(&cluster->io.lock);
1598 return(cluster);
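/*
 * Editor's note (descriptive, based on the code above): on success the
 * cluster returned by hammer_alloc_cluster() is both referenced and
 * exclusively locked; the caller is presumably expected to unlock it
 * via hammer_unlock(&cluster->io.lock) before the final release.
 */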
1601 void
1602 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound,
1603 hammer_base_elm_t right_bound)
1605 hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1607 hammer_modify_cluster(cluster);
1608 ondisk->clu_btree_beg = *left_bound;
1609 ondisk->clu_btree_end = *right_bound;
1610 cluster->clu_btree_beg = ondisk->clu_btree_beg;
1611 cluster->clu_btree_end = ondisk->clu_btree_end;
1615 * Deallocate a cluster
1617 void
1618 hammer_free_cluster(hammer_cluster_t cluster)
1620 hammer_modify_cluster(cluster);
1621 hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1625 * Allocate HAMMER elements - btree nodes, data storage, and record elements
1627 * The passed *bufferp should be initialized to NULL. On successive calls
1628 * *bufferp caches the most recent buffer used until put away by the caller.
1629 * Note that previously returned pointers using the cached buffer become
1630 * invalid on successive calls which reuse *bufferp.
1632 * All allocations first attempt to use the block found at the specified
1633 * iterator. If that fails the first available block is used. If that
1634 * fails a new buffer is allocated and associated with the buffer type
1635 * A-list and the element is allocated out of the new buffer.
1638 hammer_node_t
1639 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1641 hammer_buffer_t buffer;
1642 hammer_alist_t live;
1643 hammer_node_t node;
1644 int32_t elm_no;
1645 int32_t buf_no;
1646 int32_t node_offset;
1649 * Allocate a B-Tree element
1651 hammer_modify_cluster(cluster);
1652 buffer = NULL;
1653 live = &cluster->alist_btree;
1654 elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1655 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1656 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1657 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1658 alloc_new_buffer(cluster, HAMMER_FSBUF_BTREE, live,
1659 cluster->ondisk->idx_index, errorp, &buffer);
1660 elm_no = hammer_alist_alloc(live, 1);
1661 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1662 *errorp = ENOSPC;
1663 if (buffer)
1664 hammer_rel_buffer(buffer, 0);
1665 return(NULL);
1668 cluster->ondisk->idx_index = elm_no;
1669 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1672 * Load and return the B-Tree element
1674 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1675 node_offset = buf_no * HAMMER_BUFSIZE +
1676 offsetof(union hammer_fsbuf_ondisk,
1677 btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1678 node = hammer_get_node(cluster, node_offset, errorp);
1679 if (node) {
1680 hammer_modify_node(node);
1681 bzero(node->ondisk, sizeof(*node->ondisk));
1682 } else {
1683 hammer_alist_free(live, elm_no, 1);
1684 hammer_rel_node(node);
1685 node = NULL;
1687 if (buffer)
1688 hammer_rel_buffer(buffer, 0);
1689 return(node);
1692 void *
1693 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1694 int *errorp, struct hammer_buffer **bufferp)
1696 hammer_buffer_t buffer;
1697 hammer_alist_t live;
1698 int32_t elm_no;
1699 int32_t buf_no;
1700 int32_t nblks;
1701 void *item;
1704 * Deal with large data blocks. The blocksize is HAMMER_BUFSIZE
1705 * for these allocations.
1707 hammer_modify_cluster(cluster);
1708 if ((bytes & HAMMER_BUFMASK) == 0) {
1709 nblks = bytes / HAMMER_BUFSIZE;
1710 /* only one block allowed for now (so buffer can hold it) */
1711 KKASSERT(nblks == 1);
1713 buf_no = hammer_alloc_master(cluster, nblks,
1714 cluster->ondisk->idx_ldata, 1);
1715 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1716 *errorp = ENOSPC;
1717 return(NULL);
1719 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1720 cluster->ondisk->idx_ldata = buf_no;
1721 buffer = *bufferp;
1722 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1723 if (buffer)
1724 hammer_rel_buffer(buffer, 0);
1725 buffer = *bufferp;
1726 return(buffer->ondisk);
1730 * Allocate a data element. The block size is HAMMER_DATA_BLKSIZE
1731 * (64 bytes) for these allocations.
1733 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1734 nblks /= HAMMER_DATA_BLKSIZE;
1735 live = &cluster->alist_mdata;
1736 elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1737 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1738 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1739 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1740 alloc_new_buffer(cluster, HAMMER_FSBUF_DATA, live,
1741 cluster->ondisk->idx_data, errorp, bufferp);
1742 elm_no = hammer_alist_alloc(live, nblks);
1743 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1744 *errorp = ENOSPC;
1745 return(NULL);
1748 cluster->ondisk->idx_data = elm_no;
1751 * Load and return the data element
1753 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1754 buffer = *bufferp;
1755 if (buffer == NULL || buffer->cluster != cluster ||
1756 buffer->buf_no != buf_no) {
1757 if (buffer)
1758 hammer_rel_buffer(buffer, 0);
1759 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1760 *bufferp = buffer;
1762 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1763 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1764 hammer_modify_buffer(buffer);
1765 item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1766 bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1767 *errorp = 0;
1768 return(item);
1771 void *
1772 hammer_alloc_record(hammer_cluster_t cluster,
1773 int *errorp, struct hammer_buffer **bufferp)
1775 hammer_buffer_t buffer;
1776 hammer_alist_t live;
1777 int32_t elm_no;
1778 int32_t buf_no;
1779 void *item;
1782 * Allocate a record element
1784 hammer_modify_cluster(cluster);
1785 live = &cluster->alist_record;
1786 elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1787 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1788 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1789 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1790 alloc_new_buffer(cluster, HAMMER_FSBUF_RECORDS, live,
1791 cluster->ondisk->idx_record, errorp, bufferp);
1792 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1793 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1794 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1795 *errorp = ENOSPC;
1796 return(NULL);
1799 cluster->ondisk->idx_record = elm_no;
1802 * Load and return the record element
1804 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1805 buffer = *bufferp;
1806 if (buffer == NULL || buffer->cluster != cluster ||
1807 buffer->buf_no != buf_no) {
1808 if (buffer)
1809 hammer_rel_buffer(buffer, 0);
1810 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1811 *bufferp = buffer;
1813 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1814 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1815 hammer_modify_buffer(buffer);
1816 item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1817 bzero(item, sizeof(union hammer_record_ondisk));
1818 *errorp = 0;
1819 return(item);
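/*
 * Editor's note: a minimal caller-side sketch (not original source) of
 * the *bufferp convention described above the allocators.
 *
 *	struct hammer_buffer *buffer = NULL;
 *	void *rec;
 *	int error;
 *
 *	rec = hammer_alloc_record(cluster, &error, &buffer);
 *	... (further allocations may reuse or replace the cached buffer) ...
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */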
1822 void
1823 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1825 int32_t elm_no;
1826 int32_t nblks;
1827 hammer_alist_t live;
1829 hammer_modify_cluster(buffer->cluster);
1830 if ((bytes & HAMMER_BUFMASK) == 0) {
1831 nblks = bytes / HAMMER_BUFSIZE;
1832 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1833 hammer_alist_free(&buffer->cluster->alist_master,
1834 buffer->buf_no, nblks);
1835 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1836 return;
1839 elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1840 HAMMER_DATA_BLKSIZE;
1841 KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1842 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1843 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1844 nblks /= HAMMER_DATA_BLKSIZE;
1845 live = &buffer->cluster->alist_mdata;
1846 hammer_alist_free(live, elm_no, nblks);
1849 void
1850 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1852 int32_t elm_no;
1853 hammer_alist_t live;
1855 hammer_modify_cluster(buffer->cluster);
1856 elm_no = rec - &buffer->ondisk->record.recs[0];
1857 KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1858 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1859 live = &buffer->cluster->alist_record;
1860 hammer_alist_free(live, elm_no, 1);
1863 void
1864 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1866 const int32_t blksize = sizeof(struct hammer_node_ondisk);
1867 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1868 hammer_alist_t live;
1869 int32_t elm_no;
1871 hammer_modify_cluster(cluster);
1872 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1873 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1874 live = &cluster->alist_btree;
1875 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1876 elm_no += fsbuf_offset / blksize;
1877 hammer_alist_free(live, elm_no, 1);
1880 void
1881 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1883 const int32_t blksize = HAMMER_DATA_BLKSIZE;
1884 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1885 hammer_alist_t live;
1886 int32_t elm_no;
1887 int32_t buf_no;
1888 int32_t nblks;
1890 hammer_modify_cluster(cluster);
1891 if ((bytes & HAMMER_BUFMASK) == 0) {
1892 nblks = bytes / HAMMER_BUFSIZE;
1893 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1894 buf_no = bclu_offset / HAMMER_BUFSIZE;
1895 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1896 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1897 return;
1900 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1901 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1902 live = &cluster->alist_mdata;
1903 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1904 nblks /= HAMMER_DATA_BLKSIZE;
1905 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1906 elm_no += fsbuf_offset / blksize;
1907 hammer_alist_free(live, elm_no, nblks);
1910 void
1911 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1913 const int32_t blksize = sizeof(union hammer_record_ondisk);
1914 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1915 hammer_alist_t live;
1916 int32_t elm_no;
1918 hammer_modify_cluster(cluster);
1919 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1920 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1921 live = &cluster->alist_record;
1922 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1923 elm_no += fsbuf_offset / blksize;
1924 hammer_alist_free(live, elm_no, 1);
1929 * Allocate a new filesystem buffer and assign it to the specified
1930 * filesystem buffer type. The new buffer will be added to the
1931 * type-specific A-list and initialized.
1933 * buffers used for records will also be added to the clu_record_buf_bitmap.
1935 static void
1936 alloc_new_buffer(hammer_cluster_t cluster, u_int64_t type, hammer_alist_t live,
1937 int start, int *errorp, struct hammer_buffer **bufferp)
1939 hammer_buffer_t buffer;
1940 int32_t buf_no;
1941 int isfwd;
1943 if (*bufferp)
1944 hammer_rel_buffer(*bufferp, 0);
1945 *bufferp = NULL;
1947 start = start / HAMMER_FSBUF_MAXBLKS; /* convert to buf_no */
1948 isfwd = (type != HAMMER_FSBUF_RECORDS);
1949 buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
1950 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1951 *errorp = ENOSPC;
1952 return;
1956 * The new buffer must be initialized (type != 0) regardless of
1957 * whether we already have it cached or not, so don't try to
1958 * optimize the cached buffer check. Just call hammer_get_buffer().
1960 buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1961 *bufferp = buffer;
1964 * Do a meta-free of the buffer's elements into the type-specific
1965 * A-list and update our statistics to reflect the allocation.
1967 if (buffer) {
1968 #if 0
1969 kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
1970 buf_no, type, nelements);
1971 #endif
1972 hammer_modify_buffer(buffer); /*XXX*/
1973 hammer_adjust_stats(cluster, type, 1);
1976 * Free the buffer to the appropriate slave list so the
1977 * cluster-based allocator sees it.
1979 hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
1980 HAMMER_FSBUF_MAXBLKS);
1985 * And, finally, update clu_record_buf_bitmap for record buffers.
1986 * Since buffers are synced to disk before their associated cluster
1987 * header, a recovery operation will only see synced record buffers
1988 * in the bitmap. XXX We can't use alist_record for recovery due
1989 * to the way we currently manage it.
1991 if (buffer && type == HAMMER_FSBUF_RECORDS) {
1992 KKASSERT(buf_no >= 0 && buf_no < HAMMER_CLU_MAXBUFFERS);
1993 hammer_modify_cluster(cluster);
1994 cluster->ondisk->clu_record_buf_bitmap[buf_no >> 5] |=
1995 (1 << (buf_no & 31));
2000 * Sync dirty buffers to the media
2003 static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
2004 static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
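/*
 * Editor's note (descriptive, hedged): hammer_sync_scan1 appears to be
 * the fast pre-filter for vmntvnodescan(), skipping clean vnodes by
 * returning -1, while hammer_sync_scan2 runs with the vnode held and
 * performs the actual VOP_FSYNC().  The RB_SCANs over volumes, clusters
 * and buffers below then flush HAMMER's own metadata buffers.
 */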
int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	kprintf("hammer_sync\n");
	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);
	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}

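/*
 * Note (added commentary): hammer_sync_scan1() acts as the cheap pre-filter
 * for vmntvnodescan(); returning -1 from it skips the vnode before the
 * heavier per-vnode work is done.  hammer_sync_scan2() then performs the
 * actual VOP_FSYNC() for vnodes that are still dirty.
 */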
static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	if (vp->v_type != VCHR) {
		error = VOP_FSYNC(vp, info->waitfor);
		if (error)
			info->error = error;
	}
	return(0);
}

int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	if (hammer_ref_volume(volume) == 0)
		hammer_rel_volume(volume, 1);
	return(0);
}

int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	/*hammer_io_waitdep(&cluster->io);*/
	if (hammer_ref_cluster(cluster) == 0)
		hammer_rel_cluster(cluster, 1);
	return(0);
}

int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	if (hammer_ref_buffer(buffer) == 0) {
		hammer_lock_ex(&buffer->io.lock);
		hammer_flush_buffer_nodes(buffer);
		hammer_unlock(&buffer->io.lock);
		hammer_rel_buffer(buffer, 1);
	}
	return(0);
}

/*
 * Generic buffer initialization
 */
void
hammer_initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;

	switch(type) {
	case HAMMER_FSBUF_BTREE:
		hammer_alist_init(live, 0, HAMMER_BTREE_NODES, HAMMER_ASTATE_FREE);
		break;
	case HAMMER_FSBUF_DATA:
		hammer_alist_init(live, 0, HAMMER_DATA_NODES, HAMMER_ASTATE_FREE);
		break;
	case HAMMER_FSBUF_RECORDS:
		hammer_alist_init(live, 0, HAMMER_RECORD_NODES, HAMMER_ASTATE_FREE);
		break;
	default:
		hammer_alist_init(live, 0, 0, HAMMER_ASTATE_ALLOC);
		break;
	}
}

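/*
 * Note (added commentary): unrecognized buffer types fall through to the
 * default case above, which initializes the a-list with zero blocks in the
 * all-allocated state, so nothing can be sub-allocated out of such a buffer.
 */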
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			((int64_t)HAMMER_BUFSIZE *
			 HAMMER_VOL_SUPERCLUSTER_GROUP) +
			((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;

		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP)) *
		      volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}

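/*
 * Illustrative only (added commentary): with superclusters grouped in
 * blocks of HAMMER_VOL_SUPERCLUSTER_GROUP (16, per the comment above), the
 * code above sizes each group as
 *
 *	scl_group_size = GROUP * HAMMER_BUFSIZE		     (supercl headers)
 *		       + GROUP * SCL_MAXCLUSTERS * clsize    (the clusters)
 *		       + GROUP * HAMMER_BUFSIZE		     (additional buffers
 *							      reserved above)
 *
 * and places a cluster at cluster_base, plus the whole groups in front of
 * it, plus the GROUP header buffers at the start of its own group, plus
 * its index within the group times vol_clsize.
 */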
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
	int64_t off;
	int32_t scl_group;
	int64_t scl_group_size;

	KKASSERT(volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
	if (scl_group) {
		scl_group_size =
			((int64_t)HAMMER_BUFSIZE *
			 HAMMER_VOL_SUPERCLUSTER_GROUP) +
			((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base + (scl_group * scl_group_size) +
		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
	} else {
		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
	}
	return(off);
}

/*
 * Allocate nblks buffers from the cluster's master alist.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
		    int32_t start, int isfwd)
{
	int32_t buf_no;

	hammer_modify_cluster(cluster);
	if (isfwd) {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
							nblks, 0);
		}
	} else {
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, HAMMER_ALIST_BLOCK_MAX);
		}
	}

	/*
	 * Recover space from empty record, b-tree, and data a-lists.
	 */

	return(buf_no);
}

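/*
 * Note (added commentary): both scan directions above retry once from the
 * extreme of their range when the hinted starting point fails (forward
 * scans retry from block 0, reverse scans from HAMMER_ALIST_BLOCK_MAX),
 * so a poor hint cannot by itself produce a spurious
 * HAMMER_ALIST_BLOCK_NONE.
 */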
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
	hammer_modify_cluster(cluster);
	hammer_modify_volume(cluster->volume);
	hammer_modify_volume(cluster->volume->hmp->rootvol);

	switch(buf_type) {
	case HAMMER_FSBUF_BTREE:
		cluster->ondisk->stat_idx_bufs += nblks;
		cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
		break;
	case HAMMER_FSBUF_DATA:
		cluster->ondisk->stat_data_bufs += nblks;
		cluster->volume->ondisk->vol_stat_data_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
		break;
	case HAMMER_FSBUF_RECORDS:
		cluster->ondisk->stat_rec_bufs += nblks;
		cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
		break;
	}
}

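/*
 * Note (added commentary): the same counters are maintained at three levels
 * (the cluster, its volume, and the root volume's vol0 aggregate),
 * presumably so usage statistics can be reported without walking every
 * cluster.
 */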
/*
 * A-LIST SUPPORT
 *
 * Setup the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */

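/*
 * Rough picture (added commentary) of the chaining established in
 * hammer_init_alist_config() below; the arrows are the bl_radix_*
 * callbacks:
 *
 *	Vol_super_alist_config   --super_alist_*-->    per-supercl alist
 *	Clu_slave_alist_config   --buffer_alist_*-->   per-buffer alist
 *
 * The remaining configs (Buf, Vol_normal, Supercl, Clu_master) are
 * single-layer and install no callbacks.
 */
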
/*
 * A-LIST - cluster recursion into a filesystem buffer
 *
 * In the init case the buffer has already been initialized by
 * alloc_new_buffer() when it allocated the buffer out of the master
 * alist and marked it as free in the slave alist.
 *
 * Because we use a somewhat odd mechanism to assign buffers to slave
 * pools we can't actually free the buffer back to the master alist in
 * buffer_alist_destroy(), but instead must deal with that logic somewhere
 * else.
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix,
		  hammer_alloc_state_t state)
{
	return(0);
}

static int
buffer_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_modify_buffer(buffer);
		error = hammer_alist_recover(&buffer->alist, blk, 0, count);
		/* free block count is returned if >= 0 */
		hammer_rel_buffer(buffer, 0);
	} else {
		error = -error;
	}
	return (error);
}

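/*
 * Note (added commentary) on the return convention used by the recover
 * callbacks: a value >= 0 is the count of free blocks found by
 * hammer_alist_recover(), while a failure is reported as the negated
 * errno (hence the error = -error above), letting the caller distinguish
 * the two cases.
 */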
/*
 * Note: This routine is only called when freeing the last elements of
 * an initialized buffer.  Freeing all elements of the buffer when the
 * buffer was not previously initialized does not call this routine.
 */
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_cluster_t cluster = info;
	int32_t buf_no;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	kprintf("destroy buffer %d:%d:%d\n", cluster->volume->vol_no,
		cluster->clu_no, buf_no);
	return (0);
}

/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);

		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;	/* match the _rev case, don't leave *fullp uninitialized */
	}
	return(r);
}

static int
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		       int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}

static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
		  int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		hammer_alist_free(&buffer->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&buffer->alist);
		/* XXX don't bother updating the buffer if it is completely empty? */
		hammer_rel_buffer(buffer, 0);
	} else {
		*emptyp = 0;
	}
}

static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}

/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-Lists are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered A-List
 * as a single entity.
 */

/*
 * This occurs when allocating a cluster via the volume a-list and the
 * entry in the volume a-list indicated all-free.  The underlying supercl
 * has not yet been initialized.
 */
static int
super_alist_init(void *info, int32_t blk, int32_t radix,
		 hammer_alloc_state_t state)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, state);
	if (supercl)
		hammer_rel_supercl(supercl, 0);
	return (error);
}

static int
super_alist_recover(void *info, int32_t blk, int32_t radix, int32_t count)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error,
				     HAMMER_ASTATE_NONE);
	if (supercl) {
		hammer_modify_supercl(supercl);
		error = hammer_alist_recover(&supercl->alist, blk, 0, count);
		/* free block count is returned if >= 0 */
		hammer_rel_supercl(supercl, 0);
	} else {
		error = -error;
	}
	return (error);
}

/*
 * This occurs when freeing a cluster via the volume a-list and the
 * supercl is now 100% free.  We can destroy the supercl.
 *
 * What we actually do is just unset the modify bit so it doesn't get
 * written out.
 */
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	if (hammer_find_supercl(volume, scl_no)) {
		supercl = hammer_get_supercl(volume, scl_no, &error,
					     HAMMER_ASTATE_FREE);
		if (supercl) {
			/* XXX */
			hammer_io_clear_modify(&supercl->io);
			hammer_rel_supercl(supercl, 0);
		}
	}
	return (error);
}

static int
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}

static int
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}

static void
super_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		hammer_alist_free(&supercl->alist, base_blk, count);
		*emptyp = hammer_alist_isempty(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		*emptyp = 0;
	}
}

static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}

void
hammer_init_alist_config(void)
{
	hammer_alist_config_t config;

	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR);
	hammer_alist_template(&Vol_super_alist_config,
			  HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);

	config = &Vol_super_alist_config;
	config->bl_radix_init = super_alist_init;
	config->bl_radix_recover = super_alist_recover;
	config->bl_radix_destroy = super_alist_destroy;
	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
	config->bl_radix_alloc_rev = super_alist_alloc_rev;
	config->bl_radix_free = super_alist_free;
	config->bl_radix_print = super_alist_print;

	config = &Clu_slave_alist_config;
	config->bl_radix_init = buffer_alist_init;
	config->bl_radix_recover = buffer_alist_recover;
	config->bl_radix_destroy = buffer_alist_destroy;
	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
	config->bl_radix_free = buffer_alist_free;
	config->bl_radix_print = buffer_alist_print;
}

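/*
 * Illustrative sketch only (not compiled, added commentary): the cluster
 * slave template above spans HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS
 * blocks with a base radix of HAMMER_FSBUF_MAXBLKS, so the cluster-relative
 * block numbers handed to the buffer_alist_* callbacks map back to a
 * buffer exactly as those callbacks do it:
 */
#if 0
static __inline int32_t
slave_blk_to_buf_no_example(int32_t blk)
{
	return(blk / HAMMER_FSBUF_MAXBLKS);	/* buffer containing this block */
}
#endif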