HAMMER 13/many - Stabilization commit
1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
34 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.14 2007/12/31 05:33:12 dillon Exp $
37 * Manage HAMMER's on-disk structures. These routines are primarily
38 * responsible for interfacing with the kernel's I/O subsystem and for
39 * managing in-memory structures.
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54 hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56 u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58 hammer_alist_t live, u_int64_t type, int32_t nelements,
59 int32_t start,
60 int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63 int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65 int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69 static int32_t hammer_alloc_master(hammer_cluster_t cluster, int nblks,
70 int32_t start, int isfwd);
71 static void hammer_adjust_stats(hammer_cluster_t cluster,
72 u_int64_t buf_type, int nblks);
74 struct hammer_alist_config Buf_alist_config;
75 struct hammer_alist_config Vol_normal_alist_config;
76 struct hammer_alist_config Vol_super_alist_config;
77 struct hammer_alist_config Supercl_alist_config;
78 struct hammer_alist_config Clu_master_alist_config;
79 struct hammer_alist_config Clu_slave_alist_config;
82 * Red-Black tree support for various structures
84 static int
85 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
87 if (ip1->obj_id < ip2->obj_id)
88 return(-1);
89 if (ip1->obj_id > ip2->obj_id)
90 return(1);
91 if (ip1->obj_asof < ip2->obj_asof)
92 return(-1);
93 if (ip1->obj_asof > ip2->obj_asof)
94 return(1);
95 return(0);
98 static int
99 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
101 if (info->obj_id < ip->obj_id)
102 return(-1);
103 if (info->obj_id > ip->obj_id)
104 return(1);
105 if (info->obj_asof < ip->obj_asof)
106 return(-1);
107 if (info->obj_asof > ip->obj_asof)
108 return(1);
109 return(0);
112 static int
113 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
115 if (vol1->vol_no < vol2->vol_no)
116 return(-1);
117 if (vol1->vol_no > vol2->vol_no)
118 return(1);
119 return(0);
122 static int
123 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
125 if (cl1->scl_no < cl2->scl_no)
126 return(-1);
127 if (cl1->scl_no > cl2->scl_no)
128 return(1);
129 return(0);
132 static int
133 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
135 if (cl1->clu_no < cl2->clu_no)
136 return(-1);
137 if (cl1->clu_no > cl2->clu_no)
138 return(1);
139 return(0);
142 static int
143 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
145 if (buf1->buf_no < buf2->buf_no)
146 return(-1);
147 if (buf1->buf_no > buf2->buf_no)
148 return(1);
149 return(0);
152 static int
153 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
155 if (node1->node_offset < node2->node_offset)
156 return(-1);
157 if (node1->node_offset > node2->node_offset)
158 return(1);
159 return(0);
163 * Note: The lookup function for hammer_ino_rb_tree winds up being named
164 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info). The other lookup
165 * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
167 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
168 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
169 hammer_inode_info_cmp, hammer_inode_info_t);
170 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
171 hammer_vol_rb_compare, int32_t, vol_no);
172 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
173 hammer_scl_rb_compare, int32_t, scl_no);
174 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
175 hammer_clu_rb_compare, int32_t, clu_no);
176 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
177 hammer_buf_rb_compare, int32_t, buf_no);
178 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
179 hammer_nod_rb_compare, int32_t, node_offset);
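/*
 * Usage sketch for the generated lookups above, assuming the mount
 * structure names its inode tree root rb_inos_root (an assumed field
 * name; the wrapper itself is hypothetical).
 */
#if 0
static hammer_inode_t
example_rb_lookups(struct hammer_mount *hmp, int32_t vol_no,
		   hammer_inode_info_t info)
{
	hammer_volume_t volume;

	/* keyed lookup generated by RB_GENERATE2() */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL)
		return(NULL);

	/* extended lookup generated by RB_GENERATE_XLOOKUP(), keyed on
	 * (obj_id, obj_asof) through hammer_inode_info_cmp() */
	return(hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, info));
}
#endif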
181 /************************************************************************
182 * VOLUMES *
183 ************************************************************************
185 * Load a HAMMER volume by name. Returns 0 on success or a positive error
186 * code on failure. Volumes must be loaded at mount time; get_volume() will
187 * not load a new volume.
189 * Calls made here and to hammer_load_volume() are single-threaded (mount time).
192 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
194 struct mount *mp;
195 hammer_volume_t volume;
196 struct hammer_volume_ondisk *ondisk;
197 struct nlookupdata nd;
198 struct buf *bp = NULL;
199 int error;
200 int ronly;
202 mp = hmp->mp;
203 ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
206 * Allocate a volume structure
208 ++hammer_count_volumes;
209 volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
210 volume->vol_name = kstrdup(volname, M_HAMMER);
211 volume->hmp = hmp;
212 volume->io.type = HAMMER_STRUCTURE_VOLUME;
213 volume->io.offset = 0LL;
216 * Get the device vnode
218 error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
219 if (error == 0)
220 error = nlookup(&nd);
221 if (error == 0)
222 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
223 nlookup_done(&nd);
224 if (error == 0) {
225 vn_isdisk(volume->devvp, &error);
227 if (error == 0) {
228 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
229 error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
230 FSCRED, NULL);
231 vn_unlock(volume->devvp);
233 if (error) {
234 hammer_free_volume(volume);
235 return(error);
239 * Extract the volume number from the volume header and do various
240 * sanity checks.
242 error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
243 if (error)
244 goto late_failure;
245 ondisk = (void *)bp->b_data;
246 if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
247 kprintf("hammer_mount: volume %s has an invalid header\n",
248 volume->vol_name);
249 error = EFTYPE;
250 goto late_failure;
252 volume->vol_no = ondisk->vol_no;
253 volume->cluster_base = ondisk->vol_clo_beg;
254 volume->vol_clsize = ondisk->vol_clsize;
255 volume->vol_flags = ondisk->vol_flags;
256 volume->nblocks = ondisk->vol_nblocks;
257 RB_INIT(&volume->rb_clus_root);
258 RB_INIT(&volume->rb_scls_root);
260 hmp->mp->mnt_stat.f_blocks += volume->nblocks;
262 if (RB_EMPTY(&hmp->rb_vols_root)) {
263 hmp->fsid = ondisk->vol_fsid;
264 } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
265 kprintf("hammer_mount: volume %s's fsid does not match "
266 "other volumes\n", volume->vol_name);
267 error = EFTYPE;
268 goto late_failure;
272 * Insert the volume structure into the red-black tree.
274 if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
275 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
276 volume->vol_name, volume->vol_no);
277 error = EEXIST;
281 * Set the root volume and load the root cluster. HAMMER special
282 * cases rootvol and rootcl and will not deallocate the structures.
283 * We do not hold a ref because this would prevent related I/O
284 * from being flushed.
286 if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
287 hmp->rootvol = volume;
288 hmp->rootcl = hammer_get_cluster(volume,
289 ondisk->vol0_root_clu_no,
290 &error, 0);
291 hammer_rel_cluster(hmp->rootcl, 0);
292 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
294 late_failure:
295 if (bp)
296 brelse(bp);
297 if (error) {
298 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
299 VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
300 hammer_free_volume(volume);
302 return (error);
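/*
 * Usage sketch, assuming the mount code has collected the volume device
 * paths into a simple array (hypothetical helper; the real argument
 * handling lives in the VFS mount code).
 */
#if 0
static int
example_install_volumes(struct hammer_mount *hmp,
			const char **paths, int npaths)
{
	int error = 0;
	int i;

	for (i = 0; i < npaths && error == 0; ++i)
		error = hammer_install_volume(hmp, paths[i]);
	return(error);
}
#endif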
306 * Unload and free a HAMMER volume. Must return >= 0 for the RB_SCAN to
307 * continue; -1 would be returned on failure to abort the scan.
310 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
312 struct hammer_mount *hmp = volume->hmp;
313 hammer_cluster_t rootcl;
314 int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
317 * Sync clusters, sync volume
320 hmp->mp->mnt_stat.f_blocks -= volume->nblocks;
323 * Clean up the root cluster, which is held unlocked in the root
324 * volume.
326 if (hmp->rootvol == volume) {
327 if ((rootcl = hmp->rootcl) != NULL)
328 hmp->rootcl = NULL;
329 hmp->rootvol = NULL;
333 * Unload clusters and super-clusters. Unloading a super-cluster
334 * also unloads related clusters, but the filesystem may not be
335 * using super-clusters so unload clusters anyway.
337 RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
338 hammer_unload_cluster, NULL);
339 RB_SCAN(hammer_scl_rb_tree, &volume->rb_scls_root, NULL,
340 hammer_unload_supercl, NULL);
343 * Release our buffer and flush anything left in the buffer cache.
345 hammer_io_release(&volume->io, 1);
348 * There should be no references on the volume, no clusters, and
349 * no super-clusters.
351 KKASSERT(volume->io.lock.refs == 0);
352 KKASSERT(RB_EMPTY(&volume->rb_clus_root));
353 KKASSERT(RB_EMPTY(&volume->rb_scls_root));
355 volume->ondisk = NULL;
356 if (volume->devvp) {
357 if (ronly) {
358 vinvalbuf(volume->devvp, 0, 0, 0);
359 VOP_CLOSE(volume->devvp, FREAD);
360 } else {
361 vinvalbuf(volume->devvp, V_SAVE, 0, 0);
362 VOP_CLOSE(volume->devvp, FREAD|FWRITE);
367 * Destroy the structure
369 RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
370 hammer_free_volume(volume);
371 return(0);
374 static
375 void
376 hammer_free_volume(hammer_volume_t volume)
378 if (volume->vol_name) {
379 kfree(volume->vol_name, M_HAMMER);
380 volume->vol_name = NULL;
382 if (volume->devvp) {
383 vrele(volume->devvp);
384 volume->devvp = NULL;
386 --hammer_count_volumes;
387 kfree(volume, M_HAMMER);
391 * Get a HAMMER volume. The volume must already exist.
393 hammer_volume_t
394 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
396 struct hammer_volume *volume;
399 * Locate the volume structure
401 volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
402 if (volume == NULL) {
403 *errorp = ENOENT;
404 return(NULL);
406 hammer_ref(&volume->io.lock);
409 * Deal with on-disk info
411 if (volume->ondisk == NULL) {
412 *errorp = hammer_load_volume(volume);
413 if (*errorp) {
414 hammer_rel_volume(volume, 1);
415 volume = NULL;
417 } else {
418 *errorp = 0;
420 return(volume);
424 hammer_ref_volume(hammer_volume_t volume)
426 int error;
428 hammer_ref(&volume->io.lock);
431 * Deal with on-disk info
433 if (volume->ondisk == NULL) {
434 error = hammer_load_volume(volume);
435 if (error)
436 hammer_rel_volume(volume, 1);
437 } else {
438 error = 0;
440 return (error);
443 hammer_volume_t
444 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
446 hammer_volume_t volume;
448 volume = hmp->rootvol;
449 KKASSERT(volume != NULL);
450 hammer_ref(&volume->io.lock);
453 * Deal with on-disk info
455 if (volume->ondisk == NULL) {
456 *errorp = hammer_load_volume(volume);
457 if (*errorp) {
458 hammer_rel_volume(volume, 1);
459 volume = NULL;
461 } else {
462 *errorp = 0;
464 return (volume);
468 * Load a volume's on-disk information. The volume must be referenced and
469 * not locked. We temporarily acquire an exclusive lock to interlock
470 * against releases or multiple get's.
472 static int
473 hammer_load_volume(hammer_volume_t volume)
475 struct hammer_volume_ondisk *ondisk;
476 int error;
478 hammer_lock_ex(&volume->io.lock);
479 if (volume->ondisk == NULL) {
480 error = hammer_io_read(volume->devvp, &volume->io);
481 if (error) {
482 hammer_unlock(&volume->io.lock);
483 return (error);
485 volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
488 * Configure the volume's A-lists. These are used to
489 * allocate clusters.
491 if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
492 volume->alist.config = &Vol_super_alist_config;
493 volume->alist.meta = ondisk->vol_almeta.super;
494 volume->alist.info = volume;
495 } else {
496 volume->alist.config = &Vol_normal_alist_config;
497 volume->alist.meta = ondisk->vol_almeta.normal;
498 volume->alist.info = NULL;
500 } else {
501 error = 0;
503 hammer_unlock(&volume->io.lock);
504 return(0);
508 * Release a volume. Call hammer_io_release on the last reference. We have
509 * to acquire an exclusive lock to interlock against volume->ondisk tests
510 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
511 * lock to be held.
513 * Volumes are not unloaded from memory during normal operation.
515 void
516 hammer_rel_volume(hammer_volume_t volume, int flush)
518 if (volume->io.lock.refs == 1) {
519 hammer_lock_ex(&volume->io.lock);
520 if (volume->io.lock.refs == 1) {
521 volume->ondisk = NULL;
522 hammer_io_release(&volume->io, flush);
524 hammer_unlock(&volume->io.lock);
526 hammer_unref(&volume->io.lock);
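/*
 * Usage sketch for the volume reference protocol: hammer_get_volume()
 * returns a referenced volume with ->ondisk loaded (or NULL with
 * *errorp set), and hammer_rel_volume() drops the reference.  The
 * wrapper is hypothetical.
 */
#if 0
static int
example_use_volume(struct hammer_mount *hmp, int32_t vol_no)
{
	hammer_volume_t volume;
	int error;

	volume = hammer_get_volume(hmp, vol_no, &error);
	if (volume == NULL)
		return(error);
	/* volume->ondisk remains valid while the reference is held */
	kprintf("volume %d clsize %d\n", volume->vol_no, volume->vol_clsize);
	hammer_rel_volume(volume, 0);
	return(0);
}
#endif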
529 /************************************************************************
530 * SUPER-CLUSTERS *
531 ************************************************************************
533 * Manage super-clusters. Note that a supercl holds a reference to its
534 * associated volume.
536 hammer_supercl_t
537 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
538 int *errorp, int isnew)
540 hammer_supercl_t supercl;
543 * Locate and lock the super-cluster structure, creating one
544 * if necessary.
546 again:
547 supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
548 if (supercl == NULL) {
549 ++hammer_count_supercls;
550 supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
551 supercl->scl_no = scl_no;
552 supercl->volume = volume;
553 supercl->io.offset = calculate_supercl_offset(volume, scl_no);
554 supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
555 hammer_ref(&supercl->io.lock);
558 * Insert the super-cluster into the RB tree and handle late
559 * collisions.
561 if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
562 hammer_unref(&supercl->io.lock);
563 --hammer_count_supercls;
564 kfree(supercl, M_HAMMER);
565 goto again;
567 hammer_ref(&volume->io.lock);
568 } else {
569 hammer_ref(&supercl->io.lock);
573 * Deal with on-disk info
575 if (supercl->ondisk == NULL || isnew) {
576 *errorp = hammer_load_supercl(supercl, isnew);
577 if (*errorp) {
578 hammer_rel_supercl(supercl, 1);
579 supercl = NULL;
581 } else {
582 *errorp = 0;
584 return(supercl);
587 static int
588 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
590 struct hammer_supercl_ondisk *ondisk;
591 hammer_volume_t volume = supercl->volume;
592 int error;
594 hammer_lock_ex(&supercl->io.lock);
595 if (supercl->ondisk == NULL) {
596 if (isnew)
597 error = hammer_io_new(volume->devvp, &supercl->io);
598 else
599 error = hammer_io_read(volume->devvp, &supercl->io);
600 if (error) {
601 hammer_unlock(&supercl->io.lock);
602 return (error);
604 supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
606 supercl->alist.config = &Supercl_alist_config;
607 supercl->alist.meta = ondisk->scl_meta;
608 supercl->alist.info = NULL;
609 } else if (isnew) {
610 error = hammer_io_new(volume->devvp, &supercl->io);
611 } else {
612 error = 0;
614 if (error == 0 && isnew) {
616 * If this is a new super-cluster we have to initialize
617 * various ondisk structural elements. The caller is
618 * responsible for the remainder.
620 struct hammer_alist_live dummy;
622 ondisk = supercl->ondisk;
623 dummy.config = &Buf_alist_config;
624 dummy.meta = ondisk->head.buf_almeta;
625 dummy.info = NULL;
626 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
627 hammer_alist_init(&supercl->alist);
629 hammer_unlock(&supercl->io.lock);
630 return (error);
634 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
637 hammer_unload_supercl(hammer_supercl_t supercl, void *data __unused)
639 KKASSERT(supercl->io.lock.refs == 0);
640 hammer_ref(&supercl->io.lock);
641 hammer_rel_supercl(supercl, 1);
642 return(0);
646 * Release a super-cluster. We have to deal with several places where
647 * another thread can ref the super-cluster.
649 * Only destroy the structure itself if the related buffer cache buffer
650 * was disassociated from it. This ties the management of the structure
651 * to the buffer cache subsystem.
653 void
654 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
656 hammer_volume_t volume;
658 if (supercl->io.lock.refs == 1) {
659 hammer_lock_ex(&supercl->io.lock);
660 if (supercl->io.lock.refs == 1) {
661 hammer_io_release(&supercl->io, flush);
662 if (supercl->io.bp == NULL &&
663 supercl->io.lock.refs == 1) {
664 volume = supercl->volume;
665 RB_REMOVE(hammer_scl_rb_tree,
666 &volume->rb_scls_root, supercl);
667 supercl->volume = NULL; /* sanity */
668 --hammer_count_supercls;
669 kfree(supercl, M_HAMMER);
670 hammer_rel_volume(volume, 0);
671 return;
674 hammer_unlock(&supercl->io.lock);
676 hammer_unref(&supercl->io.lock);
679 /************************************************************************
680 * CLUSTERS *
681 ************************************************************************
684 hammer_cluster_t
685 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
686 int *errorp, int isnew)
688 hammer_cluster_t cluster;
690 again:
691 cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
692 if (cluster == NULL) {
693 ++hammer_count_clusters;
694 cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
695 cluster->clu_no = clu_no;
696 cluster->volume = volume;
697 cluster->io.offset = calculate_cluster_offset(volume, clu_no);
698 cluster->state = HAMMER_CLUSTER_IDLE;
699 RB_INIT(&cluster->rb_bufs_root);
700 RB_INIT(&cluster->rb_nods_root);
701 cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
702 hammer_ref(&cluster->io.lock);
705 * Insert the cluster into the RB tree and handle late
706 * collisions.
708 if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
709 hammer_unref(&cluster->io.lock);
710 --hammer_count_clusters;
711 kfree(cluster, M_HAMMER);
712 goto again;
714 hammer_ref(&volume->io.lock);
715 } else {
716 hammer_ref(&cluster->io.lock);
720 * Deal with on-disk info
722 if (cluster->ondisk == NULL || isnew) {
723 *errorp = hammer_load_cluster(cluster, isnew);
724 if (*errorp) {
725 hammer_rel_cluster(cluster, 1);
726 cluster = NULL;
728 } else {
729 *errorp = 0;
731 return (cluster);
734 hammer_cluster_t
735 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
737 hammer_cluster_t cluster;
739 cluster = hmp->rootcl;
740 KKASSERT(cluster != NULL);
741 hammer_ref(&cluster->io.lock);
744 * Deal with on-disk info
746 if (cluster->ondisk == NULL) {
747 *errorp = hammer_load_cluster(cluster, 0);
748 if (*errorp) {
749 hammer_rel_cluster(cluster, 1);
750 cluster = NULL;
752 } else {
753 *errorp = 0;
755 return (cluster);
758 static
760 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
762 hammer_volume_t volume = cluster->volume;
763 struct hammer_cluster_ondisk *ondisk;
764 int error;
767 * Load the cluster's on-disk info
769 hammer_lock_ex(&cluster->io.lock);
770 if (cluster->ondisk == NULL) {
771 if (isnew)
772 error = hammer_io_new(volume->devvp, &cluster->io);
773 else
774 error = hammer_io_read(volume->devvp, &cluster->io);
775 if (error) {
776 hammer_unlock(&cluster->io.lock);
777 return (error);
779 cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
781 cluster->alist_master.config = &Clu_master_alist_config;
782 cluster->alist_master.meta = ondisk->clu_master_meta;
783 cluster->alist_btree.config = &Clu_slave_alist_config;
784 cluster->alist_btree.meta = ondisk->clu_btree_meta;
785 cluster->alist_btree.info = cluster;
786 cluster->alist_record.config = &Clu_slave_alist_config;
787 cluster->alist_record.meta = ondisk->clu_record_meta;
788 cluster->alist_record.info = cluster;
789 cluster->alist_mdata.config = &Clu_slave_alist_config;
790 cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
791 cluster->alist_mdata.info = cluster;
793 if (isnew == 0) {
794 cluster->clu_btree_beg = ondisk->clu_btree_beg;
795 cluster->clu_btree_end = ondisk->clu_btree_end;
797 } else if (isnew) {
798 error = hammer_io_new(volume->devvp, &cluster->io);
799 } else {
800 error = 0;
802 if (error == 0 && isnew) {
804 * If this is a new cluster we have to initialize
805 * various ondisk structural elements. The caller is
806 * responsible for the remainder.
808 struct hammer_alist_live dummy;
809 hammer_node_t croot;
810 hammer_volume_ondisk_t voldisk;
811 int32_t nbuffers;
813 hammer_modify_cluster(cluster);
814 ondisk = cluster->ondisk;
815 voldisk = volume->ondisk;
817 dummy.config = &Buf_alist_config;
818 dummy.meta = ondisk->head.buf_almeta;
819 dummy.info = NULL;
820 initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
822 hammer_alist_init(&cluster->alist_master);
823 hammer_alist_init(&cluster->alist_btree);
824 hammer_alist_init(&cluster->alist_record);
825 hammer_alist_init(&cluster->alist_mdata);
827 ondisk->vol_fsid = voldisk->vol_fsid;
828 ondisk->vol_fstype = voldisk->vol_fstype;
829 ondisk->clu_gen = 1;
830 ondisk->clu_id = 0; /* XXX */
831 ondisk->clu_no = cluster->clu_no;
832 ondisk->clu_flags = 0;
833 ondisk->clu_start = HAMMER_BUFSIZE;
834 KKASSERT(voldisk->vol_clo_end > cluster->io.offset);
835 if (voldisk->vol_clo_end - cluster->io.offset >
836 voldisk->vol_clsize) {
837 ondisk->clu_limit = voldisk->vol_clsize;
838 } else {
839 ondisk->clu_limit = (int32_t)(voldisk->vol_clo_end -
840 cluster->io.offset);
842 nbuffers = ondisk->clu_limit / HAMMER_BUFSIZE;
843 hammer_alist_free(&cluster->alist_master, 1, nbuffers - 1);
844 ondisk->idx_data = 1 * HAMMER_FSBUF_MAXBLKS;
845 ondisk->idx_index = 0 * HAMMER_FSBUF_MAXBLKS;
846 ondisk->idx_record = nbuffers * HAMMER_FSBUF_MAXBLKS;
849 * Initialize the B-Tree. We don't know what the caller
850 * intends to do with the cluster so make sure it causes
851 * an assertion if the caller makes no changes.
853 ondisk->clu_btree_parent_vol_no = -2;
854 ondisk->clu_btree_parent_clu_no = -2;
855 ondisk->clu_btree_parent_offset = -2;
856 ondisk->clu_btree_parent_clu_gen = -2;
857 hammer_modify_cluster_done(cluster);
859 croot = hammer_alloc_btree(cluster, &error);
860 if (error == 0) {
861 hammer_modify_node(croot);
862 bzero(croot->ondisk, sizeof(*croot->ondisk));
863 croot->ondisk->count = 0;
864 croot->ondisk->type = HAMMER_BTREE_TYPE_LEAF;
865 hammer_modify_node_done(croot);
866 hammer_modify_cluster(cluster);
867 ondisk->clu_btree_root = croot->node_offset;
868 hammer_modify_cluster_done(cluster);
869 hammer_rel_node(croot);
872 hammer_unlock(&cluster->io.lock);
873 return (error);
877 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
880 hammer_unload_cluster(hammer_cluster_t cluster, void *data __unused)
882 hammer_ref(&cluster->io.lock);
883 RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
884 hammer_unload_buffer, NULL);
885 KKASSERT(cluster->io.lock.refs == 1);
886 hammer_rel_cluster(cluster, 1);
887 return(0);
891 * Reference a cluster that is either already referenced or via a specially
892 * handled pointer (aka rootcl).
895 hammer_ref_cluster(hammer_cluster_t cluster)
897 int error;
899 KKASSERT(cluster != NULL);
900 hammer_ref(&cluster->io.lock);
903 * Deal with on-disk info
905 if (cluster->ondisk == NULL) {
906 error = hammer_load_cluster(cluster, 0);
907 if (error)
908 hammer_rel_cluster(cluster, 1);
909 } else {
910 error = 0;
912 return(error);
916 * Release a cluster. We have to deal with several places where
917 * another thread can ref the cluster.
919 * Only destroy the structure itself if the related buffer cache buffer
920 * was disassociated from it. This ties the management of the structure
921 * to the buffer cache subsystem.
923 void
924 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
926 hammer_node_t node;
927 hammer_volume_t volume;
929 if (cluster->io.lock.refs == 1) {
930 hammer_lock_ex(&cluster->io.lock);
931 if (cluster->io.lock.refs == 1) {
933 * Release the I/O. If we or the kernel wants to
934 * flush, this will release the bp. Otherwise the
935 * bp may be written and flushed passively by the
936 * kernel later on.
938 hammer_io_release(&cluster->io, flush);
941 * The B-Tree node cache is not counted in the
942 * cluster's reference count. Clean out the
943 * cache.
945 * If the cluster acquires a new reference while we
946 * are trying to clean it out, abort the cleaning.
948 * Any actively referenced nodes will reference the
949 * related buffer and cluster, so a ref count check
950 * should be sufficient.
952 while (cluster->io.bp == NULL &&
953 cluster->io.lock.refs == 1 &&
954 (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
956 KKASSERT(node->lock.refs == 0);
957 hammer_flush_node(node);
961 * Final cleanup
963 if (cluster != cluster->volume->hmp->rootcl &&
964 cluster->io.bp == NULL &&
965 cluster->io.lock.refs == 1 &&
966 RB_EMPTY(&cluster->rb_nods_root)) {
967 KKASSERT(RB_EMPTY(&cluster->rb_bufs_root));
968 volume = cluster->volume;
969 RB_REMOVE(hammer_clu_rb_tree,
970 &volume->rb_clus_root, cluster);
971 cluster->volume = NULL; /* sanity */
972 --hammer_count_clusters;
973 kfree(cluster, M_HAMMER);
974 hammer_rel_volume(volume, 0);
975 return;
978 hammer_unlock(&cluster->io.lock);
980 hammer_unref(&cluster->io.lock);
983 /************************************************************************
984 * BUFFERS *
985 ************************************************************************
987 * Manage buffers. Note that a buffer holds a reference to its associated
988 * cluster, and its cluster will hold a reference to the cluster's volume.
990 * A non-zero buf_type indicates that a new buffer should be created and
991 * zero'd.
993 hammer_buffer_t
994 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
995 u_int64_t buf_type, int *errorp)
997 hammer_buffer_t buffer;
1000 * Find the buffer. Note that buffer 0 corresponds to the cluster
1001 * header and should never be requested.
1003 KKASSERT(buf_no >= cluster->ondisk->clu_start / HAMMER_BUFSIZE &&
1004 buf_no < cluster->ondisk->clu_limit / HAMMER_BUFSIZE);
1007 * Locate and lock the buffer structure, creating one if necessary.
1009 again:
1010 buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
1011 if (buffer == NULL) {
1012 ++hammer_count_buffers;
1013 buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
1014 buffer->buf_no = buf_no;
1015 buffer->cluster = cluster;
1016 buffer->volume = cluster->volume;
1017 buffer->io.offset = cluster->io.offset +
1018 (buf_no * HAMMER_BUFSIZE);
1019 buffer->io.type = HAMMER_STRUCTURE_BUFFER;
1020 TAILQ_INIT(&buffer->clist);
1021 hammer_ref(&buffer->io.lock);
1024 * Insert the buffer into the cluster's RB tree and handle late
1025 * collisions.
1027 if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
1028 hammer_unref(&buffer->io.lock);
1029 --hammer_count_buffers;
1030 kfree(buffer, M_HAMMER);
1031 goto again;
1033 hammer_ref(&cluster->io.lock);
1034 } else {
1035 hammer_ref(&buffer->io.lock);
1039 * Deal with on-disk info
1041 if (buffer->ondisk == NULL || buf_type) {
1042 *errorp = hammer_load_buffer(buffer, buf_type);
1043 if (*errorp) {
1044 hammer_rel_buffer(buffer, 1);
1045 buffer = NULL;
1047 } else {
1048 *errorp = 0;
1050 return(buffer);
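/*
 * Usage sketch: acquire an existing filesystem buffer (buf_type 0 means
 * read, do not create), check its type, release it.  The wrapper is
 * hypothetical.
 */
#if 0
static int
example_check_buffer(hammer_cluster_t cluster, int32_t buf_no,
		     u_int64_t expected_type)
{
	hammer_buffer_t buffer;
	int error;

	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer == NULL)
		return(error);
	KKASSERT(buffer->ondisk->head.buf_type == expected_type);
	hammer_rel_buffer(buffer, 0);
	return(0);
}
#endif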
1053 static int
1054 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
1056 hammer_volume_t volume;
1057 hammer_fsbuf_ondisk_t ondisk;
1058 int error;
1061 * Load the buffer's on-disk info
1063 volume = buffer->volume;
1064 hammer_lock_ex(&buffer->io.lock);
1065 if (buffer->ondisk == NULL) {
1066 if (buf_type) {
1067 error = hammer_io_new(volume->devvp, &buffer->io);
1068 } else {
1069 error = hammer_io_read(volume->devvp, &buffer->io);
1071 if (error) {
1072 hammer_unlock(&buffer->io.lock);
1073 return (error);
1075 buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
1076 buffer->alist.config = &Buf_alist_config;
1077 buffer->alist.meta = ondisk->head.buf_almeta;
1078 buffer->buf_type = ondisk->head.buf_type;
1079 } else if (buf_type) {
1080 error = hammer_io_new(volume->devvp, &buffer->io);
1081 } else {
1082 error = 0;
1084 if (error == 0 && buf_type) {
1085 ondisk = buffer->ondisk;
1086 initbuffer(&buffer->alist, &ondisk->head, buf_type);
1087 buffer->buf_type = ondisk->head.buf_type;
1089 hammer_unlock(&buffer->io.lock);
1090 return (error);
1094 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
1097 hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
1099 hammer_ref(&buffer->io.lock);
1100 hammer_flush_buffer_nodes(buffer);
1101 KKASSERT(buffer->io.lock.refs == 1);
1102 hammer_rel_buffer(buffer, 1);
1103 return(0);
1107 * Reference a buffer that is either already referenced or via a specially
1108 * handled pointer (aka cursor->buffer).
1111 hammer_ref_buffer(hammer_buffer_t buffer)
1113 int error;
1115 hammer_ref(&buffer->io.lock);
1116 if (buffer->ondisk == NULL) {
1117 error = hammer_load_buffer(buffer, 0);
1118 if (error) {
1119 hammer_rel_buffer(buffer, 1);
1121 * NOTE: buffer pointer can become stale after
1122 * the above release.
1124 } else {
1125 KKASSERT(buffer->buf_type ==
1126 buffer->ondisk->head.buf_type);
1128 } else {
1129 error = 0;
1131 return(error);
1135 * Release a buffer. We have to deal with several places where
1136 * another thread can ref the buffer.
1138 * Only destroy the structure itself if the related buffer cache buffer
1139 * was disassociated from it. This ties the management of the structure
1140 * to the buffer cache subsystem. buffer->ondisk determines whether the
1141 * embedded io is referenced or not.
1143 void
1144 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
1146 hammer_cluster_t cluster;
1147 hammer_node_t node;
1149 if (buffer->io.lock.refs == 1) {
1150 hammer_lock_ex(&buffer->io.lock);
1151 if (buffer->io.lock.refs == 1) {
1152 hammer_io_release(&buffer->io, flush);
1155 * Clean out the B-Tree node cache, if any, then
1156 * clean up the cluster ref and free the buffer.
1158 * If the buffer acquires a new reference while we
1159 * are trying to clean it out, abort the cleaning.
1161 while (buffer->io.bp == NULL &&
1162 buffer->io.lock.refs == 1 &&
1163 (node = TAILQ_FIRST(&buffer->clist)) != NULL
1165 KKASSERT(node->lock.refs == 0);
1166 hammer_flush_node(node);
1168 if (buffer->io.bp == NULL &&
1169 hammer_islastref(&buffer->io.lock)) {
1170 cluster = buffer->cluster;
1171 RB_REMOVE(hammer_buf_rb_tree,
1172 &cluster->rb_bufs_root, buffer);
1173 buffer->cluster = NULL; /* sanity */
1174 --hammer_count_buffers;
1175 kfree(buffer, M_HAMMER);
1176 hammer_rel_cluster(cluster, 0);
1177 return;
1180 hammer_unlock(&buffer->io.lock);
1182 hammer_unref(&buffer->io.lock);
1186 * Flush passively cached B-Tree nodes associated with this buffer.
1188 * NOTE: The buffer is referenced and locked.
1190 void
1191 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1193 hammer_node_t node;
1195 node = TAILQ_FIRST(&buffer->clist);
1196 while (node) {
1197 buffer->save_scan = TAILQ_NEXT(node, entry);
1198 if (node->lock.refs == 0) {
1199 hammer_ref(&node->lock);
1200 node->flags |= HAMMER_NODE_FLUSH;
1201 hammer_rel_node(node);
1203 node = buffer->save_scan;
1207 /************************************************************************
1208 * NODES *
1209 ************************************************************************
1211 * Manage B-Tree nodes. B-Tree nodes represent the primary indexing
1212 * method used by the HAMMER filesystem.
1214 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1215 * associated with its buffer. It can have an active buffer reference
1216 * even when the node itself has no references. The node also passively
1217 * associates itself with its cluster without holding any cluster refs.
1218 * The cluster ref is indirectly maintained by the active buffer ref when
1219 * a node is acquired.
1221 * A hammer_node can also be passively associated with other HAMMER
1222 * structures, such as inodes, while retaining 0 references. These
1223 * associations can be cleared backwards using a pointer-to-pointer in
1224 * the hammer_node.
1226 * This allows the HAMMER implementation to cache hammer_node's long-term
1227 * and short-cut a great deal of the infrastructure's complexity. In
1228 * most cases a cached node can be reacquired without having to dip into
1229 * either the buffer or cluster management code.
1231 * The caller must pass a referenced cluster on call and will retain
1232 * ownership of the reference on return. The node will acquire its own
1233 * additional references, if necessary.
1235 hammer_node_t
1236 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1238 hammer_node_t node;
1241 * Locate the structure, allocating one if necessary.
1243 again:
1244 node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1245 node_offset);
1246 if (node == NULL) {
1247 ++hammer_count_nodes;
1248 node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1249 node->node_offset = node_offset;
1250 node->cluster = cluster;
1251 if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1252 node)) {
1253 --hammer_count_nodes;
1254 kfree(node, M_HAMMER);
1255 goto again;
1258 *errorp = hammer_ref_node(node);
1259 if (*errorp) {
1261 * NOTE: The node pointer may be stale on error return.
1262 * In fact, it's probably been destroyed.
1264 node = NULL;
1266 return(node);
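/*
 * Usage sketch for the node interface described above: the caller holds
 * a referenced cluster, acquires the node by its cluster-relative
 * offset, uses node->ondisk, and releases it.  The wrapper is
 * hypothetical.
 */
#if 0
static int
example_read_node(hammer_cluster_t cluster, int32_t node_offset)
{
	hammer_node_t node;
	int error;

	node = hammer_get_node(cluster, node_offset, &error);
	if (node == NULL)
		return(error);
	kprintf("node %08x has %d elements\n",
		node->node_offset, node->ondisk->count);
	hammer_rel_node(node);
	return(0);
}
#endif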
1270 * Reference the node to prevent disassociations, then associate and
1271 * load the related buffer. This routine can also be called to reference
1272 * a node from a cache pointer.
1274 * NOTE: Because the caller does not have a ref on the node, the caller's
1275 * node pointer will be stale if an error is returned. We may also wind
1276 * up clearing the related cache pointers.
1278 * NOTE: The cluster is indirectly referenced by our buffer ref.
1281 hammer_ref_node(hammer_node_t node)
1283 hammer_buffer_t buffer;
1284 int32_t buf_no;
1285 int error;
1287 hammer_ref(&node->lock);
1288 error = 0;
1289 if (node->ondisk == NULL) {
1290 hammer_lock_ex(&node->lock);
1291 if (node->ondisk == NULL) {
1293 * This is a little confusing but the gist is that
1294 * node->buffer determines whether the node is on
1295 * the buffer's clist and node->ondisk determines
1296 * whether the buffer is referenced.
1298 if ((buffer = node->buffer) != NULL) {
1299 error = hammer_ref_buffer(buffer);
1300 } else {
1301 buf_no = node->node_offset / HAMMER_BUFSIZE;
1302 buffer = hammer_get_buffer(node->cluster,
1303 buf_no, 0, &error);
1304 if (buffer) {
1305 KKASSERT(error == 0);
1306 TAILQ_INSERT_TAIL(&buffer->clist,
1307 node, entry);
1308 node->buffer = buffer;
1311 if (error == 0) {
1312 node->ondisk = (void *)((char *)buffer->ondisk +
1313 (node->node_offset & HAMMER_BUFMASK));
1316 hammer_unlock(&node->lock);
1318 if (error)
1319 hammer_rel_node(node);
1320 return (error);
1324 * Release a hammer_node. The node retains a passive association with
1325 * its cluster, buffer and caches.
1327 * However, to avoid cluttering up kernel memory with tons of B-Tree
1328 * node cache structures we destroy the node if no passive cache or
1329 * (instantiated) buffer references exist.
1331 void
1332 hammer_rel_node(hammer_node_t node)
1334 hammer_cluster_t cluster;
1335 hammer_buffer_t buffer;
1337 if (hammer_islastref(&node->lock)) {
1338 cluster = node->cluster;
1341 * Destroy the node if it is being deleted. Free the node
1342 * in the bitmap after we have unhooked it.
1344 if (node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) {
1345 hammer_flush_node(node);
1346 RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1347 node);
1348 hammer_ref_cluster(cluster);
1349 if ((buffer = node->buffer) != NULL) {
1350 node->buffer = NULL;
1351 hammer_remove_node_clist(buffer, node);
1352 if (node->ondisk) {
1353 node->ondisk = NULL;
1354 hammer_rel_buffer(buffer, 0);
1357 if (node->flags & HAMMER_NODE_DELETED) {
1358 hammer_free_btree(node->cluster,
1359 node->node_offset);
1360 if (node->node_offset ==
1361 cluster->ondisk->clu_btree_root) {
1362 kprintf("FREE CLUSTER %d\n", cluster->clu_no);
1363 hammer_free_cluster(cluster);
1364 /*hammer_io_undirty(&cluster->io);*/
1367 hammer_rel_cluster(cluster, 0);
1368 --hammer_count_nodes;
1369 kfree(node, M_HAMMER);
1370 return;
1374 * node->ondisk determines whether we have a buffer reference
1375 * to get rid of or not. Only get rid of the reference if
1376 * the kernel tried to flush the buffer.
1378 * NOTE: Once unref'd the node can be physically destroyed,
1379 * so our node is stale afterwards.
1381 * This case occurs if the node still has cache references.
1382 * We could remove the references and free the structure
1383 * but for now we allow them (and the node structure) to
1384 * remain intact.
1386 if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1387 hammer_flush_node(node);
1388 buffer = node->buffer;
1389 node->buffer = NULL;
1390 node->ondisk = NULL;
1391 hammer_remove_node_clist(buffer, node);
1392 hammer_rel_buffer(buffer, 0);
1396 * Clutter control, this case only occurs after a failed
1397 * load since otherwise ondisk will be non-NULL.
1399 if (node->cache1 == NULL && node->cache2 == NULL &&
1400 node->ondisk == NULL) {
1401 RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1402 node);
1403 if ((buffer = node->buffer) != NULL) {
1404 node->buffer = NULL; /* sanity */
1405 node->ondisk = NULL; /* sanity */
1406 hammer_remove_node_clist(buffer, node);
1408 --hammer_count_nodes;
1409 kfree(node, M_HAMMER);
1410 return;
1413 hammer_unref(&node->lock);
1414 } else {
1415 hammer_unref(&node->lock);
1420 * Cache-and-release a hammer_node. Kinda like catching and releasing a
1421 * fish, but keeping an eye on him. The node is passively cached in *cache.
1423 * NOTE! HAMMER may NULL *cache at any time, even after you have
1424 * referenced the node!
1426 void
1427 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1429 hammer_node_t old;
1432 * If the node is being deleted, don't cache it!
1434 if (node->flags & HAMMER_NODE_DELETED)
1435 return;
1438 * Cache the node. If we previously cached a different node we
1439 * have to give HAMMER a chance to destroy it.
1441 again:
1442 if (node->cache1 != cache) {
1443 if (node->cache2 == cache) {
1444 struct hammer_node **tmp;
1445 tmp = node->cache1;
1446 node->cache1 = node->cache2;
1447 node->cache2 = tmp;
1448 } else {
1449 if ((old = *cache) != NULL) {
1450 *cache = NULL;
1451 hammer_flush_node(old); /* can block */
1452 goto again;
1454 if (node->cache2)
1455 *node->cache2 = NULL;
1456 node->cache2 = node->cache1;
1457 node->cache1 = cache;
1458 *cache = node;
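/*
 * Usage sketch for the cache-and-release pattern, assuming a
 * hypothetical long-lived structure that keeps a passive node pointer.
 * HAMMER may NULL the cache field at any time, so the owner revalidates
 * it with hammer_ref_node() and falls back to hammer_get_node().
 */
#if 0
struct example_owner {
	struct hammer_node *cache;	/* passive cache, may be ripped out */
};

static hammer_node_t
example_reacquire(struct example_owner *owner, hammer_cluster_t cluster,
		  int32_t node_offset, int *errorp)
{
	hammer_node_t node;

	if ((node = owner->cache) != NULL) {
		*errorp = hammer_ref_node(node);
		if (*errorp == 0)
			return(node);		/* cache still good */
	}
	node = hammer_get_node(cluster, node_offset, errorp);
	if (node)
		hammer_cache_node(node, &owner->cache);
	return(node);
}

static void
example_teardown(struct example_owner *owner)
{
	hammer_uncache_node(&owner->cache);
}
#endif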
1463 void
1464 hammer_uncache_node(struct hammer_node **cache)
1466 hammer_node_t node;
1468 if ((node = *cache) != NULL) {
1469 *cache = NULL;
1470 if (node->cache1 == cache) {
1471 node->cache1 = node->cache2;
1472 node->cache2 = NULL;
1473 } else if (node->cache2 == cache) {
1474 node->cache2 = NULL;
1475 } else {
1476 panic("hammer_uncache_node: missing cache linkage");
1478 if (node->cache1 == NULL && node->cache2 == NULL)
1479 hammer_flush_node(node);
1484 * Remove a node's cache references and destroy the node if it has no
1485 * other references or backing store.
1487 void
1488 hammer_flush_node(hammer_node_t node)
1490 hammer_buffer_t buffer;
1492 if (node->cache1)
1493 *node->cache1 = NULL;
1494 if (node->cache2)
1495 *node->cache2 = NULL;
1496 if (node->lock.refs == 0 && node->ondisk == NULL) {
1497 RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1498 node);
1499 if ((buffer = node->buffer) != NULL) {
1500 node->buffer = NULL;
1501 hammer_remove_node_clist(buffer, node);
1502 /* buffer is unreferenced because ondisk is NULL */
1504 --hammer_count_nodes;
1505 kfree(node, M_HAMMER);
1510 * Remove a node from the buffer's clist. Adjust save_scan as appropriate.
1511 * This is in its own little routine to properly handle interactions with
1512 * save_scan, so it is possible to block while scanning a buffer's node list.
1514 static
1515 void
1516 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1518 if (buffer->save_scan == node)
1519 buffer->save_scan = TAILQ_NEXT(node, entry);
1520 TAILQ_REMOVE(&buffer->clist, node, entry);
1523 /************************************************************************
1524 * A-LIST ALLOCATORS *
1525 ************************************************************************/
1528 * Allocate HAMMER clusters
1530 hammer_cluster_t
1531 hammer_alloc_cluster(hammer_mount_t hmp, hammer_cluster_t cluster_hint,
1532 int *errorp)
1534 hammer_volume_t volume;
1535 hammer_cluster_t cluster;
1536 int32_t clu_no;
1537 int32_t clu_hint;
1538 int32_t vol_beg;
1539 int32_t vol_no;
1542 * Figure out our starting volume and hint.
1544 if (cluster_hint) {
1545 vol_beg = cluster_hint->volume->vol_no;
1546 clu_hint = cluster_hint->clu_no;
1547 } else {
1548 vol_beg = hmp->volume_iterator;
1549 clu_hint = -1;
1553 * Loop through volumes looking for a free cluster. If allocating
1554 * a new cluster relative to an existing cluster try to find a free
1555 * cluster on either side (clu_hint >= 0), otherwise just do a
1556 * forwards iteration.
1558 vol_no = vol_beg;
1559 do {
1560 volume = hammer_get_volume(hmp, vol_no, errorp);
1561 kprintf("VOLUME %p %d\n", volume, vol_no);
1562 if (*errorp) {
1563 clu_no = HAMMER_ALIST_BLOCK_NONE;
1564 break;
1566 hammer_modify_volume(volume);
1567 if (clu_hint == -1) {
1568 clu_hint = volume->clu_iterator;
1569 clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1570 clu_hint);
1571 if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1572 clu_no = hammer_alist_alloc_fwd(&volume->alist,
1573 1, 0);
1575 } else {
1576 clu_no = hammer_alist_alloc_fwd(&volume->alist, 1,
1577 clu_hint);
1578 if (clu_no == HAMMER_ALIST_BLOCK_NONE) {
1579 clu_no = hammer_alist_alloc_rev(&volume->alist,
1580 1, clu_hint);
1583 hammer_modify_volume_done(volume);
1584 if (clu_no != HAMMER_ALIST_BLOCK_NONE)
1585 break;
1586 hammer_rel_volume(volume, 0);
1587 volume = NULL;
1588 *errorp = ENOSPC;
1589 vol_no = (vol_no + 1) % hmp->nvolumes;
1590 clu_hint = -1;
1591 } while (vol_no != vol_beg);
1594 * Acquire the cluster. On success this will force *errorp to 0.
1596 if (clu_no != HAMMER_ALIST_BLOCK_NONE) {
1597 kprintf("ALLOC CLUSTER %d\n", clu_no);
1598 cluster = hammer_get_cluster(volume, clu_no, errorp, 1);
1599 volume->clu_iterator = clu_no;
1600 hammer_rel_volume(volume, 0);
1601 } else {
1602 cluster = NULL;
1604 if (cluster)
1605 hammer_lock_ex(&cluster->io.lock);
1606 return(cluster);
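/*
 * Usage sketch: hammer_alloc_cluster() returns a referenced,
 * exclusively locked cluster (or NULL with *errorp set), which the
 * caller initializes and eventually unlocks and releases.  The bounds
 * handling, the teardown pairing, and the wrapper are illustrative only.
 */
#if 0
static hammer_cluster_t
example_new_cluster(hammer_mount_t hmp, hammer_cluster_t hint,
		    hammer_base_elm_t left, hammer_base_elm_t right,
		    int *errorp)
{
	hammer_cluster_t cluster;

	cluster = hammer_alloc_cluster(hmp, hint, errorp);
	if (cluster == NULL)
		return(NULL);
	hammer_init_cluster(cluster, left, right);
	hammer_unlock(&cluster->io.lock);
	return(cluster);	/* caller releases with hammer_rel_cluster() */
}
#endif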
1609 void
1610 hammer_init_cluster(hammer_cluster_t cluster, hammer_base_elm_t left_bound,
1611 hammer_base_elm_t right_bound)
1613 hammer_cluster_ondisk_t ondisk = cluster->ondisk;
1615 hammer_modify_cluster(cluster);
1616 ondisk->clu_btree_beg = *left_bound;
1617 ondisk->clu_btree_end = *right_bound;
1618 cluster->clu_btree_beg = ondisk->clu_btree_beg;
1619 cluster->clu_btree_end = ondisk->clu_btree_end;
1620 hammer_modify_cluster_done(cluster);
1624 * Deallocate a cluster
1626 void
1627 hammer_free_cluster(hammer_cluster_t cluster)
1629 hammer_modify_cluster(cluster);
1630 hammer_alist_free(&cluster->volume->alist, cluster->clu_no, 1);
1631 hammer_modify_cluster_done(cluster);
1635 * Allocate HAMMER elements - btree nodes, data storage, and record elements
1637 * The passed *bufferp should be initialized to NULL. On successive calls
1638 * *bufferp caches the most recent buffer used until put away by the caller.
1639 * Note that previously returned pointers using the cached buffer become
1640 * invalid on successive calls which reuse *bufferp.
1642 * All allocations first attempt to use the block found at the specified
1643 * iterator. If that fails the first available block is used. If that
1644 * fails a new buffer is allocated and associated with the buffer type
1645 * A-list and the element is allocated out of the new buffer.
1648 hammer_node_t
1649 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1651 hammer_buffer_t buffer;
1652 hammer_alist_t live;
1653 hammer_node_t node;
1654 int32_t elm_no;
1655 int32_t buf_no;
1656 int32_t node_offset;
1659 * Allocate a B-Tree element
1661 hammer_modify_cluster(cluster);
1662 buffer = NULL;
1663 live = &cluster->alist_btree;
1664 elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1665 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1666 elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1667 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1668 alloc_new_buffer(cluster, live,
1669 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1670 cluster->ondisk->idx_index, errorp, &buffer);
1671 elm_no = hammer_alist_alloc(live, 1);
1672 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1673 *errorp = ENOSPC;
1674 if (buffer)
1675 hammer_rel_buffer(buffer, 0);
1676 hammer_modify_cluster_done(cluster);
1677 return(NULL);
1680 cluster->ondisk->idx_index = elm_no;
1681 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_BTREE_NODES);
1684 * Load and return the B-Tree element
1686 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1687 node_offset = buf_no * HAMMER_BUFSIZE +
1688 offsetof(union hammer_fsbuf_ondisk,
1689 btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1690 node = hammer_get_node(cluster, node_offset, errorp);
1691 if (node) {
1692 hammer_modify_node(node);
1693 bzero(node->ondisk, sizeof(*node->ondisk));
1694 hammer_modify_node_done(node);
1695 } else {
1696 hammer_alist_free(live, elm_no, 1);
1697 /* node is already NULL on error return from hammer_get_node() */
1698 node = NULL;
1700 hammer_modify_cluster_done(cluster);
1701 if (buffer)
1702 hammer_rel_buffer(buffer, 0);
1703 return(node);
1706 void *
1707 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1708 int *errorp, struct hammer_buffer **bufferp)
1710 hammer_buffer_t buffer;
1711 hammer_alist_t live;
1712 int32_t elm_no;
1713 int32_t buf_no;
1714 int32_t nblks;
1715 void *item;
1718 * Deal with large data blocks. The blocksize is HAMMER_BUFSIZE
1719 * for these allocations.
1721 hammer_modify_cluster(cluster);
1722 if ((bytes & HAMMER_BUFMASK) == 0) {
1723 nblks = bytes / HAMMER_BUFSIZE;
1724 /* only one block allowed for now (so buffer can hold it) */
1725 KKASSERT(nblks == 1);
1727 buf_no = hammer_alloc_master(cluster, nblks,
1728 cluster->ondisk->idx_ldata, 1);
1729 if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1730 *errorp = ENOSPC;
1731 hammer_modify_cluster_done(cluster);
1732 return(NULL);
1734 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, nblks);
1735 cluster->ondisk->idx_ldata = buf_no;
1736 hammer_modify_cluster_done(cluster);
1737 buffer = *bufferp;
1738 *bufferp = hammer_get_buffer(cluster, buf_no, -1, errorp);
1739 if (buffer)
1740 hammer_rel_buffer(buffer, 0);
1741 buffer = *bufferp;
1742 return(buffer->ondisk);
1746 * Allocate a data element. The block size is HAMMER_DATA_BLKSIZE
1747 * (64 bytes) for these allocations.
1749 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1750 nblks /= HAMMER_DATA_BLKSIZE;
1751 live = &cluster->alist_mdata;
1752 elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1753 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1754 elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1755 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1756 alloc_new_buffer(cluster, live,
1757 HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1758 cluster->ondisk->idx_data, errorp, bufferp);
1759 elm_no = hammer_alist_alloc(live, nblks);
1760 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1761 *errorp = ENOSPC;
1762 hammer_modify_cluster_done(cluster);
1763 return(NULL);
1766 cluster->ondisk->idx_data = elm_no;
1767 hammer_modify_cluster_done(cluster);
1770 * Load and return the data element
1772 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1773 buffer = *bufferp;
1774 if (buffer == NULL || buffer->cluster != cluster ||
1775 buffer->buf_no != buf_no) {
1776 if (buffer)
1777 hammer_rel_buffer(buffer, 0);
1778 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1779 *bufferp = buffer;
1781 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1782 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_DATA_NODES);
1783 hammer_modify_buffer(buffer);
1784 item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1785 bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1786 hammer_modify_buffer_done(buffer);
1787 *errorp = 0;
1788 return(item);
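/*
 * Usage sketch for the *bufferp convention described above: start with
 * a NULL buffer pointer, let successive allocations reuse it, and
 * release whatever buffer is still cached when done.  The wrapper is
 * hypothetical.
 */
#if 0
static int
example_alloc_rec_and_data(hammer_cluster_t cluster, int32_t bytes)
{
	hammer_buffer_t buffer = NULL;
	union hammer_record_ondisk *rec = NULL;
	void *data;
	int error;

	data = hammer_alloc_data(cluster, bytes, &error, &buffer);
	if (data) {
		/* this call may recycle *bufferp, invalidating 'data' */
		rec = hammer_alloc_record(cluster, &error, &buffer);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(rec ? 0 : error);
}
#endif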
1791 void *
1792 hammer_alloc_record(hammer_cluster_t cluster,
1793 int *errorp, struct hammer_buffer **bufferp)
1795 hammer_buffer_t buffer;
1796 hammer_alist_t live;
1797 int32_t elm_no;
1798 int32_t buf_no;
1799 void *item;
1802 * Allocate a record element
1804 hammer_modify_cluster(cluster);
1805 live = &cluster->alist_record;
1806 elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1807 if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1808 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1809 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1810 alloc_new_buffer(cluster, live,
1811 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1812 cluster->ondisk->idx_record, errorp, bufferp);
1813 elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1814 kprintf("hammer_alloc_record elm again %08x\n", elm_no);
1815 if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1816 *errorp = ENOSPC;
1817 hammer_modify_cluster_done(cluster);
1818 return(NULL);
1821 cluster->ondisk->idx_record = elm_no;
1822 hammer_modify_cluster_done(cluster);
1825 * Load and return the record element
1827 buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1828 buffer = *bufferp;
1829 if (buffer == NULL || buffer->cluster != cluster ||
1830 buffer->buf_no != buf_no) {
1831 if (buffer)
1832 hammer_rel_buffer(buffer, 0);
1833 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1834 *bufferp = buffer;
1836 KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_RECORDS);
1837 KKASSERT((elm_no & HAMMER_FSBUF_BLKMASK) < HAMMER_RECORD_NODES);
1838 hammer_modify_buffer(buffer);
1839 item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1840 bzero(item, sizeof(union hammer_record_ondisk));
1841 hammer_modify_buffer_done(buffer);
1842 *errorp = 0;
1843 return(item);
1846 void
1847 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1849 int32_t elm_no;
1850 int32_t nblks;
1851 hammer_alist_t live;
1853 hammer_modify_cluster(buffer->cluster);
1854 if ((bytes & HAMMER_BUFMASK) == 0) {
1855 nblks = bytes / HAMMER_BUFSIZE;
1856 KKASSERT(nblks == 1 && data == (void *)buffer->ondisk);
1857 hammer_alist_free(&buffer->cluster->alist_master,
1858 buffer->buf_no, nblks);
1859 hammer_adjust_stats(buffer->cluster, HAMMER_FSBUF_DATA, -nblks);
1860 hammer_modify_cluster_done(buffer->cluster);
1861 return;
1864 elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1865 HAMMER_DATA_BLKSIZE;
1866 KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1867 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1868 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1869 nblks /= HAMMER_DATA_BLKSIZE;
1870 live = &buffer->cluster->alist_mdata;
1871 hammer_alist_free(live, elm_no, nblks);
1872 hammer_modify_cluster_done(buffer->cluster);
1875 void
1876 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1878 int32_t elm_no;
1879 hammer_alist_t live;
1881 hammer_modify_cluster(buffer->cluster);
1882 elm_no = rec - &buffer->ondisk->record.recs[0];
1883 KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1884 elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1885 live = &buffer->cluster->alist_record;
1886 hammer_alist_free(live, elm_no, 1);
1887 hammer_modify_cluster_done(buffer->cluster);
1890 void
1891 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1893 const int32_t blksize = sizeof(struct hammer_node_ondisk);
1894 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1895 hammer_alist_t live;
1896 int32_t elm_no;
1898 hammer_modify_cluster(cluster);
1899 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1900 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1901 live = &cluster->alist_btree;
1902 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1903 elm_no += fsbuf_offset / blksize;
1904 hammer_alist_free(live, elm_no, 1);
1905 hammer_modify_cluster_done(cluster);
1908 void
1909 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1911 const int32_t blksize = HAMMER_DATA_BLKSIZE;
1912 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1913 hammer_alist_t live;
1914 int32_t elm_no;
1915 int32_t buf_no;
1916 int32_t nblks;
1918 hammer_modify_cluster(cluster);
1919 if ((bytes & HAMMER_BUFMASK) == 0) {
1920 nblks = bytes / HAMMER_BUFSIZE;
1921 KKASSERT(nblks == 1 && (bclu_offset & HAMMER_BUFMASK) == 0);
1922 buf_no = bclu_offset / HAMMER_BUFSIZE;
1923 hammer_alist_free(&cluster->alist_master, buf_no, nblks);
1924 hammer_adjust_stats(cluster, HAMMER_FSBUF_DATA, -nblks);
1925 hammer_modify_cluster_done(cluster);
1926 return;
1929 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1930 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1931 live = &cluster->alist_mdata;
1932 nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1933 nblks /= HAMMER_DATA_BLKSIZE;
1934 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1935 elm_no += fsbuf_offset / blksize;
1936 hammer_alist_free(live, elm_no, nblks);
1937 hammer_modify_cluster_done(cluster);
1940 void
1941 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1943 const int32_t blksize = sizeof(union hammer_record_ondisk);
1944 int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1945 hammer_alist_t live;
1946 int32_t elm_no;
1948 hammer_modify_cluster(cluster);
1949 elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1950 fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1951 live = &cluster->alist_record;
1952 KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1953 elm_no += fsbuf_offset / blksize;
1954 hammer_alist_free(live, elm_no, 1);
1955 hammer_modify_cluster_done(cluster);
/*
 * Allocate a new filesystem buffer and assign it to the specified
 * filesystem buffer type.  The new buffer will be added to the
 * type-specific A-list and initialized.
 */
static void
alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
		 u_int64_t type, int32_t nelements,
		 int start, int *errorp, struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t buf_no;
	int isfwd;

	if (*bufferp)
		hammer_rel_buffer(*bufferp, 0);
	*bufferp = NULL;

	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */
	isfwd = (type != HAMMER_FSBUF_RECORDS);
	buf_no = hammer_alloc_master(cluster, 1, start, isfwd);
	if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
		*errorp = ENOSPC;
		return;
	}

	/*
	 * The new buffer must be initialized (type != 0) regardless of
	 * whether we already have it cached or not, so don't try to
	 * optimize the cached buffer check.  Just call hammer_get_buffer().
	 */
	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
	*bufferp = buffer;

	/*
	 * Finally, do a meta-free of the buffer's elements into the
	 * type-specific A-list and update our statistics to reflect
	 * the allocation.
	 */
	if (buffer) {
		kprintf("alloc_new_buffer buf_no %d type %016llx nelms %d\n",
			buf_no, type, nelements);
		hammer_modify_buffer(buffer);	/*XXX*/
		hammer_alist_free(live, buf_no * HAMMER_FSBUF_MAXBLKS,
				  nelements);
		hammer_modify_buffer_done(buffer);	/*XXX*/
		hammer_adjust_stats(cluster, type, 1);
	}
}
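/*
 * Added sketch (not compiled, not part of the original file): rough shape
 * of how a typed allocator earlier in this file is expected to fall back to
 * alloc_new_buffer() when its live A-list is exhausted.  The function name
 * is hypothetical and the idx_index hint field is assumed from the cluster
 * ondisk header.
 */
#if 0
static int32_t
example_alloc_btree_elm(hammer_cluster_t cluster, int *errorp,
			struct hammer_buffer **bufferp)
{
	hammer_alist_t live = &cluster->alist_btree;
	int32_t elm_no;

	*errorp = 0;
	hammer_modify_cluster(cluster);

	/*
	 * Try the hinted starting point first, then wrap to the beginning.
	 */
	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
		/*
		 * No free element left in any existing B-Tree buffer; carve
		 * a new buffer out of the cluster's master A-list and retry.
		 */
		alloc_new_buffer(cluster, live, HAMMER_FSBUF_BTREE,
				 HAMMER_BTREE_NODES,
				 cluster->ondisk->idx_index,
				 errorp, bufferp);
		if (*errorp == 0)
			elm_no = hammer_alist_alloc_fwd(live, 1, 0);
	}
	hammer_modify_cluster_done(cluster);
	return(elm_no);
}
#endif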
/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}
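/*
 * Added note: the two callbacks below implement the vmntvnodescan() pass
 * above.  hammer_sync_scan1() is the cheap pre-filter that skips vnodes
 * which are clean and have no dirty buffers (it returns -1 for those),
 * while hammer_sync_scan2() issues the actual VOP_FSYNC() on the rest.
 */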
static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}
static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	if (vp->v_type != VCHR) {
		error = VOP_FSYNC(vp, info->waitfor);
		if (error)
			info->error = error;
	}
	return(0);
}
int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_clu_rb_tree, &volume->rb_clus_root, NULL,
		hammer_sync_cluster, info);
	if (hammer_ref_volume(volume) == 0) {
		hammer_io_flush(&volume->io, info);
		hammer_rel_volume(volume, 0);
	}
	return(0);
}
int
hammer_sync_cluster(hammer_cluster_t cluster, void *data)
{
	struct hammer_sync_info *info = data;

	RB_SCAN(hammer_buf_rb_tree, &cluster->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	switch(cluster->state) {
	case HAMMER_CLUSTER_OPEN:
	case HAMMER_CLUSTER_IDLE:
		if (hammer_ref_cluster(cluster) == 0) {
			hammer_io_flush(&cluster->io, info);
			hammer_rel_cluster(cluster, 0);
		}
		break;
	default:
		break;
	}
	return(0);
}
int
hammer_sync_buffer(hammer_buffer_t buffer, void *data)
{
	struct hammer_sync_info *info = data;

	if (hammer_ref_buffer(buffer) == 0) {
		hammer_lock_ex(&buffer->io.lock);
		hammer_flush_buffer_nodes(buffer);
		hammer_unlock(&buffer->io.lock);
		hammer_io_flush(&buffer->io, info);
		hammer_rel_buffer(buffer, 0);
	}
	return(0);
}
/*
 * Generic buffer initialization
 */
static void
initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
{
	head->buf_type = type;
	hammer_alist_init(live);
}
/*
 * Calculate the cluster's offset in the volume.  This calculation is
 * slightly more complex when using superclusters because superclusters
 * are grouped in blocks of 16, followed by 16 x N clusters where N
 * is the number of clusters a supercluster can manage.
 */
static int64_t
calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
{
	int32_t scl_group;
	int64_t scl_group_size;
	int64_t off;

	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
			    HAMMER_SCL_MAXCLUSTERS;
		scl_group_size =
			((int64_t)HAMMER_BUFSIZE *
			 HAMMER_VOL_SUPERCLUSTER_GROUP) +
			((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;

		off = volume->cluster_base +
		      scl_group * scl_group_size +
		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
		       HAMMER_VOL_SUPERCLUSTER_GROUP)) *
		      volume->vol_clsize;
	} else {
		off = volume->cluster_base +
		      (int64_t)clu_no * volume->vol_clsize;
	}
	return(off);
}
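/*
 * Added note (symbolic; the actual constants live in hammer_disk.h): with
 * G = HAMMER_VOL_SUPERCLUSTER_GROUP and N = HAMMER_SCL_MAXCLUSTERS, cluster
 * clu_no falls in supercluster group clu_no / (G * N).  scl_group_size
 * above accounts for the G supercluster headers (HAMMER_BUFSIZE each, plus
 * the additional G * HAMMER_BUFSIZE added by the second statement) and the
 * G * N clusters of vol_clsize bytes in each group; within its group the
 * cluster sits clu_no % (G * N) cluster-sizes past the group's header area.
 */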
/*
 * Calculate a super-cluster's offset in the volume.
 */
static int64_t
calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
{
	int64_t off;
	int32_t scl_group;
	int64_t scl_group_size;

	KKASSERT(volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
	if (scl_group) {
		scl_group_size =
			((int64_t)HAMMER_BUFSIZE *
			 HAMMER_VOL_SUPERCLUSTER_GROUP) +
			((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
			 volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
		scl_group_size +=
			HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
		off = volume->cluster_base + (scl_group * scl_group_size) +
		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
	} else {
		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
	}
	return(off);
}
/*
 * Allocate nblks buffers out of the cluster's master A-list, scanning
 * forward or backward from the caller's hint and wrapping around once
 * if the first scan comes up empty.
 */
static int32_t
hammer_alloc_master(hammer_cluster_t cluster, int nblks,
		    int32_t start, int isfwd)
{
	int32_t buf_no;

	hammer_modify_cluster(cluster);
	if (isfwd) {
		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
							nblks, 0);
		}
	} else {
		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, start);
		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
						nblks, HAMMER_ALIST_BLOCK_MAX);
		}
	}
	hammer_modify_cluster_done(cluster);

	/*
	 * Recover space from empty record, b-tree, and data a-lists.
	 */

	return(buf_no);
}
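/*
 * Added note: alloc_new_buffer() passes isfwd == 0 only for
 * HAMMER_FSBUF_RECORDS, so record buffers are carved from the top of the
 * cluster (reverse scan from HAMMER_ALIST_BLOCK_MAX) while B-Tree and data
 * buffers are carved from the bottom, letting the two regions grow toward
 * each other within the cluster.
 */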
/*
 * Adjust allocation statistics
 */
static void
hammer_adjust_stats(hammer_cluster_t cluster, u_int64_t buf_type, int nblks)
{
	hammer_modify_cluster(cluster);
	hammer_modify_volume(cluster->volume);
	hammer_modify_volume(cluster->volume->hmp->rootvol);

	switch(buf_type) {
	case HAMMER_FSBUF_BTREE:
		cluster->ondisk->stat_idx_bufs += nblks;
		cluster->volume->ondisk->vol_stat_idx_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_idx_bufs += nblks;
		break;
	case HAMMER_FSBUF_DATA:
		cluster->ondisk->stat_data_bufs += nblks;
		cluster->volume->ondisk->vol_stat_data_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_data_bufs += nblks;
		break;
	case HAMMER_FSBUF_RECORDS:
		cluster->ondisk->stat_rec_bufs += nblks;
		cluster->volume->ondisk->vol_stat_rec_bufs += nblks;
		cluster->volume->hmp->rootvol->ondisk->vol0_stat_rec_bufs += nblks;
		break;
	}
	hammer_modify_cluster_done(cluster);
	hammer_modify_volume_done(cluster->volume);
	hammer_modify_volume_done(cluster->volume->hmp->rootvol);
}
/*
 *	A-LIST SUPPORT
 *
 * Setup the parameters for the various A-lists we use in hammer.  The
 * supercluster A-list must be chained to the cluster A-list and cluster
 * slave A-lists are chained to buffer A-lists.
 *
 * See hammer_init_alist_config() below.
 */
/*
 * A-LIST - cluster recursion into a filesystem buffer
 */
static int
buffer_alist_init(void *info, int32_t blk, int32_t radix)
{
	return(0);
#if 0
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	/*
	 * Calculate the buffer number, initialize based on the buffer type.
	 * The buffer has already been allocated so assert that it has been
	 * initialized.
	 */
	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, 1);
		hammer_rel_buffer(buffer, 0);
	}
	return (error);
#endif
}
static int
buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	return(0);
#if 0
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	/*
	 * Calculate the buffer number, initialize based on the buffer type.
	 * The buffer has already been allocated so assert that it has been
	 * initialized.
	 */
	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		hammer_adjust_stats(cluster, buffer->ondisk->head.buf_type, -1);
		hammer_rel_buffer(buffer, 0);
	}
	return (error);
#endif
}
/*
 * Note: atblk can be negative and atblk - blk can go negative.
 */
static int
buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);

		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		hammer_modify_buffer_done(buffer);
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int
buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int32_t r;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		hammer_modify_buffer_done(buffer);
		*fullp = hammer_alist_isfull(&buffer->alist);
		hammer_rel_buffer(buffer, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
buffer_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_cluster_t cluster = info;
	hammer_buffer_t buffer;
	int32_t buf_no;
	int error = 0;

	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
	if (buffer) {
		KKASSERT(buffer->ondisk->head.buf_type != 0);
		hammer_modify_buffer(buffer);
		hammer_alist_free(&buffer->alist, base_blk, count);
		hammer_modify_buffer_done(buffer);
		*emptyp = hammer_alist_isempty(&buffer->alist);
		/* XXX don't bother updating if the buffer is completely empty? */
		hammer_rel_buffer(buffer, 0);
	} else {
		*emptyp = 0;
	}
}
static void
buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
/*
 * A-LIST - super-cluster recursion into a cluster and cluster recursion
 * into a filesystem buffer.  A-Lists are mostly self-contained entities,
 * but callbacks must be installed to recurse from one A-List to another.
 *
 * Implementing these callbacks allows us to operate a multi-layered A-List
 * as a single entity.
 */
static int
super_alist_init(void *info, int32_t blk, int32_t radix)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	/*
	 * Calculate the super-cluster number containing the cluster (blk)
	 * and obtain the super-cluster buffer.
	 */
	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
	if (supercl)
		hammer_rel_supercl(supercl, 0);
	return (error);
}
static int
super_alist_destroy(void *info, int32_t blk, int32_t radix)
{
	return(0);
}
static int
super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		hammer_modify_supercl_done(supercl);
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static int
super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
		      int32_t count, int32_t atblk, int32_t *fullp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int32_t r;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
		if (r != HAMMER_ALIST_BLOCK_NONE)
			r += blk;
		hammer_modify_supercl_done(supercl);
		*fullp = hammer_alist_isfull(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
	}
	return(r);
}
static void
super_alist_free(void *info, int32_t blk, int32_t radix,
		 int32_t base_blk, int32_t count, int32_t *emptyp)
{
	hammer_volume_t volume = info;
	hammer_supercl_t supercl;
	int32_t scl_no;
	int error = 0;

	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
	supercl = hammer_get_supercl(volume, scl_no, &error, 0);
	if (supercl) {
		hammer_modify_supercl(supercl);
		hammer_alist_free(&supercl->alist, base_blk, count);
		hammer_modify_supercl_done(supercl);
		*emptyp = hammer_alist_isempty(&supercl->alist);
		hammer_rel_supercl(supercl, 0);
	} else {
		*emptyp = 0;
	}
}
static void
super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
{
}
void
hammer_init_alist_config(void)
{
	hammer_alist_config_t config;

	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
			      1, HAMMER_FSBUF_METAELMS);
	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_VOL_METAELMS_1LYR);
	hammer_alist_template(&Vol_super_alist_config,
			      HAMMER_VOL_MAXSUPERCLUSTERS * HAMMER_SCL_MAXCLUSTERS,
			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
			      1, HAMMER_SUPERCL_METAELMS);
	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
			      1, HAMMER_CLU_MASTER_METAELMS);
	hammer_alist_template(&Clu_slave_alist_config,
			      HAMMER_CLU_MAXBUFFERS * HAMMER_FSBUF_MAXBLKS,
			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);

	config = &Vol_super_alist_config;
	config->bl_radix_init = super_alist_init;
	config->bl_radix_destroy = super_alist_destroy;
	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
	config->bl_radix_alloc_rev = super_alist_alloc_rev;
	config->bl_radix_free = super_alist_free;
	config->bl_radix_print = super_alist_print;

	config = &Clu_slave_alist_config;
	config->bl_radix_init = buffer_alist_init;
	config->bl_radix_destroy = buffer_alist_destroy;
	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
	config->bl_radix_free = buffer_alist_free;
	config->bl_radix_print = buffer_alist_print;
}
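/*
 * Added sketch (not compiled, not part of the original file): once the
 * callbacks above are installed, a single allocation against a cluster's
 * slave A-list transparently recurses through buffer_alist_alloc_fwd()
 * into the owning filesystem buffer's A-list; the layering is invisible
 * at the call site.  The function name below is hypothetical.
 */
#if 0
static void
example_layered_alloc(hammer_cluster_t cluster)
{
	int32_t elm_no;

	hammer_modify_cluster(cluster);
	elm_no = hammer_alist_alloc_fwd(&cluster->alist_btree, 1, 0);
	if (elm_no != HAMMER_ALIST_BLOCK_NONE) {
		/*
		 * elm_no / HAMMER_FSBUF_MAXBLKS is the buffer number and
		 * elm_no % HAMMER_FSBUF_MAXBLKS the element within it.
		 * Give the element straight back for the purposes of the
		 * demonstration.
		 */
		hammer_alist_free(&cluster->alist_btree, elm_no, 1);
	}
	hammer_modify_cluster_done(cluster);
}
#endif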