2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
5 * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved.
7 * This code is derived from software contributed to The DragonFly Project
8 * by Matthew Dillon <dillon@dragonflybsd.org>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
20 * 3. Neither the name of The DragonFly Project nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific, prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/nlookup.h>
42 #include <sys/vnode.h>
43 #include <sys/mount.h>
44 #include <sys/fcntl.h>
45 #include <sys/vfsops.h>
46 #include <sys/sysctl.h>
47 #include <sys/socket.h>
48 #include <sys/objcache.h>
56 TAILQ_HEAD(hammer2_mntlist
, hammer2_dev
);
57 static struct hammer2_mntlist hammer2_mntlist
;
59 struct hammer2_pfslist hammer2_pfslist
;
60 struct hammer2_pfslist hammer2_spmplist
;
61 struct lock hammer2_mntlk
;
63 int hammer2_supported_version
= HAMMER2_VOL_VERSION_DEFAULT
;
65 int hammer2_aux_flags
;
66 int hammer2_xop_nthreads
;
67 int hammer2_xop_sgroups
;
68 int hammer2_xop_xgroups
;
69 int hammer2_xop_xbase
;
71 long hammer2_debug_inode
;
72 int hammer2_cluster_meta_read
= 1; /* physical read-ahead */
73 int hammer2_cluster_data_read
= 4; /* physical read-ahead */
74 int hammer2_cluster_write
= 0; /* physical write clustering */
75 int hammer2_dedup_enable
= 1;
76 int hammer2_always_compress
= 0; /* always try to compress */
77 int hammer2_flush_pipe
= 100;
78 int hammer2_dio_count
;
79 int hammer2_dio_limit
= 256;
80 int hammer2_bulkfree_tps
= 5000;
81 int hammer2_spread_workers
;
82 int hammer2_limit_saved_depth
;
83 long hammer2_chain_allocs
;
84 long hammer2_limit_saved_chains
;
85 long hammer2_limit_dirty_chains
;
86 long hammer2_limit_dirty_inodes
;
87 long hammer2_count_modified_chains
;
88 long hammer2_iod_file_read
;
89 long hammer2_iod_meta_read
;
90 long hammer2_iod_indr_read
;
91 long hammer2_iod_fmap_read
;
92 long hammer2_iod_volu_read
;
93 long hammer2_iod_file_write
;
94 long hammer2_iod_file_wembed
;
95 long hammer2_iod_file_wzero
;
96 long hammer2_iod_file_wdedup
;
97 long hammer2_iod_meta_write
;
98 long hammer2_iod_indr_write
;
99 long hammer2_iod_fmap_write
;
100 long hammer2_iod_volu_write
;
101 static long hammer2_iod_inode_creates
;
102 static long hammer2_iod_inode_deletes
;
104 long hammer2_process_icrc32
;
105 long hammer2_process_xxhash64
;
111 MALLOC_DECLARE(M_HAMMER2_CBUFFER
);
112 MALLOC_DEFINE(M_HAMMER2_CBUFFER
, "HAMMER2-compbuffer",
113 "Buffer used for compression.");
115 MALLOC_DECLARE(M_HAMMER2_DEBUFFER
);
116 MALLOC_DEFINE(M_HAMMER2_DEBUFFER
, "HAMMER2-decompbuffer",
117 "Buffer used for decompression.");
119 SYSCTL_NODE(_vfs
, OID_AUTO
, hammer2
, CTLFLAG_RW
, 0, "HAMMER2 filesystem");
121 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, supported_version
, CTLFLAG_RD
,
122 &hammer2_supported_version
, 0, "");
123 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, aux_flags
, CTLFLAG_RW
,
124 &hammer2_aux_flags
, 0, "");
125 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, debug
, CTLFLAG_RW
,
126 &hammer2_debug
, 0, "");
127 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, debug_inode
, CTLFLAG_RW
,
128 &hammer2_debug_inode
, 0, "");
129 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, spread_workers
, CTLFLAG_RW
,
130 &hammer2_spread_workers
, 0, "");
131 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, cluster_meta_read
, CTLFLAG_RW
,
132 &hammer2_cluster_meta_read
, 0, "");
133 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, cluster_data_read
, CTLFLAG_RW
,
134 &hammer2_cluster_data_read
, 0, "");
135 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, cluster_write
, CTLFLAG_RW
,
136 &hammer2_cluster_write
, 0, "");
137 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, dedup_enable
, CTLFLAG_RW
,
138 &hammer2_dedup_enable
, 0, "");
139 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, always_compress
, CTLFLAG_RW
,
140 &hammer2_always_compress
, 0, "");
141 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, flush_pipe
, CTLFLAG_RW
,
142 &hammer2_flush_pipe
, 0, "");
143 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, bulkfree_tps
, CTLFLAG_RW
,
144 &hammer2_bulkfree_tps
, 0, "");
145 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, chain_allocs
, CTLFLAG_RD
,
146 &hammer2_chain_allocs
, 0, "");
147 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, limit_saved_chains
, CTLFLAG_RW
,
148 &hammer2_limit_saved_chains
, 0, "");
149 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, limit_saved_depth
, CTLFLAG_RW
,
150 &hammer2_limit_saved_depth
, 0, "");
151 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, limit_dirty_chains
, CTLFLAG_RW
,
152 &hammer2_limit_dirty_chains
, 0, "");
153 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, limit_dirty_inodes
, CTLFLAG_RW
,
154 &hammer2_limit_dirty_inodes
, 0, "");
155 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, count_modified_chains
, CTLFLAG_RD
,
156 &hammer2_count_modified_chains
, 0, "");
157 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, dio_count
, CTLFLAG_RD
,
158 &hammer2_dio_count
, 0, "");
159 SYSCTL_INT(_vfs_hammer2
, OID_AUTO
, dio_limit
, CTLFLAG_RW
,
160 &hammer2_dio_limit
, 0, "");
162 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_file_read
, CTLFLAG_RD
,
163 &hammer2_iod_file_read
, 0, "");
164 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_meta_read
, CTLFLAG_RD
,
165 &hammer2_iod_meta_read
, 0, "");
166 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_indr_read
, CTLFLAG_RD
,
167 &hammer2_iod_indr_read
, 0, "");
168 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_fmap_read
, CTLFLAG_RD
,
169 &hammer2_iod_fmap_read
, 0, "");
170 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_volu_read
, CTLFLAG_RD
,
171 &hammer2_iod_volu_read
, 0, "");
173 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_file_write
, CTLFLAG_RD
,
174 &hammer2_iod_file_write
, 0, "");
175 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_file_wembed
, CTLFLAG_RD
,
176 &hammer2_iod_file_wembed
, 0, "");
177 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_file_wzero
, CTLFLAG_RD
,
178 &hammer2_iod_file_wzero
, 0, "");
179 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_file_wdedup
, CTLFLAG_RD
,
180 &hammer2_iod_file_wdedup
, 0, "");
181 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_meta_write
, CTLFLAG_RD
,
182 &hammer2_iod_meta_write
, 0, "");
183 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_indr_write
, CTLFLAG_RD
,
184 &hammer2_iod_indr_write
, 0, "");
185 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_fmap_write
, CTLFLAG_RD
,
186 &hammer2_iod_fmap_write
, 0, "");
187 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_volu_write
, CTLFLAG_RD
,
188 &hammer2_iod_volu_write
, 0, "");
189 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_inode_creates
, CTLFLAG_RD
,
190 &hammer2_iod_inode_creates
, 0, "");
191 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, iod_inode_deletes
, CTLFLAG_RD
,
192 &hammer2_iod_inode_deletes
, 0, "");
194 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, process_icrc32
, CTLFLAG_RD
,
195 &hammer2_process_icrc32
, 0, "");
196 SYSCTL_LONG(_vfs_hammer2
, OID_AUTO
, process_xxhash64
, CTLFLAG_RD
,
197 &hammer2_process_xxhash64
, 0, "");
200 static int hammer2_vfs_init(struct vfsconf *conf);
201 static int hammer2_vfs_uninit(struct vfsconf *vfsp);
202 static int hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data,
204 static int hammer2_remount(hammer2_dev_t *, struct mount *, char *,
207 static int hammer2_recovery(hammer2_dev_t
*hmp
);
209 static int hammer2_vfs_unmount(struct mount *mp, int mntflags);
210 static int hammer2_vfs_root(struct mount *mp, struct m_vnode **vpp);
212 static int hammer2_vfs_statfs(struct mount
*mp
, struct statfs
*sbp
,
214 static int hammer2_vfs_statvfs(struct mount
*mp
, struct statvfs
*sbp
,
217 static int hammer2_vfs_fhtovp(struct mount *mp, struct m_vnode *rootvp,
218 struct fid *fhp, struct m_vnode **vpp);
219 static int hammer2_vfs_vptofh(struct m_vnode *vp, struct fid *fhp);
220 static int hammer2_vfs_checkexp(struct mount *mp, struct sockaddr *nam,
221 int *exflagsp, struct ucred **credanonp);
222 static int hammer2_vfs_modifying(struct mount *mp);
225 static void hammer2_update_pmps(hammer2_dev_t
*hmp
);
227 static void hammer2_mount_helper(struct mount
*mp
, hammer2_pfs_t
*pmp
);
228 static void hammer2_unmount_helper(struct mount
*mp
, hammer2_pfs_t
*pmp
,
230 static int hammer2_fixup_pfses(hammer2_dev_t
*hmp
);
233 * HAMMER2 vfs operations.
236 static struct vfsops hammer2_vfsops = {
238 .vfs_init = hammer2_vfs_init,
239 .vfs_uninit = hammer2_vfs_uninit,
240 .vfs_sync = hammer2_vfs_sync,
241 .vfs_mount = hammer2_vfs_mount,
242 .vfs_unmount = hammer2_vfs_unmount,
243 .vfs_root = hammer2_vfs_root,
244 .vfs_statfs = hammer2_vfs_statfs,
245 .vfs_statvfs = hammer2_vfs_statvfs,
246 .vfs_vget = hammer2_vfs_vget,
247 .vfs_vptofh = hammer2_vfs_vptofh,
248 .vfs_fhtovp = hammer2_vfs_fhtovp,
249 .vfs_checkexp = hammer2_vfs_checkexp,
250 .vfs_modifying = hammer2_vfs_modifying
254 MALLOC_DEFINE(M_HAMMER2
, "HAMMER2-mount", "");
256 VFS_SET(hammer2_vfsops
, hammer2
, VFCF_MPSAFE
);
257 MODULE_VERSION(hammer2
, 1);
260 hammer2_vfs_init(void)
263 static struct objcache_malloc_args margs_read;
264 static struct objcache_malloc_args margs_write;
265 static struct objcache_malloc_args margs_vop;
272 kmalloc_raise_limit(M_HAMMER2
, 0); /* unlimited */
275 * hammer2_xop_nthreads must be a multiple of ncpus,
280 hammer2_xop_mod
= mod
;
281 hammer2_xop_nthreads
= mod
* 2;
283 while (hammer2_xop_nthreads / mod < HAMMER2_XOPGROUPS_MIN ||
284 hammer2_xop_nthreads < HAMMER2_XOPTHREADS_MIN)
286 hammer2_xop_nthreads += mod;
288 hammer2_xop_sgroups = hammer2_xop_nthreads / mod / 2;
289 hammer2_xop_xgroups = hammer2_xop_nthreads / mod - hammer2_xop_sgroups;
290 hammer2_xop_xbase = hammer2_xop_sgroups * mod;
294 * A large DIO cache is needed to retain dedup enablement masks.
295 * The bulkfree code clears related masks as part of the disk block
296 * recycling algorithm, preventing it from being used for a later
299 * NOTE: A large buffer cache can actually interfere with dedup
300 * operation because we dedup based on media physical buffers
301 * and not logical buffers. Try to make the DIO case large
302 * enough to avoid this problem, but also cap it.
304 const long nbuf
= 100000; /* XXX */
305 hammer2_dio_limit
= nbuf
* 2;
306 if (hammer2_dio_limit
> 100000)
307 hammer2_dio_limit
= 100000;
309 if (HAMMER2_BLOCKREF_BYTES
!= sizeof(struct hammer2_blockref
))
311 if (HAMMER2_INODE_BYTES
!= sizeof(struct hammer2_inode_data
))
313 if (HAMMER2_VOLUME_BYTES
!= sizeof(struct hammer2_volume_data
))
317 kprintf("HAMMER2 structure size mismatch; cannot continue.\n");
322 margs_read
.objsize
= 65536;
323 margs_read
.mtype
= M_HAMMER2_DEBUFFER
;
325 margs_write
.objsize
= 32768;
326 margs_write
.mtype
= M_HAMMER2_CBUFFER
;
328 margs_vop
.objsize
= sizeof(hammer2_xop_t
);
329 margs_vop
.mtype
= M_HAMMER2
;
332 * Note that for the XOPS cache we want backing store allocations
333 * to use M_ZERO. This is not allowed in objcache_get() (to avoid
334 * confusion), so use the backing store function that does it. This
335 * means that initial XOPS objects are zeroed but REUSED objects are
336 * not. So we are responsible for cleaning the object up sufficiently
337 * for our needs before objcache_put()ing it back (typically just the
340 cache_buffer_read
= objcache_create(margs_read
.mtype
->ks_shortdesc
,
341 0, 1, NULL
, NULL
, NULL
,
342 objcache_malloc_alloc
,
343 objcache_malloc_free
,
345 cache_buffer_write
= objcache_create(margs_write
.mtype
->ks_shortdesc
,
346 0, 1, NULL
, NULL
, NULL
,
347 objcache_malloc_alloc
,
348 objcache_malloc_free
,
350 cache_xops
= objcache_create(margs_vop
.mtype
->ks_shortdesc
,
351 0, 1, NULL
, NULL
, NULL
,
352 objcache_malloc_alloc_zero
,
353 objcache_malloc_free
,
358 lockinit(&hammer2_mntlk
, "mntlk", 0, 0);
359 TAILQ_INIT(&hammer2_mntlist
);
360 TAILQ_INIT(&hammer2_pfslist
);
361 TAILQ_INIT(&hammer2_spmplist
);
363 const int maxvnodes
= 100000; /* XXX */
364 hammer2_limit_dirty_chains
= maxvnodes
/ 10;
365 if (hammer2_limit_dirty_chains
> HAMMER2_LIMIT_DIRTY_CHAINS
)
366 hammer2_limit_dirty_chains
= HAMMER2_LIMIT_DIRTY_CHAINS
;
367 if (hammer2_limit_dirty_chains
< 1000)
368 hammer2_limit_dirty_chains
= 1000;
370 hammer2_limit_dirty_inodes
= maxvnodes
/ 25;
371 if (hammer2_limit_dirty_inodes
< 100)
372 hammer2_limit_dirty_inodes
= 100;
373 if (hammer2_limit_dirty_inodes
> HAMMER2_LIMIT_DIRTY_INODES
)
374 hammer2_limit_dirty_inodes
= HAMMER2_LIMIT_DIRTY_INODES
;
376 hammer2_limit_saved_chains
= hammer2_limit_dirty_chains
* 5;
382 hammer2_vfs_uninit(void)
385 objcache_destroy(cache_buffer_read);
386 objcache_destroy(cache_buffer_write);
387 objcache_destroy(cache_xops);
393 * Core PFS allocator. Used to allocate or reference the pmp structure
394 * for PFS cluster mounts and the spmp structure for media (hmp) structures.
395 * The pmp can be passed in or loaded by this function using the chain and
398 * pmp->modify_tid tracks new modify_tid transaction ids for front-end
399 * transactions. Note that synchronization does not use this field.
400 * (typically frontend operations and synchronization cannot run on the
401 * same PFS node at the same time).
406 hammer2_pfsalloc(hammer2_chain_t
*chain
,
407 const hammer2_inode_data_t
*ripdata
,
408 hammer2_dev_t
*force_local
)
411 hammer2_inode_t
*iroot
;
419 * Locate or create the PFS based on the cluster id. If ripdata
420 * is NULL this is a spmp which is unique and is always allocated.
422 * If the device is mounted in local mode all PFSs are considered
423 * independent and not part of any cluster (for debugging only).
426 TAILQ_FOREACH(pmp
, &hammer2_pfslist
, mntentry
) {
427 if (force_local
!= pmp
->force_local
)
429 if (force_local
== NULL
&&
430 bcmp(&pmp
->pfs_clid
, &ripdata
->meta
.pfs_clid
,
431 sizeof(pmp
->pfs_clid
)) == 0) {
433 } else if (force_local
&& pmp
->pfs_names
[0] &&
434 strcmp(pmp
->pfs_names
[0], (const char *)ripdata
->filename
) == 0) {
441 pmp
= kmalloc(sizeof(*pmp
), M_HAMMER2
, M_WAITOK
| M_ZERO
);
442 pmp
->force_local
= force_local
;
443 hammer2_trans_manage_init(pmp
);
444 kmalloc_create_obj(&pmp
->minode
, "HAMMER2-inodes",
445 sizeof(struct hammer2_inode
));
446 lockinit(&pmp
->lock
, "pfslk", 0, 0);
447 hammer2_spin_init(&pmp
->inum_spin
, "hm2pfsalloc_inum");
448 hammer2_spin_init(&pmp
->xop_spin
, "h2xop");
449 hammer2_spin_init(&pmp
->lru_spin
, "h2lru");
450 RB_INIT(&pmp
->inum_tree
);
451 TAILQ_INIT(&pmp
->syncq
);
452 TAILQ_INIT(&pmp
->depq
);
453 TAILQ_INIT(&pmp
->lru_list
);
454 hammer2_spin_init(&pmp
->list_spin
, "h2pfsalloc_list");
457 * Save the last media transaction id for the flusher. Set
461 pmp
->pfs_clid
= ripdata
->meta
.pfs_clid
;
462 TAILQ_INSERT_TAIL(&hammer2_pfslist
, pmp
, mntentry
);
464 pmp
->flags
|= HAMMER2_PMPF_SPMP
;
465 TAILQ_INSERT_TAIL(&hammer2_spmplist
, pmp
, mntentry
);
469 * The synchronization thread may start too early, make
470 * sure it stays frozen until we are ready to let it go.
474 pmp->primary_thr.flags = HAMMER2_THREAD_FROZEN |
475 HAMMER2_THREAD_REMASTER;
480 * Create the PFS's root inode and any missing XOP helper threads.
482 if ((iroot
= pmp
->iroot
) == NULL
) {
483 iroot
= hammer2_inode_get(pmp
, NULL
, 1, -1);
485 iroot
->meta
= ripdata
->meta
;
487 hammer2_inode_ref(iroot
);
488 hammer2_inode_unlock(iroot
);
492 * Stop here if no chain is passed in.
498 * When a chain is passed in we must add it to the PFS's root
499 * inode, update pmp->pfs_types[], and update the synchronization
502 * When forcing local mode, mark the PFS as a MASTER regardless.
504 * At the moment empty spots can develop due to removals or failures.
505 * Ultimately we want to re-fill these spots but doing so might
506 * confuse running code. XXX
508 hammer2_inode_ref(iroot
);
509 hammer2_mtx_ex(&iroot
->lock
);
510 j
= iroot
->cluster
.nchains
;
512 if (j
== HAMMER2_MAXCLUSTER
) {
513 kprintf("hammer2_pfsalloc: cluster full!\n");
514 /* XXX fatal error? */
516 KKASSERT(chain
->pmp
== NULL
);
518 hammer2_chain_ref(chain
);
519 iroot
->cluster
.array
[j
].chain
= chain
;
521 pmp
->pfs_types
[j
] = HAMMER2_PFSTYPE_MASTER
;
523 pmp
->pfs_types
[j
] = ripdata
->meta
.pfs_type
;
524 pmp
->pfs_names
[j
] = kstrdup((const char *)ripdata
->filename
, M_HAMMER2
);
525 pmp
->pfs_hmps
[j
] = chain
->hmp
;
526 hammer2_spin_ex(&pmp
->inum_spin
);
527 pmp
->pfs_iroot_blocksets
[j
] = chain
->data
->ipdata
.u
.blockset
;
528 hammer2_spin_unex(&pmp
->inum_spin
);
531 * If the PFS is already mounted we must account
532 * for the mount_count here.
535 ++chain
->hmp
->mount_count
;
538 * May have to fixup dirty chain tracking. Previous
539 * pmp was NULL so nothing to undo.
541 if (chain
->flags
& HAMMER2_CHAIN_MODIFIED
)
542 hammer2_pfs_memory_inc(pmp
);
545 iroot
->cluster
.nchains
= j
;
548 * Update nmasters from any PFS inode which is part of the cluster.
549 * It is possible that this will result in a value which is too
550 * high. MASTER PFSs are authoritative for pfs_nmasters and will
551 * override this value later on.
553 * (This informs us of masters that might not currently be
554 * discoverable by this mount).
556 if (ripdata
&& pmp
->pfs_nmasters
< ripdata
->meta
.pfs_nmasters
) {
557 pmp
->pfs_nmasters
= ripdata
->meta
.pfs_nmasters
;
561 * Count visible masters. Masters are usually added with
562 * ripdata->meta.pfs_nmasters set to 1. This detects when there
563 * are more (XXX and must update the master inodes).
566 for (i
= 0; i
< iroot
->cluster
.nchains
; ++i
) {
567 if (pmp
->pfs_types
[i
] == HAMMER2_PFSTYPE_MASTER
)
570 if (pmp
->pfs_nmasters
< count
)
571 pmp
->pfs_nmasters
= count
;
574 * Create missing synchronization and support threads.
576 * Single-node masters (including snapshots) have nothing to
577 * synchronize and do not require this thread.
579 * Multi-node masters or any number of soft masters, slaves, copy,
580 * or other PFS types need the thread.
582 * Each thread is responsible for its particular cluster index.
583 * We use independent threads so stalls or mismatches related to
584 * any given target do not affect other targets.
586 for (i
= 0; i
< iroot
->cluster
.nchains
; ++i
) {
588 * Single-node masters (including snapshots) have nothing
589 * to synchronize and will make direct xops support calls,
590 * thus they do not require this thread.
592 * Note that there can be thousands of snapshots. We do not
593 * want to create thousands of threads.
595 if (pmp
->pfs_nmasters
<= 1 &&
596 pmp
->pfs_types
[i
] == HAMMER2_PFSTYPE_MASTER
) {
601 * Sync support thread
604 if (pmp->sync_thrs[i].td == NULL) {
605 hammer2_thr_create(&pmp->sync_thrs[i], pmp, NULL,
607 hammer2_primary_sync_thread);
613 * Create missing Xop threads
615 * NOTE: We create helper threads for all mounted PFSs or any
616 * PFSs with 2+ nodes (so the sync thread can update them,
617 * even if not mounted).
619 if (pmp
->mp
|| iroot
->cluster
.nchains
>= 2)
620 hammer2_xop_helper_create(pmp
);
622 hammer2_mtx_unlock(&iroot
->lock
);
623 hammer2_inode_drop(iroot
);
629 * Deallocate an element of a probed PFS. If destroying and this is a
630 * MASTER, adjust nmasters.
632 * This function does not physically destroy the PFS element in its device
633 * under the super-root (see hammer2_ioctl_pfs_delete()).
636 hammer2_pfsdealloc(hammer2_pfs_t
*pmp
, int clindex
, int destroying
)
638 hammer2_inode_t
*iroot
;
639 hammer2_chain_t
*chain
;
643 * Cleanup our reference on iroot. iroot is not (should not be) needed
651 * XXX flush after acquiring the iroot lock.
652 * XXX clean out the cluster index from all inode structures.
654 hammer2_thr_delete(&pmp
->sync_thrs
[clindex
]);
657 * Remove the cluster index from the group. If destroying
658 * the PFS and this is a master, adjust pfs_nmasters.
660 hammer2_mtx_ex(&iroot
->lock
);
661 chain
= iroot
->cluster
.array
[clindex
].chain
;
662 iroot
->cluster
.array
[clindex
].chain
= NULL
;
664 switch(pmp
->pfs_types
[clindex
]) {
665 case HAMMER2_PFSTYPE_MASTER
:
666 if (destroying
&& pmp
->pfs_nmasters
> 0)
668 /* XXX adjust ripdata->meta.pfs_nmasters */
673 pmp
->pfs_types
[clindex
] = HAMMER2_PFSTYPE_NONE
;
675 hammer2_mtx_unlock(&iroot
->lock
);
681 atomic_set_int(&chain
->flags
, HAMMER2_CHAIN_RELEASE
);
682 hammer2_chain_drop(chain
);
686 * Terminate all XOP threads for the cluster index.
688 if (pmp
->xop_groups
) {
689 for (j
= 0; j
< hammer2_xop_nthreads
; ++j
) {
691 &pmp
->xop_groups
[j
].thrs
[clindex
]);
698 * Destroy a PFS, typically only occurs after the last mount on a device
702 hammer2_pfsfree(hammer2_pfs_t
*pmp
)
704 hammer2_inode_t
*iroot
;
705 hammer2_chain_t
*chain
;
706 int chains_still_present
= 0;
711 * Cleanup our reference on iroot. iroot is not (should not be) needed
714 if (pmp
->flags
& HAMMER2_PMPF_SPMP
)
715 TAILQ_REMOVE(&hammer2_spmplist
, pmp
, mntentry
);
717 TAILQ_REMOVE(&hammer2_pfslist
, pmp
, mntentry
);
720 * Cleanup chains remaining on LRU list.
722 hammer2_spin_ex(&pmp
->lru_spin
);
723 while ((chain
= TAILQ_FIRST(&pmp
->lru_list
)) != NULL
) {
724 KKASSERT(chain
->flags
& HAMMER2_CHAIN_ONLRU
);
725 atomic_add_int(&pmp
->lru_count
, -1);
726 atomic_clear_int(&chain
->flags
, HAMMER2_CHAIN_ONLRU
);
727 TAILQ_REMOVE(&pmp
->lru_list
, chain
, lru_node
);
728 hammer2_chain_ref(chain
);
729 hammer2_spin_unex(&pmp
->lru_spin
);
730 atomic_set_int(&chain
->flags
, HAMMER2_CHAIN_RELEASE
);
731 hammer2_chain_drop(chain
);
732 hammer2_spin_ex(&pmp
->lru_spin
);
734 hammer2_spin_unex(&pmp
->lru_spin
);
741 for (i
= 0; i
< iroot
->cluster
.nchains
; ++i
) {
743 hammer2_thr_delete(&pmp->sync_thrs[i]);
744 if (pmp->xop_groups) {
745 for (j = 0; j < hammer2_xop_nthreads; ++j)
747 &pmp->xop_groups[j].thrs[i]);
750 chain
= iroot
->cluster
.array
[i
].chain
;
751 if (chain
&& !RB_EMPTY(&chain
->core
.rbtree
)) {
752 kprintf("hammer2: Warning pmp %p still "
753 "has active chains\n", pmp
);
754 chains_still_present
= 1;
757 KASSERT(iroot
->refs
== 1,
758 ("PMP->IROOT %p REFS WRONG %d", iroot
, iroot
->refs
));
761 hammer2_inode_drop(iroot
);
766 * Free remaining pmp resources
768 if (chains_still_present
) {
769 kprintf("hammer2: cannot free pmp %p, still in use\n", pmp
);
772 * In makefs HAMMER2, all inodes must be gone at this point.
773 * XXX vnode_count may not be 0 at this point.
775 assert(hammer2_pfs_inode_count(pmp
) == 0);
777 kmalloc_destroy_obj(&pmp
->minode
);
778 kfree(pmp
, M_HAMMER2
);
783 * Remove all references to hmp from the pfs list. Any PFS which becomes
784 * empty is terminated and freed.
789 hammer2_pfsfree_scan(hammer2_dev_t
*hmp
, int which
)
792 hammer2_inode_t
*iroot
;
793 hammer2_chain_t
*rchain
;
796 struct hammer2_pfslist
*wlist
;
799 wlist
= &hammer2_pfslist
;
801 wlist
= &hammer2_spmplist
;
803 TAILQ_FOREACH(pmp
, wlist
, mntentry
) {
804 if ((iroot
= pmp
->iroot
) == NULL
)
808 * Determine if this PFS is affected. If it is we must
809 * freeze all management threads and lock its iroot.
811 * Freezing a management thread forces it idle, operations
812 * in-progress will be aborted and it will have to start
813 * over again when unfrozen, or exit if told to exit.
815 for (i
= 0; i
< HAMMER2_MAXCLUSTER
; ++i
) {
816 if (pmp
->pfs_hmps
[i
] == hmp
)
819 if (i
== HAMMER2_MAXCLUSTER
)
822 hammer2_vfs_sync_pmp(pmp
, MNT_WAIT
);
825 * Make sure all synchronization threads are locked
829 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
830 if (pmp->pfs_hmps[i] == NULL)
832 hammer2_thr_freeze_async(&pmp->sync_thrs[i]);
833 if (pmp->xop_groups) {
834 for (j = 0; j < hammer2_xop_nthreads; ++j) {
835 hammer2_thr_freeze_async(
836 &pmp->xop_groups[j].thrs[i]);
840 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
841 if (pmp->pfs_hmps[i] == NULL)
843 hammer2_thr_freeze(&pmp->sync_thrs[i]);
844 if (pmp->xop_groups) {
845 for (j = 0; j < hammer2_xop_nthreads; ++j) {
847 &pmp->xop_groups[j].thrs[i]);
854 * Lock the inode and clean out matching chains.
855 * Note that we cannot use hammer2_inode_lock_*()
856 * here because that would attempt to validate the
857 * cluster that we are in the middle of ripping
860 * WARNING! We are working directly on the inodes
863 hammer2_mtx_ex(&iroot
->lock
);
866 * Remove the chain from matching elements of the PFS.
868 for (i
= 0; i
< HAMMER2_MAXCLUSTER
; ++i
) {
869 if (pmp
->pfs_hmps
[i
] != hmp
)
872 hammer2_thr_delete(&pmp->sync_thrs[i]);
873 if (pmp->xop_groups) {
874 for (j = 0; j < hammer2_xop_nthreads; ++j) {
876 &pmp->xop_groups[j].thrs[i]);
880 rchain
= iroot
->cluster
.array
[i
].chain
;
881 iroot
->cluster
.array
[i
].chain
= NULL
;
882 pmp
->pfs_types
[i
] = HAMMER2_PFSTYPE_NONE
;
883 if (pmp
->pfs_names
[i
]) {
884 kfree(pmp
->pfs_names
[i
], M_HAMMER2
);
885 pmp
->pfs_names
[i
] = NULL
;
888 hammer2_chain_drop(rchain
);
890 if (iroot
->cluster
.focus
== rchain
)
891 iroot
->cluster
.focus
= NULL
;
893 pmp
->pfs_hmps
[i
] = NULL
;
895 hammer2_mtx_unlock(&iroot
->lock
);
898 * Cleanup trailing chains. Gaps may remain.
900 for (i
= HAMMER2_MAXCLUSTER
- 1; i
>= 0; --i
) {
901 if (pmp
->pfs_hmps
[i
])
904 iroot
->cluster
.nchains
= i
+ 1;
907 * If the PMP has no elements remaining we can destroy it.
908 * (this will transition management threads from frozen->exit).
910 if (iroot
->cluster
.nchains
== 0) {
912 * If this was the hmp's spmp, we need to clean
913 * a little more stuff out.
915 if (hmp
->spmp
== pmp
) {
917 hmp
->vchain
.pmp
= NULL
;
918 hmp
->fchain
.pmp
= NULL
;
922 * Free the pmp and restart the loop
924 KKASSERT(TAILQ_EMPTY(&pmp
->syncq
));
925 KKASSERT(TAILQ_EMPTY(&pmp
->depq
));
926 hammer2_pfsfree(pmp
);
931 * If elements still remain we need to set the REMASTER
932 * flag and unfreeze it.
934 for (i
= 0; i
< HAMMER2_MAXCLUSTER
; ++i
) {
935 if (pmp
->pfs_hmps
[i
] == NULL
)
938 hammer2_thr_remaster(&pmp->sync_thrs[i]);
939 hammer2_thr_unfreeze(&pmp->sync_thrs[i]);
940 if (pmp->xop_groups) {
941 for (j = 0; j < hammer2_xop_nthreads; ++j) {
942 hammer2_thr_remaster(
943 &pmp->xop_groups[j].thrs[i]);
944 hammer2_thr_unfreeze(
945 &pmp->xop_groups[j].thrs[i]);
954 * Mount or remount HAMMER2 filesystem from physical media
957 * mp mount point structure
963 * mp mount point structure
964 * path path to mount point
965 * data pointer to argument structure in user space
966 * volume volume path (device@LABEL form)
967 * hflags user mount flags
968 * cred user credentials
974 hammer2_vfs_mount(struct m_vnode
*makefs_devvp
, struct mount
*mp
,
975 const char *label
, const struct hammer2_mount_info
*mi
)
977 struct hammer2_mount_info info
= *mi
;
980 hammer2_dev_t
*hmp
, *hmp_tmp
;
981 hammer2_dev_t
*force_local
;
982 hammer2_key_t key_next
;
983 hammer2_key_t key_dummy
;
985 hammer2_chain_t
*parent
;
986 hammer2_chain_t
*chain
;
987 const hammer2_inode_data_t
*ripdata
;
988 hammer2_devvp_list_t devvpl
;
989 hammer2_devvp_t
*e
, *e_tmp
;
991 int ronly
= ((mp
->mnt_flag
& MNT_RDONLY
) != 0);
999 kprintf("hammer2_mount: device=\"%s\" label=\"%s\" rdonly=%d\n",
1000 devstr
, label
, ronly
);
1003 * Initialize all device vnodes.
1005 TAILQ_INIT(&devvpl
);
1006 error
= hammer2_init_devvp(makefs_devvp
, &devvpl
);
1008 kprintf("hammer2: failed to initialize devvp in %s\n", devstr
);
1009 hammer2_cleanup_devvp(&devvpl
);
1014 * Determine if the device has already been mounted. After this
1015 * check hmp will be non-NULL if we are doing the second or more
1016 * hammer2 mounts from the same device.
1018 lockmgr(&hammer2_mntlk
, LK_EXCLUSIVE
);
1019 if (!TAILQ_EMPTY(&devvpl
)) {
1021 * Match the device. Due to the way devfs works,
1022 * we may not be able to directly match the vnode pointer,
1023 * so also check to see if the underlying device matches.
1025 TAILQ_FOREACH(hmp_tmp
, &hammer2_mntlist
, mntentry
) {
1026 TAILQ_FOREACH(e_tmp
, &hmp_tmp
->devvpl
, entry
) {
1027 int devvp_found
= 0;
1028 TAILQ_FOREACH(e
, &devvpl
, entry
) {
1030 if (e_tmp
->devvp
== e
->devvp
)
1033 if (e_tmp->devvp->v_rdev &&
1034 e_tmp->devvp->v_rdev == e->devvp->v_rdev)
1042 kprintf("hammer2_mount: hmp=%p matched\n", hmp
);
1049 * If no match this may be a fresh H2 mount, make sure
1050 * the device is not mounted on anything else.
1053 TAILQ_FOREACH(e
, &devvpl
, entry
) {
1054 struct m_vnode
*devvp
= e
->devvp
;
1056 error
= vfs_mountedon(devvp
);
1058 kprintf("hammer2_mount: %s mounted %d\n",
1060 hammer2_cleanup_devvp(&devvpl
);
1061 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1068 * Match the label to a pmp already probed.
1070 TAILQ_FOREACH(pmp
, &hammer2_pfslist
, mntentry
) {
1071 for (i
= 0; i
< HAMMER2_MAXCLUSTER
; ++i
) {
1072 if (pmp
->pfs_names
[i
] &&
1073 strcmp(pmp
->pfs_names
[i
], label
) == 0) {
1074 hmp
= pmp
->pfs_hmps
[i
];
1082 kprintf("hammer2_mount: PFS label \"%s\" not found\n",
1084 hammer2_cleanup_devvp(&devvpl
);
1085 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1091 * Open the device if this isn't a secondary mount and construct
1092 * the H2 device mount (hmp).
1095 hammer2_chain_t
*schain
;
1096 hammer2_xop_head_t xop
;
1099 * Now open the device
1101 KKASSERT(!TAILQ_EMPTY(&devvpl
));
1102 error
= hammer2_open_devvp(&devvpl
, ronly
);
1104 hammer2_close_devvp(&devvpl
, ronly
);
1105 hammer2_cleanup_devvp(&devvpl
);
1106 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1111 * Construct volumes and link with device vnodes.
1113 hmp
= kmalloc(sizeof(*hmp
), M_HAMMER2
, M_WAITOK
| M_ZERO
);
1115 error
= hammer2_init_vfsvolumes(mp
, &devvpl
, hmp
->volumes
,
1116 &hmp
->voldata
, &hmp
->volhdrno
,
1119 hammer2_close_devvp(&devvpl
, ronly
);
1120 hammer2_cleanup_devvp(&devvpl
);
1121 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1122 kfree(hmp
, M_HAMMER2
);
1126 kprintf("hammer2: failed to initialize root volume\n");
1127 hammer2_unmount_helper(mp
, NULL
, hmp
);
1128 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1129 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1133 ksnprintf(hmp
->devrepname
, sizeof(hmp
->devrepname
), "%s", devstr
);
1135 hmp
->hflags
= info
.hflags
& HMNT2_DEVFLAGS
;
1136 kmalloc_create_obj(&hmp
->mchain
, "HAMMER2-chains",
1137 sizeof(struct hammer2_chain
));
1138 kmalloc_create_obj(&hmp
->mio
, "HAMMER2-dio",
1139 sizeof(struct hammer2_io
));
1140 kmalloc_create(&hmp
->mmsg
, "HAMMER2-msg");
1141 TAILQ_INSERT_TAIL(&hammer2_mntlist
, hmp
, mntentry
);
1142 RB_INIT(&hmp
->iotree
);
1143 hammer2_spin_init(&hmp
->io_spin
, "h2mount_io");
1144 hammer2_spin_init(&hmp
->list_spin
, "h2mount_list");
1146 lockinit(&hmp
->vollk
, "h2vol", 0, 0);
1147 lockinit(&hmp
->bulklk
, "h2bulk", 0, 0);
1148 lockinit(&hmp
->bflock
, "h2bflk", 0, 0);
1151 * vchain setup. vchain.data is embedded.
1152 * vchain.refs is initialized and will never drop to 0.
1154 hmp
->vchain
.hmp
= hmp
;
1155 hmp
->vchain
.refs
= 1;
1156 hmp
->vchain
.data
= (void *)&hmp
->voldata
;
1157 hmp
->vchain
.bref
.type
= HAMMER2_BREF_TYPE_VOLUME
;
1158 hmp
->vchain
.bref
.data_off
= 0 | HAMMER2_PBUFRADIX
;
1159 hmp
->vchain
.bref
.mirror_tid
= hmp
->voldata
.mirror_tid
;
1160 hammer2_chain_init(&hmp
->vchain
);
1163 * fchain setup. fchain.data is embedded.
1164 * fchain.refs is initialized and will never drop to 0.
1166 * The data is not used but needs to be initialized to
1167 * pass assertion muster. We use this chain primarily
1168 * as a placeholder for the freemap's top-level radix tree
1169 * so it does not interfere with the volume's topology
1172 hmp
->fchain
.hmp
= hmp
;
1173 hmp
->fchain
.refs
= 1;
1174 hmp
->fchain
.data
= (void *)&hmp
->voldata
.freemap_blockset
;
1175 hmp
->fchain
.bref
.type
= HAMMER2_BREF_TYPE_FREEMAP
;
1176 hmp
->fchain
.bref
.data_off
= 0 | HAMMER2_PBUFRADIX
;
1177 hmp
->fchain
.bref
.mirror_tid
= hmp
->voldata
.freemap_tid
;
1178 hmp
->fchain
.bref
.methods
=
1179 HAMMER2_ENC_CHECK(HAMMER2_CHECK_FREEMAP
) |
1180 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE
);
1181 hammer2_chain_init(&hmp
->fchain
);
1184 * Initialize volume header related fields.
1186 KKASSERT(hmp
->voldata
.magic
== HAMMER2_VOLUME_ID_HBO
||
1187 hmp
->voldata
.magic
== HAMMER2_VOLUME_ID_ABO
);
1188 hmp
->volsync
= hmp
->voldata
;
1189 hmp
->free_reserved
= hmp
->voldata
.allocator_size
/ 20;
1191 * Must use hmp instead of volume header for these two
1192 * in order to handle volume versions transparently.
1194 if (hmp
->voldata
.version
>= HAMMER2_VOL_VERSION_MULTI_VOLUMES
) {
1195 hmp
->nvolumes
= hmp
->voldata
.nvolumes
;
1196 hmp
->total_size
= hmp
->voldata
.total_size
;
1199 hmp
->total_size
= hmp
->voldata
.volu_size
;
1201 KKASSERT(hmp
->nvolumes
> 0);
1204 * Move devvpl entries to hmp.
1206 TAILQ_INIT(&hmp
->devvpl
);
1207 while ((e
= TAILQ_FIRST(&devvpl
)) != NULL
) {
1208 TAILQ_REMOVE(&devvpl
, e
, entry
);
1209 TAILQ_INSERT_TAIL(&hmp
->devvpl
, e
, entry
);
1211 KKASSERT(TAILQ_EMPTY(&devvpl
));
1212 KKASSERT(!TAILQ_EMPTY(&hmp
->devvpl
));
1215 * Really important to get these right or the flush and
1216 * teardown code will get confused.
1218 hmp
->spmp
= hammer2_pfsalloc(NULL
, NULL
, NULL
);
1220 spmp
->pfs_hmps
[0] = hmp
;
1223 * Dummy-up vchain and fchain's modify_tid. mirror_tid
1224 * is inherited from the volume header.
1226 hmp
->vchain
.bref
.mirror_tid
= hmp
->voldata
.mirror_tid
;
1227 hmp
->vchain
.bref
.modify_tid
= hmp
->vchain
.bref
.mirror_tid
;
1228 hmp
->vchain
.pmp
= spmp
;
1229 hmp
->fchain
.bref
.mirror_tid
= hmp
->voldata
.freemap_tid
;
1230 hmp
->fchain
.bref
.modify_tid
= hmp
->fchain
.bref
.mirror_tid
;
1231 hmp
->fchain
.pmp
= spmp
;
1234 * First locate the super-root inode, which is key 0
1235 * relative to the volume header's blockset.
1237 * Then locate the root inode by scanning the directory keyspace
1238 * represented by the label.
1240 parent
= hammer2_chain_lookup_init(&hmp
->vchain
, 0);
1241 schain
= hammer2_chain_lookup(&parent
, &key_dummy
,
1242 HAMMER2_SROOT_KEY
, HAMMER2_SROOT_KEY
,
1244 hammer2_chain_lookup_done(parent
);
1245 if (schain
== NULL
) {
1246 kprintf("hammer2_mount: invalid super-root\n");
1247 hammer2_unmount_helper(mp
, NULL
, hmp
);
1248 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1249 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1252 if (schain
->error
) {
1253 kprintf("hammer2_mount: error %s reading super-root\n",
1254 hammer2_error_str(schain
->error
));
1255 hammer2_chain_unlock(schain
);
1256 hammer2_chain_drop(schain
);
1258 hammer2_unmount_helper(mp
, NULL
, hmp
);
1259 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1260 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1265 * The super-root always uses an inode_tid of 1 when
1268 spmp
->inode_tid
= 1;
1269 spmp
->modify_tid
= schain
->bref
.modify_tid
+ 1;
1272 * Sanity-check schain's pmp and finish initialization.
1273 * Any chain belonging to the super-root topology should
1274 * have a NULL pmp (not even set to spmp).
1276 ripdata
= &schain
->data
->ipdata
;
1277 KKASSERT(schain
->pmp
== NULL
);
1278 spmp
->pfs_clid
= ripdata
->meta
.pfs_clid
;
1281 * Replace the dummy spmp->iroot with a real one. It's
1282 * easier to just do a wholesale replacement than to try
1283 * to update the chain and fixup the iroot fields.
1285 * The returned inode is locked with the supplied cluster.
1287 hammer2_dummy_xop_from_chain(&xop
, schain
);
1288 hammer2_inode_drop(spmp
->iroot
);
1289 spmp
->iroot
= hammer2_inode_get(spmp
, &xop
, -1, -1);
1290 spmp
->spmp_hmp
= hmp
;
1291 spmp
->pfs_types
[0] = ripdata
->meta
.pfs_type
;
1292 spmp
->pfs_hmps
[0] = hmp
;
1293 hammer2_inode_ref(spmp
->iroot
);
1294 hammer2_inode_unlock(spmp
->iroot
);
1295 hammer2_cluster_unlock(&xop
.cluster
);
1296 hammer2_chain_drop(schain
);
1297 /* do not call hammer2_cluster_drop() on an embedded cluster */
1298 schain
= NULL
; /* now invalid */
1299 /* leave spmp->iroot with one ref */
1302 error
= hammer2_recovery(hmp
);
1304 error
|= hammer2_fixup_pfses(hmp
);
1305 /* XXX do something with error */
1307 hammer2_update_pmps(hmp
);
1308 hammer2_iocom_init(hmp
);
1309 hammer2_bulkfree_init(hmp
);
1312 * Ref the cluster management messaging descriptor. The mount
1313 * program deals with the other end of the communications pipe.
1315 * Root mounts typically do not supply one.
1318 if (info.cluster_fd >= 0) {
1319 fp = holdfp(curthread, info.cluster_fd, -1);
1321 hammer2_cluster_reconnect(hmp, fp);
1323 kprintf("hammer2_mount: bad cluster_fd!\n");
1329 if (info
.hflags
& HMNT2_DEVFLAGS
) {
1330 kprintf("hammer2_mount: Warning: mount flags pertaining "
1331 "to the whole device may only be specified "
1332 "on the first mount of the device: %08x\n",
1333 info
.hflags
& HMNT2_DEVFLAGS
);
1338 * Force local mount (disassociate all PFSs from their clusters).
1339 * Used primarily for debugging.
1341 force_local
= (hmp
->hflags
& HMNT2_LOCAL
) ? hmp
: NULL
;
1344 * Lookup the mount point under the media-localized super-root.
1345 * Scanning hammer2_pfslist doesn't help us because it represents
1346 * PFS cluster ids which can aggregate several named PFSs together.
1348 * cluster->pmp will incorrectly point to spmp and must be fixed
1351 hammer2_inode_lock(spmp
->iroot
, 0);
1352 parent
= hammer2_inode_chain(spmp
->iroot
, 0, HAMMER2_RESOLVE_ALWAYS
);
1353 lhc
= hammer2_dirhash(label
, strlen(label
));
1354 chain
= hammer2_chain_lookup(&parent
, &key_next
,
1355 lhc
, lhc
+ HAMMER2_DIRHASH_LOMASK
,
1358 if (chain
->bref
.type
== HAMMER2_BREF_TYPE_INODE
&&
1359 strcmp(label
, (char *)chain
->data
->ipdata
.filename
) == 0) {
1362 chain
= hammer2_chain_next(&parent
, chain
, &key_next
,
1364 lhc
+ HAMMER2_DIRHASH_LOMASK
,
1368 hammer2_chain_unlock(parent
);
1369 hammer2_chain_drop(parent
);
1371 hammer2_inode_unlock(spmp
->iroot
);
1374 * PFS could not be found?
1376 if (chain
== NULL
) {
1377 hammer2_unmount_helper(mp
, NULL
, hmp
);
1378 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1379 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1382 kprintf("hammer2_mount: PFS label I/O error\n");
1385 kprintf("hammer2_mount: PFS label \"%s\" not found\n",
1392 * Acquire the pmp structure (it should have already been allocated
1393 * via hammer2_update_pmps()).
1396 kprintf("hammer2_mount: PFS label I/O error\n");
1398 ripdata
= &chain
->data
->ipdata
;
1399 pmp
= hammer2_pfsalloc(NULL
, ripdata
, force_local
);
1401 hammer2_chain_unlock(chain
);
1402 hammer2_chain_drop(chain
);
1405 * PFS to mount must exist at this point.
1408 kprintf("hammer2_mount: Failed to acquire PFS structure\n");
1409 hammer2_unmount_helper(mp
, NULL
, hmp
);
1410 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1411 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1418 kprintf("hammer2_mount: hmp=%p pmp=%p\n", hmp
, pmp
);
1420 /* Check if the pmp has already been mounted. */
1422 kprintf("hammer2_mount: PFS already mounted!\n");
1423 hammer2_unmount_helper(mp
, NULL
, hmp
);
1424 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1425 hammer2_vfs_unmount(mp
, MNT_FORCE
);
1429 pmp
->hflags
= info
.hflags
;
1430 mp
->mnt_flag
|= MNT_LOCAL
;
1431 mp
->mnt_kern_flag
|= MNTK_ALL_MPSAFE
; /* all entry pts are SMP */
1432 mp
->mnt_kern_flag
|= MNTK_THR_SYNC
; /* new vsyncscan semantics */
1435 * required mount structure initializations
1437 mp
->mnt_stat
.f_iosize
= HAMMER2_PBUFSIZE
;
1438 mp
->mnt_stat
.f_bsize
= HAMMER2_PBUFSIZE
;
1440 mp
->mnt_vstat
.f_frsize
= HAMMER2_PBUFSIZE
;
1441 mp
->mnt_vstat
.f_bsize
= HAMMER2_PBUFSIZE
;
1446 mp
->mnt_iosize_max
= MAXPHYS
;
1449 * Connect up mount pointers.
1451 hammer2_mount_helper(mp
, pmp
);
1452 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1459 vfs_add_vnodeops(mp
, &hammer2_vnode_vops
, &mp
->mnt_vn_norm_ops
);
1460 vfs_add_vnodeops(mp
, &hammer2_spec_vops
, &mp
->mnt_vn_spec_ops
);
1461 vfs_add_vnodeops(mp
, &hammer2_fifo_vops
, &mp
->mnt_vn_fifo_ops
);
1464 copyinstr(info
.volume
, mp
->mnt_stat
.f_mntfromname
,
1465 MNAMELEN
- 1, &size
);
1466 bzero(mp
->mnt_stat
.f_mntfromname
+ size
, MNAMELEN
- size
);
1467 } /* else root mount, already in there */
1469 bzero(mp
->mnt_stat
.f_mntonname
, sizeof(mp
->mnt_stat
.f_mntonname
));
1471 copyinstr(path
, mp
->mnt_stat
.f_mntonname
,
1472 sizeof(mp
->mnt_stat
.f_mntonname
) - 1,
1476 mp
->mnt_stat
.f_mntonname
[0] = '/';
1481 * Initial statfs to prime mnt_stat.
1483 hammer2_vfs_statfs(mp
, &mp
->mnt_stat
, NULL
);
1484 hammer2_vfs_statvfs(mp
, &mp
->mnt_vstat
, NULL
);
1490 * Scan PFSs under the super-root and create hammer2_pfs structures.
1494 hammer2_update_pmps(hammer2_dev_t
*hmp
)
1496 const hammer2_inode_data_t
*ripdata
;
1497 hammer2_chain_t
*parent
;
1498 hammer2_chain_t
*chain
;
1499 hammer2_dev_t
*force_local
;
1500 hammer2_pfs_t
*spmp
;
1501 hammer2_key_t key_next
;
1505 * Force local mount (disassociate all PFSs from their clusters).
1506 * Used primarily for debugging.
1508 force_local
= (hmp
->hflags
& HMNT2_LOCAL
) ? hmp
: NULL
;
1511 * Lookup mount point under the media-localized super-root.
1513 * cluster->pmp will incorrectly point to spmp and must be fixed
1517 hammer2_inode_lock(spmp
->iroot
, 0);
1518 parent
= hammer2_inode_chain(spmp
->iroot
, 0, HAMMER2_RESOLVE_ALWAYS
);
1519 chain
= hammer2_chain_lookup(&parent
, &key_next
,
1520 HAMMER2_KEY_MIN
, HAMMER2_KEY_MAX
,
1524 kprintf("I/O error scanning PFS labels\n");
1525 } else if (chain
->bref
.type
!= HAMMER2_BREF_TYPE_INODE
) {
1526 kprintf("Non inode chain type %d under super-root\n",
1529 ripdata
= &chain
->data
->ipdata
;
1530 hammer2_pfsalloc(chain
, ripdata
, force_local
);
1532 chain
= hammer2_chain_next(&parent
, chain
, &key_next
,
1533 key_next
, HAMMER2_KEY_MAX
,
1537 hammer2_chain_unlock(parent
);
1538 hammer2_chain_drop(parent
);
1540 hammer2_inode_unlock(spmp
->iroot
);
1546 hammer2_remount(hammer2_dev_t
*hmp
, struct mount
*mp
, char *path __unused
,
1549 hammer2_volume_t
*vol
;
1550 struct m_vnode
*devvp
;
1551 int i
, error
, result
= 0;
1553 if (!(hmp
->ronly
&& (mp
->mnt_kern_flag
& MNTK_WANTRDWR
)))
1556 for (i
= 0; i
< hmp
->nvolumes
; ++i
) {
1557 vol
= &hmp
->volumes
[i
];
1558 devvp
= vol
->dev
->devvp
;
1560 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1561 VOP_OPEN(devvp
, FREAD
| FWRITE
, FSCRED
, NULL
);
1564 if (vol
->id
== HAMMER2_ROOT_VOLUME
) {
1565 error
= hammer2_recovery(hmp
);
1567 error
|= hammer2_fixup_pfses(hmp
);
1569 vn_lock(devvp
, LK_EXCLUSIVE
| LK_RETRY
);
1571 VOP_CLOSE(devvp
, FREAD
, NULL
);
1573 VOP_CLOSE(devvp
, FREAD
| FWRITE
, NULL
);
1579 kprintf("hammer2: enable read/write\n");
1588 hammer2_vfs_unmount(struct mount
*mp
, int mntflags
)
1599 lockmgr(&hammer2_mntlk
, LK_EXCLUSIVE
);
1602 * If mount initialization proceeded far enough we must flush
1603 * its vnodes and sync the underlying mount points. Three syncs
1604 * are required to fully flush the filesystem (freemap updates lag
1605 * by one flush, and one extra for safety).
1607 if (mntflags
& MNT_FORCE
)
1612 error
= vflush(mp
, 0, flags
);
1615 hammer2_vfs_sync(mp
, MNT_WAIT
);
1616 hammer2_vfs_sync(mp
, MNT_WAIT
);
1617 hammer2_vfs_sync(mp
, MNT_WAIT
);
1621 * Cleanup the frontend support XOPS threads
1623 hammer2_xop_helper_cleanup(pmp
);
1626 hammer2_unmount_helper(mp
, pmp
, NULL
);
1630 lockmgr(&hammer2_mntlk
, LK_RELEASE
);
1636 * Mount helper, hook the system mount into our PFS.
1637 * The mount lock is held.
1639 * We must bump the mount_count on related devices for any
1644 hammer2_mount_helper(struct mount
*mp
, hammer2_pfs_t
*pmp
)
1646 hammer2_cluster_t
*cluster
;
1647 hammer2_chain_t
*rchain
;
1650 mp
->mnt_data
= (qaddr_t
)pmp
;
1654 * After pmp->mp is set we have to adjust hmp->mount_count.
1656 cluster
= &pmp
->iroot
->cluster
;
1657 for (i
= 0; i
< cluster
->nchains
; ++i
) {
1658 rchain
= cluster
->array
[i
].chain
;
1661 ++rchain
->hmp
->mount_count
;
1665 * Create missing Xop threads
1667 hammer2_xop_helper_create(pmp
);
1671 * Unmount helper, unhook the system mount from our PFS.
1672 * The mount lock is held.
1674 * If hmp is supplied a mount responsible for being the first to open
1675 * the block device failed and the block device and all PFSs using the
1676 * block device must be cleaned up.
1678 * If pmp is supplied multiple devices might be backing the PFS and each
1679 * must be disconnected. This might not be the last PFS using some of the
1680 * underlying devices. Also, we have to adjust our hmp->mount_count
1681 * accounting for the devices backing the pmp which is now undergoing an
1686 hammer2_unmount_helper(struct mount
*mp
, hammer2_pfs_t
*pmp
, hammer2_dev_t
*hmp
)
1688 hammer2_cluster_t
*cluster
;
1689 hammer2_chain_t
*rchain
;
1694 * If no device supplied this is a high-level unmount and we have to
1695 * to disconnect the mount, adjust mount_count, and locate devices
1696 * that might now have no mounts.
1699 KKASSERT(hmp
== NULL
);
1700 KKASSERT(MPTOPMP(mp
) == pmp
);
1702 mp
->mnt_data
= NULL
;
1705 * After pmp->mp is cleared we have to account for
1708 cluster
= &pmp
->iroot
->cluster
;
1709 for (i
= 0; i
< cluster
->nchains
; ++i
) {
1710 rchain
= cluster
->array
[i
].chain
;
1713 --rchain
->hmp
->mount_count
;
1714 /* scrapping hmp now may invalidate the pmp */
1717 TAILQ_FOREACH(hmp
, &hammer2_mntlist
, mntentry
) {
1718 if (hmp
->mount_count
== 0) {
1719 hammer2_unmount_helper(NULL
, NULL
, hmp
);
1727 * Try to terminate the block device. We can't terminate it if
1728 * there are still PFSs referencing it.
1730 if (hmp
->mount_count
)
1734 * Decomission the network before we start messing with the
1737 hammer2_iocom_uninit(hmp
);
1739 hammer2_bulkfree_uninit(hmp
);
1740 hammer2_pfsfree_scan(hmp
, 0);
1743 * Cycle the volume data lock as a safety (probably not needed any
1744 * more). To ensure everything is out we need to flush at least
1745 * three times. (1) The running of the sideq can dirty the
1746 * filesystem, (2) A normal flush can dirty the freemap, and
1747 * (3) ensure that the freemap is fully synchronized.
1749 * The next mount's recovery scan can clean everything up but we want
1750 * to leave the filesystem in a 100% clean state on a normal unmount.
1753 hammer2_voldata_lock(hmp
);
1754 hammer2_voldata_unlock(hmp
);
1758 * Flush whatever is left. Unmounted but modified PFS's might still
1759 * have some dirty chains on them.
1761 hammer2_chain_lock(&hmp
->vchain
, HAMMER2_RESOLVE_ALWAYS
);
1762 hammer2_chain_lock(&hmp
->fchain
, HAMMER2_RESOLVE_ALWAYS
);
1764 if (hmp
->fchain
.flags
& HAMMER2_CHAIN_FLUSH_MASK
) {
1765 hammer2_voldata_modify(hmp
);
1766 hammer2_flush(&hmp
->fchain
, HAMMER2_FLUSH_TOP
|
1769 hammer2_chain_unlock(&hmp
->fchain
);
1771 if (hmp
->vchain
.flags
& HAMMER2_CHAIN_FLUSH_MASK
) {
1772 hammer2_flush(&hmp
->vchain
, HAMMER2_FLUSH_TOP
|
1775 hammer2_chain_unlock(&hmp
->vchain
);
1777 if ((hmp
->vchain
.flags
| hmp
->fchain
.flags
) &
1778 HAMMER2_CHAIN_FLUSH_MASK
) {
1779 kprintf("hammer2_unmount: chains left over after final sync\n");
1780 kprintf(" vchain %08x\n", hmp
->vchain
.flags
);
1781 kprintf(" fchain %08x\n", hmp
->fchain
.flags
);
1783 if (hammer2_debug
& 0x0010)
1784 Debugger("entered debugger");
1787 hammer2_pfsfree_scan(hmp
, 1);
1789 KKASSERT(hmp
->spmp
== NULL
);
1792 * Finish up with the device vnode
1794 if (!TAILQ_EMPTY(&hmp
->devvpl
)) {
1795 hammer2_close_devvp(&hmp
->devvpl
, hmp
->ronly
);
1796 hammer2_cleanup_devvp(&hmp
->devvpl
);
1798 KKASSERT(TAILQ_EMPTY(&hmp
->devvpl
));
1801 * Clear vchain/fchain flags that might prevent final cleanup
1804 if (hmp
->vchain
.flags
& HAMMER2_CHAIN_MODIFIED
) {
1805 atomic_add_long(&hammer2_count_modified_chains
, -1);
1806 atomic_clear_int(&hmp
->vchain
.flags
, HAMMER2_CHAIN_MODIFIED
);
1807 hammer2_pfs_memory_wakeup(hmp
->vchain
.pmp
, -1);
1809 if (hmp
->vchain
.flags
& HAMMER2_CHAIN_UPDATE
) {
1810 atomic_clear_int(&hmp
->vchain
.flags
, HAMMER2_CHAIN_UPDATE
);
1813 if (hmp
->fchain
.flags
& HAMMER2_CHAIN_MODIFIED
) {
1814 atomic_add_long(&hammer2_count_modified_chains
, -1);
1815 atomic_clear_int(&hmp
->fchain
.flags
, HAMMER2_CHAIN_MODIFIED
);
1816 hammer2_pfs_memory_wakeup(hmp
->fchain
.pmp
, -1);
1818 if (hmp
->fchain
.flags
& HAMMER2_CHAIN_UPDATE
) {
1819 atomic_clear_int(&hmp
->fchain
.flags
, HAMMER2_CHAIN_UPDATE
);
1823 hammer2_dump_chain(&hmp
->vchain
, 0, 0, &dumpcnt
, 'v', (u_int
)-1);
1825 hammer2_dump_chain(&hmp
->fchain
, 0, 0, &dumpcnt
, 'f', (u_int
)-1);
1828 * Final drop of embedded freemap root chain to
1829 * clean up fchain.core (fchain structure is not
1830 * flagged ALLOCATED so it is cleaned out and then
1833 hammer2_chain_drop(&hmp
->fchain
);
1836 * Final drop of embedded volume root chain to clean
1837 * up vchain.core (vchain structure is not flagged
1838 * ALLOCATED so it is cleaned out and then left to
1841 hammer2_chain_drop(&hmp
->vchain
);
1843 hammer2_io_cleanup(hmp
, &hmp
->iotree
);
1844 if (hmp
->iofree_count
) {
1845 kprintf("io_cleanup: %d I/O's left hanging\n",
1849 TAILQ_REMOVE(&hammer2_mntlist
, hmp
, mntentry
);
1850 kmalloc_destroy_obj(&hmp
->mchain
);
1851 kmalloc_destroy_obj(&hmp
->mio
);
1852 kmalloc_destroy(&hmp
->mmsg
);
1853 kfree(hmp
, M_HAMMER2
);
1857 hammer2_vfs_vget(struct mount
*mp
, struct m_vnode
*dvp
,
1858 ino_t ino
, struct m_vnode
**vpp
)
1860 hammer2_xop_lookup_t
*xop
;
1862 hammer2_inode_t
*ip
;
1866 inum
= (hammer2_tid_t
)ino
& HAMMER2_DIRHASH_USERMSK
;
1872 * Easy if we already have it cached
1874 ip
= hammer2_inode_lookup(pmp
, inum
);
1876 hammer2_inode_lock(ip
, HAMMER2_RESOLVE_SHARED
);
1877 *vpp
= hammer2_igetv(ip
, &error
);
1878 hammer2_inode_unlock(ip
);
1879 hammer2_inode_drop(ip
); /* from lookup */
1885 * Otherwise we have to find the inode
1887 xop
= hammer2_xop_alloc(pmp
->iroot
, 0);
1889 hammer2_xop_start(&xop
->head
, &hammer2_lookup_desc
);
1890 error
= hammer2_xop_collect(&xop
->head
, 0);
1893 ip
= hammer2_inode_get(pmp
, &xop
->head
, -1, -1);
1894 hammer2_xop_retire(&xop
->head
, HAMMER2_XOPMASK_VOP
);
1897 *vpp
= hammer2_igetv(ip
, &error
);
1898 hammer2_inode_unlock(ip
);
1907 hammer2_vfs_root(struct mount
*mp
, struct m_vnode
**vpp
)
1914 if (pmp
->iroot
== NULL
) {
1915 kprintf("hammer2 (%s): no root inode\n",
1916 mp
->mnt_stat
.f_mntfromname
);
1922 hammer2_inode_lock(pmp
->iroot
, HAMMER2_RESOLVE_SHARED
);
1924 while (pmp
->inode_tid
== 0) {
1925 hammer2_xop_ipcluster_t
*xop
;
1926 const hammer2_inode_meta_t
*meta
;
1928 xop
= hammer2_xop_alloc(pmp
->iroot
, HAMMER2_XOP_MODIFYING
);
1929 hammer2_xop_start(&xop
->head
, &hammer2_ipcluster_desc
);
1930 error
= hammer2_xop_collect(&xop
->head
, 0);
1933 meta
= &hammer2_xop_gdata(&xop
->head
)->ipdata
.meta
;
1934 pmp
->iroot
->meta
= *meta
;
1935 pmp
->inode_tid
= meta
->pfs_inum
+ 1;
1936 hammer2_xop_pdata(&xop
->head
);
1939 if (pmp
->inode_tid
< HAMMER2_INODE_START
)
1940 pmp
->inode_tid
= HAMMER2_INODE_START
;
1942 xop
->head
.cluster
.focus
->bref
.modify_tid
+ 1;
1944 kprintf("PFS: Starting inode %jd\n",
1945 (intmax_t)pmp
->inode_tid
);
1946 kprintf("PMP focus good set nextino=%ld mod=%016jx\n",
1947 pmp
->inode_tid
, pmp
->modify_tid
);
1949 //wakeup(&pmp->iroot); XXX
1951 hammer2_xop_retire(&xop
->head
, HAMMER2_XOPMASK_VOP
);
1954 * Prime the mount info.
1956 hammer2_vfs_statfs(mp
, &mp
->mnt_stat
, NULL
);
1963 hammer2_xop_retire(&xop
->head
, HAMMER2_XOPMASK_VOP
);
1964 hammer2_inode_unlock(pmp
->iroot
);
1965 error
= tsleep(&pmp
->iroot
, PCATCH
, "h2root", hz
);
1966 hammer2_inode_lock(pmp
->iroot
, HAMMER2_RESOLVE_SHARED
);
1972 hammer2_inode_unlock(pmp
->iroot
);
1975 vp
= hammer2_igetv(pmp
->iroot
, &error
);
1976 hammer2_inode_unlock(pmp
->iroot
);
1986 * XXX incorporate ipdata->meta.inode_quota and data_quota
1990 hammer2_vfs_statfs(struct mount
*mp
, struct statfs
*sbp
, struct ucred
*cred
)
1994 hammer2_blockref_t bref
;
1999 * NOTE: iroot might not have validated the cluster yet.
2003 bzero(&tmp
, sizeof(tmp
));
2005 for (i
= 0; i
< pmp
->iroot
->cluster
.nchains
; ++i
) {
2006 hmp
= pmp
->pfs_hmps
[i
];
2009 if (pmp
->iroot
->cluster
.array
[i
].chain
)
2010 bref
= pmp
->iroot
->cluster
.array
[i
].chain
->bref
;
2012 bzero(&bref
, sizeof(bref
));
2014 tmp
.f_files
= bref
.embed
.stats
.inode_count
;
2016 tmp
.f_blocks
= hmp
->voldata
.allocator_size
/
2017 mp
->mnt_vstat
.f_bsize
;
2018 tmp
.f_bfree
= hmp
->voldata
.allocator_free
/
2019 mp
->mnt_vstat
.f_bsize
;
2020 tmp
.f_bavail
= tmp
.f_bfree
;
2022 if (cred
&& cred
->cr_uid
!= 0) {
2026 adj
= hmp
->free_reserved
/ mp
->mnt_vstat
.f_bsize
;
2027 tmp
.f_blocks
-= adj
;
2029 tmp
.f_bavail
-= adj
;
2032 mp
->mnt_stat
.f_blocks
= tmp
.f_blocks
;
2033 mp
->mnt_stat
.f_bfree
= tmp
.f_bfree
;
2034 mp
->mnt_stat
.f_bavail
= tmp
.f_bavail
;
2035 mp
->mnt_stat
.f_files
= tmp
.f_files
;
2036 mp
->mnt_stat
.f_ffree
= tmp
.f_ffree
;
2038 *sbp
= mp
->mnt_stat
;
2045 hammer2_vfs_statvfs(struct mount
*mp
, struct statvfs
*sbp
, struct ucred
*cred
)
2049 hammer2_blockref_t bref
;
2054 * NOTE: iroot might not have validated the cluster yet.
2057 bzero(&tmp
, sizeof(tmp
));
2059 for (i
= 0; i
< pmp
->iroot
->cluster
.nchains
; ++i
) {
2060 hmp
= pmp
->pfs_hmps
[i
];
2063 if (pmp
->iroot
->cluster
.array
[i
].chain
)
2064 bref
= pmp
->iroot
->cluster
.array
[i
].chain
->bref
;
2066 bzero(&bref
, sizeof(bref
));
2068 tmp
.f_files
= bref
.embed
.stats
.inode_count
;
2070 tmp
.f_blocks
= hmp
->voldata
.allocator_size
/
2071 mp
->mnt_vstat
.f_bsize
;
2072 tmp
.f_bfree
= hmp
->voldata
.allocator_free
/
2073 mp
->mnt_vstat
.f_bsize
;
2074 tmp
.f_bavail
= tmp
.f_bfree
;
2076 if (cred
&& cred
->cr_uid
!= 0) {
2080 adj
= hmp
->free_reserved
/ mp
->mnt_vstat
.f_bsize
;
2081 tmp
.f_blocks
-= adj
;
2083 tmp
.f_bavail
-= adj
;
2086 mp
->mnt_vstat
.f_blocks
= tmp
.f_blocks
;
2087 mp
->mnt_vstat
.f_bfree
= tmp
.f_bfree
;
2088 mp
->mnt_vstat
.f_bavail
= tmp
.f_bavail
;
2089 mp
->mnt_vstat
.f_files
= tmp
.f_files
;
2090 mp
->mnt_vstat
.f_ffree
= tmp
.f_ffree
;
2092 *sbp
= mp
->mnt_vstat
;
2098 * Mount-time recovery (RW mounts)
2100 * Updates to the free block table are allowed to lag flushes by one
2101 * transaction. In case of a crash, then on a fresh mount we must do an
2102 * incremental scan of the last committed transaction id and make sure that
2103 * all related blocks have been marked allocated.
2105 struct hammer2_recovery_elm
{
2106 TAILQ_ENTRY(hammer2_recovery_elm
) entry
;
2107 hammer2_chain_t
*chain
;
2108 hammer2_tid_t sync_tid
;
2111 TAILQ_HEAD(hammer2_recovery_list
, hammer2_recovery_elm
);
2113 struct hammer2_recovery_info
{
2114 struct hammer2_recovery_list list
;
2119 static int hammer2_recovery_scan(hammer2_dev_t
*hmp
,
2120 hammer2_chain_t
*parent
,
2121 struct hammer2_recovery_info
*info
,
2122 hammer2_tid_t sync_tid
);
2124 #define HAMMER2_RECOVERY_MAXDEPTH 10
2128 hammer2_recovery(hammer2_dev_t
*hmp
)
2130 struct hammer2_recovery_info info
;
2131 struct hammer2_recovery_elm
*elm
;
2132 hammer2_chain_t
*parent
;
2133 hammer2_tid_t sync_tid
;
2134 hammer2_tid_t mirror_tid
;
2137 hammer2_trans_init(hmp
->spmp
, 0);
2139 sync_tid
= hmp
->voldata
.freemap_tid
;
2140 mirror_tid
= hmp
->voldata
.mirror_tid
;
2142 kprintf("hammer2_mount: \"%s\": ", hmp
->devrepname
);
2143 if (sync_tid
>= mirror_tid
) {
2144 kprintf("no recovery needed\n");
2146 kprintf("freemap recovery %016jx-%016jx\n",
2147 sync_tid
+ 1, mirror_tid
);
2150 TAILQ_INIT(&info
.list
);
2152 parent
= hammer2_chain_lookup_init(&hmp
->vchain
, 0);
2153 error
= hammer2_recovery_scan(hmp
, parent
, &info
, sync_tid
);
2154 hammer2_chain_lookup_done(parent
);
2156 while ((elm
= TAILQ_FIRST(&info
.list
)) != NULL
) {
2157 TAILQ_REMOVE(&info
.list
, elm
, entry
);
2158 parent
= elm
->chain
;
2159 sync_tid
= elm
->sync_tid
;
2160 kfree(elm
, M_HAMMER2
);
2162 hammer2_chain_lock(parent
, HAMMER2_RESOLVE_ALWAYS
);
2163 error
|= hammer2_recovery_scan(hmp
, parent
, &info
,
2164 hmp
->voldata
.freemap_tid
);
2165 hammer2_chain_unlock(parent
);
2166 hammer2_chain_drop(parent
); /* drop elm->chain ref */
2169 hammer2_trans_done(hmp
->spmp
, 0);
2176 hammer2_recovery_scan(hammer2_dev_t
*hmp
, hammer2_chain_t
*parent
,
2177 struct hammer2_recovery_info
*info
,
2178 hammer2_tid_t sync_tid
)
2180 const hammer2_inode_data_t
*ripdata
;
2181 hammer2_chain_t
*chain
;
2182 hammer2_blockref_t bref
;
2189 * Adjust freemap to ensure that the block(s) are marked allocated.
2191 if (parent
->bref
.type
!= HAMMER2_BREF_TYPE_VOLUME
) {
2192 hammer2_freemap_adjust(hmp
, &parent
->bref
,
2193 HAMMER2_FREEMAP_DORECOVER
);
2197 * Check type for recursive scan
2199 switch(parent
->bref
.type
) {
2200 case HAMMER2_BREF_TYPE_VOLUME
:
2201 /* data already instantiated */
2203 case HAMMER2_BREF_TYPE_INODE
:
2205 * Must instantiate data for DIRECTDATA test and also
2208 hammer2_chain_lock(parent
, HAMMER2_RESOLVE_ALWAYS
);
2209 ripdata
= &parent
->data
->ipdata
;
2210 if (ripdata
->meta
.op_flags
& HAMMER2_OPFLAG_DIRECTDATA
) {
2211 /* not applicable to recovery scan */
2212 hammer2_chain_unlock(parent
);
2215 hammer2_chain_unlock(parent
);
2217 case HAMMER2_BREF_TYPE_INDIRECT
:
2219 * Must instantiate data for recursion
2221 hammer2_chain_lock(parent
, HAMMER2_RESOLVE_ALWAYS
);
2222 hammer2_chain_unlock(parent
);
2224 case HAMMER2_BREF_TYPE_DIRENT
:
2225 case HAMMER2_BREF_TYPE_DATA
:
2226 case HAMMER2_BREF_TYPE_FREEMAP
:
2227 case HAMMER2_BREF_TYPE_FREEMAP_NODE
:
2228 case HAMMER2_BREF_TYPE_FREEMAP_LEAF
:
2229 /* not applicable to recovery scan */
2233 return HAMMER2_ERROR_BADBREF
;
2237 * Defer operation if depth limit reached.
2239 if (info
->depth
>= HAMMER2_RECOVERY_MAXDEPTH
) {
2240 struct hammer2_recovery_elm
*elm
;
2242 elm
= kmalloc(sizeof(*elm
), M_HAMMER2
, M_ZERO
| M_WAITOK
);
2243 elm
->chain
= parent
;
2244 elm
->sync_tid
= sync_tid
;
2245 hammer2_chain_ref(parent
);
2246 TAILQ_INSERT_TAIL(&info
->list
, elm
, entry
);
2247 /* unlocked by caller */
2254 * Recursive scan of the last flushed transaction only. We are
2255 * doing this without pmp assignments so don't leave the chains
2256 * hanging around after we are done with them.
2258 * error Cumulative error this level only
2259 * rup_error Cumulative error for recursion
2260 * tmp_error Specific non-cumulative recursion error
2268 error
|= hammer2_chain_scan(parent
, &chain
, &bref
,
2270 HAMMER2_LOOKUP_NODATA
);
2273 * Problem during scan or EOF
2281 if (chain
== NULL
) {
2282 if (bref
.mirror_tid
> sync_tid
) {
2283 hammer2_freemap_adjust(hmp
, &bref
,
2284 HAMMER2_FREEMAP_DORECOVER
);
2290 * This may or may not be a recursive node.
2292 atomic_set_int(&chain
->flags
, HAMMER2_CHAIN_RELEASE
);
2293 if (bref
.mirror_tid
> sync_tid
) {
2295 tmp_error
= hammer2_recovery_scan(hmp
, chain
,
2303 * Flush the recovery at the PFS boundary to stage it for
2304 * the final flush of the super-root topology.
2306 if (tmp_error
== 0 &&
2307 (bref
.flags
& HAMMER2_BREF_FLAG_PFSROOT
) &&
2308 (chain
->flags
& HAMMER2_CHAIN_ONFLUSH
)) {
2309 hammer2_flush(chain
, HAMMER2_FLUSH_TOP
|
2312 rup_error
|= tmp_error
;
2314 return ((error
| rup_error
) & ~HAMMER2_ERROR_EOF
);
2318 * This fixes up an error introduced in earlier H2 implementations where
2319 * moving a PFS inode into an indirect block wound up causing the
2320 * HAMMER2_BREF_FLAG_PFSROOT flag in the bref to get cleared.
2324 hammer2_fixup_pfses(hammer2_dev_t
*hmp
)
2326 const hammer2_inode_data_t
*ripdata
;
2327 hammer2_chain_t
*parent
;
2328 hammer2_chain_t
*chain
;
2329 hammer2_key_t key_next
;
2330 hammer2_pfs_t
*spmp
;
2336 * Lookup mount point under the media-localized super-root.
2338 * cluster->pmp will incorrectly point to spmp and must be fixed
2342 hammer2_inode_lock(spmp
->iroot
, 0);
2343 parent
= hammer2_inode_chain(spmp
->iroot
, 0, HAMMER2_RESOLVE_ALWAYS
);
2344 chain
= hammer2_chain_lookup(&parent
, &key_next
,
2345 HAMMER2_KEY_MIN
, HAMMER2_KEY_MAX
,
2348 if (chain
->bref
.type
!= HAMMER2_BREF_TYPE_INODE
)
2351 kprintf("I/O error scanning PFS labels\n");
2352 error
|= chain
->error
;
2353 } else if ((chain
->bref
.flags
&
2354 HAMMER2_BREF_FLAG_PFSROOT
) == 0) {
2357 ripdata
= &chain
->data
->ipdata
;
2358 hammer2_trans_init(hmp
->spmp
, 0);
2359 error2
= hammer2_chain_modify(chain
,
2360 chain
->bref
.modify_tid
,
2363 kprintf("hammer2: Correct mis-flagged PFS %s\n",
2365 chain
->bref
.flags
|= HAMMER2_BREF_FLAG_PFSROOT
;
2369 hammer2_flush(chain
, HAMMER2_FLUSH_TOP
|
2371 hammer2_trans_done(hmp
->spmp
, 0);
2373 chain
= hammer2_chain_next(&parent
, chain
, &key_next
,
2374 key_next
, HAMMER2_KEY_MAX
,
2378 hammer2_chain_unlock(parent
);
2379 hammer2_chain_drop(parent
);
2381 hammer2_inode_unlock(spmp
->iroot
);
2387 * Sync a mount point; this is called periodically on a per-mount basis from
2388 * the filesystem syncer, and whenever a user issues a sync.
2391 hammer2_vfs_sync(struct mount
*mp
, int waitfor
)
2395 error
= hammer2_vfs_sync_pmp(MPTOPMP(mp
), waitfor
);
2401 * Because frontend operations lock vnodes before we get a chance to
2402 * lock the related inode, we can't just acquire a vnode lock without
2403 * risking a deadlock. The frontend may be holding a vnode lock while
2404 * also blocked on our SYNCQ flag while trying to get the inode lock.
2406 * To deal with this situation we can check the vnode lock situation
2407 * after locking the inode and perform a work-around.
2410 hammer2_vfs_sync_pmp(hammer2_pfs_t
*pmp
, int waitfor
)
2412 hammer2_inode_t
*ip
;
2413 hammer2_depend_t
*depend
;
2414 hammer2_depend_t
*depend_next
;
2422 * Move all inodes on sideq to syncq. This will clear sideq.
2423 * This should represent all flushable inodes. These inodes
2424 * will already have refs due to being on syncq or sideq. We
2425 * must do this all at once with the spinlock held to ensure that
2426 * all inode dependencies are part of the same flush.
2428 * We should be able to do this asynchronously from frontend
2429 * operations because we will be locking the inodes later on
2430 * to actually flush them, and that will partition any frontend
2431 * op using the same inode. Either it has already locked the
2432 * inode and we will block, or it has not yet locked the inode
2433 * and it will block until we are finished flushing that inode.
2435 * When restarting, only move the inodes flagged as PASS2 from
2436 * SIDEQ to SYNCQ. PASS2 propagation by inode_lock4() and
2437 * inode_depend() are atomic with the spin-lock.
2439 hammer2_trans_init(pmp
, HAMMER2_TRANS_ISFLUSH
);
2440 #ifdef HAMMER2_DEBUG_SYNC
2441 kprintf("FILESYSTEM SYNC BOUNDARY\n");
2446 * Move inodes from depq to syncq, releasing the related
2447 * depend structures.
2450 #ifdef HAMMER2_DEBUG_SYNC
2451 kprintf("FILESYSTEM SYNC RESTART (%d)\n", dorestart
);
2453 hammer2_trans_setflags(pmp
, 0/*HAMMER2_TRANS_COPYQ*/);
2454 hammer2_trans_clearflags(pmp
, HAMMER2_TRANS_RESCAN
);
2457 * Move inodes from depq to syncq. When restarting, only depq's
2458 * marked pass2 are moved.
2460 hammer2_spin_ex(&pmp
->list_spin
);
2461 depend_next
= TAILQ_FIRST(&pmp
->depq
);
2464 while ((depend
= depend_next
) != NULL
) {
2465 depend_next
= TAILQ_NEXT(depend
, entry
);
2466 if (dorestart
&& depend
->pass2
== 0)
2468 TAILQ_FOREACH(ip
, &depend
->sideq
, entry
) {
2469 KKASSERT(ip
->flags
& HAMMER2_INODE_SIDEQ
);
2470 atomic_set_int(&ip
->flags
, HAMMER2_INODE_SYNCQ
);
2471 atomic_clear_int(&ip
->flags
, HAMMER2_INODE_SIDEQ
);
2476 * NOTE: pmp->sideq_count includes both sideq and syncq
2478 TAILQ_CONCAT(&pmp
->syncq
, &depend
->sideq
, entry
);
2482 TAILQ_REMOVE(&pmp
->depq
, depend
, entry
);
2485 hammer2_spin_unex(&pmp
->list_spin
);
2486 hammer2_trans_clearflags(pmp
, /*HAMMER2_TRANS_COPYQ |*/
2487 HAMMER2_TRANS_WAITING
);
2491 * sideq_count may have dropped enough to allow us to unstall
2494 hammer2_pfs_memory_wakeup(pmp
, 0);
2497 * Now run through all inodes on syncq.
2499 * Flush transactions only interlock with other flush transactions.
2500 * Any conflicting frontend operations will block on the inode, but
2501 * may hold a vnode lock while doing so.
2503 hammer2_spin_ex(&pmp
->list_spin
);
2504 while ((ip
= TAILQ_FIRST(&pmp
->syncq
)) != NULL
) {
2506 * Remove the inode from the SYNCQ, transfer the syncq ref
2507 * to us. We must clear SYNCQ to allow any potential
2508 * front-end deadlock to proceed. We must set PASS2 so
2509 * the dependency code knows what to do.
2513 if (atomic_cmpset_int(&ip
->flags
,
2515 (pass2
& ~(HAMMER2_INODE_SYNCQ
|
2516 HAMMER2_INODE_SYNCQ_WAKEUP
)) |
2517 HAMMER2_INODE_SYNCQ_PASS2
) == 0)
2521 TAILQ_REMOVE(&pmp
->syncq
, ip
, entry
);
2523 hammer2_spin_unex(&pmp
->list_spin
);
2526 * Tickle anyone waiting on ip->flags or the hysteresis
2527 * on the dirty inode count.
2529 if (pass2
& HAMMER2_INODE_SYNCQ_WAKEUP
)
2531 if (++wakecount
>= hammer2_limit_dirty_inodes
/ 20 + 1) {
2533 hammer2_pfs_memory_wakeup(pmp
, 0);
2537 * Relock the inode, and we inherit a ref from the above.
2538 * We will check for a race after we acquire the vnode.
2540 hammer2_mtx_ex(&ip
->lock
);
2543 * We need the vp in order to vfsync() dirty buffers, so if
2544 * one isn't attached we can skip it.
2546 * Ordering the inode lock and then the vnode lock has the
2547 * potential to deadlock. If we had left SYNCQ set that could
2548 * also deadlock us against the frontend even if we don't hold
2549 * any locks, but the latter is not a problem now since we
2550 * cleared it. igetv will temporarily release the inode lock
2551 * in a safe manner to work-around the deadlock.
2553 * Unfortunately it is still possible to deadlock when the
2554 * frontend obtains multiple inode locks, because all the
2555 * related vnodes are already locked (nor can the vnode locks
2556 * be released and reacquired without messing up RECLAIM and
2557 * INACTIVE sequencing).
2559 * The solution for now is to move the vp back onto SIDEQ
2560 * and set dorestart, which will restart the flush after we
2561 * exhaust the current SYNCQ. Note that additional
2562 * dependencies may build up, so we definitely need to move
2563 * the whole SIDEQ back to SYNCQ when we restart.
2567 if (vget(vp
, LK_EXCLUSIVE
|LK_NOWAIT
)) {
2569 * Failed to get the vnode, requeue the inode
2570 * (PASS2 is already set so it will be found
2571 * again on the restart).
2573 * Then unlock, possibly sleep, and retry
2574 * later. We sleep if PASS2 was *previously*
2575 * set, before we set it again above.
2579 #ifdef HAMMER2_DEBUG_SYNC
2580 kprintf("inum %ld (sync delayed by vnode)\n",
2581 (long)ip
->meta
.inum
);
2583 hammer2_inode_delayed_sideq(ip
);
2585 hammer2_mtx_unlock(&ip
->lock
);
2586 hammer2_inode_drop(ip
);
2588 if (pass2
& HAMMER2_INODE_SYNCQ_PASS2
) {
2589 tsleep(&dorestart
, 0, "h2syndel", 2);
2591 hammer2_spin_ex(&pmp
->list_spin
);
2599 * If the inode wound up on a SIDEQ again it will already be
2600 * prepped for another PASS2. In this situation if we flush
2601 * it now we will just wind up flushing it again in the same
2602 * syncer run, so we might as well not flush it now.
2604 if (ip
->flags
& HAMMER2_INODE_SIDEQ
) {
2605 hammer2_mtx_unlock(&ip
->lock
);
2606 hammer2_inode_drop(ip
);
2610 hammer2_spin_ex(&pmp
->list_spin
);
2615 * Ok we have the inode exclusively locked and if vp is
2616 * not NULL that will also be exclusively locked. Do the
2617 * meat of the flush.
2619 * vp token needed for v_rbdirty_tree check / vclrisdirty
2620 * sequencing. Though we hold the vnode exclusively so
2621 * we shouldn't need to hold the token also in this case.
2624 vfsync(vp
, MNT_WAIT
, 1, NULL
, NULL
);
2625 bio_track_wait(NULL
, 0, 0); /* XXX */
2629 * If the inode has not yet been inserted into the tree
2630 * we must do so. Then sync and flush it. The flush should
2631 * update the parent.
2633 if (ip
->flags
& HAMMER2_INODE_DELETING
) {
2634 #ifdef HAMMER2_DEBUG_SYNC
2635 kprintf("inum %ld destroy\n", (long)ip
->meta
.inum
);
2637 hammer2_inode_chain_des(ip
);
2638 atomic_add_long(&hammer2_iod_inode_deletes
, 1);
2639 } else if (ip
->flags
& HAMMER2_INODE_CREATING
) {
2640 #ifdef HAMMER2_DEBUG_SYNC
2641 kprintf("inum %ld insert\n", (long)ip
->meta
.inum
);
2643 hammer2_inode_chain_ins(ip
);
2644 atomic_add_long(&hammer2_iod_inode_creates
, 1);
2646 #ifdef HAMMER2_DEBUG_SYNC
2647 kprintf("inum %ld chain-sync\n", (long)ip
->meta
.inum
);
2651 * Because I kinda messed up the design and index the inodes
2652 * under the root inode, along side the directory entries,
2653 * we can't flush the inode index under the iroot until the
2654 * end. If we do it now we might miss effects created by
2655 * other inodes on the SYNCQ.
2657 * Do a normal (non-FSSYNC) flush instead, which allows the
2658 * vnode code to work the same. We don't want to force iroot
2659 * back onto the SIDEQ, and we also don't want the flush code
2660 * to update pfs_iroot_blocksets until the final flush later.
2662 * XXX at the moment this will likely result in a double-flush
2663 * of the iroot chain.
2665 hammer2_inode_chain_sync(ip
);
2666 if (ip
== pmp
->iroot
) {
2667 hammer2_inode_chain_flush(ip
, HAMMER2_XOP_INODE_STOP
);
2669 hammer2_inode_chain_flush(ip
, HAMMER2_XOP_INODE_STOP
|
2670 HAMMER2_XOP_FSSYNC
);
2673 lwkt_gettoken(NULL
);
2674 if ((ip
->flags
& (HAMMER2_INODE_MODIFIED
|
2675 HAMMER2_INODE_RESIZED
|
2676 HAMMER2_INODE_DIRTYDATA
)) == 0) {
2677 //RB_EMPTY(&vp->v_rbdirty_tree) &&
2678 //!bio_track_active(&vp->v_track_write)) {
2681 hammer2_inode_delayed_sideq(ip
);
2683 lwkt_reltoken(NULL
);
2685 vp
= NULL
; /* safety */
2687 atomic_clear_int(&ip
->flags
, HAMMER2_INODE_SYNCQ_PASS2
);
2688 hammer2_inode_unlock(ip
); /* unlock+drop */
2689 /* ip pointer invalid */
2692 * If the inode got dirtied after we dropped our locks,
2693 * it will have already been moved back to the SIDEQ.
2695 hammer2_spin_ex(&pmp
->list_spin
);
2697 hammer2_spin_unex(&pmp
->list_spin
);
2698 hammer2_pfs_memory_wakeup(pmp
, 0);
2700 if (dorestart
|| (pmp
->trans
.flags
& HAMMER2_TRANS_RESCAN
)) {
2701 #ifdef HAMMER2_DEBUG_SYNC
2702 kprintf("FILESYSTEM SYNC STAGE 1 RESTART\n");
2703 /*tsleep(&dorestart, 0, "h2STG1-R", hz*20);*/
2708 #ifdef HAMMER2_DEBUG_SYNC
2709 kprintf("FILESYSTEM SYNC STAGE 2 BEGIN\n");
2710 /*tsleep(&dorestart, 0, "h2STG2", hz*20);*/
2714 * We have to flush the PFS root last, even if it does not appear to
2715 * be dirty, because all the inodes in the PFS are indexed under it.
2716 * The normal flushing of iroot above would only occur if directory
2717 * entries under the root were changed.
2719 * Specifying VOLHDR will cause an additional flush of hmp->spmp
2720 * for the media making up the cluster.
2722 if ((ip
= pmp
->iroot
) != NULL
) {
2723 hammer2_inode_ref(ip
);
2724 hammer2_mtx_ex(&ip
->lock
);
2725 hammer2_inode_chain_sync(ip
);
2726 hammer2_inode_chain_flush(ip
, HAMMER2_XOP_INODE_STOP
|
2727 HAMMER2_XOP_FSSYNC
|
2728 HAMMER2_XOP_VOLHDR
);
2729 hammer2_inode_unlock(ip
); /* unlock+drop */
2731 #ifdef HAMMER2_DEBUG_SYNC
2732 kprintf("FILESYSTEM SYNC STAGE 2 DONE\n");
2738 hammer2_bioq_sync(pmp
);
2740 error
= 0; /* XXX */
2741 hammer2_trans_done(pmp
, HAMMER2_TRANS_ISFLUSH
);
/*
 * Fill in the file handle *fhp for a vnode.  fid_len is set to the offset
 * just past the first 16 bytes of fid_data.  Viewed as an array of
 * hammer2_tid_t, fid_data[0] receives the inode number (ip->meta.inum)
 * and fid_data[1] is zeroed.
 *
 * NOTE(review): this view of the file is a lossy extraction.  The return
 * type, the braces, the assignment of ip and the return statement are not
 * visible here.  ip is presumably derived from vp -- confirm against the
 * complete source.
 */
2749 hammer2_vfs_vptofh(struct m_vnode
*vp
, struct fid
*fhp
)
2751 hammer2_inode_t
*ip
;
/*
 * Two hammer2_tid_t slots are written below, so fid_data must be able to
 * hold 16 bytes (assumes hammer2_tid_t is 8 bytes wide -- TODO confirm).
 */
2753 KKASSERT(MAXFIDSZ
>= 16);
2755 fhp
->fid_len
= offsetof(struct fid
, fid_data
[16]);
/* Slot 0: inode number.  Slot 1: always zero. */
2757 ((hammer2_tid_t
*)fhp
->fid_data
)[0] = ip
->meta
.inum
;
2758 ((hammer2_tid_t
*)fhp
->fid_data
)[1] = 0;
/*
 * Translate a file handle back into a vnode.  The inode number is read
 * from the first hammer2_tid_t slot of fhp->fid_data and masked with
 * HAMMER2_DIRHASH_USERMSK.  The vnode is then obtained through either
 * hammer2_vfs_root() or hammer2_vfs_vget(); both store the vnode through
 * vpp and their result in error.
 *
 * NOTE(review): the local declarations, the condition that chooses
 * between the two lookups and the return statement are not visible in
 * this view.  Presumably the root inode number selects
 * hammer2_vfs_root() -- confirm against the complete source.  rootvp is
 * not referenced by any visible line.
 */
2765 hammer2_vfs_fhtovp(struct mount
*mp
, struct m_vnode
*rootvp
,
2766 struct fid
*fhp
, struct m_vnode
**vpp
)
/* Recover the inode number stored in slot 0 of the handle. */
2771 inum
= ((hammer2_tid_t
*)fhp
->fid_data
)[0] & HAMMER2_DIRHASH_USERMSK
;
/* Lookup path 1: the mount's root vnode. */
2774 error
= hammer2_vfs_root(mp
, vpp
);
/* Lookup path 2: by inode number. */
2776 error
= hammer2_vfs_vget(mp
, NULL
, inum
, vpp
);
/*
 * Look up the export entry for client address nam with
 * vfs_export_lookup() against pmp->export, then hand back the entry's
 * export flags through exflagsp and a pointer to its anonymous
 * credentials through credanonp.
 *
 * NOTE(review): how pmp is obtained, the return values, and what happens
 * when the lookup yields no entry are not visible in this view.  Confirm
 * in the complete source that np is checked before it is dereferenced.
 */
2785 hammer2_vfs_checkexp(struct mount
*mp
, struct sockaddr
*nam
,
2786 int *exflagsp
, struct ucred
**credanonp
)
2793 np
= vfs_export_lookup(mp
, &pmp
->export
, nam
);
/* Copy out the flags; the credentials are returned by reference. */
2795 *exflagsp
= np
->netc_exflags
;
2796 *credanonp
= &np
->netc_anon
;
2806 * This handles hysteresis on regular file flushes. Because the BIOs are
2807 * routed to a thread it is possible for an excessive number to build up
2808 * and cause long front-end stalls long before the runningbuffspace limit
2809 * is hit, so we implement hammer2_flush_pipe to control the
2812 * This is a particular problem when compression is used.
/*
 * Atomically add one to pmp->count_lwinprog, the in-progress counter that
 * hammer2_lwinprog_drop() decrements and hammer2_lwinprog_wait() waits on.
 */
2815 hammer2_lwinprog_ref(hammer2_pfs_t
*pmp
)
2817 atomic_add_int(&pmp
->count_lwinprog
, 1);
/*
 * Atomically subtract one from pmp->count_lwinprog and wake up sleepers
 * whose wait condition is met.  The same word carries the count (under
 * HAMMER2_LWINPROG_MASK) and two waiter flags:
 *
 *  - HAMMER2_LWINPROG_WAITING:  cleared, followed by a wakeup, when the
 *    count is at or below 2/3 of hammer2_flush_pipe.
 *  - HAMMER2_LWINPROG_WAITING0: cleared, followed by a wakeup, when the
 *    count is at or below zero.
 *
 * NOTE(review): both tests use the value returned by
 * atomic_fetchadd_int().  Per atomic(9) that is presumably the value
 * from before the decrement -- confirm the thresholds (the "<= 0" test
 * in particular) are meant to be compared against it.
 */
2821 hammer2_lwinprog_drop(hammer2_pfs_t
*pmp
)
2826 lwinprog
= atomic_fetchadd_int(&pmp
->count_lwinprog
, -1);
/* Hysteresis wakeup for waiters that passed a non-zero flush_pipe. */
2827 if ((lwinprog
& HAMMER2_LWINPROG_WAITING
) &&
2828 (lwinprog
& HAMMER2_LWINPROG_MASK
) <= hammer2_flush_pipe
* 2 / 3) {
2829 atomic_clear_int(&pmp
->count_lwinprog
,
2830 HAMMER2_LWINPROG_WAITING
);
2831 wakeup(&pmp
->count_lwinprog
);
/* Wakeup for waiters that asked for the count to reach zero. */
2833 if ((lwinprog
& HAMMER2_LWINPROG_WAITING0
) &&
2834 (lwinprog
& HAMMER2_LWINPROG_MASK
) <= 0) {
2835 atomic_clear_int(&pmp
->count_lwinprog
,
2836 HAMMER2_LWINPROG_WAITING0
);
2837 wakeup(&pmp
->count_lwinprog
);
/*
 * Wait for the in-progress count in pmp->count_lwinprog to be at or below
 * flush_pipe.  A non-zero flush_pipe registers the waiter with
 * HAMMER2_LWINPROG_WAITING, a zero flush_pipe with
 * HAMMER2_LWINPROG_WAITING0.  hammer2_lwinprog_drop() clears those flags
 * and issues the matching wakeup.
 *
 * NOTE(review): the loop construct and the statements executed when the
 * two count tests succeed are not visible in this view (presumably a loop
 * that is exited) -- confirm against the complete source.
 */
2843 hammer2_lwinprog_wait(hammer2_pfs_t
*pmp
, int flush_pipe
)
/* Pick the waiter flag that matches the requested threshold. */
2847 int lwflag
= (flush_pipe
) ? HAMMER2_LWINPROG_WAITING
:
2848 HAMMER2_LWINPROG_WAITING0
;
2851 lwinprog
= pmp
->count_lwinprog
;
2853 if ((lwinprog
& HAMMER2_LWINPROG_MASK
) <= flush_pipe
)
/*
 * Arm the sleep interlock, publish the waiter flag, then re-read and
 * re-test the count before sleeping.  The ordering is presumably meant to
 * keep a wakeup issued in between from being missed -- see tsleep(9).
 */
2855 tsleep_interlock(&pmp
->count_lwinprog
, 0);
2856 atomic_set_int(&pmp
->count_lwinprog
, lwflag
);
2857 lwinprog
= pmp
->count_lwinprog
;
2858 if ((lwinprog
& HAMMER2_LWINPROG_MASK
) <= flush_pipe
)
/* Interlocked sleep on the counter; hz is passed as the timeout argument. */
2860 tsleep(&pmp
->count_lwinprog
, PINTERLOCKED
, "h2wpipe", hz
);
2867 * It is possible for an excessive number of dirty chains or dirty inodes
2868 * to build up. When this occurs we start an asynchronous filesystem sync.
2869 * If the level continues to build up, we stall, waiting for it to drop,
2870 * with some hysteresis.
2872 * This relies on the kernel calling hammer2_vfs_modifying() prior to
2873 * obtaining any vnode locks before making a modifying VOP call.
/*
 * Hook for modifying operations on a mount: tests the mount for
 * MNT_RDONLY and calls hammer2_pfs_memory_wait() on the mount's PFS
 * (MPTOPMP(mp)).
 *
 * NOTE(review): the statement controlled by the MNT_RDONLY test is not
 * visible in this view (presumably an early return, so that read-only
 * mounts never stall) -- confirm against the complete source.
 */
2876 hammer2_vfs_modifying(struct mount
*mp
)
2878 if (mp
->mnt_flag
& MNT_RDONLY
)
2880 hammer2_pfs_memory_wait(MPTOPMP(mp
));
2887 * Initiate an asynchronous filesystem sync and, with hysteresis,
2888 * stall if the internal data structure count becomes too bloated.
/*
 * Throttle on the amount of dirty in-memory state of a PFS.
 *
 * Visible steps:
 *  1. pmp and pmp->mp are tested for NULL.
 *  2. waiting is loaded from pmp->inmem_dirty_chains, masked with
 *     HAMMER2_DIRTYCHAIN_MASK.
 *  3. trigger_syncer() is called once waiting exceeds half of
 *     hammer2_limit_dirty_chains or pmp->sideq_count exceeds half of
 *     hammer2_limit_dirty_inodes.
 *  4. Both counts are tested against their full limits.
 *  5. Otherwise the sleep interlock is armed, HAMMER2_DIRTYCHAIN_WAITING
 *     is set, the limits are tested again, the syncer is triggered and
 *     the thread sleeps on &pmp->inmem_dirty_chains.  The sleep result is
 *     compared against ERESTART.
 *
 * NOTE(review): this view is a lossy extraction.  The loop construct, the
 * statements taken when the NULL / limit / ERESTART tests succeed
 * (presumably return or break), and the trailing tsleep() arguments are
 * not visible -- confirm against the complete source.
 *
 * NOTE(review): the in-body comment mentions waking "below 3/4 the
 * limit", while hammer2_pfs_memory_wakeup() in this file tests against
 * limit * 2 / 3.  One of the two looks stale.
 */
2891 hammer2_pfs_memory_wait(hammer2_pfs_t
*pmp
)
2897 if (pmp
== NULL
|| pmp
->mp
== NULL
)
2901 waiting
= pmp
->inmem_dirty_chains
& HAMMER2_DIRTYCHAIN_MASK
;
2905 * Start the syncer running at 1/2 the limit
2907 if (waiting
> hammer2_limit_dirty_chains
/ 2 ||
2908 pmp
->sideq_count
> hammer2_limit_dirty_inodes
/ 2) {
2909 trigger_syncer(pmp
->mp
);
2913 * Stall at the limit waiting for the counts to drop.
2914 * This code will typically be woken up once the count
2915 * drops below 3/4 the limit, or in one second.
2917 if (waiting
< hammer2_limit_dirty_chains
&&
2918 pmp
->sideq_count
< hammer2_limit_dirty_inodes
) {
/* PCATCH is only requested when the current thread has a process attached. */
2922 pcatch
= curthread
->td_proc
? PCATCH
: 0;
2924 tsleep_interlock(&pmp
->inmem_dirty_chains
, pcatch
);
2925 atomic_set_int(&pmp
->inmem_dirty_chains
,
2926 HAMMER2_DIRTYCHAIN_WAITING
);
/*
 * NOTE(review): waiting is not re-read in the visible lines between the
 * first test and this one, so only pmp->sideq_count can differ here.
 */
2927 if (waiting
< hammer2_limit_dirty_chains
&&
2928 pmp
->sideq_count
< hammer2_limit_dirty_inodes
) {
2931 trigger_syncer(pmp
->mp
);
2932 error
= tsleep(&pmp
->inmem_dirty_chains
, PINTERLOCKED
| pcatch
,
2934 if (error
== ERESTART
)
2940 * Wake up any stalled frontend ops waiting, with hysteresis, using
/*
 * Atomically add count to pmp->inmem_dirty_chains and wake up threads
 * sleeping on that word.  The wakeup is issued, after clearing
 * HAMMER2_DIRTYCHAIN_WAITING, only when all of the following hold:
 *  - the WAITING flag was set in the value returned by
 *    atomic_fetchadd_int(),
 *  - the current chain count (HAMMER2_DIRTYCHAIN_MASK bits) is at or
 *    below 2/3 of hammer2_limit_dirty_chains,
 *  - pmp->sideq_count is at or below 2/3 of hammer2_limit_dirty_inodes.
 *
 * Callers visible in this file pass count 0, which makes the call a pure
 * "re-check and wake" operation.
 *
 * NOTE(review): any guard on pmp (e.g. a NULL test) is not visible in
 * this view -- confirm against the complete source.
 */
2944 hammer2_pfs_memory_wakeup(hammer2_pfs_t
*pmp
, int count
)
2949 waiting
= atomic_fetchadd_int(&pmp
->inmem_dirty_chains
, count
);
2950 /* don't need --waiting to test flag */
/* Flag comes from the fetched value; the counts are re-read from pmp. */
2952 if ((waiting
& HAMMER2_DIRTYCHAIN_WAITING
) &&
2953 (pmp
->inmem_dirty_chains
& HAMMER2_DIRTYCHAIN_MASK
) <=
2954 hammer2_limit_dirty_chains
* 2 / 3 &&
2955 pmp
->sideq_count
<= hammer2_limit_dirty_inodes
* 2 / 3) {
2956 atomic_clear_int(&pmp
->inmem_dirty_chains
,
2957 HAMMER2_DIRTYCHAIN_WAITING
);
2958 wakeup(&pmp
->inmem_dirty_chains
);
/*
 * Atomically add one to pmp->inmem_dirty_chains, the counter that
 * hammer2_pfs_memory_wait() throttles on.
 *
 * NOTE(review): whether pmp is tested for NULL first is not visible in
 * this view -- confirm against the complete source.
 */
2964 hammer2_pfs_memory_inc(hammer2_pfs_t
*pmp
)
2967 atomic_add_int(&pmp
->inmem_dirty_chains
, 1);
2972 * Volume header data locks
/*
 * Acquire the device's volume-header lock (hmp->vollk) exclusively via
 * lockmgr().  Paired with hammer2_voldata_unlock().
 */
2975 hammer2_voldata_lock(hammer2_dev_t
*hmp
)
2977 lockmgr(&hmp
->vollk
, LK_EXCLUSIVE
);
/*
 * Release the device's volume-header lock (hmp->vollk) taken by
 * hammer2_voldata_lock().
 */
2981 hammer2_voldata_unlock(hammer2_dev_t
*hmp
)
2983 lockmgr(&hmp
->vollk
, LK_RELEASE
);
2987 * Caller indicates that the volume header is being modified. Flag
2988 * the related chain and adjust its transaction id.
2990 * The transaction id is set to voldata.mirror_tid + 1, similar to
2991 * what hammer2_chain_modify() does. Be very careful here, volume
2992 * data can be updated independently of the rest of the filesystem.
/*
 * Mark the volume-header chain (hmp->vchain) as modified.  Nothing is
 * done when HAMMER2_CHAIN_MODIFIED is already set.  Otherwise:
 *  - the global hammer2_count_modified_chains counter is incremented,
 *  - HAMMER2_CHAIN_MODIFIED is set on the chain,
 *  - the dirty-chain count of the chain's PFS is incremented through
 *    hammer2_pfs_memory_inc(),
 *  - the chain's bref.mirror_tid is set to voldata.mirror_tid + 1.
 */
2995 hammer2_voldata_modify(hammer2_dev_t
*hmp
)
/* Only the first modification since the flag was last clear is accounted. */
2997 if ((hmp
->vchain
.flags
& HAMMER2_CHAIN_MODIFIED
) == 0) {
2998 atomic_add_long(&hammer2_count_modified_chains
, 1);
2999 atomic_set_int(&hmp
->vchain
.flags
, HAMMER2_CHAIN_MODIFIED
);
3000 hammer2_pfs_memory_inc(hmp
->vchain
.pmp
);
3001 hmp
->vchain
.bref
.mirror_tid
= hmp
->voldata
.mirror_tid
+ 1;
3006 * Returns 0 if the filesystem has tons of free space
3007 * Returns 1 if the filesystem has less than 10% remaining
3008 * Returns 2 if the filesystem has less than 2%/5% (user/root) remaining.
3011 hammer2_vfs_enospace(hammer2_inode_t
*ip
, off_t bytes
, struct ucred
*cred
)
3015 hammer2_off_t free_reserved
;
3016 hammer2_off_t free_nominal
;
3021 if (/*XXX*/ 1 || pmp
->free_ticks
== 0 || pmp
->free_ticks
!= ticks
) {
3022 free_reserved
= HAMMER2_SEGSIZE
;
3023 free_nominal
= 0x7FFFFFFFFFFFFFFFLLU
;
3024 for (i
= 0; i
< pmp
->iroot
->cluster
.nchains
; ++i
) {
3025 hmp
= pmp
->pfs_hmps
[i
];
3028 if (pmp
->pfs_types
[i
] != HAMMER2_PFSTYPE_MASTER
&&
3029 pmp
->pfs_types
[i
] != HAMMER2_PFSTYPE_SOFT_MASTER
)
3032 if (free_nominal
> hmp
->voldata
.allocator_free
)
3033 free_nominal
= hmp
->voldata
.allocator_free
;
3034 if (free_reserved
< hmp
->free_reserved
)
3035 free_reserved
= hmp
->free_reserved
;
3041 pmp
->free_reserved
= free_reserved
;
3042 pmp
->free_nominal
= free_nominal
;
3043 pmp
->free_ticks
= ticks
;
3045 free_reserved
= pmp
->free_reserved
;
3046 free_nominal
= pmp
->free_nominal
;
3048 if (cred
&& cred
->cr_uid
!= 0) {
3049 if ((int64_t)(free_nominal
- bytes
) <
3050 (int64_t)free_reserved
) {
3054 if ((int64_t)(free_nominal
- bytes
) <
3055 (int64_t)free_reserved
/ 2) {
3059 if ((int64_t)(free_nominal
- bytes
) < (int64_t)free_reserved
* 2)