4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2016 Nexenta Systems, Inc.
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
32 * University Copyright- Copyright (c) 1982, 1986, 1988
33 * The Regents of the University of California
36 * University Acknowledgment- Portions of this document are derived from
37 * software developed by the University of California, Berkeley, and its
41 #include <sys/types.h>
42 #include <sys/t_lock.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/bitmap.h>
46 #include <sys/sysmacros.h>
48 #include <sys/signal.h>
53 #include <sys/pathname.h>
55 #include <sys/vnode.h>
57 #include <sys/atomic.h>
63 #include <sys/kstat.h>
65 #include <sys/fs/ufs_fsdir.h>
66 #include <sys/fs/ufs_fs.h>
67 #include <sys/fs/ufs_inode.h>
68 #include <sys/fs/ufs_mount.h>
69 #include <sys/fs/ufs_acl.h>
70 #include <sys/fs/ufs_panic.h>
71 #include <sys/fs/ufs_bio.h>
72 #include <sys/fs/ufs_quota.h>
73 #include <sys/fs/ufs_log.h>
75 #include <sys/statvfs.h>
76 #include <sys/mount.h>
77 #include <sys/mntent.h>
79 #include <sys/errno.h>
80 #include <sys/debug.h>
81 #include "sys/fs_subr.h"
82 #include <sys/cmn_err.h>
84 #include <sys/fssnap_if.h>
85 #include <sys/sunddi.h>
86 #include <sys/bootconf.h>
87 #include <sys/policy.h>
91 * This is the loadable module wrapper.
93 #include <sys/modctl.h>
96 static const struct vfsops ufs_vfsops
;
97 static int ufsinit(int, char *);
100 extern struct instats ins
;
101 extern struct vnode
*common_specvp(struct vnode
*vp
);
102 extern vfs_t EIO_vfs
;
104 struct dquot
*dquot
, *dquotNDQUOT
;
107 * Cylinder group summary information handling tunable.
108 * This defines when these deltas get logged.
109 * If the number of cylinder groups in the file system is over the
110 * tunable then we log csum updates. Otherwise the updates are only
111 * done for performance on unmount. After a panic they can be
112 * quickly constructed during mounting. See ufs_construct_si()
113 * called from ufs_getsummaryinfo().
115 * This performance feature can of course be disabled by setting
116 * ufs_ncg_log to 0, and fully enabled by setting it to 0xffffffff.
/* Initial value for the ufs_ncg_log tunable. */
118 #define UFS_LOG_NCG_DEFAULT 10000
/*
 * Threshold compared against the superblock's fs_ncg at mount time:
 * mountfs() sets vfs_nolog_si when fs_ncg is below this value.
 */
119 uint32_t ufs_ncg_log
= UFS_LOG_NCG_DEFAULT
;
122 * ufs_clean_root indicates whether the root fs went down cleanly
124 static int ufs_clean_root
= 0;
127 * UFS Mount options table
/*
 * Cancel lists for the mount options table.  Each NULL-terminated array is
 * named after one option and holds the name of its opposite; the arrays are
 * referenced from the "cancel option" column of mntopts[].
 * NOTE(review): presumably the listed option is dropped when the owning
 * option is set -- confirm against the generic VFS option code.
 */

/* intr / nointr */
129 static char *intr_cancel
[] = { MNTOPT_NOINTR
, NULL
};
130 static char *nointr_cancel
[] = { MNTOPT_INTR
, NULL
};
/* forcedirectio / noforcedirectio */
131 static char *forcedirectio_cancel
[] = { MNTOPT_NOFORCEDIRECTIO
, NULL
};
132 static char *noforcedirectio_cancel
[] = { MNTOPT_FORCEDIRECTIO
, NULL
};
/* largefiles / nolargefiles */
133 static char *largefiles_cancel
[] = { MNTOPT_NOLARGEFILES
, NULL
};
134 static char *nolargefiles_cancel
[] = { MNTOPT_LARGEFILES
, NULL
};
/* logging / nologging */
135 static char *logging_cancel
[] = { MNTOPT_NOLOGGING
, NULL
};
136 static char *nologging_cancel
[] = { MNTOPT_LOGGING
, NULL
};
/* xattr / noxattr */
137 static char *xattr_cancel
[] = { MNTOPT_NOXATTR
, NULL
};
138 static char *noxattr_cancel
[] = { MNTOPT_XATTR
, NULL
};
/* quota / noquota */
139 static char *quota_cancel
[] = { MNTOPT_NOQUOTA
, NULL
};
140 static char *noquota_cancel
[] = { MNTOPT_QUOTA
, NULL
};
/* dfratime / nodfratime */
141 static char *dfratime_cancel
[] = { MNTOPT_NODFRATIME
, NULL
};
142 static char *nodfratime_cancel
[] = { MNTOPT_DFRATIME
, NULL
};
144 static mntopt_t mntopts
[] = {
146 * option name cancel option default arg flags
149 { MNTOPT_INTR
, intr_cancel
, NULL
, MO_DEFAULT
,
151 { MNTOPT_NOINTR
, nointr_cancel
, NULL
, 0,
152 (void *)UFSMNT_NOINTR
},
153 { MNTOPT_SYNCDIR
, NULL
, NULL
, 0,
154 (void *)UFSMNT_SYNCDIR
},
155 { MNTOPT_FORCEDIRECTIO
, forcedirectio_cancel
, NULL
, 0,
156 (void *)UFSMNT_FORCEDIRECTIO
},
157 { MNTOPT_NOFORCEDIRECTIO
, noforcedirectio_cancel
, NULL
, 0,
158 (void *)UFSMNT_NOFORCEDIRECTIO
},
159 { MNTOPT_NOSETSEC
, NULL
, NULL
, 0,
160 (void *)UFSMNT_NOSETSEC
},
161 { MNTOPT_LARGEFILES
, largefiles_cancel
, NULL
, MO_DEFAULT
,
162 (void *)UFSMNT_LARGEFILES
},
163 { MNTOPT_NOLARGEFILES
, nolargefiles_cancel
, NULL
, 0,
165 { MNTOPT_LOGGING
, logging_cancel
, NULL
, MO_TAG
,
166 (void *)UFSMNT_LOGGING
},
167 { MNTOPT_NOLOGGING
, nologging_cancel
, NULL
,
168 MO_NODISPLAY
|MO_DEFAULT
|MO_TAG
, NULL
},
169 { MNTOPT_QUOTA
, quota_cancel
, NULL
, MO_IGNORE
,
171 { MNTOPT_NOQUOTA
, noquota_cancel
, NULL
,
172 MO_NODISPLAY
|MO_DEFAULT
, NULL
},
173 { MNTOPT_XATTR
, xattr_cancel
, NULL
, MO_DEFAULT
,
175 { MNTOPT_NOXATTR
, noxattr_cancel
, NULL
, 0,
177 { MNTOPT_NOATIME
, NULL
, NULL
, 0,
178 (void *)UFSMNT_NOATIME
},
179 { MNTOPT_DFRATIME
, dfratime_cancel
, NULL
, 0,
181 { MNTOPT_NODFRATIME
, nodfratime_cancel
, NULL
,
182 MO_NODISPLAY
|MO_DEFAULT
, (void *)UFSMNT_NODFRATIME
},
183 { MNTOPT_ONERROR
, NULL
, UFSMNT_ONERROR_PANIC_STR
,
184 MO_DEFAULT
|MO_HASVALUE
, NULL
},
187 static mntopts_t ufs_mntopts
= {
188 sizeof (mntopts
) / sizeof (mntopt_t
),
192 static vfsdef_t vfw
= {
196 VSW_HASPROTO
|VSW_CANREMOUNT
|VSW_STATS
|VSW_CANLOFI
,
201 * Module linkage information for the kernel.
203 extern struct mod_ops mod_fsops
;
205 static struct modlfs modlfs
= {
206 &mod_fsops
, "filesystem for ufs", &vfw
209 static struct modlinkage modlinkage
= {
210 MODREV_1
, (void *)&modlfs
, NULL
214 * An attempt has been made to make this module unloadable. In order to
215 * test it, we need a system in which the root fs is NOT ufs. THIS HAS NOT
219 extern kstat_t
*ufs_inode_kstat
;
220 extern uint_t ufs_lockfs_key
;
221 extern void ufs_lockfs_tsd_destructor(void *);
222 extern uint_t bypass_snapshot_throttle_key
;
228 * Create an index into the per thread array so that any thread doing
229 * VOP will have a lockfs mark on it.
231 tsd_create(&ufs_lockfs_key
, ufs_lockfs_tsd_destructor
);
232 tsd_create(&bypass_snapshot_throttle_key
, NULL
);
233 return (mod_install(&modlinkage
));
243 _info(struct modinfo
*modinfop
)
245 return (mod_info(&modlinkage
, modinfop
));
248 extern struct vnode
*makespecvp(dev_t dev
, vtype_t type
);
250 extern kmutex_t ufs_scan_lock
;
252 static int mountfs(struct vfs
*, enum whymountroot
, struct vnode
*, char *,
253 struct cred
*, int, void *, int);
257 ufs_mount(struct vfs
*vfsp
, struct vnode
*mvp
, struct mounta
*uap
,
261 char *data
= uap
->dataptr
;
262 int datalen
= uap
->datalen
;
264 struct vnode
*lvp
= NULL
;
265 struct vnode
*svp
= NULL
;
268 enum whymountroot why
= ROOT_INIT
;
269 struct ufs_args args
;
271 int fromspace
= (uap
->flags
& MS_SYSSPACE
) ?
272 UIO_SYSSPACE
: UIO_USERSPACE
;
274 if ((error
= secpolicy_fs_mount(cr
, mvp
, vfsp
)) != 0)
277 if (mvp
->v_type
!= VDIR
)
280 mutex_enter(&mvp
->v_lock
);
281 if ((uap
->flags
& MS_REMOUNT
) == 0 &&
282 (uap
->flags
& MS_OVERLAY
) == 0 &&
283 (mvp
->v_count
!= 1 || (mvp
->v_flag
& VROOT
))) {
284 mutex_exit(&mvp
->v_lock
);
287 mutex_exit(&mvp
->v_lock
);
292 bzero(&args
, sizeof (args
));
293 if ((uap
->flags
& MS_DATA
) && data
!= NULL
&& datalen
!= 0) {
296 if (datalen
> sizeof (args
))
298 if (uap
->flags
& MS_SYSSPACE
)
299 bcopy(data
, &args
, datalen
);
301 copy_result
= copyin(data
, &args
, datalen
);
304 datalen
= sizeof (struct ufs_args
);
309 if ((vfsp
->vfs_flag
& VFS_RDONLY
) != 0 ||
310 (uap
->flags
& MS_RDONLY
) != 0) {
314 oflag
= FREAD
| FWRITE
;
315 aflag
= VREAD
| VWRITE
;
319 * Read in the mount point pathname
320 * (so we can record the directory the file system was last mounted on).
322 if (error
= pn_get(uap
->dir
, fromspace
, &dpn
))
326 * Resolve path name of special file being mounted.
328 if (error
= lookupname(uap
->spec
, fromspace
, FOLLOW
, NULL
, &svp
)) {
333 error
= vfs_get_lofi(vfsp
, &lvp
);
339 } else if (error
== 0) {
342 if (getmajor(dev
) >= devcnt
) {
349 if (svp
->v_type
!= VBLK
) {
355 if (getmajor(dev
) >= devcnt
) {
360 if ((error
= secpolicy_spec_open(cr
, svp
, oflag
)) != 0) {
367 if (uap
->flags
& MS_REMOUNT
)
371 * Open device/file mounted on. We need this to check whether
372 * the caller has sufficient rights to access the resource in
373 * question. When bio is fixed for vnodes this can all be vnode
376 if ((error
= fop_access(svp
, aflag
, 0, cr
, NULL
)) != 0)
380 * Ensure that this device isn't already mounted or in progress on a
381 * mount unless this is a REMOUNT request or we are told to suppress
382 * mount checks. Global mounts require special handling.
384 if ((uap
->flags
& MS_NOCHECK
) == 0) {
385 if (vfs_devmounting(dev
, vfsp
)) {
389 if (vfs_devismounted(dev
)) {
390 if ((uap
->flags
& MS_REMOUNT
) == 0) {
398 * If the device is a tape, mount it read only
400 if (devopsp
[getmajor(dev
)]->devo_cb_ops
->cb_flag
& D_TAPE
) {
401 vfsp
->vfs_flag
|= VFS_RDONLY
;
402 vfs_setmntopt(vfsp
, MNTOPT_RO
, NULL
, 0);
404 if (uap
->flags
& MS_RDONLY
)
405 vfsp
->vfs_flag
|= VFS_RDONLY
;
408 * Mount the filesystem, free the device vnode on error.
410 error
= mountfs(vfsp
, why
, lvp
!= NULL
? lvp
: svp
,
411 dpn
.pn_path
, cr
, 0, &args
, datalen
);
414 vfs_set_feature(vfsp
, VFSFT_SYSATTR_VIEWS
);
417 * If lofi, drop our reference to the original file.
436 * Mount root file system.
437 * "why" is ROOT_INIT on initial call, ROOT_REMOUNT if called to
438 * remount the root file system, and ROOT_UNMOUNT if called to
439 * unmount the root (e.g., as part of a system shutdown).
441 * XXX - this may be partially machine-dependent; it, along with the VFS_SWAPVP
442 * operation, goes along with auto-configuration. A mechanism should be
443 * provided by which machine-INdependent code in the kernel can say "get me the
444 * right root file system" and "get me the right initial swap area", and have
445 * that done in what may well be a machine-dependent fashion.
446 * Unfortunately, it is also file-system-type dependent (NFS gets it via
447 * bootparams calls, UFS gets it from various and sundry machine-dependent
448 * mechanisms, as SPECFS does for swap).
451 ufs_mountroot(struct vfs
*vfsp
, enum whymountroot why
)
455 static int ufsrootdone
= 0;
458 struct vnode
*devvp
= 0;
463 if (why
== ROOT_INIT
) {
466 rootdev
= getrootdev();
467 if (rootdev
== (dev_t
)NODEV
)
469 vfsp
->vfs_dev
= rootdev
;
470 vfsp
->vfs_flag
|= VFS_RDONLY
;
471 } else if (why
== ROOT_REMOUNT
) {
472 vp
= ((struct ufsvfs
*)vfsp
->vfs_data
)->vfs_devvp
;
473 (void) dnlc_purge_vfsp(vfsp
, 0);
474 vp
= common_specvp(vp
);
475 (void) fop_putpage(vp
, 0, (size_t)0, B_INVAL
,
477 (void) bfinval(vfsp
->vfs_dev
, 0);
480 ovflags
= vfsp
->vfs_flag
;
481 vfsp
->vfs_flag
&= ~VFS_RDONLY
;
482 vfsp
->vfs_flag
|= VFS_REMOUNT
;
483 rootdev
= vfsp
->vfs_dev
;
484 } else if (why
== ROOT_UNMOUNT
) {
485 if (vfs_lock(vfsp
) == 0) {
486 (void) ufs_flush(vfsp
);
488 * Mark the log as fully rolled
490 ufsvfsp
= (ufsvfs_t
*)vfsp
->vfs_data
;
491 fsp
= ufsvfsp
->vfs_fs
;
492 if (TRANS_ISTRANS(ufsvfsp
) &&
493 !TRANS_ISERROR(ufsvfsp
) &&
494 (fsp
->fs_rolled
== FS_NEED_ROLL
)) {
495 ml_unit_t
*ul
= ufsvfsp
->vfs_log
;
497 error
= ufs_putsummaryinfo(ul
->un_dev
,
500 fsp
->fs_rolled
= FS_ALL_ROLLED
;
501 UFS_BWRITE2(NULL
, ufsvfsp
->vfs_bufp
);
509 vp
= ((struct ufsvfs
*)vfsp
->vfs_data
)->vfs_devvp
;
510 (void) fop_close(vp
, FREAD
|FWRITE
, 1,
514 error
= vfs_lock(vfsp
);
518 devvp
= makespecvp(rootdev
, VBLK
);
520 /* If RO media, don't call clkset() (see below) */
522 if (why
== ROOT_INIT
) {
523 error
= fop_open(&devvp
, FREAD
|FWRITE
, CRED(), NULL
);
525 (void) fop_close(devvp
, FREAD
|FWRITE
, 1,
532 error
= mountfs(vfsp
, why
, devvp
, "/", CRED(), 1, NULL
, 0);
534 * XXX - assumes root device is not indirect, because we don't set
535 * rootvp. Is rootvp used for anything? If so, make another arg
540 if (why
== ROOT_REMOUNT
)
541 vfsp
->vfs_flag
= ovflags
;
549 if (why
== ROOT_INIT
)
551 (vfsp
->vfs_flag
& VFS_RDONLY
) ? MS_RDONLY
: 0);
554 clkset(doclkset
? fsp
->fs_time
: -1);
555 ufsvfsp
= (ufsvfs_t
*)vfsp
->vfs_data
;
556 if (ufsvfsp
->vfs_log
) {
557 vfs_setmntopt(vfsp
, MNTOPT_LOGGING
, NULL
, 0);
563 remountfs(struct vfs
*vfsp
, dev_t dev
, void *raw_argsp
, int args_len
)
565 struct ufsvfs
*ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
566 struct ulockfs
*ulp
= &ufsvfsp
->vfs_ulockfs
;
567 struct buf
*bp
= ufsvfsp
->vfs_bufp
;
568 struct fs
*fsp
= (struct fs
*)bp
->b_un
.b_addr
;
574 if (args_len
== sizeof (struct ufs_args
) && raw_argsp
)
575 flags
= ((struct ufs_args
*)raw_argsp
)->flags
;
577 /* cannot remount to RDONLY */
578 if (vfsp
->vfs_flag
& VFS_RDONLY
)
581 /* whoops, wrong dev */
582 if (vfsp
->vfs_dev
!= dev
)
586 * synchronize w/ufs ioctls
588 mutex_enter(&ulp
->ul_lock
);
589 atomic_inc_ulong(&ufs_quiesce_pend
);
594 ufsvfsp
->vfs_nointr
= flags
& UFSMNT_NOINTR
;
595 ufsvfsp
->vfs_syncdir
= flags
& UFSMNT_SYNCDIR
;
596 ufsvfsp
->vfs_nosetsec
= flags
& UFSMNT_NOSETSEC
;
597 ufsvfsp
->vfs_noatime
= flags
& UFSMNT_NOATIME
;
598 if ((flags
& UFSMNT_NODFRATIME
) || ufsvfsp
->vfs_noatime
)
599 ufsvfsp
->vfs_dfritime
&= ~UFS_DFRATIME
;
600 else /* dfratime, default behavior */
601 ufsvfsp
->vfs_dfritime
|= UFS_DFRATIME
;
602 if (flags
& UFSMNT_FORCEDIRECTIO
)
603 ufsvfsp
->vfs_forcedirectio
= 1;
604 else /* default is no direct I/O */
605 ufsvfsp
->vfs_forcedirectio
= 0;
606 ufsvfsp
->vfs_iotstamp
= ddi_get_lbolt();
609 * set largefiles flag in ufsvfs equal to the
610 * value passed in by the mount command. If
611 * it is "nolargefiles", and the flag is set
612 * in the superblock, the mount fails.
614 if (!(flags
& UFSMNT_LARGEFILES
)) { /* "nolargefiles" */
615 if (fsp
->fs_flags
& FSLARGEFILES
) {
619 ufsvfsp
->vfs_lfflags
&= ~UFS_LARGEFILES
;
620 } else /* "largefiles" */
621 ufsvfsp
->vfs_lfflags
|= UFS_LARGEFILES
;
623 * read/write to read/write; all done
625 if (fsp
->fs_ronly
== 0)
629 * fix-on-panic assumes RO->RW remount implies system-critical fs
630 * if it is shortly after boot; so, don't attempt to lock and fix
631 * (unless the user explicitly asked for another action on error)
632 * XXX UFSMNT_ONERROR_RDONLY rather than UFSMNT_ONERROR_PANIC
634 #define BOOT_TIME_LIMIT (180*hz)
635 if (!(flags
& UFSMNT_ONERROR_FLGMASK
) &&
636 ddi_get_lbolt() < BOOT_TIME_LIMIT
) {
637 cmn_err(CE_WARN
, "%s is required to be mounted onerror=%s",
638 ufsvfsp
->vfs_fs
->fs_fsmnt
, UFSMNT_ONERROR_PANIC_STR
);
639 flags
|= UFSMNT_ONERROR_PANIC
;
642 if ((error
= ufsfx_mount(ufsvfsp
, flags
)) != 0)
646 * quiesce the file system
648 error
= ufs_quiesce(ulp
);
652 tpt
= UFS_BREAD(ufsvfsp
, ufsvfsp
->vfs_dev
, SBLOCK
, SBSIZE
);
653 if (tpt
->b_flags
& B_ERROR
) {
657 fspt
= (struct fs
*)tpt
->b_un
.b_addr
;
658 if (((fspt
->fs_magic
!= FS_MAGIC
) &&
659 (fspt
->fs_magic
!= MTB_UFS_MAGIC
)) ||
660 (fspt
->fs_magic
== FS_MAGIC
&&
661 (fspt
->fs_version
!= UFS_EFISTYLE4NONEFI_VERSION_2
&&
662 fspt
->fs_version
!= UFS_VERSION_MIN
)) ||
663 (fspt
->fs_magic
== MTB_UFS_MAGIC
&&
664 (fspt
->fs_version
> MTB_UFS_VERSION_1
||
665 fspt
->fs_version
< MTB_UFS_VERSION_MIN
)) ||
666 fspt
->fs_bsize
> MAXBSIZE
|| fspt
->fs_frag
> MAXFRAG
||
667 fspt
->fs_bsize
< sizeof (struct fs
) || fspt
->fs_bsize
< PAGESIZE
) {
668 tpt
->b_flags
|= B_STALE
| B_AGE
;
673 if (ufsvfsp
->vfs_log
&& (ufsvfsp
->vfs_log
->un_flags
& LDL_NOROLL
)) {
674 ufsvfsp
->vfs_log
->un_flags
&= ~LDL_NOROLL
;
675 logmap_start_roll(ufsvfsp
->vfs_log
);
678 if (TRANS_ISERROR(ufsvfsp
))
680 TRANS_DOMATAMAP(ufsvfsp
);
682 if ((fspt
->fs_state
+ fspt
->fs_time
== FSOKAY
) &&
683 fspt
->fs_clean
== FSLOG
&& !TRANS_ISTRANS(ufsvfsp
)) {
684 ufsvfsp
->vfs_log
= NULL
;
685 ufsvfsp
->vfs_domatamap
= 0;
690 if (fspt
->fs_state
+ fspt
->fs_time
== FSOKAY
&&
691 (fspt
->fs_clean
== FSCLEAN
||
692 fspt
->fs_clean
== FSSTABLE
||
693 fspt
->fs_clean
== FSLOG
)) {
696 * Ensure that ufs_getsummaryinfo doesn't reconstruct
699 error
= ufs_getsummaryinfo(vfsp
->vfs_dev
, ufsvfsp
, fspt
);
703 /* preserve mount name */
704 (void) strncpy(fspt
->fs_fsmnt
, fsp
->fs_fsmnt
, MAXMNTLEN
);
705 /* free the old cg space */
706 kmem_free(fsp
->fs_u
.fs_csp
, fsp
->fs_cssize
);
707 /* switch in the new superblock */
708 fspt
->fs_rolled
= FS_NEED_ROLL
;
709 bcopy(tpt
->b_un
.b_addr
, bp
->b_un
.b_addr
, fspt
->fs_sbsize
);
711 fsp
->fs_clean
= FSSTABLE
;
712 } /* superblock updated in memory */
713 tpt
->b_flags
|= B_STALE
| B_AGE
;
717 if (fsp
->fs_clean
!= FSSTABLE
) {
723 if (TRANS_ISTRANS(ufsvfsp
)) {
724 fsp
->fs_clean
= FSLOG
;
725 ufsvfsp
->vfs_dio
= 0;
727 if (ufsvfsp
->vfs_dio
)
728 fsp
->fs_clean
= FSSUSPEND
;
730 TRANS_MATA_MOUNT(ufsvfsp
);
735 atomic_dec_ulong(&ufs_quiesce_pend
);
736 cv_broadcast(&ulp
->ul_cv
);
737 mutex_exit(&ulp
->ul_lock
);
739 if (TRANS_ISTRANS(ufsvfsp
)) {
742 * start the delete thread
744 ufs_thread_start(&ufsvfsp
->vfs_delete
, ufs_thread_delete
, vfsp
);
747 * start the reclaim thread
749 if (fsp
->fs_reclaim
& (FS_RECLAIM
|FS_RECLAIMING
)) {
750 fsp
->fs_reclaim
&= ~FS_RECLAIM
;
751 fsp
->fs_reclaim
|= FS_RECLAIMING
;
752 ufs_thread_start(&ufsvfsp
->vfs_reclaim
,
753 ufs_thread_reclaim
, vfsp
);
757 TRANS_SBWRITE(ufsvfsp
, TOP_MOUNT
);
764 atomic_dec_ulong(&ufs_quiesce_pend
);
765 cv_broadcast(&ulp
->ul_cv
);
766 mutex_exit(&ulp
->ul_lock
);
771 * If the device maxtransfer size is not available, we use ufs_maxmaxphys
772 * along with the system value for maxphys to determine the value for
775 int ufs_maxmaxphys
= (1024 * 1024);
777 #include <sys/ddi.h> /* for delay(9f) */
/*
 * Tunables for the cleanup loop that mountfs() runs after a failed mount.
 * While references other than our own remain on the root vnode, the loop
 * sleeps ufs_mount_error_delay per pass (via ddi_msleep()) and adds that
 * amount to an elapsed counter; it stops waiting once the counter reaches
 * ufs_mount_timeout.
 */
779 int ufs_mount_error_delay
= 20; /* default to 20ms */
780 int ufs_mount_timeout
= 60000; /* default to 1 minute */
783 mountfs(struct vfs
*vfsp
, enum whymountroot why
, struct vnode
*devvp
,
784 char *path
, cred_t
*cr
, int isroot
, void *raw_argsp
, int args_len
)
786 dev_t dev
= devvp
->v_rdev
;
788 struct ufsvfs
*ufsvfsp
= 0;
797 struct vnode
*rvp
= NULL
;
804 if (args_len
== sizeof (struct ufs_args
) && raw_argsp
)
805 flags
= ((struct ufs_args
*)raw_argsp
)->flags
;
807 ASSERT(vfs_lock_held(vfsp
));
809 if (why
== ROOT_INIT
) {
811 * Open block device mounted on.
812 * When bio is fixed for vnodes this can all be vnode
815 error
= fop_open(&devvp
,
816 (vfsp
->vfs_flag
& VFS_RDONLY
) ? FREAD
: FREAD
|FWRITE
,
823 * Refuse to go any further if this
824 * device is being used for swapping.
826 if (IS_SWAPVP(devvp
)) {
833 * check for dev already mounted on
835 if (vfsp
->vfs_flag
& VFS_REMOUNT
) {
836 error
= remountfs(vfsp
, dev
, raw_argsp
, args_len
);
845 * Flush back any dirty pages on the block device to
846 * try and keep the buffer cache in sync with the page
847 * cache if someone is trying to use block devices when
848 * they really should be using the raw device.
850 (void) fop_putpage(common_specvp(devvp
), 0,
851 (size_t)0, B_INVAL
, cr
, NULL
);
856 ufsvfsp
= kmem_zalloc(sizeof (struct ufsvfs
), KM_SLEEP
);
857 tp
= UFS_BREAD(ufsvfsp
, dev
, SBLOCK
, SBSIZE
);
858 if (tp
->b_flags
& B_ERROR
)
860 fsp
= (struct fs
*)tp
->b_un
.b_addr
;
862 if ((fsp
->fs_magic
!= FS_MAGIC
) && (fsp
->fs_magic
!= MTB_UFS_MAGIC
)) {
864 "mount: not a UFS magic number (0x%x)", fsp
->fs_magic
);
869 if ((fsp
->fs_magic
== FS_MAGIC
) &&
870 (fsp
->fs_version
!= UFS_EFISTYLE4NONEFI_VERSION_2
&&
871 fsp
->fs_version
!= UFS_VERSION_MIN
)) {
873 "mount: unrecognized version of UFS on-disk format: %d",
879 if ((fsp
->fs_magic
== MTB_UFS_MAGIC
) &&
880 (fsp
->fs_version
> MTB_UFS_VERSION_1
||
881 fsp
->fs_version
< MTB_UFS_VERSION_MIN
)) {
883 "mount: unrecognized version of UFS on-disk format: %d",
890 if (fsp
->fs_magic
== MTB_UFS_MAGIC
) {
892 * Find the size of the device in sectors. If the
893 * size in sectors is greater than INT_MAX, it's
894 * a multi-terabyte file system, which can't be
895 * mounted by a 32-bit kernel. We can't use the
896 * fsbtodb() macro in the next line because the macro
897 * casts the intermediate values to daddr_t, which is
898 * a 32-bit quantity in a 32-bit kernel. Here we
899 * really do need the intermediate values to be held
900 * in 64-bit quantities because we're checking for
901 * overflow of a 32-bit field.
903 if ((((diskaddr_t
)(fsp
->fs_size
)) << fsp
->fs_fsbtodb
)
906 "mount: multi-terabyte UFS cannot be"
907 " mounted by a 32-bit kernel");
915 if (fsp
->fs_bsize
> MAXBSIZE
|| fsp
->fs_frag
> MAXFRAG
||
916 fsp
->fs_bsize
< sizeof (struct fs
) || fsp
->fs_bsize
< PAGESIZE
) {
917 error
= EINVAL
; /* also needs translation */
922 * Allocate VFS private data.
924 vfsp
->vfs_bcount
= 0;
925 vfsp
->vfs_data
= (caddr_t
)ufsvfsp
;
926 vfsp
->vfs_fstype
= ufsfstype
;
928 vfsp
->vfs_flag
|= VFS_NOTRUNC
;
929 vfs_make_fsid(&vfsp
->vfs_fsid
, dev
, ufsfstype
);
930 ufsvfsp
->vfs_devvp
= devvp
;
933 * Cross-link with vfs and add to instance list.
935 ufsvfsp
->vfs_vfs
= vfsp
;
936 ufs_vfs_add(ufsvfsp
);
938 ufsvfsp
->vfs_dev
= dev
;
939 ufsvfsp
->vfs_bufp
= tp
;
941 ufsvfsp
->vfs_dirsize
= INODESIZE
+ (4 * ALLOCSIZE
) + fsp
->fs_fsize
;
942 ufsvfsp
->vfs_minfrags
=
943 (int)((int64_t)fsp
->fs_dsize
* fsp
->fs_minfree
/ 100);
945 * if mount allows largefiles, indicate so in ufsvfs
947 if (flags
& UFSMNT_LARGEFILES
)
948 ufsvfsp
->vfs_lfflags
|= UFS_LARGEFILES
;
952 ufs_delete_init(ufsvfsp
, 1);
953 ufs_thread_init(&ufsvfsp
->vfs_reclaim
, 0);
956 * Chicken and egg problem. The superblock may have deltas
957 * in the log. So after the log is scanned we reread the
958 * superblock. We guarantee that the fields needed to
959 * scan the log will not be in the log.
961 if (fsp
->fs_logbno
&& fsp
->fs_clean
== FSLOG
&&
962 (fsp
->fs_state
+ fsp
->fs_time
== FSOKAY
)) {
963 error
= lufs_snarf(ufsvfsp
, fsp
, (vfsp
->vfs_flag
& VFS_RDONLY
));
966 * Allow a ro mount to continue even if the
967 * log cannot be processed - yet.
969 if (!(vfsp
->vfs_flag
& VFS_RDONLY
)) {
970 cmn_err(CE_WARN
, "Error accessing ufs "
971 "log for %s; Please run fsck(8)", path
);
975 tp
->b_flags
|= (B_AGE
| B_STALE
);
977 tp
= UFS_BREAD(ufsvfsp
, dev
, SBLOCK
, SBSIZE
);
978 fsp
= (struct fs
*)tp
->b_un
.b_addr
;
979 ufsvfsp
->vfs_bufp
= tp
;
980 if (tp
->b_flags
& B_ERROR
)
985 * Set logging mounted flag used by lockfs
987 ufsvfsp
->vfs_validfs
= UT_MOUNTED
;
990 * Copy the super block into a buffer in its native size.
991 * Use ngeteblk to allocate the buffer
993 bp
= ngeteblk(fsp
->fs_bsize
);
994 ufsvfsp
->vfs_bufp
= bp
;
996 bp
->b_dev
= cmpdev(dev
);
997 bp
->b_blkno
= SBLOCK
;
998 bp
->b_bcount
= fsp
->fs_sbsize
;
999 bcopy(tp
->b_un
.b_addr
, bp
->b_un
.b_addr
, fsp
->fs_sbsize
);
1000 tp
->b_flags
|= B_STALE
| B_AGE
;
1004 fsp
= (struct fs
*)bp
->b_un
.b_addr
;
1006 * Mount fails if superblock flag indicates presence of large
1007 * files and filesystem is attempted to be mounted 'nolargefiles'.
1008 * The exception is for a read only mount of root, which we
1009 * always want to succeed, so fsck can fix potential problems.
1010 * The assumption is that we will remount root at some point,
1011 * and the remount will enforce the mount option.
1013 if (!(isroot
& (vfsp
->vfs_flag
& VFS_RDONLY
)) &&
1014 (fsp
->fs_flags
& FSLARGEFILES
) &&
1015 !(flags
& UFSMNT_LARGEFILES
)) {
1020 if (vfsp
->vfs_flag
& VFS_RDONLY
) {
1023 if (((fsp
->fs_state
+ fsp
->fs_time
) == FSOKAY
) &&
1024 ((fsp
->fs_clean
== FSCLEAN
) ||
1025 (fsp
->fs_clean
== FSSTABLE
) ||
1026 (fsp
->fs_clean
== FSLOG
))) {
1028 if (fsp
->fs_clean
== FSLOG
) {
1029 if (fsp
->fs_rolled
== FS_ALL_ROLLED
) {
1036 fsp
->fs_clean
= FSSTABLE
;
1038 fsp
->fs_clean
= FSBAD
;
1045 TRANS_DOMATAMAP(ufsvfsp
);
1047 if ((TRANS_ISERROR(ufsvfsp
)) ||
1048 (((fsp
->fs_state
+ fsp
->fs_time
) == FSOKAY
) &&
1049 fsp
->fs_clean
== FSLOG
&& !TRANS_ISTRANS(ufsvfsp
))) {
1050 ufsvfsp
->vfs_log
= NULL
;
1051 ufsvfsp
->vfs_domatamap
= 0;
1056 if (((fsp
->fs_state
+ fsp
->fs_time
) == FSOKAY
) &&
1057 (fsp
->fs_clean
== FSCLEAN
||
1058 fsp
->fs_clean
== FSSTABLE
||
1059 fsp
->fs_clean
== FSLOG
))
1060 fsp
->fs_clean
= FSSTABLE
;
1064 * allow root partition to be mounted even
1065 * when fs_state is not ok; this
1066 * will be fixed later by a remount of root
1068 fsp
->fs_clean
= FSBAD
;
1069 ufsvfsp
->vfs_log
= NULL
;
1070 ufsvfsp
->vfs_domatamap
= 0;
1077 if (fsp
->fs_clean
== FSSTABLE
&& TRANS_ISTRANS(ufsvfsp
))
1078 fsp
->fs_clean
= FSLOG
;
1080 TRANS_MATA_MOUNT(ufsvfsp
);
1083 vfsp
->vfs_bsize
= fsp
->fs_bsize
;
1086 * Read in summary info
1088 if (error
= ufs_getsummaryinfo(dev
, ufsvfsp
, fsp
))
1092 * lastwhinetime is set to zero rather than lbolt, so that if the
1093 * filesystem is found to be full right after mounting, the
1094 * "file system message" will be logged immediately.
1096 ufsvfsp
->vfs_lastwhinetime
= 0L;
1099 mutex_init(&ufsvfsp
->vfs_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1100 (void) copystr(path
, fsp
->fs_fsmnt
, sizeof (fsp
->fs_fsmnt
) - 1, &len
);
1101 bzero(fsp
->fs_fsmnt
+ len
, sizeof (fsp
->fs_fsmnt
) - len
);
1104 * Sanity checks for old file systems
1106 if (fsp
->fs_postblformat
== FS_42POSTBLFMT
)
1107 ufsvfsp
->vfs_nrpos
= 8;
1109 ufsvfsp
->vfs_nrpos
= fsp
->fs_nrpos
;
1112 * Initialize lockfs structure to support file system locking
1114 bzero(&ufsvfsp
->vfs_ulockfs
.ul_lockfs
,
1115 sizeof (struct lockfs
));
1116 ufsvfsp
->vfs_ulockfs
.ul_fs_lock
= ULOCKFS_ULOCK
;
1117 mutex_init(&ufsvfsp
->vfs_ulockfs
.ul_lock
, NULL
,
1118 MUTEX_DEFAULT
, NULL
);
1119 cv_init(&ufsvfsp
->vfs_ulockfs
.ul_cv
, NULL
, CV_DEFAULT
, NULL
);
1122 * We don't need to grab vfs_dqrwlock for this ufs_iget() call.
1123 * We are in the process of mounting the file system so there
1124 * is no need to grab the quota lock. If a quota applies to the
1125 * root inode, then it will be updated when quotas are enabled.
1127 * However, we have an ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock))
1128 * in getinoquota() that we want to keep so grab it anyway.
1130 rw_enter(&ufsvfsp
->vfs_dqrwlock
, RW_READER
);
1132 error
= ufs_iget_alloced(vfsp
, UFSROOTINO
, &rip
, cr
);
1134 rw_exit(&ufsvfsp
->vfs_dqrwlock
);
1140 * make sure root inode is a directory. Returning ENOTDIR might
1141 * be confused with the mount point not being a directory, so
1142 * we use EIO instead.
1144 if ((rip
->i_mode
& IFMT
) != IFDIR
) {
1146 * Mark this inode as subject for cleanup
1147 * to avoid stray inodes in the cache.
1155 mutex_enter(&rvp
->v_lock
);
1156 rvp
->v_flag
|= VROOT
;
1157 mutex_exit(&rvp
->v_lock
);
1158 ufsvfsp
->vfs_root
= rvp
;
1159 /* The buffer for the root inode does not contain a valid b_vp */
1160 (void) bfinval(dev
, 0);
1163 ufsvfsp
->vfs_nosetsec
= flags
& UFSMNT_NOSETSEC
;
1164 ufsvfsp
->vfs_nointr
= flags
& UFSMNT_NOINTR
;
1165 ufsvfsp
->vfs_syncdir
= flags
& UFSMNT_SYNCDIR
;
1166 ufsvfsp
->vfs_noatime
= flags
& UFSMNT_NOATIME
;
1167 if ((flags
& UFSMNT_NODFRATIME
) || ufsvfsp
->vfs_noatime
)
1168 ufsvfsp
->vfs_dfritime
&= ~UFS_DFRATIME
;
1169 else /* dfratime, default behavior */
1170 ufsvfsp
->vfs_dfritime
|= UFS_DFRATIME
;
1171 if (flags
& UFSMNT_FORCEDIRECTIO
)
1172 ufsvfsp
->vfs_forcedirectio
= 1;
1173 else if (flags
& UFSMNT_NOFORCEDIRECTIO
)
1174 ufsvfsp
->vfs_forcedirectio
= 0;
1175 ufsvfsp
->vfs_iotstamp
= ddi_get_lbolt();
1177 ufsvfsp
->vfs_nindiroffset
= fsp
->fs_nindir
- 1;
1178 ufsvfsp
->vfs_nindirshift
= highbit(ufsvfsp
->vfs_nindiroffset
);
1179 ufsvfsp
->vfs_ioclustsz
= fsp
->fs_bsize
* fsp
->fs_maxcontig
;
1181 if (cdev_ioctl(dev
, DKIOCINFO
, (intptr_t)&ci
,
1182 FKIOCTL
|FNATIVE
|FREAD
, CRED(), &status
) == 0) {
1183 ufsvfsp
->vfs_iotransz
= ci
.dki_maxtransfer
* DEV_BSIZE
;
1185 ufsvfsp
->vfs_iotransz
= MIN(maxphys
, ufs_maxmaxphys
);
1188 if (ufsvfsp
->vfs_iotransz
<= 0) {
1189 ufsvfsp
->vfs_iotransz
= MIN(maxphys
, ufs_maxmaxphys
);
1193 * When logging, used to reserve log space for writes and truncs
1195 ufsvfsp
->vfs_avgbfree
= fsp
->fs_cstotal
.cs_nbfree
/ fsp
->fs_ncg
;
1198 * Determine whether to log cylinder group summary info.
1200 ufsvfsp
->vfs_nolog_si
= (fsp
->fs_ncg
< ufs_ncg_log
);
1202 if (TRANS_ISTRANS(ufsvfsp
)) {
1204 * start the delete thread
1206 ufs_thread_start(&ufsvfsp
->vfs_delete
, ufs_thread_delete
, vfsp
);
1209 * start reclaim thread if the filesystem was not mounted
1212 if (!fsp
->fs_ronly
&& (fsp
->fs_reclaim
&
1213 (FS_RECLAIM
|FS_RECLAIMING
))) {
1214 fsp
->fs_reclaim
&= ~FS_RECLAIM
;
1215 fsp
->fs_reclaim
|= FS_RECLAIMING
;
1216 ufs_thread_start(&ufsvfsp
->vfs_reclaim
,
1217 ufs_thread_reclaim
, vfsp
);
1220 /* Mark the fs as unrolled */
1221 fsp
->fs_rolled
= FS_NEED_ROLL
;
1222 } else if (!fsp
->fs_ronly
&& (fsp
->fs_reclaim
&
1223 (FS_RECLAIM
|FS_RECLAIMING
))) {
1225 * If a file system that is mounted nologging, after
1226 * having previously been mounted logging, becomes
1227 * unmounted whilst the reclaim thread is in the throes
1228 * of reclaiming open/deleted inodes, a subsequent mount
1229 * of such a file system with logging disabled could lead
1230 * to inodes becoming lost. So, start reclaim now, even
1231 * though logging was disabled for the previous mount, to
1234 fsp
->fs_reclaim
&= ~FS_RECLAIM
;
1235 fsp
->fs_reclaim
|= FS_RECLAIMING
;
1236 ufs_thread_start(&ufsvfsp
->vfs_reclaim
,
1237 ufs_thread_reclaim
, vfsp
);
1240 if (!fsp
->fs_ronly
) {
1241 TRANS_SBWRITE(ufsvfsp
, TOP_MOUNT
);
1242 if (error
= geterror(ufsvfsp
->vfs_bufp
))
1246 /* fix-on-panic initialization */
1247 if (isroot
&& !(flags
& UFSMNT_ONERROR_FLGMASK
))
1248 flags
|= UFSMNT_ONERROR_PANIC
; /* XXX ..._RDONLY */
1250 if ((error
= ufsfx_mount(ufsvfsp
, flags
)) != 0)
1253 if (why
== ROOT_INIT
&& isroot
)
1261 /* the following sequence is similar to ufs_unmount() */
1264 * There's a problem that ufs_iget() puts inodes into
1265 * the inode cache before it returns them. If someone
1266 * traverses that cache and gets a reference to our
1267 * inode, there's a chance they'll still be using it
1268 * after we've destroyed it. This is a hard race to
1269 * hit, but it's happened (putting in a medium delay
1270 * here, and a large delay in ufs_scan_inodes() for
1271 * inodes on the device we're bailing out on, makes
1272 * the race easy to demonstrate). The symptom is some
1273 * other part of UFS faulting on bad inode contents,
1274 * or when grabbing one of the locks inside the inode,
1275 * etc. The usual victim is ufs_scan_inodes() or
1276 * someone called by it.
1280 * First, isolate it so that no new references can be
1281 * gotten via the inode cache.
1283 ihm
= &ih_lock
[INOHASH(UFSROOTINO
)];
1289 * Now wait for all outstanding references except our
1290 * own to drain. This could, in theory, take forever,
1291 * so don't wait *too* long. If we time out, mark
1292 * it stale and leak it, so we don't hit the problem
1295 * Note that v_count is an int, which means we can read
1296 * it in one operation. Thus, there's no need to lock
1300 while ((rvp
->v_count
> 1) && (elapsed
< ufs_mount_timeout
)) {
1301 ddi_msleep(ufs_mount_error_delay
);
1302 elapsed
+= ufs_mount_error_delay
;
1305 if (rvp
->v_count
> 1) {
1306 mutex_enter(&rip
->i_tlock
);
1307 rip
->i_flag
|= ISTALE
;
1308 mutex_exit(&rip
->i_tlock
);
1310 "Timed out while cleaning up after "
1311 "failed mount of %s", path
);
1315 * Now we're the only one with a handle left, so tear
1316 * it down the rest of the way.
1318 if (ufs_rmidle(rip
))
1321 rip
->i_ufsvfs
= NULL
;
1328 TRANS_MATA_UMOUNT(ufsvfsp
);
1331 ufs_vfs_remove(ufsvfsp
);
1332 ufs_thread_exit(&ufsvfsp
->vfs_delete
);
1333 ufs_thread_exit(&ufsvfsp
->vfs_reclaim
);
1334 mutex_destroy(&ufsvfsp
->vfs_lock
);
1335 if (ufsvfsp
->vfs_log
) {
1336 lufs_unsnarf(ufsvfsp
);
1338 kmem_free(ufsvfsp
, sizeof (struct ufsvfs
));
1341 bp
->b_flags
|= (B_STALE
|B_AGE
);
1345 tp
->b_flags
|= (B_STALE
|B_AGE
);
1349 (void) fop_close(devvp
, (vfsp
->vfs_flag
& VFS_RDONLY
) ?
1350 FREAD
: FREAD
|FWRITE
, 1, 0, cr
, NULL
);
1352 (void) bfinval(dev
, 1);
/*
 * ufs_unmount: tear down the UFS instance whose private data hangs off
 * vfsp->vfs_data.
 *
 *   vfsp  - vfs being unmounted; the vfs lock must already be held
 *           (checked with ASSERT(vfs_lock_held(vfsp))).
 *   fflag - unmount flags; MS_FORCE selects the forced unmount path.
 *   cr    - credentials, checked with secpolicy_fs_unmount() and handed to
 *           fop_close() for the device vnode.
 *
 * Sequence as far as it is visible here:
 *   1. With MS_FORCE, VFS_UNMOUNTED is set up front, the delete thread is
 *      suspended and, unless the file system is already hard locked, a
 *      LOCKFS_HLOCK is applied through ufs_freeze(), ufs_quiesce(),
 *      ufs_flush() and ufs_thaw(); pollers on ufs_pollhd are woken with
 *      POLLPRI | POLLERR.
 *   2. If logging is active and vfs_validfs is UT_HLOCKING, a forced
 *      unmount has VFS_UNMOUNTED cleared again.
 *   3. vfs_validfs becomes UT_UNMOUNTED, the reclaim thread exits, the
 *      delete thread is suspended and the delete and idle queues are
 *      drained.
 *   4. Under ul_lock the busy counters (ul_vnops_cnt, ul_falloc_cnt) and the
 *      soft lock state are tested; when neither hard nor error locked,
 *      ufs_flush() runs and the inode hash is scanned, under ufs_scan_lock,
 *      for inodes of this file system other than the quota inode and a
 *      root inode whose v_count is 1.
 *   5. A snapshot is deleted with fssnap_delete() when hard or error locked.
 *   6. Quotas are closed and invalidated, the queues are drained again and
 *      every cached inode of this file system has i_ufsvfs cleared; for a
 *      hard or error locked file system the vnode is pointed at EIO_vfs.
 *   7. The superblock clean state is updated, summary info and the final
 *      superblock transaction are pushed, log and fix-on-panic state is
 *      released, the summary buffer and superblock buffer are freed and the
 *      device vnode is closed.
 *   8. The ufsvfs is removed from the instance list and either queued on
 *      ufsvfslist (hard or error locked) or destroyed and freed; finally
 *      VFS_UNMOUNTED is set.
 *
 * NOTE(review): this listing omits a number of source lines (braces, several
 * return and goto statements, labels and some declarations), so the exact
 * return values and the label layout must be confirmed against the full
 * file.
 */
1361 ufs_unmount(struct vfs
*vfsp
, int fflag
, struct cred
*cr
)
1363 dev_t dev
= vfsp
->vfs_dev
;
1364 struct ufsvfs
*ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1365 struct fs
*fs
= ufsvfsp
->vfs_fs
;
1366 struct ulockfs
*ulp
= &ufsvfsp
->vfs_ulockfs
;
1367 struct vnode
*bvp
, *vp
;
1369 struct inode
*ip
, *inext
, *rip
;
1372 struct lockfs lockfs
;
1373 int poll_events
= POLLPRI
;
1374 extern struct pollhead ufs_pollhd
;
1375 refstr_t
*mountpoint
;
/* Callers must already hold the vfs lock. */
1377 ASSERT(vfs_lock_held(vfsp
));
/*
 * Privilege check; the statement executed on failure is not visible in
 * this listing.
 */
1379 if (secpolicy_fs_unmount(cr
, vfsp
) != 0)
1382 * Forced unmount is now supported through the
1385 if (fflag
& MS_FORCE
) {
1387 * Mark the filesystem as being unmounted now in
1388 * case of a forcible umount before we take any
1389 * locks inside UFS to prevent racing with a VFS_VGET()
1390 * request. Throw these VFS_VGET() requests away for
1391 * the duration of the forcible umount so they won't
1392 * use stale or even freed data later on when we're done.
1393 * It may happen that the VFS has had a additional hold
1394 * placed on it by someone other than UFS and thus will
1395 * not get freed immediately once we're done with the
1396 * umount by dounmount() - use VFS_UNMOUNTED to inform
1397 * users of this still-alive VFS that its corresponding
1398 * filesystem being gone so they can detect that and error
1401 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
1403 ufs_thread_suspend(&ufsvfsp
->vfs_delete
);
1404 mutex_enter(&ulp
->ul_lock
);
1406 * If file system is already hard locked,
1407 * unmount the file system, otherwise
1408 * hard lock it before unmounting.
1410 if (!ULOCKFS_IS_HLOCK(ulp
)) {
1411 atomic_inc_ulong(&ufs_quiesce_pend
);
1412 lockfs
.lf_lock
= LOCKFS_HLOCK
;
1413 lockfs
.lf_flags
= 0;
1414 lockfs
.lf_key
= ulp
->ul_lockfs
.lf_key
+ 1;
1415 lockfs
.lf_comlen
= 0;
1416 lockfs
.lf_comment
= NULL
;
1417 ufs_freeze(ulp
, &lockfs
);
1418 ULOCKFS_SET_BUSY(ulp
);
1419 LOCKFS_SET_BUSY(&ulp
->ul_lockfs
);
1420 (void) ufs_quiesce(ulp
);
1421 (void) ufs_flush(vfsp
);
1422 (void) ufs_thaw(vfsp
, ufsvfsp
, ulp
);
1423 atomic_dec_ulong(&ufs_quiesce_pend
);
1424 ULOCKFS_CLR_BUSY(ulp
);
1425 LOCKFS_CLR_BUSY(&ulp
->ul_lockfs
);
1426 poll_events
|= POLLERR
;
1427 pollwakeup(&ufs_pollhd
, poll_events
);
1429 ufs_thread_continue(&ufsvfsp
->vfs_delete
);
1430 mutex_exit(&ulp
->ul_lock
);
1433 /* let all types of writes go through */
1434 ufsvfsp
->vfs_iotstamp
= ddi_get_lbolt();
1436 /* coordinate with global hlock thread */
1437 if (TRANS_ISTRANS(ufsvfsp
) && (ufsvfsp
->vfs_validfs
== UT_HLOCKING
)) {
1439 * last possibility for a forced umount to fail hence clear
1440 * VFS_UNMOUNTED if appropriate.
1442 if (fflag
& MS_FORCE
)
1443 vfsp
->vfs_flag
&= ~VFS_UNMOUNTED
;
1447 ufsvfsp
->vfs_validfs
= UT_UNMOUNTED
;
1449 /* kill the reclaim thread */
1450 ufs_thread_exit(&ufsvfsp
->vfs_reclaim
);
1452 /* suspend the delete thread */
1453 ufs_thread_suspend(&ufsvfsp
->vfs_delete
);
1456 * drain the delete and idle queues
1458 ufs_delete_drain(vfsp
, -1, 1);
1459 ufs_idle_drain(vfsp
);
1462 * use the lockfs protocol to prevent new ops from starting
1463 * a forcible umount can not fail beyond this point as
1464 * we hard-locked the filesystem and drained all current consumers
1467 mutex_enter(&ulp
->ul_lock
);
1470 * if the file system is busy; return EBUSY
1472 if (ulp
->ul_vnops_cnt
|| ulp
->ul_falloc_cnt
|| ULOCKFS_IS_SLOCK(ulp
)) {
1478 * if this is not a forced unmount (!hard/error locked), then
1479 * get rid of every inode except the root and quota inodes
1480 * also, commit any outstanding transactions
1482 if (!ULOCKFS_IS_HLOCK(ulp
) && !ULOCKFS_IS_ELOCK(ulp
))
1483 if (error
= ufs_flush(vfsp
))
1487 * ignore inodes in the cache if fs is hard locked or error locked
1489 rip
= VTOI(ufsvfsp
->vfs_root
);
1490 if (!ULOCKFS_IS_HLOCK(ulp
) && !ULOCKFS_IS_ELOCK(ulp
)) {
1492 * Otherwise, only the quota and root inodes are in the cache.
1494 * Avoid racing with ufs_update() and ufs_sync().
1496 mutex_enter(&ufs_scan_lock
);
1498 for (i
= 0, ih
= ihead
; i
< inohsz
; i
++, ih
++) {
1499 mutex_enter(&ih_lock
[i
]);
1500 for (ip
= ih
->ih_chain
[0];
1501 ip
!= (struct inode
*)ih
;
1503 if (ip
->i_ufsvfs
!= ufsvfsp
)
1505 if (ip
== ufsvfsp
->vfs_qinod
)
1507 if (ip
== rip
&& ITOV(ip
)->v_count
== 1)
1509 mutex_exit(&ih_lock
[i
]);
1510 mutex_exit(&ufs_scan_lock
);
1514 mutex_exit(&ih_lock
[i
]);
1516 mutex_exit(&ufs_scan_lock
);
1520 * if a snapshot exists and this is a forced unmount, then delete
1521 * the snapshot. Otherwise return EBUSY. This will insure the
1522 * snapshot always belongs to a valid file system.
1524 if (ufsvfsp
->vfs_snapshot
) {
1525 if (ULOCKFS_IS_HLOCK(ulp
) || ULOCKFS_IS_ELOCK(ulp
)) {
1526 (void) fssnap_delete(&ufsvfsp
->vfs_snapshot
);
1534 * Close the quota file and invalidate anything left in the quota
1535 * cache for this file system. Pass kcred to allow all quota
1538 (void) closedq(ufsvfsp
, kcred
);
1539 invalidatedq(ufsvfsp
);
1541 * drain the delete and idle queues
1543 ufs_delete_drain(vfsp
, -1, 0);
1544 ufs_idle_drain(vfsp
);
1547 * discard the inodes for this fs (including root, shadow, and quota)
1549 for (i
= 0, ih
= ihead
; i
< inohsz
; i
++, ih
++) {
1550 mutex_enter(&ih_lock
[i
]);
1551 for (inext
= 0, ip
= ih
->ih_chain
[0];
1552 ip
!= (struct inode
*)ih
;
1555 if (ip
->i_ufsvfs
!= ufsvfsp
)
1559 * We've found the inode in the cache and as we
1560 * hold the hash mutex the inode can not
1561 * disappear from underneath us.
1562 * We also know it must have at least a vnode
1563 * reference count of 1.
1564 * We perform an additional VN_HOLD so the VN_RELE
1565 * in case we take the inode off the idle queue
1566 * can not be the last one.
1567 * It is safe to grab the writer contents lock here
1568 * to prevent a race with ufs_iinactive() putting
1569 * inodes into the idle queue while we operate on
1572 rw_enter(&ip
->i_contents
, RW_WRITER
);
1581 * rip->i_ufsvfsp is needed by bflush()
1584 ip
->i_ufsvfs
= NULL
;
1586 * Set vnode's vfsops to dummy ops, which return
1587 * EIO. This is needed to forced unmounts to work
1588 * with lofs/nfs properly.
1590 if (ULOCKFS_IS_HLOCK(ulp
) || ULOCKFS_IS_ELOCK(ulp
))
1591 vp
->v_vfsp
= &EIO_vfs
;
1596 rw_exit(&ip
->i_contents
);
1600 mutex_exit(&ih_lock
[i
]);
1602 ufs_si_cache_flush(dev
);
1605 * kill the delete thread and drain the idle queue
1607 ufs_thread_exit(&ufsvfsp
->vfs_delete
);
1608 ufs_idle_drain(vfsp
);
1610 bp
= ufsvfsp
->vfs_bufp
;
1611 bvp
= ufsvfsp
->vfs_devvp
;
1612 flag
= !fs
->fs_ronly
;
1615 if (fs
->fs_clean
!= FSBAD
) {
1616 if (fs
->fs_clean
== FSSTABLE
)
1617 fs
->fs_clean
= FSCLEAN
;
1618 fs
->fs_reclaim
&= ~FS_RECLAIM
;
1620 if (TRANS_ISTRANS(ufsvfsp
) &&
1621 !TRANS_ISERROR(ufsvfsp
) &&
1622 !ULOCKFS_IS_HLOCK(ulp
) &&
1623 (fs
->fs_rolled
== FS_NEED_ROLL
)) {
1625 * ufs_flush() above has flushed the last Moby.
1626 * This is needed to ensure the following superblock
1627 * update really is the last metadata update
1629 error
= ufs_putsummaryinfo(dev
, ufsvfsp
, fs
);
1631 fs
->fs_rolled
= FS_ALL_ROLLED
;
1634 TRANS_SBUPDATE(ufsvfsp
, vfsp
, TOP_SBUPDATE_UNMOUNT
);
1636 * push this last transaction
1638 curthread
->t_flag
|= T_DONTBLOCK
;
1639 TRANS_BEGIN_SYNC(ufsvfsp
, TOP_COMMIT_UNMOUNT
,
1640 TOP_COMMIT_SIZE
, &error
);
1642 TRANS_END_SYNC(ufsvfsp
, &error
, TOP_COMMIT_UNMOUNT
,
1644 curthread
->t_flag
&= ~T_DONTBLOCK
;
1647 TRANS_MATA_UMOUNT(ufsvfsp
);
1648 lufs_unsnarf(ufsvfsp
); /* Release the in-memory structs */
1649 ufsfx_unmount(ufsvfsp
); /* fix-on-panic bookkeeping */
1650 kmem_free(fs
->fs_u
.fs_csp
, fs
->fs_cssize
);
1652 bp
->b_flags
|= B_STALE
|B_AGE
;
1653 ufsvfsp
->vfs_bufp
= NULL
; /* don't point at freed buf */
1654 brelse(bp
); /* free the superblock buf */
/*
 * Issue fop_putpage on the common spec vnode and close the device vnode;
 * both results are discarded.
 */
1656 (void) fop_putpage(common_specvp(bvp
), 0, (size_t)0,
1658 (void) fop_close(bvp
, flag
, 1, 0, cr
, NULL
);
1660 (void) bfinval(dev
, 1);
1664 * It is now safe to NULL out the ufsvfs pointer and discard
1667 rip
->i_ufsvfs
= NULL
;
1670 /* free up lockfs comment structure, if any */
1671 if (ulp
->ul_lockfs
.lf_comlen
&& ulp
->ul_lockfs
.lf_comment
)
1672 kmem_free(ulp
->ul_lockfs
.lf_comment
, ulp
->ul_lockfs
.lf_comlen
);
1675 * Remove from instance list.
1677 ufs_vfs_remove(ufsvfsp
);
1680 * For a forcible unmount, threads may be asleep in
1681 * ufs_lockfs_begin/ufs_check_lockfs. These threads will need
1682 * the ufsvfs structure so we don't free it, yet. ufs_update
1683 * will free it up after awhile.
1685 if (ULOCKFS_IS_HLOCK(ulp
) || ULOCKFS_IS_ELOCK(ulp
)) {
1686 extern kmutex_t ufsvfs_mutex
;
1687 extern struct ufsvfs
*ufsvfslist
;
1689 mutex_enter(&ufsvfs_mutex
);
1690 ufsvfsp
->vfs_dontblock
= 1;
1691 ufsvfsp
->vfs_next
= ufsvfslist
;
1692 ufsvfslist
= ufsvfsp
;
1693 mutex_exit(&ufsvfs_mutex
);
1694 /* wakeup any suspended threads */
1695 cv_broadcast(&ulp
->ul_cv
);
1696 mutex_exit(&ulp
->ul_lock
);
1698 mutex_destroy(&ufsvfsp
->vfs_lock
);
1699 kmem_free(ufsvfsp
, sizeof (struct ufsvfs
));
1703 * Now mark the filesystem as unmounted since we're done with it.
1705 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
/*
 * NOTE(review): what follows looks like the exit path for an unmount that
 * did not complete (the label is not visible in this listing) - confirm
 * against the full file. It wakes waiters, releases ul_lock and, for a
 * logging file system, resumes the delete thread, restarts the reclaim
 * thread and sets vfs_validfs back to UT_MOUNTED.
 */
1709 /* open the fs to new ops */
1710 cv_broadcast(&ulp
->ul_cv
);
1711 mutex_exit(&ulp
->ul_lock
);
1713 if (TRANS_ISTRANS(ufsvfsp
)) {
1714 /* allow the delete thread to continue */
1715 ufs_thread_continue(&ufsvfsp
->vfs_delete
);
1716 /* restart the reclaim thread */
1717 ufs_thread_start(&ufsvfsp
->vfs_reclaim
, ufs_thread_reclaim
,
1719 /* coordinate with global hlock thread */
1720 ufsvfsp
->vfs_validfs
= UT_MOUNTED
;
1721 /* check for trans errors during umount */
1722 ufs_trans_onerror();
1725 * if we have a separate /usr it will never unmount
1726 * when halting. In order to not re-read all the
1727 * cylinder group summary info on mounting after
1728 * reboot the logging of summary info is re-enabled
1729 * and the super block written out.
1731 mountpoint
= vfs_getmntpoint(vfsp
);
1732 if ((fs
->fs_si
== FS_SI_OK
) &&
1733 (strcmp("/usr", refstr_value(mountpoint
)) == 0)) {
1734 ufsvfsp
->vfs_nolog_si
= 0;
1735 UFS_BWRITE2(NULL
, ufsvfsp
->vfs_bufp
);
1737 refstr_rele(mountpoint
);
/*
 * ufs_root: look up the root vnode of the UFS instance mounted at vfsp.
 *
 *   vfsp - vfs whose vfs_data points at the struct ufsvfs
 *   vpp  - out parameter intended to receive the root vnode
 *
 * Returns EIO when vfs_data is NULL or the ufsvfs has no vfs_root, which
 * the code attributes to a forced unmount.
 *
 * NOTE(review): the hold taken on the vnode, the store through vpp and the
 * success return are not visible in this listing - confirm against the full
 * file.
 */
1744 ufs_root(struct vfs
*vfsp
, struct vnode
**vpp
)
1746 struct ufsvfs
*ufsvfsp
;
1752 ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1753 if (!ufsvfsp
|| !ufsvfsp
->vfs_root
)
1754 return (EIO
); /* forced unmount */
/* The root vnode is cached in the ufsvfs. */
1756 vp
= ufsvfsp
->vfs_root
;
1763 * Get file system statistics.
/*
 * ufs_statvfs: fill *sp with statistics taken from the in-core superblock.
 *
 *   vfsp - vfs to report on
 *   sp   - statvfs64 buffer; it is zeroed with bzero() and then populated
 *
 * Fields written:
 *   f_bsize    fs_bsize
 *   f_frsize   fs_fsize
 *   f_blocks   fs_dsize
 *   f_bfree    cs_nbfree * fs_frag + cs_nffree
 *   f_files    fs_ncg * fs_ipg
 *   f_ffree    cs_nifree
 *   f_bavail   MAX(max_avail - used, 0), with
 *              max_avail = fs_dsize - vfs_minfrags and
 *              used = fs_dsize - f_bfree
 *   f_favail   copy of f_ffree
 *   f_basetype name from vfssw[] for this vfs_fstype
 *   f_flag     vf_to_stf(vfs_flag)
 *   f_namemax  MAXNAMLEN
 *   f_fstr     14 bytes: zeroed when fs_cpc is 0, otherwise copied from the
 *              rotational table returned by fs_rotbl()
 *
 * On a logging file system f_bfree and f_ffree are first adjusted by
 * ufs_delete_adjust_stats() for work still queued to the delete thread.
 *
 * NOTE(review): the statements executed when the VFS_UNMOUNTED test or the
 * magic and version checks fail are not visible in this listing, nor is the
 * final return.
 */
1766 ufs_statvfs(struct vfs
*vfsp
, struct statvfs64
*sp
)
1769 struct ufsvfs
*ufsvfsp
;
1771 long max_avail
, used
;
/* Test for an already unmounted file system before touching vfs_data. */
1774 if (vfsp
->vfs_flag
& VFS_UNMOUNTED
)
1777 ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1778 fsp
= ufsvfsp
->vfs_fs
;
/*
 * Sanity-check the superblock: the magic must be FS_MAGIC or MTB_UFS_MAGIC
 * and the version must be one accepted for that magic.
 */
1779 if ((fsp
->fs_magic
!= FS_MAGIC
) && (fsp
->fs_magic
!= MTB_UFS_MAGIC
))
1781 if (fsp
->fs_magic
== FS_MAGIC
&&
1782 (fsp
->fs_version
!= UFS_EFISTYLE4NONEFI_VERSION_2
&&
1783 fsp
->fs_version
!= UFS_VERSION_MIN
))
1785 if (fsp
->fs_magic
== MTB_UFS_MAGIC
&&
1786 (fsp
->fs_version
> MTB_UFS_VERSION_1
||
1787 fsp
->fs_version
< MTB_UFS_VERSION_MIN
))
1791 * get the basic numbers
1793 (void) bzero(sp
, sizeof (*sp
));
1795 sp
->f_bsize
= fsp
->fs_bsize
;
1796 sp
->f_frsize
= fsp
->fs_fsize
;
1797 sp
->f_blocks
= (fsblkcnt64_t
)fsp
->fs_dsize
;
1798 sp
->f_bfree
= (fsblkcnt64_t
)fsp
->fs_cstotal
.cs_nbfree
* fsp
->fs_frag
+
1799 fsp
->fs_cstotal
.cs_nffree
;
1801 sp
->f_files
= (fsfilcnt64_t
)fsp
->fs_ncg
* fsp
->fs_ipg
;
1802 sp
->f_ffree
= (fsfilcnt64_t
)fsp
->fs_cstotal
.cs_nifree
;
1805 * Adjust the numbers based on things waiting to be deleted.
1806 * modifies f_bfree and f_ffree. Afterwards, everything we
1807 * come up with will be self-consistent. By definition, this
1808 * is a point-in-time snapshot, so the fact that the delete
1809 * thread's probably already invalidated the results is not a
1810 * problem. Note that if the delete thread is ever extended to
1811 * non-logging ufs, this adjustment must always be made.
1813 if (TRANS_ISTRANS(ufsvfsp
))
1814 ufs_delete_adjust_stats(ufsvfsp
, sp
);
1817 * avail = MAX(max_avail - used, 0)
1819 max_avail
= fsp
->fs_dsize
- ufsvfsp
->vfs_minfrags
;
1821 used
= (fsp
->fs_dsize
- sp
->f_bfree
);
1823 if (max_avail
> used
)
1824 sp
->f_bavail
= (fsblkcnt64_t
)max_avail
- used
;
1826 sp
->f_bavail
= (fsblkcnt64_t
)0;
1828 sp
->f_favail
= sp
->f_ffree
;
1829 (void) cmpldev(&d32
, vfsp
->vfs_dev
);
1831 (void) strcpy(sp
->f_basetype
, vfssw
[vfsp
->vfs_fstype
].vsw_name
);
1832 sp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
1834 /* keep coordinated with ufs_l_pathconf() */
1835 sp
->f_namemax
= MAXNAMLEN
;
1837 if (fsp
->fs_cpc
== 0) {
1838 bzero(sp
->f_fstr
, 14);
1841 blk
= fsp
->fs_spc
* fsp
->fs_cpc
/ NSPF(fsp
);
1842 for (i
= 0; i
< blk
; i
+= fsp
->fs_frag
) /* CSTYLED */
1845 blk
= i
/ fsp
->fs_frag
;
1846 bcopy(&(fs_rotbl(fsp
)[blk
]), sp
->f_fstr
, 14);
1851 * Flush any pending I/O to file system vfsp.
1852 * The ufs_update() routine will only flush *all* ufs files.
1853 * If vfsp is non-NULL, only sync this ufs (in preparation
/*
 * ufs_sync: push pending state for one UFS file system out to disk.
 *
 *   vfsp - vfs to flush; it must match ufs_vfsops and vfs_lock() must
 *          succeed before any work is done
 *   flag - SYNC_ATTR is remembered in cheap and forwarded to
 *          ufs_sync_inode through ufs_scan_inodes(); SYNC_CLOSE first drains
 *          the whole idle queue with ufs_idle_drain(NULL)
 *   cr   - credentials; not referenced in the lines visible here
 *
 * Visible steps for a single file system:
 *   - under vfs_lock, turn off vfs_dio and mark the superblock FSACTIVE when
 *     the file system is writable and neither FSBAD nor FSLOG
 *   - when fs_fmod is set, report a modified read-only file system through
 *     ufs_fault(), otherwise update the superblock with TRANS_SBUPDATE
 *   - walk the inodes with ufs_scan_inodes() while holding ufs_scan_lock
 *   - bflush() the device
 *   - commit outstanding async transactions with T_DONTBLOCK set on the
 *     current thread for the duration
 *
 * NOTE(review): the handling of a NULL vfsp, the vfs_unlock call and the
 * return statements are not visible in this listing.
 */
1858 ufs_sync(struct vfs
*vfsp
, short flag
, struct cred
*cr
)
1860 struct ufsvfs
*ufsvfsp
;
1862 int cheap
= flag
& SYNC_ATTR
;
1866 * SYNC_CLOSE means we're rebooting. Toss everything
1867 * on the idle queue so we don't have to slog through
1868 * a bunch of uninteresting inodes over and over again.
1870 if (flag
& SYNC_CLOSE
)
1871 ufs_idle_drain(NULL
);
1878 /* Flush a single ufs */
1879 if (!vfs_matchops(vfsp
, &ufs_vfsops
) || vfs_lock(vfsp
) != 0)
1882 ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1885 fs
= ufsvfsp
->vfs_fs
;
1886 mutex_enter(&ufsvfsp
->vfs_lock
);
/*
 * Direct I/O is switched off and the superblock marked FSACTIVE only for a
 * writable file system that is neither FSBAD nor FSLOG.
 */
1888 if (ufsvfsp
->vfs_dio
&&
1889 fs
->fs_ronly
== 0 &&
1890 fs
->fs_clean
!= FSBAD
&&
1891 fs
->fs_clean
!= FSLOG
) {
1892 /* turn off fast-io on unmount, so no fsck needed (4029401) */
1893 ufsvfsp
->vfs_dio
= 0;
1894 fs
->fs_clean
= FSACTIVE
;
1898 /* Write back modified superblock */
1899 if (fs
->fs_fmod
== 0) {
1900 mutex_exit(&ufsvfsp
->vfs_lock
);
1902 if (fs
->fs_ronly
!= 0) {
1903 mutex_exit(&ufsvfsp
->vfs_lock
);
1905 return (ufs_fault(ufsvfsp
->vfs_root
,
1906 "fs = %s update: ro fs mod\n", fs
->fs_fsmnt
));
1909 mutex_exit(&ufsvfsp
->vfs_lock
);
1911 TRANS_SBUPDATE(ufsvfsp
, vfsp
, TOP_SBUPDATE_UPDATE
);
1916 * Avoid racing with ufs_update() and ufs_unmount().
1919 mutex_enter(&ufs_scan_lock
);
1921 (void) ufs_scan_inodes(1, ufs_sync_inode
,
1922 (void *)(uintptr_t)cheap
, ufsvfsp
);
1924 mutex_exit(&ufs_scan_lock
);
1926 bflush((dev_t
)vfsp
->vfs_dev
);
1929 * commit any outstanding async transactions
1931 curthread
->t_flag
|= T_DONTBLOCK
;
1932 TRANS_BEGIN_SYNC(ufsvfsp
, TOP_COMMIT_UPDATE
, TOP_COMMIT_SIZE
, &error
);
1934 TRANS_END_SYNC(ufsvfsp
, &error
, TOP_COMMIT_UPDATE
,
1937 curthread
->t_flag
&= ~T_DONTBLOCK
;
/*
 * sbupdate: write the superblock of the UFS instance at vfsp and, in the
 * path that reaches the loop below, the cylinder group summary information
 * as well.
 *
 *   vfsp - vfs whose vfs_data is the struct ufsvfs to update
 *
 * Visible behaviour:
 *   - when ul_sbowner is set and is not the current thread, a separate
 *     branch is taken (its body is not visible in this listing)
 *   - the lockfs state is marked modified with ULOCKFS_SET_MOD
 *   - on a logging file system ufs_sbwrite() is called under vfs_lock
 *   - the summary area fs_u.fs_csp is copied out starting at fs_csaddr,
 *     fs_frag fragments per iteration, using UFS_GETBLK and UFS_BRWRITE
 *   - ufs_sbwrite() is called under vfs_lock
 *
 * NOTE(review): the early returns that separate these paths are not visible
 * here; confirm the control flow against the full file.
 */
1944 sbupdate(struct vfs
*vfsp
)
1946 struct ufsvfs
*ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1947 struct fs
*fs
= ufsvfsp
->vfs_fs
;
1955 * for ulockfs processing, limit the superblock writes
1957 if ((ufsvfsp
->vfs_ulockfs
.ul_sbowner
) &&
1958 (curthread
!= ufsvfsp
->vfs_ulockfs
.ul_sbowner
)) {
1963 ULOCKFS_SET_MOD((&ufsvfsp
->vfs_ulockfs
));
/* Logging file system: the superblock write happens under vfs_lock. */
1965 if (TRANS_ISTRANS(ufsvfsp
)) {
1966 mutex_enter(&ufsvfsp
->vfs_lock
);
1967 ufs_sbwrite(ufsvfsp
);
1968 mutex_exit(&ufsvfsp
->vfs_lock
);
/*
 * Number of fragments covering the summary area, and a cursor into the
 * in-core copy of it.
 */
1972 blks
= howmany(fs
->fs_cssize
, fs
->fs_fsize
);
1973 space
= (caddr_t
)fs
->fs_u
.fs_csp
;
1974 for (i
= 0; i
< blks
; i
+= fs
->fs_frag
) {
1975 size
= fs
->fs_bsize
;
/* The final chunk may be shorter than a full block. */
1976 if (i
+ fs
->fs_frag
> blks
)
1977 size
= (blks
- i
) * fs
->fs_fsize
;
1978 bp
= UFS_GETBLK(ufsvfsp
, ufsvfsp
->vfs_dev
,
1979 (daddr_t
)(fsbtodb(fs
, fs
->fs_csaddr
+ i
)),
1981 bcopy(space
, bp
->b_un
.b_addr
, size
);
1983 bp
->b_bcount
= size
;
1984 UFS_BRWRITE(ufsvfsp
, bp
);
/* Write the superblock itself, again under vfs_lock. */
1986 mutex_enter(&ufsvfsp
->vfs_lock
);
1987 ufs_sbwrite(ufsvfsp
);
1988 mutex_exit(&ufsvfsp
->vfs_lock
);
/*
 * Tunable used by ufs_vget(): how many inodes are passed to ufs_idle_some()
 * and added to the in_vidles counter each time the idle queue is found above
 * its high-water mark.
 */
1991 int ufs_vget_idle_count
= 2; /* Number of inodes to idle each time */
/*
 * ufs_vget: translate a file identifier into an inode of this file system.
 *
 *   vfsp - vfs to look in; a vfs flagged VFS_UNMOUNTED is rejected first
 *   vpp  - out parameter for the resulting vnode (the store is not visible
 *          in this listing)
 *   fidp - file identifier, reinterpreted as a struct ufid; ufid_ino picks
 *          the inode and ufid_gen is compared with the inode generation
 *
 * Visible steps:
 *   - when ufs_idle_q.uq_ne exceeds uq_hiwat and the thread is not flagged
 *     T_DONTBLOCK, idle ufs_vget_idle_count inodes before doing anything
 *     else
 *   - enter the lockfs protocol with ULOCKFS_VGET_MASK
 *   - call ufs_iget() with vfs_dqrwlock held as reader, then leave lockfs
 *   - treat the inode as invalid when the generation differs, i_mode is 0
 *     or i_nlink is not positive
 *
 * NOTE(review): the error exits, the release of a rejected inode and the
 * return statements are not visible here - confirm against the full file.
 */
1993 ufs_vget(struct vfs
*vfsp
, struct vnode
**vpp
, struct fid
*fidp
)
1998 struct ufsvfs
*ufsvfsp
= (struct ufsvfs
*)vfsp
->vfs_data
;
1999 struct ulockfs
*ulp
;
2002 * Check for unmounted filesystem.
2004 if (vfsp
->vfs_flag
& VFS_UNMOUNTED
) {
2010 * Keep the idle queue from getting too long by
2011 * idling an inode before attempting to allocate another.
2012 * This operation must be performed before entering
2013 * lockfs or a transaction.
2015 if (ufs_idle_q
.uq_ne
> ufs_idle_q
.uq_hiwat
)
2016 if ((curthread
->t_flag
& T_DONTBLOCK
) == 0) {
2017 ins
.in_vidles
.value
.ul
+= ufs_vget_idle_count
;
2018 ufs_idle_some(ufs_vget_idle_count
);
/* The generic fid is interpreted as the UFS specific layout. */
2021 ufid
= (struct ufid
*)fidp
;
2023 if (error
= ufs_lockfs_begin(ufsvfsp
, &ulp
, ULOCKFS_VGET_MASK
))
/* ufs_iget() runs with the quota rwlock held as reader. */
2026 rw_enter(&ufsvfsp
->vfs_dqrwlock
, RW_READER
);
2028 error
= ufs_iget(vfsp
, ufid
->ufid_ino
, &ip
, CRED());
2030 rw_exit(&ufsvfsp
->vfs_dqrwlock
);
2032 ufs_lockfs_end(ulp
);
2038 * Check if the inode has been deleted or freed or is in transient state
2039 * since the last VFS_VGET() request for it, release it and don't return
2040 * it to the caller, presumably NFS, as it's no longer valid.
2042 if (ip
->i_gen
!= ufid
->ufid_gen
|| ip
->i_mode
== 0 ||
2043 (ip
->i_nlink
<= 0)) {
/*
 * VFS operations vector for UFS. ufsinit() registers it with
 * vfs_setfsops(), and ufs_sync() uses it with vfs_matchops() to recognise
 * UFS mounts. Each member names the UFS routine implementing that
 * operation.
 *
 * NOTE(review): the closing line of the initializer is not visible in this
 * listing.
 */
2057 static const struct vfsops ufs_vfsops
= {
2058 .vfs_mount
= ufs_mount
,
2059 .vfs_unmount
= ufs_unmount
,
2060 .vfs_root
= ufs_root
,
2061 .vfs_statvfs
= ufs_statvfs
,
2062 .vfs_sync
= ufs_sync
,
2063 .vfs_vget
= ufs_vget
,
2064 .vfs_mountroot
= ufs_mountroot
,
2068 ufsinit(int fstype
, char *name
)
2074 error
= vfs_setfsops(fstype
, &ufs_vfsops
);
2076 cmn_err(CE_WARN
, "ufsinit: bad fstype");