4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 * under license from the Regents of the University of California.
36 #include <sys/types.h>
37 #include <sys/t_lock.h>
39 #include <sys/vnode.h>
40 #include <sys/statvfs.h>
41 #include <sys/refstr.h>
50 * Data associated with mounted file systems.
/*
 * Operations vector.  This is used internal to the kernel; file systems
 * supply their list of operations via vfs_setfsops().
 *
 * Only the opaque type name is introduced here, so code that merely
 * passes a vfsops_t pointer around does not need the structure layout.
 */
typedef struct vfsops vfsops_t;
/*
 * File system identifier.  Should be unique (at least per machine).
 *
 * NOTE(review): the opening and closing lines of this definition were
 * lost from this copy of the header; the fsid_t name is taken from its
 * uses elsewhere in this file -- confirm against the upstream header.
 */
typedef struct {
    int val[2];         /* file system id type */
} fsid_t;
68 * File identifier. Should be unique per filesystem on a single
69 * machine. This is typically called by a stateless file server
70 * in order to generate "file handles".
72 * Do not change the definition of struct fid ... fid_t without
73 * letting the CacheFS group know about it! They will have to do at
74 * least two things, in the same change that changes this structure:
75 * 1. change CFSVERSION in usr/src/uts/common/sys/fs/cachefs_fs.h
76 * 2. put the old version # in the canupgrade array
77 * in cachfs_upgrade() in usr/src/cmd/fs.d/cachefs/fsck/fsck.c
78 * This is necessary because CacheFS stores FIDs on disk.
80 * Many underlying file systems cast a struct fid into other
81 * file system dependent structures which may require 4 byte alignment.
82 * Because a fid starts with a short it may not be 4 byte aligned, the
83 * fid_pad will force the alignment.
/*
 * NOTE(review): presumably the former maximum size, in bytes, of the
 * data carried in a file identifier; its users are not visible in this
 * header -- confirm before relying on that reading.
 */
#define OLD_MAXFIDSZ    16
92 ushort_t len
; /* length of data in bytes */
93 char data
[MAXFIDSZ
]; /* data (variable len) */
100 * Solaris 64 - use old-style cache format with 32-bit aligned fid for on-disk
101 * struct compatibility.
103 typedef struct fid32
{
107 uint16_t len
; /* length of data in bytes */
108 char data
[MAXFIDSZ
]; /* data (variable len) */
112 #else /* not _SYSCALL32 */
114 typedef fid_t fid32_t
;
115 #endif /* _SYSCALL32 */
/*
 * Shorthand member names: "x.fid_len" and "x.fid_data" expand to the
 * len and data members of the _fid structure nested inside x.un.
 */
#define fid_len     un._fid.len
#define fid_data    un._fid.data
/*
 * Structure defining a mount option for a filesystem.
 * option names are found in mntent.h
 *
 * NOTE(review): the closing line of this definition was lost from this
 * copy of the header; the mntopt_t name is taken from its use in
 * struct mntopts -- confirm against the upstream header.
 */
typedef struct mntopt {
    char    *mo_name;       /* option name */
    char    **mo_cancel;    /* list of options cancelled by this one */
    char    *mo_arg;        /* argument string for this option */
    int     mo_flags;       /* flags for this mount option */
    void    *mo_data;       /* filesystem specific data */
} mntopt_t;
/*
 * Flags that apply to mount options
 */
#define MO_SET          0x01    /* option is set */
#define MO_NODISPLAY    0x02    /* option not listed in mnttab */
#define MO_HASVALUE     0x04    /* option takes a value */
#define MO_IGNORE       0x08    /* option ignored by parser */
#define MO_DEFAULT      MO_SET  /* option is on by default */
#define MO_TAG          0x10    /* flags a tag set by user program */
#define MO_EMPTY        0x20    /* empty space in option table */

/*
 * A separate flag namespace from MO_* above (the values overlap).
 */
#define VFS_NOFORCEOPT  0x01    /* honor MO_IGNORE (don't set option) */
#define VFS_DISPLAY     0x02    /* Turn off MO_NODISPLAY bit for opt */
#define VFS_NODISPLAY   0x04    /* Turn on MO_NODISPLAY bit for opt */
#define VFS_CREATEOPT   0x08    /* Create the opt if it's not there */
150 * Structure holding mount option strings for the mounted file system.
152 typedef struct mntopts
{
153 uint_t mo_count
; /* number of entries in table */
154 mntopt_t
*mo_list
; /* list of mount options */
158 * The kstat structures associated with the vopstats are kept in an
159 * AVL tree. This is to avoid the case where a file system does not
160 * use a unique fsid_t for each vfs (e.g., namefs). In order to do
161 * this, we need a structure that the AVL tree can use that also
162 * references the kstat.
163 * Note that the vks_fsid is generated from the value reported by
166 typedef struct vskstat_anchor
{
167 avl_node_t vsk_node
; /* Required for use by AVL routines */
168 kstat_t
*vsk_ksp
; /* kstat structure for vopstats */
169 ulong_t vsk_fsid
; /* fsid associated w/this FS */
172 extern avl_tree_t vskstat_tree
;
173 extern kmutex_t vskstat_tree_lock
;
176 * Private vfs data, NOT to be used by a file system implementation.
179 #define VFS_FEATURE_MAXSZ 4
181 typedef struct vfs_impl
{
182 /* Counted array - Bitmap of vfs features */
183 uint32_t vi_featureset
[VFS_FEATURE_MAXSZ
];
185 * Support for statistics on the vnode operations
187 vsk_anchor_t
*vi_vskap
; /* anchor for vopstats' kstat */
188 vopstats_t
*vi_fstypevsp
; /* ptr to per-fstype vopstats */
189 vopstats_t vi_vopstats
; /* per-mount vnode op stats */
191 timespec_t vi_hrctime
; /* High-res creation time */
196 * Structure per mounted file system. Each mounted file system has
197 * an array of operations and an instance record.
199 * The file systems are kept on a doubly linked circular list headed by
201 * File system implementations should not access this list;
202 * it's intended for use only in the kernel's vfs layer.
204 * Each zone also has its own list of mounts, containing filesystems mounted
205 * somewhere within the filesystem tree rooted at the zone's rootpath. The
206 * list is doubly linked to match the global list.
208 * mnttab locking: the in-kernel mnttab uses the vfs_mntpt, vfs_resource and
209 * vfs_mntopts fields in the vfs_t. mntpt and resource are refstr_ts that
210 * are set at mount time and can only be modified during a remount.
211 * It is safe to read these fields if you can prevent a remount on the vfs,
212 * or through the convenience funcs vfs_getmntpoint() and vfs_getresource().
213 * The mntopts field may only be accessed through the provided convenience
214 * functions, as it is protected by the vfs list lock. Modifying a mount
215 * option requires grabbing the vfs list write lock, which can be a very
218 struct zone
; /* from zone.h */
219 struct fem_head
; /* from fem.h */
222 struct vfs
*vfs_next
; /* next VFS in VFS list */
223 struct vfs
*vfs_prev
; /* prev VFS in VFS list */
225 /* vfs_op should not be used directly. Accessor functions are provided */
226 vfsops_t
*vfs_op
; /* operations on VFS */
228 struct vnode
*vfs_vnodecovered
; /* vnode mounted on */
229 uint_t vfs_flag
; /* flags */
230 uint_t vfs_bsize
; /* native block size */
231 int vfs_fstype
; /* file system type index */
232 fsid_t vfs_fsid
; /* file system id */
233 void *vfs_data
; /* private data */
234 dev_t vfs_dev
; /* device of mounted VFS */
235 ulong_t vfs_bcount
; /* I/O count (accounting) */
236 struct vfs
*vfs_list
; /* sync list pointer */
237 struct vfs
*vfs_hash
; /* hash list pointer */
238 ksema_t vfs_reflock
; /* mount/unmount/sync lock */
239 uint_t vfs_count
; /* vfs reference count */
240 mntopts_t vfs_mntopts
; /* options mounted with */
241 refstr_t
*vfs_resource
; /* mounted resource name */
242 refstr_t
*vfs_mntpt
; /* mount point name */
243 time_t vfs_mtime
; /* time we were mounted */
244 vfs_impl_t
*vfs_implp
; /* impl specific data */
246 * Zones support. Note that the zone that "owns" the mount isn't
247 * necessarily the same as the zone in which the zone is visible.
248 * That is, vfs_zone and (vfs_zone_next|vfs_zone_prev) may refer to
251 struct zone
*vfs_zone
; /* zone that owns the mount */
252 struct vfs
*vfs_zone_next
; /* next VFS visible in zone */
253 struct vfs
*vfs_zone_prev
; /* prev VFS visible in zone */
255 struct fem_head
*vfs_femhead
; /* fs monitoring */
256 minor_t vfs_lofi_minor
; /* minor if lofi mount */
/*
 * Member-name shorthands: each name below, used as a member of an object
 * that has a vfs_implp pointer, reaches through that pointer to the
 * corresponding vi_* member.  vfs_implp must therefore be non-NULL
 * wherever these are used.
 */
#define vfs_featureset  vfs_implp->vi_featureset
#define vfs_vskap       vfs_implp->vi_vskap
#define vfs_fstypevsp   vfs_implp->vi_fstypevsp
#define vfs_vopstats    vfs_implp->vi_vopstats
#define vfs_hrctime     vfs_implp->vi_hrctime
/*
 * VFS flag bits.  Note that 0x04 is not assigned in this list.
 */
#define VFS_RDONLY      0x01    /* read-only vfs */
#define VFS_NOMNTTAB    0x02    /* vfs not seen in mnttab */
#define VFS_NOSETUID    0x08    /* setuid disallowed */
#define VFS_REMOUNT     0x10    /* modify mount options only */
#define VFS_NOTRUNC     0x20    /* does not truncate long file names */
#define VFS_UNLINKABLE  0x40    /* unlink(2) can be applied to root */
#define VFS_PXFS        0x80    /* clustering: global fs proxy vfs */
#define VFS_UNMOUNTED   0x100   /* file system has been unmounted */
#define VFS_NBMAND      0x200   /* allow non-blocking mandatory locks */
#define VFS_XATTR       0x400   /* fs supports extended attributes */
#define VFS_NODEVICES   0x800   /* device-special files disallowed */
#define VFS_NOEXEC      0x1000  /* executables disallowed */
#define VFS_STATS       0x2000  /* file system can collect stats */
#define VFS_XID         0x4000  /* file system supports extended ids */

/*
 * Placeholder strings for a resource or mount point that was not specified.
 */
#define VFS_NORESOURCE  "unspecified_resource"
#define VFS_NOMNTPT     "unspecified_mountpoint"
/*
 * VFS features are implemented as bits set in the vfs_t.
 * The vfs_feature_t typedef is a 64-bit number that will translate
 * into an element in an array of bitmaps and a bit in that element.
 * Developers must not depend on the implementation of this and
 * need to use vfs_has_feature()/vfs_set_feature() routines.
 */
typedef uint64_t    vfs_feature_t;

#define VFSFT_XVATTR            0x100000001 /* Supports xvattr for attrs */
#define VFSFT_CASEINSENSITIVE   0x100000002 /* Supports case-insensitive */
#define VFSFT_NOCASESENSITIVE   0x100000004 /* NOT case-sensitive */
#define VFSFT_DIRENTFLAGS       0x100000008 /* Supports dirent flags */
#define VFSFT_ACLONCREATE       0x100000010 /* Supports ACL on create */
#define VFSFT_ACEMASKONACCESS   0x100000020 /* Can use ACEMASK for access */
#define VFSFT_SYSATTR_VIEWS     0x100000040 /* Supports sysattr view i/f */
#define VFSFT_ACCESS_FILTER     0x100000080 /* dirents filtered by access */
#define VFSFT_REPARSE           0x100000100 /* Supports reparse point */
#define VFSFT_ZEROCOPY_SUPPORTED    0x100000200
                    /* Support loaning/returning cache buffer */
307 * Argument structure for mount(2).
309 * Flags are defined in <sys/mount.h>.
311 * Note that if the MS_SYSSPACE bit is set in flags, the pointer fields in
312 * this structure are to be interpreted as kernel addresses. File systems
313 * should be prepared for this possibility.
/*
 * Reasons for calling the vfs_mountroot() operation.
 */
enum whymountroot {
    ROOT_INIT,
    ROOT_REMOUNT,
    ROOT_UNMOUNT
};
typedef enum whymountroot whymountroot_t;
333 * Reasons for calling the VFS_VNSTATE():
341 typedef enum vntrans vntrans_t
;
344 * VFS_OPS defines all the vfs operations. It is used to define
345 * the vfsops structure (below) and the fs_func_p union (vfs_opreg.h).
348 int (*vfs_mount)(vfs_t *, vnode_t *, struct mounta *, cred_t *); \
349 int (*vfs_unmount)(vfs_t *, int, cred_t *); \
350 int (*vfs_root)(vfs_t *, vnode_t **); \
351 int (*vfs_statvfs)(vfs_t *, statvfs64_t *); \
352 int (*vfs_sync)(vfs_t *, short, cred_t *); \
353 int (*vfs_vget)(vfs_t *, vnode_t **, fid_t *); \
354 int (*vfs_mountroot)(vfs_t *, enum whymountroot); \
355 void (*vfs_freevfs)(vfs_t *); \
356 int (*vfs_vnstate)(vfs_t *, vnode_t *, vntrans_t) /* NB: No ";" */
359 * Operations supported on virtual file system.
362 VFS_OPS
; /* Signature of all vfs operations (vfsops) */
365 extern int fsop_mount(vfs_t
*, vnode_t
*, struct mounta
*, cred_t
*);
366 extern int fsop_unmount(vfs_t
*, int, cred_t
*);
367 extern int fsop_root(vfs_t
*, vnode_t
**);
368 extern int fsop_statfs(vfs_t
*, statvfs64_t
*);
369 extern int fsop_sync(vfs_t
*, short, cred_t
*);
370 extern int fsop_vget(vfs_t
*, vnode_t
**, fid_t
*);
371 extern int fsop_mountroot(vfs_t
*, enum whymountroot
);
372 extern void fsop_freefs(vfs_t
*);
373 extern int fsop_sync_by_kind(int, short, cred_t
*);
374 extern int fsop_vnstate(vfs_t
*, vnode_t
*, vntrans_t
);
/*
 * Wrapper macros for the vfs operations.  Each one forwards its
 * arguments, unchanged and in order, to the matching fsop_*() function.
 * Note the two names that differ from their macro: VFS_STATVFS() calls
 * fsop_statfs() and VFS_FREEVFS() calls fsop_freefs().
 */
#define VFS_MOUNT(vfsp, mvp, uap, cr)   fsop_mount(vfsp, mvp, uap, cr)
#define VFS_UNMOUNT(vfsp, flag, cr)     fsop_unmount(vfsp, flag, cr)
#define VFS_ROOT(vfsp, vpp)             fsop_root(vfsp, vpp)
#define VFS_STATVFS(vfsp, sp)           fsop_statfs(vfsp, sp)
#define VFS_SYNC(vfsp, flag, cr)        fsop_sync(vfsp, flag, cr)
#define VFS_VGET(vfsp, vpp, fidp)       fsop_vget(vfsp, vpp, fidp)
#define VFS_MOUNTROOT(vfsp, init)       fsop_mountroot(vfsp, init)
#define VFS_FREEVFS(vfsp)               fsop_freefs(vfsp)
#define VFS_VNSTATE(vfsp, vn, ns)       fsop_vnstate(vfsp, vn, ns)
/*
 * String names of the vfs operations, one per member of VFS_OPS.
 */
#define VFSNAME_MOUNT       "mount"
#define VFSNAME_UNMOUNT     "unmount"
#define VFSNAME_ROOT        "root"
#define VFSNAME_STATVFS     "statvfs"
#define VFSNAME_SYNC        "sync"
#define VFSNAME_VGET        "vget"
#define VFSNAME_MOUNTROOT   "mountroot"
#define VFSNAME_FREEVFS     "freevfs"
#define VFSNAME_VNSTATE     "vnstate"
396 * Filesystem type switch table.
399 typedef struct vfssw
{
400 char *vsw_name
; /* type name -- max len _ST_FSTYPSZ */
401 int (*vsw_init
) (int, char *);
402 /* init routine (for non-loadable fs only) */
403 int vsw_flag
; /* flags */
404 mntopts_t vsw_optproto
; /* mount options table prototype */
405 uint_t vsw_count
; /* count of references */
406 kmutex_t vsw_lock
; /* lock to protect vsw_count */
407 vfsops_t vsw_vfsops
; /* filesystem operations vector */
411 * Filesystem type definition record. All file systems must export a record
412 * of this type through their modlfs structure. N.B., changing the version
413 * number requires a change in sys/modctl.h.
416 typedef struct vfsdef_v5
{
417 int def_version
; /* structure version, must be first */
418 char *name
; /* filesystem type name */
419 int (*init
) (int, char *); /* init routine */
420 int flags
; /* filesystem flags */
421 mntopts_t
*optproto
; /* mount options table prototype */
/*
 * vfsdef_t names the current (version 5) filesystem type definition
 * record.
 */
typedef struct vfsdef_v5 vfsdef_t;
/*
 * flags for vfssw and vfsdef
 */
#define VSW_HASPROTO    0x01    /* struct has a mount options prototype */
#define VSW_CANRWRO     0x02    /* file system can transition from rw to ro */
#define VSW_CANREMOUNT  0x04    /* file system supports remounts */
#define VSW_NOTZONESAFE 0x08    /* zone_enter(2) should fail for these files */
#define VSW_VOLATILEDEV 0x10    /* vfs_dev can change each time fs is mounted */
#define VSW_STATS       0x20    /* file system can collect stats */
#define VSW_XID         0x40    /* file system supports extended ids */
#define VSW_CANLOFI     0x80    /* file system supports lofi mounts */
#define VSW_ZMOUNT      0x100   /* file system always allowed in a zone */

#define VSW_INSTALLED   0x8000  /* this vsw is associated with a file system */
453 void vfs_freevfsops(vfsops_t
*);
454 int vfs_freevfsops_by_type(int);
455 void vfs_setops(vfs_t
*, vfsops_t
*);
456 vfsops_t
*vfs_getops(vfs_t
*vfsp
);
457 int vfs_matchops(vfs_t
*, vfsops_t
*);
458 int vfs_can_sync(vfs_t
*vfsp
);
459 vfs_t
*vfs_alloc(int);
460 void vfs_free(vfs_t
*);
461 void vfs_init(vfs_t
*vfsp
, vfsops_t
*, void *);
462 void vfsimpl_setup(vfs_t
*vfsp
);
463 void vfsimpl_teardown(vfs_t
*vfsp
);
464 void vn_exists(vnode_t
*);
465 void vn_idle(vnode_t
*);
466 void vn_reclaim(vnode_t
*);
467 void vn_invalid(vnode_t
*);
470 int svm_rootconf(void);
471 int domount(char *, struct mounta
*, vnode_t
*, struct cred
*,
473 int dounmount(struct vfs
*, int, cred_t
*);
474 int vfs_lock(struct vfs
*);
475 int vfs_rlock(struct vfs
*);
476 void vfs_lock_wait(struct vfs
*);
477 void vfs_rlock_wait(struct vfs
*);
478 void vfs_unlock(struct vfs
*);
479 int vfs_lock_held(struct vfs
*);
480 struct _kthread
*vfs_lock_owner(struct vfs
*);
483 void vfs_mountroot(void);
484 void vfs_add(vnode_t
*, struct vfs
*, int);
485 void vfs_remove(struct vfs
*);
487 /* VFS feature routines */
488 void vfs_set_feature(vfs_t
*, vfs_feature_t
);
489 int vfs_has_feature(vfs_t
*, vfs_feature_t
);
490 void vfs_propagate_features(vfs_t
*, vfs_t
*);
492 /* The following functions are not for general use by filesystems */
494 void vfs_createopttbl(mntopts_t
*, const char *);
495 void vfs_copyopttbl(const mntopts_t
*, mntopts_t
*);
496 void vfs_mergeopttbl(const mntopts_t
*, const mntopts_t
*, mntopts_t
*);
497 void vfs_freeopttbl(mntopts_t
*);
498 void vfs_parsemntopts(mntopts_t
*, char *, int);
499 int vfs_buildoptionstr(const mntopts_t
*, char *, int);
500 struct mntopt
*vfs_hasopt(const mntopts_t
*, const char *);
501 void vfs_mnttab_modtimeupd(void);
503 void vfs_clearmntopt(struct vfs
*, const char *);
504 void vfs_setmntopt(struct vfs
*, const char *, const char *, int);
505 void vfs_setresource(struct vfs
*, const char *);
506 void vfs_setmntpoint(struct vfs
*, const char *);
507 refstr_t
*vfs_getresource(const struct vfs
*);
508 refstr_t
*vfs_getmntpoint(const struct vfs
*);
509 int vfs_optionisset(const struct vfs
*, const char *, char **);
510 int vfs_settag(uint_t
, uint_t
, const char *, const char *, cred_t
*);
511 int vfs_clrtag(uint_t
, uint_t
, const char *, const char *, cred_t
*);
512 void vfs_syncall(void);
513 void vfs_syncprogress(void);
515 void vfs_unmountall(void);
516 void vfs_make_fsid(fsid_t
*, dev_t
, int);
517 void vfs_addmip(dev_t
, struct vfs
*);
518 void vfs_delmip(struct vfs
*);
519 int vfs_devismounted(dev_t
);
520 int vfs_devmounting(dev_t
, struct vfs
*);
521 int vfs_opsinuse(vfsops_t
*);
522 struct vfs
*getvfs(fsid_t
*);
523 struct vfs
*vfs_dev2vfsp(dev_t
);
524 struct vfs
*vfs_mntpoint2vfsp(const char *);
525 struct vfssw
*allocate_vfssw(const char *);
526 struct vfssw
*vfs_getvfssw(const char *);
527 struct vfssw
*vfs_getvfsswbyname(const char *);
528 struct vfssw
*vfs_getvfsswbyvfsops(vfsops_t
*);
529 void vfs_refvfssw(struct vfssw
*);
530 void vfs_unrefvfssw(struct vfssw
*);
531 uint_t
vf_to_stf(uint_t
);
532 void vfs_mnttab_modtime(timespec_t
*);
533 void vfs_mnttab_poll(timespec_t
*, struct pollhead
**);
535 void vfs_list_lock(void);
536 void vfs_list_read_lock(void);
537 void vfs_list_unlock(void);
538 void vfs_list_add(struct vfs
*);
539 void vfs_list_remove(struct vfs
*);
540 void vfs_hold(vfs_t
*vfsp
);
541 void vfs_rele(vfs_t
*vfsp
);
542 void fs_freevfs(vfs_t
*);
543 void vfs_root_redev(vfs_t
*vfsp
, dev_t ndev
, int fstype
);
545 int vfs_zone_change_safe(vfs_t
*);
547 int vfs_get_lofi(vfs_t
*, vnode_t
**);
/*
 * VFSHASH(maj, min): sum of the two arguments, converted to int and
 * masked with (vfshsz - 1).  The mask only yields an index in
 * [0, vfshsz) when vfshsz is a power of two.
 *
 * VFS_ON_LIST(vfsp): true when vfsp->vfs_next is neither NULL nor vfsp
 * itself.  The argument is evaluated more than once, so it must be free
 * of side effects.
 */
#define VFSHASH(maj, min) (((int)((maj)+(min))) & (vfshsz - 1))
#define VFS_ON_LIST(vfsp) \
    ((vfsp)->vfs_next != (vfsp) && (vfsp)->vfs_next != NULL)
557 extern struct vfssw vfssw
[]; /* table of filesystem types */
558 extern krwlock_t vfssw_lock
;
559 extern char rootfstype
[]; /* name of root fstype */
560 extern const int nfstype
; /* # of elements in vfssw array */
561 extern vfsops_t
*EIO_vfsops
; /* operations for vfs being torn-down */
564 * The following variables are private to the kernel's vfs layer. File
565 * system implementations should not access them.
567 extern struct vfs
*rootvfs
; /* ptr to root vfs structure */
569 struct vfs
*rvfs_head
; /* head vfs in chain */
570 kmutex_t rvfs_lock
; /* mutex protecting this chain */
571 uint32_t rvfs_len
; /* length of this chain */
573 extern rvfs_t
*rvfs_list
;
574 extern int vfshsz
; /* # of elements in rvfs_head array */
575 extern const mntopts_t vfs_mntopts
; /* globally recognized options */
577 #endif /* defined(_KERNEL) */
579 #define VFS_HOLD(vfsp) { \
583 #define VFS_RELE(vfsp) { \
587 #define VFS_INIT(vfsp, op, data) { \
588 vfs_init((vfsp), (op), (data)); \
/*
 * Predicates on a vfssw entry:
 *   VFS_INSTALLED(vfsswp)  - true when VSW_INSTALLED is set in vsw_flag.
 *   ALLOCATED_VFSSW(vswp)  - true when vsw_name is a non-empty string
 *                            (vsw_name itself must not be NULL).
 *
 * Wrappers around the vfssw_lock reader/writer lock: enter as reader or
 * writer, exit, and test whether the lock is held / held for writing.
 */
#define VFS_INSTALLED(vfsswp)   (((vfsswp)->vsw_flag & VSW_INSTALLED) != 0)
#define ALLOCATED_VFSSW(vswp)   ((vswp)->vsw_name[0] != '\0')
#define RLOCK_VFSSW()           (rw_enter(&vfssw_lock, RW_READER))
#define RUNLOCK_VFSSW()         (rw_exit(&vfssw_lock))
#define WLOCK_VFSSW()           (rw_enter(&vfssw_lock, RW_WRITER))
#define WUNLOCK_VFSSW()         (rw_exit(&vfssw_lock))
#define VFSSW_LOCKED()          (RW_LOCK_HELD(&vfssw_lock))
#define VFSSW_WRITE_LOCKED()    (RW_WRITE_HELD(&vfssw_lock))
/*
 * SYNC_* flag bits.
 * NOTE(review): presumably these are the values of the "short" flag
 * argument of VFS_SYNC() -- confirm against the callers.
 */
#define SYNC_ATTR   0x01    /* sync attributes only */
#define SYNC_CLOSE  0x02    /* close open file */
#define SYNC_ALL    0x04    /* force to sync all fs */
611 #endif /* _SYS_VFS_H */