4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Toomas Soome <tsoome@me.com>
25 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
26 * Copyright 2016 Nexenta Systems, Inc.
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
33 * Portions of this source code were derived from Berkeley 4.3 BSD
34 * under license from the Regents of the University of California.
41 #include <sys/types.h>
42 #include <sys/t_lock.h>
44 #include <sys/vnode.h>
45 #include <sys/statvfs.h>
46 #include <sys/refstr.h>
55 * Data associated with mounted file systems.
/*
 * Operations vector.  This is used internal to the kernel; file systems
 * supply their list of operations via vfs_setfsops().
 */
typedef struct vfsops vfsops_t;
66 * File system identifier. Should be unique (at least per machine).
69 int val
[2]; /* file system id type */
73 * File identifier. Should be unique per filesystem on a single
74 * machine. This is typically called by a stateless file server
75 * in order to generate "file handles".
77 * Many underlying file systems cast a struct fid into other
78 * file system dependent structures which may require 4 byte alignment.
79 * Because a fid starts with a short it may not be 4 byte aligned, the
80 * fid_pad will force the alignment.
/* NOTE(review): presumably the older, smaller fid data size -- confirm. */
#define OLD_MAXFIDSZ    16
89 ushort_t len
; /* length of data in bytes */
90 char data
[MAXFIDSZ
]; /* data (variable len) */
97 * Solaris 64 - use old-style cache format with 32-bit aligned fid for on-disk
98 * struct compatibility.
100 typedef struct fid32
{
104 uint16_t len
; /* length of data in bytes */
105 char data
[MAXFIDSZ
]; /* data (variable len) */
109 #else /* not _SYSCALL32 */
111 typedef fid_t fid32_t
;
112 #endif /* _SYSCALL32 */
/* Shorthand for the len and data members nested under un._fid. */
#define fid_len         un._fid.len
#define fid_data        un._fid.data
118 * Structure defining a mount option for a filesystem.
119 * option names are found in mntent.h
121 typedef struct mntopt
{
122 char *mo_name
; /* option name */
123 char **mo_cancel
; /* list of options cancelled by this one */
124 char *mo_arg
; /* argument string for this option */
125 int mo_flags
; /* flags for this mount option */
126 void *mo_data
; /* filesystem specific data */
/*
 * Flags that apply to mount options
 */
#define MO_SET          0x01    /* option is set */
#define MO_NODISPLAY    0x02    /* option not listed in mnttab */
#define MO_HASVALUE     0x04    /* option takes a value */
#define MO_IGNORE       0x08    /* option ignored by parser */
#define MO_DEFAULT      MO_SET  /* option is on by default */
#define MO_TAG          0x10    /* flags a tag set by user program */
#define MO_EMPTY        0x20    /* empty space in option table */
/* Modifier bits; the per-line notes describe how each affects an option. */
#define VFS_NOFORCEOPT  0x01    /* honor MO_IGNORE (don't set option) */
#define VFS_DISPLAY     0x02    /* Turn off MO_NODISPLAY bit for opt */
#define VFS_NODISPLAY   0x04    /* Turn on MO_NODISPLAY bit for opt */
#define VFS_CREATEOPT   0x08    /* Create the opt if it's not there */
147 * Structure holding mount option strings for the mounted file system.
149 typedef struct mntopts
{
150 uint_t mo_count
; /* number of entries in table */
151 mntopt_t
*mo_list
; /* list of mount options */
155 * The kstat structures associated with the vopstats are kept in an
156 * AVL tree. This is to avoid the case where a file system does not
157 * use a unique fsid_t for each vfs (e.g., namefs). In order to do
158 * this, we need a structure that the AVL tree can use that also
159 * references the kstat.
160 * Note that the vsk_fsid is generated from the value reported by
163 typedef struct vskstat_anchor
{
164 avl_node_t vsk_node
; /* Required for use by AVL routines */
165 kstat_t
*vsk_ksp
; /* kstat structure for vopstats */
166 ulong_t vsk_fsid
; /* fsid associated w/this FS */
169 extern avl_tree_t vskstat_tree
;
170 extern kmutex_t vskstat_tree_lock
;
173 * Structure per mounted file system. Each mounted file system has
174 * an array of operations and an instance record.
176 * The file systems are kept on a doubly linked circular list headed by
178 * File system implementations should not access this list;
179 * it's intended for use only in the kernel's vfs layer.
181 * Each zone also has its own list of mounts, containing filesystems mounted
182 * somewhere within the filesystem tree rooted at the zone's rootpath. The
183 * list is doubly linked to match the global list.
185 * mnttab locking: the in-kernel mnttab uses the vfs_mntpt, vfs_resource and
186 * vfs_mntopts fields in the vfs_t. mntpt and resource are refstr_ts that
187 * are set at mount time and can only be modified during a remount.
188 * It is safe to read these fields if you can prevent a remount on the vfs,
189 * or through the convenience funcs vfs_getmntpoint() and vfs_getresource().
190 * The mntopts field may only be accessed through the provided convenience
191 * functions, as it is protected by the vfs list lock. Modifying a mount
192 * option requires grabbing the vfs list write lock, which can be a very
/* Forward declarations; only pointers to these types are needed below. */
struct zone;            /* from zone.h */
struct fem_head;        /* from fem.h */
199 struct vfs
*vfs_next
; /* next VFS in VFS list */
200 struct vfs
*vfs_prev
; /* prev VFS in VFS list */
202 /* vfs_op should not be used directly. Accessor functions are provided */
203 const struct vfsops
*vfs_op
; /* operations on VFS */
205 struct vnode
*vfs_vnodecovered
; /* vnode mounted on */
206 uint_t vfs_flag
; /* flags */
207 uint_t vfs_bsize
; /* native block size */
208 int vfs_fstype
; /* file system type index */
209 fsid_t vfs_fsid
; /* file system id */
210 void *vfs_data
; /* private data */
211 dev_t vfs_dev
; /* device of mounted VFS */
212 ulong_t vfs_bcount
; /* I/O count (accounting) */
213 struct vfs
*vfs_list
; /* sync list pointer */
214 struct vfs
*vfs_hash
; /* hash list pointer */
215 ksema_t vfs_reflock
; /* mount/unmount/sync lock */
216 uint_t vfs_count
; /* vfs reference count */
217 mntopts_t vfs_mntopts
; /* options mounted with */
218 refstr_t
*vfs_resource
; /* mounted resource name */
219 refstr_t
*vfs_mntpt
; /* mount point name */
220 time_t vfs_mtime
; /* time we were mounted */
221 struct vfs_impl
*vfs_implp
; /* impl specific data */
223 * Zones support. Note that the zone that "owns" the mount isn't
224 * necessarily the same as the zone in which the mount is visible.
225 * That is, vfs_zone and (vfs_zone_next|vfs_zone_prev) may refer to
228 struct zone
*vfs_zone
; /* zone that owns the mount */
229 struct vfs
*vfs_zone_next
; /* next VFS visible in zone */
230 struct vfs
*vfs_zone_prev
; /* prev VFS visible in zone */
232 struct fem_head
*vfs_femhead
; /* fs monitoring */
233 uint32_t vfs_lofi_id
; /* ID if lofi mount */
/*
 * Shorthand names for members reached through vfs_implp.  Each one expands
 * to a dereference of vfs_implp, so that pointer must be valid at the
 * point of use.
 */
#define vfs_featureset  vfs_implp->vi_featureset
#define vfs_vskap       vfs_implp->vi_vskap
#define vfs_fstypevsp   vfs_implp->vi_fstypevsp
#define vfs_vopstats    vfs_implp->vi_vopstats
#define vfs_hrctime     vfs_implp->vi_hrctime
/*
 * VFS flag bits.  NOTE(review): presumably stored in vfs_flag -- confirm.
 * The value 0x04 is not assigned in this list.
 */
#define VFS_RDONLY      0x01    /* read-only vfs */
#define VFS_NOMNTTAB    0x02    /* vfs not seen in mnttab */
#define VFS_NOSETUID    0x08    /* setuid disallowed */
#define VFS_REMOUNT     0x10    /* modify mount options only */
#define VFS_NOTRUNC     0x20    /* does not truncate long file names */
#define VFS_UNLINKABLE  0x40    /* unlink(2) can be applied to root */
#define VFS_PXFS        0x80    /* clustering: global fs proxy vfs */
#define VFS_UNMOUNTED   0x100   /* file system has been unmounted */
#define VFS_NBMAND      0x200   /* allow non-blocking mandatory locks */
#define VFS_XATTR       0x400   /* fs supports extended attributes */
#define VFS_NODEVICES   0x800   /* device-special files disallowed */
#define VFS_NOEXEC      0x1000  /* executables disallowed */
#define VFS_STATS       0x2000  /* file system can collect stats */
#define VFS_XID         0x4000  /* file system supports extended ids */
/* Placeholder strings for an unspecified resource or mount point. */
#define VFS_NORESOURCE  "unspecified_resource"
#define VFS_NOMNTPT     "unspecified_mountpoint"
/*
 * VFS features are implemented as bits set in the vfs_t.
 * The vfs_feature_t typedef is a 64-bit number that will translate
 * into an element in an array of bitmaps and a bit in that element.
 * Developers must not depend on the implementation of this and
 * need to use vfs_has_feature()/vfs_set_feature() routines.
 */
typedef uint64_t vfs_feature_t;

#define VFSFT_XVATTR            0x100000001     /* Supports xvattr for attrs */
#define VFSFT_CASEINSENSITIVE   0x100000002     /* Supports case-insensitive */
#define VFSFT_NOCASESENSITIVE   0x100000004     /* NOT case-sensitive */
#define VFSFT_DIRENTFLAGS       0x100000008     /* Supports dirent flags */
#define VFSFT_ACLONCREATE       0x100000010     /* Supports ACL on create */
#define VFSFT_ACEMASKONACCESS   0x100000020     /* Can use ACEMASK for access */
#define VFSFT_SYSATTR_VIEWS     0x100000040     /* Supports sysattr view i/f */
#define VFSFT_ACCESS_FILTER     0x100000080     /* dirents filtered by access */
#define VFSFT_REPARSE           0x100000100     /* Supports reparse point */
#define VFSFT_ZEROCOPY_SUPPORTED        0x100000200
                                /* Support loaning /returning cache buffer */
284 * Argument structure for mount(2).
286 * Flags are defined in <sys/mount.h>.
288 * Note that if the MS_SYSSPACE bit is set in flags, the pointer fields in
289 * this structure are to be interpreted as kernel addresses. File systems
290 * should be prepared for this possibility.
/*
 * Reasons for calling the vfs_mountroot() operation.
 */
enum whymountroot {
	ROOT_INIT,
	ROOT_REMOUNT,
	ROOT_UNMOUNT
};
typedef enum whymountroot whymountroot_t;
310 * Reasons for calling the VFS_VNSTATE():
318 typedef enum vntrans vntrans_t
;
321 * Operations supported on virtual file system.
324 int (*vfs_mount
)(struct vfs
*, struct vnode
*, struct mounta
*, cred_t
*);
325 int (*vfs_unmount
)(struct vfs
*, int, cred_t
*);
326 int (*vfs_root
)(struct vfs
*, struct vnode
**);
327 int (*vfs_statvfs
)(struct vfs
*, statvfs64_t
*);
328 int (*vfs_sync
)(struct vfs
*, short, cred_t
*);
329 int (*vfs_vget
)(struct vfs
*, struct vnode
**, fid_t
*);
330 int (*vfs_mountroot
)(struct vfs
*, enum whymountroot
);
331 void (*vfs_freevfs
)(struct vfs
*);
332 int (*vfs_vnstate
)(struct vfs
*, struct vnode
*, vntrans_t
);
335 extern int fsop_mount(vfs_t
*, vnode_t
*, struct mounta
*, cred_t
*);
336 extern int fsop_unmount(vfs_t
*, int, cred_t
*);
337 extern int fsop_root(vfs_t
*, vnode_t
**);
338 extern int fsop_statfs(vfs_t
*, statvfs64_t
*);
339 extern int fsop_sync(vfs_t
*, short, cred_t
*);
340 extern int fsop_vget(vfs_t
*, vnode_t
**, fid_t
*);
341 extern int fsop_mountroot(vfs_t
*, enum whymountroot
);
342 extern void fsop_freefs(vfs_t
*);
343 extern int fsop_sync_by_kind(int, short, cred_t
*);
344 extern int fsop_vnstate(vfs_t
*, vnode_t
*, vntrans_t
);
/*
 * Convenience wrappers: each macro forwards its arguments, unchanged and
 * in order, to the matching fsop_*() function.
 */
#define VFS_MOUNT(vfsp, mvp, uap, cr)   fsop_mount(vfsp, mvp, uap, cr)
#define VFS_UNMOUNT(vfsp, flag, cr)     fsop_unmount(vfsp, flag, cr)
#define VFS_ROOT(vfsp, vpp)             fsop_root(vfsp, vpp)
#define VFS_STATVFS(vfsp, sp)           fsop_statfs(vfsp, sp)
#define VFS_SYNC(vfsp, flag, cr)        fsop_sync(vfsp, flag, cr)
#define VFS_VGET(vfsp, vpp, fidp)       fsop_vget(vfsp, vpp, fidp)
#define VFS_MOUNTROOT(vfsp, init)       fsop_mountroot(vfsp, init)
#define VFS_FREEVFS(vfsp)               fsop_freefs(vfsp)
#define VFS_VNSTATE(vfsp, vn, ns)       fsop_vnstate(vfsp, vn, ns)
/* String names, one per vfs operation. */
#define VFSNAME_MOUNT           "mount"
#define VFSNAME_UNMOUNT         "unmount"
#define VFSNAME_ROOT            "root"
#define VFSNAME_STATVFS         "statvfs"
#define VFSNAME_SYNC            "sync"
#define VFSNAME_VGET            "vget"
#define VFSNAME_MOUNTROOT       "mountroot"
#define VFSNAME_FREEVFS         "freevfs"
#define VFSNAME_VNSTATE         "vnstate"
366 * Filesystem type switch table.
369 typedef struct vfssw
{
370 char *vsw_name
; /* type name -- max len _ST_FSTYPSZ */
371 int (*vsw_init
) (int, char *);
372 /* init routine (for non-loadable fs only) */
373 int vsw_flag
; /* flags */
374 mntopts_t vsw_optproto
; /* mount options table prototype */
375 uint_t vsw_count
; /* count of references */
376 kmutex_t vsw_lock
; /* lock to protect vsw_count */
377 struct vfsops vsw_vfsops
; /* filesystem operations vector */
381 * Filesystem type definition record. All file systems must export a record
382 * of this type through their modlfs structure. N.B., changing the version
383 * number requires a change in sys/modctl.h.
386 typedef struct vfsdef_v5
{
387 int def_version
; /* structure version, must be first */
388 char *name
; /* filesystem type name */
389 int (*init
) (int, char *); /* init routine */
390 int flags
; /* filesystem flags */
391 mntopts_t
*optproto
; /* mount options table prototype */
394 typedef struct vfsdef_v5 vfsdef_t
;
/*
 * flags for vfssw and vfsdef
 */
#define VSW_HASPROTO    0x01    /* struct has a mount options prototype */
#define VSW_CANRWRO     0x02    /* file system can transition from rw to ro */
#define VSW_CANREMOUNT  0x04    /* file system supports remounts */
#define VSW_NOTZONESAFE 0x08    /* zone_enter(2) should fail for these files */
#define VSW_VOLATILEDEV 0x10    /* vfs_dev can change each time fs is mounted */
#define VSW_STATS       0x20    /* file system can collect stats */
#define VSW_XID         0x40    /* file system supports extended ids */
#define VSW_CANLOFI     0x80    /* file system supports lofi mounts */
#define VSW_ZMOUNT      0x100   /* file system always allowed in a zone */
#define VSW_MOUNTDEV    0x200   /* file system is mounted via device path */

#define VSW_INSTALLED   0x8000  /* this vsw is associated with a file system */
/*
 * A flag for vfs_setpath().
 */
#define VFSSP_VERBATIM  0x1     /* do not prefix the supplied path */
/*
 * Private vfs data, NOT to be used by a file system implementation.
 */

/* Number of uint32_t elements in the vi_featureset bitmap array. */
#define VFS_FEATURE_MAXSZ       4
429 typedef struct vfs_impl
{
430 /* Counted array - Bitmap of vfs features */
431 uint32_t vi_featureset
[VFS_FEATURE_MAXSZ
];
433 * Support for statistics on the vnode operations
435 vsk_anchor_t
*vi_vskap
; /* anchor for vopstats' kstat */
436 vopstats_t
*vi_fstypevsp
; /* ptr to per-fstype vopstats */
437 vopstats_t vi_vopstats
; /* per-mount vnode op stats */
439 timespec_t vi_hrctime
; /* High-res creation time */
441 zone_ref_t vi_zone_ref
; /* reference to zone */
451 int vfs_setfsops(int, const struct vfsops
*);
452 int vfs_freevfsops_by_type(int);
453 void vfs_setops(struct vfs
*, const struct vfsops
*);
454 const struct vfsops
*vfs_getops(struct vfs
*);
455 int vfs_matchops(struct vfs
*, const struct vfsops
*);
456 int vfs_can_sync(vfs_t
*vfsp
);
457 vfs_t
*vfs_alloc(int);
458 void vfs_free(vfs_t
*);
459 void vfs_init(struct vfs
*vfsp
, const struct vfsops
*, void *);
460 void vfsimpl_setup(vfs_t
*vfsp
);
461 void vfsimpl_teardown(vfs_t
*vfsp
);
462 void vn_exists(vnode_t
*);
463 void vn_idle(vnode_t
*);
464 void vn_reclaim(vnode_t
*);
465 void vn_invalid(vnode_t
*);
468 int domount(char *, struct mounta
*, vnode_t
*, struct cred
*,
470 int dounmount(struct vfs
*, int, cred_t
*);
471 int vfs_lock(struct vfs
*);
472 int vfs_rlock(struct vfs
*);
473 void vfs_lock_wait(struct vfs
*);
474 void vfs_rlock_wait(struct vfs
*);
475 void vfs_unlock(struct vfs
*);
476 int vfs_lock_held(struct vfs
*);
477 struct _kthread
*vfs_lock_owner(struct vfs
*);
480 void vfs_mountroot(void);
481 void vfs_add(vnode_t
*, struct vfs
*, int);
482 void vfs_remove(struct vfs
*);
484 /* VFS feature routines */
485 void vfs_set_feature(vfs_t
*, vfs_feature_t
);
486 void vfs_clear_feature(vfs_t
*, vfs_feature_t
);
487 int vfs_has_feature(vfs_t
*, vfs_feature_t
);
488 void vfs_propagate_features(vfs_t
*, vfs_t
*);
490 /* The following functions are not for general use by filesystems */
492 void vfs_createopttbl(mntopts_t
*, const char *);
493 void vfs_copyopttbl(const mntopts_t
*, mntopts_t
*);
494 void vfs_mergeopttbl(const mntopts_t
*, const mntopts_t
*, mntopts_t
*);
495 void vfs_freeopttbl(mntopts_t
*);
496 void vfs_parsemntopts(mntopts_t
*, char *, int);
497 int vfs_buildoptionstr(const mntopts_t
*, char *, int);
498 struct mntopt
*vfs_hasopt(const mntopts_t
*, const char *);
499 void vfs_mnttab_modtimeupd(void);
501 void vfs_clearmntopt(struct vfs
*, const char *);
502 void vfs_setmntopt(struct vfs
*, const char *, const char *, int);
503 void vfs_setresource(struct vfs
*, const char *, uint32_t);
504 void vfs_setmntpoint(struct vfs
*, const char *, uint32_t);
505 refstr_t
*vfs_getresource(const struct vfs
*);
506 refstr_t
*vfs_getmntpoint(const struct vfs
*);
507 int vfs_optionisset(const struct vfs
*, const char *, char **);
508 int vfs_settag(uint_t
, uint_t
, const char *, const char *, cred_t
*);
509 int vfs_clrtag(uint_t
, uint_t
, const char *, const char *, cred_t
*);
510 void vfs_syncall(void);
512 void vfs_unmountall(void);
513 void vfs_make_fsid(fsid_t
*, dev_t
, int);
514 void vfs_addmip(dev_t
, struct vfs
*);
515 void vfs_delmip(struct vfs
*);
516 int vfs_devismounted(dev_t
);
517 int vfs_devmounting(dev_t
, struct vfs
*);
518 int vfs_opsinuse(const struct vfsops
*);
519 struct vfs
*getvfs(fsid_t
*);
520 struct vfs
*vfs_dev2vfsp(dev_t
);
521 struct vfs
*vfs_mntpoint2vfsp(const char *);
522 struct vfssw
*allocate_vfssw(const char *);
523 struct vfssw
*vfs_getvfssw(const char *);
524 struct vfssw
*vfs_getvfsswbyname(const char *);
525 struct vfssw
*vfs_getvfsswbyvfsops(const struct vfsops
*);
526 void vfs_refvfssw(struct vfssw
*);
527 void vfs_unrefvfssw(struct vfssw
*);
528 uint_t
vf_to_stf(uint_t
);
529 void vfs_mnttab_modtime(timespec_t
*);
530 void vfs_mnttab_poll(timespec_t
*, struct pollhead
**);
532 void vfs_list_lock(void);
533 void vfs_list_read_lock(void);
534 void vfs_list_unlock(void);
535 void vfs_list_add(struct vfs
*);
536 void vfs_list_remove(struct vfs
*);
537 void vfs_hold(vfs_t
*vfsp
);
538 void vfs_rele(vfs_t
*vfsp
);
539 void fs_freevfs(vfs_t
*);
540 void vfs_root_redev(vfs_t
*vfsp
, dev_t ndev
, int fstype
);
542 int vfs_zone_change_safe(vfs_t
*);
544 int vfs_get_lofi(vfs_t
*, vnode_t
**);
/*
 * VFSHASH: add the two values, convert the sum to int, and mask it with
 * (vfshsz - 1).  The mask only covers every bucket when vfshsz is a power
 * of two.
 */
#define VFSHASH(maj, min) (((int)((maj)+(min))) & (vfshsz - 1))

/*
 * VFS_ON_LIST: true when vfs_next is neither NULL nor a pointer back to
 * the vfs itself.  Evaluates (vfsp) twice.
 */
#define VFS_ON_LIST(vfsp) \
	((vfsp)->vfs_next != (vfsp) && (vfsp)->vfs_next != NULL)
554 extern struct vfssw vfssw
[]; /* table of filesystem types */
555 extern krwlock_t vfssw_lock
;
556 extern char rootfstype
[]; /* name of root fstype */
557 extern const int nfstype
; /* # of elements in vfssw array */
558 extern const struct vfsops EIO_vfsops
; /* operations for vfs being torn-down */
561 * The following variables are private to the kernel's vfs layer. File
562 * system implementations should not access them.
extern struct vfs *rootvfs;             /* ptr to root vfs structure */
566 struct vfs
*rvfs_head
; /* head vfs in chain */
567 kmutex_t rvfs_lock
; /* mutex protecting this chain */
568 uint32_t rvfs_len
; /* length of this chain */
570 extern rvfs_t
*rvfs_list
;
571 extern int vfshsz
; /* # of elements in rvfs_head array */
572 extern const mntopts_t vfs_mntopts
; /* globally recognized options */
574 #endif /* defined(_KERNEL) */
576 #define VFS_HOLD(vfsp) { \
580 #define VFS_RELE(vfsp) { \
584 #define VFS_INIT(vfsp, op, data) { \
585 vfs_init((vfsp), (op), (data)); \
/*
 * vfssw helpers.  VFS_INSTALLED tests the VSW_INSTALLED bit of vsw_flag;
 * ALLOCATED_VFSSW is true when vsw_name is a non-empty string.  The
 * remaining macros take, drop, or test vfssw_lock as reader or writer.
 */
#define VFS_INSTALLED(vfsswp)   (((vfsswp)->vsw_flag & VSW_INSTALLED) != 0)
#define ALLOCATED_VFSSW(vswp)   ((vswp)->vsw_name[0] != '\0')
#define RLOCK_VFSSW()           (rw_enter(&vfssw_lock, RW_READER))
#define RUNLOCK_VFSSW()         (rw_exit(&vfssw_lock))
#define WLOCK_VFSSW()           (rw_enter(&vfssw_lock, RW_WRITER))
#define WUNLOCK_VFSSW()         (rw_exit(&vfssw_lock))
#define VFSSW_LOCKED()          (RW_LOCK_HELD(&vfssw_lock))
#define VFSSW_WRITE_LOCKED()    (RW_WRITE_HELD(&vfssw_lock))
/* Sync flag bits. */
#define SYNC_ATTR       0x01    /* sync attributes only */
#define SYNC_CLOSE      0x02    /* close open file */
#define SYNC_ALL        0x04    /* force to sync all fs */
608 #endif /* _SYS_VFS_H */