4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2017, Joyent, Inc.
25 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
26 * Copyright 2017 RackTop Systems.
29 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
33 * University Copyright- Copyright (c) 1982, 1986, 1988
34 * The Regents of the University of California
37 * University Acknowledgment- Portions of this document are derived from
38 * software developed by the University of California, Berkeley, and its
45 #include <sys/types.h>
46 #include <sys/t_lock.h>
47 #include <sys/rwstlock.h>
48 #include <sys/time_impl.h>
51 #include <sys/resource.h>
52 #include <vm/seg_enum.h>
53 #include <sys/kstat.h>
66 * Statistics for all vnode operations.
67 * All operations record number of ops (since boot/mount/zero'ed).
68 * Certain I/O operations (read, write, readdir) also record number
69 * of bytes transferred.
70 * This appears in two places in the system: one is embedded in each
71 * vfs_t. There is also an array of vopstats_t structures allocated
72 * on a per-fstype basis.
75 #define VOPSTATS_STR "vopstats_" /* Initial string for vopstat kstats */
77 typedef struct vopstats
{
78 kstat_named_t nopen
; /* VOP_OPEN */
79 kstat_named_t nclose
; /* VOP_CLOSE */
80 kstat_named_t nread
; /* VOP_READ */
81 kstat_named_t read_bytes
;
82 kstat_named_t nwrite
; /* VOP_WRITE */
83 kstat_named_t write_bytes
;
84 kstat_named_t nioctl
; /* VOP_IOCTL */
85 kstat_named_t nsetfl
; /* VOP_SETFL */
86 kstat_named_t ngetattr
; /* VOP_GETATTR */
87 kstat_named_t nsetattr
; /* VOP_SETATTR */
88 kstat_named_t naccess
; /* VOP_ACCESS */
89 kstat_named_t nlookup
; /* VOP_LOOKUP */
90 kstat_named_t ncreate
; /* VOP_CREATE */
91 kstat_named_t nremove
; /* VOP_REMOVE */
92 kstat_named_t nlink
; /* VOP_LINK */
93 kstat_named_t nrename
; /* VOP_RENAME */
94 kstat_named_t nmkdir
; /* VOP_MKDIR */
95 kstat_named_t nrmdir
; /* VOP_RMDIR */
96 kstat_named_t nreaddir
; /* VOP_READDIR */
97 kstat_named_t readdir_bytes
;
98 kstat_named_t nsymlink
; /* VOP_SYMLINK */
99 kstat_named_t nreadlink
; /* VOP_READLINK */
100 kstat_named_t nfsync
; /* VOP_FSYNC */
101 kstat_named_t ninactive
; /* VOP_INACTIVE */
102 kstat_named_t nfid
; /* VOP_FID */
103 kstat_named_t nrwlock
; /* VOP_RWLOCK */
104 kstat_named_t nrwunlock
; /* VOP_RWUNLOCK */
105 kstat_named_t nseek
; /* VOP_SEEK */
106 kstat_named_t ncmp
; /* VOP_CMP */
107 kstat_named_t nfrlock
; /* VOP_FRLOCK */
108 kstat_named_t nspace
; /* VOP_SPACE */
109 kstat_named_t nrealvp
; /* VOP_REALVP */
110 kstat_named_t ngetpage
; /* VOP_GETPAGE */
111 kstat_named_t nputpage
; /* VOP_PUTPAGE */
112 kstat_named_t nmap
; /* VOP_MAP */
113 kstat_named_t naddmap
; /* VOP_ADDMAP */
114 kstat_named_t ndelmap
; /* VOP_DELMAP */
115 kstat_named_t npoll
; /* VOP_POLL */
116 kstat_named_t ndump
; /* VOP_DUMP */
117 kstat_named_t npathconf
; /* VOP_PATHCONF */
118 kstat_named_t npageio
; /* VOP_PAGEIO */
119 kstat_named_t ndumpctl
; /* VOP_DUMPCTL */
120 kstat_named_t ndispose
; /* VOP_DISPOSE */
121 kstat_named_t nsetsecattr
; /* VOP_SETSECATTR */
122 kstat_named_t ngetsecattr
; /* VOP_GETSECATTR */
123 kstat_named_t nshrlock
; /* VOP_SHRLOCK */
124 kstat_named_t nvnevent
; /* VOP_VNEVENT */
125 kstat_named_t nreqzcbuf
; /* VOP_REQZCBUF */
126 kstat_named_t nretzcbuf
; /* VOP_RETZCBUF */
130 * The vnode is the focus of all file activity in UNIX.
131 * A vnode is allocated for each active file, each current
132 * directory, each mounted-on file, and the root.
134 * Each vnode is usually associated with a file-system-specific node (for
135 * UFS, this is the in-memory inode). Generally, a vnode and an fs-node
136 * should be created and destroyed together as a pair.
138 * If a vnode is reused for a new file, it should be reinitialized by calling
139 * either vn_reinit() or vn_recycle().
141 * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
142 * The caller is responsible for setting up the entire vnode after calling
143 * vn_reinit(). This is important when using kmem caching where the vnode is
144 * allocated by a constructor, for instance.
146 * vn_recycle() is used when the file system keeps some state around in both
147 * the vnode and the associated FS-node. In UFS, for example, the inode of
148 * a deleted file can be reused immediately. The v_data, v_vfsp, v_op, etc.
149 * remains the same but certain fields related to the previous instance need
150 * to be reset. In particular:
154 * v_mmap_read, v_mmap_write
158 * vnode types. VNON means no type. These values are unrelated to
159 * values in on-disk inodes.
177 * VSD - Vnode Specific Data
178 * Used to associate additional private data with a vnode.
181 list_node_t vs_nodes
; /* list of all VSD nodes */
182 uint_t vs_nkeys
; /* entries in value array */
183 void **vs_value
; /* array of value/key */
187 * Many of the fields in the vnode are read-only once they are initialized
188 * at vnode creation time. Other fields are protected by locks.
190 * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc(). They
191 * may not be embedded into the file-system specific node (inode). The
192 * size of vnodes may change.
194 * The v_lock protects:
203 * A special lock (implemented by vn_vfswlock in vnode.c) protects:
206 * The global flock_lock mutex (in flock.c) protects:
211 * The following vnode fields are considered public and may safely be
212 * accessed by file systems or other consumers:
223 * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
224 * In particular, file systems should not access other fields; they may
225 * change or even be removed. The functionality which was once provided
226 * by these fields is available through vn_* functions.
229 * In each vnode, the v_path field holds a cached version of the canonical
230 * filesystem path which that node represents. Because vnodes lack contextual
231 * information about their own name or position in the VFS hierarchy, this path
232 * must be calculated when the vnode is instantiated by operations such as
233 * fop_create, fop_lookup, or fop_mkdir. During said operations, both the
234 * parent vnode (and its cached v_path) and future name are known, so the
235 * v_path of the resulting object can easily be set.
237 * The caching nature of v_path is complicated in the face of directory
238 * renames. Filesystem drivers are responsible for calling vn_renamepath when
239 * a fop_rename operation succeeds. While the v_path on the renamed vnode will
240 * be updated, existing children of the directory (direct, or at deeper levels)
241 * will now possess v_path caches which are stale.
243 * It is expensive (and for non-directories, impossible) to recalculate stale
244 * v_path entries during operations such as vnodetopath. The best time during
245 * which to correct such wrongs is the same as when v_path is first
246 * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
247 * context is available to generate the current path.
249 * In order to quickly detect stale v_path entries (without full lookup
250 * verification) to trigger a v_path update, the v_path_stamp field has been
251 * added to vnode_t. As part of successful fop_create/fop_lookup/fop_mkdir
252 * operations, where the name and parent vnode are available, the following
253 * rules are used to determine updates to the child:
255 * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
256 * on the child. Until the parent v_path is refreshed to a valid state, the
257 * child v_path must be considered invalid too.
259 * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
260 * v_path_stamp value from its parent and its v_path is updated.
262 * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
263 * an indication that the child v_path is stale. The v_path is updated and
264 * v_path_stamp in the child is set to the current hrtime().
266 * It does _not_ inherit the parent v_path_stamp in order to propagate the
267 * time of v_path invalidation through the directory structure. This
268 * prevents concurrent invalidations (operating with a now-incorrect v_path)
269 * at deeper levels in the tree from persisting.
271 * 4. If the child v_path_stamp is greater or equal to the parent, no action
274 * Note that fop_rename operations do not follow this ruleset. They perform an
275 * explicit update of v_path and v_path_stamp (setting it to the current time)
277 * With these constraints in place, v_path invalidations and updates should
278 * proceed in a timely manner as vnodes are accessed. While there still are
279 * limited cases where vnodetopath operations will fail, the risk is minimized.
282 struct fem_head
; /* from fem.h */
284 typedef struct vnode
{
285 kmutex_t v_lock
; /* protects vnode fields */
286 uint_t v_flag
; /* vnode flags (see below) */
287 uint_t v_count
; /* reference count */
288 void *v_data
; /* private data for fs */
289 struct vfs
*v_vfsp
; /* ptr to containing VFS */
290 struct stdata
*v_stream
; /* associated stream */
291 enum vtype v_type
; /* vnode type */
292 dev_t v_rdev
; /* device (VCHR, VBLK) */
294 /* PRIVATE FIELDS BELOW - DO NOT USE */
296 struct vfs
*v_vfsmountedhere
; /* ptr to vfs mounted here */
297 struct vnodeops
*v_op
; /* vnode operations */
298 struct page
*v_pages
; /* vnode pages list */
299 struct filock
*v_filocks
; /* ptr to filock list */
300 struct shrlocklist
*v_shrlocks
; /* ptr to shrlock list */
301 krwlock_t v_nbllock
; /* sync for NBMAND locks */
302 kcondvar_t v_cv
; /* synchronize locking */
303 void *v_locality
; /* hook for locality info */
304 struct fem_head
*v_femhead
; /* fs monitoring */
305 char *v_path
; /* cached path */
306 hrtime_t v_path_stamp
; /* timestamp for cached path */
307 uint_t v_rdcnt
; /* open for read count (VREG only) */
308 uint_t v_wrcnt
; /* open for write count (VREG only) */
309 u_longlong_t v_mmap_read
; /* mmap read count */
310 u_longlong_t v_mmap_write
; /* mmap write count */
311 void *v_mpssdata
; /* info for large page mappings */
312 void *v_fopdata
; /* list of file ops event watches */
313 kmutex_t v_vsd_lock
; /* protects v_vsd field */
314 struct vsd_node
*v_vsd
; /* vnode specific data */
315 struct vnode
*v_xattrdir
; /* unnamed extended attr dir (GFS) */
316 uint_t v_count_dnlc
; /* dnlc reference count */
319 #define IS_DEVVP(vp) \
320 ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
322 #define VNODE_ALIGN 64
323 /* Count of low-order 0 bits in a vnode *, based on size and alignment. */
325 #define VNODE_ALIGN_LOG2 8
327 #define VNODE_ALIGN_LOG2 7
333 #define VROOT 0x01 /* root of its file system */
334 #define VNOCACHE 0x02 /* don't keep cache pages on vnode */
335 #define VNOMAP 0x04 /* file cannot be mapped/faulted */
336 #define VDUP 0x08 /* file should be dup'ed rather than opened */
337 #define VNOSWAP 0x10 /* file cannot be used as virtual swap device */
338 #define VNOMOUNT 0x20 /* file cannot be covered by mount */
339 #define VISSWAP 0x40 /* vnode is being used for swap */
340 #define VSWAPLIKE 0x80 /* vnode acts like swap (but may not be) */
342 #define IS_SWAPVP(vp) (((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0)
345 typedef struct vn_vfslocks_entry
{
348 struct vn_vfslocks_entry
*ve_next
;
350 char pad
[64 - sizeof (rwstlock_t
) - 2 * sizeof (void *) - \
352 } vn_vfslocks_entry_t
;
356 * The following two flags are used to lock the v_vfsmountedhere field
358 #define VVFSLOCK 0x100
359 #define VVFSWAIT 0x200
362 * Used to serialize VM operations on a vnode
364 #define VVMLOCK 0x400
367 * Tell vn_open() not to fail a directory open for writing but
368 * to go ahead and call VOP_OPEN() to let the filesystem check.
370 #define VDIROPEN 0x800
373 * Flag to let the VM system know that this file is most likely a binary
374 * or shared library since it has been mmap()ed EXEC at some time.
376 #define VVMEXEC 0x1000
378 #define VPXFS 0x2000 /* clustering: global fs proxy vnode */
380 #define IS_PXFSVP(vp) ((vp)->v_flag & VPXFS)
382 #define V_XATTRDIR 0x4000 /* attribute unnamed directory */
384 #define IS_XATTRDIR(vp) ((vp)->v_flag & V_XATTRDIR)
386 #define V_LOCALITY 0x8000 /* whether locality aware */
389 * Flag that indicates the VM should maintain the v_pages list with all modified
390 * pages on one end and unmodified pages at the other. This makes finding dirty
391 * pages to write back to disk much faster at the expense of taking a minor
392 * fault on the first store instruction which touches a writable page.
394 #define VMODSORT (0x10000)
395 #define IS_VMODSORT(vp) \
396 (pvn_vmodsort_supported != 0 && ((vp)->v_flag & VMODSORT) != 0)
398 #define VISSWAPFS 0x20000 /* vnode is being used for swapfs */
401 * The mdb memstat command assumes that IS_SWAPFSVP only uses the
402 * vnode's v_flag field. If this changes, cache the additional
403 * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
405 #define IS_SWAPFSVP(vp) (((vp)->v_flag & VISSWAPFS) != 0)
407 #define V_SYSATTR 0x40000 /* vnode is a GFS system attribute */
410 * Indication that VOP_LOOKUP operations on this vnode may yield results from a
411 * different VFS instance. The main use of this is to suppress v_path
412 * calculation logic when filesystems such as procfs emit results which defy
413 * expectations about normal VFS behavior.
415 #define VTRAVERSE 0x80000
418 * Vnode attributes. A bit-mask is supplied as part of the
419 * structure to indicate the attributes the caller wants to
420 * set (setattr) or extract (getattr).
424 * Note that va_nodeid and va_nblocks are 64bit data type.
425 * We support large files over NFSV3. With Solaris client and
426 * Server that generates 64bit ino's and sizes these fields
427 * will overflow if they are 32 bit sizes.
430 typedef struct vattr
{
431 uint_t va_mask
; /* bit-mask of attributes */
432 vtype_t va_type
; /* vnode type (for create) */
433 mode_t va_mode
; /* file access mode */
434 uid_t va_uid
; /* owner user id */
435 gid_t va_gid
; /* owner group id */
436 dev_t va_fsid
; /* file system id (dev for now) */
437 u_longlong_t va_nodeid
; /* node id */
438 nlink_t va_nlink
; /* number of references to file */
439 u_offset_t va_size
; /* file size in bytes */
440 timestruc_t va_atime
; /* time of last access */
441 timestruc_t va_mtime
; /* time of last modification */
442 timestruc_t va_ctime
; /* time of last status change */
443 dev_t va_rdev
; /* device the file represents */
444 uint_t va_blksize
; /* fundamental block size */
445 u_longlong_t va_nblocks
; /* # of blocks allocated */
446 uint_t va_seq
; /* sequence number */
449 #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
452 * Structure of all optional attributes.
454 typedef struct xoptattr
{
455 timestruc_t xoa_createtime
; /* Create time of file */
458 uint8_t xoa_readonly
;
460 uint8_t xoa_nounlink
;
461 uint8_t xoa_immutable
;
462 uint8_t xoa_appendonly
;
465 uint8_t xoa_av_quarantined
;
466 uint8_t xoa_av_modified
;
467 uint8_t xoa_av_scanstamp
[AV_SCANSTAMP_SZ
];
469 uint64_t xoa_generation
;
475 * The xvattr structure is really a variable length structure that
477 * - The classic vattr_t (xva_vattr)
478 * - a 32 bit quantity (xva_mapsize) that specifies the size of the
479 * attribute bitmaps in 32 bit words.
480 * - A pointer to the returned attribute bitmap (needed because the
481 * previous element, the requested attribute bitmap, is variable length).
482 * - The requested attribute bitmap, which is an array of 32 bit words.
483 * Callers use the XVA_SET_REQ() macro to set the bits corresponding to
484 * the attributes that are being requested.
485 * - The returned attribute bitmap, which is an array of 32 bit words.
486 * File systems that support optional attributes use the XVA_SET_RTN()
487 * macro to set the bits corresponding to the attributes that are being
489 * - The xoptattr_t structure which contains the attribute values
491 * xva_mapsize determines how many words in the attribute bitmaps.
492 * Immediately following the attribute bitmaps is the xoptattr_t.
493 * xva_getxoptattr() is used to get the pointer to the xoptattr_t
497 #define XVA_MAPSIZE 3 /* Size of attr bitmaps */
498 #define XVA_MAGIC 0x78766174 /* Magic # for verification */
501 * The xvattr structure is an extensible structure which permits optional
502 * attributes to be requested/returned. File systems may or may not support
503 * optional attributes. They do so at their own discretion but if they do
504 * support optional attributes, they must register the VFSFT_XVATTR feature
505 * so that the optional attributes can be set/retrieved.
507 * The fields of the xvattr structure are:
509 * xva_vattr - The first element of an xvattr is a legacy vattr structure
510 * which includes the common attributes. If AT_XVATTR is set in the va_mask
511 * then the entire structure is treated as an xvattr. If AT_XVATTR is not
512 * set, then only the xva_vattr structure can be used.
514 * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
516 * xva_mapsize - Size of requested and returned attribute bitmaps.
518 * xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the
519 * size of the array before it, xva_reqattrmap[], could change which means
520 * the location of xva_rtnattrmap[] could change. This will allow unbundled
521 * file systems to find the location of xva_rtnattrmap[] when the sizes change.
523 * xva_reqattrmap[] - Array of requested attributes. Attributes are
524 * represented by a specific bit in a specific element of the attribute
525 * map array. Callers set the bits corresponding to the attributes
526 * that the caller wants to get/set.
528 * xva_rtnattrmap[] - Array of attributes that the file system was able to
529 * process. Not all file systems support all optional attributes. This map
530 * informs the caller which attributes the underlying file system was able
531 * to set/get. (Same structure as the requested attributes array in terms
532 * of each attribute corresponding to specific bits and array elements.)
534 * xva_xoptattrs - Structure containing values of optional attributes.
535 * These values are only valid if the corresponding bits in xva_reqattrmap
536 * are set and the underlying file system supports those attributes.
538 typedef struct xvattr
{
539 vattr_t xva_vattr
; /* Embedded vattr structure */
540 uint32_t xva_magic
; /* Magic Number */
541 uint32_t xva_mapsize
; /* Size of attr bitmap (32-bit words) */
542 uint32_t *xva_rtnattrmapp
; /* Ptr to xva_rtnattrmap[] */
543 uint32_t xva_reqattrmap
[XVA_MAPSIZE
]; /* Requested attrs */
544 uint32_t xva_rtnattrmap
[XVA_MAPSIZE
]; /* Returned attrs */
545 xoptattr_t xva_xoptattrs
; /* Optional attributes */
550 * For bigtypes time_t changed to 64 bit on the 64-bit kernel.
551 * Define an old version for user/kernel interface
554 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
558 typedef struct vattr32
{
559 uint32_t va_mask
; /* bit-mask of attributes */
560 vtype_t va_type
; /* vnode type (for create) */
561 mode32_t va_mode
; /* file access mode */
562 uid32_t va_uid
; /* owner user id */
563 gid32_t va_gid
; /* owner group id */
564 dev32_t va_fsid
; /* file system id (dev for now) */
565 u_longlong_t va_nodeid
; /* node id */
566 nlink_t va_nlink
; /* number of references to file */
567 u_offset_t va_size
; /* file size in bytes */
568 timestruc32_t va_atime
; /* time of last access */
569 timestruc32_t va_mtime
; /* time of last modification */
570 timestruc32_t va_ctime
; /* time of last status change */
571 dev32_t va_rdev
; /* device the file represents */
572 uint32_t va_blksize
; /* fundamental block size */
573 u_longlong_t va_nblocks
; /* # of blocks allocated */
574 uint32_t va_seq
; /* sequence number */
577 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
581 #else /* not _SYSCALL32 */
582 #define vattr32 vattr
583 typedef vattr_t vattr32_t
;
584 #endif /* _SYSCALL32 */
587 * Attributes of interest to the caller of setattr or getattr.
589 #define AT_TYPE 0x00001
590 #define AT_MODE 0x00002
591 #define AT_UID 0x00004
592 #define AT_GID 0x00008
593 #define AT_FSID 0x00010
594 #define AT_NODEID 0x00020
595 #define AT_NLINK 0x00040
596 #define AT_SIZE 0x00080
597 #define AT_ATIME 0x00100
598 #define AT_MTIME 0x00200
599 #define AT_CTIME 0x00400
600 #define AT_RDEV 0x00800
601 #define AT_BLKSIZE 0x01000
602 #define AT_NBLOCKS 0x02000
603 /* 0x04000 */ /* unused */
604 #define AT_SEQ 0x08000
606 * If AT_XVATTR is set then there are additional bits to process in
607 * the xvattr_t's attribute bitmap. If this is not set then the bitmap
608 * MUST be ignored. Note that this bit must be set/cleared explicitly.
609 * That is, setting AT_ALL will NOT set AT_XVATTR.
611 #define AT_XVATTR 0x10000
613 #define AT_ALL (AT_TYPE|AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\
614 AT_NLINK|AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|\
615 AT_RDEV|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
617 #define AT_STAT (AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|AT_NLINK|\
618 AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|AT_RDEV|AT_TYPE)
620 #define AT_TIMES (AT_ATIME|AT_MTIME|AT_CTIME)
622 #define AT_NOSET (AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|AT_TYPE|\
623 AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
/*
 * Attribute bits used in the extensible attribute's (xva's) attribute
 * bitmaps.  Note that the bitmaps are made up of a variable length number
 * of 32-bit words.  The convention is to use XAT{n}_{attrname} where "n"
 * is the element in the bitmap (starting at 0, see XAT0_INDEX).  This
 * convention is for the convenience of the maintainer to keep track of
 * which element each attribute belongs to.
 *
 * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY.  CONSUMERS
 * MUST USE THE XAT_* DEFINES.
 */
#define	XAT0_INDEX		0LL	/* Index into bitmap for XAT0 attrs */
#define	XAT0_CREATETIME		0x00000001	/* Create time of file */
#define	XAT0_ARCHIVE		0x00000002	/* Archive */
#define	XAT0_SYSTEM		0x00000004	/* System */
#define	XAT0_READONLY		0x00000008	/* Readonly */
#define	XAT0_HIDDEN		0x00000010	/* Hidden */
#define	XAT0_NOUNLINK		0x00000020	/* Nounlink */
#define	XAT0_IMMUTABLE		0x00000040	/* immutable */
#define	XAT0_APPENDONLY		0x00000080	/* appendonly */
#define	XAT0_NODUMP		0x00000100	/* nodump */
#define	XAT0_OPAQUE		0x00000200	/* opaque */
#define	XAT0_AV_QUARANTINED	0x00000400	/* anti-virus quarantine */
#define	XAT0_AV_MODIFIED	0x00000800	/* anti-virus modified */
#define	XAT0_AV_SCANSTAMP	0x00001000	/* anti-virus scanstamp */
#define	XAT0_REPARSE		0x00002000	/* FS reparse point */
#define	XAT0_GEN		0x00004000	/* object generation number */
#define	XAT0_OFFLINE		0x00008000	/* offline */
#define	XAT0_SPARSE		0x00010000	/* sparse */

/*
 * Union of every XAT0_* bit defined above.  Each operand must be spelled
 * with the digit zero ("XAT0_"); the generation bit was previously written
 * "XATO_GEN" (letter O), which names no macro and broke any expansion of
 * this aggregate.
 */
#define	XAT0_ALL_ATTRS	(XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \
    XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \
    XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED|XAT0_AV_MODIFIED| \
    XAT0_AV_SCANSTAMP|XAT0_REPARSE|XAT0_GEN|XAT0_OFFLINE|XAT0_SPARSE)
/* Support for XAT_* optional attributes */
#define	XVA_MASK	0xffffffff	/* Used to mask off 32 bits */
#define	XVA_SHFT	32		/* Used to shift index */

/*
 * Used to pry out the index and attribute bits from the XAT_* attributes
 * defined below.  XVA_INDEX() yields the upper 32 bits of attr (the bitmap
 * word index) and XVA_ATTRBIT() the lower 32 bits (the bit within that
 * word); both results are cast to uint32_t.
 *
 * attr is widened to uint64_t before shifting so that the XVA_SHFT-bit
 * shift stays well defined even if the argument expression has a 32-bit
 * type.
 */
#define	XVA_INDEX(attr)		\
	((uint32_t)((((uint64_t)(attr)) >> XVA_SHFT) & XVA_MASK))
#define	XVA_ATTRBIT(attr)	\
	((uint32_t)(((uint64_t)(attr)) & XVA_MASK))
673 * The following defines present a "flat namespace" so that consumers don't
674 * need to keep track of which element belongs to which bitmap entry.
676 * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER
678 #define XAT_CREATETIME ((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME)
679 #define XAT_ARCHIVE ((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE)
680 #define XAT_SYSTEM ((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM)
681 #define XAT_READONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY)
682 #define XAT_HIDDEN ((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN)
683 #define XAT_NOUNLINK ((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK)
684 #define XAT_IMMUTABLE ((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE)
685 #define XAT_APPENDONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY)
686 #define XAT_NODUMP ((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP)
687 #define XAT_OPAQUE ((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE)
688 #define XAT_AV_QUARANTINED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED)
689 #define XAT_AV_MODIFIED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED)
690 #define XAT_AV_SCANSTAMP ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP)
691 #define XAT_REPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE)
692 #define XAT_GEN ((XAT0_INDEX << XVA_SHFT) | XAT0_GEN)
693 #define XAT_OFFLINE ((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE)
694 #define XAT_SPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE)
697 * The returned attribute map array (xva_rtnattrmap[]) is located past the
698 * requested attribute map array (xva_reqattrmap[]). Its location changes
699 * when the array sizes change. We use a separate pointer in a known location
700 * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[]. This is
703 #define XVA_RTNATTRMAP(xvap) ((xvap)->xva_rtnattrmapp)
/*
 * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 *
 *	xvap	pointer to the xvattr to update (evaluated more than once)
 *	attr	an XAT_* identifier; XVA_INDEX() selects the bitmap word and
 *		XVA_ATTRBIT() the bit within it
 *
 * The first assertion tests AT_XVATTR with '&'; the former '|' made the
 * expression non-zero for every va_mask, so it could never fire.  The body
 * is wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe under an unbraced if/else.
 */
#define	XVA_SET_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
/*
 * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 *
 *	xvap	pointer to the xvattr to update (evaluated more than once)
 *	attr	an XAT_* identifier; XVA_INDEX() selects the bitmap word and
 *		XVA_ATTRBIT() the bit within it
 *
 * The first assertion tests AT_XVATTR with '&'; the former '|' made the
 * expression non-zero for every va_mask, so it could never fire.  The body
 * is wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe under an unbraced if/else.
 */
#define	XVA_CLR_REQ(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] &=		\
		    ~XVA_ATTRBIT(attr);					\
	} while (0)
/*
 * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap
 * of returned attributes (xva_rtnattrmap[]), which is reached through
 * XVA_RTNATTRMAP(xvap).
 *
 *	xvap	pointer to the xvattr to update (evaluated more than once)
 *	attr	an XAT_* identifier; XVA_INDEX() selects the bitmap word and
 *		XVA_ATTRBIT() the bit within it
 *
 * The first assertion tests AT_XVATTR with '&'; the former '|' made the
 * expression non-zero for every va_mask, so it could never fire.  The body
 * is wrapped in do { } while (0) so the macro expands to a single statement
 * and is safe under an unbraced if/else.
 */
#define	XVA_SET_RTN(xvap, attr)						\
	do {								\
		ASSERT((xvap)->xva_vattr.va_mask & AT_XVATTR);		\
		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
		(XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |=		\
		    XVA_ATTRBIT(attr);					\
	} while (0)
/*
 * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[])
 * to see if the corresponding attribute bit is set.  If so, returns non-zero
 * (the masked bit itself); otherwise returns 0.
 *
 *	xvap	pointer to the xvattr to inspect (evaluated more than once)
 *	attr	an XAT_* identifier
 *
 * The bitmap is consulted only when AT_XVATTR is set in va_mask, the magic
 * number matches XVA_MAGIC, and XVA_INDEX(attr) is below xva_mapsize; in
 * every other case the result is 0.  The AT_XVATTR test uses '&'; the
 * former '|' was non-zero for every va_mask and so guarded nothing.
 */
#define	XVA_ISSET_REQ(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask & AT_XVATTR) &&			\
	    ((xvap)->xva_magic == XVA_MAGIC) &&				\
	    ((xvap)->xva_mapsize > XVA_INDEX(attr))) ?			\
	    ((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
/*
 * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[],
 * reached through XVA_RTNATTRMAP(xvap)) to see if the corresponding
 * attribute bit is set.  If so, returns non-zero (the masked bit itself);
 * otherwise returns 0.
 *
 *	xvap	pointer to the xvattr to inspect (evaluated more than once)
 *	attr	an XAT_* identifier
 *
 * The bitmap is consulted only when AT_XVATTR is set in va_mask, the magic
 * number matches XVA_MAGIC, and XVA_INDEX(attr) is below xva_mapsize; in
 * every other case the result is 0.  The AT_XVATTR test uses '&'; the
 * former '|' was non-zero for every va_mask and so guarded nothing.
 */
#define	XVA_ISSET_RTN(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask & AT_XVATTR) &&			\
	    ((xvap)->xva_magic == XVA_MAGIC) &&				\
	    ((xvap)->xva_mapsize > XVA_INDEX(attr))) ?			\
	    ((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
752 * Modes. Some values same as S_xxx entries from stat.h for convenience.
754 #define VSUID 04000 /* set user id on execution */
755 #define VSGID 02000 /* set group id on execution */
756 #define VSVTX 01000 /* save swapped text even after use */
765 #define MODEMASK 07777 /* mode bits plus permission bits */
766 #define PERMMASK 00777 /* permission bits */
771 #define V_ACE_MASK 0x1 /* mask represents NFSv4 ACE permissions */
772 #define V_APPEND 0x2 /* want to do append only check */
775 * Check whether mandatory file locking is enabled.
778 #define MANDMODE(mode) (((mode) & (VSGID|(VEXEC>>3))) == VSGID)
779 #define MANDLOCK(vp, mode) ((vp)->v_type == VREG && MANDMODE(mode))
782 * Flags for vnode operations.
784 enum rm
{ RMFILE
, RMDIRECTORY
}; /* rm or rmdir (remove) */
785 enum symfollow
{ NO_FOLLOW
, FOLLOW
}; /* follow symlinks (or not) */
786 enum vcexcl
{ NONEXCL
, EXCL
}; /* (non)excl create */
787 enum create
{ CRCREAT
, CRMKNOD
, CRMKDIR
}; /* reason for create */
789 typedef enum rm rm_t
;
790 typedef enum symfollow symfollow_t
;
791 typedef enum vcexcl vcexcl_t
;
792 typedef enum create create_t
;
795 * Vnode Events - Used by VOP_VNEVENT
796 * The VE_PRE_RENAME_* events fire before the rename operation and are
797 * primarily used for specialized applications, such as NFSv4 delegation, which
798 * need to know about rename before it occurs.
800 typedef enum vnevent
{
801 VE_SUPPORT
= 0, /* Query */
802 VE_RENAME_SRC
= 1, /* Rename, with vnode as source */
803 VE_RENAME_DEST
= 2, /* Rename, with vnode as target/destination */
804 VE_REMOVE
= 3, /* Remove of vnode's name */
805 VE_RMDIR
= 4, /* Remove of directory vnode's name */
806 VE_CREATE
= 5, /* Create with vnode's name which exists */
807 VE_LINK
= 6, /* Link with vnode's name as source */
808 VE_RENAME_DEST_DIR
= 7, /* Rename with vnode as target dir */
809 VE_MOUNTEDOVER
= 8, /* File or Filesystem got mounted over vnode */
810 VE_TRUNCATE
= 9, /* Truncate */
811 VE_PRE_RENAME_SRC
= 10, /* Pre-rename, with vnode as source */
812 VE_PRE_RENAME_DEST
= 11, /* Pre-rename, with vnode as target/dest. */
813 VE_PRE_RENAME_DEST_DIR
= 12 /* Pre-rename with vnode as target dir */
817 * Values for checking vnode open and map counts
819 enum v_mode
{ V_READ
, V_WRITE
, V_RDORWR
, V_RDANDWR
};
821 typedef enum v_mode v_mode_t
;
827 * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations
830 typedef struct vsecattr
{
831 uint_t vsa_mask
; /* See below */
832 int vsa_aclcnt
; /* ACL entry count */
833 void *vsa_aclentp
; /* pointer to ACL entries */
834 int vsa_dfaclcnt
; /* default ACL entry count */
835 void *vsa_dfaclentp
; /* pointer to default ACL entries */
836 size_t vsa_aclentsz
; /* ACE size in bytes of vsa_aclentp */
837 uint_t vsa_aclflags
; /* ACE ACL flags */
840 /* vsa_mask values */
841 #define VSA_ACL 0x0001
842 #define VSA_ACLCNT 0x0002
843 #define VSA_DFACL 0x0004
844 #define VSA_DFACLCNT 0x0008
845 #define VSA_ACE 0x0010
846 #define VSA_ACECNT 0x0020
847 #define VSA_ACE_ALLTYPES 0x0040
848 #define VSA_ACE_ACLFLAGS 0x0080 /* get/set ACE ACL flags */
851 * Structure used by various vnode operations to determine
852 * the context (pid, host, identity) of a caller.
854 * The cc_caller_id is used to identify one or more callers who invoke
855 * operations, possibly on behalf of others. For example, the NFS
856 * server could have its own cc_caller_id which can be detected by
857 * vnode/vfs operations or (FEM) monitors on those operations. New
858 * caller IDs are generated by fs_new_caller_id().
860 typedef struct caller_context
{
861 pid_t cc_pid
; /* Process ID of the caller */
862 int cc_sysid
; /* System ID, used for remote calls */
863 u_longlong_t cc_caller_id
; /* Identifier for (set of) caller(s) */
868 * Flags for caller context. The caller sets CC_DONTBLOCK if it does not
869 * want to block inside of a FEM monitor. The monitor will set CC_WOULDBLOCK
870 * and return EAGAIN if the operation would have blocked.
872 #define CC_WOULDBLOCK 0x01
873 #define CC_DONTBLOCK 0x02
876 * Structure tags for function prototypes, defined elsewhere.
892 * VNODE_OPS defines all the vnode operations. It is used to define
893 * the vnodeops structure (below) and the fs_func_p union (vfs_opreg.h).
896 int (*vop_open)(vnode_t **, int, cred_t *, \
897 caller_context_t *); \
898 int (*vop_close)(vnode_t *, int, int, offset_t, cred_t *, \
899 caller_context_t *); \
900 int (*vop_read)(vnode_t *, uio_t *, int, cred_t *, \
901 caller_context_t *); \
902 int (*vop_write)(vnode_t *, uio_t *, int, cred_t *, \
903 caller_context_t *); \
904 int (*vop_ioctl)(vnode_t *, int, intptr_t, int, cred_t *, \
905 int *, caller_context_t *); \
906 int (*vop_setfl)(vnode_t *, int, int, cred_t *, \
907 caller_context_t *); \
908 int (*vop_getattr)(vnode_t *, vattr_t *, int, cred_t *, \
909 caller_context_t *); \
910 int (*vop_setattr)(vnode_t *, vattr_t *, int, cred_t *, \
911 caller_context_t *); \
912 int (*vop_access)(vnode_t *, int, int, cred_t *, \
913 caller_context_t *); \
914 int (*vop_lookup)(vnode_t *, char *, vnode_t **, \
916 int, vnode_t *, cred_t *, \
917 caller_context_t *, int *, \
918 struct pathname *); \
919 int (*vop_create)(vnode_t *, char *, vattr_t *, vcexcl_t, \
920 int, vnode_t **, cred_t *, int, \
921 caller_context_t *, vsecattr_t *); \
922 int (*vop_remove)(vnode_t *, char *, cred_t *, \
923 caller_context_t *, int); \
924 int (*vop_link)(vnode_t *, vnode_t *, char *, cred_t *, \
925 caller_context_t *, int); \
926 int (*vop_rename)(vnode_t *, char *, vnode_t *, char *, \
927 cred_t *, caller_context_t *, int); \
928 int (*vop_mkdir)(vnode_t *, char *, vattr_t *, vnode_t **, \
929 cred_t *, caller_context_t *, int, \
931 int (*vop_rmdir)(vnode_t *, char *, vnode_t *, cred_t *, \
932 caller_context_t *, int); \
933 int (*vop_readdir)(vnode_t *, uio_t *, cred_t *, int *, \
934 caller_context_t *, int); \
935 int (*vop_symlink)(vnode_t *, char *, vattr_t *, char *, \
936 cred_t *, caller_context_t *, int); \
937 int (*vop_readlink)(vnode_t *, uio_t *, cred_t *, \
938 caller_context_t *); \
939 int (*vop_fsync)(vnode_t *, int, cred_t *, \
940 caller_context_t *); \
941 void (*vop_inactive)(vnode_t *, cred_t *, \
942 caller_context_t *); \
943 int (*vop_fid)(vnode_t *, struct fid *, \
944 caller_context_t *); \
945 int (*vop_rwlock)(vnode_t *, int, caller_context_t *); \
946 void (*vop_rwunlock)(vnode_t *, int, caller_context_t *); \
947 int (*vop_seek)(vnode_t *, offset_t, offset_t *, \
948 caller_context_t *); \
949 int (*vop_cmp)(vnode_t *, vnode_t *, caller_context_t *); \
950 int (*vop_frlock)(vnode_t *, int, struct flock64 *, \
952 struct flk_callback *, cred_t *, \
953 caller_context_t *); \
954 int (*vop_space)(vnode_t *, int, struct flock64 *, \
956 cred_t *, caller_context_t *); \
957 int (*vop_realvp)(vnode_t *, vnode_t **, \
958 caller_context_t *); \
959 int (*vop_getpage)(vnode_t *, offset_t, size_t, uint_t *, \
960 struct page **, size_t, struct seg *, \
961 caddr_t, enum seg_rw, cred_t *, \
962 caller_context_t *); \
963 int (*vop_putpage)(vnode_t *, offset_t, size_t, \
964 int, cred_t *, caller_context_t *); \
965 int (*vop_map)(vnode_t *, offset_t, struct as *, \
967 uchar_t, uchar_t, uint_t, cred_t *, \
968 caller_context_t *); \
969 int (*vop_addmap)(vnode_t *, offset_t, struct as *, \
971 uchar_t, uchar_t, uint_t, cred_t *, \
972 caller_context_t *); \
973 int (*vop_delmap)(vnode_t *, offset_t, struct as *, \
975 uint_t, uint_t, uint_t, cred_t *, \
976 caller_context_t *); \
977 int (*vop_poll)(vnode_t *, short, int, short *, \
978 struct pollhead **, \
979 caller_context_t *); \
980 int (*vop_dump)(vnode_t *, caddr_t, offset_t, offset_t, \
981 caller_context_t *); \
982 int (*vop_pathconf)(vnode_t *, int, ulong_t *, cred_t *, \
983 caller_context_t *); \
984 int (*vop_pageio)(vnode_t *, struct page *, \
985 u_offset_t, size_t, int, cred_t *, \
986 caller_context_t *); \
987 int (*vop_dumpctl)(vnode_t *, int, offset_t *, \
988 caller_context_t *); \
989 void (*vop_dispose)(vnode_t *, struct page *, \
990 int, int, cred_t *, \
991 caller_context_t *); \
992 int (*vop_setsecattr)(vnode_t *, vsecattr_t *, \
993 int, cred_t *, caller_context_t *); \
994 int (*vop_getsecattr)(vnode_t *, vsecattr_t *, \
995 int, cred_t *, caller_context_t *); \
996 int (*vop_shrlock)(vnode_t *, int, struct shrlock *, \
997 int, cred_t *, caller_context_t *); \
998 int (*vop_vnevent)(vnode_t *, vnevent_t, vnode_t *, \
999 char *, caller_context_t *); \
1000 int (*vop_reqzcbuf)(vnode_t *, enum uio_rw, xuio_t *, \
1001 cred_t *, caller_context_t *); \
1002 int (*vop_retzcbuf)(vnode_t *, xuio_t *, cred_t *, \
1007 * Operations on vnodes. Note: File systems must never operate directly
1008 * on a 'vnodeops' structure -- it WILL change in future releases! They
1009 * must use vn_make_ops() to create the structure.
1011 typedef struct vnodeops
{
1012 const char *vnop_name
;
1013 VNODE_OPS
; /* Signatures of all vnode operations (vops) */
/*
 * Generic vop/vfsop/femop/fsemop ptr.  The parameter list is deliberately
 * left unspecified so one pointer type can carry any of those signatures.
 */
typedef int (*fs_generic_func_p)();
1018 extern int fop_open(vnode_t
**, int, cred_t
*, caller_context_t
*);
1019 extern int fop_close(vnode_t
*, int, int, offset_t
, cred_t
*,
1020 caller_context_t
*);
1021 extern int fop_read(vnode_t
*, uio_t
*, int, cred_t
*, caller_context_t
*);
1022 extern int fop_write(vnode_t
*, uio_t
*, int, cred_t
*,
1023 caller_context_t
*);
1024 extern int fop_ioctl(vnode_t
*, int, intptr_t, int, cred_t
*, int *,
1025 caller_context_t
*);
1026 extern int fop_setfl(vnode_t
*, int, int, cred_t
*, caller_context_t
*);
1027 extern int fop_getattr(vnode_t
*, vattr_t
*, int, cred_t
*,
1028 caller_context_t
*);
1029 extern int fop_setattr(vnode_t
*, vattr_t
*, int, cred_t
*,
1030 caller_context_t
*);
1031 extern int fop_access(vnode_t
*, int, int, cred_t
*, caller_context_t
*);
1032 extern int fop_lookup(vnode_t
*, char *, vnode_t
**, struct pathname
*,
1033 int, vnode_t
*, cred_t
*, caller_context_t
*,
1034 int *, struct pathname
*);
1035 extern int fop_create(vnode_t
*, char *, vattr_t
*, vcexcl_t
, int,
1036 vnode_t
**, cred_t
*, int, caller_context_t
*,
1038 extern int fop_remove(vnode_t
*vp
, char *, cred_t
*, caller_context_t
*,
1040 extern int fop_link(vnode_t
*, vnode_t
*, char *, cred_t
*,
1041 caller_context_t
*, int);
1042 extern int fop_rename(vnode_t
*, char *, vnode_t
*, char *, cred_t
*,
1043 caller_context_t
*, int);
1044 extern int fop_mkdir(vnode_t
*, char *, vattr_t
*, vnode_t
**, cred_t
*,
1045 caller_context_t
*, int, vsecattr_t
*);
1046 extern int fop_rmdir(vnode_t
*, char *, vnode_t
*, cred_t
*,
1047 caller_context_t
*, int);
1048 extern int fop_readdir(vnode_t
*, uio_t
*, cred_t
*, int *,
1049 caller_context_t
*, int);
1050 extern int fop_symlink(vnode_t
*, char *, vattr_t
*, char *, cred_t
*,
1051 caller_context_t
*, int);
1052 extern int fop_readlink(vnode_t
*, uio_t
*, cred_t
*, caller_context_t
*);
1053 extern int fop_fsync(vnode_t
*, int, cred_t
*, caller_context_t
*);
1054 extern void fop_inactive(vnode_t
*, cred_t
*, caller_context_t
*);
1055 extern int fop_fid(vnode_t
*, struct fid
*, caller_context_t
*);
1056 extern int fop_rwlock(vnode_t
*, int, caller_context_t
*);
1057 extern void fop_rwunlock(vnode_t
*, int, caller_context_t
*);
1058 extern int fop_seek(vnode_t
*, offset_t
, offset_t
*, caller_context_t
*);
1059 extern int fop_cmp(vnode_t
*, vnode_t
*, caller_context_t
*);
1060 extern int fop_frlock(vnode_t
*, int, struct flock64
*, int, offset_t
,
1061 struct flk_callback
*, cred_t
*,
1062 caller_context_t
*);
1063 extern int fop_space(vnode_t
*, int, struct flock64
*, int, offset_t
,
1064 cred_t
*, caller_context_t
*);
1065 extern int fop_realvp(vnode_t
*, vnode_t
**, caller_context_t
*);
1066 extern int fop_getpage(vnode_t
*, offset_t
, size_t, uint_t
*,
1067 struct page
**, size_t, struct seg
*,
1068 caddr_t
, enum seg_rw
, cred_t
*,
1069 caller_context_t
*);
1070 extern int fop_putpage(vnode_t
*, offset_t
, size_t, int, cred_t
*,
1071 caller_context_t
*);
1072 extern int fop_map(vnode_t
*, offset_t
, struct as
*, caddr_t
*, size_t,
1073 uchar_t
, uchar_t
, uint_t
, cred_t
*cr
,
1074 caller_context_t
*);
1075 extern int fop_addmap(vnode_t
*, offset_t
, struct as
*, caddr_t
, size_t,
1076 uchar_t
, uchar_t
, uint_t
, cred_t
*,
1077 caller_context_t
*);
1078 extern int fop_delmap(vnode_t
*, offset_t
, struct as
*, caddr_t
, size_t,
1079 uint_t
, uint_t
, uint_t
, cred_t
*,
1080 caller_context_t
*);
1081 extern int fop_poll(vnode_t
*, short, int, short *, struct pollhead
**,
1082 caller_context_t
*);
1083 extern int fop_dump(vnode_t
*, caddr_t
, offset_t
, offset_t
,
1084 caller_context_t
*);
1085 extern int fop_pathconf(vnode_t
*, int, ulong_t
*, cred_t
*,
1086 caller_context_t
*);
1087 extern int fop_pageio(vnode_t
*, struct page
*, u_offset_t
, size_t, int,
1088 cred_t
*, caller_context_t
*);
1089 extern int fop_dumpctl(vnode_t
*, int, offset_t
*, caller_context_t
*);
1090 extern void fop_dispose(vnode_t
*, struct page
*, int, int, cred_t
*,
1091 caller_context_t
*);
1092 extern int fop_setsecattr(vnode_t
*, vsecattr_t
*, int, cred_t
*,
1093 caller_context_t
*);
1094 extern int fop_getsecattr(vnode_t
*, vsecattr_t
*, int, cred_t
*,
1095 caller_context_t
*);
1096 extern int fop_shrlock(vnode_t
*, int, struct shrlock
*, int, cred_t
*,
1097 caller_context_t
*);
1098 extern int fop_vnevent(vnode_t
*, vnevent_t
, vnode_t
*, char *,
1099 caller_context_t
*);
1100 extern int fop_reqzcbuf(vnode_t
*, enum uio_rw
, xuio_t
*, cred_t
*,
1101 caller_context_t
*);
1102 extern int fop_retzcbuf(vnode_t
*, xuio_t
*, cred_t
*, caller_context_t
*);
1104 #endif /* _KERNEL */
/*
 * VOP_*() macros.  Each one expands to a call of the matching fop_*()
 * function with its arguments passed through unchanged and in the same
 * order.
 */
#define	VOP_OPEN(vpp, mode, cr, ct) \
	fop_open(vpp, mode, cr, ct)
#define	VOP_CLOSE(vp, f, c, o, cr, ct) \
	fop_close(vp, f, c, o, cr, ct)
#define	VOP_READ(vp, uiop, iof, cr, ct) \
	fop_read(vp, uiop, iof, cr, ct)
#define	VOP_WRITE(vp, uiop, iof, cr, ct) \
	fop_write(vp, uiop, iof, cr, ct)
#define	VOP_IOCTL(vp, cmd, a, f, cr, rvp, ct) \
	fop_ioctl(vp, cmd, a, f, cr, rvp, ct)
#define	VOP_SETFL(vp, f, a, cr, ct) \
	fop_setfl(vp, f, a, cr, ct)
#define	VOP_GETATTR(vp, vap, f, cr, ct) \
	fop_getattr(vp, vap, f, cr, ct)
#define	VOP_SETATTR(vp, vap, f, cr, ct) \
	fop_setattr(vp, vap, f, cr, ct)
#define	VOP_ACCESS(vp, mode, f, cr, ct) \
	fop_access(vp, mode, f, cr, ct)
#define	VOP_LOOKUP(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp) \
	fop_lookup(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp)
#define	VOP_CREATE(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap) \
	fop_create(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap)
#define	VOP_REMOVE(dvp, p, cr, ct, f) \
	fop_remove(dvp, p, cr, ct, f)
#define	VOP_LINK(tdvp, fvp, p, cr, ct, f) \
	fop_link(tdvp, fvp, p, cr, ct, f)
#define	VOP_RENAME(fvp, fnm, tdvp, tnm, cr, ct, f) \
	fop_rename(fvp, fnm, tdvp, tnm, cr, ct, f)
#define	VOP_MKDIR(dp, p, vap, vpp, cr, ct, f, vsap) \
	fop_mkdir(dp, p, vap, vpp, cr, ct, f, vsap)
#define	VOP_RMDIR(dp, p, cdir, cr, ct, f) \
	fop_rmdir(dp, p, cdir, cr, ct, f)
#define	VOP_READDIR(vp, uiop, cr, eofp, ct, f) \
	fop_readdir(vp, uiop, cr, eofp, ct, f)
#define	VOP_SYMLINK(dvp, lnm, vap, tnm, cr, ct, f) \
	fop_symlink(dvp, lnm, vap, tnm, cr, ct, f)
#define	VOP_READLINK(vp, uiop, cr, ct) \
	fop_readlink(vp, uiop, cr, ct)
#define	VOP_FSYNC(vp, syncflag, cr, ct) \
	fop_fsync(vp, syncflag, cr, ct)
#define	VOP_INACTIVE(vp, cr, ct) \
	fop_inactive(vp, cr, ct)
#define	VOP_FID(vp, fidp, ct) \
	fop_fid(vp, fidp, ct)
#define	VOP_RWLOCK(vp, w, ct) \
	fop_rwlock(vp, w, ct)
#define	VOP_RWUNLOCK(vp, w, ct) \
	fop_rwunlock(vp, w, ct)
#define	VOP_SEEK(vp, ooff, noffp, ct) \
	fop_seek(vp, ooff, noffp, ct)
#define	VOP_CMP(vp1, vp2, ct) \
	fop_cmp(vp1, vp2, ct)
#define	VOP_FRLOCK(vp, cmd, a, f, o, cb, cr, ct) \
	fop_frlock(vp, cmd, a, f, o, cb, cr, ct)
#define	VOP_SPACE(vp, cmd, a, f, o, cr, ct) \
	fop_space(vp, cmd, a, f, o, cr, ct)
#define	VOP_REALVP(vp1, vp2, ct) \
	fop_realvp(vp1, vp2, ct)
#define	VOP_GETPAGE(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct) \
	fop_getpage(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct)
#define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct) \
	fop_putpage(vp, of, sz, fl, cr, ct)
#define	VOP_MAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_map(vp, of, as, a, sz, p, mp, fl, cr, ct)
#define	VOP_ADDMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_addmap(vp, of, as, a, sz, p, mp, fl, cr, ct)
#define	VOP_DELMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
	fop_delmap(vp, of, as, a, sz, p, mp, fl, cr, ct)
#define	VOP_POLL(vp, events, anyyet, reventsp, phpp, ct) \
	fop_poll(vp, events, anyyet, reventsp, phpp, ct)
#define	VOP_DUMP(vp, addr, bn, count, ct) \
	fop_dump(vp, addr, bn, count, ct)
#define	VOP_PATHCONF(vp, cmd, valp, cr, ct) \
	fop_pathconf(vp, cmd, valp, cr, ct)
#define	VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct) \
	fop_pageio(vp, pp, io_off, io_len, flags, cr, ct)
#define	VOP_DUMPCTL(vp, action, blkp, ct) \
	fop_dumpctl(vp, action, blkp, ct)
#define	VOP_DISPOSE(vp, pp, flag, dn, cr, ct) \
	fop_dispose(vp, pp, flag, dn, cr, ct)
#define	VOP_GETSECATTR(vp, vsap, f, cr, ct) \
	fop_getsecattr(vp, vsap, f, cr, ct)
#define	VOP_SETSECATTR(vp, vsap, f, cr, ct) \
	fop_setsecattr(vp, vsap, f, cr, ct)
#define	VOP_SHRLOCK(vp, cmd, shr, f, cr, ct) \
	fop_shrlock(vp, cmd, shr, f, cr, ct)
#define	VOP_VNEVENT(vp, vnevent, dvp, fnm, ct) \
	fop_vnevent(vp, vnevent, dvp, fnm, ct)
#define	VOP_REQZCBUF(vp, rwflag, xuiop, cr, ct) \
	fop_reqzcbuf(vp, rwflag, xuiop, cr, ct)
#define	VOP_RETZCBUF(vp, xuiop, cr, ct) \
	fop_retzcbuf(vp, xuiop, cr, ct)
/* String name of each vnode operation. */
#define	VOPNAME_OPEN		"open"
#define	VOPNAME_CLOSE		"close"
#define	VOPNAME_READ		"read"
#define	VOPNAME_WRITE		"write"
#define	VOPNAME_IOCTL		"ioctl"
#define	VOPNAME_SETFL		"setfl"
#define	VOPNAME_GETATTR		"getattr"
#define	VOPNAME_SETATTR		"setattr"
#define	VOPNAME_ACCESS		"access"
#define	VOPNAME_LOOKUP		"lookup"
#define	VOPNAME_CREATE		"create"
#define	VOPNAME_REMOVE		"remove"
#define	VOPNAME_LINK		"link"
#define	VOPNAME_RENAME		"rename"
#define	VOPNAME_MKDIR		"mkdir"
#define	VOPNAME_RMDIR		"rmdir"
#define	VOPNAME_READDIR		"readdir"
#define	VOPNAME_SYMLINK		"symlink"
#define	VOPNAME_READLINK	"readlink"
#define	VOPNAME_FSYNC		"fsync"
#define	VOPNAME_INACTIVE	"inactive"
#define	VOPNAME_FID		"fid"
#define	VOPNAME_RWLOCK		"rwlock"
#define	VOPNAME_RWUNLOCK	"rwunlock"
#define	VOPNAME_SEEK		"seek"
#define	VOPNAME_CMP		"cmp"
#define	VOPNAME_FRLOCK		"frlock"
#define	VOPNAME_SPACE		"space"
#define	VOPNAME_REALVP		"realvp"
#define	VOPNAME_GETPAGE		"getpage"
#define	VOPNAME_PUTPAGE		"putpage"
#define	VOPNAME_MAP		"map"
#define	VOPNAME_ADDMAP		"addmap"
#define	VOPNAME_DELMAP		"delmap"
#define	VOPNAME_POLL		"poll"
#define	VOPNAME_DUMP		"dump"
#define	VOPNAME_PATHCONF	"pathconf"
#define	VOPNAME_PAGEIO		"pageio"
#define	VOPNAME_DUMPCTL		"dumpctl"
#define	VOPNAME_DISPOSE		"dispose"
#define	VOPNAME_GETSECATTR	"getsecattr"
#define	VOPNAME_SETSECATTR	"setsecattr"
#define	VOPNAME_SHRLOCK		"shrlock"
#define	VOPNAME_VNEVENT		"vnevent"
#define	VOPNAME_REQZCBUF	"reqzcbuf"
#define	VOPNAME_RETZCBUF	"retzcbuf"
/*
 * Flags for VOP_LOOKUP
 *
 * Defined in file.h, but also possible, FIGNORECASE and FSEARCH
 */
#define	LOOKUP_DIR		0x01	/* want parent dir vp */
#define	LOOKUP_XATTR		0x02	/* lookup up extended attr dir */
#define	CREATE_XATTR_DIR	0x04	/* Create extended attr dir */
#define	LOOKUP_HAVE_SYSATTR_DIR	0x08	/* Already created virtual GFS dir */

/*
 * Flags for VOP_READDIR
 */
#define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
#define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */

/*
 * Flags for VOP_RWLOCK/VOP_RWUNLOCK
 * VOP_RWLOCK will return the flag that was actually set, or -1 if none.
 */
#define	V_WRITELOCK_TRUE	(1)	/* Request write-lock on the vnode */
#define	V_WRITELOCK_FALSE	(0)	/* Request read-lock on the vnode */

/*
 * Flags for VOP_DUMPCTL
 */
#define	DUMP_ALLOC	0
1278 * Public vnode manipulation functions.
1282 vnode_t
*vn_alloc(int);
1283 void vn_reinit(vnode_t
*);
1284 void vn_recycle(vnode_t
*);
1285 void vn_free(vnode_t
*);
1287 int vn_is_readonly(vnode_t
*);
1288 int vn_is_opened(vnode_t
*, v_mode_t
);
1289 int vn_is_mapped(vnode_t
*, v_mode_t
);
1290 int vn_has_other_opens(vnode_t
*, v_mode_t
);
1291 void vn_open_upgrade(vnode_t
*, int);
1292 void vn_open_downgrade(vnode_t
*, int);
1294 int vn_can_change_zones(vnode_t
*vp
);
1296 int vn_has_flocks(vnode_t
*);
1297 int vn_has_mandatory_locks(vnode_t
*, int);
1298 int vn_has_cached_data(vnode_t
*);
1300 void vn_setops(vnode_t
*, vnodeops_t
*);
1301 vnodeops_t
*vn_getops(vnode_t
*);
1302 int vn_matchops(vnode_t
*, vnodeops_t
*);
1303 int vn_matchopval(vnode_t
*, char *, fs_generic_func_p
);
1304 int vn_ismntpt(vnode_t
*);
1306 struct vfs
*vn_mountedvfs(vnode_t
*);
1308 int vn_in_dnlc(vnode_t
*);
1310 void vn_create_cache(void);
1311 void vn_destroy_cache(void);
1313 void vn_freevnodeops(vnodeops_t
*);
1315 int vn_open(char *pnamep
, enum uio_seg seg
, int filemode
, int createmode
,
1316 struct vnode
**vpp
, enum create crwhy
, mode_t umask
);
1317 int vn_openat(char *pnamep
, enum uio_seg seg
, int filemode
, int createmode
,
1318 struct vnode
**vpp
, enum create crwhy
,
1319 mode_t umask
, struct vnode
*startvp
, int fd
);
1320 int vn_create(char *pnamep
, enum uio_seg seg
, struct vattr
*vap
,
1321 enum vcexcl excl
, int mode
, struct vnode
**vpp
,
1322 enum create why
, int flag
, mode_t umask
);
1323 int vn_createat(char *pnamep
, enum uio_seg seg
, struct vattr
*vap
,
1324 enum vcexcl excl
, int mode
, struct vnode
**vpp
,
1325 enum create why
, int flag
, mode_t umask
, struct vnode
*startvp
);
1326 int vn_rdwr(enum uio_rw rw
, struct vnode
*vp
, caddr_t base
, ssize_t len
,
1327 offset_t offset
, enum uio_seg seg
, int ioflag
, rlim64_t ulimit
,
1328 cred_t
*cr
, ssize_t
*residp
);
1329 void vn_rele(struct vnode
*vp
);
1330 void vn_rele_async(struct vnode
*vp
, struct taskq
*taskq
);
1331 void vn_rele_dnlc(struct vnode
*vp
);
1332 void vn_rele_stream(struct vnode
*vp
);
1333 int vn_link(char *from
, char *to
, enum uio_seg seg
);
1334 int vn_linkat(vnode_t
*fstartvp
, char *from
, enum symfollow follow
,
1335 vnode_t
*tstartvp
, char *to
, enum uio_seg seg
);
1336 int vn_rename(char *from
, char *to
, enum uio_seg seg
);
1337 int vn_renameat(vnode_t
*fdvp
, char *fname
, vnode_t
*tdvp
, char *tname
,
/* Remove the object named by fnamep; seg and dirflag qualify the request. */
int	vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag);
1340 int vn_removeat(vnode_t
*startvp
, char *fnamep
, enum uio_seg seg
,
1342 int vn_compare(vnode_t
*vp1
, vnode_t
*vp2
);
1343 int vn_vfswlock(struct vnode
*vp
);
1344 int vn_vfswlock_wait(struct vnode
*vp
);
1345 int vn_vfsrlock(struct vnode
*vp
);
1346 int vn_vfsrlock_wait(struct vnode
*vp
);
1347 void vn_vfsunlock(struct vnode
*vp
);
1348 int vn_vfswlock_held(struct vnode
*vp
);
1349 vnode_t
*specvp(struct vnode
*vp
, dev_t dev
, vtype_t type
, struct cred
*cr
);
1350 vnode_t
*makespecvp(dev_t dev
, vtype_t type
);
1351 vn_vfslocks_entry_t
*vn_vfslocks_getlock(void *);
1352 void vn_vfslocks_rele(vn_vfslocks_entry_t
*);
1353 boolean_t
vn_is_reparse(vnode_t
*, cred_t
*, caller_context_t
*);
1355 void vn_copypath(struct vnode
*src
, struct vnode
*dst
);
1356 void vn_setpath_str(struct vnode
*vp
, const char *str
, size_t len
);
1357 void vn_setpath(vnode_t
*rootvp
, struct vnode
*startvp
, struct vnode
*vp
,
1358 const char *path
, size_t plen
);
1359 void vn_renamepath(vnode_t
*dvp
, vnode_t
*vp
, const char *nm
, size_t len
);
1361 /* Private vnode manipulation functions */
1362 void vn_clearpath(vnode_t
*, hrtime_t
);
1363 void vn_updatepath(vnode_t
*, vnode_t
*, const char *);
1366 /* Vnode event notification */
1367 void vnevent_rename_src(vnode_t
*, vnode_t
*, char *, caller_context_t
*);
1368 void vnevent_rename_dest(vnode_t
*, vnode_t
*, char *, caller_context_t
*);
1369 void vnevent_remove(vnode_t
*, vnode_t
*, char *, caller_context_t
*);
1370 void vnevent_rmdir(vnode_t
*, vnode_t
*, char *, caller_context_t
*);
1371 void vnevent_create(vnode_t
*, caller_context_t
*);
1372 void vnevent_link(vnode_t
*, caller_context_t
*);
1373 void vnevent_rename_dest_dir(vnode_t
*, caller_context_t
*ct
);
1374 void vnevent_mountedover(vnode_t
*, caller_context_t
*);
1375 void vnevent_truncate(vnode_t
*, caller_context_t
*);
1376 int vnevent_support(vnode_t
*, caller_context_t
*);
1377 void vnevent_pre_rename_src(vnode_t
*, vnode_t
*, char *,
1378 caller_context_t
*);
1379 void vnevent_pre_rename_dest(vnode_t
*, vnode_t
*, char *,
1380 caller_context_t
*);
1381 void vnevent_pre_rename_dest_dir(vnode_t
*, vnode_t
*, char *,
1382 caller_context_t
*);
1384 /* Vnode specific data */
1385 void vsd_create(uint_t
*, void (*)(void *));
1386 void vsd_destroy(uint_t
*);
1387 void *vsd_get(vnode_t
*, uint_t
);
1388 int vsd_set(vnode_t
*, uint_t
, void *);
1389 void vsd_free(vnode_t
*);
1392 * Extensible vnode attribute (xva) routines:
1393 * xva_init() initializes an xvattr_t (zero struct, init mapsize, set AT_XATTR)
1394 * xva_getxoptattr() returns a ponter to the xoptattr_t section of xvattr_t
1396 void xva_init(xvattr_t
*);
1397 xoptattr_t
*xva_getxoptattr(xvattr_t
*); /* Get ptr to xoptattr_t */
1399 void xattr_init(void); /* Initialize vnodeops for xattrs */
1401 /* GFS tunnel for xattrs */
1402 int xattr_dir_lookup(vnode_t
*, vnode_t
**, int, cred_t
*);
1405 void reparse_point_init(void);
1407 /* Context identification */
1408 u_longlong_t
fs_new_caller_id();
1410 int vn_vmpss_usepageio(vnode_t
*);
1412 /* Empty v_path placeholder */
1413 extern char *vn_vpath_empty
;
1416 * Needed for use of IS_VMODSORT() in kernel.
1418 extern uint_t pvn_vmodsort_supported
;
1421 * All changes to v_count should be done through VN_HOLD() or VN_RELE(), or
1422 * one of their variants. This makes it possible to ensure proper locking,
1423 * and to guarantee that all modifications are accompanied by a firing of
1424 * the vn-hold or vn-rele SDT DTrace probe.
1426 * Example DTrace command for tracing vnode references using these probes:
1428 * dtrace -q -n 'sdt:::vn-hold,sdt:::vn-rele
1430 * this->vp = (vnode_t *)arg0;
1431 * printf("%s %s(%p[%s]) %d\n", execname, probename, this->vp,
1432 * this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path),
1433 * this->vp->v_count)
1436 #define VN_HOLD_LOCKED(vp) { \
1437 ASSERT(mutex_owned(&(vp)->v_lock)); \
1439 DTRACE_PROBE1(vn__hold, vnode_t *, vp); \
/*
 * Take a hold on a vnode: enter the vnode's v_lock, apply
 * VN_HOLD_LOCKED(), then exit v_lock.  Expands to a brace-enclosed
 * block; vp is evaluated more than once.
 */
#define	VN_HOLD(vp) { \
	mutex_enter(&(vp)->v_lock); \
	VN_HOLD_LOCKED(vp); \
	mutex_exit(&(vp)->v_lock); \
}
1448 #define VN_RELE(vp) { \
/*
 * Release a vnode hold by handing vp and taskq to vn_rele_async().
 * Expands to a brace-enclosed block.
 */
#define	VN_RELE_ASYNC(vp, taskq) { \
	vn_rele_async(vp, taskq); \
}
1456 #define VN_RELE_LOCKED(vp) { \
1457 ASSERT(mutex_owned(&(vp)->v_lock)); \
1458 ASSERT((vp)->v_count >= 1); \
1460 DTRACE_PROBE1(vn__rele, vnode_t *, vp); \
/*
 * Set a vnode's v_vfsp, v_type and v_rdev fields in one step.
 * Expands to a brace-enclosed block; vp is evaluated three times.
 */
#define	VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev) { \
	(vp)->v_vfsp = (vfsp); \
	(vp)->v_type = (type); \
	(vp)->v_rdev = (dev); \
}
/*
 * Compare two vnodes for equality.  In general this macro should be used
 * in preference to calling VOP_CMP directly.
 *
 * Yields 1 when the two pointers are identical.  Otherwise yields 0 if
 * either pointer is null or the vnodes' vn_getops() results differ, and
 * the result of VOP_CMP(VP1, VP2, NULL) when they match.  The arguments
 * may be evaluated more than once.
 */
#define	VN_CMP(VP1, VP2)	((VP1) == (VP2) ? 1 :	\
	((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? \
	VOP_CMP(VP1, VP2, NULL) : 0))
1478 * Some well-known global vnodes used by the VM system to name pages.
1480 extern struct vnode kvps
[];
1483 KV_KVP
, /* vnode for all segkmem pages */
1484 KV_ZVP
, /* vnode for all ZFS pages */
1485 #if defined(__sparc)
1486 KV_MPVP
, /* vnode for all page_t meta-pages */
1487 KV_PROMVP
, /* vnode for all PROM pages */
1488 #endif /* __sparc */
1489 KV_MAX
/* total number of vnodes in kvps[] */
/*
 * True when vp points at one of the kvps[] entries, i.e. lies in
 * [&kvps[0], &kvps[KV_MAX]).  vp is evaluated twice.
 */
#define	VN_ISKAS(vp)		((vp) >= &kvps[0] && (vp) < &kvps[KV_MAX])
1494 #endif /* _KERNEL */
/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
 */
#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
#define	ATTR_EXEC	0x02	/* invocation from exec(2) */
#define	ATTR_COMM	0x04	/* yield common vp attributes */
#define	ATTR_HINT	0x08	/* information returned will be `hint' */
#define	ATTR_REAL	0x10	/* yield attributes of the real vp */
#define	ATTR_NOACLCHECK	0x20	/* Don't check ACL when checking permissions */
#define	ATTR_TRIGGER	0x40	/* Mount first if vnode is a trigger mount */
/*
 * Generally useful macros.
 *
 * VBSIZE() and VTOZONE() read vfs_bsize and vfs_zone through the vnode's
 * v_vfsp pointer; NULLVP and NULLVPP are typed null pointers.
 */
#define	VBSIZE(vp)	((vp)->v_vfsp->vfs_bsize)

#define	VTOZONE(vp)	((vp)->v_vfsp->vfs_zone)

#define	NULLVP		((struct vnode *)0)
#define	NULLVPP		((struct vnode **)0)
1519 * Structure used while handling asynchronous VOP_PUTPAGE operations.
1522 struct async_reqs
*a_next
; /* pointer to next arg struct */
1523 struct vnode
*a_vp
; /* vnode pointer */
1524 u_offset_t a_off
; /* offset in file */
1525 uint_t a_len
; /* size of i/o request */
1526 int a_flags
; /* flags to indicate operation type */
1527 struct cred
*a_cred
; /* cred pointer */
1528 ushort_t a_prealloced
; /* set if struct is pre-allocated */
1532 * VN_DISPOSE() -- given a page pointer, safely invoke VOP_DISPOSE().
1533 * Note that there is no guarantee that the page passed in will be
1534 * freed. If that is required, then a check after calling VN_DISPOSE would
1535 * be necessary to ensure the page was freed.
1537 #define VN_DISPOSE(pp, flag, dn, cr) { \
1538 if ((pp)->p_vnode != NULL && !VN_ISKAS((pp)->p_vnode)) \
1539 VOP_DISPOSE((pp)->p_vnode, (pp), (flag), (dn), (cr), NULL); \
1540 else if ((flag) == B_FREE) \
1541 page_free((pp), (dn)); \
1543 page_destroy((pp), (dn)); \
1546 #endif /* _KERNEL */
1552 #endif /* _SYS_VNODE_H */