/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/pathname.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/vnode_dispatch.h>
#include <sys/rwstlock.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/nbmlock.h>
#include <sys/fcntl.h>
#include <sys/fs_subr.h>
#include <sys/taskq.h>
#include <sys/fs_reparse.h>
/* Determine if this vnode is a file that is read-only */
#define	ISROFILE(vp)	\
	((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
	(vp)->v_type != VFIFO && vn_is_readonly(vp))
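/*
 * Illustrative note (not part of the original source): vn_rdwr() below uses
 * this macro to reject writes to read-only files early, roughly as:
 *
 *	if (rw == UIO_WRITE && ISROFILE(vp))
 *		return (EROFS);
 */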
/* Tunable via /etc/system; used only by admin/install */
int nfs_global_client_only;

/*
 * Array of vopstats_t for per-FS-type vopstats.  This array has the same
 * number of entries as and parallel to the vfssw table.  (Arguably, it could
 * be part of the vfssw table.)  Once it's initialized, it's accessed using
 * the same fstype index that is used to index into the vfssw table.
 */
vopstats_t **vopstats_fstype;

/* vopstats initialization template used for fast initialization via bcopy() */
static vopstats_t *vs_templatep;

/* Kmem cache handle for vsk_anchor_t allocations */
kmem_cache_t *vsk_anchor_cache;

/* file events cleanup routine */
extern void free_fopdata(vnode_t *);
/*
 * Root of AVL tree for the kstats associated with vopstats.  Lock protects
 * updates to vskstat_tree.
 */
avl_tree_t	vskstat_tree;
kmutex_t	vskstat_tree_lock;
/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;
/*
 * forward declarations for internal vnode specific data (vsd)
 */
static void *vsd_realloc(void *, size_t, size_t);

/*
 * forward declarations for reparse point functions
 */
static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
/*
 * VSD -- VNODE SPECIFIC DATA
 *	The v_data pointer is typically used by a file system to store a
 *	pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 *	However, there are times when additional project private data needs
 *	to be stored separately from the data (node) pointed to by v_data.
 *	This additional data could be stored by the file system itself or
 *	by a completely different kernel entity.  VSD provides a way for
 *	callers to obtain a key and store a pointer to private data associated
 *	with a vnode.
 *
 *	Callers are responsible for protecting the vsd by holding v_vsd_lock
 *	for calls to vsd_set() and vsd_get().
 */
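/*
 * Illustrative sketch (not part of the original source): a VSD consumer
 * might use the interfaces roughly as follows; the key, destructor, and
 * data names below are hypothetical.
 *
 *	static uint_t my_key;
 *	void *data;
 *
 *	vsd_create(&my_key, my_destructor);
 *	mutex_enter(&vp->v_vsd_lock);
 *	(void) vsd_set(vp, my_key, my_data);
 *	data = vsd_get(vp, my_key);
 *	mutex_exit(&vp->v_vsd_lock);
 */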
/*
 * vsd_lock protects:
 *	vsd_nkeys - creation and deletion of vsd keys
 *	vsd_list - insertion and deletion of vsd_node in the vsd_list
 *	vsd_destructor - adding and removing destructors to the list
 */
static kmutex_t		vsd_lock;
static uint_t		vsd_nkeys;	/* size of destructor array */
/* list of vsd_node's */
static list_t		*vsd_list = NULL;
/* per-key destructor funcs */
static void		(**vsd_destructor)(void *);
/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 * recording of the bytes transferred.  Since the code is similar
 * but small, it is nearly a duplicate.  Consequently any changes
 * to one may need to be reflected in the other.
 * Rundown of the variables:
 *	vp - Pointer to the vnode
 *	counter - Partial name structure member to update in vopstats for counts
 *	bytecounter - Partial name structure member to update in vopstats for bytes
 *	bytesval - Value to update in vopstats for bytes
 *	fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 *	vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 */

#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}

#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += bytesval;		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += bytesval;	\
		}							\
	}								\
}
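/*
 * Illustrative sketch (not part of the original source): a read wrapper
 * would update the byte counters roughly like this, where "read" and
 * "read_bytes" are member-name fragments within vopstats_t:
 *
 *	ssize_t resid_start = uiop->uio_resid;
 *	err = (*vp->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
 *	VOPSTATS_UPDATE_IO(vp, read, read_bytes,
 *	    (resid_start - uiop->uio_resid));
 */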
/*
 * If the filesystem does not support XIDs map credential
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr)	{					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
		cr = crgetmapped(cr);					\
}
/*
 * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 * numerical order of S_IFMT and vnode types.)
 */
enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};

ushort_t vttoif_tab[] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
};
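/*
 * Illustrative note (not part of the original source): these tables back
 * conversion macros such as IFTOVT()/VTTOIF() in <sys/vnode.h>, used
 * roughly as:
 *
 *	vtype = IFTOVT(mode);		(mode from a stat(2) st_mode)
 *	mode |= VTTOIF(vp->v_type);
 */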
/*
 * The system vnode cache.
 */
kmem_cache_t *vn_cache;
/*
 * Vnode operations vector.
 */
static const fs_operation_trans_def_t vn_ops_table[] = {
	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
	    (fs_generic_func_p) fs_rwunlock,
	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
	    (fs_generic_func_p) fs_nosys_map,
	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
	    (fs_generic_func_p) fs_nosys_addmap,
	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
	    (fs_generic_func_p) fs_poll,
	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
	    (fs_generic_func_p) fs_dispose,
	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
	    (fs_generic_func_p) fs_vnevent_nosupport,
	VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
	VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
};
/* Extensible attribute (xva) routines. */

/*
 * Zero out the structure, set the size of the requested/returned bitmaps,
 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
 * to the returned attributes array.
 */
void
xva_init(xvattr_t *xvap)
{
	bzero(xvap, sizeof (xvattr_t));
	xvap->xva_mapsize = XVA_MAPSIZE;
	xvap->xva_magic = XVA_MAGIC;
	xvap->xva_vattr.va_mask = AT_XVATTR;
	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
}
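/*
 * Illustrative sketch (not part of the original source): a caller typically
 * initializes an xvattr_t and then requests specific extensible attributes,
 * roughly as:
 *
 *	xvattr_t xva;
 *
 *	xva_init(&xva);
 *	XVA_SET_REQ(&xva, XAT_READONLY);
 *	... fop_getattr(vp, &xva.xva_vattr, 0, cr, NULL) ...
 */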
/*
 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
 * structure.  Otherwise, returns NULL.
 */
xoptattr_t *
xva_getxoptattr(xvattr_t *xvap)
{
	xoptattr_t *xoap = NULL;

	if (xvap->xva_vattr.va_mask & AT_XVATTR)
		xoap = &xvap->xva_xoptattrs;
	return (xoap);
}
/*
 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
 * kstat name.
 */
static int
vska_compar(const void *n1, const void *n2)
{
	ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
	ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;

	if (p1 < p2) {
		return (-1);
	} else if (p1 > p2) {
		return (1);
	} else {
		return (0);
	}
}
/*
 * Used to create a single template which will be bcopy()ed to a newly
 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
 */
static vopstats_t *
create_vopstats_template()
{
	vopstats_t *vsp;

	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
	bzero(vsp, sizeof (*vsp));	/* Start fresh */

	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
	/* fop_readdir I/O */
	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
	    KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);

	return (vsp);
}
/*
 * Creates a kstat structure associated with a vopstats structure.
 */
static kstat_t *
new_vskstat(char *ksname, vopstats_t *vsp)
{
	kstat_t *ksp;

	if (!vopstats_enabled) {
		return (NULL);
	}

	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
	    sizeof (vopstats_t)/sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
	if (ksp) {
		ksp->ks_data = vsp;
		kstat_install(ksp);
	}

	return (ksp);
}
/*
 * Called from vfsinit() to initialize the support mechanisms for vopstats
 */
void
vopstats_startup()
{
	if (!vopstats_enabled)
		return;

	/*
	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
	 * is necessary since we need to check if a kstat exists before we
	 * attempt to create it.  Also, initialize its lock.
	 */
	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
	    offsetof(vsk_anchor_t, vsk_node));
	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);

	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
	    NULL, NULL, 0);

	/*
	 * Set up the array of pointers for the vopstats-by-FS-type.
	 * The entries will be allocated/initialized as each file system
	 * goes through modload/mod_installfs.
	 */
	vopstats_fstype = (vopstats_t **)kmem_zalloc(
	    (sizeof (vopstats_t *) * nfstype), KM_SLEEP);

	/* Set up the global vopstats initialization template */
	vs_templatep = create_vopstats_template();
}
/*
 * We need to have all of the counters zeroed.
 * The initialization of the vopstats_t includes on the order of
 * 50 calls to kstat_named_init().  Rather than do that on every call,
 * we do it once in a template (vs_templatep) then bcopy it over.
 */
void
initialize_vopstats(vopstats_t *vsp)
{
	if (vsp == NULL)
		return;

	bcopy(vs_templatep, vsp, sizeof (vopstats_t));
}
/*
 * If possible, determine which vopstats by fstype to use and
 * return a pointer to the caller.
 */
vopstats_t *
get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
{
	int fstype = 0;	/* Index into vfssw[] */
	vopstats_t *vsp = NULL;

	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
	    !vopstats_enabled)
		return (NULL);
	/*
	 * Set up the fstype.  We go to so much trouble because all versions
	 * of NFS use the same fstype in their vfs even though they have
	 * distinct entries in the vfssw[] table.
	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
	 */
	if (vswp) {
		fstype = vswp - vfssw;	/* Gets us the index */
	} else {
		fstype = vfsp->vfs_fstype;
	}

	/*
	 * Point to the per-fstype vopstats. The only valid values are
	 * non-zero positive values less than the number of vfssw[] table
	 * entries.
	 */
	if (fstype > 0 && fstype < nfstype) {
		vsp = vopstats_fstype[fstype];
	}

	return (vsp);
}
648 * Generate a kstat name, create the kstat structure, and allocate a
649 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
650 * to the caller. This must only be called from a mount.
653 get_vskstat_anchor(vfs_t
*vfsp
)
655 char kstatstr
[KSTAT_STRLEN
]; /* kstat name for vopstats */
656 statvfs64_t statvfsbuf
; /* Needed to find f_fsid */
657 vsk_anchor_t
*vskp
= NULL
; /* vfs <--> kstat anchor */
658 kstat_t
*ksp
; /* Ptr to new kstat */
659 avl_index_t where
; /* Location in the AVL tree */
661 if (vfsp
== NULL
|| vfsp
->vfs_implp
== NULL
||
662 (vfsp
->vfs_flag
& VFS_STATS
) == 0 || !vopstats_enabled
)
665 /* Need to get the fsid to build a kstat name */
666 if (VFS_STATVFS(vfsp
, &statvfsbuf
) == 0) {
667 /* Create a name for our kstats based on fsid */
668 (void) snprintf(kstatstr
, KSTAT_STRLEN
, "%s%lx",
669 VOPSTATS_STR
, statvfsbuf
.f_fsid
);
671 /* Allocate and initialize the vsk_anchor_t */
672 vskp
= kmem_cache_alloc(vsk_anchor_cache
, KM_SLEEP
);
673 bzero(vskp
, sizeof (*vskp
));
674 vskp
->vsk_fsid
= statvfsbuf
.f_fsid
;
676 mutex_enter(&vskstat_tree_lock
);
677 if (avl_find(&vskstat_tree
, vskp
, &where
) == NULL
) {
678 avl_insert(&vskstat_tree
, vskp
, where
);
679 mutex_exit(&vskstat_tree_lock
);
682 * Now that we've got the anchor in the AVL
683 * tree, we can create the kstat.
685 ksp
= new_vskstat(kstatstr
, &vfsp
->vfs_vopstats
);
690 /* Oops, found one! Release memory and lock. */
691 mutex_exit(&vskstat_tree_lock
);
692 kmem_cache_free(vsk_anchor_cache
, vskp
);
700 * We're in the process of tearing down the vfs and need to cleanup
701 * the data structures associated with the vopstats. Must only be called
705 teardown_vopstats(vfs_t
*vfsp
)
710 if (vfsp
== NULL
|| vfsp
->vfs_implp
== NULL
||
711 (vfsp
->vfs_flag
& VFS_STATS
) == 0 || !vopstats_enabled
)
714 /* This is a safe check since VFS_STATS must be set (see above) */
715 if ((vskap
= vfsp
->vfs_vskap
) == NULL
)
718 /* Whack the pointer right away */
719 vfsp
->vfs_vskap
= NULL
;
721 /* Lock the tree, remove the node, and delete the kstat */
722 mutex_enter(&vskstat_tree_lock
);
723 if (avl_find(&vskstat_tree
, vskap
, &where
)) {
724 avl_remove(&vskstat_tree
, vskap
);
727 if (vskap
->vsk_ksp
) {
728 kstat_delete(vskap
->vsk_ksp
);
730 mutex_exit(&vskstat_tree_lock
);
732 kmem_cache_free(vsk_anchor_cache
, vskap
);
736 * Read or write a vnode. Called from kernel code.
747 rlim64_t ulimit
, /* meaningful only if rw is UIO_WRITE */
756 if (rw
== UIO_WRITE
&& ISROFILE(vp
))
762 VOPXID_MAP_CR(vp
, cr
);
768 uio
.uio_loffset
= offset
;
769 uio
.uio_segflg
= (short)seg
;
771 uio
.uio_llimit
= ulimit
;
774 * We have to enter the critical region before calling fop_rwlock
775 * to avoid a deadlock with ufs.
777 if (nbl_need_check(vp
)) {
780 nbl_start_crit(vp
, RW_READER
);
782 error
= nbl_svmand(vp
, cr
, &svmand
);
785 if (nbl_conflict(vp
, rw
== UIO_WRITE
? NBL_WRITE
: NBL_READ
,
786 uio
.uio_offset
, uio
.uio_resid
, svmand
, NULL
)) {
792 (void) fop_rwlock(vp
,
793 rw
== UIO_WRITE
? V_WRITELOCK_TRUE
: V_WRITELOCK_FALSE
, NULL
);
794 if (rw
== UIO_WRITE
) {
795 uio
.uio_fmode
= FWRITE
;
796 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
797 error
= fop_write(vp
, &uio
, ioflag
, cr
, NULL
);
799 uio
.uio_fmode
= FREAD
;
800 uio
.uio_extflg
= UIO_COPY_CACHED
;
801 error
= fop_read(vp
, &uio
, ioflag
, cr
, NULL
);
804 rw
== UIO_WRITE
? V_WRITELOCK_TRUE
: V_WRITELOCK_FALSE
, NULL
);
806 *residp
= uio
.uio_resid
;
807 else if (uio
.uio_resid
)
817 * Release a vnode. Call fop_inactive on last reference or
818 * decrement reference count.
820 * To avoid race conditions, the v_count is left at 1 for
821 * the call to fop_inactive. This prevents another thread
822 * from reclaiming and releasing the vnode *before* the
823 * fop_inactive routine has a chance to destroy the vnode.
824 * We can't have more than 1 thread calling fop_inactive
830 VERIFY(vp
->v_count
> 0);
831 mutex_enter(&vp
->v_lock
);
832 if (vp
->v_count
== 1) {
833 mutex_exit(&vp
->v_lock
);
834 fop_inactive(vp
, CRED(), NULL
);
838 mutex_exit(&vp
->v_lock
);
842 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
843 * as a single reference, so v_count is not decremented until the last DNLC hold
844 * is released. This makes it possible to distinguish vnodes that are referenced
848 vn_rele_dnlc(vnode_t
*vp
)
850 VERIFY((vp
->v_count
> 0) && (vp
->v_count_dnlc
> 0));
851 mutex_enter(&vp
->v_lock
);
852 if (--vp
->v_count_dnlc
== 0) {
853 if (vp
->v_count
== 1) {
854 mutex_exit(&vp
->v_lock
);
855 fop_inactive(vp
, CRED(), NULL
);
860 mutex_exit(&vp
->v_lock
);
864 * Like vn_rele() except that it clears v_stream under v_lock.
865 * This is used by sockfs when it dismantles the association between
866 * the sockfs node and the vnode in the underlying file system.
867 * v_lock has to be held to prevent a thread coming through the lookupname
868 * path from accessing a stream head that is going away.
871 vn_rele_stream(vnode_t
*vp
)
873 VERIFY(vp
->v_count
> 0);
874 mutex_enter(&vp
->v_lock
);
876 if (vp
->v_count
== 1) {
877 mutex_exit(&vp
->v_lock
);
878 fop_inactive(vp
, CRED(), NULL
);
882 mutex_exit(&vp
->v_lock
);
886 vn_rele_inactive(vnode_t
*vp
)
888 fop_inactive(vp
, CRED(), NULL
);
892 * Like vn_rele() except if we are going to call fop_inactive() then do it
893 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
894 * the file system as a result of releasing the vnode. Note, file systems
895 * already have to handle the race where the vnode is incremented before the
896 * inactive routine is called and does its locking.
898 * Warning: Excessive use of this routine can lead to performance problems.
899 * This is because taskqs throttle back allocation if too many are created.
902 vn_rele_async(vnode_t
*vp
, taskq_t
*taskq
)
904 VERIFY(vp
->v_count
> 0);
905 mutex_enter(&vp
->v_lock
);
906 if (vp
->v_count
== 1) {
907 mutex_exit(&vp
->v_lock
);
908 VERIFY(taskq_dispatch(taskq
, (task_func_t
*)vn_rele_inactive
,
909 vp
, TQ_SLEEP
) != (uintptr_t)NULL
);
913 mutex_exit(&vp
->v_lock
);
926 return (vn_openat(pnamep
, seg
, filemode
, createmode
, vpp
, crwhy
,
932 * Open/create a vnode.
933 * This may be callable by the kernel, the only known use
934 * of user context being that the current user credentials
935 * are used for permissions. crwhy is defined iff filemode & FCREAT.
946 struct vnode
*startvp
,
955 int shrlock_done
= 0;
957 enum symfollow follow
;
958 int estale_retry
= 0;
960 struct shr_locowner shr_own
;
962 if (filemode
& FSEARCH
)
963 filemode
|= FDIRECTORY
;
967 if (filemode
& FREAD
)
969 if (filemode
& (FWRITE
|FTRUNC
))
971 if (filemode
& (FSEARCH
|FEXEC
|FXATTRDIROPEN
))
974 /* symlink interpretation */
975 if (filemode
& FNOFOLLOW
)
980 if (filemode
& FAPPEND
)
981 accessflags
|= V_APPEND
;
984 if (filemode
& FCREAT
&& !(filemode
& FDIRECTORY
)) {
987 /* Wish to create a file. */
988 vattr
.va_type
= VREG
;
989 vattr
.va_mode
= createmode
;
990 vattr
.va_mask
= AT_TYPE
|AT_MODE
;
991 if (filemode
& FTRUNC
) {
993 vattr
.va_mask
|= AT_SIZE
;
995 if (filemode
& FEXCL
)
1001 vn_createat(pnamep
, seg
, &vattr
, excl
, mode
, &vp
, crwhy
,
1002 (filemode
& ~(FTRUNC
|FEXCL
)), umask
, startvp
))
1005 /* Wish to open a file. Just look it up. */
1006 if (error
= lookupnameat(pnamep
, seg
, follow
,
1007 NULLVPP
, &vp
, startvp
)) {
1008 if ((error
== ESTALE
) &&
1009 fs_need_estale_retry(estale_retry
++))
1015 * Get the attributes to check whether file is large.
1016 * We do this only if the FOFFMAX flag is not set and
1017 * only for regular files.
1020 if (!(filemode
& FOFFMAX
) && (vp
->v_type
== VREG
)) {
1021 vattr
.va_mask
= AT_SIZE
;
1022 if ((error
= fop_getattr(vp
, &vattr
, 0,
1026 if (vattr
.va_size
> (uoff_t
)MAXOFF32_T
) {
1028 * Large File API - regular open fails
1029 * if FOFFMAX flag is set in file mode
1036 * Can't write directories, active texts, or
1037 * read-only filesystems. Can't truncate files
1038 * on which mandatory locking is in effect.
1040 if (filemode
& (FWRITE
|FTRUNC
)) {
1042 * Allow writable directory if VDIROPEN flag is set.
1044 if (vp
->v_type
== VDIR
&& !(vp
->v_flag
& VDIROPEN
)) {
1053 * Can't truncate files on which
1054 * sysv mandatory locking is in effect.
1056 if (filemode
& FTRUNC
) {
1059 if (fop_realvp(vp
, &rvp
, NULL
) != 0)
1061 if (rvp
->v_filocks
!= NULL
) {
1062 vattr
.va_mask
= AT_MODE
;
1063 if ((error
= fop_getattr(vp
,
1064 &vattr
, 0, CRED(), NULL
)) == 0 &&
1065 MANDLOCK(vp
, vattr
.va_mode
))
1073 * Check permissions.
1075 if (error
= fop_access(vp
, mode
, accessflags
, CRED(), NULL
))
1078 * Require FDIRECTORY to return a directory.
1079 * Require FEXEC to return a regular file.
1081 if ((filemode
& FDIRECTORY
) && vp
->v_type
!= VDIR
) {
1085 if ((filemode
& FEXEC
) && vp
->v_type
!= VREG
) {
1086 error
= ENOEXEC
; /* XXX: error code? */
1092 * Do remaining checks for FNOFOLLOW and FNOLINKS.
1094 if ((filemode
& FNOFOLLOW
) && vp
->v_type
== VLNK
) {
1098 if (filemode
& FNOLINKS
) {
1099 vattr
.va_mask
= AT_NLINK
;
1100 if ((error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
))) {
1103 if (vattr
.va_nlink
!= 1) {
1110 * Opening a socket corresponding to the AF_UNIX pathname
1111 * in the filesystem name space is not supported.
1112 * However, VSOCK nodes in namefs are supported in order
1113 * to make fattach work for sockets.
1115 * XXX This uses fop_realvp to distinguish between
1116 * an unopened namefs node (where fop_realvp returns a
1117 * different VSOCK vnode) and a VSOCK created by vn_create
1118 * in some file system (where fop_realvp would never return
1119 * a different vnode).
1121 if (vp
->v_type
== VSOCK
) {
1124 error
= fop_realvp(vp
, &nvp
, NULL
);
1125 if (error
!= 0 || nvp
== NULL
|| nvp
== vp
||
1126 nvp
->v_type
!= VSOCK
) {
1132 if ((vp
->v_type
== VREG
) && nbl_need_check(vp
)) {
1133 /* get share reservation */
1135 if (filemode
& FWRITE
)
1136 shr
.s_access
|= F_WRACC
;
1137 if (filemode
& FREAD
)
1138 shr
.s_access
|= F_RDACC
;
1141 shr
.s_pid
= ttoproc(curthread
)->p_pid
;
1142 shr_own
.sl_pid
= shr
.s_pid
;
1144 shr
.s_own_len
= sizeof (shr_own
);
1145 shr
.s_owner
= (caddr_t
)&shr_own
;
1146 error
= fop_shrlock(vp
, F_SHARE_NBMAND
, &shr
, filemode
, CRED(),
1152 /* nbmand conflict check if truncating file */
1153 if ((filemode
& FTRUNC
) && !(filemode
& FCREAT
)) {
1154 nbl_start_crit(vp
, RW_READER
);
1157 vattr
.va_mask
= AT_SIZE
;
1158 if (error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
))
1160 if (nbl_conflict(vp
, NBL_WRITE
, 0, vattr
.va_size
, 0,
1169 * Do opening protocol.
1171 error
= fop_open(&vp
, filemode
, CRED(), NULL
);
1177 * Truncate if required.
1179 if ((filemode
& FTRUNC
) && !(filemode
& FCREAT
)) {
1181 vattr
.va_mask
= AT_SIZE
;
1182 if ((error
= fop_setattr(vp
, &vattr
, 0, CRED(), NULL
)) != 0)
1186 ASSERT(vp
->v_count
> 0);
1194 (void) fop_close(vp
, filemode
, 1, (offset_t
)0, CRED(),
1200 (void) fop_shrlock(vp
, F_UNSHARE
, &shr
, 0, CRED(),
1206 * The following clause was added to handle a problem
1207 * with NFS consistency. It is possible that a lookup
1208 * of the file to be opened succeeded, but the file
1209 * itself doesn't actually exist on the server. This
1210 * is chiefly due to the DNLC containing an entry for
1211 * the file which has been removed on the server. In
1212 * this case, we just start over. If there was some
1213 * other cause for the ESTALE error, then the lookup
1214 * of the file will fail and the error will be returned
1215 * above instead of looping around from here.
1218 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1226 * The following two accessor functions are for the NFSv4 server. Since there
1227 * is no fop_open_UP/DOWNGRADE we need a way for the NFS server to keep the
1228 * vnode open counts correct when a client "upgrades" an open or does an
1229 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1230 * open mode (add or subtract read or write), but also change the share/deny
1231 * modes. However, share reservations are not integrated with OPEN, yet, so
1232 * we need to handle each separately. These functions are cleaner than having
1233 * the NFS server manipulate the counts directly, however, nobody else should
1234 * use these functions.
1241 ASSERT(vp
->v_type
== VREG
);
1243 if (filemode
& FREAD
)
1244 atomic_inc_32(&vp
->v_rdcnt
);
1245 if (filemode
& FWRITE
)
1246 atomic_inc_32(&vp
->v_wrcnt
);
1255 ASSERT(vp
->v_type
== VREG
);
1257 if (filemode
& FREAD
) {
1258 ASSERT(vp
->v_rdcnt
> 0);
1259 atomic_dec_32(&vp
->v_rdcnt
);
1261 if (filemode
& FWRITE
) {
1262 ASSERT(vp
->v_wrcnt
> 0);
1263 atomic_dec_32(&vp
->v_wrcnt
);
1280 return (vn_createat(pnamep
, seg
, vap
, excl
, mode
, vpp
, why
, flag
,
1285 * Create a vnode (makenode).
1298 struct vnode
*startvp
)
1300 struct vnode
*dvp
; /* ptr to parent dir vnode */
1301 struct vnode
*vp
= NULL
;
1306 enum symfollow follow
;
1307 int estale_retry
= 0;
1308 uint32_t auditing
= AU_AUDITING();
1310 ASSERT((vap
->va_mask
& (AT_TYPE
|AT_MODE
)) == (AT_TYPE
|AT_MODE
));
1312 /* symlink interpretation */
1313 if ((flag
& FNOFOLLOW
) || excl
== EXCL
)
1317 flag
&= ~(FNOFOLLOW
|FNOLINKS
);
1322 * If new object is a file, call lower level to create it.
1323 * Note that it is up to the lower level to enforce exclusive
1324 * creation, if the file is already there.
1325 * This allows the lower level to do whatever
1326 * locking or protocol that is needed to prevent races.
1327 * If the new object is directory call lower level to make
1328 * the new directory, with "." and "..".
1330 if (error
= pn_get(pnamep
, seg
, &pn
))
1333 audit_vncreate_start();
1337 * lookup will find the parent directory for the vnode.
1338 * When it is done the pn holds the name of the entry
1340 * If this is a non-exclusive create we also find the node itself.
1342 error
= lookuppnat(&pn
, NULL
, follow
, &dvp
,
1343 (excl
== EXCL
) ? NULLVPP
: vpp
, startvp
);
1346 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1348 if (why
== CRMKDIR
&& error
== EINVAL
)
1349 error
= EEXIST
; /* SVID */
1354 vap
->va_mode
&= ~VSVTX
;
1357 * If default ACLs are defined for the directory don't apply the
1358 * umask if umask is passed.
1365 vsec
.vsa_aclcnt
= 0;
1366 vsec
.vsa_aclentp
= NULL
;
1367 vsec
.vsa_dfaclcnt
= 0;
1368 vsec
.vsa_dfaclentp
= NULL
;
1369 vsec
.vsa_mask
= VSA_DFACLCNT
;
1370 error
= fop_getsecattr(dvp
, &vsec
, 0, CRED(), NULL
);
1372 * If error is ENOSYS then treat it as no error
1373 * Don't want to force all file systems to support
1374 * aclent_t style of ACL's.
1376 if (error
== ENOSYS
)
1384 * Apply the umask if no default ACLs.
1386 if (vsec
.vsa_dfaclcnt
== 0)
1387 vap
->va_mode
&= ~umask
;
1390 * fop_getsecattr() may have allocated memory for
1391 * ACLs we didn't request, so double-check and
1392 * free it if necessary.
1394 if (vsec
.vsa_aclcnt
&& vsec
.vsa_aclentp
!= NULL
)
1395 kmem_free((caddr_t
)vsec
.vsa_aclentp
,
1396 vsec
.vsa_aclcnt
* sizeof (aclent_t
));
1397 if (vsec
.vsa_dfaclcnt
&& vsec
.vsa_dfaclentp
!= NULL
)
1398 kmem_free((caddr_t
)vsec
.vsa_dfaclentp
,
1399 vsec
.vsa_dfaclcnt
* sizeof (aclent_t
));
1404 * In general we want to generate EROFS if the file system is
1405 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
1406 * documents the open system call, and it says that O_CREAT has no
1407 * effect if the file already exists. Bug 1119649 states
1408 * that open(path, O_CREAT, ...) fails when attempting to open an
1409 * existing file on a read only file system. Thus, the first part
1410 * of the following if statement has 3 checks:
1411 * if the file exists &&
1412 * it is being open with write access &&
1413 * the file system is read only
1414 * then generate EROFS
1416 if ((*vpp
!= NULL
&& (mode
& VWRITE
) && ISROFILE(*vpp
)) ||
1417 (*vpp
== NULL
&& dvp
->v_vfsp
->vfs_flag
& VFS_RDONLY
)) {
1421 } else if (excl
== NONEXCL
&& *vpp
!= NULL
) {
1425 * File already exists. If a mandatory lock has been
1426 * applied, return error.
1429 if (fop_realvp(vp
, &rvp
, NULL
) != 0)
1431 if ((vap
->va_mask
& AT_SIZE
) && nbl_need_check(vp
)) {
1432 nbl_start_crit(vp
, RW_READER
);
1435 if (rvp
->v_filocks
!= NULL
|| rvp
->v_shrlocks
!= NULL
) {
1436 vattr
.va_mask
= AT_MODE
|AT_SIZE
;
1437 if (error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
)) {
1440 if (MANDLOCK(vp
, vattr
.va_mode
)) {
1445 * File cannot be truncated if non-blocking mandatory
1446 * locks are currently on the file.
1448 if ((vap
->va_mask
& AT_SIZE
) && in_crit
) {
1452 offset
= vap
->va_size
> vattr
.va_size
?
1453 vattr
.va_size
: vap
->va_size
;
1454 length
= vap
->va_size
> vattr
.va_size
?
1455 vap
->va_size
- vattr
.va_size
:
1456 vattr
.va_size
- vap
->va_size
;
1457 if (nbl_conflict(vp
, NBL_WRITE
, offset
,
1466 * If the file is the root of a VFS, we've crossed a
1467 * mount point and the "containing" directory that we
1468 * acquired above (dvp) is irrelevant because it's in
1469 * a different file system. We apply fop_create to the
1470 * target itself instead of to the containing directory
1471 * and supply a null path name to indicate (conventionally)
1472 * the node itself as the "component" of interest.
1474 * The call to fop_create() is necessary to ensure
1475 * that the appropriate permission checks are made,
1476 * i.e. EISDIR, EACCES, etc. We already know that vpp
1477 * exists since we are in the else condition where this
1480 if (vp
->v_flag
& VROOT
) {
1481 ASSERT(why
!= CRMKDIR
);
1482 error
= fop_create(vp
, "", vap
, excl
, mode
, vpp
,
1483 CRED(), flag
, NULL
, NULL
);
1485 * If the create succeeded, it will have created a
1486 * new reference on a new vnode (*vpp) in the child
1487 * file system, so we want to drop our reference on
1488 * the old (vp) upon exit.
1494 * Large File API - non-large open (FOFFMAX flag not set)
1495 * of regular file fails if the file size exceeds MAXOFF32_T.
1497 if (why
!= CRMKDIR
&&
1498 !(flag
& FOFFMAX
) &&
1499 (vp
->v_type
== VREG
)) {
1500 vattr
.va_mask
= AT_SIZE
;
1501 if ((error
= fop_getattr(vp
, &vattr
, 0,
1505 if ((vattr
.va_size
> (uoff_t
)MAXOFF32_T
)) {
1514 * Call mkdir() if specified, otherwise create().
1516 int must_be_dir
= pn_fixslash(&pn
); /* trailing '/'? */
1520 * N.B., if vn_createat() ever requests
1521 * case-insensitive behavior then it will need
1522 * to be passed to fop_mkdir(). fop_create()
1523 * will already get it via "flag"
1525 error
= fop_mkdir(dvp
, pn
.pn_path
, vap
, vpp
, CRED(),
1527 else if (!must_be_dir
)
1528 error
= fop_create(dvp
, pn
.pn_path
, vap
,
1529 excl
, mode
, vpp
, CRED(), flag
, NULL
, NULL
);
1537 audit_vncreate_finish(*vpp
, error
);
1549 * The following clause was added to handle a problem
1550 * with NFS consistency. It is possible that a lookup
1551 * of the file to be created succeeded, but the file
1552 * itself doesn't actually exist on the server. This
1553 * is chiefly due to the DNLC containing an entry for
1554 * the file which has been removed on the server. In
1555 * this case, we just start over. If there was some
1556 * other cause for the ESTALE error, then the lookup
1557 * of the file will fail and the error will be returned
1558 * above instead of looping around from here.
1560 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1566 vn_link(char *from
, char *to
, enum uio_seg seg
)
1568 return (vn_linkat(NULL
, from
, NO_FOLLOW
, NULL
, to
, seg
));
1572 vn_linkat(vnode_t
*fstartvp
, char *from
, enum symfollow follow
,
1573 vnode_t
*tstartvp
, char *to
, enum uio_seg seg
)
1575 struct vnode
*fvp
; /* from vnode ptr */
1576 struct vnode
*tdvp
; /* to directory vnode ptr */
1581 int estale_retry
= 0;
1582 uint32_t auditing
= AU_AUDITING();
1586 if (error
= pn_get(to
, seg
, &pn
))
1588 if (auditing
&& fstartvp
!= NULL
)
1589 audit_setfsat_path(1);
1590 if (error
= lookupnameat(from
, seg
, follow
, NULLVPP
, &fvp
, fstartvp
))
1592 if (auditing
&& tstartvp
!= NULL
)
1593 audit_setfsat_path(3);
1594 if (error
= lookuppnat(&pn
, NULL
, NO_FOLLOW
, &tdvp
, NULLVPP
, tstartvp
))
1597 * Make sure both source vnode and target directory vnode are
1598 * in the same vfs and that it is writeable.
1600 vattr
.va_mask
= AT_FSID
;
1601 if (error
= fop_getattr(fvp
, &vattr
, 0, CRED(), NULL
))
1603 fsid
= vattr
.va_fsid
;
1604 vattr
.va_mask
= AT_FSID
;
1605 if (error
= fop_getattr(tdvp
, &vattr
, 0, CRED(), NULL
))
1607 if (fsid
!= vattr
.va_fsid
) {
1611 if (tdvp
->v_vfsp
->vfs_flag
& VFS_RDONLY
) {
1618 (void) pn_fixslash(&pn
);
1619 error
= fop_link(tdvp
, fvp
, pn
.pn_path
, CRED(), NULL
, 0);
1626 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1632 vn_rename(char *from
, char *to
, enum uio_seg seg
)
1634 return (vn_renameat(NULL
, from
, NULL
, to
, seg
));
1638 vn_renameat(vnode_t
*fdvp
, char *fname
, vnode_t
*tdvp
,
1639 char *tname
, enum uio_seg seg
)
1643 struct pathname fpn
; /* from pathname */
1644 struct pathname tpn
; /* to pathname */
1646 int in_crit_src
, in_crit_targ
;
1647 vnode_t
*fromvp
, *fvp
;
1648 vnode_t
*tovp
, *targvp
;
1649 int estale_retry
= 0;
1650 uint32_t auditing
= AU_AUDITING();
1653 fvp
= fromvp
= tovp
= targvp
= NULL
;
1654 in_crit_src
= in_crit_targ
= 0;
1656 * Get to and from pathnames.
1658 if (error
= pn_get(fname
, seg
, &fpn
))
1660 if (error
= pn_get(tname
, seg
, &tpn
)) {
1666 * First we need to resolve the correct directories
1667 * The passed in directories may only be a starting point,
1668 * but we need the real directories the file(s) live in.
1669 * For example the fname may be something like usr/lib/sparc
1670 * and we were passed in the / directory, but we need to
1671 * use the lib directory for the rename.
1674 if (auditing
&& fdvp
!= NULL
)
1675 audit_setfsat_path(1);
1677 * Lookup to and from directories.
1679 if (error
= lookuppnat(&fpn
, NULL
, NO_FOLLOW
, &fromvp
, &fvp
, fdvp
)) {
1684 * Make sure there is an entry.
1691 if (auditing
&& tdvp
!= NULL
)
1692 audit_setfsat_path(3);
1693 if (error
= lookuppnat(&tpn
, NULL
, NO_FOLLOW
, &tovp
, &targvp
, tdvp
)) {
1698 * Make sure both the from vnode directory and the to directory
1699 * are in the same vfs and the to directory is writable.
1700 * We check fsid's, not vfs pointers, so loopback fs works.
1702 if (fromvp
!= tovp
) {
1703 vattr
.va_mask
= AT_FSID
;
1704 if (error
= fop_getattr(fromvp
, &vattr
, 0, CRED(), NULL
))
1706 fsid
= vattr
.va_fsid
;
1707 vattr
.va_mask
= AT_FSID
;
1708 if (error
= fop_getattr(tovp
, &vattr
, 0, CRED(), NULL
))
1710 if (fsid
!= vattr
.va_fsid
) {
1716 if (tovp
->v_vfsp
->vfs_flag
& VFS_RDONLY
) {
1722 * Make sure "from" vp is not a mount point.
1723 * Note, lookup did traverse() already, so
1724 * we'll be looking at the mounted FS root.
1725 * (but allow files like mnttab)
1727 if ((fvp
->v_flag
& VROOT
) != 0 && fvp
->v_type
== VDIR
) {
1732 if (targvp
&& (fvp
!= targvp
)) {
1733 nbl_start_crit(targvp
, RW_READER
);
1735 if (nbl_conflict(targvp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
1741 if (nbl_need_check(fvp
)) {
1742 nbl_start_crit(fvp
, RW_READER
);
1744 if (nbl_conflict(fvp
, NBL_RENAME
, 0, 0, 0, NULL
)) {
1753 (void) pn_fixslash(&tpn
);
1754 error
= fop_rename(fromvp
, fpn
.pn_path
, tovp
, tpn
.pn_path
, CRED(),
1763 nbl_end_crit(targvp
);
1772 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1778 * Remove a file or directory.
1781 vn_remove(char *fnamep
, enum uio_seg seg
, enum rm dirflag
)
1783 return (vn_removeat(NULL
, fnamep
, seg
, dirflag
));
1787 vn_removeat(vnode_t
*startvp
, char *fnamep
, enum uio_seg seg
, enum rm dirflag
)
1789 struct vnode
*vp
; /* entry vnode */
1790 struct vnode
*dvp
; /* ptr to parent dir vnode */
1791 struct vnode
*coveredvp
;
1792 struct pathname pn
; /* name of entry */
1796 struct vfs
*dvfsp
; /* ptr to parent dir vfs */
1798 int estale_retry
= 0;
1801 if (error
= pn_get(fnamep
, seg
, &pn
))
1804 if (error
= lookuppnat(&pn
, NULL
, NO_FOLLOW
, &dvp
, &vp
, startvp
)) {
1806 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1812 * Make sure there is an entry.
1820 dvfsp
= dvp
->v_vfsp
;
1823 * If the named file is the root of a mounted filesystem, fail,
1824 * unless it's marked unlinkable. In that case, unmount the
1825 * filesystem and proceed to unlink the covered vnode. (If the
1826 * covered vnode is a directory, use rmdir instead of unlink,
1827 * to avoid file system corruption.)
1829 if (vp
->v_flag
& VROOT
) {
1830 if ((vfsp
->vfs_flag
& VFS_UNLINKABLE
) == 0) {
1836 * Namefs specific code starts here.
1839 if (dirflag
== RMDIRECTORY
) {
1841 * User called rmdir(2) on a file that has
1842 * been namefs mounted on top of. Since
1843 * namefs doesn't allow directories to
1844 * be mounted on other files we know
 * vp is not of type VDIR, so fail the operation.
1852 * If VROOT is still set after grabbing vp->v_lock,
 * no one has finished nm_unmount so far and coveredvp
1855 * If we manage to grab vn_vfswlock(coveredvp) before releasing
1856 * vp->v_lock, any race window is eliminated.
1859 mutex_enter(&vp
->v_lock
);
1860 if ((vp
->v_flag
& VROOT
) == 0) {
1861 /* Someone beat us to the unmount */
1862 mutex_exit(&vp
->v_lock
);
1867 coveredvp
= vfsp
->vfs_vnodecovered
;
1870 * Note: Implementation of vn_vfswlock shows that ordering of
1871 * v_lock / vn_vfswlock is not an issue here.
1873 error
= vn_vfswlock(coveredvp
);
1874 mutex_exit(&vp
->v_lock
);
1881 error
= dounmount(vfsp
, 0, CRED());
1884 * Unmounted the namefs file system; now get
1885 * the object it was mounted over.
1889 * If namefs was mounted over a directory, then
1890 * we want to use rmdir() instead of unlink().
1892 if (vp
->v_type
== VDIR
)
1893 dirflag
= RMDIRECTORY
;
1900 * Make sure filesystem is writeable.
1901 * We check the parent directory's vfs in case this is an lofs vnode.
1903 if (dvfsp
&& dvfsp
->vfs_flag
& VFS_RDONLY
) {
1911 * If there is the possibility of an nbmand share reservation, make
1912 * sure it's okay to remove the file. Keep a reference to the
1913 * vnode, so that we can exit the nbl critical region after
1914 * calling fop_remove.
1915 * If there is no possibility of an nbmand share reservation,
1916 * release the vnode reference now. Filesystems like NFS may
1917 * behave differently if there is an extra reference, so get rid of
1918 * this one. Fortunately, we can't have nbmand mounts on NFS
1921 if (nbl_need_check(vp
)) {
1922 nbl_start_crit(vp
, RW_READER
);
1924 if (nbl_conflict(vp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
1933 if (dirflag
== RMDIRECTORY
) {
1935 * Caller is using rmdir(2), which can only be applied to
1938 if (vtype
!= VDIR
) {
1942 proc_t
*pp
= curproc
;
1944 mutex_enter(&pp
->p_lock
);
1945 cwd
= PTOU(pp
)->u_cdir
;
1947 mutex_exit(&pp
->p_lock
);
1948 error
= fop_rmdir(dvp
, pn
.pn_path
, cwd
, CRED(),
1954 * Unlink(2) can be applied to anything.
1956 error
= fop_remove(dvp
, pn
.pn_path
, CRED(), NULL
, 0);
1969 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1975 * Utility function to compare equality of vnodes.
1976 * Compare the underlying real vnodes, if there are underlying vnodes.
1977 * This is a more thorough comparison than the VN_CMP() macro provides.
1980 vn_compare(vnode_t
*vp1
, vnode_t
*vp2
)
1984 if (vp1
!= NULL
&& fop_realvp(vp1
, &realvp
, NULL
) == 0)
1986 if (vp2
!= NULL
&& fop_realvp(vp2
, &realvp
, NULL
) == 0)
1988 return (VN_CMP(vp1
, vp2
));
/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 */
#define	NUM_BUCKETS	1023

struct vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};
/*
 * Total number of buckets will be NUM_BUCKETS + 1 .
 */
#pragma	align	64(vn_vfslocks_buckets)
static struct vn_vfslocks_bucket vn_vfslocks_buckets[NUM_BUCKETS + 1];

#define	VN_VFSLOCKS_SHIFT	9

#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)

/*
 * vn_vfslocks_getlock() uses an HASH scheme to generate
 * rwstlock using vfs/vnode pointer passed to it.
 *
 * vn_vfslocks_rele() releases a reference in the
 * HASH table which allows the entry allocated by
 * vn_vfslocks_getlock() to be freed at a later
 * stage when the refcount drops to zero.
 */
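/*
 * Illustrative sketch (not part of the original source): callers pair the
 * two routines roughly as follows (compare vn_vfswlock()/vn_vfsunlock()
 * later in this file):
 *
 *	vn_vfslocks_entry_t *vpvfsentry;
 *
 *	vpvfsentry = vn_vfslocks_getlock(vp);
 *	if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
 *		return (0);
 *	vn_vfslocks_rele(vpvfsentry);
 *	return (EBUSY);
 */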
2025 vn_vfslocks_entry_t
*
2026 vn_vfslocks_getlock(void *vfsvpptr
)
2028 struct vn_vfslocks_bucket
*bp
;
2029 vn_vfslocks_entry_t
*vep
;
2030 vn_vfslocks_entry_t
*tvep
;
2032 ASSERT(vfsvpptr
!= NULL
);
2033 bp
= &vn_vfslocks_buckets
[VN_VFSLOCKS_HASH(vfsvpptr
)];
2035 mutex_enter(&bp
->vb_lock
);
2036 for (vep
= bp
->vb_list
; vep
!= NULL
; vep
= vep
->ve_next
) {
2037 if (vep
->ve_vpvfs
== vfsvpptr
) {
2039 mutex_exit(&bp
->vb_lock
);
2043 mutex_exit(&bp
->vb_lock
);
2044 vep
= kmem_alloc(sizeof (*vep
), KM_SLEEP
);
2045 rwst_init(&vep
->ve_lock
, NULL
, RW_DEFAULT
, NULL
);
2046 vep
->ve_vpvfs
= (char *)vfsvpptr
;
2048 mutex_enter(&bp
->vb_lock
);
2049 for (tvep
= bp
->vb_list
; tvep
!= NULL
; tvep
= tvep
->ve_next
) {
2050 if (tvep
->ve_vpvfs
== vfsvpptr
) {
2052 mutex_exit(&bp
->vb_lock
);
 * There is already an entry in the hash;
 * destroy what we just allocated.
2058 rwst_destroy(&vep
->ve_lock
);
2059 kmem_free(vep
, sizeof (*vep
));
2063 vep
->ve_next
= bp
->vb_list
;
2065 mutex_exit(&bp
->vb_lock
);
2070 vn_vfslocks_rele(vn_vfslocks_entry_t
*vepent
)
2072 struct vn_vfslocks_bucket
*bp
;
2073 vn_vfslocks_entry_t
*vep
;
2074 vn_vfslocks_entry_t
*pvep
;
2076 ASSERT(vepent
!= NULL
);
2077 ASSERT(vepent
->ve_vpvfs
!= NULL
);
2079 bp
= &vn_vfslocks_buckets
[VN_VFSLOCKS_HASH(vepent
->ve_vpvfs
)];
2081 mutex_enter(&bp
->vb_lock
);
2082 vepent
->ve_refcnt
--;
2084 if ((int32_t)vepent
->ve_refcnt
< 0)
2085 cmn_err(CE_PANIC
, "vn_vfslocks_rele: refcount negative");
2087 if (vepent
->ve_refcnt
== 0) {
2088 for (vep
= bp
->vb_list
; vep
!= NULL
; vep
= vep
->ve_next
) {
2089 if (vep
->ve_vpvfs
== vepent
->ve_vpvfs
) {
2090 if (bp
->vb_list
== vep
)
2091 bp
->vb_list
= vep
->ve_next
;
2094 pvep
->ve_next
= vep
->ve_next
;
2096 mutex_exit(&bp
->vb_lock
);
2097 rwst_destroy(&vep
->ve_lock
);
2098 kmem_free(vep
, sizeof (*vep
));
2103 cmn_err(CE_PANIC
, "vn_vfslocks_rele: vp/vfs not found");
2105 mutex_exit(&bp
->vb_lock
);
2109 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2110 * lock protecting the v_vfsmountedhere field.
2111 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2112 * except that it blocks to acquire the lock VVFSLOCK.
2114 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2115 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2116 * need the non-blocking version of the writers lock i.e. vn_vfswlock
2119 vn_vfswlock_wait(vnode_t
*vp
)
2122 vn_vfslocks_entry_t
*vpvfsentry
;
2125 vpvfsentry
= vn_vfslocks_getlock(vp
);
2126 retval
= rwst_enter_sig(&vpvfsentry
->ve_lock
, RW_WRITER
);
2128 if (retval
== EINTR
) {
2129 vn_vfslocks_rele(vpvfsentry
);
2136 vn_vfsrlock_wait(vnode_t
*vp
)
2139 vn_vfslocks_entry_t
*vpvfsentry
;
2142 vpvfsentry
= vn_vfslocks_getlock(vp
);
2143 retval
= rwst_enter_sig(&vpvfsentry
->ve_lock
, RW_READER
);
2145 if (retval
== EINTR
) {
2146 vn_vfslocks_rele(vpvfsentry
);
2155 * vn_vfswlock is used to implement a lock which is logically a writers lock
2156 * protecting the v_vfsmountedhere field.
2159 vn_vfswlock(vnode_t
*vp
)
2161 vn_vfslocks_entry_t
*vpvfsentry
;
2164 * If vp is NULL then somebody is trying to lock the covered vnode
2165 * of /. (vfs_vnodecovered is NULL for /). This situation will
2166 * only happen when unmounting /. Since that operation will fail
2167 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2172 vpvfsentry
= vn_vfslocks_getlock(vp
);
2174 if (rwst_tryenter(&vpvfsentry
->ve_lock
, RW_WRITER
))
2177 vn_vfslocks_rele(vpvfsentry
);
2182 vn_vfsrlock(vnode_t
*vp
)
2184 vn_vfslocks_entry_t
*vpvfsentry
;
2187 * If vp is NULL then somebody is trying to lock the covered vnode
2188 * of /. (vfs_vnodecovered is NULL for /). This situation will
2189 * only happen when unmounting /. Since that operation will fail
2190 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2195 vpvfsentry
= vn_vfslocks_getlock(vp
);
2197 if (rwst_tryenter(&vpvfsentry
->ve_lock
, RW_READER
))
2200 vn_vfslocks_rele(vpvfsentry
);
2205 vn_vfsunlock(vnode_t
*vp
)
2207 vn_vfslocks_entry_t
*vpvfsentry
;
 * ve_refcnt needs to be decremented twice.
 * 1. To release the reference after a call to vn_vfslocks_getlock().
 * 2. To release the reference from the locking routines like
 *    vn_vfsrlock/vn_vfswlock, etc.
2215 vpvfsentry
= vn_vfslocks_getlock(vp
);
2216 vn_vfslocks_rele(vpvfsentry
);
2218 rwst_exit(&vpvfsentry
->ve_lock
);
2219 vn_vfslocks_rele(vpvfsentry
);
2223 vn_vfswlock_held(vnode_t
*vp
)
2226 vn_vfslocks_entry_t
*vpvfsentry
;
2230 vpvfsentry
= vn_vfslocks_getlock(vp
);
2231 held
= rwst_lock_held(&vpvfsentry
->ve_lock
, RW_WRITER
);
2233 vn_vfslocks_rele(vpvfsentry
);
2240 const char *name
, /* Name of file system */
2241 const fs_operation_def_t
*templ
, /* Operation specification */
2242 vnodeops_t
**actual
) /* Return the vnodeops */
2247 *actual
= (vnodeops_t
*)kmem_alloc(sizeof (vnodeops_t
), KM_SLEEP
);
2249 (*actual
)->vnop_name
= name
;
2251 error
= fs_build_vector(*actual
, &unused_ops
, vn_ops_table
, templ
);
2253 kmem_free(*actual
, sizeof (vnodeops_t
));
2257 if (unused_ops
!= 0)
2258 cmn_err(CE_WARN
, "vn_make_ops: %s: %d operations supplied "
2259 "but not used", name
, unused_ops
);
2266 * Free the vnodeops created as a result of vn_make_ops()
2269 vn_freevnodeops(vnodeops_t
*vnops
)
2271 kmem_free(vnops
, sizeof (vnodeops_t
));
2280 vn_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
2286 mutex_init(&vp
->v_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2287 mutex_init(&vp
->v_vsd_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2288 cv_init(&vp
->v_cv
, NULL
, CV_DEFAULT
, NULL
);
2289 rw_init(&vp
->v_nbllock
, NULL
, RW_DEFAULT
, NULL
);
2290 vp
->v_femhead
= NULL
; /* Must be done before vn_reinit() */
2292 vp
->v_mpssdata
= NULL
;
2294 vp
->v_fopdata
= NULL
;
2296 vmobject_init(&vp
->v_object
, vp
);
2303 vn_cache_destructor(void *buf
, void *cdrarg
)
2309 vmobject_fini(&vp
->v_object
);
2311 rw_destroy(&vp
->v_nbllock
);
2312 cv_destroy(&vp
->v_cv
);
2313 mutex_destroy(&vp
->v_vsd_lock
);
2314 mutex_destroy(&vp
->v_lock
);
2318 vn_create_cache(void)
2321 ASSERT((1 << VNODE_ALIGN_LOG2
) ==
2322 P2ROUNDUP(sizeof (struct vnode
), VNODE_ALIGN
));
2323 vn_cache
= kmem_cache_create("vn_cache", sizeof (struct vnode
),
2324 VNODE_ALIGN
, vn_cache_constructor
, vn_cache_destructor
, NULL
, NULL
,
2329 vn_destroy_cache(void)
2331 kmem_cache_destroy(vn_cache
);
2335 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2336 * cached by the file system and vnodes remain associated.
2339 vn_recycle(vnode_t
*vp
)
2341 ASSERT(!vn_has_cached_data(vp
));
2344 * XXX - This really belongs in vn_reinit(), but we have some issues
2345 * with the counts. Best to have it here for clean initialization.
2349 vp
->v_mmap_read
= 0;
2350 vp
->v_mmap_write
= 0;
2353 * If FEM was in use, make sure everything gets cleaned up
2354 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2357 if (vp
->v_femhead
) {
2358 /* XXX - There should be a free_femhead() that does all this */
2359 ASSERT(vp
->v_femhead
->femh_list
== NULL
);
2360 mutex_destroy(&vp
->v_femhead
->femh_lock
);
2361 kmem_free(vp
->v_femhead
, sizeof (*(vp
->v_femhead
)));
2362 vp
->v_femhead
= NULL
;
2365 kmem_free(vp
->v_path
, strlen(vp
->v_path
) + 1);
2369 if (vp
->v_fopdata
!= NULL
) {
2372 vp
->v_mpssdata
= NULL
;
2377 * Used to reset the vnode fields including those that are directly accessible
2378 * as well as those which require an accessor function.
2380 * Does not initialize:
2381 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2382 * v_data (since FS-nodes and vnodes point to each other and should
2383 * be updated simultaneously)
2384 * v_op (in case someone needs to make a VOP call on this object)
2387 vn_reinit(vnode_t
*vp
)
2390 vp
->v_count_dnlc
= 0;
2392 vp
->v_stream
= NULL
;
2393 vp
->v_vfsmountedhere
= NULL
;
2398 vp
->v_filocks
= NULL
;
2399 vp
->v_shrlocks
= NULL
;
2400 VERIFY(!vn_has_cached_data(vp
));
2402 vp
->v_locality
= NULL
;
2403 vp
->v_xattrdir
= NULL
;
2405 /* Handles v_femhead, v_path, and the r/w/map counts */
2410 vn_alloc(int kmflag
)
2414 vp
= kmem_cache_alloc(vn_cache
, kmflag
);
2417 vp
->v_femhead
= NULL
; /* Must be done before vn_reinit() */
2418 vp
->v_fopdata
= NULL
;
2426 vn_free(vnode_t
*vp
)
2428 ASSERT(vp
->v_shrlocks
== NULL
);
2429 ASSERT(vp
->v_filocks
== NULL
);
2432 * Some file systems call vn_free() with v_count of zero,
2433 * some with v_count of 1. In any case, the value should
2434 * never be anything else.
2436 ASSERT((vp
->v_count
== 0) || (vp
->v_count
== 1));
2437 ASSERT(vp
->v_count_dnlc
== 0);
2438 if (vp
->v_path
!= NULL
) {
2439 kmem_free(vp
->v_path
, strlen(vp
->v_path
) + 1);
2443 /* If FEM was in use, make sure everything gets cleaned up */
2444 if (vp
->v_femhead
) {
2445 /* XXX - There should be a free_femhead() that does all this */
2446 ASSERT(vp
->v_femhead
->femh_list
== NULL
);
2447 mutex_destroy(&vp
->v_femhead
->femh_lock
);
2448 kmem_free(vp
->v_femhead
, sizeof (*(vp
->v_femhead
)));
2449 vp
->v_femhead
= NULL
;
2452 if (vp
->v_fopdata
!= NULL
) {
2455 vp
->v_mpssdata
= NULL
;
2457 kmem_cache_free(vn_cache
, vp
);
2461 * vnode status changes, should define better states than 1, 0.
2464 vn_reclaim(vnode_t
*vp
)
2466 vfs_t
*vfsp
= vp
->v_vfsp
;
2469 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2472 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_RECLAIMED
);
2476 vn_idle(vnode_t
*vp
)
2478 vfs_t
*vfsp
= vp
->v_vfsp
;
2481 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2484 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_IDLED
);
2487 vn_exists(vnode_t
*vp
)
2489 vfs_t
*vfsp
= vp
->v_vfsp
;
2492 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2495 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_EXISTS
);
2499 vn_invalid(vnode_t
*vp
)
2501 vfs_t
*vfsp
= vp
->v_vfsp
;
2504 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2507 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_DESTROYED
);
/* Vnode event notification */

int
vnevent_support(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return (EINVAL);
	}
	return (fop_vnevent(vp, VE_SUPPORT, NULL, NULL, ct));
}

void
vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_RENAME_SRC, dvp, name, ct);
}

void
vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_RENAME_DEST, dvp, name, ct);
}

void
vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
}

void
vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_REMOVE, dvp, name, ct);
}

void
vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_RMDIR, dvp, name, ct);
}

void
vnevent_pre_rename_src(vnode_t *vp, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_PRE_RENAME_SRC, dvp, name, ct);
}

void
vnevent_pre_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
    caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_PRE_RENAME_DEST, dvp, name, ct);
}

void
vnevent_pre_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
    caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_PRE_RENAME_DEST_DIR, nvp, name, ct);
}

void
vnevent_create(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_CREATE, NULL, NULL, ct);
}

void
vnevent_link(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_LINK, NULL, NULL, ct);
}

void
vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
}

void
vnevent_truncate(vnode_t *vp, caller_context_t *ct)
{
	if (vp == NULL || vp->v_femhead == NULL) {
		return;
	}
	(void) fop_vnevent(vp, VE_TRUNCATE, NULL, NULL, ct);
}
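/*
 * Illustrative sketch (not part of the original source): a file system's
 * remove path would typically post one of the events above once the directory
 * entry is gone.  Here dvp is the directory, vp the vnode being removed and
 * nm its last component name; all other details of the hypothetical caller
 * are omitted.
 *
 *	// inside a VOP_REMOVE implementation, after the entry is unlinked:
 *	vnevent_remove(vp, dvp, nm, ct);
 *
 * The v_femhead == NULL check in each helper keeps these calls cheap when no
 * FEM monitor is watching the vnode.
 */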
int
vn_is_readonly(vnode_t *vp)
{
	return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
}

int
vn_has_flocks(vnode_t *vp)
{
	return (vp->v_filocks != NULL);
}

int
vn_has_mandatory_locks(vnode_t *vp, int mode)
{
	return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
}

int
vn_has_cached_data(vnode_t *vp)
{
	return (!list_is_empty(&vp->v_object.list));
}

/*
 * Return 0 if the vnode in question shouldn't be permitted into a zone via
 * AUDIT_PATH setting, etc.
 */
int
vn_can_change_zones(vnode_t *vp)
{
	struct vfssw *vswp;
	int allow = 1;
	vnode_t *rvp;

	if (nfs_global_client_only != 0)
		return (1);

	/*
	 * We always want to look at the underlying vnode if there is one.
	 */
	if (fop_realvp(vp, &rvp, NULL) != 0)
		rvp = vp;
	/*
	 * Some pseudo filesystems (including doorfs) don't actually register
	 * their vfsops_t, so the following may return NULL; we happily let
	 * such vnodes switch zones.
	 */
	vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
	if (vswp != NULL) {
		if (vswp->vsw_flag & VSW_NOTZONESAFE)
			allow = 0;
		vfs_unrefvfssw(vswp);
	}
	return (allow);
}

/*
 * Return nonzero if the vnode is a mount point, zero if not.
 */
int
vn_ismntpt(vnode_t *vp)
{
	return (vp->v_vfsmountedhere != NULL);
}

/* Retrieve the vfs (if any) mounted on this vnode */
vfs_t *
vn_mountedvfs(vnode_t *vp)
{
	return (vp->v_vfsmountedhere);
}

/*
 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
 */
int
vn_in_dnlc(vnode_t *vp)
{
	return (vp->v_count_dnlc > 0);
}
/*
 * vn_has_other_opens() checks whether a particular file is opened by more than
 * just the caller and whether the open is for read and/or write.
 * This routine is for calling after the caller has already called fop_open()
 * and the caller wishes to know if they are the only one with it open for
 * the mode(s) specified.
 *
 * Vnode counts are only kept on regular files (v_type=VREG).
 */
int
vn_has_other_opens(vnode_t *vp, v_mode_t mode)
{
	ASSERT(vp != NULL);

	switch (mode) {
	case V_WRITE:
		if (vp->v_wrcnt > 1)
			return (V_TRUE);
		break;
	case V_RDORWR:
		if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
			return (V_TRUE);
		break;
	case V_RDONLY:
		if (vp->v_rdcnt > 1)
			return (V_TRUE);
		break;
	default:
		break;
	}

	return (V_FALSE);
}

/*
 * vn_is_opened() checks whether a particular file is opened and
 * whether the open is for read and/or write.
 *
 * Vnode counts are only kept on regular files (v_type=VREG).
 */
int
vn_is_opened(vnode_t *vp, v_mode_t mode)
{
	ASSERT(vp != NULL);

	switch (mode) {
	case V_WRITE:
		if (vp->v_wrcnt)
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if (vp->v_rdcnt && vp->v_wrcnt)
			return (V_TRUE);
		break;
	case V_RDORWR:
		if (vp->v_rdcnt || vp->v_wrcnt)
			return (V_TRUE);
		break;
	case V_RDONLY:
		if (vp->v_rdcnt)
			return (V_TRUE);
		break;
	default:
		break;
	}

	return (V_FALSE);
}

/*
 * vn_is_mapped() checks whether a particular file is mapped and whether
 * the file is mapped read and/or write.
 */
int
vn_is_mapped(vnode_t *vp, v_mode_t mode)
{
	ASSERT(vp != NULL);

#if !defined(_LP64)
	switch (mode) {
	/*
	 * The atomic_add_64_nv functions force atomicity in the
	 * case of 32 bit architectures. Otherwise the 64 bit values
	 * require two fetches. The value of the fields may be
	 * (potentially) changed between the first fetch and the
	 * second.
	 */
	case V_WRITE:
		if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
			return (V_TRUE);
		break;
	case V_RDORWR:
		if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
		    (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
			return (V_TRUE);
		break;
	case V_RDONLY:
		if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
			return (V_TRUE);
		break;
	default:
		break;
	}
#else
	switch (mode) {
	case V_WRITE:
		if (vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_RDANDWR:
		if (vp->v_mmap_read && vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_RDORWR:
		if (vp->v_mmap_read || vp->v_mmap_write)
			return (V_TRUE);
		break;
	case V_RDONLY:
		if (vp->v_mmap_read)
			return (V_TRUE);
		break;
	default:
		break;
	}
#endif

	return (V_FALSE);
}
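/*
 * Illustrative sketch (not part of the original source): a caller such as an
 * NFS delegation policy might combine these predicates to decide whether a
 * regular file is quiescent before handing out state.  A sketch, assuming the
 * caller already holds a reference on vp:
 *
 *	if (vp->v_type == VREG &&
 *	    !vn_is_opened(vp, V_RDORWR) &&
 *	    !vn_is_mapped(vp, V_RDORWR) &&
 *	    !vn_has_other_opens(vp, V_RDORWR)) {
 *		// no other readers or writers are visible via the counts
 *	}
 *
 * The counts are advisory snapshots; they can change immediately after the
 * check unless the caller holds something that prevents new opens/mappings.
 */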
/*
 * Set the operations vector for a vnode.
 *
 * FEM ensures that the v_femhead pointer is filled in before the
 * v_op pointer is changed.  This means that if the v_femhead pointer
 * is NULL, and the v_op field hasn't changed since we checked the
 * v_femhead pointer, then our update is ok - we are not racing with
 * FEM.
 */
void
vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
{
	vnodeops_t	*op;

	ASSERT(vp != NULL);
	ASSERT(vnodeops != NULL);

	op = vp->v_op;
	membar_consumer();
	/*
	 * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
	 * the compare-and-swap on vp->v_op.  If either fails, then FEM is
	 * in effect on the vnode and we need to have FEM deal with it.
	 */
	if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
	    op) {
		fem_setvnops(vp, vnodeops);
	}
}

/*
 * Retrieve the operations vector for a vnode
 * As with vn_setops(above); make sure we aren't racing with FEM.
 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
 * make sense to the callers of this routine.
 */
vnodeops_t *
vn_getops(vnode_t *vp)
{
	vnodeops_t	*op;

	ASSERT(vp != NULL);

	op = vp->v_op;
	membar_consumer();
	if (vp->v_femhead == NULL && op == vp->v_op) {
		return (op);
	} else {
		return (fem_getvnops(vp));
	}
}

/*
 * Returns non-zero (1) if the vnodeops matches that of the vnode.
 * Returns zero (0) if not.
 */
int
vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
{
	return (vn_getops(vp) == vnodeops);
}

/*
 * Returns non-zero (1) if the specified operation matches the
 * corresponding operation for the vnode.
 * Returns zero (0) if not.
 */

#define	MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))

int
vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
{
	const fs_operation_trans_def_t *otdp;
	fs_generic_func_p *loc = NULL;
	vnodeops_t	*vop = vn_getops(vp);

	ASSERT(vopname != NULL);

	for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
		if (MATCHNAME(otdp->name, vopname)) {
			loc = (fs_generic_func_p *)
			    ((char *)(vop) + otdp->offset);
			break;
		}
	}

	return ((loc != NULL) && (*loc == funcp));
}
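/*
 * Illustrative sketch (not part of the original source): vn_matchops() is the
 * usual way for a file system to ask "is this vnode one of mine?" before
 * casting v_data.  Assuming a hypothetical myfs_vnodeops vector and
 * myfs_node_t type:
 *
 *	if (vn_matchops(vp, myfs_vnodeops)) {
 *		myfs_node_t *np = vp->v_data;
 *		// safe to treat vp as a myfs vnode
 *	}
 *
 * vn_matchopval() answers the finer-grained question of whether a single
 * named operation (e.g. "vop_getpage") currently resolves to a particular
 * function, which is mainly of interest to FEM-aware code.
 */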
/*
 * fs_new_caller_id() needs to return a unique ID on a given local system.
 * The IDs do not need to survive across reboots.  These are primarily
 * used so that (FEM) monitors can detect particular callers (such as
 * the NFS server) to a given vnode/vfs operation.
 */
u_longlong_t
fs_new_caller_id()
{
	static uint64_t next_caller_id = 0LL; /* First call returns 1 */

	return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
}
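/*
 * Illustrative sketch (not part of the original source): a subsystem that
 * issues VOP calls on behalf of remote clients can tag its caller_context_t
 * once and reuse it, so FEM monitors can recognize it later.  A sketch:
 *
 *	caller_context_t ct;
 *
 *	ct.cc_caller_id = fs_new_caller_id();
 *	ct.cc_pid = curproc->p_pid;
 *	ct.cc_sysid = 0;
 *	ct.cc_flags = 0;
 *
 * The id only needs to be unique until reboot, which is all the monitors
 * require.
 */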
/*
 * Given a starting vnode and a path, updates the path in the target vnode in
 * a safe manner.  If the vnode already has path information embedded, then the
 * cached path is left untouched.
 */

size_t max_vnode_path = 4 * MAXPATHLEN;

void
vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
    const char *path, size_t plen)
{
	char	*rpath;
	vnode_t	*base;
	size_t	rpathlen, rpathalloc;
	int	doslash = 1;

	if (*path == '/') {
		base = rootvp;
		path++;
		plen--;
	} else {
		base = startvp;
	}

	/*
	 * We cannot grab base->v_lock while we hold vp->v_lock because of
	 * the potential for deadlock.
	 */
	mutex_enter(&base->v_lock);
	if (base->v_path == NULL) {
		mutex_exit(&base->v_lock);
		return;
	}

	rpathlen = strlen(base->v_path);
	rpathalloc = rpathlen + plen + 1;
	/* Avoid adding a slash if there's already one there */
	if (base->v_path[rpathlen-1] == '/')
		doslash = 0;
	else
		rpathalloc++;

	/*
	 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
	 * so we must do this dance.  If, by chance, something changes the
	 * path, just give up since there is no real harm.
	 */
	mutex_exit(&base->v_lock);

	/* Paths should stay within reason */
	if (rpathalloc > max_vnode_path)
		return;

	rpath = kmem_alloc(rpathalloc, KM_SLEEP);

	mutex_enter(&base->v_lock);
	if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
		mutex_exit(&base->v_lock);
		kmem_free(rpath, rpathalloc);
		return;
	}
	bcopy(base->v_path, rpath, rpathlen);
	mutex_exit(&base->v_lock);

	if (doslash)
		rpath[rpathlen++] = '/';
	bcopy(path, rpath + rpathlen, plen);
	rpath[rpathlen + plen] = '\0';

	mutex_enter(&vp->v_lock);
	if (vp->v_path != NULL) {
		mutex_exit(&vp->v_lock);
		kmem_free(rpath, rpathalloc);
	} else {
		vp->v_path = rpath;
		mutex_exit(&vp->v_lock);
	}
}
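/*
 * Illustrative sketch (not part of the original source): the typical caller is
 * a lookup-like path that has just obtained *vpp under directory dvp with name
 * nm, exactly as fop_lookup() below does:
 *
 *	if ((*vpp)->v_path == NULL)
 *		vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
 *
 * Passing rootdir as rootvp means an absolute nm is resolved against the
 * system root rather than against dvp's cached path.
 */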
/*
 * Sets the path to the vnode to be the given string, regardless of current
 * context.  The string must be a complete path from rootdir.  This is only
 * used by fsop_root() for setting the path based on the mountpoint.
 */
void
vn_setpath_str(struct vnode *vp, const char *str, size_t len)
{
	char *buf = kmem_alloc(len + 1, KM_SLEEP);

	mutex_enter(&vp->v_lock);
	if (vp->v_path != NULL) {
		mutex_exit(&vp->v_lock);
		kmem_free(buf, len + 1);
		return;
	}

	vp->v_path = buf;
	bcopy(str, vp->v_path, len);
	vp->v_path[len] = '\0';

	mutex_exit(&vp->v_lock);
}

/*
 * Called from within filesystem's vop_rename() to handle renames once the
 * target vnode is available.
 */
void
vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
{
	char *tmp;

	mutex_enter(&vp->v_lock);
	tmp = vp->v_path;
	vp->v_path = NULL;
	mutex_exit(&vp->v_lock);
	vn_setpath(rootdir, dvp, vp, nm, len);
	if (tmp != NULL)
		kmem_free(tmp, strlen(tmp) + 1);
}

/*
 * Similar to vn_setpath_str(), this function sets the path of the destination
 * vnode to be the same as the source vnode.
 */
void
vn_copypath(struct vnode *src, struct vnode *dst)
{
	char *buf;
	int alloc;

	mutex_enter(&src->v_lock);
	if (src->v_path == NULL) {
		mutex_exit(&src->v_lock);
		return;
	}
	alloc = strlen(src->v_path) + 1;

	/* avoid kmem_alloc() with lock held */
	mutex_exit(&src->v_lock);
	buf = kmem_alloc(alloc, KM_SLEEP);
	mutex_enter(&src->v_lock);
	if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
		mutex_exit(&src->v_lock);
		kmem_free(buf, alloc);
		return;
	}
	bcopy(src->v_path, buf, alloc);
	mutex_exit(&src->v_lock);

	mutex_enter(&dst->v_lock);
	if (dst->v_path != NULL) {
		mutex_exit(&dst->v_lock);
		kmem_free(buf, alloc);
		return;
	}
	dst->v_path = buf;
	mutex_exit(&dst->v_lock);
}
/*
 * XXX Private interface for segvn routines that handle vnode
 * large page segments.
 *
 * return 1 if vp's file system fop_pageio() implementation
 * can be safely used instead of fop_getpage() for handling
 * pagefaults against regular non swap files. fop_pageio()
 * interface is considered safe here if its implementation
 * is very close to the fop_getpage() implementation.
 * e.g. it zeroes out the part of the page beyond EOF, doesn't
 * panic if there are file holes but instead returns an error,
 * doesn't assume the file won't be changed by user writes, etc.
 *
 * return 0 otherwise.
 *
 * For now allow segvn to only use fop_pageio() with ufs and nfs.
 */
int
vn_vmpss_usepageio(vnode_t *vp)
{
	vfs_t	*vfsp = vp->v_vfsp;
	char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
	char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
	char **fsok = pageio_ok_fss;

	if (fsname == NULL) {
		return (0);
	}

	for (; *fsok; fsok++) {
		if (strcmp(*fsok, fsname) == 0) {
			return (1);
		}
	}
	return (0);
}
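/*
 * Illustrative sketch (not part of the original source): segvn-style code
 * would consult this predicate before deciding how to fault in large-page
 * mappings, roughly:
 *
 *	int use_pageio = vn_vmpss_usepageio(vp);
 *
 * where vp is the vnode backing the segment.  Any file system not named in
 * pageio_ok_fss[] simply falls back to the fop_getpage() path.
 */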
/* VOP_XXX() macros call the corresponding fop_xxx() function */

int
fop_open(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct)
{
	int ret;
	vnode_t *vp = *vpp;

	VN_HOLD(vp);
	/*
	 * Adding to the vnode counts before calling open
	 * avoids the need for a mutex. It circumvents a race
	 * condition where a query made on the vnode counts results in a
	 * false negative. The inquirer goes away believing the file is
	 * not open when there is an open on the file already under way.
	 *
	 * The counts are meant to prevent NFS from granting a delegation
	 * when it would be dangerous to do so.
	 *
	 * The vnode counts are only kept on regular files.
	 */
	if ((*vpp)->v_type == VREG) {
		if (mode & FREAD)
			atomic_inc_32(&(*vpp)->v_rdcnt);
		if (mode & FWRITE)
			atomic_inc_32(&(*vpp)->v_wrcnt);
	}

	VOPXID_MAP_CR(vp, cr);

	ret = fop_open_dispatch(vpp, mode, cr, ct);

	if (ret) {
		/*
		 * Use the saved vp just in case the vnode ptr got trashed
		 * by the error.
		 */
		VOPSTATS_UPDATE(vp, open);
		if ((vp->v_type == VREG) && (mode & FREAD))
			atomic_dec_32(&vp->v_rdcnt);
		if ((vp->v_type == VREG) && (mode & FWRITE))
			atomic_dec_32(&vp->v_wrcnt);
	} else {
		/*
		 * Some filesystems will return a different vnode,
		 * but the same path was still used to open it.
		 * So if we do change the vnode and need to
		 * copy over the path, do so here, rather than special
		 * casing each filesystem. Adjust the vnode counts to
		 * reflect the vnode switch.
		 */
		VOPSTATS_UPDATE(*vpp, open);
		if (*vpp != vp && *vpp != NULL) {
			vn_copypath(vp, *vpp);
			if (((*vpp)->v_type == VREG) && (mode & FREAD))
				atomic_inc_32(&(*vpp)->v_rdcnt);
			if ((vp->v_type == VREG) && (mode & FREAD))
				atomic_dec_32(&vp->v_rdcnt);
			if (((*vpp)->v_type == VREG) && (mode & FWRITE))
				atomic_inc_32(&(*vpp)->v_wrcnt);
			if ((vp->v_type == VREG) && (mode & FWRITE))
				atomic_dec_32(&vp->v_wrcnt);
		}
	}
	VN_RELE(vp);
	return (ret);
}
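/*
 * Illustrative sketch (not part of the original source): the effect of the
 * count manipulation above is that a reader of v_rdcnt/v_wrcnt never sees a
 * regular file as "not open" while an open is in flight.  For a hypothetical
 * observer:
 *
 *	// after fop_open(&vp, FREAD|FWRITE, cr, ct) succeeds on a VREG vnode:
 *	ASSERT(vp->v_rdcnt >= 1 && vp->v_wrcnt >= 1);
 *
 *	// the matching close must pass count == 1 exactly once so the
 *	// counts are decremented a single time (see fop_close() below):
 *	(void) fop_close(vp, FREAD|FWRITE, 1, (offset_t)0, cr, ct);
 */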
int
fop_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_close_dispatch(vp, flag, count, offset, cr, ct);

	VOPSTATS_UPDATE(vp, close);
	/*
	 * Check passed in count to handle possible dups. Vnode counts are only
	 * kept on regular files.
	 */
	if ((vp->v_type == VREG) && (count == 1)) {
		if (flag & FREAD) {
			ASSERT(vp->v_rdcnt > 0);
			atomic_dec_32(&vp->v_rdcnt);
		}
		if (flag & FWRITE) {
			ASSERT(vp->v_wrcnt > 0);
			atomic_dec_32(&vp->v_wrcnt);
		}
	}
	return (err);
}

int
fop_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	int err;
	ssize_t	resid_start = uiop->uio_resid;

	VOPXID_MAP_CR(vp, cr);

	err = fop_read_dispatch(vp, uiop, ioflag, cr, ct);

	VOPSTATS_UPDATE_IO(vp, read,
	    read_bytes, (resid_start - uiop->uio_resid));
	return (err);
}

int
fop_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	int err;
	ssize_t	resid_start = uiop->uio_resid;

	VOPXID_MAP_CR(vp, cr);

	err = fop_write_dispatch(vp, uiop, ioflag, cr, ct);

	VOPSTATS_UPDATE_IO(vp, write,
	    write_bytes, (resid_start - uiop->uio_resid));
	return (err);
}

int
fop_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
    int *rvalp, caller_context_t *ct)
{
	int err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_ioctl_dispatch(vp, cmd, arg, flag, cr, rvalp, ct);

	VOPSTATS_UPDATE(vp, ioctl);
	return (err);
}

int
fop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_setfl == NULL)
		err = fs_setfl(vp, oflags, nflags, cr, ct);
	else
		err = vp->v_op->vop_setfl(vp, oflags, nflags, cr, ct);

	VOPSTATS_UPDATE(vp, setfl);
	return (err);
}

int
fop_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	VOPXID_MAP_CR(vp, cr);

	/*
	 * If this file system doesn't understand the xvattr extensions
	 * then turn off the xvattr bit.
	 */
	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
		vap->va_mask &= ~AT_XVATTR;
	}

	/*
	 * We're only allowed to skip the ACL check iff we used a 32 bit
	 * ACE mask with fop_access() to determine permissions.
	 */
	if ((flags & ATTR_NOACLCHECK) &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
		return (EINVAL);

	err = fop_getattr_dispatch(vp, vap, flags, cr, ct);

	VOPSTATS_UPDATE(vp, getattr);
	return (err);
}

int
fop_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	VOPXID_MAP_CR(vp, cr);

	/*
	 * If this file system doesn't understand the xvattr extensions
	 * then turn off the xvattr bit.
	 */
	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
		vap->va_mask &= ~AT_XVATTR;
	}

	/*
	 * We're only allowed to skip the ACL check iff we used a 32 bit
	 * ACE mask with fop_access() to determine permissions.
	 */
	if ((flags & ATTR_NOACLCHECK) &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
		return (EINVAL);

	err = fop_setattr_dispatch(vp, vap, flags, cr, ct);

	VOPSTATS_UPDATE(vp, setattr);
	return (err);
}

int
fop_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	if ((flags & V_ACE_MASK) &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
		return (EINVAL);
	}

	VOPXID_MAP_CR(vp, cr);

	err = fop_access_dispatch(vp, mode, flags, cr, ct);

	VOPSTATS_UPDATE(vp, access);
	return (err);
}
int
fop_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, int flags,
    vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *deflags,		/* Returned per-dirent flags */
    pathname_t *ppnp)		/* Returned case-preserved name in directory */
{
	int ret;

	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.  It is required
	 * that if the vfs supports case-insensitive lookup, it also
	 * supports extended dirent flags.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
		ret = xattr_dir_lookup(dvp, vpp, flags, cr);
	} else if (dvp->v_op->vop_lookup == NULL) {
		ret = fs_nosys();
	} else {
		ret = dvp->v_op->vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
		    cr, ct, deflags, ppnp);
	}

	if (ret == 0 && *vpp) {
		VOPSTATS_UPDATE(*vpp, lookup);
		if ((*vpp)->v_path == NULL) {
			vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
		}
	}

	return (ret);
}

int
fop_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, int mode,
    vnode_t **vpp, cred_t *cr, int flags, caller_context_t *ct,
    vsecattr_t *vsecp)		/* ACL to set during create */
{
	int ret;

	if (vsecp != NULL &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
		return (EINVAL);
	}
	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	if (dvp->v_op->vop_create == NULL)
		ret = fs_nosys();
	else
		ret = dvp->v_op->vop_create(dvp, name, vap, excl, mode, vpp,
		    cr, flags, ct, vsecp);

	if (ret == 0 && *vpp) {
		VOPSTATS_UPDATE(*vpp, create);
		if ((*vpp)->v_path == NULL) {
			vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
		}
	}

	return (ret);
}

int
fop_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	int err;

	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	if (dvp->v_op->vop_remove == NULL)
		err = fs_nosys();
	else
		err = dvp->v_op->vop_remove(dvp, nm, cr, ct, flags);

	VOPSTATS_UPDATE(dvp, remove);
	return (err);
}

int
fop_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int err;

	/*
	 * If the target file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(tdvp, cr);

	if (tdvp->v_op->vop_link == NULL)
		err = fs_nosys();
	else
		err = tdvp->v_op->vop_link(tdvp, svp, tnm, cr, ct, flags);

	VOPSTATS_UPDATE(tdvp, link);
	return (err);
}

int
fop_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int err;

	/*
	 * If the file system involved does not support
	 * case-insensitive access and said access is requested, fail
	 * quickly.
	 */
	if (flags & FIGNORECASE &&
	    ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
		return (EINVAL);

	VOPXID_MAP_CR(tdvp, cr);

	if (sdvp->v_op->vop_rename == NULL)
		err = fs_nosys();
	else
		err = sdvp->v_op->vop_rename(sdvp, snm, tdvp, tnm, cr, ct,
		    flags);

	VOPSTATS_UPDATE(sdvp, rename);
	return (err);
}

int
fop_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
    cred_t *cr, caller_context_t *ct, int flags,
    vsecattr_t *vsecp)		/* ACL to set during create */
{
	int ret;

	if (vsecp != NULL &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
		return (EINVAL);
	}
	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	if (dvp->v_op->vop_mkdir == NULL)
		ret = fs_nosys();
	else
		ret = dvp->v_op->vop_mkdir(dvp, dirname, vap, vpp, cr, ct,
		    flags, vsecp);

	if (ret == 0 && *vpp) {
		VOPSTATS_UPDATE(*vpp, mkdir);
		if ((*vpp)->v_path == NULL) {
			vn_setpath(rootdir, dvp, *vpp, dirname,
			    strlen(dirname));
		}
	}

	return (ret);
}

int
fop_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	int err;

	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	if (dvp->v_op->vop_rmdir == NULL)
		err = fs_nosys();
	else
		err = dvp->v_op->vop_rmdir(dvp, nm, cdir, cr, ct, flags);

	VOPSTATS_UPDATE(dvp, rmdir);
	return (err);
}

int
fop_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	int err;
	ssize_t	resid_start = uiop->uio_resid;

	/*
	 * If this file system doesn't support retrieving directory
	 * entry flags and said access is requested, fail quickly.
	 */
	if (flags & V_RDDIR_ENTFLAGS &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
		return (EINVAL);

	VOPXID_MAP_CR(vp, cr);

	err = fop_readdir_dispatch(vp, uiop, cr, eofp, ct, flags);

	VOPSTATS_UPDATE_IO(vp, readdir,
	    readdir_bytes, (resid_start - uiop->uio_resid));
	return (err);
}

int
fop_symlink(vnode_t *dvp, char *linkname, vattr_t *vap, char *target,
    cred_t *cr, caller_context_t *ct, int flags)
{
	int	err;
	xvattr_t xvattr;

	/*
	 * If this file system doesn't support case-insensitive access
	 * and said access is requested, fail quickly.
	 */
	if (flags & FIGNORECASE &&
	    (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
	    vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
		return (EINVAL);

	VOPXID_MAP_CR(dvp, cr);

	/* check for reparse point */
	if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
	    (strncmp(target, FS_REPARSE_TAG_STR,
	    strlen(FS_REPARSE_TAG_STR)) == 0)) {
		if (!fs_reparse_mark(target, vap, &xvattr))
			vap = (vattr_t *)&xvattr;
	}

	if (dvp->v_op->vop_symlink == NULL)
		err = fs_nosys();
	else
		err = dvp->v_op->vop_symlink(dvp, linkname, vap, target, cr,
		    ct, flags);

	VOPSTATS_UPDATE(dvp, symlink);
	return (err);
}
int
fop_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_readlink_dispatch(vp, uiop, cr, ct);

	VOPSTATS_UPDATE(vp, readlink);
	return (err);
}

int
fop_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_fsync_dispatch(vp, syncflag, cr, ct);

	VOPSTATS_UPDATE(vp, fsync);
	return (err);
}

void
fop_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	/* Need to update stats before vop call since we may lose the vnode */
	VOPSTATS_UPDATE(vp, inactive);

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_inactive != NULL)
		vp->v_op->vop_inactive(vp, cr, ct);
}

int
fop_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	int	err;

	err = fop_fid_dispatch(vp, fidp, ct);

	VOPSTATS_UPDATE(vp, fid);
	return (err);
}

int
fop_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
{
	int	ret;

	if (vp->v_op->vop_rwlock == NULL)
		ret = fs_rwlock(vp, write_lock, ct);
	else
		ret = vp->v_op->vop_rwlock(vp, write_lock, ct);

	VOPSTATS_UPDATE(vp, rwlock);
	return (ret);
}

void
fop_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
{
	if (vp->v_op->vop_rwunlock == NULL)
		fs_rwunlock(vp, write_lock, ct);
	else
		vp->v_op->vop_rwunlock(vp, write_lock, ct);

	VOPSTATS_UPDATE(vp, rwunlock);
}

int
fop_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
{
	int	err;

	err = fop_seek_dispatch(vp, ooff, noffp, ct);

	VOPSTATS_UPDATE(vp, seek);
	return (err);
}

int
fop_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
{
	int	err;

	if (vp1->v_op->vop_cmp == NULL)
		err = fs_cmp(vp1, vp2, ct);
	else
		err = vp1->v_op->vop_cmp(vp1, vp2, ct);

	VOPSTATS_UPDATE(vp1, cmp);
	return (err);
}

int
fop_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
    struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_frlock == NULL)
		err = fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
	else
		err = vp->v_op->vop_frlock(vp, cmd, bfp, flag, offset,
		    flk_cbp, cr, ct);

	VOPSTATS_UPDATE(vp, frlock);
	return (err);
}

int
fop_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
    cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_space_dispatch(vp, cmd, bfp, flag, offset, cr, ct);

	VOPSTATS_UPDATE(vp, space);
	return (err);
}

int
fop_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
{
	int	err;

	err = fop_realvp_dispatch(vp, vpp, ct);

	VOPSTATS_UPDATE(vp, realvp);
	return (err);
}

int
fop_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
    page_t **plarr, size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_getpage_dispatch(vp, off, len, protp, plarr, plsz, seg,
	    addr, rw, cr, ct);

	VOPSTATS_UPDATE(vp, getpage);
	return (err);
}

int
fop_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_putpage_dispatch(vp, off, len, flags, cr, ct);

	VOPSTATS_UPDATE(vp, putpage);
	return (err);
}

int
fop_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, size_t len,
    uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_map_dispatch(vp, off, as, addrp, len, prot, maxprot,
	    flags, cr, ct);

	VOPSTATS_UPDATE(vp, map);
	return (err);
}

int
fop_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
    uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	u_longlong_t delta;

	VOPXID_MAP_CR(vp, cr);

	error = fop_addmap_dispatch(vp, off, as, addr, len, prot, maxprot,
	    flags, cr, ct);

	if ((!error) && (vp->v_type == VREG)) {
		delta = (u_longlong_t)btopr(len);
		/*
		 * If file is declared MAP_PRIVATE, it can't be written back
		 * even if open for write. Handle as read.
		 */
		if (flags & MAP_PRIVATE) {
			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
			    (int64_t)delta);
		} else {
			/*
			 * atomic_add_64 forces the fetch of a 64 bit value to
			 * be atomic on 32 bit machines
			 */
			if (maxprot & PROT_WRITE)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
				    (int64_t)delta);
			if (maxprot & PROT_READ)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)delta);
			if (maxprot & PROT_EXEC)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)delta);
		}
	}
	VOPSTATS_UPDATE(vp, addmap);
	return (error);
}
int
fop_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
    uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	int error;
	u_longlong_t delta;

	VOPXID_MAP_CR(vp, cr);

	error = fop_delmap_dispatch(vp, off, as, addr, len, prot, maxprot,
	    flags, cr, ct);

	/*
	 * NFS calls into delmap twice, the first time
	 * it simply establishes a callback mechanism and returns EAGAIN
	 * while the real work is being done upon the second invocation.
	 * We have to detect this here and only decrement the counts upon
	 * the second delmap request.
	 */
	if ((error != EAGAIN) && (vp->v_type == VREG)) {
		delta = (u_longlong_t)btopr(len);

		if (flags & MAP_PRIVATE) {
			atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
			    (int64_t)(-delta));
		} else {
			/*
			 * atomic_add_64 forces the fetch of a 64 bit value
			 * to be atomic on 32 bit machines
			 */
			if (maxprot & PROT_WRITE)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
				    (int64_t)(-delta));
			if (maxprot & PROT_READ)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)(-delta));
			if (maxprot & PROT_EXEC)
				atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
				    (int64_t)(-delta));
		}
	}
	VOPSTATS_UPDATE(vp, delmap);
	return (error);
}
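/*
 * Illustrative sketch (not part of the original source): the v_mmap_read /
 * v_mmap_write counters above are symmetric, so every successful fop_addmap()
 * on a VREG vnode must eventually be balanced by a fop_delmap() over the same
 * range with the same maxprot, otherwise vn_is_mapped() will report the file
 * as mapped forever.  Conceptually:
 *
 *	error = fop_addmap(vp, off, as, addr, len, prot, maxprot, flags,
 *	    cr, ct);
 *	// ... mapping is used ...
 *	error = fop_delmap(vp, off, as, addr, len, prot, maxprot, flags,
 *	    cr, ct);
 */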
int
fop_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
    struct pollhead **phpp, caller_context_t *ct)
{
	int	err;

	if (vp->v_op->vop_poll == NULL)
		err = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
	else
		err = vp->v_op->vop_poll(vp, events, anyyet, reventsp, phpp,
		    ct);

	VOPSTATS_UPDATE(vp, poll);
	return (err);
}

int
fop_dump(vnode_t *vp, caddr_t addr, offset_t lbdn, offset_t dblks,
    caller_context_t *ct)
{
	int	err;

	/* ensure lbdn and dblks can be passed safely to bdev_dump */
	if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
		return (EIO);

	err = fop_dump_dispatch(vp, addr, lbdn, dblks, ct);

	VOPSTATS_UPDATE(vp, dump);
	return (err);
}

int
fop_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_pathconf == NULL)
		err = fs_pathconf(vp, cmd, valp, cr, ct);
	else
		err = vp->v_op->vop_pathconf(vp, cmd, valp, cr, ct);

	VOPSTATS_UPDATE(vp, pathconf);
	return (err);
}

int
fop_pageio(vnode_t *vp, struct page *pp, u_offset_t io_off, size_t io_len,
    int flags, cred_t *cr, caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	err = fop_pageio_dispatch(vp, pp, io_off, io_len, flags, cr, ct);

	VOPSTATS_UPDATE(vp, pageio);
	return (err);
}

int
fop_dumpctl(vnode_t *vp, int action, offset_t *blkp, caller_context_t *ct)
{
	int	err;

	err = fop_dumpctl_dispatch(vp, action, blkp, ct);

	VOPSTATS_UPDATE(vp, dumpctl);
	return (err);
}

void
fop_dispose(vnode_t *vp, page_t *pp, int flag, int dn, cred_t *cr,
    caller_context_t *ct)
{
	/* Must do stats first since it's possible to lose the vnode */
	VOPSTATS_UPDATE(vp, dispose);

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_dispose == NULL)
		fs_dispose(vp, pp, flag, dn, cr, ct);
	else
		vp->v_op->vop_dispose(vp, pp, flag, dn, cr, ct);
}

int
fop_setsecattr(vnode_t *vp, vsecattr_t *vsap, int flag, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	/*
	 * We're only allowed to skip the ACL check iff we used a 32 bit
	 * ACE mask with fop_access() to determine permissions.
	 */
	if ((flag & ATTR_NOACLCHECK) &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
		return (EINVAL);
	}

	err = fop_setsecattr_dispatch(vp, vsap, flag, cr, ct);

	VOPSTATS_UPDATE(vp, setsecattr);
	return (err);
}

int
fop_getsecattr(vnode_t *vp, vsecattr_t *vsap, int flag, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	/*
	 * We're only allowed to skip the ACL check iff we used a 32 bit
	 * ACE mask with fop_access() to determine permissions.
	 */
	if ((flag & ATTR_NOACLCHECK) &&
	    vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
		return (EINVAL);
	}

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_getsecattr == NULL)
		err = fs_fab_acl(vp, vsap, flag, cr, ct);
	else
		err = vp->v_op->vop_getsecattr(vp, vsap, flag, cr, ct);

	VOPSTATS_UPDATE(vp, getsecattr);
	return (err);
}

int
fop_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
    caller_context_t *ct)
{
	int	err;

	VOPXID_MAP_CR(vp, cr);

	if (vp->v_op->vop_shrlock == NULL)
		err = fs_shrlock(vp, cmd, shr, flag, cr, ct);
	else
		err = vp->v_op->vop_shrlock(vp, cmd, shr, flag, cr, ct);

	VOPSTATS_UPDATE(vp, shrlock);
	return (err);
}

int
fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
    caller_context_t *ct)
{
	int	err;

	err = fop_vnevent_dispatch(vp, vnevent, dvp, fnm, ct);

	VOPSTATS_UPDATE(vp, vnevent);
	return (err);
}

int
fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
    caller_context_t *ct)
{
	int err;

	if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
		return (ENOTSUP);

	err = fop_reqzcbuf_dispatch(vp, ioflag, uiop, cr, ct);

	VOPSTATS_UPDATE(vp, reqzcbuf);
	return (err);
}

int
fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	int err;

	if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
		return (ENOTSUP);

	err = fop_retzcbuf_dispatch(vp, uiop, cr, ct);

	VOPSTATS_UPDATE(vp, retzcbuf);
	return (err);
}
/*
 * Default destructor
 *	Needed because NULL destructor means that the key is unused
 */
/* ARGSUSED */
void
vsd_defaultdestructor(void *value)
{
}

/*
 * Create a key (index into per vnode array)
 *	Locks out vsd_create, vsd_destroy, and vsd_free
 *	May allocate memory with lock held
 */
void
vsd_create(uint_t *keyp, void (*destructor)(void *))
{
	int	i;
	uint_t	nkeys;

	/*
	 * if key is allocated, do nothing
	 */
	mutex_enter(&vsd_lock);
	if (*keyp) {
		mutex_exit(&vsd_lock);
		return;
	}
	/*
	 * find an unused key
	 */
	if (destructor == NULL)
		destructor = vsd_defaultdestructor;

	for (i = 0; i < vsd_nkeys; ++i)
		if (vsd_destructor[i] == NULL)
			break;

	/*
	 * if no unused keys, increase the size of the destructor array
	 */
	if (i == vsd_nkeys) {
		if ((nkeys = (vsd_nkeys << 1)) == 0)
			nkeys = 1;
		vsd_destructor =
		    (void (**)(void *))vsd_realloc((void *)vsd_destructor,
		    (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
		    (size_t)(nkeys * sizeof (void (*)(void *))));
		vsd_nkeys = nkeys;
	}

	/*
	 * allocate the next available unused key
	 */
	vsd_destructor[i] = destructor;
	*keyp = i + 1;

	/* create vsd_list, if it doesn't exist */
	if (vsd_list == NULL) {
		vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
		list_create(vsd_list, sizeof (struct vsd_node),
		    offsetof(struct vsd_node, vs_nodes));
	}

	mutex_exit(&vsd_lock);
}
/*
 * Destroy a key
 *
 * Assumes that the caller is preventing vsd_set and vsd_get
 * Locks out vsd_create, vsd_destroy, and vsd_free
 * May free memory with lock held
 */
void
vsd_destroy(uint_t *keyp)
{
	uint_t key;
	struct vsd_node *vsd;

	/*
	 * protect the key namespace and our destructor lists
	 */
	mutex_enter(&vsd_lock);
	key = *keyp;
	*keyp = 0;

	ASSERT(key <= vsd_nkeys);

	/*
	 * if the key is valid
	 */
	if (key != 0) {
		uint_t k = key - 1;
		/*
		 * for every vnode with VSD, call key's destructor
		 */
		for (vsd = list_head(vsd_list); vsd != NULL;
		    vsd = list_next(vsd_list, vsd)) {
			/*
			 * no VSD for key in this vnode
			 */
			if (key > vsd->vs_nkeys)
				continue;
			/*
			 * call destructor for key
			 */
			if (vsd->vs_value[k] && vsd_destructor[k])
				(*vsd_destructor[k])(vsd->vs_value[k]);
			/*
			 * reset value for key
			 */
			vsd->vs_value[k] = NULL;
		}
		/*
		 * actually free the key (NULL destructor == unused)
		 */
		vsd_destructor[k] = NULL;
	}

	mutex_exit(&vsd_lock);
}

/*
 * Quickly return the per vnode value that was stored with the specified key
 * Assumes the caller is protecting key from vsd_create and vsd_destroy
 * Assumes the caller is holding v_vsd_lock to protect the vsd.
 */
void *
vsd_get(vnode_t *vp, uint_t key)
{
	struct vsd_node *vsd;

	ASSERT(vp != NULL);
	ASSERT(mutex_owned(&vp->v_vsd_lock));

	vsd = vp->v_vsd;

	if (key && vsd != NULL && key <= vsd->vs_nkeys)
		return (vsd->vs_value[key - 1]);
	return (NULL);
}

/*
 * Set a per vnode value indexed with the specified key
 * Assumes the caller is holding v_vsd_lock to protect the vsd.
 */
int
vsd_set(vnode_t *vp, uint_t key, void *value)
{
	struct vsd_node *vsd;

	ASSERT(vp != NULL);
	ASSERT(mutex_owned(&vp->v_vsd_lock));

	if (key == 0)
		return (EINVAL);

	vsd = vp->v_vsd;
	if (vsd == NULL)
		vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);

	/*
	 * If the vsd was just allocated, vs_nkeys will be 0, so the following
	 * code won't happen and we will continue down and allocate space for
	 * the vs_value array.
	 * If the caller is replacing one value with another, then it is up
	 * to the caller to free/rele/destroy the previous value (if needed).
	 */
	if (key <= vsd->vs_nkeys) {
		vsd->vs_value[key - 1] = value;
		return (0);
	}

	ASSERT(key <= vsd_nkeys);

	if (vsd->vs_nkeys == 0) {
		mutex_enter(&vsd_lock);	/* lock out vsd_destroy() */
		/*
		 * Link onto list of all VSD nodes.
		 */
		list_insert_head(vsd_list, vsd);
		mutex_exit(&vsd_lock);
	}

	/*
	 * Allocate vnode local storage and set the value for key
	 */
	vsd->vs_value = vsd_realloc(vsd->vs_value,
	    vsd->vs_nkeys * sizeof (void *),
	    key * sizeof (void *));
	vsd->vs_nkeys = key;
	vsd->vs_value[key - 1] = value;

	return (0);
}

/*
 * Called from vn_free() to run the destructor function for each vsd
 *	Locks out vsd_create and vsd_destroy
 *	Assumes that the destructor *DOES NOT* use vsd
 */
void
vsd_free(vnode_t *vp)
{
	int i;
	struct vsd_node *vsd = vp->v_vsd;

	if (vsd == NULL)
		return;

	if (vsd->vs_nkeys == 0) {
		kmem_free(vsd, sizeof (*vsd));
		vp->v_vsd = NULL;
		return;
	}

	/*
	 * lock out vsd_create and vsd_destroy, call
	 * the destructor, and mark the value as destroyed.
	 */
	mutex_enter(&vsd_lock);

	for (i = 0; i < vsd->vs_nkeys; i++) {
		if (vsd->vs_value[i] && vsd_destructor[i])
			(*vsd_destructor[i])(vsd->vs_value[i]);
		vsd->vs_value[i] = NULL;
	}

	/*
	 * remove from linked list of VSD nodes
	 */
	list_remove(vsd_list, vsd);

	mutex_exit(&vsd_lock);

	/*
	 * free up the VSD
	 */
	kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
	kmem_free(vsd, sizeof (struct vsd_node));
	vp->v_vsd = NULL;
}

/*
 * realloc
 */
static void *
vsd_realloc(void *old, size_t osize, size_t nsize)
{
	void *new;

	new = kmem_zalloc(nsize, KM_SLEEP);
	if (old) {
		bcopy(old, new, osize);
		kmem_free(old, osize);
	}
	return (new);
}
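/*
 * Illustrative sketch (not part of the original source): a module that wants
 * to hang private data off arbitrary vnodes uses the VSD API roughly like
 * this; mykey, mymod_* and mydata_free() are hypothetical.
 *
 *	static uint_t mykey;
 *
 *	void
 *	mymod_init(void)
 *	{
 *		vsd_create(&mykey, mydata_free);
 *	}
 *
 *	void
 *	mymod_attach(vnode_t *vp, void *data)
 *	{
 *		mutex_enter(&vp->v_vsd_lock);
 *		(void) vsd_set(vp, mykey, data);
 *		mutex_exit(&vp->v_vsd_lock);
 *	}
 *
 *	void *
 *	mymod_lookup(vnode_t *vp)
 *	{
 *		void *data;
 *
 *		mutex_enter(&vp->v_vsd_lock);
 *		data = vsd_get(vp, mykey);
 *		mutex_exit(&vp->v_vsd_lock);
 *		return (data);
 *	}
 *
 * The registered destructor runs from vsd_free() (called from vn_free()) or
 * from vsd_destroy(), so it must not itself call back into the VSD API.
 */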
/*
 * Setup the extensible system attribute for creating a reparse point.
 * The symlink data 'target' is validated for proper format of a reparse
 * string and a check also made to make sure the symlink data does not
 * point to an existing file.
 *
 * return 0 if ok else -1.
 */
static int
fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
{
	xoptattr_t *xoap;

	if ((!target) || (!vap) || (!xvattr))
		return (-1);

	/* validate reparse string */
	if (reparse_validate((const char *)target))
		return (-1);

	xva_init(xvattr);
	xvattr->xva_vattr = *vap;
	xvattr->xva_vattr.va_mask |= AT_XVATTR;
	xoap = xva_getxoptattr(xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(xvattr, XAT_REPARSE);
	xoap->xoa_reparse = 1;

	return (0);
}

/*
 * Function to check whether a symlink is a reparse point.
 * Return B_TRUE if it is a reparse point, else return B_FALSE
 */
boolean_t
vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	xvattr_t xvattr;
	xoptattr_t *xoap;

	if ((vp->v_type != VLNK) ||
	    !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
		return (B_FALSE);

	xva_init(&xvattr);
	xoap = xva_getxoptattr(&xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(&xvattr, XAT_REPARSE);

	if (fop_getattr(vp, &xvattr.xva_vattr, 0, cr, ct))
		return (B_FALSE);

	if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
	    (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
		return (B_FALSE);

	return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
}
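/*
 * Illustrative sketch (not part of the original source): a server-side
 * consumer (e.g. an SMB/NFS referral path) would typically gate its special
 * handling of a symlink on vn_is_reparse(), along the lines of:
 *
 *	if (vp->v_type == VLNK && vn_is_reparse(vp, cr, NULL)) {
 *		// read the link text with fop_readlink() and hand it to
 *		// the reparse service for interpretation
 *	}
 *
 * fs_reparse_mark() above is the creation-side counterpart used by
 * fop_symlink() when the link text carries FS_REPARSE_TAG_STR.
 */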