4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
32 * University Copyright- Copyright (c) 1982, 1986, 1988
33 * The Regents of the University of California
36 * University Acknowledgment- Portions of this document are derived from
37 * software developed by the University of California, Berkeley, and its
41 #include <sys/types.h>
42 #include <sys/param.h>
43 #include <sys/t_lock.h>
44 #include <sys/errno.h>
49 #include <sys/pathname.h>
51 #include <sys/vfs_opreg.h>
52 #include <sys/vnode.h>
53 #include <sys/rwstlock.h>
58 #include <sys/sysmacros.h>
59 #include <sys/cmn_err.h>
60 #include <sys/systm.h>
62 #include <sys/debug.h>
65 #include <sys/nbmlock.h>
66 #include <sys/fcntl.h>
67 #include <sys/fs_subr.h>
68 #include <sys/taskq.h>
69 #include <sys/fs_reparse.h>
/*
 * Determine if this vnode is a file that is read-only.
 * Char/block devices and FIFOs are excluded because their writability is a
 * device property, not a property of the (possibly read-only) filesystem
 * that holds the node.
 */
#define	ISROFILE(vp)	\
	((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
	(vp)->v_type != VFIFO && vn_is_readonly(vp))
/* Tunable via /etc/system; used only by admin/install */
int nfs_global_client_only;

/*
 * Array of vopstats_t for per-FS-type vopstats.  This array has the same
 * number of entries as and parallel to the vfssw table.  (Arguably, it could
 * be part of the vfssw table.)  Once it's initialized, it's accessed using
 * the same fstype index that is used to index into the vfssw table.
 */
vopstats_t **vopstats_fstype;

/* vopstats initialization template used for fast initialization via bcopy() */
static vopstats_t *vs_templatep;

/* Kmem cache handle for vsk_anchor_t allocations */
kmem_cache_t *vsk_anchor_cache;

/* file events cleanup routine */
extern void free_fopdata(vnode_t *);

/*
 * Root of AVL tree for the kstats associated with vopstats.  Lock protects
 * updates to vsktat_tree.
 */
avl_tree_t	vskstat_tree;
kmutex_t	vskstat_tree_lock;

/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;

/*
 * forward declarations for internal vnode specific data (vsd)
 */
static void *vsd_realloc(void *, size_t, size_t);

/*
 * forward declarations for reparse point functions
 */
static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);

/*
 * VSD -- VNODE SPECIFIC DATA
 * The v_data pointer is typically used by a file system to store a
 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 * However, there are times when additional project private data needs
 * to be stored separately from the data (node) pointed to by v_data.
 * This additional data could be stored by the file system itself or
 * by a completely different kernel entity.  VSD provides a way for
 * callers to obtain a key and store a pointer to private data associated
 * with a vnode.
 *
 * Callers are responsible for protecting the vsd by holding v_vsd_lock
 * for calls to vsd_set() and vsd_get().
 *
 * vsd_lock protects:
 *   vsd_nkeys - creation and deletion of vsd keys
 *   vsd_list - insertion and deletion of vsd_node in the vsd_list
 *   vsd_destructor - adding and removing destructors to the list
 */
static kmutex_t		vsd_lock;
static uint_t		vsd_nkeys;	/* size of destructor array */
/* list of vsd_node's */
static list_t *vsd_list = NULL;
/* per-key destructor funcs */
static void		(**vsd_destructor)(void *);
/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 * recording of the bytes transferred.  Since the code is similar
 * but small, it is nearly a duplicate.  Consequently any changes
 * to one may need to be reflected in the other.
 * Rundown of the variables:
 * vp - Pointer to the vnode
 * counter - Partial name structure member to update in vopstats for counts
 * bytecounter - Partial name structure member to update in vopstats for bytes
 * bytesval - Value to update in vopstats for bytes
 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 */
#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}
/*
 * Like VOPSTATS_UPDATE() but additionally accumulates the number of bytes
 * transferred (bytesval) into the "bytecounter" member, both in the per-vfs
 * vopstats and in the shared per-fstype vopstats.
 */
#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += bytesval;		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += bytesval;	\
		}							\
	}								\
}
/*
 * If the filesystem does not support XIDs map credential
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr)	{					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
		cr = crgetmapped(cr);					\
	}
205 * Convert stat(2) formats to vnode types and vice versa. (Knows about
206 * numerical order of S_IFMT and vnode types.)
208 enum vtype iftovt_tab
[] = {
209 VNON
, VFIFO
, VCHR
, VNON
, VDIR
, VNON
, VBLK
, VNON
,
210 VREG
, VNON
, VLNK
, VNON
, VSOCK
, VNON
, VNON
, VNON
213 ushort_t vttoif_tab
[] = {
214 0, S_IFREG
, S_IFDIR
, S_IFBLK
, S_IFCHR
, S_IFLNK
, S_IFIFO
,
215 S_IFDOOR
, 0, S_IFSOCK
, S_IFPORT
, 0
/*
 * The system vnode cache.
 */
kmem_cache_t *vn_cache;
226 * Vnode operations vector.
229 static const fs_operation_trans_def_t vn_ops_table
[] = {
230 VOPNAME_OPEN
, offsetof(struct vnodeops
, vop_open
),
233 VOPNAME_CLOSE
, offsetof(struct vnodeops
, vop_close
),
236 VOPNAME_READ
, offsetof(struct vnodeops
, vop_read
),
239 VOPNAME_WRITE
, offsetof(struct vnodeops
, vop_write
),
242 VOPNAME_IOCTL
, offsetof(struct vnodeops
, vop_ioctl
),
245 VOPNAME_SETFL
, offsetof(struct vnodeops
, vop_setfl
),
248 VOPNAME_GETATTR
, offsetof(struct vnodeops
, vop_getattr
),
251 VOPNAME_SETATTR
, offsetof(struct vnodeops
, vop_setattr
),
254 VOPNAME_ACCESS
, offsetof(struct vnodeops
, vop_access
),
257 VOPNAME_LOOKUP
, offsetof(struct vnodeops
, vop_lookup
),
260 VOPNAME_CREATE
, offsetof(struct vnodeops
, vop_create
),
263 VOPNAME_REMOVE
, offsetof(struct vnodeops
, vop_remove
),
266 VOPNAME_LINK
, offsetof(struct vnodeops
, vop_link
),
269 VOPNAME_RENAME
, offsetof(struct vnodeops
, vop_rename
),
272 VOPNAME_MKDIR
, offsetof(struct vnodeops
, vop_mkdir
),
275 VOPNAME_RMDIR
, offsetof(struct vnodeops
, vop_rmdir
),
278 VOPNAME_READDIR
, offsetof(struct vnodeops
, vop_readdir
),
281 VOPNAME_SYMLINK
, offsetof(struct vnodeops
, vop_symlink
),
284 VOPNAME_READLINK
, offsetof(struct vnodeops
, vop_readlink
),
287 VOPNAME_FSYNC
, offsetof(struct vnodeops
, vop_fsync
),
290 VOPNAME_INACTIVE
, offsetof(struct vnodeops
, vop_inactive
),
293 VOPNAME_FID
, offsetof(struct vnodeops
, vop_fid
),
296 VOPNAME_RWLOCK
, offsetof(struct vnodeops
, vop_rwlock
),
299 VOPNAME_RWUNLOCK
, offsetof(struct vnodeops
, vop_rwunlock
),
300 (fs_generic_func_p
) fs_rwunlock
,
302 VOPNAME_SEEK
, offsetof(struct vnodeops
, vop_seek
),
305 VOPNAME_CMP
, offsetof(struct vnodeops
, vop_cmp
),
308 VOPNAME_FRLOCK
, offsetof(struct vnodeops
, vop_frlock
),
311 VOPNAME_SPACE
, offsetof(struct vnodeops
, vop_space
),
314 VOPNAME_REALVP
, offsetof(struct vnodeops
, vop_realvp
),
317 VOPNAME_GETPAGE
, offsetof(struct vnodeops
, vop_getpage
),
320 VOPNAME_PUTPAGE
, offsetof(struct vnodeops
, vop_putpage
),
323 VOPNAME_MAP
, offsetof(struct vnodeops
, vop_map
),
324 (fs_generic_func_p
) fs_nosys_map
,
326 VOPNAME_ADDMAP
, offsetof(struct vnodeops
, vop_addmap
),
327 (fs_generic_func_p
) fs_nosys_addmap
,
329 VOPNAME_DELMAP
, offsetof(struct vnodeops
, vop_delmap
),
332 VOPNAME_POLL
, offsetof(struct vnodeops
, vop_poll
),
333 (fs_generic_func_p
) fs_poll
,
335 VOPNAME_DUMP
, offsetof(struct vnodeops
, vop_dump
),
338 VOPNAME_PATHCONF
, offsetof(struct vnodeops
, vop_pathconf
),
341 VOPNAME_PAGEIO
, offsetof(struct vnodeops
, vop_pageio
),
344 VOPNAME_DUMPCTL
, offsetof(struct vnodeops
, vop_dumpctl
),
347 VOPNAME_DISPOSE
, offsetof(struct vnodeops
, vop_dispose
),
348 (fs_generic_func_p
) fs_dispose
,
350 VOPNAME_SETSECATTR
, offsetof(struct vnodeops
, vop_setsecattr
),
353 VOPNAME_GETSECATTR
, offsetof(struct vnodeops
, vop_getsecattr
),
356 VOPNAME_SHRLOCK
, offsetof(struct vnodeops
, vop_shrlock
),
359 VOPNAME_VNEVENT
, offsetof(struct vnodeops
, vop_vnevent
),
360 (fs_generic_func_p
) fs_vnevent_nosupport
,
362 VOPNAME_REQZCBUF
, offsetof(struct vnodeops
, vop_reqzcbuf
),
365 VOPNAME_RETZCBUF
, offsetof(struct vnodeops
, vop_retzcbuf
),
371 /* Extensible attribute (xva) routines. */
374 * Zero out the structure, set the size of the requested/returned bitmaps,
375 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
376 * to the returned attributes array.
379 xva_init(xvattr_t
*xvap
)
381 bzero(xvap
, sizeof (xvattr_t
));
382 xvap
->xva_mapsize
= XVA_MAPSIZE
;
383 xvap
->xva_magic
= XVA_MAGIC
;
384 xvap
->xva_vattr
.va_mask
= AT_XVATTR
;
385 xvap
->xva_rtnattrmapp
= &(xvap
->xva_rtnattrmap
)[0];
389 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
390 * structure. Otherwise, returns NULL.
393 xva_getxoptattr(xvattr_t
*xvap
)
395 xoptattr_t
*xoap
= NULL
;
396 if (xvap
->xva_vattr
.va_mask
& AT_XVATTR
)
397 xoap
= &xvap
->xva_xoptattrs
;
402 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
403 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
407 vska_compar(const void *n1
, const void *n2
)
410 ulong_t p1
= ((vsk_anchor_t
*)n1
)->vsk_fsid
;
411 ulong_t p2
= ((vsk_anchor_t
*)n2
)->vsk_fsid
;
415 } else if (p1
> p2
) {
425 * Used to create a single template which will be bcopy()ed to a newly
426 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
429 create_vopstats_template()
433 vsp
= kmem_alloc(sizeof (vopstats_t
), KM_SLEEP
);
434 bzero(vsp
, sizeof (*vsp
)); /* Start fresh */
437 kstat_named_init(&vsp
->nopen
, "nopen", KSTAT_DATA_UINT64
);
439 kstat_named_init(&vsp
->nclose
, "nclose", KSTAT_DATA_UINT64
);
441 kstat_named_init(&vsp
->nread
, "nread", KSTAT_DATA_UINT64
);
442 kstat_named_init(&vsp
->read_bytes
, "read_bytes", KSTAT_DATA_UINT64
);
444 kstat_named_init(&vsp
->nwrite
, "nwrite", KSTAT_DATA_UINT64
);
445 kstat_named_init(&vsp
->write_bytes
, "write_bytes", KSTAT_DATA_UINT64
);
447 kstat_named_init(&vsp
->nioctl
, "nioctl", KSTAT_DATA_UINT64
);
449 kstat_named_init(&vsp
->nsetfl
, "nsetfl", KSTAT_DATA_UINT64
);
451 kstat_named_init(&vsp
->ngetattr
, "ngetattr", KSTAT_DATA_UINT64
);
453 kstat_named_init(&vsp
->nsetattr
, "nsetattr", KSTAT_DATA_UINT64
);
455 kstat_named_init(&vsp
->naccess
, "naccess", KSTAT_DATA_UINT64
);
457 kstat_named_init(&vsp
->nlookup
, "nlookup", KSTAT_DATA_UINT64
);
459 kstat_named_init(&vsp
->ncreate
, "ncreate", KSTAT_DATA_UINT64
);
461 kstat_named_init(&vsp
->nremove
, "nremove", KSTAT_DATA_UINT64
);
463 kstat_named_init(&vsp
->nlink
, "nlink", KSTAT_DATA_UINT64
);
465 kstat_named_init(&vsp
->nrename
, "nrename", KSTAT_DATA_UINT64
);
467 kstat_named_init(&vsp
->nmkdir
, "nmkdir", KSTAT_DATA_UINT64
);
469 kstat_named_init(&vsp
->nrmdir
, "nrmdir", KSTAT_DATA_UINT64
);
470 /* fop_readdir I/O */
471 kstat_named_init(&vsp
->nreaddir
, "nreaddir", KSTAT_DATA_UINT64
);
472 kstat_named_init(&vsp
->readdir_bytes
, "readdir_bytes",
475 kstat_named_init(&vsp
->nsymlink
, "nsymlink", KSTAT_DATA_UINT64
);
477 kstat_named_init(&vsp
->nreadlink
, "nreadlink", KSTAT_DATA_UINT64
);
479 kstat_named_init(&vsp
->nfsync
, "nfsync", KSTAT_DATA_UINT64
);
481 kstat_named_init(&vsp
->ninactive
, "ninactive", KSTAT_DATA_UINT64
);
483 kstat_named_init(&vsp
->nfid
, "nfid", KSTAT_DATA_UINT64
);
485 kstat_named_init(&vsp
->nrwlock
, "nrwlock", KSTAT_DATA_UINT64
);
487 kstat_named_init(&vsp
->nrwunlock
, "nrwunlock", KSTAT_DATA_UINT64
);
489 kstat_named_init(&vsp
->nseek
, "nseek", KSTAT_DATA_UINT64
);
491 kstat_named_init(&vsp
->ncmp
, "ncmp", KSTAT_DATA_UINT64
);
493 kstat_named_init(&vsp
->nfrlock
, "nfrlock", KSTAT_DATA_UINT64
);
495 kstat_named_init(&vsp
->nspace
, "nspace", KSTAT_DATA_UINT64
);
497 kstat_named_init(&vsp
->nrealvp
, "nrealvp", KSTAT_DATA_UINT64
);
499 kstat_named_init(&vsp
->ngetpage
, "ngetpage", KSTAT_DATA_UINT64
);
501 kstat_named_init(&vsp
->nputpage
, "nputpage", KSTAT_DATA_UINT64
);
503 kstat_named_init(&vsp
->nmap
, "nmap", KSTAT_DATA_UINT64
);
505 kstat_named_init(&vsp
->naddmap
, "naddmap", KSTAT_DATA_UINT64
);
507 kstat_named_init(&vsp
->ndelmap
, "ndelmap", KSTAT_DATA_UINT64
);
509 kstat_named_init(&vsp
->npoll
, "npoll", KSTAT_DATA_UINT64
);
511 kstat_named_init(&vsp
->ndump
, "ndump", KSTAT_DATA_UINT64
);
513 kstat_named_init(&vsp
->npathconf
, "npathconf", KSTAT_DATA_UINT64
);
515 kstat_named_init(&vsp
->npageio
, "npageio", KSTAT_DATA_UINT64
);
517 kstat_named_init(&vsp
->ndumpctl
, "ndumpctl", KSTAT_DATA_UINT64
);
519 kstat_named_init(&vsp
->ndispose
, "ndispose", KSTAT_DATA_UINT64
);
521 kstat_named_init(&vsp
->nsetsecattr
, "nsetsecattr", KSTAT_DATA_UINT64
);
523 kstat_named_init(&vsp
->ngetsecattr
, "ngetsecattr", KSTAT_DATA_UINT64
);
525 kstat_named_init(&vsp
->nshrlock
, "nshrlock", KSTAT_DATA_UINT64
);
527 kstat_named_init(&vsp
->nvnevent
, "nvnevent", KSTAT_DATA_UINT64
);
529 kstat_named_init(&vsp
->nreqzcbuf
, "nreqzcbuf", KSTAT_DATA_UINT64
);
531 kstat_named_init(&vsp
->nretzcbuf
, "nretzcbuf", KSTAT_DATA_UINT64
);
537 * Creates a kstat structure associated with a vopstats structure.
540 new_vskstat(char *ksname
, vopstats_t
*vsp
)
544 if (!vopstats_enabled
) {
548 ksp
= kstat_create("unix", 0, ksname
, "misc", KSTAT_TYPE_NAMED
,
549 sizeof (vopstats_t
)/sizeof (kstat_named_t
),
550 KSTAT_FLAG_VIRTUAL
|KSTAT_FLAG_WRITABLE
);
560 * Called from vfsinit() to initialize the support mechanisms for vopstats
565 if (!vopstats_enabled
)
569 * Creates the AVL tree which holds per-vfs vopstat anchors. This
570 * is necessary since we need to check if a kstat exists before we
571 * attempt to create it. Also, initialize its lock.
573 avl_create(&vskstat_tree
, vska_compar
, sizeof (vsk_anchor_t
),
574 offsetof(vsk_anchor_t
, vsk_node
));
575 mutex_init(&vskstat_tree_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
577 vsk_anchor_cache
= kmem_cache_create("vsk_anchor_cache",
578 sizeof (vsk_anchor_t
), sizeof (uintptr_t), NULL
, NULL
, NULL
,
582 * Set up the array of pointers for the vopstats-by-FS-type.
583 * The entries will be allocated/initialized as each file system
584 * goes through modload/mod_installfs.
586 vopstats_fstype
= (vopstats_t
**)kmem_zalloc(
587 (sizeof (vopstats_t
*) * nfstype
), KM_SLEEP
);
589 /* Set up the global vopstats initialization template */
590 vs_templatep
= create_vopstats_template();
594 * We need to have the all of the counters zeroed.
595 * The initialization of the vopstats_t includes on the order of
596 * 50 calls to kstat_named_init(). Rather that do that on every call,
597 * we do it once in a template (vs_templatep) then bcopy it over.
600 initialize_vopstats(vopstats_t
*vsp
)
605 bcopy(vs_templatep
, vsp
, sizeof (vopstats_t
));
609 * If possible, determine which vopstats by fstype to use and
610 * return a pointer to the caller.
613 get_fstype_vopstats(vfs_t
*vfsp
, struct vfssw
*vswp
)
615 int fstype
= 0; /* Index into vfssw[] */
616 vopstats_t
*vsp
= NULL
;
618 if (vfsp
== NULL
|| (vfsp
->vfs_flag
& VFS_STATS
) == 0 ||
622 * Set up the fstype. We go to so much trouble because all versions
623 * of NFS use the same fstype in their vfs even though they have
624 * distinct entries in the vfssw[] table.
625 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
628 fstype
= vswp
- vfssw
; /* Gets us the index */
630 fstype
= vfsp
->vfs_fstype
;
634 * Point to the per-fstype vopstats. The only valid values are
635 * non-zero positive values less than the number of vfssw[] table
638 if (fstype
> 0 && fstype
< nfstype
) {
639 vsp
= vopstats_fstype
[fstype
];
646 * Generate a kstat name, create the kstat structure, and allocate a
647 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
648 * to the caller. This must only be called from a mount.
651 get_vskstat_anchor(vfs_t
*vfsp
)
653 char kstatstr
[KSTAT_STRLEN
]; /* kstat name for vopstats */
654 statvfs64_t statvfsbuf
; /* Needed to find f_fsid */
655 vsk_anchor_t
*vskp
= NULL
; /* vfs <--> kstat anchor */
656 kstat_t
*ksp
; /* Ptr to new kstat */
657 avl_index_t where
; /* Location in the AVL tree */
659 if (vfsp
== NULL
|| vfsp
->vfs_implp
== NULL
||
660 (vfsp
->vfs_flag
& VFS_STATS
) == 0 || !vopstats_enabled
)
663 /* Need to get the fsid to build a kstat name */
664 if (VFS_STATVFS(vfsp
, &statvfsbuf
) == 0) {
665 /* Create a name for our kstats based on fsid */
666 (void) snprintf(kstatstr
, KSTAT_STRLEN
, "%s%lx",
667 VOPSTATS_STR
, statvfsbuf
.f_fsid
);
669 /* Allocate and initialize the vsk_anchor_t */
670 vskp
= kmem_cache_alloc(vsk_anchor_cache
, KM_SLEEP
);
671 bzero(vskp
, sizeof (*vskp
));
672 vskp
->vsk_fsid
= statvfsbuf
.f_fsid
;
674 mutex_enter(&vskstat_tree_lock
);
675 if (avl_find(&vskstat_tree
, vskp
, &where
) == NULL
) {
676 avl_insert(&vskstat_tree
, vskp
, where
);
677 mutex_exit(&vskstat_tree_lock
);
680 * Now that we've got the anchor in the AVL
681 * tree, we can create the kstat.
683 ksp
= new_vskstat(kstatstr
, &vfsp
->vfs_vopstats
);
688 /* Oops, found one! Release memory and lock. */
689 mutex_exit(&vskstat_tree_lock
);
690 kmem_cache_free(vsk_anchor_cache
, vskp
);
698 * We're in the process of tearing down the vfs and need to cleanup
699 * the data structures associated with the vopstats. Must only be called
703 teardown_vopstats(vfs_t
*vfsp
)
708 if (vfsp
== NULL
|| vfsp
->vfs_implp
== NULL
||
709 (vfsp
->vfs_flag
& VFS_STATS
) == 0 || !vopstats_enabled
)
712 /* This is a safe check since VFS_STATS must be set (see above) */
713 if ((vskap
= vfsp
->vfs_vskap
) == NULL
)
716 /* Whack the pointer right away */
717 vfsp
->vfs_vskap
= NULL
;
719 /* Lock the tree, remove the node, and delete the kstat */
720 mutex_enter(&vskstat_tree_lock
);
721 if (avl_find(&vskstat_tree
, vskap
, &where
)) {
722 avl_remove(&vskstat_tree
, vskap
);
725 if (vskap
->vsk_ksp
) {
726 kstat_delete(vskap
->vsk_ksp
);
728 mutex_exit(&vskstat_tree_lock
);
730 kmem_cache_free(vsk_anchor_cache
, vskap
);
734 * Read or write a vnode. Called from kernel code.
745 rlim64_t ulimit
, /* meaningful only if rw is UIO_WRITE */
754 if (rw
== UIO_WRITE
&& ISROFILE(vp
))
760 VOPXID_MAP_CR(vp
, cr
);
766 uio
.uio_loffset
= offset
;
767 uio
.uio_segflg
= (short)seg
;
769 uio
.uio_llimit
= ulimit
;
772 * We have to enter the critical region before calling fop_rwlock
773 * to avoid a deadlock with ufs.
775 if (nbl_need_check(vp
)) {
778 nbl_start_crit(vp
, RW_READER
);
780 error
= nbl_svmand(vp
, cr
, &svmand
);
783 if (nbl_conflict(vp
, rw
== UIO_WRITE
? NBL_WRITE
: NBL_READ
,
784 uio
.uio_offset
, uio
.uio_resid
, svmand
, NULL
)) {
790 (void) fop_rwlock(vp
,
791 rw
== UIO_WRITE
? V_WRITELOCK_TRUE
: V_WRITELOCK_FALSE
, NULL
);
792 if (rw
== UIO_WRITE
) {
793 uio
.uio_fmode
= FWRITE
;
794 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
795 error
= fop_write(vp
, &uio
, ioflag
, cr
, NULL
);
797 uio
.uio_fmode
= FREAD
;
798 uio
.uio_extflg
= UIO_COPY_CACHED
;
799 error
= fop_read(vp
, &uio
, ioflag
, cr
, NULL
);
802 rw
== UIO_WRITE
? V_WRITELOCK_TRUE
: V_WRITELOCK_FALSE
, NULL
);
804 *residp
= uio
.uio_resid
;
805 else if (uio
.uio_resid
)
815 * Release a vnode. Call fop_inactive on last reference or
816 * decrement reference count.
818 * To avoid race conditions, the v_count is left at 1 for
819 * the call to fop_inactive. This prevents another thread
820 * from reclaiming and releasing the vnode *before* the
821 * fop_inactive routine has a chance to destroy the vnode.
822 * We can't have more than 1 thread calling fop_inactive
828 VERIFY(vp
->v_count
> 0);
829 mutex_enter(&vp
->v_lock
);
830 if (vp
->v_count
== 1) {
831 mutex_exit(&vp
->v_lock
);
832 fop_inactive(vp
, CRED(), NULL
);
836 mutex_exit(&vp
->v_lock
);
840 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
841 * as a single reference, so v_count is not decremented until the last DNLC hold
842 * is released. This makes it possible to distinguish vnodes that are referenced
846 vn_rele_dnlc(vnode_t
*vp
)
848 VERIFY((vp
->v_count
> 0) && (vp
->v_count_dnlc
> 0));
849 mutex_enter(&vp
->v_lock
);
850 if (--vp
->v_count_dnlc
== 0) {
851 if (vp
->v_count
== 1) {
852 mutex_exit(&vp
->v_lock
);
853 fop_inactive(vp
, CRED(), NULL
);
858 mutex_exit(&vp
->v_lock
);
862 * Like vn_rele() except that it clears v_stream under v_lock.
863 * This is used by sockfs when it dismantels the association between
864 * the sockfs node and the vnode in the underlaying file system.
865 * v_lock has to be held to prevent a thread coming through the lookupname
866 * path from accessing a stream head that is going away.
869 vn_rele_stream(vnode_t
*vp
)
871 VERIFY(vp
->v_count
> 0);
872 mutex_enter(&vp
->v_lock
);
874 if (vp
->v_count
== 1) {
875 mutex_exit(&vp
->v_lock
);
876 fop_inactive(vp
, CRED(), NULL
);
880 mutex_exit(&vp
->v_lock
);
884 vn_rele_inactive(vnode_t
*vp
)
886 fop_inactive(vp
, CRED(), NULL
);
890 * Like vn_rele() except if we are going to call fop_inactive() then do it
891 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
892 * the file system as a result of releasing the vnode. Note, file systems
893 * already have to handle the race where the vnode is incremented before the
894 * inactive routine is called and does its locking.
896 * Warning: Excessive use of this routine can lead to performance problems.
897 * This is because taskqs throttle back allocation if too many are created.
900 vn_rele_async(vnode_t
*vp
, taskq_t
*taskq
)
902 VERIFY(vp
->v_count
> 0);
903 mutex_enter(&vp
->v_lock
);
904 if (vp
->v_count
== 1) {
905 mutex_exit(&vp
->v_lock
);
906 VERIFY(taskq_dispatch(taskq
, (task_func_t
*)vn_rele_inactive
,
907 vp
, TQ_SLEEP
) != (uintptr_t)NULL
);
911 mutex_exit(&vp
->v_lock
);
924 return (vn_openat(pnamep
, seg
, filemode
, createmode
, vpp
, crwhy
,
930 * Open/create a vnode.
931 * This may be callable by the kernel, the only known use
932 * of user context being that the current user credentials
933 * are used for permissions. crwhy is defined iff filemode & FCREAT.
944 struct vnode
*startvp
,
953 int shrlock_done
= 0;
955 enum symfollow follow
;
956 int estale_retry
= 0;
958 struct shr_locowner shr_own
;
960 if (filemode
& FSEARCH
)
961 filemode
|= FDIRECTORY
;
965 if (filemode
& FREAD
)
967 if (filemode
& (FWRITE
|FTRUNC
))
969 if (filemode
& (FSEARCH
|FEXEC
|FXATTRDIROPEN
))
972 /* symlink interpretation */
973 if (filemode
& FNOFOLLOW
)
978 if (filemode
& FAPPEND
)
979 accessflags
|= V_APPEND
;
982 if (filemode
& FCREAT
&& !(filemode
& FDIRECTORY
)) {
985 /* Wish to create a file. */
986 vattr
.va_type
= VREG
;
987 vattr
.va_mode
= createmode
;
988 vattr
.va_mask
= AT_TYPE
|AT_MODE
;
989 if (filemode
& FTRUNC
) {
991 vattr
.va_mask
|= AT_SIZE
;
993 if (filemode
& FEXCL
)
999 vn_createat(pnamep
, seg
, &vattr
, excl
, mode
, &vp
, crwhy
,
1000 (filemode
& ~(FTRUNC
|FEXCL
)), umask
, startvp
))
1003 /* Wish to open a file. Just look it up. */
1004 if (error
= lookupnameat(pnamep
, seg
, follow
,
1005 NULLVPP
, &vp
, startvp
)) {
1006 if ((error
== ESTALE
) &&
1007 fs_need_estale_retry(estale_retry
++))
1013 * Get the attributes to check whether file is large.
1014 * We do this only if the FOFFMAX flag is not set and
1015 * only for regular files.
1018 if (!(filemode
& FOFFMAX
) && (vp
->v_type
== VREG
)) {
1019 vattr
.va_mask
= AT_SIZE
;
1020 if ((error
= fop_getattr(vp
, &vattr
, 0,
1024 if (vattr
.va_size
> (uoff_t
)MAXOFF32_T
) {
1026 * Large File API - regular open fails
1027 * if FOFFMAX flag is set in file mode
1034 * Can't write directories, active texts, or
1035 * read-only filesystems. Can't truncate files
1036 * on which mandatory locking is in effect.
1038 if (filemode
& (FWRITE
|FTRUNC
)) {
1040 * Allow writable directory if VDIROPEN flag is set.
1042 if (vp
->v_type
== VDIR
&& !(vp
->v_flag
& VDIROPEN
)) {
1051 * Can't truncate files on which
1052 * sysv mandatory locking is in effect.
1054 if (filemode
& FTRUNC
) {
1057 if (fop_realvp(vp
, &rvp
, NULL
) != 0)
1059 if (rvp
->v_filocks
!= NULL
) {
1060 vattr
.va_mask
= AT_MODE
;
1061 if ((error
= fop_getattr(vp
,
1062 &vattr
, 0, CRED(), NULL
)) == 0 &&
1063 MANDLOCK(vp
, vattr
.va_mode
))
1071 * Check permissions.
1073 if (error
= fop_access(vp
, mode
, accessflags
, CRED(), NULL
))
1076 * Require FDIRECTORY to return a directory.
1077 * Require FEXEC to return a regular file.
1079 if ((filemode
& FDIRECTORY
) && vp
->v_type
!= VDIR
) {
1083 if ((filemode
& FEXEC
) && vp
->v_type
!= VREG
) {
1084 error
= ENOEXEC
; /* XXX: error code? */
1090 * Do remaining checks for FNOFOLLOW and FNOLINKS.
1092 if ((filemode
& FNOFOLLOW
) && vp
->v_type
== VLNK
) {
1096 if (filemode
& FNOLINKS
) {
1097 vattr
.va_mask
= AT_NLINK
;
1098 if ((error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
))) {
1101 if (vattr
.va_nlink
!= 1) {
1108 * Opening a socket corresponding to the AF_UNIX pathname
1109 * in the filesystem name space is not supported.
1110 * However, VSOCK nodes in namefs are supported in order
1111 * to make fattach work for sockets.
1113 * XXX This uses fop_realvp to distinguish between
1114 * an unopened namefs node (where fop_realvp returns a
1115 * different VSOCK vnode) and a VSOCK created by vn_create
1116 * in some file system (where fop_realvp would never return
1117 * a different vnode).
1119 if (vp
->v_type
== VSOCK
) {
1122 error
= fop_realvp(vp
, &nvp
, NULL
);
1123 if (error
!= 0 || nvp
== NULL
|| nvp
== vp
||
1124 nvp
->v_type
!= VSOCK
) {
1130 if ((vp
->v_type
== VREG
) && nbl_need_check(vp
)) {
1131 /* get share reservation */
1133 if (filemode
& FWRITE
)
1134 shr
.s_access
|= F_WRACC
;
1135 if (filemode
& FREAD
)
1136 shr
.s_access
|= F_RDACC
;
1139 shr
.s_pid
= ttoproc(curthread
)->p_pid
;
1140 shr_own
.sl_pid
= shr
.s_pid
;
1142 shr
.s_own_len
= sizeof (shr_own
);
1143 shr
.s_owner
= (caddr_t
)&shr_own
;
1144 error
= fop_shrlock(vp
, F_SHARE_NBMAND
, &shr
, filemode
, CRED(),
1150 /* nbmand conflict check if truncating file */
1151 if ((filemode
& FTRUNC
) && !(filemode
& FCREAT
)) {
1152 nbl_start_crit(vp
, RW_READER
);
1155 vattr
.va_mask
= AT_SIZE
;
1156 if (error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
))
1158 if (nbl_conflict(vp
, NBL_WRITE
, 0, vattr
.va_size
, 0,
1167 * Do opening protocol.
1169 error
= fop_open(&vp
, filemode
, CRED(), NULL
);
1175 * Truncate if required.
1177 if ((filemode
& FTRUNC
) && !(filemode
& FCREAT
)) {
1179 vattr
.va_mask
= AT_SIZE
;
1180 if ((error
= fop_setattr(vp
, &vattr
, 0, CRED(), NULL
)) != 0)
1184 ASSERT(vp
->v_count
> 0);
1192 (void) fop_close(vp
, filemode
, 1, (offset_t
)0, CRED(),
1198 (void) fop_shrlock(vp
, F_UNSHARE
, &shr
, 0, CRED(),
1204 * The following clause was added to handle a problem
1205 * with NFS consistency. It is possible that a lookup
1206 * of the file to be opened succeeded, but the file
1207 * itself doesn't actually exist on the server. This
1208 * is chiefly due to the DNLC containing an entry for
1209 * the file which has been removed on the server. In
1210 * this case, we just start over. If there was some
1211 * other cause for the ESTALE error, then the lookup
1212 * of the file will fail and the error will be returned
1213 * above instead of looping around from here.
1216 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1224 * The following two accessor functions are for the NFSv4 server. Since there
1225 * is no fop_open_UP/DOWNGRADE we need a way for the NFS server to keep the
1226 * vnode open counts correct when a client "upgrades" an open or does an
1227 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1228 * open mode (add or subtract read or write), but also change the share/deny
1229 * modes. However, share reservations are not integrated with OPEN, yet, so
1230 * we need to handle each separately. These functions are cleaner than having
1231 * the NFS server manipulate the counts directly, however, nobody else should
1232 * use these functions.
1239 ASSERT(vp
->v_type
== VREG
);
1241 if (filemode
& FREAD
)
1242 atomic_inc_32(&vp
->v_rdcnt
);
1243 if (filemode
& FWRITE
)
1244 atomic_inc_32(&vp
->v_wrcnt
);
1253 ASSERT(vp
->v_type
== VREG
);
1255 if (filemode
& FREAD
) {
1256 ASSERT(vp
->v_rdcnt
> 0);
1257 atomic_dec_32(&vp
->v_rdcnt
);
1259 if (filemode
& FWRITE
) {
1260 ASSERT(vp
->v_wrcnt
> 0);
1261 atomic_dec_32(&vp
->v_wrcnt
);
1278 return (vn_createat(pnamep
, seg
, vap
, excl
, mode
, vpp
, why
, flag
,
1283 * Create a vnode (makenode).
1296 struct vnode
*startvp
)
1298 struct vnode
*dvp
; /* ptr to parent dir vnode */
1299 struct vnode
*vp
= NULL
;
1304 enum symfollow follow
;
1305 int estale_retry
= 0;
1306 uint32_t auditing
= AU_AUDITING();
1308 ASSERT((vap
->va_mask
& (AT_TYPE
|AT_MODE
)) == (AT_TYPE
|AT_MODE
));
1310 /* symlink interpretation */
1311 if ((flag
& FNOFOLLOW
) || excl
== EXCL
)
1315 flag
&= ~(FNOFOLLOW
|FNOLINKS
);
1320 * If new object is a file, call lower level to create it.
1321 * Note that it is up to the lower level to enforce exclusive
1322 * creation, if the file is already there.
1323 * This allows the lower level to do whatever
1324 * locking or protocol that is needed to prevent races.
1325 * If the new object is directory call lower level to make
1326 * the new directory, with "." and "..".
1328 if (error
= pn_get(pnamep
, seg
, &pn
))
1331 audit_vncreate_start();
1335 * lookup will find the parent directory for the vnode.
1336 * When it is done the pn holds the name of the entry
1338 * If this is a non-exclusive create we also find the node itself.
1340 error
= lookuppnat(&pn
, NULL
, follow
, &dvp
,
1341 (excl
== EXCL
) ? NULLVPP
: vpp
, startvp
);
1344 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1346 if (why
== CRMKDIR
&& error
== EINVAL
)
1347 error
= EEXIST
; /* SVID */
1352 vap
->va_mode
&= ~VSVTX
;
1355 * If default ACLs are defined for the directory don't apply the
1356 * umask if umask is passed.
1363 vsec
.vsa_aclcnt
= 0;
1364 vsec
.vsa_aclentp
= NULL
;
1365 vsec
.vsa_dfaclcnt
= 0;
1366 vsec
.vsa_dfaclentp
= NULL
;
1367 vsec
.vsa_mask
= VSA_DFACLCNT
;
1368 error
= fop_getsecattr(dvp
, &vsec
, 0, CRED(), NULL
);
1370 * If error is ENOSYS then treat it as no error
1371 * Don't want to force all file systems to support
1372 * aclent_t style of ACL's.
1374 if (error
== ENOSYS
)
1382 * Apply the umask if no default ACLs.
1384 if (vsec
.vsa_dfaclcnt
== 0)
1385 vap
->va_mode
&= ~umask
;
1388 * fop_getsecattr() may have allocated memory for
1389 * ACLs we didn't request, so double-check and
1390 * free it if necessary.
1392 if (vsec
.vsa_aclcnt
&& vsec
.vsa_aclentp
!= NULL
)
1393 kmem_free((caddr_t
)vsec
.vsa_aclentp
,
1394 vsec
.vsa_aclcnt
* sizeof (aclent_t
));
1395 if (vsec
.vsa_dfaclcnt
&& vsec
.vsa_dfaclentp
!= NULL
)
1396 kmem_free((caddr_t
)vsec
.vsa_dfaclentp
,
1397 vsec
.vsa_dfaclcnt
* sizeof (aclent_t
));
1402 * In general we want to generate EROFS if the file system is
1403 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
1404 * documents the open system call, and it says that O_CREAT has no
1405 * effect if the file already exists. Bug 1119649 states
1406 * that open(path, O_CREAT, ...) fails when attempting to open an
1407 * existing file on a read only file system. Thus, the first part
1408 * of the following if statement has 3 checks:
1409 * if the file exists &&
1410 * it is being open with write access &&
1411 * the file system is read only
1412 * then generate EROFS
1414 if ((*vpp
!= NULL
&& (mode
& VWRITE
) && ISROFILE(*vpp
)) ||
1415 (*vpp
== NULL
&& dvp
->v_vfsp
->vfs_flag
& VFS_RDONLY
)) {
1419 } else if (excl
== NONEXCL
&& *vpp
!= NULL
) {
1423 * File already exists. If a mandatory lock has been
1424 * applied, return error.
1427 if (fop_realvp(vp
, &rvp
, NULL
) != 0)
1429 if ((vap
->va_mask
& AT_SIZE
) && nbl_need_check(vp
)) {
1430 nbl_start_crit(vp
, RW_READER
);
1433 if (rvp
->v_filocks
!= NULL
|| rvp
->v_shrlocks
!= NULL
) {
1434 vattr
.va_mask
= AT_MODE
|AT_SIZE
;
1435 if (error
= fop_getattr(vp
, &vattr
, 0, CRED(), NULL
)) {
1438 if (MANDLOCK(vp
, vattr
.va_mode
)) {
1443 * File cannot be truncated if non-blocking mandatory
1444 * locks are currently on the file.
1446 if ((vap
->va_mask
& AT_SIZE
) && in_crit
) {
1450 offset
= vap
->va_size
> vattr
.va_size
?
1451 vattr
.va_size
: vap
->va_size
;
1452 length
= vap
->va_size
> vattr
.va_size
?
1453 vap
->va_size
- vattr
.va_size
:
1454 vattr
.va_size
- vap
->va_size
;
1455 if (nbl_conflict(vp
, NBL_WRITE
, offset
,
1464 * If the file is the root of a VFS, we've crossed a
1465 * mount point and the "containing" directory that we
1466 * acquired above (dvp) is irrelevant because it's in
1467 * a different file system. We apply fop_create to the
1468 * target itself instead of to the containing directory
1469 * and supply a null path name to indicate (conventionally)
1470 * the node itself as the "component" of interest.
1472 * The call to fop_create() is necessary to ensure
1473 * that the appropriate permission checks are made,
1474 * i.e. EISDIR, EACCES, etc. We already know that vpp
1475 * exists since we are in the else condition where this
1478 if (vp
->v_flag
& VROOT
) {
1479 ASSERT(why
!= CRMKDIR
);
1480 error
= fop_create(vp
, "", vap
, excl
, mode
, vpp
,
1481 CRED(), flag
, NULL
, NULL
);
1483 * If the create succeeded, it will have created a
1484 * new reference on a new vnode (*vpp) in the child
1485 * file system, so we want to drop our reference on
1486 * the old (vp) upon exit.
1492 * Large File API - non-large open (FOFFMAX flag not set)
1493 * of regular file fails if the file size exceeds MAXOFF32_T.
1495 if (why
!= CRMKDIR
&&
1496 !(flag
& FOFFMAX
) &&
1497 (vp
->v_type
== VREG
)) {
1498 vattr
.va_mask
= AT_SIZE
;
1499 if ((error
= fop_getattr(vp
, &vattr
, 0,
1503 if ((vattr
.va_size
> (uoff_t
)MAXOFF32_T
)) {
1512 * Call mkdir() if specified, otherwise create().
1514 int must_be_dir
= pn_fixslash(&pn
); /* trailing '/'? */
1518 * N.B., if vn_createat() ever requests
1519 * case-insensitive behavior then it will need
1520 * to be passed to fop_mkdir(). fop_create()
1521 * will already get it via "flag"
1523 error
= fop_mkdir(dvp
, pn
.pn_path
, vap
, vpp
, CRED(),
1525 else if (!must_be_dir
)
1526 error
= fop_create(dvp
, pn
.pn_path
, vap
,
1527 excl
, mode
, vpp
, CRED(), flag
, NULL
, NULL
);
1535 audit_vncreate_finish(*vpp
, error
);
1547 * The following clause was added to handle a problem
1548 * with NFS consistency. It is possible that a lookup
1549 * of the file to be created succeeded, but the file
1550 * itself doesn't actually exist on the server. This
1551 * is chiefly due to the DNLC containing an entry for
1552 * the file which has been removed on the server. In
1553 * this case, we just start over. If there was some
1554 * other cause for the ESTALE error, then the lookup
1555 * of the file will fail and the error will be returned
1556 * above instead of looping around from here.
1558 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1564 vn_link(char *from
, char *to
, enum uio_seg seg
)
1566 return (vn_linkat(NULL
, from
, NO_FOLLOW
, NULL
, to
, seg
));
1570 vn_linkat(vnode_t
*fstartvp
, char *from
, enum symfollow follow
,
1571 vnode_t
*tstartvp
, char *to
, enum uio_seg seg
)
1573 struct vnode
*fvp
; /* from vnode ptr */
1574 struct vnode
*tdvp
; /* to directory vnode ptr */
1579 int estale_retry
= 0;
1580 uint32_t auditing
= AU_AUDITING();
1584 if (error
= pn_get(to
, seg
, &pn
))
1586 if (auditing
&& fstartvp
!= NULL
)
1587 audit_setfsat_path(1);
1588 if (error
= lookupnameat(from
, seg
, follow
, NULLVPP
, &fvp
, fstartvp
))
1590 if (auditing
&& tstartvp
!= NULL
)
1591 audit_setfsat_path(3);
1592 if (error
= lookuppnat(&pn
, NULL
, NO_FOLLOW
, &tdvp
, NULLVPP
, tstartvp
))
1595 * Make sure both source vnode and target directory vnode are
1596 * in the same vfs and that it is writeable.
1598 vattr
.va_mask
= AT_FSID
;
1599 if (error
= fop_getattr(fvp
, &vattr
, 0, CRED(), NULL
))
1601 fsid
= vattr
.va_fsid
;
1602 vattr
.va_mask
= AT_FSID
;
1603 if (error
= fop_getattr(tdvp
, &vattr
, 0, CRED(), NULL
))
1605 if (fsid
!= vattr
.va_fsid
) {
1609 if (tdvp
->v_vfsp
->vfs_flag
& VFS_RDONLY
) {
1616 (void) pn_fixslash(&pn
);
1617 error
= fop_link(tdvp
, fvp
, pn
.pn_path
, CRED(), NULL
, 0);
1624 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1630 vn_rename(char *from
, char *to
, enum uio_seg seg
)
1632 return (vn_renameat(NULL
, from
, NULL
, to
, seg
));
1636 vn_renameat(vnode_t
*fdvp
, char *fname
, vnode_t
*tdvp
,
1637 char *tname
, enum uio_seg seg
)
1641 struct pathname fpn
; /* from pathname */
1642 struct pathname tpn
; /* to pathname */
1644 int in_crit_src
, in_crit_targ
;
1645 vnode_t
*fromvp
, *fvp
;
1646 vnode_t
*tovp
, *targvp
;
1647 int estale_retry
= 0;
1648 uint32_t auditing
= AU_AUDITING();
1651 fvp
= fromvp
= tovp
= targvp
= NULL
;
1652 in_crit_src
= in_crit_targ
= 0;
1654 * Get to and from pathnames.
1656 if (error
= pn_get(fname
, seg
, &fpn
))
1658 if (error
= pn_get(tname
, seg
, &tpn
)) {
1664 * First we need to resolve the correct directories
1665 * The passed in directories may only be a starting point,
1666 * but we need the real directories the file(s) live in.
1667 * For example the fname may be something like usr/lib/sparc
1668 * and we were passed in the / directory, but we need to
1669 * use the lib directory for the rename.
1672 if (auditing
&& fdvp
!= NULL
)
1673 audit_setfsat_path(1);
1675 * Lookup to and from directories.
1677 if (error
= lookuppnat(&fpn
, NULL
, NO_FOLLOW
, &fromvp
, &fvp
, fdvp
)) {
1682 * Make sure there is an entry.
1689 if (auditing
&& tdvp
!= NULL
)
1690 audit_setfsat_path(3);
1691 if (error
= lookuppnat(&tpn
, NULL
, NO_FOLLOW
, &tovp
, &targvp
, tdvp
)) {
1696 * Make sure both the from vnode directory and the to directory
1697 * are in the same vfs and the to directory is writable.
1698 * We check fsid's, not vfs pointers, so loopback fs works.
1700 if (fromvp
!= tovp
) {
1701 vattr
.va_mask
= AT_FSID
;
1702 if (error
= fop_getattr(fromvp
, &vattr
, 0, CRED(), NULL
))
1704 fsid
= vattr
.va_fsid
;
1705 vattr
.va_mask
= AT_FSID
;
1706 if (error
= fop_getattr(tovp
, &vattr
, 0, CRED(), NULL
))
1708 if (fsid
!= vattr
.va_fsid
) {
1714 if (tovp
->v_vfsp
->vfs_flag
& VFS_RDONLY
) {
1720 * Make sure "from" vp is not a mount point.
1721 * Note, lookup did traverse() already, so
1722 * we'll be looking at the mounted FS root.
1723 * (but allow files like mnttab)
1725 if ((fvp
->v_flag
& VROOT
) != 0 && fvp
->v_type
== VDIR
) {
1730 if (targvp
&& (fvp
!= targvp
)) {
1731 nbl_start_crit(targvp
, RW_READER
);
1733 if (nbl_conflict(targvp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
1739 if (nbl_need_check(fvp
)) {
1740 nbl_start_crit(fvp
, RW_READER
);
1742 if (nbl_conflict(fvp
, NBL_RENAME
, 0, 0, 0, NULL
)) {
1751 (void) pn_fixslash(&tpn
);
1752 error
= fop_rename(fromvp
, fpn
.pn_path
, tovp
, tpn
.pn_path
, CRED(),
1761 nbl_end_crit(targvp
);
1770 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1776 * Remove a file or directory.
1779 vn_remove(char *fnamep
, enum uio_seg seg
, enum rm dirflag
)
1781 return (vn_removeat(NULL
, fnamep
, seg
, dirflag
));
1785 vn_removeat(vnode_t
*startvp
, char *fnamep
, enum uio_seg seg
, enum rm dirflag
)
1787 struct vnode
*vp
; /* entry vnode */
1788 struct vnode
*dvp
; /* ptr to parent dir vnode */
1789 struct vnode
*coveredvp
;
1790 struct pathname pn
; /* name of entry */
1794 struct vfs
*dvfsp
; /* ptr to parent dir vfs */
1796 int estale_retry
= 0;
1799 if (error
= pn_get(fnamep
, seg
, &pn
))
1802 if (error
= lookuppnat(&pn
, NULL
, NO_FOLLOW
, &dvp
, &vp
, startvp
)) {
1804 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1810 * Make sure there is an entry.
1818 dvfsp
= dvp
->v_vfsp
;
1821 * If the named file is the root of a mounted filesystem, fail,
1822 * unless it's marked unlinkable. In that case, unmount the
1823 * filesystem and proceed to unlink the covered vnode. (If the
1824 * covered vnode is a directory, use rmdir instead of unlink,
1825 * to avoid file system corruption.)
1827 if (vp
->v_flag
& VROOT
) {
1828 if ((vfsp
->vfs_flag
& VFS_UNLINKABLE
) == 0) {
1834 * Namefs specific code starts here.
1837 if (dirflag
== RMDIRECTORY
) {
1839 * User called rmdir(2) on a file that has
1840 * been namefs mounted on top of. Since
1841 * namefs doesn't allow directories to
1842 * be mounted on other files we know
1843 * vp is not of type VDIR so fail to operation.
1850 * If VROOT is still set after grabbing vp->v_lock,
1851 * no one has finished nm_unmount so far and coveredvp
1853 * If we manage to grab vn_vfswlock(coveredvp) before releasing
1854 * vp->v_lock, any race window is eliminated.
1857 mutex_enter(&vp
->v_lock
);
1858 if ((vp
->v_flag
& VROOT
) == 0) {
1859 /* Someone beat us to the unmount */
1860 mutex_exit(&vp
->v_lock
);
1865 coveredvp
= vfsp
->vfs_vnodecovered
;
1868 * Note: Implementation of vn_vfswlock shows that ordering of
1869 * v_lock / vn_vfswlock is not an issue here.
1871 error
= vn_vfswlock(coveredvp
);
1872 mutex_exit(&vp
->v_lock
);
1879 error
= dounmount(vfsp
, 0, CRED());
1882 * Unmounted the namefs file system; now get
1883 * the object it was mounted over.
1887 * If namefs was mounted over a directory, then
1888 * we want to use rmdir() instead of unlink().
1890 if (vp
->v_type
== VDIR
)
1891 dirflag
= RMDIRECTORY
;
1898 * Make sure filesystem is writeable.
1899 * We check the parent directory's vfs in case this is an lofs vnode.
1901 if (dvfsp
&& dvfsp
->vfs_flag
& VFS_RDONLY
) {
1909 * If there is the possibility of an nbmand share reservation, make
1910 * sure it's okay to remove the file. Keep a reference to the
1911 * vnode, so that we can exit the nbl critical region after
1912 * calling fop_remove.
1913 * If there is no possibility of an nbmand share reservation,
1914 * release the vnode reference now. Filesystems like NFS may
1915 * behave differently if there is an extra reference, so get rid of
1916 * this one. Fortunately, we can't have nbmand mounts on NFS
1919 if (nbl_need_check(vp
)) {
1920 nbl_start_crit(vp
, RW_READER
);
1922 if (nbl_conflict(vp
, NBL_REMOVE
, 0, 0, 0, NULL
)) {
1931 if (dirflag
== RMDIRECTORY
) {
1933 * Caller is using rmdir(2), which can only be applied to
1936 if (vtype
!= VDIR
) {
1940 proc_t
*pp
= curproc
;
1942 mutex_enter(&pp
->p_lock
);
1943 cwd
= PTOU(pp
)->u_cdir
;
1945 mutex_exit(&pp
->p_lock
);
1946 error
= fop_rmdir(dvp
, pn
.pn_path
, cwd
, CRED(),
1952 * Unlink(2) can be applied to anything.
1954 error
= fop_remove(dvp
, pn
.pn_path
, CRED(), NULL
, 0);
1967 if ((error
== ESTALE
) && fs_need_estale_retry(estale_retry
++))
1973 * Utility function to compare equality of vnodes.
1974 * Compare the underlying real vnodes, if there are underlying vnodes.
1975 * This is a more thorough comparison than the VN_CMP() macro provides.
1978 vn_compare(vnode_t
*vp1
, vnode_t
*vp2
)
1982 if (vp1
!= NULL
&& fop_realvp(vp1
, &realvp
, NULL
) == 0)
1984 if (vp2
!= NULL
&& fop_realvp(vp2
, &realvp
, NULL
) == 0)
1986 return (VN_CMP(vp1
, vp2
));
1990 * The number of locks to hash into. This value must be a power
1991 * of 2 minus 1 and should probably also be prime.
1993 #define NUM_BUCKETS 1023
1995 struct vn_vfslocks_bucket
{
1997 vn_vfslocks_entry_t
*vb_list
;
1998 char pad
[64 - sizeof (kmutex_t
) - sizeof (void *)];
2002 * Total number of buckets will be NUM_BUCKETS + 1 .
2005 #pragma align 64(vn_vfslocks_buckets)
2006 static struct vn_vfslocks_bucket vn_vfslocks_buckets
[NUM_BUCKETS
+ 1];
2008 #define VN_VFSLOCKS_SHIFT 9
2010 #define VN_VFSLOCKS_HASH(vfsvpptr) \
2011 ((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2014 * vn_vfslocks_getlock() uses an HASH scheme to generate
2015 * rwstlock using vfs/vnode pointer passed to it.
2017 * vn_vfslocks_rele() releases a reference in the
2018 * HASH table which allows the entry allocated by
2019 * vn_vfslocks_getlock() to be freed at a later
2020 * stage when the refcount drops to zero.
2023 vn_vfslocks_entry_t
*
2024 vn_vfslocks_getlock(void *vfsvpptr
)
2026 struct vn_vfslocks_bucket
*bp
;
2027 vn_vfslocks_entry_t
*vep
;
2028 vn_vfslocks_entry_t
*tvep
;
2030 ASSERT(vfsvpptr
!= NULL
);
2031 bp
= &vn_vfslocks_buckets
[VN_VFSLOCKS_HASH(vfsvpptr
)];
2033 mutex_enter(&bp
->vb_lock
);
2034 for (vep
= bp
->vb_list
; vep
!= NULL
; vep
= vep
->ve_next
) {
2035 if (vep
->ve_vpvfs
== vfsvpptr
) {
2037 mutex_exit(&bp
->vb_lock
);
2041 mutex_exit(&bp
->vb_lock
);
2042 vep
= kmem_alloc(sizeof (*vep
), KM_SLEEP
);
2043 rwst_init(&vep
->ve_lock
, NULL
, RW_DEFAULT
, NULL
);
2044 vep
->ve_vpvfs
= (char *)vfsvpptr
;
2046 mutex_enter(&bp
->vb_lock
);
2047 for (tvep
= bp
->vb_list
; tvep
!= NULL
; tvep
= tvep
->ve_next
) {
2048 if (tvep
->ve_vpvfs
== vfsvpptr
) {
2050 mutex_exit(&bp
->vb_lock
);
2053 * There is already an entry in the hash
2054 * destroy what we just allocated.
2056 rwst_destroy(&vep
->ve_lock
);
2057 kmem_free(vep
, sizeof (*vep
));
2061 vep
->ve_next
= bp
->vb_list
;
2063 mutex_exit(&bp
->vb_lock
);
2068 vn_vfslocks_rele(vn_vfslocks_entry_t
*vepent
)
2070 struct vn_vfslocks_bucket
*bp
;
2071 vn_vfslocks_entry_t
*vep
;
2072 vn_vfslocks_entry_t
*pvep
;
2074 ASSERT(vepent
!= NULL
);
2075 ASSERT(vepent
->ve_vpvfs
!= NULL
);
2077 bp
= &vn_vfslocks_buckets
[VN_VFSLOCKS_HASH(vepent
->ve_vpvfs
)];
2079 mutex_enter(&bp
->vb_lock
);
2080 vepent
->ve_refcnt
--;
2082 if ((int32_t)vepent
->ve_refcnt
< 0)
2083 cmn_err(CE_PANIC
, "vn_vfslocks_rele: refcount negative");
2085 if (vepent
->ve_refcnt
== 0) {
2086 for (vep
= bp
->vb_list
; vep
!= NULL
; vep
= vep
->ve_next
) {
2087 if (vep
->ve_vpvfs
== vepent
->ve_vpvfs
) {
2088 if (bp
->vb_list
== vep
)
2089 bp
->vb_list
= vep
->ve_next
;
2092 pvep
->ve_next
= vep
->ve_next
;
2094 mutex_exit(&bp
->vb_lock
);
2095 rwst_destroy(&vep
->ve_lock
);
2096 kmem_free(vep
, sizeof (*vep
));
2101 cmn_err(CE_PANIC
, "vn_vfslocks_rele: vp/vfs not found");
2103 mutex_exit(&bp
->vb_lock
);
2107 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2108 * lock protecting the v_vfsmountedhere field.
2109 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2110 * except that it blocks to acquire the lock VVFSLOCK.
2112 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2113 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2114 * need the non-blocking version of the writers lock i.e. vn_vfswlock
2117 vn_vfswlock_wait(vnode_t
*vp
)
2120 vn_vfslocks_entry_t
*vpvfsentry
;
2123 vpvfsentry
= vn_vfslocks_getlock(vp
);
2124 retval
= rwst_enter_sig(&vpvfsentry
->ve_lock
, RW_WRITER
);
2126 if (retval
== EINTR
) {
2127 vn_vfslocks_rele(vpvfsentry
);
2134 vn_vfsrlock_wait(vnode_t
*vp
)
2137 vn_vfslocks_entry_t
*vpvfsentry
;
2140 vpvfsentry
= vn_vfslocks_getlock(vp
);
2141 retval
= rwst_enter_sig(&vpvfsentry
->ve_lock
, RW_READER
);
2143 if (retval
== EINTR
) {
2144 vn_vfslocks_rele(vpvfsentry
);
2153 * vn_vfswlock is used to implement a lock which is logically a writers lock
2154 * protecting the v_vfsmountedhere field.
2157 vn_vfswlock(vnode_t
*vp
)
2159 vn_vfslocks_entry_t
*vpvfsentry
;
2162 * If vp is NULL then somebody is trying to lock the covered vnode
2163 * of /. (vfs_vnodecovered is NULL for /). This situation will
2164 * only happen when unmounting /. Since that operation will fail
2165 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2170 vpvfsentry
= vn_vfslocks_getlock(vp
);
2172 if (rwst_tryenter(&vpvfsentry
->ve_lock
, RW_WRITER
))
2175 vn_vfslocks_rele(vpvfsentry
);
2180 vn_vfsrlock(vnode_t
*vp
)
2182 vn_vfslocks_entry_t
*vpvfsentry
;
2185 * If vp is NULL then somebody is trying to lock the covered vnode
2186 * of /. (vfs_vnodecovered is NULL for /). This situation will
2187 * only happen when unmounting /. Since that operation will fail
2188 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2193 vpvfsentry
= vn_vfslocks_getlock(vp
);
2195 if (rwst_tryenter(&vpvfsentry
->ve_lock
, RW_READER
))
2198 vn_vfslocks_rele(vpvfsentry
);
2203 vn_vfsunlock(vnode_t
*vp
)
2205 vn_vfslocks_entry_t
*vpvfsentry
;
2208 * ve_refcnt needs to be decremented twice.
2209 * 1. To release refernce after a call to vn_vfslocks_getlock()
2210 * 2. To release the reference from the locking routines like
2211 * vn_vfsrlock/vn_vfswlock etc,.
2213 vpvfsentry
= vn_vfslocks_getlock(vp
);
2214 vn_vfslocks_rele(vpvfsentry
);
2216 rwst_exit(&vpvfsentry
->ve_lock
);
2217 vn_vfslocks_rele(vpvfsentry
);
2221 vn_vfswlock_held(vnode_t
*vp
)
2224 vn_vfslocks_entry_t
*vpvfsentry
;
2228 vpvfsentry
= vn_vfslocks_getlock(vp
);
2229 held
= rwst_lock_held(&vpvfsentry
->ve_lock
, RW_WRITER
);
2231 vn_vfslocks_rele(vpvfsentry
);
2238 const char *name
, /* Name of file system */
2239 const fs_operation_def_t
*templ
, /* Operation specification */
2240 vnodeops_t
**actual
) /* Return the vnodeops */
2245 *actual
= (vnodeops_t
*)kmem_alloc(sizeof (vnodeops_t
), KM_SLEEP
);
2247 (*actual
)->vnop_name
= name
;
2249 error
= fs_build_vector(*actual
, &unused_ops
, vn_ops_table
, templ
);
2251 kmem_free(*actual
, sizeof (vnodeops_t
));
2255 if (unused_ops
!= 0)
2256 cmn_err(CE_WARN
, "vn_make_ops: %s: %d operations supplied "
2257 "but not used", name
, unused_ops
);
2264 * Free the vnodeops created as a result of vn_make_ops()
2267 vn_freevnodeops(vnodeops_t
*vnops
)
2269 kmem_free(vnops
, sizeof (vnodeops_t
));
2278 vn_cache_constructor(void *buf
, void *cdrarg
, int kmflags
)
2284 mutex_init(&vp
->v_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2285 mutex_init(&vp
->v_vsd_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2286 cv_init(&vp
->v_cv
, NULL
, CV_DEFAULT
, NULL
);
2287 rw_init(&vp
->v_nbllock
, NULL
, RW_DEFAULT
, NULL
);
2288 vp
->v_femhead
= NULL
; /* Must be done before vn_reinit() */
2290 vp
->v_mpssdata
= NULL
;
2292 vp
->v_fopdata
= NULL
;
2301 vn_cache_destructor(void *buf
, void *cdrarg
)
2309 rw_destroy(&vp
->v_nbllock
);
2310 cv_destroy(&vp
->v_cv
);
2311 mutex_destroy(&vp
->v_vsd_lock
);
2312 mutex_destroy(&vp
->v_lock
);
2316 vn_create_cache(void)
2319 ASSERT((1 << VNODE_ALIGN_LOG2
) ==
2320 P2ROUNDUP(sizeof (struct vnode
), VNODE_ALIGN
));
2321 vn_cache
= kmem_cache_create("vn_cache", sizeof (struct vnode
),
2322 VNODE_ALIGN
, vn_cache_constructor
, vn_cache_destructor
, NULL
, NULL
,
2327 vn_destroy_cache(void)
2329 kmem_cache_destroy(vn_cache
);
2333 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2334 * cached by the file system and vnodes remain associated.
2337 vn_recycle(vnode_t
*vp
)
2339 ASSERT(!vn_has_cached_data(vp
));
2342 * XXX - This really belongs in vn_reinit(), but we have some issues
2343 * with the counts. Best to have it here for clean initialization.
2347 vp
->v_mmap_read
= 0;
2348 vp
->v_mmap_write
= 0;
2351 * If FEM was in use, make sure everything gets cleaned up
2352 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2355 if (vp
->v_femhead
) {
2356 /* XXX - There should be a free_femhead() that does all this */
2357 ASSERT(vp
->v_femhead
->femh_list
== NULL
);
2358 mutex_destroy(&vp
->v_femhead
->femh_lock
);
2359 kmem_free(vp
->v_femhead
, sizeof (*(vp
->v_femhead
)));
2360 vp
->v_femhead
= NULL
;
2363 kmem_free(vp
->v_path
, strlen(vp
->v_path
) + 1);
2367 if (vp
->v_fopdata
!= NULL
) {
2370 vp
->v_mpssdata
= NULL
;
2375 * Used to reset the vnode fields including those that are directly accessible
2376 * as well as those which require an accessor function.
2378 * Does not initialize:
2379 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2380 * v_data (since FS-nodes and vnodes point to each other and should
2381 * be updated simultaneously)
2382 * v_op (in case someone needs to make a VOP call on this object)
2385 vn_reinit(vnode_t
*vp
)
2388 vp
->v_count_dnlc
= 0;
2390 vp
->v_stream
= NULL
;
2391 vp
->v_vfsmountedhere
= NULL
;
2396 vp
->v_filocks
= NULL
;
2397 vp
->v_shrlocks
= NULL
;
2398 VERIFY(!vn_has_cached_data(vp
));
2400 vp
->v_locality
= NULL
;
2401 vp
->v_xattrdir
= NULL
;
2403 /* Handles v_femhead, v_path, and the r/w/map counts */
2408 vn_alloc(int kmflag
)
2412 vp
= kmem_cache_alloc(vn_cache
, kmflag
);
2415 vp
->v_femhead
= NULL
; /* Must be done before vn_reinit() */
2416 vp
->v_fopdata
= NULL
;
2424 vn_free(vnode_t
*vp
)
2426 ASSERT(vp
->v_shrlocks
== NULL
);
2427 ASSERT(vp
->v_filocks
== NULL
);
2430 * Some file systems call vn_free() with v_count of zero,
2431 * some with v_count of 1. In any case, the value should
2432 * never be anything else.
2434 ASSERT((vp
->v_count
== 0) || (vp
->v_count
== 1));
2435 ASSERT(vp
->v_count_dnlc
== 0);
2436 if (vp
->v_path
!= NULL
) {
2437 kmem_free(vp
->v_path
, strlen(vp
->v_path
) + 1);
2441 /* If FEM was in use, make sure everything gets cleaned up */
2442 if (vp
->v_femhead
) {
2443 /* XXX - There should be a free_femhead() that does all this */
2444 ASSERT(vp
->v_femhead
->femh_list
== NULL
);
2445 mutex_destroy(&vp
->v_femhead
->femh_lock
);
2446 kmem_free(vp
->v_femhead
, sizeof (*(vp
->v_femhead
)));
2447 vp
->v_femhead
= NULL
;
2450 if (vp
->v_fopdata
!= NULL
) {
2453 vp
->v_mpssdata
= NULL
;
2455 kmem_cache_free(vn_cache
, vp
);
2459 * vnode status changes, should define better states than 1, 0.
2462 vn_reclaim(vnode_t
*vp
)
2464 vfs_t
*vfsp
= vp
->v_vfsp
;
2467 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2470 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_RECLAIMED
);
2474 vn_idle(vnode_t
*vp
)
2476 vfs_t
*vfsp
= vp
->v_vfsp
;
2479 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2482 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_IDLED
);
2485 vn_exists(vnode_t
*vp
)
2487 vfs_t
*vfsp
= vp
->v_vfsp
;
2490 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2493 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_EXISTS
);
2497 vn_invalid(vnode_t
*vp
)
2499 vfs_t
*vfsp
= vp
->v_vfsp
;
2502 vfsp
->vfs_implp
== NULL
|| vfsp
->vfs_femhead
== NULL
) {
2505 (void) VFS_VNSTATE(vfsp
, vp
, VNTRANS_DESTROYED
);
2508 /* Vnode event notification */
2511 vnevent_support(vnode_t
*vp
, caller_context_t
*ct
)
2516 return (fop_vnevent(vp
, VE_SUPPORT
, NULL
, NULL
, ct
));
2520 vnevent_rename_src(vnode_t
*vp
, vnode_t
*dvp
, char *name
, caller_context_t
*ct
)
2522 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2525 (void) fop_vnevent(vp
, VE_RENAME_SRC
, dvp
, name
, ct
);
2529 vnevent_rename_dest(vnode_t
*vp
, vnode_t
*dvp
, char *name
,
2530 caller_context_t
*ct
)
2532 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2535 (void) fop_vnevent(vp
, VE_RENAME_DEST
, dvp
, name
, ct
);
2539 vnevent_rename_dest_dir(vnode_t
*vp
, caller_context_t
*ct
)
2541 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2544 (void) fop_vnevent(vp
, VE_RENAME_DEST_DIR
, NULL
, NULL
, ct
);
2548 vnevent_remove(vnode_t
*vp
, vnode_t
*dvp
, char *name
, caller_context_t
*ct
)
2550 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2553 (void) fop_vnevent(vp
, VE_REMOVE
, dvp
, name
, ct
);
2557 vnevent_rmdir(vnode_t
*vp
, vnode_t
*dvp
, char *name
, caller_context_t
*ct
)
2559 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2562 (void) fop_vnevent(vp
, VE_RMDIR
, dvp
, name
, ct
);
2566 vnevent_pre_rename_src(vnode_t
*vp
, vnode_t
*dvp
, char *name
,
2567 caller_context_t
*ct
)
2569 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2572 (void) fop_vnevent(vp
, VE_PRE_RENAME_SRC
, dvp
, name
, ct
);
2576 vnevent_pre_rename_dest(vnode_t
*vp
, vnode_t
*dvp
, char *name
,
2577 caller_context_t
*ct
)
2579 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2582 (void) fop_vnevent(vp
, VE_PRE_RENAME_DEST
, dvp
, name
, ct
);
2586 vnevent_pre_rename_dest_dir(vnode_t
*vp
, vnode_t
*nvp
, char *name
,
2587 caller_context_t
*ct
)
2589 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2592 (void) fop_vnevent(vp
, VE_PRE_RENAME_DEST_DIR
, nvp
, name
, ct
);
2596 vnevent_create(vnode_t
*vp
, caller_context_t
*ct
)
2598 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2601 (void) fop_vnevent(vp
, VE_CREATE
, NULL
, NULL
, ct
);
2605 vnevent_link(vnode_t
*vp
, caller_context_t
*ct
)
2607 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2610 (void) fop_vnevent(vp
, VE_LINK
, NULL
, NULL
, ct
);
2614 vnevent_mountedover(vnode_t
*vp
, caller_context_t
*ct
)
2616 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2619 (void) fop_vnevent(vp
, VE_MOUNTEDOVER
, NULL
, NULL
, ct
);
2623 vnevent_truncate(vnode_t
*vp
, caller_context_t
*ct
)
2625 if (vp
== NULL
|| vp
->v_femhead
== NULL
) {
2628 (void) fop_vnevent(vp
, VE_TRUNCATE
, NULL
, NULL
, ct
);
2636 vn_is_readonly(vnode_t
*vp
)
2638 return (vp
->v_vfsp
->vfs_flag
& VFS_RDONLY
);
2642 vn_has_flocks(vnode_t
*vp
)
2644 return (vp
->v_filocks
!= NULL
);
2648 vn_has_mandatory_locks(vnode_t
*vp
, int mode
)
2650 return ((vp
->v_filocks
!= NULL
) && (MANDLOCK(vp
, mode
)));
2654 vn_has_cached_data(vnode_t
*vp
)
2656 return (!list_is_empty(&vp
->v_pagecache_list
));
2660 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2664 vn_can_change_zones(vnode_t
*vp
)
2670 if (nfs_global_client_only
!= 0)
2674 * We always want to look at the underlying vnode if there is one.
2676 if (fop_realvp(vp
, &rvp
, NULL
) != 0)
2679 * Some pseudo filesystems (including doorfs) don't actually register
2680 * their vfsops_t, so the following may return NULL; we happily let
2681 * such vnodes switch zones.
2683 vswp
= vfs_getvfsswbyvfsops(vfs_getops(rvp
->v_vfsp
));
2685 if (vswp
->vsw_flag
& VSW_NOTZONESAFE
)
2687 vfs_unrefvfssw(vswp
);
2693 * Return nonzero if the vnode is a mount point, zero if not.
2696 vn_ismntpt(vnode_t
*vp
)
2698 return (vp
->v_vfsmountedhere
!= NULL
);
2701 /* Retrieve the vfs (if any) mounted on this vnode */
2703 vn_mountedvfs(vnode_t
*vp
)
2705 return (vp
->v_vfsmountedhere
);
2709 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2712 vn_in_dnlc(vnode_t
*vp
)
2714 return (vp
->v_count_dnlc
> 0);
2718 * vn_has_other_opens() checks whether a particular file is opened by more than
2719 * just the caller and whether the open is for read and/or write.
2720 * This routine is for calling after the caller has already called fop_open()
2721 * and the caller wishes to know if they are the only one with it open for
2722 * the mode(s) specified.
2724 * Vnode counts are only kept on regular files (v_type=VREG).
2736 if (vp
->v_wrcnt
> 1)
2740 if ((vp
->v_rdcnt
> 1) || (vp
->v_wrcnt
> 1))
2744 if ((vp
->v_rdcnt
> 1) && (vp
->v_wrcnt
> 1))
2748 if (vp
->v_rdcnt
> 1)
2757 * vn_is_opened() checks whether a particular file is opened and
2758 * whether the open is for read and/or write.
2760 * Vnode counts are only kept on regular files (v_type=VREG).
2776 if (vp
->v_rdcnt
&& vp
->v_wrcnt
)
2780 if (vp
->v_rdcnt
|| vp
->v_wrcnt
)
2793 * vn_is_mapped() checks whether a particular file is mapped and whether
2794 * the file is mapped read and/or write.
2807 * The atomic_add_64_nv functions force atomicity in the
2808 * case of 32 bit architectures. Otherwise the 64 bit values
2809 * require two fetches. The value of the fields may be
2810 * (potentially) changed between the first fetch and the
2814 if (atomic_add_64_nv((&(vp
->v_mmap_write
)), 0))
2818 if ((atomic_add_64_nv((&(vp
->v_mmap_read
)), 0)) &&
2819 (atomic_add_64_nv((&(vp
->v_mmap_write
)), 0)))
2823 if ((atomic_add_64_nv((&(vp
->v_mmap_read
)), 0)) ||
2824 (atomic_add_64_nv((&(vp
->v_mmap_write
)), 0)))
2828 if (atomic_add_64_nv((&(vp
->v_mmap_read
)), 0))
2835 if (vp
->v_mmap_write
)
2839 if (vp
->v_mmap_read
&& vp
->v_mmap_write
)
2843 if (vp
->v_mmap_read
|| vp
->v_mmap_write
)
2847 if (vp
->v_mmap_read
)
2857 * Set the operations vector for a vnode.
2859 * FEM ensures that the v_femhead pointer is filled in before the
2860 * v_op pointer is changed. This means that if the v_femhead pointer
2861 * is NULL, and the v_op field hasn't changed since before we checked
2862 * the v_femhead pointer; then our update is ok - we are not racing with
2866 vn_setops(vnode_t
*vp
, vnodeops_t
*vnodeops
)
2871 ASSERT(vnodeops
!= NULL
);
2876 * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
2877 * the compare-and-swap on vp->v_op. If either fails, then FEM is
2878 * in effect on the vnode and we need to have FEM deal with it.
2880 if (vp
->v_femhead
!= NULL
|| atomic_cas_ptr(&vp
->v_op
, op
, vnodeops
) !=
2882 fem_setvnops(vp
, vnodeops
);
2887 * Retrieve the operations vector for a vnode
2888 * As with vn_setops(above); make sure we aren't racing with FEM.
2889 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2890 * make sense to the callers of this routine.
2893 vn_getops(vnode_t
*vp
)
2901 if (vp
->v_femhead
== NULL
&& op
== vp
->v_op
) {
2904 return (fem_getvnops(vp
));
2909 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2910 * Returns zero (0) if not.
2913 vn_matchops(vnode_t
*vp
, vnodeops_t
*vnodeops
)
2915 return (vn_getops(vp
) == vnodeops
);
2919 * Returns non-zero (1) if the specified operation matches the
2920 * corresponding operation for that the vnode.
2921 * Returns zero (0) if not.
2924 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2927 vn_matchopval(vnode_t
*vp
, char *vopname
, fs_generic_func_p funcp
)
2929 const fs_operation_trans_def_t
*otdp
;
2930 fs_generic_func_p
*loc
= NULL
;
2931 vnodeops_t
*vop
= vn_getops(vp
);
2933 ASSERT(vopname
!= NULL
);
2935 for (otdp
= vn_ops_table
; otdp
->name
!= NULL
; otdp
++) {
2936 if (MATCHNAME(otdp
->name
, vopname
)) {
2937 loc
= (fs_generic_func_p
*)
2938 ((char *)(vop
) + otdp
->offset
);
2943 return ((loc
!= NULL
) && (*loc
== funcp
));
2947 * fs_new_caller_id() needs to return a unique ID on a given local system.
2948 * The IDs do not need to survive across reboots. These are primarily
2949 * used so that (FEM) monitors can detect particular callers (such as
2950 * the NFS server) to a given vnode/vfs operation.
2955 static uint64_t next_caller_id
= 0LL; /* First call returns 1 */
2957 return ((u_longlong_t
)atomic_inc_64_nv(&next_caller_id
));
2961 * Given a starting vnode and a path, updates the path in the target vnode in
2962 * a safe manner. If the vnode already has path information embedded, then the
2963 * cached path is left untouched.
2966 size_t max_vnode_path
= 4 * MAXPATHLEN
;
2969 vn_setpath(vnode_t
*rootvp
, struct vnode
*startvp
, struct vnode
*vp
,
2970 const char *path
, size_t plen
)
2974 size_t rpathlen
, rpathalloc
;
2986 * We cannot grab base->v_lock while we hold vp->v_lock because of
2987 * the potential for deadlock.
2989 mutex_enter(&base
->v_lock
);
2990 if (base
->v_path
== NULL
) {
2991 mutex_exit(&base
->v_lock
);
2995 rpathlen
= strlen(base
->v_path
);
2996 rpathalloc
= rpathlen
+ plen
+ 1;
2997 /* Avoid adding a slash if there's already one there */
2998 if (base
->v_path
[rpathlen
-1] == '/')
3004 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
3005 * so we must do this dance. If, by chance, something changes the path,
3006 * just give up since there is no real harm.
3008 mutex_exit(&base
->v_lock
);
3010 /* Paths should stay within reason */
3011 if (rpathalloc
> max_vnode_path
)
3014 rpath
= kmem_alloc(rpathalloc
, KM_SLEEP
);
3016 mutex_enter(&base
->v_lock
);
3017 if (base
->v_path
== NULL
|| strlen(base
->v_path
) != rpathlen
) {
3018 mutex_exit(&base
->v_lock
);
3019 kmem_free(rpath
, rpathalloc
);
3022 bcopy(base
->v_path
, rpath
, rpathlen
);
3023 mutex_exit(&base
->v_lock
);
3026 rpath
[rpathlen
++] = '/';
3027 bcopy(path
, rpath
+ rpathlen
, plen
);
3028 rpath
[rpathlen
+ plen
] = '\0';
3030 mutex_enter(&vp
->v_lock
);
3031 if (vp
->v_path
!= NULL
) {
3032 mutex_exit(&vp
->v_lock
);
3033 kmem_free(rpath
, rpathalloc
);
3036 mutex_exit(&vp
->v_lock
);
3041 * Sets the path to the vnode to be the given string, regardless of current
3042 * context. The string must be a complete path from rootdir. This is only used
3043 * by fsop_root() for setting the path based on the mountpoint.
3046 vn_setpath_str(struct vnode
*vp
, const char *str
, size_t len
)
3048 char *buf
= kmem_alloc(len
+ 1, KM_SLEEP
);
3050 mutex_enter(&vp
->v_lock
);
3051 if (vp
->v_path
!= NULL
) {
3052 mutex_exit(&vp
->v_lock
);
3053 kmem_free(buf
, len
+ 1);
3058 bcopy(str
, vp
->v_path
, len
);
3059 vp
->v_path
[len
] = '\0';
3061 mutex_exit(&vp
->v_lock
);
3065 * Called from within filesystem's vop_rename() to handle renames once the
3066 * target vnode is available.
3069 vn_renamepath(vnode_t
*dvp
, vnode_t
*vp
, const char *nm
, size_t len
)
3073 mutex_enter(&vp
->v_lock
);
3076 mutex_exit(&vp
->v_lock
);
3077 vn_setpath(rootdir
, dvp
, vp
, nm
, len
);
3079 kmem_free(tmp
, strlen(tmp
) + 1);
3083 * Similar to vn_setpath_str(), this function sets the path of the destination
3084 * vnode to the be the same as the source vnode.
3087 vn_copypath(struct vnode
*src
, struct vnode
*dst
)
3092 mutex_enter(&src
->v_lock
);
3093 if (src
->v_path
== NULL
) {
3094 mutex_exit(&src
->v_lock
);
3097 alloc
= strlen(src
->v_path
) + 1;
3099 /* avoid kmem_alloc() with lock held */
3100 mutex_exit(&src
->v_lock
);
3101 buf
= kmem_alloc(alloc
, KM_SLEEP
);
3102 mutex_enter(&src
->v_lock
);
3103 if (src
->v_path
== NULL
|| strlen(src
->v_path
) + 1 != alloc
) {
3104 mutex_exit(&src
->v_lock
);
3105 kmem_free(buf
, alloc
);
3108 bcopy(src
->v_path
, buf
, alloc
);
3109 mutex_exit(&src
->v_lock
);
3111 mutex_enter(&dst
->v_lock
);
3112 if (dst
->v_path
!= NULL
) {
3113 mutex_exit(&dst
->v_lock
);
3114 kmem_free(buf
, alloc
);
3118 mutex_exit(&dst
->v_lock
);
3122 * XXX Private interface for segvn routines that handle vnode
3123 * large page segments.
3125 * return 1 if vp's file system fop_pageio() implementation
3126 * can be safely used instead of fop_getpage() for handling
3127 * pagefaults against regular non swap files. fop_pageio()
3128 * interface is considered safe here if its implementation
3129 * is very close to fop_getpage() implementation.
3130 * e.g. It zero's out the part of the page beyond EOF. Doesn't
3131 * panic if there're file holes but instead returns an error.
3132 * Doesn't assume file won't be changed by user writes, etc.
3134 * return 0 otherwise.
3136 * For now allow segvn to only use fop_pageio() with ufs and nfs.
3139 vn_vmpss_usepageio(vnode_t
*vp
)
3141 vfs_t
*vfsp
= vp
->v_vfsp
;
3142 char *fsname
= vfssw
[vfsp
->vfs_fstype
].vsw_name
;
3143 char *pageio_ok_fss
[] = {"ufs", "nfs", NULL
};
3144 char **fsok
= pageio_ok_fss
;
3146 if (fsname
== NULL
) {
3150 for (; *fsok
; fsok
++) {
3151 if (strcmp(*fsok
, fsname
) == 0) {
3158 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3165 caller_context_t
*ct
)
3172 * Adding to the vnode counts before calling open
3173 * avoids the need for a mutex. It circumvents a race
3174 * condition where a query made on the vnode counts results in a
3175 * false negative. The inquirer goes away believing the file is
3176 * not open when there is an open on the file already under way.
3178 * The counts are meant to prevent NFS from granting a delegation
3179 * when it would be dangerous to do so.
3181 * The vnode counts are only kept on regular files
3183 if ((*vpp
)->v_type
== VREG
) {
3185 atomic_inc_32(&(*vpp
)->v_rdcnt
);
3187 atomic_inc_32(&(*vpp
)->v_wrcnt
);
3190 VOPXID_MAP_CR(vp
, cr
);
3192 if ((*vpp
)->v_op
->vop_open
== NULL
)
3195 ret
= (*vpp
)->v_op
->vop_open(vpp
, mode
, cr
, ct
);
3199 * Use the saved vp just in case the vnode ptr got trashed
3202 VOPSTATS_UPDATE(vp
, open
);
3203 if ((vp
->v_type
== VREG
) && (mode
& FREAD
))
3204 atomic_dec_32(&vp
->v_rdcnt
);
3205 if ((vp
->v_type
== VREG
) && (mode
& FWRITE
))
3206 atomic_dec_32(&vp
->v_wrcnt
);
3209 * Some filesystems will return a different vnode,
3210 * but the same path was still used to open it.
3211 * So if we do change the vnode and need to
3212 * copy over the path, do so here, rather than special
3213 * casing each filesystem. Adjust the vnode counts to
3214 * reflect the vnode switch.
3216 VOPSTATS_UPDATE(*vpp
, open
);
3217 if (*vpp
!= vp
&& *vpp
!= NULL
) {
3218 vn_copypath(vp
, *vpp
);
3219 if (((*vpp
)->v_type
== VREG
) && (mode
& FREAD
))
3220 atomic_inc_32(&(*vpp
)->v_rdcnt
);
3221 if ((vp
->v_type
== VREG
) && (mode
& FREAD
))
3222 atomic_dec_32(&vp
->v_rdcnt
);
3223 if (((*vpp
)->v_type
== VREG
) && (mode
& FWRITE
))
3224 atomic_inc_32(&(*vpp
)->v_wrcnt
);
3225 if ((vp
->v_type
== VREG
) && (mode
& FWRITE
))
3226 atomic_dec_32(&vp
->v_wrcnt
);
3240 caller_context_t
*ct
)
3244 VOPXID_MAP_CR(vp
, cr
);
3246 if (vp
->v_op
->vop_close
== NULL
)
3249 err
= vp
->v_op
->vop_close(vp
, flag
, count
, offset
, cr
, ct
);
3251 VOPSTATS_UPDATE(vp
, close
);
3253 * Check passed in count to handle possible dups. Vnode counts are only
3254 * kept on regular files
3256 if ((vp
->v_type
== VREG
) && (count
== 1)) {
3258 ASSERT(vp
->v_rdcnt
> 0);
3259 atomic_dec_32(&vp
->v_rdcnt
);
3261 if (flag
& FWRITE
) {
3262 ASSERT(vp
->v_wrcnt
> 0);
3263 atomic_dec_32(&vp
->v_wrcnt
);
3275 caller_context_t
*ct
)
3278 ssize_t resid_start
= uiop
->uio_resid
;
3280 VOPXID_MAP_CR(vp
, cr
);
3282 if (vp
->v_op
->vop_read
== NULL
)
3285 err
= vp
->v_op
->vop_read(vp
, uiop
, ioflag
, cr
, ct
);
3287 VOPSTATS_UPDATE_IO(vp
, read
,
3288 read_bytes
, (resid_start
- uiop
->uio_resid
));
3298 caller_context_t
*ct
)
3301 ssize_t resid_start
= uiop
->uio_resid
;
3303 VOPXID_MAP_CR(vp
, cr
);
3305 if (vp
->v_op
->vop_write
== NULL
)
3308 err
= vp
->v_op
->vop_write(vp
, uiop
, ioflag
, cr
, ct
);
3310 VOPSTATS_UPDATE_IO(vp
, write
,
3311 write_bytes
, (resid_start
- uiop
->uio_resid
));
3323 caller_context_t
*ct
)
3327 VOPXID_MAP_CR(vp
, cr
);
3329 if (vp
->v_op
->vop_ioctl
== NULL
)
3332 err
= vp
->v_op
->vop_ioctl(vp
, cmd
, arg
, flag
, cr
, rvalp
, ct
);
3334 VOPSTATS_UPDATE(vp
, ioctl
);
3344 caller_context_t
*ct
)
3348 VOPXID_MAP_CR(vp
, cr
);
3350 if (vp
->v_op
->vop_setfl
== NULL
)
3351 err
= fs_setfl(vp
, oflags
, nflags
, cr
, ct
);
3353 err
= vp
->v_op
->vop_setfl(vp
, oflags
, nflags
, cr
, ct
);
3355 VOPSTATS_UPDATE(vp
, setfl
);
3365 caller_context_t
*ct
)
3369 VOPXID_MAP_CR(vp
, cr
);
3372 * If this file system doesn't understand the xvattr extensions
3373 * then turn off the xvattr bit.
3375 if (vfs_has_feature(vp
->v_vfsp
, VFSFT_XVATTR
) == 0) {
3376 vap
->va_mask
&= ~AT_XVATTR
;
3380 * We're only allowed to skip the ACL check iff we used a 32 bit
3381 * ACE mask with fop_access() to determine permissions.
3383 if ((flags
& ATTR_NOACLCHECK
) &&
3384 vfs_has_feature(vp
->v_vfsp
, VFSFT_ACEMASKONACCESS
) == 0)
3387 if (vp
->v_op
->vop_getattr
== NULL
)
3390 err
= vp
->v_op
->vop_getattr(vp
, vap
, flags
, cr
, ct
);
3392 VOPSTATS_UPDATE(vp
, getattr
);
3402 caller_context_t
*ct
)
3406 VOPXID_MAP_CR(vp
, cr
);
3409 * If this file system doesn't understand the xvattr extensions
3410 * then turn off the xvattr bit.
3412 if (vfs_has_feature(vp
->v_vfsp
, VFSFT_XVATTR
) == 0) {
3413 vap
->va_mask
&= ~AT_XVATTR
;
3417 * We're only allowed to skip the ACL check iff we used a 32 bit
3418 * ACE mask with fop_access() to determine permissions.
3420 if ((flags
& ATTR_NOACLCHECK
) &&
3421 vfs_has_feature(vp
->v_vfsp
, VFSFT_ACEMASKONACCESS
) == 0)
3424 if (vp
->v_op
->vop_setattr
== NULL
)
3427 err
= vp
->v_op
->vop_setattr(vp
, vap
, flags
, cr
, ct
);
3429 VOPSTATS_UPDATE(vp
, setattr
);
3439 caller_context_t
*ct
)
3443 if ((flags
& V_ACE_MASK
) &&
3444 vfs_has_feature(vp
->v_vfsp
, VFSFT_ACEMASKONACCESS
) == 0) {
3448 VOPXID_MAP_CR(vp
, cr
);
3450 if (vp
->v_op
->vop_access
== NULL
)
3453 err
= vp
->v_op
->vop_access(vp
, mode
, flags
, cr
, ct
);
3455 VOPSTATS_UPDATE(vp
, access
);
3468 caller_context_t
*ct
,
3469 int *deflags
, /* Returned per-dirent flags */
3470 pathname_t
*ppnp
) /* Returned case-preserved name in directory */
3475 * If this file system doesn't support case-insensitive access
3476 * and said access is requested, fail quickly. It is required
3477 * that if the vfs supports case-insensitive lookup, it also
3478 * supports extended dirent flags.
3480 if (flags
& FIGNORECASE
&&
3481 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3482 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3485 VOPXID_MAP_CR(dvp
, cr
);
3487 if ((flags
& LOOKUP_XATTR
) && (flags
& LOOKUP_HAVE_SYSATTR_DIR
) == 0) {
3488 ret
= xattr_dir_lookup(dvp
, vpp
, flags
, cr
);
3489 } else if (dvp
->v_op
->vop_lookup
== NULL
) {
3492 ret
= dvp
->v_op
->vop_lookup(dvp
, nm
, vpp
, pnp
, flags
, rdir
,
3493 cr
, ct
, deflags
, ppnp
);
3496 if (ret
== 0 && *vpp
) {
3497 VOPSTATS_UPDATE(*vpp
, lookup
);
3498 if ((*vpp
)->v_path
== NULL
) {
3499 vn_setpath(rootdir
, dvp
, *vpp
, nm
, strlen(nm
));
3516 caller_context_t
*ct
,
3517 vsecattr_t
*vsecp
) /* ACL to set during create */
3521 if (vsecp
!= NULL
&&
3522 vfs_has_feature(dvp
->v_vfsp
, VFSFT_ACLONCREATE
) == 0) {
3526 * If this file system doesn't support case-insensitive access
3527 * and said access is requested, fail quickly.
3529 if (flags
& FIGNORECASE
&&
3530 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3531 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3534 VOPXID_MAP_CR(dvp
, cr
);
3536 if (dvp
->v_op
->vop_create
== NULL
)
3539 ret
= dvp
->v_op
->vop_create(dvp
, name
, vap
, excl
, mode
, vpp
,
3540 cr
, flags
, ct
, vsecp
);
3542 if (ret
== 0 && *vpp
) {
3543 VOPSTATS_UPDATE(*vpp
, create
);
3544 if ((*vpp
)->v_path
== NULL
) {
3545 vn_setpath(rootdir
, dvp
, *vpp
, name
, strlen(name
));
3557 caller_context_t
*ct
,
3563 * If this file system doesn't support case-insensitive access
3564 * and said access is requested, fail quickly.
3566 if (flags
& FIGNORECASE
&&
3567 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3568 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3571 VOPXID_MAP_CR(dvp
, cr
);
3573 if (dvp
->v_op
->vop_remove
== NULL
)
3576 err
= dvp
->v_op
->vop_remove(dvp
, nm
, cr
, ct
, flags
);
3578 VOPSTATS_UPDATE(dvp
, remove
);
3588 caller_context_t
*ct
,
3594 * If the target file system doesn't support case-insensitive access
3595 * and said access is requested, fail quickly.
3597 if (flags
& FIGNORECASE
&&
3598 (vfs_has_feature(tdvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3599 vfs_has_feature(tdvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3602 VOPXID_MAP_CR(tdvp
, cr
);
3604 if (tdvp
->v_op
->vop_link
== NULL
)
3607 err
= tdvp
->v_op
->vop_link(tdvp
, svp
, tnm
, cr
, ct
, flags
);
3609 VOPSTATS_UPDATE(tdvp
, link
);
3620 caller_context_t
*ct
,
3626 * If the file system involved does not support
3627 * case-insensitive access and said access is requested, fail
3630 if (flags
& FIGNORECASE
&&
3631 ((vfs_has_feature(sdvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3632 vfs_has_feature(sdvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0)))
3635 VOPXID_MAP_CR(tdvp
, cr
);
3637 if (sdvp
->v_op
->vop_rename
== NULL
)
3640 err
= sdvp
->v_op
->vop_rename(sdvp
, snm
, tdvp
, tnm
, cr
, ct
,
3643 VOPSTATS_UPDATE(sdvp
, rename
);
3654 caller_context_t
*ct
,
3656 vsecattr_t
*vsecp
) /* ACL to set during create */
3660 if (vsecp
!= NULL
&&
3661 vfs_has_feature(dvp
->v_vfsp
, VFSFT_ACLONCREATE
) == 0) {
3665 * If this file system doesn't support case-insensitive access
3666 * and said access is requested, fail quickly.
3668 if (flags
& FIGNORECASE
&&
3669 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3670 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3673 VOPXID_MAP_CR(dvp
, cr
);
3675 if (dvp
->v_op
->vop_mkdir
== NULL
)
3678 ret
= dvp
->v_op
->vop_mkdir(dvp
, dirname
, vap
, vpp
, cr
, ct
,
3681 if (ret
== 0 && *vpp
) {
3682 VOPSTATS_UPDATE(*vpp
, mkdir
);
3683 if ((*vpp
)->v_path
== NULL
) {
3684 vn_setpath(rootdir
, dvp
, *vpp
, dirname
,
3698 caller_context_t
*ct
,
3704 * If this file system doesn't support case-insensitive access
3705 * and said access is requested, fail quickly.
3707 if (flags
& FIGNORECASE
&&
3708 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3709 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3712 VOPXID_MAP_CR(dvp
, cr
);
3714 if (dvp
->v_op
->vop_rmdir
== NULL
)
3717 err
= dvp
->v_op
->vop_rmdir(dvp
, nm
, cdir
, cr
, ct
, flags
);
3719 VOPSTATS_UPDATE(dvp
, rmdir
);
3729 caller_context_t
*ct
,
3733 ssize_t resid_start
= uiop
->uio_resid
;
3736 * If this file system doesn't support retrieving directory
3737 * entry flags and said access is requested, fail quickly.
3739 if (flags
& V_RDDIR_ENTFLAGS
&&
3740 vfs_has_feature(vp
->v_vfsp
, VFSFT_DIRENTFLAGS
) == 0)
3743 VOPXID_MAP_CR(vp
, cr
);
3745 if (vp
->v_op
->vop_readdir
== NULL
)
3748 err
= vp
->v_op
->vop_readdir(vp
, uiop
, cr
, eofp
, ct
, flags
);
3750 VOPSTATS_UPDATE_IO(vp
, readdir
,
3751 readdir_bytes
, (resid_start
- uiop
->uio_resid
));
3762 caller_context_t
*ct
,
3769 * If this file system doesn't support case-insensitive access
3770 * and said access is requested, fail quickly.
3772 if (flags
& FIGNORECASE
&&
3773 (vfs_has_feature(dvp
->v_vfsp
, VFSFT_CASEINSENSITIVE
) == 0 &&
3774 vfs_has_feature(dvp
->v_vfsp
, VFSFT_NOCASESENSITIVE
) == 0))
3777 VOPXID_MAP_CR(dvp
, cr
);
3779 /* check for reparse point */
3780 if ((vfs_has_feature(dvp
->v_vfsp
, VFSFT_REPARSE
)) &&
3781 (strncmp(target
, FS_REPARSE_TAG_STR
,
3782 strlen(FS_REPARSE_TAG_STR
)) == 0)) {
3783 if (!fs_reparse_mark(target
, vap
, &xvattr
))
3784 vap
= (vattr_t
*)&xvattr
;
3787 if (dvp
->v_op
->vop_symlink
== NULL
)
3790 err
= dvp
->v_op
->vop_symlink(dvp
, linkname
, vap
, target
, cr
,
3793 VOPSTATS_UPDATE(dvp
, symlink
);
3802 caller_context_t
*ct
)
3806 VOPXID_MAP_CR(vp
, cr
);
3808 if (vp
->v_op
->vop_readlink
== NULL
)
3811 err
= vp
->v_op
->vop_readlink(vp
, uiop
, cr
, ct
);
3813 VOPSTATS_UPDATE(vp
, readlink
);
3822 caller_context_t
*ct
)
3826 VOPXID_MAP_CR(vp
, cr
);
3828 if (vp
->v_op
->vop_fsync
== NULL
)
3831 err
= vp
->v_op
->vop_fsync(vp
, syncflag
, cr
, ct
);
3833 VOPSTATS_UPDATE(vp
, fsync
);
3841 caller_context_t
*ct
)
3843 /* Need to update stats before vop call since we may lose the vnode */
3844 VOPSTATS_UPDATE(vp
, inactive
);
3846 VOPXID_MAP_CR(vp
, cr
);
3848 if (vp
->v_op
->vop_inactive
!= NULL
)
3849 vp
->v_op
->vop_inactive(vp
, cr
, ct
);
3856 caller_context_t
*ct
)
3860 if (vp
->v_op
->vop_fid
== NULL
)
3863 err
= vp
->v_op
->vop_fid(vp
, fidp
, ct
);
3865 VOPSTATS_UPDATE(vp
, fid
);
3873 caller_context_t
*ct
)
3877 if (vp
->v_op
->vop_rwlock
== NULL
)
3878 ret
= fs_rwlock(vp
, write_lock
, ct
);
3880 ret
= vp
->v_op
->vop_rwlock(vp
, write_lock
, ct
);
3882 VOPSTATS_UPDATE(vp
, rwlock
);
3890 caller_context_t
*ct
)
3892 if (vp
->v_op
->vop_rwunlock
== NULL
)
3893 fs_rwunlock(vp
, write_lock
, ct
);
3895 vp
->v_op
->vop_rwunlock(vp
, write_lock
, ct
);
3897 VOPSTATS_UPDATE(vp
, rwunlock
);
3905 caller_context_t
*ct
)
3909 if (vp
->v_op
->vop_seek
== NULL
)
3912 err
= vp
->v_op
->vop_seek(vp
, ooff
, noffp
, ct
);
3914 VOPSTATS_UPDATE(vp
, seek
);
3922 caller_context_t
*ct
)
3926 if (vp1
->v_op
->vop_cmp
== NULL
)
3927 err
= fs_cmp(vp1
, vp2
, ct
);
3929 err
= vp1
->v_op
->vop_cmp(vp1
, vp2
, ct
);
3931 VOPSTATS_UPDATE(vp1
, cmp
);
3942 struct flk_callback
*flk_cbp
,
3944 caller_context_t
*ct
)
3948 VOPXID_MAP_CR(vp
, cr
);
3950 if (vp
->v_op
->vop_frlock
== NULL
)
3951 err
= fs_frlock(vp
, cmd
, bfp
, flag
, offset
, flk_cbp
, cr
, ct
);
3953 err
= vp
->v_op
->vop_frlock(vp
, cmd
, bfp
, flag
, offset
,
3956 VOPSTATS_UPDATE(vp
, frlock
);
3968 caller_context_t
*ct
)
3972 VOPXID_MAP_CR(vp
, cr
);
3974 if (vp
->v_op
->vop_space
== NULL
)
3977 err
= vp
->v_op
->vop_space(vp
, cmd
, bfp
, flag
, offset
, cr
, ct
);
3979 VOPSTATS_UPDATE(vp
, space
);
3987 caller_context_t
*ct
)
3991 if (vp
->v_op
->vop_realvp
== NULL
)
3994 err
= vp
->v_op
->vop_realvp(vp
, vpp
, ct
);
3996 VOPSTATS_UPDATE(vp
, realvp
);
4012 caller_context_t
*ct
)
4016 VOPXID_MAP_CR(vp
, cr
);
4018 if (vp
->v_op
->vop_getpage
== NULL
)
4021 err
= vp
->v_op
->vop_getpage(vp
, off
, len
, protp
, plarr
,
4022 plsz
, seg
, addr
, rw
, cr
, ct
);
4024 VOPSTATS_UPDATE(vp
, getpage
);
4035 caller_context_t
*ct
)
4039 VOPXID_MAP_CR(vp
, cr
);
4041 if (vp
->v_op
->vop_putpage
== NULL
)
4044 err
= (*(vp
)->v_op
->vop_putpage
)(vp
, off
, len
, flags
, cr
, ct
);
4045 VOPSTATS_UPDATE(vp
, putpage
);
4060 caller_context_t
*ct
)
4064 VOPXID_MAP_CR(vp
, cr
);
4066 if (vp
->v_op
->vop_map
== NULL
)
4069 err
= vp
->v_op
->vop_map(vp
, off
, as
, addrp
, len
, prot
,
4070 maxprot
, flags
, cr
, ct
);
4072 VOPSTATS_UPDATE(vp
, map
);
4087 caller_context_t
*ct
)
4092 VOPXID_MAP_CR(vp
, cr
);
4094 if (vp
->v_op
->vop_addmap
== NULL
)
4097 error
= vp
->v_op
->vop_addmap(vp
, off
, as
, addr
, len
, prot
,
4098 maxprot
, flags
, cr
, ct
);
4100 if ((!error
) && (vp
->v_type
== VREG
)) {
4101 delta
= (u_longlong_t
)btopr(len
);
4103 * If file is declared MAP_PRIVATE, it can't be written back
4104 * even if open for write. Handle as read.
4106 if (flags
& MAP_PRIVATE
) {
4107 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4111 * atomic_add_64 forces the fetch of a 64 bit value to
4112 * be atomic on 32 bit machines
4114 if (maxprot
& PROT_WRITE
)
4115 atomic_add_64((uint64_t *)(&(vp
->v_mmap_write
)),
4117 if (maxprot
& PROT_READ
)
4118 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4120 if (maxprot
& PROT_EXEC
)
4121 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4125 VOPSTATS_UPDATE(vp
, addmap
);
4140 caller_context_t
*ct
)
4145 VOPXID_MAP_CR(vp
, cr
);
4147 if (vp
->v_op
->vop_delmap
== NULL
)
4150 error
= vp
->v_op
->vop_delmap(vp
, off
, as
, addr
, len
, prot
,
4151 maxprot
, flags
, cr
, ct
);
4154 * NFS calls into delmap twice, the first time
4155 * it simply establishes a callback mechanism and returns EAGAIN
4156 * while the real work is being done upon the second invocation.
4157 * We have to detect this here and only decrement the counts upon
4158 * the second delmap request.
4160 if ((error
!= EAGAIN
) && (vp
->v_type
== VREG
)) {
4162 delta
= (u_longlong_t
)btopr(len
);
4164 if (flags
& MAP_PRIVATE
) {
4165 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4169 * atomic_add_64 forces the fetch of a 64 bit value
4170 * to be atomic on 32 bit machines
4172 if (maxprot
& PROT_WRITE
)
4173 atomic_add_64((uint64_t *)(&(vp
->v_mmap_write
)),
4175 if (maxprot
& PROT_READ
)
4176 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4178 if (maxprot
& PROT_EXEC
)
4179 atomic_add_64((uint64_t *)(&(vp
->v_mmap_read
)),
4183 VOPSTATS_UPDATE(vp
, delmap
);
4194 struct pollhead
**phpp
,
4195 caller_context_t
*ct
)
4199 if (vp
->v_op
->vop_poll
== NULL
)
4200 err
= fs_poll(vp
, events
, anyyet
, reventsp
, phpp
, ct
);
4202 err
= vp
->v_op
->vop_poll(vp
, events
, anyyet
, reventsp
, phpp
,
4205 VOPSTATS_UPDATE(vp
, poll
);
4215 caller_context_t
*ct
)
4219 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4220 if ((lbdn
!= (daddr_t
)lbdn
) || (dblks
!= (int)dblks
))
4223 if (vp
->v_op
->vop_dump
== NULL
)
4226 err
= vp
->v_op
->vop_dump(vp
, addr
, lbdn
, dblks
, ct
);
4228 VOPSTATS_UPDATE(vp
, dump
);
4238 caller_context_t
*ct
)
4242 VOPXID_MAP_CR(vp
, cr
);
4244 if (vp
->v_op
->vop_pathconf
== NULL
)
4245 err
= fs_pathconf(vp
, cmd
, valp
, cr
, ct
);
4247 err
= vp
->v_op
->vop_pathconf(vp
, cmd
, valp
, cr
, ct
);
4249 VOPSTATS_UPDATE(vp
, pathconf
);
4261 caller_context_t
*ct
)
4265 VOPXID_MAP_CR(vp
, cr
);
4267 if (vp
->v_op
->vop_pageio
== NULL
)
4270 err
= vp
->v_op
->vop_pageio(vp
, pp
, io_off
, io_len
, flags
,
4273 VOPSTATS_UPDATE(vp
, pageio
);
4282 caller_context_t
*ct
)
4286 if (vp
->v_op
->vop_dumpctl
== NULL
)
4289 err
= vp
->v_op
->vop_dumpctl(vp
, action
, blkp
, ct
);
4291 VOPSTATS_UPDATE(vp
, dumpctl
);
4302 caller_context_t
*ct
)
4304 /* Must do stats first since it's possible to lose the vnode */
4305 VOPSTATS_UPDATE(vp
, dispose
);
4307 VOPXID_MAP_CR(vp
, cr
);
4309 if (vp
->v_op
->vop_dispose
== NULL
)
4310 fs_dispose(vp
, pp
, flag
, dn
, cr
, ct
);
4312 vp
->v_op
->vop_dispose(vp
, pp
, flag
, dn
, cr
, ct
);
4321 caller_context_t
*ct
)
4325 VOPXID_MAP_CR(vp
, cr
);
4328 * We're only allowed to skip the ACL check iff we used a 32 bit
4329 * ACE mask with fop_access() to determine permissions.
4331 if ((flag
& ATTR_NOACLCHECK
) &&
4332 vfs_has_feature(vp
->v_vfsp
, VFSFT_ACEMASKONACCESS
) == 0) {
4336 if (vp
->v_op
->vop_setsecattr
== NULL
)
4339 err
= vp
->v_op
->vop_setsecattr(vp
, vsap
, flag
, cr
, ct
);
4341 VOPSTATS_UPDATE(vp
, setsecattr
);
4351 caller_context_t
*ct
)
4356 * We're only allowed to skip the ACL check iff we used a 32 bit
4357 * ACE mask with fop_access() to determine permissions.
4359 if ((flag
& ATTR_NOACLCHECK
) &&
4360 vfs_has_feature(vp
->v_vfsp
, VFSFT_ACEMASKONACCESS
) == 0) {
4364 VOPXID_MAP_CR(vp
, cr
);
4366 if (vp
->v_op
->vop_getsecattr
== NULL
)
4367 err
= fs_fab_acl(vp
, vsap
, flag
, cr
, ct
);
4369 err
= vp
->v_op
->vop_getsecattr(vp
, vsap
, flag
, cr
, ct
);
4371 VOPSTATS_UPDATE(vp
, getsecattr
);
4379 struct shrlock
*shr
,
4382 caller_context_t
*ct
)
4386 VOPXID_MAP_CR(vp
, cr
);
4388 if (vp
->v_op
->vop_shrlock
== NULL
)
4389 err
= fs_shrlock(vp
, cmd
, shr
, flag
, cr
, ct
);
4391 err
= vp
->v_op
->vop_shrlock(vp
, cmd
, shr
, flag
, cr
, ct
);
4393 VOPSTATS_UPDATE(vp
, shrlock
);
4398 fop_vnevent(vnode_t
*vp
, vnevent_t vnevent
, vnode_t
*dvp
, char *fnm
,
4399 caller_context_t
*ct
)
4403 if (vp
->v_op
->vop_vnevent
== NULL
)
4406 err
= vp
->v_op
->vop_vnevent(vp
, vnevent
, dvp
, fnm
, ct
);
4408 VOPSTATS_UPDATE(vp
, vnevent
);
4413 fop_reqzcbuf(vnode_t
*vp
, enum uio_rw ioflag
, xuio_t
*uiop
, cred_t
*cr
,
4414 caller_context_t
*ct
)
4418 if (vfs_has_feature(vp
->v_vfsp
, VFSFT_ZEROCOPY_SUPPORTED
) == 0)
4421 if (vp
->v_op
->vop_reqzcbuf
== NULL
)
4424 err
= vp
->v_op
->vop_reqzcbuf(vp
, ioflag
, uiop
, cr
, ct
);
4426 VOPSTATS_UPDATE(vp
, reqzcbuf
);
4431 fop_retzcbuf(vnode_t
*vp
, xuio_t
*uiop
, cred_t
*cr
, caller_context_t
*ct
)
4435 if (vfs_has_feature(vp
->v_vfsp
, VFSFT_ZEROCOPY_SUPPORTED
) == 0)
4438 if (vp
->v_op
->vop_retzcbuf
== NULL
)
4441 err
= vp
->v_op
->vop_retzcbuf(vp
, uiop
, cr
, ct
);
4443 VOPSTATS_UPDATE(vp
, retzcbuf
);
/*
 * Default destructor
 * Needed because NULL destructor means that the key is unused
 */
/* ARGSUSED */
static void
vsd_defaultdestructor(void *value)
{
}
4457 * Create a key (index into per vnode array)
4458 * Locks out vsd_create, vsd_destroy, and vsd_free
4459 * May allocate memory with lock held
4462 vsd_create(uint_t
*keyp
, void (*destructor
)(void *))
4468 * if key is allocated, do nothing
4470 mutex_enter(&vsd_lock
);
4472 mutex_exit(&vsd_lock
);
4476 * find an unused key
4478 if (destructor
== NULL
)
4479 destructor
= vsd_defaultdestructor
;
4481 for (i
= 0; i
< vsd_nkeys
; ++i
)
4482 if (vsd_destructor
[i
] == NULL
)
4486 * if no unused keys, increase the size of the destructor array
4488 if (i
== vsd_nkeys
) {
4489 if ((nkeys
= (vsd_nkeys
<< 1)) == 0)
4492 (void (**)(void *))vsd_realloc((void *)vsd_destructor
,
4493 (size_t)(vsd_nkeys
* sizeof (void (*)(void *))),
4494 (size_t)(nkeys
* sizeof (void (*)(void *))));
4499 * allocate the next available unused key
4501 vsd_destructor
[i
] = destructor
;
4504 /* create vsd_list, if it doesn't exist */
4505 if (vsd_list
== NULL
) {
4506 vsd_list
= kmem_alloc(sizeof (list_t
), KM_SLEEP
);
4507 list_create(vsd_list
, sizeof (struct vsd_node
),
4508 offsetof(struct vsd_node
, vs_nodes
));
4511 mutex_exit(&vsd_lock
);
4517 * Assumes that the caller is preventing vsd_set and vsd_get
4518 * Locks out vsd_create, vsd_destroy, and vsd_free
4519 * May free memory with lock held
4522 vsd_destroy(uint_t
*keyp
)
4525 struct vsd_node
*vsd
;
4528 * protect the key namespace and our destructor lists
4530 mutex_enter(&vsd_lock
);
4534 ASSERT(key
<= vsd_nkeys
);
4537 * if the key is valid
4542 * for every vnode with VSD, call key's destructor
4544 for (vsd
= list_head(vsd_list
); vsd
!= NULL
;
4545 vsd
= list_next(vsd_list
, vsd
)) {
4547 * no VSD for key in this vnode
4549 if (key
> vsd
->vs_nkeys
)
4552 * call destructor for key
4554 if (vsd
->vs_value
[k
] && vsd_destructor
[k
])
4555 (*vsd_destructor
[k
])(vsd
->vs_value
[k
]);
4557 * reset value for key
4559 vsd
->vs_value
[k
] = NULL
;
4562 * actually free the key (NULL destructor == unused)
4564 vsd_destructor
[k
] = NULL
;
4567 mutex_exit(&vsd_lock
);
4571 * Quickly return the per vnode value that was stored with the specified key
4572 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4573 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4576 vsd_get(vnode_t
*vp
, uint_t key
)
4578 struct vsd_node
*vsd
;
4581 ASSERT(mutex_owned(&vp
->v_vsd_lock
));
4585 if (key
&& vsd
!= NULL
&& key
<= vsd
->vs_nkeys
)
4586 return (vsd
->vs_value
[key
- 1]);
4591 * Set a per vnode value indexed with the specified key
4592 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4595 vsd_set(vnode_t
*vp
, uint_t key
, void *value
)
4597 struct vsd_node
*vsd
;
4600 ASSERT(mutex_owned(&vp
->v_vsd_lock
));
4607 vsd
= vp
->v_vsd
= kmem_zalloc(sizeof (*vsd
), KM_SLEEP
);
4610 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4611 * code won't happen and we will continue down and allocate space for
4612 * the vs_value array.
4613 * If the caller is replacing one value with another, then it is up
4614 * to the caller to free/rele/destroy the previous value (if needed).
4616 if (key
<= vsd
->vs_nkeys
) {
4617 vsd
->vs_value
[key
- 1] = value
;
4621 ASSERT(key
<= vsd_nkeys
);
4623 if (vsd
->vs_nkeys
== 0) {
4624 mutex_enter(&vsd_lock
); /* lock out vsd_destroy() */
4626 * Link onto list of all VSD nodes.
4628 list_insert_head(vsd_list
, vsd
);
4629 mutex_exit(&vsd_lock
);
4633 * Allocate vnode local storage and set the value for key
4635 vsd
->vs_value
= vsd_realloc(vsd
->vs_value
,
4636 vsd
->vs_nkeys
* sizeof (void *),
4637 key
* sizeof (void *));
4638 vsd
->vs_nkeys
= key
;
4639 vsd
->vs_value
[key
- 1] = value
;
4645 * Called from vn_free() to run the destructor function for each vsd
4646 * Locks out vsd_create and vsd_destroy
4647 * Assumes that the destructor *DOES NOT* use vsd
4650 vsd_free(vnode_t
*vp
)
4653 struct vsd_node
*vsd
= vp
->v_vsd
;
4658 if (vsd
->vs_nkeys
== 0) {
4659 kmem_free(vsd
, sizeof (*vsd
));
4665 * lock out vsd_create and vsd_destroy, call
4666 * the destructor, and mark the value as destroyed.
4668 mutex_enter(&vsd_lock
);
4670 for (i
= 0; i
< vsd
->vs_nkeys
; i
++) {
4671 if (vsd
->vs_value
[i
] && vsd_destructor
[i
])
4672 (*vsd_destructor
[i
])(vsd
->vs_value
[i
]);
4673 vsd
->vs_value
[i
] = NULL
;
4677 * remove from linked list of VSD nodes
4679 list_remove(vsd_list
, vsd
);
4681 mutex_exit(&vsd_lock
);
4686 kmem_free(vsd
->vs_value
, vsd
->vs_nkeys
* sizeof (void *));
4687 kmem_free(vsd
, sizeof (struct vsd_node
));
4695 vsd_realloc(void *old
, size_t osize
, size_t nsize
)
4699 new = kmem_zalloc(nsize
, KM_SLEEP
);
4701 bcopy(old
, new, osize
);
4702 kmem_free(old
, osize
);
4708 * Setup the extensible system attribute for creating a reparse point.
4709 * The symlink data 'target' is validated for proper format of a reparse
4710 * string and a check also made to make sure the symlink data does not
4711 * point to an existing file.
4713 * return 0 if ok else -1.
4716 fs_reparse_mark(char *target
, vattr_t
*vap
, xvattr_t
*xvattr
)
4720 if ((!target
) || (!vap
) || (!xvattr
))
4723 /* validate reparse string */
4724 if (reparse_validate((const char *)target
))
4728 xvattr
->xva_vattr
= *vap
;
4729 xvattr
->xva_vattr
.va_mask
|= AT_XVATTR
;
4730 xoap
= xva_getxoptattr(xvattr
);
4732 XVA_SET_REQ(xvattr
, XAT_REPARSE
);
4733 xoap
->xoa_reparse
= 1;
4739 * Function to check whether a symlink is a reparse point.
4740 * Return B_TRUE if it is a reparse point, else return B_FALSE
4743 vn_is_reparse(vnode_t
*vp
, cred_t
*cr
, caller_context_t
*ct
)
4748 if ((vp
->v_type
!= VLNK
) ||
4749 !(vfs_has_feature(vp
->v_vfsp
, VFSFT_XVATTR
)))
4753 xoap
= xva_getxoptattr(&xvattr
);
4755 XVA_SET_REQ(&xvattr
, XAT_REPARSE
);
4757 if (fop_getattr(vp
, &xvattr
.xva_vattr
, 0, cr
, ct
))
4760 if ((!(xvattr
.xva_vattr
.va_mask
& AT_XVATTR
)) ||
4761 (!(XVA_ISSET_RTN(&xvattr
, XAT_REPARSE
))))
4764 return (xoap
->xoa_reparse
? B_TRUE
: B_FALSE
);