fs: start using vnode dispatch functions
kernel/fs/vnode.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/vnode_dispatch.h>
#include <sys/rwstlock.h>
#include <sys/fem.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/fcntl.h>
#include <sys/fs_subr.h>
#include <sys/taskq.h>
#include <sys/fs_reparse.h>
/* Determine if this vnode is a file that is read-only */
#define	ISROFILE(vp)	\
	((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
	    (vp)->v_type != VFIFO && vn_is_readonly(vp))
/* Tunable via /etc/system; used only by admin/install */
int nfs_global_client_only;
/*
 * Array of vopstats_t for per-FS-type vopstats.  This array has the same
 * number of entries as and parallel to the vfssw table.  (Arguably, it could
 * be part of the vfssw table.)  Once it's initialized, it's accessed using
 * the same fstype index that is used to index into the vfssw table.
 */
vopstats_t **vopstats_fstype;
/* vopstats initialization template used for fast initialization via bcopy() */
static vopstats_t *vs_templatep;

/* Kmem cache handle for vsk_anchor_t allocations */
kmem_cache_t *vsk_anchor_cache;

/* file events cleanup routine */
extern void free_fopdata(vnode_t *);
/*
 * Root of AVL tree for the kstats associated with vopstats.  Lock protects
 * updates to vskstat_tree.
 */
avl_tree_t	vskstat_tree;
kmutex_t	vskstat_tree_lock;
/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;
/*
 * forward declarations for internal vnode specific data (vsd)
 */
static void *vsd_realloc(void *, size_t, size_t);

/*
 * forward declarations for reparse point functions
 */
static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
/*
 * VSD -- VNODE SPECIFIC DATA
 * The v_data pointer is typically used by a file system to store a
 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 * However, there are times when additional project private data needs
 * to be stored separately from the data (node) pointed to by v_data.
 * This additional data could be stored by the file system itself or
 * by a completely different kernel entity.  VSD provides a way for
 * callers to obtain a key and store a pointer to private data associated
 * with a vnode.
 *
 * Callers are responsible for protecting the vsd by holding v_vsd_lock
 * for calls to vsd_set() and vsd_get().
 */
/*
 * vsd_lock protects:
 *   vsd_nkeys - creation and deletion of vsd keys
 *   vsd_list - insertion and deletion of vsd_node in the vsd_list
 *   vsd_destructor - adding and removing destructors to the list
 */
static kmutex_t		vsd_lock;
static uint_t		vsd_nkeys;	/* size of destructor array */
/* list of vsd_node's */
static list_t *vsd_list = NULL;
/* per-key destructor funcs */
static void		(**vsd_destructor)(void *);
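
/*
 * Illustrative sketch of the VSD interfaces described above: how a
 * hypothetical consumer might allocate a key and attach private data to a
 * vnode.  The consumer names below are made up for illustration;
 * vsd_create(), vsd_set() and vsd_get() are the real entry points defined
 * later in this file.  Guarded out of the build.
 */
#ifdef VNODE_C_EXAMPLES
static uint_t example_vsd_key;		/* hypothetical consumer's key */

static void
example_vsd_destroy(void *data)		/* runs when the vnode is destroyed */
{
	kmem_free(data, sizeof (uint64_t));
}

static void
example_vsd_usage(vnode_t *vp)
{
	uint64_t *cookie;

	vsd_create(&example_vsd_key, example_vsd_destroy);

	cookie = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);

	/* Callers must hold v_vsd_lock around vsd_set()/vsd_get(). */
	mutex_enter(&vp->v_vsd_lock);
	(void) vsd_set(vp, example_vsd_key, cookie);
	cookie = vsd_get(vp, example_vsd_key);
	mutex_exit(&vp->v_vsd_lock);
}
#endif	/* VNODE_C_EXAMPLES */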
/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 * recording of the bytes transferred.  Since the code is similar
 * but small, it is nearly a duplicate.  Consequently any changes
 * to one may need to be reflected in the other.
 * Rundown of the variables:
 * vp - Pointer to the vnode
 * counter - Partial name structure member to update in vopstats for counts
 * bytecounter - Partial name structure member to update in vopstats for bytes
 * bytesval - Value to update in vopstats for bytes
 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 */
#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}
#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += bytesval;		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += bytesval;	\
		}							\
	}								\
}
/*
 * If the filesystem does not support XIDs, map the credential.
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr) {						\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
		cr = crgetmapped(cr);					\
}
/*
 * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 * numerical order of S_IFMT and vnode types.)
 */
enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};

ushort_t vttoif_tab[] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
};
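
/*
 * Illustrative sketch: the two tables above back the IFTOVT()/VTTOIF()
 * macros from <sys/mode.h>, so a stat(2) format such as S_IFDIR maps to
 * VDIR and back.  The function below is a made-up example and is guarded
 * out of the build.
 */
#ifdef VNODE_C_EXAMPLES
static void
example_mode_vtype_conversion(void)
{
	mode_t mode = S_IFDIR | 0755;
	enum vtype vt = IFTOVT(mode);	/* indexes iftovt_tab[] => VDIR */

	ASSERT(vt == VDIR);
	ASSERT(VTTOIF(vt) == S_IFDIR);	/* indexes vttoif_tab[] */
}
#endif	/* VNODE_C_EXAMPLES */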
/*
 * The system vnode cache.
 */
kmem_cache_t *vn_cache;
/*
 * Vnode operations vector.
 */
static const fs_operation_trans_def_t vn_ops_table[] = {
	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
	    fs_nosys,

	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
	    fs_nosys,

	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
	    fs_nosys,

	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
	    fs_nosys,

	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
	    fs_nosys,

	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
	    fs_setfl,

	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
	    fs_nosys,

	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
	    fs_nosys,

	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
	    fs_nosys,

	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
	    fs_nosys,

	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
	    fs_nosys,

	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
	    fs_nosys,

	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
	    fs_nosys,

	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
	    fs_nosys,

	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
	    fs_nosys,

	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
	    fs_nosys,

	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
	    fs_nosys,

	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
	    fs_nosys,

	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
	    fs_nosys,

	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
	    fs_nosys,

	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
	    fs_nosys,

	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
	    fs_nosys,

	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
	    fs_rwlock,

	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
	    (fs_generic_func_p) fs_rwunlock,

	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
	    fs_nosys,

	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
	    fs_cmp,

	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
	    fs_frlock,

	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
	    fs_nosys,

	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
	    fs_nosys,

	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
	    fs_nosys,

	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
	    fs_nosys,

	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
	    (fs_generic_func_p) fs_nosys_map,

	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
	    (fs_generic_func_p) fs_nosys_addmap,

	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
	    fs_nosys,

	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
	    (fs_generic_func_p) fs_poll,

	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
	    fs_nosys,

	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
	    fs_pathconf,

	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
	    fs_nosys,

	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
	    fs_nosys,

	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
	    (fs_generic_func_p) fs_dispose,

	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
	    fs_nosys,

	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
	    fs_fab_acl,

	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
	    fs_shrlock,

	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
	    (fs_generic_func_p) fs_vnevent_nosupport,

	VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
	    fs_nosys,

	VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
	    fs_nosys,

	NULL, 0, NULL,
};
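
/*
 * Illustrative sketch of how a file system registers against a table like
 * the one above: it supplies an fs_operation_def_t template, and
 * vn_make_ops() (defined at the bottom of this file) fills in a
 * vnodeops_t, substituting the defaults from vn_ops_table (fs_nosys etc.)
 * for any op the template leaves out.  "myfs" and its functions are
 * hypothetical.  Guarded out of the build.
 */
#ifdef VNODE_C_EXAMPLES
extern int myfs_open(vnode_t **, int, cred_t *, caller_context_t *);
extern int myfs_close(vnode_t *, int, int, offset_t, cred_t *,
    caller_context_t *);

static const fs_operation_def_t myfs_vnodeops_template[] = {
	VOPNAME_OPEN,	{ .vop_open = myfs_open },
	VOPNAME_CLOSE,	{ .vop_close = myfs_close },
	NULL,		{ NULL }
};

static vnodeops_t *myfs_vnodeops;

static int
example_register_vnodeops(void)
{
	/* Ops missing from the template fall back to vn_ops_table. */
	return (vn_make_ops("myfs", myfs_vnodeops_template,
	    &myfs_vnodeops));
}
#endif	/* VNODE_C_EXAMPLES */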
/* Extensible attribute (xva) routines. */

/*
 * Zero out the structure, set the size of the requested/returned bitmaps,
 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
 * to the returned attributes array.
 */
void
xva_init(xvattr_t *xvap)
{
	bzero(xvap, sizeof (xvattr_t));
	xvap->xva_mapsize = XVA_MAPSIZE;
	xvap->xva_magic = XVA_MAGIC;
	xvap->xva_vattr.va_mask = AT_XVATTR;
	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
}
/*
 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
 * structure.  Otherwise, returns NULL.
 */
xoptattr_t *
xva_getxoptattr(xvattr_t *xvap)
{
	xoptattr_t *xoap = NULL;
	if (xvap->xva_vattr.va_mask & AT_XVATTR)
		xoap = &xvap->xva_xoptattrs;
	return (xoap);
}
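
/*
 * Illustrative sketch of the xvattr routines above: initialize the
 * structure, request one optional attribute via XVA_SET_REQ() (from
 * <sys/vnode.h>), and use xva_getxoptattr() to reach the results after a
 * fop_getattr() call.  The function name is made up; guarded out of the
 * build.
 */
#ifdef VNODE_C_EXAMPLES
static int
example_get_xoptattr(vnode_t *vp, cred_t *cr)
{
	xvattr_t xva;
	xoptattr_t *xoap;
	int error;

	xva_init(&xva);			/* sets XVA_MAPSIZE, XVA_MAGIC, ... */
	XVA_SET_REQ(&xva, XAT_READONLY);	/* ask for one optional attr */

	error = fop_getattr(vp, &xva.xva_vattr, 0, cr, NULL);
	if (error == 0 && (xoap = xva_getxoptattr(&xva)) != NULL) {
		/* xoap->xoa_readonly now holds the attribute, if returned */
		(void) xoap;
	}
	return (error);
}
#endif	/* VNODE_C_EXAMPLES */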
/*
 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
 * kstat name.
 */
static int
vska_compar(const void *n1, const void *n2)
{
	int ret;
	ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
	ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;

	if (p1 < p2) {
		ret = -1;
	} else if (p1 > p2) {
		ret = 1;
	} else {
		ret = 0;
	}

	return (ret);
}
/*
 * Used to create a single template which will be bcopy()ed to a newly
 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
 */
static vopstats_t *
create_vopstats_template()
{
	vopstats_t		*vsp;

	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
	bzero(vsp, sizeof (*vsp));	/* Start fresh */

	/* fop_open */
	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
	/* fop_close */
	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
	/* fop_read I/O */
	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
	/* fop_write I/O */
	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
	/* fop_ioctl */
	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
	/* fop_setfl */
	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
	/* fop_getattr */
	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
	/* fop_setattr */
	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
	/* fop_access */
	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
	/* fop_lookup */
	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
	/* fop_create */
	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
	/* fop_remove */
	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
	/* fop_link */
	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
	/* fop_rename */
	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
	/* fop_mkdir */
	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
	/* fop_rmdir */
	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
	/* fop_readdir I/O */
	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
	    KSTAT_DATA_UINT64);
	/* fop_symlink */
	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
	/* fop_readlink */
	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
	/* fop_fsync */
	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
	/* fop_inactive */
	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
	/* fop_fid */
	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
	/* fop_rwlock */
	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
	/* fop_rwunlock */
	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
	/* fop_seek */
	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
	/* fop_cmp */
	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
	/* fop_frlock */
	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
	/* fop_space */
	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
	/* fop_realvp */
	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
	/* fop_getpage */
	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
	/* fop_putpage */
	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
	/* fop_map */
	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
	/* fop_addmap */
	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
	/* fop_delmap */
	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
	/* fop_poll */
	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
	/* fop_dump */
	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
	/* fop_pathconf */
	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
	/* fop_pageio */
	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
	/* fop_dumpctl */
	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
	/* fop_dispose */
	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
	/* fop_setsecattr */
	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
	/* fop_getsecattr */
	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
	/* fop_shrlock */
	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
	/* fop_vnevent */
	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
	/* fop_reqzcbuf */
	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
	/* fop_retzcbuf */
	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);

	return (vsp);
}
/*
 * Creates a kstat structure associated with a vopstats structure.
 */
kstat_t *
new_vskstat(char *ksname, vopstats_t *vsp)
{
	kstat_t		*ksp;

	if (!vopstats_enabled) {
		return (NULL);
	}

	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
	    sizeof (vopstats_t)/sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
	if (ksp) {
		ksp->ks_data = vsp;
		kstat_install(ksp);
	}

	return (ksp);
}
/*
 * Called from vfsinit() to initialize the support mechanisms for vopstats
 */
void
vopstats_startup()
{
	if (!vopstats_enabled)
		return;

	/*
	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
	 * is necessary since we need to check if a kstat exists before we
	 * attempt to create it.  Also, initialize its lock.
	 */
	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
	    offsetof(vsk_anchor_t, vsk_node));
	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);

	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
	    NULL, NULL, 0);

	/*
	 * Set up the array of pointers for the vopstats-by-FS-type.
	 * The entries will be allocated/initialized as each file system
	 * goes through modload/mod_installfs.
	 */
	vopstats_fstype = (vopstats_t **)kmem_zalloc(
	    (sizeof (vopstats_t *) * nfstype), KM_SLEEP);

	/* Set up the global vopstats initialization template */
	vs_templatep = create_vopstats_template();
}
/*
 * We need to have all of the counters zeroed.
 * The initialization of the vopstats_t includes on the order of
 * 50 calls to kstat_named_init().  Rather than do that on every call,
 * we do it once in a template (vs_templatep) then bcopy it over.
 */
void
initialize_vopstats(vopstats_t *vsp)
{
	if (vsp == NULL)
		return;

	bcopy(vs_templatep, vsp, sizeof (vopstats_t));
}
/*
 * If possible, determine which vopstats by fstype to use and
 * return a pointer to the caller.
 */
vopstats_t *
get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
{
	int		fstype = 0;	/* Index into vfssw[] */
	vopstats_t	*vsp = NULL;

	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
	    !vopstats_enabled)
		return (NULL);
	/*
	 * Set up the fstype.  We go to so much trouble because all versions
	 * of NFS use the same fstype in their vfs even though they have
	 * distinct entries in the vfssw[] table.
	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
	 */
	if (vswp) {
		fstype = vswp - vfssw;	/* Gets us the index */
	} else {
		fstype = vfsp->vfs_fstype;
	}

	/*
	 * Point to the per-fstype vopstats. The only valid values are
	 * non-zero positive values less than the number of vfssw[] table
	 * entries.
	 */
	if (fstype > 0 && fstype < nfstype) {
		vsp = vopstats_fstype[fstype];
	}

	return (vsp);
}
/*
 * Generate a kstat name, create the kstat structure, and allocate a
 * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
 * to the caller.  This must only be called from a mount.
 */
vsk_anchor_t *
get_vskstat_anchor(vfs_t *vfsp)
{
	char kstatstr[KSTAT_STRLEN];	/* kstat name for vopstats */
	statvfs64_t statvfsbuf;		/* Needed to find f_fsid */
	vsk_anchor_t *vskp = NULL;	/* vfs <--> kstat anchor */
	kstat_t *ksp;			/* Ptr to new kstat */
	avl_index_t where;		/* Location in the AVL tree */

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return (NULL);

	/* Need to get the fsid to build a kstat name */
	if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
		/* Create a name for our kstats based on fsid */
		(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
		    VOPSTATS_STR, statvfsbuf.f_fsid);

		/* Allocate and initialize the vsk_anchor_t */
		vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
		bzero(vskp, sizeof (*vskp));
		vskp->vsk_fsid = statvfsbuf.f_fsid;

		mutex_enter(&vskstat_tree_lock);
		if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
			avl_insert(&vskstat_tree, vskp, where);
			mutex_exit(&vskstat_tree_lock);

			/*
			 * Now that we've got the anchor in the AVL
			 * tree, we can create the kstat.
			 */
			ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
			if (ksp) {
				vskp->vsk_ksp = ksp;
			}
		} else {
			/* Oops, found one! Release memory and lock. */
			mutex_exit(&vskstat_tree_lock);
			kmem_cache_free(vsk_anchor_cache, vskp);
			vskp = NULL;
		}
	}
	return (vskp);
}
/*
 * We're in the process of tearing down the vfs and need to cleanup
 * the data structures associated with the vopstats.  Must only be called
 * from dounmount().
 */
void
teardown_vopstats(vfs_t *vfsp)
{
	vsk_anchor_t	*vskap;
	avl_index_t	where;

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return;

	/* This is a safe check since VFS_STATS must be set (see above) */
	if ((vskap = vfsp->vfs_vskap) == NULL)
		return;

	/* Whack the pointer right away */
	vfsp->vfs_vskap = NULL;

	/* Lock the tree, remove the node, and delete the kstat */
	mutex_enter(&vskstat_tree_lock);
	if (avl_find(&vskstat_tree, vskap, &where)) {
		avl_remove(&vskstat_tree, vskap);
	}

	if (vskap->vsk_ksp) {
		kstat_delete(vskap->vsk_ksp);
	}
	mutex_exit(&vskstat_tree_lock);

	kmem_cache_free(vsk_anchor_cache, vskap);
}
/*
 * Read or write a vnode.  Called from kernel code.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	ssize_t len,
	offset_t offset,
	enum uio_seg seg,
	int ioflag,
	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
	cred_t *cr,
	ssize_t *residp)
{
	struct uio uio;
	struct iovec iov;
	int error;
	int in_crit = 0;

	if (rw == UIO_WRITE && ISROFILE(vp))
		return (EROFS);

	if (len < 0)
		return (EIO);

	VOPXID_MAP_CR(vp, cr);

	iov.iov_base = base;
	iov.iov_len = len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = offset;
	uio.uio_segflg = (short)seg;
	uio.uio_resid = len;
	uio.uio_llimit = ulimit;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, cr, &svmand);
		if (error != 0)
			goto done;
		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	(void) fop_rwlock(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (rw == UIO_WRITE) {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		error = fop_write(vp, &uio, ioflag, cr, NULL);
	} else {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		error = fop_read(vp, &uio, ioflag, cr, NULL);
	}
	fop_rwunlock(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);

	if (residp)
		*residp = uio.uio_resid;
	else if (uio.uio_resid)
		error = EIO;

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}
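
/*
 * Illustrative sketch of a typical kernel caller of vn_rdwr() above: read
 * the first bytes of an already-held vnode into a kernel buffer.
 * UIO_SYSSPACE marks the buffer as kernel memory, and resid reports how
 * many bytes were *not* transferred.  The function name is made up;
 * guarded out of the build.
 */
#ifdef VNODE_C_EXAMPLES
static int
example_read_header(vnode_t *vp, char *buf, ssize_t buflen)
{
	ssize_t resid;
	int error;

	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, buflen, 0,
	    UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(), &resid);

	/* On success, buflen - resid bytes were read into buf. */
	return (error);
}
#endif	/* VNODE_C_EXAMPLES */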
/*
 * Release a vnode.  Call fop_inactive on last reference or
 * decrement reference count.
 *
 * To avoid race conditions, the v_count is left at 1 for
 * the call to fop_inactive. This prevents another thread
 * from reclaiming and releasing the vnode *before* the
 * fop_inactive routine has a chance to destroy the vnode.
 * We can't have more than 1 thread calling fop_inactive
 * on a vnode.
 */
void
vn_rele(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		fop_inactive(vp, CRED(), NULL);
		return;
	}
	VN_RELE_LOCKED(vp);
	mutex_exit(&vp->v_lock);
}
/*
 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
 * as a single reference, so v_count is not decremented until the last DNLC hold
 * is released. This makes it possible to distinguish vnodes that are referenced
 * only by the DNLC.
 */
void
vn_rele_dnlc(vnode_t *vp)
{
	VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
	mutex_enter(&vp->v_lock);
	if (--vp->v_count_dnlc == 0) {
		if (vp->v_count == 1) {
			mutex_exit(&vp->v_lock);
			fop_inactive(vp, CRED(), NULL);
			return;
		}
		VN_RELE_LOCKED(vp);
	}
	mutex_exit(&vp->v_lock);
}
/*
 * Like vn_rele() except that it clears v_stream under v_lock.
 * This is used by sockfs when it dismantles the association between
 * the sockfs node and the vnode in the underlying file system.
 * v_lock has to be held to prevent a thread coming through the lookupname
 * path from accessing a stream head that is going away.
 */
void
vn_rele_stream(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	vp->v_stream = NULL;
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		fop_inactive(vp, CRED(), NULL);
		return;
	}
	VN_RELE_LOCKED(vp);
	mutex_exit(&vp->v_lock);
}
static void
vn_rele_inactive(vnode_t *vp)
{
	fop_inactive(vp, CRED(), NULL);
}
/*
 * Like vn_rele() except if we are going to call fop_inactive() then do it
 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 * the file system as a result of releasing the vnode. Note, file systems
 * already have to handle the race where the vnode is incremented before the
 * inactive routine is called and does its locking.
 *
 * Warning: Excessive use of this routine can lead to performance problems.
 * This is because taskqs throttle back allocation if too many are created.
 */
void
vn_rele_async(vnode_t *vp, taskq_t *taskq)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
		    vp, TQ_SLEEP) != (uintptr_t)NULL);
		return;
	}
	VN_RELE_LOCKED(vp);
	mutex_exit(&vp->v_lock);
}
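
/*
 * Illustrative sketch of vn_rele_async() above: releases that might
 * re-enter the file system are pushed onto a taskq instead of calling
 * fop_inactive() in the current context.  The taskq name and parameters
 * below are made-up examples.  Guarded out of the build.
 */
#ifdef VNODE_C_EXAMPLES
static taskq_t *example_release_taskq;

static void
example_async_release(vnode_t *vp)
{
	if (example_release_taskq == NULL)
		example_release_taskq = taskq_create("example_vn_rele",
		    1, minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE);

	/* The last hold, if this is it, is dropped from taskq context. */
	vn_rele_async(vp, example_release_taskq);
}
#endif	/* VNODE_C_EXAMPLES */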
int
vn_open(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask)
{
	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
	    umask, NULL, -1));
}
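
/*
 * Illustrative sketch of vn_open()/vn_openat() usage from kernel code:
 * open a file by pathname, then release it with fop_close() and VN_RELE(),
 * much as vn_openat() itself does on its error path.  The pathname and
 * function name are placeholders; guarded out of the build.
 */
#ifdef VNODE_C_EXAMPLES
static int
example_open_close(void)
{
	vnode_t *vp;
	int error;

	error = vn_open("/path/to/file", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0);
	if (error != 0)
		return (error);

	/* ... vn_rdwr(UIO_READ, vp, ...) ... */

	(void) fop_close(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	return (0);
}
#endif	/* VNODE_C_EXAMPLES */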
/*
 * Open/create a vnode.
 * This may be callable by the kernel, the only known use
 * of user context being that the current user credentials
 * are used for permissions.  crwhy is defined iff filemode & FCREAT.
 */
int
vn_openat(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask,
	struct vnode *startvp,
	int fd)
{
	struct vnode *vp;
	int mode;
	int accessflags;
	int error;
	int in_crit = 0;
	int open_done = 0;
	int shrlock_done = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	struct shrlock shr;
	struct shr_locowner shr_own;

	if (filemode & FSEARCH)
		filemode |= FDIRECTORY;

	mode = 0;
	accessflags = 0;
	if (filemode & FREAD)
		mode |= VREAD;
	if (filemode & (FWRITE|FTRUNC))
		mode |= VWRITE;
	if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
		mode |= VEXEC;

	/* symlink interpretation */
	if (filemode & FNOFOLLOW)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;

	if (filemode & FAPPEND)
		accessflags |= V_APPEND;

top:
	if (filemode & FCREAT && !(filemode & FDIRECTORY)) {
		enum vcexcl excl;

		/* Wish to create a file. */
		vattr.va_type = VREG;
		vattr.va_mode = createmode;
		vattr.va_mask = AT_TYPE|AT_MODE;
		if (filemode & FTRUNC) {
			vattr.va_size = 0;
			vattr.va_mask |= AT_SIZE;
		}
		if (filemode & FEXCL)
			excl = EXCL;
		else
			excl = NONEXCL;

		if (error =
		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
		    (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
			return (error);
	} else {
		/* Wish to open a file.  Just look it up. */
		if (error = lookupnameat(pnamep, seg, follow,
		    NULLVPP, &vp, startvp)) {
			if ((error == ESTALE) &&
			    fs_need_estale_retry(estale_retry++))
				goto top;
			return (error);
		}

		/*
		 * Get the attributes to check whether file is large.
		 * We do this only if the FOFFMAX flag is not set and
		 * only for regular files.
		 */

		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = fop_getattr(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if (vattr.va_size > (uoff_t)MAXOFF32_T) {
				/*
				 * Large File API - regular open fails
				 * if FOFFMAX flag is set in file mode
				 */
				error = EOVERFLOW;
				goto out;
			}
		}
		/*
		 * Can't write directories, active texts, or
		 * read-only filesystems.  Can't truncate files
		 * on which mandatory locking is in effect.
		 */
		if (filemode & (FWRITE|FTRUNC)) {
			/*
			 * Allow writable directory if VDIROPEN flag is set.
			 */
			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
				error = EISDIR;
				goto out;
			}
			if (ISROFILE(vp)) {
				error = EROFS;
				goto out;
			}
			/*
			 * Can't truncate files on which
			 * sysv mandatory locking is in effect.
			 */
			if (filemode & FTRUNC) {
				vnode_t *rvp;

				if (fop_realvp(vp, &rvp, NULL) != 0)
					rvp = vp;
				if (rvp->v_filocks != NULL) {
					vattr.va_mask = AT_MODE;
					if ((error = fop_getattr(vp,
					    &vattr, 0, CRED(), NULL)) == 0 &&
					    MANDLOCK(vp, vattr.va_mode))
						error = EAGAIN;
				}
			}
			if (error)
				goto out;
		}
		/*
		 * Check permissions.
		 */
		if (error = fop_access(vp, mode, accessflags, CRED(), NULL))
			goto out;
		/*
		 * Require FDIRECTORY to return a directory.
		 * Require FEXEC to return a regular file.
		 */
		if ((filemode & FDIRECTORY) && vp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		}
		if ((filemode & FEXEC) && vp->v_type != VREG) {
			error = ENOEXEC;	/* XXX: error code? */
			goto out;
		}
	}

	/*
	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
	 */
	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
		error = ELOOP;
		goto out;
	}
	if (filemode & FNOLINKS) {
		vattr.va_mask = AT_NLINK;
		if ((error = fop_getattr(vp, &vattr, 0, CRED(), NULL))) {
			goto out;
		}
		if (vattr.va_nlink != 1) {
			error = EMLINK;
			goto out;
		}
	}

	/*
	 * Opening a socket corresponding to the AF_UNIX pathname
	 * in the filesystem name space is not supported.
	 * However, VSOCK nodes in namefs are supported in order
	 * to make fattach work for sockets.
	 *
	 * XXX This uses fop_realvp to distinguish between
	 * an unopened namefs node (where fop_realvp returns a
	 * different VSOCK vnode) and a VSOCK created by vn_create
	 * in some file system (where fop_realvp would never return
	 * a different vnode).
	 */
	if (vp->v_type == VSOCK) {
		struct vnode *nvp;

		error = fop_realvp(vp, &nvp, NULL);
		if (error != 0 || nvp == NULL || nvp == vp ||
		    nvp->v_type != VSOCK) {
			error = EOPNOTSUPP;
			goto out;
		}
	}

	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
		/* get share reservation */
		shr.s_access = 0;
		if (filemode & FWRITE)
			shr.s_access |= F_WRACC;
		if (filemode & FREAD)
			shr.s_access |= F_RDACC;
		shr.s_deny = 0;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fd;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = fop_shrlock(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
		    NULL);
		if (error)
			goto out;
		shrlock_done = 1;

		/* nbmand conflict check if truncating file */
		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;

			vattr.va_mask = AT_SIZE;
			if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL))
				goto out;
			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * Do opening protocol.
	 */
	error = fop_open(&vp, filemode, CRED(), NULL);
	if (error)
		goto out;
	open_done = 1;

	/*
	 * Truncate if required.
	 */
	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
		vattr.va_size = 0;
		vattr.va_mask = AT_SIZE;
		if ((error = fop_setattr(vp, &vattr, 0, CRED(), NULL)) != 0)
			goto out;
	}
out:
	ASSERT(vp->v_count > 0);

	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (error) {
		if (open_done) {
			(void) fop_close(vp, filemode, 1, (offset_t)0, CRED(),
			    NULL);
			open_done = 0;
			shrlock_done = 0;
		}
		if (shrlock_done) {
			(void) fop_shrlock(vp, F_UNSHARE, &shr, 0, CRED(),
			    NULL);
			shrlock_done = 0;
		}

		/*
		 * The following clause was added to handle a problem
		 * with NFS consistency.  It is possible that a lookup
		 * of the file to be opened succeeded, but the file
		 * itself doesn't actually exist on the server.  This
		 * is chiefly due to the DNLC containing an entry for
		 * the file which has been removed on the server.  In
		 * this case, we just start over.  If there was some
		 * other cause for the ESTALE error, then the lookup
		 * of the file will fail and the error will be returned
		 * above instead of looping around from here.
		 */
		VN_RELE(vp);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
	} else
		*vpp = vp;
	return (error);
}
/*
 * The following two accessor functions are for the NFSv4 server.  Since there
 * is no fop_open_UP/DOWNGRADE we need a way for the NFS server to keep the
 * vnode open counts correct when a client "upgrades" an open or does an
 * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
 * open mode (add or subtract read or write), but also change the share/deny
 * modes.  However, share reservations are not integrated with OPEN, yet, so
 * we need to handle each separately.  These functions are cleaner than having
 * the NFS server manipulate the counts directly, however, nobody else should
 * use these functions.
 */
void
vn_open_upgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD)
		atomic_inc_32(&vp->v_rdcnt);
	if (filemode & FWRITE)
		atomic_inc_32(&vp->v_wrcnt);
}
void
vn_open_downgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD) {
		ASSERT(vp->v_rdcnt > 0);
		atomic_dec_32(&vp->v_rdcnt);
	}
	if (filemode & FWRITE) {
		ASSERT(vp->v_wrcnt > 0);
		atomic_dec_32(&vp->v_wrcnt);
	}
}
int
vn_create(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask)
{
	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
	    umask, NULL));
}
/*
 * Create a vnode (makenode).
 */
int
vn_createat(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask,
	struct vnode *startvp)
{
	struct vnode *dvp;	/* ptr to parent dir vnode */
	struct vnode *vp = NULL;
	struct pathname pn;
	int error;
	int in_crit = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* symlink interpretation */
	if ((flag & FNOFOLLOW) || excl == EXCL)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;
	flag &= ~(FNOFOLLOW|FNOLINKS);

top:
	/*
	 * Lookup directory.
	 * If new object is a file, call lower level to create it.
	 * Note that it is up to the lower level to enforce exclusive
	 * creation, if the file is already there.
	 * This allows the lower level to do whatever
	 * locking or protocol that is needed to prevent races.
	 * If the new object is directory call lower level to make
	 * the new directory, with "." and "..".
	 */
	if (error = pn_get(pnamep, seg, &pn))
		return (error);
	if (auditing)
		audit_vncreate_start();
	dvp = NULL;
	*vpp = NULL;
	/*
	 * lookup will find the parent directory for the vnode.
	 * When it is done the pn holds the name of the entry
	 * in the directory.
	 * If this is a non-exclusive create we also find the node itself.
	 */
	error = lookuppnat(&pn, NULL, follow, &dvp,
	    (excl == EXCL) ? NULLVPP : vpp, startvp);
	if (error) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		if (why == CRMKDIR && error == EINVAL)
			error = EEXIST;		/* SVID */
		return (error);
	}

	if (why != CRMKNOD)
		vap->va_mode &= ~VSVTX;

	/*
	 * If default ACLs are defined for the directory don't apply the
	 * umask if umask is passed.
	 */

	if (umask) {
		vsecattr_t vsec;

		vsec.vsa_aclcnt = 0;
		vsec.vsa_aclentp = NULL;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;
		vsec.vsa_mask = VSA_DFACLCNT;
		error = fop_getsecattr(dvp, &vsec, 0, CRED(), NULL);
		/*
		 * If error is ENOSYS then treat it as no error
		 * Don't want to force all file systems to support
		 * aclent_t style of ACL's.
		 */
		if (error == ENOSYS)
			error = 0;
		if (error) {
			if (*vpp != NULL)
				VN_RELE(*vpp);
			goto out;
		} else {
			/*
			 * Apply the umask if no default ACLs.
			 */
			if (vsec.vsa_dfaclcnt == 0)
				vap->va_mode &= ~umask;

			/*
			 * fop_getsecattr() may have allocated memory for
			 * ACLs we didn't request, so double-check and
			 * free it if necessary.
			 */
			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_aclentp,
				    vsec.vsa_aclcnt * sizeof (aclent_t));
			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_dfaclentp,
				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
		}
	}

	/*
	 * In general we want to generate EROFS if the file system is
	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
	 * documents the open system call, and it says that O_CREAT has no
	 * effect if the file already exists.  Bug 1119649 states
	 * that open(path, O_CREAT, ...) fails when attempting to open an
	 * existing file on a read only file system.  Thus, the first part
	 * of the following if statement has 3 checks:
	 *	if the file exists &&
	 *	it is being open with write access &&
	 *	the file system is read only
	 *	then generate EROFS
	 */
	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
		if (*vpp)
			VN_RELE(*vpp);
		error = EROFS;
	} else if (excl == NONEXCL && *vpp != NULL) {
		vnode_t *rvp;

		/*
		 * File already exists.  If a mandatory lock has been
		 * applied, return error.
		 */
		vp = *vpp;
		if (fop_realvp(vp, &rvp, NULL) != 0)
			rvp = vp;
		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
			vattr.va_mask = AT_MODE|AT_SIZE;
			if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL)) {
				goto out;
			}
			if (MANDLOCK(vp, vattr.va_mode)) {
				error = EAGAIN;
				goto out;
			}
			/*
			 * File cannot be truncated if non-blocking mandatory
			 * locks are currently on the file.
			 */
			if ((vap->va_mask & AT_SIZE) && in_crit) {
				uoff_t offset;
				ssize_t length;

				offset = vap->va_size > vattr.va_size ?
				    vattr.va_size : vap->va_size;
				length = vap->va_size > vattr.va_size ?
				    vap->va_size - vattr.va_size :
				    vattr.va_size - vap->va_size;
				if (nbl_conflict(vp, NBL_WRITE, offset,
				    length, 0, NULL)) {
					error = EACCES;
					goto out;
				}
			}
		}

		/*
		 * If the file is the root of a VFS, we've crossed a
		 * mount point and the "containing" directory that we
		 * acquired above (dvp) is irrelevant because it's in
		 * a different file system.  We apply fop_create to the
		 * target itself instead of to the containing directory
		 * and supply a null path name to indicate (conventionally)
		 * the node itself as the "component" of interest.
		 *
		 * The call to fop_create() is necessary to ensure
		 * that the appropriate permission checks are made,
		 * i.e. EISDIR, EACCES, etc.  We already know that vpp
		 * exists since we are in the else condition where this
		 * was checked.
		 */
		if (vp->v_flag & VROOT) {
			ASSERT(why != CRMKDIR);
			error = fop_create(vp, "", vap, excl, mode, vpp,
			    CRED(), flag, NULL, NULL);
			/*
			 * If the create succeeded, it will have created a
			 * new reference on a new vnode (*vpp) in the child
			 * file system, so we want to drop our reference on
			 * the old (vp) upon exit.
			 */
			goto out;
		}

		/*
		 * Large File API - non-large open (FOFFMAX flag not set)
		 * of regular file fails if the file size exceeds MAXOFF32_T.
		 */
		if (why != CRMKDIR &&
		    !(flag & FOFFMAX) &&
		    (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = fop_getattr(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if ((vattr.va_size > (uoff_t)MAXOFF32_T)) {
				error = EOVERFLOW;
				goto out;
			}
		}
	}

	if (error == 0) {
		/*
		 * Call mkdir() if specified, otherwise create().
		 */
		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */

		if (why == CRMKDIR)
			/*
			 * N.B., if vn_createat() ever requests
			 * case-insensitive behavior then it will need
			 * to be passed to fop_mkdir().  fop_create()
			 * will already get it via "flag"
			 */
			error = fop_mkdir(dvp, pn.pn_path, vap, vpp, CRED(),
			    NULL, 0, NULL);
		else if (!must_be_dir)
			error = fop_create(dvp, pn.pn_path, vap,
			    excl, mode, vpp, CRED(), flag, NULL, NULL);
		else
			error = ENOTDIR;
	}

out:

	if (auditing)
		audit_vncreate_finish(*vpp, error);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL) {
		VN_RELE(vp);
		vp = NULL;
	}
	pn_free(&pn);
	VN_RELE(dvp);
	/*
	 * The following clause was added to handle a problem
	 * with NFS consistency.  It is possible that a lookup
	 * of the file to be created succeeded, but the file
	 * itself doesn't actually exist on the server.  This
	 * is chiefly due to the DNLC containing an entry for
	 * the file which has been removed on the server.  In
	 * this case, we just start over.  If there was some
	 * other cause for the ESTALE error, then the lookup
	 * of the file will fail and the error will be returned
	 * above instead of looping around from here.
	 */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
int
vn_link(char *from, char *to, enum uio_seg seg)
{
	return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
}
int
vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
    vnode_t *tstartvp, char *to, enum uio_seg seg)
{
	struct vnode *fvp;		/* from vnode ptr */
	struct vnode *tdvp;		/* to directory vnode ptr */
	struct pathname pn;
	int error;
	struct vattr vattr;
	dev_t fsid;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = tdvp = NULL;
	if (error = pn_get(to, seg, &pn))
		return (error);
	if (auditing && fstartvp != NULL)
		audit_setfsat_path(1);
	if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
		goto out;
	if (auditing && tstartvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
		goto out;

	/*
	 * Make sure both source vnode and target directory vnode are
	 * in the same vfs and that it is writeable.
	 */
	vattr.va_mask = AT_FSID;
	if (error = fop_getattr(fvp, &vattr, 0, CRED(), NULL))
		goto out;
	fsid = vattr.va_fsid;
	vattr.va_mask = AT_FSID;
	if (error = fop_getattr(tdvp, &vattr, 0, CRED(), NULL))
		goto out;
	if (fsid != vattr.va_fsid) {
		error = EXDEV;
		goto out;
	}
	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * Do the link.
	 */
	(void) pn_fixslash(&pn);
	error = fop_link(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
out:
	pn_free(&pn);
	if (fvp)
		VN_RELE(fvp);
	if (tdvp)
		VN_RELE(tdvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
int
vn_rename(char *from, char *to, enum uio_seg seg)
{
	return (vn_renameat(NULL, from, NULL, to, seg));
}
int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
    char *tname, enum uio_seg seg)
{
	int error;
	struct vattr vattr;
	struct pathname fpn;		/* from pathname */
	struct pathname tpn;		/* to pathname */
	dev_t fsid;
	int in_crit_src, in_crit_targ;
	vnode_t *fromvp, *fvp;
	vnode_t *tovp, *targvp;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = fromvp = tovp = targvp = NULL;
	in_crit_src = in_crit_targ = 0;
	/*
	 * Get to and from pathnames.
	 */
	if (error = pn_get(fname, seg, &fpn))
		return (error);
	if (error = pn_get(tname, seg, &tpn)) {
		pn_free(&fpn);
		return (error);
	}

	/*
	 * First we need to resolve the correct directories
	 * The passed in directories may only be a starting point,
	 * but we need the real directories the file(s) live in.
	 * For example the fname may be something like usr/lib/sparc
	 * and we were passed in the / directory, but we need to
	 * use the lib directory for the rename.
	 */

	if (auditing && fdvp != NULL)
		audit_setfsat_path(1);
	/*
	 * Lookup to and from directories.
	 */
	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
		goto out;
	}

	/*
	 * Make sure there is an entry.
	 */
	if (fvp == NULL) {
		error = ENOENT;
		goto out;
	}

	if (auditing && tdvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
		goto out;
	}

	/*
	 * Make sure both the from vnode directory and the to directory
	 * are in the same vfs and the to directory is writable.
	 * We check fsid's, not vfs pointers, so loopback fs works.
	 */
	if (fromvp != tovp) {
		vattr.va_mask = AT_FSID;
		if (error = fop_getattr(fromvp, &vattr, 0, CRED(), NULL))
			goto out;
		fsid = vattr.va_fsid;
		vattr.va_mask = AT_FSID;
		if (error = fop_getattr(tovp, &vattr, 0, CRED(), NULL))
			goto out;
		if (fsid != vattr.va_fsid) {
			error = EXDEV;
			goto out;
		}
	}

	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * Make sure "from" vp is not a mount point.
	 * Note, lookup did traverse() already, so
	 * we'll be looking at the mounted FS root.
	 * (but allow files like mnttab)
	 */
	if ((fvp->v_flag & VROOT) != 0 && fvp->v_type == VDIR) {
		error = EBUSY;
		goto out;
	}

	if (targvp && (fvp != targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit_targ = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	if (nbl_need_check(fvp)) {
		nbl_start_crit(fvp, RW_READER);
		in_crit_src = 1;
		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/*
	 * Do the rename.
	 */
	(void) pn_fixslash(&tpn);
	error = fop_rename(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
	    NULL, 0);

out:
	pn_free(&fpn);
	pn_free(&tpn);
	if (in_crit_src)
		nbl_end_crit(fvp);
	if (in_crit_targ)
		nbl_end_crit(targvp);
	if (fromvp)
		VN_RELE(fromvp);
	if (tovp)
		VN_RELE(tovp);
	if (targvp)
		VN_RELE(targvp);
	if (fvp)
		VN_RELE(fvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
/*
 * Remove a file or directory.
 */
int
vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	return (vn_removeat(NULL, fnamep, seg, dirflag));
}
int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;
	int estale_retry = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
			error = EBUSY;
			goto out;
		}

		/*
		 * Namefs specific code starts here.
		 */

		if (dirflag == RMDIRECTORY) {
			/*
			 * User called rmdir(2) on a file that has
			 * been namefs mounted on top of.  Since
			 * namefs doesn't allow directories to
			 * be mounted on other files we know
			 * vp is not of type VDIR so fail the operation.
			 */
			error = ENOTDIR;
			goto out;
		}

		/*
		 * If VROOT is still set after grabbing vp->v_lock,
		 * no one has finished nm_unmount so far and coveredvp
		 * is valid.
		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
		 * vp->v_lock, any race window is eliminated.
		 */

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & VROOT) == 0) {
			/* Someone beat us to the unmount */
			mutex_exit(&vp->v_lock);
			error = EBUSY;
			goto out;
		}
		vfsp = vp->v_vfsp;
		coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp);
		/*
		 * Note: Implementation of vn_vfswlock shows that ordering of
		 * v_lock / vn_vfswlock is not an issue here.
		 */
		error = vn_vfswlock(coveredvp);
		mutex_exit(&vp->v_lock);

		if (error)
			goto out;

		VN_HOLD(coveredvp);
		VN_RELE(vp);
		error = dounmount(vfsp, 0, CRED());

		/*
		 * Unmounted the namefs file system; now get
		 * the object it was mounted over.
		 */
		vp = coveredvp;
		/*
		 * If namefs was mounted over a directory, then
		 * we want to use rmdir() instead of unlink().
		 */
		if (vp->v_type == VDIR)
			dirflag = RMDIRECTORY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling fop_remove.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = fop_rmdir(dvp, pn.pn_path, cwd, CRED(),
			    NULL, 0);
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = fop_remove(dvp, pn.pn_path, CRED(), NULL, 0);
	}

out:
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
/*
 * Utility function to compare equality of vnodes.
 * Compare the underlying real vnodes, if there are underlying vnodes.
 * This is a more thorough comparison than the VN_CMP() macro provides.
 */
int
vn_compare(vnode_t *vp1, vnode_t *vp2)
{
	vnode_t *realvp;

	if (vp1 != NULL && fop_realvp(vp1, &realvp, NULL) == 0)
		vp1 = realvp;
	if (vp2 != NULL && fop_realvp(vp2, &realvp, NULL) == 0)
		vp2 = realvp;
	return (VN_CMP(vp1, vp2));
}
/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 */
#define	NUM_BUCKETS	1023

struct vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1.
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct	vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

#define	VN_VFSLOCKS_SHIFT	9

#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
/*
 * vn_vfslocks_getlock() uses a hash scheme to generate an
 * rwstlock from the vfs/vnode pointer passed to it.
 *
 * vn_vfslocks_rele() releases a reference in the
 * hash table which allows the entry allocated by
 * vn_vfslocks_getlock() to be freed at a later
 * stage when the refcount drops to zero.
 */

vn_vfslocks_entry_t *
vn_vfslocks_getlock(void *vfsvpptr)
{
	struct vn_vfslocks_bucket *bp;
	vn_vfslocks_entry_t *vep;
	vn_vfslocks_entry_t *tvep;

	ASSERT(vfsvpptr != NULL);
	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];

	mutex_enter(&bp->vb_lock);
	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
		if (vep->ve_vpvfs == vfsvpptr) {
			vep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);
			return (vep);
		}
	}
	mutex_exit(&bp->vb_lock);
	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
	vep->ve_vpvfs = (char *)vfsvpptr;
	vep->ve_refcnt = 1;
	mutex_enter(&bp->vb_lock);
	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
		if (tvep->ve_vpvfs == vfsvpptr) {
			tvep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);

			/*
			 * There is already an entry in the hash;
			 * destroy what we just allocated.
			 */
			rwst_destroy(&vep->ve_lock);
			kmem_free(vep, sizeof (*vep));
			return (tvep);
		}
	}
	vep->ve_next = bp->vb_list;
	bp->vb_list = vep;
	mutex_exit(&bp->vb_lock);
	return (vep);
}
void
vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
{
	struct vn_vfslocks_bucket *bp;
	vn_vfslocks_entry_t *vep;
	vn_vfslocks_entry_t *pvep;

	ASSERT(vepent != NULL);
	ASSERT(vepent->ve_vpvfs != NULL);

	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];

	mutex_enter(&bp->vb_lock);
	vepent->ve_refcnt--;

	if ((int32_t)vepent->ve_refcnt < 0)
		cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");

	if (vepent->ve_refcnt == 0) {
		for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
			if (vep->ve_vpvfs == vepent->ve_vpvfs) {
				if (bp->vb_list == vep)
					bp->vb_list = vep->ve_next;
				else {
					/* LINTED */
					pvep->ve_next = vep->ve_next;
				}
				mutex_exit(&bp->vb_lock);
				rwst_destroy(&vep->ve_lock);
				kmem_free(vep, sizeof (*vep));
				return;
			}
			pvep = vep;
		}
		cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
	}
	mutex_exit(&bp->vb_lock);
}
2109 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2110 * lock protecting the v_vfsmountedhere field.
2111 * vn_vfswlock_wait is similar to vn_vfswlock,
2112 * except that it blocks to acquire the lock VVFSLOCK.
2114 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2115 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2116 * need the non-blocking version of the writers lock, i.e. vn_vfswlock.
2119 vn_vfswlock_wait(vnode_t *vp)
2121 int retval;
2122 vn_vfslocks_entry_t *vpvfsentry;
2123 ASSERT(vp != NULL);
2125 vpvfsentry = vn_vfslocks_getlock(vp);
2126 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2128 if (retval == EINTR) {
2129 vn_vfslocks_rele(vpvfsentry);
2130 return (EINTR);
2132 return (retval);
2136 vn_vfsrlock_wait(vnode_t *vp)
2138 int retval;
2139 vn_vfslocks_entry_t *vpvfsentry;
2140 ASSERT(vp != NULL);
2142 vpvfsentry = vn_vfslocks_getlock(vp);
2143 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2145 if (retval == EINTR) {
2146 vn_vfslocks_rele(vpvfsentry);
2147 return (EINTR);
2150 return (retval);
2155 * vn_vfswlock is used to implement a lock which is logically a writers lock
2156 * protecting the v_vfsmountedhere field.
2159 vn_vfswlock(vnode_t *vp)
2161 vn_vfslocks_entry_t *vpvfsentry;
2164 * If vp is NULL then somebody is trying to lock the covered vnode
2165 * of /. (vfs_vnodecovered is NULL for /). This situation will
2166 * only happen when unmounting /. Since that operation will fail
2167 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2169 if (vp == NULL)
2170 return (EBUSY);
2172 vpvfsentry = vn_vfslocks_getlock(vp);
2174 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2175 return (0);
2177 vn_vfslocks_rele(vpvfsentry);
2178 return (EBUSY);
2182 vn_vfsrlock(vnode_t *vp)
2184 vn_vfslocks_entry_t *vpvfsentry;
2187 * If vp is NULL then somebody is trying to lock the covered vnode
2188 * of /. (vfs_vnodecovered is NULL for /). This situation will
2189 * only happen when unmounting /. Since that operation will fail
2190 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2192 if (vp == NULL)
2193 return (EBUSY);
2195 vpvfsentry = vn_vfslocks_getlock(vp);
2197 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2198 return (0);
2200 vn_vfslocks_rele(vpvfsentry);
2201 return (EBUSY);
2204 void
2205 vn_vfsunlock(vnode_t *vp)
2207 vn_vfslocks_entry_t *vpvfsentry;
2210 * ve_refcnt needs to be decremented twice:
2211 * 1. To release the reference taken by the call to vn_vfslocks_getlock()
2212 * 2. To release the reference taken by the locking routines like
2213 * vn_vfsrlock/vn_vfswlock etc.
2215 vpvfsentry = vn_vfslocks_getlock(vp);
2216 vn_vfslocks_rele(vpvfsentry);
2218 rwst_exit(&vpvfsentry->ve_lock);
2219 vn_vfslocks_rele(vpvfsentry);
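/*
 * Example (illustrative sketch, not part of this file): the blocking
 * reader/writer routines above are used in matched pairs.  Note that
 * vn_vfsunlock() performs the second vn_vfslocks_rele(), balancing the
 * reference taken when the lock was acquired.  The function name is
 * hypothetical.
 */
#if 0	/* usage sketch only */
static int
example_read_locked(vnode_t *vp)
{
	int error;

	if ((error = vn_vfsrlock_wait(vp)) != 0)
		return (error);		/* interrupted by a signal */
	/* ... v_vfsmountedhere is stable while the lock is held ... */
	vn_vfsunlock(vp);
	return (0);
}
#endif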
2223 vn_vfswlock_held(vnode_t *vp)
2225 int held;
2226 vn_vfslocks_entry_t *vpvfsentry;
2228 ASSERT(vp != NULL);
2230 vpvfsentry = vn_vfslocks_getlock(vp);
2231 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2233 vn_vfslocks_rele(vpvfsentry);
2234 return (held);
2239 vn_make_ops(
2240 const char *name, /* Name of file system */
2241 const fs_operation_def_t *templ, /* Operation specification */
2242 vnodeops_t **actual) /* Return the vnodeops */
2244 int unused_ops;
2245 int error;
2247 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2249 (*actual)->vnop_name = name;
2251 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2252 if (error) {
2253 kmem_free(*actual, sizeof (vnodeops_t));
2256 #if DEBUG
2257 if (unused_ops != 0)
2258 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2259 "but not used", name, unused_ops);
2260 #endif
2262 return (error);
2266 * Free the vnodeops created as a result of vn_make_ops()
2268 void
2269 vn_freevnodeops(vnodeops_t *vnops)
2271 kmem_free(vnops, sizeof (vnodeops_t));
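/*
 * Example (illustrative sketch, not part of this file): a file system
 * typically builds its vnodeops from an fs_operation_def_t template at
 * module init time and tears them down at fini time.  "myfs" and its
 * functions are hypothetical, and the template entry syntax follows the
 * usual illumos convention; the exact set of union members may differ
 * in this tree.
 */
#if 0	/* usage sketch only */
static vnodeops_t *myfs_vnodeops;

static const fs_operation_def_t myfs_vnodeops_template[] = {
	{ VOPNAME_OPEN,		{ .vop_open = myfs_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = myfs_close } },
	{ VOPNAME_READ,		{ .vop_read = myfs_read } },
	{ NULL,			{ NULL } }
};

static int
myfs_init_vnodeops(void)
{
	/* vn_make_ops() warns (in DEBUG kernels) about unused entries */
	return (vn_make_ops("myfs", myfs_vnodeops_template, &myfs_vnodeops));
}

static void
myfs_fini_vnodeops(void)
{
	vn_freevnodeops(myfs_vnodeops);
}
#endif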
2275 * Vnode cache.
2278 /* ARGSUSED */
2279 static int
2280 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2282 struct vnode *vp;
2284 vp = buf;
2286 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2287 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2288 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2289 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2290 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2291 vp->v_path = NULL;
2292 vp->v_mpssdata = NULL;
2293 vp->v_vsd = NULL;
2294 vp->v_fopdata = NULL;
2296 vmobject_init(&vp->v_object, vp);
2298 return (0);
2301 /* ARGSUSED */
2302 static void
2303 vn_cache_destructor(void *buf, void *cdrarg)
2305 struct vnode *vp;
2307 vp = buf;
2309 vmobject_fini(&vp->v_object);
2311 rw_destroy(&vp->v_nbllock);
2312 cv_destroy(&vp->v_cv);
2313 mutex_destroy(&vp->v_vsd_lock);
2314 mutex_destroy(&vp->v_lock);
2317 void
2318 vn_create_cache(void)
2320 /* LINTED */
2321 ASSERT((1 << VNODE_ALIGN_LOG2) ==
2322 P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2323 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2324 VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2325 NULL, 0);
2328 void
2329 vn_destroy_cache(void)
2331 kmem_cache_destroy(vn_cache);
2335 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2336 * cached by the file system and vnodes remain associated.
2338 void
2339 vn_recycle(vnode_t *vp)
2341 ASSERT(!vn_has_cached_data(vp));
2344 * XXX - This really belongs in vn_reinit(), but we have some issues
2345 * with the counts. Best to have it here for clean initialization.
2347 vp->v_rdcnt = 0;
2348 vp->v_wrcnt = 0;
2349 vp->v_mmap_read = 0;
2350 vp->v_mmap_write = 0;
2353 * If FEM was in use, make sure everything gets cleaned up
2354 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2355 * constructor.
2357 if (vp->v_femhead) {
2358 /* XXX - There should be a free_femhead() that does all this */
2359 ASSERT(vp->v_femhead->femh_list == NULL);
2360 mutex_destroy(&vp->v_femhead->femh_lock);
2361 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2362 vp->v_femhead = NULL;
2364 if (vp->v_path) {
2365 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2366 vp->v_path = NULL;
2369 if (vp->v_fopdata != NULL) {
2370 free_fopdata(vp);
2372 vp->v_mpssdata = NULL;
2373 vsd_free(vp);
2377 * Used to reset the vnode fields including those that are directly accessible
2378 * as well as those which require an accessor function.
2380 * Does not initialize:
2381 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2382 * v_data (since FS-nodes and vnodes point to each other and should
2383 * be updated simultaneously)
2384 * v_op (in case someone needs to make a VOP call on this object)
2386 void
2387 vn_reinit(vnode_t *vp)
2389 vp->v_count = 1;
2390 vp->v_count_dnlc = 0;
2391 vp->v_vfsp = NULL;
2392 vp->v_stream = NULL;
2393 vp->v_vfsmountedhere = NULL;
2394 vp->v_flag = 0;
2395 vp->v_type = VNON;
2396 vp->v_rdev = NODEV;
2398 vp->v_filocks = NULL;
2399 vp->v_shrlocks = NULL;
2400 VERIFY(!vn_has_cached_data(vp));
2402 vp->v_locality = NULL;
2403 vp->v_xattrdir = NULL;
2405 /* Handles v_femhead, v_path, and the r/w/map counts */
2406 vn_recycle(vp);
2409 vnode_t *
2410 vn_alloc(int kmflag)
2412 vnode_t *vp;
2414 vp = kmem_cache_alloc(vn_cache, kmflag);
2416 if (vp != NULL) {
2417 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2418 vp->v_fopdata = NULL;
2419 vn_reinit(vp);
2422 return (vp);
2425 void
2426 vn_free(vnode_t *vp)
2428 ASSERT(vp->v_shrlocks == NULL);
2429 ASSERT(vp->v_filocks == NULL);
2432 * Some file systems call vn_free() with v_count of zero,
2433 * some with v_count of 1. In any case, the value should
2434 * never be anything else.
2436 ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2437 ASSERT(vp->v_count_dnlc == 0);
2438 if (vp->v_path != NULL) {
2439 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2440 vp->v_path = NULL;
2443 /* If FEM was in use, make sure everything gets cleaned up */
2444 if (vp->v_femhead) {
2445 /* XXX - There should be a free_femhead() that does all this */
2446 ASSERT(vp->v_femhead->femh_list == NULL);
2447 mutex_destroy(&vp->v_femhead->femh_lock);
2448 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2449 vp->v_femhead = NULL;
2452 if (vp->v_fopdata != NULL) {
2453 free_fopdata(vp);
2455 vp->v_mpssdata = NULL;
2456 vsd_free(vp);
2457 kmem_cache_free(vn_cache, vp);
2461 * Vnode status changes; we should define better states than 1 and 0.
2463 void
2464 vn_reclaim(vnode_t *vp)
2466 vfs_t *vfsp = vp->v_vfsp;
2468 if (vfsp == NULL ||
2469 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2470 return;
2472 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2475 void
2476 vn_idle(vnode_t *vp)
2478 vfs_t *vfsp = vp->v_vfsp;
2480 if (vfsp == NULL ||
2481 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2482 return;
2484 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2486 void
2487 vn_exists(vnode_t *vp)
2489 vfs_t *vfsp = vp->v_vfsp;
2491 if (vfsp == NULL ||
2492 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2493 return;
2495 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2498 void
2499 vn_invalid(vnode_t *vp)
2501 vfs_t *vfsp = vp->v_vfsp;
2503 if (vfsp == NULL ||
2504 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2505 return;
2507 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2510 /* Vnode event notification */
2513 vnevent_support(vnode_t *vp, caller_context_t *ct)
2515 if (vp == NULL)
2516 return (EINVAL);
2518 return (fop_vnevent(vp, VE_SUPPORT, NULL, NULL, ct));
2521 void
2522 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2524 if (vp == NULL || vp->v_femhead == NULL) {
2525 return;
2527 (void) fop_vnevent(vp, VE_RENAME_SRC, dvp, name, ct);
2530 void
2531 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2532 caller_context_t *ct)
2534 if (vp == NULL || vp->v_femhead == NULL) {
2535 return;
2537 (void) fop_vnevent(vp, VE_RENAME_DEST, dvp, name, ct);
2540 void
2541 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2543 if (vp == NULL || vp->v_femhead == NULL) {
2544 return;
2546 (void) fop_vnevent(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2549 void
2550 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2552 if (vp == NULL || vp->v_femhead == NULL) {
2553 return;
2555 (void) fop_vnevent(vp, VE_REMOVE, dvp, name, ct);
2558 void
2559 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2561 if (vp == NULL || vp->v_femhead == NULL) {
2562 return;
2564 (void) fop_vnevent(vp, VE_RMDIR, dvp, name, ct);
2567 void
2568 vnevent_pre_rename_src(vnode_t *vp, vnode_t *dvp, char *name,
2569 caller_context_t *ct)
2571 if (vp == NULL || vp->v_femhead == NULL) {
2572 return;
2574 (void) fop_vnevent(vp, VE_PRE_RENAME_SRC, dvp, name, ct);
2577 void
2578 vnevent_pre_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2579 caller_context_t *ct)
2581 if (vp == NULL || vp->v_femhead == NULL) {
2582 return;
2584 (void) fop_vnevent(vp, VE_PRE_RENAME_DEST, dvp, name, ct);
2587 void
2588 vnevent_pre_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
2589 caller_context_t *ct)
2591 if (vp == NULL || vp->v_femhead == NULL) {
2592 return;
2594 (void) fop_vnevent(vp, VE_PRE_RENAME_DEST_DIR, nvp, name, ct);
2597 void
2598 vnevent_create(vnode_t *vp, caller_context_t *ct)
2600 if (vp == NULL || vp->v_femhead == NULL) {
2601 return;
2603 (void) fop_vnevent(vp, VE_CREATE, NULL, NULL, ct);
2606 void
2607 vnevent_link(vnode_t *vp, caller_context_t *ct)
2609 if (vp == NULL || vp->v_femhead == NULL) {
2610 return;
2612 (void) fop_vnevent(vp, VE_LINK, NULL, NULL, ct);
2615 void
2616 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2618 if (vp == NULL || vp->v_femhead == NULL) {
2619 return;
2621 (void) fop_vnevent(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2624 void
2625 vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2627 if (vp == NULL || vp->v_femhead == NULL) {
2628 return;
2630 (void) fop_vnevent(vp, VE_TRUNCATE, NULL, NULL, ct);
2634 * Vnode accessors.
2638 vn_is_readonly(vnode_t *vp)
2640 return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2644 vn_has_flocks(vnode_t *vp)
2646 return (vp->v_filocks != NULL);
2650 vn_has_mandatory_locks(vnode_t *vp, int mode)
2652 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2656 vn_has_cached_data(vnode_t *vp)
2658 return (!list_is_empty(&vp->v_object.list));
2662 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2663 * zone_enter(2).
2666 vn_can_change_zones(vnode_t *vp)
2668 struct vfssw *vswp;
2669 int allow = 1;
2670 vnode_t *rvp;
2672 if (nfs_global_client_only != 0)
2673 return (1);
2676 * We always want to look at the underlying vnode if there is one.
2678 if (fop_realvp(vp, &rvp, NULL) != 0)
2679 rvp = vp;
2681 * Some pseudo filesystems (including doorfs) don't actually register
2682 * their vfsops_t, so the following may return NULL; we happily let
2683 * such vnodes switch zones.
2685 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2686 if (vswp != NULL) {
2687 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2688 allow = 0;
2689 vfs_unrefvfssw(vswp);
2691 return (allow);
2695 * Return nonzero if the vnode is a mount point, zero if not.
2698 vn_ismntpt(vnode_t *vp)
2700 return (vp->v_vfsmountedhere != NULL);
2703 /* Retrieve the vfs (if any) mounted on this vnode */
2704 vfs_t *
2705 vn_mountedvfs(vnode_t *vp)
2707 return (vp->v_vfsmountedhere);
2711 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2714 vn_in_dnlc(vnode_t *vp)
2716 return (vp->v_count_dnlc > 0);
2720 * vn_has_other_opens() checks whether a particular file is opened by more than
2721 * just the caller and whether the open is for read and/or write.
2722 * This routine is meant to be called after the caller has already called
2723 * fop_open() and wishes to know if they are the only one with it open for
2724 * the mode(s) specified.
2726 * Vnode counts are only kept on regular files (v_type=VREG).
2729 vn_has_other_opens(
2730 vnode_t *vp,
2731 v_mode_t mode)
2734 ASSERT(vp != NULL);
2736 switch (mode) {
2737 case V_WRITE:
2738 if (vp->v_wrcnt > 1)
2739 return (V_TRUE);
2740 break;
2741 case V_RDORWR:
2742 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2743 return (V_TRUE);
2744 break;
2745 case V_RDANDWR:
2746 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2747 return (V_TRUE);
2748 break;
2749 case V_READ:
2750 if (vp->v_rdcnt > 1)
2751 return (V_TRUE);
2752 break;
2755 return (V_FALSE);
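/*
 * Example (illustrative sketch, not part of this file): after a
 * successful fop_open() for writing, a caller such as the NFS server's
 * delegation logic can check whether it holds the only write open.
 */
#if 0	/* usage sketch only */
	if (vp->v_type == VREG && vn_has_other_opens(vp, V_WRITE) == V_FALSE) {
		/* we are the only writer; a write delegation may be safe */
	}
#endif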
2759 * vn_is_opened() checks whether a particular file is opened and
2760 * whether the open is for read and/or write.
2762 * Vnode counts are only kept on regular files (v_type=VREG).
2765 vn_is_opened(
2766 vnode_t *vp,
2767 v_mode_t mode)
2770 ASSERT(vp != NULL);
2772 switch (mode) {
2773 case V_WRITE:
2774 if (vp->v_wrcnt)
2775 return (V_TRUE);
2776 break;
2777 case V_RDANDWR:
2778 if (vp->v_rdcnt && vp->v_wrcnt)
2779 return (V_TRUE);
2780 break;
2781 case V_RDORWR:
2782 if (vp->v_rdcnt || vp->v_wrcnt)
2783 return (V_TRUE);
2784 break;
2785 case V_READ:
2786 if (vp->v_rdcnt)
2787 return (V_TRUE);
2788 break;
2791 return (V_FALSE);
2795 * vn_is_mapped() checks whether a particular file is mapped and whether
2796 * the file is mapped read and/or write.
2799 vn_is_mapped(
2800 vnode_t *vp,
2801 v_mode_t mode)
2804 ASSERT(vp != NULL);
2806 #if !defined(_LP64)
2807 switch (mode) {
2809 * The atomic_add_64_nv functions force atomicity in the
2810 * case of 32 bit architectures. Otherwise the 64 bit values
2811 * require two fetches, and the value of the fields may be
2812 * changed between the first fetch and the
2813 * second.
2815 case V_WRITE:
2816 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2817 return (V_TRUE);
2818 break;
2819 case V_RDANDWR:
2820 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2821 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2822 return (V_TRUE);
2823 break;
2824 case V_RDORWR:
2825 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2826 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2827 return (V_TRUE);
2828 break;
2829 case V_READ:
2830 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2831 return (V_TRUE);
2832 break;
2834 #else
2835 switch (mode) {
2836 case V_WRITE:
2837 if (vp->v_mmap_write)
2838 return (V_TRUE);
2839 break;
2840 case V_RDANDWR:
2841 if (vp->v_mmap_read && vp->v_mmap_write)
2842 return (V_TRUE);
2843 break;
2844 case V_RDORWR:
2845 if (vp->v_mmap_read || vp->v_mmap_write)
2846 return (V_TRUE);
2847 break;
2848 case V_READ:
2849 if (vp->v_mmap_read)
2850 return (V_TRUE);
2851 break;
2853 #endif
2855 return (V_FALSE);
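/*
 * Example (illustrative sketch, not part of this file): the same
 * add-zero trick can be used anywhere a 64 bit counter must be read
 * atomically on a 32 bit kernel.
 */
#if 0	/* usage sketch only */
	u_longlong_t reads;

	reads = atomic_add_64_nv((uint64_t *)&vp->v_mmap_read, 0);
#endif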
2859 * Set the operations vector for a vnode.
2861 * FEM ensures that the v_femhead pointer is filled in before the
2862 * v_op pointer is changed. This means that if the v_femhead pointer
2863 * is NULL, and the v_op field hasn't changed since we checked
2864 * the v_femhead pointer, then our update is ok - we are not racing with
2865 * FEM.
2867 void
2868 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2870 vnodeops_t *op;
2872 ASSERT(vp != NULL);
2873 ASSERT(vnodeops != NULL);
2875 op = vp->v_op;
2876 membar_consumer();
2878 * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
2879 * the compare-and-swap on vp->v_op. If either test fails, then FEM is
2880 * in effect on the vnode and we need to have FEM deal with it.
2882 if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
2883 op) {
2884 fem_setvnops(vp, vnodeops);
2889 * Retrieve the operations vector for a vnode
2890 * As with vn_setops() above, make sure we aren't racing with FEM.
2891 * FEM sets the v_op to a special, internal vnodeops that wouldn't
2892 * make sense to the callers of this routine.
2894 vnodeops_t *
2895 vn_getops(vnode_t *vp)
2897 vnodeops_t *op;
2899 ASSERT(vp != NULL);
2901 op = vp->v_op;
2902 membar_consumer();
2903 if (vp->v_femhead == NULL && op == vp->v_op) {
2904 return (op);
2905 } else {
2906 return (fem_getvnops(vp));
2911 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2912 * Returns zero (0) if not.
2915 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2917 return (vn_getops(vp) == vnodeops);
2921 * Returns non-zero (1) if the specified operation matches the
2922 * corresponding operation for the vnode.
2923 * Returns zero (0) if not.
2926 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2929 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2931 const fs_operation_trans_def_t *otdp;
2932 fs_generic_func_p *loc = NULL;
2933 vnodeops_t *vop = vn_getops(vp);
2935 ASSERT(vopname != NULL);
2937 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2938 if (MATCHNAME(otdp->name, vopname)) {
2939 loc = (fs_generic_func_p *)
2940 ((char *)(vop) + otdp->offset);
2941 break;
2945 return ((loc != NULL) && (*loc == funcp));
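/*
 * Example (illustrative sketch, not part of this file): a caller can
 * test whether a vnode's read entry point is a particular function.
 * myfs_read is hypothetical.
 */
#if 0	/* usage sketch only */
	if (vn_matchopval(vp, VOPNAME_READ, (fs_generic_func_p)myfs_read)) {
		/* vp is serviced by myfs's read routine */
	}
#endif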
2949 * fs_new_caller_id() needs to return a unique ID on a given local system.
2950 * The IDs do not need to survive across reboots. These are primarily
2951 * used so that (FEM) monitors can detect particular callers (such as
2952 * the NFS server) to a given vnode/vfs operation.
2954 u_longlong_t
2955 fs_new_caller_id()
2957 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2959 return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
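/*
 * Example (illustrative sketch, not part of this file): a subsystem
 * obtains one caller ID at startup and stamps it into the
 * caller_context_t it passes to vnode operations, so that FEM monitors
 * can recognize its calls.  The names are hypothetical.
 */
#if 0	/* usage sketch only */
static u_longlong_t mysrv_caller_id;

static void
mysrv_init(void)
{
	mysrv_caller_id = fs_new_caller_id();
}

static void
mysrv_fill_context(caller_context_t *ct, pid_t pid)
{
	ct->cc_pid = pid;
	ct->cc_sysid = 0;
	ct->cc_caller_id = mysrv_caller_id;
	ct->cc_flags = 0;
}
#endif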
2963 * Given a starting vnode and a path, updates the path in the target vnode in
2964 * a safe manner. If the vnode already has path information embedded, then the
2965 * cached path is left untouched.
2968 size_t max_vnode_path = 4 * MAXPATHLEN;
2970 void
2971 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2972 const char *path, size_t plen)
2974 char *rpath;
2975 vnode_t *base;
2976 size_t rpathlen, rpathalloc;
2977 int doslash = 1;
2979 if (*path == '/') {
2980 base = rootvp;
2981 path++;
2982 plen--;
2983 } else {
2984 base = startvp;
2988 * We cannot grab base->v_lock while we hold vp->v_lock because of
2989 * the potential for deadlock.
2991 mutex_enter(&base->v_lock);
2992 if (base->v_path == NULL) {
2993 mutex_exit(&base->v_lock);
2994 return;
2997 rpathlen = strlen(base->v_path);
2998 rpathalloc = rpathlen + plen + 1;
2999 /* Avoid adding a slash if there's already one there */
3000 if (base->v_path[rpathlen-1] == '/')
3001 doslash = 0;
3002 else
3003 rpathalloc++;
3006 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
3007 * so we must do this dance. If, by chance, something changes the path,
3008 * just give up since there is no real harm.
3010 mutex_exit(&base->v_lock);
3012 /* Paths should stay within reason */
3013 if (rpathalloc > max_vnode_path)
3014 return;
3016 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
3018 mutex_enter(&base->v_lock);
3019 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
3020 mutex_exit(&base->v_lock);
3021 kmem_free(rpath, rpathalloc);
3022 return;
3024 bcopy(base->v_path, rpath, rpathlen);
3025 mutex_exit(&base->v_lock);
3027 if (doslash)
3028 rpath[rpathlen++] = '/';
3029 bcopy(path, rpath + rpathlen, plen);
3030 rpath[rpathlen + plen] = '\0';
3032 mutex_enter(&vp->v_lock);
3033 if (vp->v_path != NULL) {
3034 mutex_exit(&vp->v_lock);
3035 kmem_free(rpath, rpathalloc);
3036 } else {
3037 vp->v_path = rpath;
3038 mutex_exit(&vp->v_lock);
3043 * Sets the path to the vnode to be the given string, regardless of current
3044 * context. The string must be a complete path from rootdir. This is only used
3045 * by fsop_root() for setting the path based on the mountpoint.
3047 void
3048 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3050 char *buf = kmem_alloc(len + 1, KM_SLEEP);
3052 mutex_enter(&vp->v_lock);
3053 if (vp->v_path != NULL) {
3054 mutex_exit(&vp->v_lock);
3055 kmem_free(buf, len + 1);
3056 return;
3059 vp->v_path = buf;
3060 bcopy(str, vp->v_path, len);
3061 vp->v_path[len] = '\0';
3063 mutex_exit(&vp->v_lock);
3067 * Called from within filesystem's vop_rename() to handle renames once the
3068 * target vnode is available.
3070 void
3071 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3073 char *tmp;
3075 mutex_enter(&vp->v_lock);
3076 tmp = vp->v_path;
3077 vp->v_path = NULL;
3078 mutex_exit(&vp->v_lock);
3079 vn_setpath(rootdir, dvp, vp, nm, len);
3080 if (tmp != NULL)
3081 kmem_free(tmp, strlen(tmp) + 1);
3085 * Similar to vn_setpath_str(), this function sets the path of the destination
3086 * vnode to be the same as that of the source vnode.
3088 void
3089 vn_copypath(struct vnode *src, struct vnode *dst)
3091 char *buf;
3092 int alloc;
3094 mutex_enter(&src->v_lock);
3095 if (src->v_path == NULL) {
3096 mutex_exit(&src->v_lock);
3097 return;
3099 alloc = strlen(src->v_path) + 1;
3101 /* avoid kmem_alloc() with lock held */
3102 mutex_exit(&src->v_lock);
3103 buf = kmem_alloc(alloc, KM_SLEEP);
3104 mutex_enter(&src->v_lock);
3105 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3106 mutex_exit(&src->v_lock);
3107 kmem_free(buf, alloc);
3108 return;
3110 bcopy(src->v_path, buf, alloc);
3111 mutex_exit(&src->v_lock);
3113 mutex_enter(&dst->v_lock);
3114 if (dst->v_path != NULL) {
3115 mutex_exit(&dst->v_lock);
3116 kmem_free(buf, alloc);
3117 return;
3119 dst->v_path = buf;
3120 mutex_exit(&dst->v_lock);
3124 * XXX Private interface for segvn routines that handle vnode
3125 * large page segments.
3127 * return 1 if vp's file system fop_pageio() implementation
3128 * can be safely used instead of fop_getpage() for handling
3129 * pagefaults against regular non-swap files. The fop_pageio()
3130 * interface is considered safe here if its implementation
3131 * is very close to the fop_getpage() implementation,
3132 * e.g. it zeroes out the part of the page beyond EOF, doesn't
3133 * panic if there are file holes but instead returns an error,
3134 * and doesn't assume the file won't be changed by user writes, etc.
3136 * return 0 otherwise.
3138 * For now allow segvn to only use fop_pageio() with ufs and nfs.
3141 vn_vmpss_usepageio(vnode_t *vp)
3143 vfs_t *vfsp = vp->v_vfsp;
3144 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3145 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3146 char **fsok = pageio_ok_fss;
3148 if (fsname == NULL) {
3149 return (0);
3152 for (; *fsok; fsok++) {
3153 if (strcmp(*fsok, fsname) == 0) {
3154 return (1);
3157 return (0);
3160 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3163 fop_open(
3164 vnode_t **vpp,
3165 int mode,
3166 cred_t *cr,
3167 caller_context_t *ct)
3169 int ret;
3170 vnode_t *vp = *vpp;
3172 VN_HOLD(vp);
3174 * Adding to the vnode counts before calling open
3175 * avoids the need for a mutex. It circumvents a race
3176 * condition where a query made on the vnode counts results in a
3177 * false negative: the inquirer goes away believing the file is
3178 * not open when an open on the file is already under way.
3180 * The counts are meant to prevent NFS from granting a delegation
3181 * when it would be dangerous to do so.
3183 * The vnode counts are only kept on regular files.
3185 if ((*vpp)->v_type == VREG) {
3186 if (mode & FREAD)
3187 atomic_inc_32(&(*vpp)->v_rdcnt);
3188 if (mode & FWRITE)
3189 atomic_inc_32(&(*vpp)->v_wrcnt);
3192 VOPXID_MAP_CR(vp, cr);
3194 ret = fop_open_dispatch(vpp, mode, cr, ct);
3196 if (ret) {
3198 * Use the saved vp just in case the vnode ptr got trashed
3199 * by the error.
3201 VOPSTATS_UPDATE(vp, open);
3202 if ((vp->v_type == VREG) && (mode & FREAD))
3203 atomic_dec_32(&vp->v_rdcnt);
3204 if ((vp->v_type == VREG) && (mode & FWRITE))
3205 atomic_dec_32(&vp->v_wrcnt);
3206 } else {
3208 * Some filesystems will return a different vnode,
3209 * but the same path was still used to open it.
3210 * So if we do change the vnode and need to
3211 * copy over the path, do so here, rather than special
3212 * casing each filesystem. Adjust the vnode counts to
3213 * reflect the vnode switch.
3215 VOPSTATS_UPDATE(*vpp, open);
3216 if (*vpp != vp && *vpp != NULL) {
3217 vn_copypath(vp, *vpp);
3218 if (((*vpp)->v_type == VREG) && (mode & FREAD))
3219 atomic_inc_32(&(*vpp)->v_rdcnt);
3220 if ((vp->v_type == VREG) && (mode & FREAD))
3221 atomic_dec_32(&vp->v_rdcnt);
3222 if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3223 atomic_inc_32(&(*vpp)->v_wrcnt);
3224 if ((vp->v_type == VREG) && (mode & FWRITE))
3225 atomic_dec_32(&vp->v_wrcnt);
3228 VN_RELE(vp);
3229 return (ret);
3233 fop_close(
3234 vnode_t *vp,
3235 int flag,
3236 int count,
3237 offset_t offset,
3238 cred_t *cr,
3239 caller_context_t *ct)
3241 int err;
3243 VOPXID_MAP_CR(vp, cr);
3245 err = fop_close_dispatch(vp, flag, count, offset, cr, ct);
3247 VOPSTATS_UPDATE(vp, close);
3249 * Check passed in count to handle possible dups. Vnode counts are only
3250 * kept on regular files
3252 if ((vp->v_type == VREG) && (count == 1)) {
3253 if (flag & FREAD) {
3254 ASSERT(vp->v_rdcnt > 0);
3255 atomic_dec_32(&vp->v_rdcnt);
3257 if (flag & FWRITE) {
3258 ASSERT(vp->v_wrcnt > 0);
3259 atomic_dec_32(&vp->v_wrcnt);
3262 return (err);
3266 fop_read(
3267 vnode_t *vp,
3268 uio_t *uiop,
3269 int ioflag,
3270 cred_t *cr,
3271 caller_context_t *ct)
3273 int err;
3274 ssize_t resid_start = uiop->uio_resid;
3276 VOPXID_MAP_CR(vp, cr);
3278 err = fop_read_dispatch(vp, uiop, ioflag, cr, ct);
3280 VOPSTATS_UPDATE_IO(vp, read,
3281 read_bytes, (resid_start - uiop->uio_resid));
3282 return (err);
3286 fop_write(
3287 vnode_t *vp,
3288 uio_t *uiop,
3289 int ioflag,
3290 cred_t *cr,
3291 caller_context_t *ct)
3293 int err;
3294 ssize_t resid_start = uiop->uio_resid;
3296 VOPXID_MAP_CR(vp, cr);
3298 err = fop_write_dispatch(vp, uiop, ioflag, cr, ct);
3300 VOPSTATS_UPDATE_IO(vp, write,
3301 write_bytes, (resid_start - uiop->uio_resid));
3302 return (err);
3306 fop_ioctl(
3307 vnode_t *vp,
3308 int cmd,
3309 intptr_t arg,
3310 int flag,
3311 cred_t *cr,
3312 int *rvalp,
3313 caller_context_t *ct)
3315 int err;
3317 VOPXID_MAP_CR(vp, cr);
3319 err = fop_ioctl_dispatch(vp, cmd, arg, flag, cr, rvalp, ct);
3321 VOPSTATS_UPDATE(vp, ioctl);
3322 return (err);
3326 fop_setfl(
3327 vnode_t *vp,
3328 int oflags,
3329 int nflags,
3330 cred_t *cr,
3331 caller_context_t *ct)
3333 int err;
3335 VOPXID_MAP_CR(vp, cr);
3337 if (vp->v_op->vop_setfl == NULL)
3338 err = fs_setfl(vp, oflags, nflags, cr, ct);
3339 else
3340 err = vp->v_op->vop_setfl(vp, oflags, nflags, cr, ct);
3342 VOPSTATS_UPDATE(vp, setfl);
3343 return (err);
3347 fop_getattr(
3348 vnode_t *vp,
3349 vattr_t *vap,
3350 int flags,
3351 cred_t *cr,
3352 caller_context_t *ct)
3354 int err;
3356 VOPXID_MAP_CR(vp, cr);
3359 * If this file system doesn't understand the xvattr extensions
3360 * then turn off the xvattr bit.
3362 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3363 vap->va_mask &= ~AT_XVATTR;
3367 * We're only allowed to skip the ACL check iff we used a 32 bit
3368 * ACE mask with fop_access() to determine permissions.
3370 if ((flags & ATTR_NOACLCHECK) &&
3371 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
3372 return (EINVAL);
3374 err = fop_getattr_dispatch(vp, vap, flags, cr, ct);
3376 VOPSTATS_UPDATE(vp, getattr);
3377 return (err);
3381 fop_setattr(
3382 vnode_t *vp,
3383 vattr_t *vap,
3384 int flags,
3385 cred_t *cr,
3386 caller_context_t *ct)
3388 int err;
3390 VOPXID_MAP_CR(vp, cr);
3393 * If this file system doesn't understand the xvattr extensions
3394 * then turn off the xvattr bit.
3396 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3397 vap->va_mask &= ~AT_XVATTR;
3401 * We're only allowed to skip the ACL check iff we used a 32 bit
3402 * ACE mask with fop_access() to determine permissions.
3404 if ((flags & ATTR_NOACLCHECK) &&
3405 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
3406 return (EINVAL);
3408 err = fop_setattr_dispatch(vp, vap, flags, cr, ct);
3410 VOPSTATS_UPDATE(vp, setattr);
3411 return (err);
3415 fop_access(
3416 vnode_t *vp,
3417 int mode,
3418 int flags,
3419 cred_t *cr,
3420 caller_context_t *ct)
3422 int err;
3424 if ((flags & V_ACE_MASK) &&
3425 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3426 return (EINVAL);
3429 VOPXID_MAP_CR(vp, cr);
3431 err = fop_access_dispatch(vp, mode, flags, cr, ct);
3433 VOPSTATS_UPDATE(vp, access);
3434 return (err);
3438 fop_lookup(
3439 vnode_t *dvp,
3440 char *nm,
3441 vnode_t **vpp,
3442 pathname_t *pnp,
3443 int flags,
3444 vnode_t *rdir,
3445 cred_t *cr,
3446 caller_context_t *ct,
3447 int *deflags, /* Returned per-dirent flags */
3448 pathname_t *ppnp) /* Returned case-preserved name in directory */
3450 int ret;
3453 * If this file system doesn't support case-insensitive access
3454 * and said access is requested, fail quickly. It is required
3455 * that if the vfs supports case-insensitive lookup, it also
3456 * supports extended dirent flags.
3458 if (flags & FIGNORECASE &&
3459 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3460 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3461 return (EINVAL);
3463 VOPXID_MAP_CR(dvp, cr);
3465 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3466 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3467 } else if (dvp->v_op->vop_lookup == NULL) {
3468 ret = ENOSYS;
3469 } else {
3470 ret = dvp->v_op->vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
3471 cr, ct, deflags, ppnp);
3474 if (ret == 0 && *vpp) {
3475 VOPSTATS_UPDATE(*vpp, lookup);
3476 if ((*vpp)->v_path == NULL) {
3477 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3481 return (ret);
3485 fop_create(
3486 vnode_t *dvp,
3487 char *name,
3488 vattr_t *vap,
3489 vcexcl_t excl,
3490 int mode,
3491 vnode_t **vpp,
3492 cred_t *cr,
3493 int flags,
3494 caller_context_t *ct,
3495 vsecattr_t *vsecp) /* ACL to set during create */
3497 int ret;
3499 if (vsecp != NULL &&
3500 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3501 return (EINVAL);
3504 * If this file system doesn't support case-insensitive access
3505 * and said access is requested, fail quickly.
3507 if (flags & FIGNORECASE &&
3508 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3509 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3510 return (EINVAL);
3512 VOPXID_MAP_CR(dvp, cr);
3514 if (dvp->v_op->vop_create == NULL)
3515 ret = ENOSYS;
3516 else
3517 ret = dvp->v_op->vop_create(dvp, name, vap, excl, mode, vpp,
3518 cr, flags, ct, vsecp);
3520 if (ret == 0 && *vpp) {
3521 VOPSTATS_UPDATE(*vpp, create);
3522 if ((*vpp)->v_path == NULL) {
3523 vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3527 return (ret);
3531 fop_remove(
3532 vnode_t *dvp,
3533 char *nm,
3534 cred_t *cr,
3535 caller_context_t *ct,
3536 int flags)
3538 int err;
3541 * If this file system doesn't support case-insensitive access
3542 * and said access is requested, fail quickly.
3544 if (flags & FIGNORECASE &&
3545 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3546 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3547 return (EINVAL);
3549 VOPXID_MAP_CR(dvp, cr);
3551 if (dvp->v_op->vop_remove == NULL)
3552 err = ENOSYS;
3553 else
3554 err = dvp->v_op->vop_remove(dvp, nm, cr, ct, flags);
3556 VOPSTATS_UPDATE(dvp, remove);
3557 return (err);
3561 fop_link(
3562 vnode_t *tdvp,
3563 vnode_t *svp,
3564 char *tnm,
3565 cred_t *cr,
3566 caller_context_t *ct,
3567 int flags)
3569 int err;
3572 * If the target file system doesn't support case-insensitive access
3573 * and said access is requested, fail quickly.
3575 if (flags & FIGNORECASE &&
3576 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3577 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3578 return (EINVAL);
3580 VOPXID_MAP_CR(tdvp, cr);
3582 if (tdvp->v_op->vop_link == NULL)
3583 err = ENOSYS;
3584 else
3585 err = tdvp->v_op->vop_link(tdvp, svp, tnm, cr, ct, flags);
3587 VOPSTATS_UPDATE(tdvp, link);
3588 return (err);
3592 fop_rename(
3593 vnode_t *sdvp,
3594 char *snm,
3595 vnode_t *tdvp,
3596 char *tnm,
3597 cred_t *cr,
3598 caller_context_t *ct,
3599 int flags)
3601 int err;
3604 * If the file system involved does not support
3605 * case-insensitive access and said access is requested, fail
3606 * quickly.
3608 if (flags & FIGNORECASE &&
3609 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3610 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3611 return (EINVAL);
3613 VOPXID_MAP_CR(tdvp, cr);
3615 if (sdvp->v_op->vop_rename == NULL)
3616 err = ENOSYS;
3617 else
3618 err = sdvp->v_op->vop_rename(sdvp, snm, tdvp, tnm, cr, ct,
3619 flags);
3621 VOPSTATS_UPDATE(sdvp, rename);
3622 return (err);
3626 fop_mkdir(
3627 vnode_t *dvp,
3628 char *dirname,
3629 vattr_t *vap,
3630 vnode_t **vpp,
3631 cred_t *cr,
3632 caller_context_t *ct,
3633 int flags,
3634 vsecattr_t *vsecp) /* ACL to set during create */
3636 int ret;
3638 if (vsecp != NULL &&
3639 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3640 return (EINVAL);
3643 * If this file system doesn't support case-insensitive access
3644 * and said access is requested, fail quickly.
3646 if (flags & FIGNORECASE &&
3647 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3648 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3649 return (EINVAL);
3651 VOPXID_MAP_CR(dvp, cr);
3653 if (dvp->v_op->vop_mkdir == NULL)
3654 ret = ENOSYS;
3655 else
3656 ret = dvp->v_op->vop_mkdir(dvp, dirname, vap, vpp, cr, ct,
3657 flags, vsecp);
3659 if (ret == 0 && *vpp) {
3660 VOPSTATS_UPDATE(*vpp, mkdir);
3661 if ((*vpp)->v_path == NULL) {
3662 vn_setpath(rootdir, dvp, *vpp, dirname,
3663 strlen(dirname));
3667 return (ret);
3671 fop_rmdir(
3672 vnode_t *dvp,
3673 char *nm,
3674 vnode_t *cdir,
3675 cred_t *cr,
3676 caller_context_t *ct,
3677 int flags)
3679 int err;
3682 * If this file system doesn't support case-insensitive access
3683 * and said access is requested, fail quickly.
3685 if (flags & FIGNORECASE &&
3686 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3687 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3688 return (EINVAL);
3690 VOPXID_MAP_CR(dvp, cr);
3692 if (dvp->v_op->vop_rmdir == NULL)
3693 err = ENOSYS;
3694 else
3695 err = dvp->v_op->vop_rmdir(dvp, nm, cdir, cr, ct, flags);
3697 VOPSTATS_UPDATE(dvp, rmdir);
3698 return (err);
3702 fop_readdir(
3703 vnode_t *vp,
3704 uio_t *uiop,
3705 cred_t *cr,
3706 int *eofp,
3707 caller_context_t *ct,
3708 int flags)
3710 int err;
3711 ssize_t resid_start = uiop->uio_resid;
3714 * If this file system doesn't support retrieving directory
3715 * entry flags and said access is requested, fail quickly.
3717 if (flags & V_RDDIR_ENTFLAGS &&
3718 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3719 return (EINVAL);
3721 VOPXID_MAP_CR(vp, cr);
3723 err = fop_readdir_dispatch(vp, uiop, cr, eofp, ct, flags);
3725 VOPSTATS_UPDATE_IO(vp, readdir,
3726 readdir_bytes, (resid_start - uiop->uio_resid));
3727 return (err);
3731 fop_symlink(
3732 vnode_t *dvp,
3733 char *linkname,
3734 vattr_t *vap,
3735 char *target,
3736 cred_t *cr,
3737 caller_context_t *ct,
3738 int flags)
3740 int err;
3741 xvattr_t xvattr;
3744 * If this file system doesn't support case-insensitive access
3745 * and said access is requested, fail quickly.
3747 if (flags & FIGNORECASE &&
3748 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3749 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3750 return (EINVAL);
3752 VOPXID_MAP_CR(dvp, cr);
3754 /* check for reparse point */
3755 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3756 (strncmp(target, FS_REPARSE_TAG_STR,
3757 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3758 if (!fs_reparse_mark(target, vap, &xvattr))
3759 vap = (vattr_t *)&xvattr;
3762 if (dvp->v_op->vop_symlink == NULL)
3763 err = ENOSYS;
3764 else
3765 err = dvp->v_op->vop_symlink(dvp, linkname, vap, target, cr,
3766 ct, flags);
3768 VOPSTATS_UPDATE(dvp, symlink);
3769 return (err);
3773 fop_readlink(
3774 vnode_t *vp,
3775 uio_t *uiop,
3776 cred_t *cr,
3777 caller_context_t *ct)
3779 int err;
3781 VOPXID_MAP_CR(vp, cr);
3783 err = fop_readlink_dispatch(vp, uiop, cr, ct);
3785 VOPSTATS_UPDATE(vp, readlink);
3786 return (err);
3790 fop_fsync(
3791 vnode_t *vp,
3792 int syncflag,
3793 cred_t *cr,
3794 caller_context_t *ct)
3796 int err;
3798 VOPXID_MAP_CR(vp, cr);
3800 err = fop_fsync_dispatch(vp, syncflag, cr, ct);
3802 VOPSTATS_UPDATE(vp, fsync);
3803 return (err);
3806 void
3807 fop_inactive(
3808 vnode_t *vp,
3809 cred_t *cr,
3810 caller_context_t *ct)
3812 /* Need to update stats before vop call since we may lose the vnode */
3813 VOPSTATS_UPDATE(vp, inactive);
3815 VOPXID_MAP_CR(vp, cr);
3817 if (vp->v_op->vop_inactive != NULL)
3818 vp->v_op->vop_inactive(vp, cr, ct);
3822 fop_fid(
3823 vnode_t *vp,
3824 fid_t *fidp,
3825 caller_context_t *ct)
3827 int err;
3829 err = fop_fid_dispatch(vp, fidp, ct);
3831 VOPSTATS_UPDATE(vp, fid);
3832 return (err);
3836 fop_rwlock(
3837 vnode_t *vp,
3838 int write_lock,
3839 caller_context_t *ct)
3841 int ret;
3843 if (vp->v_op->vop_rwlock == NULL)
3844 ret = fs_rwlock(vp, write_lock, ct);
3845 else
3846 ret = vp->v_op->vop_rwlock(vp, write_lock, ct);
3848 VOPSTATS_UPDATE(vp, rwlock);
3849 return (ret);
3852 void
3853 fop_rwunlock(
3854 vnode_t *vp,
3855 int write_lock,
3856 caller_context_t *ct)
3858 if (vp->v_op->vop_rwunlock == NULL)
3859 fs_rwunlock(vp, write_lock, ct);
3860 else
3861 vp->v_op->vop_rwunlock(vp, write_lock, ct);
3863 VOPSTATS_UPDATE(vp, rwunlock);
3867 fop_seek(
3868 vnode_t *vp,
3869 offset_t ooff,
3870 offset_t *noffp,
3871 caller_context_t *ct)
3873 int err;
3875 err = fop_seek_dispatch(vp, ooff, noffp, ct);
3877 VOPSTATS_UPDATE(vp, seek);
3878 return (err);
3882 fop_cmp(
3883 vnode_t *vp1,
3884 vnode_t *vp2,
3885 caller_context_t *ct)
3887 int err;
3889 if (vp1->v_op->vop_cmp == NULL)
3890 err = fs_cmp(vp1, vp2, ct);
3891 else
3892 err = vp1->v_op->vop_cmp(vp1, vp2, ct);
3894 VOPSTATS_UPDATE(vp1, cmp);
3895 return (err);
3899 fop_frlock(
3900 vnode_t *vp,
3901 int cmd,
3902 flock64_t *bfp,
3903 int flag,
3904 offset_t offset,
3905 struct flk_callback *flk_cbp,
3906 cred_t *cr,
3907 caller_context_t *ct)
3909 int err;
3911 VOPXID_MAP_CR(vp, cr);
3913 if (vp->v_op->vop_frlock == NULL)
3914 err = fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3915 else
3916 err = vp->v_op->vop_frlock(vp, cmd, bfp, flag, offset,
3917 flk_cbp, cr, ct);
3919 VOPSTATS_UPDATE(vp, frlock);
3920 return (err);
3924 fop_space(
3925 vnode_t *vp,
3926 int cmd,
3927 flock64_t *bfp,
3928 int flag,
3929 offset_t offset,
3930 cred_t *cr,
3931 caller_context_t *ct)
3933 int err;
3935 VOPXID_MAP_CR(vp, cr);
3937 err = fop_space_dispatch(vp, cmd, bfp, flag, offset, cr, ct);
3939 VOPSTATS_UPDATE(vp, space);
3940 return (err);
3944 fop_realvp(
3945 vnode_t *vp,
3946 vnode_t **vpp,
3947 caller_context_t *ct)
3949 int err;
3951 err = fop_realvp_dispatch(vp, vpp, ct);
3953 VOPSTATS_UPDATE(vp, realvp);
3954 return (err);
3958 fop_getpage(
3959 vnode_t *vp,
3960 offset_t off,
3961 size_t len,
3962 uint_t *protp,
3963 page_t **plarr,
3964 size_t plsz,
3965 struct seg *seg,
3966 caddr_t addr,
3967 enum seg_rw rw,
3968 cred_t *cr,
3969 caller_context_t *ct)
3971 int err;
3973 VOPXID_MAP_CR(vp, cr);
3975 err = fop_getpage_dispatch(vp, off, len, protp, plarr, plsz, seg,
3976 addr, rw, cr, ct);
3978 VOPSTATS_UPDATE(vp, getpage);
3979 return (err);
3983 fop_putpage(
3984 vnode_t *vp,
3985 offset_t off,
3986 size_t len,
3987 int flags,
3988 cred_t *cr,
3989 caller_context_t *ct)
3991 int err;
3993 VOPXID_MAP_CR(vp, cr);
3995 err = fop_putpage_dispatch(vp, off, len, flags, cr, ct);
3997 VOPSTATS_UPDATE(vp, putpage);
3998 return (err);
4002 fop_map(
4003 vnode_t *vp,
4004 offset_t off,
4005 struct as *as,
4006 caddr_t *addrp,
4007 size_t len,
4008 uchar_t prot,
4009 uchar_t maxprot,
4010 uint_t flags,
4011 cred_t *cr,
4012 caller_context_t *ct)
4014 int err;
4016 VOPXID_MAP_CR(vp, cr);
4018 err = fop_map_dispatch(vp, off, as, addrp, len, prot, maxprot,
4019 flags, cr, ct);
4021 VOPSTATS_UPDATE(vp, map);
4022 return (err);
4026 fop_addmap(
4027 vnode_t *vp,
4028 offset_t off,
4029 struct as *as,
4030 caddr_t addr,
4031 size_t len,
4032 uchar_t prot,
4033 uchar_t maxprot,
4034 uint_t flags,
4035 cred_t *cr,
4036 caller_context_t *ct)
4038 int error;
4039 u_longlong_t delta;
4041 VOPXID_MAP_CR(vp, cr);
4043 error = fop_addmap_dispatch(vp, off, as, addr, len, prot, maxprot,
4044 flags, cr, ct);
4046 if ((!error) && (vp->v_type == VREG)) {
4047 delta = (u_longlong_t)btopr(len);
4049 * If the file is mapped MAP_PRIVATE, it can't be written back
4050 * even if open for write. Handle as read.
4052 if (flags & MAP_PRIVATE) {
4053 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4054 (int64_t)delta);
4055 } else {
4057 * atomic_add_64 forces the fetch of a 64 bit value to
4058 * be atomic on 32 bit machines
4060 if (maxprot & PROT_WRITE)
4061 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4062 (int64_t)delta);
4063 if (maxprot & PROT_READ)
4064 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4065 (int64_t)delta);
4066 if (maxprot & PROT_EXEC)
4067 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4068 (int64_t)delta);
4071 VOPSTATS_UPDATE(vp, addmap);
4072 return (error);
4076 fop_delmap(
4077 vnode_t *vp,
4078 offset_t off,
4079 struct as *as,
4080 caddr_t addr,
4081 size_t len,
4082 uint_t prot,
4083 uint_t maxprot,
4084 uint_t flags,
4085 cred_t *cr,
4086 caller_context_t *ct)
4088 int error;
4089 u_longlong_t delta;
4091 VOPXID_MAP_CR(vp, cr);
4093 error = fop_delmap_dispatch(vp, off, as, addr, len, prot, maxprot,
4094 flags, cr, ct);
4097 * NFS calls into delmap twice: the first time
4098 * it simply establishes a callback mechanism and returns EAGAIN,
4099 * while the real work is done upon the second invocation.
4100 * We have to detect this here and only decrement the counts upon
4101 * the second delmap request.
4103 if ((error != EAGAIN) && (vp->v_type == VREG)) {
4105 delta = (u_longlong_t)btopr(len);
4107 if (flags & MAP_PRIVATE) {
4108 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4109 (int64_t)(-delta));
4110 } else {
4112 * atomic_add_64 forces the fetch of a 64 bit value
4113 * to be atomic on 32 bit machines
4115 if (maxprot & PROT_WRITE)
4116 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4117 (int64_t)(-delta));
4118 if (maxprot & PROT_READ)
4119 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4120 (int64_t)(-delta));
4121 if (maxprot & PROT_EXEC)
4122 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4123 (int64_t)(-delta));
4126 VOPSTATS_UPDATE(vp, delmap);
4127 return (error);
4132 fop_poll(
4133 vnode_t *vp,
4134 short events,
4135 int anyyet,
4136 short *reventsp,
4137 struct pollhead **phpp,
4138 caller_context_t *ct)
4140 int err;
4142 if (vp->v_op->vop_poll == NULL)
4143 err = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
4144 else
4145 err = vp->v_op->vop_poll(vp, events, anyyet, reventsp, phpp,
4146 ct);
4148 VOPSTATS_UPDATE(vp, poll);
4149 return (err);
4153 fop_dump(
4154 vnode_t *vp,
4155 caddr_t addr,
4156 offset_t lbdn,
4157 offset_t dblks,
4158 caller_context_t *ct)
4160 int err;
4162 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4163 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4164 return (EIO);
4166 err = fop_dump_dispatch(vp, addr, lbdn, dblks, ct);
4168 VOPSTATS_UPDATE(vp, dump);
4169 return (err);
4173 fop_pathconf(
4174 vnode_t *vp,
4175 int cmd,
4176 ulong_t *valp,
4177 cred_t *cr,
4178 caller_context_t *ct)
4180 int err;
4182 VOPXID_MAP_CR(vp, cr);
4184 if (vp->v_op->vop_pathconf == NULL)
4185 err = fs_pathconf(vp, cmd, valp, cr, ct);
4186 else
4187 err = vp->v_op->vop_pathconf(vp, cmd, valp, cr, ct);
4189 VOPSTATS_UPDATE(vp, pathconf);
4190 return (err);
4194 fop_pageio(
4195 vnode_t *vp,
4196 struct page *pp,
4197 uoff_t io_off,
4198 size_t io_len,
4199 int flags,
4200 cred_t *cr,
4201 caller_context_t *ct)
4203 int err;
4205 VOPXID_MAP_CR(vp, cr);
4207 err = fop_pageio_dispatch(vp, pp, io_off, io_len, flags, cr, ct);
4209 VOPSTATS_UPDATE(vp, pageio);
4210 return (err);
4214 fop_dumpctl(
4215 vnode_t *vp,
4216 int action,
4217 offset_t *blkp,
4218 caller_context_t *ct)
4220 int err;
4222 err = fop_dumpctl_dispatch(vp, action, blkp, ct);
4224 VOPSTATS_UPDATE(vp, dumpctl);
4225 return (err);
4228 void
4229 fop_dispose(
4230 vnode_t *vp,
4231 page_t *pp,
4232 int flag,
4233 int dn,
4234 cred_t *cr,
4235 caller_context_t *ct)
4237 /* Must do stats first since it's possible to lose the vnode */
4238 VOPSTATS_UPDATE(vp, dispose);
4240 VOPXID_MAP_CR(vp, cr);
4242 if (vp->v_op->vop_dispose == NULL)
4243 fs_dispose(vp, pp, flag, dn, cr, ct);
4244 else
4245 vp->v_op->vop_dispose(vp, pp, flag, dn, cr, ct);
4249 fop_setsecattr(
4250 vnode_t *vp,
4251 vsecattr_t *vsap,
4252 int flag,
4253 cred_t *cr,
4254 caller_context_t *ct)
4256 int err;
4258 VOPXID_MAP_CR(vp, cr);
4261 * We're only allowed to skip the ACL check iff we used a 32 bit
4262 * ACE mask with fop_access() to determine permissions.
4264 if ((flag & ATTR_NOACLCHECK) &&
4265 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4266 return (EINVAL);
4269 err = fop_setsecattr_dispatch(vp, vsap, flag, cr, ct);
4271 VOPSTATS_UPDATE(vp, setsecattr);
4272 return (err);
4276 fop_getsecattr(
4277 vnode_t *vp,
4278 vsecattr_t *vsap,
4279 int flag,
4280 cred_t *cr,
4281 caller_context_t *ct)
4283 int err;
4286 * We're only allowed to skip the ACL check iff we used a 32 bit
4287 * ACE mask with fop_access() to determine permissions.
4289 if ((flag & ATTR_NOACLCHECK) &&
4290 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4291 return (EINVAL);
4294 VOPXID_MAP_CR(vp, cr);
4296 if (vp->v_op->vop_getsecattr == NULL)
4297 err = fs_fab_acl(vp, vsap, flag, cr, ct);
4298 else
4299 err = vp->v_op->vop_getsecattr(vp, vsap, flag, cr, ct);
4301 VOPSTATS_UPDATE(vp, getsecattr);
4302 return (err);
4306 fop_shrlock(
4307 vnode_t *vp,
4308 int cmd,
4309 struct shrlock *shr,
4310 int flag,
4311 cred_t *cr,
4312 caller_context_t *ct)
4314 int err;
4316 VOPXID_MAP_CR(vp, cr);
4318 if (vp->v_op->vop_shrlock == NULL)
4319 err = fs_shrlock(vp, cmd, shr, flag, cr, ct);
4320 else
4321 err = vp->v_op->vop_shrlock(vp, cmd, shr, flag, cr, ct);
4323 VOPSTATS_UPDATE(vp, shrlock);
4324 return (err);
4328 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4329 caller_context_t *ct)
4331 int err;
4333 err = fop_vnevent_dispatch(vp, vnevent, dvp, fnm, ct);
4335 VOPSTATS_UPDATE(vp, vnevent);
4336 return (err);
4340 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4341 caller_context_t *ct)
4343 int err;
4345 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4346 return (ENOTSUP);
4348 err = fop_reqzcbuf_dispatch(vp, ioflag, uiop, cr, ct);
4350 VOPSTATS_UPDATE(vp, reqzcbuf);
4351 return (err);
4355 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4357 int err;
4359 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4360 return (ENOTSUP);
4362 err = fop_retzcbuf_dispatch(vp, uiop, cr, ct);
4364 VOPSTATS_UPDATE(vp, retzcbuf);
4365 return (err);
4369 * Default destructor.
4370 * Needed because a NULL destructor means that the key is unused.
4372 /* ARGSUSED */
4373 void
4374 vsd_defaultdestructor(void *value)
4378 * Create a key (index into per vnode array)
4379 * Locks out vsd_create, vsd_destroy, and vsd_free
4380 * May allocate memory with lock held
4382 void
4383 vsd_create(uint_t *keyp, void (*destructor)(void *))
4385 int i;
4386 uint_t nkeys;
4389 * if key is allocated, do nothing
4391 mutex_enter(&vsd_lock);
4392 if (*keyp) {
4393 mutex_exit(&vsd_lock);
4394 return;
4397 * find an unused key
4399 if (destructor == NULL)
4400 destructor = vsd_defaultdestructor;
4402 for (i = 0; i < vsd_nkeys; ++i)
4403 if (vsd_destructor[i] == NULL)
4404 break;
4407 * if no unused keys, increase the size of the destructor array
4409 if (i == vsd_nkeys) {
4410 if ((nkeys = (vsd_nkeys << 1)) == 0)
4411 nkeys = 1;
4412 vsd_destructor =
4413 (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4414 (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4415 (size_t)(nkeys * sizeof (void (*)(void *))));
4416 vsd_nkeys = nkeys;
4420 * allocate the next available unused key
4422 vsd_destructor[i] = destructor;
4423 *keyp = i + 1;
4425 /* create vsd_list, if it doesn't exist */
4426 if (vsd_list == NULL) {
4427 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4428 list_create(vsd_list, sizeof (struct vsd_node),
4429 offsetof(struct vsd_node, vs_nodes));
4432 mutex_exit(&vsd_lock);
4436 * Destroy a key
4438 * Assumes that the caller is preventing vsd_set and vsd_get
4439 * Locks out vsd_create, vsd_destroy, and vsd_free
4440 * May free memory with lock held
4442 void
4443 vsd_destroy(uint_t *keyp)
4445 uint_t key;
4446 struct vsd_node *vsd;
4449 * protect the key namespace and our destructor lists
4451 mutex_enter(&vsd_lock);
4452 key = *keyp;
4453 *keyp = 0;
4455 ASSERT(key <= vsd_nkeys);
4458 * if the key is valid
4460 if (key != 0) {
4461 uint_t k = key - 1;
4463 * for every vnode with VSD, call key's destructor
4465 for (vsd = list_head(vsd_list); vsd != NULL;
4466 vsd = list_next(vsd_list, vsd)) {
4468 * no VSD for key in this vnode
4470 if (key > vsd->vs_nkeys)
4471 continue;
4473 * call destructor for key
4475 if (vsd->vs_value[k] && vsd_destructor[k])
4476 (*vsd_destructor[k])(vsd->vs_value[k]);
4478 * reset value for key
4480 vsd->vs_value[k] = NULL;
4483 * actually free the key (NULL destructor == unused)
4485 vsd_destructor[k] = NULL;
4488 mutex_exit(&vsd_lock);
4492 * Quickly return the per vnode value that was stored with the specified key
4493 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4494 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4496 void *
4497 vsd_get(vnode_t *vp, uint_t key)
4499 struct vsd_node *vsd;
4501 ASSERT(vp != NULL);
4502 ASSERT(mutex_owned(&vp->v_vsd_lock));
4504 vsd = vp->v_vsd;
4506 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4507 return (vsd->vs_value[key - 1]);
4508 return (NULL);
4512 * Set a per vnode value indexed with the specified key
4513 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4516 vsd_set(vnode_t *vp, uint_t key, void *value)
4518 struct vsd_node *vsd;
4520 ASSERT(vp != NULL);
4521 ASSERT(mutex_owned(&vp->v_vsd_lock));
4523 if (key == 0)
4524 return (EINVAL);
4526 vsd = vp->v_vsd;
4527 if (vsd == NULL)
4528 vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4531 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4532 * code won't execute and we will continue down and allocate space for
4533 * the vs_value array.
4534 * If the caller is replacing one value with another, then it is up
4535 * to the caller to free/rele/destroy the previous value (if needed).
4537 if (key <= vsd->vs_nkeys) {
4538 vsd->vs_value[key - 1] = value;
4539 return (0);
4542 ASSERT(key <= vsd_nkeys);
4544 if (vsd->vs_nkeys == 0) {
4545 mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4547 * Link onto list of all VSD nodes.
4549 list_insert_head(vsd_list, vsd);
4550 mutex_exit(&vsd_lock);
4554 * Allocate vnode local storage and set the value for key
4556 vsd->vs_value = vsd_realloc(vsd->vs_value,
4557 vsd->vs_nkeys * sizeof (void *),
4558 key * sizeof (void *));
4559 vsd->vs_nkeys = key;
4560 vsd->vs_value[key - 1] = value;
4562 return (0);
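/*
 * Example (illustrative sketch, not part of this file): a module
 * allocates a key once with vsd_create(), then stores and retrieves
 * per-vnode data while holding v_vsd_lock, as vsd_set()/vsd_get()
 * require.  The names are hypothetical; passing a NULL destructor to
 * vsd_create() selects vsd_defaultdestructor.
 */
#if 0	/* usage sketch only */
static uint_t mymod_vsd_key;

static void
mymod_init(void)
{
	vsd_create(&mymod_vsd_key, NULL);
}

static void
mymod_tag_vnode(vnode_t *vp, void *data)
{
	mutex_enter(&vp->v_vsd_lock);
	(void) vsd_set(vp, mymod_vsd_key, data);
	mutex_exit(&vp->v_vsd_lock);
}

static void *
mymod_get_tag(vnode_t *vp)
{
	void *data;

	mutex_enter(&vp->v_vsd_lock);
	data = vsd_get(vp, mymod_vsd_key);
	mutex_exit(&vp->v_vsd_lock);
	return (data);
}
#endif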
4566 * Called from vn_free() to run the destructor function for each vsd
4567 * Locks out vsd_create and vsd_destroy
4568 * Assumes that the destructor *DOES NOT* use vsd
4570 void
4571 vsd_free(vnode_t *vp)
4573 int i;
4574 struct vsd_node *vsd = vp->v_vsd;
4576 if (vsd == NULL)
4577 return;
4579 if (vsd->vs_nkeys == 0) {
4580 kmem_free(vsd, sizeof (*vsd));
4581 vp->v_vsd = NULL;
4582 return;
4586 * lock out vsd_create and vsd_destroy, call
4587 * the destructor, and mark the value as destroyed.
4589 mutex_enter(&vsd_lock);
4591 for (i = 0; i < vsd->vs_nkeys; i++) {
4592 if (vsd->vs_value[i] && vsd_destructor[i])
4593 (*vsd_destructor[i])(vsd->vs_value[i]);
4594 vsd->vs_value[i] = NULL;
4598 * remove from linked list of VSD nodes
4600 list_remove(vsd_list, vsd);
4602 mutex_exit(&vsd_lock);
4605 * free up the VSD
4607 kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4608 kmem_free(vsd, sizeof (struct vsd_node));
4609 vp->v_vsd = NULL;
4613 * realloc
4615 static void *
4616 vsd_realloc(void *old, size_t osize, size_t nsize)
4618 void *new;
4620 new = kmem_zalloc(nsize, KM_SLEEP);
4621 if (old) {
4622 bcopy(old, new, osize);
4623 kmem_free(old, osize);
4625 return (new);
4629 * Set up the extensible system attribute for creating a reparse point.
4630 * The symlink data 'target' is validated for proper format of a reparse
4631 * string and a check is also made to make sure the symlink data does not
4632 * point to an existing file.
4634 * Return 0 if ok, else -1.
4636 static int
4637 fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4639 xoptattr_t *xoap;
4641 if ((!target) || (!vap) || (!xvattr))
4642 return (-1);
4644 /* validate reparse string */
4645 if (reparse_validate((const char *)target))
4646 return (-1);
4648 xva_init(xvattr);
4649 xvattr->xva_vattr = *vap;
4650 xvattr->xva_vattr.va_mask |= AT_XVATTR;
4651 xoap = xva_getxoptattr(xvattr);
4652 ASSERT(xoap);
4653 XVA_SET_REQ(xvattr, XAT_REPARSE);
4654 xoap->xoa_reparse = 1;
4656 return (0);
4660 * Function to check whether a symlink is a reparse point.
4661 * Return B_TRUE if it is a reparse point, else return B_FALSE.
4663 boolean_t
4664 vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4666 xvattr_t xvattr;
4667 xoptattr_t *xoap;
4669 if ((vp->v_type != VLNK) ||
4670 !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4671 return (B_FALSE);
4673 xva_init(&xvattr);
4674 xoap = xva_getxoptattr(&xvattr);
4675 ASSERT(xoap);
4676 XVA_SET_REQ(&xvattr, XAT_REPARSE);
4678 if (fop_getattr(vp, &xvattr.xva_vattr, 0, cr, ct))
4679 return (B_FALSE);
4681 if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4682 (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4683 return (B_FALSE);
4685 return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
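/*
 * Example (illustrative sketch, not part of this file): code that
 * resolves symlinks can use vn_is_reparse() to divert reparse points to
 * a reparse service instead of following the link text literally.
 */
#if 0	/* usage sketch only */
	if (vp->v_type == VLNK && vn_is_reparse(vp, cr, ct)) {
		/* hand the vnode off to reparse point processing */
	}
#endif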