/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/rwstlock.h>
#include <sys/fem.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/fcntl.h>
#include <sys/fs_subr.h>
#include <sys/taskq.h>
#include <sys/fs_reparse.h>
/* Determine if this vnode is a file that is read-only */
#define	ISROFILE(vp)	\
	((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
	(vp)->v_type != VFIFO && vn_is_readonly(vp))
/* Tunable via /etc/system; used only by admin/install */
int nfs_global_client_only;
/*
 * Array of vopstats_t for per-FS-type vopstats.  This array has the same
 * number of entries as and parallel to the vfssw table.  (Arguably, it could
 * be part of the vfssw table.)  Once it's initialized, it's accessed using
 * the same fstype index that is used to index into the vfssw table.
 */
vopstats_t **vopstats_fstype;

/* vopstats initialization template used for fast initialization via bcopy() */
static vopstats_t *vs_templatep;

/* Kmem cache handle for vsk_anchor_t allocations */
kmem_cache_t *vsk_anchor_cache;

/* file events cleanup routine */
extern void free_fopdata(vnode_t *);

/*
 * Root of AVL tree for the kstats associated with vopstats.  Lock protects
 * updates to vskstat_tree.
 */
avl_tree_t	vskstat_tree;
kmutex_t	vskstat_tree_lock;

/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;
/*
 * forward declarations for internal vnode specific data (vsd)
 */
static void *vsd_realloc(void *, size_t, size_t);

/*
 * forward declarations for reparse point functions
 */
static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
/*
 * VSD -- VNODE SPECIFIC DATA
 *	The v_data pointer is typically used by a file system to store a
 *	pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 *	However, there are times when additional project private data needs
 *	to be stored separately from the data (node) pointed to by v_data.
 *	This additional data could be stored by the file system itself or
 *	by a completely different kernel entity.  VSD provides a way for
 *	callers to obtain a key and store a pointer to private data associated
 *	with a vnode.
 *
 *	Callers are responsible for protecting the vsd by holding v_vsd_lock
 *	for calls to vsd_set() and vsd_get().
 */

/*
 * vsd_lock protects:
 *	vsd_nkeys - creation and deletion of vsd keys
 *	vsd_list - insertion and deletion of vsd_node in the vsd_list
 *	vsd_destructor - adding and removing destructors to the list
 */
static kmutex_t		vsd_lock;
static uint_t		vsd_nkeys;	/* size of destructor array */
/* list of vsd_node's */
static list_t *vsd_list = NULL;
/* per-key destructor funcs */
static void		(**vsd_destructor)(void *);
/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 * recording of the bytes transferred.  Since the code is similar
 * but small, it is nearly a duplicate.  Consequently any changes
 * to one may need to be reflected in the other.
 * Rundown of the variables:
 * vp - Pointer to the vnode
 * counter - Partial name structure member to update in vopstats for counts
 * bytecounter - Partial name structure member to update in vopstats for bytes
 * bytesval - Value to update in vopstats for bytes
 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 */

#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}

#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += bytesval;		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += bytesval;	\
		}							\
	}								\
}
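/*
 * Illustrative sketch (not part of the original source): a vnode-op
 * wrapper accounts for an I/O-moving operation like this; "read" and
 * "read_bytes" paste into the nread/read_bytes members of vopstats_t.
 * The helper below is hypothetical.
 */
#if 0
static void
example_account_read(vnode_t *vp, size_t bytes_moved)
{
	/* bumps nread and credits bytes_moved, per-vfs and per-fstype */
	VOPSTATS_UPDATE_IO(vp, read, read_bytes, bytes_moved);
}
#endif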
/*
 * If the filesystem does not support XIDs, map the credential.
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr)	{					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
		cr = crgetmapped(cr);					\
}
/*
 * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 * numerical order of S_IFMT and vnode types.)
 */
enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};

ushort_t vttoif_tab[] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
};
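/*
 * For reference (sketch, not part of the original source): sys/mode.h
 * indexes these tables through IFTOVT()/VTTOIF(), essentially
 *
 *	vtype = iftovt_tab[(mode & S_IFMT) >> 12];	-- S_IFREG maps to VREG
 *	mode  = vttoif_tab[(int)vtype] | permbits;	-- VDIR maps to S_IFDIR
 *
 * which is why the entries above must track the numerical S_IFMT encoding.
 */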
/*
 * The system vnode cache.
 */

kmem_cache_t *vn_cache;
/*
 * Vnode operations vector.
 */

static const fs_operation_trans_def_t vn_ops_table[] = {
	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
	    fs_nosys,

	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
	    fs_nosys,

	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
	    fs_nosys,

	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
	    fs_nosys,

	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
	    fs_nosys,

	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
	    fs_setfl,

	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
	    fs_nosys,

	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
	    fs_nosys,

	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
	    fs_nosys,

	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
	    fs_nosys,

	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
	    fs_nosys,

	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
	    fs_nosys,

	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
	    fs_nosys,

	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
	    fs_nosys,

	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
	    fs_nosys,

	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
	    fs_nosys,

	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
	    fs_nosys,

	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
	    fs_nosys,

	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
	    fs_nosys,

	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
	    fs_nosys,

	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
	    fs_nosys,

	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
	    fs_nosys,

	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
	    fs_rwlock,

	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
	    (fs_generic_func_p) fs_rwunlock,

	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
	    fs_nosys,

	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
	    fs_cmp,

	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
	    fs_frlock,

	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
	    fs_nosys,

	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
	    fs_nosys,

	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
	    fs_nosys,

	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
	    fs_nosys,

	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
	    (fs_generic_func_p) fs_nosys_map,

	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
	    (fs_generic_func_p) fs_nosys_addmap,

	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
	    fs_nosys,

	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
	    (fs_generic_func_p) fs_poll,

	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
	    fs_nosys,

	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
	    fs_pathconf,

	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
	    fs_nosys,

	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
	    fs_nosys,

	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
	    (fs_generic_func_p) fs_dispose,

	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
	    fs_nosys,

	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
	    fs_fab_acl,

	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
	    fs_shrlock,

	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
	    (fs_generic_func_p) fs_vnevent_nosupport,

	VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
	    fs_nosys,

	VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
	    fs_nosys,

	NULL, 0, NULL,
};
/* Extensible attribute (xva) routines. */

/*
 * Zero out the structure, set the size of the requested/returned bitmaps,
 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
 * to the returned attributes array.
 */
void
xva_init(xvattr_t *xvap)
{
	bzero(xvap, sizeof (xvattr_t));
	xvap->xva_mapsize = XVA_MAPSIZE;
	xvap->xva_magic = XVA_MAGIC;
	xvap->xva_vattr.va_mask = AT_XVATTR;
	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
}
/*
 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
 * structure.  Otherwise, returns NULL.
 */
xoptattr_t *
xva_getxoptattr(xvattr_t *xvap)
{
	xoptattr_t *xoap = NULL;
	if (xvap->xva_vattr.va_mask & AT_XVATTR)
		xoap = &xvap->xva_xoptattrs;
	return (xoap);
}
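/*
 * Usage sketch (illustrative, not part of the original source): requesting
 * a single optional attribute.  XVA_SET_REQ()/XVA_ISSET_RTN() and
 * XAT_REPARSE come from sys/vnode.h; the function itself is hypothetical.
 */
#if 0
static int
example_is_reparse_point(vnode_t *vp, cred_t *cr, boolean_t *reparsep)
{
	xvattr_t xva;
	xoptattr_t *xoap;
	int error;

	xva_init(&xva);			/* zeroes, sets XVA_MAGIC/AT_XVATTR */
	XVA_SET_REQ(&xva, XAT_REPARSE);	/* ask for one optional attribute */

	error = fop_getattr(vp, &xva.xva_vattr, 0, cr, NULL);
	if (error == 0 && (xoap = xva_getxoptattr(&xva)) != NULL &&
	    XVA_ISSET_RTN(&xva, XAT_REPARSE))
		*reparsep = xoap->xoa_reparse;
	return (error);
}
#endif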
/*
 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
 * kstat name.
 */
static int
vska_compar(const void *n1, const void *n2)
{
	int ret;
	ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
	ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;

	if (p1 < p2) {
		ret = -1;
	} else if (p1 > p2) {
		ret = 1;
	} else {
		ret = 0;
	}

	return (ret);
}
/*
 * Used to create a single template which will be bcopy()ed to a newly
 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
 */
static vopstats_t *
create_vopstats_template()
{
	vopstats_t *vsp;

	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
	bzero(vsp, sizeof (*vsp));	/* Start fresh */

	/* fop_open */
	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
	/* fop_close */
	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
	/* fop_read I/O */
	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
	/* fop_write I/O */
	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
	/* fop_ioctl */
	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
	/* fop_setfl */
	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
	/* fop_getattr */
	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
	/* fop_setattr */
	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
	/* fop_access */
	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
	/* fop_lookup */
	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
	/* fop_create */
	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
	/* fop_remove */
	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
	/* fop_link */
	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
	/* fop_rename */
	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
	/* fop_mkdir */
	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
	/* fop_rmdir */
	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
	/* fop_readdir I/O */
	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
	    KSTAT_DATA_UINT64);
	/* fop_symlink */
	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
	/* fop_readlink */
	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
	/* fop_fsync */
	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
	/* fop_inactive */
	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
	/* fop_fid */
	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
	/* fop_rwlock */
	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
	/* fop_rwunlock */
	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
	/* fop_seek */
	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
	/* fop_cmp */
	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
	/* fop_frlock */
	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
	/* fop_space */
	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
	/* fop_realvp */
	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
	/* fop_getpage */
	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
	/* fop_putpage */
	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
	/* fop_map */
	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
	/* fop_addmap */
	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
	/* fop_delmap */
	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
	/* fop_poll */
	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
	/* fop_dump */
	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
	/* fop_pathconf */
	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
	/* fop_pageio */
	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
	/* fop_dumpctl */
	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
	/* fop_dispose */
	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
	/* fop_setsecattr */
	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
	/* fop_getsecattr */
	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
	/* fop_shrlock */
	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
	/* fop_vnevent */
	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
	/* fop_reqzcbuf */
	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
	/* fop_retzcbuf */
	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);

	return (vsp);
}
/*
 * Creates a kstat structure associated with a vopstats structure.
 */
kstat_t *
new_vskstat(char *ksname, vopstats_t *vsp)
{
	kstat_t *ksp;

	if (!vopstats_enabled) {
		return (NULL);
	}

	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
	    sizeof (vopstats_t)/sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
	if (ksp) {
		ksp->ks_data = vsp;
		kstat_install(ksp);
	}

	return (ksp);
}
/*
 * Called from vfsinit() to initialize the support mechanisms for vopstats
 */
void
vopstats_startup()
{
	if (!vopstats_enabled)
		return;

	/*
	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
	 * is necessary since we need to check if a kstat exists before we
	 * attempt to create it.  Also, initialize its lock.
	 */
	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
	    offsetof(vsk_anchor_t, vsk_node));
	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);

	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
	    NULL, NULL, 0);

	/*
	 * Set up the array of pointers for the vopstats-by-FS-type.
	 * The entries will be allocated/initialized as each file system
	 * goes through modload/mod_installfs.
	 */
	vopstats_fstype = (vopstats_t **)kmem_zalloc(
	    (sizeof (vopstats_t *) * nfstype), KM_SLEEP);

	/* Set up the global vopstats initialization template */
	vs_templatep = create_vopstats_template();
}
/*
 * We need to have all of the counters zeroed.
 * The initialization of the vopstats_t includes on the order of
 * 50 calls to kstat_named_init().  Rather than do that on every call,
 * we do it once in a template (vs_templatep) then bcopy it over.
 */
void
initialize_vopstats(vopstats_t *vsp)
{
	if (vsp == NULL)
		return;

	bcopy(vs_templatep, vsp, sizeof (vopstats_t));
}
/*
 * If possible, determine which vopstats by fstype to use and
 * return a pointer to the caller.
 */
vopstats_t *
get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
{
	int fstype = 0;	/* Index into vfssw[] */
	vopstats_t *vsp = NULL;

	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
	    !vopstats_enabled)
		return (NULL);
	/*
	 * Set up the fstype.  We go to so much trouble because all versions
	 * of NFS use the same fstype in their vfs even though they have
	 * distinct entries in the vfssw[] table.
	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
	 */
	if (vswp) {
		fstype = vswp - vfssw;	/* Gets us the index */
	} else {
		fstype = vfsp->vfs_fstype;
	}

	/*
	 * Point to the per-fstype vopstats. The only valid values are
	 * non-zero positive values less than the number of vfssw[] table
	 * entries.
	 */
	if (fstype > 0 && fstype < nfstype) {
		vsp = vopstats_fstype[fstype];
	}

	return (vsp);
}
/*
 * Generate a kstat name, create the kstat structure, and allocate a
 * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
 * to the caller.  This must only be called from a mount.
 */
vsk_anchor_t *
get_vskstat_anchor(vfs_t *vfsp)
{
	char kstatstr[KSTAT_STRLEN];	/* kstat name for vopstats */
	statvfs64_t statvfsbuf;		/* Needed to find f_fsid */
	vsk_anchor_t *vskp = NULL;	/* vfs <--> kstat anchor */
	kstat_t *ksp;			/* Ptr to new kstat */
	avl_index_t where;		/* Location in the AVL tree */

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return (NULL);

	/* Need to get the fsid to build a kstat name */
	if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
		/* Create a name for our kstats based on fsid */
		(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
		    VOPSTATS_STR, statvfsbuf.f_fsid);

		/* Allocate and initialize the vsk_anchor_t */
		vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
		bzero(vskp, sizeof (*vskp));
		vskp->vsk_fsid = statvfsbuf.f_fsid;

		mutex_enter(&vskstat_tree_lock);
		if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
			avl_insert(&vskstat_tree, vskp, where);
			mutex_exit(&vskstat_tree_lock);

			/*
			 * Now that we've got the anchor in the AVL
			 * tree, we can create the kstat.
			 */
			ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
			if (ksp) {
				vskp->vsk_ksp = ksp;
			}
		} else {
			/* Oops, found one! Release memory and lock. */
			mutex_exit(&vskstat_tree_lock);
			kmem_cache_free(vsk_anchor_cache, vskp);
			vskp = NULL;
		}
	}
	return (vskp);
}
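/*
 * Observability note (not part of the original source): assuming the usual
 * VOPSTATS_STR prefix of "vopstats_", a file system whose f_fsid is, say,
 * 0x88001 surfaces in userland as a named kstat, e.g.
 *
 *	kstat -m unix -i 0 -n vopstats_88001
 *
 * with one uint64 counter per vnode op, as laid out in the template above.
 */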
/*
 * We're in the process of tearing down the vfs and need to cleanup
 * the data structures associated with the vopstats.  Must only be called
 * from dounmount().
 */
void
teardown_vopstats(vfs_t *vfsp)
{
	vsk_anchor_t	*vskap;
	avl_index_t	where;

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return;

	/* This is a safe check since VFS_STATS must be set (see above) */
	if ((vskap = vfsp->vfs_vskap) == NULL)
		return;

	/* Whack the pointer right away */
	vfsp->vfs_vskap = NULL;

	/* Lock the tree, remove the node, and delete the kstat */
	mutex_enter(&vskstat_tree_lock);
	if (avl_find(&vskstat_tree, vskap, &where)) {
		avl_remove(&vskstat_tree, vskap);
	}

	if (vskap->vsk_ksp) {
		kstat_delete(vskap->vsk_ksp);
	}
	mutex_exit(&vskstat_tree_lock);

	kmem_cache_free(vsk_anchor_cache, vskap);
}
/*
 * Read or write a vnode.  Called from kernel code.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	ssize_t len,
	offset_t offset,
	enum uio_seg seg,
	int ioflag,
	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
	cred_t *cr,
	ssize_t *residp)
{
	struct uio uio;
	struct iovec iov;
	int error;
	int in_crit = 0;

	if (rw == UIO_WRITE && ISROFILE(vp))
		return (EROFS);

	if (len < 0)
		return (EIO);

	VOPXID_MAP_CR(vp, cr);

	iov.iov_base = base;
	iov.iov_len = len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = offset;
	uio.uio_segflg = (short)seg;
	uio.uio_resid = len;
	uio.uio_llimit = ulimit;

	/*
	 * We have to enter the critical region before calling fop_rwlock
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, cr, &svmand);
		if (error != 0)
			goto done;
		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	(void) fop_rwlock(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (rw == UIO_WRITE) {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		error = fop_write(vp, &uio, ioflag, cr, NULL);
	} else {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		error = fop_read(vp, &uio, ioflag, cr, NULL);
	}
	fop_rwunlock(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (residp)
		*residp = uio.uio_resid;
	else if (uio.uio_resid)
		error = EIO;

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}
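/*
 * Usage sketch (illustrative, not part of the original source): reading
 * the start of a file from kernel context.  The function and its names
 * are hypothetical; only vn_open(), vn_rdwr(), fop_close() and VN_RELE()
 * are the real interfaces.
 */
#if 0
static int
example_kernel_read(char *path, char *buf, ssize_t len)
{
	vnode_t *vp;
	ssize_t resid;
	int error;

	if ((error = vn_open(path, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0)) != 0)
		return (error);
	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, len, (offset_t)0,
	    UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(), &resid);
	(void) fop_close(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
	VN_RELE(vp);
	return (error);	/* resid holds the bytes left unread */
}
#endif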
/*
 * Release a vnode.  Call fop_inactive on last reference or
 * decrement reference count.
 *
 * To avoid race conditions, the v_count is left at 1 for
 * the call to fop_inactive. This prevents another thread
 * from reclaiming and releasing the vnode *before* the
 * fop_inactive routine has a chance to destroy the vnode.
 * We can't have more than 1 thread calling fop_inactive
 * on a vnode.
 */
void
vn_rele(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		fop_inactive(vp, CRED(), NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}
/*
 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
 * as a single reference, so v_count is not decremented until the last DNLC
 * hold is released. This makes it possible to distinguish vnodes that are
 * referenced only by the DNLC.
 */
void
vn_rele_dnlc(vnode_t *vp)
{
	VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
	mutex_enter(&vp->v_lock);
	if (--vp->v_count_dnlc == 0) {
		if (vp->v_count == 1) {
			mutex_exit(&vp->v_lock);
			fop_inactive(vp, CRED(), NULL);
			return;
		}
		vp->v_count--;
	}
	mutex_exit(&vp->v_lock);
}
/*
 * Like vn_rele() except that it clears v_stream under v_lock.
 * This is used by sockfs when it dismantles the association between
 * the sockfs node and the vnode in the underlying file system.
 * v_lock has to be held to prevent a thread coming through the lookupname
 * path from accessing a stream head that is going away.
 */
void
vn_rele_stream(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	vp->v_stream = NULL;
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		fop_inactive(vp, CRED(), NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}
static void
vn_rele_inactive(vnode_t *vp)
{
	fop_inactive(vp, CRED(), NULL);
}
/*
 * Like vn_rele() except if we are going to call fop_inactive() then do it
 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 * the file system as a result of releasing the vnode. Note, file systems
 * already have to handle the race where the vnode is incremented before the
 * inactive routine is called and does its locking.
 *
 * Warning: Excessive use of this routine can lead to performance problems.
 * This is because taskqs throttle back allocation if too many are created.
 */
void
vn_rele_async(vnode_t *vp, taskq_t *taskq)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
		    vp, TQ_SLEEP) != (uintptr_t)NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}
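/*
 * Usage sketch (illustrative, not part of the original source): a file
 * system that must not re-enter itself from a release path keeps a private
 * taskq and defers the final hold to it.  The names and sizing below are
 * hypothetical.
 */
#if 0
static taskq_t *example_rele_taskq;

static void
example_rele_init(void)
{
	example_rele_taskq = taskq_create("example_vn_rele", 1, minclsyspri,
	    1, INT_MAX, 0);
}

static void
example_rele(vnode_t *vp)
{
	/* if this is the last hold, fop_inactive() runs in taskq context */
	vn_rele_async(vp, example_rele_taskq);
}
#endif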
int
vn_open(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask)
{
	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
	    umask, NULL, -1));
}
/*
 * Open/create a vnode.
 * This may be callable by the kernel, the only known use
 * of user context being that the current user credentials
 * are used for permissions.  crwhy is defined iff filemode & FCREAT.
 */
int
vn_openat(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask,
	struct vnode *startvp,
	int fd)
{
	struct vnode *vp;
	int mode;
	int accessflags;
	int error;
	int in_crit = 0;
	int open_done = 0;
	int shrlock_done = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	struct shrlock shr;
	struct shr_locowner shr_own;

	if (filemode & FSEARCH)
		filemode |= FDIRECTORY;

	mode = 0;
	accessflags = 0;
	if (filemode & FREAD)
		mode |= VREAD;
	if (filemode & (FWRITE|FTRUNC))
		mode |= VWRITE;
	if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
		mode |= VEXEC;

	/* symlink interpretation */
	if (filemode & FNOFOLLOW)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;

	if (filemode & FAPPEND)
		accessflags |= V_APPEND;

top:
	if (filemode & FCREAT && !(filemode & FDIRECTORY)) {
		enum vcexcl excl;

		/* Wish to create a file. */
		vattr.va_type = VREG;
		vattr.va_mode = createmode;
		vattr.va_mask = AT_TYPE|AT_MODE;
		if (filemode & FTRUNC) {
			vattr.va_size = 0;
			vattr.va_mask |= AT_SIZE;
		}
		if (filemode & FEXCL)
			excl = EXCL;
		else
			excl = NONEXCL;

		if (error =
		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
		    (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
			return (error);
	} else {
		/* Wish to open a file.  Just look it up. */
		if (error = lookupnameat(pnamep, seg, follow,
		    NULLVPP, &vp, startvp)) {
			if ((error == ESTALE) &&
			    fs_need_estale_retry(estale_retry++))
				goto top;
			return (error);
		}

		/*
		 * Get the attributes to check whether file is large.
		 * We do this only if the FOFFMAX flag is not set and
		 * only for regular files.
		 */

		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = fop_getattr(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if (vattr.va_size > (uoff_t)MAXOFF32_T) {
				/*
				 * Large File API - regular open fails
				 * if FOFFMAX flag is not set in file mode
				 */
				error = EOVERFLOW;
				goto out;
			}
		}
		/*
		 * Can't write directories, active texts, or
		 * read-only filesystems.  Can't truncate files
		 * on which mandatory locking is in effect.
		 */
		if (filemode & (FWRITE|FTRUNC)) {
			/*
			 * Allow writable directory if VDIROPEN flag is set.
			 */
			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
				error = EISDIR;
				goto out;
			}
			if (ISROFILE(vp)) {
				error = EROFS;
				goto out;
			}
			/*
			 * Can't truncate files on which
			 * sysv mandatory locking is in effect.
			 */
			if (filemode & FTRUNC) {
				vnode_t *rvp;

				if (fop_realvp(vp, &rvp, NULL) != 0)
					rvp = vp;
				if (rvp->v_filocks != NULL) {
					vattr.va_mask = AT_MODE;
					if ((error = fop_getattr(vp,
					    &vattr, 0, CRED(), NULL)) == 0 &&
					    MANDLOCK(vp, vattr.va_mode))
						error = EAGAIN;
				}
			}
			if (error)
				goto out;
		}
		/*
		 * Check permissions.
		 */
		if (error = fop_access(vp, mode, accessflags, CRED(), NULL))
			goto out;
		/*
		 * Require FDIRECTORY to return a directory.
		 * Require FEXEC to return a regular file.
		 */
		if ((filemode & FDIRECTORY) && vp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		}
		if ((filemode & FEXEC) && vp->v_type != VREG) {
			error = ENOEXEC;	/* XXX: error code? */
			goto out;
		}
	}

	/*
	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
	 */
	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
		error = ELOOP;
		goto out;
	}
	if (filemode & FNOLINKS) {
		vattr.va_mask = AT_NLINK;
		if ((error = fop_getattr(vp, &vattr, 0, CRED(), NULL))) {
			goto out;
		}
		if (vattr.va_nlink != 1) {
			error = EMLINK;
			goto out;
		}
	}

	/*
	 * Opening a socket corresponding to the AF_UNIX pathname
	 * in the filesystem name space is not supported.
	 * However, VSOCK nodes in namefs are supported in order
	 * to make fattach work for sockets.
	 *
	 * XXX This uses fop_realvp to distinguish between
	 * an unopened namefs node (where fop_realvp returns a
	 * different VSOCK vnode) and a VSOCK created by vn_create
	 * in some file system (where fop_realvp would never return
	 * a different vnode).
	 */
	if (vp->v_type == VSOCK) {
		struct vnode *nvp;

		error = fop_realvp(vp, &nvp, NULL);
		if (error != 0 || nvp == NULL || nvp == vp ||
		    nvp->v_type != VSOCK) {
			error = EOPNOTSUPP;
			goto out;
		}
	}

	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
		/* get share reservation */
		shr.s_access = 0;
		if (filemode & FWRITE)
			shr.s_access |= F_WRACC;
		if (filemode & FREAD)
			shr.s_access |= F_RDACC;
		shr.s_deny = 0;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fd;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = fop_shrlock(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
		    NULL);
		if (error)
			goto out;
		shrlock_done = 1;

		/* nbmand conflict check if truncating file */
		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;

			vattr.va_mask = AT_SIZE;
			if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL))
				goto out;
			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * Do opening protocol.
	 */
	error = fop_open(&vp, filemode, CRED(), NULL);
	if (error)
		goto out;
	open_done = 1;

	/*
	 * Truncate if required.
	 */
	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
		vattr.va_size = 0;
		vattr.va_mask = AT_SIZE;
		if ((error = fop_setattr(vp, &vattr, 0, CRED(), NULL)) != 0)
			goto out;
	}
out:
	ASSERT(vp->v_count > 0);

	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (error) {
		if (open_done) {
			(void) fop_close(vp, filemode, 1, (offset_t)0, CRED(),
			    NULL);
			open_done = 0;
			shrlock_done = 0;
		}
		if (shrlock_done) {
			(void) fop_shrlock(vp, F_UNSHARE, &shr, 0, CRED(),
			    NULL);
			shrlock_done = 0;
		}

		/*
		 * The following clause was added to handle a problem
		 * with NFS consistency.  It is possible that a lookup
		 * of the file to be opened succeeded, but the file
		 * itself doesn't actually exist on the server.  This
		 * is chiefly due to the DNLC containing an entry for
		 * the file which has been removed on the server.  In
		 * this case, we just start over.  If there was some
		 * other cause for the ESTALE error, then the lookup
		 * of the file will fail and the error will be returned
		 * above instead of looping around from here.
		 */
		VN_RELE(vp);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
	} else
		*vpp = vp;
	return (error);
}
/*
 * The following two accessor functions are for the NFSv4 server.  Since there
 * is no fop_open_UP/DOWNGRADE we need a way for the NFS server to keep the
 * vnode open counts correct when a client "upgrades" an open or does an
 * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
 * open mode (add or subtract read or write), but also change the share/deny
 * modes.  However, share reservations are not integrated with OPEN, yet, so
 * we need to handle each separately.  These functions are cleaner than having
 * the NFS server manipulate the counts directly, however, nobody else should
 * use these functions.
 */
void
vn_open_upgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD)
		atomic_inc_32(&vp->v_rdcnt);
	if (filemode & FWRITE)
		atomic_inc_32(&vp->v_wrcnt);
}

void
vn_open_downgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD) {
		ASSERT(vp->v_rdcnt > 0);
		atomic_dec_32(&vp->v_rdcnt);
	}
	if (filemode & FWRITE) {
		ASSERT(vp->v_wrcnt > 0);
		atomic_dec_32(&vp->v_wrcnt);
	}
}
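/*
 * Sketch (illustrative, not part of the original source): an NFSv4 server
 * handling an OPEN upgrade from read-only to read/write on an already-open
 * file would adjust the counts without a second fop_open(), roughly:
 */
#if 0
static void
example_nfs4_upgrade_to_rw(vnode_t *vp)
{
	/* the original open already holds v_rdcnt; account the new FWRITE */
	vn_open_upgrade(vp, FWRITE);
}
#endif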
int
vn_create(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask)
{
	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
	    umask, NULL));
}
/*
 * Create a vnode (makenode).
 */
int
vn_createat(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask,
	struct vnode *startvp)
{
	struct vnode *dvp;	/* ptr to parent dir vnode */
	struct vnode *vp = NULL;
	struct pathname pn;
	int error;
	int in_crit = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* symlink interpretation */
	if ((flag & FNOFOLLOW) || excl == EXCL)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;
	flag &= ~(FNOFOLLOW|FNOLINKS);

top:
	/*
	 * Lookup directory.
	 * If new object is a file, call lower level to create it.
	 * Note that it is up to the lower level to enforce exclusive
	 * creation, if the file is already there.
	 * This allows the lower level to do whatever
	 * locking or protocol that is needed to prevent races.
	 * If the new object is directory call lower level to make
	 * the new directory, with "." and "..".
	 */
	if (error = pn_get(pnamep, seg, &pn))
		return (error);
	if (auditing)
		audit_vncreate_start();
	dvp = NULL;
	*vpp = NULL;
	/*
	 * lookup will find the parent directory for the vnode.
	 * When it is done the pn holds the name of the entry
	 * in the directory.
	 * If this is a non-exclusive create we also find the node itself.
	 */
	error = lookuppnat(&pn, NULL, follow, &dvp,
	    (excl == EXCL) ? NULLVPP : vpp, startvp);
	if (error) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		if (why == CRMKDIR && error == EINVAL)
			error = EEXIST;		/* SVID */
		return (error);
	}

	if (why != CRMKNOD)
		vap->va_mode &= ~VSVTX;

	/*
	 * If default ACLs are defined for the directory don't apply the
	 * umask if umask is passed.
	 */

	if (umask) {
		vsecattr_t vsec;

		vsec.vsa_aclcnt = 0;
		vsec.vsa_aclentp = NULL;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;
		vsec.vsa_mask = VSA_DFACLCNT;
		error = fop_getsecattr(dvp, &vsec, 0, CRED(), NULL);
		/*
		 * If error is ENOSYS then treat it as no error
		 * Don't want to force all file systems to support
		 * aclent_t style of ACL's.
		 */
		if (error == ENOSYS)
			error = 0;
		if (error) {
			if (*vpp != NULL)
				VN_RELE(*vpp);
			goto out;
		} else {
			/*
			 * Apply the umask if no default ACLs.
			 */
			if (vsec.vsa_dfaclcnt == 0)
				vap->va_mode &= ~umask;

			/*
			 * fop_getsecattr() may have allocated memory for
			 * ACLs we didn't request, so double-check and
			 * free it if necessary.
			 */
			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_aclentp,
				    vsec.vsa_aclcnt * sizeof (aclent_t));
			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_dfaclentp,
				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
		}
	}

	/*
	 * In general we want to generate EROFS if the file system is
	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
	 * documents the open system call, and it says that O_CREAT has no
	 * effect if the file already exists.  Bug 1119649 states
	 * that open(path, O_CREAT, ...) fails when attempting to open an
	 * existing file on a read only file system.  Thus, the first part
	 * of the following if statement has 3 checks:
	 *	if the file exists &&
	 *		it is being open with write access &&
	 *		the file system is read only
	 *	then generate EROFS
	 */
	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
		if (*vpp)
			VN_RELE(*vpp);
		error = EROFS;
	} else if (excl == NONEXCL && *vpp != NULL) {
		vnode_t *rvp;

		/*
		 * File already exists.  If a mandatory lock has been
		 * applied, return error.
		 */
		vp = *vpp;
		if (fop_realvp(vp, &rvp, NULL) != 0)
			rvp = vp;
		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
			vattr.va_mask = AT_MODE|AT_SIZE;
			if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL)) {
				goto out;
			}
			if (MANDLOCK(vp, vattr.va_mode)) {
				error = EAGAIN;
				goto out;
			}
			/*
			 * File cannot be truncated if non-blocking mandatory
			 * locks are currently on the file.
			 */
			if ((vap->va_mask & AT_SIZE) && in_crit) {
				uoff_t offset;
				ssize_t length;

				offset = vap->va_size > vattr.va_size ?
				    vattr.va_size : vap->va_size;
				length = vap->va_size > vattr.va_size ?
				    vap->va_size - vattr.va_size :
				    vattr.va_size - vap->va_size;
				if (nbl_conflict(vp, NBL_WRITE, offset,
				    length, 0, NULL)) {
					error = EACCES;
					goto out;
				}
			}
		}

		/*
		 * If the file is the root of a VFS, we've crossed a
		 * mount point and the "containing" directory that we
		 * acquired above (dvp) is irrelevant because it's in
		 * a different file system.  We apply fop_create to the
		 * target itself instead of to the containing directory
		 * and supply a null path name to indicate (conventionally)
		 * the node itself as the "component" of interest.
		 *
		 * The call to fop_create() is necessary to ensure
		 * that the appropriate permission checks are made,
		 * i.e. EISDIR, EACCES, etc.  We already know that vpp
		 * exists since we are in the else condition where this
		 * was checked.
		 */
		if (vp->v_flag & VROOT) {
			ASSERT(why != CRMKDIR);
			error = fop_create(vp, "", vap, excl, mode, vpp,
			    CRED(), flag, NULL, NULL);
			/*
			 * If the create succeeded, it will have created a
			 * new reference on a new vnode (*vpp) in the child
			 * file system, so we want to drop our reference on
			 * the old (vp) upon exit.
			 */
			goto out;
		}

		/*
		 * Large File API - non-large open (FOFFMAX flag not set)
		 * of regular file fails if the file size exceeds MAXOFF32_T.
		 */
		if (why != CRMKDIR &&
		    !(flag & FOFFMAX) &&
		    (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = fop_getattr(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if ((vattr.va_size > (uoff_t)MAXOFF32_T)) {
				error = EOVERFLOW;
				goto out;
			}
		}
	}

	if (error == 0) {
		/*
		 * Call mkdir() if specified, otherwise create().
		 */
		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */

		if (why == CRMKDIR)
			/*
			 * N.B., if vn_createat() ever requests
			 * case-insensitive behavior then it will need
			 * to be passed to fop_mkdir().  fop_create()
			 * will already get it via "flag"
			 */
			error = fop_mkdir(dvp, pn.pn_path, vap, vpp, CRED(),
			    NULL, 0, NULL);
		else if (!must_be_dir)
			error = fop_create(dvp, pn.pn_path, vap,
			    excl, mode, vpp, CRED(), flag, NULL, NULL);
		else
			error = ENOTDIR;
	}

out:

	if (auditing)
		audit_vncreate_finish(*vpp, error);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL) {
		VN_RELE(vp);
		vp = NULL;
	}
	pn_free(&pn);
	VN_RELE(dvp);
	/*
	 * The following clause was added to handle a problem
	 * with NFS consistency.  It is possible that a lookup
	 * of the file to be created succeeded, but the file
	 * itself doesn't actually exist on the server.  This
	 * is chiefly due to the DNLC containing an entry for
	 * the file which has been removed on the server.  In
	 * this case, we just start over.  If there was some
	 * other cause for the ESTALE error, then the lookup
	 * of the file will fail and the error will be returned
	 * above instead of looping around from here.
	 */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
int
vn_link(char *from, char *to, enum uio_seg seg)
{
	return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
}

int
vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
    vnode_t *tstartvp, char *to, enum uio_seg seg)
{
	struct vnode *fvp;		/* from vnode ptr */
	struct vnode *tdvp;		/* to directory vnode ptr */
	struct pathname pn;
	int error;
	struct vattr vattr;
	dev_t fsid;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = tdvp = NULL;
	if (error = pn_get(to, seg, &pn))
		return (error);
	if (auditing && fstartvp != NULL)
		audit_setfsat_path(1);
	if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
		goto out;
	if (auditing && tstartvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
		goto out;

	/*
	 * Make sure both source vnode and target directory vnode are
	 * in the same vfs and that it is writeable.
	 */
	vattr.va_mask = AT_FSID;
	if (error = fop_getattr(fvp, &vattr, 0, CRED(), NULL))
		goto out;
	fsid = vattr.va_fsid;
	vattr.va_mask = AT_FSID;
	if (error = fop_getattr(tdvp, &vattr, 0, CRED(), NULL))
		goto out;
	if (fsid != vattr.va_fsid) {
		error = EXDEV;
		goto out;
	}
	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * Do the link.
	 */
	(void) pn_fixslash(&pn);
	error = fop_link(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
out:
	pn_free(&pn);
	if (fvp)
		VN_RELE(fvp);
	if (tdvp)
		VN_RELE(tdvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
int
vn_rename(char *from, char *to, enum uio_seg seg)
{
	return (vn_renameat(NULL, from, NULL, to, seg));
}

int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
    char *tname, enum uio_seg seg)
{
	int error;
	struct vattr vattr;
	struct pathname fpn;		/* from pathname */
	struct pathname tpn;		/* to pathname */
	dev_t fsid;
	int in_crit_src, in_crit_targ;
	vnode_t *fromvp, *fvp;
	vnode_t *tovp, *targvp;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = fromvp = tovp = targvp = NULL;
	in_crit_src = in_crit_targ = 0;
	/*
	 * Get to and from pathnames.
	 */
	if (error = pn_get(fname, seg, &fpn))
		return (error);
	if (error = pn_get(tname, seg, &tpn)) {
		pn_free(&fpn);
		return (error);
	}

	/*
	 * First we need to resolve the correct directories
	 * The passed in directories may only be a starting point,
	 * but we need the real directories the file(s) live in.
	 * For example the fname may be something like usr/lib/sparc
	 * and we were passed in the / directory, but we need to
	 * use the lib directory for the rename.
	 */

	if (auditing && fdvp != NULL)
		audit_setfsat_path(1);
	/*
	 * Lookup to and from directories.
	 */
	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
		goto out;
	}

	/*
	 * Make sure there is an entry.
	 */
	if (fvp == NULL) {
		error = ENOENT;
		goto out;
	}

	if (auditing && tdvp != NULL)
		audit_setfsat_path(3);
	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
		goto out;
	}

	/*
	 * Make sure both the from vnode directory and the to directory
	 * are in the same vfs and the to directory is writable.
	 * We check fsid's, not vfs pointers, so loopback fs works.
	 */
	if (fromvp != tovp) {
		vattr.va_mask = AT_FSID;
		if (error = fop_getattr(fromvp, &vattr, 0, CRED(), NULL))
			goto out;
		fsid = vattr.va_fsid;
		vattr.va_mask = AT_FSID;
		if (error = fop_getattr(tovp, &vattr, 0, CRED(), NULL))
			goto out;
		if (fsid != vattr.va_fsid) {
			error = EXDEV;
			goto out;
		}
	}

	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	/*
	 * Make sure "from" vp is not a mount point.
	 * Note, lookup did traverse() already, so
	 * we'll be looking at the mounted FS root.
	 * (but allow files like mnttab)
	 */
	if ((fvp->v_flag & VROOT) != 0 && fvp->v_type == VDIR) {
		error = EBUSY;
		goto out;
	}

	if (targvp && (fvp != targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit_targ = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	if (nbl_need_check(fvp)) {
		nbl_start_crit(fvp, RW_READER);
		in_crit_src = 1;
		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/*
	 * Do the rename.
	 */
	(void) pn_fixslash(&tpn);
	error = fop_rename(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
	    NULL, 0);

out:
	pn_free(&fpn);
	pn_free(&tpn);
	if (in_crit_src)
		nbl_end_crit(fvp);
	if (in_crit_targ)
		nbl_end_crit(targvp);
	if (fromvp)
		VN_RELE(fromvp);
	if (tovp)
		VN_RELE(tovp);
	if (targvp)
		VN_RELE(targvp);
	if (fvp)
		VN_RELE(fvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
/*
 * Remove a file or directory.
 */
int
vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	return (vn_removeat(NULL, fnamep, seg, dirflag));
}

int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;
	int estale_retry = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
			error = EBUSY;
			goto out;
		}

		/*
		 * Namefs specific code starts here.
		 */

		if (dirflag == RMDIRECTORY) {
			/*
			 * User called rmdir(2) on a file that has
			 * been namefs mounted on top of.  Since
			 * namefs doesn't allow directories to
			 * be mounted on other files we know
			 * vp is not of type VDIR so fail the operation.
			 */
			error = ENOTDIR;
			goto out;
		}

		/*
		 * If VROOT is still set after grabbing vp->v_lock,
		 * no one has finished nm_unmount so far and coveredvp
		 * is valid.
		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
		 * vp->v_lock, any race window is eliminated.
		 */

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & VROOT) == 0) {
			/* Someone beat us to the unmount */
			mutex_exit(&vp->v_lock);
			error = EBUSY;
			goto out;
		}
		vfsp = vp->v_vfsp;
		coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp);
		/*
		 * Note: Implementation of vn_vfswlock shows that ordering of
		 * v_lock / vn_vfswlock is not an issue here.
		 */
		error = vn_vfswlock(coveredvp);
		mutex_exit(&vp->v_lock);

		if (error)
			goto out;

		VN_HOLD(coveredvp);
		VN_RELE(vp);
		error = dounmount(vfsp, 0, CRED());

		/*
		 * Unmounted the namefs file system; now get
		 * the object it was mounted over.
		 */
		vp = coveredvp;
		/*
		 * If namefs was mounted over a directory, then
		 * we want to use rmdir() instead of unlink().
		 */
		if (vp->v_type == VDIR)
			dirflag = RMDIRECTORY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling fop_remove.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = fop_rmdir(dvp, pn.pn_path, cwd, CRED(),
			    NULL, 0);
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = fop_remove(dvp, pn.pn_path, CRED(), NULL, 0);
	}

out:
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}
/*
 * Utility function to compare equality of vnodes.
 * Compare the underlying real vnodes, if there are underlying vnodes.
 * This is a more thorough comparison than the VN_CMP() macro provides.
 */
int
vn_compare(vnode_t *vp1, vnode_t *vp2)
{
	vnode_t *realvp;

	if (vp1 != NULL && fop_realvp(vp1, &realvp, NULL) == 0)
		vp1 = realvp;
	if (vp2 != NULL && fop_realvp(vp2, &realvp, NULL) == 0)
		vp2 = realvp;
	return (VN_CMP(vp1, vp2));
}
/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 */
#define	NUM_BUCKETS	1023

struct vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1.
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];
#define	VN_VFSLOCKS_SHIFT	9

#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
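/*
 * For reference (sketch, not part of the original source): the 9-bit shift
 * drops the low, alignment-dominated bits of the address before masking
 * with NUM_BUCKETS, so the bucket for a pointer p is
 *
 *	&vn_vfslocks_buckets[(((intptr_t)p) >> 9) & 1023]
 *
 * spreading vnode/vfs addresses across the NUM_BUCKETS + 1 buckets.
 */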
/*
 * vn_vfslocks_getlock() uses a hash scheme to generate
 * rwstlock using vfs/vnode pointer passed to it.
 *
 * vn_vfslocks_rele() releases a reference in the
 * hash table which allows the entry allocated by
 * vn_vfslocks_getlock() to be freed at a later
 * stage when the refcount drops to zero.
 */

vn_vfslocks_entry_t *
vn_vfslocks_getlock(void *vfsvpptr)
{
	struct vn_vfslocks_bucket *bp;
	vn_vfslocks_entry_t *vep;
	vn_vfslocks_entry_t *tvep;

	ASSERT(vfsvpptr != NULL);
	bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];

	mutex_enter(&bp->vb_lock);
	for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
		if (vep->ve_vpvfs == vfsvpptr) {
			vep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);
			return (vep);
		}
	}
	mutex_exit(&bp->vb_lock);
	vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
	rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
	vep->ve_vpvfs = (char *)vfsvpptr;
	vep->ve_refcnt = 1;
	mutex_enter(&bp->vb_lock);
	for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
		if (tvep->ve_vpvfs == vfsvpptr) {
			tvep->ve_refcnt++;
			mutex_exit(&bp->vb_lock);

			/*
			 * There is already an entry in the hash;
			 * destroy what we just allocated.
			 */
			rwst_destroy(&vep->ve_lock);
			kmem_free(vep, sizeof (*vep));
			return (tvep);
		}
	}
	vep->ve_next = bp->vb_list;
	bp->vb_list = vep;
	mutex_exit(&bp->vb_lock);
	return (vep);
}
2067 void
2068 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2070 struct vn_vfslocks_bucket *bp;
2071 vn_vfslocks_entry_t *vep;
2072 vn_vfslocks_entry_t *pvep;
2074 ASSERT(vepent != NULL);
2075 ASSERT(vepent->ve_vpvfs != NULL);
2077 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2079 mutex_enter(&bp->vb_lock);
2080 vepent->ve_refcnt--;
2082 if ((int32_t)vepent->ve_refcnt < 0)
2083 cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2085 if (vepent->ve_refcnt == 0) {
2086 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2087 if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2088 if (bp->vb_list == vep)
2089 bp->vb_list = vep->ve_next;
2090 else {
2091 /* LINTED */
2092 pvep->ve_next = vep->ve_next;
2094 mutex_exit(&bp->vb_lock);
2095 rwst_destroy(&vep->ve_lock);
2096 kmem_free(vep, sizeof (*vep));
2097 return;
2099 pvep = vep;
2101 cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2103 mutex_exit(&bp->vb_lock);
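/*
 * Illustrative sketch (not part of the build): every reference taken by
 * vn_vfslocks_getlock() must eventually be dropped with vn_vfslocks_rele(),
 * or the hash entry is never freed.  The function name below is
 * hypothetical.
 */
#if 0
static void
example_vfslocks_pairing(vnode_t *vp)
{
	vn_vfslocks_entry_t *vep;

	vep = vn_vfslocks_getlock(vp);		/* takes a hash reference */
	rwst_enter(&vep->ve_lock, RW_READER);
	/* ... examine vp->v_vfsmountedhere ... */
	rwst_exit(&vep->ve_lock);
	vn_vfslocks_rele(vep);			/* drops the hash reference */
}
#endif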
2107 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2108 * lock protecting the v_vfsmountedhere field.
2109 * vn_vfswlock_wait is similar to vn_vfswlock, except that it blocks
2110 * to acquire the lock VVFSLOCK.
2112 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2113 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2114 * need the non-blocking version of the writers lock, i.e. vn_vfswlock().
2117 vn_vfswlock_wait(vnode_t *vp)
2119 int retval;
2120 vn_vfslocks_entry_t *vpvfsentry;
2121 ASSERT(vp != NULL);
2123 vpvfsentry = vn_vfslocks_getlock(vp);
2124 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2126 if (retval == EINTR) {
2127 vn_vfslocks_rele(vpvfsentry);
2128 return (EINTR);
2130 return (retval);
2134 vn_vfsrlock_wait(vnode_t *vp)
2136 int retval;
2137 vn_vfslocks_entry_t *vpvfsentry;
2138 ASSERT(vp != NULL);
2140 vpvfsentry = vn_vfslocks_getlock(vp);
2141 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2143 if (retval == EINTR) {
2144 vn_vfslocks_rele(vpvfsentry);
2145 return (EINTR);
2148 return (retval);
2153 * vn_vfswlock is used to implement a lock which is logically a writers lock
2154 * protecting the v_vfsmountedhere field.
2157 vn_vfswlock(vnode_t *vp)
2159 vn_vfslocks_entry_t *vpvfsentry;
2162 * If vp is NULL then somebody is trying to lock the covered vnode
2163 * of /. (vfs_vnodecovered is NULL for /). This situation will
2164 * only happen when unmounting /. Since that operation will fail
2165 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2167 if (vp == NULL)
2168 return (EBUSY);
2170 vpvfsentry = vn_vfslocks_getlock(vp);
2172 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2173 return (0);
2175 vn_vfslocks_rele(vpvfsentry);
2176 return (EBUSY);
2180 vn_vfsrlock(vnode_t *vp)
2182 vn_vfslocks_entry_t *vpvfsentry;
2185 * If vp is NULL then somebody is trying to lock the covered vnode
2186 * of /. (vfs_vnodecovered is NULL for /). This situation will
2187 * only happen when unmounting /. Since that operation will fail
2188 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2190 if (vp == NULL)
2191 return (EBUSY);
2193 vpvfsentry = vn_vfslocks_getlock(vp);
2195 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2196 return (0);
2198 vn_vfslocks_rele(vpvfsentry);
2199 return (EBUSY);
2202 void
2203 vn_vfsunlock(vnode_t *vp)
2205 vn_vfslocks_entry_t *vpvfsentry;
2208 * ve_refcnt needs to be decremented twice:
2209 * 1. To release the reference after a call to vn_vfslocks_getlock()
2210 * 2. To release the reference from the locking routines like
2211 * vn_vfsrlock/vn_vfswlock etc.
2213 vpvfsentry = vn_vfslocks_getlock(vp);
2214 vn_vfslocks_rele(vpvfsentry);
2216 rwst_exit(&vpvfsentry->ve_lock);
2217 vn_vfslocks_rele(vpvfsentry);
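/*
 * Illustrative sketch (not part of the build): the usual pattern for the
 * non-blocking writer lock.  vn_vfsunlock() drops both the lock and the
 * reference that vn_vfswlock() took.  The function name is hypothetical.
 */
#if 0
static int
example_cover_vnode(vnode_t *coveredvp)
{
	if (vn_vfswlock(coveredvp) != 0)
		return (EBUSY);	/* a traverse or mount is in progress */
	/* ... safe to examine/update coveredvp->v_vfsmountedhere ... */
	vn_vfsunlock(coveredvp);
	return (0);
}
#endif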
2221 vn_vfswlock_held(vnode_t *vp)
2223 int held;
2224 vn_vfslocks_entry_t *vpvfsentry;
2226 ASSERT(vp != NULL);
2228 vpvfsentry = vn_vfslocks_getlock(vp);
2229 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2231 vn_vfslocks_rele(vpvfsentry);
2232 return (held);
2237 vn_make_ops(
2238 const char *name, /* Name of file system */
2239 const fs_operation_def_t *templ, /* Operation specification */
2240 vnodeops_t **actual) /* Return the vnodeops */
2242 int unused_ops;
2243 int error;
2245 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2247 (*actual)->vnop_name = name;
2249 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2250 if (error) {
2251 kmem_free(*actual, sizeof (vnodeops_t));
2254 #if DEBUG
2255 if (unused_ops != 0)
2256 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2257 "but not used", name, unused_ops);
2258 #endif
2260 return (error);
2264 * Free the vnodeops created as a result of vn_make_ops()
2266 void
2267 vn_freevnodeops(vnodeops_t *vnops)
2269 kmem_free(vnops, sizeof (vnodeops_t));
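/*
 * Illustrative sketch (not part of the build): how a file system would
 * typically construct its vnodeops with vn_make_ops().  The template and
 * all examplefs_* entry points are hypothetical; operations not named in
 * the template get default handling in the fop_*() wrappers.
 */
#if 0
static vnodeops_t *examplefs_vnodeops;

static const fs_operation_def_t examplefs_vnodeops_template[] = {
	{ VOPNAME_OPEN,		{ .vop_open = examplefs_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = examplefs_close } },
	{ VOPNAME_READ,		{ .vop_read = examplefs_read } },
	{ NULL,			{ NULL } }
};

static int
examplefs_init_ops(void)
{
	return (vn_make_ops("examplefs", examplefs_vnodeops_template,
	    &examplefs_vnodeops));
}
#endif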
2273 * Vnode cache.
2276 /* ARGSUSED */
2277 static int
2278 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2280 struct vnode *vp;
2282 vp = buf;
2284 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2285 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2286 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2287 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2288 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2289 vp->v_path = NULL;
2290 vp->v_mpssdata = NULL;
2291 vp->v_vsd = NULL;
2292 vp->v_fopdata = NULL;
2294 pagecache_init(vp);
2296 return (0);
2299 /* ARGSUSED */
2300 static void
2301 vn_cache_destructor(void *buf, void *cdrarg)
2303 struct vnode *vp;
2305 vp = buf;
2307 pagecache_fini(vp);
2309 rw_destroy(&vp->v_nbllock);
2310 cv_destroy(&vp->v_cv);
2311 mutex_destroy(&vp->v_vsd_lock);
2312 mutex_destroy(&vp->v_lock);
2315 void
2316 vn_create_cache(void)
2318 /* LINTED */
2319 ASSERT((1 << VNODE_ALIGN_LOG2) ==
2320 P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2321 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2322 VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2323 NULL, 0);
2326 void
2327 vn_destroy_cache(void)
2329 kmem_cache_destroy(vn_cache);
2333 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2334 * cached by the file system and vnodes remain associated.
2336 void
2337 vn_recycle(vnode_t *vp)
2339 ASSERT(!vn_has_cached_data(vp));
2342 * XXX - This really belongs in vn_reinit(), but we have some issues
2343 * with the counts. Best to have it here for clean initialization.
2345 vp->v_rdcnt = 0;
2346 vp->v_wrcnt = 0;
2347 vp->v_mmap_read = 0;
2348 vp->v_mmap_write = 0;
2351 * If FEM was in use, make sure everything gets cleaned up
2352 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2353 * constructor.
2355 if (vp->v_femhead) {
2356 /* XXX - There should be a free_femhead() that does all this */
2357 ASSERT(vp->v_femhead->femh_list == NULL);
2358 mutex_destroy(&vp->v_femhead->femh_lock);
2359 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2360 vp->v_femhead = NULL;
2362 if (vp->v_path) {
2363 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2364 vp->v_path = NULL;
2367 if (vp->v_fopdata != NULL) {
2368 free_fopdata(vp);
2370 vp->v_mpssdata = NULL;
2371 vsd_free(vp);
2375 * Used to reset the vnode fields including those that are directly accessible
2376 * as well as those which require an accessor function.
2378 * Does not initialize:
2379 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2380 * v_data (since FS-nodes and vnodes point to each other and should
2381 * be updated simultaneously)
2382 * v_op (in case someone needs to make a VOP call on this object)
2384 void
2385 vn_reinit(vnode_t *vp)
2387 vp->v_count = 1;
2388 vp->v_count_dnlc = 0;
2389 vp->v_vfsp = NULL;
2390 vp->v_stream = NULL;
2391 vp->v_vfsmountedhere = NULL;
2392 vp->v_flag = 0;
2393 vp->v_type = VNON;
2394 vp->v_rdev = NODEV;
2396 vp->v_filocks = NULL;
2397 vp->v_shrlocks = NULL;
2398 VERIFY(!vn_has_cached_data(vp));
2400 vp->v_locality = NULL;
2401 vp->v_xattrdir = NULL;
2403 /* Handles v_femhead, v_path, and the r/w/map counts */
2404 vn_recycle(vp);
2407 vnode_t *
2408 vn_alloc(int kmflag)
2410 vnode_t *vp;
2412 vp = kmem_cache_alloc(vn_cache, kmflag);
2414 if (vp != NULL) {
2415 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2416 vp->v_fopdata = NULL;
2417 vn_reinit(vp);
2420 return (vp);
2423 void
2424 vn_free(vnode_t *vp)
2426 ASSERT(vp->v_shrlocks == NULL);
2427 ASSERT(vp->v_filocks == NULL);
2430 * Some file systems call vn_free() with v_count of zero,
2431 * some with v_count of 1. In any case, the value should
2432 * never be anything else.
2434 ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2435 ASSERT(vp->v_count_dnlc == 0);
2436 if (vp->v_path != NULL) {
2437 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2438 vp->v_path = NULL;
2441 /* If FEM was in use, make sure everything gets cleaned up */
2442 if (vp->v_femhead) {
2443 /* XXX - There should be a free_femhead() that does all this */
2444 ASSERT(vp->v_femhead->femh_list == NULL);
2445 mutex_destroy(&vp->v_femhead->femh_lock);
2446 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2447 vp->v_femhead = NULL;
2450 if (vp->v_fopdata != NULL) {
2451 free_fopdata(vp);
2453 vp->v_mpssdata = NULL;
2454 vsd_free(vp);
2455 kmem_cache_free(vn_cache, vp);
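/*
 * Illustrative sketch (not part of the build): the usual vnode life cycle
 * from a file system's point of view.  The examplefs_* names are
 * hypothetical; examplefs_vnodeops would come from vn_make_ops().
 */
#if 0
static vnode_t *
examplefs_node_create(vfs_t *vfsp)
{
	vnode_t *vp;

	vp = vn_alloc(KM_SLEEP);	/* constructed and vn_reinit()ed */
	vp->v_vfsp = vfsp;
	vn_setops(vp, examplefs_vnodeops);
	vn_exists(vp);			/* fire VNTRANS_EXISTS, if monitored */
	return (vp);
}

static void
examplefs_node_destroy(vnode_t *vp)
{
	vn_invalid(vp);			/* fire VNTRANS_DESTROYED */
	vn_free(vp);
}
#endif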
2459 * Vnode status changes; we should define better states than 1, 0.
2461 void
2462 vn_reclaim(vnode_t *vp)
2464 vfs_t *vfsp = vp->v_vfsp;
2466 if (vfsp == NULL ||
2467 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2468 return;
2470 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2473 void
2474 vn_idle(vnode_t *vp)
2476 vfs_t *vfsp = vp->v_vfsp;
2478 if (vfsp == NULL ||
2479 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2480 return;
2482 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2484 void
2485 vn_exists(vnode_t *vp)
2487 vfs_t *vfsp = vp->v_vfsp;
2489 if (vfsp == NULL ||
2490 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2491 return;
2493 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2496 void
2497 vn_invalid(vnode_t *vp)
2499 vfs_t *vfsp = vp->v_vfsp;
2501 if (vfsp == NULL ||
2502 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2503 return;
2505 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2508 /* Vnode event notification */
2511 vnevent_support(vnode_t *vp, caller_context_t *ct)
2513 if (vp == NULL)
2514 return (EINVAL);
2516 return (fop_vnevent(vp, VE_SUPPORT, NULL, NULL, ct));
2519 void
2520 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2522 if (vp == NULL || vp->v_femhead == NULL) {
2523 return;
2525 (void) fop_vnevent(vp, VE_RENAME_SRC, dvp, name, ct);
2528 void
2529 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2530 caller_context_t *ct)
2532 if (vp == NULL || vp->v_femhead == NULL) {
2533 return;
2535 (void) fop_vnevent(vp, VE_RENAME_DEST, dvp, name, ct);
2538 void
2539 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2541 if (vp == NULL || vp->v_femhead == NULL) {
2542 return;
2544 (void) fop_vnevent(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2547 void
2548 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2550 if (vp == NULL || vp->v_femhead == NULL) {
2551 return;
2553 (void) fop_vnevent(vp, VE_REMOVE, dvp, name, ct);
2556 void
2557 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2559 if (vp == NULL || vp->v_femhead == NULL) {
2560 return;
2562 (void) fop_vnevent(vp, VE_RMDIR, dvp, name, ct);
2565 void
2566 vnevent_pre_rename_src(vnode_t *vp, vnode_t *dvp, char *name,
2567 caller_context_t *ct)
2569 if (vp == NULL || vp->v_femhead == NULL) {
2570 return;
2572 (void) fop_vnevent(vp, VE_PRE_RENAME_SRC, dvp, name, ct);
2575 void
2576 vnevent_pre_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2577 caller_context_t *ct)
2579 if (vp == NULL || vp->v_femhead == NULL) {
2580 return;
2582 (void) fop_vnevent(vp, VE_PRE_RENAME_DEST, dvp, name, ct);
2585 void
2586 vnevent_pre_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
2587 caller_context_t *ct)
2589 if (vp == NULL || vp->v_femhead == NULL) {
2590 return;
2592 (void) fop_vnevent(vp, VE_PRE_RENAME_DEST_DIR, nvp, name, ct);
2595 void
2596 vnevent_create(vnode_t *vp, caller_context_t *ct)
2598 if (vp == NULL || vp->v_femhead == NULL) {
2599 return;
2601 (void) fop_vnevent(vp, VE_CREATE, NULL, NULL, ct);
2604 void
2605 vnevent_link(vnode_t *vp, caller_context_t *ct)
2607 if (vp == NULL || vp->v_femhead == NULL) {
2608 return;
2610 (void) fop_vnevent(vp, VE_LINK, NULL, NULL, ct);
2613 void
2614 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2616 if (vp == NULL || vp->v_femhead == NULL) {
2617 return;
2619 (void) fop_vnevent(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2622 void
2623 vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2625 if (vp == NULL || vp->v_femhead == NULL) {
2626 return;
2628 (void) fop_vnevent(vp, VE_TRUNCATE, NULL, NULL, ct);
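/*
 * Illustrative sketch (not part of the build): a file system fires the
 * vnevent_*() hooks once the corresponding name-space change has actually
 * happened, e.g. from its vop_remove().  examplefs_remove() and
 * examplefs_dirremove() are hypothetical.
 */
#if 0
extern int examplefs_dirremove(vnode_t *, char *, vnode_t **, cred_t *);

static int
examplefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	vnode_t *vp;
	int error;

	error = examplefs_dirremove(dvp, nm, &vp, cr);	/* hypothetical */
	if (error == 0) {
		vnevent_remove(vp, dvp, nm, ct);	/* entry is gone */
		VN_RELE(vp);
	}
	return (error);
}
#endif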
2632 * Vnode accessors.
2636 vn_is_readonly(vnode_t *vp)
2638 return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2642 vn_has_flocks(vnode_t *vp)
2644 return (vp->v_filocks != NULL);
2648 vn_has_mandatory_locks(vnode_t *vp, int mode)
2650 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2654 vn_has_cached_data(vnode_t *vp)
2656 return (!list_is_empty(&vp->v_pagecache_list));
2660 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2661 * zone_enter(2).
2664 vn_can_change_zones(vnode_t *vp)
2666 struct vfssw *vswp;
2667 int allow = 1;
2668 vnode_t *rvp;
2670 if (nfs_global_client_only != 0)
2671 return (1);
2674 * We always want to look at the underlying vnode if there is one.
2676 if (fop_realvp(vp, &rvp, NULL) != 0)
2677 rvp = vp;
2679 * Some pseudo filesystems (including doorfs) don't actually register
2680 * their vfsops_t, so the following may return NULL; we happily let
2681 * such vnodes switch zones.
2683 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2684 if (vswp != NULL) {
2685 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2686 allow = 0;
2687 vfs_unrefvfssw(vswp);
2689 return (allow);
2693 * Return nonzero if the vnode is a mount point, zero if not.
2696 vn_ismntpt(vnode_t *vp)
2698 return (vp->v_vfsmountedhere != NULL);
2701 /* Retrieve the vfs (if any) mounted on this vnode */
2702 vfs_t *
2703 vn_mountedvfs(vnode_t *vp)
2705 return (vp->v_vfsmountedhere);
2709 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2712 vn_in_dnlc(vnode_t *vp)
2714 return (vp->v_count_dnlc > 0);
2718 * vn_has_other_opens() checks whether a particular file is opened by more than
2719 * just the caller and whether the open is for read and/or write.
2720 * This routine is meant to be called after the caller has already called
2721 * fop_open(), when the caller wishes to know if it is the only one with
2722 * the file open for the mode(s) specified.
2724 * Vnode counts are only kept on regular files (v_type=VREG).
2727 vn_has_other_opens(
2728 vnode_t *vp,
2729 v_mode_t mode)
2732 ASSERT(vp != NULL);
2734 switch (mode) {
2735 case V_WRITE:
2736 if (vp->v_wrcnt > 1)
2737 return (V_TRUE);
2738 break;
2739 case V_RDORWR:
2740 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2741 return (V_TRUE);
2742 break;
2743 case V_RDANDWR:
2744 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2745 return (V_TRUE);
2746 break;
2747 case V_READ:
2748 if (vp->v_rdcnt > 1)
2749 return (V_TRUE);
2750 break;
2753 return (V_FALSE);
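/*
 * Illustrative sketch (not part of the build): a server (e.g. the NFS
 * server) that has already done its own fop_open() can use
 * vn_has_other_opens() to decide whether a delegation is safe to grant.
 * The function name is hypothetical.
 */
#if 0
static boolean_t
example_write_delegation_safe(vnode_t *vp)
{
	/* another reader or writer exists; don't delegate */
	if (vn_has_other_opens(vp, V_RDORWR) == V_TRUE)
		return (B_FALSE);
	return (B_TRUE);
}
#endif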
2757 * vn_is_opened() checks whether a particular file is opened and
2758 * whether the open is for read and/or write.
2760 * Vnode counts are only kept on regular files (v_type=VREG).
2763 vn_is_opened(
2764 vnode_t *vp,
2765 v_mode_t mode)
2768 ASSERT(vp != NULL);
2770 switch (mode) {
2771 case V_WRITE:
2772 if (vp->v_wrcnt)
2773 return (V_TRUE);
2774 break;
2775 case V_RDANDWR:
2776 if (vp->v_rdcnt && vp->v_wrcnt)
2777 return (V_TRUE);
2778 break;
2779 case V_RDORWR:
2780 if (vp->v_rdcnt || vp->v_wrcnt)
2781 return (V_TRUE);
2782 break;
2783 case V_READ:
2784 if (vp->v_rdcnt)
2785 return (V_TRUE);
2786 break;
2789 return (V_FALSE);
2793 * vn_is_mapped() checks whether a particular file is mapped and whether
2794 * the file is mapped read and/or write.
2797 vn_is_mapped(
2798 vnode_t *vp,
2799 v_mode_t mode)
2802 ASSERT(vp != NULL);
2804 #if !defined(_LP64)
2805 switch (mode) {
2807 * The atomic_add_64_nv functions force atomicity in the
2808 * case of 32 bit architectures. Otherwise the 64 bit values
2809 * require two fetches. The value of the fields may be
2810 * (potentially) changed between the first fetch and the
2811 * second.
2813 case V_WRITE:
2814 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2815 return (V_TRUE);
2816 break;
2817 case V_RDANDWR:
2818 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2819 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2820 return (V_TRUE);
2821 break;
2822 case V_RDORWR:
2823 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2824 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2825 return (V_TRUE);
2826 break;
2827 case V_READ:
2828 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2829 return (V_TRUE);
2830 break;
2832 #else
2833 switch (mode) {
2834 case V_WRITE:
2835 if (vp->v_mmap_write)
2836 return (V_TRUE);
2837 break;
2838 case V_RDANDWR:
2839 if (vp->v_mmap_read && vp->v_mmap_write)
2840 return (V_TRUE);
2841 break;
2842 case V_RDORWR:
2843 if (vp->v_mmap_read || vp->v_mmap_write)
2844 return (V_TRUE);
2845 break;
2846 case V_READ:
2847 if (vp->v_mmap_read)
2848 return (V_TRUE);
2849 break;
2851 #endif
2853 return (V_FALSE);
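/*
 * Illustrative sketch (not part of the build): the atomic_add_64_nv(p, 0)
 * idiom used above is how a 64-bit counter can be read atomically on
 * 32-bit kernels, where a plain load would take two fetches.  The helper
 * name is hypothetical.
 */
#if 0
static uint64_t
example_atomic_read_64(volatile uint64_t *p)
{
#if !defined(_LP64)
	return (atomic_add_64_nv(p, 0));	/* single atomic fetch */
#else
	return (*p);				/* 64-bit loads are atomic */
#endif
}
#endif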
2857 * Set the operations vector for a vnode.
2859 * FEM ensures that the v_femhead pointer is filled in before the
2860 * v_op pointer is changed. This means that if the v_femhead pointer
2861 * is NULL, and the v_op field hasn't changed since we last checked
2862 * the v_femhead pointer, then our update is OK - we are not racing with
2863 * FEM.
2865 void
2866 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2868 vnodeops_t *op;
2870 ASSERT(vp != NULL);
2871 ASSERT(vnodeops != NULL);
2873 op = vp->v_op;
2874 membar_consumer();
2876 * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
2877 * the compare-and-swap on vp->v_op. If either fails, then FEM is
2878 * in effect on the vnode and we need to have FEM deal with it.
2880 if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
2881 op) {
2882 fem_setvnops(vp, vnodeops);
2887 * Retrieve the operations vector for a vnode
2888 * As with vn_setops() above, make sure we aren't racing with FEM.
2889 * FEM sets the v_op to a special, internal vnodeops that wouldn't
2890 * make sense to the callers of this routine.
2892 vnodeops_t *
2893 vn_getops(vnode_t *vp)
2895 vnodeops_t *op;
2897 ASSERT(vp != NULL);
2899 op = vp->v_op;
2900 membar_consumer();
2901 if (vp->v_femhead == NULL && op == vp->v_op) {
2902 return (op);
2903 } else {
2904 return (fem_getvnops(vp));
2909 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2910 * Returns zero (0) if not.
2913 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2915 return (vn_getops(vp) == vnodeops);
2919 * Returns non-zero (1) if the specified operation matches the
2920 * corresponding operation for that of the vnode.
2921 * Returns zero (0) if not.
2924 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2927 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2929 const fs_operation_trans_def_t *otdp;
2930 fs_generic_func_p *loc = NULL;
2931 vnodeops_t *vop = vn_getops(vp);
2933 ASSERT(vopname != NULL);
2935 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2936 if (MATCHNAME(otdp->name, vopname)) {
2937 loc = (fs_generic_func_p *)
2938 ((char *)(vop) + otdp->offset);
2939 break;
2943 return ((loc != NULL) && (*loc == funcp));
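/*
 * Illustrative sketch (not part of the build): vn_matchopval() lets a
 * caller ask whether a vnode's entry point is one particular function,
 * e.g. to special-case a known file system.  examplefs_read is
 * hypothetical.
 */
#if 0
static int
example_is_examplefs_read(vnode_t *vp)
{
	return (vn_matchopval(vp, VOPNAME_READ,
	    (fs_generic_func_p)examplefs_read));
}
#endif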
2947 * fs_new_caller_id() needs to return a unique ID on a given local system.
2948 * The IDs do not need to survive across reboots. These are primarily
2949 * used so that (FEM) monitors can detect particular callers (such as
2950 * the NFS server) to a given vnode/vfs operation.
2952 u_longlong_t
2953 fs_new_caller_id()
2955 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2957 return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
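/*
 * Illustrative sketch (not part of the build): a FEM monitor typically
 * obtains one caller ID at initialization and tags every operation it
 * issues with it, so its own calls can be recognized later.  Names are
 * hypothetical.
 */
#if 0
static u_longlong_t example_monitor_caller_id;

static void
example_monitor_init(void)
{
	example_monitor_caller_id = fs_new_caller_id();
}
#endif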
2961 * Given a starting vnode and a path, updates the path in the target vnode in
2962 * a safe manner. If the vnode already has path information embedded, then the
2963 * cached path is left untouched.
2966 size_t max_vnode_path = 4 * MAXPATHLEN;
2968 void
2969 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2970 const char *path, size_t plen)
2972 char *rpath;
2973 vnode_t *base;
2974 size_t rpathlen, rpathalloc;
2975 int doslash = 1;
2977 if (*path == '/') {
2978 base = rootvp;
2979 path++;
2980 plen--;
2981 } else {
2982 base = startvp;
2986 * We cannot grab base->v_lock while we hold vp->v_lock because of
2987 * the potential for deadlock.
2989 mutex_enter(&base->v_lock);
2990 if (base->v_path == NULL) {
2991 mutex_exit(&base->v_lock);
2992 return;
2995 rpathlen = strlen(base->v_path);
2996 rpathalloc = rpathlen + plen + 1;
2997 /* Avoid adding a slash if there's already one there */
2998 if (base->v_path[rpathlen-1] == '/')
2999 doslash = 0;
3000 else
3001 rpathalloc++;
3004 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
3005 * so we must do this dance. If, by chance, something changes the path,
3006 * just give up since there is no real harm.
3008 mutex_exit(&base->v_lock);
3010 /* Paths should stay within reason */
3011 if (rpathalloc > max_vnode_path)
3012 return;
3014 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
3016 mutex_enter(&base->v_lock);
3017 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
3018 mutex_exit(&base->v_lock);
3019 kmem_free(rpath, rpathalloc);
3020 return;
3022 bcopy(base->v_path, rpath, rpathlen);
3023 mutex_exit(&base->v_lock);
3025 if (doslash)
3026 rpath[rpathlen++] = '/';
3027 bcopy(path, rpath + rpathlen, plen);
3028 rpath[rpathlen + plen] = '\0';
3030 mutex_enter(&vp->v_lock);
3031 if (vp->v_path != NULL) {
3032 mutex_exit(&vp->v_lock);
3033 kmem_free(rpath, rpathalloc);
3034 } else {
3035 vp->v_path = rpath;
3036 mutex_exit(&vp->v_lock);
3041 * Sets the path of the vnode to the given string, regardless of current
3042 * context. The string must be a complete path from rootdir. This is only used
3043 * by fsop_root() for setting the path based on the mountpoint.
3045 void
3046 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3048 char *buf = kmem_alloc(len + 1, KM_SLEEP);
3050 mutex_enter(&vp->v_lock);
3051 if (vp->v_path != NULL) {
3052 mutex_exit(&vp->v_lock);
3053 kmem_free(buf, len + 1);
3054 return;
3057 vp->v_path = buf;
3058 bcopy(str, vp->v_path, len);
3059 vp->v_path[len] = '\0';
3061 mutex_exit(&vp->v_lock);
3065 * Called from within a filesystem's vop_rename() to handle renames once the
3066 * target vnode is available.
3068 void
3069 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3071 char *tmp;
3073 mutex_enter(&vp->v_lock);
3074 tmp = vp->v_path;
3075 vp->v_path = NULL;
3076 mutex_exit(&vp->v_lock);
3077 vn_setpath(rootdir, dvp, vp, nm, len);
3078 if (tmp != NULL)
3079 kmem_free(tmp, strlen(tmp) + 1);
3083 * Similar to vn_setpath_str(), this function sets the path of the destination
3084 * vnode to be the same as that of the source vnode.
3086 void
3087 vn_copypath(struct vnode *src, struct vnode *dst)
3089 char *buf;
3090 int alloc;
3092 mutex_enter(&src->v_lock);
3093 if (src->v_path == NULL) {
3094 mutex_exit(&src->v_lock);
3095 return;
3097 alloc = strlen(src->v_path) + 1;
3099 /* avoid kmem_alloc() with lock held */
3100 mutex_exit(&src->v_lock);
3101 buf = kmem_alloc(alloc, KM_SLEEP);
3102 mutex_enter(&src->v_lock);
3103 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3104 mutex_exit(&src->v_lock);
3105 kmem_free(buf, alloc);
3106 return;
3108 bcopy(src->v_path, buf, alloc);
3109 mutex_exit(&src->v_lock);
3111 mutex_enter(&dst->v_lock);
3112 if (dst->v_path != NULL) {
3113 mutex_exit(&dst->v_lock);
3114 kmem_free(buf, alloc);
3115 return;
3117 dst->v_path = buf;
3118 mutex_exit(&dst->v_lock);
3122 * XXX Private interface for segvn routines that handle vnode
3123 * large page segments.
3125 * return 1 if vp's file system fop_pageio() implementation
3126 * can be safely used instead of fop_getpage() for handling
3127 * pagefaults against regular non-swap files. The fop_pageio()
3128 * interface is considered safe here if its implementation
3129 * is very close to the fop_getpage() implementation.
3130 * e.g. It zeroes out the part of the page beyond EOF. Doesn't
3131 * panic if there are file holes but instead returns an error.
3132 * Doesn't assume file won't be changed by user writes, etc.
3134 * return 0 otherwise.
3136 * For now allow segvn to only use fop_pageio() with ufs and nfs.
3139 vn_vmpss_usepageio(vnode_t *vp)
3141 vfs_t *vfsp = vp->v_vfsp;
3142 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3143 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3144 char **fsok = pageio_ok_fss;
3146 if (fsname == NULL) {
3147 return (0);
3150 for (; *fsok; fsok++) {
3151 if (strcmp(*fsok, fsname) == 0) {
3152 return (1);
3155 return (0);
3158 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3161 fop_open(
3162 vnode_t **vpp,
3163 int mode,
3164 cred_t *cr,
3165 caller_context_t *ct)
3167 int ret;
3168 vnode_t *vp = *vpp;
3170 VN_HOLD(vp);
3172 * Adding to the vnode counts before calling open
3173 * avoids the need for a mutex. It circumvents a race
3174 * condition where a query made on the vnode counts results in a
3175 * false negative. The inquirer goes away believing the file is
3176 * not open when there is an open on the file already under way.
3178 * The counts are meant to prevent NFS from granting a delegation
3179 * when it would be dangerous to do so.
3181 * The vnode counts are only kept on regular files
3183 if ((*vpp)->v_type == VREG) {
3184 if (mode & FREAD)
3185 atomic_inc_32(&(*vpp)->v_rdcnt);
3186 if (mode & FWRITE)
3187 atomic_inc_32(&(*vpp)->v_wrcnt);
3190 VOPXID_MAP_CR(vp, cr);
3192 if ((*vpp)->v_op->vop_open == NULL)
3193 ret = ENOSYS;
3194 else
3195 ret = (*vpp)->v_op->vop_open(vpp, mode, cr, ct);
3197 if (ret) {
3199 * Use the saved vp just in case the vnode ptr got trashed
3200 * by the error.
3202 VOPSTATS_UPDATE(vp, open);
3203 if ((vp->v_type == VREG) && (mode & FREAD))
3204 atomic_dec_32(&vp->v_rdcnt);
3205 if ((vp->v_type == VREG) && (mode & FWRITE))
3206 atomic_dec_32(&vp->v_wrcnt);
3207 } else {
3209 * Some filesystems will return a different vnode,
3210 * but the same path was still used to open it.
3211 * So if we do change the vnode and need to
3212 * copy over the path, do so here, rather than special
3213 * casing each filesystem. Adjust the vnode counts to
3214 * reflect the vnode switch.
3216 VOPSTATS_UPDATE(*vpp, open);
3217 if (*vpp != vp && *vpp != NULL) {
3218 vn_copypath(vp, *vpp);
3219 if (((*vpp)->v_type == VREG) && (mode & FREAD))
3220 atomic_inc_32(&(*vpp)->v_rdcnt);
3221 if ((vp->v_type == VREG) && (mode & FREAD))
3222 atomic_dec_32(&vp->v_rdcnt);
3223 if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3224 atomic_inc_32(&(*vpp)->v_wrcnt);
3225 if ((vp->v_type == VREG) && (mode & FWRITE))
3226 atomic_dec_32(&vp->v_wrcnt);
3229 VN_RELE(vp);
3230 return (ret);
3234 fop_close(
3235 vnode_t *vp,
3236 int flag,
3237 int count,
3238 offset_t offset,
3239 cred_t *cr,
3240 caller_context_t *ct)
3242 int err;
3244 VOPXID_MAP_CR(vp, cr);
3246 if (vp->v_op->vop_close == NULL)
3247 err = ENOSYS;
3248 else
3249 err = vp->v_op->vop_close(vp, flag, count, offset, cr, ct);
3251 VOPSTATS_UPDATE(vp, close);
3253 * Check the passed-in count to handle possible dups. Vnode counts are only
3254 * kept on regular files
3256 if ((vp->v_type == VREG) && (count == 1)) {
3257 if (flag & FREAD) {
3258 ASSERT(vp->v_rdcnt > 0);
3259 atomic_dec_32(&vp->v_rdcnt);
3261 if (flag & FWRITE) {
3262 ASSERT(vp->v_wrcnt > 0);
3263 atomic_dec_32(&vp->v_wrcnt);
3266 return (err);
3270 fop_read(
3271 vnode_t *vp,
3272 uio_t *uiop,
3273 int ioflag,
3274 cred_t *cr,
3275 caller_context_t *ct)
3277 int err;
3278 ssize_t resid_start = uiop->uio_resid;
3280 VOPXID_MAP_CR(vp, cr);
3282 if (vp->v_op->vop_read == NULL)
3283 err = ENOSYS;
3284 else
3285 err = vp->v_op->vop_read(vp, uiop, ioflag, cr, ct);
3287 VOPSTATS_UPDATE_IO(vp, read,
3288 read_bytes, (resid_start - uiop->uio_resid));
3289 return (err);
3293 fop_write(
3294 vnode_t *vp,
3295 uio_t *uiop,
3296 int ioflag,
3297 cred_t *cr,
3298 caller_context_t *ct)
3300 int err;
3301 ssize_t resid_start = uiop->uio_resid;
3303 VOPXID_MAP_CR(vp, cr);
3305 if (vp->v_op->vop_write == NULL)
3306 err = ENOSYS;
3307 else
3308 err = vp->v_op->vop_write(vp, uiop, ioflag, cr, ct);
3310 VOPSTATS_UPDATE_IO(vp, write,
3311 write_bytes, (resid_start - uiop->uio_resid));
3312 return (err);
3316 fop_ioctl(
3317 vnode_t *vp,
3318 int cmd,
3319 intptr_t arg,
3320 int flag,
3321 cred_t *cr,
3322 int *rvalp,
3323 caller_context_t *ct)
3325 int err;
3327 VOPXID_MAP_CR(vp, cr);
3329 if (vp->v_op->vop_ioctl == NULL)
3330 err = ENOSYS;
3331 else
3332 err = vp->v_op->vop_ioctl(vp, cmd, arg, flag, cr, rvalp, ct);
3334 VOPSTATS_UPDATE(vp, ioctl);
3335 return (err);
3339 fop_setfl(
3340 vnode_t *vp,
3341 int oflags,
3342 int nflags,
3343 cred_t *cr,
3344 caller_context_t *ct)
3346 int err;
3348 VOPXID_MAP_CR(vp, cr);
3350 if (vp->v_op->vop_setfl == NULL)
3351 err = fs_setfl(vp, oflags, nflags, cr, ct);
3352 else
3353 err = vp->v_op->vop_setfl(vp, oflags, nflags, cr, ct);
3355 VOPSTATS_UPDATE(vp, setfl);
3356 return (err);
3360 fop_getattr(
3361 vnode_t *vp,
3362 vattr_t *vap,
3363 int flags,
3364 cred_t *cr,
3365 caller_context_t *ct)
3367 int err;
3369 VOPXID_MAP_CR(vp, cr);
3372 * If this file system doesn't understand the xvattr extensions
3373 * then turn off the xvattr bit.
3375 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3376 vap->va_mask &= ~AT_XVATTR;
3380 * We're only allowed to skip the ACL check iff we used a 32 bit
3381 * ACE mask with fop_access() to determine permissions.
3383 if ((flags & ATTR_NOACLCHECK) &&
3384 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
3385 return (EINVAL);
3387 if (vp->v_op->vop_getattr == NULL)
3388 err = ENOSYS;
3389 else
3390 err = vp->v_op->vop_getattr(vp, vap, flags, cr, ct);
3392 VOPSTATS_UPDATE(vp, getattr);
3393 return (err);
3397 fop_setattr(
3398 vnode_t *vp,
3399 vattr_t *vap,
3400 int flags,
3401 cred_t *cr,
3402 caller_context_t *ct)
3404 int err;
3406 VOPXID_MAP_CR(vp, cr);
3409 * If this file system doesn't understand the xvattr extensions
3410 * then turn off the xvattr bit.
3412 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3413 vap->va_mask &= ~AT_XVATTR;
3417 * We're only allowed to skip the ACL check iff we used a 32 bit
3418 * ACE mask with fop_access() to determine permissions.
3420 if ((flags & ATTR_NOACLCHECK) &&
3421 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0)
3422 return (EINVAL);
3424 if (vp->v_op->vop_setattr == NULL)
3425 err = ENOSYS;
3426 else
3427 err = vp->v_op->vop_setattr(vp, vap, flags, cr, ct);
3429 VOPSTATS_UPDATE(vp, setattr);
3430 return (err);
3434 fop_access(
3435 vnode_t *vp,
3436 int mode,
3437 int flags,
3438 cred_t *cr,
3439 caller_context_t *ct)
3441 int err;
3443 if ((flags & V_ACE_MASK) &&
3444 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3445 return (EINVAL);
3448 VOPXID_MAP_CR(vp, cr);
3450 if (vp->v_op->vop_access == NULL)
3451 err = ENOSYS;
3452 else
3453 err = vp->v_op->vop_access(vp, mode, flags, cr, ct);
3455 VOPSTATS_UPDATE(vp, access);
3456 return (err);
3460 fop_lookup(
3461 vnode_t *dvp,
3462 char *nm,
3463 vnode_t **vpp,
3464 pathname_t *pnp,
3465 int flags,
3466 vnode_t *rdir,
3467 cred_t *cr,
3468 caller_context_t *ct,
3469 int *deflags, /* Returned per-dirent flags */
3470 pathname_t *ppnp) /* Returned case-preserved name in directory */
3472 int ret;
3475 * If this file system doesn't support case-insensitive access
3476 * and said access is requested, fail quickly. It is required
3477 * that if the vfs supports case-insensitive lookup, it also
3478 * supports extended dirent flags.
3480 if (flags & FIGNORECASE &&
3481 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3482 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3483 return (EINVAL);
3485 VOPXID_MAP_CR(dvp, cr);
3487 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3488 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3489 } else if (dvp->v_op->vop_lookup == NULL) {
3490 ret = ENOSYS;
3491 } else {
3492 ret = dvp->v_op->vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
3493 cr, ct, deflags, ppnp);
3496 if (ret == 0 && *vpp) {
3497 VOPSTATS_UPDATE(*vpp, lookup);
3498 if ((*vpp)->v_path == NULL) {
3499 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3503 return (ret);
3507 fop_create(
3508 vnode_t *dvp,
3509 char *name,
3510 vattr_t *vap,
3511 vcexcl_t excl,
3512 int mode,
3513 vnode_t **vpp,
3514 cred_t *cr,
3515 int flags,
3516 caller_context_t *ct,
3517 vsecattr_t *vsecp) /* ACL to set during create */
3519 int ret;
3521 if (vsecp != NULL &&
3522 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3523 return (EINVAL);
3526 * If this file system doesn't support case-insensitive access
3527 * and said access is requested, fail quickly.
3529 if (flags & FIGNORECASE &&
3530 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3531 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3532 return (EINVAL);
3534 VOPXID_MAP_CR(dvp, cr);
3536 if (dvp->v_op->vop_create == NULL)
3537 ret = ENOSYS;
3538 else
3539 ret = dvp->v_op->vop_create(dvp, name, vap, excl, mode, vpp,
3540 cr, flags, ct, vsecp);
3542 if (ret == 0 && *vpp) {
3543 VOPSTATS_UPDATE(*vpp, create);
3544 if ((*vpp)->v_path == NULL) {
3545 vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3549 return (ret);
3553 fop_remove(
3554 vnode_t *dvp,
3555 char *nm,
3556 cred_t *cr,
3557 caller_context_t *ct,
3558 int flags)
3560 int err;
3563 * If this file system doesn't support case-insensitive access
3564 * and said access is requested, fail quickly.
3566 if (flags & FIGNORECASE &&
3567 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3568 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3569 return (EINVAL);
3571 VOPXID_MAP_CR(dvp, cr);
3573 if (dvp->v_op->vop_remove == NULL)
3574 err = ENOSYS;
3575 else
3576 err = dvp->v_op->vop_remove(dvp, nm, cr, ct, flags);
3578 VOPSTATS_UPDATE(dvp, remove);
3579 return (err);
3583 fop_link(
3584 vnode_t *tdvp,
3585 vnode_t *svp,
3586 char *tnm,
3587 cred_t *cr,
3588 caller_context_t *ct,
3589 int flags)
3591 int err;
3594 * If the target file system doesn't support case-insensitive access
3595 * and said access is requested, fail quickly.
3597 if (flags & FIGNORECASE &&
3598 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3599 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3600 return (EINVAL);
3602 VOPXID_MAP_CR(tdvp, cr);
3604 if (tdvp->v_op->vop_link == NULL)
3605 err = ENOSYS;
3606 else
3607 err = tdvp->v_op->vop_link(tdvp, svp, tnm, cr, ct, flags);
3609 VOPSTATS_UPDATE(tdvp, link);
3610 return (err);
3614 fop_rename(
3615 vnode_t *sdvp,
3616 char *snm,
3617 vnode_t *tdvp,
3618 char *tnm,
3619 cred_t *cr,
3620 caller_context_t *ct,
3621 int flags)
3623 int err;
3626 * If the file system involved does not support
3627 * case-insensitive access and said access is requested, fail
3628 * quickly.
3630 if (flags & FIGNORECASE &&
3631 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3632 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3633 return (EINVAL);
3635 VOPXID_MAP_CR(tdvp, cr);
3637 if (sdvp->v_op->vop_rename == NULL)
3638 err = ENOSYS;
3639 else
3640 err = sdvp->v_op->vop_rename(sdvp, snm, tdvp, tnm, cr, ct,
3641 flags);
3643 VOPSTATS_UPDATE(sdvp, rename);
3644 return (err);
3648 fop_mkdir(
3649 vnode_t *dvp,
3650 char *dirname,
3651 vattr_t *vap,
3652 vnode_t **vpp,
3653 cred_t *cr,
3654 caller_context_t *ct,
3655 int flags,
3656 vsecattr_t *vsecp) /* ACL to set during create */
3658 int ret;
3660 if (vsecp != NULL &&
3661 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3662 return (EINVAL);
3665 * If this file system doesn't support case-insensitive access
3666 * and said access is requested, fail quickly.
3668 if (flags & FIGNORECASE &&
3669 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3670 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3671 return (EINVAL);
3673 VOPXID_MAP_CR(dvp, cr);
3675 if (dvp->v_op->vop_mkdir == NULL)
3676 ret = ENOSYS;
3677 else
3678 ret = dvp->v_op->vop_mkdir(dvp, dirname, vap, vpp, cr, ct,
3679 flags, vsecp);
3681 if (ret == 0 && *vpp) {
3682 VOPSTATS_UPDATE(*vpp, mkdir);
3683 if ((*vpp)->v_path == NULL) {
3684 vn_setpath(rootdir, dvp, *vpp, dirname,
3685 strlen(dirname));
3689 return (ret);
3693 fop_rmdir(
3694 vnode_t *dvp,
3695 char *nm,
3696 vnode_t *cdir,
3697 cred_t *cr,
3698 caller_context_t *ct,
3699 int flags)
3701 int err;
3704 * If this file system doesn't support case-insensitive access
3705 * and said access is requested, fail quickly.
3707 if (flags & FIGNORECASE &&
3708 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3709 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3710 return (EINVAL);
3712 VOPXID_MAP_CR(dvp, cr);
3714 if (dvp->v_op->vop_rmdir == NULL)
3715 err = ENOSYS;
3716 else
3717 err = dvp->v_op->vop_rmdir(dvp, nm, cdir, cr, ct, flags);
3719 VOPSTATS_UPDATE(dvp, rmdir);
3720 return (err);
3724 fop_readdir(
3725 vnode_t *vp,
3726 uio_t *uiop,
3727 cred_t *cr,
3728 int *eofp,
3729 caller_context_t *ct,
3730 int flags)
3732 int err;
3733 ssize_t resid_start = uiop->uio_resid;
3736 * If this file system doesn't support retrieving directory
3737 * entry flags and said access is requested, fail quickly.
3739 if (flags & V_RDDIR_ENTFLAGS &&
3740 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3741 return (EINVAL);
3743 VOPXID_MAP_CR(vp, cr);
3745 if (vp->v_op->vop_readdir == NULL)
3746 err = ENOSYS;
3747 else
3748 err = vp->v_op->vop_readdir(vp, uiop, cr, eofp, ct, flags);
3750 VOPSTATS_UPDATE_IO(vp, readdir,
3751 readdir_bytes, (resid_start - uiop->uio_resid));
3752 return (err);
3756 fop_symlink(
3757 vnode_t *dvp,
3758 char *linkname,
3759 vattr_t *vap,
3760 char *target,
3761 cred_t *cr,
3762 caller_context_t *ct,
3763 int flags)
3765 int err;
3766 xvattr_t xvattr;
3769 * If this file system doesn't support case-insensitive access
3770 * and said access is requested, fail quickly.
3772 if (flags & FIGNORECASE &&
3773 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3774 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3775 return (EINVAL);
3777 VOPXID_MAP_CR(dvp, cr);
3779 /* check for reparse point */
3780 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3781 (strncmp(target, FS_REPARSE_TAG_STR,
3782 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3783 if (!fs_reparse_mark(target, vap, &xvattr))
3784 vap = (vattr_t *)&xvattr;
3787 if (dvp->v_op->vop_symlink == NULL)
3788 err = ENOSYS;
3789 else
3790 err = dvp->v_op->vop_symlink(dvp, linkname, vap, target, cr,
3791 ct, flags);
3793 VOPSTATS_UPDATE(dvp, symlink);
3794 return (err);
3798 fop_readlink(
3799 vnode_t *vp,
3800 uio_t *uiop,
3801 cred_t *cr,
3802 caller_context_t *ct)
3804 int err;
3806 VOPXID_MAP_CR(vp, cr);
3808 if (vp->v_op->vop_readlink == NULL)
3809 err = ENOSYS;
3810 else
3811 err = vp->v_op->vop_readlink(vp, uiop, cr, ct);
3813 VOPSTATS_UPDATE(vp, readlink);
3814 return (err);
3818 fop_fsync(
3819 vnode_t *vp,
3820 int syncflag,
3821 cred_t *cr,
3822 caller_context_t *ct)
3824 int err;
3826 VOPXID_MAP_CR(vp, cr);
3828 if (vp->v_op->vop_fsync == NULL)
3829 err = ENOSYS;
3830 else
3831 err = vp->v_op->vop_fsync(vp, syncflag, cr, ct);
3833 VOPSTATS_UPDATE(vp, fsync);
3834 return (err);
3837 void
3838 fop_inactive(
3839 vnode_t *vp,
3840 cred_t *cr,
3841 caller_context_t *ct)
3843 /* Need to update stats before vop call since we may lose the vnode */
3844 VOPSTATS_UPDATE(vp, inactive);
3846 VOPXID_MAP_CR(vp, cr);
3848 if (vp->v_op->vop_inactive != NULL)
3849 vp->v_op->vop_inactive(vp, cr, ct);
3853 fop_fid(
3854 vnode_t *vp,
3855 fid_t *fidp,
3856 caller_context_t *ct)
3858 int err;
3860 if (vp->v_op->vop_fid == NULL)
3861 err = ENOSYS;
3862 else
3863 err = vp->v_op->vop_fid(vp, fidp, ct);
3865 VOPSTATS_UPDATE(vp, fid);
3866 return (err);
3870 fop_rwlock(
3871 vnode_t *vp,
3872 int write_lock,
3873 caller_context_t *ct)
3875 int ret;
3877 if (vp->v_op->vop_rwlock == NULL)
3878 ret = fs_rwlock(vp, write_lock, ct);
3879 else
3880 ret = vp->v_op->vop_rwlock(vp, write_lock, ct);
3882 VOPSTATS_UPDATE(vp, rwlock);
3883 return (ret);
3886 void
3887 fop_rwunlock(
3888 vnode_t *vp,
3889 int write_lock,
3890 caller_context_t *ct)
3892 if (vp->v_op->vop_rwunlock == NULL)
3893 fs_rwunlock(vp, write_lock, ct);
3894 else
3895 vp->v_op->vop_rwunlock(vp, write_lock, ct);
3897 VOPSTATS_UPDATE(vp, rwunlock);
3901 fop_seek(
3902 vnode_t *vp,
3903 offset_t ooff,
3904 offset_t *noffp,
3905 caller_context_t *ct)
3907 int err;
3909 if (vp->v_op->vop_seek == NULL)
3910 err = ENOSYS;
3911 else
3912 err = vp->v_op->vop_seek(vp, ooff, noffp, ct);
3914 VOPSTATS_UPDATE(vp, seek);
3915 return (err);
3919 fop_cmp(
3920 vnode_t *vp1,
3921 vnode_t *vp2,
3922 caller_context_t *ct)
3924 int err;
3926 if (vp1->v_op->vop_cmp == NULL)
3927 err = fs_cmp(vp1, vp2, ct);
3928 else
3929 err = vp1->v_op->vop_cmp(vp1, vp2, ct);
3931 VOPSTATS_UPDATE(vp1, cmp);
3932 return (err);
3936 fop_frlock(
3937 vnode_t *vp,
3938 int cmd,
3939 flock64_t *bfp,
3940 int flag,
3941 offset_t offset,
3942 struct flk_callback *flk_cbp,
3943 cred_t *cr,
3944 caller_context_t *ct)
3946 int err;
3948 VOPXID_MAP_CR(vp, cr);
3950 if (vp->v_op->vop_frlock == NULL)
3951 err = fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3952 else
3953 err = vp->v_op->vop_frlock(vp, cmd, bfp, flag, offset,
3954 flk_cbp, cr, ct);
3956 VOPSTATS_UPDATE(vp, frlock);
3957 return (err);
3961 fop_space(
3962 vnode_t *vp,
3963 int cmd,
3964 flock64_t *bfp,
3965 int flag,
3966 offset_t offset,
3967 cred_t *cr,
3968 caller_context_t *ct)
3970 int err;
3972 VOPXID_MAP_CR(vp, cr);
3974 if (vp->v_op->vop_space == NULL)
3975 err = ENOSYS;
3976 else
3977 err = vp->v_op->vop_space(vp, cmd, bfp, flag, offset, cr, ct);
3979 VOPSTATS_UPDATE(vp, space);
3980 return (err);
3984 fop_realvp(
3985 vnode_t *vp,
3986 vnode_t **vpp,
3987 caller_context_t *ct)
3989 int err;
3991 if (vp->v_op->vop_realvp == NULL)
3992 err = ENOSYS;
3993 else
3994 err = vp->v_op->vop_realvp(vp, vpp, ct);
3996 VOPSTATS_UPDATE(vp, realvp);
3997 return (err);
4001 fop_getpage(
4002 vnode_t *vp,
4003 offset_t off,
4004 size_t len,
4005 uint_t *protp,
4006 page_t **plarr,
4007 size_t plsz,
4008 struct seg *seg,
4009 caddr_t addr,
4010 enum seg_rw rw,
4011 cred_t *cr,
4012 caller_context_t *ct)
4014 int err;
4016 VOPXID_MAP_CR(vp, cr);
4018 if (vp->v_op->vop_getpage == NULL)
4019 err = ENOSYS;
4020 else
4021 err = vp->v_op->vop_getpage(vp, off, len, protp, plarr,
4022 plsz, seg, addr, rw, cr, ct);
4024 VOPSTATS_UPDATE(vp, getpage);
4025 return (err);
4029 fop_putpage(
4030 vnode_t *vp,
4031 offset_t off,
4032 size_t len,
4033 int flags,
4034 cred_t *cr,
4035 caller_context_t *ct)
4037 int err;
4039 VOPXID_MAP_CR(vp, cr);
4041 if (vp->v_op->vop_putpage == NULL)
4042 err = ENOSYS;
4043 else
4044 err = vp->v_op->vop_putpage(vp, off, len, flags, cr, ct);
4045 VOPSTATS_UPDATE(vp, putpage);
4046 return (err);
4050 fop_map(
4051 vnode_t *vp,
4052 offset_t off,
4053 struct as *as,
4054 caddr_t *addrp,
4055 size_t len,
4056 uchar_t prot,
4057 uchar_t maxprot,
4058 uint_t flags,
4059 cred_t *cr,
4060 caller_context_t *ct)
4062 int err;
4064 VOPXID_MAP_CR(vp, cr);
4066 if (vp->v_op->vop_map == NULL)
4067 err = ENOSYS;
4068 else
4069 err = vp->v_op->vop_map(vp, off, as, addrp, len, prot,
4070 maxprot, flags, cr, ct);
4072 VOPSTATS_UPDATE(vp, map);
4073 return (err);
4077 fop_addmap(
4078 vnode_t *vp,
4079 offset_t off,
4080 struct as *as,
4081 caddr_t addr,
4082 size_t len,
4083 uchar_t prot,
4084 uchar_t maxprot,
4085 uint_t flags,
4086 cred_t *cr,
4087 caller_context_t *ct)
4089 int error;
4090 u_longlong_t delta;
4092 VOPXID_MAP_CR(vp, cr);
4094 if (vp->v_op->vop_addmap == NULL)
4095 error = ENOSYS;
4096 else
4097 error = vp->v_op->vop_addmap(vp, off, as, addr, len, prot,
4098 maxprot, flags, cr, ct);
4100 if ((!error) && (vp->v_type == VREG)) {
4101 delta = (u_longlong_t)btopr(len);
4103 * If file is declared MAP_PRIVATE, it can't be written back
4104 * even if open for write. Handle as read.
4106 if (flags & MAP_PRIVATE) {
4107 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4108 (int64_t)delta);
4109 } else {
4111 * atomic_add_64 forces the fetch of a 64 bit value to
4112 * be atomic on 32 bit machines
4114 if (maxprot & PROT_WRITE)
4115 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4116 (int64_t)delta);
4117 if (maxprot & PROT_READ)
4118 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4119 (int64_t)delta);
4120 if (maxprot & PROT_EXEC)
4121 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4122 (int64_t)delta);
4125 VOPSTATS_UPDATE(vp, addmap);
4126 return (error);
4130 fop_delmap(
4131 vnode_t *vp,
4132 offset_t off,
4133 struct as *as,
4134 caddr_t addr,
4135 size_t len,
4136 uint_t prot,
4137 uint_t maxprot,
4138 uint_t flags,
4139 cred_t *cr,
4140 caller_context_t *ct)
4142 int error;
4143 u_longlong_t delta;
4145 VOPXID_MAP_CR(vp, cr);
4147 if (vp->v_op->vop_delmap == NULL)
4148 error = ENOSYS;
4149 else
4150 error = vp->v_op->vop_delmap(vp, off, as, addr, len, prot,
4151 maxprot, flags, cr, ct);
4154 * NFS calls into delmap twice: the first time
4155 * it simply establishes a callback mechanism and returns EAGAIN,
4156 * while the real work is done upon the second invocation.
4157 * We have to detect this here and only decrement the counts upon
4158 * the second delmap request.
4160 if ((error != EAGAIN) && (vp->v_type == VREG)) {
4162 delta = (u_longlong_t)btopr(len);
4164 if (flags & MAP_PRIVATE) {
4165 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4166 (int64_t)(-delta));
4167 } else {
4169 * atomic_add_64 forces the fetch of a 64 bit value
4170 * to be atomic on 32 bit machines
4172 if (maxprot & PROT_WRITE)
4173 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4174 (int64_t)(-delta));
4175 if (maxprot & PROT_READ)
4176 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4177 (int64_t)(-delta));
4178 if (maxprot & PROT_EXEC)
4179 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4180 (int64_t)(-delta));
4183 VOPSTATS_UPDATE(vp, delmap);
4184 return (error);
4189 fop_poll(
4190 vnode_t *vp,
4191 short events,
4192 int anyyet,
4193 short *reventsp,
4194 struct pollhead **phpp,
4195 caller_context_t *ct)
4197 int err;
4199 if (vp->v_op->vop_poll == NULL)
4200 err = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
4201 else
4202 err = vp->v_op->vop_poll(vp, events, anyyet, reventsp, phpp,
4203 ct);
4205 VOPSTATS_UPDATE(vp, poll);
4206 return (err);
4210 fop_dump(
4211 vnode_t *vp,
4212 caddr_t addr,
4213 offset_t lbdn,
4214 offset_t dblks,
4215 caller_context_t *ct)
4217 int err;
4219 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4220 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4221 return (EIO);
4223 if (vp->v_op->vop_dump == NULL)
4224 err = ENOSYS;
4225 else
4226 err = vp->v_op->vop_dump(vp, addr, lbdn, dblks, ct);
4228 VOPSTATS_UPDATE(vp, dump);
4229 return (err);
4233 fop_pathconf(
4234 vnode_t *vp,
4235 int cmd,
4236 ulong_t *valp,
4237 cred_t *cr,
4238 caller_context_t *ct)
4240 int err;
4242 VOPXID_MAP_CR(vp, cr);
4244 if (vp->v_op->vop_pathconf == NULL)
4245 err = fs_pathconf(vp, cmd, valp, cr, ct);
4246 else
4247 err = vp->v_op->vop_pathconf(vp, cmd, valp, cr, ct);
4249 VOPSTATS_UPDATE(vp, pathconf);
4250 return (err);
4254 fop_pageio(
4255 vnode_t *vp,
4256 struct page *pp,
4257 uoff_t io_off,
4258 size_t io_len,
4259 int flags,
4260 cred_t *cr,
4261 caller_context_t *ct)
4263 int err;
4265 VOPXID_MAP_CR(vp, cr);
4267 if (vp->v_op->vop_pageio == NULL)
4268 err = ENOSYS;
4269 else
4270 err = vp->v_op->vop_pageio(vp, pp, io_off, io_len, flags,
4271 cr, ct);
4273 VOPSTATS_UPDATE(vp, pageio);
4274 return (err);
4278 fop_dumpctl(
4279 vnode_t *vp,
4280 int action,
4281 offset_t *blkp,
4282 caller_context_t *ct)
4284 int err;
4286 if (vp->v_op->vop_dumpctl == NULL)
4287 err = ENOSYS;
4288 else
4289 err = vp->v_op->vop_dumpctl(vp, action, blkp, ct);
4291 VOPSTATS_UPDATE(vp, dumpctl);
4292 return (err);
4295 void
4296 fop_dispose(
4297 vnode_t *vp,
4298 page_t *pp,
4299 int flag,
4300 int dn,
4301 cred_t *cr,
4302 caller_context_t *ct)
4304 /* Must do stats first since it's possible to lose the vnode */
4305 VOPSTATS_UPDATE(vp, dispose);
4307 VOPXID_MAP_CR(vp, cr);
4309 if (vp->v_op->vop_dispose == NULL)
4310 fs_dispose(vp, pp, flag, dn, cr, ct);
4311 else
4312 vp->v_op->vop_dispose(vp, pp, flag, dn, cr, ct);
4316 fop_setsecattr(
4317 vnode_t *vp,
4318 vsecattr_t *vsap,
4319 int flag,
4320 cred_t *cr,
4321 caller_context_t *ct)
4323 int err;
4325 VOPXID_MAP_CR(vp, cr);
4328 * We're only allowed to skip the ACL check iff we used a 32 bit
4329 * ACE mask with fop_access() to determine permissions.
4331 if ((flag & ATTR_NOACLCHECK) &&
4332 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4333 return (EINVAL);
4336 if (vp->v_op->vop_setsecattr == NULL)
4337 err = ENOSYS;
4338 else
4339 err = vp->v_op->vop_setsecattr(vp, vsap, flag, cr, ct);
4341 VOPSTATS_UPDATE(vp, setsecattr);
4342 return (err);
4346 fop_getsecattr(
4347 vnode_t *vp,
4348 vsecattr_t *vsap,
4349 int flag,
4350 cred_t *cr,
4351 caller_context_t *ct)
4353 int err;
4356 * We're only allowed to skip the ACL check iff we used a 32 bit
4357 * ACE mask with fop_access() to determine permissions.
4359 if ((flag & ATTR_NOACLCHECK) &&
4360 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4361 return (EINVAL);
4364 VOPXID_MAP_CR(vp, cr);
4366 if (vp->v_op->vop_getsecattr == NULL)
4367 err = fs_fab_acl(vp, vsap, flag, cr, ct);
4368 else
4369 err = vp->v_op->vop_getsecattr(vp, vsap, flag, cr, ct);
4371 VOPSTATS_UPDATE(vp, getsecattr);
4372 return (err);
4376 fop_shrlock(
4377 vnode_t *vp,
4378 int cmd,
4379 struct shrlock *shr,
4380 int flag,
4381 cred_t *cr,
4382 caller_context_t *ct)
4384 int err;
4386 VOPXID_MAP_CR(vp, cr);
4388 if (vp->v_op->vop_shrlock == NULL)
4389 err = fs_shrlock(vp, cmd, shr, flag, cr, ct);
4390 else
4391 err = vp->v_op->vop_shrlock(vp, cmd, shr, flag, cr, ct);
4393 VOPSTATS_UPDATE(vp, shrlock);
4394 return (err);
4398 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4399 caller_context_t *ct)
4401 int err;
4403 if (vp->v_op->vop_vnevent == NULL)
4404 err = ENOTSUP;
4405 else
4406 err = vp->v_op->vop_vnevent(vp, vnevent, dvp, fnm, ct);
4408 VOPSTATS_UPDATE(vp, vnevent);
4409 return (err);
4413 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4414 caller_context_t *ct)
4416 int err;
4418 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4419 return (ENOTSUP);
4421 if (vp->v_op->vop_reqzcbuf == NULL)
4422 err = ENOSYS;
4423 else
4424 err = vp->v_op->vop_reqzcbuf(vp, ioflag, uiop, cr, ct);
4426 VOPSTATS_UPDATE(vp, reqzcbuf);
4427 return (err);
4431 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4433 int err;
4435 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4436 return (ENOTSUP);
4438 if (vp->v_op->vop_retzcbuf == NULL)
4439 err = ENOSYS;
4440 else
4441 err = vp->v_op->vop_retzcbuf(vp, uiop, cr, ct);
4443 VOPSTATS_UPDATE(vp, retzcbuf);
4444 return (err);
4448 * Default destructor
4449 * Needed because NULL destructor means that the key is unused
4451 /* ARGSUSED */
4452 void
4453 vsd_defaultdestructor(void *value)
4457 * Create a key (index into per vnode array)
4458 * Locks out vsd_create, vsd_destroy, and vsd_free
4459 * May allocate memory with lock held
4461 void
4462 vsd_create(uint_t *keyp, void (*destructor)(void *))
4464 int i;
4465 uint_t nkeys;
4468 * if key is allocated, do nothing
4470 mutex_enter(&vsd_lock);
4471 if (*keyp) {
4472 mutex_exit(&vsd_lock);
4473 return;
4476 * find an unused key
4478 if (destructor == NULL)
4479 destructor = vsd_defaultdestructor;
4481 for (i = 0; i < vsd_nkeys; ++i)
4482 if (vsd_destructor[i] == NULL)
4483 break;
4486 * if no unused keys, increase the size of the destructor array
4488 if (i == vsd_nkeys) {
4489 if ((nkeys = (vsd_nkeys << 1)) == 0)
4490 nkeys = 1;
4491 vsd_destructor =
4492 (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4493 (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4494 (size_t)(nkeys * sizeof (void (*)(void *))));
4495 vsd_nkeys = nkeys;
4499 * allocate the next available unused key
4501 vsd_destructor[i] = destructor;
4502 *keyp = i + 1;
4504 /* create vsd_list, if it doesn't exist */
4505 if (vsd_list == NULL) {
4506 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4507 list_create(vsd_list, sizeof (struct vsd_node),
4508 offsetof(struct vsd_node, vs_nodes));
4511 mutex_exit(&vsd_lock);
4515 * Destroy a key
4517 * Assumes that the caller is preventing vsd_set and vsd_get
4518 * Locks out vsd_create, vsd_destroy, and vsd_free
4519 * May free memory with lock held
4521 void
4522 vsd_destroy(uint_t *keyp)
4524 uint_t key;
4525 struct vsd_node *vsd;
4528 * protect the key namespace and our destructor lists
4530 mutex_enter(&vsd_lock);
4531 key = *keyp;
4532 *keyp = 0;
4534 ASSERT(key <= vsd_nkeys);
4537 * if the key is valid
4539 if (key != 0) {
4540 uint_t k = key - 1;
4542 * for every vnode with VSD, call key's destructor
4544 for (vsd = list_head(vsd_list); vsd != NULL;
4545 vsd = list_next(vsd_list, vsd)) {
4547 * no VSD for key in this vnode
4549 if (key > vsd->vs_nkeys)
4550 continue;
4552 * call destructor for key
4554 if (vsd->vs_value[k] && vsd_destructor[k])
4555 (*vsd_destructor[k])(vsd->vs_value[k]);
4557 * reset value for key
4559 vsd->vs_value[k] = NULL;
4562 * actually free the key (NULL destructor == unused)
4564 vsd_destructor[k] = NULL;
4567 mutex_exit(&vsd_lock);
4571 * Quickly return the per vnode value that was stored with the specified key
4572 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4573 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4575 void *
4576 vsd_get(vnode_t *vp, uint_t key)
4578 struct vsd_node *vsd;
4580 ASSERT(vp != NULL);
4581 ASSERT(mutex_owned(&vp->v_vsd_lock));
4583 vsd = vp->v_vsd;
4585 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4586 return (vsd->vs_value[key - 1]);
4587 return (NULL);
4591 * Set a per vnode value indexed with the specified key
4592 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4595 vsd_set(vnode_t *vp, uint_t key, void *value)
4597 struct vsd_node *vsd;
4599 ASSERT(vp != NULL);
4600 ASSERT(mutex_owned(&vp->v_vsd_lock));
4602 if (key == 0)
4603 return (EINVAL);
4605 vsd = vp->v_vsd;
4606 if (vsd == NULL)
4607 vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4610 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4611 * code won't happen and we will continue down and allocate space for
4612 * the vs_value array.
4613 * If the caller is replacing one value with another, then it is up
4614 * to the caller to free/rele/destroy the previous value (if needed).
4616 if (key <= vsd->vs_nkeys) {
4617 vsd->vs_value[key - 1] = value;
4618 return (0);
4621 ASSERT(key <= vsd_nkeys);
4623 if (vsd->vs_nkeys == 0) {
4624 mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4626 * Link onto list of all VSD nodes.
4628 list_insert_head(vsd_list, vsd);
4629 mutex_exit(&vsd_lock);
4633 * Allocate vnode local storage and set the value for key
4635 vsd->vs_value = vsd_realloc(vsd->vs_value,
4636 vsd->vs_nkeys * sizeof (void *),
4637 key * sizeof (void *));
4638 vsd->vs_nkeys = key;
4639 vsd->vs_value[key - 1] = value;
4641 return (0);
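/*
 * Illustrative sketch (not part of the build): typical VSD usage.  A key
 * is created once, and per-vnode values are stored and fetched while
 * holding v_vsd_lock, as vsd_set()/vsd_get() require.  All example_*
 * names are hypothetical.
 */
#if 0
static uint_t example_vsd_key;

static void
example_vsd_dtor(void *value)
{
	kmem_free(value, sizeof (uint64_t));
}

static void
example_vsd_attach(vnode_t *vp)
{
	uint64_t *datap;

	vsd_create(&example_vsd_key, example_vsd_dtor);	/* idempotent */
	datap = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
	mutex_enter(&vp->v_vsd_lock);
	(void) vsd_set(vp, example_vsd_key, datap);
	mutex_exit(&vp->v_vsd_lock);
}

static uint64_t *
example_vsd_lookup(vnode_t *vp)
{
	uint64_t *datap;

	mutex_enter(&vp->v_vsd_lock);
	datap = vsd_get(vp, example_vsd_key);
	mutex_exit(&vp->v_vsd_lock);
	return (datap);
}
#endif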
4645 * Called from vn_free() to run the destructor function for each vsd
4646 * Locks out vsd_create and vsd_destroy
4647 * Assumes that the destructor *DOES NOT* use vsd
4649 void
4650 vsd_free(vnode_t *vp)
4652 int i;
4653 struct vsd_node *vsd = vp->v_vsd;
4655 if (vsd == NULL)
4656 return;
4658 if (vsd->vs_nkeys == 0) {
4659 kmem_free(vsd, sizeof (*vsd));
4660 vp->v_vsd = NULL;
4661 return;
4665 * lock out vsd_create and vsd_destroy, call
4666 * the destructor, and mark the value as destroyed.
4668 mutex_enter(&vsd_lock);
4670 for (i = 0; i < vsd->vs_nkeys; i++) {
4671 if (vsd->vs_value[i] && vsd_destructor[i])
4672 (*vsd_destructor[i])(vsd->vs_value[i]);
4673 vsd->vs_value[i] = NULL;
4677 * remove from linked list of VSD nodes
4679 list_remove(vsd_list, vsd);
4681 mutex_exit(&vsd_lock);
4684 * free up the VSD
4686 kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4687 kmem_free(vsd, sizeof (struct vsd_node));
4688 vp->v_vsd = NULL;
4692 * realloc
4694 static void *
4695 vsd_realloc(void *old, size_t osize, size_t nsize)
4697 void *new;
4699 new = kmem_zalloc(nsize, KM_SLEEP);
4700 if (old) {
4701 bcopy(old, new, osize);
4702 kmem_free(old, osize);
4704 return (new);
4708 * Setup the extensible system attribute for creating a reparse point.
4709 * The symlink data 'target' is validated for proper format of a reparse
4710 * string, and a check is also made to make sure the symlink data does not
4711 * point to an existing file.
4713 * Returns 0 if OK, else -1.
4715 static int
4716 fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4718 xoptattr_t *xoap;
4720 if ((!target) || (!vap) || (!xvattr))
4721 return (-1);
4723 /* validate reparse string */
4724 if (reparse_validate((const char *)target))
4725 return (-1);
4727 xva_init(xvattr);
4728 xvattr->xva_vattr = *vap;
4729 xvattr->xva_vattr.va_mask |= AT_XVATTR;
4730 xoap = xva_getxoptattr(xvattr);
4731 ASSERT(xoap);
4732 XVA_SET_REQ(xvattr, XAT_REPARSE);
4733 xoap->xoa_reparse = 1;
4735 return (0);
4739 * Function to check whether a symlink is a reparse point.
4740 * Return B_TRUE if it is a reparse point, else return B_FALSE.
4742 boolean_t
4743 vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4745 xvattr_t xvattr;
4746 xoptattr_t *xoap;
4748 if ((vp->v_type != VLNK) ||
4749 !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4750 return (B_FALSE);
4752 xva_init(&xvattr);
4753 xoap = xva_getxoptattr(&xvattr);
4754 ASSERT(xoap);
4755 XVA_SET_REQ(&xvattr, XAT_REPARSE);
4757 if (fop_getattr(vp, &xvattr.xva_vattr, 0, cr, ct))
4758 return (B_FALSE);
4760 if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4761 (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4762 return (B_FALSE);
4764 return (xoap->xoa_reparse ? B_TRUE : B_FALSE);