4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2013, 2016 Joyent, Inc. All rights reserved.
25 * Copyright (c) 2014 by Delphix. All rights reserved.
28 /* vnode ops for the /dev/zvol directory */
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
34 #include <sys/sunndi.h>
35 #include <sys/sunldi.h>
36 #include <fs/fs_subr.h>
37 #include <sys/fs/dv_node.h>
38 #include <sys/fs/sdev_impl.h>
39 #include <sys/zfs_ioctl.h>
40 #include <sys/policy.h>
42 #include <sys/vfs_opreg.h>
44 struct vnodeops
*devzvol_vnodeops
;
45 static major_t devzvol_major
;
46 static taskq_ent_t devzvol_zclist_task
;
48 static kmutex_t devzvol_mtx
;
49 /* Below are protected by devzvol_mtx */
50 static boolean_t devzvol_isopen
;
51 static boolean_t devzvol_zclist_task_running
= B_FALSE
;
52 static uint64_t devzvol_gen
= 0;
53 static uint64_t devzvol_zclist
;
54 static size_t devzvol_zclist_size
;
55 static ldi_ident_t devzvol_li
;
56 static ldi_handle_t devzvol_lh
;
59 * we need to use ddi_mod* since fs/dev gets loaded early on in
60 * startup(), and linking fs/dev to fs/zfs would drag in a lot of
61 * other stuff (like drv/random) before the rest of the system is
64 ddi_modhandle_t zfs_mod
;
66 int (*szn2m
)(char *, minor_t
*);
70 * Enable/disable snapshots from being created in /dev/zvol. By default,
71 * they are enabled, preserving the historic behavior.
73 boolean_t devzvol_snaps_allowed
= B_TRUE
;
/*
 * Calls the routine the zfs module exports as "zvol_create_minor", through
 * the szcm function pointer, for dataset name dsname and returns that
 * routine's result unchanged.
 *
 * NOTE(review): szcm is only assigned after ddi_modsym() has resolved the
 * symbol. No guard against a still-NULL pointer is visible in this listing
 * (short source lines are absent from it) -- confirm one exists.
 */
76 sdev_zvol_create_minor(char *dsname
)
80 return ((*szcm
)(dsname
));
/*
 * Calls the routine the zfs module exports as "zvol_name2minor", through the
 * szn2m function pointer, and returns that routine's result unchanged.
 * Callers in this file pass the address of a minor_t in `minor` and treat a
 * nonzero (or negative) return as failure.
 *
 * NOTE(review): no guard against szn2m still being NULL is visible in this
 * listing (short source lines are absent from it) -- confirm one exists.
 */
84 sdev_zvol_name2minor(char *dsname
, minor_t
*minor
)
88 return ((*szn2m
)(dsname
, minor
));
97 devzvol_li
= ldi_ident_from_anon();
98 if (ldi_open_by_name("/dev/zfs", FREAD
| FWRITE
, kcred
,
99 &devzvol_lh
, devzvol_li
))
101 if (zfs_mod
== NULL
&& ((zfs_mod
= ddi_modopen("fs/zfs",
102 KRTLD_MODE_FIRST
, &rc
)) == NULL
)) {
105 ASSERT(szcm
== NULL
&& szn2m
== NULL
);
106 if ((szcm
= (int (*)(char *))
107 ddi_modsym(zfs_mod
, "zvol_create_minor", &rc
)) == NULL
) {
108 cmn_err(CE_WARN
, "couldn't resolve zvol_create_minor");
111 if ((szn2m
= (int(*)(char *, minor_t
*))
112 ddi_modsym(zfs_mod
, "zvol_name2minor", &rc
)) == NULL
) {
113 cmn_err(CE_WARN
, "couldn't resolve zvol_name2minor");
116 if (ldi_get_dev(devzvol_lh
, &dv
))
118 devzvol_major
= getmajor(dv
);
127 (void) ldi_close(devzvol_lh
, FREAD
|FWRITE
, kcred
);
128 ldi_ident_release(devzvol_li
);
129 if (zfs_mod
!= NULL
) {
130 (void) ddi_modclose(zfs_mod
);
/*
 * Issues zfs ioctl `cmd` with argument `zc` on the LDI handle devzvol_lh.
 *
 * If devzvol_isopen is not yet set, devzvol_open_zfs() is called first and
 * the flag is set when it returns 0. A destination buffer is kmem_alloc'd
 * and recorded in zc->zc_nvlist_dst / zc->zc_nvlist_dst_size before the
 * ioctl; zc->zc_cookie is saved beforehand and can be restored afterwards.
 *
 * Locking: devzvol_mtx is entered and exited here for every command except
 * ZFS_IOC_POOL_CONFIGS. The visible caller for that command,
 * devzvol_update_zclist_cb(), already holds devzvol_mtx when it calls in.
 *
 * alloc_size: when NULL, the destination buffer is freed before returning.
 * NOTE(review): the non-NULL case is on lines missing from this listing;
 * presumably the buffer size is returned through *alloc_size and the caller
 * takes ownership of zc->zc_nvlist_dst, as devzvol_update_zclist_cb() does
 * -- confirm.
 */
136 devzvol_handle_ioctl(int cmd
, zfs_cmd_t
*zc
, size_t *alloc_size
)
143 if (cmd
!= ZFS_IOC_POOL_CONFIGS
)
144 mutex_enter(&devzvol_mtx
);
145 if (!devzvol_isopen
) {
146 if ((rc
= devzvol_open_zfs()) == 0) {
147 devzvol_isopen
= B_TRUE
;
149 if (cmd
!= ZFS_IOC_POOL_CONFIGS
)
150 mutex_exit(&devzvol_mtx
);
154 cookie
= zc
->zc_cookie
;
156 zc
->zc_nvlist_dst
= (uint64_t)(intptr_t)kmem_alloc(size
,
158 zc
->zc_nvlist_dst_size
= size
;
159 rc
= ldi_ioctl(devzvol_lh
, cmd
, (intptr_t)zc
, FKIOCTL
, kcred
,
/*
 * NOTE(review): the condition guarding the lines below is not in this
 * listing. They read back the size reported in zc_nvlist_dst_size, free the
 * current buffer and restore the saved cookie -- presumably a retry with a
 * larger buffer; confirm.
 */
163 newsize
= zc
->zc_nvlist_dst_size
;
164 ASSERT(newsize
> size
);
165 kmem_free((void *)(uintptr_t)zc
->zc_nvlist_dst
, size
);
167 zc
->zc_cookie
= cookie
;
170 if (alloc_size
== NULL
)
171 kmem_free((void *)(uintptr_t)zc
->zc_nvlist_dst
, size
);
174 if (cmd
!= ZFS_IOC_POOL_CONFIGS
)
175 mutex_exit(&devzvol_mtx
);
179 /* figures out if the objset exists and returns its type */
/*
 * Queries zfs about `dsname` and stores the objset type through `type`.
 *
 * A name without '/' is treated as a pool: ZFS_IOC_POOL_STATS is issued and
 * *type is set to DMU_OST_ZFS. Any other name gets ZFS_IOC_OBJSET_STATS and
 * *type is taken from zc_objset_stats.dds_type. The ioctl source nvlist is a
 * packed list holding "cachedpropsonly" = B_TRUE. The temporary zfs_cmd_t
 * and the packed nvlist are released before returning.
 *
 * A name containing '@' is a snapshot name; it is tested against
 * devzvol_snaps_allowed before any ioctl is issued.
 * NOTE(review): the statement taken when snapshots are disallowed, the
 * condition guarding the *type assignment, and the return statement are not
 * in this listing. Presumably an error is returned early and the function
 * otherwise returns rc from devzvol_handle_ioctl() -- confirm.
 * NOTE(review): no release of `nvl` itself is visible here -- confirm it is
 * freed on one of the missing lines.
 */
181 devzvol_objset_check(char *dsname
, dmu_objset_type_t
*type
)
183 boolean_t ispool
, is_snapshot
;
189 ispool
= (strchr(dsname
, '/') == NULL
);
190 is_snapshot
= (strchr(dsname
, '@') != NULL
);
192 if (is_snapshot
&& !devzvol_snaps_allowed
)
195 zc
= kmem_zalloc(sizeof (zfs_cmd_t
), KM_SLEEP
);
196 (void) strlcpy(zc
->zc_name
, dsname
, MAXPATHLEN
);
198 nvl
= fnvlist_alloc();
199 fnvlist_add_boolean_value(nvl
, "cachedpropsonly", B_TRUE
);
200 zc
->zc_nvlist_src
= (uintptr_t)fnvlist_pack(nvl
, &nvsz
);
201 zc
->zc_nvlist_src_size
= nvsz
;
204 rc
= devzvol_handle_ioctl(ispool
? ZFS_IOC_POOL_STATS
:
205 ZFS_IOC_OBJSET_STATS
, zc
, NULL
);
207 *type
= (ispool
) ? DMU_OST_ZFS
:
208 zc
->zc_objset_stats
.dds_type
;
209 fnvlist_pack_free((char *)(uintptr_t)zc
->zc_nvlist_src
, nvsz
);
210 kmem_free(zc
, sizeof (zfs_cmd_t
));
215 * Returns what the zfs dataset name should be, given the /dev/zvol
216 * path and an optional name (can be NULL).
218 * Note that if the name param is NULL, then path must be an
219 * actual dataset's directory and not one of the top-level
220 * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a
/*
 * Builds a dataset name from a /dev/zvol path and an optional last
 * component.
 *
 * The ZVOL_DIR prefix of `path` is skipped, then a following "/dsk" or
 * "/rdsk". What remains of the path and `name` (when given) are joined with
 * '/' into a kmem_zalloc'd buffer sized to fit them. Callers in this file
 * release the result with kmem_free(dsname, strlen(dsname) + 1).
 *
 * Three cases are tested for specially: path equal to ZVOL_DIR, name equal
 * to "." or "..", and name == NULL.
 * NOTE(review): the statements taken in those cases, the initial computation
 * of dslen and the return statement are not in this listing.
 * devzvol_mk_ngz_node() compares the result with NULL, so presumably NULL is
 * returned for the rejected inputs -- confirm.
 */
224 devzvol_make_dsname(const char *path
, const char *name
)
230 if (strcmp(path
, ZVOL_DIR
) == 0)
232 if (name
&& (strcmp(name
, ".") == 0 || strcmp(name
, "..") == 0))
234 ptr
= path
+ strlen(ZVOL_DIR
);
235 if (strncmp(ptr
, "/dsk", 4) == 0)
236 ptr
+= strlen("/dsk");
237 else if (strncmp(ptr
, "/rdsk", 5) == 0)
238 ptr
+= strlen("/rdsk");
244 else if (name
== NULL
)
249 dslen
++; /* plus null */
251 dslen
+= strlen(name
) + 1; /* plus slash */
252 dsname
= kmem_zalloc(dslen
, KM_SLEEP
);
254 (void) strlcpy(dsname
, ptr
, dslen
);
256 (void) strlcat(dsname
, "/", dslen
);
259 (void) strlcat(dsname
, name
, dslen
);
264 * check if the zvol's sdev_node is still valid, which means make
265 * sure the zvol is still valid. zvol minors aren't proactively
266 * destroyed when the zvol is destroyed, so we use a validator to clean
267 * these up (in other words, when such nodes are encountered during
268 * subsequent lookup() and readdir() operations) so that only valid
269 * nodes are returned. The ordering between devname_lookup_func and
270 * devzvol_validate is a little inefficient in the case of invalid
271 * or stale nodes because devname_lookup_func calls
272 * devzvol_create_{dir, link}, then the validator says it's invalid,
273 * and then the node gets cleaned up.
/*
 * Validator for sdev nodes below /dev/zvol. Outcomes visible in this
 * listing, in order:
 *
 *  - node not in state SDEV_READY                      -> SDEV_VTOR_SKIP
 *  - path is exactly ZVOL_DIR "/dsk" or ZVOL_DIR "/rdsk" -> SDEV_VTOR_VALID
 *  - (after building the dataset name) a SDEV_VTOR_INVALID return;
 *    NOTE(review): its guard is missing, presumably dsname == NULL
 *  - node without SDEV_GLOBAL and with a non-NULL sdev_origin
 *                                                       -> SDEV_VTOR_VALID
 *  - devzvol_objset_check() is run; in the branch that follows, a block or
 *    character device whose parent is a non-global profile node with a
 *    matching name (prof_name_matched()) is treated as DMU_OST_ZVOL,
 *    anything else                                      -> SDEV_VTOR_INVALID
 *  - vnode type disagrees with the objset type (link or device that is not
 *    a zvol, or directory that is a zvol)               -> SDEV_VTOR_STALE
 *  - symlink whose minor (parsed from the link target after the last ':')
 *    fails the sdev_zvol_name2minor() comparison        -> SDEV_VTOR_STALE
 *  - otherwise                                          -> SDEV_VTOR_VALID
 *
 * The dataset name buffer is freed on every visible return path after it
 * has been allocated.
 */
276 devzvol_validate(struct sdev_node
*dv
)
278 vnode_t
*vn
= SDEVTOV(dv
);
279 dmu_objset_type_t do_type
;
281 char *nm
= dv
->sdev_name
;
284 sdcmn_err13(("validating ('%s' '%s')", dv
->sdev_path
, nm
));
286 * validate only READY nodes; if someone is sitting on the
287 * directory of a dataset that just got destroyed we could
288 * get a zombie node which we just skip.
290 if (dv
->sdev_state
!= SDEV_READY
) {
291 sdcmn_err13(("skipping '%s'", nm
));
292 return (SDEV_VTOR_SKIP
);
295 if ((strcmp(dv
->sdev_path
, ZVOL_DIR
"/dsk") == 0) ||
296 (strcmp(dv
->sdev_path
, ZVOL_DIR
"/rdsk") == 0))
297 return (SDEV_VTOR_VALID
);
298 dsname
= devzvol_make_dsname(dv
->sdev_path
, NULL
);
300 return (SDEV_VTOR_INVALID
);
303 * Leave any nodes alone that have been explicitly created by
306 if (!(dv
->sdev_flags
& SDEV_GLOBAL
) && dv
->sdev_origin
!= NULL
) {
307 kmem_free(dsname
, strlen(dsname
) + 1);
308 return (SDEV_VTOR_VALID
);
311 rc
= devzvol_objset_check(dsname
, &do_type
);
312 sdcmn_err13((" '%s' rc %d", dsname
, rc
));
/*
 * NOTE(review): the test that opens the following branch is not in this
 * listing; presumably it is taken when the objset check failed (rc != 0).
 */
314 sdev_node_t
*parent
= dv
->sdev_dotdot
;
316 * Explicitly passed-through zvols in our sdev profile can't
317 * be created as prof_* shadow nodes, because in the GZ they
318 * are symlinks, but in the NGZ they are actual device files.
320 * The objset_check will fail on these as they are outside
321 * any delegated dataset (zfs will not allow ioctl access to
322 * them from this zone). We still want them to work, though.
324 if (!(parent
->sdev_flags
& SDEV_GLOBAL
) &&
325 parent
->sdev_origin
!= NULL
&&
326 !(dv
->sdev_flags
& SDEV_GLOBAL
) &&
327 (vn
->v_type
== VBLK
|| vn
->v_type
== VCHR
) &&
328 prof_name_matched(nm
, parent
)) {
329 do_type
= DMU_OST_ZVOL
;
331 kmem_free(dsname
, strlen(dsname
) + 1);
332 return (SDEV_VTOR_INVALID
);
336 sdcmn_err13((" v_type %d do_type %d",
337 vn
->v_type
, do_type
));
338 if ((vn
->v_type
== VLNK
&& do_type
!= DMU_OST_ZVOL
) ||
339 ((vn
->v_type
== VBLK
|| vn
->v_type
== VCHR
) &&
340 do_type
!= DMU_OST_ZVOL
) ||
341 (vn
->v_type
== VDIR
&& do_type
== DMU_OST_ZVOL
)) {
342 kmem_free(dsname
, strlen(dsname
) + 1);
343 return (SDEV_VTOR_STALE
);
345 if (vn
->v_type
== VLNK
) {
348 minor_t lminor
, ominor
;
350 rc
= sdev_getlink(vn
, &link
);
/*
 * NOTE(review): the strrchr() result is used without a visible NULL check,
 * so this relies on every link target containing ':' -- confirm.
 */
353 ptr
= strrchr(link
, ':') + 1;
354 rc
= ddi_strtol(ptr
, NULL
, 10, &val
);
355 kmem_free(link
, strlen(link
) + 1);
356 ASSERT(rc
== 0 && val
!= 0);
357 lminor
= (minor_t
)val
;
358 if (sdev_zvol_name2minor(dsname
, &ominor
) < 0 ||
360 kmem_free(dsname
, strlen(dsname
) + 1);
361 return (SDEV_VTOR_STALE
);
364 kmem_free(dsname
, strlen(dsname
) + 1);
365 return (SDEV_VTOR_VALID
);
369 * Taskq callback to update the devzvol_zclist.
371 * We need to defer this to the taskq to avoid it running with a user
372 * context that might be associated with some non-global zone, and thus
373 * not being able to list all of the pools on the entire system.
/*
 * Taskq callback that refreshes the cached pool list (devzvol_zclist).
 *
 * With devzvol_mtx held it issues ZFS_IOC_POOL_CONFIGS, passing devzvol_gen
 * as zc_cookie. Two outcomes are visible:
 *  - the cookie returned in zc_cookie is stored in devzvol_gen, the previous
 *    devzvol_zclist buffer is freed, and the new buffer (zc_nvlist_dst) is
 *    adopted together with its allocation size;
 *  - otherwise the newly returned buffer is freed.
 * NOTE(review): the condition selecting between the two is not in this
 * listing; presumably rc == 0 selects the first -- confirm.
 *
 * Before dropping the mutex it verifies devzvol_zclist_task_running is set
 * and clears it. `arg` is not referenced on any visible line.
 */
377 devzvol_update_zclist_cb(void *arg
)
383 zc
= kmem_zalloc(sizeof (zfs_cmd_t
), KM_SLEEP
);
384 mutex_enter(&devzvol_mtx
);
385 zc
->zc_cookie
= devzvol_gen
;
387 rc
= devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS
, zc
, &size
);
391 ASSERT(devzvol_gen
!= zc
->zc_cookie
);
392 devzvol_gen
= zc
->zc_cookie
;
394 kmem_free((void *)(uintptr_t)devzvol_zclist
,
395 devzvol_zclist_size
);
396 devzvol_zclist
= zc
->zc_nvlist_dst
;
397 /* Keep the alloc'd size, not the nvlist size. */
398 devzvol_zclist_size
= size
;
402 * Either there was no change in pool configuration
403 * since we last asked (rc == EEXIST) or we got a
404 * catastrophic error.
406 * Give up memory and exit.
408 kmem_free((void *)(uintptr_t)zc
->zc_nvlist_dst
,
413 VERIFY(devzvol_zclist_task_running
== B_TRUE
);
414 devzvol_zclist_task_running
= B_FALSE
;
415 mutex_exit(&devzvol_mtx
);
417 kmem_free(zc
, sizeof (zfs_cmd_t
));
/*
 * Brings devzvol_zclist up to date by running devzvol_update_zclist_cb() on
 * sdev_taskq and then waiting with taskq_wait(sdev_taskq).
 *
 * Under devzvol_mtx, if devzvol_zclist_task_running is already set no new
 * dispatch is made. Otherwise the flag is set and the callback is dispatched
 * with the static entry devzvol_zclist_task.
 * NOTE(review): the statement after the early mutex_exit() is not in this
 * listing; presumably it jumps to the taskq_wait() at the end -- confirm.
 */
421 devzvol_update_zclist(void)
423 mutex_enter(&devzvol_mtx
);
424 if (devzvol_zclist_task_running
== B_TRUE
) {
425 mutex_exit(&devzvol_mtx
);
429 devzvol_zclist_task_running
= B_TRUE
;
431 taskq_dispatch_ent(sdev_taskq
, devzvol_update_zclist_cb
, NULL
, 0,
432 &devzvol_zclist_task
);
434 mutex_exit(&devzvol_mtx
);
437 taskq_wait(sdev_taskq
);
441 * Creates sub-directories for each zpool as needed in response to a
442 * readdir on one of the /dev/zvol/{dsk,rdsk} directories.
/*
 * Makes sure a node exists under dvp for every pool in the cached pool list.
 *
 * The list is refreshed with devzvol_update_zclist() and unpacked from
 * devzvol_zclist under devzvol_mtx. VOP_LOOKUP() is then called on dvp with
 * each nvpair name (using kcred); a result other than 0 or ENOENT trips an
 * ASSERT. Finally, under devzvol_mtx, devzvol_isopen is cleared when it is
 * set and `pools` is 0.
 *
 * NOTE(review): the lines that count `pools`, release the looked-up vnode
 * and the unpacked nvlist, and perform the actual close before
 * devzvol_isopen is cleared are not in this listing.
 */
445 devzvol_create_pool_dirs(struct vnode
*dvp
)
448 nvpair_t
*elem
= NULL
;
452 sdcmn_err13(("devzvol_create_pool_dirs"));
454 devzvol_update_zclist();
456 mutex_enter(&devzvol_mtx
);
458 rc
= nvlist_unpack((char *)(uintptr_t)devzvol_zclist
,
459 devzvol_zclist_size
, &nv
, 0);
/*
 * NOTE(review): the guard for the following cleanup is missing from the
 * listing (presumably a failed unpack). devzvol_zclist is a uint64_t, so
 * resetting it with NULL rather than 0 mixes pointer and integer types.
 */
462 kmem_free((void *)(uintptr_t)devzvol_zclist
,
463 devzvol_zclist_size
);
465 devzvol_zclist
= NULL
;
466 devzvol_zclist_size
= 0;
469 mutex_exit(&devzvol_mtx
);
470 while ((elem
= nvlist_next_nvpair(nv
, elem
)) != NULL
) {
472 ASSERT(dvp
->v_count
> 0);
473 rc
= VOP_LOOKUP(dvp
, nvpair_name(elem
), &vp
, NULL
, 0,
474 NULL
, kcred
, NULL
, 0, NULL
);
475 /* should either work, or not be visible from a zone */
476 ASSERT(rc
== 0 || rc
== ENOENT
);
482 mutex_enter(&devzvol_mtx
);
483 if (devzvol_isopen
&& pools
== 0) {
484 /* clean up so zfs can be unloaded */
486 devzvol_isopen
= B_FALSE
;
489 mutex_exit(&devzvol_mtx
);
/*
 * Callback handed to devname_lookup_func() together with SDEV_VATTR when a
 * directory node is wanted. It copies the default VDIR attributes from
 * sdev_getdefault_attr() into the struct vattr that `arg` is cast to.
 *
 * ddv is the parent node and nm the component being created; both are only
 * used for the debug message and for the assertion that the parent path
 * starts with ZVOL_DIR. cred, whatever and whichever are not referenced on
 * any visible line.
 *
 * NOTE(review): `arg` is declared void ** but is cast directly to
 * struct vattr * (not dereferenced first) -- confirm this matches what
 * devname_lookup_func() passes for SDEV_VATTR.
 */
494 devzvol_create_dir(struct sdev_node
*ddv
, char *nm
, void **arg
,
495 cred_t
*cred
, void *whatever
, char *whichever
)
498 struct vattr
*vap
= (struct vattr
*)arg
;
500 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv
->sdev_name
,
501 ddv
->sdev_path
, nm
));
502 ASSERT(strncmp(ddv
->sdev_path
, ZVOL_DIR
,
503 strlen(ZVOL_DIR
)) == 0);
504 *vap
= *sdev_getdefault_attr(VDIR
);
/*
 * Callback handed to devname_lookup_func() together with SDEV_VLINK when a
 * symlink node is wanted for a zvol.
 *
 * It builds the dataset name from the parent path and nm, asks zfs to create
 * the minor (results 0, EEXIST and EBUSY are all accepted) and looks up the
 * minor number. On success it writes the link target into the buffer *arg:
 * one "../" per '/' found in the parent path, then ZVOL_PSEUDO_DEV followed
 * by the minor number, then ",raw" when the parent path starts with
 * ZVOL_FULL_RDEV_DIR. The dataset name buffer is freed on both visible
 * paths.
 *
 * NOTE(review): the return statements are not in this listing.
 * NOTE(review): the "../" and ",raw" pieces are appended with unbounded
 * strcat(), and strncat() is given MAXPATHLEN as its count. That count
 * limits the bytes appended, not the total size of `pathname`, so nothing
 * visible here bounds the write to the destination buffer -- confirm the
 * buffer size the caller supplies.
 */
514 devzvol_create_link(struct sdev_node
*ddv
, char *nm
,
515 void **arg
, cred_t
*cred
, void *whatever
, char *whichever
)
518 char *pathname
= (char *)*arg
;
522 char str
[MAXNAMELEN
];
523 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv
->sdev_name
,
524 ddv
->sdev_path
, nm
));
525 dsname
= devzvol_make_dsname(ddv
->sdev_path
, nm
);
526 rc
= sdev_zvol_create_minor(dsname
);
527 if ((rc
!= 0 && rc
!= EEXIST
&& rc
!= EBUSY
) ||
528 sdev_zvol_name2minor(dsname
, &minor
)) {
529 sdcmn_err13(("devzvol_create_link %d", rc
));
530 kmem_free(dsname
, strlen(dsname
) + 1);
533 kmem_free(dsname
, strlen(dsname
) + 1);
536 * This is a valid zvol; create a symlink that points to the
537 * minor which was created under /devices/pseudo/zfs@0
/* The loop condition is an assignment: it ends when strchr() returns NULL. */
540 for (x
= ddv
->sdev_path
; x
= strchr(x
, '/'); x
++)
541 (void) strcat(pathname
, "../");
542 (void) snprintf(str
, sizeof (str
), ZVOL_PSEUDO_DEV
"%u", minor
);
543 (void) strncat(pathname
, str
, MAXPATHLEN
);
544 if (strncmp(ddv
->sdev_path
, ZVOL_FULL_RDEV_DIR
,
545 strlen(ZVOL_FULL_RDEV_DIR
)) == 0)
546 (void) strcat(pathname
, ",raw");
550 /* Clean zvol sdev_nodes that are no longer valid. */
/*
 * Walks the entries of directory node ddv and removes the ones that
 * devzvol_validate() no longer accepts.
 *
 * Locking: the caller must hold ddv->sdev_contents as reader (asserted).
 * The lock is upgraded to writer here; if rw_tryupgrade() fails it is
 * dropped and re-acquired as writer, so the directory may change in between.
 * It is downgraded back to reader before returning.
 *
 * Visible per-entry handling: a VALID entry is stepped over. For an INVALID
 * entry, a directory that sdev_cleandir() cannot empty is stepped over;
 * otherwise the entry is removed with sdev_cache_update() and the walk
 * restarts from the first entry.
 * NOTE(review): the loop header, the remaining case labels and the last
 * argument of sdev_cache_update() are not in this listing.
 */
552 devzvol_prunedir(struct sdev_node
*ddv
)
554 struct sdev_node
*dv
;
556 ASSERT(RW_READ_HELD(&ddv
->sdev_contents
));
558 sdcmn_err13(("prunedir '%s'", ddv
->sdev_name
));
559 ASSERT(strncmp(ddv
->sdev_path
, ZVOL_DIR
, strlen(ZVOL_DIR
)) == 0);
560 if (rw_tryupgrade(&ddv
->sdev_contents
) == 0) {
561 rw_exit(&ddv
->sdev_contents
);
562 rw_enter(&ddv
->sdev_contents
, RW_WRITER
);
565 dv
= SDEV_FIRST_ENTRY(ddv
);
567 sdcmn_err13(("sdev_name '%s'", dv
->sdev_name
));
569 switch (devzvol_validate(dv
)) {
570 case SDEV_VTOR_VALID
:
572 dv
= SDEV_NEXT_ENTRY(ddv
, dv
);
574 case SDEV_VTOR_INVALID
:
575 sdcmn_err7(("prunedir: destroy invalid "
576 "node: %s\n", dv
->sdev_name
));
580 if ((SDEVTOV(dv
)->v_type
== VDIR
) &&
581 (sdev_cleandir(dv
, NULL
, 0) != 0)) {
582 dv
= SDEV_NEXT_ENTRY(ddv
, dv
);
586 /* remove the cache node */
587 sdev_cache_update(ddv
, &dv
, dv
->sdev_name
,
590 dv
= SDEV_FIRST_ENTRY(ddv
);
592 rw_downgrade(&ddv
->sdev_contents
);
596 * This function is used to create a dir or dev inside a zone's /dev when the
597 * zone has a zvol that is dynamically created within the zone (i.e. inside
598 * of a delegated dataset). Since there is no /devices tree within a zone,
599 * we create the chr/blk devices directly inside the zone's /dev instead of
/*
 * Creates the sdev node `nm` under `parent` for use inside a non-global
 * zone: a directory for a non-zvol objset, or a device node for a zvol.
 *
 * Steps visible in this listing:
 *  - attributes start zeroed with default uid/gid and `now` for all three
 *    timestamps;
 *  - the dataset name is built from parent->sdev_path and nm, and a NULL
 *    result is tested for;
 *  - if devzvol_objset_check() fails, a name matched by prof_name_matched()
 *    under a non-global profile parent is still treated as a zvol, otherwise
 *    the name buffer is freed;
 *  - for a directory: type VDIR, mode SDEV_DIRMODE_DEFAULT;
 *  - for a zvol: the minor is created and looked up (0, EEXIST and EBUSY
 *    from the create are accepted), va_rdev is built from devzvol_major and
 *    the minor, the type is VCHR when the parent path contains "/rdsk/" and
 *    VBLK otherwise, and the mode is SDEV_DEVMODE_DEFAULT;
 *  - the node is made with sdev_mknode() in state SDEV_READY while holding
 *    parent->sdev_contents as writer.
 *
 * NOTE(review): return statements and error values are not in this listing;
 * devzvol_lookup() treats a nonzero return as failure.
 */
603 devzvol_mk_ngz_node(struct sdev_node
*parent
, char *nm
)
607 enum vtype expected_type
= VDIR
;
608 dmu_objset_type_t do_type
;
609 struct sdev_node
*dv
= NULL
;
613 bzero(&vattr
, sizeof (vattr
));
615 vattr
.va_mask
= AT_TYPE
|AT_MODE
|AT_UID
|AT_GID
;
616 vattr
.va_uid
= SDEV_UID_DEFAULT
;
617 vattr
.va_gid
= SDEV_GID_DEFAULT
;
618 vattr
.va_type
= VNON
;
619 vattr
.va_atime
= now
;
620 vattr
.va_mtime
= now
;
621 vattr
.va_ctime
= now
;
623 if ((dsname
= devzvol_make_dsname(parent
->sdev_path
, nm
)) == NULL
)
626 if (devzvol_objset_check(dsname
, &do_type
) != 0) {
628 * objset_check will succeed on any valid objset in the global
629 * zone, and any valid delegated dataset. It will fail, however,
630 * in non-global zones on explicitly whitelisted zvol devices
631 * that are outside any delegated dataset.
633 * The directories leading up to the zvol device itself will be
634 * created by prof for us in advance (and will always validate
635 * because of the matching check in devzvol_validate). The zvol
636 * device itself can't be created by prof though because in the
637 * GZ it's a symlink, and in the NGZ it is not. So, we create
638 * such zvol device files here.
640 if (!(parent
->sdev_flags
& SDEV_GLOBAL
) &&
641 parent
->sdev_origin
!= NULL
&&
642 prof_name_matched(nm
, parent
)) {
643 do_type
= DMU_OST_ZVOL
;
645 kmem_free(dsname
, strlen(dsname
) + 1);
650 if (do_type
== DMU_OST_ZVOL
)
651 expected_type
= VBLK
;
653 if (expected_type
== VDIR
) {
654 vattr
.va_type
= VDIR
;
655 vattr
.va_mode
= SDEV_DIRMODE_DEFAULT
;
661 rc
= sdev_zvol_create_minor(dsname
);
662 if ((rc
!= 0 && rc
!= EEXIST
&& rc
!= EBUSY
) ||
663 sdev_zvol_name2minor(dsname
, &minor
)) {
664 kmem_free(dsname
, strlen(dsname
) + 1);
668 devnum
= makedevice(devzvol_major
, minor
);
669 vattr
.va_rdev
= devnum
;
/* Nodes below an "/rdsk/" path become character devices, others block. */
671 if (strstr(parent
->sdev_path
, "/rdsk/") != NULL
)
672 vattr
.va_type
= VCHR
;
674 vattr
.va_type
= VBLK
;
675 vattr
.va_mode
= SDEV_DEVMODE_DEFAULT
;
677 kmem_free(dsname
, strlen(dsname
) + 1);
679 rw_enter(&parent
->sdev_contents
, RW_WRITER
);
681 res
= sdev_mknode(parent
, nm
, &dv
, &vattr
,
682 NULL
, NULL
, kcred
, SDEV_READY
);
683 rw_exit(&parent
->sdev_contents
);
/*
 * Lookup vnode operation for /dev/zvol (installed as .vop_lookup in
 * devzvol_vnodeops_tbl).
 *
 * Visible flow:
 *  - VEXEC access on dvp is required first;
 *  - when the parent node is not global (SDEV_IS_GLOBAL() false): a caller
 *    whose getzoneid() is GLOBAL_ZONEID is refused, otherwise prof_lookup()
 *    is tried, and the node may be created with devzvol_mk_ngz_node()
 *    followed by a second prof_lookup();
 *  - when the parent is global but the caller is not in the global zone, the
 *    lookup is refused (the comment below says EPERM is used because ENOENT
 *    has a special meaning to prof);
 *  - otherwise the dataset name is built and checked with
 *    devzvol_objset_check(); a zvol is looked up as a symlink through
 *    devname_lookup_func(..., devzvol_create_link, SDEV_VLINK), anything
 *    else as a directory through
 *    devname_lookup_func(..., devzvol_create_dir, SDEV_VATTR).
 *
 * parent->sdev_contents is held as reader while the zone checks are made and
 * the dataset name is built, and is released before the objset check.
 * pnp, flags, rdir, direntflags and realpnp are not referenced on any
 * visible line.
 *
 * NOTE(review): the return statements, the exact error values and the
 * conditions between the two prof_lookup() calls are not in this listing.
 */
693 devzvol_lookup(struct vnode
*dvp
, char *nm
, struct vnode
**vpp
,
694 struct pathname
*pnp
, int flags
, struct vnode
*rdir
, struct cred
*cred
,
695 caller_context_t
*ct
, int *direntflags
, pathname_t
*realpnp
)
697 enum vtype expected_type
= VDIR
;
698 struct sdev_node
*parent
= VTOSDEV(dvp
);
700 dmu_objset_type_t do_type
;
703 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent
->sdev_path
, nm
));
705 /* execute access is required to search the directory */
706 if ((error
= VOP_ACCESS(dvp
, VEXEC
, 0, cred
, ct
)) != 0)
709 rw_enter(&parent
->sdev_contents
, RW_READER
);
710 if (!SDEV_IS_GLOBAL(parent
)) {
713 rw_exit(&parent
->sdev_contents
);
716 * If we're in the global zone and reach down into a non-global
717 * zone's /dev/zvol then this action could trigger the creation
718 * of all of the zvol devices for every zone into the non-global
719 * zone's /dev tree. This could be a big security hole. To
720 * prevent this, disallow the global zone from looking inside
721 * a non-global zones /dev/zvol. This behavior is similar to
722 * delegated datasets, which cannot be used by the global zone.
724 if (getzoneid() == GLOBAL_ZONEID
)
727 res
= prof_lookup(dvp
, nm
, vpp
, cred
);
730 * We won't find a zvol that was dynamically created inside
731 * a NGZ, within a delegated dataset, in the zone's dev profile
732 * but prof_lookup will also find it via sdev_cache_lookup.
736 * We have to create the sdev node for the dymamically
739 if (devzvol_mk_ngz_node(parent
, nm
) != 0)
741 res
= prof_lookup(dvp
, nm
, vpp
, cred
);
748 * Don't let the global-zone style lookup succeed here when we're not
749 * running in the global zone. This can happen because prof calls into
750 * us (in prof_filldir) trying to create an explicitly passed-through
751 * zvol device outside any delegated dataset.
753 * We have to stop this here or else we will create prof shadows of
754 * the global zone symlink, which will make no sense at all in the
755 * non-global zone (it has no /devices for the symlink to point at).
757 * These zvols will be created later (at access time) by mk_ngz_node
758 * instead. The dirs leading up to them will be created by prof
761 * We have to return EPERM here, because ENOENT is given special
762 * meaning by prof in this context.
764 if (getzoneid() != GLOBAL_ZONEID
) {
765 rw_exit(&parent
->sdev_contents
);
769 dsname
= devzvol_make_dsname(parent
->sdev_path
, nm
);
770 rw_exit(&parent
->sdev_contents
);
771 sdcmn_err13(("rvp dsname %s", dsname
? dsname
: "(null)"));
773 error
= devzvol_objset_check(dsname
, &do_type
);
/* A zvol is exposed as a symlink; every other objset type as a directory. */
778 if (do_type
== DMU_OST_ZVOL
)
779 expected_type
= VLNK
;
782 * the callbacks expect:
784 * parent->sdev_path nm
786 * /dev/zvol/{r}dsk <pool name>
787 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
789 * sdev_name is always last path component of sdev_path
791 if (expected_type
== VDIR
) {
792 error
= devname_lookup_func(parent
, nm
, vpp
, cred
,
793 devzvol_create_dir
, SDEV_VATTR
);
795 error
= devname_lookup_func(parent
, nm
, vpp
, cred
,
796 devzvol_create_link
, SDEV_VLINK
);
798 sdcmn_err13(("devzvol_lookup %d %d", expected_type
, error
));
799 ASSERT(error
|| ((*vpp
)->v_type
== expected_type
));
802 kmem_free(dsname
, strlen(dsname
) + 1);
803 sdcmn_err13(("devzvol_lookup %d", error
));
808 * We allow create to find existing nodes
809 * - if the node doesn't exist - EROFS
810 * - creating an existing dir read-only succeeds, otherwise EISDIR
811 * - exclusive creates fail - EEXIST
/*
 * Create vnode operation for /dev/zvol (installed as .vop_create in
 * devzvol_vnodeops_tbl). It creates nothing itself: it resolves nm with
 * devzvol_lookup() and then decides what to report for the node found.
 *
 * Visible decisions after a lookup: a directory opened with VWRITE in `mode`
 * gets separate handling, otherwise VOP_ACCESS(vp, mode, ...) is checked;
 * a lookup result of ENOENT also gets separate handling.
 * NOTE(review): the error values assigned in those branches, the handling of
 * `excl`, the store through vpp and the return statement are not in this
 * listing. The comment preceding this function names EROFS, EISDIR and
 * EEXIST -- confirm against the full source.
 * vap, flag and vsecp are not referenced on any visible line.
 */
815 devzvol_create(struct vnode
*dvp
, char *nm
, struct vattr
*vap
, vcexcl_t excl
,
816 int mode
, struct vnode
**vpp
, struct cred
*cred
, int flag
,
817 caller_context_t
*ct
, vsecattr_t
*vsecp
)
824 error
= devzvol_lookup(dvp
, nm
, &vp
, NULL
, 0, NULL
, cred
, ct
, NULL
,
829 else if (vp
->v_type
== VDIR
&& (mode
& VWRITE
))
832 error
= VOP_ACCESS(vp
, mode
, 0, cred
, ct
);
838 } else if (error
== ENOENT
) {
/* Forward declaration: sdev_iter_datasets() below calls this for zvols. */
845 void sdev_iter_snapshots(struct vnode
*dvp
, char *name
);
/*
 * Iterates over the children of dataset `name` with zfs list ioctl `arg` and
 * looks each one up under dvp, so that its /dev/zvol node gets created.
 *
 * `arg` is the ioctl command: devzvol_readdir() passes
 * ZFS_IOC_DATASET_LIST_NEXT and sdev_iter_snapshots() passes
 * ZFS_IOC_SNAPSHOT_LIST_NEXT. The loop runs while devzvol_handle_ioctl()
 * returns 0. Names containing '$' or '%' are tested for specially. For other
 * names the last path component is looked up with devzvol_lookup() using
 * kcred. When listing datasets, a child of type DMU_OST_ZVOL also has its
 * snapshots iterated if devzvol_snaps_allowed is set. zc_name is reset to
 * `name` again inside the loop.
 *
 * NOTE(review): B_FALSE is passed for devzvol_handle_ioctl()'s size_t *
 * parameter; the other caller that wants the buffer freed passes NULL --
 * the pointer constant would be the consistent spelling.
 * NOTE(review): zc_name is filled with unbounded strcpy(), whereas
 * devzvol_objset_check() uses strlcpy(..., MAXPATHLEN) for the same field.
 * NOTE(review): strrchr(zc->zc_name, '/') + 1 has no visible NULL check --
 * confirm every returned name contains '/'.
 * NOTE(review): the statements under several tests (including the '$'/'%'
 * test) and the release of the looked-up vnode are not in this listing.
 */
848 sdev_iter_datasets(struct vnode
*dvp
, int arg
, char *name
)
853 sdcmn_err13(("iter name is '%s' (arg %x)", name
, arg
));
854 zc
= kmem_zalloc(sizeof (zfs_cmd_t
), KM_SLEEP
);
855 (void) strcpy(zc
->zc_name
, name
);
857 while ((rc
= devzvol_handle_ioctl(arg
, zc
, B_FALSE
)) == 0) {
861 sdcmn_err13((" name %s", zc
->zc_name
));
862 if (strchr(zc
->zc_name
, '$') || strchr(zc
->zc_name
, '%'))
864 ptr
= strrchr(zc
->zc_name
, '/') + 1;
865 rc
= devzvol_lookup(dvp
, ptr
, &vpp
, NULL
, 0, NULL
,
866 kcred
, NULL
, NULL
, NULL
);
869 } else if (rc
== ENOENT
) {
873 * EBUSY == problem with zvols's dmu holds?
874 * EPERM when in a NGZ and traversing up and out.
878 if (arg
== ZFS_IOC_DATASET_LIST_NEXT
&&
879 zc
->zc_objset_stats
.dds_type
== DMU_OST_ZVOL
&&
880 devzvol_snaps_allowed
)
881 sdev_iter_snapshots(dvp
, zc
->zc_name
);
883 (void) strcpy(zc
->zc_name
, name
);
885 kmem_free(zc
, sizeof (zfs_cmd_t
));
/*
 * Iterates over the snapshots of dataset `name`: a thin wrapper that calls
 * sdev_iter_datasets() with the ZFS_IOC_SNAPSHOT_LIST_NEXT command.
 */
889 sdev_iter_snapshots(struct vnode
*dvp
, char *name
)
891 sdev_iter_datasets(dvp
, ZFS_IOC_SNAPSHOT_LIST_NEXT
, name
);
/*
 * Readdir vnode operation for /dev/zvol (installed as .vop_readdir in
 * devzvol_vnodeops_tbl). Each visible path first makes sure the expected
 * child nodes exist and then returns devname_readdir_func()'s result.
 *
 *  - Path equal to ZVOL_DIR: the "dsk" and "rdsk" directories are looked up
 *    (and so created) with devname_lookup_func()/devzvol_create_dir.
 *  - Otherwise, when uio_offset is 0, stale entries are pruned first with
 *    devzvol_prunedir().
 *  - Path equal to ZVOL_DIR "/dsk" or "/rdsk": per-pool directories are
 *    created with devzvol_create_pool_dirs().
 *  - Deeper paths: the part of the path after the second '/' past ZVOL_DIR
 *    is passed to sdev_iter_datasets() with ZFS_IOC_DATASET_LIST_NEXT.
 *
 * Locking: ddv->sdev_contents is dropped around each populate step and
 * re-taken as reader before devname_readdir_func() is called.
 * NOTE(review): the state of the lock on entry and the handling of a
 * non-global node are not in this listing. ct_unused and flags_unused are
 * unused, as named.
 */
896 devzvol_readdir(struct vnode
*dvp
, struct uio
*uiop
, struct cred
*cred
,
897 int *eofp
, caller_context_t
*ct_unused
, int flags_unused
)
899 struct sdev_node
*sdvp
= VTOSDEV(dvp
);
902 sdcmn_err13(("zv readdir of '%s' %s'", sdvp
->sdev_path
,
905 if (strcmp(sdvp
->sdev_path
, ZVOL_DIR
) == 0) {
908 rw_exit(&sdvp
->sdev_contents
);
909 (void) devname_lookup_func(sdvp
, "dsk", &vp
, cred
,
910 devzvol_create_dir
, SDEV_VATTR
);
912 (void) devname_lookup_func(sdvp
, "rdsk", &vp
, cred
,
913 devzvol_create_dir
, SDEV_VATTR
);
915 rw_enter(&sdvp
->sdev_contents
, RW_READER
);
916 return (devname_readdir_func(dvp
, uiop
, cred
, eofp
, 0));
/* Prune only at the start of a directory read (offset 0). */
918 if (uiop
->uio_offset
== 0)
919 devzvol_prunedir(sdvp
);
920 ptr
= sdvp
->sdev_path
+ strlen(ZVOL_DIR
);
921 if ((strcmp(ptr
, "/dsk") == 0) || (strcmp(ptr
, "/rdsk") == 0)) {
922 rw_exit(&sdvp
->sdev_contents
);
923 devzvol_create_pool_dirs(dvp
);
924 rw_enter(&sdvp
->sdev_contents
, RW_READER
);
925 return (devname_readdir_func(dvp
, uiop
, cred
, eofp
, 0));
/* Step past the "/dsk" or "/rdsk" component to reach the dataset part. */
928 ptr
= strchr(ptr
+ 1, '/');
932 rw_exit(&sdvp
->sdev_contents
);
933 sdev_iter_datasets(dvp
, ZFS_IOC_DATASET_LIST_NEXT
, ptr
);
934 rw_enter(&sdvp
->sdev_contents
, RW_READER
);
935 return (devname_readdir_func(dvp
, uiop
, cred
, eofp
, 0));
938 const fs_operation_def_t devzvol_vnodeops_tbl
[] = {
939 VOPNAME_READDIR
, { .vop_readdir
= devzvol_readdir
},
940 VOPNAME_LOOKUP
, { .vop_lookup
= devzvol_lookup
},
941 VOPNAME_CREATE
, { .vop_create
= devzvol_create
},
942 VOPNAME_RENAME
, { .error
= fs_nosys
},
943 VOPNAME_MKDIR
, { .error
= fs_nosys
},
944 VOPNAME_RMDIR
, { .error
= fs_nosys
},
945 VOPNAME_REMOVE
, { .error
= fs_nosys
},
946 VOPNAME_SYMLINK
, { .error
= fs_nosys
},