/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
 */
/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' directory, but this may expand in the
 * future.  The elements are built using the GFS primitives, as the hierarchy
 * does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab.  We have three
 * types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to lookup '..' and act
 * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot,
 * we perform an automount of the underlying filesystem and return the
 * corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from user land.  The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_umount_snapshots(),
 * which unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted-on file systems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()).  We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
 */
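
/*
 * For example (a sketch, using a hypothetical pool/fs and snapshot name):
 * a plain "ls /tank/fs/.zfs/snapshot/monday" is enough to trigger the
 * automount.  zfsctl_snapdir_lookup() mounts "tank/fs@monday" on the GFS
 * node and the listing comes from the mounted snapshot, while a later
 * "umount /tank/fs/.zfs/snapshot/monday" undoes it from user land.
 */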

#include <sys/fs_subr.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>
#include <sys/vfs.h>
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>

#include "zfs_namecheck.h"

typedef struct zfsctl_node {
	gfs_dir_t	zc_gfs_private;
	uint64_t	zc_id;
	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
} zfsctl_node_t;

typedef struct zfsctl_snapdir {
	zfsctl_node_t	sd_node;
	kmutex_t	sd_lock;
	avl_tree_t	sd_snaps;
} zfsctl_snapdir_t;

typedef struct {
	char		*se_name;
	vnode_t		*se_root;
	avl_node_t	se_node;
} zfs_snapentry_t;
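
/*
 * AVL comparator for the entries in sd_snaps; orders zfs_snapentry_t
 * nodes by snapshot name so lookups can use avl_find().
 */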
static int
snapentry_compare(const void *a, const void *b)
{
	const zfs_snapentry_t *sa = a;
	const zfs_snapentry_t *sb = b;
	int ret = strcmp(sa->se_name, sb->se_name);

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

static const struct vnodeops zfsctl_ops_root;
static const struct vnodeops zfsctl_ops_snapdir;
static const struct vnodeops zfsctl_ops_snapshot;
static const struct vnodeops zfsctl_ops_shares;

static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
static vnode_t *zfsctl_mknode_shares(vnode_t *);
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);

/*
 * Root directory elements.  We only have two entries:
 * snapshot and shares.
 */
static gfs_dirent_t zfsctl_root_entries[] = {
	{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
	{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
	{ NULL }
};

/* include . and .. in the calculation */
#define	NROOT_ENTRIES	((sizeof (zfsctl_root_entries) / \
    sizeof (gfs_dirent_t)) + 1)

/*
 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 * directories.  This is called from the ZFS init routine.  The vnode ops
 * vectors are statically initialized above, so there is currently nothing
 * to do here or in zfsctl_fini().
 */
void
zfsctl_init(void)
{
}

void
zfsctl_fini(void)
{
}
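
/*
 * Return B_TRUE if the given vnode is one of our GFS control nodes
 * (.zfs, .zfs/snapshot, .zfs/shares, or an uncovered snapshot node).
 */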
boolean_t
zfsctl_is_node(vnode_t *vp)
{
	return (vn_matchops(vp, &zfsctl_ops_root) ||
	    vn_matchops(vp, &zfsctl_ops_snapdir) ||
	    vn_matchops(vp, &zfsctl_ops_snapshot) ||
	    vn_matchops(vp, &zfsctl_ops_shares));
}

/*
 * Return the inode number associated with the 'snapshot' or
 * 'shares' directory.
 */
/* ARGSUSED */
static ino64_t
zfsctl_root_inode_cb(vnode_t *vp, int index)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;

	ASSERT(index < 2);

	if (index == 0)
		return (ZFSCTL_INO_SNAPDIR);

	return (zfsvfs->z_shares_dir);
}

/*
 * Create the '.zfs' directory.  This directory is cached as part of the VFS
 * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 * therefore checks against a vfs_count of 2 instead of 1.  This reference
 * is removed when the ctldir is destroyed in the unmount.
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	vnode_t *vp, *rvp;
	zfsctl_node_t *zcp;
	uint64_t crtime[2];

	ASSERT(zfsvfs->z_ctldir == NULL);

	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
	    &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
	zcp = vp->v_data;
	zcp->zc_id = ZFSCTL_INO_ROOT;

	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
	    &crtime, sizeof (crtime)));
	ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
	VN_RELE(rvp);

	/*
	 * We're only faking the fact that we have a root of a filesystem for
	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
	 * for us.
	 */
	vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);

	zfsvfs->z_ctldir = vp;
}

/*
 * Destroy the '.zfs' directory.  Only called when the filesystem is
 * unmounted.  There might still be more references if we were force
 * unmounted, but only new zfs_inactive() calls can occur and they don't
 * reference .zfs.
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
	VN_RELE(zfsvfs->z_ctldir);
	zfsvfs->z_ctldir = NULL;
}

/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Add a hold to the vnode and return it.
 */
vnode_t *
zfsctl_root(znode_t *zp)
{
	ASSERT(zfs_has_ctldir(zp));
	VN_HOLD(zp->z_zfsvfs->z_ctldir);
	return (zp->z_zfsvfs->z_ctldir);
}

/*
 * Common open routine.  Disallow any write access.
 */
/* ARGSUSED */
static int
zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
{
	if (flags & FWRITE)
		return (SET_ERROR(EACCES));

	return (0);
}

/*
 * Common close routine.  Nothing to do here.
 */
/* ARGSUSED */
static int
zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    cred_t *cr, caller_context_t *ct)
{
	return (0);
}

/*
 * Common access routine.  Disallow writes.
 */
/* ARGSUSED */
static int
zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
    caller_context_t *ct)
{
	if (flags & V_ACE_MASK) {
		if (mode & ACE_ALL_WRITE_PERMS)
			return (SET_ERROR(EACCES));
	} else {
		if (mode & VWRITE)
			return (SET_ERROR(EACCES));
	}

	return (0);
}

/*
 * Common getattr function.  Fill in basic information.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
	timestruc_t	now;

	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_rdev = 0;
	/*
	 * We are a purely virtual object, so we have no
	 * blocksize or allocated blocks.
	 */
	vap->va_blksize = 0;
	vap->va_nblocks = 0;
	vap->va_seq = 0;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
	    S_IROTH | S_IXOTH;
	vap->va_type = VDIR;
	/*
	 * We live in the now (for atime).
	 */
	gethrestime(&now);
	vap->va_atime = now;
}
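
/*
 * Generate a file ID for a .zfs control node.  The object number comes
 * from zc_id; control nodes always report a generation of 0.
 */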
/*ARGSUSED*/
static int
zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_node_t	*zcp = vp->v_data;
	uint64_t	object = zcp->zc_id;
	zfid_short_t	*zfid;
	int		i;

	ZFS_ENTER(zfsvfs);

	if (fidp->fid_len < SHORT_FID_LEN) {
		fidp->fid_len = SHORT_FID_LEN;
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOSPC));
	}

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = SHORT_FID_LEN;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs znodes always have a generation number of 0 */
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	ZFS_EXIT(zfsvfs);
	return (0);
}
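
/*
 * For '.zfs/shares' the fid is delegated to the real shares directory
 * znode, so file handles refer to the underlying ZFS object.
 */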
/*ARGSUSED*/
static int
zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	zfsvfs_t	*zfsvfs = vp->v_vfsp->vfs_data;
	znode_t		*dzp;
	int		error;

	ZFS_ENTER(zfsvfs);

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTSUP));
	}

	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
		error = fop_fid(ZTOV(dzp), fidp, ct);
		VN_RELE(ZTOV(dzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * .zfs inode namespace
 *
 * We need to generate unique inode numbers for all files and directories
 * within the .zfs pseudo-filesystem.  We use the following scheme:
 *
 *	ENTRY			ZFSCTL_INODE
 *	.zfs			1
 *	.zfs/snapshot		2
 *	.zfs/snapshot/<snap>	objectid(snap)
 */

#define	ZFSCTL_INO_SNAP(id)	(id)

/*
 * Get root directory attributes.
 */
/* ARGSUSED */
static int
zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_node_t *zcp = vp->v_data;

	ZFS_ENTER(zfsvfs);
	vap->va_nodeid = ZFSCTL_INO_ROOT;
	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;

	zfsctl_common_getattr(vp, vap);
	ZFS_EXIT(zfsvfs);

	return (0);
}

/*
 * Special case the handling of "..".
 */
/* ARGSUSED */
int
zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);

	if (strcmp(nm, "..") == 0) {
		err = VFS_ROOT(dvp->v_vfsp, vpp);
	} else {
		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
		    cr, ct, direntflags, realpnp);
	}

	ZFS_EXIT(zfsvfs);

	return (err);
}

static int
zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	/*
	 * We only care about ACL_ENABLED so that libsec can
	 * display ACLs correctly and not default to POSIX draft.
	 */
	if (cmd == _PC_ACL_ENABLED) {
		*valp = _ACL_ACE_ENABLED;
		return (0);
	}

	return (fs_pathconf(vp, cmd, valp, cr, ct));
}

static const struct vnodeops zfsctl_ops_root = {
	.vnop_name = ".zfs",
	.vop_open = zfsctl_common_open,
	.vop_close = zfsctl_common_close,
	.vop_ioctl = fs_inval,
	.vop_getattr = zfsctl_root_getattr,
	.vop_access = zfsctl_common_access,
	.vop_readdir = gfs_vop_readdir,
	.vop_lookup = zfsctl_root_lookup,
	.vop_seek = fs_seek,
	.vop_inactive = gfs_vop_inactive,
	.vop_pathconf = zfsctl_pathconf,
	.vop_fid = zfsctl_common_fid,
};

/*
 * Gets the full dataset name that corresponds to the given snapshot name.
 * Example:
 *	zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
 */
static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	if (zfs_component_namecheck(name, NULL, NULL) != 0)
		return (SET_ERROR(EILSEQ));
	dmu_objset_name(os, zname);
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}
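
/*
 * Unmount the snapshot vfs mounted on the given entry's root vnode and
 * free the entry.  The caller holds sd_lock and has already removed the
 * entry from the sd_snaps AVL tree (re-adding it on failure).
 */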
static int
zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
{
	vnode_t *svp = sep->se_root;
	int error;

	ASSERT(vn_ismntpt(svp));

	/* this will be dropped by dounmount() */
	if ((error = vn_vfswlock(svp)) != 0)
		return (error);

	VN_HOLD(svp);
	error = dounmount(vn_mountedvfs(svp), fflags, cr);
	if (error) {
		VN_RELE(svp);
		return (error);
	}

	/*
	 * We can't use VN_RELE(), as that will try to invoke
	 * zfsctl_snapdir_inactive(), which would cause us to destroy
	 * the sd_lock mutex held by our caller.
	 */
	ASSERT(svp->v_count == 1);
	gfs_vop_inactive(svp, cr, NULL);

	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	kmem_free(sep, sizeof (zfs_snapentry_t));

	return (0);
}
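
/*
 * Rename a mounted snapshot's entry in sd_snaps and patch up the tails of
 * its recorded mountpoint and resource strings to use the new name.
 */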
static void
zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
{
	avl_index_t where;
	vfs_t *vfsp;
	refstr_t *pathref;
	char newpath[MAXNAMELEN];
	char *tail;

	ASSERT(MUTEX_HELD(&sdp->sd_lock));
	ASSERT(sep != NULL);

	vfsp = vn_mountedvfs(sep->se_root);
	ASSERT(vfsp != NULL);

	vfs_lock_wait(vfsp);

	/*
	 * Change the name in the AVL tree.
	 */
	avl_remove(&sdp->sd_snaps, sep);
	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
	avl_insert(&sdp->sd_snaps, sep, where);

	/*
	 * Change the current mountpoint info:
	 *	- update the tail of the mntpoint path
	 *	- update the tail of the resource path
	 */
	pathref = vfs_getmntpoint(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '/')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setmntpoint(vfsp, newpath, 0);

	pathref = vfs_getresource(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '@')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setresource(vfsp, newpath, 0);

	vfs_unlock(vfsp);
}
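
/*
 * Rename a snapshot via mv(1) within '.zfs/snapshot': check rename
 * permissions, rename the underlying dataset with
 * dsl_dataset_rename_snapshot(), then fix up our cached entry.
 */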
/*ARGSUSED*/
static int
zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = sdvp->v_data;
	zfs_snapentry_t search, *sep;
	zfsvfs_t *zfsvfs;
	avl_index_t where;
	char from[ZFS_MAX_DATASET_NAME_LEN], to[ZFS_MAX_DATASET_NAME_LEN];
	char real[ZFS_MAX_DATASET_NAME_LEN], fsname[ZFS_MAX_DATASET_NAME_LEN];
	int err;

	zfsvfs = sdvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
		    sizeof (real), NULL);
		if (err == 0) {
			snm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	dmu_objset_name(zfsvfs->z_os, fsname);

	err = zfsctl_snapshot_zname(sdvp, snm, sizeof (from), from);
	if (err == 0)
		err = zfsctl_snapshot_zname(tdvp, tnm, sizeof (to), to);
	if (err == 0)
		err = zfs_secpolicy_rename_perms(from, to, cr);
	if (err != 0)
		return (err);

	/*
	 * Cannot move snapshots out of the snapdir.
	 */
	if (sdvp != tdvp)
		return (SET_ERROR(EINVAL));

	if (strcmp(snm, tnm) == 0)
		return (0);

	mutex_enter(&sdp->sd_lock);

	search.se_name = (char *)snm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
		mutex_exit(&sdp->sd_lock);
		return (SET_ERROR(ENOENT));
	}

	err = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
	if (err == 0)
		zfsctl_rename_snap(sdp, sep, tnm);

	mutex_exit(&sdp->sd_lock);

	return (err);
}
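
/*
 * Destroy a snapshot via rmdir(2) in '.zfs/snapshot': check destroy
 * permissions, force-unmount the snapshot if it is mounted, then destroy
 * the underlying dataset.
 */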
/* ARGSUSED */
static int
zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	zfs_snapentry_t *sep;
	zfs_snapentry_t search;
	zfsvfs_t *zfsvfs;
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	char real[ZFS_MAX_DATASET_NAME_LEN];
	int err;

	zfsvfs = dvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {

		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
		    sizeof (real), NULL);
		if (err == 0) {
			name = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	err = zfsctl_snapshot_zname(dvp, name, sizeof (snapname), snapname);
	if (err == 0)
		err = zfs_secpolicy_destroy_perms(snapname, cr);
	if (err != 0)
		return (err);

	mutex_enter(&sdp->sd_lock);

	search.se_name = name;
	sep = avl_find(&sdp->sd_snaps, &search, NULL);
	if (sep) {
		avl_remove(&sdp->sd_snaps, sep);
		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
		if (err != 0)
			avl_add(&sdp->sd_snaps, sep);
		else
			err = dsl_destroy_snapshot(snapname, B_FALSE);
	} else {
		err = SET_ERROR(ENOENT);
	}

	mutex_exit(&sdp->sd_lock);

	return (err);
}

/*
 * This creates a snapshot under '.zfs/snapshot'.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
    cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	char name[ZFS_MAX_DATASET_NAME_LEN];
	int err;
	static enum symfollow follow = NO_FOLLOW;
	static enum uio_seg seg = UIO_SYSSPACE;

	if (zfs_component_namecheck(dirname, NULL, NULL) != 0)
		return (SET_ERROR(EILSEQ));

	dmu_objset_name(zfsvfs->z_os, name);

	*vpp = NULL;

	err = zfs_secpolicy_snapshot_perms(name, cr);
	if (err != 0)
		return (err);

	if (err == 0) {
		err = dmu_objset_snapshot_one(name, dirname);
		if (err != 0)
			return (err);
		err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
	}

	return (err);
}

/*
 * Lookup entry point for the 'snapshot' directory.  Try to open the
 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 * Perform a mount of the associated dataset on top of the vnode.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	objset_t *snap;
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	char real[ZFS_MAX_DATASET_NAME_LEN];
	char *mountpoint;
	zfs_snapentry_t *sep, search;
	struct mounta margs;
	vfs_t *vfsp;
	size_t mountpoint_len;
	avl_index_t where;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (SET_ERROR(EINVAL));

	ASSERT(dvp->v_type == VDIR);

	/*
	 * If we get a recursive call, that means we got called
	 * from the domount() code while it was trying to look up the
	 * spec (which looks like a local path for zfs).  We need to
	 * add some flag to domount() to tell it not to do this lookup.
	 */
	if (MUTEX_HELD(&sdp->sd_lock))
		return (SET_ERROR(ENOENT));

	ZFS_ENTER(zfsvfs);

	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	if (flags & FIGNORECASE) {
		boolean_t conflict = B_FALSE;

		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
		    sizeof (real), &conflict);
		if (err == 0) {
			nm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		if (realpnp)
			(void) strlcpy(realpnp->pn_buf, nm,
			    realpnp->pn_bufsize);
		if (conflict && direntflags)
			*direntflags = ED_CASE_CONFLICT;
	}

	mutex_enter(&sdp->sd_lock);
	search.se_name = (char *)nm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
		*vpp = sep->se_root;
		VN_HOLD(*vpp);
		err = traverse(vpp);
		if (err != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		} else if (*vpp == sep->se_root) {
			/*
			 * The snapshot was unmounted behind our backs,
			 * try to remount it.
			 */
			goto domount;
		} else {
			/*
			 * VROOT was set during the traverse call.  We need
			 * to clear it since we're pretending to be part
			 * of our parent's vfs.
			 */
			(*vpp)->v_flag &= ~VROOT;
		}
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (err);
	}

	/*
	 * The requested snapshot is not currently mounted, look it up.
	 */
	err = zfsctl_snapshot_zname(dvp, nm, sizeof (snapname), snapname);
	if (err != 0) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		/*
		 * Handle "ls *" or "ls ?" gracefully by mapping EILSEQ to
		 * ENOENT, since the shell ultimately passes "*" or "?"
		 * through as the name to look up.
		 */
		return (err == EILSEQ ? ENOENT : err);
	}
	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOENT));
	}

	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
	avl_insert(&sdp->sd_snaps, sep, where);

	dmu_objset_rele(snap, FTAG);
domount:
	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);

	margs.spec = snapname;
	margs.dir = mountpoint;
	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
	margs.fstype = "zfs";
	margs.dataptr = NULL;
	margs.datalen = 0;
	margs.optptr = NULL;
	margs.optlen = 0;

	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
	kmem_free(mountpoint, mountpoint_len);

	if (err == 0) {
		/*
		 * Return the mounted root rather than the covered mount
		 * point.  Takes the GFS vnode at .zfs/snapshot/<snapname>
		 * and returns the ZFS vnode mounted on top of the GFS node.
		 * This ZFS vnode is the root of the newly created vfsp.
		 */
		VFS_RELE(vfsp);
		err = traverse(vpp);
	}

	if (err == 0) {
		/*
		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
		 *
		 * This is where we lie about our v_vfsp in order to
		 * make .zfs/snapshot/<snapname> accessible over NFS
		 * without requiring manual mounts of <snapname>.
		 */
		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
		(*vpp)->v_vfsp = zfsvfs->z_vfs;
		(*vpp)->v_flag &= ~VROOT;
	}
	mutex_exit(&sdp->sd_lock);
	ZFS_EXIT(zfsvfs);

	/*
	 * If we had an error, drop our hold on the vnode and
	 * zfsctl_snapshot_inactive() will clean up.
	 */
	if (err != 0) {
		VN_RELE(*vpp);
		*vpp = NULL;
	}
	return (err);
}
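
/*
 * Lookup in '.zfs/shares' is delegated to the real shares directory
 * znode (z_shares_dir) in the underlying filesystem.
 */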
/* ARGSUSED */
static int
zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);

	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTSUP));
	}
	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
		error = fop_lookup(ZTOV(dzp), nm, vpp, pnp,
		    flags, rdir, cr, ct, direntflags, realpnp);
		VN_RELE(ZTOV(dzp));
	}

	ZFS_EXIT(zfsvfs);

	return (error);
}
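
/*
 * GFS readdir callback for '.zfs/snapshot': return one snapshot entry per
 * call, using *offp as the dmu_snapshot_list_next() cookie.
 */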
/* ARGSUSED */
static int
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    offset_t *offp, offset_t *nextp, void *data, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	char snapname[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t id, cookie;
	boolean_t case_conflict;
	int error;

	ZFS_ENTER(zfsvfs);

	cookie = *offp;
	dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
	error = dmu_snapshot_list_next(zfsvfs->z_os,
	    sizeof (snapname), snapname, &id, &cookie, &case_conflict);
	dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
	if (error) {
		ZFS_EXIT(zfsvfs);
		if (error == ENOENT) {
			*eofp = 1;
			return (0);
		}
		return (error);
	}

	if (flags & V_RDDIR_ENTFLAGS) {
		edirent_t *eodp = dp;

		(void) strcpy(eodp->ed_name, snapname);
		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
	} else {
		struct dirent64 *odp = dp;

		(void) strcpy(odp->d_name, snapname);
		odp->d_ino = ZFSCTL_INO_SNAP(id);
	}
	*nextp = cookie;

	ZFS_EXIT(zfsvfs);

	return (0);
}
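
/*
 * readdir for '.zfs/shares' is likewise delegated to the real shares
 * directory znode in the underlying filesystem.
 */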
/* ARGSUSED */
static int
zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTSUP));
	}
	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
		error = fop_readdir(ZTOV(dzp), uiop, cr, eofp, ct, flags);
		VN_RELE(ZTOV(dzp));
	} else {
		*eofp = 1;
		error = SET_ERROR(ENOENT);
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * pvp is the '.zfs' directory (zfsctl_node_t).
 *
 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
 *
 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
 * when a lookup is performed on .zfs for "snapshot".
 */
vnode_t *
zfsctl_mknode_snapdir(vnode_t *pvp)
{
	vnode_t *vp;
	zfsctl_snapdir_t *sdp;

	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
	    &zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
	    zfsctl_snapdir_readdir_cb, NULL);
	sdp = vp->v_data;
	sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
	sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
	mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&sdp->sd_snaps, snapentry_compare,
	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
	return (vp);
}

vnode_t *
zfsctl_mknode_shares(vnode_t *pvp)
{
	vnode_t *vp;
	zfsctl_node_t *sdp;

	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
	    &zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
	    NULL, NULL);
	sdp = vp->v_data;
	sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
	return (vp);
}

/* ARGSUSED */
static int
zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);
	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTSUP));
	}
	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
		error = fop_getattr(ZTOV(dzp), vap, flags, cr, ct);
		VN_RELE(ZTOV(dzp));
	}
	ZFS_EXIT(zfsvfs);
	return (error);
}
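
/*
 * Attributes of '.zfs/snapshot' are synthesized: the link count and size
 * track the number of cached snapshot entries plus . and .., and the
 * times come from the objset's snapshot directory change time.
 */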
/* ARGSUSED */
static int
zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_snapdir_t *sdp = vp->v_data;

	ZFS_ENTER(zfsvfs);
	zfsctl_common_getattr(vp, vap);
	vap->va_nodeid = gfs_file_inode(vp);
	vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
	vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
	ZFS_EXIT(zfsvfs);

	return (0);
}
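
/*
 * On the last release of '.zfs/snapshot', tear down the snapdir state
 * (the AVL tree must be empty by then) and free the zfsctl_snapdir_t.
 */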
/* ARGSUSED */
static void
zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp = vp->v_data;
	void *private;

	private = gfs_dir_inactive(vp);
	if (private != NULL) {
		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
		mutex_destroy(&sdp->sd_lock);
		avl_destroy(&sdp->sd_snaps);
		kmem_free(private, sizeof (zfsctl_snapdir_t));
	}
}

static const struct vnodeops zfsctl_ops_snapdir = {
	.vnop_name = ".zfs/snapshot",
	.vop_open = zfsctl_common_open,
	.vop_close = zfsctl_common_close,
	.vop_ioctl = fs_inval,
	.vop_getattr = zfsctl_snapdir_getattr,
	.vop_access = zfsctl_common_access,
	.vop_rename = zfsctl_snapdir_rename,
	.vop_rmdir = zfsctl_snapdir_remove,
	.vop_mkdir = zfsctl_snapdir_mkdir,
	.vop_readdir = gfs_vop_readdir,
	.vop_lookup = zfsctl_snapdir_lookup,
	.vop_seek = fs_seek,
	.vop_inactive = zfsctl_snapdir_inactive,
	.vop_fid = zfsctl_common_fid,
};

static const struct vnodeops zfsctl_ops_shares = {
	.vnop_name = ".zfs/shares",
	.vop_open = zfsctl_common_open,
	.vop_close = zfsctl_common_close,
	.vop_ioctl = fs_inval,
	.vop_getattr = zfsctl_shares_getattr,
	.vop_access = zfsctl_common_access,
	.vop_readdir = zfsctl_shares_readdir,
	.vop_lookup = zfsctl_shares_lookup,
	.vop_seek = fs_seek,
	.vop_inactive = gfs_vop_inactive,
	.vop_fid = zfsctl_shares_fid,
};

/*
 * pvp is the GFS vnode '.zfs/snapshot'.
 *
 * This creates a GFS node under '.zfs/snapshot' representing each
 * snapshot.  This newly created GFS node is what we mount snapshot
 * vfs_t's on top of.
 */
static vnode_t *
zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
{
	vnode_t *vp;
	zfsctl_node_t *zcp;

	vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
	    &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
	zcp = vp->v_data;
	zcp->zc_id = objset;

	return (vp);
}
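
/*
 * Last release of a (no longer mounted) snapshot node: remove its entry
 * from the parent snapdir's AVL tree and dispose of the GFS vnode.
 */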
static void
zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	vnode_t *dvp;

	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		mutex_exit(&sdp->sd_lock);
		VN_RELE(dvp);
		return;
	}
	mutex_exit(&vp->v_lock);
	ASSERT(!vn_ismntpt(vp));

	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		if (sep->se_root == vp) {
			avl_remove(&sdp->sd_snaps, sep);
			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
			kmem_free(sep, sizeof (zfs_snapentry_t));
			break;
		}
		sep = next;
	}
	ASSERT(sep != NULL);

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);

	/*
	 * Dispose of the vnode for the snapshot mount point.
	 * This is safe to do because once this entry has been removed
	 * from the AVL tree, it can't be found again, so cannot become
	 * "active".  If we lookup the same name again we will end up
	 * creating a new vnode.
	 */
	gfs_vop_inactive(vp, cr, ct);
}

/*
 * These vnodes should never see the light of day.  They should always
 * be covered.
 */
static const struct vnodeops zfsctl_ops_snapshot = {
	.vnop_name = ".zfs/snapshot/vnode",
	.vop_inactive = zfsctl_snapshot_inactive,
};
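
/*
 * Find the zfsvfs_t for the snapshot with the given objset id, if it is
 * currently mounted under '.zfs/snapshot'.  Returns EINVAL if the
 * snapshot is not found or not mounted.
 */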
int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp, *vp;
	zfsctl_snapdir_t *sdp;
	zfsctl_node_t *zcp;
	zfs_snapentry_t *sep;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);
	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		vp = sep->se_root;
		zcp = vp->v_data;
		if (zcp->zc_id == objsetid)
			break;

		sep = AVL_NEXT(&sdp->sd_snaps, sep);
	}

	if (sep != NULL) {
		VN_HOLD(vp);
		/*
		 * Return the mounted root rather than the covered mount
		 * point.  Takes the GFS vnode at .zfs/snapshot/<snapshot
		 * objsetid> and returns the ZFS vnode mounted on top of the
		 * GFS node.  This ZFS vnode is the root of the vfs for
		 * objset 'objsetid'.
		 */
		error = traverse(&vp);
		if (error == 0) {
			if (vp == sep->se_root)
				error = SET_ERROR(EINVAL);
			else
				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
		}
		mutex_exit(&sdp->sd_lock);
		VN_RELE(vp);
	} else {
		error = SET_ERROR(EINVAL);
		mutex_exit(&sdp->sd_lock);
	}

	VN_RELE(dvp);

	return (error);
}

/*
 * Unmount any snapshots for the given filesystem.  This is called from
 * zfs_umount() - if we have a ctldir, then go through and unmount all the
 * snapshots.
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp;
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, cr, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		/*
		 * If this snapshot is not mounted, then it must
		 * have just been unmounted by somebody else, and
		 * will be cleaned up by zfsctl_snapdir_inactive().
		 */
		if (vn_ismntpt(sep->se_root)) {
			avl_remove(&sdp->sd_snaps, sep);
			error = zfsctl_unmount_snap(sep, fflags, cr);
			if (error) {
				avl_add(&sdp->sd_snaps, sep);
				break;
			}
		}
		sep = next;
	}

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);

	return (error);
}