4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
27 * ZFS control directory (a.k.a. ".zfs")
29 * This directory provides a common location for all ZFS meta-objects.
30 * Currently, this is only the 'snapshot' directory, but this may expand in the
31 * future. The elements are built using the GFS primitives, as the hierarchy
32 * does not actually exist on disk.
34 * For 'snapshot', we don't want to have all snapshots always mounted, because
35 * this would take up a huge amount of space in /etc/mnttab. We have three
36 * types of objects:
38 * ctldir ------> snapshotdir -------> snapshot
44 * The 'snapshot' node contains just enough information to lookup '..' and act
45 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
46 * perform an automount of the underlying filesystem and return the
47 * corresponding vnode.
49 * All mounts are handled automatically by the kernel, but unmounts are
50 * (currently) handled from user land. The main reason is that there is no
51 * reliable way to auto-unmount the filesystem when it's "no longer in use".
52 * When the user unmounts a filesystem, we call zfsctl_umount_snapshots(),
53 * which unmounts any snapshots within the snapshot directory.
55 * The '.zfs', '.zfs/snapshot', and all directories created under
56 * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') are all GFS nodes and
57 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
59 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
60 * (i.e. snapshots) are ZFS nodes and have their own unique vfs_t.
61 * However, vnodes within these mounted on file systems have their v_vfsp
62 * fields set to the head filesystem to make NFS happy (see
63 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
64 * so that it cannot be freed until all snapshots have been unmounted.
67 #include <fs/fs_subr.h>
68 #include <sys/zfs_ctldir.h>
69 #include <sys/zfs_ioctl.h>
70 #include <sys/zfs_vfsops.h>
71 #include <sys/vfs_opreg.h>
75 #include <sys/dsl_destroy.h>
76 #include <sys/dsl_deleg.h>
77 #include <sys/mount.h>
78 #include <sys/sunddi.h>
80 #include "zfs_namecheck.h"
82 typedef struct zfsctl_node
{
83 gfs_dir_t zc_gfs_private
;
85 timestruc_t zc_cmtime
; /* ctime and mtime, always the same */
88 typedef struct zfsctl_snapdir
{
89 zfsctl_node_t sd_node
;
101 snapentry_compare(const void *a
, const void *b
)
103 const zfs_snapentry_t
*sa
= a
;
104 const zfs_snapentry_t
*sb
= b
;
105 int ret
= strcmp(sa
->se_name
, sb
->se_name
);
115 vnodeops_t
*zfsctl_ops_root
;
116 vnodeops_t
*zfsctl_ops_snapdir
;
117 vnodeops_t
*zfsctl_ops_snapshot
;
118 vnodeops_t
*zfsctl_ops_shares
;
119 vnodeops_t
*zfsctl_ops_shares_dir
;
121 static const fs_operation_def_t zfsctl_tops_root
[];
122 static const fs_operation_def_t zfsctl_tops_snapdir
[];
123 static const fs_operation_def_t zfsctl_tops_snapshot
[];
124 static const fs_operation_def_t zfsctl_tops_shares
[];
126 static vnode_t
*zfsctl_mknode_snapdir(vnode_t
*);
127 static vnode_t
*zfsctl_mknode_shares(vnode_t
*);
128 static vnode_t
*zfsctl_snapshot_mknode(vnode_t
*, uint64_t objset
);
129 static int zfsctl_unmount_snap(zfs_snapentry_t
*, int, cred_t
*);
131 static gfs_opsvec_t zfsctl_opsvec
[] = {
132 { ".zfs", zfsctl_tops_root
, &zfsctl_ops_root
},
133 { ".zfs/snapshot", zfsctl_tops_snapdir
, &zfsctl_ops_snapdir
},
134 { ".zfs/snapshot/vnode", zfsctl_tops_snapshot
, &zfsctl_ops_snapshot
},
135 { ".zfs/shares", zfsctl_tops_shares
, &zfsctl_ops_shares_dir
},
136 { ".zfs/shares/vnode", zfsctl_tops_shares
, &zfsctl_ops_shares
},
141 * Root directory elements. We only have two entries:
142 * 'snapshot' and 'shares'.
144 static gfs_dirent_t zfsctl_root_entries
[] = {
145 { "snapshot", zfsctl_mknode_snapdir
, GFS_CACHE_VNODE
},
146 { "shares", zfsctl_mknode_shares
, GFS_CACHE_VNODE
},
150 /* include . and .. in the calculation */
151 #define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
152 sizeof (gfs_dirent_t)) + 1)
156 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
157 * directories. This is called from the ZFS init routine, and initializes the
158 * vnode ops vectors that we'll be using.
163 VERIFY(gfs_make_opsvec(zfsctl_opsvec
) == 0);
170 * Remove the zfsctl vnode ops.
173 vn_freevnodeops(zfsctl_ops_root
);
174 if (zfsctl_ops_snapdir
)
175 vn_freevnodeops(zfsctl_ops_snapdir
);
176 if (zfsctl_ops_snapshot
)
177 vn_freevnodeops(zfsctl_ops_snapshot
);
178 if (zfsctl_ops_shares
)
179 vn_freevnodeops(zfsctl_ops_shares
);
180 if (zfsctl_ops_shares_dir
)
181 vn_freevnodeops(zfsctl_ops_shares_dir
);
183 zfsctl_ops_root
= NULL
;
184 zfsctl_ops_snapdir
= NULL
;
185 zfsctl_ops_snapshot
= NULL
;
186 zfsctl_ops_shares
= NULL
;
187 zfsctl_ops_shares_dir
= NULL
;
191 zfsctl_is_node(vnode_t
*vp
)
193 return (vn_matchops(vp
, zfsctl_ops_root
) ||
194 vn_matchops(vp
, zfsctl_ops_snapdir
) ||
195 vn_matchops(vp
, zfsctl_ops_snapshot
) ||
196 vn_matchops(vp
, zfsctl_ops_shares
) ||
197 vn_matchops(vp
, zfsctl_ops_shares_dir
));
202 * Return the inode number associated with the 'snapshot' or
203 * 'shares' directory.
207 zfsctl_root_inode_cb(vnode_t
*vp
, int index
)
209 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
214 return (ZFSCTL_INO_SNAPDIR
);
216 return (zfsvfs
->z_shares_dir
);
220 * Create the '.zfs' directory. This directory is cached as part of the VFS
221 * structure. This results in a hold on the vfs_t. The code in zfs_umount()
222 * therefore checks against a vfs_count of 2 instead of 1. This reference
223 * is removed when the ctldir is destroyed in the unmount.
226 zfsctl_create(zfsvfs_t
*zfsvfs
)
232 ASSERT(zfsvfs
->z_ctldir
== NULL
);
234 vp
= gfs_root_create(sizeof (zfsctl_node_t
), zfsvfs
->z_vfs
,
235 zfsctl_ops_root
, ZFSCTL_INO_ROOT
, zfsctl_root_entries
,
236 zfsctl_root_inode_cb
, MAXNAMELEN
, NULL
, NULL
);
238 zcp
->zc_id
= ZFSCTL_INO_ROOT
;
240 VERIFY(VFS_ROOT(zfsvfs
->z_vfs
, &rvp
) == 0);
241 VERIFY(0 == sa_lookup(VTOZ(rvp
)->z_sa_hdl
, SA_ZPL_CRTIME(zfsvfs
),
242 &crtime
, sizeof (crtime
)));
243 ZFS_TIME_DECODE(&zcp
->zc_cmtime
, crtime
);
247 * We're only faking the fact that we have a root of a filesystem for
248 * the sake of the GFS interfaces. Undo the flag manipulation it did
251 vp
->v_flag
&= ~(VROOT
| VNOCACHE
| VNOMAP
| VNOSWAP
| VNOMOUNT
);
253 zfsvfs
->z_ctldir
= vp
;
257 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
258 * There might still be more references if we were force unmounted, but only
259 * new zfs_inactive() calls can occur and they don't reference .zfs
262 zfsctl_destroy(zfsvfs_t
*zfsvfs
)
264 VN_RELE(zfsvfs
->z_ctldir
);
265 zfsvfs
->z_ctldir
= NULL
;
269 * Given a root znode, retrieve the associated .zfs directory.
270 * Add a hold to the vnode and return it.
273 zfsctl_root(znode_t
*zp
)
275 ASSERT(zfs_has_ctldir(zp
));
276 VN_HOLD(zp
->z_zfsvfs
->z_ctldir
);
277 return (zp
->z_zfsvfs
->z_ctldir
);
281 * Common open routine. Disallow any write access.
285 zfsctl_common_open(vnode_t
**vpp
, int flags
, cred_t
*cr
, caller_context_t
*ct
)
288 return (SET_ERROR(EACCES
));
294 * Common close routine. Nothing to do here.
298 zfsctl_common_close(vnode_t
*vpp
, int flags
, int count
, offset_t off
,
299 cred_t
*cr
, caller_context_t
*ct
)
305 * Common access routine. Disallow writes.
309 zfsctl_common_access(vnode_t
*vp
, int mode
, int flags
, cred_t
*cr
,
310 caller_context_t
*ct
)
312 if (flags
& V_ACE_MASK
) {
313 if (mode
& ACE_ALL_WRITE_PERMS
)
314 return (SET_ERROR(EACCES
));
317 return (SET_ERROR(EACCES
));
324 * Common getattr function. Fill in basic information.
327 zfsctl_common_getattr(vnode_t
*vp
, vattr_t
*vap
)
335 * We are a purely virtual object, so we have no
336 * blocksize or allocated blocks.
341 vap
->va_fsid
= vp
->v_vfsp
->vfs_dev
;
342 vap
->va_mode
= S_IRUSR
| S_IXUSR
| S_IRGRP
| S_IXGRP
|
346 * We live in the now (for atime).
354 zfsctl_common_fid(vnode_t
*vp
, fid_t
*fidp
, caller_context_t
*ct
)
356 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
357 zfsctl_node_t
*zcp
= vp
->v_data
;
358 uint64_t object
= zcp
->zc_id
;
364 if (fidp
->fid_len
< SHORT_FID_LEN
) {
365 fidp
->fid_len
= SHORT_FID_LEN
;
367 return (SET_ERROR(ENOSPC
));
370 zfid
= (zfid_short_t
*)fidp
;
372 zfid
->zf_len
= SHORT_FID_LEN
;
374 for (i
= 0; i
< sizeof (zfid
->zf_object
); i
++)
375 zfid
->zf_object
[i
] = (uint8_t)(object
>> (8 * i
));
377 /* .zfs znodes always have a generation number of 0 */
378 for (i
= 0; i
< sizeof (zfid
->zf_gen
); i
++)
388 zfsctl_shares_fid(vnode_t
*vp
, fid_t
*fidp
, caller_context_t
*ct
)
390 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
396 if (zfsvfs
->z_shares_dir
== 0) {
398 return (SET_ERROR(ENOTSUP
));
401 if ((error
= zfs_zget(zfsvfs
, zfsvfs
->z_shares_dir
, &dzp
)) == 0) {
402 error
= VOP_FID(ZTOV(dzp
), fidp
, ct
);
410 * .zfs inode namespace
412 * We need to generate unique inode numbers for all files and directories
413 * within the .zfs pseudo-filesystem. We use the following scheme:
418 * .zfs/snapshot/<snap> objectid(snap)
421 #define ZFSCTL_INO_SNAP(id) (id)
424 * Get root directory attributes.
428 zfsctl_root_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
,
429 caller_context_t
*ct
)
431 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
432 zfsctl_node_t
*zcp
= vp
->v_data
;
435 vap
->va_nodeid
= ZFSCTL_INO_ROOT
;
436 vap
->va_nlink
= vap
->va_size
= NROOT_ENTRIES
;
437 vap
->va_mtime
= vap
->va_ctime
= zcp
->zc_cmtime
;
439 zfsctl_common_getattr(vp
, vap
);
446 * Special case the handling of "..".
450 zfsctl_root_lookup(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
, pathname_t
*pnp
,
451 int flags
, vnode_t
*rdir
, cred_t
*cr
, caller_context_t
*ct
,
452 int *direntflags
, pathname_t
*realpnp
)
454 zfsvfs_t
*zfsvfs
= dvp
->v_vfsp
->vfs_data
;
458 * No extended attributes allowed under .zfs
460 if (flags
& LOOKUP_XATTR
)
461 return (SET_ERROR(EINVAL
));
465 if (strcmp(nm
, "..") == 0) {
466 err
= VFS_ROOT(dvp
->v_vfsp
, vpp
);
468 err
= gfs_vop_lookup(dvp
, nm
, vpp
, pnp
, flags
, rdir
,
469 cr
, ct
, direntflags
, realpnp
);
478 zfsctl_pathconf(vnode_t
*vp
, int cmd
, ulong_t
*valp
, cred_t
*cr
,
479 caller_context_t
*ct
)
482 * We only care about ACL_ENABLED so that libsec can
483 * display ACL correctly and not default to POSIX draft.
485 if (cmd
== _PC_ACL_ENABLED
) {
486 *valp
= _ACL_ACE_ENABLED
;
490 return (fs_pathconf(vp
, cmd
, valp
, cr
, ct
));
493 static const fs_operation_def_t zfsctl_tops_root
[] = {
494 { VOPNAME_OPEN
, { .vop_open
= zfsctl_common_open
} },
495 { VOPNAME_CLOSE
, { .vop_close
= zfsctl_common_close
} },
496 { VOPNAME_IOCTL
, { .error
= fs_inval
} },
497 { VOPNAME_GETATTR
, { .vop_getattr
= zfsctl_root_getattr
} },
498 { VOPNAME_ACCESS
, { .vop_access
= zfsctl_common_access
} },
499 { VOPNAME_READDIR
, { .vop_readdir
= gfs_vop_readdir
} },
500 { VOPNAME_LOOKUP
, { .vop_lookup
= zfsctl_root_lookup
} },
501 { VOPNAME_SEEK
, { .vop_seek
= fs_seek
} },
502 { VOPNAME_INACTIVE
, { .vop_inactive
= gfs_vop_inactive
} },
503 { VOPNAME_PATHCONF
, { .vop_pathconf
= zfsctl_pathconf
} },
504 { VOPNAME_FID
, { .vop_fid
= zfsctl_common_fid
} },
509 * Gets the full dataset name that corresponds to the given snapshot name
511 * zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
514 zfsctl_snapshot_zname(vnode_t
*vp
, const char *name
, int len
, char *zname
)
516 objset_t
*os
= ((zfsvfs_t
*)((vp
)->v_vfsp
->vfs_data
))->z_os
;
518 if (zfs_component_namecheck(name
, NULL
, NULL
) != 0)
519 return (SET_ERROR(EILSEQ
));
520 dmu_objset_name(os
, zname
);
521 if (strlen(zname
) + 1 + strlen(name
) >= len
)
522 return (SET_ERROR(ENAMETOOLONG
));
523 (void) strcat(zname
, "@");
524 (void) strcat(zname
, name
);
529 zfsctl_unmount_snap(zfs_snapentry_t
*sep
, int fflags
, cred_t
*cr
)
531 vnode_t
*svp
= sep
->se_root
;
534 ASSERT(vn_ismntpt(svp
));
536 /* this will be dropped by dounmount() */
537 if ((error
= vn_vfswlock(svp
)) != 0)
541 error
= dounmount(vn_mountedvfs(svp
), fflags
, cr
);
548 * We can't use VN_RELE(), as that will try to invoke
549 * zfsctl_snapdir_inactive(), which would cause us to destroy
550 * the sd_lock mutex held by our caller.
552 ASSERT(svp
->v_count
== 1);
553 gfs_vop_inactive(svp
, cr
, NULL
);
555 kmem_free(sep
->se_name
, strlen(sep
->se_name
) + 1);
556 kmem_free(sep
, sizeof (zfs_snapentry_t
));
562 zfsctl_rename_snap(zfsctl_snapdir_t
*sdp
, zfs_snapentry_t
*sep
, const char *nm
)
567 char newpath
[MAXNAMELEN
];
570 ASSERT(MUTEX_HELD(&sdp
->sd_lock
));
573 vfsp
= vn_mountedvfs(sep
->se_root
);
574 ASSERT(vfsp
!= NULL
);
579 * Change the name in the AVL tree.
581 avl_remove(&sdp
->sd_snaps
, sep
);
582 kmem_free(sep
->se_name
, strlen(sep
->se_name
) + 1);
583 sep
->se_name
= kmem_alloc(strlen(nm
) + 1, KM_SLEEP
);
584 (void) strcpy(sep
->se_name
, nm
);
585 VERIFY(avl_find(&sdp
->sd_snaps
, sep
, &where
) == NULL
);
586 avl_insert(&sdp
->sd_snaps
, sep
, where
);
589 * Change the current mountpoint info:
590 * - update the tail of the mntpoint path
591 * - update the tail of the resource path
593 pathref
= vfs_getmntpoint(vfsp
);
594 (void) strncpy(newpath
, refstr_value(pathref
), sizeof (newpath
));
595 VERIFY((tail
= strrchr(newpath
, '/')) != NULL
);
597 ASSERT3U(strlen(newpath
) + strlen(nm
), <, sizeof (newpath
));
598 (void) strcat(newpath
, nm
);
599 refstr_rele(pathref
);
600 vfs_setmntpoint(vfsp
, newpath
, 0);
602 pathref
= vfs_getresource(vfsp
);
603 (void) strncpy(newpath
, refstr_value(pathref
), sizeof (newpath
));
604 VERIFY((tail
= strrchr(newpath
, '@')) != NULL
);
606 ASSERT3U(strlen(newpath
) + strlen(nm
), <, sizeof (newpath
));
607 (void) strcat(newpath
, nm
);
608 refstr_rele(pathref
);
609 vfs_setresource(vfsp
, newpath
, 0);
616 zfsctl_snapdir_rename(vnode_t
*sdvp
, char *snm
, vnode_t
*tdvp
, char *tnm
,
617 cred_t
*cr
, caller_context_t
*ct
, int flags
)
619 zfsctl_snapdir_t
*sdp
= sdvp
->v_data
;
620 zfs_snapentry_t search
, *sep
;
623 char from
[MAXNAMELEN
], to
[MAXNAMELEN
];
624 char real
[MAXNAMELEN
], fsname
[MAXNAMELEN
];
627 zfsvfs
= sdvp
->v_vfsp
->vfs_data
;
630 if ((flags
& FIGNORECASE
) || zfsvfs
->z_case
== ZFS_CASE_INSENSITIVE
) {
631 err
= dmu_snapshot_realname(zfsvfs
->z_os
, snm
, real
,
635 } else if (err
!= ENOTSUP
) {
643 dmu_objset_name(zfsvfs
->z_os
, fsname
);
645 err
= zfsctl_snapshot_zname(sdvp
, snm
, MAXNAMELEN
, from
);
647 err
= zfsctl_snapshot_zname(tdvp
, tnm
, MAXNAMELEN
, to
);
649 err
= zfs_secpolicy_rename_perms(from
, to
, cr
);
654 * Cannot move snapshots out of the snapdir.
657 return (SET_ERROR(EINVAL
));
659 if (strcmp(snm
, tnm
) == 0)
662 mutex_enter(&sdp
->sd_lock
);
664 search
.se_name
= (char *)snm
;
665 if ((sep
= avl_find(&sdp
->sd_snaps
, &search
, &where
)) == NULL
) {
666 mutex_exit(&sdp
->sd_lock
);
667 return (SET_ERROR(ENOENT
));
670 err
= dsl_dataset_rename_snapshot(fsname
, snm
, tnm
, B_FALSE
);
672 zfsctl_rename_snap(sdp
, sep
, tnm
);
674 mutex_exit(&sdp
->sd_lock
);
681 zfsctl_snapdir_remove(vnode_t
*dvp
, char *name
, vnode_t
*cwd
, cred_t
*cr
,
682 caller_context_t
*ct
, int flags
)
684 zfsctl_snapdir_t
*sdp
= dvp
->v_data
;
685 zfs_snapentry_t
*sep
;
686 zfs_snapentry_t search
;
688 char snapname
[MAXNAMELEN
];
689 char real
[MAXNAMELEN
];
692 zfsvfs
= dvp
->v_vfsp
->vfs_data
;
695 if ((flags
& FIGNORECASE
) || zfsvfs
->z_case
== ZFS_CASE_INSENSITIVE
) {
697 err
= dmu_snapshot_realname(zfsvfs
->z_os
, name
, real
,
701 } else if (err
!= ENOTSUP
) {
709 err
= zfsctl_snapshot_zname(dvp
, name
, MAXNAMELEN
, snapname
);
711 err
= zfs_secpolicy_destroy_perms(snapname
, cr
);
715 mutex_enter(&sdp
->sd_lock
);
717 search
.se_name
= name
;
718 sep
= avl_find(&sdp
->sd_snaps
, &search
, NULL
);
720 avl_remove(&sdp
->sd_snaps
, sep
);
721 err
= zfsctl_unmount_snap(sep
, MS_FORCE
, cr
);
723 avl_add(&sdp
->sd_snaps
, sep
);
725 err
= dsl_destroy_snapshot(snapname
, B_FALSE
);
727 err
= SET_ERROR(ENOENT
);
730 mutex_exit(&sdp
->sd_lock
);
736 * This creates a snapshot under '.zfs/snapshot'.
740 zfsctl_snapdir_mkdir(vnode_t
*dvp
, char *dirname
, vattr_t
*vap
, vnode_t
**vpp
,
741 cred_t
*cr
, caller_context_t
*cc
, int flags
, vsecattr_t
*vsecp
)
743 zfsvfs_t
*zfsvfs
= dvp
->v_vfsp
->vfs_data
;
744 char name
[MAXNAMELEN
];
746 static enum symfollow follow
= NO_FOLLOW
;
747 static enum uio_seg seg
= UIO_SYSSPACE
;
749 if (zfs_component_namecheck(dirname
, NULL
, NULL
) != 0)
750 return (SET_ERROR(EILSEQ
));
752 dmu_objset_name(zfsvfs
->z_os
, name
);
756 err
= zfs_secpolicy_snapshot_perms(name
, cr
);
761 err
= dmu_objset_snapshot_one(name
, dirname
);
764 err
= lookupnameat(dirname
, seg
, follow
, NULL
, vpp
, dvp
);
771 * Lookup entry point for the 'snapshot' directory. Try to open the
772 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
773 * Perform a mount of the associated dataset on top of the vnode.
777 zfsctl_snapdir_lookup(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
, pathname_t
*pnp
,
778 int flags
, vnode_t
*rdir
, cred_t
*cr
, caller_context_t
*ct
,
779 int *direntflags
, pathname_t
*realpnp
)
781 zfsctl_snapdir_t
*sdp
= dvp
->v_data
;
783 char snapname
[MAXNAMELEN
];
784 char real
[MAXNAMELEN
];
786 zfs_snapentry_t
*sep
, search
;
789 size_t mountpoint_len
;
791 zfsvfs_t
*zfsvfs
= dvp
->v_vfsp
->vfs_data
;
795 * No extended attributes allowed under .zfs
797 if (flags
& LOOKUP_XATTR
)
798 return (SET_ERROR(EINVAL
));
800 ASSERT(dvp
->v_type
== VDIR
);
803 * If we get a recursive call, that means we got called
804 * from the domount() code while it was trying to look up the
805 * spec (which looks like a local path for zfs). We need to
806 * add some flag to domount() to tell it not to do this lookup.
808 if (MUTEX_HELD(&sdp
->sd_lock
))
809 return (SET_ERROR(ENOENT
));
813 if (gfs_lookup_dot(vpp
, dvp
, zfsvfs
->z_ctldir
, nm
) == 0) {
818 if (flags
& FIGNORECASE
) {
819 boolean_t conflict
= B_FALSE
;
821 err
= dmu_snapshot_realname(zfsvfs
->z_os
, nm
, real
,
822 MAXNAMELEN
, &conflict
);
825 } else if (err
!= ENOTSUP
) {
830 (void) strlcpy(realpnp
->pn_buf
, nm
,
831 realpnp
->pn_bufsize
);
832 if (conflict
&& direntflags
)
833 *direntflags
= ED_CASE_CONFLICT
;
836 mutex_enter(&sdp
->sd_lock
);
837 search
.se_name
= (char *)nm
;
838 if ((sep
= avl_find(&sdp
->sd_snaps
, &search
, &where
)) != NULL
) {
845 } else if (*vpp
== sep
->se_root
) {
847 * The snapshot was unmounted behind our backs,
853 * VROOT was set during the traverse call. We need
854 * to clear it since we're pretending to be part
855 * of our parent's vfs.
857 (*vpp
)->v_flag
&= ~VROOT
;
859 mutex_exit(&sdp
->sd_lock
);
865 * The requested snapshot is not currently mounted, look it up.
867 err
= zfsctl_snapshot_zname(dvp
, nm
, MAXNAMELEN
, snapname
);
869 mutex_exit(&sdp
->sd_lock
);
872 * Handle "ls *" or "?" in a graceful manner, forcing EILSEQ to
873 * ENOENT, since the shell ultimately passes "*" or "?" as the
874 * name to lookup.
876 return (err
== EILSEQ
? ENOENT
: err
);
878 if (dmu_objset_hold(snapname
, FTAG
, &snap
) != 0) {
879 mutex_exit(&sdp
->sd_lock
);
881 return (SET_ERROR(ENOENT
));
884 sep
= kmem_alloc(sizeof (zfs_snapentry_t
), KM_SLEEP
);
885 sep
->se_name
= kmem_alloc(strlen(nm
) + 1, KM_SLEEP
);
886 (void) strcpy(sep
->se_name
, nm
);
887 *vpp
= sep
->se_root
= zfsctl_snapshot_mknode(dvp
, dmu_objset_id(snap
));
888 avl_insert(&sdp
->sd_snaps
, sep
, where
);
890 dmu_objset_rele(snap
, FTAG
);
892 mountpoint_len
= strlen(refstr_value(dvp
->v_vfsp
->vfs_mntpt
)) +
893 strlen("/.zfs/snapshot/") + strlen(nm
) + 1;
894 mountpoint
= kmem_alloc(mountpoint_len
, KM_SLEEP
);
895 (void) snprintf(mountpoint
, mountpoint_len
, "%s/.zfs/snapshot/%s",
896 refstr_value(dvp
->v_vfsp
->vfs_mntpt
), nm
);
898 margs
.spec
= snapname
;
899 margs
.dir
= mountpoint
;
900 margs
.flags
= MS_SYSSPACE
| MS_NOMNTTAB
;
901 margs
.fstype
= "zfs";
902 margs
.dataptr
= NULL
;
907 err
= domount("zfs", &margs
, *vpp
, kcred
, &vfsp
);
908 kmem_free(mountpoint
, mountpoint_len
);
912 * Return the mounted root rather than the covered mount point.
913 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
914 * the ZFS vnode mounted on top of the GFS node. This ZFS
915 * vnode is the root of the newly created vfsp.
923 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
925 * This is where we lie about our v_vfsp in order to
926 * make .zfs/snapshot/<snapname> accessible over NFS
927 * without requiring manual mounts of <snapname>.
929 ASSERT(VTOZ(*vpp
)->z_zfsvfs
!= zfsvfs
);
930 VTOZ(*vpp
)->z_zfsvfs
->z_parent
= zfsvfs
;
931 (*vpp
)->v_vfsp
= zfsvfs
->z_vfs
;
932 (*vpp
)->v_flag
&= ~VROOT
;
934 mutex_exit(&sdp
->sd_lock
);
938 * If we had an error, drop our hold on the vnode and
939 * zfsctl_snapshot_inactive() will clean up.
950 zfsctl_shares_lookup(vnode_t
*dvp
, char *nm
, vnode_t
**vpp
, pathname_t
*pnp
,
951 int flags
, vnode_t
*rdir
, cred_t
*cr
, caller_context_t
*ct
,
952 int *direntflags
, pathname_t
*realpnp
)
954 zfsvfs_t
*zfsvfs
= dvp
->v_vfsp
->vfs_data
;
960 if (gfs_lookup_dot(vpp
, dvp
, zfsvfs
->z_ctldir
, nm
) == 0) {
965 if (zfsvfs
->z_shares_dir
== 0) {
967 return (SET_ERROR(ENOTSUP
));
969 if ((error
= zfs_zget(zfsvfs
, zfsvfs
->z_shares_dir
, &dzp
)) == 0)
970 error
= VOP_LOOKUP(ZTOV(dzp
), nm
, vpp
, pnp
,
971 flags
, rdir
, cr
, ct
, direntflags
, realpnp
);
981 zfsctl_snapdir_readdir_cb(vnode_t
*vp
, void *dp
, int *eofp
,
982 offset_t
*offp
, offset_t
*nextp
, void *data
, int flags
)
984 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
985 char snapname
[MAXNAMELEN
];
987 boolean_t case_conflict
;
993 dsl_pool_config_enter(dmu_objset_pool(zfsvfs
->z_os
), FTAG
);
994 error
= dmu_snapshot_list_next(zfsvfs
->z_os
, MAXNAMELEN
, snapname
, &id
,
995 &cookie
, &case_conflict
);
996 dsl_pool_config_exit(dmu_objset_pool(zfsvfs
->z_os
), FTAG
);
999 if (error
== ENOENT
) {
1006 if (flags
& V_RDDIR_ENTFLAGS
) {
1007 edirent_t
*eodp
= dp
;
1009 (void) strcpy(eodp
->ed_name
, snapname
);
1010 eodp
->ed_ino
= ZFSCTL_INO_SNAP(id
);
1011 eodp
->ed_eflags
= case_conflict
? ED_CASE_CONFLICT
: 0;
1013 struct dirent64
*odp
= dp
;
1015 (void) strcpy(odp
->d_name
, snapname
);
1016 odp
->d_ino
= ZFSCTL_INO_SNAP(id
);
1027 zfsctl_shares_readdir(vnode_t
*vp
, uio_t
*uiop
, cred_t
*cr
, int *eofp
,
1028 caller_context_t
*ct
, int flags
)
1030 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
1036 if (zfsvfs
->z_shares_dir
== 0) {
1038 return (SET_ERROR(ENOTSUP
));
1040 if ((error
= zfs_zget(zfsvfs
, zfsvfs
->z_shares_dir
, &dzp
)) == 0) {
1041 error
= VOP_READDIR(ZTOV(dzp
), uiop
, cr
, eofp
, ct
, flags
);
1045 error
= SET_ERROR(ENOENT
);
1053 * pvp is the '.zfs' directory (zfsctl_node_t).
1055 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
1057 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
1058 * when a lookup is performed on .zfs for "snapshot".
1061 zfsctl_mknode_snapdir(vnode_t
*pvp
)
1064 zfsctl_snapdir_t
*sdp
;
1066 vp
= gfs_dir_create(sizeof (zfsctl_snapdir_t
), pvp
,
1067 zfsctl_ops_snapdir
, NULL
, NULL
, MAXNAMELEN
,
1068 zfsctl_snapdir_readdir_cb
, NULL
);
1070 sdp
->sd_node
.zc_id
= ZFSCTL_INO_SNAPDIR
;
1071 sdp
->sd_node
.zc_cmtime
= ((zfsctl_node_t
*)pvp
->v_data
)->zc_cmtime
;
1072 mutex_init(&sdp
->sd_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1073 avl_create(&sdp
->sd_snaps
, snapentry_compare
,
1074 sizeof (zfs_snapentry_t
), offsetof(zfs_snapentry_t
, se_node
));
1079 zfsctl_mknode_shares(vnode_t
*pvp
)
1084 vp
= gfs_dir_create(sizeof (zfsctl_node_t
), pvp
,
1085 zfsctl_ops_shares
, NULL
, NULL
, MAXNAMELEN
,
1088 sdp
->zc_cmtime
= ((zfsctl_node_t
*)pvp
->v_data
)->zc_cmtime
;
1095 zfsctl_shares_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
,
1096 caller_context_t
*ct
)
1098 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
1103 if (zfsvfs
->z_shares_dir
== 0) {
1105 return (SET_ERROR(ENOTSUP
));
1107 if ((error
= zfs_zget(zfsvfs
, zfsvfs
->z_shares_dir
, &dzp
)) == 0) {
1108 error
= VOP_GETATTR(ZTOV(dzp
), vap
, flags
, cr
, ct
);
1119 zfsctl_snapdir_getattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
,
1120 caller_context_t
*ct
)
1122 zfsvfs_t
*zfsvfs
= vp
->v_vfsp
->vfs_data
;
1123 zfsctl_snapdir_t
*sdp
= vp
->v_data
;
1126 zfsctl_common_getattr(vp
, vap
);
1127 vap
->va_nodeid
= gfs_file_inode(vp
);
1128 vap
->va_nlink
= vap
->va_size
= avl_numnodes(&sdp
->sd_snaps
) + 2;
1129 vap
->va_ctime
= vap
->va_mtime
= dmu_objset_snap_cmtime(zfsvfs
->z_os
);
1137 zfsctl_snapdir_inactive(vnode_t
*vp
, cred_t
*cr
, caller_context_t
*ct
)
1139 zfsctl_snapdir_t
*sdp
= vp
->v_data
;
1142 private = gfs_dir_inactive(vp
);
1143 if (private != NULL
) {
1144 ASSERT(avl_numnodes(&sdp
->sd_snaps
) == 0);
1145 mutex_destroy(&sdp
->sd_lock
);
1146 avl_destroy(&sdp
->sd_snaps
);
1147 kmem_free(private, sizeof (zfsctl_snapdir_t
));
1151 static const fs_operation_def_t zfsctl_tops_snapdir
[] = {
1152 { VOPNAME_OPEN
, { .vop_open
= zfsctl_common_open
} },
1153 { VOPNAME_CLOSE
, { .vop_close
= zfsctl_common_close
} },
1154 { VOPNAME_IOCTL
, { .error
= fs_inval
} },
1155 { VOPNAME_GETATTR
, { .vop_getattr
= zfsctl_snapdir_getattr
} },
1156 { VOPNAME_ACCESS
, { .vop_access
= zfsctl_common_access
} },
1157 { VOPNAME_RENAME
, { .vop_rename
= zfsctl_snapdir_rename
} },
1158 { VOPNAME_RMDIR
, { .vop_rmdir
= zfsctl_snapdir_remove
} },
1159 { VOPNAME_MKDIR
, { .vop_mkdir
= zfsctl_snapdir_mkdir
} },
1160 { VOPNAME_READDIR
, { .vop_readdir
= gfs_vop_readdir
} },
1161 { VOPNAME_LOOKUP
, { .vop_lookup
= zfsctl_snapdir_lookup
} },
1162 { VOPNAME_SEEK
, { .vop_seek
= fs_seek
} },
1163 { VOPNAME_INACTIVE
, { .vop_inactive
= zfsctl_snapdir_inactive
} },
1164 { VOPNAME_FID
, { .vop_fid
= zfsctl_common_fid
} },
1168 static const fs_operation_def_t zfsctl_tops_shares
[] = {
1169 { VOPNAME_OPEN
, { .vop_open
= zfsctl_common_open
} },
1170 { VOPNAME_CLOSE
, { .vop_close
= zfsctl_common_close
} },
1171 { VOPNAME_IOCTL
, { .error
= fs_inval
} },
1172 { VOPNAME_GETATTR
, { .vop_getattr
= zfsctl_shares_getattr
} },
1173 { VOPNAME_ACCESS
, { .vop_access
= zfsctl_common_access
} },
1174 { VOPNAME_READDIR
, { .vop_readdir
= zfsctl_shares_readdir
} },
1175 { VOPNAME_LOOKUP
, { .vop_lookup
= zfsctl_shares_lookup
} },
1176 { VOPNAME_SEEK
, { .vop_seek
= fs_seek
} },
1177 { VOPNAME_INACTIVE
, { .vop_inactive
= gfs_vop_inactive
} },
1178 { VOPNAME_FID
, { .vop_fid
= zfsctl_shares_fid
} },
1183 * pvp is the GFS vnode '.zfs/snapshot'.
1185 * This creates a GFS node under '.zfs/snapshot' representing each
1186 * snapshot. This newly created GFS node is what we mount snapshot
1187 * file systems on top of.
1190 zfsctl_snapshot_mknode(vnode_t
*pvp
, uint64_t objset
)
1195 vp
= gfs_dir_create(sizeof (zfsctl_node_t
), pvp
,
1196 zfsctl_ops_snapshot
, NULL
, NULL
, MAXNAMELEN
, NULL
, NULL
);
1198 zcp
->zc_id
= objset
;
1204 zfsctl_snapshot_inactive(vnode_t
*vp
, cred_t
*cr
, caller_context_t
*ct
)
1206 zfsctl_snapdir_t
*sdp
;
1207 zfs_snapentry_t
*sep
, *next
;
1210 VERIFY(gfs_dir_lookup(vp
, "..", &dvp
, cr
, 0, NULL
, NULL
) == 0);
1213 mutex_enter(&sdp
->sd_lock
);
1215 mutex_enter(&vp
->v_lock
);
1216 if (vp
->v_count
> 1) {
1218 mutex_exit(&vp
->v_lock
);
1219 mutex_exit(&sdp
->sd_lock
);
1223 mutex_exit(&vp
->v_lock
);
1224 ASSERT(!vn_ismntpt(vp
));
1226 sep
= avl_first(&sdp
->sd_snaps
);
1227 while (sep
!= NULL
) {
1228 next
= AVL_NEXT(&sdp
->sd_snaps
, sep
);
1230 if (sep
->se_root
== vp
) {
1231 avl_remove(&sdp
->sd_snaps
, sep
);
1232 kmem_free(sep
->se_name
, strlen(sep
->se_name
) + 1);
1233 kmem_free(sep
, sizeof (zfs_snapentry_t
));
1238 ASSERT(sep
!= NULL
);
1240 mutex_exit(&sdp
->sd_lock
);
1244 * Dispose of the vnode for the snapshot mount point.
1245 * This is safe to do because once this entry has been removed
1246 * from the AVL tree, it can't be found again, so cannot become
1247 * "active". If we lookup the same name again we will end up
1248 * creating a new vnode.
1250 gfs_vop_inactive(vp
, cr
, ct
);
1255 * These vnodes should never see the light of day. They should always
1256 * be covered by a mounted snapshot.
1258 static const fs_operation_def_t zfsctl_tops_snapshot
[] = {
1259 VOPNAME_INACTIVE
, { .vop_inactive
= zfsctl_snapshot_inactive
},
1264 zfsctl_lookup_objset(vfs_t
*vfsp
, uint64_t objsetid
, zfsvfs_t
**zfsvfsp
)
1266 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1268 zfsctl_snapdir_t
*sdp
;
1270 zfs_snapentry_t
*sep
;
1273 ASSERT(zfsvfs
->z_ctldir
!= NULL
);
1274 error
= zfsctl_root_lookup(zfsvfs
->z_ctldir
, "snapshot", &dvp
,
1275 NULL
, 0, NULL
, kcred
, NULL
, NULL
, NULL
);
1280 mutex_enter(&sdp
->sd_lock
);
1281 sep
= avl_first(&sdp
->sd_snaps
);
1282 while (sep
!= NULL
) {
1285 if (zcp
->zc_id
== objsetid
)
1288 sep
= AVL_NEXT(&sdp
->sd_snaps
, sep
);
1294 * Return the mounted root rather than the covered mount point.
1295 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
1296 * and returns the ZFS vnode mounted on top of the GFS node.
1297 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
1299 error
= traverse(&vp
);
1301 if (vp
== sep
->se_root
)
1302 error
= SET_ERROR(EINVAL
);
1304 *zfsvfsp
= VTOZ(vp
)->z_zfsvfs
;
1306 mutex_exit(&sdp
->sd_lock
);
1309 error
= SET_ERROR(EINVAL
);
1310 mutex_exit(&sdp
->sd_lock
);
1319 * Unmount any snapshots for the given filesystem. This is called from
1320 * zfs_umount() - if we have a ctldir, then go through and unmount all the
1321 * snapshots.
1324 zfsctl_umount_snapshots(vfs_t
*vfsp
, int fflags
, cred_t
*cr
)
1326 zfsvfs_t
*zfsvfs
= vfsp
->vfs_data
;
1328 zfsctl_snapdir_t
*sdp
;
1329 zfs_snapentry_t
*sep
, *next
;
1332 ASSERT(zfsvfs
->z_ctldir
!= NULL
);
1333 error
= zfsctl_root_lookup(zfsvfs
->z_ctldir
, "snapshot", &dvp
,
1334 NULL
, 0, NULL
, cr
, NULL
, NULL
, NULL
);
1339 mutex_enter(&sdp
->sd_lock
);
1341 sep
= avl_first(&sdp
->sd_snaps
);
1342 while (sep
!= NULL
) {
1343 next
= AVL_NEXT(&sdp
->sd_snaps
, sep
);
1346 * If this snapshot is not mounted, then it must
1347 * have just been unmounted by somebody else, and
1348 * will be cleaned up by zfsctl_snapshot_inactive().
1350 if (vn_ismntpt(sep
->se_root
)) {
1351 avl_remove(&sdp
->sd_snaps
, sep
);
1352 error
= zfsctl_unmount_snap(sep
, fflags
, cr
);
1354 avl_add(&sdp
->sd_snaps
, sep
);
1361 mutex_exit(&sdp
->sd_lock
);