4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
37 #include <sys/vnode.h>
38 #include <sys/pathname.h>
39 #include <sys/sysmacros.h>
41 #include <sys/mkdev.h>
42 #include <sys/mount.h>
43 #include <sys/statvfs.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/cmn_err.h>
47 #include <sys/utsname.h>
48 #include <sys/bootconf.h>
49 #include <sys/modctl.h>
51 #include <sys/flock.h>
54 #include <sys/policy.h>
55 #include <sys/socket.h>
56 #include <sys/netconfig.h>
59 #include <sys/mntent.h>
61 #include <rpc/types.h>
63 #include <rpc/rpcsec_gss.h>
67 #include <nfs/nfs_clnt.h>
68 #include <nfs/mount.h>
69 #include <nfs/nfs_acl.h>
71 #include <sys/fs_subr.h>
74 #include <nfs/rnode4.h>
75 #include <nfs/nfs4_clnt.h>
76 #include <sys/fs/autofs.h>
82 * Arguments passed to thread to free data structures from forced unmount.
91 static void async_free_mount(vfs_t
*, int, cred_t
*);
92 static void nfs4_free_mount(vfs_t
*, int, cred_t
*);
93 static void nfs4_free_mount_thread(freemountargs_t
*);
94 static int nfs4_chkdup_servinfo4(servinfo4_t
*, servinfo4_t
*);
97 * From rpcsec module (common/rpcsec).
99 extern int sec_clnt_loadinfo(struct sec_data
*, struct sec_data
**, model_t
);
100 extern void sec_clnt_freeinfo(struct sec_data
*);
103 * The order and contents of this structure must be kept in sync with that of
104 * rfsreqcnt_v4_tmpl in nfs_stats.c
106 static char *rfsnames_v4
[] = {
107 "null", "compound", "reserved", "access", "close", "commit", "create",
108 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
109 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
110 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
111 "read", "readdir", "readlink", "remove", "rename", "renew",
112 "restorefh", "savefh", "secinfo", "setattr", "setclientid",
113 "setclientid_confirm", "verify", "write"
117 * nfs4_max_mount_retry is the number of times the client will redrive
118 * a mount compound before giving up and returning failure. The intent
119 * is to redrive mount compounds which fail NFS4ERR_STALE so that
120 * if a component of the server path being mounted goes stale, it can
121 * "recover" by redriving the mount compound (LOOKUP ops). This recovery
122 * code is needed outside of the recovery framework because mount is a
123 * special case. The client doesn't create vnodes/rnodes for components
124 * of the server path being mounted. The recovery code recovers real
125 * client objects, not STALE FHs which map to components of the server
126 * path being mounted.
128 * We could just fail the mount on the first time, but that would
129 * instantly trigger failover (from nfs4_mount), and the client should
130 * try to re-lookup the STALE FH before doing failover. The easiest
131 * way to "re-lookup" is to simply redrive the mount compound.
133 static int nfs4_max_mount_retry
= 2;
136 * nfs4 vfs operations.
138 int nfs4_mount(vfs_t
*, vnode_t
*, struct mounta
*, cred_t
*);
139 static int nfs4_unmount(vfs_t
*, int, cred_t
*);
140 static int nfs4_root(vfs_t
*, vnode_t
**);
141 static int nfs4_statvfs(vfs_t
*, struct statvfs64
*);
142 static int nfs4_sync(vfs_t
*, short, cred_t
*);
143 static int nfs4_vget(vfs_t
*, vnode_t
**, fid_t
*);
144 static int nfs4_mountroot(vfs_t
*, whymountroot_t
);
145 static void nfs4_freevfs(vfs_t
*);
147 static int nfs4rootvp(vnode_t
**, vfs_t
*, struct servinfo4
*,
148 int, cred_t
*, zone_t
*);
151 int nfs4_vfsinit(void);
152 void nfs4_vfsfini(void);
153 static void nfs4setclientid_init(void);
154 static void nfs4setclientid_fini(void);
155 static void nfs4setclientid_otw(mntinfo4_t
*, servinfo4_t
*, cred_t
*,
156 struct nfs4_server
*, nfs4_error_t
*, int *);
157 static void destroy_nfs4_server(nfs4_server_t
*);
158 static void remove_mi(nfs4_server_t
*, mntinfo4_t
*);
160 extern void nfs4_ephemeral_init(void);
161 extern void nfs4_ephemeral_fini(void);
163 /* referral related routines */
164 static servinfo4_t
*copy_svp(servinfo4_t
*);
165 static void free_knconf_contents(struct knetconfig
*k
);
166 static char *extract_referral_point(const char *, int);
167 static void setup_newsvpath(servinfo4_t
*, int);
168 static void update_servinfo4(servinfo4_t
*, fs_location4
*,
169 struct nfs_fsl_info
*, char *, int);
172 * Initialize the vfs structure
175 static int nfs4fstyp
;
179 * Debug variable to check for rdma based
180 * transport startup and cleanup. Controlled
181 * through /etc/system. Off by default.
183 extern int rdma_debug
;
185 const struct vfsops nfs4_vfsops
= {
186 .vfs_mount
= nfs4_mount
,
187 .vfs_unmount
= nfs4_unmount
,
188 .vfs_root
= nfs4_root
,
189 .vfs_statvfs
= nfs4_statvfs
,
190 .vfs_sync
= nfs4_sync
,
191 .vfs_vget
= nfs4_vget
,
192 .vfs_mountroot
= nfs4_mountroot
,
193 .vfs_freevfs
= nfs4_freevfs
,
197 nfs4init(int fstyp
, char *name
)
201 error
= vfs_setfsops(fstyp
, &nfs4_vfsops
);
203 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
204 "nfs4init: bad fstyp");
209 (void) nfs4_vfsinit();
210 (void) nfs4_init_dot_entries();
214 (void) vfs_freevfsops_by_type(fstyp
);
222 (void) nfs4_destroy_dot_entries();
227 * Create a new sec_data structure to store AUTH_DH related data:
228 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
229 * flag set for NFS V4 since we are avoiding to contact the rpcbind
230 * daemon and is using the IP time service (IPPORT_TIMESERVER).
232 * sec_data can be freed by sec_clnt_freeinfo().
234 static struct sec_data
*
235 create_authdh_data(char *netname
, int nlen
, struct netbuf
*syncaddr
,
236 struct knetconfig
*knconf
)
238 struct sec_data
*secdata
;
239 dh_k4_clntdata_t
*data
;
242 if (syncaddr
== NULL
|| syncaddr
->buf
== NULL
|| nlen
== 0)
245 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
248 data
= kmem_alloc(sizeof (*data
), KM_SLEEP
);
250 data
->syncaddr
.maxlen
= syncaddr
->maxlen
;
251 data
->syncaddr
.len
= syncaddr
->len
;
252 data
->syncaddr
.buf
= kmem_alloc(syncaddr
->len
, KM_SLEEP
);
253 bcopy(syncaddr
->buf
, data
->syncaddr
.buf
, syncaddr
->len
);
256 * duplicate the knconf information for the
259 data
->knconf
= kmem_alloc(sizeof (*knconf
), KM_SLEEP
);
260 *data
->knconf
= *knconf
;
261 pf
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
262 p
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
263 bcopy(knconf
->knc_protofmly
, pf
, KNC_STRSIZE
);
264 bcopy(knconf
->knc_proto
, p
, KNC_STRSIZE
);
265 data
->knconf
->knc_protofmly
= pf
;
266 data
->knconf
->knc_proto
= p
;
268 /* move server netname to the sec_data structure */
269 data
->netname
= kmem_alloc(nlen
, KM_SLEEP
);
270 bcopy(netname
, data
->netname
, nlen
);
271 data
->netnamelen
= (int)nlen
;
273 secdata
->secmod
= AUTH_DH
;
274 secdata
->rpcflavor
= AUTH_DH
;
275 secdata
->data
= (caddr_t
)data
;
281 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
282 * is responsible for freeing.
285 copy_sec_data(sec_data_t
*fsecdata
)
287 sec_data_t
*tsecdata
;
289 if (fsecdata
== NULL
)
292 if (fsecdata
->rpcflavor
== AUTH_DH
) {
293 dh_k4_clntdata_t
*fdata
= (dh_k4_clntdata_t
*)fsecdata
->data
;
298 tsecdata
= (sec_data_t
*)create_authdh_data(fdata
->netname
,
299 fdata
->netnamelen
, &fdata
->syncaddr
, fdata
->knconf
);
304 tsecdata
= kmem_zalloc(sizeof (sec_data_t
), KM_SLEEP
);
306 tsecdata
->secmod
= fsecdata
->secmod
;
307 tsecdata
->rpcflavor
= fsecdata
->rpcflavor
;
308 tsecdata
->flags
= fsecdata
->flags
;
309 tsecdata
->uid
= fsecdata
->uid
;
311 if (fsecdata
->rpcflavor
== RPCSEC_GSS
) {
312 gss_clntdata_t
*gcd
= (gss_clntdata_t
*)fsecdata
->data
;
314 tsecdata
->data
= (caddr_t
)copy_sec_data_gss(gcd
);
316 tsecdata
->data
= NULL
;
323 copy_sec_data_gss(gss_clntdata_t
*fdata
)
325 gss_clntdata_t
*tdata
;
330 tdata
= kmem_zalloc(sizeof (gss_clntdata_t
), KM_SLEEP
);
332 tdata
->mechanism
.length
= fdata
->mechanism
.length
;
333 tdata
->mechanism
.elements
= kmem_zalloc(fdata
->mechanism
.length
,
335 bcopy(fdata
->mechanism
.elements
, tdata
->mechanism
.elements
,
336 fdata
->mechanism
.length
);
338 tdata
->service
= fdata
->service
;
340 (void) strcpy(tdata
->uname
, fdata
->uname
);
341 (void) strcpy(tdata
->inst
, fdata
->inst
);
342 (void) strcpy(tdata
->realm
, fdata
->realm
);
344 tdata
->qop
= fdata
->qop
;
350 nfs4_chkdup_servinfo4(servinfo4_t
*svp_head
, servinfo4_t
*svp
)
355 * Iterate over the servinfo4 list to make sure
356 * we do not have a duplicate. Skip any servinfo4
357 * that has been marked "NOT IN USE"
359 for (si
= svp_head
; si
; si
= si
->sv_next
) {
360 (void) nfs_rw_enter_sig(&si
->sv_lock
, RW_READER
, 0);
361 if (si
->sv_flags
& SV4_NOTINUSE
) {
362 nfs_rw_exit(&si
->sv_lock
);
365 nfs_rw_exit(&si
->sv_lock
);
368 if (si
->sv_addr
.len
== svp
->sv_addr
.len
&&
369 strcmp(si
->sv_knconf
->knc_protofmly
,
370 svp
->sv_knconf
->knc_protofmly
) == 0 &&
371 bcmp(si
->sv_addr
.buf
, svp
->sv_addr
.buf
,
372 si
->sv_addr
.len
) == 0) {
373 /* it's a duplicate */
377 /* it's not a duplicate */
382 nfs4_free_args(struct nfs_args
*nargs
)
385 if (nargs
->knconf
->knc_protofmly
)
386 kmem_free(nargs
->knconf
->knc_protofmly
,
388 if (nargs
->knconf
->knc_proto
)
389 kmem_free(nargs
->knconf
->knc_proto
, KNC_STRSIZE
);
390 kmem_free(nargs
->knconf
, sizeof (*nargs
->knconf
));
391 nargs
->knconf
= NULL
;
395 kmem_free(nargs
->fh
, strlen(nargs
->fh
) + 1);
399 if (nargs
->hostname
) {
400 kmem_free(nargs
->hostname
, strlen(nargs
->hostname
) + 1);
401 nargs
->hostname
= NULL
;
405 if (nargs
->addr
->buf
) {
406 ASSERT(nargs
->addr
->len
);
407 kmem_free(nargs
->addr
->buf
, nargs
->addr
->len
);
409 kmem_free(nargs
->addr
, sizeof (struct netbuf
));
413 if (nargs
->syncaddr
) {
414 ASSERT(nargs
->syncaddr
->len
);
415 if (nargs
->syncaddr
->buf
) {
416 ASSERT(nargs
->syncaddr
->len
);
417 kmem_free(nargs
->syncaddr
->buf
, nargs
->syncaddr
->len
);
419 kmem_free(nargs
->syncaddr
, sizeof (struct netbuf
));
420 nargs
->syncaddr
= NULL
;
423 if (nargs
->netname
) {
424 kmem_free(nargs
->netname
, strlen(nargs
->netname
) + 1);
425 nargs
->netname
= NULL
;
428 if (nargs
->nfs_ext_u
.nfs_extA
.secdata
) {
430 nargs
->nfs_ext_u
.nfs_extA
.secdata
);
431 nargs
->nfs_ext_u
.nfs_extA
.secdata
= NULL
;
437 nfs4_copyin(char *data
, int datalen
, struct nfs_args
*nargs
)
441 size_t hlen
; /* length of hostname */
442 size_t nlen
; /* length of netname */
443 char netname
[MAXNETNAMELEN
+1]; /* server's netname */
444 struct netbuf addr
; /* server's address */
445 struct netbuf syncaddr
; /* AUTH_DES time sync addr */
446 struct knetconfig
*knconf
; /* transport structure */
447 struct sec_data
*secdata
= NULL
; /* security data */
448 STRUCT_DECL(nfs_args
, args
); /* nfs mount arguments */
449 STRUCT_DECL(knetconfig
, knconf_tmp
);
450 STRUCT_DECL(netbuf
, addr_tmp
);
457 bzero(nargs
, sizeof (*nargs
));
459 STRUCT_INIT(args
, get_udatamodel());
460 bzero(STRUCT_BUF(args
), SIZEOF_STRUCT(nfs_args
, DATAMODEL_NATIVE
));
461 if (copyin(data
, STRUCT_BUF(args
), MIN(datalen
,
465 nargs
->wsize
= STRUCT_FGET(args
, wsize
);
466 nargs
->rsize
= STRUCT_FGET(args
, rsize
);
467 nargs
->timeo
= STRUCT_FGET(args
, timeo
);
468 nargs
->retrans
= STRUCT_FGET(args
, retrans
);
469 nargs
->acregmin
= STRUCT_FGET(args
, acregmin
);
470 nargs
->acregmax
= STRUCT_FGET(args
, acregmax
);
471 nargs
->acdirmin
= STRUCT_FGET(args
, acdirmin
);
472 nargs
->acdirmax
= STRUCT_FGET(args
, acdirmax
);
474 flags
= STRUCT_FGET(args
, flags
);
475 nargs
->flags
= flags
;
482 * Allocate space for a knetconfig structure and
483 * its strings and copy in from user-land.
485 knconf
= kmem_zalloc(sizeof (*knconf
), KM_SLEEP
);
486 STRUCT_INIT(knconf_tmp
, get_udatamodel());
487 if (copyin(STRUCT_FGETP(args
, knconf
), STRUCT_BUF(knconf_tmp
),
488 STRUCT_SIZE(knconf_tmp
))) {
489 kmem_free(knconf
, sizeof (*knconf
));
493 knconf
->knc_semantics
= STRUCT_FGET(knconf_tmp
, knc_semantics
);
494 knconf
->knc_protofmly
= STRUCT_FGETP(knconf_tmp
, knc_protofmly
);
495 knconf
->knc_proto
= STRUCT_FGETP(knconf_tmp
, knc_proto
);
496 if (get_udatamodel() != DATAMODEL_LP64
) {
497 knconf
->knc_rdev
= expldev(STRUCT_FGET(knconf_tmp
, knc_rdev
));
499 knconf
->knc_rdev
= STRUCT_FGET(knconf_tmp
, knc_rdev
);
502 pf
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
503 p
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
504 error
= copyinstr(knconf
->knc_protofmly
, pf
, KNC_STRSIZE
, NULL
);
506 kmem_free(pf
, KNC_STRSIZE
);
507 kmem_free(p
, KNC_STRSIZE
);
508 kmem_free(knconf
, sizeof (*knconf
));
512 error
= copyinstr(knconf
->knc_proto
, p
, KNC_STRSIZE
, NULL
);
514 kmem_free(pf
, KNC_STRSIZE
);
515 kmem_free(p
, KNC_STRSIZE
);
516 kmem_free(knconf
, sizeof (*knconf
));
521 knconf
->knc_protofmly
= pf
;
522 knconf
->knc_proto
= p
;
524 nargs
->knconf
= knconf
;
529 STRUCT_INIT(addr_tmp
, get_udatamodel());
530 if (copyin(STRUCT_FGETP(args
, addr
), STRUCT_BUF(addr_tmp
),
531 STRUCT_SIZE(addr_tmp
))) {
536 nargs
->addr
= kmem_zalloc(sizeof (struct netbuf
), KM_SLEEP
);
537 userbufptr
= STRUCT_FGETP(addr_tmp
, buf
);
538 addr
.len
= STRUCT_FGET(addr_tmp
, len
);
539 addr
.buf
= kmem_alloc(addr
.len
, KM_SLEEP
);
540 addr
.maxlen
= addr
.len
;
541 if (copyin(userbufptr
, addr
.buf
, addr
.len
)) {
542 kmem_free(addr
.buf
, addr
.len
);
546 bcopy(&addr
, nargs
->addr
, sizeof (struct netbuf
));
549 * Get the root fhandle
551 error
= pn_get(STRUCT_FGETP(args
, fh
), UIO_USERSPACE
, &pn
);
555 /* Volatile fh: keep server paths, so use actual-size strings */
556 nargs
->fh
= kmem_alloc(pn
.pn_pathlen
+ 1, KM_SLEEP
);
557 bcopy(pn
.pn_path
, nargs
->fh
, pn
.pn_pathlen
);
558 nargs
->fh
[pn
.pn_pathlen
] = '\0';
563 * Get server's hostname
565 if (flags
& NFSMNT_HOSTNAME
) {
566 error
= copyinstr(STRUCT_FGETP(args
, hostname
),
567 netname
, sizeof (netname
), &hlen
);
570 nargs
->hostname
= kmem_zalloc(hlen
, KM_SLEEP
);
571 (void) strcpy(nargs
->hostname
, netname
);
574 nargs
->hostname
= NULL
;
579 * If there are syncaddr and netname data, load them in. This is
580 * to support data needed for NFSV4 when AUTH_DH is the negotiated
581 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
584 if (flags
& NFSMNT_SECURE
) {
587 STRUCT_INIT(addr_tmp
, get_udatamodel());
588 if (copyin(STRUCT_FGETP(args
, syncaddr
), STRUCT_BUF(addr_tmp
),
589 STRUCT_SIZE(addr_tmp
))) {
593 userbufptr
= STRUCT_FGETP(addr_tmp
, buf
);
594 syncaddr
.len
= STRUCT_FGET(addr_tmp
, len
);
595 syncaddr
.buf
= kmem_alloc(syncaddr
.len
, KM_SLEEP
);
596 syncaddr
.maxlen
= syncaddr
.len
;
597 if (copyin(userbufptr
, syncaddr
.buf
, syncaddr
.len
)) {
598 kmem_free(syncaddr
.buf
, syncaddr
.len
);
603 nargs
->syncaddr
= kmem_alloc(sizeof (struct netbuf
), KM_SLEEP
);
604 bcopy(&syncaddr
, nargs
->syncaddr
, sizeof (struct netbuf
));
606 /* get server's netname */
607 if (copyinstr(STRUCT_FGETP(args
, netname
), netname
,
608 sizeof (netname
), &nlen
)) {
613 netname
[nlen
] = '\0';
614 nargs
->netname
= kmem_zalloc(nlen
, KM_SLEEP
);
615 (void) strcpy(nargs
->netname
, netname
);
619 * Get the extention data which has the security data structure.
620 * This includes data for AUTH_SYS as well.
622 if (flags
& NFSMNT_NEWARGS
) {
623 nargs
->nfs_args_ext
= STRUCT_FGET(args
, nfs_args_ext
);
624 if (nargs
->nfs_args_ext
== NFS_ARGS_EXTA
||
625 nargs
->nfs_args_ext
== NFS_ARGS_EXTB
) {
627 * Indicating the application is using the new
628 * sec_data structure to pass in the security
631 if (STRUCT_FGETP(args
,
632 nfs_ext_u
.nfs_extA
.secdata
) != NULL
) {
633 error
= sec_clnt_loadinfo(
634 (struct sec_data
*)STRUCT_FGETP(args
,
635 nfs_ext_u
.nfs_extA
.secdata
),
636 &secdata
, get_udatamodel());
638 nargs
->nfs_ext_u
.nfs_extA
.secdata
= secdata
;
648 * We may have a linked list of nfs_args structures,
649 * which means the user is looking for failover. If
650 * the mount is either not "read-only" or "soft",
651 * we want to bail out with EINVAL.
653 if (nargs
->nfs_args_ext
== NFS_ARGS_EXTB
)
654 nargs
->nfs_ext_u
.nfs_extB
.next
=
655 STRUCT_FGETP(args
, nfs_ext_u
.nfs_extB
.next
);
659 nfs4_free_args(nargs
);
667 * Set up mount info record and attach it to vfs struct.
670 nfs4_mount(vfs_t
*vfsp
, vnode_t
*mvp
, struct mounta
*uap
, cred_t
*cr
)
672 char *data
= uap
->dataptr
;
674 vnode_t
*rtvp
; /* the server's root */
675 mntinfo4_t
*mi
; /* mount info, pointed at by vfs */
676 struct knetconfig
*rdma_knconf
; /* rdma transport structure */
678 struct servinfo4
*svp
; /* nfs server info */
679 struct servinfo4
*svp_tail
= NULL
; /* previous nfs server info */
680 struct servinfo4
*svp_head
; /* first nfs server info */
681 struct servinfo4
*svp_2ndlast
; /* 2nd last in server info list */
682 struct sec_data
*secdata
; /* security data */
683 struct nfs_args
*args
= NULL
;
684 int flags
, addr_type
, removed
;
685 zone_t
*zone
= nfs_zone();
687 zone_t
*mntzone
= NULL
;
689 if (secpolicy_fs_mount(cr
, mvp
, vfsp
) != 0)
691 if (mvp
->v_type
!= VDIR
)
697 * nfs_args is now versioned and is extensible, so
698 * uap->datalen might be different from sizeof (args)
699 * in a compatible situation.
702 if (!(uap
->flags
& MS_SYSSPACE
)) {
704 args
= kmem_zalloc(sizeof (struct nfs_args
), KM_SLEEP
);
706 nfs4_free_args(args
);
707 error
= nfs4_copyin(data
, uap
->datalen
, args
);
710 kmem_free(args
, sizeof (*args
));
715 args
= (struct nfs_args
*)data
;
721 * If the request changes the locking type, disallow the remount,
722 * because it's questionable whether we can transfer the
723 * locking state correctly.
725 if (uap
->flags
& MS_REMOUNT
) {
726 if (!(uap
->flags
& MS_SYSSPACE
)) {
727 nfs4_free_args(args
);
728 kmem_free(args
, sizeof (*args
));
730 if ((mi
= VFTOMI4(vfsp
)) != NULL
) {
733 new_mi_llock
= (flags
& NFSMNT_LLOCK
) ? 1 : 0;
734 old_mi_llock
= (mi
->mi_flags
& MI4_LLOCK
) ? 1 : 0;
735 if (old_mi_llock
!= new_mi_llock
)
742 * For ephemeral mount trigger stub vnodes, we have two problems
743 * to solve: racing threads will likely fail the v_count check, and
744 * we want only one to proceed with the mount.
746 * For stubs, if the mount has already occurred (via a racing thread),
747 * just return success. If not, skip the v_count check and proceed.
748 * Note that we are already serialised at this point.
750 mutex_enter(&mvp
->v_lock
);
751 if (vn_matchops(mvp
, &nfs4_trigger_vnodeops
)) {
752 /* mntpt is a v4 stub vnode */
753 ASSERT(RP_ISSTUB(VTOR4(mvp
)));
754 ASSERT(!(uap
->flags
& MS_OVERLAY
));
755 ASSERT(!(mvp
->v_flag
& VROOT
));
756 if (vn_mountedvfs(mvp
) != NULL
) {
757 /* ephemeral mount has already occurred */
758 ASSERT(uap
->flags
& MS_SYSSPACE
);
759 mutex_exit(&mvp
->v_lock
);
763 /* mntpt is a non-v4 or v4 non-stub vnode */
764 if (!(uap
->flags
& MS_OVERLAY
) &&
765 (mvp
->v_count
!= 1 || (mvp
->v_flag
& VROOT
))) {
766 mutex_exit(&mvp
->v_lock
);
767 if (!(uap
->flags
& MS_SYSSPACE
)) {
768 nfs4_free_args(args
);
769 kmem_free(args
, sizeof (*args
));
774 mutex_exit(&mvp
->v_lock
);
776 /* make sure things are zeroed for errout: */
782 * A valid knetconfig structure is required.
784 if (!(flags
& NFSMNT_KNCONF
) ||
785 args
->knconf
== NULL
|| args
->knconf
->knc_protofmly
== NULL
||
786 args
->knconf
->knc_proto
== NULL
||
787 (strcmp(args
->knconf
->knc_proto
, NC_UDP
) == 0)) {
788 if (!(uap
->flags
& MS_SYSSPACE
)) {
789 nfs4_free_args(args
);
790 kmem_free(args
, sizeof (*args
));
795 if ((strlen(args
->knconf
->knc_protofmly
) >= KNC_STRSIZE
) ||
796 (strlen(args
->knconf
->knc_proto
) >= KNC_STRSIZE
)) {
797 if (!(uap
->flags
& MS_SYSSPACE
)) {
798 nfs4_free_args(args
);
799 kmem_free(args
, sizeof (*args
));
805 * Allocate a servinfo4 struct.
807 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
808 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
810 svp_2ndlast
= svp_tail
;
811 svp_tail
->sv_next
= svp
;
818 svp
->sv_knconf
= args
->knconf
;
824 if (args
->addr
== NULL
|| args
->addr
->buf
== NULL
) {
829 svp
->sv_addr
.maxlen
= args
->addr
->maxlen
;
830 svp
->sv_addr
.len
= args
->addr
->len
;
831 svp
->sv_addr
.buf
= args
->addr
->buf
;
832 args
->addr
->buf
= NULL
;
835 * Get the root fhandle
837 if (args
->fh
== NULL
|| (strlen(args
->fh
) >= MAXPATHLEN
)) {
842 svp
->sv_path
= args
->fh
;
843 svp
->sv_pathlen
= strlen(args
->fh
) + 1;
847 * Get server's hostname
849 if (flags
& NFSMNT_HOSTNAME
) {
850 if (args
->hostname
== NULL
|| (strlen(args
->hostname
) >
855 svp
->sv_hostnamelen
= strlen(args
->hostname
) + 1;
856 svp
->sv_hostname
= args
->hostname
;
857 args
->hostname
= NULL
;
859 char *p
= "unknown-host";
860 svp
->sv_hostnamelen
= strlen(p
) + 1;
861 svp
->sv_hostname
= kmem_zalloc(svp
->sv_hostnamelen
, KM_SLEEP
);
862 (void) strcpy(svp
->sv_hostname
, p
);
866 * RDMA MOUNT SUPPORT FOR NFS v4.
867 * Establish, is it possible to use RDMA, if so overload the
868 * knconf with rdma specific knconf and free the original knconf.
870 if ((flags
& NFSMNT_TRYRDMA
) || (flags
& NFSMNT_DORDMA
)) {
872 * Determine the addr type for RDMA, IPv4 or v6.
874 if (strcmp(svp
->sv_knconf
->knc_protofmly
, NC_INET
) == 0)
876 else if (strcmp(svp
->sv_knconf
->knc_protofmly
, NC_INET6
) == 0)
877 addr_type
= AF_INET6
;
879 if (rdma_reachable(addr_type
, &svp
->sv_addr
,
880 &rdma_knconf
) == 0) {
882 * If successful, hijack the original knconf and
883 * replace with the new one, depending on the flags.
885 svp
->sv_origknconf
= svp
->sv_knconf
;
886 svp
->sv_knconf
= rdma_knconf
;
888 if (flags
& NFSMNT_TRYRDMA
) {
891 zcmn_err(getzoneid(), CE_WARN
,
892 "no RDMA onboard, revert\n");
896 if (flags
& NFSMNT_DORDMA
) {
898 * If proto=rdma is specified and no RDMA
899 * path to this server is available then
901 * This is not included in the mountable
902 * server list or the replica list.
903 * Check if more servers are specified;
904 * Failover case, otherwise bail out of mount.
906 if (args
->nfs_args_ext
== NFS_ARGS_EXTB
&&
907 args
->nfs_ext_u
.nfs_extB
.next
!= NULL
) {
909 args
->nfs_ext_u
.nfs_extB
.next
;
910 if (uap
->flags
& MS_RDONLY
&&
911 !(flags
& NFSMNT_SOFT
)) {
912 if (svp_head
->sv_next
== NULL
) {
918 svp_tail
= svp_2ndlast
;
919 svp_2ndlast
->sv_next
=
927 * This is the last server specified
928 * in the nfs_args list passed down
929 * and it's not rdma capable.
931 if (svp_head
->sv_next
== NULL
) {
933 * Is this the only one
938 zcmn_err(getzoneid(),
945 * There is a list, since some
946 * servers specified before
947 * this passed all requirements
949 svp_tail
= svp_2ndlast
;
950 svp_2ndlast
->sv_next
= NULL
;
960 * If there are syncaddr and netname data, load them in. This is
961 * to support data needed for NFSV4 when AUTH_DH is the negotiated
962 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
964 if (args
->flags
& NFSMNT_SECURE
) {
965 svp
->sv_dhsec
= create_authdh_data(args
->netname
,
966 strlen(args
->netname
),
967 args
->syncaddr
, svp
->sv_knconf
);
971 * Get the extension data which has the security data structure.
972 * This includes data for AUTH_SYS as well.
974 if (flags
& NFSMNT_NEWARGS
) {
975 switch (args
->nfs_args_ext
) {
979 * Indicating the application is using the new
980 * sec_data structure to pass in the security
983 secdata
= args
->nfs_ext_u
.nfs_extA
.secdata
;
984 if (secdata
== NULL
) {
986 } else if (uap
->flags
& MS_SYSSPACE
) {
988 * Need to validate the flavor here if
989 * sysspace, userspace was already
990 * validated by the nfs4_copyin function.
992 switch (secdata
->rpcflavor
) {
1004 args
->nfs_ext_u
.nfs_extA
.secdata
= NULL
;
1012 } else if (flags
& NFSMNT_SECURE
) {
1014 * NFSMNT_SECURE is deprecated but we keep it
1015 * to support the rogue user-generated application
1016 * that may use this undocumented interface to do
1017 * AUTH_DH security, e.g. our own rexd.
1019 * Also note that NFSMNT_SECURE is used for passing
1020 * AUTH_DH info to be used in negotiation.
1022 secdata
= create_authdh_data(args
->netname
,
1023 strlen(args
->netname
), args
->syncaddr
, svp
->sv_knconf
);
1026 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
1027 secdata
->secmod
= secdata
->rpcflavor
= AUTH_SYS
;
1028 secdata
->data
= NULL
;
1031 svp
->sv_secdata
= secdata
;
1034 * User does not explicitly specify a flavor, and a user
1035 * defined default flavor is passed down.
1037 if (flags
& NFSMNT_SECDEFAULT
) {
1038 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1039 svp
->sv_flags
|= SV4_TRYSECDEFAULT
;
1040 nfs_rw_exit(&svp
->sv_lock
);
1046 * We may have a linked list of nfs_args structures,
1047 * which means the user is looking for failover. If
1048 * the mount is either not "read-only" or "soft",
1049 * we want to bail out with EINVAL.
1051 if (args
->nfs_args_ext
== NFS_ARGS_EXTB
&&
1052 args
->nfs_ext_u
.nfs_extB
.next
!= NULL
) {
1053 if (uap
->flags
& MS_RDONLY
&& !(flags
& NFSMNT_SOFT
)) {
1054 data
= (char *)args
->nfs_ext_u
.nfs_extB
.next
;
1062 * Determine the zone we're being mounted into.
1064 zone_hold(mntzone
= zone
); /* start with this assumption */
1065 if (getzoneid() == GLOBAL_ZONEID
) {
1067 mntzone
= zone_find_by_path(refstr_value(vfsp
->vfs_mntpt
));
1068 ASSERT(mntzone
!= NULL
);
1069 if (mntzone
!= zone
) {
1076 * Stop the mount from going any further if the zone is going away.
1078 if (zone_status_get(mntzone
) >= ZONE_IS_SHUTTING_DOWN
) {
1087 error
= nfs4rootvp(&rtvp
, vfsp
, svp_head
, flags
, cr
, mntzone
);
1089 /* if nfs4rootvp failed, it will free svp_head */
1097 * Send client id to the server, if necessary
1099 nfs4_error_zinit(&n4e
);
1100 nfs4setclientid(mi
, cr
, FALSE
, &n4e
);
1108 * Set option fields in the mount info record
1111 if (svp_head
->sv_next
) {
1112 mutex_enter(&mi
->mi_lock
);
1113 mi
->mi_flags
|= MI4_LLOCK
;
1114 mutex_exit(&mi
->mi_lock
);
1116 error
= nfs4_setopts(rtvp
, DATAMODEL_NATIVE
, args
);
1121 * Time to tie in the mirror mount info at last!
1123 if (flags
& NFSMNT_EPHEMERAL
)
1124 error
= nfs4_record_ephemeral_mount(mi
, mvp
);
1130 if (rp
->r_flags
& R4HASHED
)
1134 nfs4_async_stop(vfsp
);
1135 nfs4_async_manager_stop(vfsp
);
1136 nfs4_remove_mi_from_server(mi
, NULL
);
1139 if (mntzone
!= NULL
)
1141 /* need to remove it from the zone */
1142 removed
= nfs4_mi_zonelist_remove(mi
);
1144 zone_rele_ref(&mi
->mi_zone_ref
,
1147 if (!(uap
->flags
& MS_SYSSPACE
) && args
) {
1148 nfs4_free_args(args
);
1149 kmem_free(args
, sizeof (*args
));
1157 if (!(uap
->flags
& MS_SYSSPACE
) && args
) {
1158 nfs4_free_args(args
);
1159 kmem_free(args
, sizeof (*args
));
1164 if (mntzone
!= NULL
)
1171 #define VERS_MSG "NFS4 server "
1173 #define VERS_MSG "NFS server "
1177 VERS_MSG "%s returned 0 for read transfer size"
1179 VERS_MSG "%s returned 0 for write transfer size"
1181 VERS_MSG "%s returned 0 for maximum file size"
1184 * Get the symbolic link text from the server for a given filehandle
1187 * (get symlink text) PUTFH READLINK
1190 getlinktext_otw(mntinfo4_t
*mi
, nfs_fh4
*fh
, char **linktextp
, cred_t
*cr
,
1193 COMPOUND4args_clnt args
;
1194 COMPOUND4res_clnt res
;
1196 nfs_argop4 argop
[2];
1198 READLINK4res
*lr_res
;
1200 bool_t needrecov
= FALSE
;
1201 nfs4_recov_state_t recov_state
;
1202 nfs4_sharedfh_t
*sfh
;
1204 int num_retry
= nfs4_max_mount_retry
;
1205 int recovery
= !(flags
& NFS4_GETFH_NEEDSOP
);
1207 sfh
= sfh4_get(fh
, mi
);
1208 recov_state
.rs_flags
= 0;
1209 recov_state
.rs_num_retry_despite_err
= 0;
1212 nfs4_error_zinit(&e
);
1216 args
.ctag
= TAG_GET_SYMLINK
;
1219 e
.error
= nfs4_start_op(mi
, NULL
, NULL
, &recov_state
);
1226 /* 0. putfh symlink fh */
1227 argop
[0].argop
= OP_CPUTFH
;
1228 argop
[0].nfs_argop4_u
.opcputfh
.sfh
= sfh
;
1231 argop
[1].argop
= OP_READLINK
;
1235 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, &e
);
1237 needrecov
= nfs4_needs_recovery(&e
, FALSE
, mi
->mi_vfsp
);
1239 if (needrecov
&& !recovery
&& num_retry
-- > 0) {
1241 NFS4_DEBUG(nfs4_client_recov_debug
, (CE_NOTE
,
1242 "getlinktext_otw: initiating recovery\n"));
1244 if (nfs4_start_recovery(&e
, mi
, NULL
, NULL
, NULL
, NULL
,
1245 OP_READLINK
, NULL
, NULL
, NULL
) == FALSE
) {
1246 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1248 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1254 * If non-NFS4 pcol error and/or we weren't able to recover.
1258 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1264 e
.error
= geterrno4(res
.status
);
1265 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1267 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1272 /* res.status == NFS4_OK */
1273 ASSERT(res
.status
== NFS4_OK
);
1275 resop
= &res
.array
[1]; /* readlink res */
1276 lr_res
= &resop
->nfs_resop4_u
.opreadlink
;
1278 /* treat symlink name as data */
1279 *linktextp
= utf8_to_str((utf8string
*)&lr_res
->link
, &len
, NULL
);
1282 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1284 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1289 * Skip over consecutive slashes and "/./" in a pathname.
1292 pathname_skipslashdot(struct pathname
*pnp
)
1296 while (pnp
->pn_pathlen
> 0 && *pnp
->pn_path
== '/') {
1298 c1
= pnp
->pn_path
+ 1;
1299 c2
= pnp
->pn_path
+ 2;
1301 if (*c1
== '.' && (*c2
== '/' || *c2
== '\0')) {
1302 pnp
->pn_path
= pnp
->pn_path
+ 2; /* skip "/." */
1303 pnp
->pn_pathlen
= pnp
->pn_pathlen
- 2;
1312 * Resolve a symbolic link path. The symlink is in the nth component of
1313 * svp->sv_path and has an nfs4 file handle "fh".
1314 * Upon return, the sv_path will point to the new path that has the nth
1315 * component resolved to its symlink text.
1318 resolve_sympath(mntinfo4_t
*mi
, servinfo4_t
*svp
, int nth
, nfs_fh4
*fh
,
1319 cred_t
*cr
, int flags
)
1322 char *symlink
, *newpath
;
1323 struct pathname oldpn
, newpn
;
1324 char component
[MAXNAMELEN
];
1325 int i
, addlen
, error
= 0;
1328 /* Get the symbolic link text over the wire. */
1329 error
= getlinktext_otw(mi
, fh
, &symlink
, cr
, flags
);
1331 if (error
|| symlink
== NULL
|| strlen(symlink
) == 0)
1335 * Compose the new pathname.
1337 * - only the nth component is resolved for the pathname.
1338 * - pathname.pn_pathlen does not count the ending null byte.
1340 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1341 oldpath
= svp
->sv_path
;
1342 oldpathlen
= svp
->sv_pathlen
;
1343 if (error
= pn_get(oldpath
, UIO_SYSSPACE
, &oldpn
)) {
1344 nfs_rw_exit(&svp
->sv_lock
);
1345 kmem_free(symlink
, strlen(symlink
) + 1);
1348 nfs_rw_exit(&svp
->sv_lock
);
1352 * Skip over previous components from the oldpath so that the
1353 * oldpn.pn_path will point to the symlink component. Skip
1354 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
1355 * pn_getcompnent can get the component.
1357 for (i
= 1; i
< nth
; i
++) {
1358 pathname_skipslashdot(&oldpn
);
1359 error
= pn_getcomponent(&oldpn
, component
);
1365 * Copy the old path upto the component right before the symlink
1366 * if the symlink is not an absolute path.
1368 if (symlink
[0] != '/') {
1369 addlen
= oldpn
.pn_path
- oldpn
.pn_buf
;
1370 bcopy(oldpn
.pn_buf
, newpn
.pn_path
, addlen
);
1371 newpn
.pn_pathlen
+= addlen
;
1372 newpn
.pn_path
+= addlen
;
1373 newpn
.pn_buf
[newpn
.pn_pathlen
] = '/';
1378 /* copy the resolved symbolic link text */
1379 addlen
= strlen(symlink
);
1380 if (newpn
.pn_pathlen
+ addlen
>= newpn
.pn_bufsize
) {
1381 error
= ENAMETOOLONG
;
1384 bcopy(symlink
, newpn
.pn_path
, addlen
);
1385 newpn
.pn_pathlen
+= addlen
;
1386 newpn
.pn_path
+= addlen
;
1389 * Check if there is any remaining path after the symlink component.
1390 * First, skip the symlink component.
1392 pathname_skipslashdot(&oldpn
);
1393 if (error
= pn_getcomponent(&oldpn
, component
))
1396 addlen
= pn_pathleft(&oldpn
); /* includes counting the slash */
1399 * Copy the remaining path to the new pathname if there is any.
1402 if (newpn
.pn_pathlen
+ addlen
>= newpn
.pn_bufsize
) {
1403 error
= ENAMETOOLONG
;
1406 bcopy(oldpn
.pn_path
, newpn
.pn_path
, addlen
);
1407 newpn
.pn_pathlen
+= addlen
;
1409 newpn
.pn_buf
[newpn
.pn_pathlen
] = '\0';
1411 /* get the newpath and store it in the servinfo4_t */
1412 newpath
= kmem_alloc(newpn
.pn_pathlen
+ 1, KM_SLEEP
);
1413 bcopy(newpn
.pn_buf
, newpath
, newpn
.pn_pathlen
);
1414 newpath
[newpn
.pn_pathlen
] = '\0';
1416 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1417 svp
->sv_path
= newpath
;
1418 svp
->sv_pathlen
= strlen(newpath
) + 1;
1419 nfs_rw_exit(&svp
->sv_lock
);
1421 kmem_free(oldpath
, oldpathlen
);
1423 kmem_free(symlink
, strlen(symlink
) + 1);
1431 * This routine updates servinfo4 structure with the new referred server
1433 * nfsfsloc has the location related information
1434 * fsp has the hostname and pathname info.
1435 * new path = pathname from referral + part of orig pathname(based on nth).
1438 update_servinfo4(servinfo4_t
*svp
, fs_location4
*fsp
,
1439 struct nfs_fsl_info
*nfsfsloc
, char *orig_path
, int nth
)
1441 struct knetconfig
*knconf
, *svknconf
;
1442 struct netbuf
*saddr
;
1443 sec_data_t
*secdata
;
1445 int i
= 0, num_slashes
= 0;
1446 char *p
, *spath
, *op
, *new_path
;
1449 knconf
= svp
->sv_knconf
;
1450 free_knconf_contents(knconf
);
1451 bzero(knconf
, sizeof (struct knetconfig
));
1452 svknconf
= nfsfsloc
->knconf
;
1453 knconf
->knc_semantics
= svknconf
->knc_semantics
;
1454 knconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1455 knconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1456 knconf
->knc_rdev
= svknconf
->knc_rdev
;
1457 bcopy(svknconf
->knc_protofmly
, knconf
->knc_protofmly
, KNC_STRSIZE
);
1458 bcopy(svknconf
->knc_proto
, knconf
->knc_proto
, KNC_STRSIZE
);
1460 /* Update server address */
1461 saddr
= &svp
->sv_addr
;
1462 if (saddr
->buf
!= NULL
)
1463 kmem_free(saddr
->buf
, saddr
->maxlen
);
1464 saddr
->buf
= kmem_alloc(nfsfsloc
->addr
->maxlen
, KM_SLEEP
);
1465 saddr
->len
= nfsfsloc
->addr
->len
;
1466 saddr
->maxlen
= nfsfsloc
->addr
->maxlen
;
1467 bcopy(nfsfsloc
->addr
->buf
, saddr
->buf
, nfsfsloc
->addr
->len
);
1469 /* Update server name */
1470 host
= fsp
->server_val
;
1471 kmem_free(svp
->sv_hostname
, svp
->sv_hostnamelen
);
1472 svp
->sv_hostname
= kmem_zalloc(host
->utf8string_len
+ 1, KM_SLEEP
);
1473 bcopy(host
->utf8string_val
, svp
->sv_hostname
, host
->utf8string_len
);
1474 svp
->sv_hostname
[host
->utf8string_len
] = '\0';
1475 svp
->sv_hostnamelen
= host
->utf8string_len
+ 1;
1478 * Update server path.
1479 * We need to setup proper path here.
1480 * For ex., If we got a path name serv1:/rp/aaa/bbb
1481 * where aaa is a referral and points to serv2:/rpool/aa
1482 * we need to set the path to serv2:/rpool/aa/bbb
1483 * The first part of this below code generates /rpool/aa
1484 * and the second part appends /bbb to the server path.
1486 spath
= p
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1488 for (i
= 0; i
< fsp
->rootpath
.pathname4_len
; i
++) {
1491 comp
= &fsp
->rootpath
.pathname4_val
[i
];
1492 /* If no space, null the string and bail */
1493 if ((p
- spath
) + comp
->utf8string_len
+ 1 > MAXPATHLEN
) {
1494 p
= spath
+ MAXPATHLEN
- 1;
1498 bcopy(comp
->utf8string_val
, p
, comp
->utf8string_len
);
1499 p
+= comp
->utf8string_len
;
1502 if (fsp
->rootpath
.pathname4_len
!= 0)
1508 new_path
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1509 (void) strlcpy(new_path
, p
, MAXPATHLEN
);
1510 kmem_free(p
, MAXPATHLEN
);
1511 i
= strlen(new_path
);
1513 for (op
= orig_path
; *op
; op
++) {
1516 if (num_slashes
== nth
+ 2) {
1517 while (*op
!= '\0') {
1527 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
1528 svp
->sv_pathlen
= strlen(new_path
) + 1;
1529 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
1530 bcopy(new_path
, svp
->sv_path
, svp
->sv_pathlen
);
1531 kmem_free(new_path
, MAXPATHLEN
);
1534 * All the security data is specific to old server.
1535 * Clean it up except secdata which deals with mount options.
1536 * We need to inherit that data. Copy secdata into our new servinfo4.
1538 if (svp
->sv_dhsec
) {
1539 sec_clnt_freeinfo(svp
->sv_dhsec
);
1540 svp
->sv_dhsec
= NULL
;
1542 if (svp
->sv_save_secinfo
&&
1543 svp
->sv_save_secinfo
!= svp
->sv_secinfo
) {
1544 secinfo_free(svp
->sv_save_secinfo
);
1545 svp
->sv_save_secinfo
= NULL
;
1547 if (svp
->sv_secinfo
) {
1548 secinfo_free(svp
->sv_secinfo
);
1549 svp
->sv_secinfo
= NULL
;
1551 svp
->sv_currsec
= NULL
;
1553 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
1554 *secdata
= *svp
->sv_secdata
;
1555 secdata
->data
= NULL
;
1556 if (svp
->sv_secdata
) {
1557 sec_clnt_freeinfo(svp
->sv_secdata
);
1558 svp
->sv_secdata
= NULL
;
1560 svp
->sv_secdata
= secdata
;
1564 * Resolve a referral. The referral is in the n+1th component of
1565 * svp->sv_path and has a parent nfs4 file handle "fh".
1566 * Upon return, the sv_path will point to the new path that has referral
1567 * component resolved to its referred path and part of original path.
1568 * Hostname and other address information is also updated.
1571 resolve_referral(mntinfo4_t
*mi
, servinfo4_t
*svp
, cred_t
*cr
, int nth
,
1574 nfs4_sharedfh_t
*sfh
;
1575 struct nfs_fsl_info nfsfsloc
;
1577 COMPOUND4res_clnt callres
;
1579 char *nm
, *orig_path
;
1580 int orig_pathlen
= 0, ret
= -1, index
;
1582 if (svp
->sv_pathlen
<= 0)
1585 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1586 orig_pathlen
= svp
->sv_pathlen
;
1587 orig_path
= kmem_alloc(orig_pathlen
, KM_SLEEP
);
1588 bcopy(svp
->sv_path
, orig_path
, orig_pathlen
);
1589 nm
= extract_referral_point(svp
->sv_path
, nth
);
1590 setup_newsvpath(svp
, nth
);
1591 nfs_rw_exit(&svp
->sv_lock
);
1593 sfh
= sfh4_get(fh
, mi
);
1594 index
= nfs4_process_referral(mi
, sfh
, nm
, cr
,
1595 &garp
, &callres
, &nfsfsloc
);
1597 kmem_free(nm
, MAXPATHLEN
);
1599 kmem_free(orig_path
, orig_pathlen
);
1603 fsp
= &garp
.n4g_ext_res
->n4g_fslocations
.locations_val
[index
];
1604 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1605 update_servinfo4(svp
, fsp
, &nfsfsloc
, orig_path
, nth
);
1606 nfs_rw_exit(&svp
->sv_lock
);
1608 mutex_enter(&mi
->mi_lock
);
1609 mi
->mi_vfs_referral_loop_cnt
++;
1610 mutex_exit(&mi
->mi_lock
);
1614 /* Free up XDR memory allocated in nfs4_process_referral() */
1615 xdr_free(xdr_nfs_fsl_info
, (char *)&nfsfsloc
);
1616 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1617 kmem_free(orig_path
, orig_pathlen
);
1623 * Get the root filehandle for the given filesystem and server, and update
1626 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
1627 * to coordinate with recovery. Otherwise, the caller is assumed to be
1628 * the recovery thread or have already done a start_fop.
1630 * Errors are returned by the nfs4_error_t parameter.
1633 nfs4getfh_otw(struct mntinfo4
*mi
, servinfo4_t
*svp
, vtype_t
*vtp
,
1634 int flags
, cred_t
*cr
, nfs4_error_t
*ep
)
1636 COMPOUND4args_clnt args
;
1637 COMPOUND4res_clnt res
;
1641 nfs4_ga_res_t
*garp
;
1643 lookup4_param_t lookuparg
;
1646 bool_t needrecov
= FALSE
;
1647 nfs4_recov_state_t recov_state
;
1650 int recovery
= !(flags
& NFS4_GETFH_NEEDSOP
);
1652 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1653 ASSERT(svp
->sv_path
!= NULL
);
1654 if (svp
->sv_path
[0] == '\0') {
1655 nfs_rw_exit(&svp
->sv_lock
);
1656 nfs4_error_init(ep
, EINVAL
);
1659 nfs_rw_exit(&svp
->sv_lock
);
1661 recov_state
.rs_flags
= 0;
1662 recov_state
.rs_num_retry_despite_err
= 0;
1665 if (mi
->mi_vfs_referral_loop_cnt
>= NFS4_REFERRAL_LOOP_MAX
) {
1666 DTRACE_PROBE3(nfs4clnt__debug__referral__loop
, mntinfo4
*,
1667 mi
, servinfo4_t
*, svp
, char *, "nfs4getfh_otw");
1668 nfs4_error_init(ep
, EINVAL
);
1671 nfs4_error_zinit(ep
);
1674 ep
->error
= nfs4_start_fop(mi
, NULL
, NULL
, OH_MOUNT
,
1675 &recov_state
, NULL
);
1678 * If recovery has been started and this request as
1679 * initiated by a mount, then we must wait for recovery
1680 * to finish before proceeding, otherwise, the error
1681 * cleanup would remove data structures needed by the
1685 mutex_enter(&mi
->mi_lock
);
1686 if (mi
->mi_flags
& MI4_MOUNTING
) {
1687 mi
->mi_flags
|= MI4_RECOV_FAIL
;
1690 NFS4_DEBUG(nfs4_client_recov_debug
, (CE_NOTE
,
1691 "nfs4getfh_otw: waiting 4 recovery\n"));
1693 while (mi
->mi_flags
& MI4_RECOV_ACTIV
)
1694 cv_wait(&mi
->mi_failover_cv
,
1697 mutex_exit(&mi
->mi_lock
);
1702 * If the client does not specify a specific flavor to use
1703 * and has not gotten a secinfo list from the server yet,
1704 * retrieve the secinfo list from the server and use a
1705 * flavor from the list to mount.
1707 * If fail to get the secinfo list from the server, then
1708 * try the default flavor.
1710 if ((svp
->sv_flags
& SV4_TRYSECDEFAULT
) &&
1711 svp
->sv_secinfo
== NULL
) {
1712 (void) nfs4_secinfo_path(mi
, cr
, FALSE
);
1717 args
.ctag
= TAG_REMAP_MOUNT
;
1719 args
.ctag
= TAG_MOUNT
;
1721 lookuparg
.l4_getattrs
= LKP4_ALL_ATTRIBUTES
;
1722 lookuparg
.argsp
= &args
;
1723 lookuparg
.resp
= &res
;
1724 lookuparg
.header_len
= 2; /* Putrootfh, getfh */
1725 lookuparg
.trailer_len
= 0;
1726 lookuparg
.ga_bits
= FATTR4_FSINFO_MASK
;
1729 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1730 ASSERT(svp
->sv_path
!= NULL
);
1731 llndx
= nfs4lookup_setup(svp
->sv_path
, &lookuparg
, 0);
1732 nfs_rw_exit(&svp
->sv_lock
);
1735 num_argops
= args
.array_len
;
1737 /* choose public or root filehandle */
1738 if (flags
& NFS4_GETFH_PUBLIC
)
1739 argop
[0].argop
= OP_PUTPUBFH
;
1741 argop
[0].argop
= OP_PUTROOTFH
;
1744 argop
[1].argop
= OP_GETFH
;
1746 NFS4_DEBUG(nfs4_client_call_debug
, (CE_NOTE
,
1747 "nfs4getfh_otw: %s call, mi 0x%p",
1748 needrecov
? "recov" : "first", (void *)mi
));
1750 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, RFSCALL_SOFT
, ep
);
1752 needrecov
= nfs4_needs_recovery(ep
, FALSE
, mi
->mi_vfsp
);
1758 nfs4args_lookup_free(argop
, num_argops
);
1760 lookuparg
.arglen
* sizeof (nfs_argop4
));
1762 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1766 NFS4_DEBUG(nfs4_client_recov_debug
,
1767 (CE_NOTE
, "nfs4getfh_otw: initiating recovery\n"));
1769 abort
= nfs4_start_recovery(ep
, mi
, NULL
,
1770 NULL
, NULL
, NULL
, OP_GETFH
, NULL
, NULL
, NULL
);
1772 ep
->error
= geterrno4(res
.status
);
1773 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1775 nfs4args_lookup_free(argop
, num_argops
);
1776 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1777 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
, needrecov
);
1778 /* have another go? */
1785 * No recovery, but check if error is set.
1788 nfs4args_lookup_free(argop
, num_argops
);
1789 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1791 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
,
1798 /* for non-recovery errors */
1799 if (res
.status
&& res
.status
!= NFS4ERR_SYMLINK
&&
1800 res
.status
!= NFS4ERR_MOVED
) {
1802 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
,
1805 nfs4args_lookup_free(argop
, num_argops
);
1806 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1807 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1812 * If any intermediate component in the path is a symbolic link,
1813 * resolve the symlink, then try mount again using the new path.
1815 if (res
.status
== NFS4ERR_SYMLINK
|| res
.status
== NFS4ERR_MOVED
) {
1819 * Need to call nfs4_end_op before resolve_sympath to avoid
1820 * potential nfs4_start_op deadlock.
1823 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
,
1827 * This must be from OP_LOOKUP failure. The (cfh) for this
1828 * OP_LOOKUP is a symlink node. Found out where the
1829 * OP_GETFH is for the (cfh) that is a symlink node.
1832 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
1833 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
1835 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
1836 * In this case, where = 7, nthcomp = 2.
1838 where
= res
.array_len
- 2;
1841 if (res
.status
== NFS4ERR_SYMLINK
) {
1843 resop
= &res
.array
[where
- 1];
1844 ASSERT(resop
->resop
== OP_GETFH
);
1845 tmpfhp
= &resop
->nfs_resop4_u
.opgetfh
.object
;
1846 nthcomp
= res
.array_len
/3 - 1;
1847 ep
->error
= resolve_sympath(mi
, svp
, nthcomp
,
1850 } else if (res
.status
== NFS4ERR_MOVED
) {
1852 resop
= &res
.array
[where
- 2];
1853 ASSERT(resop
->resop
== OP_GETFH
);
1854 tmpfhp
= &resop
->nfs_resop4_u
.opgetfh
.object
;
1855 nthcomp
= res
.array_len
/3 - 1;
1856 ep
->error
= resolve_referral(mi
, svp
, cr
, nthcomp
,
1860 nfs4args_lookup_free(argop
, num_argops
);
1861 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1862 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1871 resop
= &res
.array
[res
.array_len
- 2];
1872 ASSERT(resop
->resop
== OP_GETFH
);
1873 resfhp
= &resop
->nfs_resop4_u
.opgetfh
.object
;
1875 /* getattr fsinfo res */
1877 garp
= &resop
->nfs_resop4_u
.opgetattr
.ga_res
;
1879 *vtp
= garp
->n4g_va
.va_type
;
1881 mi
->mi_fh_expire_type
= garp
->n4g_ext_res
->n4g_fet
;
1883 mutex_enter(&mi
->mi_lock
);
1884 if (garp
->n4g_ext_res
->n4g_pc4
.pc4_link_support
)
1885 mi
->mi_flags
|= MI4_LINK
;
1886 if (garp
->n4g_ext_res
->n4g_pc4
.pc4_symlink_support
)
1887 mi
->mi_flags
|= MI4_SYMLINK
;
1888 if (garp
->n4g_ext_res
->n4g_suppattrs
& FATTR4_ACL_MASK
)
1889 mi
->mi_flags
|= MI4_ACL
;
1890 mutex_exit(&mi
->mi_lock
);
1892 if (garp
->n4g_ext_res
->n4g_maxread
== 0)
1894 MIN(MAXBSIZE
, mi
->mi_tsize
);
1897 MIN(garp
->n4g_ext_res
->n4g_maxread
,
1900 if (garp
->n4g_ext_res
->n4g_maxwrite
== 0)
1902 MIN(MAXBSIZE
, mi
->mi_stsize
);
1905 MIN(garp
->n4g_ext_res
->n4g_maxwrite
,
1908 if (garp
->n4g_ext_res
->n4g_maxfilesize
!= 0)
1909 mi
->mi_maxfilesize
=
1910 MIN(garp
->n4g_ext_res
->n4g_maxfilesize
,
1911 mi
->mi_maxfilesize
);
1914 * If the final component is a a symbolic link, resolve the symlink,
1915 * then try mount again using the new path.
1917 * Assume no symbolic link for root filesysm "/".
1921 * nthcomp is the total result length minus
1922 * the 1st 2 OPs (PUTROOTFH, GETFH),
1923 * then divided by 3 (LOOKUP,GETFH,GETATTR)
1925 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
1926 * LOOKUP 2nd-comp GETFH GETATTR
1930 nthcomp
= (res
.array_len
- 2)/3;
1933 * Need to call nfs4_end_op before resolve_sympath to avoid
1934 * potential nfs4_start_op deadlock. See RFE 4777612.
1937 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
,
1940 ep
->error
= resolve_sympath(mi
, svp
, nthcomp
, resfhp
, cr
,
1943 nfs4args_lookup_free(argop
, num_argops
);
1944 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1945 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1954 * We need to figure out where in the compound the getfh
1955 * for the parent directory is. If the object to be mounted is
1956 * the root, then there is no lookup at all:
1958 * If the object to be mounted is in the root, then the compound is:
1959 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
1960 * In either of these cases, the index of the GETFH is 1.
1961 * If it is not at the root, then it's something like:
1962 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
1963 * LOOKUP, GETFH, GETATTR
1964 * In this case, the index is llndx (last lookup index) - 2.
1966 if (llndx
== -1 || llndx
== 2)
1967 resop
= &res
.array
[1];
1970 resop
= &res
.array
[llndx
-2];
1973 ASSERT(resop
->resop
== OP_GETFH
);
1974 tmpfhp
= &resop
->nfs_resop4_u
.opgetfh
.object
;
1976 /* save the filehandles for the replica */
1977 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1978 ASSERT(tmpfhp
->nfs_fh4_len
<= NFS4_FHSIZE
);
1979 svp
->sv_pfhandle
.fh_len
= tmpfhp
->nfs_fh4_len
;
1980 bcopy(tmpfhp
->nfs_fh4_val
, svp
->sv_pfhandle
.fh_buf
,
1981 tmpfhp
->nfs_fh4_len
);
1982 ASSERT(resfhp
->nfs_fh4_len
<= NFS4_FHSIZE
);
1983 svp
->sv_fhandle
.fh_len
= resfhp
->nfs_fh4_len
;
1984 bcopy(resfhp
->nfs_fh4_val
, svp
->sv_fhandle
.fh_buf
, resfhp
->nfs_fh4_len
);
1986 /* initialize fsid and supp_attrs for server fs */
1987 svp
->sv_fsid
= garp
->n4g_fsid
;
1988 svp
->sv_supp_attrs
=
1989 garp
->n4g_ext_res
->n4g_suppattrs
| FATTR4_MANDATTR_MASK
;
1991 nfs_rw_exit(&svp
->sv_lock
);
1992 nfs4args_lookup_free(argop
, num_argops
);
1993 kmem_free(argop
, lookuparg
.arglen
* sizeof (nfs_argop4
));
1994 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1996 nfs4_end_fop(mi
, NULL
, NULL
, OH_MOUNT
, &recov_state
, needrecov
);
2000 * Save a copy of Servinfo4_t structure.
2001 * We might need when there is a failure in getting file handle
2002 * in case of a referral to replace servinfo4 struct and try again.
2004 static struct servinfo4
*
2005 copy_svp(servinfo4_t
*nsvp
)
2007 servinfo4_t
*svp
= NULL
;
2008 struct knetconfig
*sknconf
, *tknconf
;
2009 struct netbuf
*saddr
, *taddr
;
2011 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
2012 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
2013 svp
->sv_flags
= nsvp
->sv_flags
;
2014 svp
->sv_fsid
= nsvp
->sv_fsid
;
2015 svp
->sv_hostnamelen
= nsvp
->sv_hostnamelen
;
2016 svp
->sv_pathlen
= nsvp
->sv_pathlen
;
2017 svp
->sv_supp_attrs
= nsvp
->sv_supp_attrs
;
2019 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
2020 svp
->sv_hostname
= kmem_alloc(svp
->sv_hostnamelen
, KM_SLEEP
);
2021 bcopy(nsvp
->sv_hostname
, svp
->sv_hostname
, svp
->sv_hostnamelen
);
2022 bcopy(nsvp
->sv_path
, svp
->sv_path
, svp
->sv_pathlen
);
2024 saddr
= &nsvp
->sv_addr
;
2025 taddr
= &svp
->sv_addr
;
2026 taddr
->maxlen
= saddr
->maxlen
;
2027 taddr
->len
= saddr
->len
;
2028 if (saddr
->len
> 0) {
2029 taddr
->buf
= kmem_zalloc(saddr
->maxlen
, KM_SLEEP
);
2030 bcopy(saddr
->buf
, taddr
->buf
, saddr
->len
);
2033 svp
->sv_knconf
= kmem_zalloc(sizeof (struct knetconfig
), KM_SLEEP
);
2034 sknconf
= nsvp
->sv_knconf
;
2035 tknconf
= svp
->sv_knconf
;
2036 tknconf
->knc_semantics
= sknconf
->knc_semantics
;
2037 tknconf
->knc_rdev
= sknconf
->knc_rdev
;
2038 if (sknconf
->knc_proto
!= NULL
) {
2039 tknconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2040 bcopy(sknconf
->knc_proto
, (char *)tknconf
->knc_proto
,
2043 if (sknconf
->knc_protofmly
!= NULL
) {
2044 tknconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2045 bcopy(sknconf
->knc_protofmly
, (char *)tknconf
->knc_protofmly
,
2049 if (nsvp
->sv_origknconf
!= NULL
) {
2050 svp
->sv_origknconf
= kmem_zalloc(sizeof (struct knetconfig
),
2052 sknconf
= nsvp
->sv_origknconf
;
2053 tknconf
= svp
->sv_origknconf
;
2054 tknconf
->knc_semantics
= sknconf
->knc_semantics
;
2055 tknconf
->knc_rdev
= sknconf
->knc_rdev
;
2056 if (sknconf
->knc_proto
!= NULL
) {
2057 tknconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2058 bcopy(sknconf
->knc_proto
, (char *)tknconf
->knc_proto
,
2061 if (sknconf
->knc_protofmly
!= NULL
) {
2062 tknconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
,
2064 bcopy(sknconf
->knc_protofmly
,
2065 (char *)tknconf
->knc_protofmly
, KNC_STRSIZE
);
2069 svp
->sv_secdata
= copy_sec_data(nsvp
->sv_secdata
);
2070 svp
->sv_dhsec
= copy_sec_data(svp
->sv_dhsec
);
2072 * Rest of the security information is not copied as they are built
2073 * with the information available from secdata and dhsec.
2075 svp
->sv_next
= NULL
;
2081 restore_svp(mntinfo4_t
*mi
, servinfo4_t
*svp
, servinfo4_t
*origsvp
)
2083 servinfo4_t
*srvnext
, *tmpsrv
;
2085 if (strcmp(svp
->sv_hostname
, origsvp
->sv_hostname
) != 0) {
2087 * Since the hostname changed, we must be dealing
2088 * with a referral, and the lookup failed. We will
2089 * restore the whole servinfo4_t to what it was before.
2091 srvnext
= svp
->sv_next
;
2092 svp
->sv_next
= NULL
;
2093 tmpsrv
= copy_svp(origsvp
);
2096 svp
->sv_next
= srvnext
;
2097 mutex_enter(&mi
->mi_lock
);
2098 mi
->mi_servers
= svp
;
2099 mi
->mi_curr_serv
= svp
;
2100 mutex_exit(&mi
->mi_lock
);
2102 } else if (origsvp
->sv_pathlen
!= svp
->sv_pathlen
) {
2105 * For symlink case: restore original path because
2106 * it might have contained symlinks that were
2107 * expanded by nfsgetfh_otw before the failure occurred.
2109 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2110 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
2112 kmem_alloc(origsvp
->sv_pathlen
, KM_SLEEP
);
2113 svp
->sv_pathlen
= origsvp
->sv_pathlen
;
2114 bcopy(origsvp
->sv_path
, svp
->sv_path
,
2115 origsvp
->sv_pathlen
);
2116 nfs_rw_exit(&svp
->sv_lock
);
2121 static ushort_t nfs4_max_threads
= 8; /* max number of active async threads */
2122 uint_t nfs4_bsize
= 32 * 1024; /* client `block' size */
2123 static uint_t nfs4_async_clusters
= 1; /* # of reqs from each async queue */
2124 static uint_t nfs4_cots_timeo
= NFS_COTS_TIMEO
;
2127 * Remap the root filehandle for the given filesystem.
2129 * results returned via the nfs4_error_t parameter.
2132 nfs4_remap_root(mntinfo4_t
*mi
, nfs4_error_t
*ep
, int flags
)
2134 struct servinfo4
*svp
, *origsvp
;
2140 mutex_enter(&mi
->mi_lock
);
2143 svp
= mi
->mi_curr_serv
;
2145 (flags
& NFS4_REMAP_NEEDSOP
) ? NFS4_GETFH_NEEDSOP
: 0;
2147 (mi
->mi_flags
& MI4_PUBLIC
) ? NFS4_GETFH_PUBLIC
: 0;
2148 mutex_exit(&mi
->mi_lock
);
2151 * Just in case server path being mounted contains
2152 * symlinks and fails w/STALE, save the initial sv_path
2153 * so we can redrive the initial mount compound with the
2154 * initial sv_path -- not a symlink-expanded version.
2156 * This could only happen if a symlink was expanded
2157 * and the expanded mount compound failed stale. Because
2158 * it could be the case that the symlink was removed at
2159 * the server (and replaced with another symlink/dir,
2160 * we need to use the initial sv_path when attempting
2161 * to re-lookup everything and recover.
2163 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2164 origsvp
= copy_svp(svp
);
2165 nfs_rw_exit(&svp
->sv_lock
);
2167 num_retry
= nfs4_max_mount_retry
;
2171 * Get the root fh from the server. Retry nfs4_max_mount_retry
2172 * (2) times if it fails with STALE since the recovery
2173 * infrastructure doesn't do STALE recovery for components
2174 * of the server path to the object being mounted.
2176 nfs4getfh_otw(mi
, svp
, &vtype
, getfh_flags
, CRED(), ep
);
2178 if (ep
->error
== 0 && ep
->stat
== NFS4_OK
)
2182 * For some reason, the mount compound failed. Before
2183 * retrying, we need to restore original conditions.
2185 svp
= restore_svp(mi
, svp
, origsvp
);
2187 } while (num_retry
-- > 0);
2191 if (ep
->error
!= 0 || ep
->stat
!= 0) {
2195 if (vtype
!= VNON
&& vtype
!= mi
->mi_type
) {
2196 /* shouldn't happen */
2197 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2198 "nfs4_remap_root: server root vnode type (%d) doesn't "
2199 "match mount info (%d)", vtype
, mi
->mi_type
);
2202 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2203 rootfh
.nfs_fh4_val
= svp
->sv_fhandle
.fh_buf
;
2204 rootfh
.nfs_fh4_len
= svp
->sv_fhandle
.fh_len
;
2205 nfs_rw_exit(&svp
->sv_lock
);
2206 sfh4_update(mi
->mi_rootfh
, &rootfh
);
2209 * It's possible that recovery took place on the filesystem
2210 * and the server has been updated between the time we did
2211 * the nfs4getfh_otw and now. Re-drive the otw operation
2212 * to make sure we have a good fh.
2214 mutex_enter(&mi
->mi_lock
);
2215 if (mi
->mi_curr_serv
!= svp
)
2218 mutex_exit(&mi
->mi_lock
);
2222 nfs4rootvp(vnode_t
**rtvpp
, vfs_t
*vfsp
, struct servinfo4
*svp_head
,
2223 int flags
, cred_t
*cr
, zone_t
*zone
)
2225 vnode_t
*rtvp
= NULL
;
2232 vtype_t vtype
= VNON
;
2233 vtype_t tmp_vtype
= VNON
;
2234 struct servinfo4
*firstsvp
= NULL
, *svp
= svp_head
;
2235 nfs4_oo_hash_bucket_t
*bucketp
;
2237 char *droptext
= "";
2238 struct nfs_stats
*nfsstatsp
;
2239 nfs4_fname_t
*mfname
;
2241 int num_retry
, removed
;
2242 cred_t
*lcr
= NULL
, *tcr
= cr
;
2243 struct servinfo4
*origsvp
;
2246 nfsstatsp
= zone_getspecific(nfsstat_zone_key
, nfs_zone());
2247 ASSERT(nfsstatsp
!= NULL
);
2249 ASSERT(nfs_zone() == zone
);
2250 ASSERT(crgetref(cr
));
2253 * Create a mount record and link it to the vfs struct.
2255 mi
= kmem_zalloc(sizeof (*mi
), KM_SLEEP
);
2256 mutex_init(&mi
->mi_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2257 nfs_rw_init(&mi
->mi_recovlock
, NULL
, RW_DEFAULT
, NULL
);
2258 nfs_rw_init(&mi
->mi_rename_lock
, NULL
, RW_DEFAULT
, NULL
);
2259 nfs_rw_init(&mi
->mi_fh_lock
, NULL
, RW_DEFAULT
, NULL
);
2261 if (!(flags
& NFSMNT_SOFT
))
2262 mi
->mi_flags
|= MI4_HARD
;
2263 if ((flags
& NFSMNT_NOPRINT
))
2264 mi
->mi_flags
|= MI4_NOPRINT
;
2265 if (flags
& NFSMNT_INT
)
2266 mi
->mi_flags
|= MI4_INT
;
2267 if (flags
& NFSMNT_PUBLIC
)
2268 mi
->mi_flags
|= MI4_PUBLIC
;
2269 if (flags
& NFSMNT_MIRRORMOUNT
)
2270 mi
->mi_flags
|= MI4_MIRRORMOUNT
;
2271 if (flags
& NFSMNT_REFERRAL
)
2272 mi
->mi_flags
|= MI4_REFERRAL
;
2273 mi
->mi_retrans
= NFS_RETRIES
;
2274 if (svp
->sv_knconf
->knc_semantics
== NC_TPI_COTS_ORD
||
2275 svp
->sv_knconf
->knc_semantics
== NC_TPI_COTS
)
2276 mi
->mi_timeo
= nfs4_cots_timeo
;
2278 mi
->mi_timeo
= NFS_TIMEO
;
2279 mi
->mi_prog
= NFS_PROGRAM
;
2280 mi
->mi_vers
= NFS_V4
;
2281 mi
->mi_rfsnames
= rfsnames_v4
;
2282 mi
->mi_reqs
= nfsstatsp
->nfs_stats_v4
.rfsreqcnt_ptr
;
2283 cv_init(&mi
->mi_failover_cv
, NULL
, CV_DEFAULT
, NULL
);
2284 mi
->mi_servers
= svp
;
2285 mi
->mi_curr_serv
= svp
;
2286 mi
->mi_acregmin
= SEC2HR(ACREGMIN
);
2287 mi
->mi_acregmax
= SEC2HR(ACREGMAX
);
2288 mi
->mi_acdirmin
= SEC2HR(ACDIRMIN
);
2289 mi
->mi_acdirmax
= SEC2HR(ACDIRMAX
);
2290 mi
->mi_fh_expire_type
= FH4_PERSISTENT
;
2291 mi
->mi_clientid_next
= NULL
;
2292 mi
->mi_clientid_prev
= NULL
;
2294 mi
->mi_grace_wait
= 0;
2296 mi
->mi_srvsettime
= 0;
2297 mi
->mi_srvset_cnt
= 0;
2301 mi
->mi_tsize
= nfs4_tsize(svp
->sv_knconf
);
2302 mi
->mi_stsize
= mi
->mi_tsize
;
2304 if (flags
& NFSMNT_DIRECTIO
)
2305 mi
->mi_flags
|= MI4_DIRECTIO
;
2307 mi
->mi_flags
|= MI4_MOUNTING
;
2310 * Make a vfs struct for nfs. We do this here instead of below
2311 * because rtvp needs a vfs before we can do a getattr on it.
2313 * Assign a unique device id to the mount
2315 mutex_enter(&nfs_minor_lock
);
2317 nfs_minor
= (nfs_minor
+ 1) & MAXMIN32
;
2318 nfs_dev
= makedevice(nfs_major
, nfs_minor
);
2319 } while (vfs_devismounted(nfs_dev
));
2320 mutex_exit(&nfs_minor_lock
);
2322 vfsp
->vfs_dev
= nfs_dev
;
2323 vfs_make_fsid(&vfsp
->vfs_fsid
, nfs_dev
, nfs4fstyp
);
2324 vfsp
->vfs_data
= (caddr_t
)mi
;
2325 vfsp
->vfs_fstype
= nfsfstyp
;
2326 vfsp
->vfs_bsize
= nfs4_bsize
;
2329 * Initialize fields used to support async putpage operations.
2331 for (i
= 0; i
< NFS4_ASYNC_TYPES
; i
++)
2332 mi
->mi_async_clusters
[i
] = nfs4_async_clusters
;
2333 mi
->mi_async_init_clusters
= nfs4_async_clusters
;
2334 mi
->mi_async_curr
[NFS4_ASYNC_QUEUE
] =
2335 mi
->mi_async_curr
[NFS4_ASYNC_PGOPS_QUEUE
] = &mi
->mi_async_reqs
[0];
2336 mi
->mi_max_threads
= nfs4_max_threads
;
2337 mutex_init(&mi
->mi_async_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2338 cv_init(&mi
->mi_async_reqs_cv
, NULL
, CV_DEFAULT
, NULL
);
2339 cv_init(&mi
->mi_async_work_cv
[NFS4_ASYNC_QUEUE
], NULL
, CV_DEFAULT
,
2341 cv_init(&mi
->mi_async_work_cv
[NFS4_ASYNC_PGOPS_QUEUE
], NULL
,
2343 cv_init(&mi
->mi_async_cv
, NULL
, CV_DEFAULT
, NULL
);
2344 cv_init(&mi
->mi_inact_req_cv
, NULL
, CV_DEFAULT
, NULL
);
2348 zone_init_ref(&mi
->mi_zone_ref
);
2349 zone_hold_ref(zone
, &mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2350 nfs4_mi_zonelist_add(mi
);
2353 * Initialize the <open owner/cred> hash table.
2355 for (i
= 0; i
< NFS4_NUM_OO_BUCKETS
; i
++) {
2356 bucketp
= &(mi
->mi_oo_list
[i
]);
2357 mutex_init(&bucketp
->b_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2358 list_create(&bucketp
->b_oo_hash_list
,
2359 sizeof (nfs4_open_owner_t
),
2360 offsetof(nfs4_open_owner_t
, oo_hash_node
));
2364 * Initialize the freed open owner list.
2367 mi
->mi_foo_max
= NFS4_NUM_FREED_OPEN_OWNERS
;
2368 list_create(&mi
->mi_foo_list
, sizeof (nfs4_open_owner_t
),
2369 offsetof(nfs4_open_owner_t
, oo_foo_node
));
2371 list_create(&mi
->mi_lost_state
, sizeof (nfs4_lost_rqst_t
),
2372 offsetof(nfs4_lost_rqst_t
, lr_node
));
2374 list_create(&mi
->mi_bseqid_list
, sizeof (nfs4_bseqid_entry_t
),
2375 offsetof(nfs4_bseqid_entry_t
, bs_node
));
2378 * Initialize the msg buffer.
2380 list_create(&mi
->mi_msg_list
, sizeof (nfs4_debug_msg_t
),
2381 offsetof(nfs4_debug_msg_t
, msg_node
));
2382 mi
->mi_msg_count
= 0;
2383 mutex_init(&mi
->mi_msg_list_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2388 nfs4_mnt_kstat_init(vfsp
);
2391 * Initialize the shared filehandle pool.
2393 sfh4_createtab(&mi
->mi_filehandles
);
2396 * Save server path we're attempting to mount.
2398 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2399 origsvp
= copy_svp(svp
);
2400 nfs_rw_exit(&svp
->sv_lock
);
2403 * Make the GETFH call to get root fh for each replica.
2405 if (svp_head
->sv_next
)
2406 droptext
= ", dropping replica";
2409 * If the uid is set then set the creds for secure mounts
2410 * by proxy processes such as automountd.
2412 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2413 if (svp
->sv_secdata
->uid
!= 0 &&
2414 svp
->sv_secdata
->rpcflavor
== RPCSEC_GSS
) {
2416 (void) crsetugid(lcr
, svp
->sv_secdata
->uid
, crgetgid(cr
));
2419 nfs_rw_exit(&svp
->sv_lock
);
2420 for (svp
= svp_head
; svp
; svp
= svp
->sv_next
) {
2421 if (nfs4_chkdup_servinfo4(svp_head
, svp
)) {
2422 nfs_cmn_err(error
, CE_WARN
,
2423 VERS_MSG
"Host %s is a duplicate%s",
2424 svp
->sv_hostname
, droptext
);
2425 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2426 svp
->sv_flags
|= SV4_NOTINUSE
;
2427 nfs_rw_exit(&svp
->sv_lock
);
2430 mi
->mi_curr_serv
= svp
;
2433 * Just in case server path being mounted contains
2434 * symlinks and fails w/STALE, save the initial sv_path
2435 * so we can redrive the initial mount compound with the
2436 * initial sv_path -- not a symlink-expanded version.
2438 * This could only happen if a symlink was expanded
2439 * and the expanded mount compound failed stale. Because
2440 * it could be the case that the symlink was removed at
2441 * the server (and replaced with another symlink/dir,
2442 * we need to use the initial sv_path when attempting
2443 * to re-lookup everything and recover.
2445 * Other mount errors should evenutally be handled here also
2446 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount
2447 * failures will result in mount being redriven a few times.
2449 num_retry
= nfs4_max_mount_retry
;
2451 nfs4getfh_otw(mi
, svp
, &tmp_vtype
,
2452 ((flags
& NFSMNT_PUBLIC
) ? NFS4_GETFH_PUBLIC
: 0) |
2453 NFS4_GETFH_NEEDSOP
, tcr
, &e
);
2455 if (e
.error
== 0 && e
.stat
== NFS4_OK
)
2459 * For some reason, the mount compound failed. Before
2460 * retrying, we need to restore original conditions.
2462 svp
= restore_svp(mi
, svp
, origsvp
);
2465 } while (num_retry
-- > 0);
2466 error
= e
.error
? e
.error
: geterrno4(e
.stat
);
2468 nfs_cmn_err(error
, CE_WARN
,
2469 VERS_MSG
"initial call to %s failed%s: %m",
2470 svp
->sv_hostname
, droptext
);
2471 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2472 svp
->sv_flags
|= SV4_NOTINUSE
;
2473 nfs_rw_exit(&svp
->sv_lock
);
2474 mi
->mi_flags
&= ~MI4_RECOV_FAIL
;
2479 if (tmp_vtype
== VBAD
) {
2480 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2481 VERS_MSG
"%s returned a bad file type for "
2482 "root%s", svp
->sv_hostname
, droptext
);
2483 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2484 svp
->sv_flags
|= SV4_NOTINUSE
;
2485 nfs_rw_exit(&svp
->sv_lock
);
2489 if (vtype
== VNON
) {
2491 } else if (vtype
!= tmp_vtype
) {
2492 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2493 VERS_MSG
"%s returned a different file type "
2494 "for root%s", svp
->sv_hostname
, droptext
);
2495 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2496 svp
->sv_flags
|= SV4_NOTINUSE
;
2497 nfs_rw_exit(&svp
->sv_lock
);
2500 if (firstsvp
== NULL
)
2504 if (firstsvp
== NULL
) {
2510 mi
->mi_curr_serv
= svp
= firstsvp
;
2511 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2512 ASSERT((mi
->mi_curr_serv
->sv_flags
& SV4_NOTINUSE
) == 0);
2513 fh
.nfs_fh4_len
= svp
->sv_fhandle
.fh_len
;
2514 fh
.nfs_fh4_val
= svp
->sv_fhandle
.fh_buf
;
2515 mi
->mi_rootfh
= sfh4_get(&fh
, mi
);
2516 fh
.nfs_fh4_len
= svp
->sv_pfhandle
.fh_len
;
2517 fh
.nfs_fh4_val
= svp
->sv_pfhandle
.fh_buf
;
2518 mi
->mi_srvparentfh
= sfh4_get(&fh
, mi
);
2519 nfs_rw_exit(&svp
->sv_lock
);
2522 * Get the fname for filesystem root.
2524 mi
->mi_fname
= fn_get(NULL
, ".", mi
->mi_rootfh
);
2525 mfname
= mi
->mi_fname
;
2529 * Make the root vnode without attributes.
2531 rtvp
= makenfs4node_by_fh(mi
->mi_rootfh
, NULL
,
2532 &mfname
, NULL
, mi
, cr
, gethrtime());
2533 rtvp
->v_type
= vtype
;
2535 mi
->mi_curread
= mi
->mi_tsize
;
2536 mi
->mi_curwrite
= mi
->mi_stsize
;
2539 * Start the manager thread responsible for handling async worker
2543 VFS_HOLD(vfsp
); /* add reference for thread */
2544 mi
->mi_manager_thread
= zthread_create(NULL
, 0, nfs4_async_manager
,
2545 vfsp
, 0, minclsyspri
);
2546 ASSERT(mi
->mi_manager_thread
!= NULL
);
2549 * Create the thread that handles over-the-wire calls for
2551 * This needs to happen after the manager thread is created.
2554 mi
->mi_inactive_thread
= zthread_create(NULL
, 0, nfs4_inactive_thread
,
2555 mi
, 0, minclsyspri
);
2556 ASSERT(mi
->mi_inactive_thread
!= NULL
);
2558 /* If we didn't get a type, get one now */
2559 if (rtvp
->v_type
== VNON
) {
2560 va
.va_mask
= AT_TYPE
;
2561 error
= nfs4getattr(rtvp
, &va
, tcr
);
2564 rtvp
->v_type
= va
.va_type
;
2567 mi
->mi_type
= rtvp
->v_type
;
2569 mutex_enter(&mi
->mi_lock
);
2570 mi
->mi_flags
&= ~MI4_MOUNTING
;
2571 mutex_exit(&mi
->mi_lock
);
2573 /* Update VFS with new server and path info */
2574 if ((strcmp(svp
->sv_hostname
, origsvp
->sv_hostname
) != 0) ||
2575 (strcmp(svp
->sv_path
, origsvp
->sv_path
) != 0)) {
2576 len
= svp
->sv_hostnamelen
+ svp
->sv_pathlen
;
2577 resource
= kmem_zalloc(len
, KM_SLEEP
);
2578 (void) strcat(resource
, svp
->sv_hostname
);
2579 (void) strcat(resource
, ":");
2580 (void) strcat(resource
, svp
->sv_path
);
2581 vfs_setresource(vfsp
, resource
, 0);
2582 kmem_free(resource
, len
);
2593 * An error occurred somewhere, need to clean up...
2600 * We need to release our reference to the root vnode and
2601 * destroy the mntinfo4 struct that we just created.
2604 if (rp
->r_flags
& R4HASHED
)
2608 nfs4_async_stop(vfsp
);
2609 nfs4_async_manager_stop(vfsp
);
2610 removed
= nfs4_mi_zonelist_remove(mi
);
2612 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2615 * This releases the initial "hold" of the mi since it will never
2616 * be referenced by the vfsp. Also, when mount returns to vfs.c
2617 * with an error, the vfsp will be destroyed, not rele'd.
2621 if (origsvp
!= NULL
)
2632 nfs4_unmount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
2640 nfs4_ephemeral_tree_t
*eph_tree
;
2642 if (secpolicy_fs_unmount(cr
, vfsp
) != 0)
2647 if (flag
& MS_FORCE
) {
2648 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
2649 if (nfs_zone() != mi
->mi_zone
) {
2651 * If the request is coming from the wrong zone,
2652 * we don't want to create any new threads, and
2653 * performance is not a concern. Do everything
2656 NFS4_DEBUG(nfs4_client_zone_debug
, (CE_NOTE
,
2657 "nfs4_unmount x-zone forced unmount of vfs %p\n",
2659 nfs4_free_mount(vfsp
, flag
, cr
);
2662 * Free data structures asynchronously, to avoid
2663 * blocking the current thread (for performance
2666 async_free_mount(vfsp
, flag
, cr
);
2673 * Wait until all asynchronous putpage operations on
2674 * this file system are complete before flushing rnodes
2677 omax
= mi
->mi_max_threads
;
2678 if (nfs4_async_stop_sig(vfsp
))
2684 * About the only reason that this would fail would be
2685 * that the harvester is already busy tearing down this
2686 * node. So we fail back to the caller and let them try
2687 * again when needed.
2689 if (nfs4_ephemeral_umount(mi
, flag
, cr
,
2690 &must_unlock
, &eph_tree
)) {
2691 ASSERT(must_unlock
== FALSE
);
2692 mutex_enter(&mi
->mi_async_lock
);
2693 mi
->mi_max_threads
= omax
;
2694 mutex_exit(&mi
->mi_async_lock
);
2700 * If there are any active vnodes on this file system,
2701 * then the file system is busy and can't be unmounted.
2703 if (check_rtable4(vfsp
)) {
2704 nfs4_ephemeral_umount_unlock(&must_unlock
, &eph_tree
);
2706 mutex_enter(&mi
->mi_async_lock
);
2707 mi
->mi_max_threads
= omax
;
2708 mutex_exit(&mi
->mi_async_lock
);
2714 * The unmount can't fail from now on, so record any
2715 * ephemeral changes.
2717 nfs4_ephemeral_umount_activate(mi
, &must_unlock
, &eph_tree
);
2720 * There are no active files that could require over-the-wire
2721 * calls to the server, so stop the async manager and the
2724 nfs4_async_manager_stop(vfsp
);
2727 * Destroy all rnodes belonging to this file system from the
2728 * rnode hash queues and purge any resources allocated to
2731 destroy_rtable4(vfsp
, cr
);
2732 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
2734 nfs4_remove_mi_from_server(mi
, NULL
);
2735 removed
= nfs4_mi_zonelist_remove(mi
);
2737 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2746 nfs4_root(vfs_t
*vfsp
, vnode_t
**vpp
)
2750 nfs4_fname_t
*mfname
;
2755 if (nfs_zone() != mi
->mi_zone
)
2758 svp
= mi
->mi_curr_serv
;
2760 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2761 if (svp
->sv_flags
& SV4_ROOT_STALE
) {
2762 nfs_rw_exit(&svp
->sv_lock
);
2764 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2765 if (svp
->sv_flags
& SV4_ROOT_STALE
) {
2766 svp
->sv_flags
&= ~SV4_ROOT_STALE
;
2767 nfs_rw_exit(&svp
->sv_lock
);
2770 nfs_rw_exit(&svp
->sv_lock
);
2772 nfs_rw_exit(&svp
->sv_lock
);
2775 mfname
= mi
->mi_fname
;
2777 vp
= makenfs4node_by_fh(mi
->mi_rootfh
, NULL
, &mfname
, NULL
,
2778 VFTOMI4(vfsp
), CRED(), gethrtime());
2780 if (VTOR4(vp
)->r_flags
& R4STALE
) {
2785 ASSERT(vp
->v_type
== VNON
|| vp
->v_type
== mi
->mi_type
);
2787 vp
->v_type
= mi
->mi_type
;
2795 nfs4_statfs_otw(vnode_t
*vp
, struct statvfs64
*sbp
, cred_t
*cr
)
2799 nfs4_ga_ext_res_t ger
;
2801 gar
.n4g_ext_res
= &ger
;
2803 if (error
= nfs4_attr_otw(vp
, TAG_FSINFO
, &gar
,
2804 NFS4_STATFS_ATTR_MASK
, cr
))
2807 *sbp
= gar
.n4g_ext_res
->n4g_sb
;
2813 * Get file system statistics.
2816 nfs4_statvfs(vfs_t
*vfsp
, struct statvfs64
*sbp
)
2822 error
= nfs4_root(vfsp
, &vp
);
2828 error
= nfs4_statfs_otw(vp
, sbp
, cr
);
2830 (void) strncpy(sbp
->f_basetype
,
2831 vfssw
[vfsp
->vfs_fstype
].vsw_name
, FSTYPSZ
);
2832 sbp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
2834 nfs4_purge_stale_fh(error
, vp
, cr
);
2842 static kmutex_t nfs4_syncbusy
;
2845 * Flush dirty nfs files for file system vfsp.
2846 * If vfsp == NULL, all nfs files are flushed.
2848 * SYNC_CLOSE in flag is passed to us to
2849 * indicate that we are shutting down and or
2853 nfs4_sync(vfs_t
*vfsp
, short flag
, cred_t
*cr
)
2856 * Cross-zone calls are OK here, since this translates to a
2857 * fop_putpage(B_ASYNC), which gets picked up by the right zone.
2859 if (!(flag
& SYNC_ATTR
) && mutex_tryenter(&nfs4_syncbusy
) != 0) {
2861 mutex_exit(&nfs4_syncbusy
);
2865 * if SYNC_CLOSE is set then we know that
2866 * the system is rebooting, mark the mntinfo
2867 * for later examination.
2869 if (vfsp
&& (flag
& SYNC_CLOSE
)) {
2873 if (!(mi
->mi_flags
& MI4_SHUTDOWN
)) {
2874 mutex_enter(&mi
->mi_lock
);
2875 mi
->mi_flags
|= MI4_SHUTDOWN
;
2876 mutex_exit(&mi
->mi_lock
);
2883 * vget is difficult, if not impossible, to support in v4 because we don't
2884 * know the parent directory or name, which makes it impossible to create a
2885 * useful shadow vnode. And we need the shadow vnode for things like
2891 * XXX Check nfs4_vget_pseudo() for dependency.
2894 nfs4_vget(vfs_t
*vfsp
, vnode_t
**vpp
, fid_t
*fidp
)
2900 * nfs4_mountroot get called in the case where we are diskless booting. All
2901 * we need from here is the ability to get the server info and from there we
2902 * can simply call nfs4_rootvp.
2906 nfs4_mountroot(vfs_t
*vfsp
, whymountroot_t why
)
2909 char root_hostname
[SYS_NMLN
+1];
2910 struct servinfo4
*svp
;
2919 struct nfs_args args
; /* nfs mount arguments */
2920 static char token
[10];
2923 bzero(&args
, sizeof (args
));
2925 /* do this BEFORE getfile which causes xid stamps to be initialized */
2926 clkset(-1L); /* hack for now - until we get time svc? */
2928 if (why
== ROOT_REMOUNT
) {
2932 panic("nfs4_mountroot: why == ROOT_REMOUNT");
2935 if (why
== ROOT_UNMOUNT
) {
2937 * Nothing to do for NFS.
2948 (void) getfsname("root", name
, sizeof (token
));
2951 root_path
= pn
.pn_path
;
2953 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
2954 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
2955 svp
->sv_knconf
= kmem_zalloc(sizeof (*svp
->sv_knconf
), KM_SLEEP
);
2956 svp
->sv_knconf
->knc_protofmly
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
2957 svp
->sv_knconf
->knc_proto
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
2960 * Get server address
2962 * Get server's transport
2963 * Get server's hostname
2966 args
.addr
= &svp
->sv_addr
;
2967 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2968 args
.fh
= (char *)&svp
->sv_fhandle
;
2969 args
.knconf
= svp
->sv_knconf
;
2970 args
.hostname
= root_hostname
;
2972 if (error
= mount_root(*name
? name
: "root", root_path
, NFS_V4
,
2973 &args
, &vfsflags
)) {
2974 if (error
== EPROTONOSUPPORT
)
2975 nfs_cmn_err(error
, CE_WARN
, "nfs4_mountroot: "
2976 "mount_root failed: server doesn't support NFS V4");
2978 nfs_cmn_err(error
, CE_WARN
,
2979 "nfs4_mountroot: mount_root failed: %m");
2980 nfs_rw_exit(&svp
->sv_lock
);
2985 nfs_rw_exit(&svp
->sv_lock
);
2986 svp
->sv_hostnamelen
= (int)(strlen(root_hostname
) + 1);
2987 svp
->sv_hostname
= kmem_alloc(svp
->sv_hostnamelen
, KM_SLEEP
);
2988 (void) strcpy(svp
->sv_hostname
, root_hostname
);
2990 svp
->sv_pathlen
= (int)(strlen(root_path
) + 1);
2991 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
2992 (void) strcpy(svp
->sv_path
, root_path
);
2995 * Force root partition to always be mounted with AUTH_UNIX for now
2997 svp
->sv_secdata
= kmem_alloc(sizeof (*svp
->sv_secdata
), KM_SLEEP
);
2998 svp
->sv_secdata
->secmod
= AUTH_UNIX
;
2999 svp
->sv_secdata
->rpcflavor
= AUTH_UNIX
;
3000 svp
->sv_secdata
->data
= NULL
;
3005 error
= nfs4rootvp(&rtvp
, vfsp
, svp
, args
.flags
, cr
, global_zone
);
3017 * Send client id to the server, if necessary
3019 nfs4_error_zinit(&n4e
);
3020 nfs4setclientid(mi
, cr
, FALSE
, &n4e
);
3030 error
= nfs4_setopts(rtvp
, DATAMODEL_NATIVE
, &args
);
3032 nfs_cmn_err(error
, CE_WARN
,
3033 "nfs4_mountroot: invalid root mount options");
3038 (void) vfs_lock_wait(vfsp
);
3039 vfs_add(NULL
, vfsp
, vfsflags
);
3042 size
= strlen(svp
->sv_hostname
);
3043 (void) strcpy(rootfs
.bo_name
, svp
->sv_hostname
);
3044 rootfs
.bo_name
[size
] = ':';
3045 (void) strcpy(&rootfs
.bo_name
[size
+ 1], root_path
);
3052 nfs4_async_stop(vfsp
);
3053 nfs4_async_manager_stop(vfsp
);
3063 * Initialization routine for VFS routines. Should only be called once
3068 mutex_init(&nfs4_syncbusy
, NULL
, MUTEX_DEFAULT
, NULL
);
3069 nfs4setclientid_init();
3070 nfs4_ephemeral_init();
3077 nfs4_ephemeral_fini();
3078 nfs4setclientid_fini();
3079 mutex_destroy(&nfs4_syncbusy
);
3083 nfs4_freevfs(vfs_t
*vfsp
)
3087 /* need to release the initial hold */
3091 * At this point, we can no longer reference the vfs
3092 * and need to inform other holders of the reference
3093 * to the mntinfo4_t.
3101 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
3103 struct nfs4_server nfs4_server_lst
=
3104 { &nfs4_server_lst
, &nfs4_server_lst
};
3106 kmutex_t nfs4_server_lst_lock
;
3109 nfs4setclientid_init(void)
3111 mutex_init(&nfs4_server_lst_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3115 nfs4setclientid_fini(void)
3117 mutex_destroy(&nfs4_server_lst_lock
);
3120 int nfs4_retry_sclid_delay
= NFS4_RETRY_SCLID_DELAY
;
3121 int nfs4_num_sclid_retries
= NFS4_NUM_SCLID_RETRIES
;
3124 * Set the clientid for the server for "mi". No-op if the clientid is
3127 * The recovery boolean should be set to TRUE if this function was called
3128 * by the recovery code, and FALSE otherwise. This is used to determine
3129 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
3130 * for adding a mntinfo4_t to a nfs4_server_t.
3132 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then
3133 * 'n4ep->error' is set to geterrno4(n4ep->stat).
3136 nfs4setclientid(mntinfo4_t
*mi
, cred_t
*cr
, bool_t recovery
, nfs4_error_t
*n4ep
)
3138 struct nfs4_server
*np
;
3139 struct servinfo4
*svp
= mi
->mi_curr_serv
;
3140 nfs4_recov_state_t recov_state
;
3141 int num_retries
= 0;
3144 int retry_inuse
= 1; /* only retry once on NFS4ERR_CLID_INUSE */
3145 time_t lease_time
= 0;
3147 recov_state
.rs_flags
= 0;
3148 recov_state
.rs_num_retry_despite_err
= 0;
3149 ASSERT(n4ep
!= NULL
);
3153 nfs4_error_zinit(n4ep
);
3155 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, 0);
3157 mutex_enter(&nfs4_server_lst_lock
);
3158 np
= servinfo4_to_nfs4_server(svp
); /* This locks np if it is found */
3159 mutex_exit(&nfs4_server_lst_lock
);
3161 struct nfs4_server
*tnp
;
3162 np
= new_nfs4_server(svp
, cr
);
3163 mutex_enter(&np
->s_lock
);
3165 mutex_enter(&nfs4_server_lst_lock
);
3166 tnp
= servinfo4_to_nfs4_server(svp
);
3169 * another thread snuck in and put server on list.
3170 * since we aren't adding it to the nfs4_server_list
3171 * we need to set the ref count to 0 and destroy it.
3174 destroy_nfs4_server(np
);
3178 * do not give list a reference until everything
3181 insque(np
, &nfs4_server_lst
);
3183 mutex_exit(&nfs4_server_lst_lock
);
3185 ASSERT(MUTEX_HELD(&np
->s_lock
));
3187 * If we find the server already has N4S_CLIENTID_SET, then
3188 * just return, we've already done SETCLIENTID to that server
3190 if (np
->s_flags
& N4S_CLIENTID_SET
) {
3191 /* add mi to np's mntinfo4_list */
3192 nfs4_add_mi_to_server(np
, mi
);
3194 nfs_rw_exit(&mi
->mi_recovlock
);
3195 mutex_exit(&np
->s_lock
);
3196 nfs4_server_rele(np
);
3199 mutex_exit(&np
->s_lock
);
3203 * Drop the mi_recovlock since nfs4_start_op will
3204 * acquire it again for us.
3207 nfs_rw_exit(&mi
->mi_recovlock
);
3209 n4ep
->error
= nfs4_start_op(mi
, NULL
, NULL
, &recov_state
);
3211 nfs4_server_rele(np
);
3216 mutex_enter(&np
->s_lock
);
3217 while (np
->s_flags
& N4S_CLIENTID_PEND
) {
3218 if (!cv_wait_sig(&np
->s_clientid_pend
, &np
->s_lock
)) {
3219 mutex_exit(&np
->s_lock
);
3220 nfs4_server_rele(np
);
3222 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
,
3224 n4ep
->error
= EINTR
;
3229 if (np
->s_flags
& N4S_CLIENTID_SET
) {
3230 /* XXX copied/pasted from above */
3231 /* add mi to np's mntinfo4_list */
3232 nfs4_add_mi_to_server(np
, mi
);
3233 mutex_exit(&np
->s_lock
);
3234 nfs4_server_rele(np
);
3236 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, recovery
);
3241 * Reset the N4S_CB_PINGED flag. This is used to
3242 * indicate if we have received a CB_NULL from the
3243 * server. Also we reset the waiter flag.
3245 np
->s_flags
&= ~(N4S_CB_PINGED
| N4S_CB_WAITER
);
3246 /* any failure must now clear this flag */
3247 np
->s_flags
|= N4S_CLIENTID_PEND
;
3248 mutex_exit(&np
->s_lock
);
3249 nfs4setclientid_otw(mi
, svp
, cr
, np
, n4ep
, &retry_inuse
);
3251 if (n4ep
->error
== EACCES
) {
3253 * If the uid is set then set the creds for secure mounts
3254 * by proxy processes such as automountd.
3256 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
3257 if (svp
->sv_secdata
->uid
!= 0) {
3259 (void) crsetugid(lcr
, svp
->sv_secdata
->uid
,
3262 nfs_rw_exit(&svp
->sv_lock
);
3265 mutex_enter(&np
->s_lock
);
3268 mutex_exit(&np
->s_lock
);
3269 nfs4setclientid_otw(mi
, svp
, lcr
, np
, n4ep
,
3273 mutex_enter(&np
->s_lock
);
3274 lease_time
= np
->s_lease_time
;
3275 np
->s_flags
&= ~N4S_CLIENTID_PEND
;
3276 mutex_exit(&np
->s_lock
);
3278 if (n4ep
->error
!= 0 || n4ep
->stat
!= NFS4_OK
) {
3280 * Start recovery if failover is a possibility. If
3281 * invoked by the recovery thread itself, then just
3282 * return and let it handle the failover first. NB:
3283 * recovery is not allowed if the mount is in progress
3284 * since the infrastructure is not sufficiently setup
3285 * to allow it. Just return the error (after suitable
3288 if (FAILOVER_MOUNT4(mi
) && nfs4_try_failover(n4ep
)) {
3289 (void) nfs4_start_recovery(n4ep
, mi
, NULL
,
3290 NULL
, NULL
, NULL
, OP_SETCLIENTID
, NULL
, NULL
, NULL
);
3292 * Don't retry here, just return and let
3293 * recovery take over.
3297 } else if (nfs4_rpc_retry_error(n4ep
->error
) ||
3298 n4ep
->stat
== NFS4ERR_RESOURCE
||
3299 n4ep
->stat
== NFS4ERR_STALE_CLIENTID
) {
3303 * Always retry if in recovery or once had
3304 * contact with the server (but now it's
3307 if (recovery
== TRUE
||
3308 n4ep
->error
== ETIMEDOUT
||
3309 n4ep
->error
== ECONNRESET
)
3311 } else if (retry_inuse
&& n4ep
->error
== 0 &&
3312 n4ep
->stat
== NFS4ERR_CLID_INUSE
) {
3318 * Since everything succeeded give the list a reference count if
3319 * it hasn't been given one by add_new_nfs4_server() or if this
3320 * is not a recovery situation in which case it is already on
3323 mutex_enter(&np
->s_lock
);
3324 if ((np
->s_flags
& N4S_INSERTED
) == 0) {
3326 np
->s_flags
|= N4S_INSERTED
;
3328 mutex_exit(&np
->s_lock
);
3332 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, recovery
);
3335 if (retry
&& num_retries
++ < nfs4_num_sclid_retries
) {
3337 ddi_sleep(lease_time
+ nfs4_retry_sclid_delay
);
3340 ddi_sleep(nfs4_retry_sclid_delay
);
3342 nfs4_server_rele(np
);
3347 if (n4ep
->error
== 0)
3348 n4ep
->error
= geterrno4(n4ep
->stat
);
3350 /* broadcast before release in case no other threads are waiting */
3351 cv_broadcast(&np
->s_clientid_pend
);
3352 nfs4_server_rele(np
);
/*
 * Debug switch for nfs4setclientid_otw().  When nonzero, a successful
 * SETCLIENTID logs the returned clientid and confirm verifier through
 * zcmn_err(CE_NOTE).  Off by default.
 */
int nfs4setclientid_otw_debug = 0;
3358 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
3359 * but nothing else; the calling function must be designed to handle those
3363 nfs4setclientid_otw(mntinfo4_t
*mi
, struct servinfo4
*svp
, cred_t
*cr
,
3364 struct nfs4_server
*np
, nfs4_error_t
*ep
, int *retry_inusep
)
3366 COMPOUND4args_clnt args
;
3367 COMPOUND4res_clnt res
;
3368 nfs_argop4 argop
[3];
3369 SETCLIENTID4args
*s_args
;
3370 SETCLIENTID4resok
*s_resok
;
3372 nfs4_ga_res_t
*garp
= NULL
;
3373 timespec_t prop_time
, after_time
;
3375 clientid4 tmp_clientid
;
3377 ASSERT(!MUTEX_HELD(&np
->s_lock
));
3379 args
.ctag
= TAG_SETCLIENTID
;
3385 argop
[0].argop
= OP_PUTROOTFH
;
3388 argop
[1].argop
= OP_GETATTR
;
3389 argop
[1].nfs_argop4_u
.opgetattr
.attr_request
= FATTR4_LEASE_TIME_MASK
;
3390 argop
[1].nfs_argop4_u
.opgetattr
.mi
= mi
;
3393 argop
[2].argop
= OP_SETCLIENTID
;
3395 s_args
= &argop
[2].nfs_argop4_u
.opsetclientid
;
3397 mutex_enter(&np
->s_lock
);
3399 s_args
->client
.verifier
= np
->clidtosend
.verifier
;
3400 s_args
->client
.id_len
= np
->clidtosend
.id_len
;
3401 ASSERT(s_args
->client
.id_len
<= NFS4_OPAQUE_LIMIT
);
3402 s_args
->client
.id_val
= np
->clidtosend
.id_val
;
3405 * Callback needs to happen on non-RDMA transport
3406 * Check if we have saved the original knetconfig
3407 * if so, use that instead.
3409 if (svp
->sv_origknconf
!= NULL
)
3410 nfs4_cb_args(np
, svp
->sv_origknconf
, s_args
);
3412 nfs4_cb_args(np
, svp
->sv_knconf
, s_args
);
3414 mutex_exit(&np
->s_lock
);
3416 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, ep
);
3421 /* getattr lease_time res */
3422 if ((res
.array_len
>= 2) &&
3423 (res
.array
[1].nfs_resop4_u
.opgetattr
.status
== NFS4_OK
)) {
3424 garp
= &res
.array
[1].nfs_resop4_u
.opgetattr
.ga_res
;
3428 * The 32 bit client cannot handle a lease time greater than
3429 * (INT32_MAX/1000000). This is due to the use of the
3430 * lease_time in calls to drv_usectohz() in
3431 * nfs4_renew_lease_thread(). The problem is that
3432 * drv_usectohz() takes a time_t (which is just a long = 4
3433 * bytes) as its parameter. The lease_time is multiplied by
3434 * 1000000 to convert seconds to usecs for the parameter. If
3435 * a number bigger than (INT32_MAX/1000000) is used then we
3436 * overflow on the 32bit client.
3438 if (garp
->n4g_ext_res
->n4g_leasetime
> (INT32_MAX
/1000000)) {
3439 garp
->n4g_ext_res
->n4g_leasetime
= INT32_MAX
/1000000;
3443 mutex_enter(&np
->s_lock
);
3444 np
->s_lease_time
= garp
->n4g_ext_res
->n4g_leasetime
;
3447 * Keep track of the lease period for the mi's
3448 * mi_msg_list. We need an appropriate time
3449 * bound to associate past facts with a current
3450 * event. The lease period is perfect for this.
3452 mutex_enter(&mi
->mi_msg_list_lock
);
3453 mi
->mi_lease_period
= np
->s_lease_time
;
3454 mutex_exit(&mi
->mi_msg_list_lock
);
3455 mutex_exit(&np
->s_lock
);
3459 if (res
.status
== NFS4ERR_CLID_INUSE
) {
3460 clientaddr4
*clid_inuse
;
3462 if (!(*retry_inusep
)) {
3463 clid_inuse
= &res
.array
->nfs_resop4_u
.
3464 opsetclientid
.SETCLIENTID4res_u
.client_using
;
3466 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3467 "NFS4 mount (SETCLIENTID failed)."
3468 " nfs4_client_id.id is in"
3469 "use already by: r_netid<%s> r_addr<%s>",
3470 clid_inuse
->r_netid
, clid_inuse
->r_addr
);
3474 * XXX - The client should be more robust in its
3475 * handling of clientid in use errors (regen another
3476 * clientid and try again?)
3478 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3483 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3487 s_resok
= &res
.array
[2].nfs_resop4_u
.
3488 opsetclientid
.SETCLIENTID4res_u
.resok4
;
3490 tmp_clientid
= s_resok
->clientid
;
3492 verf
= s_resok
->setclientid_confirm
;
3495 if (nfs4setclientid_otw_debug
) {
3501 cid
.clientid
= s_resok
->clientid
;
3503 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3504 "nfs4setclientid_otw: OK, clientid = %x,%x, "
3505 "verifier = %" PRIx64
"\n", cid
.foo
[0], cid
.foo
[1], verf
);
3509 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3511 /* Confirm the client id and get the lease_time attribute */
3513 args
.ctag
= TAG_SETCLIENTID_CF
;
3518 argop
[0].argop
= OP_SETCLIENTID_CONFIRM
;
3520 argop
[0].nfs_argop4_u
.opsetclientid_confirm
.clientid
= tmp_clientid
;
3521 argop
[0].nfs_argop4_u
.opsetclientid_confirm
.setclientid_confirm
= verf
;
3523 /* used to figure out RTT for np */
3524 gethrestime(&prop_time
);
3526 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setlientid_otw: "
3527 "start time: %ld sec %ld nsec", prop_time
.tv_sec
,
3528 prop_time
.tv_nsec
));
3530 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, ep
);
3532 gethrestime(&after_time
);
3533 mutex_enter(&np
->s_lock
);
3534 np
->propagation_delay
.tv_sec
=
3535 MAX(1, after_time
.tv_sec
- prop_time
.tv_sec
);
3536 mutex_exit(&np
->s_lock
);
3538 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setlcientid_otw: "
3539 "finish time: %ld sec ", after_time
.tv_sec
));
3541 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setclientid_otw: "
3542 "propagation delay set to %ld sec",
3543 np
->propagation_delay
.tv_sec
));
3548 if (res
.status
== NFS4ERR_CLID_INUSE
) {
3549 clientaddr4
*clid_inuse
;
3551 if (!(*retry_inusep
)) {
3552 clid_inuse
= &res
.array
->nfs_resop4_u
.
3553 opsetclientid
.SETCLIENTID4res_u
.client_using
;
3555 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3556 "SETCLIENTID_CONFIRM failed. "
3557 "nfs4_client_id.id is in use already by: "
3558 "r_netid<%s> r_addr<%s>",
3559 clid_inuse
->r_netid
, clid_inuse
->r_addr
);
3562 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3567 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3571 mutex_enter(&np
->s_lock
);
3572 np
->clientid
= tmp_clientid
;
3573 np
->s_flags
|= N4S_CLIENTID_SET
;
3575 /* Add mi to np's mntinfo4 list */
3576 nfs4_add_mi_to_server(np
, mi
);
3578 if (np
->lease_valid
== NFS4_LEASE_NOT_STARTED
) {
3580 * Start lease management thread.
3581 * Keep trying until we succeed.
3584 np
->s_refcnt
++; /* pass reference to thread */
3585 (void) zthread_create(NULL
, 0, nfs4_renew_lease_thread
, np
, 0,
3588 mutex_exit(&np
->s_lock
);
3590 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3594 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes
3595 * mi's clientid the same as sp's.
3596 * Assumes sp is locked down.
3599 nfs4_add_mi_to_server(nfs4_server_t
*sp
, mntinfo4_t
*mi
)
3604 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
3605 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
3606 ASSERT(sp
!= &nfs4_server_lst
);
3607 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3609 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3610 "nfs4_add_mi_to_server: add mi %p to sp %p",
3611 (void*)mi
, (void*)sp
));
3613 for (tmi
= sp
->mntinfo4_list
;
3615 tmi
= tmi
->mi_clientid_next
) {
3617 NFS4_DEBUG(nfs4_client_lease_debug
,
3619 "nfs4_add_mi_to_server: mi in list"));
3625 * First put a hold on the mntinfo4's vfsp so that references via
3626 * mntinfo4_list will be valid.
3629 VFS_HOLD(mi
->mi_vfsp
);
3631 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4_add_mi_to_server: "
3632 "hold vfs %p for mi: %p", (void*)mi
->mi_vfsp
, (void*)mi
));
3635 if (sp
->mntinfo4_list
)
3636 sp
->mntinfo4_list
->mi_clientid_prev
= mi
;
3637 mi
->mi_clientid_next
= sp
->mntinfo4_list
;
3639 sp
->mntinfo4_list
= mi
;
3640 mi
->mi_srvsettime
= gethrestime_sec();
3641 mi
->mi_srvset_cnt
++;
3644 /* set mi's clientid to that of sp's for later matching */
3645 mi
->mi_clientid
= sp
->clientid
;
3648 * Update the clientid for any other mi's belonging to sp. This
3649 * must be done here while we hold sp->s_lock, so that
3650 * find_nfs4_server() continues to work.
3653 for (tmi
= sp
->mntinfo4_list
;
3655 tmi
= tmi
->mi_clientid_next
) {
3657 tmi
->mi_clientid
= sp
->clientid
;
3663 * Remove the mi from sp's mntinfo4_list and release its reference.
3664 * Exception: if mi still has open files, flag it for later removal (when
3665 * all the files are closed).
3667 * If this is the last mntinfo4 in sp's list then tell the lease renewal
3671 nfs4_remove_mi_from_server_nolock(mntinfo4_t
*mi
, nfs4_server_t
*sp
)
3673 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3674 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
3675 (void*)mi
, (void*)sp
));
3678 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3679 ASSERT(mi
->mi_open_files
>= 0);
3682 * First make sure this mntinfo4 can be taken off of the list,
3683 * ie: it doesn't have any open files remaining.
3685 if (mi
->mi_open_files
> 0) {
3686 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3687 "nfs4_remove_mi_from_server_nolock: don't "
3688 "remove mi since it still has files open"));
3690 mutex_enter(&mi
->mi_lock
);
3691 mi
->mi_flags
|= MI4_REMOVE_ON_LAST_CLOSE
;
3692 mutex_exit(&mi
->mi_lock
);
3696 VFS_HOLD(mi
->mi_vfsp
);
3698 VFS_RELE(mi
->mi_vfsp
);
3700 if (sp
->mntinfo4_list
== NULL
) {
3701 /* last fs unmounted, kill the thread */
3702 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3703 "remove_mi_from_nfs4_server_nolock: kill the thread"));
3704 nfs4_mark_srv_dead(sp
);
3709 * Remove mi from sp's mntinfo4_list and release the vfs reference.
3712 remove_mi(nfs4_server_t
*sp
, mntinfo4_t
*mi
)
3714 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3717 * We release a reference, and the caller must still have a
3720 ASSERT(mi
->mi_vfsp
->vfs_count
>= 2);
3722 if (mi
->mi_clientid_prev
) {
3723 mi
->mi_clientid_prev
->mi_clientid_next
= mi
->mi_clientid_next
;
3725 /* This is the first mi in sp's mntinfo4_list */
3727 * Make sure the first mntinfo4 in the list is the actual
3728 * mntinfo4 passed in.
3730 ASSERT(sp
->mntinfo4_list
== mi
);
3732 sp
->mntinfo4_list
= mi
->mi_clientid_next
;
3734 if (mi
->mi_clientid_next
)
3735 mi
->mi_clientid_next
->mi_clientid_prev
= mi
->mi_clientid_prev
;
3737 /* Now mark the mntinfo4's links as being removed */
3738 mi
->mi_clientid_prev
= mi
->mi_clientid_next
= NULL
;
3740 mi
->mi_srvset_cnt
++;
3742 VFS_RELE(mi
->mi_vfsp
);
3746 * Free all the entries in sp's mntinfo4_list.
3749 remove_all_mi(nfs4_server_t
*sp
)
3753 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3755 while (sp
->mntinfo4_list
!= NULL
) {
3756 mi
= sp
->mntinfo4_list
;
3758 * Grab a reference in case there is only one left (which
3759 * remove_mi() frees).
3761 VFS_HOLD(mi
->mi_vfsp
);
3763 VFS_RELE(mi
->mi_vfsp
);
3768 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
3770 * This version can be called with a null nfs4_server_t arg,
3771 * and will either find the right one and handle locking, or
3772 * do nothing because the mi wasn't added to an sp's mntinfo4_list.
3775 nfs4_remove_mi_from_server(mntinfo4_t
*mi
, nfs4_server_t
*esp
)
3780 nfs4_remove_mi_from_server_nolock(mi
, esp
);
3784 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, 0);
3785 if (sp
= find_nfs4_server_all(mi
, 1)) {
3786 nfs4_remove_mi_from_server_nolock(mi
, sp
);
3787 mutex_exit(&sp
->s_lock
);
3788 nfs4_server_rele(sp
);
3790 nfs_rw_exit(&mi
->mi_recovlock
);
3794 * Return TRUE if the given server has any non-unmounted filesystems.
3798 nfs4_fs_active(nfs4_server_t
*sp
)
3802 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3804 for (mi
= sp
->mntinfo4_list
; mi
!= NULL
; mi
= mi
->mi_clientid_next
) {
3805 if (!(mi
->mi_vfsp
->vfs_flag
& VFS_UNMOUNTED
))
3813 * Mark sp as finished and notify any waiters.
3817 nfs4_mark_srv_dead(nfs4_server_t
*sp
)
3819 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3821 sp
->s_thread_exit
= NFS4_THREAD_EXIT
;
3822 cv_broadcast(&sp
->cv_thread_exit
);
3826 * Create a new nfs4_server_t structure.
3827 * Returns new node unlocked and not in list, but with a reference count of
3830 struct nfs4_server
*
3831 new_nfs4_server(struct servinfo4
*svp
, cred_t
*cr
)
3833 struct nfs4_server
*np
;
3840 verifier4 un_verifier
;
3841 } nfs4clientid_verifier
;
3843 * We change this ID string carefully and with the Solaris
3844 * NFS server behaviour in mind. "+referrals" indicates
3845 * a client that can handle an NFSv4 referral.
3847 char id_val
[] = "Solaris: %s, NFSv4 kernel client +referrals";
3850 np
= kmem_zalloc(sizeof (struct nfs4_server
), KM_SLEEP
);
3851 np
->saddr
.len
= svp
->sv_addr
.len
;
3852 np
->saddr
.maxlen
= svp
->sv_addr
.maxlen
;
3853 np
->saddr
.buf
= kmem_alloc(svp
->sv_addr
.maxlen
, KM_SLEEP
);
3854 bcopy(svp
->sv_addr
.buf
, np
->saddr
.buf
, svp
->sv_addr
.len
);
3858 * Build the nfs_client_id4 for this server mount. Ensure
3859 * the verifier is useful and that the identification is
3860 * somehow based on the server's address for the case of
3861 * multi-homed servers.
3863 nfs4clientid_verifier
.un_verifier
= 0;
3865 nfs4clientid_verifier
.un_curtime
.sec
= (uint32_t)tt
.tv_sec
;
3866 nfs4clientid_verifier
.un_curtime
.subsec
= (uint32_t)tt
.tv_nsec
;
3867 np
->clidtosend
.verifier
= nfs4clientid_verifier
.un_verifier
;
3870 * calculate the length of the opaque identifier. Subtract 2
3871 * for the "%s" and add the traditional +1 for null
3874 len
= strlen(id_val
) - 2 + strlen(uts_nodename()) + 1;
3875 np
->clidtosend
.id_len
= len
+ np
->saddr
.maxlen
;
3877 np
->clidtosend
.id_val
= kmem_alloc(np
->clidtosend
.id_len
, KM_SLEEP
);
3878 (void) sprintf(np
->clidtosend
.id_val
, id_val
, uts_nodename());
3879 bcopy(np
->saddr
.buf
, &np
->clidtosend
.id_val
[len
], np
->saddr
.len
);
3882 np
->mntinfo4_list
= NULL
;
3883 /* save cred for issuing rfs4calls inside the renew thread */
3886 cv_init(&np
->cv_thread_exit
, NULL
, CV_DEFAULT
, NULL
);
3887 mutex_init(&np
->s_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3888 nfs_rw_init(&np
->s_recovlock
, NULL
, RW_DEFAULT
, NULL
);
3889 list_create(&np
->s_deleg_list
, sizeof (rnode4_t
),
3890 offsetof(rnode4_t
, r_deleg_link
));
3891 np
->s_thread_exit
= 0;
3892 np
->state_ref_count
= 0;
3893 np
->lease_valid
= NFS4_LEASE_NOT_STARTED
;
3894 cv_init(&np
->s_cv_otw_count
, NULL
, CV_DEFAULT
, NULL
);
3895 cv_init(&np
->s_clientid_pend
, NULL
, CV_DEFAULT
, NULL
);
3896 np
->s_otw_call_count
= 0;
3897 cv_init(&np
->wait_cb_null
, NULL
, CV_DEFAULT
, NULL
);
3898 np
->zoneid
= getzoneid();
3899 np
->zone_globals
= nfs4_get_callback_globals();
3900 ASSERT(np
->zone_globals
!= NULL
);
3905 * Create a new nfs4_server_t structure and add it to the list.
3906 * Returns new node locked; reference must eventually be freed.
/*
 * add_new_nfs4_server: build an nfs4_server for svp with
 * new_nfs4_server() and link it onto nfs4_server_lst.
 *
 * svp - server info handed to new_nfs4_server()
 * cr  - credential handed to new_nfs4_server()
 *
 * The caller must already hold nfs4_server_lst_lock (asserted below).
 * The new node's s_lock is entered here and is not dropped by any line
 * visible in this listing.
 *
 * NOTE(review): this listing omits some source lines (the declaration
 * of sp, braces, the return statement) -- confirm against the complete
 * file.
 */
3908 static struct nfs4_server
*
3909 add_new_nfs4_server(struct servinfo4
*svp
, cred_t
*cr
)
/* Callers are required to hold the list lock. */
3913 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock
));
3914 sp
= new_nfs4_server(svp
, cr
);
/* The node is locked before insque() links it onto the list. */
3915 mutex_enter(&sp
->s_lock
);
3916 insque(sp
, &nfs4_server_lst
);
3917 sp
->s_refcnt
++; /* list gets a reference */
/* Set N4S_INSERTED in s_flags now that insque() has run. */
3918 sp
->s_flags
|= N4S_INSERTED
;
/*
 * Debug switch, 0 by default.  It is the enable argument of the
 * NFS4_DEBUG calls in dumpnfs4slist(), and nfs4_move_mi() calls
 * dumpnfs4slist() only when it is non-zero.
 */
3923 int nfs4_server_t_debug
= 0;
/*
 * dumpnfs4slist: debug aid that prints the global nfs4_server list
 * through NFS4_DEBUG, using nfs4_server_t_debug as the enable flag.
 *
 * txt      - label printed in the heading (e.g. the caller's name)
 * mi       - mntinfo4 of interest; only its address is printed
 * clientid - clientid being looked for
 * srv_p    - servinfo4 whose address is compared with each list entry
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * declaration of np, braces, the loop's advance expression) -- confirm
 * against the complete file.
 */
3928 dumpnfs4slist(char *txt
, mntinfo4_t
*mi
, clientid4 clientid
, servinfo4_t
*srv_p
)
/* Local prototype; hash16() of the address bytes is printed with %04X. */
3930 int hash16(void *p
, int len
);
3933 NFS4_DEBUG(nfs4_server_t_debug
, (CE_NOTE
,
3934 "dumping nfs4_server_t list in %s", txt
));
/* First report what is being searched for. */
3935 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3936 "mi 0x%p, want clientid %llx, addr %d/%04X",
3937 mi
, (longlong_t
)clientid
, srv_p
->sv_addr
.len
,
3938 hash16((void *)srv_p
->sv_addr
.buf
, srv_p
->sv_addr
.len
)));
/* Walk from the list head's forw link until the head is reached again. */
3939 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
;
3941 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3942 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
3943 np
, (longlong_t
)np
->clientid
, np
->saddr
.len
,
3944 hash16((void *)np
->saddr
.buf
, np
->saddr
.len
),
3945 np
->state_ref_count
));
/* Flag entries whose address has the same length and bytes as srv_p's. */
3946 if (np
->saddr
.len
== srv_p
->sv_addr
.len
&&
3947 bcmp(np
->saddr
.buf
, srv_p
->sv_addr
.buf
,
3948 np
->saddr
.len
) == 0)
3949 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3950 " - address matches"));
/* A node clientid of 0 is reported as a match as well. */
3951 if (np
->clientid
== clientid
|| np
->clientid
== 0)
3952 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3953 " - clientid matches"));
/* Note entries that are not marked NFS4_THREAD_EXIT. */
3954 if (np
->s_thread_exit
!= NFS4_THREAD_EXIT
)
3955 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3956 " - thread not exiting"));
3964 * Move a mntinfo4_t from one server list to another.
3965 * Locking of the two nfs4_server_t nodes will be done in list order.
3967 * Returns NULL if the current nfs4_server_t for the filesystem could not
3968 * be found (e.g., due to forced unmount). Otherwise returns a reference
3969 * to the new nfs4_server_t, which must eventually be freed.
/*
 * nfs4_move_mi: re-home mi from the nfs4_server matching 'old' to the
 * nfs4_server matching 'new'.
 *
 * mi  - mount being moved; must belong to the caller's zone (asserted)
 * old - servinfo4 whose address identifies the current server (op)
 * new - servinfo4 whose address identifies the target server (np)
 *
 * nfs4_server_lst_lock is held while the list is searched and while a
 * missing target is created with add_new_nfs4_server().  The mount's
 * open-file count is moved from op->state_ref_count to
 * np->state_ref_count, and the reference on op is released at the end.
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * num_open declaration, the assignments to op and np, continue/break/
 * return statements, some conditions, braces) -- confirm against the
 * complete file before relying on the control flow described here.
 */
3972 nfs4_move_mi(mntinfo4_t
*mi
, servinfo4_t
*old
, servinfo4_t
*new)
3974 nfs4_server_t
*p
, *op
= NULL
, *np
= NULL
;
3976 zoneid_t zoneid
= nfs_zoneid();
3978 ASSERT(nfs_zone() == mi
->mi_zone
);
/* The list lock covers the search and any creation of the target node. */
3980 mutex_enter(&nfs4_server_lst_lock
);
3982 if (nfs4_server_t_debug
)
3983 dumpnfs4slist("nfs4_move_mi", mi
, (clientid4
)0, new);
/* Candidates are tested for zone, address and NFS4_THREAD_EXIT. */
3985 for (p
= nfs4_server_lst
.forw
; p
!= &nfs4_server_lst
; p
= p
->forw
) {
3986 if (p
->zoneid
!= zoneid
)
/* Address equals 'old': the current server; its s_lock is entered. */
3988 if (p
->saddr
.len
== old
->sv_addr
.len
&&
3989 bcmp(p
->saddr
.buf
, old
->sv_addr
.buf
, p
->saddr
.len
) == 0 &&
3990 p
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
3992 mutex_enter(&op
->s_lock
);
/* Address equals 'new': the target server; its s_lock is entered. */
3995 if (p
->saddr
.len
== new->sv_addr
.len
&&
3996 bcmp(p
->saddr
.buf
, new->sv_addr
.buf
, p
->saddr
.len
) == 0 &&
3997 p
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
3999 mutex_enter(&np
->s_lock
);
/* Both nodes have been located. */
4001 if (op
!= NULL
&& np
!= NULL
)
4006 * Filesystem has been forcibly unmounted. Bail out.
/* Bail-out path: the target's s_lock and the list lock are released. */
4009 mutex_exit(&np
->s_lock
);
4010 mutex_exit(&nfs4_server_lst_lock
);
4017 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4018 "nfs4_move_mi: no target nfs4_server, will create."));
/* The new node is created with kcred rather than a caller credential. */
4020 np
= add_new_nfs4_server(new, kcred
);
4022 mutex_exit(&nfs4_server_lst_lock
);
4024 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4025 "nfs4_move_mi: for mi 0x%p, "
4026 "old servinfo4 0x%p, new servinfo4 0x%p, "
4027 "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
4028 (void*)mi
, (void*)old
, (void*)new,
4029 (void*)op
, (void*)np
));
4030 ASSERT(op
!= NULL
&& np
!= NULL
);
4032 /* discard any delegations */
4033 nfs4_deleg_discard(mi
, op
);
/*
 * Move the open-file count from op to np.  mi_open_files is zeroed
 * around nfs4_remove_mi_from_server_nolock() and restored afterwards --
 * presumably so that call does not adjust op's count a second time;
 * TODO confirm against that function.
 */
4035 num_open
= mi
->mi_open_files
;
4036 mi
->mi_open_files
= 0;
4037 op
->state_ref_count
-= num_open
;
4038 ASSERT(op
->state_ref_count
>= 0);
4039 np
->state_ref_count
+= num_open
;
4040 nfs4_remove_mi_from_server_nolock(mi
, op
);
4041 mi
->mi_open_files
= num_open
;
4042 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4043 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
4044 mi
->mi_open_files
, op
->state_ref_count
, np
->state_ref_count
));
4046 nfs4_add_mi_to_server(np
, mi
);
/* Drop both node locks, then give up the reference on the old server. */
4048 mutex_exit(&op
->s_lock
);
4049 mutex_exit(&np
->s_lock
);
4050 nfs4_server_rele(op
);
4056 * Need to have the nfs4_server_lst_lock.
4057 * Search the nfs4_server list to find a match on this servinfo4
4058 * based on its address.
4060 * Returns NULL if no match is found. Otherwise returns a reference (which
4061 * must eventually be freed) to a locked nfs4_server.
/*
 * servinfo4_to_nfs4_server: find the nfs4_server whose address equals
 * srv_p->sv_addr.
 *
 * srv_p - servinfo4 supplying the address (length and bytes) to match
 *
 * The caller must hold nfs4_server_lst_lock (asserted below).  Only
 * nodes in the caller's zone (nfs_zoneid()) that are not marked
 * NFS4_THREAD_EXIT are considered; the matching node's s_lock is
 * entered.
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * declaration of np, the statements following mutex_enter(), braces) --
 * confirm against the complete file.
 */
4064 servinfo4_to_nfs4_server(servinfo4_t
*srv_p
)
4067 zoneid_t zoneid
= nfs_zoneid();
4069 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock
));
4070 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
; np
= np
->forw
) {
/* Lengths are compared first, so bcmp() covers exactly saddr.len bytes. */
4071 if (np
->zoneid
== zoneid
&&
4072 np
->saddr
.len
== srv_p
->sv_addr
.len
&&
4073 bcmp(np
->saddr
.buf
, srv_p
->sv_addr
.buf
,
4074 np
->saddr
.len
) == 0 &&
4075 np
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
4076 mutex_enter(&np
->s_lock
);
4085 * Locks the nfs4_server down if it is found and returns a reference that
4086 * must eventually be freed.
/*
 * lookup_nfs4_server: check that sp is still on nfs4_server_lst and
 * usable, leaving it locked if so.
 *
 * sp        - node to look for; compared by pointer with each entry
 * any_state - non-zero also accepts a node marked NFS4_THREAD_EXIT
 *
 * Takes nfs4_server_lst_lock itself and then each node's s_lock in turn
 * (list lock first, node lock second).  On a match only the list lock
 * is released in the visible lines; the node's s_lock stays held.
 *
 * NOTE(review): this listing omits some source lines (the declaration
 * of np, the statements after the list lock is dropped on a match, the
 * final return, braces) -- confirm against the complete file.
 */
4088 static nfs4_server_t
*
4089 lookup_nfs4_server(nfs4_server_t
*sp
, int any_state
)
4093 mutex_enter(&nfs4_server_lst_lock
);
4094 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
; np
= np
->forw
) {
/* s_refcnt and s_thread_exit are examined under the node's own lock. */
4095 mutex_enter(&np
->s_lock
);
4096 if (np
== sp
&& np
->s_refcnt
> 0 &&
4097 (np
->s_thread_exit
!= NFS4_THREAD_EXIT
|| any_state
)) {
/* Match: only the list lock is released here. */
4098 mutex_exit(&nfs4_server_lst_lock
);
/* No match: unlock this node before moving to the next one. */
4102 mutex_exit(&np
->s_lock
);
/* Loop finished: release the list lock. */
4104 mutex_exit(&nfs4_server_lst_lock
);
4110 * The caller should be holding mi->mi_recovlock, and it should continue to
4111 * hold the lock until done with the returned nfs4_server_t. Once
4112 * mi->mi_recovlock is released, there is no guarantee that the returned
4113 * mi->nfs4_server_t will continue to correspond to mi.
/*
 * find_nfs4_server: look up mi->mi_srv with lookup_nfs4_server(),
 * passing 0 for any_state.
 *
 * mi - mount whose mi_srv is looked up; mi->mi_recovlock must be held
 *      as reader or writer (asserted below)
 *
 * Returns whatever lookup_nfs4_server() returns.
 *
 * NOTE(review): this listing omits the return type line and braces.
 */
4116 find_nfs4_server(mntinfo4_t
*mi
)
4118 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
4119 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
4121 return (lookup_nfs4_server(mi
->mi_srv
, 0));
4125 * Same as above, but takes an "any_state" parameter which can be
4126 * set to 1 if the caller wishes to find nfs4_server_t's which
4127 * have been marked for termination by the exit of the renew
4128 * thread. This should only be used by operations which are
4129 * cleaning up and will not cause an OTW op.
/*
 * find_nfs4_server_all: look up mi->mi_srv with lookup_nfs4_server(),
 * forwarding the caller's any_state.
 *
 * mi        - mount whose mi_srv is looked up; mi->mi_recovlock must be
 *             held as reader or writer (asserted below)
 * any_state - passed through unchanged to lookup_nfs4_server()
 *
 * Returns whatever lookup_nfs4_server() returns.
 *
 * NOTE(review): this listing omits the return type line and braces.
 */
4132 find_nfs4_server_all(mntinfo4_t
*mi
, int any_state
)
4134 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
4135 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
4137 return (lookup_nfs4_server(mi
->mi_srv
, any_state
));
4141 * Lock sp, but only if it's still active (in the list and hasn't been
4142 * flagged as exiting) or 'any_state' is non-zero.
4143 * Returns TRUE if sp got locked and adds a reference to sp.
/*
 * nfs4_server_vlock: wrapper around lookup_nfs4_server(sp, any_state)
 * that reports whether the lookup produced a non-NULL result.
 *
 * sp        - node to validate and lock
 * any_state - passed through unchanged to lookup_nfs4_server()
 *
 * Returns non-zero when lookup_nfs4_server() found sp, zero otherwise.
 *
 * NOTE(review): this listing omits the return type line and braces.
 */
4146 nfs4_server_vlock(nfs4_server_t
*sp
, int any_state
)
4148 return (lookup_nfs4_server(sp
, any_state
) != NULL
);
4152 * Release the reference to sp and destroy it if that's the last one.
/*
 * nfs4_server_rele: give up a reference on sp and hand the node to
 * destroy_nfs4_server() when no references remain.
 *
 * sp - node being released; s_refcnt must be > 0 on entry (asserted)
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * lines that adjust s_refcnt and unlink the node from the list, return
 * statements, braces) -- confirm against the complete file.
 */
4156 nfs4_server_rele(nfs4_server_t
*sp
)
4158 mutex_enter(&sp
->s_lock
);
4159 ASSERT(sp
->s_refcnt
> 0);
/* References remain: just unlock. */
4161 if (sp
->s_refcnt
> 0) {
4162 mutex_exit(&sp
->s_lock
);
/*
 * s_lock is dropped and retaken after nfs4_server_lst_lock, which is
 * the order lookup_nfs4_server() uses (list lock, then node lock).
 */
4165 mutex_exit(&sp
->s_lock
);
4167 mutex_enter(&nfs4_server_lst_lock
);
4168 mutex_enter(&sp
->s_lock
);
/* Re-check the count now that both locks are held. */
4169 if (sp
->s_refcnt
> 0) {
4170 mutex_exit(&sp
->s_lock
);
4171 mutex_exit(&nfs4_server_lst_lock
);
/*
 * Clear the list links and release the list lock; s_lock is still held
 * when destroy_nfs4_server() is called.
 */
4175 sp
->forw
= sp
->back
= NULL
;
4176 mutex_exit(&nfs4_server_lst_lock
);
4177 destroy_nfs4_server(sp
);
/*
 * destroy_nfs4_server: free an nfs4_server and the resources hanging
 * off it.
 *
 * sp - node to destroy; the caller must hold sp->s_lock, and s_refcnt
 *      and s_otw_call_count must both be 0 (all asserted below)
 *
 * sp->s_lock is released and destroyed here and sp itself is freed, so
 * sp must not be used after this call.
 *
 * NOTE(review): this listing omits some source lines (return type,
 * lines between the assertions and the first kmem_free(), braces) --
 * confirm against the complete file.
 */
4181 destroy_nfs4_server(nfs4_server_t
*sp
)
4183 ASSERT(MUTEX_HELD(&sp
->s_lock
));
4184 ASSERT(sp
->s_refcnt
== 0);
4185 ASSERT(sp
->s_otw_call_count
== 0);
/* saddr.buf is freed with size saddr.maxlen, id_val with size id_len. */
4190 kmem_free(sp
->saddr
.buf
, sp
->saddr
.maxlen
);
4191 kmem_free(sp
->clidtosend
.id_val
, sp
->clidtosend
.id_len
);
4192 mutex_exit(&sp
->s_lock
);
4194 /* destroy the nfs4_server */
4195 nfs4callback_destroy(sp
);
4196 list_destroy(&sp
->s_deleg_list
);
/* Tear down the mutex, condition variables and rwlock, then the node. */
4197 mutex_destroy(&sp
->s_lock
);
4198 cv_destroy(&sp
->cv_thread_exit
);
4199 cv_destroy(&sp
->s_cv_otw_count
);
4200 cv_destroy(&sp
->s_clientid_pend
);
4201 cv_destroy(&sp
->wait_cb_null
);
4202 nfs_rw_destroy(&sp
->s_recovlock
);
4203 kmem_free(sp
, sizeof (*sp
));
4207 * Fork off a thread to free the data structures for a mount.
/*
 * async_free_mount: package the arguments into a freemountargs_t and
 * start nfs4_free_mount_thread() on them with zthread_create().
 *
 * vfsp - filesystem to free; stored in fm_vfsp, and a hold is taken on
 *        its mntinfo4 with MI4_HOLD
 * flag - stored in fm_flag
 * cr   - credential (its handling is not visible in this listing)
 *
 * The args allocation made here is freed by nfs4_free_mount_thread().
 *
 * NOTE(review): this listing omits some source lines (return type,
 * the lines between the visible assignments, the tail of the
 * zthread_create() call, braces) -- confirm against the complete file.
 */
4211 async_free_mount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
4213 freemountargs_t
*args
;
/* KM_SLEEP allocation. */
4214 args
= kmem_alloc(sizeof (freemountargs_t
), KM_SLEEP
);
4215 args
->fm_vfsp
= vfsp
;
/* Hold the mntinfo4 for the new thread. */
4217 MI4_HOLD(VFTOMI4(vfsp
));
4218 args
->fm_flag
= flag
;
4221 (void) zthread_create(NULL
, 0, nfs4_free_mount_thread
, args
, 0,
/*
 * nfs4_free_mount_thread: thread entry point started by
 * async_free_mount().  Runs nfs4_free_mount() on the packaged arguments
 * and then releases what the argument block carried.
 *
 * args - freemountargs_t with fm_vfsp, fm_flag and fm_cr; freed here
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * declaration of mi, the line between VFS_RELE() and kmem_free() where
 * mi is presumably used, thread exit, braces) -- confirm against the
 * complete file.
 */
4226 nfs4_free_mount_thread(freemountargs_t
*args
)
4229 nfs4_free_mount(args
->fm_vfsp
, args
->fm_flag
, args
->fm_cr
);
/* mi is fetched from the vfs before VFS_RELE() drops the vfs hold. */
4230 mi
= VFTOMI4(args
->fm_vfsp
);
4231 crfree(args
->fm_cr
);
4232 VFS_RELE(args
->fm_vfsp
);
4234 kmem_free(args
, sizeof (freemountargs_t
));
4240 * Thread to free the data structures for a given filesystem.
/*
 * nfs4_free_mount: tear down the client-side state of filesystem vfsp.
 *
 * vfsp - filesystem being freed
 * flag - passed on to nfs4_ephemeral_umount()
 * cr   - credential passed to nfs4_ephemeral_umount() and
 *        destroy_rtable4()
 *
 * Order of work in the visible lines: wait for the server's outstanding
 * over-the-wire calls and for recovery on mi to finish, run the
 * ephemeral-mount unmount step, purge the DNLC for vfsp again, stop the
 * async manager thread, destroy the rnode table, detach mi from its
 * nfs4_server, leave the CPR framework, and remove mi from the zone
 * list.
 *
 * NOTE(review): this listing omits many source lines (return type,
 * several local declarations, the conditions guarding the CPR calls and
 * the use of sp, the use of 'removed', braces) -- confirm against the
 * complete file before relying on the control flow described here.
 */
4243 nfs4_free_mount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
4245 mntinfo4_t
*mi
= VFTOMI4(vfsp
);
4247 callb_cpr_t cpr_info
;
4249 boolean_t async_thread
;
4253 nfs4_ephemeral_tree_t
*eph_tree
;
4256 * We need to participate in the CPR framework if this is a kernel
/* True when the current process is the zone's zsched process. */
4259 async_thread
= (curproc
== nfs_zone()->zone_zsched
);
4261 mutex_init(&cpr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4262 CALLB_CPR_INIT(&cpr_info
, &cpr_lock
, callb_generic_cpr
,
4263 "nfsv4AsyncUnmount");
4267 * We need to wait for all outstanding OTW calls
4268 * and recovery to finish before we remove the mi
4269 * from the nfs4_server_t, as current pending
4270 * calls might still need this linkage (in order
4271 * to find a nfs4_server_t from a mntinfo4_t).
/* mi_recovlock is held as reader only for the duration of the lookup. */
4273 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, FALSE
);
4274 sp
= find_nfs4_server(mi
);
4275 nfs_rw_exit(&mi
->mi_recovlock
);
/*
 * Wait on s_cv_otw_count, with s_lock as its mutex, until
 * s_otw_call_count reaches 0.  Each wait is bracketed by
 * CALLB_CPR_SAFE_BEGIN/END under cpr_lock.
 */
4278 while (sp
->s_otw_call_count
!= 0) {
4280 mutex_enter(&cpr_lock
);
4281 CALLB_CPR_SAFE_BEGIN(&cpr_info
);
4282 mutex_exit(&cpr_lock
);
4284 cv_wait(&sp
->s_cv_otw_count
, &sp
->s_lock
);
4286 mutex_enter(&cpr_lock
);
4287 CALLB_CPR_SAFE_END(&cpr_info
, &cpr_lock
);
4288 mutex_exit(&cpr_lock
);
/* Done with the server: unlock it and drop the lookup's reference. */
4291 mutex_exit(&sp
->s_lock
);
4292 nfs4_server_rele(sp
);
/*
 * Same waiting pattern for recovery: wait on mi_cv_in_recov, with
 * mi_lock as its mutex, until mi_in_recovery reaches 0.
 */
4296 mutex_enter(&mi
->mi_lock
);
4297 while (mi
->mi_in_recovery
!= 0) {
4299 mutex_enter(&cpr_lock
);
4300 CALLB_CPR_SAFE_BEGIN(&cpr_info
);
4301 mutex_exit(&cpr_lock
);
4303 cv_wait(&mi
->mi_cv_in_recov
, &mi
->mi_lock
);
4305 mutex_enter(&cpr_lock
);
4306 CALLB_CPR_SAFE_END(&cpr_info
, &cpr_lock
);
4307 mutex_exit(&cpr_lock
);
4310 mutex_exit(&mi
->mi_lock
);
4313 * If we got an error, then do not nuke the
4314 * tree. Either the harvester is busy reclaiming
4315 * this node or we ran into some busy condition.
4317 * The harvester will eventually come along and cleanup.
4318 * The only problem would be the root mount point.
4320 * Since the busy node can occur for a variety
4321 * of reasons and can result in an entry staying
4322 * in df output but no longer accessible from the
4323 * directory tree, we are okay.
/* The activate step runs only when nfs4_ephemeral_umount() returns 0. */
4325 if (!nfs4_ephemeral_umount(mi
, flag
, cr
,
4326 &must_unlock
, &eph_tree
))
4327 nfs4_ephemeral_umount_activate(mi
, &must_unlock
,
4331 * The original purge of the dnlc via 'dounmount'
4332 * doesn't guarantee that another dnlc entry was not
4333 * added while we waitied for all outstanding OTW
4334 * and recovery calls to finish. So re-purge the
4337 (void) dnlc_purge_vfsp(vfsp
, 0);
4340 * We need to explicitly stop the manager thread; the asyc worker
4341 * threads can timeout and exit on their own.
/* mi_max_threads is zeroed and the workers woken under mi_async_lock. */
4343 mutex_enter(&mi
->mi_async_lock
);
4344 mi
->mi_max_threads
= 0;
4345 NFS4_WAKEALL_ASYNC_WORKERS(mi
->mi_async_work_cv
);
4346 mutex_exit(&mi
->mi_async_lock
);
4347 if (mi
->mi_manager_thread
)
4348 nfs4_async_manager_stop(vfsp
);
4350 destroy_rtable4(vfsp
, cr
);
/* Detach mi from its nfs4_server; NULL is passed as the second argument. */
4352 nfs4_remove_mi_from_server(mi
, NULL
);
/* Leave the CPR framework and discard its lock. */
4355 mutex_enter(&cpr_lock
);
4356 CALLB_CPR_EXIT(&cpr_info
); /* drops cpr_lock */
4357 mutex_destroy(&cpr_lock
);
/* NOTE(review): how 'removed' gates zone_rele_ref() is not visible here. */
4360 removed
= nfs4_mi_zonelist_remove(mi
);
4362 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
4365 /* Referral related sub-routines */
4367 /* Free the protocol strings held by a knetconfig */
/*
 * free_knconf_contents: release the knc_protofmly and knc_proto buffers
 * of k, each with size KNC_STRSIZE.  k itself is not freed by any line
 * visible here.
 *
 * k - knetconfig whose string buffers are released
 *
 * NOTE(review): this listing omits some source lines (return type, any
 * guard preceding the first test, the condition in front of the
 * knc_proto free, braces) -- confirm against the complete file.
 */
4369 free_knconf_contents(struct knetconfig
*k
)
/* knc_protofmly is freed only when it is non-NULL. */
4373 if (k
->knc_protofmly
)
4374 kmem_free(k
->knc_protofmly
, KNC_STRSIZE
);
4376 kmem_free(k
->knc_proto
, KNC_STRSIZE
);
4380 * This updates newpath variable with exact name component from the
4381 * path which gave us a NFS4ERR_MOVED error.
4382 * If the path is /rp/aaa/bbb and nth value is 1, aaa is returned.
/*
 * extract_referral_point: scan the path string svp and build, in a
 * freshly allocated buffer, the component that follows slash number
 * nth + 1.
 *
 * svp - NUL-terminated path to scan (a string here, not a servinfo4)
 * nth - zero-based index of the wanted component after the first one
 *
 * newpath is a zeroed MAXPATHLEN allocation made with KM_SLEEP.
 * NOTE(review): presumably newpath is returned and the caller frees it
 * with size MAXPATHLEN -- the return statement is not visible here.
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * declarations of p and i, slash counting, the copy loop, break/return,
 * braces) -- confirm against the complete file.
 */
4385 extract_referral_point(const char *svp
, int nth
)
4387 int num_slashes
= 0;
4389 char *newpath
= NULL
;
4392 newpath
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
4393 for (p
= svp
; *p
; p
++) {
/* Reached once num_slashes equals nth + 1. */
4396 if (num_slashes
== nth
+ 1) {
/* Terminate the extracted component. */
4405 newpath
[i
++] = '\0';
4413 * This sets up a new path in sv_path to do a lookup of the referral point.
4414 * If the path is /rp/aaa/bbb and the referral point is aaa,
4415 * this updates sv_path to /rp/aaa. This path will be used to get referral
/*
 * setup_newsvpath: replace svp->sv_path with a shorter copy of itself,
 * cut when the slash count reaches nth + 1.
 *
 * svp - servinfo4 whose sv_path and sv_pathlen are rewritten in place
 * nth - the path is cut once num_slashes equals nth + 1
 *
 * A MAXPATHLEN scratch buffer (newpath) is allocated with KM_SLEEP and
 * freed before the end of the visible lines.
 *
 * NOTE(review): this listing omits some source lines (return type, the
 * declarations of newpath and p, the per-character copy and slash
 * counting, the terminator store, break, braces) and may continue past
 * the end of this chunk -- confirm against the complete file.
 */
4419 setup_newsvpath(servinfo4_t
*svp
, int nth
)
4421 int num_slashes
= 0, pathlen
, i
= 0;
4424 newpath
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
4425 for (p
= svp
->sv_path
; *p
; p
++) {
4429 if (num_slashes
== nth
+ 1) {
/*
 * The old sv_path is freed with its recorded sv_pathlen, then replaced
 * by an allocation of exactly strlen(newpath) + 1 bytes.
 */
4431 pathlen
= strlen(newpath
) + 1;
4432 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
4433 svp
->sv_path
= kmem_alloc(pathlen
, KM_SLEEP
);
4434 svp
->sv_pathlen
= pathlen
;
4435 bcopy(newpath
, svp
->sv_path
, pathlen
);
/* Release the scratch buffer. */
4440 kmem_free(newpath
, MAXPATHLEN
);