4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
24 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/vnode.h>
39 #include <sys/pathname.h>
40 #include <sys/sysmacros.h>
42 #include <sys/mkdev.h>
43 #include <sys/mount.h>
44 #include <sys/statvfs.h>
45 #include <sys/errno.h>
46 #include <sys/debug.h>
47 #include <sys/cmn_err.h>
48 #include <sys/utsname.h>
49 #include <sys/bootconf.h>
50 #include <sys/modctl.h>
52 #include <sys/flock.h>
55 #include <sys/policy.h>
56 #include <sys/socket.h>
57 #include <sys/netconfig.h>
60 #include <sys/mntent.h>
62 #include <rpc/types.h>
64 #include <rpc/rpcsec_gss.h>
68 #include <nfs/nfs_clnt.h>
69 #include <nfs/mount.h>
70 #include <nfs/nfs_acl.h>
72 #include <sys/fs_subr.h>
75 #include <nfs/rnode4.h>
76 #include <nfs/nfs4_clnt.h>
77 #include <sys/fs/autofs.h>
83 * Arguments passed to thread to free data structures from forced unmount.
92 static void async_free_mount(vfs_t
*, int, cred_t
*);
93 static void nfs4_free_mount(vfs_t
*, int, cred_t
*);
94 static void nfs4_free_mount_thread(freemountargs_t
*);
95 static int nfs4_chkdup_servinfo4(servinfo4_t
*, servinfo4_t
*);
98 * From rpcsec module (common/rpcsec).
100 extern int sec_clnt_loadinfo(struct sec_data
*, struct sec_data
**, model_t
);
101 extern void sec_clnt_freeinfo(struct sec_data
*);
104 * The order and contents of this structure must be kept in sync with that of
105 * rfsreqcnt_v4_tmpl in nfs_stats.c
107 static char *rfsnames_v4
[] = {
108 "null", "compound", "reserved", "access", "close", "commit", "create",
109 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
110 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
111 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
112 "read", "readdir", "readlink", "remove", "rename", "renew",
113 "restorefh", "savefh", "secinfo", "setattr", "setclientid",
114 "setclientid_confirm", "verify", "write"
118 * nfs4_max_mount_retry is the number of times the client will redrive
119 * a mount compound before giving up and returning failure. The intent
120 * is to redrive mount compounds which fail NFS4ERR_STALE so that
121 * if a component of the server path being mounted goes stale, it can
122 * "recover" by redriving the mount compound (LOOKUP ops). This recovery
123 * code is needed outside of the recovery framework because mount is a
124 * special case. The client doesn't create vnodes/rnodes for components
125 * of the server path being mounted. The recovery code recovers real
126 * client objects, not STALE FHs which map to components of the server
127 * path being mounted.
129 * We could just fail the mount on the first time, but that would
130 * instantly trigger failover (from nfs4_mount), and the client should
131 * try to re-lookup the STALE FH before doing failover. The easiest
132 * way to "re-lookup" is to simply redrive the mount compound.
134 static int nfs4_max_mount_retry
= 2;
137 * nfs4 vfs operations.
139 int nfs4_mount(vfs_t
*, vnode_t
*, struct mounta
*, cred_t
*);
140 static int nfs4_unmount(vfs_t
*, int, cred_t
*);
141 static int nfs4_root(vfs_t
*, vnode_t
**);
142 static int nfs4_statvfs(vfs_t
*, struct statvfs64
*);
143 static int nfs4_sync(vfs_t
*, short, cred_t
*);
144 static int nfs4_vget(vfs_t
*, vnode_t
**, fid_t
*);
145 static int nfs4_mountroot(vfs_t
*, whymountroot_t
);
146 static void nfs4_freevfs(vfs_t
*);
148 static int nfs4rootvp(vnode_t
**, vfs_t
*, struct servinfo4
*,
149 int, cred_t
*, zone_t
*);
151 vfsops_t
*nfs4_vfsops
;
153 int nfs4_vfsinit(void);
154 void nfs4_vfsfini(void);
155 static void nfs4setclientid_init(void);
156 static void nfs4setclientid_fini(void);
157 static void nfs4setclientid_otw(mntinfo4_t
*, servinfo4_t
*, cred_t
*,
158 struct nfs4_server
*, nfs4_error_t
*, int *);
159 static void destroy_nfs4_server(nfs4_server_t
*);
160 static void remove_mi(nfs4_server_t
*, mntinfo4_t
*);
162 extern void nfs4_ephemeral_init(void);
163 extern void nfs4_ephemeral_fini(void);
165 /* referral related routines */
166 static servinfo4_t
*copy_svp(servinfo4_t
*);
167 static void free_knconf_contents(struct knetconfig
*k
);
168 static char *extract_referral_point(const char *, int);
169 static void setup_newsvpath(servinfo4_t
*, int);
170 static void update_servinfo4(servinfo4_t
*, fs_location4
*,
171 struct nfs_fsl_info
*, char *, int);
174 * Initialize the vfs structure
177 static int nfs4fstyp
;
181 * Debug variable to check for rdma based
182 * transport startup and cleanup. Controlled
183 * through /etc/system. Off by default.
185 extern int rdma_debug
;
188 nfs4init(int fstyp
, char *name
)
190 static const fs_operation_def_t nfs4_vfsops_template
[] = {
191 VFSNAME_MOUNT
, { .vfs_mount
= nfs4_mount
},
192 VFSNAME_UNMOUNT
, { .vfs_unmount
= nfs4_unmount
},
193 VFSNAME_ROOT
, { .vfs_root
= nfs4_root
},
194 VFSNAME_STATVFS
, { .vfs_statvfs
= nfs4_statvfs
},
195 VFSNAME_SYNC
, { .vfs_sync
= nfs4_sync
},
196 VFSNAME_VGET
, { .vfs_vget
= nfs4_vget
},
197 VFSNAME_MOUNTROOT
, { .vfs_mountroot
= nfs4_mountroot
},
198 VFSNAME_FREEVFS
, { .vfs_freevfs
= nfs4_freevfs
},
204 nfs4_vnodeops
= NULL
;
205 nfs4_trigger_vnodeops
= NULL
;
207 error
= vfs_setfsops(fstyp
, nfs4_vfsops_template
, &nfs4_vfsops
);
209 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
210 "nfs4init: bad vfs ops template");
214 error
= vn_make_ops(name
, nfs4_vnodeops_template
, &nfs4_vnodeops
);
216 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
217 "nfs4init: bad vnode ops template");
221 error
= vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template
,
222 &nfs4_trigger_vnodeops
);
224 zcmn_err(GLOBAL_ZONEID
, CE_WARN
,
225 "nfs4init: bad trigger vnode ops template");
230 (void) nfs4_vfsinit();
231 (void) nfs4_init_dot_entries();
235 if (nfs4_trigger_vnodeops
!= NULL
)
236 vn_freevnodeops(nfs4_trigger_vnodeops
);
238 if (nfs4_vnodeops
!= NULL
)
239 vn_freevnodeops(nfs4_vnodeops
);
241 (void) vfs_freevfsops_by_type(fstyp
);
250 (void) nfs4_destroy_dot_entries();
255 * Create a new sec_data structure to store AUTH_DH related data:
256 * netname, syncaddr, knetconfig. There is no AUTH_F_RPCTIMESYNC
257 * flag set for NFS V4 since we avoid contacting the rpcbind
258 * daemon and instead use the IP time service (IPPORT_TIMESERVER).
260 * sec_data can be freed by sec_clnt_freeinfo().
262 static struct sec_data
*
263 create_authdh_data(char *netname
, int nlen
, struct netbuf
*syncaddr
,
264 struct knetconfig
*knconf
) {
265 struct sec_data
*secdata
;
266 dh_k4_clntdata_t
*data
;
269 if (syncaddr
== NULL
|| syncaddr
->buf
== NULL
|| nlen
== 0)
272 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
275 data
= kmem_alloc(sizeof (*data
), KM_SLEEP
);
277 data
->syncaddr
.maxlen
= syncaddr
->maxlen
;
278 data
->syncaddr
.len
= syncaddr
->len
;
279 data
->syncaddr
.buf
= kmem_alloc(syncaddr
->len
, KM_SLEEP
);
280 bcopy(syncaddr
->buf
, data
->syncaddr
.buf
, syncaddr
->len
);
283 * duplicate the knconf information for the
286 data
->knconf
= kmem_alloc(sizeof (*knconf
), KM_SLEEP
);
287 *data
->knconf
= *knconf
;
288 pf
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
289 p
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
290 bcopy(knconf
->knc_protofmly
, pf
, KNC_STRSIZE
);
291 bcopy(knconf
->knc_proto
, p
, KNC_STRSIZE
);
292 data
->knconf
->knc_protofmly
= pf
;
293 data
->knconf
->knc_proto
= p
;
295 /* move server netname to the sec_data structure */
296 data
->netname
= kmem_alloc(nlen
, KM_SLEEP
);
297 bcopy(netname
, data
->netname
, nlen
);
298 data
->netnamelen
= (int)nlen
;
300 secdata
->secmod
= AUTH_DH
;
301 secdata
->rpcflavor
= AUTH_DH
;
302 secdata
->data
= (caddr_t
)data
;
308 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
309 * is responsible for freeing.
312 copy_sec_data(sec_data_t
*fsecdata
) {
313 sec_data_t
*tsecdata
;
315 if (fsecdata
== NULL
)
318 if (fsecdata
->rpcflavor
== AUTH_DH
) {
319 dh_k4_clntdata_t
*fdata
= (dh_k4_clntdata_t
*)fsecdata
->data
;
324 tsecdata
= (sec_data_t
*)create_authdh_data(fdata
->netname
,
325 fdata
->netnamelen
, &fdata
->syncaddr
, fdata
->knconf
);
330 tsecdata
= kmem_zalloc(sizeof (sec_data_t
), KM_SLEEP
);
332 tsecdata
->secmod
= fsecdata
->secmod
;
333 tsecdata
->rpcflavor
= fsecdata
->rpcflavor
;
334 tsecdata
->flags
= fsecdata
->flags
;
335 tsecdata
->uid
= fsecdata
->uid
;
337 if (fsecdata
->rpcflavor
== RPCSEC_GSS
) {
338 gss_clntdata_t
*gcd
= (gss_clntdata_t
*)fsecdata
->data
;
340 tsecdata
->data
= (caddr_t
)copy_sec_data_gss(gcd
);
342 tsecdata
->data
= NULL
;
349 copy_sec_data_gss(gss_clntdata_t
*fdata
)
351 gss_clntdata_t
*tdata
;
356 tdata
= kmem_zalloc(sizeof (gss_clntdata_t
), KM_SLEEP
);
358 tdata
->mechanism
.length
= fdata
->mechanism
.length
;
359 tdata
->mechanism
.elements
= kmem_zalloc(fdata
->mechanism
.length
,
361 bcopy(fdata
->mechanism
.elements
, tdata
->mechanism
.elements
,
362 fdata
->mechanism
.length
);
364 tdata
->service
= fdata
->service
;
366 (void) strcpy(tdata
->uname
, fdata
->uname
);
367 (void) strcpy(tdata
->inst
, fdata
->inst
);
368 (void) strcpy(tdata
->realm
, fdata
->realm
);
370 tdata
->qop
= fdata
->qop
;
376 nfs4_chkdup_servinfo4(servinfo4_t
*svp_head
, servinfo4_t
*svp
)
381 * Iterate over the servinfo4 list to make sure
382 * we do not have a duplicate. Skip any servinfo4
383 * that has been marked "NOT IN USE"
385 for (si
= svp_head
; si
; si
= si
->sv_next
) {
386 (void) nfs_rw_enter_sig(&si
->sv_lock
, RW_READER
, 0);
387 if (si
->sv_flags
& SV4_NOTINUSE
) {
388 nfs_rw_exit(&si
->sv_lock
);
391 nfs_rw_exit(&si
->sv_lock
);
394 if (si
->sv_addr
.len
== svp
->sv_addr
.len
&&
395 strcmp(si
->sv_knconf
->knc_protofmly
,
396 svp
->sv_knconf
->knc_protofmly
) == 0 &&
397 bcmp(si
->sv_addr
.buf
, svp
->sv_addr
.buf
,
398 si
->sv_addr
.len
) == 0) {
399 /* it's a duplicate */
403 /* it's not a duplicate */
408 nfs4_free_args(struct nfs_args
*nargs
)
411 if (nargs
->knconf
->knc_protofmly
)
412 kmem_free(nargs
->knconf
->knc_protofmly
,
414 if (nargs
->knconf
->knc_proto
)
415 kmem_free(nargs
->knconf
->knc_proto
, KNC_STRSIZE
);
416 kmem_free(nargs
->knconf
, sizeof (*nargs
->knconf
));
417 nargs
->knconf
= NULL
;
421 kmem_free(nargs
->fh
, strlen(nargs
->fh
) + 1);
425 if (nargs
->hostname
) {
426 kmem_free(nargs
->hostname
, strlen(nargs
->hostname
) + 1);
427 nargs
->hostname
= NULL
;
431 if (nargs
->addr
->buf
) {
432 ASSERT(nargs
->addr
->len
);
433 kmem_free(nargs
->addr
->buf
, nargs
->addr
->len
);
435 kmem_free(nargs
->addr
, sizeof (struct netbuf
));
439 if (nargs
->syncaddr
) {
440 ASSERT(nargs
->syncaddr
->len
);
441 if (nargs
->syncaddr
->buf
) {
442 ASSERT(nargs
->syncaddr
->len
);
443 kmem_free(nargs
->syncaddr
->buf
, nargs
->syncaddr
->len
);
445 kmem_free(nargs
->syncaddr
, sizeof (struct netbuf
));
446 nargs
->syncaddr
= NULL
;
449 if (nargs
->netname
) {
450 kmem_free(nargs
->netname
, strlen(nargs
->netname
) + 1);
451 nargs
->netname
= NULL
;
454 if (nargs
->nfs_ext_u
.nfs_extA
.secdata
) {
456 nargs
->nfs_ext_u
.nfs_extA
.secdata
);
457 nargs
->nfs_ext_u
.nfs_extA
.secdata
= NULL
;
463 nfs4_copyin(char *data
, int datalen
, struct nfs_args
*nargs
)
467 size_t hlen
; /* length of hostname */
468 size_t nlen
; /* length of netname */
469 char netname
[MAXNETNAMELEN
+1]; /* server's netname */
470 struct netbuf addr
; /* server's address */
471 struct netbuf syncaddr
; /* AUTH_DES time sync addr */
472 struct knetconfig
*knconf
; /* transport structure */
473 struct sec_data
*secdata
= NULL
; /* security data */
474 STRUCT_DECL(nfs_args
, args
); /* nfs mount arguments */
475 STRUCT_DECL(knetconfig
, knconf_tmp
);
476 STRUCT_DECL(netbuf
, addr_tmp
);
483 bzero(nargs
, sizeof (*nargs
));
485 STRUCT_INIT(args
, get_udatamodel());
486 bzero(STRUCT_BUF(args
), SIZEOF_STRUCT(nfs_args
, DATAMODEL_NATIVE
));
487 if (copyin(data
, STRUCT_BUF(args
), MIN(datalen
,
491 nargs
->wsize
= STRUCT_FGET(args
, wsize
);
492 nargs
->rsize
= STRUCT_FGET(args
, rsize
);
493 nargs
->timeo
= STRUCT_FGET(args
, timeo
);
494 nargs
->retrans
= STRUCT_FGET(args
, retrans
);
495 nargs
->acregmin
= STRUCT_FGET(args
, acregmin
);
496 nargs
->acregmax
= STRUCT_FGET(args
, acregmax
);
497 nargs
->acdirmin
= STRUCT_FGET(args
, acdirmin
);
498 nargs
->acdirmax
= STRUCT_FGET(args
, acdirmax
);
500 flags
= STRUCT_FGET(args
, flags
);
501 nargs
->flags
= flags
;
508 * Allocate space for a knetconfig structure and
509 * its strings and copy in from user-land.
511 knconf
= kmem_zalloc(sizeof (*knconf
), KM_SLEEP
);
512 STRUCT_INIT(knconf_tmp
, get_udatamodel());
513 if (copyin(STRUCT_FGETP(args
, knconf
), STRUCT_BUF(knconf_tmp
),
514 STRUCT_SIZE(knconf_tmp
))) {
515 kmem_free(knconf
, sizeof (*knconf
));
519 knconf
->knc_semantics
= STRUCT_FGET(knconf_tmp
, knc_semantics
);
520 knconf
->knc_protofmly
= STRUCT_FGETP(knconf_tmp
, knc_protofmly
);
521 knconf
->knc_proto
= STRUCT_FGETP(knconf_tmp
, knc_proto
);
522 if (get_udatamodel() != DATAMODEL_LP64
) {
523 knconf
->knc_rdev
= expldev(STRUCT_FGET(knconf_tmp
, knc_rdev
));
525 knconf
->knc_rdev
= STRUCT_FGET(knconf_tmp
, knc_rdev
);
528 pf
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
529 p
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
530 error
= copyinstr(knconf
->knc_protofmly
, pf
, KNC_STRSIZE
, NULL
);
532 kmem_free(pf
, KNC_STRSIZE
);
533 kmem_free(p
, KNC_STRSIZE
);
534 kmem_free(knconf
, sizeof (*knconf
));
538 error
= copyinstr(knconf
->knc_proto
, p
, KNC_STRSIZE
, NULL
);
540 kmem_free(pf
, KNC_STRSIZE
);
541 kmem_free(p
, KNC_STRSIZE
);
542 kmem_free(knconf
, sizeof (*knconf
));
547 knconf
->knc_protofmly
= pf
;
548 knconf
->knc_proto
= p
;
550 nargs
->knconf
= knconf
;
555 STRUCT_INIT(addr_tmp
, get_udatamodel());
556 if (copyin(STRUCT_FGETP(args
, addr
), STRUCT_BUF(addr_tmp
),
557 STRUCT_SIZE(addr_tmp
))) {
562 nargs
->addr
= kmem_zalloc(sizeof (struct netbuf
), KM_SLEEP
);
563 userbufptr
= STRUCT_FGETP(addr_tmp
, buf
);
564 addr
.len
= STRUCT_FGET(addr_tmp
, len
);
565 addr
.buf
= kmem_alloc(addr
.len
, KM_SLEEP
);
566 addr
.maxlen
= addr
.len
;
567 if (copyin(userbufptr
, addr
.buf
, addr
.len
)) {
568 kmem_free(addr
.buf
, addr
.len
);
572 bcopy(&addr
, nargs
->addr
, sizeof (struct netbuf
));
575 * Get the root fhandle
577 error
= pn_get(STRUCT_FGETP(args
, fh
), UIO_USERSPACE
, &pn
);
581 /* Volatile fh: keep server paths, so use actual-size strings */
582 nargs
->fh
= kmem_alloc(pn
.pn_pathlen
+ 1, KM_SLEEP
);
583 bcopy(pn
.pn_path
, nargs
->fh
, pn
.pn_pathlen
);
584 nargs
->fh
[pn
.pn_pathlen
] = '\0';
589 * Get server's hostname
591 if (flags
& NFSMNT_HOSTNAME
) {
592 error
= copyinstr(STRUCT_FGETP(args
, hostname
),
593 netname
, sizeof (netname
), &hlen
);
596 nargs
->hostname
= kmem_zalloc(hlen
, KM_SLEEP
);
597 (void) strcpy(nargs
->hostname
, netname
);
600 nargs
->hostname
= NULL
;
605 * If there are syncaddr and netname data, load them in. This is
606 * to support data needed for NFSV4 when AUTH_DH is the negotiated
607 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
610 if (flags
& NFSMNT_SECURE
) {
613 STRUCT_INIT(addr_tmp
, get_udatamodel());
614 if (copyin(STRUCT_FGETP(args
, syncaddr
), STRUCT_BUF(addr_tmp
),
615 STRUCT_SIZE(addr_tmp
))) {
619 userbufptr
= STRUCT_FGETP(addr_tmp
, buf
);
620 syncaddr
.len
= STRUCT_FGET(addr_tmp
, len
);
621 syncaddr
.buf
= kmem_alloc(syncaddr
.len
, KM_SLEEP
);
622 syncaddr
.maxlen
= syncaddr
.len
;
623 if (copyin(userbufptr
, syncaddr
.buf
, syncaddr
.len
)) {
624 kmem_free(syncaddr
.buf
, syncaddr
.len
);
629 nargs
->syncaddr
= kmem_alloc(sizeof (struct netbuf
), KM_SLEEP
);
630 bcopy(&syncaddr
, nargs
->syncaddr
, sizeof (struct netbuf
));
632 /* get server's netname */
633 if (copyinstr(STRUCT_FGETP(args
, netname
), netname
,
634 sizeof (netname
), &nlen
)) {
639 netname
[nlen
] = '\0';
640 nargs
->netname
= kmem_zalloc(nlen
, KM_SLEEP
);
641 (void) strcpy(nargs
->netname
, netname
);
645 * Get the extension data which has the security data structure.
646 * This includes data for AUTH_SYS as well.
648 if (flags
& NFSMNT_NEWARGS
) {
649 nargs
->nfs_args_ext
= STRUCT_FGET(args
, nfs_args_ext
);
650 if (nargs
->nfs_args_ext
== NFS_ARGS_EXTA
||
651 nargs
->nfs_args_ext
== NFS_ARGS_EXTB
) {
653 * Indicating the application is using the new
654 * sec_data structure to pass in the security
657 if (STRUCT_FGETP(args
,
658 nfs_ext_u
.nfs_extA
.secdata
) != NULL
) {
659 error
= sec_clnt_loadinfo(
660 (struct sec_data
*)STRUCT_FGETP(args
,
661 nfs_ext_u
.nfs_extA
.secdata
),
662 &secdata
, get_udatamodel());
664 nargs
->nfs_ext_u
.nfs_extA
.secdata
= secdata
;
674 * We may have a linked list of nfs_args structures,
675 * which means the user is looking for failover. If
676 * the mount is either not "read-only" or "soft",
677 * we want to bail out with EINVAL.
679 if (nargs
->nfs_args_ext
== NFS_ARGS_EXTB
)
680 nargs
->nfs_ext_u
.nfs_extB
.next
=
681 STRUCT_FGETP(args
, nfs_ext_u
.nfs_extB
.next
);
685 nfs4_free_args(nargs
);
693 * Set up mount info record and attach it to vfs struct.
696 nfs4_mount(vfs_t
*vfsp
, vnode_t
*mvp
, struct mounta
*uap
, cred_t
*cr
)
698 char *data
= uap
->dataptr
;
700 vnode_t
*rtvp
; /* the server's root */
701 mntinfo4_t
*mi
; /* mount info, pointed at by vfs */
702 struct knetconfig
*rdma_knconf
; /* rdma transport structure */
704 struct servinfo4
*svp
; /* nfs server info */
705 struct servinfo4
*svp_tail
= NULL
; /* previous nfs server info */
706 struct servinfo4
*svp_head
; /* first nfs server info */
707 struct servinfo4
*svp_2ndlast
; /* 2nd last in server info list */
708 struct sec_data
*secdata
; /* security data */
709 struct nfs_args
*args
= NULL
;
710 int flags
, addr_type
, removed
;
711 zone_t
*zone
= nfs_zone();
713 zone_t
*mntzone
= NULL
;
715 if (secpolicy_fs_mount(cr
, mvp
, vfsp
) != 0)
717 if (mvp
->v_type
!= VDIR
)
723 * nfs_args is now versioned and is extensible, so
724 * uap->datalen might be different from sizeof (args)
725 * in a compatible situation.
728 if (!(uap
->flags
& MS_SYSSPACE
)) {
730 args
= kmem_zalloc(sizeof (struct nfs_args
), KM_SLEEP
);
732 nfs4_free_args(args
);
733 error
= nfs4_copyin(data
, uap
->datalen
, args
);
736 kmem_free(args
, sizeof (*args
));
741 args
= (struct nfs_args
*)data
;
747 * If the request changes the locking type, disallow the remount,
748 * because it's questionable whether we can transfer the
749 * locking state correctly.
751 if (uap
->flags
& MS_REMOUNT
) {
752 if (!(uap
->flags
& MS_SYSSPACE
)) {
753 nfs4_free_args(args
);
754 kmem_free(args
, sizeof (*args
));
756 if ((mi
= VFTOMI4(vfsp
)) != NULL
) {
759 new_mi_llock
= (flags
& NFSMNT_LLOCK
) ? 1 : 0;
760 old_mi_llock
= (mi
->mi_flags
& MI4_LLOCK
) ? 1 : 0;
761 if (old_mi_llock
!= new_mi_llock
)
768 * For ephemeral mount trigger stub vnodes, we have two problems
769 * to solve: racing threads will likely fail the v_count check, and
770 * we want only one to proceed with the mount.
772 * For stubs, if the mount has already occurred (via a racing thread),
773 * just return success. If not, skip the v_count check and proceed.
774 * Note that we are already serialised at this point.
776 mutex_enter(&mvp
->v_lock
);
777 if (vn_matchops(mvp
, nfs4_trigger_vnodeops
)) {
778 /* mntpt is a v4 stub vnode */
779 ASSERT(RP_ISSTUB(VTOR4(mvp
)));
780 ASSERT(!(uap
->flags
& MS_OVERLAY
));
781 ASSERT(!(mvp
->v_flag
& VROOT
));
782 if (vn_mountedvfs(mvp
) != NULL
) {
783 /* ephemeral mount has already occurred */
784 ASSERT(uap
->flags
& MS_SYSSPACE
);
785 mutex_exit(&mvp
->v_lock
);
789 /* mntpt is a non-v4 or v4 non-stub vnode */
790 if (!(uap
->flags
& MS_OVERLAY
) &&
791 (mvp
->v_count
!= 1 || (mvp
->v_flag
& VROOT
))) {
792 mutex_exit(&mvp
->v_lock
);
793 if (!(uap
->flags
& MS_SYSSPACE
)) {
794 nfs4_free_args(args
);
795 kmem_free(args
, sizeof (*args
));
800 mutex_exit(&mvp
->v_lock
);
802 /* make sure things are zeroed for errout: */
808 * A valid knetconfig structure is required.
810 if (!(flags
& NFSMNT_KNCONF
) ||
811 args
->knconf
== NULL
|| args
->knconf
->knc_protofmly
== NULL
||
812 args
->knconf
->knc_proto
== NULL
||
813 (strcmp(args
->knconf
->knc_proto
, NC_UDP
) == 0)) {
814 if (!(uap
->flags
& MS_SYSSPACE
)) {
815 nfs4_free_args(args
);
816 kmem_free(args
, sizeof (*args
));
821 if ((strlen(args
->knconf
->knc_protofmly
) >= KNC_STRSIZE
) ||
822 (strlen(args
->knconf
->knc_proto
) >= KNC_STRSIZE
)) {
823 if (!(uap
->flags
& MS_SYSSPACE
)) {
824 nfs4_free_args(args
);
825 kmem_free(args
, sizeof (*args
));
831 * Allocate a servinfo4 struct.
833 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
834 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
836 svp_2ndlast
= svp_tail
;
837 svp_tail
->sv_next
= svp
;
844 svp
->sv_knconf
= args
->knconf
;
850 if (args
->addr
== NULL
|| args
->addr
->buf
== NULL
) {
855 svp
->sv_addr
.maxlen
= args
->addr
->maxlen
;
856 svp
->sv_addr
.len
= args
->addr
->len
;
857 svp
->sv_addr
.buf
= args
->addr
->buf
;
858 args
->addr
->buf
= NULL
;
861 * Get the root fhandle
863 if (args
->fh
== NULL
|| (strlen(args
->fh
) >= MAXPATHLEN
)) {
868 svp
->sv_path
= args
->fh
;
869 svp
->sv_pathlen
= strlen(args
->fh
) + 1;
873 * Get server's hostname
875 if (flags
& NFSMNT_HOSTNAME
) {
876 if (args
->hostname
== NULL
|| (strlen(args
->hostname
) >
881 svp
->sv_hostnamelen
= strlen(args
->hostname
) + 1;
882 svp
->sv_hostname
= args
->hostname
;
883 args
->hostname
= NULL
;
885 char *p
= "unknown-host";
886 svp
->sv_hostnamelen
= strlen(p
) + 1;
887 svp
->sv_hostname
= kmem_zalloc(svp
->sv_hostnamelen
, KM_SLEEP
);
888 (void) strcpy(svp
->sv_hostname
, p
);
892 * RDMA MOUNT SUPPORT FOR NFS v4.
893 * Establish whether it is possible to use RDMA; if so, overload the
894 * knconf with rdma specific knconf and free the original knconf.
896 if ((flags
& NFSMNT_TRYRDMA
) || (flags
& NFSMNT_DORDMA
)) {
898 * Determine the addr type for RDMA, IPv4 or v6.
900 if (strcmp(svp
->sv_knconf
->knc_protofmly
, NC_INET
) == 0)
902 else if (strcmp(svp
->sv_knconf
->knc_protofmly
, NC_INET6
) == 0)
903 addr_type
= AF_INET6
;
905 if (rdma_reachable(addr_type
, &svp
->sv_addr
,
906 &rdma_knconf
) == 0) {
908 * If successful, hijack the original knconf and
909 * replace with the new one, depending on the flags.
911 svp
->sv_origknconf
= svp
->sv_knconf
;
912 svp
->sv_knconf
= rdma_knconf
;
914 if (flags
& NFSMNT_TRYRDMA
) {
917 zcmn_err(getzoneid(), CE_WARN
,
918 "no RDMA onboard, revert\n");
922 if (flags
& NFSMNT_DORDMA
) {
924 * If proto=rdma is specified and no RDMA
925 * path to this server is available then
927 * This is not included in the mountable
928 * server list or the replica list.
929 * Check if more servers are specified;
930 * Failover case, otherwise bail out of mount.
932 if (args
->nfs_args_ext
== NFS_ARGS_EXTB
&&
933 args
->nfs_ext_u
.nfs_extB
.next
!= NULL
) {
935 args
->nfs_ext_u
.nfs_extB
.next
;
936 if (uap
->flags
& MS_RDONLY
&&
937 !(flags
& NFSMNT_SOFT
)) {
938 if (svp_head
->sv_next
== NULL
) {
944 svp_tail
= svp_2ndlast
;
945 svp_2ndlast
->sv_next
=
953 * This is the last server specified
954 * in the nfs_args list passed down
955 * and it's not rdma capable.
957 if (svp_head
->sv_next
== NULL
) {
959 * Is this the only one
964 zcmn_err(getzoneid(),
971 * There is a list, since some
972 * servers specified before
973 * this passed all requirements
975 svp_tail
= svp_2ndlast
;
976 svp_2ndlast
->sv_next
= NULL
;
986 * If there are syncaddr and netname data, load them in. This is
987 * to support data needed for NFSV4 when AUTH_DH is the negotiated
988 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
990 if (args
->flags
& NFSMNT_SECURE
) {
991 svp
->sv_dhsec
= create_authdh_data(args
->netname
,
992 strlen(args
->netname
),
993 args
->syncaddr
, svp
->sv_knconf
);
997 * Get the extension data which has the security data structure.
998 * This includes data for AUTH_SYS as well.
1000 if (flags
& NFSMNT_NEWARGS
) {
1001 switch (args
->nfs_args_ext
) {
1005 * Indicating the application is using the new
1006 * sec_data structure to pass in the security
1009 secdata
= args
->nfs_ext_u
.nfs_extA
.secdata
;
1010 if (secdata
== NULL
) {
1012 } else if (uap
->flags
& MS_SYSSPACE
) {
1014 * Need to validate the flavor here if
1015 * sysspace; userspace was already
1016 * validated by the nfs_copyin function.
1018 switch (secdata
->rpcflavor
) {
1030 args
->nfs_ext_u
.nfs_extA
.secdata
= NULL
;
1038 } else if (flags
& NFSMNT_SECURE
) {
1040 * NFSMNT_SECURE is deprecated but we keep it
1041 * to support the rogue user-generated application
1042 * that may use this undocumented interface to do
1043 * AUTH_DH security, e.g. our own rexd.
1045 * Also note that NFSMNT_SECURE is used for passing
1046 * AUTH_DH info to be used in negotiation.
1048 secdata
= create_authdh_data(args
->netname
,
1049 strlen(args
->netname
), args
->syncaddr
, svp
->sv_knconf
);
1052 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
1053 secdata
->secmod
= secdata
->rpcflavor
= AUTH_SYS
;
1054 secdata
->data
= NULL
;
1057 svp
->sv_secdata
= secdata
;
1060 * User does not explicitly specify a flavor, and a user
1061 * defined default flavor is passed down.
1063 if (flags
& NFSMNT_SECDEFAULT
) {
1064 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1065 svp
->sv_flags
|= SV4_TRYSECDEFAULT
;
1066 nfs_rw_exit(&svp
->sv_lock
);
1072 * We may have a linked list of nfs_args structures,
1073 * which means the user is looking for failover. If
1074 * the mount is not "read-only", or is "soft",
1075 * we want to bail out with EINVAL.
1077 if (args
->nfs_args_ext
== NFS_ARGS_EXTB
&&
1078 args
->nfs_ext_u
.nfs_extB
.next
!= NULL
) {
1079 if (uap
->flags
& MS_RDONLY
&& !(flags
& NFSMNT_SOFT
)) {
1080 data
= (char *)args
->nfs_ext_u
.nfs_extB
.next
;
1088 * Determine the zone we're being mounted into.
1090 zone_hold(mntzone
= zone
); /* start with this assumption */
1091 if (getzoneid() == GLOBAL_ZONEID
) {
1093 mntzone
= zone_find_by_path(refstr_value(vfsp
->vfs_mntpt
));
1094 ASSERT(mntzone
!= NULL
);
1095 if (mntzone
!= zone
) {
1102 * Stop the mount from going any further if the zone is going away.
1104 if (zone_status_get(mntzone
) >= ZONE_IS_SHUTTING_DOWN
) {
1113 error
= nfs4rootvp(&rtvp
, vfsp
, svp_head
, flags
, cr
, mntzone
);
1115 /* if nfs4rootvp failed, it will free svp_head */
1123 * Send client id to the server, if necessary
1125 nfs4_error_zinit(&n4e
);
1126 nfs4setclientid(mi
, cr
, FALSE
, &n4e
);
1134 * Set option fields in the mount info record
1137 if (svp_head
->sv_next
) {
1138 mutex_enter(&mi
->mi_lock
);
1139 mi
->mi_flags
|= MI4_LLOCK
;
1140 mutex_exit(&mi
->mi_lock
);
1142 error
= nfs4_setopts(rtvp
, DATAMODEL_NATIVE
, args
);
1147 * Time to tie in the mirror mount info at last!
1149 if (flags
& NFSMNT_EPHEMERAL
)
1150 error
= nfs4_record_ephemeral_mount(mi
, mvp
);
1156 if (rp
->r_flags
& R4HASHED
)
1160 nfs4_async_stop(vfsp
);
1161 nfs4_async_manager_stop(vfsp
);
1162 nfs4_remove_mi_from_server(mi
, NULL
);
1165 if (mntzone
!= NULL
)
1167 /* need to remove it from the zone */
1168 removed
= nfs4_mi_zonelist_remove(mi
);
1170 zone_rele_ref(&mi
->mi_zone_ref
,
1173 if (!(uap
->flags
& MS_SYSSPACE
) && args
) {
1174 nfs4_free_args(args
);
1175 kmem_free(args
, sizeof (*args
));
1183 if (!(uap
->flags
& MS_SYSSPACE
) && args
) {
1184 nfs4_free_args(args
);
1185 kmem_free(args
, sizeof (*args
));
1190 if (mntzone
!= NULL
)
1197 #define VERS_MSG "NFS4 server "
1199 #define VERS_MSG "NFS server "
1203 VERS_MSG "%s returned 0 for read transfer size"
1205 VERS_MSG "%s returned 0 for write transfer size"
1207 VERS_MSG "%s returned 0 for maximum file size"
1210 * Get the symbolic link text from the server for a given filehandle
1213 * (get symlink text) PUTFH READLINK
1216 getlinktext_otw(mntinfo4_t
*mi
, nfs_fh4
*fh
, char **linktextp
, cred_t
*cr
,
1219 COMPOUND4args_clnt args
;
1220 COMPOUND4res_clnt res
;
1222 nfs_argop4 argop
[2];
1224 READLINK4res
*lr_res
;
1226 bool_t needrecov
= FALSE
;
1227 nfs4_recov_state_t recov_state
;
1228 nfs4_sharedfh_t
*sfh
;
1230 int num_retry
= nfs4_max_mount_retry
;
1231 int recovery
= !(flags
& NFS4_GETFH_NEEDSOP
);
1233 sfh
= sfh4_get(fh
, mi
);
1234 recov_state
.rs_flags
= 0;
1235 recov_state
.rs_num_retry_despite_err
= 0;
1238 nfs4_error_zinit(&e
);
1242 args
.ctag
= TAG_GET_SYMLINK
;
1245 e
.error
= nfs4_start_op(mi
, NULL
, NULL
, &recov_state
);
1252 /* 0. putfh symlink fh */
1253 argop
[0].argop
= OP_CPUTFH
;
1254 argop
[0].nfs_argop4_u
.opcputfh
.sfh
= sfh
;
1257 argop
[1].argop
= OP_READLINK
;
1261 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, &e
);
1263 needrecov
= nfs4_needs_recovery(&e
, FALSE
, mi
->mi_vfsp
);
1265 if (needrecov
&& !recovery
&& num_retry
-- > 0) {
1267 NFS4_DEBUG(nfs4_client_recov_debug
, (CE_NOTE
,
1268 "getlinktext_otw: initiating recovery\n"));
1270 if (nfs4_start_recovery(&e
, mi
, NULL
, NULL
, NULL
, NULL
,
1271 OP_READLINK
, NULL
, NULL
, NULL
) == FALSE
) {
1272 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1274 (void) xdr_free(xdr_COMPOUND4res_clnt
,
1281 * If non-NFS4 protocol error and/or we weren't able to recover.
1285 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1291 e
.error
= geterrno4(res
.status
);
1292 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1294 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1299 /* res.status == NFS4_OK */
1300 ASSERT(res
.status
== NFS4_OK
);
1302 resop
= &res
.array
[1]; /* readlink res */
1303 lr_res
= &resop
->nfs_resop4_u
.opreadlink
;
1305 /* treat symlink name as data */
1306 *linktextp
= utf8_to_str((utf8string
*)&lr_res
->link
, &len
, NULL
);
1309 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, needrecov
);
1311 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
1316 * Skip over consecutive slashes and "/./" in a pathname.
1319 pathname_skipslashdot(struct pathname
*pnp
)
1323 while (pnp
->pn_pathlen
> 0 && *pnp
->pn_path
== '/') {
1325 c1
= pnp
->pn_path
+ 1;
1326 c2
= pnp
->pn_path
+ 2;
1328 if (*c1
== '.' && (*c2
== '/' || *c2
== '\0')) {
1329 pnp
->pn_path
= pnp
->pn_path
+ 2; /* skip "/." */
1330 pnp
->pn_pathlen
= pnp
->pn_pathlen
- 2;
1339 * Resolve a symbolic link path. The symlink is in the nth component of
1340 * svp->sv_path and has an nfs4 file handle "fh".
1341 * Upon return, the sv_path will point to the new path that has the nth
1342 * component resolved to its symlink text.
1345 resolve_sympath(mntinfo4_t
*mi
, servinfo4_t
*svp
, int nth
, nfs_fh4
*fh
,
1346 cred_t
*cr
, int flags
)
1349 char *symlink
, *newpath
;
1350 struct pathname oldpn
, newpn
;
1351 char component
[MAXNAMELEN
];
1352 int i
, addlen
, error
= 0;
1355 /* Get the symbolic link text over the wire. */
1356 error
= getlinktext_otw(mi
, fh
, &symlink
, cr
, flags
);
1358 if (error
|| symlink
== NULL
|| strlen(symlink
) == 0)
1362 * Compose the new pathname.
1364 * - only the nth component is resolved for the pathname.
1365 * - pathname.pn_pathlen does not count the ending null byte.
1367 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
1368 oldpath
= svp
->sv_path
;
1369 oldpathlen
= svp
->sv_pathlen
;
1370 if (error
= pn_get(oldpath
, UIO_SYSSPACE
, &oldpn
)) {
1371 nfs_rw_exit(&svp
->sv_lock
);
1372 kmem_free(symlink
, strlen(symlink
) + 1);
1375 nfs_rw_exit(&svp
->sv_lock
);
1379 * Skip over previous components from the oldpath so that the
1380 * oldpn.pn_path will point to the symlink component. Skip
1381 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
1382 * pn_getcompnent can get the component.
1384 for (i
= 1; i
< nth
; i
++) {
1385 pathname_skipslashdot(&oldpn
);
1386 error
= pn_getcomponent(&oldpn
, component
);
1392 * Copy the old path upto the component right before the symlink
1393 * if the symlink is not an absolute path.
1395 if (symlink
[0] != '/') {
1396 addlen
= oldpn
.pn_path
- oldpn
.pn_buf
;
1397 bcopy(oldpn
.pn_buf
, newpn
.pn_path
, addlen
);
1398 newpn
.pn_pathlen
+= addlen
;
1399 newpn
.pn_path
+= addlen
;
1400 newpn
.pn_buf
[newpn
.pn_pathlen
] = '/';
1405 /* copy the resolved symbolic link text */
1406 addlen
= strlen(symlink
);
1407 if (newpn
.pn_pathlen
+ addlen
>= newpn
.pn_bufsize
) {
1408 error
= ENAMETOOLONG
;
1411 bcopy(symlink
, newpn
.pn_path
, addlen
);
1412 newpn
.pn_pathlen
+= addlen
;
1413 newpn
.pn_path
+= addlen
;
1416 * Check if there is any remaining path after the symlink component.
1417 * First, skip the symlink component.
1419 pathname_skipslashdot(&oldpn
);
1420 if (error
= pn_getcomponent(&oldpn
, component
))
1423 addlen
= pn_pathleft(&oldpn
); /* includes counting the slash */
1426 * Copy the remaining path to the new pathname if there is any.
1429 if (newpn
.pn_pathlen
+ addlen
>= newpn
.pn_bufsize
) {
1430 error
= ENAMETOOLONG
;
1433 bcopy(oldpn
.pn_path
, newpn
.pn_path
, addlen
);
1434 newpn
.pn_pathlen
+= addlen
;
1436 newpn
.pn_buf
[newpn
.pn_pathlen
] = '\0';
1438 /* get the newpath and store it in the servinfo4_t */
1439 newpath
= kmem_alloc(newpn
.pn_pathlen
+ 1, KM_SLEEP
);
1440 bcopy(newpn
.pn_buf
, newpath
, newpn
.pn_pathlen
);
1441 newpath
[newpn
.pn_pathlen
] = '\0';
1443 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1444 svp
->sv_path
= newpath
;
1445 svp
->sv_pathlen
= strlen(newpath
) + 1;
1446 nfs_rw_exit(&svp
->sv_lock
);
1448 kmem_free(oldpath
, oldpathlen
);
1450 kmem_free(symlink
, strlen(symlink
) + 1);
1458 * This routine updates servinfo4 structure with the new referred server
1460 * nfsfsloc has the location related information
1461 * fsp has the hostname and pathname info.
1462 * new path = pathname from referral + part of orig pathname(based on nth).
1465 update_servinfo4(servinfo4_t
*svp
, fs_location4
*fsp
,
1466 struct nfs_fsl_info
*nfsfsloc
, char *orig_path
, int nth
)
1468 struct knetconfig
*knconf
, *svknconf
;
1469 struct netbuf
*saddr
;
1470 sec_data_t
*secdata
;
1472 int i
= 0, num_slashes
= 0;
1473 char *p
, *spath
, *op
, *new_path
;
1476 knconf
= svp
->sv_knconf
;
1477 free_knconf_contents(knconf
);
1478 bzero(knconf
, sizeof (struct knetconfig
));
1479 svknconf
= nfsfsloc
->knconf
;
1480 knconf
->knc_semantics
= svknconf
->knc_semantics
;
1481 knconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1482 knconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
1483 knconf
->knc_rdev
= svknconf
->knc_rdev
;
1484 bcopy(svknconf
->knc_protofmly
, knconf
->knc_protofmly
, KNC_STRSIZE
);
1485 bcopy(svknconf
->knc_proto
, knconf
->knc_proto
, KNC_STRSIZE
);
1487 /* Update server address */
1488 saddr
= &svp
->sv_addr
;
1489 if (saddr
->buf
!= NULL
)
1490 kmem_free(saddr
->buf
, saddr
->maxlen
);
1491 saddr
->buf
= kmem_alloc(nfsfsloc
->addr
->maxlen
, KM_SLEEP
);
1492 saddr
->len
= nfsfsloc
->addr
->len
;
1493 saddr
->maxlen
= nfsfsloc
->addr
->maxlen
;
1494 bcopy(nfsfsloc
->addr
->buf
, saddr
->buf
, nfsfsloc
->addr
->len
);
1496 /* Update server name */
1497 host
= fsp
->server_val
;
1498 kmem_free(svp
->sv_hostname
, svp
->sv_hostnamelen
);
1499 svp
->sv_hostname
= kmem_zalloc(host
->utf8string_len
+ 1, KM_SLEEP
);
1500 bcopy(host
->utf8string_val
, svp
->sv_hostname
, host
->utf8string_len
);
1501 svp
->sv_hostname
[host
->utf8string_len
] = '\0';
1502 svp
->sv_hostnamelen
= host
->utf8string_len
+ 1;
1505 * Update server path.
1506 * We need to setup proper path here.
1507 * For ex., If we got a path name serv1:/rp/aaa/bbb
1508 * where aaa is a referral and points to serv2:/rpool/aa
1509 * we need to set the path to serv2:/rpool/aa/bbb
1510 * The first part of this below code generates /rpool/aa
1511 * and the second part appends /bbb to the server path.
1513 spath
= p
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1515 for (i
= 0; i
< fsp
->rootpath
.pathname4_len
; i
++) {
1518 comp
= &fsp
->rootpath
.pathname4_val
[i
];
1519 /* If no space, null the string and bail */
1520 if ((p
- spath
) + comp
->utf8string_len
+ 1 > MAXPATHLEN
) {
1521 p
= spath
+ MAXPATHLEN
- 1;
1525 bcopy(comp
->utf8string_val
, p
, comp
->utf8string_len
);
1526 p
+= comp
->utf8string_len
;
1529 if (fsp
->rootpath
.pathname4_len
!= 0)
1535 new_path
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
1536 (void) strlcpy(new_path
, p
, MAXPATHLEN
);
1537 kmem_free(p
, MAXPATHLEN
);
1538 i
= strlen(new_path
);
1540 for (op
= orig_path
; *op
; op
++) {
1543 if (num_slashes
== nth
+ 2) {
1544 while (*op
!= '\0') {
1554 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
1555 svp
->sv_pathlen
= strlen(new_path
) + 1;
1556 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
1557 bcopy(new_path
, svp
->sv_path
, svp
->sv_pathlen
);
1558 kmem_free(new_path
, MAXPATHLEN
);
1561 * All the security data is specific to old server.
1562 * Clean it up except secdata which deals with mount options.
1563 * We need to inherit that data. Copy secdata into our new servinfo4.
1565 if (svp
->sv_dhsec
) {
1566 sec_clnt_freeinfo(svp
->sv_dhsec
);
1567 svp
->sv_dhsec
= NULL
;
1569 if (svp
->sv_save_secinfo
&&
1570 svp
->sv_save_secinfo
!= svp
->sv_secinfo
) {
1571 secinfo_free(svp
->sv_save_secinfo
);
1572 svp
->sv_save_secinfo
= NULL
;
1574 if (svp
->sv_secinfo
) {
1575 secinfo_free(svp
->sv_secinfo
);
1576 svp
->sv_secinfo
= NULL
;
1578 svp
->sv_currsec
= NULL
;
1580 secdata
= kmem_alloc(sizeof (*secdata
), KM_SLEEP
);
1581 *secdata
= *svp
->sv_secdata
;
1582 secdata
->data
= NULL
;
1583 if (svp
->sv_secdata
) {
1584 sec_clnt_freeinfo(svp
->sv_secdata
);
1585 svp
->sv_secdata
= NULL
;
1587 svp
->sv_secdata
= secdata
;
1591 * Resolve a referral. The referral is in the n+1th component of
1592 * svp->sv_path and has a parent nfs4 file handle "fh".
1593 * Upon return, the sv_path will point to the new path that has referral
1594 * component resolved to its referred path and part of original path.
1595 * Hostname and other address information is also updated.
1598 resolve_referral(mntinfo4_t
*mi
, servinfo4_t
*svp
, cred_t
*cr
, int nth
,
1601 nfs4_sharedfh_t
*sfh
;
1602 struct nfs_fsl_info nfsfsloc
;
1604 COMPOUND4res_clnt callres
;
1606 char *nm
, *orig_path
;
1607 int orig_pathlen
= 0, ret
= -1, index
;
1609 if (svp
->sv_pathlen
<= 0)
1612 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1613 orig_pathlen
= svp
->sv_pathlen
;
1614 orig_path
= kmem_alloc(orig_pathlen
, KM_SLEEP
);
1615 bcopy(svp
->sv_path
, orig_path
, orig_pathlen
);
1616 nm
= extract_referral_point(svp
->sv_path
, nth
);
1617 setup_newsvpath(svp
, nth
);
1618 nfs_rw_exit(&svp
->sv_lock
);
1620 sfh
= sfh4_get(fh
, mi
);
1621 index
= nfs4_process_referral(mi
, sfh
, nm
, cr
,
1622 &garp
, &callres
, &nfsfsloc
);
1624 kmem_free(nm
, MAXPATHLEN
);
1626 kmem_free(orig_path
, orig_pathlen
);
1630 fsp
= &garp
.n4g_ext_res
->n4g_fslocations
.locations_val
[index
];
1631 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
1632 update_servinfo4(svp
, fsp
, &nfsfsloc
, orig_path
, nth
);
1633 nfs_rw_exit(&svp
->sv_lock
);
1635 mutex_enter(&mi
->mi_lock
);
1636 mi
->mi_vfs_referral_loop_cnt
++;
1637 mutex_exit(&mi
->mi_lock
);
1641 /* Free up XDR memory allocated in nfs4_process_referral() */
1642 xdr_free(xdr_nfs_fsl_info
, (char *)&nfsfsloc
);
1643 xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&callres
);
1644 kmem_free(orig_path
, orig_pathlen
);
/*
 * Get the root filehandle for the given filesystem and server, and update
 * svp.
 *
 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
 * to coordinate with recovery.  Otherwise, the caller is assumed to be
 * the recovery thread or have already done a start_fop.
 *
 * If NFS4_GETFH_PUBLIC is set, the compound starts from the public
 * filehandle (OP_PUTPUBFH) instead of the root filehandle.
 *
 * On success *vtp receives the vnode type reported for the mounted
 * object, svp->sv_fhandle / svp->sv_pfhandle receive the object's and its
 * parent's filehandles, svp->sv_fsid and svp->sv_supp_attrs are set, and
 * several mi fields (flags, transfer sizes, max file size, fh expire
 * type) are refreshed from the returned attributes.
 *
 * A lookup failing with NFS4ERR_SYMLINK or NFS4ERR_MOVED, or a final
 * component that is a symlink, causes svp->sv_path to be rewritten and
 * the whole compound to be retried.
 *
 * Errors are returned by the nfs4_error_t parameter.
 */
void
nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
    int flags, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue = 1;
	nfs_argop4 *argop;
	nfs_resop4 *resop;
	nfs4_ga_res_t *garp;
	int num_argops;
	lookup4_param_t lookuparg;
	nfs_fh4 *tmpfhp;
	nfs_fh4 *resfhp;
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	int llndx;
	int nthcomp;
	/* non-zero when the caller did not ask us to do start_fop/end_fop */
	int recovery = !(flags & NFS4_GETFH_NEEDSOP);

	/* An empty server path cannot be looked up. */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	if (svp->sv_path[0] == '\0') {
		nfs_rw_exit(&svp->sv_lock);
		nfs4_error_init(ep, EINVAL);
		return;
	}
	nfs_rw_exit(&svp->sv_lock);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	/* Give up once too many referrals have been followed. */
	if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) {
		DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *,
		    mi, servinfo4_t *, svp, char *, "nfs4getfh_otw");
		nfs4_error_init(ep, EINVAL);
		return;
	}
	nfs4_error_zinit(ep);

	if (!recovery) {
		ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
		    &recov_state, NULL);

		/*
		 * If recovery has been started and this request was
		 * initiated by a mount, then we must wait for recovery
		 * to finish before proceeding, otherwise, the error
		 * cleanup would remove data structures needed by the
		 * recovery thread.
		 */
		if (ep->error) {
			mutex_enter(&mi->mi_lock);
			if (mi->mi_flags & MI4_MOUNTING) {
				mi->mi_flags |= MI4_RECOV_FAIL;
				mi->mi_error = EIO;

				NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
				    "nfs4getfh_otw: waiting 4 recovery\n"));

				while (mi->mi_flags & MI4_RECOV_ACTIV)
					cv_wait(&mi->mi_failover_cv,
					    &mi->mi_lock);
			}
			mutex_exit(&mi->mi_lock);
			return;
		}

		/*
		 * If the client does not specify a specific flavor to use
		 * and has not gotten a secinfo list from the server yet,
		 * retrieve the secinfo list from the server and use a
		 * flavor from the list to mount.
		 *
		 * If fail to get the secinfo list from the server, then
		 * try the default flavor.
		 */
		if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
		    svp->sv_secinfo == NULL) {
			(void) nfs4_secinfo_path(mi, cr, FALSE);
		}
	}

	if (recovery)
		args.ctag = TAG_REMAP_MOUNT;
	else
		args.ctag = TAG_MOUNT;

	lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
	lookuparg.argsp = &args;
	lookuparg.resp = &res;
	lookuparg.header_len = 2;	/* Putrootfh, getfh */
	lookuparg.trailer_len = 0;
	lookuparg.ga_bits = FATTR4_FSINFO_MASK;
	lookuparg.mi = mi;

	/* Build the lookup ops for the current sv_path under the lock. */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
	ASSERT(svp->sv_path != NULL);
	llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
	nfs_rw_exit(&svp->sv_lock);

	argop = args.array;
	num_argops = args.array_len;

	/* choose public or root filehandle */
	if (flags & NFS4_GETFH_PUBLIC)
		argop[0].argop = OP_PUTPUBFH;
	else
		argop[0].argop = OP_PUTROOTFH;

	/* get fh */
	argop[1].argop = OP_GETFH;

	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
	    "nfs4getfh_otw: %s call, mi 0x%p",
	    needrecov ? "recov" : "first", (void *)mi));

	rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);

	needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);

	if (needrecov) {
		bool_t abort;

		/*
		 * When start_fop was not done here, do not start recovery
		 * from this path; just release what was allocated.
		 */
		if (recovery) {
			nfs4args_lookup_free(argop, num_argops);
			kmem_free(argop,
			    lookuparg.arglen * sizeof (nfs_argop4));
			if (!ep->error)
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
			return;
		}

		NFS4_DEBUG(nfs4_client_recov_debug,
		    (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));

		abort = nfs4_start_recovery(ep, mi, NULL,
		    NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL);
		if (!ep->error) {
			ep->error = geterrno4(res.status);
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
		/* have another go? */
		if (abort == FALSE)
			goto recov_retry;
		return;
	}

	/*
	 * No recovery, but check if error is set.
	 */
	if (ep->error)  {
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		return;
	}

is_link_err:

	/* for non-recovery errors */
	if (res.status && res.status != NFS4ERR_SYMLINK &&
	    res.status != NFS4ERR_MOVED) {
		if (!recovery) {
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);
		}
		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	/*
	 * If any intermediate component in the path is a symbolic link
	 * (or a referral), resolve it, then try mount again using the
	 * new path.
	 */
	if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) {
		int where;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		/*
		 * This must be from OP_LOOKUP failure.  The (cfh) for this
		 * OP_LOOKUP is a symlink node.  Found out where the
		 * OP_GETFH is for the (cfh) that is a symlink node.
		 *
		 * Example:
		 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
		 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
		 *
		 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
		 * In this case, where = 7, nthcomp = 2.
		 */
		where = res.array_len - 2;
		ASSERT(where > 0);

		if (res.status == NFS4ERR_SYMLINK) {

			resop = &res.array[where - 1];
			ASSERT(resop->resop == OP_GETFH);
			tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
			nthcomp = res.array_len/3 - 1;
			ep->error = resolve_sympath(mi, svp, nthcomp,
			    tmpfhp, cr, flags);

		} else if (res.status == NFS4ERR_MOVED) {

			resop = &res.array[where - 2];
			ASSERT(resop->resop == OP_GETFH);
			tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
			nthcomp = res.array_len/3 - 1;
			ep->error = resolve_referral(mi, svp, cr, nthcomp,
			    tmpfhp);
		}

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		/* sv_path has changed; rebuild and resend the compound */
		goto recov_retry;
	}

	/* getfh */
	resop = &res.array[res.array_len - 2];
	ASSERT(resop->resop == OP_GETFH);
	resfhp = &resop->nfs_resop4_u.opgetfh.object;

	/* getattr fsinfo res */
	resop++;
	garp = &resop->nfs_resop4_u.opgetattr.ga_res;

	*vtp = garp->n4g_va.va_type;

	mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;

	mutex_enter(&mi->mi_lock);
	if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
		mi->mi_flags |= MI4_LINK;
	if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
		mi->mi_flags |= MI4_SYMLINK;
	if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
		mi->mi_flags |= MI4_ACL;
	mutex_exit(&mi->mi_lock);

	/*
	 * Clamp the transfer sizes: a server value of 0 is replaced by
	 * MAXBSIZE, and the current mi value is never increased.
	 */
	if (garp->n4g_ext_res->n4g_maxread == 0)
		mi->mi_tsize =
		    MIN(MAXBSIZE, mi->mi_tsize);
	else
		mi->mi_tsize =
		    MIN(garp->n4g_ext_res->n4g_maxread,
		    mi->mi_tsize);

	if (garp->n4g_ext_res->n4g_maxwrite == 0)
		mi->mi_stsize =
		    MIN(MAXBSIZE, mi->mi_stsize);
	else
		mi->mi_stsize =
		    MIN(garp->n4g_ext_res->n4g_maxwrite,
		    mi->mi_stsize);

	if (garp->n4g_ext_res->n4g_maxfilesize != 0)
		mi->mi_maxfilesize =
		    MIN(garp->n4g_ext_res->n4g_maxfilesize,
		    mi->mi_maxfilesize);

	/*
	 * If the final component is a symbolic link, resolve the symlink,
	 * then try mount again using the new path.
	 *
	 * Assume no symbolic link for root filesystem "/".
	 */
	if (*vtp == VLNK) {
		/*
		 * nthcomp is the total result length minus
		 * the 1st 2 OPs (PUTROOTFH, GETFH),
		 * then divided by 3 (LOOKUP,GETFH,GETATTR)
		 *
		 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
		 *	LOOKUP 2nd-comp GETFH GETATTR
		 *
		 *	(8 - 2)/3 = 2
		 */
		nthcomp = (res.array_len - 2)/3;

		/*
		 * Need to call nfs4_end_op before resolve_sympath to avoid
		 * potential nfs4_start_op deadlock. See RFE 4777612.
		 */
		if (!recovery)
			nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
			    needrecov);

		ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
		    flags);

		nfs4args_lookup_free(argop, num_argops);
		kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

		if (ep->error)
			return;

		goto recov_retry;
	}

	/*
	 * We need to figure out where in the compound the getfh
	 * for the parent directory is.  If the object to be mounted is
	 * the root, then there is no lookup at all:
	 * PUTROOTFH, GETFH.
	 * If the object to be mounted is in the root, then the compound is:
	 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
	 * In either of these cases, the index of the GETFH is 1.
	 * If it is not at the root, then it's something like:
	 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
	 * LOOKUP, GETFH, GETATTR
	 * In this case, the index is llndx (last lookup index) - 2.
	 */
	if (llndx == -1 || llndx == 2)
		resop = &res.array[1];
	else {
		ASSERT(llndx > 2);
		resop = &res.array[llndx-2];
	}

	ASSERT(resop->resop == OP_GETFH);
	tmpfhp = &resop->nfs_resop4_u.opgetfh.object;

	/* save the filehandles for the replica */
	(void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
	ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE);
	svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len;
	bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf,
	    tmpfhp->nfs_fh4_len);
	ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE);
	svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len;
	bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len);

	/* initialize fsid and supp_attrs for server fs */
	svp->sv_fsid = garp->n4g_fsid;
	svp->sv_supp_attrs =
	    garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK;

	nfs_rw_exit(&svp->sv_lock);
	nfs4args_lookup_free(argop, num_argops);
	kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

	if (!recovery)
		nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
}
2028 * Save a copy of Servinfo4_t structure.
2029 * We might need when there is a failure in getting file handle
2030 * in case of a referral to replace servinfo4 struct and try again.
2032 static struct servinfo4
*
2033 copy_svp(servinfo4_t
*nsvp
)
2035 servinfo4_t
*svp
= NULL
;
2036 struct knetconfig
*sknconf
, *tknconf
;
2037 struct netbuf
*saddr
, *taddr
;
2039 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
2040 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
2041 svp
->sv_flags
= nsvp
->sv_flags
;
2042 svp
->sv_fsid
= nsvp
->sv_fsid
;
2043 svp
->sv_hostnamelen
= nsvp
->sv_hostnamelen
;
2044 svp
->sv_pathlen
= nsvp
->sv_pathlen
;
2045 svp
->sv_supp_attrs
= nsvp
->sv_supp_attrs
;
2047 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
2048 svp
->sv_hostname
= kmem_alloc(svp
->sv_hostnamelen
, KM_SLEEP
);
2049 bcopy(nsvp
->sv_hostname
, svp
->sv_hostname
, svp
->sv_hostnamelen
);
2050 bcopy(nsvp
->sv_path
, svp
->sv_path
, svp
->sv_pathlen
);
2052 saddr
= &nsvp
->sv_addr
;
2053 taddr
= &svp
->sv_addr
;
2054 taddr
->maxlen
= saddr
->maxlen
;
2055 taddr
->len
= saddr
->len
;
2056 if (saddr
->len
> 0) {
2057 taddr
->buf
= kmem_zalloc(saddr
->maxlen
, KM_SLEEP
);
2058 bcopy(saddr
->buf
, taddr
->buf
, saddr
->len
);
2061 svp
->sv_knconf
= kmem_zalloc(sizeof (struct knetconfig
), KM_SLEEP
);
2062 sknconf
= nsvp
->sv_knconf
;
2063 tknconf
= svp
->sv_knconf
;
2064 tknconf
->knc_semantics
= sknconf
->knc_semantics
;
2065 tknconf
->knc_rdev
= sknconf
->knc_rdev
;
2066 if (sknconf
->knc_proto
!= NULL
) {
2067 tknconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2068 bcopy(sknconf
->knc_proto
, (char *)tknconf
->knc_proto
,
2071 if (sknconf
->knc_protofmly
!= NULL
) {
2072 tknconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2073 bcopy(sknconf
->knc_protofmly
, (char *)tknconf
->knc_protofmly
,
2077 if (nsvp
->sv_origknconf
!= NULL
) {
2078 svp
->sv_origknconf
= kmem_zalloc(sizeof (struct knetconfig
),
2080 sknconf
= nsvp
->sv_origknconf
;
2081 tknconf
= svp
->sv_origknconf
;
2082 tknconf
->knc_semantics
= sknconf
->knc_semantics
;
2083 tknconf
->knc_rdev
= sknconf
->knc_rdev
;
2084 if (sknconf
->knc_proto
!= NULL
) {
2085 tknconf
->knc_proto
= kmem_zalloc(KNC_STRSIZE
, KM_SLEEP
);
2086 bcopy(sknconf
->knc_proto
, (char *)tknconf
->knc_proto
,
2089 if (sknconf
->knc_protofmly
!= NULL
) {
2090 tknconf
->knc_protofmly
= kmem_zalloc(KNC_STRSIZE
,
2092 bcopy(sknconf
->knc_protofmly
,
2093 (char *)tknconf
->knc_protofmly
, KNC_STRSIZE
);
2097 svp
->sv_secdata
= copy_sec_data(nsvp
->sv_secdata
);
2098 svp
->sv_dhsec
= copy_sec_data(svp
->sv_dhsec
);
2100 * Rest of the security information is not copied as they are built
2101 * with the information available from secdata and dhsec.
2103 svp
->sv_next
= NULL
;
2109 restore_svp(mntinfo4_t
*mi
, servinfo4_t
*svp
, servinfo4_t
*origsvp
)
2111 servinfo4_t
*srvnext
, *tmpsrv
;
2113 if (strcmp(svp
->sv_hostname
, origsvp
->sv_hostname
) != 0) {
2115 * Since the hostname changed, we must be dealing
2116 * with a referral, and the lookup failed. We will
2117 * restore the whole servinfo4_t to what it was before.
2119 srvnext
= svp
->sv_next
;
2120 svp
->sv_next
= NULL
;
2121 tmpsrv
= copy_svp(origsvp
);
2124 svp
->sv_next
= srvnext
;
2125 mutex_enter(&mi
->mi_lock
);
2126 mi
->mi_servers
= svp
;
2127 mi
->mi_curr_serv
= svp
;
2128 mutex_exit(&mi
->mi_lock
);
2130 } else if (origsvp
->sv_pathlen
!= svp
->sv_pathlen
) {
2133 * For symlink case: restore original path because
2134 * it might have contained symlinks that were
2135 * expanded by nfsgetfh_otw before the failure occurred.
2137 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2138 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
2140 kmem_alloc(origsvp
->sv_pathlen
, KM_SLEEP
);
2141 svp
->sv_pathlen
= origsvp
->sv_pathlen
;
2142 bcopy(origsvp
->sv_path
, svp
->sv_path
,
2143 origsvp
->sv_pathlen
);
2144 nfs_rw_exit(&svp
->sv_lock
);
/*
 * Defaults copied into each mntinfo4 / vfs when a mount is set up.
 */
static ushort_t nfs4_max_threads = 8;	/* max number of active async threads */
uint_t nfs4_bsize = 32 * 1024;		/* client `block' size */
static uint_t nfs4_async_clusters = 1;	/* # of reqs from each async queue */
/* mi_timeo used for connection-oriented (COTS / COTS_ORD) transports */
static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;
2155 * Remap the root filehandle for the given filesystem.
2157 * results returned via the nfs4_error_t parameter.
2160 nfs4_remap_root(mntinfo4_t
*mi
, nfs4_error_t
*ep
, int flags
)
2162 struct servinfo4
*svp
, *origsvp
;
2168 mutex_enter(&mi
->mi_lock
);
2171 svp
= mi
->mi_curr_serv
;
2173 (flags
& NFS4_REMAP_NEEDSOP
) ? NFS4_GETFH_NEEDSOP
: 0;
2175 (mi
->mi_flags
& MI4_PUBLIC
) ? NFS4_GETFH_PUBLIC
: 0;
2176 mutex_exit(&mi
->mi_lock
);
2179 * Just in case server path being mounted contains
2180 * symlinks and fails w/STALE, save the initial sv_path
2181 * so we can redrive the initial mount compound with the
2182 * initial sv_path -- not a symlink-expanded version.
2184 * This could only happen if a symlink was expanded
2185 * and the expanded mount compound failed stale. Because
2186 * it could be the case that the symlink was removed at
2187 * the server (and replaced with another symlink/dir,
2188 * we need to use the initial sv_path when attempting
2189 * to re-lookup everything and recover.
2191 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2192 origsvp
= copy_svp(svp
);
2193 nfs_rw_exit(&svp
->sv_lock
);
2195 num_retry
= nfs4_max_mount_retry
;
2199 * Get the root fh from the server. Retry nfs4_max_mount_retry
2200 * (2) times if it fails with STALE since the recovery
2201 * infrastructure doesn't do STALE recovery for components
2202 * of the server path to the object being mounted.
2204 nfs4getfh_otw(mi
, svp
, &vtype
, getfh_flags
, CRED(), ep
);
2206 if (ep
->error
== 0 && ep
->stat
== NFS4_OK
)
2210 * For some reason, the mount compound failed. Before
2211 * retrying, we need to restore original conditions.
2213 svp
= restore_svp(mi
, svp
, origsvp
);
2215 } while (num_retry
-- > 0);
2219 if (ep
->error
!= 0 || ep
->stat
!= 0) {
2223 if (vtype
!= VNON
&& vtype
!= mi
->mi_type
) {
2224 /* shouldn't happen */
2225 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2226 "nfs4_remap_root: server root vnode type (%d) doesn't "
2227 "match mount info (%d)", vtype
, mi
->mi_type
);
2230 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2231 rootfh
.nfs_fh4_val
= svp
->sv_fhandle
.fh_buf
;
2232 rootfh
.nfs_fh4_len
= svp
->sv_fhandle
.fh_len
;
2233 nfs_rw_exit(&svp
->sv_lock
);
2234 sfh4_update(mi
->mi_rootfh
, &rootfh
);
2237 * It's possible that recovery took place on the filesystem
2238 * and the server has been updated between the time we did
2239 * the nfs4getfh_otw and now. Re-drive the otw operation
2240 * to make sure we have a good fh.
2242 mutex_enter(&mi
->mi_lock
);
2243 if (mi
->mi_curr_serv
!= svp
)
2246 mutex_exit(&mi
->mi_lock
);
2250 nfs4rootvp(vnode_t
**rtvpp
, vfs_t
*vfsp
, struct servinfo4
*svp_head
,
2251 int flags
, cred_t
*cr
, zone_t
*zone
)
2253 vnode_t
*rtvp
= NULL
;
2260 vtype_t vtype
= VNON
;
2261 vtype_t tmp_vtype
= VNON
;
2262 struct servinfo4
*firstsvp
= NULL
, *svp
= svp_head
;
2263 nfs4_oo_hash_bucket_t
*bucketp
;
2265 char *droptext
= "";
2266 struct nfs_stats
*nfsstatsp
;
2267 nfs4_fname_t
*mfname
;
2269 int num_retry
, removed
;
2270 cred_t
*lcr
= NULL
, *tcr
= cr
;
2271 struct servinfo4
*origsvp
;
2274 nfsstatsp
= zone_getspecific(nfsstat_zone_key
, nfs_zone());
2275 ASSERT(nfsstatsp
!= NULL
);
2277 ASSERT(nfs_zone() == zone
);
2278 ASSERT(crgetref(cr
));
2281 * Create a mount record and link it to the vfs struct.
2283 mi
= kmem_zalloc(sizeof (*mi
), KM_SLEEP
);
2284 mutex_init(&mi
->mi_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2285 nfs_rw_init(&mi
->mi_recovlock
, NULL
, RW_DEFAULT
, NULL
);
2286 nfs_rw_init(&mi
->mi_rename_lock
, NULL
, RW_DEFAULT
, NULL
);
2287 nfs_rw_init(&mi
->mi_fh_lock
, NULL
, RW_DEFAULT
, NULL
);
2289 if (!(flags
& NFSMNT_SOFT
))
2290 mi
->mi_flags
|= MI4_HARD
;
2291 if ((flags
& NFSMNT_NOPRINT
))
2292 mi
->mi_flags
|= MI4_NOPRINT
;
2293 if (flags
& NFSMNT_INT
)
2294 mi
->mi_flags
|= MI4_INT
;
2295 if (flags
& NFSMNT_PUBLIC
)
2296 mi
->mi_flags
|= MI4_PUBLIC
;
2297 if (flags
& NFSMNT_MIRRORMOUNT
)
2298 mi
->mi_flags
|= MI4_MIRRORMOUNT
;
2299 if (flags
& NFSMNT_REFERRAL
)
2300 mi
->mi_flags
|= MI4_REFERRAL
;
2301 mi
->mi_retrans
= NFS_RETRIES
;
2302 if (svp
->sv_knconf
->knc_semantics
== NC_TPI_COTS_ORD
||
2303 svp
->sv_knconf
->knc_semantics
== NC_TPI_COTS
)
2304 mi
->mi_timeo
= nfs4_cots_timeo
;
2306 mi
->mi_timeo
= NFS_TIMEO
;
2307 mi
->mi_prog
= NFS_PROGRAM
;
2308 mi
->mi_vers
= NFS_V4
;
2309 mi
->mi_rfsnames
= rfsnames_v4
;
2310 mi
->mi_reqs
= nfsstatsp
->nfs_stats_v4
.rfsreqcnt_ptr
;
2311 cv_init(&mi
->mi_failover_cv
, NULL
, CV_DEFAULT
, NULL
);
2312 mi
->mi_servers
= svp
;
2313 mi
->mi_curr_serv
= svp
;
2314 mi
->mi_acregmin
= SEC2HR(ACREGMIN
);
2315 mi
->mi_acregmax
= SEC2HR(ACREGMAX
);
2316 mi
->mi_acdirmin
= SEC2HR(ACDIRMIN
);
2317 mi
->mi_acdirmax
= SEC2HR(ACDIRMAX
);
2318 mi
->mi_fh_expire_type
= FH4_PERSISTENT
;
2319 mi
->mi_clientid_next
= NULL
;
2320 mi
->mi_clientid_prev
= NULL
;
2322 mi
->mi_grace_wait
= 0;
2324 mi
->mi_srvsettime
= 0;
2325 mi
->mi_srvset_cnt
= 0;
2329 mi
->mi_tsize
= nfs4_tsize(svp
->sv_knconf
);
2330 mi
->mi_stsize
= mi
->mi_tsize
;
2332 if (flags
& NFSMNT_DIRECTIO
)
2333 mi
->mi_flags
|= MI4_DIRECTIO
;
2335 mi
->mi_flags
|= MI4_MOUNTING
;
2338 * Make a vfs struct for nfs. We do this here instead of below
2339 * because rtvp needs a vfs before we can do a getattr on it.
2341 * Assign a unique device id to the mount
2343 mutex_enter(&nfs_minor_lock
);
2345 nfs_minor
= (nfs_minor
+ 1) & MAXMIN32
;
2346 nfs_dev
= makedevice(nfs_major
, nfs_minor
);
2347 } while (vfs_devismounted(nfs_dev
));
2348 mutex_exit(&nfs_minor_lock
);
2350 vfsp
->vfs_dev
= nfs_dev
;
2351 vfs_make_fsid(&vfsp
->vfs_fsid
, nfs_dev
, nfs4fstyp
);
2352 vfsp
->vfs_data
= (caddr_t
)mi
;
2353 vfsp
->vfs_fstype
= nfsfstyp
;
2354 vfsp
->vfs_bsize
= nfs4_bsize
;
2357 * Initialize fields used to support async putpage operations.
2359 for (i
= 0; i
< NFS4_ASYNC_TYPES
; i
++)
2360 mi
->mi_async_clusters
[i
] = nfs4_async_clusters
;
2361 mi
->mi_async_init_clusters
= nfs4_async_clusters
;
2362 mi
->mi_async_curr
[NFS4_ASYNC_QUEUE
] =
2363 mi
->mi_async_curr
[NFS4_ASYNC_PGOPS_QUEUE
] = &mi
->mi_async_reqs
[0];
2364 mi
->mi_max_threads
= nfs4_max_threads
;
2365 mutex_init(&mi
->mi_async_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2366 cv_init(&mi
->mi_async_reqs_cv
, NULL
, CV_DEFAULT
, NULL
);
2367 cv_init(&mi
->mi_async_work_cv
[NFS4_ASYNC_QUEUE
], NULL
, CV_DEFAULT
,
2369 cv_init(&mi
->mi_async_work_cv
[NFS4_ASYNC_PGOPS_QUEUE
], NULL
,
2371 cv_init(&mi
->mi_async_cv
, NULL
, CV_DEFAULT
, NULL
);
2372 cv_init(&mi
->mi_inact_req_cv
, NULL
, CV_DEFAULT
, NULL
);
2376 zone_init_ref(&mi
->mi_zone_ref
);
2377 zone_hold_ref(zone
, &mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2378 nfs4_mi_zonelist_add(mi
);
2381 * Initialize the <open owner/cred> hash table.
2383 for (i
= 0; i
< NFS4_NUM_OO_BUCKETS
; i
++) {
2384 bucketp
= &(mi
->mi_oo_list
[i
]);
2385 mutex_init(&bucketp
->b_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2386 list_create(&bucketp
->b_oo_hash_list
,
2387 sizeof (nfs4_open_owner_t
),
2388 offsetof(nfs4_open_owner_t
, oo_hash_node
));
2392 * Initialize the freed open owner list.
2395 mi
->mi_foo_max
= NFS4_NUM_FREED_OPEN_OWNERS
;
2396 list_create(&mi
->mi_foo_list
, sizeof (nfs4_open_owner_t
),
2397 offsetof(nfs4_open_owner_t
, oo_foo_node
));
2399 list_create(&mi
->mi_lost_state
, sizeof (nfs4_lost_rqst_t
),
2400 offsetof(nfs4_lost_rqst_t
, lr_node
));
2402 list_create(&mi
->mi_bseqid_list
, sizeof (nfs4_bseqid_entry_t
),
2403 offsetof(nfs4_bseqid_entry_t
, bs_node
));
2406 * Initialize the msg buffer.
2408 list_create(&mi
->mi_msg_list
, sizeof (nfs4_debug_msg_t
),
2409 offsetof(nfs4_debug_msg_t
, msg_node
));
2410 mi
->mi_msg_count
= 0;
2411 mutex_init(&mi
->mi_msg_list_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2416 nfs4_mnt_kstat_init(vfsp
);
2419 * Initialize the shared filehandle pool.
2421 sfh4_createtab(&mi
->mi_filehandles
);
2424 * Save server path we're attempting to mount.
2426 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2427 origsvp
= copy_svp(svp
);
2428 nfs_rw_exit(&svp
->sv_lock
);
2431 * Make the GETFH call to get root fh for each replica.
2433 if (svp_head
->sv_next
)
2434 droptext
= ", dropping replica";
2437 * If the uid is set then set the creds for secure mounts
2438 * by proxy processes such as automountd.
2440 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2441 if (svp
->sv_secdata
->uid
!= 0 &&
2442 svp
->sv_secdata
->rpcflavor
== RPCSEC_GSS
) {
2444 (void) crsetugid(lcr
, svp
->sv_secdata
->uid
, crgetgid(cr
));
2447 nfs_rw_exit(&svp
->sv_lock
);
2448 for (svp
= svp_head
; svp
; svp
= svp
->sv_next
) {
2449 if (nfs4_chkdup_servinfo4(svp_head
, svp
)) {
2450 nfs_cmn_err(error
, CE_WARN
,
2451 VERS_MSG
"Host %s is a duplicate%s",
2452 svp
->sv_hostname
, droptext
);
2453 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2454 svp
->sv_flags
|= SV4_NOTINUSE
;
2455 nfs_rw_exit(&svp
->sv_lock
);
2458 mi
->mi_curr_serv
= svp
;
2461 * Just in case server path being mounted contains
2462 * symlinks and fails w/STALE, save the initial sv_path
2463 * so we can redrive the initial mount compound with the
2464 * initial sv_path -- not a symlink-expanded version.
2466 * This could only happen if a symlink was expanded
2467 * and the expanded mount compound failed stale. Because
2468 * it could be the case that the symlink was removed at
2469 * the server (and replaced with another symlink/dir,
2470 * we need to use the initial sv_path when attempting
2471 * to re-lookup everything and recover.
2473 * Other mount errors should evenutally be handled here also
2474 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount
2475 * failures will result in mount being redriven a few times.
2477 num_retry
= nfs4_max_mount_retry
;
2479 nfs4getfh_otw(mi
, svp
, &tmp_vtype
,
2480 ((flags
& NFSMNT_PUBLIC
) ? NFS4_GETFH_PUBLIC
: 0) |
2481 NFS4_GETFH_NEEDSOP
, tcr
, &e
);
2483 if (e
.error
== 0 && e
.stat
== NFS4_OK
)
2487 * For some reason, the mount compound failed. Before
2488 * retrying, we need to restore original conditions.
2490 svp
= restore_svp(mi
, svp
, origsvp
);
2493 } while (num_retry
-- > 0);
2494 error
= e
.error
? e
.error
: geterrno4(e
.stat
);
2496 nfs_cmn_err(error
, CE_WARN
,
2497 VERS_MSG
"initial call to %s failed%s: %m",
2498 svp
->sv_hostname
, droptext
);
2499 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2500 svp
->sv_flags
|= SV4_NOTINUSE
;
2501 nfs_rw_exit(&svp
->sv_lock
);
2502 mi
->mi_flags
&= ~MI4_RECOV_FAIL
;
2507 if (tmp_vtype
== VBAD
) {
2508 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2509 VERS_MSG
"%s returned a bad file type for "
2510 "root%s", svp
->sv_hostname
, droptext
);
2511 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2512 svp
->sv_flags
|= SV4_NOTINUSE
;
2513 nfs_rw_exit(&svp
->sv_lock
);
2517 if (vtype
== VNON
) {
2519 } else if (vtype
!= tmp_vtype
) {
2520 zcmn_err(mi
->mi_zone
->zone_id
, CE_WARN
,
2521 VERS_MSG
"%s returned a different file type "
2522 "for root%s", svp
->sv_hostname
, droptext
);
2523 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2524 svp
->sv_flags
|= SV4_NOTINUSE
;
2525 nfs_rw_exit(&svp
->sv_lock
);
2528 if (firstsvp
== NULL
)
2532 if (firstsvp
== NULL
) {
2538 mi
->mi_curr_serv
= svp
= firstsvp
;
2539 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2540 ASSERT((mi
->mi_curr_serv
->sv_flags
& SV4_NOTINUSE
) == 0);
2541 fh
.nfs_fh4_len
= svp
->sv_fhandle
.fh_len
;
2542 fh
.nfs_fh4_val
= svp
->sv_fhandle
.fh_buf
;
2543 mi
->mi_rootfh
= sfh4_get(&fh
, mi
);
2544 fh
.nfs_fh4_len
= svp
->sv_pfhandle
.fh_len
;
2545 fh
.nfs_fh4_val
= svp
->sv_pfhandle
.fh_buf
;
2546 mi
->mi_srvparentfh
= sfh4_get(&fh
, mi
);
2547 nfs_rw_exit(&svp
->sv_lock
);
2550 * Get the fname for filesystem root.
2552 mi
->mi_fname
= fn_get(NULL
, ".", mi
->mi_rootfh
);
2553 mfname
= mi
->mi_fname
;
2557 * Make the root vnode without attributes.
2559 rtvp
= makenfs4node_by_fh(mi
->mi_rootfh
, NULL
,
2560 &mfname
, NULL
, mi
, cr
, gethrtime());
2561 rtvp
->v_type
= vtype
;
2563 mi
->mi_curread
= mi
->mi_tsize
;
2564 mi
->mi_curwrite
= mi
->mi_stsize
;
2567 * Start the manager thread responsible for handling async worker
2571 VFS_HOLD(vfsp
); /* add reference for thread */
2572 mi
->mi_manager_thread
= zthread_create(NULL
, 0, nfs4_async_manager
,
2573 vfsp
, 0, minclsyspri
);
2574 ASSERT(mi
->mi_manager_thread
!= NULL
);
2577 * Create the thread that handles over-the-wire calls for
2579 * This needs to happen after the manager thread is created.
2582 mi
->mi_inactive_thread
= zthread_create(NULL
, 0, nfs4_inactive_thread
,
2583 mi
, 0, minclsyspri
);
2584 ASSERT(mi
->mi_inactive_thread
!= NULL
);
2586 /* If we didn't get a type, get one now */
2587 if (rtvp
->v_type
== VNON
) {
2588 va
.va_mask
= AT_TYPE
;
2589 error
= nfs4getattr(rtvp
, &va
, tcr
);
2592 rtvp
->v_type
= va
.va_type
;
2595 mi
->mi_type
= rtvp
->v_type
;
2597 mutex_enter(&mi
->mi_lock
);
2598 mi
->mi_flags
&= ~MI4_MOUNTING
;
2599 mutex_exit(&mi
->mi_lock
);
2601 /* Update VFS with new server and path info */
2602 if ((strcmp(svp
->sv_hostname
, origsvp
->sv_hostname
) != 0) ||
2603 (strcmp(svp
->sv_path
, origsvp
->sv_path
) != 0)) {
2604 len
= svp
->sv_hostnamelen
+ svp
->sv_pathlen
;
2605 resource
= kmem_zalloc(len
, KM_SLEEP
);
2606 (void) strcat(resource
, svp
->sv_hostname
);
2607 (void) strcat(resource
, ":");
2608 (void) strcat(resource
, svp
->sv_path
);
2609 vfs_setresource(vfsp
, resource
, 0);
2610 kmem_free(resource
, len
);
2621 * An error occurred somewhere, need to clean up...
2628 * We need to release our reference to the root vnode and
2629 * destroy the mntinfo4 struct that we just created.
2632 if (rp
->r_flags
& R4HASHED
)
2636 nfs4_async_stop(vfsp
);
2637 nfs4_async_manager_stop(vfsp
);
2638 removed
= nfs4_mi_zonelist_remove(mi
);
2640 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2643 * This releases the initial "hold" of the mi since it will never
2644 * be referenced by the vfsp. Also, when mount returns to vfs.c
2645 * with an error, the vfsp will be destroyed, not rele'd.
2649 if (origsvp
!= NULL
)
2660 nfs4_unmount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
2668 nfs4_ephemeral_tree_t
*eph_tree
;
2670 if (secpolicy_fs_unmount(cr
, vfsp
) != 0)
2675 if (flag
& MS_FORCE
) {
2676 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
2677 if (nfs_zone() != mi
->mi_zone
) {
2679 * If the request is coming from the wrong zone,
2680 * we don't want to create any new threads, and
2681 * performance is not a concern. Do everything
2684 NFS4_DEBUG(nfs4_client_zone_debug
, (CE_NOTE
,
2685 "nfs4_unmount x-zone forced unmount of vfs %p\n",
2687 nfs4_free_mount(vfsp
, flag
, cr
);
2690 * Free data structures asynchronously, to avoid
2691 * blocking the current thread (for performance
2694 async_free_mount(vfsp
, flag
, cr
);
2701 * Wait until all asynchronous putpage operations on
2702 * this file system are complete before flushing rnodes
2705 omax
= mi
->mi_max_threads
;
2706 if (nfs4_async_stop_sig(vfsp
))
2712 * About the only reason that this would fail would be
2713 * that the harvester is already busy tearing down this
2714 * node. So we fail back to the caller and let them try
2715 * again when needed.
2717 if (nfs4_ephemeral_umount(mi
, flag
, cr
,
2718 &must_unlock
, &eph_tree
)) {
2719 ASSERT(must_unlock
== FALSE
);
2720 mutex_enter(&mi
->mi_async_lock
);
2721 mi
->mi_max_threads
= omax
;
2722 mutex_exit(&mi
->mi_async_lock
);
2728 * If there are any active vnodes on this file system,
2729 * then the file system is busy and can't be unmounted.
2731 if (check_rtable4(vfsp
)) {
2732 nfs4_ephemeral_umount_unlock(&must_unlock
, &eph_tree
);
2734 mutex_enter(&mi
->mi_async_lock
);
2735 mi
->mi_max_threads
= omax
;
2736 mutex_exit(&mi
->mi_async_lock
);
2742 * The unmount can't fail from now on, so record any
2743 * ephemeral changes.
2745 nfs4_ephemeral_umount_activate(mi
, &must_unlock
, &eph_tree
);
2748 * There are no active files that could require over-the-wire
2749 * calls to the server, so stop the async manager and the
2752 nfs4_async_manager_stop(vfsp
);
2755 * Destroy all rnodes belonging to this file system from the
2756 * rnode hash queues and purge any resources allocated to
2759 destroy_rtable4(vfsp
, cr
);
2760 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
2762 nfs4_remove_mi_from_server(mi
, NULL
);
2763 removed
= nfs4_mi_zonelist_remove(mi
);
2765 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
2774 nfs4_root(vfs_t
*vfsp
, vnode_t
**vpp
)
2778 nfs4_fname_t
*mfname
;
2783 if (nfs_zone() != mi
->mi_zone
)
2786 svp
= mi
->mi_curr_serv
;
2788 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2789 if (svp
->sv_flags
& SV4_ROOT_STALE
) {
2790 nfs_rw_exit(&svp
->sv_lock
);
2792 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_WRITER
, 0);
2793 if (svp
->sv_flags
& SV4_ROOT_STALE
) {
2794 svp
->sv_flags
&= ~SV4_ROOT_STALE
;
2795 nfs_rw_exit(&svp
->sv_lock
);
2798 nfs_rw_exit(&svp
->sv_lock
);
2800 nfs_rw_exit(&svp
->sv_lock
);
2803 mfname
= mi
->mi_fname
;
2805 vp
= makenfs4node_by_fh(mi
->mi_rootfh
, NULL
, &mfname
, NULL
,
2806 VFTOMI4(vfsp
), CRED(), gethrtime());
2808 if (VTOR4(vp
)->r_flags
& R4STALE
) {
2813 ASSERT(vp
->v_type
== VNON
|| vp
->v_type
== mi
->mi_type
);
2815 vp
->v_type
= mi
->mi_type
;
2823 nfs4_statfs_otw(vnode_t
*vp
, struct statvfs64
*sbp
, cred_t
*cr
)
2827 nfs4_ga_ext_res_t ger
;
2829 gar
.n4g_ext_res
= &ger
;
2831 if (error
= nfs4_attr_otw(vp
, TAG_FSINFO
, &gar
,
2832 NFS4_STATFS_ATTR_MASK
, cr
))
2835 *sbp
= gar
.n4g_ext_res
->n4g_sb
;
2841 * Get file system statistics.
2844 nfs4_statvfs(vfs_t
*vfsp
, struct statvfs64
*sbp
)
2850 error
= nfs4_root(vfsp
, &vp
);
2856 error
= nfs4_statfs_otw(vp
, sbp
, cr
);
2858 (void) strncpy(sbp
->f_basetype
,
2859 vfssw
[vfsp
->vfs_fstype
].vsw_name
, FSTYPSZ
);
2860 sbp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
2862 nfs4_purge_stale_fh(error
, vp
, cr
);
2870 static kmutex_t nfs4_syncbusy
;
2873 * Flush dirty nfs files for file system vfsp.
2874 * If vfsp == NULL, all nfs files are flushed.
2876 * SYNC_CLOSE in flag is passed to us to
2877 * indicate that we are shutting down and/or
2881 nfs4_sync(vfs_t
*vfsp
, short flag
, cred_t
*cr
)
2884 * Cross-zone calls are OK here, since this translates to a
2885 * fop_putpage(B_ASYNC), which gets picked up by the right zone.
2887 if (!(flag
& SYNC_ATTR
) && mutex_tryenter(&nfs4_syncbusy
) != 0) {
2889 mutex_exit(&nfs4_syncbusy
);
2893 * if SYNC_CLOSE is set then we know that
2894 * the system is rebooting, mark the mntinfo
2895 * for later examination.
2897 if (vfsp
&& (flag
& SYNC_CLOSE
)) {
2901 if (!(mi
->mi_flags
& MI4_SHUTDOWN
)) {
2902 mutex_enter(&mi
->mi_lock
);
2903 mi
->mi_flags
|= MI4_SHUTDOWN
;
2904 mutex_exit(&mi
->mi_lock
);
2911 * vget is difficult, if not impossible, to support in v4 because we don't
2912 * know the parent directory or name, which makes it impossible to create a
2913 * useful shadow vnode. And we need the shadow vnode for things like
2919 * XXX Check nfs4_vget_pseudo() for dependency.
2922 nfs4_vget(vfs_t
*vfsp
, vnode_t
**vpp
, fid_t
*fidp
)
2928 * nfs4_mountroot gets called in the case where we are diskless booting. All
2929 * we need from here is the ability to get the server info and from there we
2930 * can simply call nfs4_rootvp.
2934 nfs4_mountroot(vfs_t
*vfsp
, whymountroot_t why
)
2937 char root_hostname
[SYS_NMLN
+1];
2938 struct servinfo4
*svp
;
2947 struct nfs_args args
; /* nfs mount arguments */
2948 static char token
[10];
2951 bzero(&args
, sizeof (args
));
2953 /* do this BEFORE getfile which causes xid stamps to be initialized */
2954 clkset(-1L); /* hack for now - until we get time svc? */
2956 if (why
== ROOT_REMOUNT
) {
2960 panic("nfs4_mountroot: why == ROOT_REMOUNT");
2963 if (why
== ROOT_UNMOUNT
) {
2965 * Nothing to do for NFS.
2976 (void) getfsname("root", name
, sizeof (token
));
2979 root_path
= pn
.pn_path
;
2981 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
2982 nfs_rw_init(&svp
->sv_lock
, NULL
, RW_DEFAULT
, NULL
);
2983 svp
->sv_knconf
= kmem_zalloc(sizeof (*svp
->sv_knconf
), KM_SLEEP
);
2984 svp
->sv_knconf
->knc_protofmly
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
2985 svp
->sv_knconf
->knc_proto
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
2988 * Get server address
2990 * Get server's transport
2991 * Get server's hostname
2994 args
.addr
= &svp
->sv_addr
;
2995 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
2996 args
.fh
= (char *)&svp
->sv_fhandle
;
2997 args
.knconf
= svp
->sv_knconf
;
2998 args
.hostname
= root_hostname
;
3000 if (error
= mount_root(*name
? name
: "root", root_path
, NFS_V4
,
3001 &args
, &vfsflags
)) {
3002 if (error
== EPROTONOSUPPORT
)
3003 nfs_cmn_err(error
, CE_WARN
, "nfs4_mountroot: "
3004 "mount_root failed: server doesn't support NFS V4");
3006 nfs_cmn_err(error
, CE_WARN
,
3007 "nfs4_mountroot: mount_root failed: %m");
3008 nfs_rw_exit(&svp
->sv_lock
);
3013 nfs_rw_exit(&svp
->sv_lock
);
3014 svp
->sv_hostnamelen
= (int)(strlen(root_hostname
) + 1);
3015 svp
->sv_hostname
= kmem_alloc(svp
->sv_hostnamelen
, KM_SLEEP
);
3016 (void) strcpy(svp
->sv_hostname
, root_hostname
);
3018 svp
->sv_pathlen
= (int)(strlen(root_path
) + 1);
3019 svp
->sv_path
= kmem_alloc(svp
->sv_pathlen
, KM_SLEEP
);
3020 (void) strcpy(svp
->sv_path
, root_path
);
3023 * Force root partition to always be mounted with AUTH_UNIX for now
3025 svp
->sv_secdata
= kmem_alloc(sizeof (*svp
->sv_secdata
), KM_SLEEP
);
3026 svp
->sv_secdata
->secmod
= AUTH_UNIX
;
3027 svp
->sv_secdata
->rpcflavor
= AUTH_UNIX
;
3028 svp
->sv_secdata
->data
= NULL
;
3033 error
= nfs4rootvp(&rtvp
, vfsp
, svp
, args
.flags
, cr
, global_zone
);
3045 * Send client id to the server, if necessary
3047 nfs4_error_zinit(&n4e
);
3048 nfs4setclientid(mi
, cr
, FALSE
, &n4e
);
3058 error
= nfs4_setopts(rtvp
, DATAMODEL_NATIVE
, &args
);
3060 nfs_cmn_err(error
, CE_WARN
,
3061 "nfs4_mountroot: invalid root mount options");
3066 (void) vfs_lock_wait(vfsp
);
3067 vfs_add(NULL
, vfsp
, vfsflags
);
3070 size
= strlen(svp
->sv_hostname
);
3071 (void) strcpy(rootfs
.bo_name
, svp
->sv_hostname
);
3072 rootfs
.bo_name
[size
] = ':';
3073 (void) strcpy(&rootfs
.bo_name
[size
+ 1], root_path
);
3080 nfs4_async_stop(vfsp
);
3081 nfs4_async_manager_stop(vfsp
);
3091 * Initialization routine for VFS routines. Should only be called once
3096 mutex_init(&nfs4_syncbusy
, NULL
, MUTEX_DEFAULT
, NULL
);
3097 nfs4setclientid_init();
3098 nfs4_ephemeral_init();
3105 nfs4_ephemeral_fini();
3106 nfs4setclientid_fini();
3107 mutex_destroy(&nfs4_syncbusy
);
3111 nfs4_freevfs(vfs_t
*vfsp
)
3115 /* need to release the initial hold */
3119 * At this point, we can no longer reference the vfs
3120 * and need to inform other holders of the reference
3121 * to the mntinfo4_t.
3129 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
3131 struct nfs4_server nfs4_server_lst
=
3132 { &nfs4_server_lst
, &nfs4_server_lst
};
3134 kmutex_t nfs4_server_lst_lock
;
3137 nfs4setclientid_init(void)
3139 mutex_init(&nfs4_server_lst_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3143 nfs4setclientid_fini(void)
3145 mutex_destroy(&nfs4_server_lst_lock
);
3148 int nfs4_retry_sclid_delay
= NFS4_RETRY_SCLID_DELAY
;
3149 int nfs4_num_sclid_retries
= NFS4_NUM_SCLID_RETRIES
;
3152 * Set the clientid for the server for "mi". No-op if the clientid is
3155 * The recovery boolean should be set to TRUE if this function was called
3156 * by the recovery code, and FALSE otherwise. This is used to determine
3157 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
3158 * for adding a mntinfo4_t to a nfs4_server_t.
3160 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then
3161 * 'n4ep->error' is set to geterrno4(n4ep->stat).
3164 nfs4setclientid(mntinfo4_t
*mi
, cred_t
*cr
, bool_t recovery
, nfs4_error_t
*n4ep
)
3166 struct nfs4_server
*np
;
3167 struct servinfo4
*svp
= mi
->mi_curr_serv
;
3168 nfs4_recov_state_t recov_state
;
3169 int num_retries
= 0;
3172 int retry_inuse
= 1; /* only retry once on NFS4ERR_CLID_INUSE */
3173 time_t lease_time
= 0;
3175 recov_state
.rs_flags
= 0;
3176 recov_state
.rs_num_retry_despite_err
= 0;
3177 ASSERT(n4ep
!= NULL
);
3181 nfs4_error_zinit(n4ep
);
3183 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, 0);
3185 mutex_enter(&nfs4_server_lst_lock
);
3186 np
= servinfo4_to_nfs4_server(svp
); /* This locks np if it is found */
3187 mutex_exit(&nfs4_server_lst_lock
);
3189 struct nfs4_server
*tnp
;
3190 np
= new_nfs4_server(svp
, cr
);
3191 mutex_enter(&np
->s_lock
);
3193 mutex_enter(&nfs4_server_lst_lock
);
3194 tnp
= servinfo4_to_nfs4_server(svp
);
3197 * another thread snuck in and put server on list.
3198 * since we aren't adding it to the nfs4_server_list
3199 * we need to set the ref count to 0 and destroy it.
3202 destroy_nfs4_server(np
);
3206 * do not give list a reference until everything
3209 insque(np
, &nfs4_server_lst
);
3211 mutex_exit(&nfs4_server_lst_lock
);
3213 ASSERT(MUTEX_HELD(&np
->s_lock
));
3215 * If we find the server already has N4S_CLIENTID_SET, then
3216 * just return, we've already done SETCLIENTID to that server
3218 if (np
->s_flags
& N4S_CLIENTID_SET
) {
3219 /* add mi to np's mntinfo4_list */
3220 nfs4_add_mi_to_server(np
, mi
);
3222 nfs_rw_exit(&mi
->mi_recovlock
);
3223 mutex_exit(&np
->s_lock
);
3224 nfs4_server_rele(np
);
3227 mutex_exit(&np
->s_lock
);
3231 * Drop the mi_recovlock since nfs4_start_op will
3232 * acquire it again for us.
3235 nfs_rw_exit(&mi
->mi_recovlock
);
3237 n4ep
->error
= nfs4_start_op(mi
, NULL
, NULL
, &recov_state
);
3239 nfs4_server_rele(np
);
3244 mutex_enter(&np
->s_lock
);
3245 while (np
->s_flags
& N4S_CLIENTID_PEND
) {
3246 if (!cv_wait_sig(&np
->s_clientid_pend
, &np
->s_lock
)) {
3247 mutex_exit(&np
->s_lock
);
3248 nfs4_server_rele(np
);
3250 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
,
3252 n4ep
->error
= EINTR
;
3257 if (np
->s_flags
& N4S_CLIENTID_SET
) {
3258 /* XXX copied/pasted from above */
3259 /* add mi to np's mntinfo4_list */
3260 nfs4_add_mi_to_server(np
, mi
);
3261 mutex_exit(&np
->s_lock
);
3262 nfs4_server_rele(np
);
3264 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, recovery
);
3269 * Reset the N4S_CB_PINGED flag. This is used to
3270 * indicate if we have received a CB_NULL from the
3271 * server. Also we reset the waiter flag.
3273 np
->s_flags
&= ~(N4S_CB_PINGED
| N4S_CB_WAITER
);
3274 /* any failure must now clear this flag */
3275 np
->s_flags
|= N4S_CLIENTID_PEND
;
3276 mutex_exit(&np
->s_lock
);
3277 nfs4setclientid_otw(mi
, svp
, cr
, np
, n4ep
, &retry_inuse
);
3279 if (n4ep
->error
== EACCES
) {
3281 * If the uid is set then set the creds for secure mounts
3282 * by proxy processes such as automountd.
3284 (void) nfs_rw_enter_sig(&svp
->sv_lock
, RW_READER
, 0);
3285 if (svp
->sv_secdata
->uid
!= 0) {
3287 (void) crsetugid(lcr
, svp
->sv_secdata
->uid
,
3290 nfs_rw_exit(&svp
->sv_lock
);
3293 mutex_enter(&np
->s_lock
);
3296 mutex_exit(&np
->s_lock
);
3297 nfs4setclientid_otw(mi
, svp
, lcr
, np
, n4ep
,
3301 mutex_enter(&np
->s_lock
);
3302 lease_time
= np
->s_lease_time
;
3303 np
->s_flags
&= ~N4S_CLIENTID_PEND
;
3304 mutex_exit(&np
->s_lock
);
3306 if (n4ep
->error
!= 0 || n4ep
->stat
!= NFS4_OK
) {
3308 * Start recovery if failover is a possibility. If
3309 * invoked by the recovery thread itself, then just
3310 * return and let it handle the failover first. NB:
3311 * recovery is not allowed if the mount is in progress
3312 * since the infrastructure is not sufficiently setup
3313 * to allow it. Just return the error (after suitable
3316 if (FAILOVER_MOUNT4(mi
) && nfs4_try_failover(n4ep
)) {
3317 (void) nfs4_start_recovery(n4ep
, mi
, NULL
,
3318 NULL
, NULL
, NULL
, OP_SETCLIENTID
, NULL
, NULL
, NULL
);
3320 * Don't retry here, just return and let
3321 * recovery take over.
3325 } else if (nfs4_rpc_retry_error(n4ep
->error
) ||
3326 n4ep
->stat
== NFS4ERR_RESOURCE
||
3327 n4ep
->stat
== NFS4ERR_STALE_CLIENTID
) {
3331 * Always retry if in recovery or once had
3332 * contact with the server (but now it's
3335 if (recovery
== TRUE
||
3336 n4ep
->error
== ETIMEDOUT
||
3337 n4ep
->error
== ECONNRESET
)
3339 } else if (retry_inuse
&& n4ep
->error
== 0 &&
3340 n4ep
->stat
== NFS4ERR_CLID_INUSE
) {
3346 * Since everything succeeded give the list a reference count if
3347 * it hasn't been given one by add_new_nfs4_server() or if this
3348 * is not a recovery situation in which case it is already on
3351 mutex_enter(&np
->s_lock
);
3352 if ((np
->s_flags
& N4S_INSERTED
) == 0) {
3354 np
->s_flags
|= N4S_INSERTED
;
3356 mutex_exit(&np
->s_lock
);
3360 nfs4_end_op(mi
, NULL
, NULL
, &recov_state
, recovery
);
3363 if (retry
&& num_retries
++ < nfs4_num_sclid_retries
) {
3365 delay(SEC_TO_TICK(lease_time
+ nfs4_retry_sclid_delay
));
3368 delay(SEC_TO_TICK(nfs4_retry_sclid_delay
));
3370 nfs4_server_rele(np
);
3375 if (n4ep
->error
== 0)
3376 n4ep
->error
= geterrno4(n4ep
->stat
);
3378 /* broadcast before release in case no other threads are waiting */
3379 cv_broadcast(&np
->s_clientid_pend
);
3380 nfs4_server_rele(np
);
3383 int nfs4setclientid_otw_debug
= 0;
3386 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
3387 * but nothing else; the calling function must be designed to handle those
3391 nfs4setclientid_otw(mntinfo4_t
*mi
, struct servinfo4
*svp
, cred_t
*cr
,
3392 struct nfs4_server
*np
, nfs4_error_t
*ep
, int *retry_inusep
)
3394 COMPOUND4args_clnt args
;
3395 COMPOUND4res_clnt res
;
3396 nfs_argop4 argop
[3];
3397 SETCLIENTID4args
*s_args
;
3398 SETCLIENTID4resok
*s_resok
;
3400 nfs4_ga_res_t
*garp
= NULL
;
3401 timespec_t prop_time
, after_time
;
3403 clientid4 tmp_clientid
;
3405 ASSERT(!MUTEX_HELD(&np
->s_lock
));
3407 args
.ctag
= TAG_SETCLIENTID
;
3413 argop
[0].argop
= OP_PUTROOTFH
;
3416 argop
[1].argop
= OP_GETATTR
;
3417 argop
[1].nfs_argop4_u
.opgetattr
.attr_request
= FATTR4_LEASE_TIME_MASK
;
3418 argop
[1].nfs_argop4_u
.opgetattr
.mi
= mi
;
3421 argop
[2].argop
= OP_SETCLIENTID
;
3423 s_args
= &argop
[2].nfs_argop4_u
.opsetclientid
;
3425 mutex_enter(&np
->s_lock
);
3427 s_args
->client
.verifier
= np
->clidtosend
.verifier
;
3428 s_args
->client
.id_len
= np
->clidtosend
.id_len
;
3429 ASSERT(s_args
->client
.id_len
<= NFS4_OPAQUE_LIMIT
);
3430 s_args
->client
.id_val
= np
->clidtosend
.id_val
;
3433 * Callback needs to happen on non-RDMA transport
3434 * Check if we have saved the original knetconfig
3435 * if so, use that instead.
3437 if (svp
->sv_origknconf
!= NULL
)
3438 nfs4_cb_args(np
, svp
->sv_origknconf
, s_args
);
3440 nfs4_cb_args(np
, svp
->sv_knconf
, s_args
);
3442 mutex_exit(&np
->s_lock
);
3444 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, ep
);
3449 /* getattr lease_time res */
3450 if ((res
.array_len
>= 2) &&
3451 (res
.array
[1].nfs_resop4_u
.opgetattr
.status
== NFS4_OK
)) {
3452 garp
= &res
.array
[1].nfs_resop4_u
.opgetattr
.ga_res
;
3456 * The 32 bit client cannot handle a lease time greater than
3457 * (INT32_MAX/1000000). This is due to the use of the
3458 * lease_time in calls to drv_usectohz() in
3459 * nfs4_renew_lease_thread(). The problem is that
3460 * drv_usectohz() takes a time_t (which is just a long = 4
3461 * bytes) as its parameter. The lease_time is multiplied by
3462 * 1000000 to convert seconds to usecs for the parameter. If
3463 * a number bigger than (INT32_MAX/1000000) is used then we
3464 * overflow on the 32bit client.
3466 if (garp
->n4g_ext_res
->n4g_leasetime
> (INT32_MAX
/1000000)) {
3467 garp
->n4g_ext_res
->n4g_leasetime
= INT32_MAX
/1000000;
3471 mutex_enter(&np
->s_lock
);
3472 np
->s_lease_time
= garp
->n4g_ext_res
->n4g_leasetime
;
3475 * Keep track of the lease period for the mi's
3476 * mi_msg_list. We need an appropriate time
3477 * bound to associate past facts with a current
3478 * event. The lease period is perfect for this.
3480 mutex_enter(&mi
->mi_msg_list_lock
);
3481 mi
->mi_lease_period
= np
->s_lease_time
;
3482 mutex_exit(&mi
->mi_msg_list_lock
);
3483 mutex_exit(&np
->s_lock
);
3487 if (res
.status
== NFS4ERR_CLID_INUSE
) {
3488 clientaddr4
*clid_inuse
;
3490 if (!(*retry_inusep
)) {
3491 clid_inuse
= &res
.array
->nfs_resop4_u
.
3492 opsetclientid
.SETCLIENTID4res_u
.client_using
;
3494 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3495 "NFS4 mount (SETCLIENTID failed)."
3496 " nfs4_client_id.id is in"
3497 "use already by: r_netid<%s> r_addr<%s>",
3498 clid_inuse
->r_netid
, clid_inuse
->r_addr
);
3502 * XXX - The client should be more robust in its
3503 * handling of clientid in use errors (regen another
3504 * clientid and try again?)
3506 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3511 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3515 s_resok
= &res
.array
[2].nfs_resop4_u
.
3516 opsetclientid
.SETCLIENTID4res_u
.resok4
;
3518 tmp_clientid
= s_resok
->clientid
;
3520 verf
= s_resok
->setclientid_confirm
;
3523 if (nfs4setclientid_otw_debug
) {
3529 cid
.clientid
= s_resok
->clientid
;
3531 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3532 "nfs4setclientid_otw: OK, clientid = %x,%x, "
3533 "verifier = %" PRIx64
"\n", cid
.foo
[0], cid
.foo
[1], verf
);
3537 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3539 /* Confirm the client id and get the lease_time attribute */
3541 args
.ctag
= TAG_SETCLIENTID_CF
;
3546 argop
[0].argop
= OP_SETCLIENTID_CONFIRM
;
3548 argop
[0].nfs_argop4_u
.opsetclientid_confirm
.clientid
= tmp_clientid
;
3549 argop
[0].nfs_argop4_u
.opsetclientid_confirm
.setclientid_confirm
= verf
;
3551 /* used to figure out RTT for np */
3552 gethrestime(&prop_time
);
3554 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setlientid_otw: "
3555 "start time: %ld sec %ld nsec", prop_time
.tv_sec
,
3556 prop_time
.tv_nsec
));
3558 rfs4call(mi
, &args
, &res
, cr
, &doqueue
, 0, ep
);
3560 gethrestime(&after_time
);
3561 mutex_enter(&np
->s_lock
);
3562 np
->propagation_delay
.tv_sec
=
3563 MAX(1, after_time
.tv_sec
- prop_time
.tv_sec
);
3564 mutex_exit(&np
->s_lock
);
3566 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setlcientid_otw: "
3567 "finish time: %ld sec ", after_time
.tv_sec
));
3569 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4setclientid_otw: "
3570 "propagation delay set to %ld sec",
3571 np
->propagation_delay
.tv_sec
));
3576 if (res
.status
== NFS4ERR_CLID_INUSE
) {
3577 clientaddr4
*clid_inuse
;
3579 if (!(*retry_inusep
)) {
3580 clid_inuse
= &res
.array
->nfs_resop4_u
.
3581 opsetclientid
.SETCLIENTID4res_u
.client_using
;
3583 zcmn_err(mi
->mi_zone
->zone_id
, CE_NOTE
,
3584 "SETCLIENTID_CONFIRM failed. "
3585 "nfs4_client_id.id is in use already by: "
3586 "r_netid<%s> r_addr<%s>",
3587 clid_inuse
->r_netid
, clid_inuse
->r_addr
);
3590 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3595 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3599 mutex_enter(&np
->s_lock
);
3600 np
->clientid
= tmp_clientid
;
3601 np
->s_flags
|= N4S_CLIENTID_SET
;
3603 /* Add mi to np's mntinfo4 list */
3604 nfs4_add_mi_to_server(np
, mi
);
3606 if (np
->lease_valid
== NFS4_LEASE_NOT_STARTED
) {
3608 * Start lease management thread.
3609 * Keep trying until we succeed.
3612 np
->s_refcnt
++; /* pass reference to thread */
3613 (void) zthread_create(NULL
, 0, nfs4_renew_lease_thread
, np
, 0,
3616 mutex_exit(&np
->s_lock
);
3618 (void) xdr_free(xdr_COMPOUND4res_clnt
, (caddr_t
)&res
);
3622 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes
3623 * mi's clientid the same as sp's.
3624 * Assumes sp is locked down.
3627 nfs4_add_mi_to_server(nfs4_server_t
*sp
, mntinfo4_t
*mi
)
3632 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
3633 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
3634 ASSERT(sp
!= &nfs4_server_lst
);
3635 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3637 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3638 "nfs4_add_mi_to_server: add mi %p to sp %p",
3639 (void*)mi
, (void*)sp
));
3641 for (tmi
= sp
->mntinfo4_list
;
3643 tmi
= tmi
->mi_clientid_next
) {
3645 NFS4_DEBUG(nfs4_client_lease_debug
,
3647 "nfs4_add_mi_to_server: mi in list"));
3653 * First put a hold on the mntinfo4's vfsp so that references via
3654 * mntinfo4_list will be valid.
3657 VFS_HOLD(mi
->mi_vfsp
);
3659 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
, "nfs4_add_mi_to_server: "
3660 "hold vfs %p for mi: %p", (void*)mi
->mi_vfsp
, (void*)mi
));
3663 if (sp
->mntinfo4_list
)
3664 sp
->mntinfo4_list
->mi_clientid_prev
= mi
;
3665 mi
->mi_clientid_next
= sp
->mntinfo4_list
;
3667 sp
->mntinfo4_list
= mi
;
3668 mi
->mi_srvsettime
= gethrestime_sec();
3669 mi
->mi_srvset_cnt
++;
3672 /* set mi's clientid to that of sp's for later matching */
3673 mi
->mi_clientid
= sp
->clientid
;
3676 * Update the clientid for any other mi's belonging to sp. This
3677 * must be done here while we hold sp->s_lock, so that
3678 * find_nfs4_server() continues to work.
3681 for (tmi
= sp
->mntinfo4_list
;
3683 tmi
= tmi
->mi_clientid_next
) {
3685 tmi
->mi_clientid
= sp
->clientid
;
3691 * Remove the mi from sp's mntinfo4_list and release its reference.
3692 * Exception: if mi still has open files, flag it for later removal (when
3693 * all the files are closed).
3695 * If this is the last mntinfo4 in sp's list then tell the lease renewal
3699 nfs4_remove_mi_from_server_nolock(mntinfo4_t
*mi
, nfs4_server_t
*sp
)
3701 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3702 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
3703 (void*)mi
, (void*)sp
));
3706 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3707 ASSERT(mi
->mi_open_files
>= 0);
3710 * First make sure this mntinfo4 can be taken off of the list,
3711 * ie: it doesn't have any open files remaining.
3713 if (mi
->mi_open_files
> 0) {
3714 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3715 "nfs4_remove_mi_from_server_nolock: don't "
3716 "remove mi since it still has files open"));
3718 mutex_enter(&mi
->mi_lock
);
3719 mi
->mi_flags
|= MI4_REMOVE_ON_LAST_CLOSE
;
3720 mutex_exit(&mi
->mi_lock
);
3724 VFS_HOLD(mi
->mi_vfsp
);
3726 VFS_RELE(mi
->mi_vfsp
);
3728 if (sp
->mntinfo4_list
== NULL
) {
3729 /* last fs unmounted, kill the thread */
3730 NFS4_DEBUG(nfs4_client_lease_debug
, (CE_NOTE
,
3731 "remove_mi_from_nfs4_server_nolock: kill the thread"));
3732 nfs4_mark_srv_dead(sp
);
3737 * Remove mi from sp's mntinfo4_list and release the vfs reference.
3740 remove_mi(nfs4_server_t
*sp
, mntinfo4_t
*mi
)
3742 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3745 * We release a reference, and the caller must still have a
3748 ASSERT(mi
->mi_vfsp
->vfs_count
>= 2);
3750 if (mi
->mi_clientid_prev
) {
3751 mi
->mi_clientid_prev
->mi_clientid_next
= mi
->mi_clientid_next
;
3753 /* This is the first mi in sp's mntinfo4_list */
3755 * Make sure the first mntinfo4 in the list is the actual
3756 * mntinfo4 passed in.
3758 ASSERT(sp
->mntinfo4_list
== mi
);
3760 sp
->mntinfo4_list
= mi
->mi_clientid_next
;
3762 if (mi
->mi_clientid_next
)
3763 mi
->mi_clientid_next
->mi_clientid_prev
= mi
->mi_clientid_prev
;
3765 /* Now mark the mntinfo4's links as being removed */
3766 mi
->mi_clientid_prev
= mi
->mi_clientid_next
= NULL
;
3768 mi
->mi_srvset_cnt
++;
3770 VFS_RELE(mi
->mi_vfsp
);
3774 * Free all the entries in sp's mntinfo4_list.
3777 remove_all_mi(nfs4_server_t
*sp
)
3781 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3783 while (sp
->mntinfo4_list
!= NULL
) {
3784 mi
= sp
->mntinfo4_list
;
3786 * Grab a reference in case there is only one left (which
3787 * remove_mi() frees).
3789 VFS_HOLD(mi
->mi_vfsp
);
3791 VFS_RELE(mi
->mi_vfsp
);
3796 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
3798 * This version can be called with a null nfs4_server_t arg,
3799 * and will either find the right one and handle locking, or
3800 * do nothing because the mi wasn't added to an sp's mntinfo4_list.
3803 nfs4_remove_mi_from_server(mntinfo4_t
*mi
, nfs4_server_t
*esp
)
3808 nfs4_remove_mi_from_server_nolock(mi
, esp
);
3812 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, 0);
3813 if (sp
= find_nfs4_server_all(mi
, 1)) {
3814 nfs4_remove_mi_from_server_nolock(mi
, sp
);
3815 mutex_exit(&sp
->s_lock
);
3816 nfs4_server_rele(sp
);
3818 nfs_rw_exit(&mi
->mi_recovlock
);
3822 * Return TRUE if the given server has any non-unmounted filesystems.
3826 nfs4_fs_active(nfs4_server_t
*sp
)
3830 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3832 for (mi
= sp
->mntinfo4_list
; mi
!= NULL
; mi
= mi
->mi_clientid_next
) {
3833 if (!(mi
->mi_vfsp
->vfs_flag
& VFS_UNMOUNTED
))
3841 * Mark sp as finished and notify any waiters.
3845 nfs4_mark_srv_dead(nfs4_server_t
*sp
)
3847 ASSERT(MUTEX_HELD(&sp
->s_lock
));
3849 sp
->s_thread_exit
= NFS4_THREAD_EXIT
;
3850 cv_broadcast(&sp
->cv_thread_exit
);
/*
 * new_nfs4_server: allocate and initialise an nfs4_server for the server
 * described by svp.
 *
 * svp - servinfo4 whose sv_addr is copied into the new node (a buffer of
 *       sv_addr.maxlen bytes is allocated, sv_addr.len bytes are copied).
 * cr  - credential; per the in-body comment it is saved for use by the renew
 *       thread (the statement that stores it is not visible in this view).
 *
 * Steps visible here:
 *   - zeroed allocation of the node and a private copy of the address;
 *   - the client verifier is built from a timestamp (tt) written through
 *     nfs4clientid_verifier, presumably a union overlaying un_verifier and
 *     un_curtime, TODO confirm against the full declaration;
 *   - the opaque client id is the id_val format string expanded with
 *     uts_nodename(), including its terminating NUL, followed by the raw
 *     server address bytes starting at offset len;
 *   - condition variables, s_lock, s_recovlock and the delegation list are
 *     initialised, counters are cleared, and the zone id and per-zone
 *     callback globals are recorded.
 *
 * NOTE(review): clidtosend.id_len is len + saddr.maxlen, the buffer comes
 * from kmem_alloc (not zeroed), and only saddr.len address bytes are copied
 * in.  If maxlen can exceed len, the trailing bytes of the id are
 * uninitialised - confirm whether callers guarantee maxlen == len.
 */
3854 * Create a new nfs4_server_t structure.
3855 * Returns new node unlocked and not in list, but with a reference count of
3858 struct nfs4_server
*
3859 new_nfs4_server(struct servinfo4
*svp
, cred_t
*cr
)
3861 struct nfs4_server
*np
;
3868 verifier4 un_verifier
;
3869 } nfs4clientid_verifier
;
3871 * We change this ID string carefully and with the Solaris
3872 * NFS server behaviour in mind. "+referrals" indicates
3873 * a client that can handle an NFSv4 referral.
3875 char id_val
[] = "Solaris: %s, NFSv4 kernel client +referrals";
/* KM_SLEEP allocations: these block rather than fail. */
3878 np
= kmem_zalloc(sizeof (struct nfs4_server
), KM_SLEEP
);
3879 np
->saddr
.len
= svp
->sv_addr
.len
;
3880 np
->saddr
.maxlen
= svp
->sv_addr
.maxlen
;
3881 np
->saddr
.buf
= kmem_alloc(svp
->sv_addr
.maxlen
, KM_SLEEP
);
3882 bcopy(svp
->sv_addr
.buf
, np
->saddr
.buf
, svp
->sv_addr
.len
);
3886 * Build the nfs_client_id4 for this server mount. Ensure
3887 * the verifier is useful and that the identification is
3888 * somehow based on the server's address for the case of
3889 * multi-homed servers.
3891 nfs4clientid_verifier
.un_verifier
= 0;
3893 nfs4clientid_verifier
.un_curtime
.sec
= (uint32_t)tt
.tv_sec
;
3894 nfs4clientid_verifier
.un_curtime
.subsec
= (uint32_t)tt
.tv_nsec
;
3895 np
->clidtosend
.verifier
= nfs4clientid_verifier
.un_verifier
;
3898 * calculate the length of the opaque identifier. Subtract 2
3899 * for the "%s" and add the traditional +1 for null
3902 len
= strlen(id_val
) - 2 + strlen(uts_nodename()) + 1;
3903 np
->clidtosend
.id_len
= len
+ np
->saddr
.maxlen
;
3905 np
->clidtosend
.id_val
= kmem_alloc(np
->clidtosend
.id_len
, KM_SLEEP
);
3906 (void) sprintf(np
->clidtosend
.id_val
, id_val
, uts_nodename());
3907 bcopy(np
->saddr
.buf
, &np
->clidtosend
.id_val
[len
], np
->saddr
.len
);
3910 np
->mntinfo4_list
= NULL
;
3911 /* save cred for issuing rfs4calls inside the renew thread */
3914 cv_init(&np
->cv_thread_exit
, NULL
, CV_DEFAULT
, NULL
);
3915 mutex_init(&np
->s_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3916 nfs_rw_init(&np
->s_recovlock
, NULL
, RW_DEFAULT
, NULL
);
3917 list_create(&np
->s_deleg_list
, sizeof (rnode4_t
),
3918 offsetof(rnode4_t
, r_deleg_link
));
3919 np
->s_thread_exit
= 0;
3920 np
->state_ref_count
= 0;
3921 np
->lease_valid
= NFS4_LEASE_NOT_STARTED
;
3922 cv_init(&np
->s_cv_otw_count
, NULL
, CV_DEFAULT
, NULL
);
3923 cv_init(&np
->s_clientid_pend
, NULL
, CV_DEFAULT
, NULL
);
3924 np
->s_otw_call_count
= 0;
3925 cv_init(&np
->wait_cb_null
, NULL
, CV_DEFAULT
, NULL
);
3926 np
->zoneid
= getzoneid();
3927 np
->zone_globals
= nfs4_get_callback_globals();
3928 ASSERT(np
->zone_globals
!= NULL
);
/*
 * add_new_nfs4_server: build a node with new_nfs4_server(svp, cr), take its
 * s_lock, link it into the global nfs4_server_lst with insque(), bump
 * s_refcnt on behalf of the list, and flag it N4S_INSERTED.
 *
 * svp, cr - passed straight through to new_nfs4_server().
 *
 * The caller must hold nfs4_server_lst_lock (asserted).  Note the lock
 * order used here: the list lock is held first, then the node s_lock.
 */
3933 * Create a new nfs4_server_t structure and add it to the list.
3934 * Returns new node locked; reference must eventually be freed.
3936 static struct nfs4_server
*
3937 add_new_nfs4_server(struct servinfo4
*svp
, cred_t
*cr
)
3941 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock
));
3942 sp
= new_nfs4_server(svp
, cr
);
3943 mutex_enter(&sp
->s_lock
);
3944 insque(sp
, &nfs4_server_lst
);
3945 sp
->s_refcnt
++; /* list gets a reference */
3946 sp
->s_flags
|= N4S_INSERTED
;
/*
 * Debug switch, off by default.  It gates the NFS4_DEBUG output in
 * dumpnfs4slist() and is tested in nfs4_move_mi() before that dump is
 * requested.
 */
3951 int nfs4_server_t_debug
= 0;
/*
 * dumpnfs4slist: debugging aid that logs the contents of nfs4_server_lst
 * through NFS4_DEBUG, gated by nfs4_server_t_debug.
 *
 * txt      - label naming the caller, printed in the first message.
 * mi       - mntinfo4 of interest; only its pointer value is printed.
 * clientid - client id the caller is looking for.
 * srv_p    - servinfo4 whose address is the search target; its length and a
 *            hash16() digest of the address bytes are printed.
 *
 * For each node the pointer, clientid, address length and digest, and
 * state_ref_count are printed, followed by markers noting whether the
 * address equals the target, whether the clientid equals the target or is
 * zero, and whether the node is not flagged NFS4_THREAD_EXIT.
 */
3956 dumpnfs4slist(char *txt
, mntinfo4_t
*mi
, clientid4 clientid
, servinfo4_t
*srv_p
)
/* hash16 is defined elsewhere; only this local prototype is visible here. */
3958 int hash16(void *p
, int len
);
3961 NFS4_DEBUG(nfs4_server_t_debug
, (CE_NOTE
,
3962 "dumping nfs4_server_t list in %s", txt
));
3963 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3964 "mi 0x%p, want clientid %llx, addr %d/%04X",
3965 mi
, (longlong_t
)clientid
, srv_p
->sv_addr
.len
,
3966 hash16((void *)srv_p
->sv_addr
.buf
, srv_p
->sv_addr
.len
)));
3967 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
;
3969 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3970 "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
3971 np
, (longlong_t
)np
->clientid
, np
->saddr
.len
,
3972 hash16((void *)np
->saddr
.buf
, np
->saddr
.len
),
3973 np
->state_ref_count
));
/* Same length and same bytes: this node is for the target address. */
3974 if (np
->saddr
.len
== srv_p
->sv_addr
.len
&&
3975 bcmp(np
->saddr
.buf
, srv_p
->sv_addr
.buf
,
3976 np
->saddr
.len
) == 0)
3977 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3978 " - address matches"));
/* A node clientid of zero is reported as a match as well. */
3979 if (np
->clientid
== clientid
|| np
->clientid
== 0)
3980 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3981 " - clientid matches"));
3982 if (np
->s_thread_exit
!= NFS4_THREAD_EXIT
)
3983 NFS4_DEBUG(nfs4_server_t_debug
, (CE_CONT
,
3984 " - thread not exiting"));
/*
 * nfs4_move_mi: re-home mi from the nfs4_server matching the old servinfo4
 * to the one matching the new servinfo4.
 *
 * mi  - mount being moved; must belong to the calling zone (asserted).
 * old - servinfo4 whose address identifies the current nfs4_server (op).
 * new - servinfo4 whose address identifies the target nfs4_server (np).
 *
 * Outline of the visible logic:
 *   1. Under nfs4_server_lst_lock, scan nfs4_server_lst for nodes in the
 *      current zone (nfs_zoneid()) that are not flagged NFS4_THREAD_EXIT and
 *      whose address equals the old or the new address; each match has its
 *      s_lock taken as it is encountered, i.e. in list order.
 *   2. On the path commented as a forced unmount, np->s_lock and the list
 *      lock are dropped; if no target node exists one is created with
 *      add_new_nfs4_server(new, kcred).
 *   3. With both nodes in hand (asserted), delegations for mi on op are
 *      discarded and the open-file count of mi is transferred from
 *      op->state_ref_count to np->state_ref_count.
 *   4. mi is unlinked from op and linked onto np, both s_locks are dropped,
 *      and the reference on op is released.
 */
3992 * Move a mntinfo4_t from one server list to another.
3993 * Locking of the two nfs4_server_t nodes will be done in list order.
3995 * Returns NULL if the current nfs4_server_t for the filesystem could not
3996 * be found (e.g., due to forced unmount). Otherwise returns a reference
3997 * to the new nfs4_server_t, which must eventually be freed.
4000 nfs4_move_mi(mntinfo4_t
*mi
, servinfo4_t
*old
, servinfo4_t
*new)
4002 nfs4_server_t
*p
, *op
= NULL
, *np
= NULL
;
4004 zoneid_t zoneid
= nfs_zoneid();
4006 ASSERT(nfs_zone() == mi
->mi_zone
);
4008 mutex_enter(&nfs4_server_lst_lock
);
4010 if (nfs4_server_t_debug
)
4011 dumpnfs4slist("nfs4_move_mi", mi
, (clientid4
)0, new);
4013 for (p
= nfs4_server_lst
.forw
; p
!= &nfs4_server_lst
; p
= p
->forw
) {
/* Only nodes belonging to the calling zone are candidates. */
4014 if (p
->zoneid
!= zoneid
)
/* Candidate for the current (old) server. */
4016 if (p
->saddr
.len
== old
->sv_addr
.len
&&
4017 bcmp(p
->saddr
.buf
, old
->sv_addr
.buf
, p
->saddr
.len
) == 0 &&
4018 p
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
4020 mutex_enter(&op
->s_lock
);
/* Candidate for the target (new) server. */
4023 if (p
->saddr
.len
== new->sv_addr
.len
&&
4024 bcmp(p
->saddr
.buf
, new->sv_addr
.buf
, p
->saddr
.len
) == 0 &&
4025 p
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
4027 mutex_enter(&np
->s_lock
);
4029 if (op
!= NULL
&& np
!= NULL
)
4034 * Filesystem has been forcibly unmounted. Bail out.
4037 mutex_exit(&np
->s_lock
);
4038 mutex_exit(&nfs4_server_lst_lock
);
4045 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4046 "nfs4_move_mi: no target nfs4_server, will create."));
4048 np
= add_new_nfs4_server(new, kcred
);
4050 mutex_exit(&nfs4_server_lst_lock
);
4052 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4053 "nfs4_move_mi: for mi 0x%p, "
4054 "old servinfo4 0x%p, new servinfo4 0x%p, "
4055 "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
4056 (void*)mi
, (void*)old
, (void*)new,
4057 (void*)op
, (void*)np
));
4058 ASSERT(op
!= NULL
&& np
!= NULL
);
4060 /* discard any delegations */
4061 nfs4_deleg_discard(mi
, op
);
/*
 * Transfer the open-file count between the two servers.  mi_open_files is
 * zeroed before nfs4_remove_mi_from_server_nolock() and restored right
 * after it; presumably this keeps that routine from adjusting
 * op->state_ref_count a second time - TODO confirm against its definition.
 */
4063 num_open
= mi
->mi_open_files
;
4064 mi
->mi_open_files
= 0;
4065 op
->state_ref_count
-= num_open
;
4066 ASSERT(op
->state_ref_count
>= 0);
4067 np
->state_ref_count
+= num_open
;
4068 nfs4_remove_mi_from_server_nolock(mi
, op
);
4069 mi
->mi_open_files
= num_open
;
4070 NFS4_DEBUG(nfs4_client_failover_debug
, (CE_NOTE
,
4071 "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
4072 mi
->mi_open_files
, op
->state_ref_count
, np
->state_ref_count
));
4074 nfs4_add_mi_to_server(np
, mi
);
4076 mutex_exit(&op
->s_lock
);
4077 mutex_exit(&np
->s_lock
);
4078 nfs4_server_rele(op
);
/*
 * servinfo4_to_nfs4_server: walk nfs4_server_lst for a node that is in the
 * calling zone (nfs_zoneid()), has the same address length and bytes as
 * srv_p->sv_addr, and is not flagged NFS4_THREAD_EXIT.  The s_lock of the
 * matching node is taken.
 *
 * srv_p - servinfo4 supplying the address to look for.
 *
 * The caller must hold nfs4_server_lst_lock (asserted).
 */
4084 * Need to have the nfs4_server_lst_lock.
4085 * Search the nfs4_server list to find a match on this servinfo4
4086 * based on its address.
4088 * Returns NULL if no match is found. Otherwise returns a reference (which
4089 * must eventually be freed) to a locked nfs4_server.
4092 servinfo4_to_nfs4_server(servinfo4_t
*srv_p
)
4095 zoneid_t zoneid
= nfs_zoneid();
4097 ASSERT(MUTEX_HELD(&nfs4_server_lst_lock
));
4098 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
; np
= np
->forw
) {
4099 if (np
->zoneid
== zoneid
&&
4100 np
->saddr
.len
== srv_p
->sv_addr
.len
&&
4101 bcmp(np
->saddr
.buf
, srv_p
->sv_addr
.buf
,
4102 np
->saddr
.len
) == 0 &&
4103 np
->s_thread_exit
!= NFS4_THREAD_EXIT
) {
4104 mutex_enter(&np
->s_lock
);
/*
 * lookup_nfs4_server: confirm that sp is still a live member of
 * nfs4_server_lst and, if so, hand it back locked.
 *
 * sp        - candidate node; it is compared by pointer against each list
 *             member, so it is only dereferenced once it has been found on
 *             the list.
 * any_state - when non-zero, a node flagged NFS4_THREAD_EXIT is accepted too.
 *
 * The list is walked under nfs4_server_lst_lock, taking the s_lock of each
 * node in turn.  A node qualifies when it is sp, its s_refcnt is positive,
 * and it is either not exiting or any_state is set; on that path the list
 * lock is dropped while the node s_lock stays held.  Non-matching nodes are
 * unlocked again, and the list lock is dropped after an unsuccessful walk.
 */
4113 * Locks the nfs4_server down if it is found and returns a reference that
4114 * must eventually be freed.
4116 static nfs4_server_t
*
4117 lookup_nfs4_server(nfs4_server_t
*sp
, int any_state
)
4121 mutex_enter(&nfs4_server_lst_lock
);
4122 for (np
= nfs4_server_lst
.forw
; np
!= &nfs4_server_lst
; np
= np
->forw
) {
4123 mutex_enter(&np
->s_lock
);
4124 if (np
== sp
&& np
->s_refcnt
> 0 &&
4125 (np
->s_thread_exit
!= NFS4_THREAD_EXIT
|| any_state
)) {
4126 mutex_exit(&nfs4_server_lst_lock
);
4130 mutex_exit(&np
->s_lock
);
4132 mutex_exit(&nfs4_server_lst_lock
);
/*
 * find_nfs4_server: return the result of lookup_nfs4_server(mi->mi_srv, 0),
 * i.e. nodes flagged NFS4_THREAD_EXIT are not accepted.
 *
 * mi - mount whose mi_srv is looked up; mi->mi_recovlock must be held as
 *      reader or writer (asserted).
 */
4138 * The caller should be holding mi->mi_recovlock, and it should continue to
4139 * hold the lock until done with the returned nfs4_server_t. Once
4140 * mi->mi_recovlock is released, there is no guarantee that the returned
4141 * mi->nfs4_server_t will continue to correspond to mi.
4144 find_nfs4_server(mntinfo4_t
*mi
)
4146 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
4147 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
4149 return (lookup_nfs4_server(mi
->mi_srv
, 0));
/*
 * find_nfs4_server_all: return the result of
 * lookup_nfs4_server(mi->mi_srv, any_state).
 *
 * mi        - mount whose mi_srv is looked up; mi->mi_recovlock must be held
 *             as reader or writer (asserted).
 * any_state - forwarded unchanged; non-zero also accepts a node flagged
 *             NFS4_THREAD_EXIT.
 */
4153 * Same as above, but takes an "any_state" parameter which can be
4154 * set to 1 if the caller wishes to find nfs4_server_t's which
4155 * have been marked for termination by the exit of the renew
4156 * thread. This should only be used by operations which are
4157 * cleaning up and will not cause an OTW op.
4160 find_nfs4_server_all(mntinfo4_t
*mi
, int any_state
)
4162 ASSERT(nfs_rw_lock_held(&mi
->mi_recovlock
, RW_READER
) ||
4163 nfs_rw_lock_held(&mi
->mi_recovlock
, RW_WRITER
));
4165 return (lookup_nfs4_server(mi
->mi_srv
, any_state
));
/*
 * nfs4_server_vlock: thin wrapper that reports whether
 * lookup_nfs4_server(sp, any_state) found sp.  A non-zero result means the
 * lookup succeeded, so sp is left in whatever locked state that routine
 * returns it in.
 *
 * sp        - node to validate and lock.
 * any_state - forwarded unchanged to lookup_nfs4_server().
 */
4169 * Lock sp, but only if it's still active (in the list and hasn't been
4170 * flagged as exiting) or 'any_state' is non-zero.
4171 * Returns TRUE if sp got locked and adds a reference to sp.
4174 nfs4_server_vlock(nfs4_server_t
*sp
, int any_state
)
4176 return (lookup_nfs4_server(sp
, any_state
) != NULL
);
/*
 * nfs4_server_rele: drop one reference on sp and tear the node down when no
 * references remain.
 *
 * sp - node to release; must not be locked by the caller, since s_lock is
 *      taken here.
 *
 * The check is done twice.  First under s_lock alone: if references remain
 * the function is finished.  Otherwise s_lock is dropped and re-taken after
 * nfs4_server_lst_lock, matching the list-lock-then-node-lock order used in
 * add_new_nfs4_server().  Because s_lock was released in between, s_refcnt
 * is re-tested; if it became positive again the teardown is abandoned.
 * Otherwise the forw and back links are cleared, the list lock is dropped,
 * and destroy_nfs4_server() is called with s_lock still held.
 *
 * NOTE(review): the statements that decrement s_refcnt and unlink the node
 * from the list are not visible in this view.
 */
4180 * Release the reference to sp and destroy it if that's the last one.
4184 nfs4_server_rele(nfs4_server_t
*sp
)
4186 mutex_enter(&sp
->s_lock
);
4187 ASSERT(sp
->s_refcnt
> 0);
4189 if (sp
->s_refcnt
> 0) {
4190 mutex_exit(&sp
->s_lock
);
4193 mutex_exit(&sp
->s_lock
);
4195 mutex_enter(&nfs4_server_lst_lock
);
4196 mutex_enter(&sp
->s_lock
);
4197 if (sp
->s_refcnt
> 0) {
4198 mutex_exit(&sp
->s_lock
);
4199 mutex_exit(&nfs4_server_lst_lock
);
4203 sp
->forw
= sp
->back
= NULL
;
4204 mutex_exit(&nfs4_server_lst_lock
);
4205 destroy_nfs4_server(sp
);
/*
 * destroy_nfs4_server: free an nfs4_server that has no remaining users.
 *
 * sp - node to destroy.  Asserted on entry: sp->s_lock is held, s_refcnt is
 *      zero, and s_otw_call_count is zero.
 *
 * The address buffer and the client id string are freed with the same sizes
 * used when new_nfs4_server() allocated them (saddr.maxlen and
 * clidtosend.id_len).  s_lock is then dropped so that it can be destroyed
 * together with the callback state, the delegation list, the condition
 * variables and s_recovlock, and finally the structure itself is freed.
 */
4209 destroy_nfs4_server(nfs4_server_t
*sp
)
4211 ASSERT(MUTEX_HELD(&sp
->s_lock
));
4212 ASSERT(sp
->s_refcnt
== 0);
4213 ASSERT(sp
->s_otw_call_count
== 0);
4218 kmem_free(sp
->saddr
.buf
, sp
->saddr
.maxlen
);
4219 kmem_free(sp
->clidtosend
.id_val
, sp
->clidtosend
.id_len
);
/* The mutex must be released before mutex_destroy() below. */
4220 mutex_exit(&sp
->s_lock
);
4222 /* destroy the nfs4_server */
4223 nfs4callback_destroy(sp
);
4224 list_destroy(&sp
->s_deleg_list
);
4225 mutex_destroy(&sp
->s_lock
);
4226 cv_destroy(&sp
->cv_thread_exit
);
4227 cv_destroy(&sp
->s_cv_otw_count
);
4228 cv_destroy(&sp
->s_clientid_pend
);
4229 cv_destroy(&sp
->wait_cb_null
);
4230 nfs_rw_destroy(&sp
->s_recovlock
);
4231 kmem_free(sp
, sizeof (*sp
));
/*
 * async_free_mount: package the unmount arguments into a freemountargs_t
 * and start nfs4_free_mount_thread() on them with zthread_create().
 *
 * vfsp - filesystem being torn down; stored in fm_vfsp.  A hold is taken on
 *        its mntinfo4 with MI4_HOLD before the thread is created.
 * flag - unmount flags; stored in fm_flag.
 * cr   - credential (the statement storing it in the argument block is not
 *        visible in this view).
 *
 * The argument block is allocated with KM_SLEEP here and freed by
 * nfs4_free_mount_thread().
 */
4235 * Fork off a thread to free the data structures for a mount.
4239 async_free_mount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
4241 freemountargs_t
*args
;
4242 args
= kmem_alloc(sizeof (freemountargs_t
), KM_SLEEP
);
4243 args
->fm_vfsp
= vfsp
;
4245 MI4_HOLD(VFTOMI4(vfsp
));
4246 args
->fm_flag
= flag
;
4249 (void) zthread_create(NULL
, 0, nfs4_free_mount_thread
, args
, 0,
/*
 * nfs4_free_mount_thread: thread entry point created by async_free_mount().
 *
 * args - freemountargs_t built by async_free_mount(); this function owns it
 *        and frees it before finishing.
 *
 * Runs nfs4_free_mount() with the saved vfs, flag and credential, then
 * releases what the argument block carried: the credential via crfree() and
 * the vfs via VFS_RELE().  mi is fetched from fm_vfsp before that vfs hold
 * is dropped; its use is not visible in this view.
 */
4254 nfs4_free_mount_thread(freemountargs_t
*args
)
4257 nfs4_free_mount(args
->fm_vfsp
, args
->fm_flag
, args
->fm_cr
);
4258 mi
= VFTOMI4(args
->fm_vfsp
);
4259 crfree(args
->fm_cr
);
4260 VFS_RELE(args
->fm_vfsp
);
4262 kmem_free(args
, sizeof (freemountargs_t
));
/*
 * nfs4_free_mount: release the NFSv4 client state attached to vfsp.
 *
 * vfsp - filesystem being torn down; its mntinfo4 is obtained via VFTOMI4.
 * flag - unmount flags, forwarded to nfs4_ephemeral_umount().
 * cr   - credential, forwarded to nfs4_ephemeral_umount() and
 *        destroy_rtable4().
 *
 * Sequence visible here:
 *   1. Note whether the caller is running in the zone zsched process
 *      (async_thread) and set up CPR bookkeeping (cpr_lock, cpr_info).
 *   2. Look up the nfs4_server for mi under mi_recovlock (reader) and wait
 *      on s_cv_otw_count until s_otw_call_count reaches zero, then unlock
 *      and release the server.
 *   3. Wait on mi_cv_in_recov, under mi_lock, until mi_in_recovery is zero.
 *   4. Attempt the ephemeral-mount teardown and activate it on success.
 *   5. Purge the DNLC again, stop the async manager and workers, destroy
 *      the rnode table, and detach mi from its server.
 *   6. Leave the CPR framework, remove mi from the zone list, and drop the
 *      zone reference.
 *
 * Each cv_wait is bracketed by CALLB_CPR_SAFE_BEGIN and CALLB_CPR_SAFE_END
 * taken under cpr_lock.  NOTE(review): the conditionals guarding the CPR
 * calls and the server wait are not visible in this view.
 */
4268 * Thread to free the data structures for a given filesystem.
4271 nfs4_free_mount(vfs_t
*vfsp
, int flag
, cred_t
*cr
)
4273 mntinfo4_t
*mi
= VFTOMI4(vfsp
);
4275 callb_cpr_t cpr_info
;
4277 boolean_t async_thread
;
4281 nfs4_ephemeral_tree_t
*eph_tree
;
4284 * We need to participate in the CPR framework if this is a kernel
4287 async_thread
= (curproc
== nfs_zone()->zone_zsched
);
4289 mutex_init(&cpr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
4290 CALLB_CPR_INIT(&cpr_info
, &cpr_lock
, callb_generic_cpr
,
4291 "nfsv4AsyncUnmount");
4295 * We need to wait for all outstanding OTW calls
4296 * and recovery to finish before we remove the mi
4297 * from the nfs4_server_t, as current pending
4298 * calls might still need this linkage (in order
4299 * to find a nfs4_server_t from a mntinfo4_t).
/* find_nfs4_server() asserts that mi_recovlock is held across the lookup. */
4301 (void) nfs_rw_enter_sig(&mi
->mi_recovlock
, RW_READER
, FALSE
);
4302 sp
= find_nfs4_server(mi
);
4303 nfs_rw_exit(&mi
->mi_recovlock
);
/* Block until no over-the-wire calls remain outstanding on this server. */
4306 while (sp
->s_otw_call_count
!= 0) {
4308 mutex_enter(&cpr_lock
);
4309 CALLB_CPR_SAFE_BEGIN(&cpr_info
);
4310 mutex_exit(&cpr_lock
);
4312 cv_wait(&sp
->s_cv_otw_count
, &sp
->s_lock
);
4314 mutex_enter(&cpr_lock
);
4315 CALLB_CPR_SAFE_END(&cpr_info
, &cpr_lock
);
4316 mutex_exit(&cpr_lock
);
4319 mutex_exit(&sp
->s_lock
);
4320 nfs4_server_rele(sp
);
/* Block until recovery for this mount has finished. */
4324 mutex_enter(&mi
->mi_lock
);
4325 while (mi
->mi_in_recovery
!= 0) {
4327 mutex_enter(&cpr_lock
);
4328 CALLB_CPR_SAFE_BEGIN(&cpr_info
);
4329 mutex_exit(&cpr_lock
);
4331 cv_wait(&mi
->mi_cv_in_recov
, &mi
->mi_lock
);
4333 mutex_enter(&cpr_lock
);
4334 CALLB_CPR_SAFE_END(&cpr_info
, &cpr_lock
);
4335 mutex_exit(&cpr_lock
);
4338 mutex_exit(&mi
->mi_lock
);
4341 * If we got an error, then do not nuke the
4342 * tree. Either the harvester is busy reclaiming
4343 * this node or we ran into some busy condition.
4345 * The harvester will eventually come along and cleanup.
4346 * The only problem would be the root mount point.
4348 * Since the busy node can occur for a variety
4349 * of reasons and can result in an entry staying
4350 * in df output but no longer accessible from the
4351 * directory tree, we are okay.
4353 if (!nfs4_ephemeral_umount(mi
, flag
, cr
,
4354 &must_unlock
, &eph_tree
))
4355 nfs4_ephemeral_umount_activate(mi
, &must_unlock
,
4359 * The original purge of the dnlc via 'dounmount'
4360 * doesn't guarantee that another dnlc entry was not
4361 * added while we waitied for all outstanding OTW
4362 * and recovery calls to finish. So re-purge the
4365 (void) dnlc_purge_vfsp(vfsp
, 0);
4368 * We need to explicitly stop the manager thread; the asyc worker
4369 * threads can timeout and exit on their own.
4371 mutex_enter(&mi
->mi_async_lock
);
4372 mi
->mi_max_threads
= 0;
4373 NFS4_WAKEALL_ASYNC_WORKERS(mi
->mi_async_work_cv
);
4374 mutex_exit(&mi
->mi_async_lock
);
4375 if (mi
->mi_manager_thread
)
4376 nfs4_async_manager_stop(vfsp
);
4378 destroy_rtable4(vfsp
, cr
);
4380 nfs4_remove_mi_from_server(mi
, NULL
);
4383 mutex_enter(&cpr_lock
);
4384 CALLB_CPR_EXIT(&cpr_info
); /* drops cpr_lock */
4385 mutex_destroy(&cpr_lock
);
4388 removed
= nfs4_mi_zonelist_remove(mi
);
4390 zone_rele_ref(&mi
->mi_zone_ref
, ZONE_REF_NFSV4
);
4393 /* Referral related sub-routines */
/*
 * free_knconf_contents: release the strings owned by a knetconfig.
 *
 * k - knetconfig whose knc_protofmly and knc_proto buffers are freed; each
 *     is released with size KNC_STRSIZE.  knc_protofmly is only freed when
 *     it is non-NULL.  NOTE(review): any guard on knc_proto, and whether k
 *     itself is freed here, is not visible in this view.
 */
4395 /* Free the string buffers held by a knetconfig */
4397 free_knconf_contents(struct knetconfig
*k
)
4401 if (k
->knc_protofmly
)
4402 kmem_free(k
->knc_protofmly
, KNC_STRSIZE
);
4404 kmem_free(k
->knc_proto
, KNC_STRSIZE
);
/*
 * extract_referral_point: copy one name component of the path svp into a
 * freshly allocated buffer.
 *
 * svp - NUL-terminated path string to scan.
 * nth - selects the component; scanning reacts once the slash count reaches
 *       nth + 1.
 *
 * The result buffer (newpath) is a zeroed KM_SLEEP allocation of MAXPATHLEN
 * bytes and is explicitly NUL-terminated.  Presumably it is returned to the
 * caller, who must then free it with size MAXPATHLEN - TODO confirm, as the
 * return statement is not visible in this view.
 */
4408 * This updates newpath variable with exact name component from the
4409 * path which gave us a NFS4ERR_MOVED error.
4410 * If the path is /rp/aaa/bbb and nth value is 1, aaa is returned.
4413 extract_referral_point(const char *svp
, int nth
)
4415 int num_slashes
= 0;
4417 char *newpath
= NULL
;
4420 newpath
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
4421 for (p
= svp
; *p
; p
++) {
4424 if (num_slashes
== nth
+ 1) {
4433 newpath
[i
++] = '\0';
/*
 * setup_newsvpath: replace svp->sv_path with a shortened copy of itself.
 *
 * svp - servinfo4 whose sv_path and sv_pathlen are rewritten in place.
 * nth - selects the cut point; the replacement is prepared once the slash
 *       count reaches nth + 1.
 *
 * A zeroed MAXPATHLEN scratch buffer (newpath) is used to build the new
 * path.  The old sv_path is freed with its recorded sv_pathlen, a new buffer
 * of exactly strlen(newpath) + 1 bytes is allocated and filled, and
 * sv_pathlen is updated to that size.  The scratch buffer is freed before
 * the function finishes.
 */
4441 * This sets up a new path in sv_path to do a lookup of the referral point.
4442 * If the path is /rp/aaa/bbb and the referral point is aaa,
4443 * this updates /rp/aaa. This path will be used to get referral
4447 setup_newsvpath(servinfo4_t
*svp
, int nth
)
4449 int num_slashes
= 0, pathlen
, i
= 0;
4452 newpath
= kmem_zalloc(MAXPATHLEN
, KM_SLEEP
);
4453 for (p
= svp
->sv_path
; *p
; p
++) {
4457 if (num_slashes
== nth
+ 1) {
/* Size includes the terminating NUL, so bcopy below copies it too. */
4459 pathlen
= strlen(newpath
) + 1;
4460 kmem_free(svp
->sv_path
, svp
->sv_pathlen
);
4461 svp
->sv_path
= kmem_alloc(pathlen
, KM_SLEEP
);
4462 svp
->sv_pathlen
= pathlen
;
4463 bcopy(newpath
, svp
->sv_path
, pathlen
);
4468 kmem_free(newpath
, MAXPATHLEN
);