fs: rename AT_* to VATTR_*
[unleashed/lotheac.git] / kernel / fs / nfs / nfs_vfsops.c
blob343ef1f95ad8cbcf5203158778af8e1005cd079d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
25 * All rights reserved.
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/vfs.h>
33 #include <sys/vnode.h>
34 #include <sys/pathname.h>
35 #include <sys/sysmacros.h>
36 #include <sys/kmem.h>
37 #include <sys/mkdev.h>
38 #include <sys/mount.h>
39 #include <sys/mntent.h>
40 #include <sys/statvfs.h>
41 #include <sys/errno.h>
42 #include <sys/debug.h>
43 #include <sys/cmn_err.h>
44 #include <sys/utsname.h>
45 #include <sys/bootconf.h>
46 #include <sys/modctl.h>
47 #include <sys/acl.h>
48 #include <sys/flock.h>
49 #include <sys/policy.h>
50 #include <sys/zone.h>
51 #include <sys/class.h>
52 #include <sys/socket.h>
53 #include <sys/netconfig.h>
54 #include <sys/mntent.h>
56 #include <rpc/types.h>
57 #include <rpc/auth.h>
58 #include <rpc/clnt.h>
60 #include <nfs/nfs.h>
61 #include <nfs/nfs_clnt.h>
62 #include <nfs/rnode.h>
63 #include <nfs/mount.h>
64 #include <nfs/nfs_acl.h>
66 #include <sys/fs_subr.h>
69 * From rpcsec module (common/rpcsec).
71 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
72 extern void sec_clnt_freeinfo(struct sec_data *);
74 static int pathconf_copyin(struct nfs_args *, struct pathcnf *);
75 static int pathconf_get(struct mntinfo *, struct nfs_args *);
76 static void pathconf_rele(struct mntinfo *);
/*
 * Printable names of the NFS version 2 procedures, indexed by procedure
 * number.
 *
 * The order and contents of this structure must be kept in sync with that of
 * rfsreqcnt_v2_tmpl in nfs_stats.c
 */
static char *rfsnames_v2[] = {
	"null",		/*  0 */
	"getattr",	/*  1 */
	"setattr",	/*  2 */
	"unused",	/*  3 */
	"lookup",	/*  4 */
	"readlink",	/*  5 */
	"read",		/*  6 */
	"unused",	/*  7 */
	"write",	/*  8 */
	"create",	/*  9 */
	"remove",	/* 10 */
	"rename",	/* 11 */
	"link",		/* 12 */
	"symlink",	/* 13 */
	"mkdir",	/* 14 */
	"rmdir",	/* 15 */
	"readdir",	/* 16 */
	"fsstat"	/* 17 */
};
89 * This table maps from NFS protocol number into call type.
90 * Zero means a "Lookup" type call
91 * One means a "Read" type call
92 * Two means a "Write" type call
93 * This is used to select a default time-out.
95 static uchar_t call_type_v2[] = {
96 0, 0, 1, 0, 0, 0, 1,
97 0, 2, 2, 2, 2, 2, 2,
98 2, 2, 1, 0
102 * Similar table, but to determine which timer to use
103 * (only real reads and writes!)
105 static uchar_t timer_type_v2[] = {
106 0, 0, 0, 0, 0, 0, 1,
107 0, 2, 0, 0, 0, 0, 0,
108 0, 0, 1, 0
112 * This table maps from NFS protocol number into a call type
113 * for the semisoft mount option.
114 * Zero means do not repeat operation.
115 * One means repeat.
117 static uchar_t ss_call_type_v2[] = {
118 0, 0, 1, 0, 0, 0, 0,
119 0, 1, 1, 1, 1, 1, 1,
120 1, 1, 0, 0
124 * nfs vfs operations.
126 static int nfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
127 static int nfs_unmount(vfs_t *, int, cred_t *);
128 static int nfs_root(vfs_t *, vnode_t **);
129 static int nfs_statvfs(vfs_t *, struct statvfs64 *);
130 static int nfs_sync(vfs_t *, short, cred_t *);
131 static int nfs_vget(vfs_t *, vnode_t **, fid_t *);
132 static int nfs_mountroot(vfs_t *, whymountroot_t);
133 static void nfs_freevfs(vfs_t *);
135 static int nfsrootvp(vnode_t **, vfs_t *, struct servinfo *,
136 int, cred_t *, zone_t *);
139 * Initialize the vfs structure
142 int nfsfstyp;
145 * Debug variable to check for rdma based
146 * transport startup and cleanup. Controlled
147 * through /etc/system. Off by default.
149 int rdma_debug = 0;
151 const struct vfsops nfs_vfsops = {
152 .vfs_mount = nfs_mount,
153 .vfs_unmount = nfs_unmount,
154 .vfs_root = nfs_root,
155 .vfs_statvfs = nfs_statvfs,
156 .vfs_sync = nfs_sync,
157 .vfs_vget = nfs_vget,
158 .vfs_mountroot = nfs_mountroot,
159 .vfs_freevfs = nfs_freevfs,
163 nfsinit(int fstyp, char *name)
165 int error;
167 error = vfs_setfsops(fstyp, &nfs_vfsops);
168 if (error != 0) {
169 zcmn_err(GLOBAL_ZONEID, CE_WARN,
170 "nfsinit: bad fstyp");
171 return (error);
174 nfsfstyp = fstyp;
176 return (0);
/*
 * Counterpart of nfsinit().  Intentionally empty: nothing set up by
 * nfsinit() in this file needs to be undone here.
 */
void
nfsfini(void)
{
}
184 static void
185 nfs_free_args(struct nfs_args *nargs, nfs_fhandle *fh)
188 if (fh)
189 kmem_free(fh, sizeof (*fh));
191 if (nargs->pathconf) {
192 kmem_free(nargs->pathconf, sizeof (struct pathcnf));
193 nargs->pathconf = NULL;
196 if (nargs->knconf) {
197 if (nargs->knconf->knc_protofmly)
198 kmem_free(nargs->knconf->knc_protofmly, KNC_STRSIZE);
199 if (nargs->knconf->knc_proto)
200 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
201 kmem_free(nargs->knconf, sizeof (*nargs->knconf));
202 nargs->knconf = NULL;
205 if (nargs->fh) {
206 kmem_free(nargs->fh, strlen(nargs->fh) + 1);
207 nargs->fh = NULL;
210 if (nargs->hostname) {
211 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
212 nargs->hostname = NULL;
215 if (nargs->addr) {
216 if (nargs->addr->buf) {
217 ASSERT(nargs->addr->len);
218 kmem_free(nargs->addr->buf, nargs->addr->len);
220 kmem_free(nargs->addr, sizeof (struct netbuf));
221 nargs->addr = NULL;
224 if (nargs->syncaddr) {
225 ASSERT(nargs->syncaddr->len);
226 if (nargs->syncaddr->buf) {
227 ASSERT(nargs->syncaddr->len);
228 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
230 kmem_free(nargs->syncaddr, sizeof (struct netbuf));
231 nargs->syncaddr = NULL;
234 if (nargs->netname) {
235 kmem_free(nargs->netname, strlen(nargs->netname) + 1);
236 nargs->netname = NULL;
239 if (nargs->nfs_ext_u.nfs_extA.secdata) {
240 sec_clnt_freeinfo(nargs->nfs_ext_u.nfs_extA.secdata);
241 nargs->nfs_ext_u.nfs_extA.secdata = NULL;
245 static int
246 nfs_copyin(char *data, int datalen, struct nfs_args *nargs, nfs_fhandle *fh)
249 int error;
250 size_t nlen; /* length of netname */
251 size_t hlen; /* length of hostname */
252 char netname[MAXNETNAMELEN+1]; /* server's netname */
253 struct netbuf addr; /* server's address */
254 struct netbuf syncaddr; /* AUTH_DES time sync addr */
255 struct knetconfig *knconf; /* transport knetconfig structure */
256 struct sec_data *secdata = NULL; /* security data */
257 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */
258 STRUCT_DECL(knetconfig, knconf_tmp);
259 STRUCT_DECL(netbuf, addr_tmp);
260 int flags;
261 struct pathcnf *pc; /* Pathconf */
262 char *p, *pf;
263 char *userbufptr;
266 bzero(nargs, sizeof (*nargs));
268 STRUCT_INIT(args, get_udatamodel());
269 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
270 if (copyin(data, STRUCT_BUF(args), MIN(datalen, STRUCT_SIZE(args))))
271 return (EFAULT);
273 nargs->wsize = STRUCT_FGET(args, wsize);
274 nargs->rsize = STRUCT_FGET(args, rsize);
275 nargs->timeo = STRUCT_FGET(args, timeo);
276 nargs->retrans = STRUCT_FGET(args, retrans);
277 nargs->acregmin = STRUCT_FGET(args, acregmin);
278 nargs->acregmax = STRUCT_FGET(args, acregmax);
279 nargs->acdirmin = STRUCT_FGET(args, acdirmin);
280 nargs->acdirmax = STRUCT_FGET(args, acdirmax);
282 flags = STRUCT_FGET(args, flags);
283 nargs->flags = flags;
286 addr.buf = NULL;
287 syncaddr.buf = NULL;
290 * Allocate space for a knetconfig structure and
291 * its strings and copy in from user-land.
293 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
294 STRUCT_INIT(knconf_tmp, get_udatamodel());
295 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
296 STRUCT_SIZE(knconf_tmp))) {
297 kmem_free(knconf, sizeof (*knconf));
298 return (EFAULT);
301 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
302 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
303 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
304 if (get_udatamodel() != DATAMODEL_LP64) {
305 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
306 } else {
307 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
310 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
311 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
312 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
313 if (error) {
314 kmem_free(pf, KNC_STRSIZE);
315 kmem_free(p, KNC_STRSIZE);
316 kmem_free(knconf, sizeof (*knconf));
317 return (error);
320 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
321 if (error) {
322 kmem_free(pf, KNC_STRSIZE);
323 kmem_free(p, KNC_STRSIZE);
324 kmem_free(knconf, sizeof (*knconf));
325 return (error);
329 knconf->knc_protofmly = pf;
330 knconf->knc_proto = p;
332 nargs->knconf = knconf;
334 /* Copyin pathconf if there is one */
335 if (STRUCT_FGETP(args, pathconf) != NULL) {
336 pc = kmem_alloc(sizeof (*pc), KM_SLEEP);
337 error = pathconf_copyin(STRUCT_BUF(args), pc);
338 nargs->pathconf = pc;
339 if (error)
340 goto errout;
344 * Get server address
346 STRUCT_INIT(addr_tmp, get_udatamodel());
347 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
348 STRUCT_SIZE(addr_tmp))) {
349 error = EFAULT;
350 goto errout;
352 nargs->addr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
353 userbufptr = STRUCT_FGETP(addr_tmp, buf);
354 addr.len = STRUCT_FGET(addr_tmp, len);
355 addr.buf = kmem_alloc(addr.len, KM_SLEEP);
356 addr.maxlen = addr.len;
357 if (copyin(userbufptr, addr.buf, addr.len)) {
358 kmem_free(addr.buf, addr.len);
359 error = EFAULT;
360 goto errout;
362 bcopy(&addr, nargs->addr, sizeof (struct netbuf));
365 * Get the root fhandle
368 if (copyin(STRUCT_FGETP(args, fh), &fh->fh_buf, NFS_FHSIZE)) {
369 error = EFAULT;
370 goto errout;
372 fh->fh_len = NFS_FHSIZE;
375 * Get server's hostname
377 if (flags & NFSMNT_HOSTNAME) {
378 error = copyinstr(STRUCT_FGETP(args, hostname), netname,
379 sizeof (netname), &hlen);
380 if (error)
381 goto errout;
382 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
383 (void) strcpy(nargs->hostname, netname);
385 } else {
386 nargs->hostname = NULL;
391 * If there are syncaddr and netname data, load them in. This is
392 * to support data needed for NFSV4 when AUTH_DH is the negotiated
393 * flavor via SECINFO. (instead of using MOUNT protocol in V3).
395 netname[0] = '\0';
396 if (flags & NFSMNT_SECURE) {
397 if (STRUCT_FGETP(args, syncaddr) == NULL) {
398 error = EINVAL;
399 goto errout;
401 /* get syncaddr */
402 STRUCT_INIT(addr_tmp, get_udatamodel());
403 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
404 STRUCT_SIZE(addr_tmp))) {
405 error = EINVAL;
406 goto errout;
408 userbufptr = STRUCT_FGETP(addr_tmp, buf);
409 syncaddr.len = STRUCT_FGET(addr_tmp, len);
410 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
411 syncaddr.maxlen = syncaddr.len;
412 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
413 kmem_free(syncaddr.buf, syncaddr.len);
414 error = EFAULT;
415 goto errout;
418 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
419 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
421 ASSERT(STRUCT_FGETP(args, netname));
422 if (copyinstr(STRUCT_FGETP(args, netname), netname,
423 sizeof (netname), &nlen)) {
424 error = EFAULT;
425 goto errout;
428 netname[nlen] = '\0';
429 nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
430 (void) strcpy(nargs->netname, netname);
434 * Get the extention data which has the security data structure.
435 * This includes data for AUTH_SYS as well.
437 if (flags & NFSMNT_NEWARGS) {
438 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
439 if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
440 nargs->nfs_args_ext == NFS_ARGS_EXTB) {
442 * Indicating the application is using the new
443 * sec_data structure to pass in the security
444 * data.
446 if (STRUCT_FGETP(args,
447 nfs_ext_u.nfs_extA.secdata) != NULL) {
448 error = sec_clnt_loadinfo(
449 (struct sec_data *)STRUCT_FGETP(args,
450 nfs_ext_u.nfs_extA.secdata), &secdata,
451 get_udatamodel());
453 nargs->nfs_ext_u.nfs_extA.secdata = secdata;
457 if (error)
458 goto errout;
461 * Failover support:
463 * We may have a linked list of nfs_args structures,
464 * which means the user is looking for failover. If
465 * the mount is either not "read-only" or "soft",
466 * we want to bail out with EINVAL.
468 if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
469 nargs->nfs_ext_u.nfs_extB.next =
470 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
472 errout:
473 if (error)
474 nfs_free_args(nargs, fh);
476 return (error);
481 * nfs mount vfsop
482 * Set up mount info record and attach it to vfs struct.
484 static int
485 nfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
487 char *data = uap->dataptr;
488 int error;
489 vnode_t *rtvp; /* the server's root */
490 mntinfo_t *mi; /* mount info, pointed at by vfs */
491 size_t nlen; /* length of netname */
492 struct knetconfig *knconf; /* transport knetconfig structure */
493 struct knetconfig *rdma_knconf; /* rdma transport structure */
494 rnode_t *rp;
495 struct servinfo *svp; /* nfs server info */
496 struct servinfo *svp_tail = NULL; /* previous nfs server info */
497 struct servinfo *svp_head; /* first nfs server info */
498 struct servinfo *svp_2ndlast; /* 2nd last in the server info list */
499 struct sec_data *secdata; /* security data */
500 struct nfs_args *args = NULL;
501 int flags, addr_type;
502 zone_t *zone = nfs_zone();
503 zone_t *mntzone = NULL;
504 nfs_fhandle *fhandle = NULL;
506 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
507 return (error);
509 if (mvp->v_type != VDIR)
510 return (ENOTDIR);
513 * get arguments
515 * nfs_args is now versioned and is extensible, so
516 * uap->datalen might be different from sizeof (args)
517 * in a compatible situation.
519 more:
521 if (!(uap->flags & MS_SYSSPACE)) {
522 if (args == NULL)
523 args = kmem_alloc(sizeof (struct nfs_args), KM_SLEEP);
524 else {
525 nfs_free_args(args, fhandle);
526 fhandle = NULL;
528 if (fhandle == NULL)
529 fhandle = kmem_zalloc(sizeof (nfs_fhandle), KM_SLEEP);
530 error = nfs_copyin(data, uap->datalen, args, fhandle);
531 if (error) {
532 if (args)
533 kmem_free(args, sizeof (*args));
534 return (error);
536 } else {
537 args = (struct nfs_args *)data;
538 fhandle = (nfs_fhandle *)args->fh;
542 flags = args->flags;
544 if (uap->flags & MS_REMOUNT) {
545 size_t n;
546 char name[FSTYPSZ];
548 if (uap->flags & MS_SYSSPACE)
549 error = copystr(uap->fstype, name, FSTYPSZ, &n);
550 else
551 error = copyinstr(uap->fstype, name, FSTYPSZ, &n);
553 if (error) {
554 if (error == ENAMETOOLONG)
555 return (EINVAL);
556 return (error);
561 * This check is to ensure that the request is a
562 * genuine nfs remount request.
565 if (strncmp(name, "nfs", 3) != 0)
566 return (EINVAL);
569 * If the request changes the locking type, disallow the
570 * remount,
571 * because it's questionable whether we can transfer the
572 * locking state correctly.
574 * Remounts need to save the pathconf information.
575 * Part of the infamous static kludge.
578 if ((mi = VFTOMI(vfsp)) != NULL) {
579 uint_t new_mi_llock;
580 uint_t old_mi_llock;
582 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
583 old_mi_llock = (mi->mi_flags & MI_LLOCK) ? 1 : 0;
584 if (old_mi_llock != new_mi_llock)
585 return (EBUSY);
587 error = pathconf_get((struct mntinfo *)vfsp->vfs_data, args);
589 if (!(uap->flags & MS_SYSSPACE)) {
590 nfs_free_args(args, fhandle);
591 kmem_free(args, sizeof (*args));
594 return (error);
597 mutex_enter(&mvp->v_lock);
598 if (!(uap->flags & MS_OVERLAY) &&
599 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
600 mutex_exit(&mvp->v_lock);
601 if (!(uap->flags & MS_SYSSPACE)) {
602 nfs_free_args(args, fhandle);
603 kmem_free(args, sizeof (*args));
605 return (EBUSY);
607 mutex_exit(&mvp->v_lock);
609 /* make sure things are zeroed for errout: */
610 rtvp = NULL;
611 mi = NULL;
612 secdata = NULL;
615 * A valid knetconfig structure is required.
617 if (!(flags & NFSMNT_KNCONF)) {
618 if (!(uap->flags & MS_SYSSPACE)) {
619 nfs_free_args(args, fhandle);
620 kmem_free(args, sizeof (*args));
622 return (EINVAL);
625 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
626 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
627 if (!(uap->flags & MS_SYSSPACE)) {
628 nfs_free_args(args, fhandle);
629 kmem_free(args, sizeof (*args));
631 return (EINVAL);
636 * Allocate a servinfo struct.
638 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
639 mutex_init(&svp->sv_lock, NULL, MUTEX_DEFAULT, NULL);
640 if (svp_tail) {
641 svp_2ndlast = svp_tail;
642 svp_tail->sv_next = svp;
643 } else {
644 svp_head = svp;
645 svp_2ndlast = svp;
648 svp_tail = svp;
651 * Get knetconfig and server address
653 svp->sv_knconf = args->knconf;
654 args->knconf = NULL;
656 if (args->addr == NULL || args->addr->buf == NULL) {
657 error = EINVAL;
658 goto errout;
661 svp->sv_addr.maxlen = args->addr->maxlen;
662 svp->sv_addr.len = args->addr->len;
663 svp->sv_addr.buf = args->addr->buf;
664 args->addr->buf = NULL;
667 * Get the root fhandle
669 ASSERT(fhandle);
671 bcopy(&fhandle->fh_buf, &svp->sv_fhandle.fh_buf, fhandle->fh_len);
672 svp->sv_fhandle.fh_len = fhandle->fh_len;
675 * Get server's hostname
677 if (flags & NFSMNT_HOSTNAME) {
678 if (args->hostname == NULL) {
679 error = EINVAL;
680 goto errout;
682 svp->sv_hostnamelen = strlen(args->hostname) + 1;
683 svp->sv_hostname = args->hostname;
684 args->hostname = NULL;
685 } else {
686 char *p = "unknown-host";
687 svp->sv_hostnamelen = strlen(p) + 1;
688 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
689 (void) strcpy(svp->sv_hostname, p);
694 * RDMA MOUNT SUPPORT FOR NFS v2:
695 * Establish, is it possible to use RDMA, if so overload the
696 * knconf with rdma specific knconf and free the orignal.
698 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
700 * Determine the addr type for RDMA, IPv4 or v6.
702 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
703 addr_type = AF_INET;
704 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
705 addr_type = AF_INET6;
707 if (rdma_reachable(addr_type, &svp->sv_addr,
708 &rdma_knconf) == 0) {
710 * If successful, hijack, the orignal knconf and
711 * replace with a new one, depending on the flags.
713 svp->sv_origknconf = svp->sv_knconf;
714 svp->sv_knconf = rdma_knconf;
715 knconf = rdma_knconf;
716 } else {
717 if (flags & NFSMNT_TRYRDMA) {
718 #ifdef DEBUG
719 if (rdma_debug)
720 zcmn_err(getzoneid(), CE_WARN,
721 "no RDMA onboard, revert\n");
722 #endif
725 if (flags & NFSMNT_DORDMA) {
727 * If proto=rdma is specified and no RDMA
728 * path to this server is avialable then
729 * ditch this server.
730 * This is not included in the mountable
731 * server list or the replica list.
732 * Check if more servers are specified;
733 * Failover case, otherwise bail out of mount.
735 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
736 args->nfs_ext_u.nfs_extB.next != NULL) {
737 data = (char *)
738 args->nfs_ext_u.nfs_extB.next;
739 if (uap->flags & MS_RDONLY &&
740 !(flags & NFSMNT_SOFT)) {
741 if (svp_head->sv_next == NULL) {
742 svp_tail = NULL;
743 svp_2ndlast = NULL;
744 sv_free(svp_head);
745 goto more;
746 } else {
747 svp_tail = svp_2ndlast;
748 svp_2ndlast->sv_next =
749 NULL;
750 sv_free(svp);
751 goto more;
754 } else {
756 * This is the last server specified
757 * in the nfs_args list passed down
758 * and its not rdma capable.
760 if (svp_head->sv_next == NULL) {
762 * Is this the only one
764 error = EINVAL;
765 #ifdef DEBUG
766 if (rdma_debug)
767 zcmn_err(getzoneid(),
768 CE_WARN,
769 "No RDMA srv");
770 #endif
771 goto errout;
772 } else {
774 * There is list, since some
775 * servers specified before
776 * this passed all requirements
778 svp_tail = svp_2ndlast;
779 svp_2ndlast->sv_next = NULL;
780 sv_free(svp);
781 goto proceed;
789 * Get the extention data which has the new security data structure.
791 if (flags & NFSMNT_NEWARGS) {
792 switch (args->nfs_args_ext) {
793 case NFS_ARGS_EXTA:
794 case NFS_ARGS_EXTB:
796 * Indicating the application is using the new
797 * sec_data structure to pass in the security
798 * data.
800 secdata = args->nfs_ext_u.nfs_extA.secdata;
801 if (secdata == NULL) {
802 error = EINVAL;
803 } else {
805 * Need to validate the flavor here if
806 * sysspace, userspace was already
807 * validate from the nfs_copyin function.
809 switch (secdata->rpcflavor) {
810 case AUTH_NONE:
811 case AUTH_UNIX:
812 case AUTH_LOOPBACK:
813 case AUTH_DES:
814 case RPCSEC_GSS:
815 break;
816 default:
817 error = EINVAL;
818 goto errout;
821 args->nfs_ext_u.nfs_extA.secdata = NULL;
822 break;
824 default:
825 error = EINVAL;
826 break;
828 } else if (flags & NFSMNT_SECURE) {
830 * Keep this for backward compatibility to support
831 * NFSMNT_SECURE/NFSMNT_RPCTIMESYNC flags.
833 if (args->syncaddr == NULL || args->syncaddr->buf == NULL) {
834 error = EINVAL;
835 goto errout;
839 * get time sync address.
841 if (args->syncaddr == NULL) {
842 error = EFAULT;
843 goto errout;
847 * Move security related data to the sec_data structure.
850 dh_k4_clntdata_t *data;
851 char *pf, *p;
853 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
854 if (flags & NFSMNT_RPCTIMESYNC)
855 secdata->flags |= AUTH_F_RPCTIMESYNC;
856 data = kmem_alloc(sizeof (*data), KM_SLEEP);
857 bcopy(args->syncaddr, &data->syncaddr,
858 sizeof (*args->syncaddr));
862 * duplicate the knconf information for the
863 * new opaque data.
865 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
866 *data->knconf = *knconf;
867 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
868 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
869 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
870 bcopy(knconf->knc_proto, pf, KNC_STRSIZE);
871 data->knconf->knc_protofmly = pf;
872 data->knconf->knc_proto = p;
874 /* move server netname to the sec_data structure */
875 nlen = strlen(args->hostname) + 1;
876 if (nlen != 0) {
877 data->netname = kmem_alloc(nlen, KM_SLEEP);
878 bcopy(args->hostname, data->netname, nlen);
879 data->netnamelen = (int)nlen;
881 secdata->secmod = secdata->rpcflavor = AUTH_DES;
882 secdata->data = (caddr_t)data;
884 } else {
885 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
886 secdata->secmod = secdata->rpcflavor = AUTH_UNIX;
887 secdata->data = NULL;
889 svp->sv_secdata = secdata;
892 * See bug 1180236.
893 * If mount secure failed, we will fall back to AUTH_NONE
894 * and try again. nfs3rootvp() will turn this back off.
896 * The NFS Version 2 mount uses GETATTR and STATFS procedures.
897 * The server does not care if these procedures have the proper
898 * authentication flavor, so if mount retries using AUTH_NONE
899 * that does not require a credential setup for root then the
900 * automounter would work without requiring root to be
901 * keylogged into AUTH_DES.
903 if (secdata->rpcflavor != AUTH_UNIX &&
904 secdata->rpcflavor != AUTH_LOOPBACK)
905 secdata->flags |= AUTH_F_TRYNONE;
908 * Failover support:
910 * We may have a linked list of nfs_args structures,
911 * which means the user is looking for failover. If
912 * the mount is either not "read-only" or "soft",
913 * we want to bail out with EINVAL.
915 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
916 args->nfs_ext_u.nfs_extB.next != NULL) {
917 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
918 data = (char *)args->nfs_ext_u.nfs_extB.next;
919 goto more;
921 error = EINVAL;
922 goto errout;
926 * Determine the zone we're being mounted into.
928 zone_hold(mntzone = zone); /* start with this assumption */
929 if (getzoneid() == GLOBAL_ZONEID) {
930 zone_rele(mntzone);
931 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
932 ASSERT(mntzone != NULL);
933 if (mntzone != zone) {
934 error = EBUSY;
935 goto errout;
940 * Stop the mount from going any further if the zone is going away.
942 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
943 error = EBUSY;
944 goto errout;
948 * Get root vnode.
950 proceed:
951 error = nfsrootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
953 if (error)
954 goto errout;
957 * Set option fields in the mount info record
959 mi = VTOMI(rtvp);
961 if (svp_head->sv_next)
962 mi->mi_flags |= MI_LLOCK;
964 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, args);
965 if (!error) {
966 /* static pathconf kludge */
967 error = pathconf_get(mi, args);
970 errout:
971 if (rtvp != NULL) {
972 if (error) {
973 rp = VTOR(rtvp);
974 if (rp->r_flags & RHASHED)
975 rp_rmhash(rp);
977 VN_RELE(rtvp);
980 if (error) {
981 sv_free(svp_head);
982 if (mi != NULL) {
983 nfs_async_stop(vfsp);
984 nfs_async_manager_stop(vfsp);
985 if (mi->mi_io_kstats) {
986 kstat_delete(mi->mi_io_kstats);
987 mi->mi_io_kstats = NULL;
989 if (mi->mi_ro_kstats) {
990 kstat_delete(mi->mi_ro_kstats);
991 mi->mi_ro_kstats = NULL;
993 nfs_free_mi(mi);
997 if (!(uap->flags & MS_SYSSPACE)) {
998 nfs_free_args(args, fhandle);
999 kmem_free(args, sizeof (*args));
1002 if (mntzone != NULL)
1003 zone_rele(mntzone);
1005 return (error);
1009 * The pathconf information is kept on a linked list of kmem_alloc'ed
1010 * structs. We search the list & add a new struct iff there is no other
1011 * struct with the same information.
1012 * See sys/pathconf.h for ``the rest of the story.''
1014 static struct pathcnf *allpc = NULL;
1016 static int
1017 pathconf_copyin(struct nfs_args *args, struct pathcnf *pc)
1019 STRUCT_DECL(pathcnf, pc_tmp);
1020 STRUCT_HANDLE(nfs_args, ap);
1021 int i;
1022 model_t model;
1024 model = get_udatamodel();
1025 STRUCT_INIT(pc_tmp, model);
1026 STRUCT_SET_HANDLE(ap, model, args);
1028 if ((STRUCT_FGET(ap, flags) & NFSMNT_POSIX) &&
1029 STRUCT_FGETP(ap, pathconf) != NULL) {
1030 if (copyin(STRUCT_FGETP(ap, pathconf), STRUCT_BUF(pc_tmp),
1031 STRUCT_SIZE(pc_tmp)))
1032 return (EFAULT);
1033 if (_PC_ISSET(_PC_ERROR, STRUCT_FGET(pc_tmp, pc_mask)))
1034 return (EINVAL);
1036 pc->pc_link_max = STRUCT_FGET(pc_tmp, pc_link_max);
1037 pc->pc_max_canon = STRUCT_FGET(pc_tmp, pc_max_canon);
1038 pc->pc_max_input = STRUCT_FGET(pc_tmp, pc_max_input);
1039 pc->pc_name_max = STRUCT_FGET(pc_tmp, pc_name_max);
1040 pc->pc_path_max = STRUCT_FGET(pc_tmp, pc_path_max);
1041 pc->pc_pipe_buf = STRUCT_FGET(pc_tmp, pc_pipe_buf);
1042 pc->pc_vdisable = STRUCT_FGET(pc_tmp, pc_vdisable);
1043 pc->pc_xxx = STRUCT_FGET(pc_tmp, pc_xxx);
1044 for (i = 0; i < _PC_N; i++)
1045 pc->pc_mask[i] = STRUCT_FGET(pc_tmp, pc_mask[i]);
1047 return (0);
1050 static int
1051 pathconf_get(struct mntinfo *mi, struct nfs_args *args)
1053 struct pathcnf *p, *pc;
1055 pc = args->pathconf;
1056 if (mi->mi_pathconf != NULL) {
1057 pathconf_rele(mi);
1058 mi->mi_pathconf = NULL;
1061 if (args->flags & NFSMNT_POSIX && args->pathconf != NULL) {
1062 if (_PC_ISSET(_PC_ERROR, pc->pc_mask))
1063 return (EINVAL);
1065 for (p = allpc; p != NULL; p = p->pc_next) {
1066 if (PCCMP(p, pc) == 0)
1067 break;
1069 if (p != NULL) {
1070 mi->mi_pathconf = p;
1071 p->pc_refcnt++;
1072 } else {
1073 p = kmem_alloc(sizeof (*p), KM_SLEEP);
1074 bcopy(pc, p, sizeof (struct pathcnf));
1075 p->pc_next = allpc;
1076 p->pc_refcnt = 1;
1077 allpc = mi->mi_pathconf = p;
1080 return (0);
1084 * release the static pathconf information
1086 static void
1087 pathconf_rele(struct mntinfo *mi)
1089 if (mi->mi_pathconf != NULL) {
1090 if (--mi->mi_pathconf->pc_refcnt == 0) {
1091 struct pathcnf *p;
1092 struct pathcnf *p2;
1094 p2 = p = allpc;
1095 while (p != NULL && p != mi->mi_pathconf) {
1096 p2 = p;
1097 p = p->pc_next;
1099 if (p == NULL) {
1100 panic("mi->pathconf");
1101 /*NOTREACHED*/
1103 if (p == allpc)
1104 allpc = p->pc_next;
1105 else
1106 p2->pc_next = p->pc_next;
1107 kmem_free(p, sizeof (*p));
1108 mi->mi_pathconf = NULL;
1113 static int nfs_dynamic = 1; /* global variable to enable dynamic retrans. */
1114 static ushort_t nfs_max_threads = 8; /* max number of active async threads */
1115 static uint_t nfs_async_clusters = 1; /* # of reqs from each async queue */
1116 static uint_t nfs_cots_timeo = NFS_COTS_TIMEO;
1118 static int
1119 nfsrootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo *svp,
1120 int flags, cred_t *cr, zone_t *zone)
1122 vnode_t *rtvp;
1123 mntinfo_t *mi;
1124 dev_t nfs_dev;
1125 struct vattr va;
1126 int error;
1127 rnode_t *rp;
1128 int i;
1129 struct nfs_stats *nfsstatsp;
1130 cred_t *lcr = NULL, *tcr = cr;
1132 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
1133 ASSERT(nfsstatsp != NULL);
1136 * Create a mount record and link it to the vfs struct.
1138 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
1139 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
1140 mutex_init(&mi->mi_remap_lock, NULL, MUTEX_DEFAULT, NULL);
1141 mi->mi_flags = MI_ACL | MI_EXTATTR;
1142 if (!(flags & NFSMNT_SOFT))
1143 mi->mi_flags |= MI_HARD;
1144 if ((flags & NFSMNT_SEMISOFT))
1145 mi->mi_flags |= MI_SEMISOFT;
1146 if ((flags & NFSMNT_NOPRINT))
1147 mi->mi_flags |= MI_NOPRINT;
1148 if (flags & NFSMNT_INT)
1149 mi->mi_flags |= MI_INT;
1150 mi->mi_retrans = NFS_RETRIES;
1151 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
1152 svp->sv_knconf->knc_semantics == NC_TPI_COTS)
1153 mi->mi_timeo = nfs_cots_timeo;
1154 else
1155 mi->mi_timeo = NFS_TIMEO;
1156 mi->mi_prog = NFS_PROGRAM;
1157 mi->mi_vers = NFS_VERSION;
1158 mi->mi_rfsnames = rfsnames_v2;
1159 mi->mi_reqs = nfsstatsp->nfs_stats_v2.rfsreqcnt_ptr;
1160 mi->mi_call_type = call_type_v2;
1161 mi->mi_ss_call_type = ss_call_type_v2;
1162 mi->mi_timer_type = timer_type_v2;
1163 mi->mi_aclnames = aclnames_v2;
1164 mi->mi_aclreqs = nfsstatsp->nfs_stats_v2.aclreqcnt_ptr;
1165 mi->mi_acl_call_type = acl_call_type_v2;
1166 mi->mi_acl_ss_call_type = acl_ss_call_type_v2;
1167 mi->mi_acl_timer_type = acl_timer_type_v2;
1168 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
1169 mi->mi_servers = svp;
1170 mi->mi_curr_serv = svp;
1171 mi->mi_acregmin = SEC2HR(ACREGMIN);
1172 mi->mi_acregmax = SEC2HR(ACREGMAX);
1173 mi->mi_acdirmin = SEC2HR(ACDIRMIN);
1174 mi->mi_acdirmax = SEC2HR(ACDIRMAX);
1176 if (nfs_dynamic)
1177 mi->mi_flags |= MI_DYNAMIC;
1179 if (flags & NFSMNT_DIRECTIO)
1180 mi->mi_flags |= MI_DIRECTIO;
1182 mutex_init(&mi->mi_rnodes_lock, NULL, MUTEX_DEFAULT, NULL);
1183 list_create(&mi->mi_rnodes, sizeof (rnode_t),
1184 offsetof(rnode_t, r_mi_link));
1187 * Make a vfs struct for nfs. We do this here instead of below
1188 * because rtvp needs a vfs before we can do a getattr on it.
1190 * Assign a unique device id to the mount
1192 mutex_enter(&nfs_minor_lock);
1193 do {
1194 nfs_minor = (nfs_minor + 1) & MAXMIN32;
1195 nfs_dev = makedevice(nfs_major, nfs_minor);
1196 } while (vfs_devismounted(nfs_dev));
1197 mutex_exit(&nfs_minor_lock);
1199 vfsp->vfs_dev = nfs_dev;
1200 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfsfstyp);
1201 vfsp->vfs_data = (caddr_t)mi;
1202 vfsp->vfs_fstype = nfsfstyp;
1203 vfsp->vfs_bsize = NFS_MAXDATA;
1206 * Initialize fields used to support async putpage operations.
1208 for (i = 0; i < NFS_ASYNC_TYPES; i++)
1209 mi->mi_async_clusters[i] = nfs_async_clusters;
1210 mi->mi_async_init_clusters = nfs_async_clusters;
1211 mi->mi_async_curr[NFS_ASYNC_QUEUE] =
1212 mi->mi_async_curr[NFS_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
1213 mi->mi_max_threads = nfs_max_threads;
1214 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
1215 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
1216 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_QUEUE], NULL, CV_DEFAULT, NULL);
1217 cv_init(&mi->mi_async_work_cv[NFS_ASYNC_PGOPS_QUEUE], NULL,
1218 CV_DEFAULT, NULL);
1219 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
1221 mi->mi_vfsp = vfsp;
1222 mi->mi_zone = zone;
1223 zone_init_ref(&mi->mi_zone_ref);
1224 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFS);
1225 nfs_mi_zonelist_add(mi);
1228 * Make the root vnode, use it to get attributes,
1229 * then remake it with the attributes.
1231 rtvp = makenfsnode((fhandle_t *)svp->sv_fhandle.fh_buf,
1232 NULL, vfsp, gethrtime(), cr, NULL, NULL);
1234 va.va_mask = VATTR_ALL;
1237 * If the uid is set then set the creds for secure mounts
1238 * by proxy processes such as automountd.
1240 if (svp->sv_secdata->uid != 0 &&
1241 svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
1242 lcr = crdup(cr);
1243 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
1244 tcr = lcr;
1247 error = nfsgetattr(rtvp, &va, tcr);
1248 if (error)
1249 goto bad;
1250 rtvp->v_type = va.va_type;
1253 * Poll every server to get the filesystem stats; we're
1254 * only interested in the server's transfer size, and we
1255 * want the minimum.
1257 * While we're looping, we'll turn off AUTH_F_TRYNONE,
1258 * which is only for the mount operation.
1261 mi->mi_tsize = MIN(NFS_MAXDATA, nfstsize());
1262 mi->mi_stsize = MIN(NFS_MAXDATA, nfstsize());
1264 for (svp = mi->mi_servers; svp != NULL; svp = svp->sv_next) {
1265 struct nfsstatfs fs;
1266 int douprintf;
1268 douprintf = 1;
1269 mi->mi_curr_serv = svp;
1271 error = rfs2call(mi, RFS_STATFS, xdr_fhandle,
1272 (caddr_t)svp->sv_fhandle.fh_buf, xdr_statfs, (caddr_t)&fs,
1273 tcr, &douprintf, &fs.fs_status, 0, NULL);
1274 if (error)
1275 goto bad;
1276 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize);
1277 svp->sv_secdata->flags &= ~AUTH_F_TRYNONE;
1279 mi->mi_curr_serv = mi->mi_servers;
1280 mi->mi_curread = mi->mi_tsize;
1281 mi->mi_curwrite = mi->mi_stsize;
1284 * Start the manager thread responsible for handling async worker
1285 * threads.
1287 VFS_HOLD(vfsp); /* add reference for thread */
1288 mi->mi_manager_thread = zthread_create(NULL, 0, nfs_async_manager,
1289 vfsp, 0, minclsyspri);
1290 ASSERT(mi->mi_manager_thread != NULL);
1293 * Initialize kstats
1295 nfs_mnt_kstat_init(vfsp);
1297 mi->mi_type = rtvp->v_type;
1299 *rtvpp = rtvp;
1300 if (lcr != NULL)
1301 crfree(lcr);
1303 return (0);
1304 bad:
1306 * An error occurred somewhere, need to clean up...
1307 * We need to release our reference to the root vnode and
1308 * destroy the mntinfo struct that we just created.
1310 if (lcr != NULL)
1311 crfree(lcr);
1312 rp = VTOR(rtvp);
1313 if (rp->r_flags & RHASHED)
1314 rp_rmhash(rp);
1315 VN_RELE(rtvp);
1316 nfs_async_stop(vfsp);
1317 nfs_async_manager_stop(vfsp);
1318 if (mi->mi_io_kstats) {
1319 kstat_delete(mi->mi_io_kstats);
1320 mi->mi_io_kstats = NULL;
1322 if (mi->mi_ro_kstats) {
1323 kstat_delete(mi->mi_ro_kstats);
1324 mi->mi_ro_kstats = NULL;
1326 nfs_free_mi(mi);
1327 *rtvpp = NULL;
1328 return (error);
1332 * vfs operations
1334 static int
1335 nfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
1337 mntinfo_t *mi;
1338 ushort_t omax;
1340 if (secpolicy_fs_unmount(cr, vfsp) != 0)
1341 return (EPERM);
1343 mi = VFTOMI(vfsp);
1344 if (flag & MS_FORCE) {
1346 vfsp->vfs_flag |= VFS_UNMOUNTED;
1349 * We are about to stop the async manager.
1350 * Let every one know not to schedule any
1351 * more async requests.
1353 mutex_enter(&mi->mi_async_lock);
1354 mi->mi_max_threads = 0;
1355 NFS_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
1356 mutex_exit(&mi->mi_async_lock);
1359 * We need to stop the manager thread explicitly; the worker
1360 * threads can time out and exit on their own.
1362 nfs_async_manager_stop(vfsp);
1363 destroy_rtable(vfsp, cr);
1364 if (mi->mi_io_kstats) {
1365 kstat_delete(mi->mi_io_kstats);
1366 mi->mi_io_kstats = NULL;
1368 if (mi->mi_ro_kstats) {
1369 kstat_delete(mi->mi_ro_kstats);
1370 mi->mi_ro_kstats = NULL;
1372 return (0);
1375 * Wait until all asynchronous putpage operations on
1376 * this file system are complete before flushing rnodes
1377 * from the cache.
1379 omax = mi->mi_max_threads;
1380 if (nfs_async_stop_sig(vfsp)) {
1381 return (EINTR);
1383 rflush(vfsp, cr);
1385 * If there are any active vnodes on this file system,
1386 * then the file system is busy and can't be umounted.
1388 if (check_rtable(vfsp)) {
1389 mutex_enter(&mi->mi_async_lock);
1390 mi->mi_max_threads = omax;
1391 mutex_exit(&mi->mi_async_lock);
1392 return (EBUSY);
1395 * The unmount can't fail from now on; stop the manager thread.
1397 nfs_async_manager_stop(vfsp);
1399 * Destroy all rnodes belonging to this file system from the
1400 * rnode hash queues and purge any resources allocated to
1401 * them.
1403 destroy_rtable(vfsp, cr);
1404 if (mi->mi_io_kstats) {
1405 kstat_delete(mi->mi_io_kstats);
1406 mi->mi_io_kstats = NULL;
1408 if (mi->mi_ro_kstats) {
1409 kstat_delete(mi->mi_ro_kstats);
1410 mi->mi_ro_kstats = NULL;
1412 return (0);
1416 * find root of nfs
1418 static int
1419 nfs_root(vfs_t *vfsp, vnode_t **vpp)
1421 mntinfo_t *mi;
1422 vnode_t *vp;
1423 servinfo_t *svp;
1424 rnode_t *rp;
1425 int error = 0;
1427 mi = VFTOMI(vfsp);
1429 if (nfs_zone() != mi->mi_zone)
1430 return (EPERM);
1432 svp = mi->mi_curr_serv;
1433 if (svp && (svp->sv_flags & SV_ROOT_STALE)) {
1434 mutex_enter(&svp->sv_lock);
1435 svp->sv_flags &= ~SV_ROOT_STALE;
1436 mutex_exit(&svp->sv_lock);
1437 error = ENOENT;
1440 vp = makenfsnode((fhandle_t *)mi->mi_curr_serv->sv_fhandle.fh_buf,
1441 NULL, vfsp, gethrtime(), CRED(), NULL, NULL);
1444 * if the SV_ROOT_STALE flag was reset above, reset the
1445 * RSTALE flag if needed and return an error
1447 if (error == ENOENT) {
1448 rp = VTOR(vp);
1449 if (svp && rp->r_flags & RSTALE) {
1450 mutex_enter(&rp->r_statelock);
1451 rp->r_flags &= ~RSTALE;
1452 mutex_exit(&rp->r_statelock);
1454 VN_RELE(vp);
1455 return (error);
1458 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
1460 vp->v_type = mi->mi_type;
1462 *vpp = vp;
1464 return (0);
1468 * Get file system statistics.
1470 static int
1471 nfs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
1473 int error;
1474 mntinfo_t *mi;
1475 struct nfsstatfs fs;
1476 int douprintf;
1477 failinfo_t fi;
1478 vnode_t *vp;
1480 error = nfs_root(vfsp, &vp);
1481 if (error)
1482 return (error);
1484 mi = VFTOMI(vfsp);
1485 douprintf = 1;
1486 fi.vp = vp;
1487 fi.fhp = NULL; /* no need to update, filehandle not copied */
1488 fi.copyproc = nfscopyfh;
1489 fi.lookupproc = nfslookup;
1490 fi.xattrdirproc = acl_getxattrdir2;
1492 error = rfs2call(mi, RFS_STATFS, xdr_fhandle, (caddr_t)VTOFH(vp),
1493 xdr_statfs, (caddr_t)&fs, CRED(), &douprintf, &fs.fs_status, 0,
1494 &fi);
1496 if (!error) {
1497 error = geterrno(fs.fs_status);
1498 if (!error) {
1499 mutex_enter(&mi->mi_lock);
1500 if (mi->mi_stsize) {
1501 mi->mi_stsize = MIN(mi->mi_stsize, fs.fs_tsize);
1502 } else {
1503 mi->mi_stsize = fs.fs_tsize;
1504 mi->mi_curwrite = mi->mi_stsize;
1506 mutex_exit(&mi->mi_lock);
1507 sbp->f_bsize = fs.fs_bsize;
1508 sbp->f_frsize = fs.fs_bsize;
1509 sbp->f_blocks = (fsblkcnt64_t)fs.fs_blocks;
1510 sbp->f_bfree = (fsblkcnt64_t)fs.fs_bfree;
1512 * Some servers may return negative available
1513 * block counts. They may do this because they
1514 * calculate the number of available blocks by
1515 * subtracting the number of used blocks from
1516 * the total number of blocks modified by the
1517 * minimum free value. For example, if the
1518 * minumum free percentage is 10 and the file
1519 * system is greater than 90 percent full, then
1520 * 90 percent of the total blocks minus the
1521 * actual number of used blocks may be a
1522 * negative number.
1524 * In this case, we need to sign extend the
1525 * negative number through the assignment from
1526 * the 32 bit bavail count to the 64 bit bavail
1527 * count.
1529 * We need to be able to discern between there
1530 * just being a lot of available blocks on the
1531 * file system and the case described above.
1532 * We are making the assumption that it does
1533 * not make sense to have more available blocks
1534 * than there are free blocks. So, if there
1535 * are, then we treat the number as if it were
1536 * a negative number and arrange to have it
1537 * sign extended when it is converted from 32
1538 * bits to 64 bits.
1540 if (fs.fs_bavail <= fs.fs_bfree)
1541 sbp->f_bavail = (fsblkcnt64_t)fs.fs_bavail;
1542 else {
1543 sbp->f_bavail =
1544 (fsblkcnt64_t)((long)fs.fs_bavail);
1546 sbp->f_files = (fsfilcnt64_t)-1;
1547 sbp->f_ffree = (fsfilcnt64_t)-1;
1548 sbp->f_favail = (fsfilcnt64_t)-1;
1549 sbp->f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
1550 (void) strncpy(sbp->f_basetype,
1551 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
1552 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
1553 sbp->f_namemax = (uint32_t)-1;
1554 } else {
1555 PURGE_STALE_FH(error, vp, CRED());
1559 VN_RELE(vp);
1561 return (error);
1564 static kmutex_t nfs_syncbusy;
1567 * Flush dirty nfs files for file system vfsp.
1568 * If vfsp == NULL, all nfs files are flushed.
1570 /* ARGSUSED */
1571 static int
1572 nfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
1575 * Cross-zone calls are OK here, since this translates to a
1576 * fop_putpage(B_ASYNC), which gets picked up by the right zone.
1578 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs_syncbusy) != 0) {
1579 rflush(vfsp, cr);
1580 mutex_exit(&nfs_syncbusy);
1582 return (0);
1585 /* ARGSUSED */
1586 static int
1587 nfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1589 int error;
1590 vnode_t *vp;
1591 struct vattr va;
1592 struct nfs_fid *nfsfidp = (struct nfs_fid *)fidp;
1593 zoneid_t zoneid = VFTOMI(vfsp)->mi_zone->zone_id;
1595 if (nfs_zone() != VFTOMI(vfsp)->mi_zone)
1596 return (EPERM);
1597 if (fidp->fid_len != (sizeof (*nfsfidp) - sizeof (short))) {
1598 #ifdef DEBUG
1599 zcmn_err(zoneid, CE_WARN,
1600 "nfs_vget: bad fid len, %d/%d", fidp->fid_len,
1601 (int)(sizeof (*nfsfidp) - sizeof (short)));
1602 #endif
1603 *vpp = NULL;
1604 return (ESTALE);
1607 vp = makenfsnode((fhandle_t *)(nfsfidp->nf_data), NULL, vfsp,
1608 gethrtime(), CRED(), NULL, NULL);
1610 if (VTOR(vp)->r_flags & RSTALE) {
1611 VN_RELE(vp);
1612 *vpp = NULL;
1613 return (ENOENT);
1616 if (vp->v_type == VNON) {
1617 va.va_mask = VATTR_ALL;
1618 error = nfsgetattr(vp, &va, CRED());
1619 if (error) {
1620 VN_RELE(vp);
1621 *vpp = NULL;
1622 return (error);
1624 vp->v_type = va.va_type;
1627 *vpp = vp;
1629 return (0);
1632 /* ARGSUSED */
1633 static int
1634 nfs_mountroot(vfs_t *vfsp, whymountroot_t why)
1636 vnode_t *rtvp;
1637 char root_hostname[SYS_NMLN+1];
1638 struct servinfo *svp;
1639 int error;
1640 int vfsflags;
1641 size_t size;
1642 char *root_path;
1643 struct pathname pn;
1644 char *name;
1645 cred_t *cr;
1646 struct nfs_args args; /* nfs mount arguments */
1647 static char token[10];
1649 bzero(&args, sizeof (args));
1651 /* do this BEFORE getfile which causes xid stamps to be initialized */
1652 clkset(-1L); /* hack for now - until we get time svc? */
1654 if (why == ROOT_REMOUNT) {
1656 * Shouldn't happen.
1658 panic("nfs_mountroot: why == ROOT_REMOUNT");
1661 if (why == ROOT_UNMOUNT) {
1663 * Nothing to do for NFS.
1665 return (0);
1669 * why == ROOT_INIT
1672 name = token;
1673 *name = 0;
1674 getfsname("root", name, sizeof (token));
1676 pn_alloc(&pn);
1677 root_path = pn.pn_path;
1679 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
1680 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
1681 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1682 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
1685 * Get server address
1686 * Get the root fhandle
1687 * Get server's transport
1688 * Get server's hostname
1689 * Get options
1691 args.addr = &svp->sv_addr;
1692 args.fh = (char *)&svp->sv_fhandle.fh_buf;
1693 args.knconf = svp->sv_knconf;
1694 args.hostname = root_hostname;
1695 vfsflags = 0;
1696 if (error = mount_root(*name ? name : "root", root_path, NFS_VERSION,
1697 &args, &vfsflags)) {
1698 nfs_cmn_err(error, CE_WARN,
1699 "nfs_mountroot: mount_root failed: %m");
1700 sv_free(svp);
1701 pn_free(&pn);
1702 return (error);
1704 svp->sv_fhandle.fh_len = NFS_FHSIZE;
1705 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
1706 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
1707 (void) strcpy(svp->sv_hostname, root_hostname);
1710 * Force root partition to always be mounted with AUTH_UNIX for now
1712 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
1713 svp->sv_secdata->secmod = AUTH_UNIX;
1714 svp->sv_secdata->rpcflavor = AUTH_UNIX;
1715 svp->sv_secdata->data = NULL;
1717 cr = crgetcred();
1718 rtvp = NULL;
1720 error = nfsrootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
1722 crfree(cr);
1724 if (error) {
1725 pn_free(&pn);
1726 sv_free(svp);
1727 return (error);
1730 error = nfs_setopts(rtvp, DATAMODEL_NATIVE, &args);
1731 if (error) {
1732 nfs_cmn_err(error, CE_WARN,
1733 "nfs_mountroot: invalid root mount options");
1734 pn_free(&pn);
1735 goto errout;
1738 (void) vfs_lock_wait(vfsp);
1739 vfs_add(NULL, vfsp, vfsflags);
1740 vfs_unlock(vfsp);
1742 size = strlen(svp->sv_hostname);
1743 (void) strcpy(rootfs.bo_name, svp->sv_hostname);
1744 rootfs.bo_name[size] = ':';
1745 (void) strcpy(&rootfs.bo_name[size + 1], root_path);
1747 pn_free(&pn);
1749 errout:
1750 if (error) {
1751 sv_free(svp);
1752 nfs_async_stop(vfsp);
1753 nfs_async_manager_stop(vfsp);
1756 if (rtvp != NULL)
1757 VN_RELE(rtvp);
1759 return (error);
1763 * Initialization routine for VFS routines. Should only be called once
1766 nfs_vfsinit(void)
1768 mutex_init(&nfs_syncbusy, NULL, MUTEX_DEFAULT, NULL);
1769 return (0);
1772 void
1773 nfs_vfsfini(void)
1775 mutex_destroy(&nfs_syncbusy);
1778 void
1779 nfs_freevfs(vfs_t *vfsp)
1781 mntinfo_t *mi;
1782 servinfo_t *svp;
1784 /* free up the resources */
1785 mi = VFTOMI(vfsp);
1786 pathconf_rele(mi);
1787 svp = mi->mi_servers;
1788 mi->mi_servers = mi->mi_curr_serv = NULL;
1789 sv_free(svp);
1792 * By this time we should have already deleted the
1793 * mi kstats in the unmount code. If they are still around
1794 * somethings wrong
1796 ASSERT(mi->mi_io_kstats == NULL);
1797 nfs_free_mi(mi);