4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
27 * All rights reserved.
30 #include <sys/errno.h>
31 #include <sys/param.h>
32 #include <sys/types.h>
36 #include <sys/utsname.h>
38 #include <sys/vfs_opreg.h>
39 #include <sys/vnode.h>
40 #include <sys/pathname.h>
41 #include <sys/bootconf.h>
42 #include <fs/fs_subr.h>
43 #include <rpc/types.h>
46 #include <nfs/nfs_clnt.h>
47 #include <nfs/rnode.h>
48 #include <nfs/mount.h>
49 #include <nfs/nfssys.h>
50 #include <sys/debug.h>
51 #include <sys/cmn_err.h>
53 #include <sys/fcntl.h>
57 * This is the loadable module wrapper.
59 #include <sys/systm.h>
60 #include <sys/modctl.h>
61 #include <sys/syscall.h>
64 #include <rpc/types.h>
70 * The pseudo NFS filesystem to allow diskless booting to dynamically
71 * mount either an NFS V2, NFS V3, or NFS V4 filesystem. This only implements
72 * the VFS_MOUNTROOT op and is only intended to be used by the
73 * diskless booting code until the real root filesystem is mounted.
74 * Nothing else should ever call this!
76 * The strategy is that if the initial rootfs type is set to "nfsdyn"
77 * by loadrootmodules() this filesystem is called to mount the
78 * root filesystem. It first attempts to mount a V4 filesystem, and if that
79 * fails due to an RPC version mismatch it tries V3 and finally V2.
80 * Once the real mount succeeds the vfsops and rootfs name are changed
81 * to reflect the real filesystem type.
83 static int nfsdyninit(int, char *);
84 static int nfsdyn_mountroot(vfs_t
*, whymountroot_t
);
86 vfsops_t
*nfsdyn_vfsops
;
89 * The following data structures are used to configure the NFS
90 * system call, the NFS Version 2 client VFS, and the NFS Version
91 * 3 client VFS into the system. The NFS Version 4 structures are defined in
96 * The NFS system call.
98 static struct sysent nfssysent
= {
100 SE_32RVAL1
| SE_ARGC
| SE_NOUNLOAD
,
104 static struct modlsys modlsys
= {
106 "NFS syscall, client, and common",
110 #ifdef _SYSCALL32_IMPL
111 static struct modlsys modlsys32
= {
113 "NFS syscall, client, and common (32-bit)",
116 #endif /* _SYSCALL32_IMPL */
119 * The NFS Dynamic client VFS.
121 static vfsdef_t vfw
= {
129 static struct modlfs modlfs
= {
131 "network filesystem",
136 * The NFS Version 2 client VFS.
138 static vfsdef_t vfw2
= {
142 VSW_CANREMOUNT
|VSW_NOTZONESAFE
|VSW_STATS
|VSW_ZMOUNT
,
146 static struct modlfs modlfs2
= {
148 "network filesystem version 2",
153 * The NFS Version 3 client VFS.
155 static vfsdef_t vfw3
= {
159 VSW_CANREMOUNT
|VSW_NOTZONESAFE
|VSW_STATS
|VSW_ZMOUNT
,
163 static struct modlfs modlfs3
= {
165 "network filesystem version 3",
169 extern struct modlfs modlfs4
;
172 * We have too many linkage structures so we define our own XXX
/*
 * Local stand-in for struct modlinkage with room for seven linkage
 * pointers.  It is cast to (struct modlinkage *) when handed to
 * mod_install() and mod_info().
 */
struct modlinkage_big {
	int	ml_rev;		/* rev of loadable modules system */
	void	*ml_linkage[7];	/* NULL terminated list of linkage structures */
};
181 * All of the module configuration linkages required to configure
182 * the system call and client VFS's into the system.
184 static struct modlinkage_big modlinkage
= {
187 #ifdef _SYSCALL32_IMPL
198 * specfs - for getfsname only??
199 * rpcmod - too many symbols to build stubs for them all
201 char _depends_on
[] = "fs/specfs strmod/rpcmod misc/rpcsec";
204 * This routine is invoked automatically when the kernel module
205 * containing this routine is loaded. This allows module specific
206 * initialization to be done when the module is loaded.
213 if ((status
= nfs_clntinit()) != 0) {
214 cmn_err(CE_WARN
, "_init: nfs_clntinit failed");
219 * Create the version specific kstats.
221 * PSARC 2001/697 Contract Private Interface
222 * All nfs kstats are under SunMC contract
223 * Please refer to the PSARC listed above and contact
224 * SunMC before making any changes!
226 * Changes must be reviewed by Solaris File Sharing
227 * Changes must be communicated to contract-2001-697@sun.com
231 zone_key_create(&nfsstat_zone_key
, nfsstat_zone_init
, NULL
,
233 status
= mod_install((struct modlinkage
*)&modlinkage
);
236 (void) zone_key_delete(nfsstat_zone_key
);
239 * Failed to install module, cleanup previous
240 * initialization work.
245 * Clean up work performed indirectly by mod_installfs()
246 * as a result of our call to mod_install().
258 /* Don't allow module to be unloaded */
263 _info(struct modinfo
*modinfop
)
265 return (mod_info((struct modlinkage
*)&modlinkage
, modinfop
));
273 * Returns the preferred transfer size in bytes based on
274 * what network interfaces are available.
280 * For the moment, just return NFS_MAXDATA until we can query the
281 * appropriate transport.
283 return (NFS_MAXDATA
);
287 * Returns the preferred transfer size in bytes based on
288 * what network interfaces are available.
/*
 * Largest NFS Version 3 transfer size, in bytes (1 MB).  This should
 * reflect the largest transfer size possible.
 */
static int nfs3_max_transfer_size = 1024 * 1024;
298 * For the moment, just return nfs3_max_transfer_size until we
299 * can query the appropriate transport.
301 return (nfs3_max_transfer_size
);
304 static uint_t nfs3_max_transfer_size_clts
= 32 * 1024;
305 static uint_t nfs3_max_transfer_size_cots
= 1024 * 1024;
306 static uint_t nfs3_max_transfer_size_rdma
= 1024 * 1024;
309 nfs3_tsize(struct knetconfig
*knp
)
312 if (knp
->knc_semantics
== NC_TPI_COTS_ORD
||
313 knp
->knc_semantics
== NC_TPI_COTS
)
314 return (nfs3_max_transfer_size_cots
);
315 if (knp
->knc_semantics
== NC_TPI_RDMA
)
316 return (nfs3_max_transfer_size_rdma
);
317 return (nfs3_max_transfer_size_clts
);
321 rfs3_tsize(struct svc_req
*req
)
324 if (req
->rq_xprt
->xp_type
== T_COTS_ORD
||
325 req
->rq_xprt
->xp_type
== T_COTS
)
326 return (nfs3_max_transfer_size_cots
);
327 if (req
->rq_xprt
->xp_type
== T_RDMA
)
328 return (nfs3_max_transfer_size_rdma
);
329 return (nfs3_max_transfer_size_clts
);
334 nfsdyninit(int fstyp
, char *name
)
336 static const fs_operation_def_t nfsdyn_vfsops_template
[] = {
337 VFSNAME_MOUNTROOT
, { .vfs_mountroot
= nfsdyn_mountroot
},
342 error
= vfs_setfsops(fstyp
, nfsdyn_vfsops_template
, &nfsdyn_vfsops
);
351 nfsdyn_mountroot(vfs_t
*vfsp
, whymountroot_t why
)
353 char root_hostname
[SYS_NMLN
+1];
354 struct servinfo
*svp
;
360 static char token
[10];
361 struct nfs_args args
; /* nfs mount arguments */
363 bzero(&args
, sizeof (args
));
365 /* do this BEFORE getfile which causes xid stamps to be initialized */
366 clkset(-1L); /* hack for now - until we get time svc? */
368 if (why
== ROOT_REMOUNT
) {
372 panic("nfs3_mountroot: why == ROOT_REMOUNT\n");
375 if (why
== ROOT_UNMOUNT
) {
377 * Nothing to do for NFS.
388 getfsname("root", name
, sizeof (token
));
391 root_path
= pn
.pn_path
;
393 svp
= kmem_zalloc(sizeof (*svp
), KM_SLEEP
);
394 mutex_init(&svp
->sv_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
395 svp
->sv_knconf
= kmem_zalloc(sizeof (*svp
->sv_knconf
), KM_SLEEP
);
396 svp
->sv_knconf
->knc_protofmly
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
397 svp
->sv_knconf
->knc_proto
= kmem_alloc(KNC_STRSIZE
, KM_SLEEP
);
400 * First try version 4
402 vfs_setops(vfsp
, nfs4_vfsops
);
403 args
.addr
= &svp
->sv_addr
;
404 args
.fh
= (char *)&svp
->sv_fhandle
;
405 args
.knconf
= svp
->sv_knconf
;
406 args
.hostname
= root_hostname
;
409 if (error
= mount_root(*name
? name
: "root", root_path
, NFS_V4
,
411 if (error
!= EPROTONOSUPPORT
) {
412 nfs_cmn_err(error
, CE_WARN
,
413 "Unable to mount NFS root filesystem: %m");
416 vfs_setops(vfsp
, nfsdyn_vfsops
);
423 bzero(&args
, sizeof (args
));
424 vfs_setops(vfsp
, nfs3_vfsops
);
425 args
.addr
= &svp
->sv_addr
;
426 args
.fh
= (char *)&svp
->sv_fhandle
;
427 args
.knconf
= svp
->sv_knconf
;
428 args
.hostname
= root_hostname
;
431 if (error
= mount_root(*name
? name
: "root", root_path
,
432 NFS_V3
, &args
, &vfsflags
)) {
433 if (error
!= EPROTONOSUPPORT
) {
434 nfs_cmn_err(error
, CE_WARN
,
435 "Unable to mount NFS root filesystem: %m");
438 vfs_setops(vfsp
, nfsdyn_vfsops
);
443 * Finally, try version 2
445 bzero(&args
, sizeof (args
));
446 args
.addr
= &svp
->sv_addr
;
447 args
.fh
= (char *)&svp
->sv_fhandle
.fh_buf
;
448 args
.knconf
= svp
->sv_knconf
;
449 args
.hostname
= root_hostname
;
452 vfs_setops(vfsp
, nfs_vfsops
);
454 if (error
= mount_root(*name
? name
: "root",
455 root_path
, NFS_VERSION
, &args
, &vfsflags
)) {
456 nfs_cmn_err(error
, CE_WARN
,
457 "Unable to mount NFS root filesystem: %m");
460 vfs_setops(vfsp
, nfsdyn_vfsops
);
468 return (VFS_MOUNTROOT(vfsp
, why
));
472 nfs_setopts(vnode_t
*vp
, model_t model
, struct nfs_args
*buf
)
474 mntinfo_t
*mi
; /* mount info, pointed at by vfs */
475 STRUCT_HANDLE(nfs_args
, args
);
482 STRUCT_SET_HANDLE(args
, model
, buf
);
484 flags
= STRUCT_FGET(args
, flags
);
487 * Set option fields in mount info record
491 if (flags
& NFSMNT_NOAC
) {
492 mi
->mi_flags
|= MI_NOAC
;
495 if (flags
& NFSMNT_NOCTO
)
496 mi
->mi_flags
|= MI_NOCTO
;
497 if (flags
& NFSMNT_LLOCK
)
498 mi
->mi_flags
|= MI_LLOCK
;
499 if (flags
& NFSMNT_GRPID
)
500 mi
->mi_flags
|= MI_GRPID
;
501 if (flags
& NFSMNT_RETRANS
) {
502 if (STRUCT_FGET(args
, retrans
) < 0)
504 mi
->mi_retrans
= STRUCT_FGET(args
, retrans
);
506 if (flags
& NFSMNT_TIMEO
) {
507 if (STRUCT_FGET(args
, timeo
) <= 0)
509 mi
->mi_timeo
= STRUCT_FGET(args
, timeo
);
511 * The following scales the standard deviation and
512 * current retransmission timer to match the
513 * initial value for the timeout specified.
515 mi
->mi_timers
[NFS_CALLTYPES
].rt_deviate
=
516 (mi
->mi_timeo
* hz
* 2) / 5;
517 mi
->mi_timers
[NFS_CALLTYPES
].rt_rtxcur
=
518 mi
->mi_timeo
* hz
/ 10;
520 if (flags
& NFSMNT_RSIZE
) {
521 if (STRUCT_FGET(args
, rsize
) <= 0)
523 mi
->mi_tsize
= MIN(mi
->mi_tsize
, STRUCT_FGET(args
, rsize
));
524 mi
->mi_curread
= MIN(mi
->mi_curread
, mi
->mi_tsize
);
526 if (flags
& NFSMNT_WSIZE
) {
527 if (STRUCT_FGET(args
, wsize
) <= 0)
529 mi
->mi_stsize
= MIN(mi
->mi_stsize
, STRUCT_FGET(args
, wsize
));
530 mi
->mi_curwrite
= MIN(mi
->mi_curwrite
, mi
->mi_stsize
);
532 if (flags
& NFSMNT_ACREGMIN
) {
533 if (STRUCT_FGET(args
, acregmin
) < 0)
534 mi
->mi_acregmin
= ACMINMAX
;
536 mi
->mi_acregmin
= MIN(STRUCT_FGET(args
, acregmin
),
538 mi
->mi_acregmin
= SEC2HR(mi
->mi_acregmin
);
540 if (flags
& NFSMNT_ACREGMAX
) {
541 if (STRUCT_FGET(args
, acregmax
) < 0)
542 mi
->mi_acregmax
= ACMAXMAX
;
544 mi
->mi_acregmax
= MIN(STRUCT_FGET(args
, acregmax
),
546 mi
->mi_acregmax
= SEC2HR(mi
->mi_acregmax
);
548 if (flags
& NFSMNT_ACDIRMIN
) {
549 if (STRUCT_FGET(args
, acdirmin
) < 0)
550 mi
->mi_acdirmin
= ACMINMAX
;
552 mi
->mi_acdirmin
= MIN(STRUCT_FGET(args
, acdirmin
),
554 mi
->mi_acdirmin
= SEC2HR(mi
->mi_acdirmin
);
556 if (flags
& NFSMNT_ACDIRMAX
) {
557 if (STRUCT_FGET(args
, acdirmax
) < 0)
558 mi
->mi_acdirmax
= ACMAXMAX
;
560 mi
->mi_acdirmax
= MIN(STRUCT_FGET(args
, acdirmax
),
562 mi
->mi_acdirmax
= SEC2HR(mi
->mi_acdirmax
);
565 if (flags
& NFSMNT_LOOPBACK
)
566 mi
->mi_flags
|= MI_LOOPBACK
;
572 * Set or Clear direct I/O flag
573 * VOP_RWLOCK() is held for write access to prevent a race condition
574 * which would occur if a process is in the middle of a write when
575 * directio flag gets set. It is possible that all pages may not get flushed.
580 nfs_directio(vnode_t
*vp
, int cmd
, cred_t
*cr
)
587 if (cmd
== DIRECTIO_ON
) {
589 if (rp
->r_flags
& RDIRECTIO
)
593 * Flush the page cache.
596 (void) VOP_RWLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
598 if (rp
->r_flags
& RDIRECTIO
) {
599 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
603 if (vn_has_cached_data(vp
) &&
604 ((rp
->r_flags
& RDIRTY
) || rp
->r_awcount
> 0)) {
605 error
= VOP_PUTPAGE(vp
, (offset_t
)0, (uint_t
)0,
608 if (error
== ENOSPC
|| error
== EDQUOT
) {
609 mutex_enter(&rp
->r_statelock
);
612 mutex_exit(&rp
->r_statelock
);
614 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
619 mutex_enter(&rp
->r_statelock
);
620 rp
->r_flags
|= RDIRECTIO
;
621 mutex_exit(&rp
->r_statelock
);
622 VOP_RWUNLOCK(vp
, V_WRITELOCK_TRUE
, NULL
);
626 if (cmd
== DIRECTIO_OFF
) {
627 mutex_enter(&rp
->r_statelock
);
628 rp
->r_flags
&= ~RDIRECTIO
; /* disable direct mode */
629 mutex_exit(&rp
->r_statelock
);