2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
37 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.51 2007/09/04 00:48:18 dillon Exp $
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
44 #include <sys/param.h>
45 #include <sys/sockio.h>
47 #include <sys/vnode.h>
48 #include <sys/fcntl.h>
49 #include <sys/kernel.h>
50 #include <sys/sysctl.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/systm.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_zone.h>
63 #include <net/route.h>
64 #include <netinet/in.h>
66 #include <sys/thread2.h>
74 #include "nfsm_subs.h"
75 #include "nfsdiskless.h"
76 #include "nfsmountrpc.h"
78 extern int nfs_mountroot(struct mount
*mp
);
79 extern void bootpc_init(void);
82 extern struct vop_ops nfsv2_vnode_vops
;
83 extern struct vop_ops nfsv2_fifo_vops
;
84 extern struct vop_ops nfsv2_spec_vops
;
86 MALLOC_DEFINE(M_NFSREQ
, "NFS req", "NFS request header");
87 MALLOC_DEFINE(M_NFSBIGFH
, "NFSV3 bigfh", "NFS version 3 file handle");
88 MALLOC_DEFINE(M_NFSD
, "NFS daemon", "Nfs server daemon structure");
89 MALLOC_DEFINE(M_NFSDIROFF
, "NFSV3 diroff", "NFS directory offset data");
90 MALLOC_DEFINE(M_NFSRVDESC
, "NFSV3 srvdesc", "NFS server socket descriptor");
91 MALLOC_DEFINE(M_NFSUID
, "NFS uid", "Nfs uid mapping structure");
92 MALLOC_DEFINE(M_NFSHASH
, "NFS hash", "NFS hash tables");
94 vm_zone_t nfsmount_zone
;
96 struct nfsstats nfsstats
;
97 SYSCTL_NODE(_vfs
, OID_AUTO
, nfs
, CTLFLAG_RW
, 0, "NFS filesystem");
98 SYSCTL_STRUCT(_vfs_nfs
, NFS_NFSSTATS
, nfsstats
, CTLFLAG_RD
,
99 &nfsstats
, nfsstats
, "");
100 static int nfs_ip_paranoia
= 1;
101 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, nfs_ip_paranoia
, CTLFLAG_RW
,
102 &nfs_ip_paranoia
, 0, "");
105 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, debug
, CTLFLAG_RW
, &nfs_debug
, 0, "");
109 * Tunable to determine the Read/Write unit size. Maximum value
110 * is NFS_MAXDATA. We also default to NFS_MAXDATA.
112 static int nfs_io_size
= NFS_MAXDATA
;
113 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, nfs_io_size
, CTLFLAG_RW
,
114 &nfs_io_size
, 0, "NFS optimal I/O unit size");
116 static void nfs_decode_args (struct nfsmount
*nmp
,
117 struct nfs_args
*argp
);
118 static int mountnfs (struct nfs_args
*,struct mount
*,
119 struct sockaddr
*,char *,char *,struct vnode
**);
120 static int nfs_mount ( struct mount
*mp
, char *path
, caddr_t data
,
122 static int nfs_unmount ( struct mount
*mp
, int mntflags
);
123 static int nfs_root ( struct mount
*mp
, struct vnode
**vpp
);
124 static int nfs_statfs ( struct mount
*mp
, struct statfs
*sbp
,
126 static int nfs_sync ( struct mount
*mp
, int waitfor
);
129 * nfs vfs operations.
131 static struct vfsops nfs_vfsops
= {
132 .vfs_mount
= nfs_mount
,
133 .vfs_unmount
= nfs_unmount
,
134 .vfs_root
= nfs_root
,
135 .vfs_statfs
= nfs_statfs
,
136 .vfs_sync
= nfs_sync
,
137 .vfs_init
= nfs_init
,
138 .vfs_uninit
= nfs_uninit
140 VFS_SET(nfs_vfsops
, nfs
, VFCF_NETWORK
);
143 * This structure must be filled in by a primary bootstrap or bootstrap
144 * server for a diskless/dataless machine. It is initialized below just
145 * to ensure that it is allocated to initialized data (.data not .bss).
147 struct nfs_diskless nfs_diskless
= { { { 0 } } };
148 struct nfsv3_diskless nfsv3_diskless
= { { { 0 } } };
149 int nfs_diskless_valid
= 0;
151 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, diskless_valid
, CTLFLAG_RD
,
152 &nfs_diskless_valid
, 0, "");
154 SYSCTL_STRING(_vfs_nfs
, OID_AUTO
, diskless_rootpath
, CTLFLAG_RD
,
155 nfsv3_diskless
.root_hostnam
, 0, "");
157 SYSCTL_OPAQUE(_vfs_nfs
, OID_AUTO
, diskless_rootaddr
, CTLFLAG_RD
,
158 &nfsv3_diskless
.root_saddr
, sizeof nfsv3_diskless
.root_saddr
,
159 "%Ssockaddr_in", "");
161 SYSCTL_STRING(_vfs_nfs
, OID_AUTO
, diskless_swappath
, CTLFLAG_RD
,
162 nfsv3_diskless
.swap_hostnam
, 0, "");
164 SYSCTL_OPAQUE(_vfs_nfs
, OID_AUTO
, diskless_swapaddr
, CTLFLAG_RD
,
165 &nfsv3_diskless
.swap_saddr
, sizeof nfsv3_diskless
.swap_saddr
,
169 void nfsargs_ntoh (struct nfs_args
*);
170 static int nfs_mountdiskless (char *, char *, int,
171 struct sockaddr_in
*, struct nfs_args
*,
172 struct thread
*, struct vnode
**,
174 static void nfs_convert_diskless (void);
175 static void nfs_convert_oargs (struct nfs_args
*args
,
176 struct onfs_args
*oargs
);
179 * Calculate the buffer I/O block size to use. The maximum V2 block size
180 * is typically 8K, the maximum datagram size is typically 16K, and the
181 * maximum V3 block size is typically 32K. The buffer cache tends to work
182 * best with 16K blocks but we allow 32K for TCP connections.
184 * We force the block size to be at least a page for buffer cache efficiency.
187 nfs_iosize(int v3
, int sotype
)
193 if (sotype
== SOCK_STREAM
)
196 iomax
= NFS_MAXDGRAMDATA
;
198 iomax
= NFS_V2MAXDATA
;
200 if ((iosize
= nfs_io_size
) > iomax
)
202 if (iosize
< PAGE_SIZE
)
206 * This is an awful hack but until the buffer cache is rewritten
207 * we need it. The problem is that when you combine write() with
208 * mmap() the vm_page->valid bits can become weird looking
209 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers
210 * at the file EOF. To solve the problem the BIO system needs to
211 * be guaranteed that the NFS iosize for regular files will be a
212 * multiple of PAGE_SIZE so it can invalidate the whole page
213 * rather than just the piece of it owned by the buffer when
214 * NFS does vinvalbuf() calls.
216 if (iosize
& PAGE_MASK
)
217 iosize
= (iosize
& ~PAGE_MASK
) + PAGE_SIZE
;
222 nfs_convert_oargs(struct nfs_args
*args
, struct onfs_args
*oargs
)
224 args
->version
= NFS_ARGSVERSION
;
225 args
->addr
= oargs
->addr
;
226 args
->addrlen
= oargs
->addrlen
;
227 args
->sotype
= oargs
->sotype
;
228 args
->proto
= oargs
->proto
;
229 args
->fh
= oargs
->fh
;
230 args
->fhsize
= oargs
->fhsize
;
231 args
->flags
= oargs
->flags
;
232 args
->wsize
= oargs
->wsize
;
233 args
->rsize
= oargs
->rsize
;
234 args
->readdirsize
= oargs
->readdirsize
;
235 args
->timeo
= oargs
->timeo
;
236 args
->retrans
= oargs
->retrans
;
237 args
->maxgrouplist
= oargs
->maxgrouplist
;
238 args
->readahead
= oargs
->readahead
;
239 args
->deadthresh
= oargs
->deadthresh
;
240 args
->hostname
= oargs
->hostname
;
244 nfs_convert_diskless(void)
248 bcopy(&nfs_diskless
.myif
, &nfsv3_diskless
.myif
,
249 sizeof(struct ifaliasreq
));
250 bcopy(&nfs_diskless
.mygateway
, &nfsv3_diskless
.mygateway
,
251 sizeof(struct sockaddr_in
));
252 nfs_convert_oargs(&nfsv3_diskless
.swap_args
,&nfs_diskless
.swap_args
);
254 bcopy(nfs_diskless
.swap_fh
,nfsv3_diskless
.swap_fh
,NFSX_V2FH
);
255 nfsv3_diskless
.swap_fhsize
= NFSX_V2FH
;
256 for (i
= NFSX_V2FH
- 1; i
>= 0; --i
) {
257 if (nfs_diskless
.swap_fh
[i
])
261 nfsv3_diskless
.swap_fhsize
= 0;
263 bcopy(&nfs_diskless
.swap_saddr
,&nfsv3_diskless
.swap_saddr
,
264 sizeof(struct sockaddr_in
));
265 bcopy(nfs_diskless
.swap_hostnam
,nfsv3_diskless
.swap_hostnam
, MNAMELEN
);
266 nfsv3_diskless
.swap_nblks
= nfs_diskless
.swap_nblks
;
267 bcopy(&nfs_diskless
.swap_ucred
, &nfsv3_diskless
.swap_ucred
,
268 sizeof(struct ucred
));
269 nfs_convert_oargs(&nfsv3_diskless
.root_args
,&nfs_diskless
.root_args
);
271 bcopy(nfs_diskless
.root_fh
,nfsv3_diskless
.root_fh
,NFSX_V2FH
);
272 nfsv3_diskless
.root_fhsize
= NFSX_V2FH
;
273 for (i
= NFSX_V2FH
- 1; i
>= 0; --i
) {
274 if (nfs_diskless
.root_fh
[i
])
278 nfsv3_diskless
.root_fhsize
= 0;
280 bcopy(&nfs_diskless
.root_saddr
,&nfsv3_diskless
.root_saddr
,
281 sizeof(struct sockaddr_in
));
282 bcopy(nfs_diskless
.root_hostnam
,nfsv3_diskless
.root_hostnam
, MNAMELEN
);
283 nfsv3_diskless
.root_time
= nfs_diskless
.root_time
;
284 bcopy(nfs_diskless
.my_hostnam
,nfsv3_diskless
.my_hostnam
,
286 nfs_diskless_valid
= 3;
293 nfs_statfs(struct mount
*mp
, struct statfs
*sbp
, struct ucred
*cred
)
296 struct nfs_statfs
*sfp
;
300 caddr_t bpos
, dpos
, cp2
;
301 struct nfsmount
*nmp
= VFSTONFS(mp
);
302 thread_t td
= curthread
;
303 int error
= 0, v3
= (nmp
->nm_flag
& NFSMNT_NFSV3
), retattr
;
304 struct mbuf
*mreq
, *mrep
, *md
, *mb
, *mb2
;
309 sfp
= (struct nfs_statfs
*)0;
311 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
315 /* ignore the passed cred */
317 cred
->cr_ngroups
= 1;
318 if (v3
&& (nmp
->nm_state
& NFSSTA_GOTFSINFO
) == 0)
319 (void)nfs_fsinfo(nmp
, vp
, td
);
320 nfsstats
.rpccnt
[NFSPROC_FSSTAT
]++;
321 nfsm_reqhead(vp
, NFSPROC_FSSTAT
, NFSX_FH(v3
));
323 nfsm_request(vp
, NFSPROC_FSSTAT
, td
, cred
);
325 nfsm_postop_attr(vp
, retattr
, NFS_LATTR_NOSHRINK
);
331 nfsm_dissect(sfp
, struct nfs_statfs
*, NFSX_STATFS(v3
));
332 sbp
->f_flags
= nmp
->nm_flag
;
333 sbp
->f_iosize
= nfs_iosize(v3
, nmp
->nm_sotype
);
336 sbp
->f_bsize
= NFS_FABLKSIZE
;
337 tquad
= fxdr_hyper(&sfp
->sf_tbytes
);
338 sbp
->f_blocks
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
339 tquad
= fxdr_hyper(&sfp
->sf_fbytes
);
340 sbp
->f_bfree
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
341 tquad
= fxdr_hyper(&sfp
->sf_abytes
);
342 sbp
->f_bavail
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
343 sbp
->f_files
= (fxdr_unsigned(int32_t,
344 sfp
->sf_tfiles
.nfsuquad
[1]) & 0x7fffffff);
345 sbp
->f_ffree
= (fxdr_unsigned(int32_t,
346 sfp
->sf_ffiles
.nfsuquad
[1]) & 0x7fffffff);
348 sbp
->f_bsize
= fxdr_unsigned(int32_t, sfp
->sf_bsize
);
349 sbp
->f_blocks
= fxdr_unsigned(int32_t, sfp
->sf_blocks
);
350 sbp
->f_bfree
= fxdr_unsigned(int32_t, sfp
->sf_bfree
);
351 sbp
->f_bavail
= fxdr_unsigned(int32_t, sfp
->sf_bavail
);
355 if (sbp
!= &mp
->mnt_stat
) {
356 sbp
->f_type
= mp
->mnt_vfc
->vfc_typenum
;
357 bcopy(mp
->mnt_stat
.f_mntfromname
, sbp
->f_mntfromname
, MNAMELEN
);
367 * nfs version 3 fsinfo rpc call
370 nfs_fsinfo(struct nfsmount
*nmp
, struct vnode
*vp
, struct thread
*td
)
372 struct nfsv3_fsinfo
*fsp
;
375 u_int32_t
*tl
, pref
, max
;
376 caddr_t bpos
, dpos
, cp2
;
377 int error
= 0, retattr
;
378 struct mbuf
*mreq
, *mrep
, *md
, *mb
, *mb2
;
381 nfsstats
.rpccnt
[NFSPROC_FSINFO
]++;
382 nfsm_reqhead(vp
, NFSPROC_FSINFO
, NFSX_FH(1));
384 nfsm_request(vp
, NFSPROC_FSINFO
, td
, nfs_vpcred(vp
, ND_READ
));
385 nfsm_postop_attr(vp
, retattr
, NFS_LATTR_NOSHRINK
);
387 nfsm_dissect(fsp
, struct nfsv3_fsinfo
*, NFSX_V3FSINFO
);
388 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtpref
);
389 if (pref
< nmp
->nm_wsize
&& pref
>= NFS_FABLKSIZE
)
390 nmp
->nm_wsize
= (pref
+ NFS_FABLKSIZE
- 1) &
391 ~(NFS_FABLKSIZE
- 1);
392 max
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtmax
);
393 if (max
< nmp
->nm_wsize
&& max
> 0) {
394 nmp
->nm_wsize
= max
& ~(NFS_FABLKSIZE
- 1);
395 if (nmp
->nm_wsize
== 0)
398 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtpref
);
399 if (pref
< nmp
->nm_rsize
&& pref
>= NFS_FABLKSIZE
)
400 nmp
->nm_rsize
= (pref
+ NFS_FABLKSIZE
- 1) &
401 ~(NFS_FABLKSIZE
- 1);
402 max
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtmax
);
403 if (max
< nmp
->nm_rsize
&& max
> 0) {
404 nmp
->nm_rsize
= max
& ~(NFS_FABLKSIZE
- 1);
405 if (nmp
->nm_rsize
== 0)
408 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_dtpref
);
409 if (pref
< nmp
->nm_readdirsize
&& pref
>= NFS_DIRBLKSIZ
)
410 nmp
->nm_readdirsize
= (pref
+ NFS_DIRBLKSIZ
- 1) &
411 ~(NFS_DIRBLKSIZ
- 1);
412 if (max
< nmp
->nm_readdirsize
&& max
> 0) {
413 nmp
->nm_readdirsize
= max
& ~(NFS_DIRBLKSIZ
- 1);
414 if (nmp
->nm_readdirsize
== 0)
415 nmp
->nm_readdirsize
= max
;
417 maxfsize
= fxdr_hyper(&fsp
->fs_maxfilesize
);
418 if (maxfsize
> 0 && maxfsize
< nmp
->nm_maxfilesize
)
419 nmp
->nm_maxfilesize
= maxfsize
;
420 nmp
->nm_state
|= NFSSTA_GOTFSINFO
;
428 * Mount a remote root fs via NFS. This depends on the info in the
429 * nfs_diskless structure that has been filled in properly by some primary
431 * It goes something like this:
432 * - do enough of "ifconfig" by calling ifioctl() so that the system
433 * can talk to the server
434 * - If nfs_diskless.mygateway is filled in, use that address as
436 * - build the rootfs mount point and call mountnfs() to do the rest.
439 nfs_mountroot(struct mount
*mp
)
441 struct mount
*swap_mp
;
442 struct nfsv3_diskless
*nd
= &nfsv3_diskless
;
445 struct thread
*td
= curthread
; /* XXX */
450 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
451 bootpc_init(); /* use bootp to get nfs_diskless filled in */
455 * XXX time must be non-zero when we init the interface or else
456 * the arp code will wedge...
458 while (mycpu
->gd_time_seconds
== 0)
459 tsleep(mycpu
, 0, "arpkludge", 10);
462 * The boot code may have passed us a diskless structure.
464 if (nfs_diskless_valid
== 1)
465 nfs_convert_diskless();
467 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr))
468 kprintf("nfs_mountroot: interface %s ip %s",
470 inet_ntoa(SINP(&nd
->myif
.ifra_addr
)->sin_addr
));
472 inet_ntoa(SINP(&nd
->myif
.ifra_broadaddr
)->sin_addr
));
473 kprintf(" mask %s\n",
474 inet_ntoa(SINP(&nd
->myif
.ifra_mask
)->sin_addr
));
478 * XXX splnet, so networks will receive...
483 * BOOTP does not necessarily have to be compiled into the kernel
484 * for an NFS root to work. If we inherited the network
485 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured
486 * out our interface for us and all we need to do is ifconfig the
487 * interface. We only do this if the interface has not already been
488 * ifconfig'd by e.g. BOOTP.
490 error
= socreate(nd
->myif
.ifra_addr
.sa_family
, &so
, SOCK_DGRAM
, 0, td
);
492 panic("nfs_mountroot: socreate(%04x): %d",
493 nd
->myif
.ifra_addr
.sa_family
, error
);
496 error
= ifioctl(so
, SIOCAIFADDR
, (caddr_t
)&nd
->myif
, proc0
.p_ucred
);
498 panic("nfs_mountroot: SIOCAIFADDR: %d", error
);
500 soclose(so
, FNONBLOCK
);
503 * If the gateway field is filled in, set it as the default route.
505 if (nd
->mygateway
.sin_len
!= 0) {
506 struct sockaddr_in mask
, sin
;
508 bzero((caddr_t
)&mask
, sizeof(mask
));
510 sin
.sin_family
= AF_INET
;
511 sin
.sin_len
= sizeof(sin
);
512 kprintf("nfs_mountroot: gateway %s\n",
513 inet_ntoa(nd
->mygateway
.sin_addr
));
514 error
= rtrequest_global(RTM_ADD
, (struct sockaddr
*)&sin
,
515 (struct sockaddr
*)&nd
->mygateway
,
516 (struct sockaddr
*)&mask
,
517 RTF_UP
| RTF_GATEWAY
);
519 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error
);
523 * Create the rootfs mount point.
525 nd
->root_args
.fh
= nd
->root_fh
;
526 nd
->root_args
.fhsize
= nd
->root_fhsize
;
527 l
= ntohl(nd
->root_saddr
.sin_addr
.s_addr
);
528 ksnprintf(buf
, sizeof(buf
), "%ld.%ld.%ld.%ld:%s",
529 (l
>> 24) & 0xff, (l
>> 16) & 0xff,
530 (l
>> 8) & 0xff, (l
>> 0) & 0xff,nd
->root_hostnam
);
531 kprintf("NFS ROOT: %s\n",buf
);
532 if ((error
= nfs_mountdiskless(buf
, "/", MNT_RDONLY
,
533 &nd
->root_saddr
, &nd
->root_args
, td
, &vp
, &mp
)) != 0) {
535 mp
->mnt_vfc
->vfc_refcount
--;
536 kfree(swap_mp
, M_MOUNT
);
543 if (nd
->swap_nblks
) {
545 /* Convert to DEV_BSIZE instead of Kilobyte */
549 * Create a fake mount point just for the swap vnode so that the
550 * swap file can be on a different server from the rootfs.
552 nd
->swap_args
.fh
= nd
->swap_fh
;
553 nd
->swap_args
.fhsize
= nd
->swap_fhsize
;
554 l
= ntohl(nd
->swap_saddr
.sin_addr
.s_addr
);
555 ksnprintf(buf
, sizeof(buf
), "%ld.%ld.%ld.%ld:%s",
556 (l
>> 24) & 0xff, (l
>> 16) & 0xff,
557 (l
>> 8) & 0xff, (l
>> 0) & 0xff,nd
->swap_hostnam
);
558 kprintf("NFS SWAP: %s\n",buf
);
559 if ((error
= nfs_mountdiskless(buf
, "/swap", 0,
560 &nd
->swap_saddr
, &nd
->swap_args
, td
, &vp
, &swap_mp
)) != 0) {
566 VTONFS(vp
)->n_size
= VTONFS(vp
)->n_vattr
.va_size
=
567 nd
->swap_nblks
* DEV_BSIZE
;
570 * Since the swap file is not the root dir of a file system,
571 * hack it to a regular file.
575 nfs_setvtype(vp
, VREG
);
576 swaponvp(td
, vp
, nd
->swap_nblks
);
579 mp
->mnt_flag
|= MNT_ROOTFS
;
583 * This is not really an nfs issue, but it is much easier to
584 * set hostname here and then let the "/etc/rc.xxx" files
585 * mount the right /var based upon its preset value.
587 bcopy(nd
->my_hostnam
, hostname
, MAXHOSTNAMELEN
);
588 hostname
[MAXHOSTNAMELEN
- 1] = '\0';
589 for (i
= 0; i
< MAXHOSTNAMELEN
; i
++)
590 if (hostname
[i
] == '\0')
592 inittodr(ntohl(nd
->root_time
));
598 * Internal version of mount system call for diskless setup.
601 nfs_mountdiskless(char *path
, char *which
, int mountflag
,
602 struct sockaddr_in
*sin
, struct nfs_args
*args
, struct thread
*td
,
603 struct vnode
**vpp
, struct mount
**mpp
)
606 struct sockaddr
*nam
;
613 if ((error
= vfs_rootmountalloc("nfs", path
, &mp
)) != 0) {
614 kprintf("nfs_mountroot: NFS not configured");
619 mp
->mnt_kern_flag
= 0;
620 mp
->mnt_flag
= mountflag
;
621 nam
= dup_sockaddr((struct sockaddr
*)sin
);
623 #if defined(BOOTP) || defined(NFS_ROOT)
624 if (args
->fhsize
== 0) {
625 kprintf("NFS_ROOT: No FH passed from loader, attempting mount rpc...");
627 error
= md_mount(sin
, which
, args
->fh
, &args
->fhsize
, args
, td
);
629 kprintf("failed.\n");
632 kprintf("success!\n");
636 if ((error
= mountnfs(args
, mp
, nam
, which
, path
, vpp
)) != 0) {
637 #if defined(BOOTP) || defined(NFS_ROOT)
640 kprintf("nfs_mountroot: mount %s on %s: %d", path
, which
, error
);
641 mp
->mnt_vfc
->vfc_refcount
--;
653 nfs_decode_args(struct nfsmount
*nmp
, struct nfs_args
*argp
)
660 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
661 * no sense in that context.
663 if (argp
->sotype
== SOCK_STREAM
)
664 nmp
->nm_flag
&= ~NFSMNT_NOCONN
;
666 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
667 if ((argp
->flags
& NFSMNT_NFSV3
) == 0)
668 nmp
->nm_flag
&= ~NFSMNT_RDIRPLUS
;
670 /* Re-bind if a reserved port is requested and we were not already on one */
671 adjsock
= !(nmp
->nm_flag
& NFSMNT_RESVPORT
)
672 && (argp
->flags
& NFSMNT_RESVPORT
);
673 /* Also re-bind if we're switching to/from a connected UDP socket */
674 adjsock
|= ((nmp
->nm_flag
& NFSMNT_NOCONN
) !=
675 (argp
->flags
& NFSMNT_NOCONN
));
677 /* Update flags atomically. Don't change the lock bits. */
678 nmp
->nm_flag
= argp
->flags
| nmp
->nm_flag
;
681 if ((argp
->flags
& NFSMNT_TIMEO
) && argp
->timeo
> 0) {
682 nmp
->nm_timeo
= (argp
->timeo
* NFS_HZ
+ 5) / 10;
683 if (nmp
->nm_timeo
< NFS_MINTIMEO
)
684 nmp
->nm_timeo
= NFS_MINTIMEO
;
685 else if (nmp
->nm_timeo
> NFS_MAXTIMEO
)
686 nmp
->nm_timeo
= NFS_MAXTIMEO
;
689 if ((argp
->flags
& NFSMNT_RETRANS
) && argp
->retrans
> 1) {
690 nmp
->nm_retry
= argp
->retrans
;
691 if (nmp
->nm_retry
> NFS_MAXREXMIT
)
692 nmp
->nm_retry
= NFS_MAXREXMIT
;
695 maxio
= nfs_iosize(argp
->flags
& NFSMNT_NFSV3
, argp
->sotype
);
697 if ((argp
->flags
& NFSMNT_WSIZE
) && argp
->wsize
> 0) {
698 nmp
->nm_wsize
= argp
->wsize
;
699 /* Round down to multiple of blocksize */
700 nmp
->nm_wsize
&= ~(NFS_FABLKSIZE
- 1);
701 if (nmp
->nm_wsize
<= 0)
702 nmp
->nm_wsize
= NFS_FABLKSIZE
;
704 if (nmp
->nm_wsize
> maxio
)
705 nmp
->nm_wsize
= maxio
;
706 if (nmp
->nm_wsize
> MAXBSIZE
)
707 nmp
->nm_wsize
= MAXBSIZE
;
709 if ((argp
->flags
& NFSMNT_RSIZE
) && argp
->rsize
> 0) {
710 nmp
->nm_rsize
= argp
->rsize
;
711 /* Round down to multiple of blocksize */
712 nmp
->nm_rsize
&= ~(NFS_FABLKSIZE
- 1);
713 if (nmp
->nm_rsize
<= 0)
714 nmp
->nm_rsize
= NFS_FABLKSIZE
;
716 if (nmp
->nm_rsize
> maxio
)
717 nmp
->nm_rsize
= maxio
;
718 if (nmp
->nm_rsize
> MAXBSIZE
)
719 nmp
->nm_rsize
= MAXBSIZE
;
721 if ((argp
->flags
& NFSMNT_READDIRSIZE
) && argp
->readdirsize
> 0) {
722 nmp
->nm_readdirsize
= argp
->readdirsize
;
724 if (nmp
->nm_readdirsize
> maxio
)
725 nmp
->nm_readdirsize
= maxio
;
726 if (nmp
->nm_readdirsize
> nmp
->nm_rsize
)
727 nmp
->nm_readdirsize
= nmp
->nm_rsize
;
729 if ((argp
->flags
& NFSMNT_ACREGMIN
) && argp
->acregmin
>= 0)
730 nmp
->nm_acregmin
= argp
->acregmin
;
732 nmp
->nm_acregmin
= NFS_MINATTRTIMO
;
733 if ((argp
->flags
& NFSMNT_ACREGMAX
) && argp
->acregmax
>= 0)
734 nmp
->nm_acregmax
= argp
->acregmax
;
736 nmp
->nm_acregmax
= NFS_MAXATTRTIMO
;
737 if ((argp
->flags
& NFSMNT_ACDIRMIN
) && argp
->acdirmin
>= 0)
738 nmp
->nm_acdirmin
= argp
->acdirmin
;
740 nmp
->nm_acdirmin
= NFS_MINDIRATTRTIMO
;
741 if ((argp
->flags
& NFSMNT_ACDIRMAX
) && argp
->acdirmax
>= 0)
742 nmp
->nm_acdirmax
= argp
->acdirmax
;
744 nmp
->nm_acdirmax
= NFS_MAXDIRATTRTIMO
;
745 if (nmp
->nm_acdirmin
> nmp
->nm_acdirmax
)
746 nmp
->nm_acdirmin
= nmp
->nm_acdirmax
;
747 if (nmp
->nm_acregmin
> nmp
->nm_acregmax
)
748 nmp
->nm_acregmin
= nmp
->nm_acregmax
;
750 if ((argp
->flags
& NFSMNT_MAXGRPS
) && argp
->maxgrouplist
>= 0) {
751 if (argp
->maxgrouplist
<= NFS_MAXGRPS
)
752 nmp
->nm_numgrps
= argp
->maxgrouplist
;
754 nmp
->nm_numgrps
= NFS_MAXGRPS
;
756 if ((argp
->flags
& NFSMNT_READAHEAD
) && argp
->readahead
>= 0) {
757 if (argp
->readahead
<= NFS_MAXRAHEAD
)
758 nmp
->nm_readahead
= argp
->readahead
;
760 nmp
->nm_readahead
= NFS_MAXRAHEAD
;
762 if ((argp
->flags
& NFSMNT_DEADTHRESH
) && argp
->deadthresh
>= 1) {
763 if (argp
->deadthresh
<= NFS_NEVERDEAD
)
764 nmp
->nm_deadthresh
= argp
->deadthresh
;
766 nmp
->nm_deadthresh
= NFS_NEVERDEAD
;
769 adjsock
|= ((nmp
->nm_sotype
!= argp
->sotype
) ||
770 (nmp
->nm_soproto
!= argp
->proto
));
771 nmp
->nm_sotype
= argp
->sotype
;
772 nmp
->nm_soproto
= argp
->proto
;
774 if (nmp
->nm_so
&& adjsock
) {
775 nfs_safedisconnect(nmp
);
776 if (nmp
->nm_sotype
== SOCK_DGRAM
)
777 while (nfs_connect(nmp
, (struct nfsreq
*)0)) {
778 kprintf("nfs_args: retrying connect\n");
779 (void) tsleep((caddr_t
)&lbolt
, 0, "nfscon", 0);
788 * It seems a bit dumb to copyinstr() the host and path here and then
789 * bcopy() them in mountnfs(), but I wanted to detect errors before
790 * doing the sockargs() call because sockargs() allocates an mbuf and
791 * an error after that means that I have to release the mbuf.
795 nfs_mount(struct mount
*mp
, char *path
, caddr_t data
, struct ucred
*cred
)
798 struct nfs_args args
;
799 struct sockaddr
*nam
;
801 char pth
[MNAMELEN
], hst
[MNAMELEN
];
803 u_char nfh
[NFSX_V3FHMAX
];
809 error
= copyin(data
, (caddr_t
)&args
, sizeof (struct nfs_args
));
812 if (args
.version
!= NFS_ARGSVERSION
) {
813 #ifdef COMPAT_PRELITE2
815 * If the argument version is unknown, then assume the
816 * caller is a pre-lite2 4.4BSD client and convert its
819 struct onfs_args oargs
;
820 error
= copyin(data
, (caddr_t
)&oargs
, sizeof (struct onfs_args
));
823 nfs_convert_oargs(&args
,&oargs
);
824 #else /* !COMPAT_PRELITE2 */
825 return (EPROGMISMATCH
);
826 #endif /* COMPAT_PRELITE2 */
828 if (mp
->mnt_flag
& MNT_UPDATE
) {
829 struct nfsmount
*nmp
= VFSTONFS(mp
);
834 * When doing an update, we can't change from or to
835 * v3, or change cookie translation
837 args
.flags
= (args
.flags
&
838 ~(NFSMNT_NFSV3
/*|NFSMNT_XLATECOOKIE*/)) |
840 (NFSMNT_NFSV3
/*|NFSMNT_XLATECOOKIE*/));
841 nfs_decode_args(nmp
, &args
);
846 * Make the nfs_ip_paranoia sysctl serve as the default connection
847 * or no-connection mode for those protocols that support
848 * no-connection mode (the flag will be cleared later for protocols
849 * that do not support no-connection mode). This will allow a client
850 * to receive replies from a different IP than the request was
851 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
854 if (nfs_ip_paranoia
== 0)
855 args
.flags
|= NFSMNT_NOCONN
;
856 if (args
.fhsize
< 0 || args
.fhsize
> NFSX_V3FHMAX
)
858 error
= copyin((caddr_t
)args
.fh
, (caddr_t
)nfh
, args
.fhsize
);
861 error
= copyinstr(path
, pth
, MNAMELEN
-1, &len
);
864 bzero(&pth
[len
], MNAMELEN
- len
);
865 error
= copyinstr(args
.hostname
, hst
, MNAMELEN
-1, &len
);
868 bzero(&hst
[len
], MNAMELEN
- len
);
869 /* sockargs() call must be after above copyin() calls */
870 error
= getsockaddr(&nam
, (caddr_t
)args
.addr
, args
.addrlen
);
874 error
= mountnfs(&args
, mp
, nam
, pth
, hst
, &vp
);
879 * Common code for mount and mountroot
882 mountnfs(struct nfs_args
*argp
, struct mount
*mp
, struct sockaddr
*nam
,
883 char *pth
, char *hst
, struct vnode
**vpp
)
885 struct nfsmount
*nmp
;
889 if (mp
->mnt_flag
& MNT_UPDATE
) {
891 /* update paths, file handles, etc, here XXX */
895 nmp
= zalloc(nfsmount_zone
);
896 bzero((caddr_t
)nmp
, sizeof (struct nfsmount
));
897 TAILQ_INIT(&nmp
->nm_uidlruhead
);
898 TAILQ_INIT(&nmp
->nm_bioq
);
899 mp
->mnt_data
= (qaddr_t
)nmp
;
905 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
906 * high, depending on whether we end up with negative offsets in
907 * the client or server somewhere. 2GB-1 may be safer.
909 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
910 * that we can handle until we find out otherwise.
911 * XXX Our "safe" limit on the client is what we can store in our
912 * buffer cache using signed(!) block numbers.
914 if ((argp
->flags
& NFSMNT_NFSV3
) == 0)
915 nmp
->nm_maxfilesize
= 0xffffffffLL
;
917 nmp
->nm_maxfilesize
= (u_int64_t
)0x80000000 * DEV_BSIZE
- 1;
919 nmp
->nm_timeo
= NFS_TIMEO
;
920 nmp
->nm_retry
= NFS_RETRANS
;
921 nmp
->nm_wsize
= nfs_iosize(argp
->flags
& NFSMNT_NFSV3
, argp
->sotype
);
922 nmp
->nm_rsize
= nmp
->nm_wsize
;
923 nmp
->nm_readdirsize
= NFS_READDIRSIZE
;
924 nmp
->nm_numgrps
= NFS_MAXGRPS
;
925 nmp
->nm_readahead
= NFS_DEFRAHEAD
;
926 nmp
->nm_deadthresh
= NFS_DEADTHRESH
;
927 nmp
->nm_fhsize
= argp
->fhsize
;
928 bcopy((caddr_t
)argp
->fh
, (caddr_t
)nmp
->nm_fh
, argp
->fhsize
);
929 bcopy(hst
, mp
->mnt_stat
.f_mntfromname
, MNAMELEN
);
931 /* Set up the sockets and per-host congestion */
932 nmp
->nm_sotype
= argp
->sotype
;
933 nmp
->nm_soproto
= argp
->proto
;
934 nmp
->nm_cred
= crhold(proc0
.p_ucred
);
936 nfs_decode_args(nmp
, argp
);
939 * For Connection based sockets (TCP,...) defer the connect until
940 * the first request, in case the server is not responding.
942 if (nmp
->nm_sotype
== SOCK_DGRAM
&&
943 (error
= nfs_connect(nmp
, (struct nfsreq
*)0)))
947 * This is silly, but it has to be set so that vinifod() works.
948 * We do not want to do an nfs_statfs() here since we can get
949 * stuck on a dead server and we are holding a lock on the mount
952 mp
->mnt_stat
.f_iosize
=
953 nfs_iosize(nmp
->nm_flag
& NFSMNT_NFSV3
, nmp
->nm_sotype
);
956 * Install vop_ops for our vnops
958 vfs_add_vnodeops(mp
, &nfsv2_vnode_vops
, &mp
->mnt_vn_norm_ops
);
959 vfs_add_vnodeops(mp
, &nfsv2_spec_vops
, &mp
->mnt_vn_spec_ops
);
960 vfs_add_vnodeops(mp
, &nfsv2_fifo_vops
, &mp
->mnt_vn_fifo_ops
);
963 * A reference count is needed on the nfsnode representing the
964 * remote root. If this object is not persistent, then backward
965 * traversals of the mount point (i.e. "..") will not work if
966 * the nfsnode gets flushed out of the cache. Ufs does not have
967 * this problem, because one can identify root inodes by their
968 * number == ROOTINO (2).
970 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
976 * Retrieval of mountpoint attributes is delayed until nfs_root
977 * or nfs_statfs are first called. This will happen either when
978 * we first traverse the mount point or if somebody does a df(1).
980 * NFSSTA_GOTFSINFO is used to flag if we have successfully
981 * retrieved mountpoint attributes. In the case of NFSv3 we
982 * also flag static fsinfo.
985 (*vpp
)->v_type
= VNON
;
988 * Lose the lock but keep the ref.
1001 * unmount system call
1004 nfs_unmount(struct mount
*mp
, int mntflags
)
1006 struct nfsmount
*nmp
;
1007 int error
, flags
= 0;
1009 if (mntflags
& MNT_FORCE
)
1010 flags
|= FORCECLOSE
;
1013 * Goes something like this..
1014 * - Call vflush() to clear out vnodes for this file system
1015 * - Close the socket
1016 * - Free up the data structures
1018 /* In the forced case, cancel any outstanding requests. */
1019 if (flags
& FORCECLOSE
) {
1020 error
= nfs_nmcancelreqs(nmp
);
1025 * Must handshake with nfs_clientd() if it is active. XXX
1027 nmp
->nm_state
|= NFSSTA_DISMINPROG
;
1029 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1030 error
= vflush(mp
, 1, flags
);
1032 nmp
->nm_state
&= ~NFSSTA_DISMINPROG
;
1037 * We are now committed to the unmount.
1038 * For NFSMNT_KERB mounts, let the server daemon free the nfsmount structure.
1040 if (nmp
->nm_flag
& NFSMNT_KERB
)
1041 nmp
->nm_state
|= NFSSTA_DISMNT
;
1043 nfs_disconnect(nmp
);
1044 FREE(nmp
->nm_nam
, M_SONAME
);
1046 if ((nmp
->nm_flag
& NFSMNT_KERB
) == 0)
1047 nfs_free_mount(nmp
);
1052 nfs_free_mount(struct nfsmount
*nmp
)
1055 crfree(nmp
->nm_cred
);
1056 nmp
->nm_cred
= NULL
;
1058 zfree(nfsmount_zone
, nmp
);
1062 * Return root of a filesystem
1065 nfs_root(struct mount
*mp
, struct vnode
**vpp
)
1068 struct nfsmount
*nmp
;
1074 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
1080 * Get transfer parameters and root vnode attributes
1082 if ((nmp
->nm_state
& NFSSTA_GOTFSINFO
) == 0) {
1083 if (nmp
->nm_flag
& NFSMNT_NFSV3
) {
1084 error
= nfs_fsinfo(nmp
, vp
, curthread
);
1085 mp
->mnt_stat
.f_iosize
= nfs_iosize(1, nmp
->nm_sotype
);
1087 if ((error
= VOP_GETATTR(vp
, &attrs
)) == 0)
1088 nmp
->nm_state
|= NFSSTA_GOTFSINFO
;
1093 * The root vnode is usually cached by the namecache so do not
1094 * try to avoid going over the wire even if we have previous
1095 * information cached. A stale NFS mount can loop
1096 * forever resolving the root vnode if we return no-error when
1097 * there is in fact an error.
1099 np
->n_attrstamp
= 0;
1100 error
= VOP_GETATTR(vp
, &attrs
);
1102 if (vp
->v_type
== VNON
)
1103 nfs_setvtype(vp
, VDIR
);
1120 static int nfs_sync_scan1(struct mount
*mp
, struct vnode
*vp
, void *data
);
1121 static int nfs_sync_scan2(struct mount
*mp
, struct vnode
*vp
, void *data
);
1124 * Flush out the buffer cache
1128 nfs_sync(struct mount
*mp
, int waitfor
)
1130 struct scaninfo scaninfo
;
1133 scaninfo
.rescan
= 0;
1134 scaninfo
.waitfor
= waitfor
;
1135 scaninfo
.allerror
= 0;
1138 * Force stale buffer cache information to be flushed.
1141 while (error
== 0 && scaninfo
.rescan
) {
1142 scaninfo
.rescan
= 0;
1143 error
= vmntvnodescan(mp
, VMSC_GETVP
, nfs_sync_scan1
,
1144 nfs_sync_scan2
, &scaninfo
);
1150 nfs_sync_scan1(struct mount
*mp
, struct vnode
*vp
, void *data
)
1152 struct scaninfo
*info
= data
;
1154 if (vn_islocked(vp
) || RB_EMPTY(&vp
->v_rbdirty_tree
))
1156 if (info
->waitfor
== MNT_LAZY
)
1162 nfs_sync_scan2(struct mount
*mp
, struct vnode
*vp
, void *data
)
1164 struct scaninfo
*info
= data
;
1167 error
= VOP_FSYNC(vp
, info
->waitfor
);
1169 info
->allerror
= error
;