2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
37 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.54 2008/07/31 20:23:40 swildner Exp $
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
44 #include <sys/param.h>
45 #include <sys/sockio.h>
47 #include <sys/vnode.h>
48 #include <sys/fcntl.h>
49 #include <sys/kernel.h>
50 #include <sys/sysctl.h>
51 #include <sys/malloc.h>
52 #include <sys/mount.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/systm.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_zone.h>
63 #include <net/route.h>
64 #include <netinet/in.h>
66 #include <sys/thread2.h>
67 #include <sys/mutex2.h>
75 #include "nfsm_subs.h"
76 #include "nfsdiskless.h"
77 #include "nfsmountrpc.h"
79 extern int nfs_mountroot(struct mount
*mp
);
80 extern void bootpc_init(void);
82 extern struct vop_ops nfsv2_vnode_vops
;
83 extern struct vop_ops nfsv2_fifo_vops
;
84 extern struct vop_ops nfsv2_spec_vops
;
86 MALLOC_DEFINE(M_NFSREQ
, "NFS req", "NFS request header");
87 MALLOC_DEFINE(M_NFSBIGFH
, "NFSV3 bigfh", "NFS version 3 file handle");
88 MALLOC_DEFINE(M_NFSD
, "NFS daemon", "Nfs server daemon structure");
89 MALLOC_DEFINE(M_NFSDIROFF
, "NFSV3 diroff", "NFS directory offset data");
90 MALLOC_DEFINE(M_NFSRVDESC
, "NFSV3 srvdesc", "NFS server socket descriptor");
91 MALLOC_DEFINE(M_NFSUID
, "NFS uid", "Nfs uid mapping structure");
92 MALLOC_DEFINE(M_NFSHASH
, "NFS hash", "NFS hash tables");
94 vm_zone_t nfsmount_zone
;
96 struct nfsstats nfsstats
;
97 SYSCTL_NODE(_vfs
, OID_AUTO
, nfs
, CTLFLAG_RW
, 0, "NFS filesystem");
98 SYSCTL_STRUCT(_vfs_nfs
, NFS_NFSSTATS
, nfsstats
, CTLFLAG_RD
,
99 &nfsstats
, nfsstats
, "");
100 static int nfs_ip_paranoia
= 1;
101 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, nfs_ip_paranoia
, CTLFLAG_RW
,
102 &nfs_ip_paranoia
, 0, "");
105 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, debug
, CTLFLAG_RW
, &nfs_debug
, 0, "");
109 * Tunable to determine the Read/Write unit size. Maximum value
110 * is NFS_MAXDATA. We also default to NFS_MAXDATA.
112 static int nfs_io_size
= NFS_MAXDATA
;
113 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, nfs_io_size
, CTLFLAG_RW
,
114 &nfs_io_size
, 0, "NFS optimal I/O unit size");
116 static void nfs_decode_args (struct nfsmount
*nmp
,
117 struct nfs_args
*argp
);
118 static int mountnfs (struct nfs_args
*,struct mount
*,
119 struct sockaddr
*,char *,char *,struct vnode
**);
120 static int nfs_mount ( struct mount
*mp
, char *path
, caddr_t data
,
122 static int nfs_unmount ( struct mount
*mp
, int mntflags
);
123 static int nfs_root ( struct mount
*mp
, struct vnode
**vpp
);
124 static int nfs_statfs ( struct mount
*mp
, struct statfs
*sbp
,
126 static int nfs_sync ( struct mount
*mp
, int waitfor
);
129 * nfs vfs operations.
131 static struct vfsops nfs_vfsops
= {
132 .vfs_mount
= nfs_mount
,
133 .vfs_unmount
= nfs_unmount
,
134 .vfs_root
= nfs_root
,
135 .vfs_statfs
= nfs_statfs
,
136 .vfs_sync
= nfs_sync
,
137 .vfs_init
= nfs_init
,
138 .vfs_uninit
= nfs_uninit
140 VFS_SET(nfs_vfsops
, nfs
, VFCF_NETWORK
);
143 * This structure must be filled in by a primary bootstrap or bootstrap
144 * server for a diskless/dataless machine. It is initialized below just
145 * to ensure that it is allocated to initialized data (.data not .bss).
147 struct nfs_diskless nfs_diskless
= { { { 0 } } };
148 struct nfsv3_diskless nfsv3_diskless
= { { { 0 } } };
149 int nfs_diskless_valid
= 0;
151 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, diskless_valid
, CTLFLAG_RD
,
152 &nfs_diskless_valid
, 0, "");
154 SYSCTL_STRING(_vfs_nfs
, OID_AUTO
, diskless_rootpath
, CTLFLAG_RD
,
155 nfsv3_diskless
.root_hostnam
, 0, "");
157 SYSCTL_OPAQUE(_vfs_nfs
, OID_AUTO
, diskless_rootaddr
, CTLFLAG_RD
,
158 &nfsv3_diskless
.root_saddr
, sizeof nfsv3_diskless
.root_saddr
,
159 "%Ssockaddr_in", "");
161 SYSCTL_STRING(_vfs_nfs
, OID_AUTO
, diskless_swappath
, CTLFLAG_RD
,
162 nfsv3_diskless
.swap_hostnam
, 0, "");
164 SYSCTL_OPAQUE(_vfs_nfs
, OID_AUTO
, diskless_swapaddr
, CTLFLAG_RD
,
165 &nfsv3_diskless
.swap_saddr
, sizeof nfsv3_diskless
.swap_saddr
,
169 void nfsargs_ntoh (struct nfs_args
*);
170 static int nfs_mountdiskless (char *, char *, int,
171 struct sockaddr_in
*, struct nfs_args
*,
172 struct thread
*, struct vnode
**,
174 static void nfs_convert_diskless (void);
175 static void nfs_convert_oargs (struct nfs_args
*args
,
176 struct onfs_args
*oargs
);
179 * Calculate the buffer I/O block size to use. The maximum V2 block size
180 * is typically 8K, the maximum datagram size is typically 16K, and the
181 * maximum V3 block size is typically 32K. The buffer cache tends to work
182 * best with 16K blocks but we allow 32K for TCP connections.
184 * We force the block size to be at least a page for buffer cache efficiency.
187 nfs_iosize(int v3
, int sotype
)
193 if (sotype
== SOCK_STREAM
)
196 iomax
= NFS_MAXDGRAMDATA
;
198 iomax
= NFS_V2MAXDATA
;
200 if ((iosize
= nfs_io_size
) > iomax
)
202 if (iosize
< PAGE_SIZE
)
206 * This is an awful hack but until the buffer cache is rewritten
207 * we need it. The problem is that when you combine write() with
208 * mmap() the vm_page->valid bits can become weird looking
209 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers
210 * at the file EOF. To solve the problem the BIO system needs to
211 * be guaranteed that the NFS iosize for regular files will be a
212 * multiple of PAGE_SIZE so it can invalidate the whole page
213 * rather than just the piece of it owned by the buffer when
214 * NFS does vinvalbuf() calls.
216 if (iosize
& PAGE_MASK
)
217 iosize
= (iosize
& ~PAGE_MASK
) + PAGE_SIZE
;
222 nfs_convert_oargs(struct nfs_args
*args
, struct onfs_args
*oargs
)
224 args
->version
= NFS_ARGSVERSION
;
225 args
->addr
= oargs
->addr
;
226 args
->addrlen
= oargs
->addrlen
;
227 args
->sotype
= oargs
->sotype
;
228 args
->proto
= oargs
->proto
;
229 args
->fh
= oargs
->fh
;
230 args
->fhsize
= oargs
->fhsize
;
231 args
->flags
= oargs
->flags
;
232 args
->wsize
= oargs
->wsize
;
233 args
->rsize
= oargs
->rsize
;
234 args
->readdirsize
= oargs
->readdirsize
;
235 args
->timeo
= oargs
->timeo
;
236 args
->retrans
= oargs
->retrans
;
237 args
->maxgrouplist
= oargs
->maxgrouplist
;
238 args
->readahead
= oargs
->readahead
;
239 args
->deadthresh
= oargs
->deadthresh
;
240 args
->hostname
= oargs
->hostname
;
244 nfs_convert_diskless(void)
248 bcopy(&nfs_diskless
.myif
, &nfsv3_diskless
.myif
,
249 sizeof(struct ifaliasreq
));
250 bcopy(&nfs_diskless
.mygateway
, &nfsv3_diskless
.mygateway
,
251 sizeof(struct sockaddr_in
));
252 nfs_convert_oargs(&nfsv3_diskless
.swap_args
,&nfs_diskless
.swap_args
);
254 bcopy(nfs_diskless
.swap_fh
,nfsv3_diskless
.swap_fh
,NFSX_V2FH
);
255 nfsv3_diskless
.swap_fhsize
= NFSX_V2FH
;
256 for (i
= NFSX_V2FH
- 1; i
>= 0; --i
) {
257 if (nfs_diskless
.swap_fh
[i
])
261 nfsv3_diskless
.swap_fhsize
= 0;
263 bcopy(&nfs_diskless
.swap_saddr
,&nfsv3_diskless
.swap_saddr
,
264 sizeof(struct sockaddr_in
));
265 bcopy(nfs_diskless
.swap_hostnam
,nfsv3_diskless
.swap_hostnam
, MNAMELEN
);
266 nfsv3_diskless
.swap_nblks
= nfs_diskless
.swap_nblks
;
267 bcopy(&nfs_diskless
.swap_ucred
, &nfsv3_diskless
.swap_ucred
,
268 sizeof(struct ucred
));
269 nfs_convert_oargs(&nfsv3_diskless
.root_args
,&nfs_diskless
.root_args
);
271 bcopy(nfs_diskless
.root_fh
,nfsv3_diskless
.root_fh
,NFSX_V2FH
);
272 nfsv3_diskless
.root_fhsize
= NFSX_V2FH
;
273 for (i
= NFSX_V2FH
- 1; i
>= 0; --i
) {
274 if (nfs_diskless
.root_fh
[i
])
278 nfsv3_diskless
.root_fhsize
= 0;
280 bcopy(&nfs_diskless
.root_saddr
,&nfsv3_diskless
.root_saddr
,
281 sizeof(struct sockaddr_in
));
282 bcopy(nfs_diskless
.root_hostnam
,nfsv3_diskless
.root_hostnam
, MNAMELEN
);
283 nfsv3_diskless
.root_time
= nfs_diskless
.root_time
;
284 bcopy(nfs_diskless
.my_hostnam
,nfsv3_diskless
.my_hostnam
,
286 nfs_diskless_valid
= 3;
293 nfs_statfs(struct mount
*mp
, struct statfs
*sbp
, struct ucred
*cred
)
296 struct nfs_statfs
*sfp
;
297 struct nfsmount
*nmp
= VFSTONFS(mp
);
298 thread_t td
= curthread
;
299 int error
= 0, retattr
;
302 struct nfsm_info info
;
305 info
.v3
= (nmp
->nm_flag
& NFSMNT_NFSV3
);
310 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
314 /* ignore the passed cred */
316 cred
->cr_ngroups
= 1;
317 if (info
.v3
&& (nmp
->nm_state
& NFSSTA_GOTFSINFO
) == 0)
318 (void)nfs_fsinfo(nmp
, vp
, td
);
319 nfsstats
.rpccnt
[NFSPROC_FSSTAT
]++;
320 nfsm_reqhead(&info
, vp
, NFSPROC_FSSTAT
, NFSX_FH(info
.v3
));
321 ERROROUT(nfsm_fhtom(&info
, vp
));
322 NEGKEEPOUT(nfsm_request(&info
, vp
, NFSPROC_FSSTAT
, td
, cred
, &error
));
324 ERROROUT(nfsm_postop_attr(&info
, vp
, &retattr
,
325 NFS_LATTR_NOSHRINK
));
328 if (info
.mrep
!= NULL
)
332 NULLOUT(sfp
= nfsm_dissect(&info
, NFSX_STATFS(info
.v3
)));
333 sbp
->f_flags
= nmp
->nm_flag
;
334 sbp
->f_iosize
= nfs_iosize(info
.v3
, nmp
->nm_sotype
);
337 sbp
->f_bsize
= NFS_FABLKSIZE
;
338 tquad
= fxdr_hyper(&sfp
->sf_tbytes
);
339 sbp
->f_blocks
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
340 tquad
= fxdr_hyper(&sfp
->sf_fbytes
);
341 sbp
->f_bfree
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
342 tquad
= fxdr_hyper(&sfp
->sf_abytes
);
343 sbp
->f_bavail
= (long)(tquad
/ ((u_quad_t
)NFS_FABLKSIZE
));
344 sbp
->f_files
= (fxdr_unsigned(int32_t,
345 sfp
->sf_tfiles
.nfsuquad
[1]) & 0x7fffffff);
346 sbp
->f_ffree
= (fxdr_unsigned(int32_t,
347 sfp
->sf_ffiles
.nfsuquad
[1]) & 0x7fffffff);
349 sbp
->f_bsize
= fxdr_unsigned(int32_t, sfp
->sf_bsize
);
350 sbp
->f_blocks
= fxdr_unsigned(int32_t, sfp
->sf_blocks
);
351 sbp
->f_bfree
= fxdr_unsigned(int32_t, sfp
->sf_bfree
);
352 sbp
->f_bavail
= fxdr_unsigned(int32_t, sfp
->sf_bavail
);
356 if (sbp
!= &mp
->mnt_stat
) {
357 sbp
->f_type
= mp
->mnt_vfc
->vfc_typenum
;
358 bcopy(mp
->mnt_stat
.f_mntfromname
, sbp
->f_mntfromname
, MNAMELEN
);
369 * nfs version 3 fsinfo rpc call
372 nfs_fsinfo(struct nfsmount
*nmp
, struct vnode
*vp
, struct thread
*td
)
374 struct nfsv3_fsinfo
*fsp
;
376 int error
= 0, retattr
;
378 struct nfsm_info info
;
381 nfsstats
.rpccnt
[NFSPROC_FSINFO
]++;
382 nfsm_reqhead(&info
, vp
, NFSPROC_FSINFO
, NFSX_FH(1));
383 ERROROUT(nfsm_fhtom(&info
, vp
));
384 NEGKEEPOUT(nfsm_request(&info
, vp
, NFSPROC_FSINFO
, td
,
385 nfs_vpcred(vp
, ND_READ
), &error
));
386 ERROROUT(nfsm_postop_attr(&info
, vp
, &retattr
, NFS_LATTR_NOSHRINK
));
388 NULLOUT(fsp
= nfsm_dissect(&info
, NFSX_V3FSINFO
));
389 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtpref
);
390 if (pref
< nmp
->nm_wsize
&& pref
>= NFS_FABLKSIZE
)
391 nmp
->nm_wsize
= (pref
+ NFS_FABLKSIZE
- 1) &
392 ~(NFS_FABLKSIZE
- 1);
393 max
= fxdr_unsigned(u_int32_t
, fsp
->fs_wtmax
);
394 if (max
< nmp
->nm_wsize
&& max
> 0) {
395 nmp
->nm_wsize
= max
& ~(NFS_FABLKSIZE
- 1);
396 if (nmp
->nm_wsize
== 0)
399 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtpref
);
400 if (pref
< nmp
->nm_rsize
&& pref
>= NFS_FABLKSIZE
)
401 nmp
->nm_rsize
= (pref
+ NFS_FABLKSIZE
- 1) &
402 ~(NFS_FABLKSIZE
- 1);
403 max
= fxdr_unsigned(u_int32_t
, fsp
->fs_rtmax
);
404 if (max
< nmp
->nm_rsize
&& max
> 0) {
405 nmp
->nm_rsize
= max
& ~(NFS_FABLKSIZE
- 1);
406 if (nmp
->nm_rsize
== 0)
409 pref
= fxdr_unsigned(u_int32_t
, fsp
->fs_dtpref
);
410 if (pref
< nmp
->nm_readdirsize
&& pref
>= NFS_DIRBLKSIZ
)
411 nmp
->nm_readdirsize
= (pref
+ NFS_DIRBLKSIZ
- 1) &
412 ~(NFS_DIRBLKSIZ
- 1);
413 if (max
< nmp
->nm_readdirsize
&& max
> 0) {
414 nmp
->nm_readdirsize
= max
& ~(NFS_DIRBLKSIZ
- 1);
415 if (nmp
->nm_readdirsize
== 0)
416 nmp
->nm_readdirsize
= max
;
418 maxfsize
= fxdr_hyper(&fsp
->fs_maxfilesize
);
419 if (maxfsize
> 0 && maxfsize
< nmp
->nm_maxfilesize
)
420 nmp
->nm_maxfilesize
= maxfsize
;
421 nmp
->nm_state
|= NFSSTA_GOTFSINFO
;
430 * Mount a remote root fs via nfs. This depends on the info in the
431 * nfs_diskless structure that has been filled in properly by some primary
433 * It goes something like this:
434 * - do enough of "ifconfig" by calling ifioctl() so that the system
435 * can talk to the server
436 * - If nfs_diskless.mygateway is filled in, use that address as
438 * - build the rootfs mount point and call mountnfs() to do the rest.
441 nfs_mountroot(struct mount
*mp
)
443 struct mount
*swap_mp
;
444 struct nfsv3_diskless
*nd
= &nfsv3_diskless
;
447 struct thread
*td
= curthread
; /* XXX */
452 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
453 bootpc_init(); /* use bootp to get nfs_diskless filled in */
457 * XXX time must be non-zero when we init the interface or else
458 * the arp code will wedge...
460 while (mycpu
->gd_time_seconds
== 0)
461 tsleep(mycpu
, 0, "arpkludge", 10);
464 * The boot code may have passed us a diskless structure.
466 if (nfs_diskless_valid
== 1)
467 nfs_convert_diskless();
469 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr))
470 kprintf("nfs_mountroot: interface %s ip %s",
472 inet_ntoa(SINP(&nd
->myif
.ifra_addr
)->sin_addr
));
474 inet_ntoa(SINP(&nd
->myif
.ifra_broadaddr
)->sin_addr
));
475 kprintf(" mask %s\n",
476 inet_ntoa(SINP(&nd
->myif
.ifra_mask
)->sin_addr
));
480 * XXX splnet, so networks will receive...
485 * BOOTP does not necessarily have to be compiled into the kernel
486 * for an NFS root to work. If we inherited the network
487 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured
488 * out our interface for us and all we need to do is ifconfig the
489 * interface. We only do this if the interface has not already been
490 * ifconfig'd by e.g. BOOTP.
492 error
= socreate(nd
->myif
.ifra_addr
.sa_family
, &so
, SOCK_DGRAM
, 0, td
);
494 panic("nfs_mountroot: socreate(%04x): %d",
495 nd
->myif
.ifra_addr
.sa_family
, error
);
498 error
= ifioctl(so
, SIOCAIFADDR
, (caddr_t
)&nd
->myif
, proc0
.p_ucred
);
500 panic("nfs_mountroot: SIOCAIFADDR: %d", error
);
502 soclose(so
, FNONBLOCK
);
505 * If the gateway field is filled in, set it as the default route.
507 if (nd
->mygateway
.sin_len
!= 0) {
508 struct sockaddr_in mask
, sin
;
510 bzero((caddr_t
)&mask
, sizeof(mask
));
512 sin
.sin_family
= AF_INET
;
513 sin
.sin_len
= sizeof(sin
);
514 kprintf("nfs_mountroot: gateway %s\n",
515 inet_ntoa(nd
->mygateway
.sin_addr
));
516 error
= rtrequest_global(RTM_ADD
, (struct sockaddr
*)&sin
,
517 (struct sockaddr
*)&nd
->mygateway
,
518 (struct sockaddr
*)&mask
,
519 RTF_UP
| RTF_GATEWAY
);
521 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error
);
525 * Create the rootfs mount point.
527 nd
->root_args
.fh
= nd
->root_fh
;
528 nd
->root_args
.fhsize
= nd
->root_fhsize
;
529 l
= ntohl(nd
->root_saddr
.sin_addr
.s_addr
);
530 ksnprintf(buf
, sizeof(buf
), "%ld.%ld.%ld.%ld:%s",
531 (l
>> 24) & 0xff, (l
>> 16) & 0xff,
532 (l
>> 8) & 0xff, (l
>> 0) & 0xff,nd
->root_hostnam
);
533 kprintf("NFS_ROOT: %s\n",buf
);
534 if ((error
= nfs_mountdiskless(buf
, "/", MNT_RDONLY
,
535 &nd
->root_saddr
, &nd
->root_args
, td
, &vp
, &mp
)) != 0) {
536 mp
->mnt_vfc
->vfc_refcount
--;
542 if (nd
->swap_nblks
) {
544 /* Convert to DEV_BSIZE instead of Kilobyte */
548 * Create a fake mount point just for the swap vnode so that the
549 * swap file can be on a different server from the rootfs.
551 nd
->swap_args
.fh
= nd
->swap_fh
;
552 nd
->swap_args
.fhsize
= nd
->swap_fhsize
;
553 l
= ntohl(nd
->swap_saddr
.sin_addr
.s_addr
);
554 ksnprintf(buf
, sizeof(buf
), "%ld.%ld.%ld.%ld:%s",
555 (l
>> 24) & 0xff, (l
>> 16) & 0xff,
556 (l
>> 8) & 0xff, (l
>> 0) & 0xff,nd
->swap_hostnam
);
557 kprintf("NFS SWAP: %s\n",buf
);
558 if ((error
= nfs_mountdiskless(buf
, "/swap", 0,
559 &nd
->swap_saddr
, &nd
->swap_args
, td
, &vp
, &swap_mp
)) != 0) {
565 VTONFS(vp
)->n_size
= VTONFS(vp
)->n_vattr
.va_size
=
566 nd
->swap_nblks
* DEV_BSIZE
;
569 * Since the swap file is not the root dir of a file system,
570 * hack it to a regular file.
574 nfs_setvtype(vp
, VREG
);
575 swaponvp(td
, vp
, nd
->swap_nblks
);
578 mp
->mnt_flag
|= MNT_ROOTFS
;
582 * This is not really an nfs issue, but it is much easier to
583 * set hostname here and then let the "/etc/rc.xxx" files
584 * mount the right /var based upon its preset value.
586 bcopy(nd
->my_hostnam
, hostname
, MAXHOSTNAMELEN
);
587 hostname
[MAXHOSTNAMELEN
- 1] = '\0';
588 for (i
= 0; i
< MAXHOSTNAMELEN
; i
++)
589 if (hostname
[i
] == '\0')
591 inittodr(ntohl(nd
->root_time
));
597 * Internal version of mount system call for diskless setup.
600 nfs_mountdiskless(char *path
, char *which
, int mountflag
,
601 struct sockaddr_in
*sin
, struct nfs_args
*args
, struct thread
*td
,
602 struct vnode
**vpp
, struct mount
**mpp
)
605 struct sockaddr
*nam
;
612 if ((error
= vfs_rootmountalloc("nfs", path
, &mp
)) != 0) {
613 kprintf("nfs_mountroot: NFS not configured");
618 mp
->mnt_kern_flag
= 0;
619 mp
->mnt_flag
= mountflag
;
620 nam
= dup_sockaddr((struct sockaddr
*)sin
);
622 #if defined(BOOTP) || defined(NFS_ROOT)
623 if (args
->fhsize
== 0) {
626 kprintf("NFS_ROOT: No FH passed from loader, attempting mount rpc...");
627 while (*xpath
&& *xpath
!= ':')
632 error
= md_mount(sin
, xpath
, args
->fh
, &args
->fhsize
, args
, td
);
634 kprintf("failed error %d.\n", error
);
637 kprintf("success!\n");
641 if ((error
= mountnfs(args
, mp
, nam
, which
, path
, vpp
)) != 0) {
642 #if defined(BOOTP) || defined(NFS_ROOT)
645 kprintf("nfs_mountroot: mount %s on %s: %d", path
, which
, error
);
646 mp
->mnt_vfc
->vfc_refcount
--;
658 nfs_decode_args(struct nfsmount
*nmp
, struct nfs_args
*argp
)
665 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
666 * no sense in that context.
668 if (argp
->sotype
== SOCK_STREAM
)
669 nmp
->nm_flag
&= ~NFSMNT_NOCONN
;
671 /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */
672 if ((argp
->flags
& NFSMNT_NFSV3
) == 0)
673 nmp
->nm_flag
&= ~NFSMNT_RDIRPLUS
;
675 /* Re-bind if rsrvd port requested and wasn't on one */
676 adjsock
= !(nmp
->nm_flag
& NFSMNT_RESVPORT
)
677 && (argp
->flags
& NFSMNT_RESVPORT
);
678 /* Also re-bind if we're switching to/from a connected UDP socket */
679 adjsock
|= ((nmp
->nm_flag
& NFSMNT_NOCONN
) !=
680 (argp
->flags
& NFSMNT_NOCONN
));
682 /* Update flags atomically. Don't change the lock bits. */
683 nmp
->nm_flag
= argp
->flags
| nmp
->nm_flag
;
686 if ((argp
->flags
& NFSMNT_TIMEO
) && argp
->timeo
> 0) {
687 nmp
->nm_timeo
= (argp
->timeo
* NFS_HZ
+ 5) / 10;
688 if (nmp
->nm_timeo
< NFS_MINTIMEO
)
689 nmp
->nm_timeo
= NFS_MINTIMEO
;
690 else if (nmp
->nm_timeo
> NFS_MAXTIMEO
)
691 nmp
->nm_timeo
= NFS_MAXTIMEO
;
694 if ((argp
->flags
& NFSMNT_RETRANS
) && argp
->retrans
> 1) {
695 nmp
->nm_retry
= argp
->retrans
;
696 if (nmp
->nm_retry
> NFS_MAXREXMIT
)
697 nmp
->nm_retry
= NFS_MAXREXMIT
;
700 maxio
= nfs_iosize(argp
->flags
& NFSMNT_NFSV3
, argp
->sotype
);
702 if ((argp
->flags
& NFSMNT_WSIZE
) && argp
->wsize
> 0) {
703 nmp
->nm_wsize
= argp
->wsize
;
704 /* Round down to multiple of blocksize */
705 nmp
->nm_wsize
&= ~(NFS_FABLKSIZE
- 1);
706 if (nmp
->nm_wsize
<= 0)
707 nmp
->nm_wsize
= NFS_FABLKSIZE
;
709 if (nmp
->nm_wsize
> maxio
)
710 nmp
->nm_wsize
= maxio
;
711 if (nmp
->nm_wsize
> MAXBSIZE
)
712 nmp
->nm_wsize
= MAXBSIZE
;
714 if ((argp
->flags
& NFSMNT_RSIZE
) && argp
->rsize
> 0) {
715 nmp
->nm_rsize
= argp
->rsize
;
716 /* Round down to multiple of blocksize */
717 nmp
->nm_rsize
&= ~(NFS_FABLKSIZE
- 1);
718 if (nmp
->nm_rsize
<= 0)
719 nmp
->nm_rsize
= NFS_FABLKSIZE
;
721 if (nmp
->nm_rsize
> maxio
)
722 nmp
->nm_rsize
= maxio
;
723 if (nmp
->nm_rsize
> MAXBSIZE
)
724 nmp
->nm_rsize
= MAXBSIZE
;
726 if ((argp
->flags
& NFSMNT_READDIRSIZE
) && argp
->readdirsize
> 0) {
727 nmp
->nm_readdirsize
= argp
->readdirsize
;
729 if (nmp
->nm_readdirsize
> maxio
)
730 nmp
->nm_readdirsize
= maxio
;
731 if (nmp
->nm_readdirsize
> nmp
->nm_rsize
)
732 nmp
->nm_readdirsize
= nmp
->nm_rsize
;
734 if ((argp
->flags
& NFSMNT_ACREGMIN
) && argp
->acregmin
>= 0)
735 nmp
->nm_acregmin
= argp
->acregmin
;
737 nmp
->nm_acregmin
= NFS_MINATTRTIMO
;
738 if ((argp
->flags
& NFSMNT_ACREGMAX
) && argp
->acregmax
>= 0)
739 nmp
->nm_acregmax
= argp
->acregmax
;
741 nmp
->nm_acregmax
= NFS_MAXATTRTIMO
;
742 if ((argp
->flags
& NFSMNT_ACDIRMIN
) && argp
->acdirmin
>= 0)
743 nmp
->nm_acdirmin
= argp
->acdirmin
;
745 nmp
->nm_acdirmin
= NFS_MINDIRATTRTIMO
;
746 if ((argp
->flags
& NFSMNT_ACDIRMAX
) && argp
->acdirmax
>= 0)
747 nmp
->nm_acdirmax
= argp
->acdirmax
;
749 nmp
->nm_acdirmax
= NFS_MAXDIRATTRTIMO
;
750 if (nmp
->nm_acdirmin
> nmp
->nm_acdirmax
)
751 nmp
->nm_acdirmin
= nmp
->nm_acdirmax
;
752 if (nmp
->nm_acregmin
> nmp
->nm_acregmax
)
753 nmp
->nm_acregmin
= nmp
->nm_acregmax
;
755 if ((argp
->flags
& NFSMNT_MAXGRPS
) && argp
->maxgrouplist
>= 0) {
756 if (argp
->maxgrouplist
<= NFS_MAXGRPS
)
757 nmp
->nm_numgrps
= argp
->maxgrouplist
;
759 nmp
->nm_numgrps
= NFS_MAXGRPS
;
761 if ((argp
->flags
& NFSMNT_READAHEAD
) && argp
->readahead
>= 0) {
762 if (argp
->readahead
<= NFS_MAXRAHEAD
)
763 nmp
->nm_readahead
= argp
->readahead
;
765 nmp
->nm_readahead
= NFS_MAXRAHEAD
;
767 if ((argp
->flags
& NFSMNT_DEADTHRESH
) && argp
->deadthresh
>= 1) {
768 if (argp
->deadthresh
<= NFS_NEVERDEAD
)
769 nmp
->nm_deadthresh
= argp
->deadthresh
;
771 nmp
->nm_deadthresh
= NFS_NEVERDEAD
;
774 adjsock
|= ((nmp
->nm_sotype
!= argp
->sotype
) ||
775 (nmp
->nm_soproto
!= argp
->proto
));
776 nmp
->nm_sotype
= argp
->sotype
;
777 nmp
->nm_soproto
= argp
->proto
;
779 if (nmp
->nm_so
&& adjsock
) {
780 nfs_safedisconnect(nmp
);
781 if (nmp
->nm_sotype
== SOCK_DGRAM
)
782 while (nfs_connect(nmp
, NULL
)) {
783 kprintf("nfs_args: retrying connect\n");
784 (void) tsleep((caddr_t
)&lbolt
, 0, "nfscon", 0);
793 * It seems a bit dumb to copyinstr() the host and path here and then
794 * bcopy() them in mountnfs(), but I wanted to detect errors before
795 * doing the sockargs() call because sockargs() allocates an mbuf and
796 * an error after that means that I have to release the mbuf.
800 nfs_mount(struct mount
*mp
, char *path
, caddr_t data
, struct ucred
*cred
)
803 struct nfs_args args
;
804 struct sockaddr
*nam
;
806 char pth
[MNAMELEN
], hst
[MNAMELEN
];
808 u_char nfh
[NFSX_V3FHMAX
];
814 error
= copyin(data
, (caddr_t
)&args
, sizeof (struct nfs_args
));
817 if (args
.version
!= NFS_ARGSVERSION
) {
818 #ifdef COMPAT_PRELITE2
820 * If the argument version is unknown, then assume the
821 * caller is a pre-lite2 4.4BSD client and convert its
824 struct onfs_args oargs
;
825 error
= copyin(data
, (caddr_t
)&oargs
, sizeof (struct onfs_args
));
828 nfs_convert_oargs(&args
,&oargs
);
829 #else /* !COMPAT_PRELITE2 */
830 return (EPROGMISMATCH
);
831 #endif /* COMPAT_PRELITE2 */
833 if (mp
->mnt_flag
& MNT_UPDATE
) {
834 struct nfsmount
*nmp
= VFSTONFS(mp
);
839 * When doing an update, we can't change from or to
840 * v3, or change cookie translation
842 args
.flags
= (args
.flags
&
843 ~(NFSMNT_NFSV3
/*|NFSMNT_XLATECOOKIE*/)) |
845 (NFSMNT_NFSV3
/*|NFSMNT_XLATECOOKIE*/));
846 nfs_decode_args(nmp
, &args
);
851 * Make the nfs_ip_paranoia sysctl serve as the default connection
852 * or no-connection mode for those protocols that support
853 * no-connection mode (the flag will be cleared later for protocols
854 * that do not support no-connection mode). This will allow a client
855 * to receive replies from a different IP than the request was
856 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
859 if (nfs_ip_paranoia
== 0)
860 args
.flags
|= NFSMNT_NOCONN
;
861 if (args
.fhsize
< 0 || args
.fhsize
> NFSX_V3FHMAX
)
863 error
= copyin((caddr_t
)args
.fh
, (caddr_t
)nfh
, args
.fhsize
);
866 error
= copyinstr(path
, pth
, MNAMELEN
-1, &len
);
869 bzero(&pth
[len
], MNAMELEN
- len
);
870 error
= copyinstr(args
.hostname
, hst
, MNAMELEN
-1, &len
);
873 bzero(&hst
[len
], MNAMELEN
- len
);
874 /* getsockaddr() (formerly sockargs()) call must be after above copyin() calls */
875 error
= getsockaddr(&nam
, (caddr_t
)args
.addr
, args
.addrlen
);
879 error
= mountnfs(&args
, mp
, nam
, pth
, hst
, &vp
);
884 * Common code for mount and mountroot
887 mountnfs(struct nfs_args
*argp
, struct mount
*mp
, struct sockaddr
*nam
,
888 char *pth
, char *hst
, struct vnode
**vpp
)
890 struct nfsmount
*nmp
;
896 if (mp
->mnt_flag
& MNT_UPDATE
) {
898 /* update paths, file handles, etc, here XXX */
902 nmp
= zalloc(nfsmount_zone
);
903 bzero((caddr_t
)nmp
, sizeof (struct nfsmount
));
904 mtx_init(&nmp
->nm_rxlock
);
905 mtx_init(&nmp
->nm_txlock
);
906 TAILQ_INIT(&nmp
->nm_uidlruhead
);
907 TAILQ_INIT(&nmp
->nm_bioq
);
908 TAILQ_INIT(&nmp
->nm_reqq
);
909 TAILQ_INIT(&nmp
->nm_reqtxq
);
910 TAILQ_INIT(&nmp
->nm_reqrxq
);
911 mp
->mnt_data
= (qaddr_t
)nmp
;
917 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
918 * high, depending on whether we end up with negative offsets in
919 * the client or server somewhere. 2GB-1 may be safer.
921 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
922 * that we can handle until we find out otherwise.
923 * XXX Our "safe" limit on the client is what we can store in our
924 * buffer cache using signed(!) block numbers.
926 if ((argp
->flags
& NFSMNT_NFSV3
) == 0)
927 nmp
->nm_maxfilesize
= 0xffffffffLL
;
929 nmp
->nm_maxfilesize
= (u_int64_t
)0x80000000 * DEV_BSIZE
- 1;
931 nmp
->nm_timeo
= NFS_TIMEO
;
932 nmp
->nm_retry
= NFS_RETRANS
;
933 nmp
->nm_wsize
= nfs_iosize(argp
->flags
& NFSMNT_NFSV3
, argp
->sotype
);
934 nmp
->nm_rsize
= nmp
->nm_wsize
;
935 nmp
->nm_readdirsize
= NFS_READDIRSIZE
;
936 nmp
->nm_numgrps
= NFS_MAXGRPS
;
937 nmp
->nm_readahead
= NFS_DEFRAHEAD
;
938 nmp
->nm_deadthresh
= NFS_DEADTHRESH
;
939 nmp
->nm_fhsize
= argp
->fhsize
;
940 bcopy((caddr_t
)argp
->fh
, (caddr_t
)nmp
->nm_fh
, argp
->fhsize
);
941 bcopy(hst
, mp
->mnt_stat
.f_mntfromname
, MNAMELEN
);
943 /* Set up the sockets and per-host congestion */
944 nmp
->nm_sotype
= argp
->sotype
;
945 nmp
->nm_soproto
= argp
->proto
;
946 nmp
->nm_cred
= crhold(proc0
.p_ucred
);
948 nfs_decode_args(nmp
, argp
);
951 * For Connection based sockets (TCP,...) defer the connect until
952 * the first request, in case the server is not responding.
954 if (nmp
->nm_sotype
== SOCK_DGRAM
&&
955 (error
= nfs_connect(nmp
, NULL
)))
959 * This is silly, but it has to be set so that vinifod() works.
960 * We do not want to do an nfs_statfs() here since we can get
961 * stuck on a dead server and we are holding a lock on the mount
964 mp
->mnt_stat
.f_iosize
=
965 nfs_iosize(nmp
->nm_flag
& NFSMNT_NFSV3
, nmp
->nm_sotype
);
968 * Install vop_ops for our vnops
970 vfs_add_vnodeops(mp
, &nfsv2_vnode_vops
, &mp
->mnt_vn_norm_ops
);
971 vfs_add_vnodeops(mp
, &nfsv2_spec_vops
, &mp
->mnt_vn_spec_ops
);
972 vfs_add_vnodeops(mp
, &nfsv2_fifo_vops
, &mp
->mnt_vn_fifo_ops
);
975 * A reference count is needed on the nfsnode representing the
976 * remote root. If this object is not persistent, then backward
977 * traversals of the mount point (i.e. "..") will not work if
978 * the nfsnode gets flushed out of the cache. Ufs does not have
979 * this problem, because one can identify root inodes by their
980 * number == ROOTINO (2).
982 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
988 * Retrieval of mountpoint attributes is delayed until nfs_root
989 * or nfs_statfs are first called. This will happen either when
990 * we first traverse the mount point or if somebody does a df(1).
992 * NFSSTA_GOTFSINFO is used to flag if we have successfully
993 * retrieved mountpoint attributes. In the case of NFSv3 we
994 * also flag static fsinfo.
997 (*vpp
)->v_type
= VNON
;
1000 * Lose the lock but keep the ref.
1003 TAILQ_INSERT_TAIL(&nfs_mountq
, nmp
, nm_entry
);
1027 * Start the reader and writer threads.
1029 lwkt_create(nfssvc_iod_reader
, nmp
, &nmp
->nm_rxthread
,
1030 NULL
, 0, rxcpu
, "nfsiod_rx");
1031 lwkt_create(nfssvc_iod_writer
, nmp
, &nmp
->nm_txthread
,
1032 NULL
, 0, txcpu
, "nfsiod_tx");
1036 nfs_disconnect(nmp
);
1037 nfs_free_mount(nmp
);
1042 * unmount system call
1045 nfs_unmount(struct mount
*mp
, int mntflags
)
1047 struct nfsmount
*nmp
;
1048 int error
, flags
= 0;
1050 if (mntflags
& MNT_FORCE
)
1051 flags
|= FORCECLOSE
;
1054 * Goes something like this..
1055 * - Call vflush() to clear out vnodes for this file system
1056 * - Close the socket
1057 * - Free up the data structures
1059 /* In the forced case, cancel any outstanding requests. */
1060 if (flags
& FORCECLOSE
) {
1061 error
= nfs_nmcancelreqs(nmp
);
1066 * Must handshake with nfs_clientd() if it is active. XXX
1068 nmp
->nm_state
|= NFSSTA_DISMINPROG
;
1070 /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1071 error
= vflush(mp
, 1, flags
);
1073 nmp
->nm_state
&= ~NFSSTA_DISMINPROG
;
1078 * We are now committed to the unmount.
1079 * For NQNFS, let the server daemon free the nfsmount structure.
1081 if (nmp
->nm_flag
& NFSMNT_KERB
)
1082 nmp
->nm_state
|= NFSSTA_DISMNT
;
1083 nfssvc_iod_stop1(nmp
);
1084 nfs_disconnect(nmp
);
1085 nfssvc_iod_stop2(nmp
);
1086 TAILQ_REMOVE(&nfs_mountq
, nmp
, nm_entry
);
1088 if ((nmp
->nm_flag
& NFSMNT_KERB
) == 0) {
1089 nfs_free_mount(nmp
);
1095 nfs_free_mount(struct nfsmount
*nmp
)
1098 crfree(nmp
->nm_cred
);
1099 nmp
->nm_cred
= NULL
;
1102 FREE(nmp
->nm_nam
, M_SONAME
);
1105 zfree(nfsmount_zone
, nmp
);
1109 * Return root of a filesystem
1112 nfs_root(struct mount
*mp
, struct vnode
**vpp
)
1115 struct nfsmount
*nmp
;
1121 error
= nfs_nget(mp
, (nfsfh_t
*)nmp
->nm_fh
, nmp
->nm_fhsize
, &np
);
1127 * Get transfer parameters and root vnode attributes
1129 if ((nmp
->nm_state
& NFSSTA_GOTFSINFO
) == 0) {
1130 if (nmp
->nm_flag
& NFSMNT_NFSV3
) {
1131 error
= nfs_fsinfo(nmp
, vp
, curthread
);
1132 mp
->mnt_stat
.f_iosize
= nfs_iosize(1, nmp
->nm_sotype
);
1134 if ((error
= VOP_GETATTR(vp
, &attrs
)) == 0)
1135 nmp
->nm_state
|= NFSSTA_GOTFSINFO
;
1140 * The root vnode is usually cached by the namecache so do not
1141 * try to avoid going over the wire even if we have previous
1142 * information cached. A stale NFS mount can loop
1143 * forever resolving the root vnode if we return no-error when
1144 * there is in fact an error.
1146 np
->n_attrstamp
= 0;
1147 error
= VOP_GETATTR(vp
, &attrs
);
1149 if (vp
->v_type
== VNON
)
1150 nfs_setvtype(vp
, VDIR
);
1165 static int nfs_sync_scan1(struct mount
*mp
, struct vnode
*vp
, void *data
);
1166 static int nfs_sync_scan2(struct mount
*mp
, struct vnode
*vp
, void *data
);
1169 * Flush out the buffer cache
1173 nfs_sync(struct mount
*mp
, int waitfor
)
1175 struct scaninfo scaninfo
;
1178 scaninfo
.rescan
= 1;
1179 scaninfo
.waitfor
= waitfor
;
1180 scaninfo
.allerror
= 0;
1183 * Force stale buffer cache information to be flushed.
1186 while (error
== 0 && scaninfo
.rescan
) {
1187 scaninfo
.rescan
= 0;
1188 error
= vmntvnodescan(mp
, VMSC_GETVP
, nfs_sync_scan1
,
1189 nfs_sync_scan2
, &scaninfo
);
1195 nfs_sync_scan1(struct mount
*mp
, struct vnode
*vp
, void *data
)
1197 struct scaninfo
*info
= data
;
1199 if (vn_islocked(vp
) || RB_EMPTY(&vp
->v_rbdirty_tree
))
1201 if (info
->waitfor
== MNT_LAZY
)
1207 nfs_sync_scan2(struct mount
*mp
, struct vnode
*vp
, void *data
)
1209 struct scaninfo
*info
= data
;
1212 error
= VOP_FSYNC(vp
, info
->waitfor
);
1214 info
->allerror
= error
;