kernel - Refactor smp collision statistics
[dragonfly.git] / sys / vfs / nfs / nfs_vfsops.c
blobaf1224e63a46858f9eb9be975f741f3c223b768b
1 /*
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $
36 #include "opt_bootp.h"
37 #include "opt_nfsroot.h"
39 #include <sys/param.h>
40 #include <sys/sockio.h>
41 #include <sys/proc.h>
42 #include <sys/vnode.h>
43 #include <sys/fcntl.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/systm.h>
52 #include <sys/objcache.h>
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
57 #include <net/if.h>
58 #include <net/route.h>
59 #include <netinet/in.h>
61 #include <sys/thread2.h>
62 #include <sys/mutex2.h>
64 #include "rpcv2.h"
65 #include "nfsproto.h"
66 #include "nfs.h"
67 #include "nfsmount.h"
68 #include "nfsnode.h"
69 #include "xdr_subs.h"
70 #include "nfsm_subs.h"
71 #include "nfsdiskless.h"
72 #include "nfsmountrpc.h"
74 extern int nfs_mountroot(struct mount *mp);
75 extern void bootpc_init(void);
77 extern struct vop_ops nfsv2_vnode_vops;
78 extern struct vop_ops nfsv2_fifo_vops;
79 extern struct vop_ops nfsv2_spec_vops;
81 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
82 MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle");
83 MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure");
84 MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data");
85 MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor");
86 MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure");
87 MALLOC_DEFINE(M_NFSHASH, "NFS hash", "NFS hash tables");
89 struct objcache *nfsmount_objcache;
91 struct nfsstats nfsstats;
92 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
93 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats,
94 "Nfs stats structure");
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0,
97 "Enable no-connection mode for protocols that support no-connection mode");
98 #ifdef NFS_DEBUG
99 int nfs_debug;
100 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
101 #endif
104 * Tunable to determine the Read/Write unit size. Maximum value
105 * is NFS_MAXDATA. We also default to NFS_MAXDATA.
107 static int nfs_io_size = NFS_MAXDATA;
108 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_io_size, CTLFLAG_RW,
109 &nfs_io_size, 0, "NFS optimal I/O unit size");
111 static void nfs_decode_args (struct nfsmount *nmp,
112 struct nfs_args *argp);
113 static int mountnfs (struct nfs_args *,struct mount *,
114 struct sockaddr *,char *,char *,struct vnode **);
115 static int nfs_mount ( struct mount *mp, char *path, caddr_t data,
116 struct ucred *cred);
117 static int nfs_unmount ( struct mount *mp, int mntflags);
118 static int nfs_root ( struct mount *mp, struct vnode **vpp);
119 static int nfs_statfs ( struct mount *mp, struct statfs *sbp,
120 struct ucred *cred);
121 static int nfs_statvfs(struct mount *mp, struct statvfs *sbp,
122 struct ucred *cred);
123 static int nfs_sync ( struct mount *mp, int waitfor);
126 * nfs vfs operations.
128 static struct vfsops nfs_vfsops = {
129 .vfs_mount = nfs_mount,
130 .vfs_unmount = nfs_unmount,
131 .vfs_root = nfs_root,
132 .vfs_statfs = nfs_statfs,
133 .vfs_statvfs = nfs_statvfs,
134 .vfs_sync = nfs_sync,
135 .vfs_init = nfs_init,
136 .vfs_uninit = nfs_uninit
138 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_MPSAFE);
139 MODULE_VERSION(nfs, 1);
142 * This structure must be filled in by a primary bootstrap or bootstrap
143 * server for a diskless/dataless machine. It is initialized below just
144 * to ensure that it is allocated to initialized data (.data not .bss).
146 struct nfs_diskless nfs_diskless = { { { 0 } } };
147 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
148 int nfs_diskless_valid = 0;
150 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
151 &nfs_diskless_valid, 0,
152 "NFS diskless params were obtained");
154 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
155 nfsv3_diskless.root_hostnam, 0,
156 "Host name for mount point");
158 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
159 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
160 "%Ssockaddr_in", "Address of root server");
162 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD,
163 nfsv3_diskless.swap_hostnam, 0,
164 "Host name for mount ppoint");
166 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD,
167 &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr,
168 "%Ssockaddr_in", "Address of swap server");
/* Forward declarations for the diskless-boot helpers. */
void		nfsargs_ntoh(struct nfs_args *);
static int	nfs_mountdiskless(char *, char *, int,
		    struct sockaddr_in *, struct nfs_args *,
		    struct thread *, struct vnode **,
		    struct mount **);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
		    struct onfs_args *oargs);

181 * Calculate the buffer I/O block size to use. The maximum V2 block size
182 * is typically 8K, the maximum datagram size is typically 16K, and the
183 * maximum V3 block size is typically 32K. The buffer cache tends to work
184 * best with 16K blocks but we allow 32K for TCP connections.
186 * We force the block size to be at least a page for buffer cache efficiency.
188 static int
189 nfs_iosize(int v3, int sotype)
191 int iosize;
192 int iomax;
194 if (v3) {
195 if (sotype == SOCK_STREAM)
196 iomax = NFS_MAXDATA;
197 else
198 iomax = NFS_MAXDGRAMDATA;
199 } else {
200 iomax = NFS_V2MAXDATA;
202 if ((iosize = nfs_io_size) > iomax)
203 iosize = iomax;
204 if (iosize < PAGE_SIZE)
205 iosize = PAGE_SIZE;
208 * This is an aweful hack but until the buffer cache is rewritten
209 * we need it. The problem is that when you combine write() with
210 * mmap() the vm_page->valid bits can become weird looking
211 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers
212 * at the file EOF. To solve the problem the BIO system needs to
213 * be guarenteed that the NFS iosize for regular files will be a
214 * multiple of PAGE_SIZE so it can invalidate the whole page
215 * rather then just the piece of it owned by the buffer when
216 * NFS does vinvalbuf() calls.
218 if (iosize & PAGE_MASK)
219 iosize = (iosize & ~PAGE_MASK) + PAGE_SIZE;
220 return iosize;
223 static void
224 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
226 args->version = NFS_ARGSVERSION;
227 args->addr = oargs->addr;
228 args->addrlen = oargs->addrlen;
229 args->sotype = oargs->sotype;
230 args->proto = oargs->proto;
231 args->fh = oargs->fh;
232 args->fhsize = oargs->fhsize;
233 args->flags = oargs->flags;
234 args->wsize = oargs->wsize;
235 args->rsize = oargs->rsize;
236 args->readdirsize = oargs->readdirsize;
237 args->timeo = oargs->timeo;
238 args->retrans = oargs->retrans;
239 args->maxgrouplist = oargs->maxgrouplist;
240 args->readahead = oargs->readahead;
241 args->deadthresh = oargs->deadthresh;
242 args->hostname = oargs->hostname;
245 static void
246 nfs_convert_diskless(void)
248 int i;
250 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
251 sizeof(struct ifaliasreq));
252 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
253 sizeof(struct sockaddr_in));
254 nfs_convert_oargs(&nfsv3_diskless.swap_args, &nfs_diskless.swap_args);
257 * Copy the NFS handle passed from the diskless code.
259 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which
260 * will fail utterly with HAMMER due to limitations with NFSv2
261 * directory cookies.
263 bcopy(nfs_diskless.swap_fh, nfsv3_diskless.swap_fh, NFSX_V2FH);
264 nfsv3_diskless.swap_fhsize = NFSX_V2FH;
265 for (i = NFSX_V2FH - 1; i >= 0; --i) {
266 if (nfs_diskless.swap_fh[i])
267 break;
269 if (i < 0)
270 nfsv3_diskless.swap_fhsize = 0;
271 nfsv3_diskless.swap_fhsize = 0; /* FORCE DISABLE */
273 bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr,
274 sizeof(struct sockaddr_in));
275 bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN);
276 nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks;
277 bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred,
278 sizeof(struct ucred));
279 nfs_convert_oargs(&nfsv3_diskless.root_args, &nfs_diskless.root_args);
282 * Copy the NFS handle passed from the diskless code.
284 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which
285 * will fail utterly with HAMMER due to limitations with NFSv2
286 * directory cookies.
288 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
289 nfsv3_diskless.root_fhsize = NFSX_V2FH;
290 for (i = NFSX_V2FH - 1; i >= 0; --i) {
291 if (nfs_diskless.root_fh[i])
292 break;
294 if (i < 0)
295 nfsv3_diskless.root_fhsize = 0;
296 nfsv3_diskless.root_fhsize = 0; /* FORCE DISABLE */
298 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
299 sizeof(struct sockaddr_in));
300 bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN);
301 nfsv3_diskless.root_time = nfs_diskless.root_time;
302 bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam,
303 MAXHOSTNAMELEN);
304 nfs_diskless_valid = 3;
308 * nfs statfs call
311 nfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
313 struct vnode *vp;
314 struct nfs_statfs *sfp;
315 struct nfsmount *nmp = VFSTONFS(mp);
316 thread_t td = curthread;
317 int error = 0, retattr;
318 struct nfsnode *np;
319 u_quad_t tquad;
320 struct nfsm_info info;
322 info.mrep = NULL;
323 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3);
325 lwkt_gettoken(&nmp->nm_token);
327 #ifndef nolint
328 sfp = NULL;
329 #endif
330 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
331 if (error) {
332 lwkt_reltoken(&nmp->nm_token);
333 return (error);
335 vp = NFSTOV(np);
336 /* ignore the passed cred */
337 cred = crget();
338 cred->cr_ngroups = 1;
339 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
340 (void)nfs_fsinfo(nmp, vp, td);
341 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
342 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3));
343 ERROROUT(nfsm_fhtom(&info, vp));
344 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error));
345 if (info.v3) {
346 ERROROUT(nfsm_postop_attr(&info, vp, &retattr,
347 NFS_LATTR_NOSHRINK));
349 if (error) {
350 if (info.mrep != NULL)
351 m_freem(info.mrep);
352 goto nfsmout;
354 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3)));
355 sbp->f_flags = nmp->nm_flag;
357 if (info.v3) {
358 sbp->f_bsize = NFS_FABLKSIZE;
359 tquad = fxdr_hyper(&sfp->sf_tbytes);
360 sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
361 tquad = fxdr_hyper(&sfp->sf_fbytes);
362 sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
363 tquad = fxdr_hyper(&sfp->sf_abytes);
364 sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
365 sbp->f_files = (fxdr_unsigned(int32_t,
366 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
367 sbp->f_ffree = (fxdr_unsigned(int32_t,
368 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
369 } else {
370 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
371 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
372 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
373 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
374 sbp->f_files = 0;
375 sbp->f_ffree = 0;
379 * Some values are pre-set in mnt_stat. Note in particular f_iosize
380 * cannot be changed once the filesystem is mounted as it is used
381 * as the basis for BIOs.
383 if (sbp != &mp->mnt_stat) {
384 sbp->f_type = mp->mnt_vfc->vfc_typenum;
385 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
386 sbp->f_iosize = mp->mnt_stat.f_iosize;
388 m_freem(info.mrep);
389 info.mrep = NULL;
390 nfsmout:
391 vput(vp);
392 crfree(cred);
393 lwkt_reltoken(&nmp->nm_token);
394 return (error);
397 static int
398 nfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
400 struct vnode *vp;
401 struct nfs_statfs *sfp;
402 struct nfsmount *nmp = VFSTONFS(mp);
403 thread_t td = curthread;
404 int error = 0, retattr;
405 struct nfsnode *np;
406 struct nfsm_info info;
408 info.mrep = NULL;
409 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3);
410 lwkt_gettoken(&nmp->nm_token);
412 #ifndef nolint
413 sfp = NULL;
414 #endif
415 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
416 if (error) {
417 lwkt_reltoken(&nmp->nm_token);
418 return (error);
420 vp = NFSTOV(np);
421 /* ignore the passed cred */
422 cred = crget();
423 cred->cr_ngroups = 1;
424 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
425 (void)nfs_fsinfo(nmp, vp, td);
426 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
427 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3));
428 ERROROUT(nfsm_fhtom(&info, vp));
429 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error));
430 if (info.v3) {
431 ERROROUT(nfsm_postop_attr(&info, vp, &retattr,
432 NFS_LATTR_NOSHRINK));
434 if (error) {
435 if (info.mrep != NULL)
436 m_freem(info.mrep);
437 goto nfsmout;
439 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3)));
440 sbp->f_flag = nmp->nm_flag;
441 sbp->f_owner = nmp->nm_cred->cr_ruid;
443 if (info.v3) {
444 sbp->f_bsize = NFS_FABLKSIZE;
445 sbp->f_frsize = NFS_FABLKSIZE;
446 sbp->f_blocks = (fxdr_hyper(&sfp->sf_tbytes) /
447 ((u_quad_t)NFS_FABLKSIZE));
448 sbp->f_bfree = (fxdr_hyper(&sfp->sf_fbytes) /
449 ((u_quad_t)NFS_FABLKSIZE));
450 sbp->f_bavail = (fxdr_hyper(&sfp->sf_abytes) /
451 ((u_quad_t)NFS_FABLKSIZE));
452 sbp->f_files = fxdr_hyper(&sfp->sf_tfiles);
453 sbp->f_ffree = fxdr_hyper(&sfp->sf_ffiles);
454 sbp->f_favail = fxdr_hyper(&sfp->sf_afiles);
455 } else {
456 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
457 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
458 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
459 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
460 sbp->f_files = 0;
461 sbp->f_ffree = 0;
462 sbp->f_favail = 0;
464 sbp->f_syncreads = 0;
465 sbp->f_syncwrites = 0;
466 sbp->f_asyncreads = 0;
467 sbp->f_asyncwrites = 0;
468 sbp->f_type = mp->mnt_vfc->vfc_typenum;
470 m_freem(info.mrep);
471 info.mrep = NULL;
472 nfsmout:
473 vput(vp);
474 crfree(cred);
475 lwkt_reltoken(&nmp->nm_token);
476 return (error);
480 * nfs version 3 fsinfo rpc call
483 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct thread *td)
485 struct nfsv3_fsinfo *fsp;
486 u_int32_t pref, max;
487 int error = 0, retattr;
488 u_int64_t maxfsize;
489 struct nfsm_info info;
491 info.v3 = 1;
492 nfsstats.rpccnt[NFSPROC_FSINFO]++;
493 nfsm_reqhead(&info, vp, NFSPROC_FSINFO, NFSX_FH(1));
494 ERROROUT(nfsm_fhtom(&info, vp));
495 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSINFO, td,
496 nfs_vpcred(vp, ND_READ), &error));
497 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, NFS_LATTR_NOSHRINK));
498 if (error == 0) {
499 NULLOUT(fsp = nfsm_dissect(&info, NFSX_V3FSINFO));
500 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
501 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
502 nmp->nm_wsize = roundup2(pref, NFS_FABLKSIZE);
503 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
504 if (max < nmp->nm_wsize && max > 0) {
505 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1);
506 if (nmp->nm_wsize == 0)
507 nmp->nm_wsize = max;
509 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
510 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
511 nmp->nm_rsize = roundup2(pref, NFS_FABLKSIZE);
512 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
513 if (max < nmp->nm_rsize && max > 0) {
514 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1);
515 if (nmp->nm_rsize == 0)
516 nmp->nm_rsize = max;
518 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
519 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
520 nmp->nm_readdirsize = roundup2(pref, NFS_DIRBLKSIZ);
521 if (max < nmp->nm_readdirsize && max > 0) {
522 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1);
523 if (nmp->nm_readdirsize == 0)
524 nmp->nm_readdirsize = max;
526 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
527 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
528 nmp->nm_maxfilesize = maxfsize;
529 nmp->nm_state |= NFSSTA_GOTFSINFO;
532 * Use the smaller of rsize/wsize for the biosize.
534 if (nmp->nm_rsize < nmp->nm_wsize)
535 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_rsize;
536 else
537 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_wsize;
539 m_freem(info.mrep);
540 info.mrep = NULL;
541 nfsmout:
542 return (error);
546 * Mount a remote root fs via. nfs. This depends on the info in the
547 * nfs_diskless structure that has been filled in properly by some primary
548 * bootstrap.
549 * It goes something like this:
550 * - do enough of "ifconfig" by calling ifioctl() so that the system
551 * can talk to the server
552 * - If nfs_diskless.mygateway is filled in, use that address as
553 * a default gateway.
554 * - build the rootfs mount point and call mountnfs() to do the rest.
557 nfs_mountroot(struct mount *mp)
559 struct mount *swap_mp;
560 struct nfsv3_diskless *nd = &nfsv3_diskless;
561 struct socket *so;
562 struct vnode *vp;
563 struct thread *td = curthread; /* XXX */
564 int error, i;
565 u_long l;
566 char buf[128], addr[INET_ADDRSTRLEN];
568 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
569 bootpc_init(); /* use bootp to get nfs_diskless filled in */
570 #endif
573 * XXX time must be non-zero when we init the interface or else
574 * the arp code will wedge...
576 while (mycpu->gd_time_seconds == 0)
577 tsleep(mycpu, 0, "arpkludge", 10);
580 * The boot code may have passed us a diskless structure.
582 kprintf("DISKLESS %d\n", nfs_diskless_valid);
583 if (nfs_diskless_valid == 1)
584 nfs_convert_diskless();
587 * NFSv3 is required.
589 nd->root_args.flags |= NFSMNT_NFSV3 | NFSMNT_RDIRPLUS;
590 nd->swap_args.flags |= NFSMNT_NFSV3;
592 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr))
593 kprintf("nfs_mountroot: interface %s ip %s",
594 nd->myif.ifra_name,
595 kinet_ntoa(SINP(&nd->myif.ifra_addr)->sin_addr, addr));
596 kprintf(" bcast %s",
597 kinet_ntoa(SINP(&nd->myif.ifra_broadaddr)->sin_addr, addr));
598 kprintf(" mask %s\n",
599 kinet_ntoa(SINP(&nd->myif.ifra_mask)->sin_addr, addr));
600 #undef SINP
603 * XXX splnet, so networks will receive...
605 crit_enter();
608 * BOOTP does not necessarily have to be compiled into the kernel
609 * for an NFS root to work. If we inherited the network
610 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured
611 * out our interface for us and all we need to do is ifconfig the
612 * interface. We only do this if the interface has not already been
613 * ifconfig'd by e.g. BOOTP.
615 error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, td);
616 if (error) {
617 panic("nfs_mountroot: socreate(%04x): %d",
618 nd->myif.ifra_addr.sa_family, error);
621 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, proc0.p_ucred);
622 if (error)
623 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
625 soclose(so, FNONBLOCK);
628 * If the gateway field is filled in, set it as the default route.
630 if (nd->mygateway.sin_len != 0) {
631 struct sockaddr_in mask, sin;
633 bzero((caddr_t)&mask, sizeof(mask));
634 sin = mask;
635 sin.sin_family = AF_INET;
636 sin.sin_len = sizeof(sin);
637 kprintf("nfs_mountroot: gateway %s\n",
638 kinet_ntoa(nd->mygateway.sin_addr, addr));
639 error = rtrequest_global(RTM_ADD, (struct sockaddr *)&sin,
640 (struct sockaddr *)&nd->mygateway,
641 (struct sockaddr *)&mask,
642 RTF_UP | RTF_GATEWAY);
643 if (error)
644 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error);
648 * Create the rootfs mount point.
650 nd->root_args.fh = nd->root_fh;
651 nd->root_args.fhsize = nd->root_fhsize;
652 l = ntohl(nd->root_saddr.sin_addr.s_addr);
653 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
654 (l >> 24) & 0xff, (l >> 16) & 0xff,
655 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam);
656 kprintf("NFS_ROOT: %s\n",buf);
657 error = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr,
658 &nd->root_args, td, &vp, &mp);
659 if (error) {
660 mp->mnt_vfc->vfc_refcount--;
661 crit_exit();
662 return (error);
665 swap_mp = NULL;
666 if (nd->swap_nblks) {
668 /* Convert to DEV_BSIZE instead of Kilobyte */
669 nd->swap_nblks *= 2;
672 * Create a fake mount point just for the swap vnode so that the
673 * swap file can be on a different server from the rootfs.
675 nd->swap_args.fh = nd->swap_fh;
676 nd->swap_args.fhsize = nd->swap_fhsize;
677 l = ntohl(nd->swap_saddr.sin_addr.s_addr);
678 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
679 (l >> 24) & 0xff, (l >> 16) & 0xff,
680 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam);
681 kprintf("NFS SWAP: %s\n",buf);
682 vp = NULL; /* avoid gcc warnings */
683 error = nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr,
684 &nd->swap_args, td, &vp, &swap_mp);
685 if (error) {
686 crit_exit();
687 return (error);
689 vfs_unbusy(swap_mp);
691 VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size =
692 nd->swap_nblks * DEV_BSIZE ;
695 * Since the swap file is not the root dir of a file system,
696 * hack it to a regular file.
698 vclrflags(vp, VROOT);
699 vref(vp);
700 nfs_setvtype(vp, VREG);
701 swaponvp(td, vp, nd->swap_nblks);
704 mp->mnt_flag |= MNT_ROOTFS;
707 * This is not really an nfs issue, but it is much easier to
708 * set hostname here and then let the "/etc/rc.xxx" files
709 * mount the right /var based upon its preset value.
711 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
712 hostname[MAXHOSTNAMELEN - 1] = '\0';
713 for (i = 0; i < MAXHOSTNAMELEN; i++)
714 if (hostname[i] == '\0')
715 break;
716 inittodr(ntohl(nd->root_time));
717 crit_exit();
718 return (0);
722 * Internal version of mount system call for diskless setup.
724 static int
725 nfs_mountdiskless(char *path, char *which, int mountflag,
726 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
727 struct vnode **vpp, struct mount **mpp)
729 struct mount *mp;
730 struct sockaddr *nam;
731 int didalloc = 0;
732 int error;
734 mp = *mpp;
736 if (mp == NULL) {
737 if ((error = vfs_rootmountalloc("nfs", path, &mp)) != 0) {
738 kprintf("nfs_mountroot: NFS not configured");
739 return (error);
741 didalloc = 1;
743 mp->mnt_kern_flag = 0;
744 mp->mnt_flag = mountflag;
745 nam = dup_sockaddr((struct sockaddr *)sin);
747 #if defined(BOOTP) || defined(NFS_ROOT)
748 if (args->fhsize == 0) {
749 char *xpath = path;
751 kprintf("NFS_ROOT: No FH passed from loader, attempting "
752 "mount rpc...");
753 while (*xpath && *xpath != ':')
754 ++xpath;
755 if (*xpath)
756 ++xpath;
757 args->fhsize = 0;
758 error = md_mount(sin, xpath, args->fh, &args->fhsize, args, td);
759 if (error) {
760 kprintf("failed error %d.\n", error);
761 goto haderror;
763 kprintf("success!\n");
765 #endif
767 if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) {
768 #if defined(BOOTP) || defined(NFS_ROOT)
769 haderror:
770 #endif
771 kprintf("nfs_mountroot: mount %s on %s: %d", path, which, error);
772 mp->mnt_vfc->vfc_refcount--;
773 if (didalloc)
774 kfree(mp, M_MOUNT);
775 kfree(nam, M_SONAME);
776 return (error);
778 *mpp = mp;
779 return (0);
782 static void
783 nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp)
785 int adjsock;
786 int maxio;
788 crit_enter();
790 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
791 * no sense in that context.
793 if (nmp->nm_sotype == SOCK_STREAM) {
794 nmp->nm_flag &= ~NFSMNT_NOCONN;
795 argp->flags &= ~NFSMNT_NOCONN;
799 * readdirplus is NFSv3 only.
801 if ((argp->flags & NFSMNT_NFSV3) == 0) {
802 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
803 argp->flags &= ~NFSMNT_RDIRPLUS;
807 * Re-bind if rsrvd port flag has changed
809 adjsock = (nmp->nm_flag & NFSMNT_RESVPORT) !=
810 (argp->flags & NFSMNT_RESVPORT);
812 /* Update flags atomically. Don't change the lock bits. */
813 nmp->nm_flag = argp->flags | nmp->nm_flag;
814 crit_exit();
816 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
817 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
818 if (nmp->nm_timeo < NFS_MINTIMEO)
819 nmp->nm_timeo = NFS_MINTIMEO;
820 else if (nmp->nm_timeo > NFS_MAXTIMEO)
821 nmp->nm_timeo = NFS_MAXTIMEO;
824 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
825 nmp->nm_retry = argp->retrans;
826 if (nmp->nm_retry > NFS_MAXREXMIT)
827 nmp->nm_retry = NFS_MAXREXMIT;
831 * These parameters effect the buffer cache and cannot be changed
832 * once we've successfully mounted.
834 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
835 maxio = nfs_iosize(argp->flags & NFSMNT_NFSV3, nmp->nm_sotype);
837 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
838 nmp->nm_wsize = argp->wsize;
839 /* Round down to multiple of blocksize */
840 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
841 if (nmp->nm_wsize <= 0)
842 nmp->nm_wsize = NFS_FABLKSIZE;
844 if (nmp->nm_wsize > maxio)
845 nmp->nm_wsize = maxio;
846 if (nmp->nm_wsize > MAXBSIZE)
847 nmp->nm_wsize = MAXBSIZE;
849 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
850 nmp->nm_rsize = argp->rsize;
851 /* Round down to multiple of blocksize */
852 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
853 if (nmp->nm_rsize <= 0)
854 nmp->nm_rsize = NFS_FABLKSIZE;
856 if (nmp->nm_rsize > maxio)
857 nmp->nm_rsize = maxio;
858 if (nmp->nm_rsize > MAXBSIZE)
859 nmp->nm_rsize = MAXBSIZE;
861 if ((argp->flags & NFSMNT_READDIRSIZE) &&
862 argp->readdirsize > 0) {
863 nmp->nm_readdirsize = argp->readdirsize;
865 if (nmp->nm_readdirsize > maxio)
866 nmp->nm_readdirsize = maxio;
867 if (nmp->nm_readdirsize > nmp->nm_rsize)
868 nmp->nm_readdirsize = nmp->nm_rsize;
871 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
872 nmp->nm_acregmin = argp->acregmin;
873 else
874 nmp->nm_acregmin = NFS_MINATTRTIMO;
875 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
876 nmp->nm_acregmax = argp->acregmax;
877 else
878 nmp->nm_acregmax = NFS_MAXATTRTIMO;
879 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
880 nmp->nm_acdirmin = argp->acdirmin;
881 else
882 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
883 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
884 nmp->nm_acdirmax = argp->acdirmax;
885 else
886 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
887 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
888 nmp->nm_acdirmin = nmp->nm_acdirmax;
889 if (nmp->nm_acregmin > nmp->nm_acregmax)
890 nmp->nm_acregmin = nmp->nm_acregmax;
892 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
893 if (argp->maxgrouplist <= NFS_MAXGRPS)
894 nmp->nm_numgrps = argp->maxgrouplist;
895 else
896 nmp->nm_numgrps = NFS_MAXGRPS;
898 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
899 if (argp->readahead <= NFS_MAXRAHEAD)
900 nmp->nm_readahead = argp->readahead;
901 else
902 nmp->nm_readahead = NFS_MAXRAHEAD;
904 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
905 if (argp->deadthresh <= NFS_NEVERDEAD)
906 nmp->nm_deadthresh = argp->deadthresh;
907 else
908 nmp->nm_deadthresh = NFS_NEVERDEAD;
911 if (nmp->nm_so && adjsock) {
912 nfs_safedisconnect(nmp);
913 if (nmp->nm_sotype == SOCK_DGRAM)
914 while (nfs_connect(nmp, NULL)) {
915 kprintf("nfs_args: retrying connect\n");
916 (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
922 * VFS Operations.
924 * mount system call
925 * It seems a bit dumb to copyinstr() the host and path here and then
926 * bcopy() them in mountnfs(), but I wanted to detect errors before
927 * doing the sockargs() call because sockargs() allocates an mbuf and
928 * an error after that means that I have to release the mbuf.
930 /* ARGSUSED */
931 static int
932 nfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
934 int error;
935 struct nfs_args args;
936 struct sockaddr *nam;
937 struct vnode *vp;
938 char pth[MNAMELEN], hst[MNAMELEN];
939 size_t len;
940 u_char nfh[NFSX_V3FHMAX];
942 if (path == NULL) {
943 nfs_mountroot(mp);
944 return (0);
946 error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
947 if (error)
948 return (error);
949 if (args.version != NFS_ARGSVERSION) {
950 #ifdef COMPAT_PRELITE2
952 * If the argument version is unknown, then assume the
953 * caller is a pre-lite2 4.4BSD client and convert its
954 * arguments.
956 struct onfs_args oargs;
957 error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args));
958 if (error)
959 return (error);
960 nfs_convert_oargs(&args,&oargs);
961 #else /* !COMPAT_PRELITE2 */
962 return (EPROGMISMATCH);
963 #endif /* COMPAT_PRELITE2 */
965 if (mp->mnt_flag & MNT_UPDATE) {
966 struct nfsmount *nmp = VFSTONFS(mp);
968 if (nmp == NULL)
969 return (EIO);
971 * When doing an update, we can't change from or to
972 * v3, or change cookie translation, or rsize or wsize.
974 args.flags &= ~(NFSMNT_NFSV3 | NFSMNT_RSIZE | NFSMNT_WSIZE);
975 args.flags |= nmp->nm_flag & (NFSMNT_NFSV3);
976 nfs_decode_args(nmp, &args);
977 return (0);
981 * Make the nfs_ip_paranoia sysctl serve as the default connection
982 * or no-connection mode for those protocols that support
983 * no-connection mode (the flag will be cleared later for protocols
984 * that do not support no-connection mode). This will allow a client
985 * to receive replies from a different IP then the request was
986 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
987 * not 0.
989 if (nfs_ip_paranoia == 0)
990 args.flags |= NFSMNT_NOCONN;
991 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX)
992 return (EINVAL);
993 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
994 if (error)
995 return (error);
996 error = copyinstr(path, pth, MNAMELEN-1, &len);
997 if (error)
998 return (error);
999 bzero(&pth[len], MNAMELEN - len);
1000 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1001 if (error)
1002 return (error);
1003 bzero(&hst[len], MNAMELEN - len);
1004 /* sockargs() call must be after above copyin() calls */
1005 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
1006 if (error)
1007 return (error);
1008 args.fh = nfh;
1009 error = mountnfs(&args, mp, nam, pth, hst, &vp);
1010 return (error);
1014 * Common code for mount and mountroot
1016 static int
1017 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1018 char *pth, char *hst, struct vnode **vpp)
1020 struct nfsmount *nmp;
1021 struct nfsnode *np;
1022 int error;
1023 int rxcpu;
1024 int txcpu;
1026 if (mp->mnt_flag & MNT_UPDATE) {
1027 nmp = VFSTONFS(mp);
1028 /* update paths, file handles, etc, here XXX */
1029 kfree(nam, M_SONAME);
1030 return (0);
1031 } else {
1032 nmp = objcache_get(nfsmount_objcache, M_WAITOK);
1033 bzero((caddr_t)nmp, sizeof (struct nfsmount));
1034 mtx_init_flags(&nmp->nm_rxlock, "nfsrx", MTXF_NOCOLLSTATS);
1035 mtx_init_flags(&nmp->nm_txlock, "nfstx", MTXF_NOCOLLSTATS);
1036 TAILQ_INIT(&nmp->nm_uidlruhead);
1037 TAILQ_INIT(&nmp->nm_bioq);
1038 TAILQ_INIT(&nmp->nm_reqq);
1039 TAILQ_INIT(&nmp->nm_reqtxq);
1040 TAILQ_INIT(&nmp->nm_reqrxq);
1041 mp->mnt_data = (qaddr_t)nmp;
1042 lwkt_token_init(&nmp->nm_token, "nfs_token");
1044 vfs_getnewfsid(mp);
1045 nmp->nm_mountp = mp;
1046 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
1047 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */
1049 lwkt_gettoken(&nmp->nm_token);
1052 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1053 * high, depending on whether we end up with negative offsets in
1054 * the client or server somewhere. 2GB-1 may be safer.
1056 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1057 * that we can handle until we find out otherwise. Note that seek
1058 * offsets are signed.
1060 if ((argp->flags & NFSMNT_NFSV3) == 0)
1061 nmp->nm_maxfilesize = 0xffffffffLL;
1062 else
1063 nmp->nm_maxfilesize = 0x7fffffffffffffffLL;
1065 nmp->nm_timeo = NFS_TIMEO;
1066 nmp->nm_retry = NFS_RETRANS;
1067 nmp->nm_wsize = nfs_iosize(argp->flags & NFSMNT_NFSV3, argp->sotype);
1068 nmp->nm_rsize = nmp->nm_wsize;
1069 nmp->nm_readdirsize = NFS_READDIRSIZE;
1070 nmp->nm_numgrps = NFS_MAXGRPS;
1071 nmp->nm_readahead = NFS_DEFRAHEAD;
1072 nmp->nm_deadthresh = NFS_DEADTHRESH;
1073 nmp->nm_fhsize = argp->fhsize;
1074 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1075 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1076 nmp->nm_nam = nam;
1077 /* Set up the sockets and per-host congestion */
1078 nmp->nm_sotype = argp->sotype;
1079 nmp->nm_soproto = argp->proto;
1080 nmp->nm_cred = crhold(proc0.p_ucred);
1082 nfs_decode_args(nmp, argp);
1085 * For Connection based sockets (TCP,...) defer the connect until
1086 * the first request, in case the server is not responding.
1088 if (nmp->nm_sotype == SOCK_DGRAM &&
1089 (error = nfs_connect(nmp, NULL)))
1090 goto bad;
1093 * This is silly, but it has to be set so that vinifod() works.
1094 * We do not want to do an nfs_statfs() here since we can get
1095 * stuck on a dead server and we are holding a lock on the mount
1096 * point.
1098 mp->mnt_stat.f_iosize =
1099 nfs_iosize(nmp->nm_flag & NFSMNT_NFSV3, nmp->nm_sotype);
1102 * Install vop_ops for our vnops
1104 vfs_add_vnodeops(mp, &nfsv2_vnode_vops, &mp->mnt_vn_norm_ops);
1105 vfs_add_vnodeops(mp, &nfsv2_spec_vops, &mp->mnt_vn_spec_ops);
1106 vfs_add_vnodeops(mp, &nfsv2_fifo_vops, &mp->mnt_vn_fifo_ops);
1109 * A reference count is needed on the nfsnode representing the
1110 * remote root. If this object is not persistent, then backward
1111 * traversals of the mount point (i.e. "..") will not work if
1112 * the nfsnode gets flushed out of the cache. Ufs does not have
1113 * this problem, because one can identify root inodes by their
1114 * number == ROOTINO (2).
1116 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
1117 if (error)
1118 goto bad;
1119 *vpp = NFSTOV(np);
1122 * Retrieval of mountpoint attributes is delayed until nfs_rot
1123 * or nfs_statfs are first called. This will happen either when
1124 * we first traverse the mount point or if somebody does a df(1).
1126 * NFSSTA_GOTFSINFO is used to flag if we have successfully
1127 * retrieved mountpoint attributes. In the case of NFSv3 we
1128 * also flag static fsinfo.
1130 if (*vpp != NULL)
1131 (*vpp)->v_type = VNON;
1134 * Lose the lock but keep the ref.
1136 vn_unlock(*vpp);
1137 lwkt_gettoken(&nfs_token);
1138 TAILQ_INSERT_TAIL(&nfs_mountq, nmp, nm_entry);
1139 lwkt_reltoken(&nfs_token);
1141 switch(ncpus) {
1142 case 0:
1143 case 1:
1144 rxcpu = 0;
1145 txcpu = 0;
1146 break;
1147 case 2:
1148 rxcpu = 0;
1149 txcpu = 1;
1150 break;
1151 default:
1152 rxcpu = -1;
1153 txcpu = -1;
1154 break;
1158 * Start the reader and writer threads.
1160 lwkt_create(nfssvc_iod_reader, nmp, &nmp->nm_rxthread,
1161 NULL, 0, rxcpu, "nfsiod_rx");
1162 lwkt_create(nfssvc_iod_writer, nmp, &nmp->nm_txthread,
1163 NULL, 0, txcpu, "nfsiod_tx");
1164 lwkt_reltoken(&nmp->nm_token);
1165 return (0);
1166 bad:
1167 nfs_disconnect(nmp);
1168 lwkt_reltoken(&nmp->nm_token);
1169 nfs_free_mount(nmp);
1170 return (error);
1174 * unmount system call
1176 static int
1177 nfs_unmount(struct mount *mp, int mntflags)
1179 struct nfsmount *nmp;
1180 int error, flags = 0;
1182 nmp = VFSTONFS(mp);
1183 lwkt_gettoken(&nmp->nm_token);
1184 if (mntflags & MNT_FORCE) {
1185 flags |= FORCECLOSE;
1186 nmp->nm_flag |= NFSMNT_FORCE;
1190 * Goes something like this..
1191 * - Call vflush() to clear out vnodes for this file system
1192 * - Close the socket
1193 * - Free up the data structures
1195 /* In the forced case, cancel any outstanding requests. */
1196 if (flags & FORCECLOSE) {
1197 error = nfs_nmcancelreqs(nmp);
1198 if (error) {
1199 kprintf("NFS: %s: Unable to cancel all requests\n",
1200 mp->mnt_stat.f_mntfromname);
1201 /* continue anyway */
1206 * Must handshake with nfs_clientd() if it is active. XXX
1208 nmp->nm_state |= NFSSTA_DISMINPROG;
1211 * We hold 1 extra ref on the root vnode; see comment in mountnfs().
1213 * If this doesn't work and we are doing a forced unmount we continue
1214 * anyway.
1216 error = vflush(mp, 1, flags);
1217 if (error) {
1218 nmp->nm_state &= ~NFSSTA_DISMINPROG;
1219 if ((flags & FORCECLOSE) == 0) {
1220 lwkt_reltoken(&nmp->nm_token);
1221 return (error);
1226 * We are now committed to the unmount.
1227 * For NQNFS, let the server daemon free the nfsmount structure.
1229 if (nmp->nm_flag & NFSMNT_KERB)
1230 nmp->nm_state |= NFSSTA_DISMNT;
1231 nfssvc_iod_stop1(nmp);
1232 nfs_disconnect(nmp);
1233 nfssvc_iod_stop2(nmp);
1235 lwkt_gettoken(&nfs_token);
1236 TAILQ_REMOVE(&nfs_mountq, nmp, nm_entry);
1237 lwkt_reltoken(&nfs_token);
1239 lwkt_reltoken(&nmp->nm_token);
1241 if ((nmp->nm_flag & NFSMNT_KERB) == 0) {
1242 nfs_free_mount(nmp);
1244 return (0);
1247 void
1248 nfs_free_mount(struct nfsmount *nmp)
1250 if (nmp->nm_cred) {
1251 crfree(nmp->nm_cred);
1252 nmp->nm_cred = NULL;
1254 if (nmp->nm_nam) {
1255 kfree(nmp->nm_nam, M_SONAME);
1256 nmp->nm_nam = NULL;
1258 objcache_put(nfsmount_objcache, nmp);
1262 * Return root of a filesystem
1264 static int
1265 nfs_root(struct mount *mp, struct vnode **vpp)
1267 struct vnode *vp;
1268 struct nfsmount *nmp;
1269 struct vattr attrs;
1270 struct nfsnode *np;
1271 int error;
1273 nmp = VFSTONFS(mp);
1274 lwkt_gettoken(&nmp->nm_token);
1275 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
1276 if (error) {
1277 lwkt_reltoken(&nmp->nm_token);
1278 return (error);
1280 vp = NFSTOV(np);
1283 * Get transfer parameters and root vnode attributes
1285 * NOTE: nfs_fsinfo() is expected to override the default
1286 * f_iosize we set.
1288 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
1289 if (nmp->nm_flag & NFSMNT_NFSV3) {
1290 mp->mnt_stat.f_iosize = nfs_iosize(1, nmp->nm_sotype);
1291 error = nfs_fsinfo(nmp, vp, curthread);
1292 } else {
1293 if ((error = VOP_GETATTR(vp, &attrs)) == 0)
1294 nmp->nm_state |= NFSSTA_GOTFSINFO;
1297 } else {
1299 * The root vnode is usually cached by the namecache so do not
1300 * try to avoid going over the wire even if we have previous
1301 * information cached. A stale NFS mount can loop
1302 * forever resolving the root vnode if we return no-error when
1303 * there is in fact an error.
1305 np->n_attrstamp = 0;
1306 error = VOP_GETATTR(vp, &attrs);
1308 if (vp->v_type == VNON)
1309 nfs_setvtype(vp, VDIR);
1310 vsetflags(vp, VROOT);
1311 if (error)
1312 vput(vp);
1313 else
1314 *vpp = vp;
1315 lwkt_reltoken(&nmp->nm_token);
1316 return (error);
/*
 * State passed from nfs_sync() to its per-vnode callback through the
 * opaque data pointer of vsyncscan().
 */
struct scaninfo {
	int rescan;	/* nfs_sync() repeats the scan while non-zero */
	int waitfor;	/* waitfor value forwarded to VOP_FSYNC() */
	int allerror;	/* most recent non-zero VOP_FSYNC() error */
};

/* Per-vnode callback used by nfs_sync() */
static int nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
1328 * Flush out the buffer cache
1330 /* ARGSUSED */
1331 static int
1332 nfs_sync(struct mount *mp, int waitfor)
1334 struct nfsmount *nmp = VFSTONFS(mp);
1335 struct scaninfo scaninfo;
1336 int error;
1338 scaninfo.rescan = 1;
1339 scaninfo.waitfor = waitfor;
1340 scaninfo.allerror = 0;
1343 * Force stale buffer cache information to be flushed.
1345 lwkt_gettoken(&nmp->nm_token);
1346 error = 0;
1347 if ((waitfor & MNT_LAZY) == 0) {
1348 while (error == 0 && scaninfo.rescan) {
1349 scaninfo.rescan = 0;
1350 error = vsyncscan(mp, VMSC_GETVP,
1351 nfs_sync_scan2, &scaninfo);
1354 lwkt_reltoken(&nmp->nm_token);
1355 return(error);
1358 static int
1359 nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
1361 struct scaninfo *info = data;
1362 int error;
1364 if (vp->v_type == VNON || vp->v_type == VBAD)
1365 return(0);
1366 error = VOP_FSYNC(vp, info->waitfor, 0);
1367 if (error)
1368 info->allerror = error;
1369 return(0);