2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)nfs_syscalls.c 8.5 (Berkeley) 3/30/95
33 * $FreeBSD: src/sys/nfs/nfs_syscalls.c,v 1.58.2.1 2000/11/26 02:30:06 dillon Exp $
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/sysproto.h>
39 #include <sys/kernel.h>
40 #include <sys/sysctl.h>
42 #include <sys/filedesc.h>
43 #include <sys/vnode.h>
44 #include <sys/malloc.h>
45 #include <sys/mount.h>
50 #include <sys/resourcevar.h>
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/domain.h>
54 #include <sys/protosw.h>
55 #include <sys/nlookup.h>
57 #include <sys/mutex2.h>
58 #include <sys/thread2.h>
60 #include <netinet/in.h>
61 #include <netinet/tcp.h>
66 #include "nfsm_subs.h"
67 #include "nfsrvcache.h"
72 static MALLOC_DEFINE(M_NFSSVC
, "NFS srvsock", "Nfs server structure");
74 static int nuidhash_max
= NFS_MAXUIDHASH
;
77 static void nfsrv_zapsock (struct nfssvc_sock
*slp
);
83 SYSCTL_DECL(_vfs_nfs
);
87 static struct nfsdrt nfsdrt
;
88 static int nfs_numnfsd
= 0;
89 static void nfsd_rt (int sotype
, struct nfsrv_descript
*nd
,
91 static int nfssvc_addsock (struct file
*, struct sockaddr
*,
93 static int nfssvc_nfsd (struct nfsd_srvargs
*,caddr_t
,struct thread
*);
95 static int nfs_privport
= 0;
96 SYSCTL_INT(_vfs_nfs
, NFS_NFSPRIVPORT
, nfs_privport
, CTLFLAG_RW
, &nfs_privport
,
97 0, "Enable privileged source port checks");
98 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, gatherdelay
, CTLFLAG_RW
, &nfsrvw_procrastinate
, 0,
99 "Enable NFS request procrastination");
100 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, gatherdelay_v3
, CTLFLAG_RW
, &nfsrvw_procrastinate_v3
, 0,
101 "Enable NFSv3 request procrastination");
102 int nfs_soreserve
= NFS_MAXPACKET
* NFS_MAXASYNCBIO
;
103 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, soreserve
, CTLFLAG_RW
, &nfs_soreserve
, 0,
104 "Minimum NFS socket buffer size reservation");
107 * NFS server system calls
110 #endif /* NFS_NOSERVER */
112 * nfssvc_args(int flag, caddr_t argp)
114 * Nfs server pseudo system call for the nfsd's
115 * Based on the flag value it either:
116 * - adds a socket to the selection list
117 * - remains in the kernel as an nfsd
118 * - remains in the kernel as an nfsiod
123 sys_nfssvc(struct nfssvc_args
*uap
)
126 struct nlookupdata nd
;
128 struct sockaddr
*nam
;
129 struct nfsd_args nfsdarg
;
130 struct nfsd_srvargs nfsd_srvargs
, *nsd
= &nfsd_srvargs
;
131 struct nfsd_cargs ncd
;
133 struct nfssvc_sock
*slp
;
134 struct nfsuid
*nuidp
;
135 struct nfsmount
*nmp
;
137 #endif /* NFS_NOSERVER */
139 struct thread
*td
= curthread
;
144 error
= priv_check(td
, PRIV_ROOT
);
148 lwkt_gettoken(&nfs_token
);
150 while (nfssvc_sockhead_flag
& SLP_INIT
) {
151 nfssvc_sockhead_flag
|= SLP_WANTINIT
;
152 tsleep((caddr_t
)&nfssvc_sockhead
, 0, "nfsd init", 0);
154 if (uap
->flag
& NFSSVC_BIOD
)
155 error
= ENXIO
; /* no longer need nfsiod's */
159 #else /* !NFS_NOSERVER */
160 else if (uap
->flag
& NFSSVC_MNTD
) {
161 error
= copyin(uap
->argp
, (caddr_t
)&ncd
, sizeof (ncd
));
165 error
= nlookup_init(&nd
, ncd
.ncd_dirp
, UIO_USERSPACE
,
168 error
= nlookup(&nd
);
170 error
= cache_vget(&nd
.nl_nch
, nd
.nl_cred
, LK_EXCLUSIVE
, &vp
);
175 if ((vp
->v_flag
& VROOT
) == 0)
177 nmp
= VFSTONFS(vp
->v_mount
);
181 if ((nmp
->nm_state
& NFSSTA_MNTD
) &&
182 (uap
->flag
& NFSSVC_GOTAUTH
) == 0) {
186 nmp
->nm_state
|= NFSSTA_MNTD
;
187 error
= nfs_clientd(nmp
, td
->td_ucred
, &ncd
, uap
->flag
,
189 } else if (uap
->flag
& NFSSVC_ADDSOCK
) {
190 error
= copyin(uap
->argp
, (caddr_t
)&nfsdarg
, sizeof(nfsdarg
));
193 error
= holdsock(td
->td_proc
->p_fd
, nfsdarg
.sock
, &fp
);
197 * Get the client address for connected sockets.
199 if (nfsdarg
.name
== NULL
|| nfsdarg
.namelen
== 0)
202 error
= getsockaddr(&nam
, nfsdarg
.name
,
209 error
= nfssvc_addsock(fp
, nam
, td
);
212 error
= copyin(uap
->argp
, (caddr_t
)nsd
, sizeof (*nsd
));
215 if ((uap
->flag
& NFSSVC_AUTHIN
) &&
216 ((nfsd
= nsd
->nsd_nfsd
)) != NULL
&&
217 (nfsd
->nfsd_slp
->ns_flag
& SLP_VALID
)) {
218 slp
= nfsd
->nfsd_slp
;
221 * First check to see if another nfsd has already
222 * added this credential.
224 for (nuidp
= NUIDHASH(slp
,nsd
->nsd_cr
.cr_uid
)->lh_first
;
225 nuidp
!= 0; nuidp
= nuidp
->nu_hash
.le_next
) {
226 if (nuidp
->nu_cr
.cr_uid
== nsd
->nsd_cr
.cr_uid
&&
227 (!nfsd
->nfsd_nd
->nd_nam2
||
228 netaddr_match(AF_INET
,
229 &nuidp
->nu_haddr
, nfsd
->nfsd_nd
->nd_nam2
)))
233 nfsrv_setcred(&nuidp
->nu_cr
,&nfsd
->nfsd_nd
->nd_cr
);
234 nfsd
->nfsd_nd
->nd_flag
|= ND_KERBFULL
;
239 if (slp
->ns_numuids
< nuidhash_max
) {
241 nuidp
= (struct nfsuid
*)
242 kmalloc(sizeof (struct nfsuid
), M_NFSUID
,
246 if ((slp
->ns_flag
& SLP_VALID
) == 0) {
248 kfree((caddr_t
)nuidp
, M_NFSUID
);
251 nuidp
= TAILQ_FIRST(&slp
->ns_uidlruhead
);
252 LIST_REMOVE(nuidp
, nu_hash
);
253 TAILQ_REMOVE(&slp
->ns_uidlruhead
, nuidp
,
255 if (nuidp
->nu_flag
& NU_NAM
)
256 kfree(nuidp
->nu_nam
, M_SONAME
);
259 nuidp
->nu_cr
= nsd
->nsd_cr
;
260 if (nuidp
->nu_cr
.cr_ngroups
> NGROUPS
)
261 nuidp
->nu_cr
.cr_ngroups
= NGROUPS
;
262 nuidp
->nu_cr
.cr_ref
= 1;
263 nuidp
->nu_timestamp
= nsd
->nsd_timestamp
;
264 nuidp
->nu_expire
= time_uptime
+ nsd
->nsd_ttl
;
266 * and save the session key in nu_key.
268 bcopy(nsd
->nsd_key
, nuidp
->nu_key
,
269 sizeof (nsd
->nsd_key
));
270 if (nfsd
->nfsd_nd
->nd_nam2
) {
271 struct sockaddr_in
*saddr
;
273 saddr
= (struct sockaddr_in
*)
274 nfsd
->nfsd_nd
->nd_nam2
;
275 switch (saddr
->sin_family
) {
277 nuidp
->nu_flag
|= NU_INETADDR
;
279 saddr
->sin_addr
.s_addr
;
282 nuidp
->nu_flag
|= NU_NAM
;
284 dup_sockaddr(nfsd
->nfsd_nd
->nd_nam2
);
288 TAILQ_INSERT_TAIL(&slp
->ns_uidlruhead
, nuidp
,
290 LIST_INSERT_HEAD(NUIDHASH(slp
, nsd
->nsd_uid
),
292 nfsrv_setcred(&nuidp
->nu_cr
,
293 &nfsd
->nfsd_nd
->nd_cr
);
294 nfsd
->nfsd_nd
->nd_flag
|= ND_KERBFULL
;
298 if ((uap
->flag
& NFSSVC_AUTHINFAIL
) && (nfsd
= nsd
->nsd_nfsd
))
299 nfsd
->nfsd_flag
|= NFSD_AUTHFAIL
;
300 error
= nfssvc_nfsd(nsd
, uap
->argp
, td
);
302 #endif /* NFS_NOSERVER */
303 if (error
== EINTR
|| error
== ERESTART
)
306 lwkt_reltoken(&nfs_token
);
312 * Adds a socket to the list for servicing by nfsds.
315 nfssvc_addsock(struct file
*fp
, struct sockaddr
*mynam
, struct thread
*td
)
318 struct nfssvc_sock
*slp
;
322 so
= (struct socket
*)fp
->f_data
;
326 * Add it to the list, as required.
328 if (so
->so_proto
->pr_protocol
== IPPROTO_UDP
) {
330 if (tslp
->ns_flag
& SLP_VALID
) {
332 kfree(mynam
, M_SONAME
);
338 * Reserve buffer space in the socket. Note that due to bugs in
339 * Linux's delayed-ack code, serious performance degradation may
340 * occur with linux hosts if the minimum is used.
342 * NFS sockets are not limited to the standard sb_max or by
345 if (so
->so_type
== SOCK_STREAM
)
346 siz
= NFS_MAXPACKET
+ sizeof (u_long
);
349 if (siz
< nfs_soreserve
)
352 error
= soreserve(so
, siz
, siz
, NULL
);
355 kfree(mynam
, M_SONAME
);
360 * Set protocol specific options { for now TCP only } and
361 * reserve some space. For datagram sockets, this can get called
362 * repeatedly for the same socket, but that isn't harmful.
364 if (so
->so_type
== SOCK_STREAM
) {
368 bzero(&sopt
, sizeof sopt
);
369 sopt
.sopt_level
= SOL_SOCKET
;
370 sopt
.sopt_name
= SO_KEEPALIVE
;
371 sopt
.sopt_val
= &val
;
372 sopt
.sopt_valsize
= sizeof val
;
376 if (so
->so_proto
->pr_domain
->dom_family
== AF_INET
&&
377 so
->so_proto
->pr_protocol
== IPPROTO_TCP
) {
381 bzero(&sopt
, sizeof sopt
);
382 sopt
.sopt_level
= IPPROTO_TCP
;
383 sopt
.sopt_name
= TCP_NODELAY
;
384 sopt
.sopt_val
= &val
;
385 sopt
.sopt_valsize
= sizeof val
;
389 bzero(&sopt
, sizeof sopt
);
390 sopt
.sopt_level
= IPPROTO_TCP
;
391 sopt
.sopt_name
= TCP_FASTKEEP
;
392 sopt
.sopt_val
= &val
;
393 sopt
.sopt_valsize
= sizeof val
;
397 atomic_clear_int(&so
->so_rcv
.ssb_flags
, SSB_NOINTR
);
398 so
->so_rcv
.ssb_timeo
= 0;
399 atomic_clear_int(&so
->so_snd
.ssb_flags
, SSB_NOINTR
);
400 so
->so_snd
.ssb_timeo
= 0;
403 * Clear AUTOSIZE, otherwise the socket buffer could be reduced
404 * to the point where rpc's cannot be queued using the mbuf
407 atomic_clear_int(&so
->so_rcv
.ssb_flags
, SSB_AUTOSIZE
);
408 atomic_clear_int(&so
->so_snd
.ssb_flags
, SSB_AUTOSIZE
);
410 slp
= kmalloc(sizeof (struct nfssvc_sock
), M_NFSSVC
, M_WAITOK
| M_ZERO
);
411 mtx_init(&slp
->ns_solock
, "nfsvc");
412 STAILQ_INIT(&slp
->ns_rec
);
413 TAILQ_INIT(&slp
->ns_uidlruhead
);
414 lwkt_token_init(&slp
->ns_token
, "nfssrv_token");
416 lwkt_gettoken(&nfs_token
);
418 TAILQ_INSERT_TAIL(&nfssvc_sockhead
, slp
, ns_chain
);
419 lwkt_gettoken(&slp
->ns_token
);
426 so
->so_upcallarg
= (caddr_t
)slp
;
427 so
->so_upcall
= nfsrv_rcv_upcall
;
428 atomic_set_int(&so
->so_rcv
.ssb_flags
, SSB_UPCALL
);
429 slp
->ns_flag
= (SLP_VALID
| SLP_NEEDQ
);
430 nfsrv_wakenfsd(slp
, 1);
432 lwkt_reltoken(&slp
->ns_token
);
433 lwkt_reltoken(&nfs_token
);
439 * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
440 * until it is killed by a signal.
443 nfssvc_nfsd(struct nfsd_srvargs
*nsd
, caddr_t argp
, struct thread
*td
)
446 struct nfssvc_sock
*slp
;
447 struct nfsd
*nfsd
= nsd
->nsd_nfsd
;
448 struct nfsrv_descript
*nd
= NULL
;
449 struct mbuf
*m
, *mreq
;
450 int error
, cacherep
, sotype
, writes_todo
;
459 lwkt_gettoken(&nfs_token
);
462 nsd
->nsd_nfsd
= nfsd
= (struct nfsd
*)
463 kmalloc(sizeof (struct nfsd
), M_NFSD
, M_WAITOK
|M_ZERO
);
465 TAILQ_INSERT_TAIL(&nfsd_head
, nfsd
, nfsd_chain
);
470 * Loop getting rpc requests until SIGKILL.
473 if ((nfsd
->nfsd_flag
& NFSD_REQINPROG
) == 0) {
474 while (nfsd
->nfsd_slp
== NULL
&&
475 (nfsd_head_flag
& NFSD_CHECKSLP
) == 0) {
476 nfsd
->nfsd_flag
|= NFSD_WAITING
;
478 error
= tsleep(nfsd
, PCATCH
, "nfsd", 0);
480 if (error
&& nfsd
->nfsd_slp
== NULL
)
483 if (nfsd
->nfsd_slp
== NULL
&&
484 (nfsd_head_flag
& NFSD_CHECKSLP
)) {
485 TAILQ_FOREACH(slp
, &nfssvc_sockhead
, ns_chain
) {
486 if ((slp
->ns_flag
& SLP_ACTION_MASK
) ||
487 slp
->ns_needq_upcall
) {
489 nfsd
->nfsd_slp
= slp
;
494 nfsd_head_flag
&= ~NFSD_CHECKSLP
;
496 if ((slp
= nfsd
->nfsd_slp
) == NULL
)
499 lwkt_reltoken(&nfs_token
);
500 lwkt_gettoken(&slp
->ns_token
);
502 if (slp
->ns_needq_upcall
) {
503 slp
->ns_needq_upcall
= 0;
504 slp
->ns_flag
|= SLP_NEEDQ
;
507 if (slp
->ns_flag
& SLP_VALID
) {
509 * We can both process additional received
510 * data into new records and process existing
511 * records. This keeps the pipeline hot by
512 * allowing the tcp socket to continue to
513 * drain while we are processing records.
515 while (slp
->ns_flag
& (SLP_DISCONN
|SLP_NEEDQ
)) {
516 if (slp
->ns_flag
& SLP_DISCONN
) {
520 if (slp
->ns_flag
& SLP_NEEDQ
) {
527 nfsrv_rcv(slp
->ns_so
,
534 error
= nfsrv_dorec(slp
, nfsd
, &nd
);
535 cur_usec
= nfs_curusec();
536 if (error
&& slp
->ns_tq
.lh_first
&&
537 slp
->ns_tq
.lh_first
->nd_time
<= cur_usec
) {
544 nfsd
->nfsd_flag
|= NFSD_REQINPROG
;
546 slp
->ns_flag
&= ~SLP_ACTION_MASK
;
551 slp
= nfsd
->nfsd_slp
;
553 lwkt_reltoken(&nfs_token
);
554 lwkt_gettoken(&slp
->ns_token
);
556 if (slp
->ns_needq_upcall
) {
557 slp
->ns_needq_upcall
= 0;
558 slp
->ns_flag
|= SLP_NEEDQ
;
560 if (NFSRV_RECLIMIT(slp
) == 0 &&
561 (slp
->ns_flag
& SLP_NEEDQ
)) {
563 if (NFSRV_RECLIMIT(slp
) == 0 &&
564 (slp
->ns_flag
& SLP_NEEDQ
)) {
565 nfsrv_rcv(slp
->ns_so
, (caddr_t
)slp
,
573 * nfs_token not held here. slp token is held.
575 if (error
|| (slp
->ns_flag
& SLP_VALID
) == 0) {
577 kfree((caddr_t
)nd
, M_NFSRVDESC
);
580 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
581 if (slp
->ns_flag
& SLP_ACTION_MASK
) {
582 lwkt_reltoken(&slp
->ns_token
);
583 lwkt_gettoken(&nfs_token
);
585 nfsd
->nfsd_slp
= NULL
;
586 lwkt_reltoken(&slp
->ns_token
);
587 lwkt_gettoken(&nfs_token
);
592 sotype
= slp
->ns_so
->so_type
;
595 * Execute the NFS request - handle the server side cache
597 * nfs_token not held here. slp token is held.
600 getmicrotime(&nd
->nd_starttime
);
602 nd
->nd_nam
= nd
->nd_nam2
;
604 nd
->nd_nam
= slp
->ns_nam
;
607 * Check to see if authorization is needed.
609 if (nfsd
->nfsd_flag
& NFSD_NEEDAUTH
) {
610 nfsd
->nfsd_flag
&= ~NFSD_NEEDAUTH
;
612 ((struct sockaddr_in
*)
613 nd
->nd_nam
)->sin_addr
.s_addr
;
614 nsd
->nsd_authlen
= nfsd
->nfsd_authlen
;
615 nsd
->nsd_verflen
= nfsd
->nfsd_verflen
;
616 if (!copyout(nfsd
->nfsd_authstr
,nsd
->nsd_authstr
,
617 nfsd
->nfsd_authlen
) &&
618 !copyout(nfsd
->nfsd_verfstr
, nsd
->nsd_verfstr
,
619 nfsd
->nfsd_verflen
) &&
620 !copyout((caddr_t
)nsd
, argp
, sizeof (*nsd
)))
622 lwkt_reltoken(&slp
->ns_token
);
625 cacherep
= RC_DROPIT
;
627 cacherep
= nfsrv_getcache(nd
, slp
, &mreq
);
630 if (nfsd
->nfsd_flag
& NFSD_AUTHFAIL
) {
631 nfsd
->nfsd_flag
&= ~NFSD_AUTHFAIL
;
632 nd
->nd_procnum
= NFSPROC_NOOP
;
633 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
635 } else if (nfs_privport
) {
636 /* Check if source port is privileged */
638 struct sockaddr
*nam
= nd
->nd_nam
;
639 struct sockaddr_in
*sin
;
641 sin
= (struct sockaddr_in
*)nam
;
642 port
= ntohs(sin
->sin_port
);
643 if (port
>= IPPORT_RESERVED
&&
644 nd
->nd_procnum
!= NFSPROC_NULL
) {
645 nd
->nd_procnum
= NFSPROC_NOOP
;
646 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
648 kprintf("NFS request from unprivileged port (%s:%d)\n",
649 inet_ntoa(sin
->sin_addr
), port
);
655 * Execute the NFS request - direct execution
657 * Loop to get all the write rpc replies that have been
660 * nfs_token not held here. slp token is held.
665 if (nd
&& (nd
->nd_flag
& ND_NFSV3
))
666 procrastinate
= nfsrvw_procrastinate_v3
;
668 procrastinate
= nfsrvw_procrastinate
;
669 if (writes_todo
|| (nd
->nd_procnum
== NFSPROC_WRITE
&&
672 error
= nfsrv_writegather(&nd
, slp
,
673 nfsd
->nfsd_td
, &mreq
);
675 /* NOT YET lwkt_reltoken(&slp->ns_token); */
676 error
= (*(nfsrv3_procs
[nd
->nd_procnum
]))(nd
,
677 slp
, nfsd
->nfsd_td
, &mreq
);
678 /* NOT YET lwkt_gettoken(&slp->ns_token); */
679 lwpkthreaddeferred(); /* vnlru issues */
683 if (error
!= 0 && error
!= NFSERR_RETVOID
) {
684 if (nd
->nd_procnum
!= NQNFSPROC_VACATED
)
686 nfsrv_updatecache(nd
, FALSE
, mreq
);
688 kfree(nd
->nd_nam2
, M_SONAME
);
691 nfsstats
.srvrpccnt
[nd
->nd_procnum
]++;
692 nfsrv_updatecache(nd
, TRUE
, mreq
);
702 if (siz
<= 0 || siz
> NFS_MAXPACKET
) {
703 kprintf("mbuf siz=%d\n",siz
);
704 panic("Bad nfs svc reply");
707 m
->m_pkthdr
.len
= siz
;
708 m
->m_pkthdr
.rcvif
= NULL
;
710 * For stream protocols, prepend a Sun RPC
713 if (sotype
== SOCK_STREAM
) {
714 M_PREPEND(m
, NFSX_UNSIGNED
, M_WAITOK
);
720 *mtod(m
, u_int32_t
*) = htonl(0x80000000 | siz
);
722 if ((slp
->ns_flag
& SLP_VALID
) &&
723 (slp
->ns_so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)){
729 if (slp
->ns_flag
& SLP_VALID
) {
730 error
= nfs_send(slp
->ns_so
, nd
->nd_nam2
, m
, NULL
);
737 nfsd_rt(sotype
, nd
, cacherep
);
739 kfree(nd
->nd_nam2
, M_SONAME
);
741 m_freem(nd
->nd_mrep
);
742 if (error
== EPIPE
|| error
== ENOBUFS
)
746 if (error
== EINTR
|| error
== ERESTART
) {
747 kfree((caddr_t
)nd
, M_NFSRVDESC
);
748 lwkt_reltoken(&slp
->ns_token
);
749 lwkt_gettoken(&nfs_token
);
750 nfsd
->nfsd_slp
= NULL
;
757 nfsd_rt(sotype
, nd
, cacherep
);
758 m_freem(nd
->nd_mrep
);
760 kfree(nd
->nd_nam2
, M_SONAME
);
764 kfree((caddr_t
)nd
, M_NFSRVDESC
);
769 * Check to see if there are outstanding writes that
770 * need to be serviced.
772 cur_usec
= nfs_curusec();
773 if (slp
->ns_tq
.lh_first
&&
774 slp
->ns_tq
.lh_first
->nd_time
<= cur_usec
) {
780 } while (writes_todo
);
783 * nfs_token not held here. slp token is held.
785 if (nfsrv_dorec(slp
, nfsd
, &nd
)) {
786 nfsd
->nfsd_flag
&= ~NFSD_REQINPROG
;
787 if (slp
->ns_flag
& SLP_ACTION_MASK
) {
788 lwkt_reltoken(&slp
->ns_token
);
789 lwkt_gettoken(&nfs_token
);
791 nfsd
->nfsd_slp
= NULL
;
792 lwkt_reltoken(&slp
->ns_token
);
793 lwkt_gettoken(&nfs_token
);
797 lwkt_reltoken(&slp
->ns_token
);
798 lwkt_gettoken(&nfs_token
);
802 TAILQ_REMOVE(&nfsd_head
, nfsd
, nfsd_chain
);
803 kfree((caddr_t
)nfsd
, M_NFSD
);
804 nsd
->nsd_nfsd
= NULL
;
805 if (--nfs_numnfsd
== 0)
806 nfsrv_init(TRUE
); /* Reinitialize everything */
808 lwkt_reltoken(&nfs_token
);
813 * Shut down a socket associated with an nfssvc_sock structure.
814 * Should be called with the send lock set, if required.
816 * The trick here is to increment the sref at the start, so that the nfsds
817 * will stop using it and clear ns_flag at the end so that it will not be
818 * reassigned during cleanup.
820 * That said, while we shutdown() the socket here, we don't actually destroy
821 * it until the final deref as there might be other code in the middle of
825 nfsrv_zapsock(struct nfssvc_sock
*slp
)
827 struct nfsuid
*nuidp
, *nnuidp
;
828 struct nfsrv_descript
*nwp
, *nnwp
;
830 struct nfsrv_rec
*rec
;
833 wasvalid
= slp
->ns_flag
& SLP_VALID
;
834 slp
->ns_flag
&= ~SLP_ALLFLAGS
;
835 if (wasvalid
&& slp
->ns_fp
) {
837 atomic_clear_int(&so
->so_rcv
.ssb_flags
, SSB_UPCALL
);
838 so
->so_upcall
= NULL
;
839 so
->so_upcallarg
= NULL
;
840 soshutdown(so
, SHUT_RDWR
);
842 kfree(slp
->ns_nam
, M_SONAME
);
846 m_freem(slp
->ns_raw
);
849 while ((rec
= STAILQ_FIRST(&slp
->ns_rec
)) != NULL
) {
851 STAILQ_REMOVE_HEAD(&slp
->ns_rec
, nr_link
);
853 kfree(rec
->nr_address
, M_SONAME
);
854 m_freem(rec
->nr_packet
);
855 kfree(rec
, M_NFSRVDESC
);
857 KKASSERT(slp
->ns_numrec
== 0);
859 TAILQ_FOREACH_MUTABLE(nuidp
, &slp
->ns_uidlruhead
, nu_lru
,
861 LIST_REMOVE(nuidp
, nu_hash
);
862 TAILQ_REMOVE(&slp
->ns_uidlruhead
, nuidp
, nu_lru
);
863 if (nuidp
->nu_flag
& NU_NAM
)
864 kfree(nuidp
->nu_nam
, M_SONAME
);
865 kfree((caddr_t
)nuidp
, M_NFSUID
);
867 crit_enter(); /* XXX doesn't do anything any more */
868 for (nwp
= slp
->ns_tq
.lh_first
; nwp
; nwp
= nnwp
) {
869 nnwp
= nwp
->nd_tq
.le_next
;
870 LIST_REMOVE(nwp
, nd_tq
);
871 kfree((caddr_t
)nwp
, M_NFSRVDESC
);
873 LIST_INIT(&slp
->ns_tq
);
881 * Dereference a server socket structure. If it has no more references and
882 * is no longer valid, you can throw it away.
884 * Must be holding nfs_token!
887 nfsrv_slpderef(struct nfssvc_sock
*slp
)
891 ASSERT_LWKT_TOKEN_HELD(&nfs_token
);
892 if (slp
->ns_sref
== 1) {
893 KKASSERT((slp
->ns_flag
& SLP_VALID
) == 0);
894 TAILQ_REMOVE(&nfssvc_sockhead
, slp
, ns_chain
);
901 kfree((caddr_t
)slp
, M_NFSSVC
);
908 nfsrv_slpref(struct nfssvc_sock
*slp
)
910 ASSERT_LWKT_TOKEN_HELD(&nfs_token
);
915 * Lock a socket against others.
917 * Returns 0 on failure, 1 on success.
920 nfs_slplock(struct nfssvc_sock
*slp
, int wait
)
922 mtx_t
*mtx
= &slp
->ns_solock
;
925 mtx_lock_ex(mtx
, 0, 0);
927 } else if (mtx_lock_ex_try(mtx
) == 0) {
935 * Unlock the stream socket for others.
938 nfs_slpunlock(struct nfssvc_sock
*slp
)
940 mtx_t
*mtx
= &slp
->ns_solock
;
946 * Initialize the data structures for the server.
947 * Handshake with any new nfsds starting up to avoid any chance of
951 nfsrv_init(int terminating
)
953 struct nfssvc_sock
*slp
, *nslp
;
955 lwkt_gettoken(&nfs_token
);
956 if (nfssvc_sockhead_flag
& SLP_INIT
)
958 nfssvc_sockhead_flag
|= SLP_INIT
;
961 TAILQ_FOREACH_MUTABLE(slp
, &nfssvc_sockhead
, ns_chain
, nslp
) {
963 lwkt_gettoken(&slp
->ns_token
);
964 if (slp
->ns_flag
& SLP_VALID
)
966 lwkt_reltoken(&slp
->ns_token
);
969 nfsrv_cleancache(); /* And clear out server cache */
971 nfs_pub
.np_valid
= 0;
974 TAILQ_INIT(&nfssvc_sockhead
);
975 nfssvc_sockhead_flag
&= ~SLP_INIT
;
976 if (nfssvc_sockhead_flag
& SLP_WANTINIT
) {
977 nfssvc_sockhead_flag
&= ~SLP_WANTINIT
;
978 wakeup((caddr_t
)&nfssvc_sockhead
);
981 TAILQ_INIT(&nfsd_head
);
982 nfsd_head_flag
&= ~NFSD_CHECKSLP
;
984 lwkt_reltoken(&nfs_token
);
987 nfs_udpsock
= (struct nfssvc_sock
*)
988 kmalloc(sizeof (struct nfssvc_sock
), M_NFSSVC
, M_WAITOK
| M_ZERO
);
989 mtx_init(&nfs_udpsock
->ns_solock
);
990 STAILQ_INIT(&nfs_udpsock
->ns_rec
);
991 TAILQ_INIT(&nfs_udpsock
->ns_uidlruhead
);
992 TAILQ_INSERT_HEAD(&nfssvc_sockhead
, nfs_udpsock
, ns_chain
);
994 nfs_cltpsock
= (struct nfssvc_sock
*)
995 kmalloc(sizeof (struct nfssvc_sock
), M_NFSSVC
, M_WAITOK
| M_ZERO
);
996 mtx_init(&nfs_cltpsock
->ns_solock
);
997 STAILQ_INIT(&nfs_cltpsock
->ns_rec
);
998 TAILQ_INIT(&nfs_cltpsock
->ns_uidlruhead
);
999 TAILQ_INSERT_TAIL(&nfssvc_sockhead
, nfs_cltpsock
, ns_chain
);
1004 * Add entries to the server monitor log.
1007 nfsd_rt(int sotype
, struct nfsrv_descript
*nd
, int cacherep
)
1011 rt
= &nfsdrt
.drt
[nfsdrt
.pos
];
1012 if (cacherep
== RC_DOIT
)
1014 else if (cacherep
== RC_REPLY
)
1015 rt
->flag
= DRT_CACHEREPLY
;
1017 rt
->flag
= DRT_CACHEDROP
;
1018 if (sotype
== SOCK_STREAM
)
1019 rt
->flag
|= DRT_TCP
;
1020 if (nd
->nd_flag
& ND_NFSV3
)
1021 rt
->flag
|= DRT_NFSV3
;
1022 rt
->proc
= nd
->nd_procnum
;
1023 if (nd
->nd_nam
->sa_family
== AF_INET
)
1024 rt
->ipadr
= ((struct sockaddr_in
*)nd
->nd_nam
)->sin_addr
.s_addr
;
1026 rt
->ipadr
= INADDR_ANY
;
1027 rt
->resptime
= nfs_curusec() - (nd
->nd_starttime
.tv_sec
* 1000000 + nd
->nd_starttime
.tv_usec
);
1028 getmicrotime(&rt
->tstamp
);
1029 nfsdrt
.pos
= (nfsdrt
.pos
+ 1) % NFSRTTLOGSIZ
;
1031 #endif /* NFS_NOSERVER */
1034 * Get an authorization string for the uid by having the mount_nfs sitting
1035 * on this mount point porpoise out of the kernel and do it.
1038 nfs_getauth(struct nfsmount
*nmp
, struct nfsreq
*rep
,
1039 struct ucred
*cred
, char **auth_str
, int *auth_len
, char *verf_str
,
1040 int *verf_len
, NFSKERBKEY_T key
/* return session key */)
1044 while ((nmp
->nm_state
& NFSSTA_WAITAUTH
) == 0) {
1045 nmp
->nm_state
|= NFSSTA_WANTAUTH
;
1046 (void) tsleep((caddr_t
)&nmp
->nm_authtype
, 0,
1047 "nfsauth1", 2 * hz
);
1048 error
= nfs_sigintr(nmp
, rep
, rep
->r_td
);
1050 nmp
->nm_state
&= ~NFSSTA_WANTAUTH
;
1054 nmp
->nm_state
&= ~(NFSSTA_WAITAUTH
| NFSSTA_WANTAUTH
);
1055 nmp
->nm_authstr
= *auth_str
= (char *)kmalloc(RPCAUTH_MAXSIZ
, M_TEMP
, M_WAITOK
);
1056 nmp
->nm_authlen
= RPCAUTH_MAXSIZ
;
1057 nmp
->nm_verfstr
= verf_str
;
1058 nmp
->nm_verflen
= *verf_len
;
1059 nmp
->nm_authuid
= cred
->cr_uid
;
1060 wakeup((caddr_t
)&nmp
->nm_authstr
);
1063 * And wait for mount_nfs to do its stuff.
1065 while ((nmp
->nm_state
& NFSSTA_HASAUTH
) == 0 && error
== 0) {
1066 (void) tsleep((caddr_t
)&nmp
->nm_authlen
, 0,
1067 "nfsauth2", 2 * hz
);
1068 error
= nfs_sigintr(nmp
, rep
, rep
->r_td
);
1070 if (nmp
->nm_state
& NFSSTA_AUTHERR
) {
1071 nmp
->nm_state
&= ~NFSSTA_AUTHERR
;
1075 kfree((caddr_t
)*auth_str
, M_TEMP
);
1077 *auth_len
= nmp
->nm_authlen
;
1078 *verf_len
= nmp
->nm_verflen
;
1079 bcopy((caddr_t
)nmp
->nm_key
, (caddr_t
)key
, sizeof (NFSKERBKEY_T
));
1081 nmp
->nm_state
&= ~NFSSTA_HASAUTH
;
1082 nmp
->nm_state
|= NFSSTA_WAITAUTH
;
1083 if (nmp
->nm_state
& NFSSTA_WANTAUTH
) {
1084 nmp
->nm_state
&= ~NFSSTA_WANTAUTH
;
1085 wakeup((caddr_t
)&nmp
->nm_authtype
);
1091 * Get a nickname authenticator and verifier.
1094 nfs_getnickauth(struct nfsmount
*nmp
, struct ucred
*cred
, char **auth_str
,
1095 int *auth_len
, char *verf_str
, int verf_len
)
1097 struct nfsuid
*nuidp
;
1098 u_int32_t
*nickp
, *verfp
;
1099 struct timeval ktvout
;
1102 if (verf_len
< (4 * NFSX_UNSIGNED
))
1103 panic("nfs_getnickauth verf too small");
1105 for (nuidp
= NMUIDHASH(nmp
, cred
->cr_uid
)->lh_first
;
1106 nuidp
!= NULL
; nuidp
= nuidp
->nu_hash
.le_next
) {
1107 if (nuidp
->nu_cr
.cr_uid
== cred
->cr_uid
)
1110 if (!nuidp
|| nuidp
->nu_expire
< time_uptime
)
1114 * Move to the end of the lru list (end of lru == most recently used).
1116 TAILQ_REMOVE(&nmp
->nm_uidlruhead
, nuidp
, nu_lru
);
1117 TAILQ_INSERT_TAIL(&nmp
->nm_uidlruhead
, nuidp
, nu_lru
);
1119 nickp
= (u_int32_t
*)kmalloc(2 * NFSX_UNSIGNED
, M_TEMP
, M_WAITOK
);
1120 *nickp
++ = txdr_unsigned(RPCAKN_NICKNAME
);
1121 *nickp
= txdr_unsigned(nuidp
->nu_nickname
);
1122 *auth_str
= (char *)nickp
;
1123 *auth_len
= 2 * NFSX_UNSIGNED
;
1126 * Now we must encrypt the verifier and package it up.
1128 verfp
= (u_int32_t
*)verf_str
;
1129 *verfp
++ = txdr_unsigned(RPCAKN_NICKNAME
);
1130 if (time_second
!= nuidp
->nu_timestamp
.tv_sec
||
1131 (time_second
== nuidp
->nu_timestamp
.tv_sec
&&
1132 time_second
> nuidp
->nu_timestamp
.tv_usec
)) /* XXX */
1133 getmicrotime(&nuidp
->nu_timestamp
);
1135 nuidp
->nu_timestamp
.tv_usec
++;
1138 * Now encrypt the timestamp verifier in ecb mode using the session
1148 *verfp
++ = ktvout
.tv_sec
;
1149 *verfp
++ = ktvout
.tv_usec
;
1155 * Save the current nickname in a hash list entry on the mount point.
1158 nfs_savenickauth(struct nfsmount
*nmp
, struct ucred
*cred
, int len
,
1159 NFSKERBKEY_T key
, struct mbuf
**mdp
, char **dposp
,
1162 struct nfsuid
*nuidp
;
1164 struct timeval ktvin
, ktvout
;
1166 int deltasec
, error
= 0;
1167 struct nfsm_info info
;
1173 if (len
== (3 * NFSX_UNSIGNED
)) {
1174 NULLOUT(tl
= nfsm_dissect(&info
, 3 * NFSX_UNSIGNED
));
1175 ktvin
.tv_sec
= *tl
++;
1176 ktvin
.tv_usec
= *tl
++;
1177 nick
= fxdr_unsigned(u_int32_t
, *tl
);
1180 * Decrypt the timestamp in ecb mode.
1188 ktvout
.tv_sec
= fxdr_unsigned(long, ktvout
.tv_sec
);
1189 ktvout
.tv_usec
= fxdr_unsigned(long, ktvout
.tv_usec
);
1190 deltasec
= time_second
- ktvout
.tv_sec
;
1192 deltasec
= -deltasec
;
1194 * If ok, add it to the hash list for the mount point.
1196 if (deltasec
<= NFS_KERBCLOCKSKEW
) {
1197 if (nmp
->nm_numuids
< nuidhash_max
) {
1199 nuidp
= (struct nfsuid
*)
1200 kmalloc(sizeof (struct nfsuid
), M_NFSUID
,
1203 nuidp
= TAILQ_FIRST(&nmp
->nm_uidlruhead
);
1204 LIST_REMOVE(nuidp
, nu_hash
);
1205 TAILQ_REMOVE(&nmp
->nm_uidlruhead
, nuidp
,
1209 nuidp
->nu_cr
.cr_uid
= cred
->cr_uid
;
1210 nuidp
->nu_expire
= time_uptime
+ NFS_KERBTTL
;
1211 nuidp
->nu_timestamp
= ktvout
;
1212 nuidp
->nu_nickname
= nick
;
1213 bcopy(key
, nuidp
->nu_key
, sizeof (NFSKERBKEY_T
));
1214 TAILQ_INSERT_TAIL(&nmp
->nm_uidlruhead
, nuidp
,
1216 LIST_INSERT_HEAD(NMUIDHASH(nmp
, cred
->cr_uid
),
1220 ERROROUT(nfsm_adv(&info
, nfsm_rndup(len
)));