2 * Copyright (c) 1989, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
37 * $FreeBSD: src/sys/nfs/nfs_socket.c,v 1.60.2.6 2003/03/26 01:44:46 alfred Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_socket.c,v 1.45 2007/05/18 17:05:13 dillon Exp $
42 * Socket operations for use by nfs
45 #include <sys/param.h>
46 #include <sys/systm.h>
48 #include <sys/malloc.h>
49 #include <sys/mount.h>
50 #include <sys/kernel.h>
52 #include <sys/vnode.h>
53 #include <sys/fcntl.h>
54 #include <sys/protosw.h>
55 #include <sys/resourcevar.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/socketops.h>
59 #include <sys/syslog.h>
60 #include <sys/thread.h>
61 #include <sys/tprintf.h>
62 #include <sys/sysctl.h>
63 #include <sys/signalvar.h>
64 #include <sys/signal2.h>
66 #include <netinet/in.h>
67 #include <netinet/tcp.h>
68 #include <sys/thread2.h>
74 #include "nfsm_subs.h"
83 * Estimate rto for an nfs rpc sent via an unreliable datagram.
84 * Use the mean and mean deviation of rtt for the appropriate type of rpc
85 * for the frequent rpcs and a default for the others.
86 * The justification for doing "other" this way is that these rpcs
87 * happen so infrequently that timer est. would probably be stale.
88 * Also, since many of these rpcs are
89 * non-idempotent, a conservative timeout is desired.
90 * getattr, lookup - A+2D
94 #define NFS_RTO(n, t) \
95 ((t) == 0 ? (n)->nm_timeo : \
97 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
98 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
99 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
100 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
102 * External data, mostly RPC constants in XDR form
104 extern u_int32_t rpc_reply
, rpc_msgdenied
, rpc_mismatch
, rpc_vers
,
105 rpc_auth_unix
, rpc_msgaccepted
, rpc_call
, rpc_autherr
,
107 extern u_int32_t nfs_prog
;
108 extern struct nfsstats nfsstats
;
109 extern int nfsv3_procid
[NFS_NPROCS
];
110 extern int nfs_ticks
;
113 * Defines which timer to use for the procnum.
120 static int proct
[NFS_NPROCS
] = {
121 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
125 static int nfs_realign_test
;
126 static int nfs_realign_count
;
127 static int nfs_bufpackets
= 4;
128 static int nfs_timer_raced
;
130 SYSCTL_DECL(_vfs_nfs
);
132 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, realign_test
, CTLFLAG_RW
, &nfs_realign_test
, 0, "");
133 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, realign_count
, CTLFLAG_RW
, &nfs_realign_count
, 0, "");
134 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, bufpackets
, CTLFLAG_RW
, &nfs_bufpackets
, 0, "");
138 * There is a congestion window for outstanding rpcs maintained per mount
139 * point. The cwnd size is adjusted in roughly the way that:
140 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
141 * SIGCOMM '88". ACM, August 1988.
142 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
143 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
144 * of rpcs is in progress.
145 * (The sent count and cwnd are scaled for integer arith.)
146 * Variants of "slow start" were tried and were found to be too much of a
147 * performance hit (ave. rtt 3 times larger),
148 * I suspect due to the large rtt that nfs rpcs have.
150 #define NFS_CWNDSCALE 256
151 #define NFS_MAXCWND (NFS_CWNDSCALE * 32)
152 static int nfs_backoff
[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
154 struct nfsrtt nfsrtt
;
155 struct callout nfs_timer_handle
;
157 static int nfs_msg (struct thread
*,char *,char *);
158 static int nfs_rcvlock (struct nfsreq
*);
159 static void nfs_rcvunlock (struct nfsreq
*);
160 static void nfs_realign (struct mbuf
**pm
, int hsiz
);
161 static int nfs_receive (struct nfsreq
*rep
, struct sockaddr
**aname
,
163 static void nfs_softterm (struct nfsreq
*rep
);
164 static int nfs_reconnect (struct nfsreq
*rep
);
166 static int nfsrv_getstream (struct nfssvc_sock
*, int, int *);
168 int (*nfsrv3_procs
[NFS_NPROCS
]) (struct nfsrv_descript
*nd
,
169 struct nfssvc_sock
*slp
,
171 struct mbuf
**mreqp
) = {
199 #endif /* NFS_NOSERVER */
202 * Initialize sockets and congestion for a new NFS connection.
203 * We do not free the sockaddr if error.
206 nfs_connect(struct nfsmount
*nmp
, struct nfsreq
*rep
)
209 int error
, rcvreserve
, sndreserve
;
211 struct sockaddr
*saddr
;
212 struct sockaddr_in
*sin
;
213 struct thread
*td
= &thread0
; /* only used for socreate and sobind */
215 nmp
->nm_so
= (struct socket
*)0;
217 error
= socreate(saddr
->sa_family
, &nmp
->nm_so
, nmp
->nm_sotype
,
218 nmp
->nm_soproto
, td
);
222 nmp
->nm_soflags
= so
->so_proto
->pr_flags
;
225 * Some servers require that the client port be a reserved port number.
227 if (saddr
->sa_family
== AF_INET
&& (nmp
->nm_flag
& NFSMNT_RESVPORT
)) {
230 struct sockaddr_in ssin
;
232 bzero(&sopt
, sizeof sopt
);
233 ip
= IP_PORTRANGE_LOW
;
234 sopt
.sopt_level
= IPPROTO_IP
;
235 sopt
.sopt_name
= IP_PORTRANGE
;
236 sopt
.sopt_val
= (void *)&ip
;
237 sopt
.sopt_valsize
= sizeof(ip
);
239 error
= sosetopt(so
, &sopt
);
242 bzero(&ssin
, sizeof ssin
);
244 sin
->sin_len
= sizeof (struct sockaddr_in
);
245 sin
->sin_family
= AF_INET
;
246 sin
->sin_addr
.s_addr
= INADDR_ANY
;
247 sin
->sin_port
= htons(0);
248 error
= sobind(so
, (struct sockaddr
*)sin
, td
);
251 bzero(&sopt
, sizeof sopt
);
252 ip
= IP_PORTRANGE_DEFAULT
;
253 sopt
.sopt_level
= IPPROTO_IP
;
254 sopt
.sopt_name
= IP_PORTRANGE
;
255 sopt
.sopt_val
= (void *)&ip
;
256 sopt
.sopt_valsize
= sizeof(ip
);
258 error
= sosetopt(so
, &sopt
);
264 * Protocols that do not require connections may be optionally left
265 * unconnected for servers that reply from a port other than NFS_PORT.
267 if (nmp
->nm_flag
& NFSMNT_NOCONN
) {
268 if (nmp
->nm_soflags
& PR_CONNREQUIRED
) {
273 error
= soconnect(so
, nmp
->nm_nam
, td
);
278 * Wait for the connection to complete. Cribbed from the
279 * connect system call but with the wait timing out so
280 * that interruptible mounts don't hang here for a long time.
283 while ((so
->so_state
& SS_ISCONNECTING
) && so
->so_error
== 0) {
284 (void) tsleep((caddr_t
)&so
->so_timeo
, 0,
286 if ((so
->so_state
& SS_ISCONNECTING
) &&
287 so
->so_error
== 0 && rep
&&
288 (error
= nfs_sigintr(nmp
, rep
, rep
->r_td
)) != 0){
289 so
->so_state
&= ~SS_ISCONNECTING
;
295 error
= so
->so_error
;
302 so
->so_rcv
.ssb_timeo
= (5 * hz
);
303 so
->so_snd
.ssb_timeo
= (5 * hz
);
306 * Get buffer reservation size from sysctl, but impose reasonable
309 pktscale
= nfs_bufpackets
;
315 if (nmp
->nm_sotype
== SOCK_DGRAM
) {
316 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
) * pktscale
;
317 rcvreserve
= (max(nmp
->nm_rsize
, nmp
->nm_readdirsize
) +
318 NFS_MAXPKTHDR
) * pktscale
;
319 } else if (nmp
->nm_sotype
== SOCK_SEQPACKET
) {
320 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
) * pktscale
;
321 rcvreserve
= (max(nmp
->nm_rsize
, nmp
->nm_readdirsize
) +
322 NFS_MAXPKTHDR
) * pktscale
;
324 if (nmp
->nm_sotype
!= SOCK_STREAM
)
325 panic("nfscon sotype");
326 if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
330 bzero(&sopt
, sizeof sopt
);
331 sopt
.sopt_level
= SOL_SOCKET
;
332 sopt
.sopt_name
= SO_KEEPALIVE
;
333 sopt
.sopt_val
= &val
;
334 sopt
.sopt_valsize
= sizeof val
;
338 if (so
->so_proto
->pr_protocol
== IPPROTO_TCP
) {
342 bzero(&sopt
, sizeof sopt
);
343 sopt
.sopt_level
= IPPROTO_TCP
;
344 sopt
.sopt_name
= TCP_NODELAY
;
345 sopt
.sopt_val
= &val
;
346 sopt
.sopt_valsize
= sizeof val
;
350 sndreserve
= (nmp
->nm_wsize
+ NFS_MAXPKTHDR
+
351 sizeof (u_int32_t
)) * pktscale
;
352 rcvreserve
= (nmp
->nm_rsize
+ NFS_MAXPKTHDR
+
353 sizeof (u_int32_t
)) * pktscale
;
355 error
= soreserve(so
, sndreserve
, rcvreserve
,
356 &td
->td_proc
->p_rlimit
[RLIMIT_SBSIZE
]);
359 so
->so_rcv
.ssb_flags
|= SSB_NOINTR
;
360 so
->so_snd
.ssb_flags
|= SSB_NOINTR
;
362 /* Initialize other non-zero congestion variables */
363 nmp
->nm_srtt
[0] = nmp
->nm_srtt
[1] = nmp
->nm_srtt
[2] =
364 nmp
->nm_srtt
[3] = (NFS_TIMEO
<< 3);
365 nmp
->nm_sdrtt
[0] = nmp
->nm_sdrtt
[1] = nmp
->nm_sdrtt
[2] =
366 nmp
->nm_sdrtt
[3] = 0;
367 nmp
->nm_cwnd
= NFS_MAXCWND
/ 2; /* Initial send window */
369 nmp
->nm_timeouts
= 0;
379 * Called when a connection is broken on a reliable protocol.
380 * - clean up the old socket
381 * - nfs_connect() again
382 * - set R_MUSTRESEND for all outstanding requests on mount point
383 * If this fails the mount point is DEAD!
384 * nb: Must be called with the nfs_sndlock() set on the mount point.
387 nfs_reconnect(struct nfsreq
*rep
)
390 struct nfsmount
*nmp
= rep
->r_nmp
;
394 while ((error
= nfs_connect(nmp
, rep
)) != 0) {
395 if (error
== EINTR
|| error
== ERESTART
)
397 (void) tsleep((caddr_t
)&lbolt
, 0, "nfscon", 0);
401 * Loop through outstanding request list and fix up all requests
405 TAILQ_FOREACH(rp
, &nfs_reqq
, r_chain
) {
406 if (rp
->r_nmp
== nmp
)
407 rp
->r_flags
|= R_MUSTRESEND
;
414 * NFS disconnect. Clean up and unlink.
417 nfs_disconnect(struct nfsmount
*nmp
)
423 nmp
->nm_so
= (struct socket
*)0;
424 soshutdown(so
, SHUT_RDWR
);
425 soclose(so
, FNONBLOCK
);
430 nfs_safedisconnect(struct nfsmount
*nmp
)
432 struct nfsreq dummyreq
;
434 bzero(&dummyreq
, sizeof(dummyreq
));
435 dummyreq
.r_nmp
= nmp
;
436 dummyreq
.r_td
= NULL
;
437 nfs_rcvlock(&dummyreq
);
439 nfs_rcvunlock(&dummyreq
);
443 * This is the nfs send routine. For connection based socket types, it
444 * must be called with an nfs_sndlock() on the socket.
445 * "rep == NULL" indicates that it has been called from a server.
446 * For the client side:
447 * - return EINTR if the RPC is terminated, 0 otherwise
448 * - set R_MUSTRESEND if the send fails for any reason
449 * - do any cleanup required by recoverable socket errors (?)
450 * For the server side:
451 * - return EINTR or ERESTART if interrupted by a signal
452 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
453 * - do any cleanup required by recoverable socket errors (?)
456 nfs_send(struct socket
*so
, struct sockaddr
*nam
, struct mbuf
*top
,
459 struct sockaddr
*sendnam
;
460 int error
, soflags
, flags
;
463 if (rep
->r_flags
& R_SOFTTERM
) {
467 if ((so
= rep
->r_nmp
->nm_so
) == NULL
) {
468 rep
->r_flags
|= R_MUSTRESEND
;
472 rep
->r_flags
&= ~R_MUSTRESEND
;
473 soflags
= rep
->r_nmp
->nm_soflags
;
475 soflags
= so
->so_proto
->pr_flags
;
476 if ((soflags
& PR_CONNREQUIRED
) || (so
->so_state
& SS_ISCONNECTED
))
477 sendnam
= (struct sockaddr
*)0;
480 if (so
->so_type
== SOCK_SEQPACKET
)
485 error
= so_pru_sosend(so
, sendnam
, NULL
, top
, NULL
, flags
,
488 * ENOBUFS for dgram sockets is transient and non fatal.
489 * No need to log, and no need to break a soft mount.
491 if (error
== ENOBUFS
&& so
->so_type
== SOCK_DGRAM
) {
493 if (rep
) /* do backoff retransmit on client */
494 rep
->r_flags
|= R_MUSTRESEND
;
499 log(LOG_INFO
, "nfs send error %d for server %s\n",error
,
500 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
502 * Deal with errors for the client side.
504 if (rep
->r_flags
& R_SOFTTERM
)
507 rep
->r_flags
|= R_MUSTRESEND
;
509 log(LOG_INFO
, "nfsd send error %d\n", error
);
512 * Handle any recoverable (soft) socket errors here. (?)
514 if (error
!= EINTR
&& error
!= ERESTART
&&
515 error
!= EWOULDBLOCK
&& error
!= EPIPE
)
522 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
523 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
524 * Mark and consolidate the data into a new mbuf list.
525 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
527 * For SOCK_STREAM we must be very careful to read an entire record once
528 * we have read any of it, even if the system call has been interrupted.
531 nfs_receive(struct nfsreq
*rep
, struct sockaddr
**aname
, struct mbuf
**mp
)
538 struct mbuf
*control
;
540 struct sockaddr
**getnam
;
541 int error
, sotype
, rcvflg
;
542 struct thread
*td
= curthread
; /* XXX */
545 * Set up arguments for soreceive()
549 sotype
= rep
->r_nmp
->nm_sotype
;
552 * For reliable protocols, lock against other senders/receivers
553 * in case a reconnect is necessary.
554 * For SOCK_STREAM, first get the Record Mark to find out how much
555 * more there is to get.
556 * We must lock the socket against other receivers
557 * until we have an entire rpc request/reply.
559 if (sotype
!= SOCK_DGRAM
) {
560 error
= nfs_sndlock(rep
);
565 * Check for fatal errors and resending request.
568 * Ugh: If a reconnect attempt just happened, nm_so
569 * would have changed. NULL indicates a failed
570 * attempt that has essentially shut down this
573 if (rep
->r_mrep
|| (rep
->r_flags
& R_SOFTTERM
)) {
577 so
= rep
->r_nmp
->nm_so
;
579 error
= nfs_reconnect(rep
);
586 while (rep
->r_flags
& R_MUSTRESEND
) {
587 m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, MB_WAIT
);
588 nfsstats
.rpcretries
++;
589 error
= nfs_send(so
, rep
->r_nmp
->nm_nam
, m
, rep
);
591 if (error
== EINTR
|| error
== ERESTART
||
592 (error
= nfs_reconnect(rep
)) != 0) {
600 if (sotype
== SOCK_STREAM
) {
602 * Get the length marker from the stream
604 aio
.iov_base
= (caddr_t
)&len
;
605 aio
.iov_len
= sizeof(u_int32_t
);
608 auio
.uio_segflg
= UIO_SYSSPACE
;
609 auio
.uio_rw
= UIO_READ
;
611 auio
.uio_resid
= sizeof(u_int32_t
);
614 rcvflg
= MSG_WAITALL
;
615 error
= so_pru_soreceive(so
, NULL
, &auio
, NULL
,
617 if (error
== EWOULDBLOCK
&& rep
) {
618 if (rep
->r_flags
& R_SOFTTERM
)
621 } while (error
== EWOULDBLOCK
);
623 if (error
== 0 && auio
.uio_resid
> 0) {
625 * Only log short packets if not EOF
627 if (auio
.uio_resid
!= sizeof(u_int32_t
))
629 "short receive (%d/%d) from nfs server %s\n",
630 (int)(sizeof(u_int32_t
) - auio
.uio_resid
),
631 (int)sizeof(u_int32_t
),
632 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
637 len
= ntohl(len
) & ~0x80000000;
639 * This is SERIOUS! We are out of sync with the sender
640 * and forcing a disconnect/reconnect is all I can do.
642 if (len
> NFS_MAXPACKET
) {
643 log(LOG_ERR
, "%s (%d) from nfs server %s\n",
644 "impossible packet length",
646 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
652 * Get the rest of the packet as an mbuf chain
656 rcvflg
= MSG_WAITALL
;
657 error
= so_pru_soreceive(so
, NULL
, NULL
, &sio
,
659 } while (error
== EWOULDBLOCK
|| error
== EINTR
||
661 if (error
== 0 && sio
.sb_cc
!= len
) {
664 "short receive (%d/%d) from nfs server %s\n",
665 len
- auio
.uio_resid
, len
,
666 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
672 * Non-stream, so get the whole packet by not
673 * specifying MSG_WAITALL and by specifying a large
676 * We have no use for control msg., but must grab them
677 * and then throw them away so we know what is going
680 sbinit(&sio
, 100000000);
683 error
= so_pru_soreceive(so
, NULL
, NULL
, &sio
,
687 if (error
== EWOULDBLOCK
&& rep
) {
688 if (rep
->r_flags
& R_SOFTTERM
) {
693 } while (error
== EWOULDBLOCK
||
694 (error
== 0 && sio
.sb_mb
== NULL
&& control
));
695 if ((rcvflg
& MSG_EOR
) == 0)
697 if (error
== 0 && sio
.sb_mb
== NULL
)
703 if (error
&& error
!= EINTR
&& error
!= ERESTART
) {
706 if (error
!= EPIPE
) {
708 "receive error %d from nfs server %s\n",
710 rep
->r_nmp
->nm_mountp
->mnt_stat
.f_mntfromname
);
712 error
= nfs_sndlock(rep
);
714 error
= nfs_reconnect(rep
);
722 if ((so
= rep
->r_nmp
->nm_so
) == NULL
)
724 if (so
->so_state
& SS_ISCONNECTED
)
728 sbinit(&sio
, 100000000);
731 error
= so_pru_soreceive(so
, getnam
, NULL
, &sio
,
733 if (error
== EWOULDBLOCK
&&
734 (rep
->r_flags
& R_SOFTTERM
)) {
738 } while (error
== EWOULDBLOCK
);
747 * Search for any mbufs that are not a multiple of 4 bytes long
748 * or with m_data not longword aligned.
749 * These could cause pointer alignment problems, so copy them to
750 * well aligned mbufs.
752 nfs_realign(mp
, 5 * NFSX_UNSIGNED
);
757 * Implement receipt of reply on a socket.
758 * We must search through the list of received datagrams matching them
759 * with outstanding requests using the xid, until ours is found.
763 nfs_reply(struct nfsreq
*myrep
)
766 struct nfsmount
*nmp
= myrep
->r_nmp
;
768 struct mbuf
*mrep
, *md
;
769 struct sockaddr
*nam
;
775 * Loop around until we get our own reply
779 * Lock against other receivers so that I don't get stuck in
780 * sbwait() after someone else has received my reply for me.
781 * Also necessary for connection based protocols to avoid
782 * race conditions during a reconnect.
783 * If nfs_rcvlock() returns EALREADY, that means that
784 * the reply has already been received by another
785 * process and we can return immediately. In this
786 * case, the lock is not taken to avoid races with
789 error
= nfs_rcvlock(myrep
);
790 if (error
== EALREADY
)
795 * Get the next Rpc reply off the socket
797 error
= nfs_receive(myrep
, &nam
, &mrep
);
798 nfs_rcvunlock(myrep
);
801 * Ignore routing errors on connectionless protocols??
803 if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
)) {
804 nmp
->nm_so
->so_error
= 0;
805 if (myrep
->r_flags
& R_GETONEREP
)
815 * Get the xid and check that it is an rpc reply
818 dpos
= mtod(md
, caddr_t
);
819 nfsm_dissect(tl
, u_int32_t
*, 2*NFSX_UNSIGNED
);
821 if (*tl
!= rpc_reply
) {
822 nfsstats
.rpcinvalid
++;
825 if (myrep
->r_flags
& R_GETONEREP
)
831 * Loop through the request list to match up the reply
832 * Iff no match, just drop the datagram. On match, set
833 * r_mrep atomically to prevent the timer from messing
834 * around with the request after we have exited the critical
838 TAILQ_FOREACH(rep
, &nfs_reqq
, r_chain
) {
839 if (rep
->r_mrep
== NULL
&& rxid
== rep
->r_xid
) {
847 * Fill in the rest of the reply if we found a match.
855 rt
= &nfsrtt
.rttl
[nfsrtt
.pos
];
856 rt
->proc
= rep
->r_procnum
;
857 rt
->rto
= NFS_RTO(nmp
, proct
[rep
->r_procnum
]);
858 rt
->sent
= nmp
->nm_sent
;
859 rt
->cwnd
= nmp
->nm_cwnd
;
860 rt
->srtt
= nmp
->nm_srtt
[proct
[rep
->r_procnum
] - 1];
861 rt
->sdrtt
= nmp
->nm_sdrtt
[proct
[rep
->r_procnum
] - 1];
862 rt
->fsid
= nmp
->nm_mountp
->mnt_stat
.f_fsid
;
863 getmicrotime(&rt
->tstamp
);
864 if (rep
->r_flags
& R_TIMING
)
865 rt
->rtt
= rep
->r_rtt
;
868 nfsrtt
.pos
= (nfsrtt
.pos
+ 1) % NFSRTTLOGSIZ
;
871 * Update congestion window.
872 * Do the additive increase of
875 if (nmp
->nm_cwnd
<= nmp
->nm_sent
) {
877 (NFS_CWNDSCALE
* NFS_CWNDSCALE
+
878 (nmp
->nm_cwnd
>> 1)) / nmp
->nm_cwnd
;
879 if (nmp
->nm_cwnd
> NFS_MAXCWND
)
880 nmp
->nm_cwnd
= NFS_MAXCWND
;
882 crit_enter(); /* nfs_timer interlock for nm_sent */
883 if (rep
->r_flags
& R_SENT
) {
884 rep
->r_flags
&= ~R_SENT
;
885 nmp
->nm_sent
-= NFS_CWNDSCALE
;
889 * Update rtt using a gain of 0.125 on the mean
890 * and a gain of 0.25 on the deviation.
892 if (rep
->r_flags
& R_TIMING
) {
894 * Since the timer resolution of
895 * NFS_HZ is so coarse, it can often
896 * result in r_rtt == 0. Since
897 * r_rtt == N means that the actual
898 * rtt is between N+dt and N+2-dt ticks,
902 t1
-= (NFS_SRTT(rep
) >> 3);
906 t1
-= (NFS_SDRTT(rep
) >> 2);
907 NFS_SDRTT(rep
) += t1
;
909 nmp
->nm_timeouts
= 0;
912 * If not matched to a request, drop it.
913 * If it's mine, get out.
916 nfsstats
.rpcunexpected
++;
918 } else if (rep
== myrep
) {
919 if (rep
->r_mrep
== NULL
)
920 panic("nfsreply nil");
923 if (myrep
->r_flags
& R_GETONEREP
)
929 * nfs_request - goes something like this
930 * - fill in request struct
931 * - links it into list
932 * - calls nfs_send() for first transmit
933 * - calls nfs_receive() to get reply
934 * - break down rpc header and return with nfs reply pointed to
936 * nb: always frees up mreq mbuf list
939 nfs_request(struct vnode
*vp
, struct mbuf
*mrest
, int procnum
,
940 struct thread
*td
, struct ucred
*cred
, struct mbuf
**mrp
,
941 struct mbuf
**mdp
, caddr_t
*dposp
)
943 struct mbuf
*mrep
, *m2
;
947 struct nfsmount
*nmp
;
948 struct mbuf
*m
, *md
, *mheadend
;
949 char nickv
[RPCX_NICKVERF
];
952 int t1
, error
= 0, mrest_len
, auth_len
, auth_type
;
953 int trylater_delay
= 15, trylater_cnt
= 0, failed_auth
= 0;
954 int verf_len
, verf_type
;
956 char *auth_str
, *verf_str
;
957 NFSKERBKEY_T key
; /* save session key */
959 /* Reject requests while attempting a forced unmount. */
960 if (vp
->v_mount
->mnt_kern_flag
& MNTK_UNMOUNTF
) {
964 nmp
= VFSTONFS(vp
->v_mount
);
965 MALLOC(rep
, struct nfsreq
*, sizeof(struct nfsreq
), M_NFSREQ
, M_WAITOK
);
969 rep
->r_procnum
= procnum
;
980 * Get the RPC header with authorization.
983 verf_str
= auth_str
= (char *)0;
984 if (nmp
->nm_flag
& NFSMNT_KERB
) {
986 verf_len
= sizeof (nickv
);
987 auth_type
= RPCAUTH_KERB4
;
988 bzero((caddr_t
)key
, sizeof (key
));
989 if (failed_auth
|| nfs_getnickauth(nmp
, cred
, &auth_str
,
990 &auth_len
, verf_str
, verf_len
)) {
991 error
= nfs_getauth(nmp
, rep
, cred
, &auth_str
,
992 &auth_len
, verf_str
, &verf_len
, key
);
994 kfree((caddr_t
)rep
, M_NFSREQ
);
1000 auth_type
= RPCAUTH_UNIX
;
1001 if (cred
->cr_ngroups
< 1)
1002 panic("nfsreq nogrps");
1003 auth_len
= ((((cred
->cr_ngroups
- 1) > nmp
->nm_numgrps
) ?
1004 nmp
->nm_numgrps
: (cred
->cr_ngroups
- 1)) << 2) +
1007 m
= nfsm_rpchead(cred
, nmp
->nm_flag
, procnum
, auth_type
, auth_len
,
1008 auth_str
, verf_len
, verf_str
, mrest
, mrest_len
, &mheadend
, &xid
);
1010 kfree(auth_str
, M_TEMP
);
1013 * For stream protocols, insert a Sun RPC Record Mark.
1015 if (nmp
->nm_sotype
== SOCK_STREAM
) {
1016 M_PREPEND(m
, NFSX_UNSIGNED
, MB_WAIT
);
1018 kfree(rep
, M_NFSREQ
);
1021 *mtod(m
, u_int32_t
*) = htonl(0x80000000 |
1022 (m
->m_pkthdr
.len
- NFSX_UNSIGNED
));
1027 if (nmp
->nm_flag
& NFSMNT_SOFT
)
1028 rep
->r_retry
= nmp
->nm_retry
;
1030 rep
->r_retry
= NFS_MAXREXMIT
+ 1; /* past clip limit */
1031 rep
->r_rtt
= rep
->r_rexmit
= 0;
1032 if (proct
[procnum
] > 0)
1033 rep
->r_flags
= R_TIMING
| R_MASKTIMER
;
1035 rep
->r_flags
= R_MASKTIMER
;
1039 * Do the client side RPC.
1041 nfsstats
.rpcrequests
++;
1044 * Chain request into list of outstanding requests. Be sure
1045 * to put it LAST so timer finds oldest requests first. Note
1046 * that R_MASKTIMER is set at the moment to prevent any timer
1047 * action on this request while we are still doing processing on
1048 * it below. splsoftclock() primarily protects nm_sent. Note
1049 * that we may block in this code so there is no atomicity guarantee.
1052 TAILQ_INSERT_TAIL(&nfs_reqq
, rep
, r_chain
);
1055 * If backing off another request or avoiding congestion, don't
1056 * send this one now but let timer do it. If not timing a request,
1059 * Even though the timer will not mess with our request there is
1060 * still the possibility that we will race a reply (which clears
1061 * R_SENT), especially on localhost connections, so be very careful
1062 * when setting R_SENT. We could set R_SENT prior to calling
1063 * nfs_send() but why bother if the response occurs that quickly?
1065 if (nmp
->nm_so
&& (nmp
->nm_sotype
!= SOCK_DGRAM
||
1066 (nmp
->nm_flag
& NFSMNT_DUMBTIMR
) ||
1067 nmp
->nm_sent
< nmp
->nm_cwnd
)) {
1068 if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
1069 error
= nfs_sndlock(rep
);
1071 m2
= m_copym(m
, 0, M_COPYALL
, MB_WAIT
);
1072 error
= nfs_send(nmp
->nm_so
, nmp
->nm_nam
, m2
, rep
);
1073 if (nmp
->nm_soflags
& PR_CONNREQUIRED
)
1076 if (!error
&& (rep
->r_flags
& R_MUSTRESEND
) == 0 &&
1077 rep
->r_mrep
== NULL
) {
1078 KASSERT((rep
->r_flags
& R_SENT
) == 0,
1079 ("R_SENT ASSERT %p", rep
));
1080 nmp
->nm_sent
+= NFS_CWNDSCALE
;
1081 rep
->r_flags
|= R_SENT
;
1088 * Let the timer do what it will with the request, then
1089 * wait for the reply from our send or the timer's.
1091 if (!error
|| error
== EPIPE
) {
1092 rep
->r_flags
&= ~R_MASKTIMER
;
1094 error
= nfs_reply(rep
);
1099 * RPC done, unlink the request, but don't rip it out from under
1100 * the callout timer.
1102 while (rep
->r_flags
& R_LOCKED
) {
1103 nfs_timer_raced
= 1;
1104 tsleep(&nfs_timer_raced
, 0, "nfstrac", 0);
1106 TAILQ_REMOVE(&nfs_reqq
, rep
, r_chain
);
1109 * Decrement the outstanding request count.
1111 if (rep
->r_flags
& R_SENT
) {
1112 rep
->r_flags
&= ~R_SENT
;
1113 nmp
->nm_sent
-= NFS_CWNDSCALE
;
1118 * If there was a successful reply and a tprintf msg.
1119 * tprintf a response.
1121 if (!error
&& (rep
->r_flags
& R_TPRINTFMSG
))
1122 nfs_msg(rep
->r_td
, nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
1128 m_freem(rep
->r_mreq
);
1129 kfree((caddr_t
)rep
, M_NFSREQ
);
1134 * break down the rpc header and check if ok
1136 nfsm_dissect(tl
, u_int32_t
*, 3 * NFSX_UNSIGNED
);
1137 if (*tl
++ == rpc_msgdenied
) {
1138 if (*tl
== rpc_mismatch
)
1140 else if ((nmp
->nm_flag
& NFSMNT_KERB
) && *tl
++ == rpc_autherr
) {
1143 mheadend
->m_next
= (struct mbuf
*)0;
1145 m_freem(rep
->r_mreq
);
1152 m_freem(rep
->r_mreq
);
1153 kfree((caddr_t
)rep
, M_NFSREQ
);
1158 * Grab any Kerberos verifier, otherwise just throw it away.
1160 verf_type
= fxdr_unsigned(int, *tl
++);
1161 i
= fxdr_unsigned(int32_t, *tl
);
1162 if ((nmp
->nm_flag
& NFSMNT_KERB
) && verf_type
== RPCAUTH_KERB4
) {
1163 error
= nfs_savenickauth(nmp
, cred
, i
, key
, &md
, &dpos
, mrep
);
1167 nfsm_adv(nfsm_rndup(i
));
1168 nfsm_dissect(tl
, u_int32_t
*, NFSX_UNSIGNED
);
1171 nfsm_dissect(tl
, u_int32_t
*, NFSX_UNSIGNED
);
1173 error
= fxdr_unsigned(int, *tl
);
1174 if ((nmp
->nm_flag
& NFSMNT_NFSV3
) &&
1175 error
== NFSERR_TRYLATER
) {
1178 waituntil
= time_second
+ trylater_delay
;
1179 while (time_second
< waituntil
)
1180 (void) tsleep((caddr_t
)&lbolt
,
1182 trylater_delay
*= nfs_backoff
[trylater_cnt
];
1183 if (trylater_cnt
< 7)
1189 * If the File Handle was stale, invalidate the
1190 * lookup cache, just in case.
1192 * To avoid namecache<->vnode deadlocks we must
1193 * release the vnode lock if we hold it.
1195 if (error
== ESTALE
) {
1198 ltype
= lockstatus(&vp
->v_lock
, curthread
);
1199 if (ltype
== LK_EXCLUSIVE
|| ltype
== LK_SHARED
)
1200 lockmgr(&vp
->v_lock
, LK_RELEASE
);
1201 cache_inval_vp(vp
, CINV_CHILDREN
);
1202 if (ltype
== LK_EXCLUSIVE
|| ltype
== LK_SHARED
)
1203 lockmgr(&vp
->v_lock
, ltype
);
1205 if (nmp
->nm_flag
& NFSMNT_NFSV3
) {
1209 error
|= NFSERR_RETERR
;
1212 m_freem(rep
->r_mreq
);
1213 kfree((caddr_t
)rep
, M_NFSREQ
);
1220 m_freem(rep
->r_mreq
);
1221 FREE((caddr_t
)rep
, M_NFSREQ
);
1225 error
= EPROTONOSUPPORT
;
1227 m_freem(rep
->r_mreq
);
1228 kfree((caddr_t
)rep
, M_NFSREQ
);
1232 #ifndef NFS_NOSERVER
1234 * Generate the rpc reply header
1235 * siz arg. is used to decide if adding a cluster is worthwhile
1238 nfs_rephead(int siz
, struct nfsrv_descript
*nd
, struct nfssvc_sock
*slp
,
1239 int err
, struct mbuf
**mrq
, struct mbuf
**mbp
, caddr_t
*bposp
)
1244 struct mbuf
*mb
, *mb2
;
1246 siz
+= RPC_REPLYSIZ
;
1247 mb
= mreq
= m_getl(max_hdr
+ siz
, MB_WAIT
, MT_DATA
, M_PKTHDR
, NULL
);
1248 mreq
->m_pkthdr
.len
= 0;
1250 * If this is not a cluster, try and leave leading space
1251 * for the lower level headers.
1253 if ((max_hdr
+ siz
) < MINCLSIZE
)
1254 mreq
->m_data
+= max_hdr
;
1255 tl
= mtod(mreq
, u_int32_t
*);
1256 mreq
->m_len
= 6 * NFSX_UNSIGNED
;
1257 bpos
= ((caddr_t
)tl
) + mreq
->m_len
;
1258 *tl
++ = txdr_unsigned(nd
->nd_retxid
);
1260 if (err
== ERPCMISMATCH
|| (err
& NFSERR_AUTHERR
)) {
1261 *tl
++ = rpc_msgdenied
;
1262 if (err
& NFSERR_AUTHERR
) {
1263 *tl
++ = rpc_autherr
;
1264 *tl
= txdr_unsigned(err
& ~NFSERR_AUTHERR
);
1265 mreq
->m_len
-= NFSX_UNSIGNED
;
1266 bpos
-= NFSX_UNSIGNED
;
1268 *tl
++ = rpc_mismatch
;
1269 *tl
++ = txdr_unsigned(RPC_VER2
);
1270 *tl
= txdr_unsigned(RPC_VER2
);
1273 *tl
++ = rpc_msgaccepted
;
1276 * For Kerberos authentication, we must send the nickname
1277 * verifier back, otherwise just RPCAUTH_NULL.
1279 if (nd
->nd_flag
& ND_KERBFULL
) {
1280 struct nfsuid
*nuidp
;
1281 struct timeval ktvin
, ktvout
;
1283 for (nuidp
= NUIDHASH(slp
, nd
->nd_cr
.cr_uid
)->lh_first
;
1284 nuidp
!= 0; nuidp
= nuidp
->nu_hash
.le_next
) {
1285 if (nuidp
->nu_cr
.cr_uid
== nd
->nd_cr
.cr_uid
&&
1286 (!nd
->nd_nam2
|| netaddr_match(NU_NETFAM(nuidp
),
1287 &nuidp
->nu_haddr
, nd
->nd_nam2
)))
1292 txdr_unsigned(nuidp
->nu_timestamp
.tv_sec
- 1);
1294 txdr_unsigned(nuidp
->nu_timestamp
.tv_usec
);
1297 * Encrypt the timestamp in ecb mode using the
1304 *tl
++ = rpc_auth_kerb
;
1305 *tl
++ = txdr_unsigned(3 * NFSX_UNSIGNED
);
1306 *tl
= ktvout
.tv_sec
;
1307 nfsm_build(tl
, u_int32_t
*, 3 * NFSX_UNSIGNED
);
1308 *tl
++ = ktvout
.tv_usec
;
1309 *tl
++ = txdr_unsigned(nuidp
->nu_cr
.cr_uid
);
1320 *tl
= txdr_unsigned(RPC_PROGUNAVAIL
);
1323 *tl
= txdr_unsigned(RPC_PROGMISMATCH
);
1324 nfsm_build(tl
, u_int32_t
*, 2 * NFSX_UNSIGNED
);
1325 *tl
++ = txdr_unsigned(2);
1326 *tl
= txdr_unsigned(3);
1329 *tl
= txdr_unsigned(RPC_PROCUNAVAIL
);
1332 *tl
= txdr_unsigned(RPC_GARBAGE
);
1336 if (err
!= NFSERR_RETVOID
) {
1337 nfsm_build(tl
, u_int32_t
*, NFSX_UNSIGNED
);
1339 *tl
= txdr_unsigned(nfsrv_errmap(nd
, err
));
1351 if (err
!= 0 && err
!= NFSERR_RETVOID
)
1352 nfsstats
.srvrpc_errs
++;
1357 #endif /* NFS_NOSERVER */
1360 * Scan the nfsreq list and retransmit any requests that have timed out
1361 * To avoid retransmission attempts on STREAM sockets (in the future) make
1362 * sure to set the r_retry field to 0 (implies nm_retry == 0).
1365 nfs_timer(void *arg
/* never used */)
1370 struct nfsmount
*nmp
;
1373 #ifndef NFS_NOSERVER
1374 struct nfssvc_sock
*slp
;
1376 #endif /* NFS_NOSERVER */
1377 struct thread
*td
= &thread0
; /* XXX for credentials, will break if sleep */
1380 TAILQ_FOREACH(rep
, &nfs_reqq
, r_chain
) {
1382 if (rep
->r_mrep
|| (rep
->r_flags
& (R_SOFTTERM
|R_MASKTIMER
)))
1384 rep
->r_flags
|= R_LOCKED
;
1385 if (nfs_sigintr(nmp
, rep
, rep
->r_td
)) {
1389 if (rep
->r_rtt
>= 0) {
1391 if (nmp
->nm_flag
& NFSMNT_DUMBTIMR
)
1392 timeo
= nmp
->nm_timeo
;
1394 timeo
= NFS_RTO(nmp
, proct
[rep
->r_procnum
]);
1395 if (nmp
->nm_timeouts
> 0)
1396 timeo
*= nfs_backoff
[nmp
->nm_timeouts
- 1];
1397 if (rep
->r_rtt
<= timeo
)
1399 if (nmp
->nm_timeouts
< 8)
1403 * Check for server not responding
1405 if ((rep
->r_flags
& R_TPRINTFMSG
) == 0 &&
1406 rep
->r_rexmit
> nmp
->nm_deadthresh
) {
1408 nmp
->nm_mountp
->mnt_stat
.f_mntfromname
,
1410 rep
->r_flags
|= R_TPRINTFMSG
;
1412 if (rep
->r_rexmit
>= rep
->r_retry
) { /* too many */
1413 nfsstats
.rpctimeouts
++;
1417 if (nmp
->nm_sotype
!= SOCK_DGRAM
) {
1418 if (++rep
->r_rexmit
> NFS_MAXREXMIT
)
1419 rep
->r_rexmit
= NFS_MAXREXMIT
;
1422 if ((so
= nmp
->nm_so
) == NULL
)
1426 * If there is enough space and the window allows..
1428 * Set r_rtt to -1 in case we fail to send it now.
1431 if (ssb_space(&so
->so_snd
) >= rep
->r_mreq
->m_pkthdr
.len
&&
1432 ((nmp
->nm_flag
& NFSMNT_DUMBTIMR
) ||
1433 (rep
->r_flags
& R_SENT
) ||
1434 nmp
->nm_sent
< nmp
->nm_cwnd
) &&
1435 (m
= m_copym(rep
->r_mreq
, 0, M_COPYALL
, MB_DONTWAIT
))){
1436 if ((nmp
->nm_flag
& NFSMNT_NOCONN
) == 0)
1437 error
= so_pru_send(so
, 0, m
, (struct sockaddr
*)0,
1438 (struct mbuf
*)0, td
);
1440 error
= so_pru_send(so
, 0, m
, nmp
->nm_nam
,
1441 (struct mbuf
*)0, td
);
1443 if (NFSIGNORE_SOERROR(nmp
->nm_soflags
, error
))
1445 } else if (rep
->r_mrep
== NULL
) {
1447 * Iff first send, start timing
1448 * else turn timing off, backoff timer
1449 * and divide congestion window by 2.
1451 * It is possible for the so_pru_send() to
1452 * block and for us to race a reply so we
1453 * only do this if the reply field has not
1454 * been filled in. R_LOCKED will prevent
1455 * the request from being ripped out from under
1458 if (rep
->r_flags
& R_SENT
) {
1459 rep
->r_flags
&= ~R_TIMING
;
1460 if (++rep
->r_rexmit
> NFS_MAXREXMIT
)
1461 rep
->r_rexmit
= NFS_MAXREXMIT
;
1463 if (nmp
->nm_cwnd
< NFS_CWNDSCALE
)
1464 nmp
->nm_cwnd
= NFS_CWNDSCALE
;
1465 nfsstats
.rpcretries
++;
1467 rep
->r_flags
|= R_SENT
;
1468 nmp
->nm_sent
+= NFS_CWNDSCALE
;
1474 rep
->r_flags
&= ~R_LOCKED
;
1476 #ifndef NFS_NOSERVER
1479 * Scan the write gathering queues for writes that need to be
1482 cur_usec
= nfs_curusec();
1483 TAILQ_FOREACH(slp
, &nfssvc_sockhead
, ns_chain
) {
1484 if (slp
->ns_tq
.lh_first
&& slp
->ns_tq
.lh_first
->nd_time
<=cur_usec
)
1485 nfsrv_wakenfsd(slp
, 1);
1487 #endif /* NFS_NOSERVER */
1490 * Due to possible blocking, a client operation may be waiting for
1491 * us to finish processing this request so it can remove it.
1493 if (nfs_timer_raced
) {
1494 nfs_timer_raced
= 0;
1495 wakeup(&nfs_timer_raced
);
1498 callout_reset(&nfs_timer_handle
, nfs_ticks
, nfs_timer
, NULL
);
1502 * Mark all of an nfs mount's outstanding requests with R_SOFTTERM and
1503 * wait for all requests to complete. This is used by forced unmounts
1504 * to terminate any outstanding RPCs.
1507 nfs_nmcancelreqs(struct nfsmount
*nmp
)
1513 TAILQ_FOREACH(req
, &nfs_reqq
, r_chain
) {
1514 if (nmp
!= req
->r_nmp
|| req
->r_mrep
!= NULL
||
1515 (req
->r_flags
& R_SOFTTERM
)) {
1522 for (i
= 0; i
< 30; i
++) {
1524 TAILQ_FOREACH(req
, &nfs_reqq
, r_chain
) {
1525 if (nmp
== req
->r_nmp
)
1531 tsleep(&lbolt
, 0, "nfscancel", 0);
1537 * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
1538 * The nm_sent count is decremented now to avoid deadlocks when the process in
1539 * soreceive() hasn't yet managed to send its own request.
1541 * This routine must be called at splsoftclock() to protect r_flags and
/*
 * nfs_softterm: flag the request `rep` as soft-terminated.
 *
 * Sets R_SOFTTERM in rep->r_flags.  If the request is currently marked
 * R_SENT, NFS_CWNDSCALE is subtracted from the owning mount's nm_sent
 * counter (rep->r_nmp->nm_sent) and R_SENT is cleared, so a second call
 * on the same request does not subtract again.
 *
 * NOTE(review): this listing omits some original lines (return type,
 * braces); confirm against the complete file before relying on this text.
 */
1546 nfs_softterm(struct nfsreq
*rep
)
1548 rep
->r_flags
|= R_SOFTTERM
;
/* Only requests still counted as sent contribute to nm_sent. */
1550 if (rep
->r_flags
& R_SENT
) {
1551 rep
->r_nmp
->nm_sent
-= NFS_CWNDSCALE
;
1552 rep
->r_flags
&= ~R_SENT
;
1557 * Test for a termination condition pending on the process.
1558 * This is used for NFSMNT_INT mounts.
1561 nfs_sigintr(struct nfsmount
*nmp
, struct nfsreq
*rep
, struct thread
*td
)
1567 if (rep
&& (rep
->r_flags
& R_SOFTTERM
))
1569 /* Terminate all requests while attempting a forced unmount. */
1570 if (nmp
->nm_mountp
->mnt_kern_flag
& MNTK_UNMOUNTF
)
1572 if (!(nmp
->nm_flag
& NFSMNT_INT
))
1574 /* td might be NULL YYY */
1575 if (td
== NULL
|| (p
= td
->td_proc
) == NULL
)
1579 tmpset
= lwp_sigpend(lp
);
1580 SIGSETNAND(tmpset
, lp
->lwp_sigmask
);
1581 SIGSETNAND(tmpset
, p
->p_sigignore
);
1582 if (SIGNOTEMPTY(tmpset
) && NFSINT_SIGMASK(tmpset
))
1589 * Lock a socket against others.
1590 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
1591 * and also to avoid race conditions between the processes with nfs requests
1592 * in progress when a reconnect is necessary.
1595 nfs_sndlock(struct nfsreq
*rep
)
1597 int *statep
= &rep
->r_nmp
->nm_state
;
1606 if (rep
->r_nmp
->nm_flag
& NFSMNT_INT
)
1611 while (*statep
& NFSSTA_SNDLOCK
) {
1612 *statep
|= NFSSTA_WANTSND
;
1613 if (nfs_sigintr(rep
->r_nmp
, rep
, td
)) {
1617 tsleep((caddr_t
)statep
, slpflag
, "nfsndlck", slptimeo
);
1618 if (slpflag
== PCATCH
) {
1623 /* Always fail if our request has been cancelled. */
1624 if ((rep
->r_flags
& R_SOFTTERM
))
1627 *statep
|= NFSSTA_SNDLOCK
;
1633 * Unlock the stream socket for others.
/*
 * nfs_sndunlock: release the send-lock bit of the mount that owns `rep`.
 *
 * Works on rep->r_nmp->nm_state.  Panics with "nfs sndunlock" when
 * NFSSTA_SNDLOCK is not set on entry.  Otherwise clears NFSSTA_SNDLOCK
 * and, if NFSSTA_WANTSND is set, clears that bit as well and calls
 * wakeup() on the address of nm_state -- the same address nfs_sndlock()
 * passes to tsleep().
 *
 * NOTE(review): this listing omits some original lines (return type,
 * braces, possibly other statements); confirm against the complete file.
 */
1636 nfs_sndunlock(struct nfsreq
*rep
)
1638 int *statep
= &rep
->r_nmp
->nm_state
;
/* Unlocking a lock that is not held is treated as a fatal error. */
1640 if ((*statep
& NFSSTA_SNDLOCK
) == 0)
1641 panic("nfs sndunlock");
1643 *statep
&= ~NFSSTA_SNDLOCK
;
/* Wake sleepers only when a waiter has flagged its interest. */
1644 if (*statep
& NFSSTA_WANTSND
) {
1645 *statep
&= ~NFSSTA_WANTSND
;
1646 wakeup((caddr_t
)statep
);
1652 nfs_rcvlock(struct nfsreq
*rep
)
1654 int *statep
= &rep
->r_nmp
->nm_state
;
1660 * Unconditionally check for completion in case another nfsiod
1661 * gets the packet while the caller was blocked, before the caller
1662 * called us. Packet reception is handled by mainline code which
1663 * is protected by the BGL at the moment.
1665 * We do not strictly need the second check just before the
1666 * tsleep(), but it's good defensive programming.
1668 if (rep
->r_mrep
!= NULL
)
1671 if (rep
->r_nmp
->nm_flag
& NFSMNT_INT
)
1678 while (*statep
& NFSSTA_RCVLOCK
) {
1679 if (nfs_sigintr(rep
->r_nmp
, rep
, rep
->r_td
)) {
1683 if (rep
->r_mrep
!= NULL
) {
1687 *statep
|= NFSSTA_WANTRCV
;
1688 tsleep((caddr_t
)statep
, slpflag
, "nfsrcvlk", slptimeo
);
1690 * If our reply was received while we were sleeping,
1691 * then just return without taking the lock to avoid a
1692 * situation where a single iod could 'capture' the
1695 if (rep
->r_mrep
!= NULL
) {
1699 if (slpflag
== PCATCH
) {
1705 *statep
|= NFSSTA_RCVLOCK
;
1706 rep
->r_nmp
->nm_rcvlock_td
= curthread
; /* DEBUGGING */
1713 * Unlock the stream socket for others.
/*
 * nfs_rcvunlock: release the receive-lock bit of the mount that owns `rep`.
 *
 * Works on rep->r_nmp->nm_state.  Panics with "nfs rcvunlock" when
 * NFSSTA_RCVLOCK is not set on entry.  Otherwise it overwrites the
 * nm_rcvlock_td debugging field with (void *)-1 (nfs_rcvlock() stores
 * curthread there when it takes the lock), clears NFSSTA_RCVLOCK and,
 * if NFSSTA_WANTRCV is set, clears that bit and calls wakeup() on the
 * address of nm_state -- the same address nfs_rcvlock() passes to
 * tsleep().
 *
 * NOTE(review): this listing omits some original lines (return type,
 * braces, possibly other statements); confirm against the complete file.
 */
1716 nfs_rcvunlock(struct nfsreq
*rep
)
1718 int *statep
= &rep
->r_nmp
->nm_state
;
/* Unlocking a lock that is not held is treated as a fatal error. */
1720 if ((*statep
& NFSSTA_RCVLOCK
) == 0)
1721 panic("nfs rcvunlock");
1723 rep
->r_nmp
->nm_rcvlock_td
= (void *)-1; /* DEBUGGING */
1724 *statep
&= ~NFSSTA_RCVLOCK
;
/* Wake sleepers only when a waiter has flagged its interest. */
1725 if (*statep
& NFSSTA_WANTRCV
) {
1726 *statep
&= ~NFSSTA_WANTRCV
;
1727 wakeup((caddr_t
)statep
);
1735 * Check for badly aligned mbuf data and realign by copying the unaligned
1736 * portion of the data into a new mbuf chain and freeing the portions
1737 * of the old chain that were replaced.
1739 * We cannot simply realign the data within the existing mbuf chain
1740 * because the underlying buffers may contain other rpc commands and
1741 * we cannot afford to overwrite them.
1743 * We would prefer to avoid this situation entirely. The situation does
1744 * not occur with NFS/UDP and is supposed to only occasionally occur
1745 * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
1748 nfs_realign(struct mbuf
**pm
, int hsiz
)
1751 struct mbuf
*n
= NULL
;
1756 while ((m
= *pm
) != NULL
) {
1757 if ((m
->m_len
& 0x3) || (mtod(m
, intptr_t) & 0x3)) {
1758 n
= m_getl(m
->m_len
, MB_WAIT
, MT_DATA
, 0, NULL
);
1766 * If n is non-NULL, loop on m copying data, then replace the
1767 * portion of the chain that had to be realigned.
1770 ++nfs_realign_count
;
1772 m_copyback(n
, off
, m
->m_len
, mtod(m
, caddr_t
));
1781 #ifndef NFS_NOSERVER
1784 * Parse an RPC request
1786 * - fill in the cred struct.
1789 nfs_getreq(struct nfsrv_descript
*nd
, struct nfsd
*nfsd
, int has_header
)
1796 caddr_t dpos
, cp2
, cp
;
1797 u_int32_t nfsvers
, auth_type
;
1799 int error
= 0, ticklen
;
1800 struct mbuf
*mrep
, *md
;
1801 struct nfsuid
*nuidp
;
1802 struct timeval tvin
, tvout
;
1803 #if 0 /* until encrypted keys are implemented */
1804 NFSKERBKEYSCHED_T keys
; /* stores key schedule */
1811 nfsm_dissect(tl
, u_int32_t
*, 10 * NFSX_UNSIGNED
);
1812 nd
->nd_retxid
= fxdr_unsigned(u_int32_t
, *tl
++);
1813 if (*tl
++ != rpc_call
) {
1818 nfsm_dissect(tl
, u_int32_t
*, 8 * NFSX_UNSIGNED
);
1821 if (*tl
++ != rpc_vers
) {
1822 nd
->nd_repstat
= ERPCMISMATCH
;
1823 nd
->nd_procnum
= NFSPROC_NOOP
;
1826 if (*tl
!= nfs_prog
) {
1827 nd
->nd_repstat
= EPROGUNAVAIL
;
1828 nd
->nd_procnum
= NFSPROC_NOOP
;
1832 nfsvers
= fxdr_unsigned(u_int32_t
, *tl
++);
1833 if (nfsvers
< NFS_VER2
|| nfsvers
> NFS_VER3
) {
1834 nd
->nd_repstat
= EPROGMISMATCH
;
1835 nd
->nd_procnum
= NFSPROC_NOOP
;
1838 if (nfsvers
== NFS_VER3
)
1839 nd
->nd_flag
= ND_NFSV3
;
1840 nd
->nd_procnum
= fxdr_unsigned(u_int32_t
, *tl
++);
1841 if (nd
->nd_procnum
== NFSPROC_NULL
)
1843 if (nd
->nd_procnum
>= NFS_NPROCS
||
1844 (nd
->nd_procnum
>= NQNFSPROC_GETLEASE
) ||
1845 (!nd
->nd_flag
&& nd
->nd_procnum
> NFSV2PROC_STATFS
)) {
1846 nd
->nd_repstat
= EPROCUNAVAIL
;
1847 nd
->nd_procnum
= NFSPROC_NOOP
;
1850 if ((nd
->nd_flag
& ND_NFSV3
) == 0)
1851 nd
->nd_procnum
= nfsv3_procid
[nd
->nd_procnum
];
1853 len
= fxdr_unsigned(int, *tl
++);
1854 if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
1859 nd
->nd_flag
&= ~ND_KERBAUTH
;
1861 * Handle auth_unix or auth_kerb.
1863 if (auth_type
== rpc_auth_unix
) {
1864 len
= fxdr_unsigned(int, *++tl
);
1865 if (len
< 0 || len
> NFS_MAXNAMLEN
) {
1869 nfsm_adv(nfsm_rndup(len
));
1870 nfsm_dissect(tl
, u_int32_t
*, 3 * NFSX_UNSIGNED
);
1871 bzero((caddr_t
)&nd
->nd_cr
, sizeof (struct ucred
));
1872 nd
->nd_cr
.cr_ref
= 1;
1873 nd
->nd_cr
.cr_uid
= fxdr_unsigned(uid_t
, *tl
++);
1874 nd
->nd_cr
.cr_gid
= fxdr_unsigned(gid_t
, *tl
++);
1875 len
= fxdr_unsigned(int, *tl
);
1876 if (len
< 0 || len
> RPCAUTH_UNIXGIDS
) {
1880 nfsm_dissect(tl
, u_int32_t
*, (len
+ 2) * NFSX_UNSIGNED
);
1881 for (i
= 1; i
<= len
; i
++)
1883 nd
->nd_cr
.cr_groups
[i
] = fxdr_unsigned(gid_t
, *tl
++);
1886 nd
->nd_cr
.cr_ngroups
= (len
>= NGROUPS
) ? NGROUPS
: (len
+ 1);
1887 if (nd
->nd_cr
.cr_ngroups
> 1)
1888 nfsrvw_sort(nd
->nd_cr
.cr_groups
, nd
->nd_cr
.cr_ngroups
);
1889 len
= fxdr_unsigned(int, *++tl
);
1890 if (len
< 0 || len
> RPCAUTH_MAXSIZ
) {
1895 nfsm_adv(nfsm_rndup(len
));
1896 } else if (auth_type
== rpc_auth_kerb
) {
1897 switch (fxdr_unsigned(int, *tl
++)) {
1898 case RPCAKN_FULLNAME
:
1899 ticklen
= fxdr_unsigned(int, *tl
);
1900 *((u_int32_t
*)nfsd
->nfsd_authstr
) = *tl
;
1901 uio
.uio_resid
= nfsm_rndup(ticklen
) + NFSX_UNSIGNED
;
1902 nfsd
->nfsd_authlen
= uio
.uio_resid
+ NFSX_UNSIGNED
;
1903 if (uio
.uio_resid
> (len
- 2 * NFSX_UNSIGNED
)) {
1910 uio
.uio_segflg
= UIO_SYSSPACE
;
1911 iov
.iov_base
= (caddr_t
)&nfsd
->nfsd_authstr
[4];
1912 iov
.iov_len
= RPCAUTH_MAXSIZ
- 4;
1913 nfsm_mtouio(&uio
, uio
.uio_resid
);
1914 nfsm_dissect(tl
, u_int32_t
*, 2 * NFSX_UNSIGNED
);
1915 if (*tl
++ != rpc_auth_kerb
||
1916 fxdr_unsigned(int, *tl
) != 4 * NFSX_UNSIGNED
) {
1917 kprintf("Bad kerb verifier\n");
1918 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
1919 nd
->nd_procnum
= NFSPROC_NOOP
;
1922 nfsm_dissect(cp
, caddr_t
, 4 * NFSX_UNSIGNED
);
1923 tl
= (u_int32_t
*)cp
;
1924 if (fxdr_unsigned(int, *tl
) != RPCAKN_FULLNAME
) {
1925 kprintf("Not fullname kerb verifier\n");
1926 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
1927 nd
->nd_procnum
= NFSPROC_NOOP
;
1930 cp
+= NFSX_UNSIGNED
;
1931 bcopy(cp
, nfsd
->nfsd_verfstr
, 3 * NFSX_UNSIGNED
);
1932 nfsd
->nfsd_verflen
= 3 * NFSX_UNSIGNED
;
1933 nd
->nd_flag
|= ND_KERBFULL
;
1934 nfsd
->nfsd_flag
|= NFSD_NEEDAUTH
;
1936 case RPCAKN_NICKNAME
:
1937 if (len
!= 2 * NFSX_UNSIGNED
) {
1938 kprintf("Kerb nickname short\n");
1939 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADCRED
);
1940 nd
->nd_procnum
= NFSPROC_NOOP
;
1943 nickuid
= fxdr_unsigned(uid_t
, *tl
);
1944 nfsm_dissect(tl
, u_int32_t
*, 2 * NFSX_UNSIGNED
);
1945 if (*tl
++ != rpc_auth_kerb
||
1946 fxdr_unsigned(int, *tl
) != 3 * NFSX_UNSIGNED
) {
1947 kprintf("Kerb nick verifier bad\n");
1948 nd
->nd_repstat
= (NFSERR_AUTHERR
|AUTH_BADVERF
);
1949 nd
->nd_procnum
= NFSPROC_NOOP
;
1952 nfsm_dissect(tl
, u_int32_t
*, 3 * NFSX_UNSIGNED
);
1953 tvin
.tv_sec
= *tl
++;
1956 for (nuidp
= NUIDHASH(nfsd
->nfsd_slp
,nickuid
)->lh_first
;
1957 nuidp
!= 0; nuidp
= nuidp
->nu_hash
.le_next
) {
1958 if (nuidp
->nu_cr
.cr_uid
== nickuid
&&
1960 netaddr_match(NU_NETFAM(nuidp
),
1961 &nuidp
->nu_haddr
, nd
->nd_nam2
)))
1966 (NFSERR_AUTHERR
|AUTH_REJECTCRED
);
1967 nd
->nd_procnum
= NFSPROC_NOOP
;
1972 * Now, decrypt the timestamp using the session key
1979 tvout
.tv_sec
= fxdr_unsigned(long, tvout
.tv_sec
);
1980 tvout
.tv_usec
= fxdr_unsigned(long, tvout
.tv_usec
);
1981 if (nuidp
->nu_expire
< time_second
||
1982 nuidp
->nu_timestamp
.tv_sec
> tvout
.tv_sec
||
1983 (nuidp
->nu_timestamp
.tv_sec
== tvout
.tv_sec
&&
1984 nuidp
->nu_timestamp
.tv_usec
> tvout
.tv_usec
)) {
1985 nuidp
->nu_expire
= 0;
1987 (NFSERR_AUTHERR
|AUTH_REJECTVERF
);
1988 nd
->nd_procnum
= NFSPROC_NOOP
;
1991 nfsrv_setcred(&nuidp
->nu_cr
, &nd
->nd_cr
);
1992 nd
->nd_flag
|= ND_KERBNICK
;
1995 nd
->nd_repstat
= (NFSERR_AUTHERR
| AUTH_REJECTCRED
);
1996 nd
->nd_procnum
= NFSPROC_NOOP
;
2010 * Send a message to the originating process's terminal. The thread and/or
2011 * process may be NULL. YYY the thread should not be NULL but there may
2012 * still be some uio_td's that are still being passed as NULL through to
/*
 * nfs_msg: report a message about an NFS server via tprintf().
 *
 *   td     - thread on whose behalf the message is reported; it is
 *            checked for NULL, as is td->td_proc, before use.
 *   server - server name, substituted for the first %s.
 *   msg    - message text, substituted for the second %s.
 *
 * When both td and td->td_proc are non-NULL, a tprintf handle is opened
 * for that process with tprintf_open().  The text is then emitted in the
 * form "nfs server <server>: <msg>" followed by a newline.
 *
 * NOTE(review): the declaration of `tpr`, the branch taken when td or
 * td->td_proc is NULL, and whatever follows the tprintf() call are not
 * present in this listing; confirm against the complete file.
 */
2016 nfs_msg(struct thread
*td
, char *server
, char *msg
)
2020 if (td
&& td
->td_proc
)
2021 tpr
= tprintf_open(td
->td_proc
);
2024 tprintf(tpr
, "nfs server %s: %s\n", server
, msg
);
2029 #ifndef NFS_NOSERVER
2031 * Socket upcall routine for the nfsd sockets.
2032 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
2033 * Essentially do as much as possible non-blocking, else punt and it will
2034 * be called with MB_WAIT from an nfsd.
2037 nfsrv_rcv(struct socket
*so
, void *arg
, int waitflag
)
2039 struct nfssvc_sock
*slp
= (struct nfssvc_sock
*)arg
;
2041 struct sockaddr
*nam
;
2044 int nparallel_wakeup
= 0;
2046 if ((slp
->ns_flag
& SLP_VALID
) == 0)
2050 * Do not allow an infinite number of completed RPC records to build
2051 * up before we stop reading data from the socket. Otherwise we could
2052 * end up holding onto an unreasonable number of mbufs for requests
2053 * waiting for service.
2055 * This should give pretty good feedback to the TCP
2056 * layer and prevents a memory crunch for other protocols.
2058 * Note that the same service socket can be dispatched to several
2059 * nfs servers simultaneously.
2061 * the tcp protocol callback calls us with MB_DONTWAIT.
2062 * nfsd calls us with MB_WAIT (typically).
2064 if (waitflag
== MB_DONTWAIT
&& slp
->ns_numrec
>= nfsd_waiting
/ 2 + 1) {
2065 slp
->ns_flag
|= SLP_NEEDQ
;
2070 * Handle protocol specifics to parse an RPC request. We always
2071 * pull from the socket using non-blocking I/O.
2073 if (so
->so_type
== SOCK_STREAM
) {
2075 * The data has to be read in an orderly fashion from a TCP
2076 * stream, unlike a UDP socket. It is possible for soreceive
2077 * and/or nfsrv_getstream() to block, so make sure only one
2078 * entity is messing around with the TCP stream at any given
2079 * moment. The receive sockbuf's lock in soreceive is not
2082 * Note that this procedure can be called from any number of
2083 * NFS servers *OR* can be upcalled directly from a TCP
2086 if (slp
->ns_flag
& SLP_GETSTREAM
) {
2087 slp
->ns_flag
|= SLP_NEEDQ
;
2090 slp
->ns_flag
|= SLP_GETSTREAM
;
2093 * Do soreceive(). Pull out as much data as possible without
2096 sbinit(&sio
, 1000000000);
2097 flags
= MSG_DONTWAIT
;
2098 error
= so_pru_soreceive(so
, &nam
, NULL
, &sio
, NULL
, &flags
);
2099 if (error
|| sio
.sb_mb
== NULL
) {
2100 if (error
== EWOULDBLOCK
)
2101 slp
->ns_flag
|= SLP_NEEDQ
;
2103 slp
->ns_flag
|= SLP_DISCONN
;
2104 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2108 if (slp
->ns_rawend
) {
2109 slp
->ns_rawend
->m_next
= m
;
2110 slp
->ns_cc
+= sio
.sb_cc
;
2113 slp
->ns_cc
= sio
.sb_cc
;
2120 * Now try and parse as many record(s) as we can out of the
2123 error
= nfsrv_getstream(slp
, waitflag
, &nparallel_wakeup
);
2126 slp
->ns_flag
|= SLP_DISCONN
;
2128 slp
->ns_flag
|= SLP_NEEDQ
;
2130 slp
->ns_flag
&= ~SLP_GETSTREAM
;
2133 * For UDP soreceive typically pulls just one packet, loop
2134 * to get the whole batch.
2137 sbinit(&sio
, 1000000000);
2138 flags
= MSG_DONTWAIT
;
2139 error
= so_pru_soreceive(so
, &nam
, NULL
, &sio
,
2142 struct nfsrv_rec
*rec
;
2143 int mf
= (waitflag
& MB_DONTWAIT
) ?
2144 M_NOWAIT
: M_WAITOK
;
2145 rec
= kmalloc(sizeof(struct nfsrv_rec
),
2149 FREE(nam
, M_SONAME
);
2153 nfs_realign(&sio
.sb_mb
, 10 * NFSX_UNSIGNED
);
2154 rec
->nr_address
= nam
;
2155 rec
->nr_packet
= sio
.sb_mb
;
2156 STAILQ_INSERT_TAIL(&slp
->ns_rec
, rec
, nr_link
);
2161 if ((so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)
2162 && error
!= EWOULDBLOCK
) {
2163 slp
->ns_flag
|= SLP_DISCONN
;
2167 } while (sio
.sb_mb
);
2171 * If we were upcalled from the tcp protocol layer and we have
2172 * fully parsed records ready to go, or there is new data pending,
2173 * or something went wrong, try to wake up an nfsd thread to deal
2177 if (waitflag
== MB_DONTWAIT
&& (slp
->ns_numrec
> 0
2178 || (slp
->ns_flag
& (SLP_NEEDQ
| SLP_DISCONN
)))) {
2179 nfsrv_wakenfsd(slp
, nparallel_wakeup
);
2184 * Try and extract an RPC request from the mbuf data list received on a
2185 * stream socket. The "waitflag" argument indicates whether or not it
2189 nfsrv_getstream(struct nfssvc_sock
*slp
, int waitflag
, int *countp
)
2191 struct mbuf
*m
, **mpp
;
2194 struct mbuf
*om
, *m2
, *recm
;
2198 if (slp
->ns_reclen
== 0) {
2199 if (slp
->ns_cc
< NFSX_UNSIGNED
)
2202 if (m
->m_len
>= NFSX_UNSIGNED
) {
2203 bcopy(mtod(m
, caddr_t
), (caddr_t
)&recmark
, NFSX_UNSIGNED
);
2204 m
->m_data
+= NFSX_UNSIGNED
;
2205 m
->m_len
-= NFSX_UNSIGNED
;
2207 cp1
= (caddr_t
)&recmark
;
2208 cp2
= mtod(m
, caddr_t
);
2209 while (cp1
< ((caddr_t
)&recmark
) + NFSX_UNSIGNED
) {
2210 while (m
->m_len
== 0) {
2212 cp2
= mtod(m
, caddr_t
);
2219 slp
->ns_cc
-= NFSX_UNSIGNED
;
2220 recmark
= ntohl(recmark
);
2221 slp
->ns_reclen
= recmark
& ~0x80000000;
2222 if (recmark
& 0x80000000)
2223 slp
->ns_flag
|= SLP_LASTFRAG
;
2225 slp
->ns_flag
&= ~SLP_LASTFRAG
;
2226 if (slp
->ns_reclen
> NFS_MAXPACKET
|| slp
->ns_reclen
<= 0) {
2227 log(LOG_ERR
, "%s (%d) from nfs client\n",
2228 "impossible packet length",
2235 * Now get the record part.
2237 * Note that slp->ns_reclen may be 0. Linux sometimes
2238 * generates 0-length RPCs
2241 if (slp
->ns_cc
== slp
->ns_reclen
) {
2243 slp
->ns_raw
= slp
->ns_rawend
= (struct mbuf
*)0;
2244 slp
->ns_cc
= slp
->ns_reclen
= 0;
2245 } else if (slp
->ns_cc
> slp
->ns_reclen
) {
2248 om
= (struct mbuf
*)0;
2250 while (len
< slp
->ns_reclen
) {
2251 if ((len
+ m
->m_len
) > slp
->ns_reclen
) {
2252 m2
= m_copym(m
, 0, slp
->ns_reclen
- len
,
2260 m
->m_data
+= slp
->ns_reclen
- len
;
2261 m
->m_len
-= slp
->ns_reclen
- len
;
2262 len
= slp
->ns_reclen
;
2264 return (EWOULDBLOCK
);
2266 } else if ((len
+ m
->m_len
) == slp
->ns_reclen
) {
2271 om
->m_next
= (struct mbuf
*)0;
2286 * Accumulate the fragments into a record.
2288 mpp
= &slp
->ns_frag
;
2290 mpp
= &((*mpp
)->m_next
);
2292 if (slp
->ns_flag
& SLP_LASTFRAG
) {
2293 struct nfsrv_rec
*rec
;
2294 int mf
= (waitflag
& MB_DONTWAIT
) ? M_NOWAIT
: M_WAITOK
;
2295 rec
= kmalloc(sizeof(struct nfsrv_rec
), M_NFSRVDESC
, mf
);
2297 m_freem(slp
->ns_frag
);
2299 nfs_realign(&slp
->ns_frag
, 10 * NFSX_UNSIGNED
);
2300 rec
->nr_address
= (struct sockaddr
*)0;
2301 rec
->nr_packet
= slp
->ns_frag
;
2302 STAILQ_INSERT_TAIL(&slp
->ns_rec
, rec
, nr_link
);
2306 slp
->ns_frag
= (struct mbuf
*)0;
2312 * Parse an RPC header.
2315 nfsrv_dorec(struct nfssvc_sock
*slp
, struct nfsd
*nfsd
,
2316 struct nfsrv_descript
**ndp
)
2318 struct nfsrv_rec
*rec
;
2320 struct sockaddr
*nam
;
2321 struct nfsrv_descript
*nd
;
2325 if ((slp
->ns_flag
& SLP_VALID
) == 0 || !STAILQ_FIRST(&slp
->ns_rec
))
2327 rec
= STAILQ_FIRST(&slp
->ns_rec
);
2328 STAILQ_REMOVE_HEAD(&slp
->ns_rec
, nr_link
);
2329 KKASSERT(slp
->ns_numrec
> 0);
2331 nam
= rec
->nr_address
;
2333 kfree(rec
, M_NFSRVDESC
);
2334 MALLOC(nd
, struct nfsrv_descript
*, sizeof (struct nfsrv_descript
),
2335 M_NFSRVDESC
, M_WAITOK
);
2336 nd
->nd_md
= nd
->nd_mrep
= m
;
2338 nd
->nd_dpos
= mtod(m
, caddr_t
);
2339 error
= nfs_getreq(nd
, nfsd
, TRUE
);
2342 FREE(nam
, M_SONAME
);
2344 kfree((caddr_t
)nd
, M_NFSRVDESC
);
2353 * Try to assign service sockets to nfsd threads based on the number
2354 * of new rpc requests that have been queued on the service socket.
2356 * If no nfsd's are available or additional requests are pending, set the
2357 * NFSD_CHECKSLP flag so that one of the running nfsds will go look for
2358 * the work in the nfssvc_sock list when it is finished processing its
2359 * current work. This flag is only cleared when an nfsd can not find
2360 * any new work to perform.
/*
 * nfsrv_wakenfsd: wake waiting nfsd threads for service socket `slp`.
 *
 *   slp       - service socket with work pending; its SLP_VALID flag is
 *               tested first.
 *   nparallel - count that is pre-decremented after each wakeup() and
 *               compared against zero.
 *
 * Visible behaviour: walks nfsd_head via nfsd_chain; for each entry that
 * has NFSD_WAITING set, that flag is cleared and wakeup() is called on
 * the entry.  After the loop, SLP_DOREC is set on slp->ns_flag and
 * NFSD_CHECKSLP is set on nfsd_head_flag.
 *
 * NOTE(review): this listing omits the declaration of `nd`, the action
 * taken when SLP_VALID is clear, the condition guarding
 * panic("nfsd wakeup"), any statements between the panic and the
 * wakeup(), and the body of the `--nparallel == 0` test (presumably an
 * early exit -- confirm against the complete file).
 */
2363 nfsrv_wakenfsd(struct nfssvc_sock
*slp
, int nparallel
)
2367 if ((slp
->ns_flag
& SLP_VALID
) == 0)
2371 TAILQ_FOREACH(nd
, &nfsd_head
, nfsd_chain
) {
/* Only nfsd entries currently flagged as waiting are considered. */
2372 if (nd
->nfsd_flag
& NFSD_WAITING
) {
2373 nd
->nfsd_flag
&= ~NFSD_WAITING
;
2375 panic("nfsd wakeup");
2378 wakeup((caddr_t
)nd
);
2379 if (--nparallel
== 0)
/* Flag the socket and the global nfsd list so the work is picked up later. */
2384 slp
->ns_flag
|= SLP_DOREC
;
2385 nfsd_head_flag
|= NFSD_CHECKSLP
;
2388 #endif /* NFS_NOSERVER */