4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2014 Gary Mills
31 * nfs_tbind.c, common part for nfsd and lockd.
36 #include <netconfig.h>
42 #include <sys/resource.h>
47 #include <netinet/tcp.h>
50 #include "nfs_tbind.h"
52 #include <nfs/nfs_acl.h>
53 #include <nfs/nfssys.h>
56 #include <sys/socket.h>
57 #include <tsol/label.h>
60 * Determine valid semantics for most applications.
/*
 * OK_TPI_TYPE(_nconf)
 *
 * Evaluates to non-zero when the nc_semantics member of the object that
 * _nconf points to is NC_TPI_CLTS, NC_TPI_COTS or NC_TPI_COTS_ORD, and
 * to zero otherwise.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "p + 1" or "&table[i]") can be passed.  It is still evaluated
 * up to three times, so do not pass an expression with side effects.
 */
#define	OK_TPI_TYPE(_nconf)	\
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
/*
 * BE32_TO_U32(a)
 *
 * Reads the four bytes starting at address a and assembles them as a
 * big-endian (most significant byte first) 32-bit quantity.  The result
 * has type ulong_t.
 *
 * The argument is parenthesized before it is cast to (uchar_t *), so a
 * pointer expression such as "buf + 4" or "words + 1" is converted as a
 * whole rather than having the cast bind to its first operand only.
 * The argument is evaluated four times; avoid side effects.
 */
#define	BE32_TO_U32(a)	\
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8) | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
74 * Number of elements to add to the poll array on each allocation.
76 #define POLL_ARRAY_INC_SIZE 64
79 * Number of file descriptors by which the process soft limit may be
80 * increased on each call to nofile_increase(0).
82 #define NOFILE_INC_SIZE 64
85 * Default TCP send and receive buffer size of NFS server.
87 #define NFSD_TCP_BUFSZ (1024*1024)
90 struct conn_ind
*conn_next
;
91 struct conn_ind
*conn_prev
;
92 struct t_call
*conn_call
;
101 * this file contains transport routines common to nfsd and lockd
103 static int nofile_increase(int);
104 static int reuseaddr(int);
105 static int recvucred(int);
106 static int anonmlp(int);
107 static void add_to_poll_list(int, struct netconfig
*);
108 static char *serv_name_to_port_name(char *);
109 static int bind_to_proto(char *, char *, struct netbuf
**,
110 struct netconfig
**);
111 static int bind_to_provider(char *, char *, struct netbuf
**,
112 struct netconfig
**);
113 static void conn_close_oldest(void);
114 static boolean_t
conn_get(int, struct netconfig
*, struct conn_ind
**);
115 static void cots_listen_event(int, int);
116 static int discon_get(int, struct netconfig
*, struct conn_ind
**);
117 static int do_poll_clts_action(int, int);
118 static int do_poll_cots_action(int, int);
119 static void remove_from_poll_list(int);
120 static int set_addrmask(int, struct netconfig
*, struct netbuf
*);
121 static int is_listen_fd_index(int);
123 static struct pollfd
*poll_array
;
124 static struct conn_entry
*conn_polled
;
125 static int num_conns
; /* Current number of connections */
126 int (*Mysvc4
)(int, struct netbuf
*, struct netconfig
*, int,
128 static int setopt(int fd
, int level
, int name
, int value
);
129 static int get_opt(int fd
, int level
, int name
);
130 static void nfslib_set_sockbuf(int fd
);
133 * Called to create and prepare a transport descriptor for in-kernel
135 * Returns -1 on failure and a valid descriptor on success.
138 nfslib_transport_open(struct netconfig
*nconf
)
141 struct strioctl strioc
;
143 if ((nconf
== (struct netconfig
*)NULL
) ||
144 (nconf
->nc_device
== (char *)NULL
)) {
145 syslog(LOG_ERR
, "no netconfig device");
150 * Open the transport device.
152 fd
= t_open(nconf
->nc_device
, O_RDWR
, (struct t_info
*)NULL
);
154 if (t_errno
== TSYSERR
&& errno
== EMFILE
&&
155 (nofile_increase(0) == 0)) {
156 /* Try again with a higher NOFILE limit. */
157 fd
= t_open(nconf
->nc_device
, O_RDWR
,
158 (struct t_info
*)NULL
);
161 syslog(LOG_ERR
, "t_open %s failed: t_errno %d, %m",
162 nconf
->nc_device
, t_errno
);
168 * Pop timod because the RPC module must be as close as possible
171 if (ioctl(fd
, I_POP
, 0) < 0) {
172 syslog(LOG_ERR
, "I_POP of timod failed: %m");
178 * Common code for CLTS and COTS transports
180 if (ioctl(fd
, I_PUSH
, "rpcmod") < 0) {
181 syslog(LOG_ERR
, "I_PUSH of rpcmod failed: %m");
186 strioc
.ic_cmd
= RPC_SERVER
;
187 strioc
.ic_dp
= (char *)0;
189 strioc
.ic_timout
= -1;
191 /* Tell rpcmod to act like a server stream. */
192 if (ioctl(fd
, I_STR
, &strioc
) < 0) {
193 syslog(LOG_ERR
, "rpcmod set-up ioctl failed: %m");
199 * Re-push timod so that we will still be doing TLI
200 * operations on the descriptor.
202 if (ioctl(fd
, I_PUSH
, "timod") < 0) {
203 syslog(LOG_ERR
, "I_PUSH of timod failed: %m");
209 * Enable options of returning the ip's for udp.
211 if (strcmp(nconf
->nc_netid
, "udp6") == 0)
212 __rpc_tli_set_options(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, 1);
213 else if (strcmp(nconf
->nc_netid
, "udp") == 0)
214 __rpc_tli_set_options(fd
, IPPROTO_IP
, IP_RECVDSTADDR
, 1);
220 nofile_increase(int limit
)
224 if (getrlimit(RLIMIT_NOFILE
, &rl
) == -1) {
225 syslog(LOG_ERR
, "getrlimit of NOFILE failed: %m");
232 rl
.rlim_cur
+= NOFILE_INC_SIZE
;
234 if (rl
.rlim_cur
> rl
.rlim_max
&&
235 rl
.rlim_max
!= RLIM_INFINITY
)
236 rl
.rlim_max
= rl
.rlim_cur
;
238 if (setrlimit(RLIMIT_NOFILE
, &rl
) == -1) {
239 syslog(LOG_ERR
, "setrlimit of NOFILE to %d failed: %m",
248 nfslib_set_sockbuf(int fd
)
252 val
= NFSD_TCP_BUFSZ
;
254 curval
= get_opt(fd
, SOL_SOCKET
, SO_SNDBUF
);
255 syslog(LOG_DEBUG
, "Current SO_SNDBUF value is %d", curval
);
256 if ((curval
!= -1) && (curval
< val
)) {
257 syslog(LOG_DEBUG
, "Set SO_SNDBUF option to %d", val
);
258 if (setopt(fd
, SOL_SOCKET
, SO_SNDBUF
, val
) < 0) {
260 "couldn't set SO_SNDBUF to %d - t_errno = %d",
263 "Check and increase system-wide tcp_max_buf");
267 curval
= get_opt(fd
, SOL_SOCKET
, SO_RCVBUF
);
268 syslog(LOG_DEBUG
, "Current SO_RCVBUF value is %d", curval
);
269 if ((curval
!= -1) && (curval
< val
)) {
270 syslog(LOG_DEBUG
, "Set SO_RCVBUF option to %d", val
);
271 if (setopt(fd
, SOL_SOCKET
, SO_RCVBUF
, val
) < 0) {
273 "couldn't set SO_RCVBUF to %d - t_errno = %d",
276 "Check and increase system-wide tcp_max_buf");
282 nfslib_bindit(struct netconfig
*nconf
, struct netbuf
**addr
,
283 struct nd_hostserv
*hs
, int backlog
)
288 struct nd_addrlist
*addrlist
;
289 struct t_optmgmt req
, resp
;
292 bool_t use_any
= FALSE
;
295 if ((fd
= nfslib_transport_open(nconf
)) == -1) {
296 syslog(LOG_ERR
, "cannot establish transport service over %s",
301 addrlist
= (struct nd_addrlist
*)NULL
;
303 /* nfs4_callback service does not use a fixed port number */
305 if (strcmp(hs
->h_serv
, "nfs4_callback") == 0) {
310 gzone
= (getzoneid() == GLOBAL_ZONEID
);
311 } else if (netdir_getbyname(nconf
, hs
, &addrlist
) != 0) {
314 "Cannot get address for transport %s host %s service %s",
315 nconf
->nc_netid
, hs
->h_host
, hs
->h_serv
);
320 if (strcmp(nconf
->nc_proto
, "tcp") == 0) {
322 * If we're running over TCP, then set the
323 * SO_REUSEADDR option so that we can bind
324 * to our preferred address even if previously
325 * left connections exist in FIN_WAIT states.
326 * This is somewhat bogus, but otherwise you have
327 * to wait 2 minutes to restart after killing it.
329 if (reuseaddr(fd
) == -1) {
331 "couldn't set SO_REUSEADDR option on transport");
333 } else if (strcmp(nconf
->nc_proto
, "udp") == 0) {
335 * In order to run MLP on UDP, we need to handle creds.
337 if (recvucred(fd
) == -1) {
339 "couldn't set SO_RECVUCRED option on transport");
344 * Make non global zone nfs4_callback port MLP
346 if (use_any
&& is_system_labeled() && !gzone
) {
347 if (anonmlp(fd
) == -1) {
349 * failing to set this option means nfs4_callback
350 * could fail silently later. So fail it with
351 * an error message now.
354 "couldn't set SO_ANON_MLP option on transport");
360 if (nconf
->nc_semantics
== NC_TPI_CLTS
)
365 /* LINTED pointer alignment */
366 ntb
= (struct t_bind
*)t_alloc(fd
, T_BIND
, T_ALL
);
367 if (ntb
== (struct t_bind
*)NULL
) {
368 syslog(LOG_ERR
, "t_alloc failed: t_errno %d, %m", t_errno
);
370 netdir_free((void *)addrlist
, ND_ADDRLIST
);
375 * XXX - what about the space tb->addr.buf points to? This should
376 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
377 * shouldn't be called with T_ALL.
380 tb
.addr
= *(addrlist
->n_addrs
); /* structure copy */
382 if (t_bind(fd
, &tb
, ntb
) == -1) {
383 syslog(LOG_ERR
, "t_bind failed: t_errno %d, %m", t_errno
);
384 (void) t_free((char *)ntb
, T_BIND
);
385 netdir_free((void *)addrlist
, ND_ADDRLIST
);
390 /* make sure we bound to the right address */
391 if (use_any
== FALSE
&&
392 (tb
.addr
.len
!= ntb
->addr
.len
||
393 memcmp(tb
.addr
.buf
, ntb
->addr
.buf
, tb
.addr
.len
) != 0)) {
394 syslog(LOG_ERR
, "t_bind to wrong address");
395 (void) t_free((char *)ntb
, T_BIND
);
396 netdir_free((void *)addrlist
, ND_ADDRLIST
);
402 * Call nfs4svc_setport so that the kernel can be
403 * informed what port number the daemon is listening on
404 * for incoming connection requests.
407 if ((nconf
->nc_semantics
== NC_TPI_COTS
||
408 nconf
->nc_semantics
== NC_TPI_COTS_ORD
) && Mysvc4
!= NULL
)
409 (*Mysvc4
)(fd
, NULL
, nconf
, NFS4_SETPORT
, &ntb
->addr
);
412 netdir_free((void *)addrlist
, ND_ADDRLIST
);
414 if (strcmp(nconf
->nc_proto
, "tcp") == 0) {
416 * Disable the Nagle algorithm on TCP connections.
417 * Connections accepted from this listener will
418 * inherit the listener options.
421 /* LINTED pointer alignment */
422 opt
= (struct opthdr
*)reqbuf
;
423 opt
->level
= IPPROTO_TCP
;
424 opt
->name
= TCP_NODELAY
;
425 opt
->len
= sizeof (int);
427 /* LINTED pointer alignment */
428 *(int *)((char *)opt
+ sizeof (*opt
)) = 1;
430 req
.flags
= T_NEGOTIATE
;
431 req
.opt
.len
= sizeof (*opt
) + opt
->len
;
432 req
.opt
.buf
= (char *)opt
;
434 resp
.opt
.buf
= reqbuf
;
435 resp
.opt
.maxlen
= sizeof (reqbuf
);
437 if (t_optmgmt(fd
, &req
, &resp
) < 0 ||
438 resp
.flags
!= T_SUCCESS
) {
440 "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
441 nconf
->nc_proto
, t_errno
);
444 nfslib_set_sockbuf(fd
);
451 get_opt(int fd
, int level
, int name
)
453 struct t_optmgmt req
, res
;
459 reqbuf
.opt
.level
= level
;
460 reqbuf
.opt
.name
= name
;
461 reqbuf
.opt
.len
= sizeof (int);
464 req
.flags
= T_CURRENT
;
465 req
.opt
.len
= sizeof (reqbuf
);
466 req
.opt
.buf
= (char *)&reqbuf
;
469 res
.opt
.buf
= (char *)&reqbuf
;
470 res
.opt
.maxlen
= sizeof (reqbuf
);
472 if (t_optmgmt(fd
, &req
, &res
) < 0 || res
.flags
!= T_SUCCESS
) {
473 t_error("t_optmgmt");
476 return (reqbuf
.value
);
480 setopt(int fd
, int level
, int name
, int value
)
482 struct t_optmgmt req
, resp
;
488 reqbuf
.opt
.level
= level
;
489 reqbuf
.opt
.name
= name
;
490 reqbuf
.opt
.len
= sizeof (int);
492 reqbuf
.value
= value
;
494 req
.flags
= T_NEGOTIATE
;
495 req
.opt
.len
= sizeof (reqbuf
);
496 req
.opt
.buf
= (char *)&reqbuf
;
499 resp
.opt
.buf
= (char *)&reqbuf
;
500 resp
.opt
.maxlen
= sizeof (reqbuf
);
502 if (t_optmgmt(fd
, &req
, &resp
) < 0 || resp
.flags
!= T_SUCCESS
) {
503 t_error("t_optmgmt");
512 return (setopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, 1));
518 return (setopt(fd
, SOL_SOCKET
, SO_RECVUCRED
, 1));
524 return (setopt(fd
, SOL_SOCKET
, SO_ANON_MLP
, 1));
528 nfslib_log_tli_error(char *tli_name
, int fd
, struct netconfig
*nconf
)
533 * Save the error code across syslog(), just in case syslog()
534 * gets its own error and, therefore, overwrites errno.
537 if (t_errno
== TSYSERR
) {
538 syslog(LOG_ERR
, "%s(file descriptor %d/transport %s) %m",
539 tli_name
, fd
, nconf
->nc_proto
);
542 "%s(file descriptor %d/transport %s) TLI error %d",
543 tli_name
, fd
, nconf
->nc_proto
, t_errno
);
549 * Called to set up service over a particular transport.
552 do_one(char *provider
, NETSELDECL(proto
), struct protob
*protobp0
,
553 int (*svc
)(int, struct netbuf
, struct netconfig
*))
556 struct protob
*protobp
;
557 struct netbuf
*retaddr
;
558 struct netconfig
*retnconf
;
559 struct netbuf addrmask
;
565 sock
= bind_to_provider(provider
, protobp0
->serv
, &retaddr
,
568 sock
= bind_to_proto(proto
, protobp0
->serv
, &retaddr
,
572 (void) syslog(LOG_ERR
,
573 "Cannot establish %s service over %s: transport setup problem.",
574 protobp0
->serv
, provider
? provider
: proto
);
578 if (set_addrmask(sock
, retnconf
, &addrmask
) < 0) {
579 (void) syslog(LOG_ERR
,
580 "Cannot set address mask for %s", retnconf
->nc_netid
);
585 * Register all versions of the programs in the protocol block list.
588 for (protobp
= protobp0
; protobp
; protobp
= protobp
->next
) {
589 for (vers
= protobp
->versmin
; vers
<= protobp
->versmax
;
591 if ((protobp
->program
== NFS_PROGRAM
||
592 protobp
->program
== NFS_ACL_PROGRAM
) &&
594 strncasecmp(retnconf
->nc_proto
, NC_UDP
, l
) == 0)
597 (void) rpcb_unset(protobp
->program
, vers
, retnconf
);
598 (void) rpcb_set(protobp
->program
, vers
, retnconf
,
604 * Register services with CLTS semantics right now.
605 * Note: services with COTS/COTS_ORD semantics will be
606 * registered later from cots_listen_event function.
608 if (retnconf
->nc_semantics
== NC_TPI_CLTS
) {
609 /* Don't drop core if supporting module(s) aren't loaded. */
610 (void) signal(SIGSYS
, SIG_IGN
);
613 * svc() doesn't block, it returns success or failure.
616 if (svc
== NULL
&& Mysvc4
!= NULL
)
617 err
= (*Mysvc4
)(sock
, &addrmask
, retnconf
,
618 NFS4_SETPORT
|NFS4_KRPC_START
, retaddr
);
620 err
= (*svc
)(sock
, addrmask
, retnconf
);
623 (void) syslog(LOG_ERR
,
624 "Cannot establish %s service over <file desc."
625 " %d, protocol %s> : %m. Exiting",
626 protobp0
->serv
, sock
, retnconf
->nc_proto
);
633 * We successfully set up the server over this transport.
634 * Add this descriptor to the one being polled on.
636 add_to_poll_list(sock
, retnconf
);
640 * Set up the NFS service over all the available transports.
641 * Returns -1 for failure, 0 for success.
644 do_all(struct protob
*protobp
,
645 int (*svc
)(int, struct netbuf
, struct netconfig
*))
647 struct netconfig
*nconf
;
651 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
652 syslog(LOG_ERR
, "setnetconfig failed: %m");
656 while (nconf
= getnetconfig(nc
)) {
657 if ((nconf
->nc_flag
& NC_VISIBLE
) &&
658 strcmp(nconf
->nc_protofmly
, NC_LOOPBACK
) != 0 &&
659 OK_TPI_TYPE(nconf
) &&
660 (protobp
->program
!= NFS4_CALLBACK
||
661 strncasecmp(nconf
->nc_proto
, NC_UDP
, l
) != 0))
662 do_one(nconf
->nc_device
, nconf
->nc_proto
,
665 (void) endnetconfig(nc
);
670 * poll on the open transport descriptors for events and errors.
673 poll_for_action(void)
679 * Keep polling until all transports have been closed. When this
680 * happens, we return.
682 while ((int)num_fds
> 0) {
683 nfds
= poll(poll_array
, num_fds
, INFTIM
);
690 * Some errors from poll could be
691 * due to temporary conditions, and we try to
692 * be robust in the face of them. Other
693 * errors (should never happen in theory)
694 * are fatal (eg. EINVAL, EFAULT).
706 (void) syslog(LOG_ERR
,
707 "poll failed: %m. Exiting");
715 * Go through the poll list looking for events.
717 for (i
= 0; i
< num_fds
&& nfds
> 0; i
++) {
718 if (poll_array
[i
].revents
) {
721 * We have a message, so try to read it.
722 * Record the error return in errno,
723 * so that syslog(LOG_ERR, "...%m")
724 * dumps the corresponding error string.
726 if (conn_polled
[i
].nc
.nc_semantics
==
728 errno
= do_poll_clts_action(
729 poll_array
[i
].fd
, i
);
731 errno
= do_poll_cots_action(
732 poll_array
[i
].fd
, i
);
738 * Most returned error codes mean that there is
739 * fatal condition which we can only deal with
740 * by closing the transport.
742 if (errno
!= EAGAIN
&& errno
!= ENOMEM
) {
743 (void) syslog(LOG_ERR
,
744 "Error (%m) reading descriptor %d/transport %s. Closing it.",
746 conn_polled
[i
].nc
.nc_proto
);
747 (void) t_close(poll_array
[i
].fd
);
748 remove_from_poll_list(poll_array
[i
].fd
);
750 } else if (errno
== ENOMEM
)
756 (void) syslog(LOG_ERR
,
757 "All transports have been closed with errors. Exiting.");
761 * Allocate poll/transport array entries for this descriptor.
764 add_to_poll_list(int fd
, struct netconfig
*nconf
)
766 static int poll_array_size
= 0;
769 * If the arrays are full, allocate new ones.
771 if (num_fds
== poll_array_size
) {
773 struct conn_entry
*tnp
;
775 if (poll_array_size
!= 0) {
779 tpa
= (struct pollfd
*)0;
781 poll_array_size
+= POLL_ARRAY_INC_SIZE
;
783 * Allocate new arrays.
785 poll_array
= (struct pollfd
*)
786 malloc(poll_array_size
* sizeof (struct pollfd
) + 256);
787 conn_polled
= (struct conn_entry
*)
788 malloc(poll_array_size
* sizeof (struct conn_entry
) + 256);
789 if (poll_array
== (struct pollfd
*)NULL
||
790 conn_polled
== (struct conn_entry
*)NULL
) {
791 syslog(LOG_ERR
, "malloc failed for poll array");
796 * Copy the data of the old ones into new arrays, and
800 (void) memcpy((void *)poll_array
, (void *)tpa
,
801 num_fds
* sizeof (struct pollfd
));
802 (void) memcpy((void *)conn_polled
, (void *)tnp
,
803 num_fds
* sizeof (struct conn_entry
));
810 * Set the descriptor and event list. All possible events are
813 poll_array
[num_fds
].fd
= fd
;
814 poll_array
[num_fds
].events
= POLLIN
|POLLRDNORM
|POLLRDBAND
|POLLPRI
;
817 * Copy the transport data over too.
819 conn_polled
[num_fds
].nc
= *nconf
;
820 conn_polled
[num_fds
].closing
= 0;
823 * Set the descriptor to non-blocking. Avoids a race
824 * between data arriving on the stream and then having it
825 * flushed before we can read it.
827 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) == -1) {
828 (void) syslog(LOG_ERR
,
829 "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
830 num_fds
, nconf
->nc_proto
);
835 * Count this descriptor.
841 remove_from_poll_list(int fd
)
846 for (i
= 0; i
< num_fds
; i
++) {
847 if (poll_array
[i
].fd
== fd
) {
849 num_to_copy
= num_fds
- i
;
850 (void) memcpy((void *)&poll_array
[i
],
851 (void *)&poll_array
[i
+1],
852 num_to_copy
* sizeof (struct pollfd
));
853 (void) memset((void *)&poll_array
[num_fds
], 0,
854 sizeof (struct pollfd
));
855 (void) memcpy((void *)&conn_polled
[i
],
856 (void *)&conn_polled
[i
+1],
857 num_to_copy
* sizeof (struct conn_entry
));
858 (void) memset((void *)&conn_polled
[num_fds
], 0,
859 sizeof (struct conn_entry
));
863 syslog(LOG_ERR
, "attempt to remove nonexistent fd from poll list");
868 * Called to read and interpret the event on a connectionless descriptor.
869 * Returns 0 if successful, or a UNIX error code if failure.
872 do_poll_clts_action(int fd
, int conn_index
)
877 struct netconfig
*nconf
= &conn_polled
[conn_index
].nc
;
878 static struct t_unitdata
*unitdata
= NULL
;
879 static struct t_uderr
*uderr
= NULL
;
880 static int oldfd
= -1;
881 struct nd_hostservlist
*host
= NULL
;
882 struct strbuf ctl
[1], data
[1];
884 * We just need to have some space to consume the
885 * message in the event we can't use the TLI interface to do the
888 * We flush the message using getmsg(). For the control part
889 * we allocate enough for any TPI header plus 32 bytes for address
890 * and options. For the data part, there is nothing magic about
891 * the size of the array, but 256 bytes is probably better than
892 * 1 byte, and we don't expect any data portion anyway.
894 * If the array sizes are too small, we handle this because getmsg()
895 * (called to consume the message) will return MOREDATA|MORECTL.
896 * Thus we just call getmsg() until it's read the message.
898 char ctlbuf
[sizeof (union T_primitives
) + 32];
902 * If this is the same descriptor as the last time
903 * do_poll_clts_action was called, we can save some
904 * de-allocation and allocation.
910 (void) t_free((char *)unitdata
, T_UNITDATA
);
914 (void) t_free((char *)uderr
, T_UDERROR
);
920 * Allocate a unitdata structure for receiving the event.
922 if (unitdata
== NULL
) {
923 /* LINTED pointer alignment */
924 unitdata
= (struct t_unitdata
*)t_alloc(fd
, T_UNITDATA
, T_ALL
);
925 if (unitdata
== NULL
) {
926 if (t_errno
== TSYSERR
) {
928 * Save the error code across
929 * syslog(), just in case
930 * syslog() gets its own error
931 * and therefore overwrites errno.
934 (void) syslog(LOG_ERR
,
935 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
936 fd
, nconf
->nc_proto
);
939 (void) syslog(LOG_ERR
,
940 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
941 fd
, nconf
->nc_proto
, t_errno
);
950 * The idea is we wait for T_UNITDATA_IND's. Of course,
951 * we don't get any, because rpcmod filters them out.
952 * However, we need to call t_rcvudata() to let TLI
953 * tell us we have a T_UDERROR_IND.
956 * t_rcvudata(), expecting TLOOK.
957 * t_look(), expecting T_UDERR.
958 * t_rcvuderr(), expecting success (0).
959 * expand destination address into ASCII,
963 ret
= t_rcvudata(fd
, unitdata
, &flags
);
964 if (ret
== 0 || t_errno
== TBUFOVFLW
) {
965 (void) syslog(LOG_WARNING
,
966 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
967 fd
, nconf
->nc_proto
, unitdata
->udata
.len
);
970 * Even though we don't expect any data, in case we do,
971 * keep reading until there is no more.
984 * System errors are returned to caller.
985 * Save the error code across
986 * syslog(), just in case
987 * syslog() gets its own error
988 * and therefore overwrites errno.
991 (void) syslog(LOG_ERR
,
992 "t_rcvudata(file descriptor %d/transport %s) %m",
993 fd
, nconf
->nc_proto
);
998 (void) syslog(LOG_ERR
,
999 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1000 fd
, nconf
->nc_proto
, t_errno
);
1010 * System errors are returned to caller.
1012 if (t_errno
== TSYSERR
) {
1014 * Save the error code across
1015 * syslog(), just in case
1016 * syslog() gets its own error
1017 * and therefore overwrites errno.
1020 (void) syslog(LOG_ERR
,
1021 "t_look(file descriptor %d/transport %s) %m",
1022 fd
, nconf
->nc_proto
);
1025 (void) syslog(LOG_ERR
,
1026 "t_look(file descriptor %d/transport %s) TLI error %d",
1027 fd
, nconf
->nc_proto
, t_errno
);
1032 (void) syslog(LOG_WARNING
,
1033 "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1034 fd
, nconf
->nc_proto
, ret
, T_UDERR
);
1037 if (uderr
== NULL
) {
1038 /* LINTED pointer alignment */
1039 uderr
= (struct t_uderr
*)t_alloc(fd
, T_UDERROR
, T_ALL
);
1040 if (uderr
== NULL
) {
1041 if (t_errno
== TSYSERR
) {
1043 * Save the error code across
1044 * syslog(), just in case
1045 * syslog() gets its own error
1046 * and therefore overwrites errno.
1049 (void) syslog(LOG_ERR
,
1050 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1051 fd
, nconf
->nc_proto
);
1054 (void) syslog(LOG_ERR
,
1055 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1056 fd
, nconf
->nc_proto
, t_errno
);
1061 ret
= t_rcvuderr(fd
, uderr
);
1065 * Save the datagram error in errno, so that the
1066 * %m argument to syslog picks up the error string.
1068 errno
= uderr
->error
;
1071 * Log the datagram error, then log the host that
1072 * probably triggered it. Cannot log both in the
1073 * same transaction because of packet size limitations
1076 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1077 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1078 fd
, nconf
->nc_proto
);
1081 * Try to map the client's address back to a
1084 ret
= netdir_getbyaddr(nconf
, &host
, &uderr
->addr
);
1085 if (ret
!= -1 && host
&& host
->h_cnt
> 0 &&
1086 host
->h_hostservs
) {
1087 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1088 "Bad NFS response was sent to client with host name: %s; service port: %s",
1089 host
->h_hostservs
->h_host
,
1090 host
->h_hostservs
->h_serv
);
1094 char *hex
= "0123456789abcdef";
1097 * Mapping failed, print the whole thing
1100 buf
= (char *)malloc(uderr
->addr
.len
* 2 + 1);
1101 for (i
= 0, j
= 0; i
< uderr
->addr
.len
; i
++, j
+= 2) {
1102 buf
[j
] = hex
[((uderr
->addr
.buf
[i
]) >> 4) & 0xf];
1103 buf
[j
+1] = hex
[uderr
->addr
.buf
[i
] & 0xf];
1106 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1107 "Bad NFS response was sent to client with transport address: 0x%s",
1112 if (ret
== 0 && host
!= NULL
)
1113 netdir_free((void *)host
, ND_HOSTSERVLIST
);
1122 * System errors are returned to caller.
1123 * Save the error code across
1124 * syslog(), just in case
1125 * syslog() gets its own error
1126 * and therefore overwrites errno.
1129 (void) syslog(LOG_ERR
,
1130 "t_rcvuderr(file descriptor %d/transport %s) %m",
1131 fd
, nconf
->nc_proto
);
1134 (void) syslog(LOG_ERR
,
1135 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1136 fd
, nconf
->nc_proto
, t_errno
);
1142 * If we get here, then we could not cope with whatever message
1143 * we attempted to read, so flush it. If we did read a message,
1144 * and one isn't present, that is all right, because fd is in
1147 (void) syslog(LOG_ERR
,
1148 "Flushing one input message from <file descriptor %d/transport %s>",
1149 fd
, nconf
->nc_proto
);
1152 * Read and discard the message. Do this until there is
1153 * no more control/data in the message or until we get an error.
1156 ctl
->maxlen
= sizeof (ctlbuf
);
1158 data
->maxlen
= sizeof (databuf
);
1159 data
->buf
= databuf
;
1161 ret
= getmsg(fd
, ctl
, data
, &flags
);
1170 conn_close_oldest(void)
1176 * Find the oldest connection that is not already in the
1177 * process of shutting down.
1179 for (i1
= end_listen_fds
; /* no conditional expression */; i1
++) {
1182 if (conn_polled
[i1
].closing
== 0)
1186 printf("too many connections (%d), releasing oldest (%d)\n",
1187 num_conns
, poll_array
[i1
].fd
);
1189 syslog(LOG_WARNING
, "too many connections (%d), releasing oldest (%d)",
1190 num_conns
, poll_array
[i1
].fd
);
1192 fd
= poll_array
[i1
].fd
;
1193 if (conn_polled
[i1
].nc
.nc_semantics
== NC_TPI_COTS
) {
1195 * For politeness, send a T_DISCON_REQ to the transport
1196 * provider. We close the stream anyway.
1198 (void) t_snddis(fd
, (struct t_call
*)0);
1200 remove_from_poll_list(fd
);
1204 * For orderly release, we do not close the stream
1205 * until the T_ORDREL_IND arrives to complete
1208 if (t_sndrel(fd
) == 0)
1209 conn_polled
[i1
].closing
= 1;
1214 conn_get(int fd
, struct netconfig
*nconf
, struct conn_ind
**connp
)
1216 struct conn_ind
*conn
;
1217 struct conn_ind
*next_conn
;
1219 conn
= (struct conn_ind
*)malloc(sizeof (*conn
));
1221 syslog(LOG_ERR
, "malloc for listen indication failed");
1225 /* LINTED pointer alignment */
1226 conn
->conn_call
= (struct t_call
*)t_alloc(fd
, T_CALL
, T_ALL
);
1227 if (conn
->conn_call
== NULL
) {
1229 nfslib_log_tli_error("t_alloc", fd
, nconf
);
1233 if (t_listen(fd
, conn
->conn_call
) == -1) {
1234 nfslib_log_tli_error("t_listen", fd
, nconf
);
1235 (void) t_free((char *)conn
->conn_call
, T_CALL
);
1240 if (conn
->conn_call
->udata
.len
> 0) {
1242 "rejecting inbound connection(%s) with %d bytes of connect data",
1243 nconf
->nc_proto
, conn
->conn_call
->udata
.len
);
1245 conn
->conn_call
->udata
.len
= 0;
1246 (void) t_snddis(fd
, conn
->conn_call
);
1247 (void) t_free((char *)conn
->conn_call
, T_CALL
);
1252 if ((next_conn
= *connp
) != NULL
) {
1253 next_conn
->conn_prev
->conn_next
= conn
;
1254 conn
->conn_next
= next_conn
;
1255 conn
->conn_prev
= next_conn
->conn_prev
;
1256 next_conn
->conn_prev
= conn
;
1258 conn
->conn_next
= conn
;
1259 conn
->conn_prev
= conn
;
1266 discon_get(int fd
, struct netconfig
*nconf
, struct conn_ind
**connp
)
1268 struct conn_ind
*conn
;
1269 struct t_discon discon
;
1271 discon
.udata
.buf
= (char *)0;
1272 discon
.udata
.maxlen
= 0;
1273 if (t_rcvdis(fd
, &discon
) == -1) {
1274 nfslib_log_tli_error("t_rcvdis", fd
, nconf
);
1283 if (conn
->conn_call
->sequence
== discon
.sequence
) {
1284 if (conn
->conn_next
== conn
)
1285 *connp
= (struct conn_ind
*)0;
1287 if (conn
== *connp
) {
1288 *connp
= conn
->conn_next
;
1290 conn
->conn_next
->conn_prev
= conn
->conn_prev
;
1291 conn
->conn_prev
->conn_next
= conn
->conn_next
;
1296 conn
= conn
->conn_next
;
1297 } while (conn
!= *connp
);
1303 cots_listen_event(int fd
, int conn_index
)
1305 struct t_call
*call
;
1306 struct conn_ind
*conn
;
1307 struct conn_ind
*conn_head
;
1309 struct netconfig
*nconf
= &conn_polled
[conn_index
].nc
;
1311 struct netbuf addrmask
;
1314 char *clnt_uaddr
= NULL
;
1315 struct nd_hostservlist
*clnt_serv
= NULL
;
1318 (void) conn_get(fd
, nconf
, &conn_head
);
1320 while ((conn
= conn_head
) != NULL
) {
1321 conn_head
= conn
->conn_next
;
1322 if (conn_head
== conn
)
1325 conn_head
->conn_prev
= conn
->conn_prev
;
1326 conn
->conn_prev
->conn_next
= conn_head
;
1328 call
= conn
->conn_call
;
1332 * If we have already accepted the maximum number of
1333 * connections allowed on the command line, then drop
1334 * the oldest connection (for any protocol) before
1335 * accepting the new connection. Unless explicitly
1336 * set on the command line, max_conns_allowed is -1.
1338 if (max_conns_allowed
!= -1 && num_conns
>= max_conns_allowed
)
1339 conn_close_oldest();
1342 * Create a new transport endpoint for the same proto as
1345 new_fd
= nfslib_transport_open(nconf
);
1347 call
->udata
.len
= 0;
1348 (void) t_snddis(fd
, call
);
1349 (void) t_free((char *)call
, T_CALL
);
1350 syslog(LOG_ERR
, "Cannot establish transport over %s",
1355 /* Bind to a generic address/port for the accepting stream. */
1356 if (t_bind(new_fd
, NULL
, NULL
) == -1) {
1357 nfslib_log_tli_error("t_bind", new_fd
, nconf
);
1358 call
->udata
.len
= 0;
1359 (void) t_snddis(fd
, call
);
1360 (void) t_free((char *)call
, T_CALL
);
1361 (void) t_close(new_fd
);
1365 while (t_accept(fd
, new_fd
, call
) == -1) {
1366 if (t_errno
!= TLOOK
) {
1368 nfslib_log_tli_error("t_accept", fd
, nconf
);
1370 call
->udata
.len
= 0;
1371 (void) t_snddis(fd
, call
);
1372 (void) t_free((char *)call
, T_CALL
);
1373 (void) t_close(new_fd
);
1376 while (event
= t_look(fd
)) {
1381 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf
->nc_proto
);
1383 (void) conn_get(fd
, nconf
, &conn_head
);
1388 "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1391 (void) discon_get(fd
, nconf
,
1396 "unexpected event 0x%x during accept processing (%s)",
1397 event
, nconf
->nc_proto
);
1398 call
->udata
.len
= 0;
1399 (void) t_snddis(fd
, call
);
1400 (void) t_free((char *)call
, T_CALL
);
1401 (void) t_close(new_fd
);
1407 if (set_addrmask(new_fd
, nconf
, &addrmask
) < 0) {
1408 (void) syslog(LOG_ERR
,
1409 "Cannot set address mask for %s",
1411 (void) t_snddis(new_fd
, NULL
);
1412 (void) t_free((char *)call
, T_CALL
);
1413 (void) t_close(new_fd
);
1417 /* Tell kRPC about the new stream. */
1419 ret
= (*Mysvc4
)(new_fd
, &addrmask
, nconf
,
1420 NFS4_KRPC_START
, &call
->addr
);
1422 ret
= (*Mysvc
)(new_fd
, addrmask
, nconf
);
1425 if (errno
!= ENOTCONN
) {
1427 "unable to register new connection: %m");
1430 * This is the only error that could be
1431 * caused by the client, so who was it?
1433 if (netdir_getbyaddr(nconf
, &clnt_serv
,
1434 &(call
->addr
)) == ND_OK
&&
1435 clnt_serv
->h_cnt
> 0)
1436 clnt
= clnt_serv
->h_hostservs
->h_host
;
1438 clnt
= clnt_uaddr
= taddr2uaddr(nconf
,
1441 * If we don't know who the client was,
1446 "unable to register new connection: client %s has dropped connection", clnt
);
1448 netdir_free(clnt_serv
, ND_HOSTSERVLIST
);
1457 (void) t_snddis(new_fd
, NULL
);
1458 (void) t_free((char *)call
, T_CALL
);
1459 (void) t_close(new_fd
);
1464 (void) t_free((char *)call
, T_CALL
);
1467 * Poll on the new descriptor so that we get disconnect
1468 * and orderly release indications.
1471 add_to_poll_list(new_fd
, nconf
);
1473 /* Reset nconf in case it has been moved. */
1474 nconf
= &conn_polled
[conn_index
].nc
;
/*
 * do_poll_cots_action(fd, conn_index)
 *
 * Act on the TLI events pending on a polled connection-oriented endpoint.
 *
 *	fd		endpoint handed to t_look() and the other t_*() calls
 *	conn_index	index into conn_polled[]; the selected entry supplies
 *			the netconfig (->nc) and the "closing" flag used below
 *
 * Each pass of the loop takes one value from t_look(fd); the body runs
 * for any non-zero value.  What is visible in this listing:
 *
 *  - T_LISTEN (per the debug message): handed to
 *    cots_listen_event(fd, conn_index).
 *  - T_DATA (per the debug message): t_rcv() reads a private notification
 *    from CONS rpcmod into buf and BE32_TO_U32() decodes its first four
 *    bytes as a big-endian value.  Values 1 and 2 mean the connection has
 *    been idle too long: t_snddis() is issued when the transport is
 *    NC_TPI_COTS or connent->closing is already non-zero; the listing
 *    then shows t_sndrel() followed by connent->closing = 1.  Any other
 *    value is logged as unexpected.
 *  - T_ORDREL (per the debug message): t_rcvrel() is called; when it
 *    returns 0, t_sndrel() is sent unless conn_index is a listen index.
 *    A failure other than TLOOK is reported via nfslib_log_tli_error().
 *  - T_DISCONNECT (per the debug message): t_rcvdis() is called and a
 *    failure is reported via nfslib_log_tli_error().
 *  - Otherwise: an "unexpected TLI event" message is logged, using
 *    strerror(errno) when t_errno is TSYSERR, or t_strerror(t_errno)
 *    when t_look() itself returned -1.
 *
 * remove_from_poll_list(fd) is the last call visible in the function.
 *
 * NOTE(review): case labels, breaks/returns/gotos and several
 * declarations (event, buf, i1, flags) are absent from this listing, so
 * the exact flow between the branches above must be confirmed against
 * the full file.
 */
1480 do_poll_cots_action(int fd
, int conn_index
)
1486 struct conn_entry
*connent
= &conn_polled
[conn_index
];
1487 struct netconfig
*nconf
= &(connent
->nc
);
1488 const char *errorstr
;
1490 while (event
= t_look(fd
)) {
1494 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf
->nc_proto
, fd
);
1496 cots_listen_event(fd
, conn_index
);
/*
 * NOTE(review): this debug message prints (fd, proto) while the other
 * debug messages in this function print (proto, fd).
 */
1501 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd
, nconf
->nc_proto
);
1504 * Receive a private notification from CONS rpcmod.
1506 i1
= t_rcv(fd
, buf
, sizeof (buf
), &flags
);
1508 syslog(LOG_ERR
, "t_rcv failed");
/*
 * NOTE(review): sizeof yields a size_t; if i1 is a signed int (its
 * declaration is not shown) this comparison is performed unsigned --
 * confirm that negative values are filtered out before this point.
 */
1511 if (i1
< sizeof (int))
1513 i1
= BE32_TO_U32(buf
);
1514 if (i1
== 1 || i1
== 2) {
1516 * This connection has been idle for too long,
1517 * so release it as politely as we can. If we
1518 * have already initiated an orderly release
1519 * and we get notified that the stream is
1520 * still idle, pull the plug. This prevents
1521 * hung connections from continuing to consume
1525 printf("do_poll_cots_action(%s,%d): ", nconf
->nc_proto
, fd
);
1526 printf("initiating orderly release of idle connection\n");
1528 if (nconf
->nc_semantics
== NC_TPI_COTS
||
1529 connent
->closing
!= 0) {
1530 (void) t_snddis(fd
, (struct t_call
*)0);
1534 * For NC_TPI_COTS_ORD, the stream is closed
1535 * and removed from the poll list when the
1536 * T_ORDREL is received from the provider. We
1537 * don't wait for it here because it may take
1538 * a while for the transport to shut down.
1540 if (t_sndrel(fd
) == -1) {
1542 "unable to send orderly release %m");
1544 connent
->closing
= 1;
1547 "unexpected event from CONS rpcmod %d", i1
);
1552 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf
->nc_proto
, fd
);
1554 /* Perform an orderly release. */
1555 if (t_rcvrel(fd
) == 0) {
1556 /* T_ORDREL on listen fd's should be ignored */
1557 if (!is_listen_fd_index(conn_index
)) {
1558 (void) t_sndrel(fd
);
1563 } else if (t_errno
== TLOOK
) {
1566 nfslib_log_tli_error("t_rcvrel", fd
, nconf
);
1569 * check to make sure we do not close
1572 if (is_listen_fd_index(conn_index
))
1580 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf
->nc_proto
, fd
);
1582 if (t_rcvdis(fd
, (struct t_discon
*)NULL
) == -1)
1583 nfslib_log_tli_error("t_rcvdis", fd
, nconf
);
1586 * T_DISCONNECT on listen fd's should be ignored.
1588 if (is_listen_fd_index(conn_index
))
1594 if (t_errno
== TSYSERR
) {
1595 if ((errorstr
= strerror(errno
)) == NULL
) {
1597 "Unknown error num %d", errno
);
1598 errorstr
= (const char *) buf
;
1600 } else if (event
== -1)
1601 errorstr
= t_strerror(t_errno
);
1605 "unexpected TLI event (0x%x) on "
1606 "connection-oriented transport(%s,%d):%s",
1607 event
, nconf
->nc_proto
, fd
, errorstr
);
1610 remove_from_poll_list(fd
);
/*
 * serv_name_to_port_name(name)
 *
 * Translate a service name into the RPC port name string.
 *
 *	name	NUL-terminated service name; compared with strcmp(), so
 *		the match is exact and case-sensitive and name must not
 *		be NULL
 *
 * Recognized names are "NFS", "NLM" and "NFS4_CALLBACK".
 * "NFS4_CALLBACK" yields "nfs4_callback"; a name that matches none of
 * them yields "unrecognized".  The returned strings are literals and
 * must not be modified or freed.
 *
 * NOTE(review): the values returned for "NFS" and "NLM" are on lines
 * that are absent from this listing -- see the full file.
 */
1620 serv_name_to_port_name(char *name
)
1623 * Map service names (used primarily in logging) to
1624 * RPC port names (used by netdir_*() routines).
1626 if (strcmp(name
, "NFS") == 0) {
1628 } else if (strcmp(name
, "NLM") == 0) {
1630 } else if (strcmp(name
, "NFS4_CALLBACK") == 0) {
1631 return ("nfs4_callback");
1634 return ("unrecognized");
/*
 * bind_to_provider(provider, serv, addr, retnconf)
 *
 * Find the netconfig entry for a transport provider and bind to it.
 *
 *	provider	device name compared against nc_device with strcmp()
 *	serv		service name; converted with serv_name_to_port_name()
 *			and stored in hs.h_serv
 *	addr		passed through to nfslib_bindit()
 *	retnconf	out parameter; its assignment is not visible in this
 *			listing -- presumably receives the matching entry,
 *			confirm against the full file
 *
 * The netconfig database is walked with setnetconfig()/getnetconfig().
 * The first entry that passes OK_TPI_TYPE() and whose nc_device equals
 * provider is handed to nfslib_bindit() together with a nd_hostserv
 * describing HOST_SELF and the mapped service, and that call's result is
 * returned.  If setnetconfig() fails, or no entry matches, an error is
 * logged with syslog(); the return value on those paths is not visible
 * here.
 *
 * NOTE(review): endnetconfig(nc) is only visible on the no-match path.
 * Presumably the handle is left open on success because the returned
 * netconfig entry is owned by it -- confirm.
 */
1638 bind_to_provider(char *provider
, char *serv
, struct netbuf
**addr
,
1639 struct netconfig
**retnconf
)
1641 struct netconfig
*nconf
;
1643 struct nd_hostserv hs
;
1645 hs
.h_host
= HOST_SELF
;
1646 hs
.h_serv
= serv_name_to_port_name(serv
);
1648 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
1649 syslog(LOG_ERR
, "setnetconfig failed: %m");
1652 while (nconf
= getnetconfig(nc
)) {
1653 if (OK_TPI_TYPE(nconf
) &&
1654 strcmp(nconf
->nc_device
, provider
) == 0) {
1656 return (nfslib_bindit(nconf
, addr
, &hs
,
1660 (void) endnetconfig(nc
);
1662 syslog(LOG_ERR
, "couldn't find netconfig entry for provider %s",
/*
 * bind_to_proto(proto, serv, addr, retnconf)
 *
 * Find the netconfig entry for a protocol and bind to it.
 *
 *	proto		protocol selector, declared via NETSELDECL() and
 *			compared against nc_proto with NETSELEQ()
 *	serv		service name; converted with serv_name_to_port_name()
 *			and stored in hs.h_serv
 *	addr		passed through to nfslib_bindit()
 *	retnconf	out parameter; its assignment is not visible in this
 *			listing -- presumably receives the matching entry,
 *			confirm against the full file
 *
 * The netconfig database is walked with setnetconfig()/getnetconfig().
 * The first entry that passes OK_TPI_TYPE() and whose nc_proto matches
 * proto is handed to nfslib_bindit() together with a nd_hostserv
 * describing HOST_SELF and the mapped service, and that call's result is
 * returned.  If setnetconfig() fails, or no entry matches, an error is
 * logged with syslog(); the return value on those paths is not visible
 * here.
 *
 * NOTE(review): endnetconfig(nc) is only visible on the no-match path.
 * Presumably the handle is left open on success because the returned
 * netconfig entry is owned by it -- confirm.
 */
1668 bind_to_proto(NETSELDECL(proto
), char *serv
, struct netbuf
**addr
,
1669 struct netconfig
**retnconf
)
1671 struct netconfig
*nconf
;
1672 NCONF_HANDLE
*nc
= NULL
;
1673 struct nd_hostserv hs
;
1675 hs
.h_host
= HOST_SELF
;
1676 hs
.h_serv
= serv_name_to_port_name(serv
);
1678 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
1679 syslog(LOG_ERR
, "setnetconfig failed: %m");
1682 while (nconf
= getnetconfig(nc
)) {
1683 if (OK_TPI_TYPE(nconf
) && NETSELEQ(nconf
->nc_proto
, proto
)) {
1685 return (nfslib_bindit(nconf
, addr
, &hs
,
1689 (void) endnetconfig(nc
);
1691 syslog(LOG_ERR
, "couldn't find netconfig entry for protocol %s",
1696 #include <netinet/in.h>
1699 * Create an address mask appropriate for the transport.
1700 * The mask is used to obtain the host-specific part of
1701 * a network address when comparing addresses.
1702 * For an internet address the host-specific part is just
1703 * the 32 bit IP address and this part of the mask is set
1704 * to all-ones. The port number part of the mask is zeroes.
/*
 * set_addrmask(fd, nconf, mask)
 *
 * Fill *mask with an address mask sized for the transport behind fd.
 *
 *	fd	endpoint queried with t_getinfo() for its address size
 *	nconf	netconfig entry; nc_protofmly selects the mask layout
 *	mask	netbuf to fill: len and maxlen are set to info.addr and
 *		buf is obtained from malloc() (so presumably released by
 *		the caller -- confirm)
 *
 * Layout, applied after the buffer has been zeroed:
 *	NC_INET		sin_addr.s_addr and sin_family of a struct
 *			sockaddr_in overlay are assigned (the assigned
 *			values are on lines absent from this listing);
 *			the port bytes keep their zero value
 *	NC_INET6	sin6_addr of a struct sockaddr_in6 overlay is
 *			filled with 0xFF bytes and sin6_family is assigned
 *	otherwise	every byte of the buffer is set to 0xFF
 *
 * When info.addr <= 0 no buffer is allocated.  For info.addr == -1 on
 * the NC_LOOPBACK family, *mask is zeroed; the address-size syslog()
 * that follows is presumably reached only for the remaining cases (the
 * intervening return is not shown).
 *
 * The call site visible earlier in this file tests the result with
 * "< 0"; the return statements themselves are absent from this listing.
 *
 * NOTE(review): a t_getinfo() failure is reported with t_error(), while
 * the other failures here go through syslog() -- confirm this is
 * intended.
 * NOTE(review): the INET/INET6 branches write through sockaddr overlays
 * without checking that mask->len covers the structure; presumably the
 * provider's address size guarantees this -- confirm.
 */
1707 set_addrmask(int fd
,
1708 struct netconfig
*nconf
,
1709 struct netbuf
*mask
)
1714 * Find the size of the address we need to mask.
1716 if (t_getinfo(fd
, &info
) < 0) {
1717 t_error("t_getinfo");
1720 mask
->len
= mask
->maxlen
= info
.addr
;
1721 if (info
.addr
<= 0) {
1723 * loopback devices have infinite addr size
1724 * (it is identified by -1 in addr field of t_info structure),
1725 * so don't build the netmask for them. It's a special case
1726 * that should be handled properly.
1728 if ((info
.addr
== -1) &&
1729 (0 == strcmp(nconf
->nc_protofmly
, NC_LOOPBACK
))) {
1730 memset(mask
, 0, sizeof (*mask
));
1734 syslog(LOG_ERR
, "set_addrmask: address size: %ld", info
.addr
);
1738 mask
->buf
= (char *)malloc(mask
->len
);
1739 if (mask
->buf
== NULL
) {
1740 syslog(LOG_ERR
, "set_addrmask: no memory");
1743 (void) memset(mask
->buf
, 0, mask
->len
); /* reset all mask bits */
1745 if (strcmp(nconf
->nc_protofmly
, NC_INET
) == 0) {
1747 * Set the mask so that the port is ignored.
1749 /* LINTED pointer alignment */
1750 ((struct sockaddr_in
*)mask
->buf
)->sin_addr
.s_addr
=
1752 /* LINTED pointer alignment */
1753 ((struct sockaddr_in
*)mask
->buf
)->sin_family
=
1755 } else if (strcmp(nconf
->nc_protofmly
, NC_INET6
) == 0) {
1756 /* LINTED pointer alignment */
1757 (void) memset(&((struct sockaddr_in6
*)mask
->buf
)->sin6_addr
,
1758 (uchar_t
)~0, sizeof (struct in6_addr
));
1759 /* LINTED pointer alignment */
1760 ((struct sockaddr_in6
*)mask
->buf
)->sin6_family
=
1765 * Set all mask bits.
1767 (void) memset(mask
->buf
, 0xFF, mask
->len
);
1773 * For listen fd's index is always less than end_listen_fds.
1774 * end_listen_fds is defined externally in the daemon that uses this library.
1775 * Its value is equal to the number of open file descriptors after the
1776 * last listen end point was opened but before any connection was accepted.
/*
 * is_listen_fd_index(index)
 *
 * Report whether a poll-array index belongs to a listen endpoint.
 *
 *	index	position to classify
 *
 * Returns the value of (index < end_listen_fds): non-zero when index is
 * below end_listen_fds, zero otherwise.  end_listen_fds is not defined
 * in this function; it is only read here.
 */
1779 is_listen_fd_index(int index
)
1781 return (index
< end_listen_fds
);