4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2014 Gary Mills
31 * nfs_tbind.c, common part for nfsd and lockd.
36 #include <netconfig.h>
42 #include <sys/resource.h>
47 #include <netinet/tcp.h>
50 #include "nfs_tbind.h"
52 #include <nfs/nfs_acl.h>
53 #include <nfs/nfssys.h>
56 #include <sys/socket.h>
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to non-zero when the entry pointed to by _nconf has
 * nc_semantics equal to NC_TPI_CLTS, NC_TPI_COTS or NC_TPI_COTS_ORD,
 * and to zero otherwise.
 *
 * The parameter is parenthesized so that any pointer-valued expression
 * (for example &entry or table + i) can be passed.  It is still expanded
 * up to three times, so the argument must not have side effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
/*
 * Assemble an unsigned value from the four bytes starting at address a,
 * treating them as big-endian: byte 0 is the most significant (shifted
 * left by 24) and byte 3 the least significant.
 *
 * The parameter is parenthesized before the (uchar_t *) cast so that a
 * pointer expression such as p + 1 is evaluated in its own type first;
 * without the parentheses the cast would bind to p alone and the offset
 * would be applied in bytes.  The argument is expanded four times, so it
 * must not have side effects.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8) | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
73 * Number of elements to add to the poll array on each allocation.
75 #define POLL_ARRAY_INC_SIZE 64
78 * Number of file descriptors by which the process soft limit may be
79 * increased on each call to nofile_increase(0).
81 #define NOFILE_INC_SIZE 64
84 * Default TCP send and receive buffer size of NFS server.
86 #define NFSD_TCP_BUFSZ (1024*1024)
89 struct conn_ind
*conn_next
;
90 struct conn_ind
*conn_prev
;
91 struct t_call
*conn_call
;
100 * this file contains transport routines common to nfsd and lockd
102 static int nofile_increase(int);
103 static int reuseaddr(int);
104 static int recvucred(int);
105 static void add_to_poll_list(int, struct netconfig
*);
106 static char *serv_name_to_port_name(char *);
107 static int bind_to_proto(char *, char *, struct netbuf
**,
108 struct netconfig
**);
109 static int bind_to_provider(char *, char *, struct netbuf
**,
110 struct netconfig
**);
111 static void conn_close_oldest(void);
112 static boolean_t
conn_get(int, struct netconfig
*, struct conn_ind
**);
113 static void cots_listen_event(int, int);
114 static int discon_get(int, struct netconfig
*, struct conn_ind
**);
115 static int do_poll_clts_action(int, int);
116 static int do_poll_cots_action(int, int);
117 static void remove_from_poll_list(int);
118 static int set_addrmask(int, struct netconfig
*, struct netbuf
*);
119 static int is_listen_fd_index(int);
121 static struct pollfd
*poll_array
;
122 static struct conn_entry
*conn_polled
;
123 static int num_conns
; /* Current number of connections */
124 int (*Mysvc4
)(int, struct netbuf
*, struct netconfig
*, int,
126 static int setopt(int fd
, int level
, int name
, int value
);
127 static int get_opt(int fd
, int level
, int name
);
128 static void nfslib_set_sockbuf(int fd
);
131 * Called to create and prepare a transport descriptor for in-kernel
133 * Returns -1 on failure and a valid descriptor on success.
136 nfslib_transport_open(struct netconfig
*nconf
)
139 struct strioctl strioc
;
141 if ((nconf
== (struct netconfig
*)NULL
) ||
142 (nconf
->nc_device
== (char *)NULL
)) {
143 syslog(LOG_ERR
, "no netconfig device");
148 * Open the transport device.
150 fd
= t_open(nconf
->nc_device
, O_RDWR
, (struct t_info
*)NULL
);
152 if (t_errno
== TSYSERR
&& errno
== EMFILE
&&
153 (nofile_increase(0) == 0)) {
154 /* Try again with a higher NOFILE limit. */
155 fd
= t_open(nconf
->nc_device
, O_RDWR
,
156 (struct t_info
*)NULL
);
159 syslog(LOG_ERR
, "t_open %s failed: t_errno %d, %m",
160 nconf
->nc_device
, t_errno
);
166 * Pop timod because the RPC module must be as close as possible
169 if (ioctl(fd
, I_POP
, 0) < 0) {
170 syslog(LOG_ERR
, "I_POP of timod failed: %m");
176 * Common code for CLTS and COTS transports
178 if (ioctl(fd
, I_PUSH
, "rpcmod") < 0) {
179 syslog(LOG_ERR
, "I_PUSH of rpcmod failed: %m");
184 strioc
.ic_cmd
= RPC_SERVER
;
185 strioc
.ic_dp
= (char *)0;
187 strioc
.ic_timout
= -1;
189 /* Tell rpcmod to act like a server stream. */
190 if (ioctl(fd
, I_STR
, &strioc
) < 0) {
191 syslog(LOG_ERR
, "rpcmod set-up ioctl failed: %m");
197 * Re-push timod so that we will still be doing TLI
198 * operations on the descriptor.
200 if (ioctl(fd
, I_PUSH
, "timod") < 0) {
201 syslog(LOG_ERR
, "I_PUSH of timod failed: %m");
207 * Enable options of returning the ip's for udp.
209 if (strcmp(nconf
->nc_netid
, "udp6") == 0)
210 __rpc_tli_set_options(fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
, 1);
211 else if (strcmp(nconf
->nc_netid
, "udp") == 0)
212 __rpc_tli_set_options(fd
, IPPROTO_IP
, IP_RECVDSTADDR
, 1);
218 nofile_increase(int limit
)
222 if (getrlimit(RLIMIT_NOFILE
, &rl
) == -1) {
223 syslog(LOG_ERR
, "getrlimit of NOFILE failed: %m");
230 rl
.rlim_cur
+= NOFILE_INC_SIZE
;
232 if (rl
.rlim_cur
> rl
.rlim_max
&&
233 rl
.rlim_max
!= RLIM_INFINITY
)
234 rl
.rlim_max
= rl
.rlim_cur
;
236 if (setrlimit(RLIMIT_NOFILE
, &rl
) == -1) {
237 syslog(LOG_ERR
, "setrlimit of NOFILE to %d failed: %m",
246 nfslib_set_sockbuf(int fd
)
250 val
= NFSD_TCP_BUFSZ
;
252 curval
= get_opt(fd
, SOL_SOCKET
, SO_SNDBUF
);
253 syslog(LOG_DEBUG
, "Current SO_SNDBUF value is %d", curval
);
254 if ((curval
!= -1) && (curval
< val
)) {
255 syslog(LOG_DEBUG
, "Set SO_SNDBUF option to %d", val
);
256 if (setopt(fd
, SOL_SOCKET
, SO_SNDBUF
, val
) < 0) {
258 "couldn't set SO_SNDBUF to %d - t_errno = %d",
261 "Check and increase system-wide tcp_max_buf");
265 curval
= get_opt(fd
, SOL_SOCKET
, SO_RCVBUF
);
266 syslog(LOG_DEBUG
, "Current SO_RCVBUF value is %d", curval
);
267 if ((curval
!= -1) && (curval
< val
)) {
268 syslog(LOG_DEBUG
, "Set SO_RCVBUF option to %d", val
);
269 if (setopt(fd
, SOL_SOCKET
, SO_RCVBUF
, val
) < 0) {
271 "couldn't set SO_RCVBUF to %d - t_errno = %d",
274 "Check and increase system-wide tcp_max_buf");
280 nfslib_bindit(struct netconfig
*nconf
, struct netbuf
**addr
,
281 struct nd_hostserv
*hs
, int backlog
)
286 struct nd_addrlist
*addrlist
;
287 struct t_optmgmt req
, resp
;
290 bool_t use_any
= FALSE
;
293 if ((fd
= nfslib_transport_open(nconf
)) == -1) {
294 syslog(LOG_ERR
, "cannot establish transport service over %s",
299 addrlist
= (struct nd_addrlist
*)NULL
;
301 /* nfs4_callback service does not use a fixed port number */
303 if (strcmp(hs
->h_serv
, "nfs4_callback") == 0) {
308 gzone
= (getzoneid() == GLOBAL_ZONEID
);
309 } else if (netdir_getbyname(nconf
, hs
, &addrlist
) != 0) {
312 "Cannot get address for transport %s host %s service %s",
313 nconf
->nc_netid
, hs
->h_host
, hs
->h_serv
);
318 if (strcmp(nconf
->nc_proto
, "tcp") == 0) {
320 * If we're running over TCP, then set the
321 * SO_REUSEADDR option so that we can bind
322 * to our preferred address even if previously
323 * left connections exist in FIN_WAIT states.
324 * This is somewhat bogus, but otherwise you have
325 * to wait 2 minutes to restart after killing it.
327 if (reuseaddr(fd
) == -1) {
329 "couldn't set SO_REUSEADDR option on transport");
331 } else if (strcmp(nconf
->nc_proto
, "udp") == 0) {
333 * In order to run MLP on UDP, we need to handle creds.
335 if (recvucred(fd
) == -1) {
337 "couldn't set SO_RECVUCRED option on transport");
341 if (nconf
->nc_semantics
== NC_TPI_CLTS
)
346 /* LINTED pointer alignment */
347 ntb
= (struct t_bind
*)t_alloc(fd
, T_BIND
, T_ALL
);
348 if (ntb
== (struct t_bind
*)NULL
) {
349 syslog(LOG_ERR
, "t_alloc failed: t_errno %d, %m", t_errno
);
351 netdir_free((void *)addrlist
, ND_ADDRLIST
);
356 * XXX - what about the space tb->addr.buf points to? This should
357 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
358 * shouldn't be called with T_ALL.
361 tb
.addr
= *(addrlist
->n_addrs
); /* structure copy */
363 if (t_bind(fd
, &tb
, ntb
) == -1) {
364 syslog(LOG_ERR
, "t_bind failed: t_errno %d, %m", t_errno
);
365 (void) t_free((char *)ntb
, T_BIND
);
366 netdir_free((void *)addrlist
, ND_ADDRLIST
);
371 /* make sure we bound to the right address */
372 if (use_any
== FALSE
&&
373 (tb
.addr
.len
!= ntb
->addr
.len
||
374 memcmp(tb
.addr
.buf
, ntb
->addr
.buf
, tb
.addr
.len
) != 0)) {
375 syslog(LOG_ERR
, "t_bind to wrong address");
376 (void) t_free((char *)ntb
, T_BIND
);
377 netdir_free((void *)addrlist
, ND_ADDRLIST
);
383 * Call nfs4svc_setport so that the kernel can be
384 * informed what port number the daemon is listening on
385 * for incoming connection requests.
388 if ((nconf
->nc_semantics
== NC_TPI_COTS
||
389 nconf
->nc_semantics
== NC_TPI_COTS_ORD
) && Mysvc4
!= NULL
)
390 (*Mysvc4
)(fd
, NULL
, nconf
, NFS4_SETPORT
, &ntb
->addr
);
393 netdir_free((void *)addrlist
, ND_ADDRLIST
);
395 if (strcmp(nconf
->nc_proto
, "tcp") == 0) {
397 * Disable the Nagle algorithm on TCP connections.
398 * Connections accepted from this listener will
399 * inherit the listener options.
402 /* LINTED pointer alignment */
403 opt
= (struct opthdr
*)reqbuf
;
404 opt
->level
= IPPROTO_TCP
;
405 opt
->name
= TCP_NODELAY
;
406 opt
->len
= sizeof (int);
408 /* LINTED pointer alignment */
409 *(int *)((char *)opt
+ sizeof (*opt
)) = 1;
411 req
.flags
= T_NEGOTIATE
;
412 req
.opt
.len
= sizeof (*opt
) + opt
->len
;
413 req
.opt
.buf
= (char *)opt
;
415 resp
.opt
.buf
= reqbuf
;
416 resp
.opt
.maxlen
= sizeof (reqbuf
);
418 if (t_optmgmt(fd
, &req
, &resp
) < 0 ||
419 resp
.flags
!= T_SUCCESS
) {
421 "couldn't set NODELAY option for proto %s: t_errno = %d, %m",
422 nconf
->nc_proto
, t_errno
);
425 nfslib_set_sockbuf(fd
);
432 get_opt(int fd
, int level
, int name
)
434 struct t_optmgmt req
, res
;
440 reqbuf
.opt
.level
= level
;
441 reqbuf
.opt
.name
= name
;
442 reqbuf
.opt
.len
= sizeof (int);
445 req
.flags
= T_CURRENT
;
446 req
.opt
.len
= sizeof (reqbuf
);
447 req
.opt
.buf
= (char *)&reqbuf
;
450 res
.opt
.buf
= (char *)&reqbuf
;
451 res
.opt
.maxlen
= sizeof (reqbuf
);
453 if (t_optmgmt(fd
, &req
, &res
) < 0 || res
.flags
!= T_SUCCESS
) {
454 t_error("t_optmgmt");
457 return (reqbuf
.value
);
461 setopt(int fd
, int level
, int name
, int value
)
463 struct t_optmgmt req
, resp
;
469 reqbuf
.opt
.level
= level
;
470 reqbuf
.opt
.name
= name
;
471 reqbuf
.opt
.len
= sizeof (int);
473 reqbuf
.value
= value
;
475 req
.flags
= T_NEGOTIATE
;
476 req
.opt
.len
= sizeof (reqbuf
);
477 req
.opt
.buf
= (char *)&reqbuf
;
480 resp
.opt
.buf
= (char *)&reqbuf
;
481 resp
.opt
.maxlen
= sizeof (reqbuf
);
483 if (t_optmgmt(fd
, &req
, &resp
) < 0 || resp
.flags
!= T_SUCCESS
) {
484 t_error("t_optmgmt");
493 return (setopt(fd
, SOL_SOCKET
, SO_REUSEADDR
, 1));
499 return (setopt(fd
, SOL_SOCKET
, SO_RECVUCRED
, 1));
503 nfslib_log_tli_error(char *tli_name
, int fd
, struct netconfig
*nconf
)
508 * Save the error code across syslog(), just in case syslog()
509 * gets its own error and, therefore, overwrites errno.
512 if (t_errno
== TSYSERR
) {
513 syslog(LOG_ERR
, "%s(file descriptor %d/transport %s) %m",
514 tli_name
, fd
, nconf
->nc_proto
);
517 "%s(file descriptor %d/transport %s) TLI error %d",
518 tli_name
, fd
, nconf
->nc_proto
, t_errno
);
524 * Called to set up service over a particular transport.
527 do_one(char *provider
, NETSELDECL(proto
), struct protob
*protobp0
,
528 int (*svc
)(int, struct netbuf
, struct netconfig
*))
531 struct protob
*protobp
;
532 struct netbuf
*retaddr
;
533 struct netconfig
*retnconf
;
534 struct netbuf addrmask
;
540 sock
= bind_to_provider(provider
, protobp0
->serv
, &retaddr
,
543 sock
= bind_to_proto(proto
, protobp0
->serv
, &retaddr
,
547 (void) syslog(LOG_ERR
,
548 "Cannot establish %s service over %s: transport setup problem.",
549 protobp0
->serv
, provider
? provider
: proto
);
553 if (set_addrmask(sock
, retnconf
, &addrmask
) < 0) {
554 (void) syslog(LOG_ERR
,
555 "Cannot set address mask for %s", retnconf
->nc_netid
);
560 * Register all versions of the programs in the protocol block list.
563 for (protobp
= protobp0
; protobp
; protobp
= protobp
->next
) {
564 for (vers
= protobp
->versmin
; vers
<= protobp
->versmax
;
566 if ((protobp
->program
== NFS_PROGRAM
||
567 protobp
->program
== NFS_ACL_PROGRAM
) &&
569 strncasecmp(retnconf
->nc_proto
, NC_UDP
, l
) == 0)
572 (void) rpcb_unset(protobp
->program
, vers
, retnconf
);
573 (void) rpcb_set(protobp
->program
, vers
, retnconf
,
579 * Register services with CLTS semantics right now.
580 * Note: services with COTS/COTS_ORD semantics will be
581 * registered later from cots_listen_event function.
583 if (retnconf
->nc_semantics
== NC_TPI_CLTS
) {
584 /* Don't drop core if supporting module(s) aren't loaded. */
585 (void) signal(SIGSYS
, SIG_IGN
);
588 * svc() doesn't block, it returns success or failure.
591 if (svc
== NULL
&& Mysvc4
!= NULL
)
592 err
= (*Mysvc4
)(sock
, &addrmask
, retnconf
,
593 NFS4_SETPORT
|NFS4_KRPC_START
, retaddr
);
595 err
= (*svc
)(sock
, addrmask
, retnconf
);
598 (void) syslog(LOG_ERR
,
599 "Cannot establish %s service over <file desc."
600 " %d, protocol %s> : %m. Exiting",
601 protobp0
->serv
, sock
, retnconf
->nc_proto
);
608 * We successfully set up the server over this transport.
609 * Add this descriptor to the one being polled on.
611 add_to_poll_list(sock
, retnconf
);
615 * Set up the NFS service over all the available transports.
616 * Returns -1 for failure, 0 for success.
619 do_all(struct protob
*protobp
,
620 int (*svc
)(int, struct netbuf
, struct netconfig
*))
622 struct netconfig
*nconf
;
626 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
627 syslog(LOG_ERR
, "setnetconfig failed: %m");
631 while (nconf
= getnetconfig(nc
)) {
632 if ((nconf
->nc_flag
& NC_VISIBLE
) &&
633 strcmp(nconf
->nc_protofmly
, NC_LOOPBACK
) != 0 &&
634 OK_TPI_TYPE(nconf
) &&
635 (protobp
->program
!= NFS4_CALLBACK
||
636 strncasecmp(nconf
->nc_proto
, NC_UDP
, l
) != 0))
637 do_one(nconf
->nc_device
, nconf
->nc_proto
,
640 (void) endnetconfig(nc
);
645 * poll on the open transport descriptors for events and errors.
648 poll_for_action(void)
654 * Keep polling until all transports have been closed. When this
655 * happens, we return.
657 while ((int)num_fds
> 0) {
658 nfds
= poll(poll_array
, num_fds
, INFTIM
);
665 * Some errors from poll could be
666 * due to temporary conditions, and we try to
667 * be robust in the face of them. Other
668 * errors (should never happen in theory)
669 * are fatal (eg. EINVAL, EFAULT).
681 (void) syslog(LOG_ERR
,
682 "poll failed: %m. Exiting");
690 * Go through the poll list looking for events.
692 for (i
= 0; i
< num_fds
&& nfds
> 0; i
++) {
693 if (poll_array
[i
].revents
) {
696 * We have a message, so try to read it.
697 * Record the error return in errno,
698 * so that syslog(LOG_ERR, "...%m")
699 * dumps the corresponding error string.
701 if (conn_polled
[i
].nc
.nc_semantics
==
703 errno
= do_poll_clts_action(
704 poll_array
[i
].fd
, i
);
706 errno
= do_poll_cots_action(
707 poll_array
[i
].fd
, i
);
713 * Most returned error codes mean that there is a
714 * fatal condition which we can only deal with
715 * by closing the transport.
717 if (errno
!= EAGAIN
&& errno
!= ENOMEM
) {
718 (void) syslog(LOG_ERR
,
719 "Error (%m) reading descriptor %d/transport %s. Closing it.",
721 conn_polled
[i
].nc
.nc_proto
);
722 (void) t_close(poll_array
[i
].fd
);
723 remove_from_poll_list(poll_array
[i
].fd
);
725 } else if (errno
== ENOMEM
)
731 (void) syslog(LOG_ERR
,
732 "All transports have been closed with errors. Exiting.");
736 * Allocate poll/transport array entries for this descriptor.
739 add_to_poll_list(int fd
, struct netconfig
*nconf
)
741 static int poll_array_size
= 0;
744 * If the arrays are full, allocate new ones.
746 if (num_fds
== poll_array_size
) {
748 struct conn_entry
*tnp
;
750 if (poll_array_size
!= 0) {
754 tpa
= (struct pollfd
*)0;
756 poll_array_size
+= POLL_ARRAY_INC_SIZE
;
758 * Allocate new arrays.
760 poll_array
= (struct pollfd
*)
761 malloc(poll_array_size
* sizeof (struct pollfd
) + 256);
762 conn_polled
= (struct conn_entry
*)
763 malloc(poll_array_size
* sizeof (struct conn_entry
) + 256);
764 if (poll_array
== (struct pollfd
*)NULL
||
765 conn_polled
== (struct conn_entry
*)NULL
) {
766 syslog(LOG_ERR
, "malloc failed for poll array");
771 * Copy the data of the old ones into new arrays, and
775 (void) memcpy((void *)poll_array
, (void *)tpa
,
776 num_fds
* sizeof (struct pollfd
));
777 (void) memcpy((void *)conn_polled
, (void *)tnp
,
778 num_fds
* sizeof (struct conn_entry
));
785 * Set the descriptor and event list. All possible events are
788 poll_array
[num_fds
].fd
= fd
;
789 poll_array
[num_fds
].events
= POLLIN
|POLLRDNORM
|POLLRDBAND
|POLLPRI
;
792 * Copy the transport data over too.
794 conn_polled
[num_fds
].nc
= *nconf
;
795 conn_polled
[num_fds
].closing
= 0;
798 * Set the descriptor to non-blocking. Avoids a race
799 * between data arriving on the stream and then having it
800 * flushed before we can read it.
802 if (fcntl(fd
, F_SETFL
, O_NONBLOCK
) == -1) {
803 (void) syslog(LOG_ERR
,
804 "fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
805 num_fds
, nconf
->nc_proto
);
810 * Count this descriptor.
816 remove_from_poll_list(int fd
)
821 for (i
= 0; i
< num_fds
; i
++) {
822 if (poll_array
[i
].fd
== fd
) {
824 num_to_copy
= num_fds
- i
;
825 (void) memcpy((void *)&poll_array
[i
],
826 (void *)&poll_array
[i
+1],
827 num_to_copy
* sizeof (struct pollfd
));
828 (void) memset((void *)&poll_array
[num_fds
], 0,
829 sizeof (struct pollfd
));
830 (void) memcpy((void *)&conn_polled
[i
],
831 (void *)&conn_polled
[i
+1],
832 num_to_copy
* sizeof (struct conn_entry
));
833 (void) memset((void *)&conn_polled
[num_fds
], 0,
834 sizeof (struct conn_entry
));
838 syslog(LOG_ERR
, "attempt to remove nonexistent fd from poll list");
843 * Called to read and interpret the event on a connectionless descriptor.
844 * Returns 0 if successful, or a UNIX error code if failure.
847 do_poll_clts_action(int fd
, int conn_index
)
852 struct netconfig
*nconf
= &conn_polled
[conn_index
].nc
;
853 static struct t_unitdata
*unitdata
= NULL
;
854 static struct t_uderr
*uderr
= NULL
;
855 static int oldfd
= -1;
856 struct nd_hostservlist
*host
= NULL
;
857 struct strbuf ctl
[1], data
[1];
859 * We just need to have some space to consume the
860 * message in the event we can't use the TLI interface to do the
863 * We flush the message using getmsg(). For the control part
864 * we allocate enough for any TPI header plus 32 bytes for address
865 * and options. For the data part, there is nothing magic about
866 * the size of the array, but 256 bytes is probably better than
867 * 1 byte, and we don't expect any data portion anyway.
869 * If the array sizes are too small, we handle this because getmsg()
870 * (called to consume the message) will return MOREDATA|MORECTL.
871 * Thus we just call getmsg() until it's read the message.
873 char ctlbuf
[sizeof (union T_primitives
) + 32];
877 * If this is the same descriptor as the last time
878 * do_poll_clts_action was called, we can save some
879 * de-allocation and allocation.
885 (void) t_free((char *)unitdata
, T_UNITDATA
);
889 (void) t_free((char *)uderr
, T_UDERROR
);
895 * Allocate a unitdata structure for receiving the event.
897 if (unitdata
== NULL
) {
898 /* LINTED pointer alignment */
899 unitdata
= (struct t_unitdata
*)t_alloc(fd
, T_UNITDATA
, T_ALL
);
900 if (unitdata
== NULL
) {
901 if (t_errno
== TSYSERR
) {
903 * Save the error code across
904 * syslog(), just in case
905 * syslog() gets its own error
906 * and therefore overwrites errno.
909 (void) syslog(LOG_ERR
,
910 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
911 fd
, nconf
->nc_proto
);
914 (void) syslog(LOG_ERR
,
915 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
916 fd
, nconf
->nc_proto
, t_errno
);
925 * The idea is we wait for T_UNITDATA_IND's. Of course,
926 * we don't get any, because rpcmod filters them out.
927 * However, we need to call t_rcvudata() to let TLI
928 * tell us we have a T_UDERROR_IND.
931 * t_rcvudata(), expecting TLOOK.
932 * t_look(), expecting T_UDERR.
933 * t_rcvuderr(), expecting success (0).
934 * expand destination address into ASCII,
938 ret
= t_rcvudata(fd
, unitdata
, &flags
);
939 if (ret
== 0 || t_errno
== TBUFOVFLW
) {
940 (void) syslog(LOG_WARNING
,
941 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
942 fd
, nconf
->nc_proto
, unitdata
->udata
.len
);
945 * Even though we don't expect any data, in case we do,
946 * keep reading until there is no more.
959 * System errors are returned to caller.
960 * Save the error code across
961 * syslog(), just in case
962 * syslog() gets its own error
963 * and therefore overwrites errno.
966 (void) syslog(LOG_ERR
,
967 "t_rcvudata(file descriptor %d/transport %s) %m",
968 fd
, nconf
->nc_proto
);
973 (void) syslog(LOG_ERR
,
974 "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
975 fd
, nconf
->nc_proto
, t_errno
);
985 * System errors are returned to caller.
987 if (t_errno
== TSYSERR
) {
989 * Save the error code across
990 * syslog(), just in case
991 * syslog() gets its own error
992 * and therefore overwrites errno.
995 (void) syslog(LOG_ERR
,
996 "t_look(file descriptor %d/transport %s) %m",
997 fd
, nconf
->nc_proto
);
1000 (void) syslog(LOG_ERR
,
1001 "t_look(file descriptor %d/transport %s) TLI error %d",
1002 fd
, nconf
->nc_proto
, t_errno
);
1007 (void) syslog(LOG_WARNING
,
1008 "t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1009 fd
, nconf
->nc_proto
, ret
, T_UDERR
);
1012 if (uderr
== NULL
) {
1013 /* LINTED pointer alignment */
1014 uderr
= (struct t_uderr
*)t_alloc(fd
, T_UDERROR
, T_ALL
);
1015 if (uderr
== NULL
) {
1016 if (t_errno
== TSYSERR
) {
1018 * Save the error code across
1019 * syslog(), just in case
1020 * syslog() gets its own error
1021 * and therefore overwrites errno.
1024 (void) syslog(LOG_ERR
,
1025 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1026 fd
, nconf
->nc_proto
);
1029 (void) syslog(LOG_ERR
,
1030 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1031 fd
, nconf
->nc_proto
, t_errno
);
1036 ret
= t_rcvuderr(fd
, uderr
);
1040 * Save the datagram error in errno, so that the
1041 * %m argument to syslog picks up the error string.
1043 errno
= uderr
->error
;
1046 * Log the datagram error, then log the host that
1047 * probably triggered it. Cannot log both in the
1048 * same transaction because of packet size limitations
1051 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1052 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1053 fd
, nconf
->nc_proto
);
1056 * Try to map the client's address back to a
1059 ret
= netdir_getbyaddr(nconf
, &host
, &uderr
->addr
);
1060 if (ret
!= -1 && host
&& host
->h_cnt
> 0 &&
1061 host
->h_hostservs
) {
1062 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1063 "Bad NFS response was sent to client with host name: %s; service port: %s",
1064 host
->h_hostservs
->h_host
,
1065 host
->h_hostservs
->h_serv
);
1069 char *hex
= "0123456789abcdef";
1072 * Mapping failed, print the whole thing
1075 buf
= (char *)malloc(uderr
->addr
.len
* 2 + 1);
1076 for (i
= 0, j
= 0; i
< uderr
->addr
.len
; i
++, j
+= 2) {
1077 buf
[j
] = hex
[((uderr
->addr
.buf
[i
]) >> 4) & 0xf];
1078 buf
[j
+1] = hex
[uderr
->addr
.buf
[i
] & 0xf];
1081 (void) syslog((errno
== ECONNREFUSED
) ? LOG_DEBUG
: LOG_WARNING
,
1082 "Bad NFS response was sent to client with transport address: 0x%s",
1087 if (ret
== 0 && host
!= NULL
)
1088 netdir_free((void *)host
, ND_HOSTSERVLIST
);
1097 * System errors are returned to caller.
1098 * Save the error code across
1099 * syslog(), just in case
1100 * syslog() gets its own error
1101 * and therefore overwrites errno.
1104 (void) syslog(LOG_ERR
,
1105 "t_rcvuderr(file descriptor %d/transport %s) %m",
1106 fd
, nconf
->nc_proto
);
1109 (void) syslog(LOG_ERR
,
1110 "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1111 fd
, nconf
->nc_proto
, t_errno
);
1117 * If we get here, then we could not cope with whatever message
1118 * we attempted to read, so flush it. If we did read a message,
1119 * and one isn't present, that is all right, because fd is in
1122 (void) syslog(LOG_ERR
,
1123 "Flushing one input message from <file descriptor %d/transport %s>",
1124 fd
, nconf
->nc_proto
);
1127 * Read and discard the message. Do this until there is
1128 * no more control/data in the message or until we get an error.
1131 ctl
->maxlen
= sizeof (ctlbuf
);
1133 data
->maxlen
= sizeof (databuf
);
1134 data
->buf
= databuf
;
1136 ret
= getmsg(fd
, ctl
, data
, &flags
);
1145 conn_close_oldest(void)
1151 * Find the oldest connection that is not already in the
1152 * process of shutting down.
1154 for (i1
= end_listen_fds
; /* no conditional expression */; i1
++) {
1157 if (conn_polled
[i1
].closing
== 0)
1161 printf("too many connections (%d), releasing oldest (%d)\n",
1162 num_conns
, poll_array
[i1
].fd
);
1164 syslog(LOG_WARNING
, "too many connections (%d), releasing oldest (%d)",
1165 num_conns
, poll_array
[i1
].fd
);
1167 fd
= poll_array
[i1
].fd
;
1168 if (conn_polled
[i1
].nc
.nc_semantics
== NC_TPI_COTS
) {
1170 * For politeness, send a T_DISCON_REQ to the transport
1171 * provider. We close the stream anyway.
1173 (void) t_snddis(fd
, (struct t_call
*)0);
1175 remove_from_poll_list(fd
);
1179 * For orderly release, we do not close the stream
1180 * until the T_ORDREL_IND arrives to complete
1183 if (t_sndrel(fd
) == 0)
1184 conn_polled
[i1
].closing
= 1;
1189 conn_get(int fd
, struct netconfig
*nconf
, struct conn_ind
**connp
)
1191 struct conn_ind
*conn
;
1192 struct conn_ind
*next_conn
;
1194 conn
= (struct conn_ind
*)malloc(sizeof (*conn
));
1196 syslog(LOG_ERR
, "malloc for listen indication failed");
1200 /* LINTED pointer alignment */
1201 conn
->conn_call
= (struct t_call
*)t_alloc(fd
, T_CALL
, T_ALL
);
1202 if (conn
->conn_call
== NULL
) {
1204 nfslib_log_tli_error("t_alloc", fd
, nconf
);
1208 if (t_listen(fd
, conn
->conn_call
) == -1) {
1209 nfslib_log_tli_error("t_listen", fd
, nconf
);
1210 (void) t_free((char *)conn
->conn_call
, T_CALL
);
1215 if (conn
->conn_call
->udata
.len
> 0) {
1217 "rejecting inbound connection(%s) with %d bytes of connect data",
1218 nconf
->nc_proto
, conn
->conn_call
->udata
.len
);
1220 conn
->conn_call
->udata
.len
= 0;
1221 (void) t_snddis(fd
, conn
->conn_call
);
1222 (void) t_free((char *)conn
->conn_call
, T_CALL
);
1227 if ((next_conn
= *connp
) != NULL
) {
1228 next_conn
->conn_prev
->conn_next
= conn
;
1229 conn
->conn_next
= next_conn
;
1230 conn
->conn_prev
= next_conn
->conn_prev
;
1231 next_conn
->conn_prev
= conn
;
1233 conn
->conn_next
= conn
;
1234 conn
->conn_prev
= conn
;
1241 discon_get(int fd
, struct netconfig
*nconf
, struct conn_ind
**connp
)
1243 struct conn_ind
*conn
;
1244 struct t_discon discon
;
1246 discon
.udata
.buf
= (char *)0;
1247 discon
.udata
.maxlen
= 0;
1248 if (t_rcvdis(fd
, &discon
) == -1) {
1249 nfslib_log_tli_error("t_rcvdis", fd
, nconf
);
1258 if (conn
->conn_call
->sequence
== discon
.sequence
) {
1259 if (conn
->conn_next
== conn
)
1260 *connp
= (struct conn_ind
*)0;
1262 if (conn
== *connp
) {
1263 *connp
= conn
->conn_next
;
1265 conn
->conn_next
->conn_prev
= conn
->conn_prev
;
1266 conn
->conn_prev
->conn_next
= conn
->conn_next
;
1271 conn
= conn
->conn_next
;
1272 } while (conn
!= *connp
);
1278 cots_listen_event(int fd
, int conn_index
)
1280 struct t_call
*call
;
1281 struct conn_ind
*conn
;
1282 struct conn_ind
*conn_head
;
1284 struct netconfig
*nconf
= &conn_polled
[conn_index
].nc
;
1286 struct netbuf addrmask
;
1289 char *clnt_uaddr
= NULL
;
1290 struct nd_hostservlist
*clnt_serv
= NULL
;
1293 (void) conn_get(fd
, nconf
, &conn_head
);
1295 while ((conn
= conn_head
) != NULL
) {
1296 conn_head
= conn
->conn_next
;
1297 if (conn_head
== conn
)
1300 conn_head
->conn_prev
= conn
->conn_prev
;
1301 conn
->conn_prev
->conn_next
= conn_head
;
1303 call
= conn
->conn_call
;
1307 * If we have already accepted the maximum number of
1308 * connections allowed on the command line, then drop
1309 * the oldest connection (for any protocol) before
1310 * accepting the new connection. Unless explicitly
1311 * set on the command line, max_conns_allowed is -1.
1313 if (max_conns_allowed
!= -1 && num_conns
>= max_conns_allowed
)
1314 conn_close_oldest();
1317 * Create a new transport endpoint for the same proto as
1320 new_fd
= nfslib_transport_open(nconf
);
1322 call
->udata
.len
= 0;
1323 (void) t_snddis(fd
, call
);
1324 (void) t_free((char *)call
, T_CALL
);
1325 syslog(LOG_ERR
, "Cannot establish transport over %s",
1330 /* Bind to a generic address/port for the accepting stream. */
1331 if (t_bind(new_fd
, NULL
, NULL
) == -1) {
1332 nfslib_log_tli_error("t_bind", new_fd
, nconf
);
1333 call
->udata
.len
= 0;
1334 (void) t_snddis(fd
, call
);
1335 (void) t_free((char *)call
, T_CALL
);
1336 (void) t_close(new_fd
);
1340 while (t_accept(fd
, new_fd
, call
) == -1) {
1341 if (t_errno
!= TLOOK
) {
1343 nfslib_log_tli_error("t_accept", fd
, nconf
);
1345 call
->udata
.len
= 0;
1346 (void) t_snddis(fd
, call
);
1347 (void) t_free((char *)call
, T_CALL
);
1348 (void) t_close(new_fd
);
1351 while (event
= t_look(fd
)) {
1356 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf
->nc_proto
);
1358 (void) conn_get(fd
, nconf
, &conn_head
);
1363 "cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1366 (void) discon_get(fd
, nconf
,
1371 "unexpected event 0x%x during accept processing (%s)",
1372 event
, nconf
->nc_proto
);
1373 call
->udata
.len
= 0;
1374 (void) t_snddis(fd
, call
);
1375 (void) t_free((char *)call
, T_CALL
);
1376 (void) t_close(new_fd
);
1382 if (set_addrmask(new_fd
, nconf
, &addrmask
) < 0) {
1383 (void) syslog(LOG_ERR
,
1384 "Cannot set address mask for %s",
1386 (void) t_snddis(new_fd
, NULL
);
1387 (void) t_free((char *)call
, T_CALL
);
1388 (void) t_close(new_fd
);
1392 /* Tell kRPC about the new stream. */
1394 ret
= (*Mysvc4
)(new_fd
, &addrmask
, nconf
,
1395 NFS4_KRPC_START
, &call
->addr
);
1397 ret
= (*Mysvc
)(new_fd
, addrmask
, nconf
);
1400 if (errno
!= ENOTCONN
) {
1402 "unable to register new connection: %m");
1405 * This is the only error that could be
1406 * caused by the client, so who was it?
1408 if (netdir_getbyaddr(nconf
, &clnt_serv
,
1409 &(call
->addr
)) == ND_OK
&&
1410 clnt_serv
->h_cnt
> 0)
1411 clnt
= clnt_serv
->h_hostservs
->h_host
;
1413 clnt
= clnt_uaddr
= taddr2uaddr(nconf
,
1416 * If we don't know who the client was,
1421 "unable to register new connection: client %s has dropped connection", clnt
);
1423 netdir_free(clnt_serv
, ND_HOSTSERVLIST
);
1432 (void) t_snddis(new_fd
, NULL
);
1433 (void) t_free((char *)call
, T_CALL
);
1434 (void) t_close(new_fd
);
1439 (void) t_free((char *)call
, T_CALL
);
1442 * Poll on the new descriptor so that we get disconnect
1443 * and orderly release indications.
1446 add_to_poll_list(new_fd
, nconf
);
1448 /* Reset nconf in case it has been moved. */
1449 nconf
= &conn_polled
[conn_index
].nc
;
/*
 * do_poll_cots_action: service pending TLI events on the
 * connection-oriented endpoint fd, whose poll entry is
 * conn_polled[conn_index].
 *
 * Events are fetched with t_look() in a loop.  Judging by the debug
 * messages, the branches cover T_LISTEN (handed to cots_listen_event()),
 * T_DATA (a private notification read with t_rcv()), T_ORDREL
 * (t_rcvrel(), answered with t_sndrel() unless the index belongs to a
 * listen descriptor) and T_DISCONNECT (t_rcvdis()); anything else is
 * logged as an unexpected TLI event.
 *
 * NOTE(review): the case labels, the declarations of event, i1, buf and
 * flags, and several statements are not visible in this extract, so the
 * exact branch structure should be confirmed against the full source.
 */
1455 do_poll_cots_action(int fd
, int conn_index
)
1461 struct conn_entry
*connent
= &conn_polled
[conn_index
];
1462 struct netconfig
*nconf
= &(connent
->nc
);
1463 const char *errorstr
;
/* Keep going for as long as t_look() returns a non-zero value. */
1465 while (event
= t_look(fd
)) {
1469 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf
->nc_proto
, fd
);
1471 cots_listen_event(fd
, conn_index
);
/*
 * NOTE(review): this message prints (fd, proto) while every other debug
 * message in this function prints (proto, fd).
 */
1476 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd
, nconf
->nc_proto
);
1479 * Receive a private notification from CONS rpcmod.
1481 i1
= t_rcv(fd
, buf
, sizeof (buf
), &flags
);
1483 syslog(LOG_ERR
, "t_rcv failed");
/*
 * NOTE(review): sizeof yields an unsigned value, so if i1 is a signed
 * integer a negative i1 would compare as a large number here; confirm
 * that the t_rcv failure path above never falls through to this test.
 */
1486 if (i1
< sizeof (int))
/* The first four bytes of buf are decoded as a big-endian 32-bit code. */
1488 i1
= BE32_TO_U32(buf
);
1489 if (i1
== 1 || i1
== 2) {
1491 * This connection has been idle for too long,
1492 * so release it as politely as we can. If we
1493 * have already initiated an orderly release
1494 * and we get notified that the stream is
1495 * still idle, pull the plug. This prevents
1496 * hung connections from continuing to consume
1500 printf("do_poll_cots_action(%s,%d): ", nconf
->nc_proto
, fd
);
1501 printf("initiating orderly release of idle connection\n");
/*
 * Abortive disconnect (t_snddis) when the transport is NC_TPI_COTS or
 * when closing is already non-zero; the t_sndrel path further down sets
 * closing to 1.
 */
1503 if (nconf
->nc_semantics
== NC_TPI_COTS
||
1504 connent
->closing
!= 0) {
1505 (void) t_snddis(fd
, (struct t_call
*)0);
1509 * For NC_TPI_COTS_ORD, the stream is closed
1510 * and removed from the poll list when the
1511 * T_ORDREL is received from the provider. We
1512 * don't wait for it here because it may take
1513 * a while for the transport to shut down.
1515 if (t_sndrel(fd
) == -1) {
1517 "unable to send orderly release %m");
1519 connent
->closing
= 1;
1522 "unexpected event from CONS rpcmod %d", i1
);
1527 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf
->nc_proto
, fd
);
1529 /* Perform an orderly release. */
1530 if (t_rcvrel(fd
) == 0) {
1531 /* T_ORDREL on listen fd's should be ignored */
1532 if (!is_listen_fd_index(conn_index
)) {
1533 (void) t_sndrel(fd
);
1538 } else if (t_errno
== TLOOK
) {
1541 nfslib_log_tli_error("t_rcvrel", fd
, nconf
);
1544 * check to make sure we do not close
1547 if (is_listen_fd_index(conn_index
))
1555 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf
->nc_proto
, fd
);
1557 if (t_rcvdis(fd
, (struct t_discon
*)NULL
) == -1)
1558 nfslib_log_tli_error("t_rcvdis", fd
, nconf
);
1561 * T_DISCONNECT on listen fd's should be ignored.
1563 if (is_listen_fd_index(conn_index
))
1569 if (t_errno
== TSYSERR
) {
1570 if ((errorstr
= strerror(errno
)) == NULL
) {
1572 "Unknown error num %d", errno
);
1573 errorstr
= (const char *) buf
;
1575 } else if (event
== -1)
1576 errorstr
= t_strerror(t_errno
);
1580 "unexpected TLI event (0x%x) on "
1581 "connection-oriented transport(%s,%d):%s",
1582 event
, nconf
->nc_proto
, fd
, errorstr
);
1585 remove_from_poll_list(fd
);
/*
 * serv_name_to_port_name: translate a service name into an RPC port
 * name.  name is compared, case-sensitively, against NFS, NLM and
 * NFS4_CALLBACK in that order.  NFS4_CALLBACK yields nfs4_callback and
 * a name that matches none of the three yields the literal string
 * unrecognized.  The returned pointers are string literals.
 *
 * NOTE(review): the values returned for the NFS and NLM branches are
 * not visible in this extract.
 */
1595 serv_name_to_port_name(char *name
)
1598 * Map service names (used primarily in logging) to
1599 * RPC port names (used by netdir_*() routines).
1601 if (strcmp(name
, "NFS") == 0) {
1603 } else if (strcmp(name
, "NLM") == 0) {
1605 } else if (strcmp(name
, "NFS4_CALLBACK") == 0) {
1606 return ("nfs4_callback");
1609 return ("unrecognized");
/*
 * bind_to_provider: find the netconfig entry whose nc_device equals
 * provider and bind the service serv on it.
 *
 * The host is HOST_SELF and the service string is obtained from
 * serv_name_to_port_name(serv).  The netconfig database is walked with
 * setnetconfig()/getnetconfig(); the first entry that passes
 * OK_TPI_TYPE() and whose device matches is handed to nfslib_bindit(),
 * whose result is returned directly.  A setnetconfig() failure and an
 * exhausted search are both reported through syslog.
 *
 * NOTE(review): on the successful path the function returns from inside
 * the loop, before the endnetconfig() call; presumably the handle stays
 * open because the matched entry is handed back through retnconf.  The
 * use of retnconf and the error return values are not visible in this
 * extract -- confirm against the full source.
 */
1613 bind_to_provider(char *provider
, char *serv
, struct netbuf
**addr
,
1614 struct netconfig
**retnconf
)
1616 struct netconfig
*nconf
;
1618 struct nd_hostserv hs
;
1620 hs
.h_host
= HOST_SELF
;
1621 hs
.h_serv
= serv_name_to_port_name(serv
);
1623 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
1624 syslog(LOG_ERR
, "setnetconfig failed: %m");
/* The assignment is the loop condition: stop when getnetconfig gives NULL. */
1627 while (nconf
= getnetconfig(nc
)) {
1628 if (OK_TPI_TYPE(nconf
) &&
1629 strcmp(nconf
->nc_device
, provider
) == 0) {
1631 return (nfslib_bindit(nconf
, addr
, &hs
,
1635 (void) endnetconfig(nc
);
1637 syslog(LOG_ERR
, "couldn't find netconfig entry for provider %s",
/*
 * bind_to_proto: find the netconfig entry whose nc_proto matches proto
 * (compared with NETSELEQ) and bind the service serv on it.
 *
 * This mirrors bind_to_provider() except for the matching criterion:
 * the host is HOST_SELF, the service string comes from
 * serv_name_to_port_name(serv), and the first entry that passes
 * OK_TPI_TYPE() and matches the protocol is handed to nfslib_bindit(),
 * whose result is returned directly.  A setnetconfig() failure and an
 * exhausted search are both reported through syslog.
 *
 * NOTE(review): the successful path returns from inside the loop,
 * before the endnetconfig() call.  The use of retnconf and the error
 * return values are not visible in this extract.
 */
1643 bind_to_proto(NETSELDECL(proto
), char *serv
, struct netbuf
**addr
,
1644 struct netconfig
**retnconf
)
1646 struct netconfig
*nconf
;
1647 NCONF_HANDLE
*nc
= NULL
;
1648 struct nd_hostserv hs
;
1650 hs
.h_host
= HOST_SELF
;
1651 hs
.h_serv
= serv_name_to_port_name(serv
);
1653 if ((nc
= setnetconfig()) == (NCONF_HANDLE
*)NULL
) {
1654 syslog(LOG_ERR
, "setnetconfig failed: %m");
/* The assignment is the loop condition: stop when getnetconfig gives NULL. */
1657 while (nconf
= getnetconfig(nc
)) {
1658 if (OK_TPI_TYPE(nconf
) && NETSELEQ(nconf
->nc_proto
, proto
)) {
1660 return (nfslib_bindit(nconf
, addr
, &hs
,
1664 (void) endnetconfig(nc
);
1666 syslog(LOG_ERR
, "couldn't find netconfig entry for protocol %s",
1671 #include <netinet/in.h>
1674 * Create an address mask appropriate for the transport.
1675 * The mask is used to obtain the host-specific part of
1676 * a network address when comparing addresses.
1677 * For an internet address the host-specific part is just
1678 * the 32 bit IP address and this part of the mask is set
1679 * to all-ones. The port number part of the mask is zeroes.
/*
 * set_addrmask: fill in *mask with an address mask sized for the
 * transport behind fd.
 *
 *   fd    - transport endpoint queried with t_getinfo()
 *   nconf - netconfig entry; its nc_protofmly selects the mask layout
 *   mask  - netbuf to fill; mask->buf is allocated here with malloc()
 *
 * Steps visible in the code:
 *   1. t_getinfo() supplies the address size, stored in both mask->len
 *      and mask->maxlen.  A t_getinfo() failure is reported with
 *      t_error().
 *   2. A size <= 0 is special: for a loopback transport reporting -1
 *      the whole netbuf is zeroed, otherwise the size is logged.
 *   3. The buffer is allocated and cleared; an allocation failure is
 *      logged.
 *   4. NC_INET: fields of a sockaddr_in overlay are set.  NC_INET6: the
 *      sin6_addr bytes are set to all ones and sin6_family is set.
 *      Any other family: every byte of the mask is set to 0xFF.
 *
 * NOTE(review): the return statements and the values assigned to
 * s_addr, sin_family and sin6_family are not visible in this extract.
 * The caller visible earlier in the file treats a negative result as
 * failure.
 */
1682 set_addrmask(int fd
,
1683 struct netconfig
*nconf
,
1684 struct netbuf
*mask
)
1689 * Find the size of the address we need to mask.
1691 if (t_getinfo(fd
, &info
) < 0) {
1692 t_error("t_getinfo");
1695 mask
->len
= mask
->maxlen
= info
.addr
;
1696 if (info
.addr
<= 0) {
1698 * loopback devices have infinite addr size
1699 * (it is identified by -1 in addr field of t_info structure),
1700 * so don't build the netmask for them. It's a special case
1701 * that should be handled properly.
1703 if ((info
.addr
== -1) &&
1704 (0 == strcmp(nconf
->nc_protofmly
, NC_LOOPBACK
))) {
/* Clears len, maxlen and buf of the netbuf itself, not a mask buffer. */
1705 memset(mask
, 0, sizeof (*mask
));
1709 syslog(LOG_ERR
, "set_addrmask: address size: %ld", info
.addr
);
1713 mask
->buf
= (char *)malloc(mask
->len
);
1714 if (mask
->buf
== NULL
) {
1715 syslog(LOG_ERR
, "set_addrmask: no memory");
1718 (void) memset(mask
->buf
, 0, mask
->len
); /* reset all mask bits */
1720 if (strcmp(nconf
->nc_protofmly
, NC_INET
) == 0) {
1722 * Set the mask so that the port is ignored.
1724 /* LINTED pointer alignment */
1725 ((struct sockaddr_in
*)mask
->buf
)->sin_addr
.s_addr
=
1727 /* LINTED pointer alignment */
1728 ((struct sockaddr_in
*)mask
->buf
)->sin_family
=
1730 } else if (strcmp(nconf
->nc_protofmly
, NC_INET6
) == 0) {
1731 /* LINTED pointer alignment */
1732 (void) memset(&((struct sockaddr_in6
*)mask
->buf
)->sin6_addr
,
1733 (uchar_t
)~0, sizeof (struct in6_addr
));
1734 /* LINTED pointer alignment */
1735 ((struct sockaddr_in6
*)mask
->buf
)->sin6_family
=
1740 * Set all mask bits.
1742 (void) memset(mask
->buf
, 0xFF, mask
->len
);
1748 * For listen fd's index is always less than end_listen_fds.
1749 * end_listen_fds is defined externally in the daemon that uses this library.
1750 * Its value is equal to the number of open file descriptors after the
1751 * last listen end point was opened but before any connection was accepted.
/*
 * is_listen_fd_index: report whether a poll-array index belongs to a
 * listen endpoint.  Returns non-zero when index is strictly less than
 * end_listen_fds and zero otherwise.
 */
1754 is_listen_fd_index(int index
)
1756 return (index
< end_listen_fds
);