2 * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: socket.c,v 1.207.2.19.2.26 2006/05/19 02:53:36 marka Exp $ */
22 #include <sys/param.h>
23 #include <sys/types.h>
24 #include <sys/socket.h>
35 #include <isc/buffer.h>
36 #include <isc/bufferlist.h>
37 #include <isc/condition.h>
38 #include <isc/formatcheck.h>
43 #include <isc/mutex.h>
45 #include <isc/platform.h>
46 #include <isc/print.h>
47 #include <isc/region.h>
48 #include <isc/socket.h>
49 #include <isc/strerror.h>
51 #include <isc/thread.h>
54 #include "errno2result.h"
56 #ifndef ISC_PLATFORM_USETHREADS
58 #endif /* ISC_PLATFORM_USETHREADS */
61 * Some systems define the socket length argument as an int, some as size_t,
62 * some as socklen_t. This is here so it can be easily changed if needed.
64 #ifndef ISC_SOCKADDR_LEN_T
65 #define ISC_SOCKADDR_LEN_T unsigned int
69 * Define what the possible "soft" errors can be. These are non-fatal returns
70 * of various network related functions, like recv() and so on.
72 * For some reason, BSDI (and perhaps others) will sometimes return <0
73 * from recv() but will have errno==0. This is broken, but we have to
74 * work around it here.
76 #define SOFT_ERROR(e) ((e) == EAGAIN || \
77 (e) == EWOULDBLOCK || \
81 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
84 * DLVL(90) -- Function entry/exit and other tracing.
85 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
86 * DLVL(60) -- Socket data send/receive
87 * DLVL(50) -- Event tracing, including receiving/sending completion events.
88 * DLVL(20) -- Socket creation/destruction.
90 #define TRACE_LEVEL 90
91 #define CORRECTNESS_LEVEL 70
92 #define IOEVENT_LEVEL 60
93 #define EVENT_LEVEL 50
94 #define CREATION_LEVEL 20
96 #define TRACE DLVL(TRACE_LEVEL)
97 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
98 #define IOEVENT DLVL(IOEVENT_LEVEL)
99 #define EVENT DLVL(EVENT_LEVEL)
100 #define CREATION DLVL(CREATION_LEVEL)
102 typedef isc_event_t intev_t
;
104 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
105 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
108 * IPv6 control information. If the socket is an IPv6 socket we want
109 * to collect the destination address and interface so the client can
110 * set them on outgoing packets.
112 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
119 * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
120 * a setsockopt() like interface to request timestamps, and if the OS
121 * doesn't do it for us, call gettimeofday() on every UDP receive?
130 * The number of times a send operation is repeated if the result is EINTR.
137 isc_socketmgr_t
*manager
;
139 isc_sockettype_t type
;
141 /* Locked by socket lock. */
142 ISC_LINK(isc_socket_t
) link
;
143 unsigned int references
;
147 ISC_LIST(isc_socketevent_t
) send_list
;
148 ISC_LIST(isc_socketevent_t
) recv_list
;
149 ISC_LIST(isc_socket_newconnev_t
) accept_list
;
150 isc_socket_connev_t
*connect_ev
;
153 * Internal events. Posted when a descriptor is readable or
154 * writable. These are statically allocated and never freed.
155 * They will be set to non-purgable before use.
160 isc_sockaddr_t address
; /* remote address */
162 unsigned int pending_recv
: 1,
165 listener
: 1, /* listener socket */
167 connecting
: 1, /* connect pending */
168 bound
: 1; /* bound to local addr */
170 #ifdef ISC_NET_RECVOVERFLOW
171 unsigned char overflow
; /* used for MSG_TRUNC fake */
175 ISC_SOCKADDR_LEN_T recvcmsgbuflen
;
177 ISC_SOCKADDR_LEN_T sendcmsgbuflen
;
180 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
181 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
183 struct isc_socketmgr
{
188 /* Locked by manager lock. */
189 ISC_LIST(isc_socket_t
) socklist
;
192 isc_socket_t
*fds
[FD_SETSIZE
];
193 int fdstate
[FD_SETSIZE
];
195 #ifdef ISC_PLATFORM_USETHREADS
196 isc_thread_t watcher
;
197 isc_condition_t shutdown_ok
;
199 #else /* ISC_PLATFORM_USETHREADS */
201 #endif /* ISC_PLATFORM_USETHREADS */
204 #ifndef ISC_PLATFORM_USETHREADS
205 static isc_socketmgr_t
*socketmgr
= NULL
;
206 #endif /* ISC_PLATFORM_USETHREADS */
208 #define CLOSED 0 /* this one must be zero */
210 #define CLOSE_PENDING 2
213 * send() and recv() iovec counts
215 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
216 #ifdef ISC_NET_RECVOVERFLOW
217 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)
219 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
222 static void send_recvdone_event(isc_socket_t
*, isc_socketevent_t
**);
223 static void send_senddone_event(isc_socket_t
*, isc_socketevent_t
**);
224 static void free_socket(isc_socket_t
**);
225 static isc_result_t
allocate_socket(isc_socketmgr_t
*, isc_sockettype_t
,
227 static void destroy(isc_socket_t
**);
228 static void internal_accept(isc_task_t
*, isc_event_t
*);
229 static void internal_connect(isc_task_t
*, isc_event_t
*);
230 static void internal_recv(isc_task_t
*, isc_event_t
*);
231 static void internal_send(isc_task_t
*, isc_event_t
*);
232 static void process_cmsg(isc_socket_t
*, struct msghdr
*, isc_socketevent_t
*);
233 static void build_msghdr_send(isc_socket_t
*, isc_socketevent_t
*,
234 struct msghdr
*, struct iovec
*, size_t *);
235 static void build_msghdr_recv(isc_socket_t
*, isc_socketevent_t
*,
236 struct msghdr
*, struct iovec
*, size_t *);
238 #define SELECT_POKE_SHUTDOWN (-1)
239 #define SELECT_POKE_NOTHING (-2)
240 #define SELECT_POKE_READ (-3)
241 #define SELECT_POKE_ACCEPT (-3) /* Same as _READ */
242 #define SELECT_POKE_WRITE (-4)
243 #define SELECT_POKE_CONNECT (-4) /* Same as _WRITE */
244 #define SELECT_POKE_CLOSE (-5)
246 #define SOCK_DEAD(s) ((s)->references == 0)
249 manager_log(isc_socketmgr_t
*sockmgr
,
250 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
251 const char *fmt
, ...) ISC_FORMAT_PRINTF(5, 6);
/*
 * Log a printf-style message on behalf of socket manager 'sockmgr'.
 *
 * isc_log_wouldlog() is consulted for 'level' first.  'fmt' and the
 * variadic arguments are rendered into msgbuf with vsnprintf(), bounded
 * by sizeof(msgbuf), and the result is written with isc_log_write() under
 * 'category', 'module' and 'level'.  Every message is prefixed with
 * "sockmgr %p: " so the manager pointer identifies the source.
 */
253 manager_log(isc_socketmgr_t
*sockmgr
,
254 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
255 const char *fmt
, ...)
/*
 * Ask the logging layer about 'level' before doing any formatting.
 * NOTE(review): the statement guarded by this test is not visible in
 * this excerpt; presumably an early return.
 */
260 if (! isc_log_wouldlog(isc_lctx
, level
))
264 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
267 isc_log_write(isc_lctx
, category
, module
, level
,
268 "sockmgr %p: %s", sockmgr
, msgbuf
);
272 socket_log(isc_socket_t
*sock
, isc_sockaddr_t
*address
,
273 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
274 isc_msgcat_t
*msgcat
, int msgset
, int message
,
275 const char *fmt
, ...) ISC_FORMAT_PRINTF(9, 10);
/*
 * Log a printf-style message on behalf of socket 'sock'.
 *
 * 'fmt' and the variadic arguments are rendered into msgbuf with
 * vsnprintf() and written through isc_log_iwrite(), which also receives
 * 'msgcat', 'msgset' and 'message'.
 *
 * When 'address' is NULL the line has the form "socket %p: %s".
 * Otherwise 'address' is formatted into peerbuf with
 * isc_sockaddr_format() and the line has the form "socket %p %s: %s".
 */
277 socket_log(isc_socket_t
*sock
, isc_sockaddr_t
*address
,
278 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
279 isc_msgcat_t
*msgcat
, int msgset
, int message
,
280 const char *fmt
, ...)
283 char peerbuf
[ISC_SOCKADDR_FORMATSIZE
];
/*
 * NOTE(review): the statement guarded by this test is not visible in
 * this excerpt; presumably an early return.
 */
286 if (! isc_log_wouldlog(isc_lctx
, level
))
290 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
/* No peer address supplied: log the socket pointer and message only. */
293 if (address
== NULL
) {
294 isc_log_iwrite(isc_lctx
, category
, module
, level
,
295 msgcat
, msgset
, message
,
296 "socket %p: %s", sock
, msgbuf
);
/* Peer address supplied: include its text form in the log line. */
298 isc_sockaddr_format(address
, peerbuf
, sizeof(peerbuf
));
299 isc_log_iwrite(isc_lctx
, category
, module
, level
,
300 msgcat
, msgset
, message
,
301 "socket %p %s: %s", sock
, peerbuf
, msgbuf
);
/*
 * Handle a wakeup for descriptor 'fd' using the manager's per-fd tables.
 *
 * A descriptor in the CLOSE_PENDING state is marked CLOSED and removed
 * from both read_fds and write_fds.  For a descriptor being watched, the
 * socket stored in manager->fds[fd] is added to read_fds when 'msg' is
 * SELECT_POKE_READ and to write_fds when 'msg' is SELECT_POKE_WRITE.
 * SELECT_POKE_ACCEPT and SELECT_POKE_CONNECT are defined with the same
 * values as _READ and _WRITE, so they take the same paths.
 */
306 wakeup_socket(isc_socketmgr_t
*manager
, int fd
, int msg
) {
310 * This is a wakeup on a socket. If the socket is not in the
311 * process of being closed, start watching it for either reads
/* 'fd' indexes fdstate[] and fds[], so it must lie in [0, FD_SETSIZE). */
315 INSIST(fd
>= 0 && fd
< (int)FD_SETSIZE
);
/* A close was pending: finish it and stop watching the descriptor. */
317 if (manager
->fdstate
[fd
] == CLOSE_PENDING
) {
318 manager
->fdstate
[fd
] = CLOSED
;
319 FD_CLR(fd
, &manager
->read_fds
);
320 FD_CLR(fd
, &manager
->write_fds
);
/*
 * NOTE(review): the statement guarded by this test is not visible in
 * this excerpt; presumably an early return for unmanaged descriptors.
 */
324 if (manager
->fdstate
[fd
] != MANAGED
)
327 sock
= manager
->fds
[fd
];
/* Register interest in readability or writability as requested. */
332 if (msg
== SELECT_POKE_READ
)
333 FD_SET(sock
->fd
, &manager
->read_fds
);
334 if (msg
== SELECT_POKE_WRITE
)
335 FD_SET(sock
->fd
, &manager
->write_fds
);
338 #ifdef ISC_PLATFORM_USETHREADS
340 * Poke the select loop when there is something for us to do.
341 * The write is required (by POSIX) to complete. That is, we
342 * will not get partial writes.
/*
 * Threaded build: send a poke to the watcher through the manager's pipe.
 *
 * buf is written to the pipe's write end, mgr->pipe_fds[1].  The write
 * is repeated while it fails with an errno that SOFT_ERROR() accepts.
 * A failure that remains is formatted with isc__strerror() and reported
 * through FATAL_ERROR().  A write that succeeds must have transferred
 * exactly sizeof(buf) bytes.
 *
 * NOTE(review): the declaration and filling of buf are not visible in
 * this excerpt; presumably it carries 'fd' and 'msg'.
 */
345 select_poke(isc_socketmgr_t
*mgr
, int fd
, int msg
) {
348 char strbuf
[ISC_STRERRORSIZE
];
354 cc
= write(mgr
->pipe_fds
[1], buf
, sizeof(buf
));
357 * Treat ENOSR as EAGAIN but loop slowly as it is
358 * unlikely to clear fast.
360 if (cc
< 0 && errno
== ENOSR
) {
/* Retry for as long as the failure is one of the soft errors. */
365 } while (cc
< 0 && SOFT_ERROR(errno
));
368 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
369 FATAL_ERROR(__FILE__
, __LINE__
,
370 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
373 "during watcher poke: %s"),
/* The message must have been written in full. */
377 INSIST(cc
== sizeof(buf
));
381 * Read a message on the internal fd.
/*
 * Threaded build: read one poke message from the manager's pipe.
 *
 * buf is filled from the pipe's read end, mgr->pipe_fds[0].  The visible
 * lines set *msg to SELECT_POKE_NOTHING and *fd to -1, test errno with
 * SOFT_ERROR(), and otherwise format errno with isc__strerror() and
 * report it through FATAL_ERROR().  A read that succeeds must have
 * returned exactly sizeof(buf) bytes.
 *
 * NOTE(review): the conditions enclosing the error path, and the code
 * that decodes buf into *fd and *msg, are not visible in this excerpt.
 */
384 select_readmsg(isc_socketmgr_t
*mgr
, int *fd
, int *msg
) {
387 char strbuf
[ISC_STRERRORSIZE
];
389 cc
= read(mgr
->pipe_fds
[0], buf
, sizeof(buf
));
391 *msg
= SELECT_POKE_NOTHING
;
392 *fd
= -1; /* Silence compiler. */
393 if (SOFT_ERROR(errno
))
396 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
397 FATAL_ERROR(__FILE__
, __LINE__
,
398 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
401 "during watcher poke: %s"),
/* The message must have been read in full. */
406 INSIST(cc
== sizeof(buf
));
411 #else /* ISC_PLATFORM_USETHREADS */
413 * Update the state of the socketmgr when something changes.
/*
 * Non-threaded build: no pipe is involved; the poke is applied by calling
 * wakeup_socket() directly with the same 'manager', 'fd' and 'msg'.
 *
 * NOTE(review): SELECT_POKE_SHUTDOWN is tested separately, but the
 * statement it guards is not visible in this excerpt.
 */
416 select_poke(isc_socketmgr_t
*manager
, int fd
, int msg
) {
417 if (msg
== SELECT_POKE_SHUTDOWN
)
420 wakeup_socket(manager
, fd
, msg
);
423 #endif /* ISC_PLATFORM_USETHREADS */
426 * Make a fd non-blocking.
/*
 * Put descriptor 'fd' into non-blocking mode.
 *
 * With USE_FIONBIO_IOCTL the mode is set with ioctl(fd, FIONBIO, ...).
 * The other visible path reads the current flags with
 * fcntl(fd, F_GETFL, 0), ORs in PORT_NONBLOCK and stores them back with
 * fcntl(fd, F_SETFL, flags).
 *
 * Returns ISC_R_SUCCESS, or ISC_R_UNEXPECTED after the failing call has
 * been reported through UNEXPECTED_ERROR() with the isc__strerror() text
 * for errno.
 */
429 make_nonblock(int fd
) {
432 char strbuf
[ISC_STRERRORSIZE
];
433 #ifdef USE_FIONBIO_IOCTL
436 ret
= ioctl(fd
, FIONBIO
, (char *)&on
);
/* Read-modify-write of the file status flags. */
438 flags
= fcntl(fd
, F_GETFL
, 0);
439 flags
|= PORT_NONBLOCK
;
440 ret
= fcntl(fd
, F_SETFL
, flags
);
/* Failure path: the message names whichever call was compiled in. */
444 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
445 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
446 #ifdef USE_FIONBIO_IOCTL
447 "ioctl(%d, FIONBIO, &on): %s", fd
,
449 "fcntl(%d, F_SETFL, %d): %s", fd
, flags
,
453 return (ISC_R_UNEXPECTED
);
456 return (ISC_R_SUCCESS
);
461 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
462 * In order to ensure as much portability as possible, we provide wrapper
463 * functions of these macros.
464 * Note that cmsg_space() could run slow on OSes that do not have
/*
 * Portable wrapper for CMSG_LEN(): the cmsg_len value for a control
 * message carrying 'len' bytes of data.
 *
 * Two implementations are visible.  One returns CMSG_LEN(len) directly.
 * The other applies CMSG_DATA() to a NULL cmsghdr pointer to obtain the
 * header length and returns that plus 'len'.
 *
 * NOTE(review): the preprocessor test that selects between the two is
 * not visible in this excerpt.
 */
467 static inline ISC_SOCKADDR_LEN_T
468 cmsg_len(ISC_SOCKADDR_LEN_T len
) {
470 return (CMSG_LEN(len
));
472 ISC_SOCKADDR_LEN_T hdrlen
;
475 * Cast NULL so that any pointer arithmetic performed by CMSG_DATA
/* Offset of the data area from a NULL base equals the header length. */
478 hdrlen
= (ISC_SOCKADDR_LEN_T
)CMSG_DATA(((struct cmsghdr
*)NULL
));
479 return (hdrlen
+ len
);
/*
 * Portable wrapper for CMSG_SPACE(): the buffer space taken by a control
 * message carrying 'len' bytes of data.
 *
 * Two implementations are visible.  One returns CMSG_SPACE(len) directly.
 * The other points a zeroed msghdr at a stack buffer, stores
 * cmsg_len(len) in the first cmsghdr, asks CMSG_NXTHDR() where the next
 * header would start, and returns that position as a byte offset from
 * the start of the control buffer.
 *
 * NOTE(review): the preprocessor test that selects between the two, and
 * any handling of a NULL result from CMSG_NXTHDR(), are not visible in
 * this excerpt.
 */
483 static inline ISC_SOCKADDR_LEN_T
484 cmsg_space(ISC_SOCKADDR_LEN_T len
) {
486 return (CMSG_SPACE(len
));
489 struct cmsghdr
*cmsgp
;
491 * XXX: The buffer length is an ad-hoc value, but should be enough
492 * in a practical sense.
494 char dummybuf
[sizeof(struct cmsghdr
) + 1024];
/* Describe dummybuf as the control area of an otherwise empty msghdr. */
496 memset(&msg
, 0, sizeof(msg
));
497 msg
.msg_control
= dummybuf
;
498 msg
.msg_controllen
= sizeof(dummybuf
);
/* Pretend the first control message carries 'len' bytes of data. */
500 cmsgp
= (struct cmsghdr
*)dummybuf
;
501 cmsgp
->cmsg_len
= cmsg_len(len
);
/* The start of the next header marks the end of the space used. */
503 cmsgp
= CMSG_NXTHDR(&msg
, cmsgp
);
505 return ((char *)cmsgp
- (char *)msg
.msg_control
);
510 #endif /* USE_CMSG */
513 * Process control messages received on a socket.
516 process_cmsg(isc_socket_t
*sock
, struct msghdr
*msg
, isc_socketevent_t
*dev
) {
518 struct cmsghdr
*cmsgp
;
519 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
520 struct in6_pktinfo
*pktinfop
;
523 struct timeval
*timevalp
;
528 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
529 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
530 * They are all here, outside of the CPP tests, because it is
531 * more consistent with the usual ISC coding style.
537 #ifdef ISC_NET_BSD44MSGHDR
540 if ((msg
->msg_flags
& MSG_TRUNC
) == MSG_TRUNC
)
541 dev
->attributes
|= ISC_SOCKEVENTATTR_TRUNC
;
545 if ((msg
->msg_flags
& MSG_CTRUNC
) == MSG_CTRUNC
)
546 dev
->attributes
|= ISC_SOCKEVENTATTR_CTRUNC
;
552 if (msg
->msg_controllen
== 0U || msg
->msg_control
== NULL
)
558 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
562 cmsgp
= CMSG_FIRSTHDR(msg
);
563 while (cmsgp
!= NULL
) {
564 socket_log(sock
, NULL
, TRACE
,
565 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_PROCESSCMSG
,
566 "processing cmsg %p", cmsgp
);
568 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
569 if (cmsgp
->cmsg_level
== IPPROTO_IPV6
570 && cmsgp
->cmsg_type
== IPV6_PKTINFO
) {
572 pktinfop
= (struct in6_pktinfo
*)CMSG_DATA(cmsgp
);
573 memcpy(&dev
->pktinfo
, pktinfop
,
574 sizeof(struct in6_pktinfo
));
575 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
576 socket_log(sock
, NULL
, TRACE
,
577 isc_msgcat
, ISC_MSGSET_SOCKET
,
579 "interface received on ifindex %u",
580 dev
->pktinfo
.ipi6_ifindex
);
581 if (IN6_IS_ADDR_MULTICAST(&pktinfop
->ipi6_addr
))
582 dev
->attributes
|= ISC_SOCKEVENTATTR_MULTICAST
;
588 if (cmsgp
->cmsg_level
== SOL_SOCKET
589 && cmsgp
->cmsg_type
== SCM_TIMESTAMP
) {
590 timevalp
= (struct timeval
*)CMSG_DATA(cmsgp
);
591 dev
->timestamp
.seconds
= timevalp
->tv_sec
;
592 dev
->timestamp
.nanoseconds
= timevalp
->tv_usec
* 1000;
593 dev
->attributes
|= ISC_SOCKEVENTATTR_TIMESTAMP
;
599 cmsgp
= CMSG_NXTHDR(msg
, cmsgp
);
601 #endif /* USE_CMSG */
603 #endif /* ISC_NET_BSD44MSGHDR */
607 * Construct an iov array and attach it to the msghdr passed in. This is
608 * the SEND constructor, which will use the used region of the buffer
609 * (if using a buffer list) or will use the internal region (if a single
610 * buffer I/O is requested).
612 * Nothing can be NULL, and the done event must list at least one buffer
613 * on the buffer linked list for this function to be meaningful.
615 * If write_countp != NULL, *write_countp will hold the number of bytes
616 * this transaction can send.
619 build_msghdr_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
620 struct msghdr
*msg
, struct iovec
*iov
, size_t *write_countp
)
622 unsigned int iovcount
;
623 isc_buffer_t
*buffer
;
628 memset(msg
, 0, sizeof(*msg
));
630 if (sock
->type
== isc_sockettype_udp
) {
631 msg
->msg_name
= (void *)&dev
->address
.type
.sa
;
632 msg
->msg_namelen
= dev
->address
.length
;
634 msg
->msg_name
= NULL
;
635 msg
->msg_namelen
= 0;
638 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
643 * Single buffer I/O? Skip what we've done so far in this region.
645 if (buffer
== NULL
) {
646 write_count
= dev
->region
.length
- dev
->n
;
647 iov
[0].iov_base
= (void *)(dev
->region
.base
+ dev
->n
);
648 iov
[0].iov_len
= write_count
;
656 * Skip the data in the buffer list that we have already written.
659 while (buffer
!= NULL
) {
660 REQUIRE(ISC_BUFFER_VALID(buffer
));
661 if (skip_count
< isc_buffer_usedlength(buffer
))
663 skip_count
-= isc_buffer_usedlength(buffer
);
664 buffer
= ISC_LIST_NEXT(buffer
, link
);
667 while (buffer
!= NULL
) {
668 INSIST(iovcount
< MAXSCATTERGATHER_SEND
);
670 isc_buffer_usedregion(buffer
, &used
);
672 if (used
.length
> 0) {
673 iov
[iovcount
].iov_base
= (void *)(used
.base
675 iov
[iovcount
].iov_len
= used
.length
- skip_count
;
676 write_count
+= (used
.length
- skip_count
);
680 buffer
= ISC_LIST_NEXT(buffer
, link
);
683 INSIST(skip_count
== 0U);
687 msg
->msg_iovlen
= iovcount
;
689 #ifdef ISC_NET_BSD44MSGHDR
690 msg
->msg_control
= NULL
;
691 msg
->msg_controllen
= 0;
693 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
694 if ((sock
->type
== isc_sockettype_udp
)
695 && ((dev
->attributes
& ISC_SOCKEVENTATTR_PKTINFO
) != 0)) {
696 struct cmsghdr
*cmsgp
;
697 struct in6_pktinfo
*pktinfop
;
699 socket_log(sock
, NULL
, TRACE
,
700 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_SENDTODATA
,
701 "sendto pktinfo data, ifindex %u",
702 dev
->pktinfo
.ipi6_ifindex
);
704 msg
->msg_controllen
= cmsg_space(sizeof(struct in6_pktinfo
));
705 INSIST(msg
->msg_controllen
<= sock
->sendcmsgbuflen
);
706 msg
->msg_control
= (void *)sock
->sendcmsgbuf
;
708 cmsgp
= (struct cmsghdr
*)sock
->sendcmsgbuf
;
709 cmsgp
->cmsg_level
= IPPROTO_IPV6
;
710 cmsgp
->cmsg_type
= IPV6_PKTINFO
;
711 cmsgp
->cmsg_len
= cmsg_len(sizeof(struct in6_pktinfo
));
712 pktinfop
= (struct in6_pktinfo
*)CMSG_DATA(cmsgp
);
713 memcpy(pktinfop
, &dev
->pktinfo
, sizeof(struct in6_pktinfo
));
715 #endif /* USE_CMSG && ISC_PLATFORM_HAVEIPV6 */
716 #else /* ISC_NET_BSD44MSGHDR */
717 msg
->msg_accrights
= NULL
;
718 msg
->msg_accrightslen
= 0;
719 #endif /* ISC_NET_BSD44MSGHDR */
721 if (write_countp
!= NULL
)
722 *write_countp
= write_count
;
726 * Construct an iov array and attach it to the msghdr passed in. This is
727 * the RECV constructor, which will use the available region of the buffer
728 * (if using a buffer list) or will use the internal region (if a single
729 * buffer I/O is requested).
731 * Nothing can be NULL, and the done event must list at least one buffer
732 * on the buffer linked list for this function to be meaningful.
734 * If read_countp != NULL, *read_countp will hold the number of bytes
735 * this transaction can receive.
738 build_msghdr_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
739 struct msghdr
*msg
, struct iovec
*iov
, size_t *read_countp
)
741 unsigned int iovcount
;
742 isc_buffer_t
*buffer
;
743 isc_region_t available
;
746 memset(msg
, 0, sizeof(struct msghdr
));
748 if (sock
->type
== isc_sockettype_udp
) {
749 memset(&dev
->address
, 0, sizeof(dev
->address
));
750 #ifdef BROKEN_RECVMSG
751 if (sock
->pf
== AF_INET
) {
752 msg
->msg_name
= (void *)&dev
->address
.type
.sin
;
753 msg
->msg_namelen
= sizeof(dev
->address
.type
.sin6
);
754 } else if (sock
->pf
== AF_INET6
) {
755 msg
->msg_name
= (void *)&dev
->address
.type
.sin6
;
756 msg
->msg_namelen
= sizeof(dev
->address
.type
.sin6
);
757 #ifdef ISC_PLATFORM_HAVESYSUNH
758 } else if (sock
->pf
== AF_UNIX
) {
759 msg
->msg_name
= (void *)&dev
->address
.type
.sunix
;
760 msg
->msg_namelen
= sizeof(dev
->address
.type
.sunix
);
763 msg
->msg_name
= (void *)&dev
->address
.type
.sa
;
764 msg
->msg_namelen
= sizeof(dev
->address
.type
);
767 msg
->msg_name
= (void *)&dev
->address
.type
.sa
;
768 msg
->msg_namelen
= sizeof(dev
->address
.type
);
770 #ifdef ISC_NET_RECVOVERFLOW
771 /* If needed, steal one iovec for overflow detection. */
775 msg
->msg_name
= NULL
;
776 msg
->msg_namelen
= 0;
777 dev
->address
= sock
->address
;
780 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
784 * Single buffer I/O? Skip what we've done so far in this region.
786 if (buffer
== NULL
) {
787 read_count
= dev
->region
.length
- dev
->n
;
788 iov
[0].iov_base
= (void *)(dev
->region
.base
+ dev
->n
);
789 iov
[0].iov_len
= read_count
;
797 * Skip empty buffers.
799 while (buffer
!= NULL
) {
800 REQUIRE(ISC_BUFFER_VALID(buffer
));
801 if (isc_buffer_availablelength(buffer
) != 0)
803 buffer
= ISC_LIST_NEXT(buffer
, link
);
807 while (buffer
!= NULL
) {
808 INSIST(iovcount
< MAXSCATTERGATHER_RECV
);
810 isc_buffer_availableregion(buffer
, &available
);
812 if (available
.length
> 0) {
813 iov
[iovcount
].iov_base
= (void *)(available
.base
);
814 iov
[iovcount
].iov_len
= available
.length
;
815 read_count
+= available
.length
;
818 buffer
= ISC_LIST_NEXT(buffer
, link
);
824 * If needed, set up to receive that one extra byte. Note that
825 * we know there is at least one iov left, since we stole it
826 * at the top of this function.
828 #ifdef ISC_NET_RECVOVERFLOW
829 if (sock
->type
== isc_sockettype_udp
) {
830 iov
[iovcount
].iov_base
= (void *)(&sock
->overflow
);
831 iov
[iovcount
].iov_len
= 1;
837 msg
->msg_iovlen
= iovcount
;
839 #ifdef ISC_NET_BSD44MSGHDR
840 msg
->msg_control
= NULL
;
841 msg
->msg_controllen
= 0;
843 #if defined(USE_CMSG)
844 if (sock
->type
== isc_sockettype_udp
) {
845 msg
->msg_control
= sock
->recvcmsgbuf
;
846 msg
->msg_controllen
= sock
->recvcmsgbuflen
;
848 #endif /* USE_CMSG */
849 #else /* ISC_NET_BSD44MSGHDR */
850 msg
->msg_accrights
= NULL
;
851 msg
->msg_accrightslen
= 0;
852 #endif /* ISC_NET_BSD44MSGHDR */
854 if (read_countp
!= NULL
)
855 *read_countp
= read_count
;
/*
 * Fill in dev->address for an I/O event on 'sock'.
 *
 * UDP sockets: dev->address is taken either from '*address' or from
 * sock->address.
 * TCP sockets: 'address' must be NULL and dev->address is taken from
 * sock->address.
 *
 * NOTE(review): the test that chooses between '*address' and
 * sock->address in the UDP case is not visible in this excerpt;
 * presumably it is whether 'address' is non-NULL.
 */
859 set_dev_address(isc_sockaddr_t
*address
, isc_socket_t
*sock
,
860 isc_socketevent_t
*dev
)
862 if (sock
->type
== isc_sockettype_udp
) {
864 dev
->address
= *address
;
866 dev
->address
= sock
->address
;
867 } else if (sock
->type
== isc_sockettype_tcp
) {
/* A caller-supplied address is not accepted for TCP. */
868 INSIST(address
== NULL
);
869 dev
->address
= sock
->address
;
873 static isc_socketevent_t
*
874 allocate_socketevent(isc_socket_t
*sock
, isc_eventtype_t eventtype
,
875 isc_taskaction_t action
, const void *arg
)
877 isc_socketevent_t
*ev
;
879 ev
= (isc_socketevent_t
*)isc_event_allocate(sock
->manager
->mctx
,
887 ev
->result
= ISC_R_UNEXPECTED
;
888 ISC_LINK_INIT(ev
, ev_link
);
889 ISC_LIST_INIT(ev
->bufferlist
);
890 ev
->region
.base
= NULL
;
898 #if defined(ISC_SOCKET_DEBUG)
900 dump_msg(struct msghdr
*msg
) {
903 printf("MSGHDR %p\n", msg
);
904 printf("\tname %p, namelen %d\n", msg
->msg_name
, msg
->msg_namelen
);
905 printf("\tiov %p, iovlen %d\n", msg
->msg_iov
, msg
->msg_iovlen
);
906 for (i
= 0; i
< (unsigned int)msg
->msg_iovlen
; i
++)
907 printf("\t\t%d\tbase %p, len %d\n", i
,
908 msg
->msg_iov
[i
].iov_base
,
909 msg
->msg_iov
[i
].iov_len
);
910 #ifdef ISC_NET_BSD44MSGHDR
911 printf("\tcontrol %p, controllen %d\n", msg
->msg_control
,
912 msg
->msg_controllen
);
917 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
918 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
919 #define DOIO_HARD 2 /* i/o error, event sent */
920 #define DOIO_EOF 3 /* EOF, no event sent */
923 doio_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
925 struct iovec iov
[MAXSCATTERGATHER_RECV
];
928 struct msghdr msghdr
;
929 isc_buffer_t
*buffer
;
931 char strbuf
[ISC_STRERRORSIZE
];
933 build_msghdr_recv(sock
, dev
, &msghdr
, iov
, &read_count
);
935 #if defined(ISC_SOCKET_DEBUG)
939 cc
= recvmsg(sock
->fd
, &msghdr
, 0);
942 #if defined(ISC_SOCKET_DEBUG)
947 if (SOFT_ERROR(recv_errno
))
950 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
951 isc__strerror(recv_errno
, strbuf
, sizeof(strbuf
));
952 socket_log(sock
, NULL
, IOEVENT
,
953 isc_msgcat
, ISC_MSGSET_SOCKET
,
955 "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
956 sock
->fd
, cc
, recv_errno
, strbuf
);
959 #define SOFT_OR_HARD(_system, _isc) \
960 if (recv_errno == _system) { \
961 if (sock->connected) { \
962 dev->result = _isc; \
963 return (DOIO_HARD); \
965 return (DOIO_SOFT); \
967 #define ALWAYS_HARD(_system, _isc) \
968 if (recv_errno == _system) { \
969 dev->result = _isc; \
970 return (DOIO_HARD); \
973 SOFT_OR_HARD(ECONNREFUSED
, ISC_R_CONNREFUSED
);
974 SOFT_OR_HARD(ENETUNREACH
, ISC_R_NETUNREACH
);
975 SOFT_OR_HARD(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
976 SOFT_OR_HARD(EHOSTDOWN
, ISC_R_HOSTDOWN
);
977 /* HPUX 11.11 can return EADDRNOTAVAIL. */
978 SOFT_OR_HARD(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
979 ALWAYS_HARD(ENOBUFS
, ISC_R_NORESOURCES
);
984 dev
->result
= isc__errno2result(recv_errno
);
989 * On TCP, zero length reads indicate EOF, while on
990 * UDP, zero length reads are perfectly valid, although
993 if ((sock
->type
== isc_sockettype_tcp
) && (cc
== 0))
996 if (sock
->type
== isc_sockettype_udp
) {
997 dev
->address
.length
= msghdr
.msg_namelen
;
998 if (isc_sockaddr_getport(&dev
->address
) == 0) {
999 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
1000 socket_log(sock
, &dev
->address
, IOEVENT
,
1001 isc_msgcat
, ISC_MSGSET_SOCKET
,
1003 "dropping source port zero packet");
1009 socket_log(sock
, &dev
->address
, IOEVENT
,
1010 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_PKTRECV
,
1011 "packet received correctly");
1014 * Overflow bit detection. If we received MORE bytes than we should,
1015 * this indicates an overflow situation. Set the flag in the
1016 * dev entry and adjust how much we read by one.
1018 #ifdef ISC_NET_RECVOVERFLOW
1019 if ((sock
->type
== isc_sockettype_udp
) && ((size_t)cc
> read_count
)) {
1020 dev
->attributes
|= ISC_SOCKEVENTATTR_TRUNC
;
1026 * If there are control messages attached, run through them and pull
1027 * out the interesting bits.
1029 if (sock
->type
== isc_sockettype_udp
)
1030 process_cmsg(sock
, &msghdr
, dev
);
1033 * update the buffers (if any) and the i/o count
1037 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1038 while (buffer
!= NULL
&& actual_count
> 0U) {
1039 REQUIRE(ISC_BUFFER_VALID(buffer
));
1040 if (isc_buffer_availablelength(buffer
) <= actual_count
) {
1041 actual_count
-= isc_buffer_availablelength(buffer
);
1042 isc_buffer_add(buffer
,
1043 isc_buffer_availablelength(buffer
));
1045 isc_buffer_add(buffer
, actual_count
);
1049 buffer
= ISC_LIST_NEXT(buffer
, link
);
1050 if (buffer
== NULL
) {
1051 INSIST(actual_count
== 0U);
1056 * If we read less than we expected, update counters,
1057 * and let the upper layer poke the descriptor.
1059 if (((size_t)cc
!= read_count
) && (dev
->n
< dev
->minimum
))
1063 * Full reads are posted, or partials if partials are ok.
1065 dev
->result
= ISC_R_SUCCESS
;
1066 return (DOIO_SUCCESS
);
1071 * DOIO_SUCCESS The operation succeeded. dev->result contains
1074 * DOIO_HARD A hard or unexpected I/O error was encountered.
1075 * dev->result contains the appropriate error.
1077 * DOIO_SOFT A soft I/O error was encountered. No senddone
1078 * event was sent. The operation should be retried.
1080 * No other return values are possible.
1083 doio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
1085 struct iovec iov
[MAXSCATTERGATHER_SEND
];
1087 struct msghdr msghdr
;
1088 char addrbuf
[ISC_SOCKADDR_FORMATSIZE
];
1091 char strbuf
[ISC_STRERRORSIZE
];
1093 build_msghdr_send(sock
, dev
, &msghdr
, iov
, &write_count
);
1096 cc
= sendmsg(sock
->fd
, &msghdr
, 0);
1100 * Check for error or block condition.
1103 if (send_errno
== EINTR
&& ++attempts
< NRETRIES
)
1106 if (SOFT_ERROR(send_errno
))
1109 #define SOFT_OR_HARD(_system, _isc) \
1110 if (send_errno == _system) { \
1111 if (sock->connected) { \
1112 dev->result = _isc; \
1113 return (DOIO_HARD); \
1115 return (DOIO_SOFT); \
1117 #define ALWAYS_HARD(_system, _isc) \
1118 if (send_errno == _system) { \
1119 dev->result = _isc; \
1120 return (DOIO_HARD); \
1123 SOFT_OR_HARD(ECONNREFUSED
, ISC_R_CONNREFUSED
);
1124 ALWAYS_HARD(EACCES
, ISC_R_NOPERM
);
1125 ALWAYS_HARD(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
1126 ALWAYS_HARD(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
1127 ALWAYS_HARD(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
1129 ALWAYS_HARD(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
1131 ALWAYS_HARD(ENETUNREACH
, ISC_R_NETUNREACH
);
1132 ALWAYS_HARD(ENOBUFS
, ISC_R_NORESOURCES
);
1133 ALWAYS_HARD(EPERM
, ISC_R_HOSTUNREACH
);
1134 ALWAYS_HARD(EPIPE
, ISC_R_NOTCONNECTED
);
1135 ALWAYS_HARD(ECONNRESET
, ISC_R_CONNECTIONRESET
);
1141 * The other error types depend on whether or not the
1142 * socket is UDP or TCP. If it is UDP, some errors
1143 * that we expect to be fatal under TCP are merely
1144 * annoying, and are really soft errors.
1146 * However, these soft errors are still returned as
1149 isc_sockaddr_format(&dev
->address
, addrbuf
, sizeof(addrbuf
));
1150 isc__strerror(send_errno
, strbuf
, sizeof(strbuf
));
1151 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "internal_send: %s: %s",
1153 dev
->result
= isc__errno2result(send_errno
);
1158 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1159 "internal_send: send() %s 0",
1160 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1161 ISC_MSG_RETURNED
, "returned"));
1164 * If we write less than we expected, update counters, poke.
1167 if ((size_t)cc
!= write_count
)
1171 * Exactly what we wanted to write. We're done with this
1172 * entry. Post its completion event.
1174 dev
->result
= ISC_R_SUCCESS
;
1175 return (DOIO_SUCCESS
);
1181 * Caller must ensure that the socket is not locked and no external
/*
 * Detach the socket referred to by '*sockp' from its manager.
 *
 * After logging "destroying", the function asserts that the accept, recv
 * and send lists are empty, that no connect event is outstanding, and
 * that sock->fd lies in [0, FD_SETSIZE).  Then, under manager->lock, it:
 *   - clears the manager->fds[] slot for the descriptor,
 *   - sets its fdstate[] entry to CLOSE_PENDING,
 *   - calls select_poke() with SELECT_POKE_CLOSE,
 *   - unlinks the socket from manager->socklist.
 * In threaded builds manager->shutdown_ok is signalled once socklist is
 * empty.
 *
 * NOTE(review): whatever follows the UNLOCK (for example releasing the
 * socket's memory) is not visible in this excerpt.
 */
1185 destroy(isc_socket_t
**sockp
) {
1186 isc_socket_t
*sock
= *sockp
;
1187 isc_socketmgr_t
*manager
= sock
->manager
;
1189 socket_log(sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1190 ISC_MSG_DESTROYING
, "destroying");
/* Nothing may still be queued on, or pending for, this socket. */
1192 INSIST(ISC_LIST_EMPTY(sock
->accept_list
));
1193 INSIST(ISC_LIST_EMPTY(sock
->recv_list
));
1194 INSIST(ISC_LIST_EMPTY(sock
->send_list
));
1195 INSIST(sock
->connect_ev
== NULL
);
/* sock->fd is used below as an index into fds[] and fdstate[]. */
1196 REQUIRE(sock
->fd
>= 0 && sock
->fd
< (int)FD_SETSIZE
);
1198 LOCK(&manager
->lock
);
1201 * No one has this socket open, so the watcher doesn't have to be
1202 * poked, and the socket doesn't have to be locked.
1204 manager
->fds
[sock
->fd
] = NULL
;
1205 manager
->fdstate
[sock
->fd
] = CLOSE_PENDING
;
/*
 * NOTE(review): the text above says the watcher need not be poked, yet
 * select_poke() is called here with SELECT_POKE_CLOSE.  That text looks
 * stale; confirm and reword.
 */
1206 select_poke(manager
, sock
->fd
, SELECT_POKE_CLOSE
);
1207 ISC_LIST_UNLINK(manager
->socklist
, sock
, link
);
1209 #ifdef ISC_PLATFORM_USETHREADS
/* Signal shutdown_ok once the manager's socket list is empty. */
1210 if (ISC_LIST_EMPTY(manager
->socklist
))
1211 SIGNAL(&manager
->shutdown_ok
);
1212 #endif /* ISC_PLATFORM_USETHREADS */
1215 * XXX should reset manager->maxfd here
1218 UNLOCK(&manager
->lock
);
1224 allocate_socket(isc_socketmgr_t
*manager
, isc_sockettype_t type
,
1225 isc_socket_t
**socketp
)
1229 ISC_SOCKADDR_LEN_T cmsgbuflen
;
1231 sock
= isc_mem_get(manager
->mctx
, sizeof(*sock
));
1234 return (ISC_R_NOMEMORY
);
1236 ret
= ISC_R_UNEXPECTED
;
1239 sock
->references
= 0;
1241 sock
->manager
= manager
;
1245 ISC_LINK_INIT(sock
, link
);
1247 sock
->recvcmsgbuf
= NULL
;
1248 sock
->sendcmsgbuf
= NULL
;
1251 * set up cmsg buffers
1254 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1255 cmsgbuflen
= cmsg_space(sizeof(struct in6_pktinfo
));
1257 #if defined(USE_CMSG) && defined(SO_TIMESTAMP)
1258 cmsgbuflen
+= cmsg_space(sizeof(struct timeval
));
1260 sock
->recvcmsgbuflen
= cmsgbuflen
;
1261 if (sock
->recvcmsgbuflen
!= 0U) {
1262 sock
->recvcmsgbuf
= isc_mem_get(manager
->mctx
, cmsgbuflen
);
1263 if (sock
->recvcmsgbuf
== NULL
)
1268 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1269 cmsgbuflen
= cmsg_space(sizeof(struct in6_pktinfo
));
1271 sock
->sendcmsgbuflen
= cmsgbuflen
;
1272 if (sock
->sendcmsgbuflen
!= 0U) {
1273 sock
->sendcmsgbuf
= isc_mem_get(manager
->mctx
, cmsgbuflen
);
1274 if (sock
->sendcmsgbuf
== NULL
)
1279 * set up list of readers and writers to be initially empty
1281 ISC_LIST_INIT(sock
->recv_list
);
1282 ISC_LIST_INIT(sock
->send_list
);
1283 ISC_LIST_INIT(sock
->accept_list
);
1284 sock
->connect_ev
= NULL
;
1285 sock
->pending_recv
= 0;
1286 sock
->pending_send
= 0;
1287 sock
->pending_accept
= 0;
1289 sock
->connected
= 0;
1290 sock
->connecting
= 0;
1294 * initialize the lock
1296 if (isc_mutex_init(&sock
->lock
) != ISC_R_SUCCESS
) {
1298 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1299 "isc_mutex_init() %s",
1300 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1301 ISC_MSG_FAILED
, "failed"));
1302 ret
= ISC_R_UNEXPECTED
;
1307 * Initialize readable and writable events
1309 ISC_EVENT_INIT(&sock
->readable_ev
, sizeof(intev_t
),
1310 ISC_EVENTATTR_NOPURGE
, NULL
, ISC_SOCKEVENT_INTR
,
1311 NULL
, sock
, sock
, NULL
, NULL
);
1312 ISC_EVENT_INIT(&sock
->writable_ev
, sizeof(intev_t
),
1313 ISC_EVENTATTR_NOPURGE
, NULL
, ISC_SOCKEVENT_INTW
,
1314 NULL
, sock
, sock
, NULL
, NULL
);
1316 sock
->magic
= SOCKET_MAGIC
;
1319 return (ISC_R_SUCCESS
);
1322 if (sock
->recvcmsgbuf
!= NULL
)
1323 isc_mem_put(manager
->mctx
, sock
->recvcmsgbuf
,
1324 sock
->recvcmsgbuflen
);
1325 if (sock
->sendcmsgbuf
!= NULL
)
1326 isc_mem_put(manager
->mctx
, sock
->sendcmsgbuf
,
1327 sock
->sendcmsgbuflen
);
1328 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1334 * This event requires that the various lists be empty, that the reference
1335 * count be 0, and that the magic number is valid. The other socket bits,
1336 * like the lock, must be initialized as well. The fd associated must be
1337 * marked as closed, by setting it to -1 on close, or this routine will
1338 * also close the socket.
1341 free_socket(isc_socket_t
**socketp
) {
1342 isc_socket_t
*sock
= *socketp
;
1344 INSIST(sock
->references
== 0);
1345 INSIST(VALID_SOCKET(sock
));
1346 INSIST(!sock
->connecting
);
1347 INSIST(!sock
->pending_recv
);
1348 INSIST(!sock
->pending_send
);
1349 INSIST(!sock
->pending_accept
);
1350 INSIST(ISC_LIST_EMPTY(sock
->recv_list
));
1351 INSIST(ISC_LIST_EMPTY(sock
->send_list
));
1352 INSIST(ISC_LIST_EMPTY(sock
->accept_list
));
1353 INSIST(!ISC_LINK_LINKED(sock
, link
));
1355 if (sock
->recvcmsgbuf
!= NULL
)
1356 isc_mem_put(sock
->manager
->mctx
, sock
->recvcmsgbuf
,
1357 sock
->recvcmsgbuflen
);
1358 if (sock
->sendcmsgbuf
!= NULL
)
1359 isc_mem_put(sock
->manager
->mctx
, sock
->sendcmsgbuf
,
1360 sock
->sendcmsgbuflen
);
1364 DESTROYLOCK(&sock
->lock
);
1366 isc_mem_put(sock
->manager
->mctx
, sock
, sizeof(*sock
));
1372 * Create a new 'type' socket in protocol family 'pf', managed by
1373 * 'manager'. Tasks, actions and args are not given here; they are
1374 * passed with each I/O request instead. The new socket is returned
1378 isc_socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1379 isc_socket_t
**socketp
)
1381 isc_socket_t
*sock
= NULL
;
1383 #if defined(USE_CMSG) || defined(SO_BSDCOMPAT)
1386 char strbuf
[ISC_STRERRORSIZE
];
1387 const char *err
= "socket";
1389 REQUIRE(VALID_MANAGER(manager
));
1390 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1392 ret
= allocate_socket(manager
, type
, &sock
);
1393 if (ret
!= ISC_R_SUCCESS
)
1398 case isc_sockettype_udp
:
1399 sock
->fd
= socket(pf
, SOCK_DGRAM
, IPPROTO_UDP
);
1401 case isc_sockettype_tcp
:
1402 sock
->fd
= socket(pf
, SOCK_STREAM
, IPPROTO_TCP
);
1408 * Leave a space for stdio to work in.
1410 if (sock
->fd
>= 0 && sock
->fd
< 20) {
1412 new = fcntl(sock
->fd
, F_DUPFD
, 20);
1414 (void)close(sock
->fd
);
1417 err
= "isc_socket_create: fcntl";
1421 if (sock
->fd
>= (int)FD_SETSIZE
) {
1422 (void)close(sock
->fd
);
1423 isc_log_iwrite(isc_lctx
, ISC_LOGCATEGORY_GENERAL
,
1424 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
1425 isc_msgcat
, ISC_MSGSET_SOCKET
,
1427 "%s: too many open file descriptors", "socket");
1429 return (ISC_R_NORESOURCES
);
1439 return (ISC_R_NORESOURCES
);
1441 case EPROTONOSUPPORT
:
1445 * Linux 2.2 (and maybe others) return EINVAL instead of
1449 return (ISC_R_FAMILYNOSUPPORT
);
1452 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1453 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1455 isc_msgcat_get(isc_msgcat
,
1460 return (ISC_R_UNEXPECTED
);
1464 if (make_nonblock(sock
->fd
) != ISC_R_SUCCESS
) {
1465 (void)close(sock
->fd
);
1467 return (ISC_R_UNEXPECTED
);
1471 if (setsockopt(sock
->fd
, SOL_SOCKET
, SO_BSDCOMPAT
,
1472 (void *)&on
, sizeof(on
)) < 0) {
1473 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1474 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1475 "setsockopt(%d, SO_BSDCOMPAT) %s: %s",
1477 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1478 ISC_MSG_FAILED
, "failed"),
1484 #if defined(USE_CMSG)
1485 if (type
== isc_sockettype_udp
) {
1487 #if defined(SO_TIMESTAMP)
1488 if (setsockopt(sock
->fd
, SOL_SOCKET
, SO_TIMESTAMP
,
1489 (void *)&on
, sizeof(on
)) < 0
1490 && errno
!= ENOPROTOOPT
) {
1491 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1492 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1493 "setsockopt(%d, SO_TIMESTAMP) %s: %s",
1495 isc_msgcat_get(isc_msgcat
,
1502 #endif /* SO_TIMESTAMP */
1504 #if defined(ISC_PLATFORM_HAVEIPV6)
1505 if (pf
== AF_INET6
&& sock
->recvcmsgbuflen
== 0U) {
1507 * Warn explicitly because this anomaly can be hidden
1508 * in usual operation (and unexpectedly appear later).
1510 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1511 "No buffer available to receive "
1512 "IPv6 destination");
1514 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
1515 #ifdef IPV6_RECVPKTINFO
1517 if ((pf
== AF_INET6
)
1518 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
,
1519 (void *)&on
, sizeof(on
)) < 0)) {
1520 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1521 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1522 "setsockopt(%d, IPV6_RECVPKTINFO) "
1524 isc_msgcat_get(isc_msgcat
,
1532 if ((pf
== AF_INET6
)
1533 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_PKTINFO
,
1534 (void *)&on
, sizeof(on
)) < 0)) {
1535 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1536 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1537 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1539 isc_msgcat_get(isc_msgcat
,
1545 #endif /* IPV6_RECVPKTINFO */
1546 #endif /* ISC_PLATFORM_HAVEIN6PKTINFO */
1547 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1548 /* use minimum MTU */
1549 if (pf
== AF_INET6
) {
1550 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
,
1552 (void *)&on
, sizeof(on
));
1555 #endif /* ISC_PLATFORM_HAVEIPV6 */
1558 #endif /* USE_CMSG */
1560 sock
->references
= 1;
1563 LOCK(&manager
->lock
);
1566 * Note we don't have to lock the socket like we normally would because
1567 * there are no external references to it yet.
1570 manager
->fds
[sock
->fd
] = sock
;
1571 manager
->fdstate
[sock
->fd
] = MANAGED
;
1572 ISC_LIST_APPEND(manager
->socklist
, sock
, link
);
1573 if (manager
->maxfd
< sock
->fd
)
1574 manager
->maxfd
= sock
->fd
;
1576 UNLOCK(&manager
->lock
);
1578 socket_log(sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1579 ISC_MSG_CREATED
, "created");
1581 return (ISC_R_SUCCESS
);
1585 * Attach to a socket. Caller must explicitly detach when it is done.
1588 isc_socket_attach(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1589 REQUIRE(VALID_SOCKET(sock
));
1590 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1594 UNLOCK(&sock
->lock
);
1600 * Dereference a socket. If this is the last reference to it, clean things
1601 * up by destroying the socket.
1604 isc_socket_detach(isc_socket_t
**socketp
) {
1606 isc_boolean_t kill_socket
= ISC_FALSE
;
1608 REQUIRE(socketp
!= NULL
);
1610 REQUIRE(VALID_SOCKET(sock
));
1613 REQUIRE(sock
->references
> 0);
1615 if (sock
->references
== 0)
1616 kill_socket
= ISC_TRUE
;
1617 UNLOCK(&sock
->lock
);
1626 * I/O is possible on a given socket. Schedule an event to this task that
1627 * will call an internal function to do the I/O. This will charge the
1628 * task with the I/O operation and let our select loop handler get back
1629 * to doing something real as fast as possible.
1631 * The socket and manager must be locked before calling this function.
1634 dispatch_recv(isc_socket_t
*sock
) {
1636 isc_socketevent_t
*ev
;
1638 INSIST(!sock
->pending_recv
);
1640 ev
= ISC_LIST_HEAD(sock
->recv_list
);
1644 sock
->pending_recv
= 1;
1645 iev
= &sock
->readable_ev
;
1647 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
1648 "dispatch_recv: event %p -> task %p", ev
, ev
->ev_sender
);
1651 iev
->ev_sender
= sock
;
1652 iev
->ev_action
= internal_recv
;
1655 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1659 dispatch_send(isc_socket_t
*sock
) {
1661 isc_socketevent_t
*ev
;
1663 INSIST(!sock
->pending_send
);
1665 ev
= ISC_LIST_HEAD(sock
->send_list
);
1669 sock
->pending_send
= 1;
1670 iev
= &sock
->writable_ev
;
1672 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
1673 "dispatch_send: event %p -> task %p", ev
, ev
->ev_sender
);
1676 iev
->ev_sender
= sock
;
1677 iev
->ev_action
= internal_send
;
1680 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1684 * Dispatch an internal accept event.
1687 dispatch_accept(isc_socket_t
*sock
) {
1689 isc_socket_newconnev_t
*ev
;
1691 INSIST(!sock
->pending_accept
);
1694 * Are there any done events left, or were they all canceled
1695 * before the manager got the socket lock?
1697 ev
= ISC_LIST_HEAD(sock
->accept_list
);
1701 sock
->pending_accept
= 1;
1702 iev
= &sock
->readable_ev
;
1704 sock
->references
++; /* keep socket around for this internal event */
1705 iev
->ev_sender
= sock
;
1706 iev
->ev_action
= internal_accept
;
1709 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1713 dispatch_connect(isc_socket_t
*sock
) {
1715 isc_socket_connev_t
*ev
;
1717 iev
= &sock
->writable_ev
;
1719 ev
= sock
->connect_ev
;
1720 INSIST(ev
!= NULL
); /* XXX */
1722 INSIST(sock
->connecting
);
1724 sock
->references
++; /* keep socket around for this internal event */
1725 iev
->ev_sender
= sock
;
1726 iev
->ev_action
= internal_connect
;
1729 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1733 * Dequeue an item off the given socket's read queue, set the result code
1734 * in the done event to the one provided, and send it to the task it was
1737 * If the event to be sent is on a list, remove it before sending. If
1738 * asked to, send and detach from the socket as well.
1740 * Caller must have the socket locked if the event is attached to the socket.
1743 send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1746 task
= (*dev
)->ev_sender
;
1748 (*dev
)->ev_sender
= sock
;
1750 if (ISC_LINK_LINKED(*dev
, ev_link
))
1751 ISC_LIST_DEQUEUE(sock
->recv_list
, *dev
, ev_link
);
1753 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1754 == ISC_SOCKEVENTATTR_ATTACHED
)
1755 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1757 isc_task_send(task
, (isc_event_t
**)dev
);
1761 * See comments for send_recvdone_event() above.
1763 * Caller must have the socket locked if the event is attached to the socket.
1766 send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1769 INSIST(dev
!= NULL
&& *dev
!= NULL
);
1771 task
= (*dev
)->ev_sender
;
1772 (*dev
)->ev_sender
= sock
;
1774 if (ISC_LINK_LINKED(*dev
, ev_link
))
1775 ISC_LIST_DEQUEUE(sock
->send_list
, *dev
, ev_link
);
1777 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1778 == ISC_SOCKEVENTATTR_ATTACHED
)
1779 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1781 isc_task_send(task
, (isc_event_t
**)dev
);
1785 * Call accept() on a socket, to get the new file descriptor. The listen
1786 * socket is used as a prototype to create a new isc_socket_t. The new
1787 * socket has one outstanding reference. The task receiving the event
1788 * will be detached from just after the event is delivered.
1790 * On entry to this function, the event delivered is the internal
1791 * readable event, and the first item on the accept_list should be
1792 * the done event we want to send. If the list is empty, this is a no-op,
1793 * so just unlock and return.
1796 internal_accept(isc_task_t
*me
, isc_event_t
*ev
) {
1798 isc_socketmgr_t
*manager
;
1799 isc_socket_newconnev_t
*dev
;
1801 ISC_SOCKADDR_LEN_T addrlen
;
1803 isc_result_t result
= ISC_R_SUCCESS
;
1804 char strbuf
[ISC_STRERRORSIZE
];
1805 const char *err
= "accept";
1809 sock
= ev
->ev_sender
;
1810 INSIST(VALID_SOCKET(sock
));
1813 socket_log(sock
, NULL
, TRACE
,
1814 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1815 "internal_accept called, locked socket");
1817 manager
= sock
->manager
;
1818 INSIST(VALID_MANAGER(manager
));
1820 INSIST(sock
->listener
);
1821 INSIST(sock
->pending_accept
== 1);
1822 sock
->pending_accept
= 0;
1824 INSIST(sock
->references
> 0);
1825 sock
->references
--; /* the internal event is done with this socket */
1826 if (sock
->references
== 0) {
1827 UNLOCK(&sock
->lock
);
1833 * Get the first item off the accept list.
1834 * If it is empty, unlock the socket and return.
1836 dev
= ISC_LIST_HEAD(sock
->accept_list
);
1838 UNLOCK(&sock
->lock
);
1843 * Try to accept the new connection. If the accept fails with
1844 * EAGAIN or EINTR, simply poke the watcher to watch this socket
1845 * again. Also ignore ECONNRESET, which has been reported to
1846 * be spuriously returned on Linux 2.2.19 although it is not
1847 * a documented error for accept(). ECONNABORTED has been
1848 * reported for Solaris 8. The rest are thrown in not because
1849 * we have seen them but because they are ignored by other
1850 * daemons such as BIND 8 and Apache.
1853 addrlen
= sizeof(dev
->newsocket
->address
.type
);
1854 memset(&dev
->newsocket
->address
.type
.sa
, 0, addrlen
);
1855 fd
= accept(sock
->fd
, &dev
->newsocket
->address
.type
.sa
,
1860 * Leave a space for stdio to work in.
1862 if (fd
>= 0 && fd
< 20) {
1864 new = fcntl(fd
, F_DUPFD
, 20);
1874 if (SOFT_ERROR(errno
))
1897 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1898 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1899 "internal_accept: %s() %s: %s", err
,
1900 isc_msgcat_get(isc_msgcat
,
1906 result
= ISC_R_UNEXPECTED
;
1908 if (addrlen
== 0U) {
1909 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1910 "internal_accept(): "
1911 "accept() failed to return "
1916 } else if (dev
->newsocket
->address
.type
.sa
.sa_family
!=
1919 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1920 "internal_accept(): "
1921 "accept() returned peer address "
1922 "family %u (expected %u)",
1923 dev
->newsocket
->address
.
1928 } else if (fd
>= (int)FD_SETSIZE
) {
1929 isc_log_iwrite(isc_lctx
, ISC_LOGCATEGORY_GENERAL
,
1930 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
1931 isc_msgcat
, ISC_MSGSET_SOCKET
,
1933 "%s: too many open file descriptors",
1941 dev
->newsocket
->address
.length
= addrlen
;
1942 dev
->newsocket
->pf
= sock
->pf
;
1946 * Pull off the done event.
1948 ISC_LIST_UNLINK(sock
->accept_list
, dev
, ev_link
);
1951 * Poke watcher if there are more pending accepts.
1953 if (!ISC_LIST_EMPTY(sock
->accept_list
))
1954 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
1956 UNLOCK(&sock
->lock
);
1958 if (fd
!= -1 && (make_nonblock(fd
) != ISC_R_SUCCESS
)) {
1961 result
= ISC_R_UNEXPECTED
;
1965 * -1 means the new socket didn't happen.
1968 LOCK(&manager
->lock
);
1969 ISC_LIST_APPEND(manager
->socklist
, dev
->newsocket
, link
);
1971 dev
->newsocket
->fd
= fd
;
1972 dev
->newsocket
->bound
= 1;
1973 dev
->newsocket
->connected
= 1;
1976 * Save away the remote address
1978 dev
->address
= dev
->newsocket
->address
;
1980 manager
->fds
[fd
] = dev
->newsocket
;
1981 manager
->fdstate
[fd
] = MANAGED
;
1982 if (manager
->maxfd
< fd
)
1983 manager
->maxfd
= fd
;
1985 socket_log(sock
, &dev
->newsocket
->address
, CREATION
,
1986 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
1987 "accepted connection, new socket %p",
1990 UNLOCK(&manager
->lock
);
1992 dev
->newsocket
->references
--;
1993 free_socket(&dev
->newsocket
);
1997 * Fill in the done event details and send it off.
1999 dev
->result
= result
;
2000 task
= dev
->ev_sender
;
2001 dev
->ev_sender
= sock
;
2003 isc_task_sendanddetach(&task
, ISC_EVENT_PTR(&dev
));
2007 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
2008 UNLOCK(&sock
->lock
);
2013 internal_recv(isc_task_t
*me
, isc_event_t
*ev
) {
2014 isc_socketevent_t
*dev
;
2017 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTR
);
2019 sock
= ev
->ev_sender
;
2020 INSIST(VALID_SOCKET(sock
));
2023 socket_log(sock
, NULL
, IOEVENT
,
2024 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALRECV
,
2025 "internal_recv: task %p got event %p", me
, ev
);
2027 INSIST(sock
->pending_recv
== 1);
2028 sock
->pending_recv
= 0;
2030 INSIST(sock
->references
> 0);
2031 sock
->references
--; /* the internal event is done with this socket */
2032 if (sock
->references
== 0) {
2033 UNLOCK(&sock
->lock
);
2039 * Try to do as much I/O as possible on this socket. There are no
2040 * limits here, currently.
2042 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2043 while (dev
!= NULL
) {
2044 switch (doio_recv(sock
, dev
)) {
2050 * read of 0 means the remote end was closed.
2051 * Run through the event queue and dispatch all
2052 * the events with an EOF result code.
2055 dev
->result
= ISC_R_EOF
;
2056 send_recvdone_event(sock
, &dev
);
2057 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2058 } while (dev
!= NULL
);
2063 send_recvdone_event(sock
, &dev
);
2067 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2071 if (!ISC_LIST_EMPTY(sock
->recv_list
))
2072 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_READ
);
2074 UNLOCK(&sock
->lock
);
2078 internal_send(isc_task_t
*me
, isc_event_t
*ev
) {
2079 isc_socketevent_t
*dev
;
2082 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTW
);
2085 * Find out what socket this is and lock it.
2087 sock
= (isc_socket_t
*)ev
->ev_sender
;
2088 INSIST(VALID_SOCKET(sock
));
2091 socket_log(sock
, NULL
, IOEVENT
,
2092 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALSEND
,
2093 "internal_send: task %p got event %p", me
, ev
);
2095 INSIST(sock
->pending_send
== 1);
2096 sock
->pending_send
= 0;
2098 INSIST(sock
->references
> 0);
2099 sock
->references
--; /* the internal event is done with this socket */
2100 if (sock
->references
== 0) {
2101 UNLOCK(&sock
->lock
);
2107 * Try to do as much I/O as possible on this socket. There are no
2108 * limits here, currently.
2110 dev
= ISC_LIST_HEAD(sock
->send_list
);
2111 while (dev
!= NULL
) {
2112 switch (doio_send(sock
, dev
)) {
2118 send_senddone_event(sock
, &dev
);
2122 dev
= ISC_LIST_HEAD(sock
->send_list
);
2126 if (!ISC_LIST_EMPTY(sock
->send_list
))
2127 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_WRITE
);
2129 UNLOCK(&sock
->lock
);
2133 process_fds(isc_socketmgr_t
*manager
, int maxfd
,
2134 fd_set
*readfds
, fd_set
*writefds
)
2138 isc_boolean_t unlock_sock
;
2140 REQUIRE(maxfd
<= (int)FD_SETSIZE
);
2143 * Process read/writes on other fds here. Avoid locking
2144 * and unlocking twice if both reads and writes are possible.
2146 for (i
= 0; i
< maxfd
; i
++) {
2147 #ifdef ISC_PLATFORM_USETHREADS
2148 if (i
== manager
->pipe_fds
[0] || i
== manager
->pipe_fds
[1])
2150 #endif /* ISC_PLATFORM_USETHREADS */
2152 if (manager
->fdstate
[i
] == CLOSE_PENDING
) {
2153 manager
->fdstate
[i
] = CLOSED
;
2154 FD_CLR(i
, &manager
->read_fds
);
2155 FD_CLR(i
, &manager
->write_fds
);
2162 sock
= manager
->fds
[i
];
2163 unlock_sock
= ISC_FALSE
;
2164 if (FD_ISSET(i
, readfds
)) {
2166 FD_CLR(i
, &manager
->read_fds
);
2169 unlock_sock
= ISC_TRUE
;
2171 if (!SOCK_DEAD(sock
)) {
2173 dispatch_accept(sock
);
2175 dispatch_recv(sock
);
2177 FD_CLR(i
, &manager
->read_fds
);
2180 if (FD_ISSET(i
, writefds
)) {
2182 FD_CLR(i
, &manager
->write_fds
);
2186 unlock_sock
= ISC_TRUE
;
2189 if (!SOCK_DEAD(sock
)) {
2190 if (sock
->connecting
)
2191 dispatch_connect(sock
);
2193 dispatch_send(sock
);
2195 FD_CLR(i
, &manager
->write_fds
);
2198 UNLOCK(&sock
->lock
);
2202 #ifdef ISC_PLATFORM_USETHREADS
2204 * This is the thread that will loop forever, always in a select or poll
2207 * When select returns something to do, track down what thread gets to do
2208 * this I/O and post the event to it.
2210 static isc_threadresult_t
2211 watcher(void *uap
) {
2212 isc_socketmgr_t
*manager
= uap
;
2220 char strbuf
[ISC_STRERRORSIZE
];
2223 * Get the control fd here. This will never change.
2225 LOCK(&manager
->lock
);
2226 ctlfd
= manager
->pipe_fds
[0];
2231 readfds
= manager
->read_fds
;
2232 writefds
= manager
->write_fds
;
2233 maxfd
= manager
->maxfd
+ 1;
2235 UNLOCK(&manager
->lock
);
2237 cc
= select(maxfd
, &readfds
, &writefds
, NULL
, NULL
);
2239 if (!SOFT_ERROR(errno
)) {
2240 isc__strerror(errno
, strbuf
,
2242 FATAL_ERROR(__FILE__
, __LINE__
,
2244 isc_msgcat_get(isc_msgcat
,
2252 LOCK(&manager
->lock
);
2257 * Process reads on internal, control fd.
2259 if (FD_ISSET(ctlfd
, &readfds
)) {
2261 select_readmsg(manager
, &fd
, &msg
);
2263 manager_log(manager
, IOEVENT
,
2264 isc_msgcat_get(isc_msgcat
,
2267 "watcher got message %d"),
2273 if (msg
== SELECT_POKE_NOTHING
)
2277 * Handle shutdown message. We really should
2278 * jump out of this loop right away, but
2279 * it doesn't matter if we have to do a little
2282 if (msg
== SELECT_POKE_SHUTDOWN
) {
2289 * This is a wakeup on a socket. Look
2290 * at the event queue for both read and write,
2291 * and decide if we need to watch on it now
2294 wakeup_socket(manager
, fd
, msg
);
2298 process_fds(manager
, maxfd
, &readfds
, &writefds
);
2301 manager_log(manager
, TRACE
,
2302 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2303 ISC_MSG_EXITING
, "watcher exiting"));
2305 UNLOCK(&manager
->lock
);
2306 return ((isc_threadresult_t
)0);
2308 #endif /* ISC_PLATFORM_USETHREADS */
2311 * Create a new socket manager.
2314 isc_socketmgr_create(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
) {
2315 isc_socketmgr_t
*manager
;
2316 #ifdef ISC_PLATFORM_USETHREADS
2317 char strbuf
[ISC_STRERRORSIZE
];
2320 REQUIRE(managerp
!= NULL
&& *managerp
== NULL
);
2322 #ifndef ISC_PLATFORM_USETHREADS
2323 if (socketmgr
!= NULL
) {
2325 *managerp
= socketmgr
;
2326 return (ISC_R_SUCCESS
);
2328 #endif /* ISC_PLATFORM_USETHREADS */
2330 manager
= isc_mem_get(mctx
, sizeof(*manager
));
2331 if (manager
== NULL
)
2332 return (ISC_R_NOMEMORY
);
2334 manager
->magic
= SOCKET_MANAGER_MAGIC
;
2335 manager
->mctx
= NULL
;
2336 memset(manager
->fds
, 0, sizeof(manager
->fds
));
2337 ISC_LIST_INIT(manager
->socklist
);
2338 if (isc_mutex_init(&manager
->lock
) != ISC_R_SUCCESS
) {
2339 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2340 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2341 "isc_mutex_init() %s",
2342 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2343 ISC_MSG_FAILED
, "failed"));
2344 return (ISC_R_UNEXPECTED
);
2346 #ifdef ISC_PLATFORM_USETHREADS
2347 if (isc_condition_init(&manager
->shutdown_ok
) != ISC_R_SUCCESS
) {
2348 DESTROYLOCK(&manager
->lock
);
2349 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2350 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2351 "isc_condition_init() %s",
2352 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2353 ISC_MSG_FAILED
, "failed"));
2354 return (ISC_R_UNEXPECTED
);
2358 * Create the special fds that will be used to wake up the
2359 * select/poll loop when something internal needs to be done.
2361 if (pipe(manager
->pipe_fds
) != 0) {
2362 DESTROYLOCK(&manager
->lock
);
2363 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2364 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2365 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2367 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2368 ISC_MSG_FAILED
, "failed"),
2371 return (ISC_R_UNEXPECTED
);
2374 RUNTIME_CHECK(make_nonblock(manager
->pipe_fds
[0]) == ISC_R_SUCCESS
);
2376 RUNTIME_CHECK(make_nonblock(manager
->pipe_fds
[1]) == ISC_R_SUCCESS
);
2378 #else /* ISC_PLATFORM_USETHREADS */
2380 #endif /* ISC_PLATFORM_USETHREADS */
2383 * Set up initial state for the select loop
2385 FD_ZERO(&manager
->read_fds
);
2386 FD_ZERO(&manager
->write_fds
);
2387 #ifdef ISC_PLATFORM_USETHREADS
2388 FD_SET(manager
->pipe_fds
[0], &manager
->read_fds
);
2389 manager
->maxfd
= manager
->pipe_fds
[0];
2390 #else /* ISC_PLATFORM_USETHREADS */
2392 #endif /* ISC_PLATFORM_USETHREADS */
2393 memset(manager
->fdstate
, 0, sizeof(manager
->fdstate
));
2395 #ifdef ISC_PLATFORM_USETHREADS
2397 * Start up the select/poll thread.
2399 if (isc_thread_create(watcher
, manager
, &manager
->watcher
) !=
2401 (void)close(manager
->pipe_fds
[0]);
2402 (void)close(manager
->pipe_fds
[1]);
2403 DESTROYLOCK(&manager
->lock
);
2404 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2405 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2406 "isc_thread_create() %s",
2407 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2408 ISC_MSG_FAILED
, "failed"));
2409 return (ISC_R_UNEXPECTED
);
2411 #endif /* ISC_PLATFORM_USETHREADS */
2412 isc_mem_attach(mctx
, &manager
->mctx
);
2414 #ifndef ISC_PLATFORM_USETHREADS
2415 socketmgr
= manager
;
2416 #endif /* ISC_PLATFORM_USETHREADS */
2417 *managerp
= manager
;
2419 return (ISC_R_SUCCESS
);
2423 isc_socketmgr_destroy(isc_socketmgr_t
**managerp
) {
2424 isc_socketmgr_t
*manager
;
2429 * Destroy a socket manager.
2432 REQUIRE(managerp
!= NULL
);
2433 manager
= *managerp
;
2434 REQUIRE(VALID_MANAGER(manager
));
2436 #ifndef ISC_PLATFORM_USETHREADS
2437 if (manager
->refs
> 1) {
2442 #endif /* ISC_PLATFORM_USETHREADS */
2444 LOCK(&manager
->lock
);
2446 #ifdef ISC_PLATFORM_USETHREADS
2448 * Wait for all sockets to be destroyed.
2450 while (!ISC_LIST_EMPTY(manager
->socklist
)) {
2451 manager_log(manager
, CREATION
,
2452 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2453 ISC_MSG_SOCKETSREMAIN
,
2455 WAIT(&manager
->shutdown_ok
, &manager
->lock
);
2457 #else /* ISC_PLATFORM_USETHREADS */
2459 * Hope all sockets have been destroyed.
2461 if (!ISC_LIST_EMPTY(manager
->socklist
)) {
2462 manager_log(manager
, CREATION
,
2463 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2464 ISC_MSG_SOCKETSREMAIN
,
2468 #endif /* ISC_PLATFORM_USETHREADS */
2470 UNLOCK(&manager
->lock
);
2473 * Here, poke our select/poll thread. Do this by sending it a
2474 * SELECT_POKE_SHUTDOWN message, which the watcher handles as a shutdown.
2475 * This is currently a no-op in the non-threaded case.
2477 select_poke(manager
, 0, SELECT_POKE_SHUTDOWN
);
2479 #ifdef ISC_PLATFORM_USETHREADS
2481 * Wait for thread to exit.
2483 if (isc_thread_join(manager
->watcher
, NULL
) != ISC_R_SUCCESS
)
2484 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2485 "isc_thread_join() %s",
2486 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2487 ISC_MSG_FAILED
, "failed"));
2488 #endif /* ISC_PLATFORM_USETHREADS */
2493 #ifdef ISC_PLATFORM_USETHREADS
2494 (void)close(manager
->pipe_fds
[0]);
2495 (void)close(manager
->pipe_fds
[1]);
2496 (void)isc_condition_destroy(&manager
->shutdown_ok
);
2497 #endif /* ISC_PLATFORM_USETHREADS */
2499 for (i
= 0; i
< (int)FD_SETSIZE
; i
++)
2500 if (manager
->fdstate
[i
] == CLOSE_PENDING
)
2503 DESTROYLOCK(&manager
->lock
);
2505 mctx
= manager
->mctx
;
2506 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2508 isc_mem_detach(&mctx
);
2514 socket_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2518 isc_boolean_t have_lock
= ISC_FALSE
;
2519 isc_task_t
*ntask
= NULL
;
2520 isc_result_t result
= ISC_R_SUCCESS
;
2522 dev
->ev_sender
= task
;
2524 if (sock
->type
== isc_sockettype_udp
) {
2525 io_state
= doio_recv(sock
, dev
);
2528 have_lock
= ISC_TRUE
;
2530 if (ISC_LIST_EMPTY(sock
->recv_list
))
2531 io_state
= doio_recv(sock
, dev
);
2533 io_state
= DOIO_SOFT
;
2539 * We couldn't read all or part of the request right now, so
2542 * Attach to socket and to task
2544 isc_task_attach(task
, &ntask
);
2545 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2549 have_lock
= ISC_TRUE
;
2553 * Enqueue the request. If the socket was previously not being
2554 * watched, poke the watcher to start paying attention to it.
2556 if (ISC_LIST_EMPTY(sock
->recv_list
))
2557 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_READ
);
2558 ISC_LIST_ENQUEUE(sock
->recv_list
, dev
, ev_link
);
2560 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
2561 "socket_recv: event %p -> task %p",
2564 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2565 result
= ISC_R_INPROGRESS
;
2569 dev
->result
= ISC_R_EOF
;
2574 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) == 0)
2575 send_recvdone_event(sock
, &dev
);
2580 UNLOCK(&sock
->lock
);
2586 isc_socket_recvv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2587 unsigned int minimum
, isc_task_t
*task
,
2588 isc_taskaction_t action
, const void *arg
)
2590 isc_socketevent_t
*dev
;
2591 isc_socketmgr_t
*manager
;
2592 unsigned int iocount
;
2593 isc_buffer_t
*buffer
;
2595 REQUIRE(VALID_SOCKET(sock
));
2596 REQUIRE(buflist
!= NULL
);
2597 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2598 REQUIRE(task
!= NULL
);
2599 REQUIRE(action
!= NULL
);
2601 manager
= sock
->manager
;
2602 REQUIRE(VALID_MANAGER(manager
));
2604 iocount
= isc_bufferlist_availablecount(buflist
);
2605 REQUIRE(iocount
> 0);
2607 INSIST(sock
->bound
);
2609 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2611 return (ISC_R_NOMEMORY
);
2615 * UDP sockets are always partial read
2617 if (sock
->type
== isc_sockettype_udp
)
2621 dev
->minimum
= iocount
;
2623 dev
->minimum
= minimum
;
2627 * Move each buffer from the passed in list to our internal one.
2629 buffer
= ISC_LIST_HEAD(*buflist
);
2630 while (buffer
!= NULL
) {
2631 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2632 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2633 buffer
= ISC_LIST_HEAD(*buflist
);
2636 return (socket_recv(sock
, dev
, task
, 0));
2640 isc_socket_recv(isc_socket_t
*sock
, isc_region_t
*region
, unsigned int minimum
,
2641 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2643 isc_socketevent_t
*dev
;
2644 isc_socketmgr_t
*manager
;
2646 REQUIRE(VALID_SOCKET(sock
));
2647 REQUIRE(action
!= NULL
);
2649 manager
= sock
->manager
;
2650 REQUIRE(VALID_MANAGER(manager
));
2652 INSIST(sock
->bound
);
2654 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2656 return (ISC_R_NOMEMORY
);
2658 return (isc_socket_recv2(sock
, region
, minimum
, task
, dev
, 0));
2662 isc_socket_recv2(isc_socket_t
*sock
, isc_region_t
*region
,
2663 unsigned int minimum
, isc_task_t
*task
,
2664 isc_socketevent_t
*event
, unsigned int flags
)
2666 event
->ev_sender
= sock
;
2667 event
->result
= ISC_R_UNEXPECTED
;
2668 ISC_LIST_INIT(event
->bufferlist
);
2669 event
->region
= *region
;
2672 event
->attributes
= 0;
2675 * UDP sockets are always partial read.
2677 if (sock
->type
== isc_sockettype_udp
)
2681 event
->minimum
= region
->length
;
2683 event
->minimum
= minimum
;
2686 return (socket_recv(sock
, event
, task
, flags
));
2690 socket_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2691 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2695 isc_boolean_t have_lock
= ISC_FALSE
;
2696 isc_task_t
*ntask
= NULL
;
2697 isc_result_t result
= ISC_R_SUCCESS
;
2699 dev
->ev_sender
= task
;
2701 set_dev_address(address
, sock
, dev
);
2702 if (pktinfo
!= NULL
) {
2703 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
2704 dev
->pktinfo
= *pktinfo
;
2706 if (!isc_sockaddr_issitelocal(&dev
->address
) &&
2707 !isc_sockaddr_islinklocal(&dev
->address
)) {
2708 socket_log(sock
, NULL
, TRACE
, isc_msgcat
,
2709 ISC_MSGSET_SOCKET
, ISC_MSG_PKTINFOPROVIDED
,
2710 "pktinfo structure provided, ifindex %u "
2711 "(set to 0)", pktinfo
->ipi6_ifindex
);
2714 * Set the pktinfo index to 0 here, to let the
2715 * kernel decide what interface it should send on.
2717 dev
->pktinfo
.ipi6_ifindex
= 0;
2721 if (sock
->type
== isc_sockettype_udp
)
2722 io_state
= doio_send(sock
, dev
);
2725 have_lock
= ISC_TRUE
;
2727 if (ISC_LIST_EMPTY(sock
->send_list
))
2728 io_state
= doio_send(sock
, dev
);
2730 io_state
= DOIO_SOFT
;
2736 * We couldn't send all or part of the request right now, so
2737 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2739 if ((flags
& ISC_SOCKFLAG_NORETRY
) == 0) {
2740 isc_task_attach(task
, &ntask
);
2741 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2745 have_lock
= ISC_TRUE
;
2749 * Enqueue the request. If the socket was previously
2750 * not being watched, poke the watcher to start
2751 * paying attention to it.
2753 if (ISC_LIST_EMPTY(sock
->send_list
))
2754 select_poke(sock
->manager
, sock
->fd
,
2756 ISC_LIST_ENQUEUE(sock
->send_list
, dev
, ev_link
);
2758 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
2759 "socket_send: event %p -> task %p",
2762 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2763 result
= ISC_R_INPROGRESS
;
2769 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) == 0)
2770 send_senddone_event(sock
, &dev
);
2775 UNLOCK(&sock
->lock
);
2781 isc_socket_send(isc_socket_t
*sock
, isc_region_t
*region
,
2782 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2785 * REQUIRE() checking is performed in isc_socket_sendto().
2787 return (isc_socket_sendto(sock
, region
, task
, action
, arg
, NULL
,
2792 isc_socket_sendto(isc_socket_t
*sock
, isc_region_t
*region
,
2793 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2794 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2796 isc_socketevent_t
*dev
;
2797 isc_socketmgr_t
*manager
;
2799 REQUIRE(VALID_SOCKET(sock
));
2800 REQUIRE(region
!= NULL
);
2801 REQUIRE(task
!= NULL
);
2802 REQUIRE(action
!= NULL
);
2804 manager
= sock
->manager
;
2805 REQUIRE(VALID_MANAGER(manager
));
2807 INSIST(sock
->bound
);
2809 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2811 return (ISC_R_NOMEMORY
);
2814 dev
->region
= *region
;
2816 return (socket_send(sock
, dev
, task
, address
, pktinfo
, 0));
2820 isc_socket_sendv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2821 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2823 return (isc_socket_sendtov(sock
, buflist
, task
, action
, arg
, NULL
,
2828 isc_socket_sendtov(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2829 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2830 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2832 isc_socketevent_t
*dev
;
2833 isc_socketmgr_t
*manager
;
2834 unsigned int iocount
;
2835 isc_buffer_t
*buffer
;
2837 REQUIRE(VALID_SOCKET(sock
));
2838 REQUIRE(buflist
!= NULL
);
2839 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2840 REQUIRE(task
!= NULL
);
2841 REQUIRE(action
!= NULL
);
2843 manager
= sock
->manager
;
2844 REQUIRE(VALID_MANAGER(manager
));
2846 iocount
= isc_bufferlist_usedcount(buflist
);
2847 REQUIRE(iocount
> 0);
2849 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2851 return (ISC_R_NOMEMORY
);
2855 * Move each buffer from the passed in list to our internal one.
2857 buffer
= ISC_LIST_HEAD(*buflist
);
2858 while (buffer
!= NULL
) {
2859 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2860 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2861 buffer
= ISC_LIST_HEAD(*buflist
);
2864 return (socket_send(sock
, dev
, task
, address
, pktinfo
, 0));
2868 isc_socket_sendto2(isc_socket_t
*sock
, isc_region_t
*region
,
2870 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2871 isc_socketevent_t
*event
, unsigned int flags
)
2873 REQUIRE((flags
& ~(ISC_SOCKFLAG_IMMEDIATE
|ISC_SOCKFLAG_NORETRY
)) == 0);
2874 if ((flags
& ISC_SOCKFLAG_NORETRY
) != 0)
2875 REQUIRE(sock
->type
== isc_sockettype_udp
);
2876 event
->ev_sender
= sock
;
2877 event
->result
= ISC_R_UNEXPECTED
;
2878 ISC_LIST_INIT(event
->bufferlist
);
2879 event
->region
= *region
;
2882 event
->attributes
= 0;
2884 return (socket_send(sock
, event
, task
, address
, pktinfo
, flags
));
2888 isc_socket_bind(isc_socket_t
*sock
, isc_sockaddr_t
*sockaddr
) {
2889 char strbuf
[ISC_STRERRORSIZE
];
2894 INSIST(!sock
->bound
);
2896 if (sock
->pf
!= sockaddr
->type
.sa
.sa_family
) {
2897 UNLOCK(&sock
->lock
);
2898 return (ISC_R_FAMILYMISMATCH
);
2901 * Only set SO_REUSEADDR when we want a specific port.
2903 if (isc_sockaddr_getport(sockaddr
) != (in_port_t
)0 &&
2904 setsockopt(sock
->fd
, SOL_SOCKET
, SO_REUSEADDR
, (void *)&on
,
2906 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2907 "setsockopt(%d) %s", sock
->fd
,
2908 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2909 ISC_MSG_FAILED
, "failed"));
2912 if (bind(sock
->fd
, &sockaddr
->type
.sa
, sockaddr
->length
) < 0) {
2913 UNLOCK(&sock
->lock
);
2916 return (ISC_R_NOPERM
);
2918 return (ISC_R_ADDRNOTAVAIL
);
2920 return (ISC_R_ADDRINUSE
);
2922 return (ISC_R_BOUND
);
2924 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2925 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "bind: %s",
2927 return (ISC_R_UNEXPECTED
);
2931 socket_log(sock
, sockaddr
, TRACE
,
2932 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "bound");
2935 UNLOCK(&sock
->lock
);
2936 return (ISC_R_SUCCESS
);
2940 isc_socket_filter(isc_socket_t
*sock
, const char *filter
) {
2941 #ifdef SO_ACCEPTFILTER
2942 char strbuf
[ISC_STRERRORSIZE
];
2943 struct accept_filter_arg afa
;
2949 REQUIRE(VALID_SOCKET(sock
));
2951 #ifdef SO_ACCEPTFILTER
2952 bzero(&afa
, sizeof(afa
));
2953 strncpy(afa
.af_name
, filter
, sizeof(afa
.af_name
));
2954 if (setsockopt(sock
->fd
, SOL_SOCKET
, SO_ACCEPTFILTER
,
2955 &afa
, sizeof(afa
)) == -1) {
2956 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2957 socket_log(sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
2958 ISC_MSG_FILTER
, "setsockopt(SO_ACCEPTFILTER): %s",
2960 return (ISC_R_FAILURE
);
2962 return (ISC_R_SUCCESS
);
2964 return (ISC_R_NOTIMPLEMENTED
);
2969 * Set up to listen on a given socket. We do this by creating an internal
2970 * event that will be dispatched when the socket has read activity. The
2971 * watcher will send the internal event to the task when there is a new
2974 * Unlike in read, we don't preallocate a done event here. Every time there
2975 * is a new connection we'll have to allocate a new one anyway, so we might
2976 * as well keep things simple rather than having to track them.
2979 isc_socket_listen(isc_socket_t
*sock
, unsigned int backlog
) {
2980 char strbuf
[ISC_STRERRORSIZE
];
2982 REQUIRE(VALID_SOCKET(sock
));
2986 REQUIRE(!sock
->listener
);
2987 REQUIRE(sock
->bound
);
2988 REQUIRE(sock
->type
== isc_sockettype_tcp
);
2991 backlog
= SOMAXCONN
;
2993 if (listen(sock
->fd
, (int)backlog
) < 0) {
2994 UNLOCK(&sock
->lock
);
2995 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2997 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "listen: %s", strbuf
);
2999 return (ISC_R_UNEXPECTED
);
3004 UNLOCK(&sock
->lock
);
3005 return (ISC_R_SUCCESS
);
3009 * This should try to do agressive accept() XXXMLG
3012 isc_socket_accept(isc_socket_t
*sock
,
3013 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3015 isc_socket_newconnev_t
*dev
;
3016 isc_socketmgr_t
*manager
;
3017 isc_task_t
*ntask
= NULL
;
3018 isc_socket_t
*nsock
;
3020 isc_boolean_t do_poke
= ISC_FALSE
;
3022 REQUIRE(VALID_SOCKET(sock
));
3023 manager
= sock
->manager
;
3024 REQUIRE(VALID_MANAGER(manager
));
3028 REQUIRE(sock
->listener
);
3031 * Sender field is overloaded here with the task we will be sending
3032 * this event to. Just before the actual event is delivered the
3033 * actual ev_sender will be touched up to be the socket.
3035 dev
= (isc_socket_newconnev_t
*)
3036 isc_event_allocate(manager
->mctx
, task
, ISC_SOCKEVENT_NEWCONN
,
3037 action
, arg
, sizeof(*dev
));
3039 UNLOCK(&sock
->lock
);
3040 return (ISC_R_NOMEMORY
);
3042 ISC_LINK_INIT(dev
, ev_link
);
3044 ret
= allocate_socket(manager
, sock
->type
, &nsock
);
3045 if (ret
!= ISC_R_SUCCESS
) {
3046 isc_event_free(ISC_EVENT_PTR(&dev
));
3047 UNLOCK(&sock
->lock
);
3052 * Attach to socket and to task.
3054 isc_task_attach(task
, &ntask
);
3055 nsock
->references
++;
3057 dev
->ev_sender
= ntask
;
3058 dev
->newsocket
= nsock
;
3061 * Poke watcher here. We still have the socket locked, so there
3062 * is no race condition. We will keep the lock for such a short
3063 * bit of time waking it up now or later won't matter all that much.
3065 if (ISC_LIST_EMPTY(sock
->accept_list
))
3068 ISC_LIST_ENQUEUE(sock
->accept_list
, dev
, ev_link
);
3071 select_poke(manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
3073 UNLOCK(&sock
->lock
);
3074 return (ISC_R_SUCCESS
);
3078 isc_socket_connect(isc_socket_t
*sock
, isc_sockaddr_t
*addr
,
3079 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3081 isc_socket_connev_t
*dev
;
3082 isc_task_t
*ntask
= NULL
;
3083 isc_socketmgr_t
*manager
;
3085 char strbuf
[ISC_STRERRORSIZE
];
3087 REQUIRE(VALID_SOCKET(sock
));
3088 REQUIRE(addr
!= NULL
);
3089 REQUIRE(task
!= NULL
);
3090 REQUIRE(action
!= NULL
);
3092 manager
= sock
->manager
;
3093 REQUIRE(VALID_MANAGER(manager
));
3094 REQUIRE(addr
!= NULL
);
3096 if (isc_sockaddr_ismulticast(addr
))
3097 return (ISC_R_MULTICAST
);
3101 REQUIRE(!sock
->connecting
);
3103 dev
= (isc_socket_connev_t
*)isc_event_allocate(manager
->mctx
, sock
,
3104 ISC_SOCKEVENT_CONNECT
,
3108 UNLOCK(&sock
->lock
);
3109 return (ISC_R_NOMEMORY
);
3111 ISC_LINK_INIT(dev
, ev_link
);
3114 * Try to do the connect right away, as there can be only one
3115 * outstanding, and it might happen to complete.
3117 sock
->address
= *addr
;
3118 cc
= connect(sock
->fd
, &addr
->type
.sa
, addr
->length
);
3120 if (SOFT_ERROR(errno
) || errno
== EINPROGRESS
)
3124 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
3125 ERROR_MATCH(EACCES
, ISC_R_NOPERM
);
3126 ERROR_MATCH(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
3127 ERROR_MATCH(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
3128 ERROR_MATCH(ECONNREFUSED
, ISC_R_CONNREFUSED
);
3129 ERROR_MATCH(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
3131 ERROR_MATCH(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
3133 ERROR_MATCH(ENETUNREACH
, ISC_R_NETUNREACH
);
3134 ERROR_MATCH(ENOBUFS
, ISC_R_NORESOURCES
);
3135 ERROR_MATCH(EPERM
, ISC_R_HOSTUNREACH
);
3136 ERROR_MATCH(EPIPE
, ISC_R_NOTCONNECTED
);
3137 ERROR_MATCH(ECONNRESET
, ISC_R_CONNECTIONRESET
);
3141 sock
->connected
= 0;
3143 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3144 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "%d/%s", errno
, strbuf
);
3146 UNLOCK(&sock
->lock
);
3147 isc_event_free(ISC_EVENT_PTR(&dev
));
3148 return (ISC_R_UNEXPECTED
);
3151 sock
->connected
= 0;
3152 isc_task_send(task
, ISC_EVENT_PTR(&dev
));
3154 UNLOCK(&sock
->lock
);
3155 return (ISC_R_SUCCESS
);
3159 * If connect completed, fire off the done event.
3162 sock
->connected
= 1;
3164 dev
->result
= ISC_R_SUCCESS
;
3165 isc_task_send(task
, ISC_EVENT_PTR(&dev
));
3167 UNLOCK(&sock
->lock
);
3168 return (ISC_R_SUCCESS
);
3176 isc_task_attach(task
, &ntask
);
3178 sock
->connecting
= 1;
3180 dev
->ev_sender
= ntask
;
3183 * Poke watcher here. We still have the socket locked, so there
3184 * is no race condition. We will keep the lock for such a short
3185 * bit of time waking it up now or later won't matter all that much.
3187 if (sock
->connect_ev
== NULL
)
3188 select_poke(manager
, sock
->fd
, SELECT_POKE_CONNECT
);
3190 sock
->connect_ev
= dev
;
3192 UNLOCK(&sock
->lock
);
3193 return (ISC_R_SUCCESS
);
3197 * Called when a socket with a pending connect() finishes.
3200 internal_connect(isc_task_t
*me
, isc_event_t
*ev
) {
3202 isc_socket_connev_t
*dev
;
3205 ISC_SOCKADDR_LEN_T optlen
;
3206 char strbuf
[ISC_STRERRORSIZE
];
3207 char peerbuf
[ISC_SOCKADDR_FORMATSIZE
];
3210 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTW
);
3212 sock
= ev
->ev_sender
;
3213 INSIST(VALID_SOCKET(sock
));
3218 * When the internal event was sent the reference count was bumped
3219 * to keep the socket around for us. Decrement the count here.
3221 INSIST(sock
->references
> 0);
3223 if (sock
->references
== 0) {
3224 UNLOCK(&sock
->lock
);
3230 * Has this event been canceled?
3232 dev
= sock
->connect_ev
;
3234 INSIST(!sock
->connecting
);
3235 UNLOCK(&sock
->lock
);
3239 INSIST(sock
->connecting
);
3240 sock
->connecting
= 0;
3243 * Get any possible error status here.
3245 optlen
= sizeof(cc
);
3246 if (getsockopt(sock
->fd
, SOL_SOCKET
, SO_ERROR
,
3247 (void *)&cc
, (void *)&optlen
) < 0)
3254 * If the error is EAGAIN, just re-select on this
3255 * fd and pretend nothing strange happened.
3257 if (SOFT_ERROR(errno
) || errno
== EINPROGRESS
) {
3258 sock
->connecting
= 1;
3259 select_poke(sock
->manager
, sock
->fd
,
3260 SELECT_POKE_CONNECT
);
3261 UNLOCK(&sock
->lock
);
3267 * Translate other errors into ISC_R_* flavors.
3270 #define ERROR_MATCH(a, b) case a: dev->result = b; break;
3271 ERROR_MATCH(EACCES
, ISC_R_NOPERM
);
3272 ERROR_MATCH(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
3273 ERROR_MATCH(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
3274 ERROR_MATCH(ECONNREFUSED
, ISC_R_CONNREFUSED
);
3275 ERROR_MATCH(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
3277 ERROR_MATCH(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
3279 ERROR_MATCH(ENETUNREACH
, ISC_R_NETUNREACH
);
3280 ERROR_MATCH(ENOBUFS
, ISC_R_NORESOURCES
);
3281 ERROR_MATCH(EPERM
, ISC_R_HOSTUNREACH
);
3282 ERROR_MATCH(EPIPE
, ISC_R_NOTCONNECTED
);
3283 ERROR_MATCH(ETIMEDOUT
, ISC_R_TIMEDOUT
);
3284 ERROR_MATCH(ECONNRESET
, ISC_R_CONNECTIONRESET
);
3287 dev
->result
= ISC_R_UNEXPECTED
;
3288 isc_sockaddr_format(&sock
->address
, peerbuf
,
3290 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3291 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3292 "internal_connect: connect(%s) %s",
3296 dev
->result
= ISC_R_SUCCESS
;
3297 sock
->connected
= 1;
3301 sock
->connect_ev
= NULL
;
3303 UNLOCK(&sock
->lock
);
3305 task
= dev
->ev_sender
;
3306 dev
->ev_sender
= sock
;
3307 isc_task_sendanddetach(&task
, ISC_EVENT_PTR(&dev
));
3311 isc_socket_getpeername(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3314 REQUIRE(VALID_SOCKET(sock
));
3315 REQUIRE(addressp
!= NULL
);
3319 if (sock
->connected
) {
3320 *addressp
= sock
->address
;
3321 ret
= ISC_R_SUCCESS
;
3323 ret
= ISC_R_NOTCONNECTED
;
3326 UNLOCK(&sock
->lock
);
3332 isc_socket_getsockname(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3333 ISC_SOCKADDR_LEN_T len
;
3335 char strbuf
[ISC_STRERRORSIZE
];
3337 REQUIRE(VALID_SOCKET(sock
));
3338 REQUIRE(addressp
!= NULL
);
3343 ret
= ISC_R_NOTBOUND
;
3347 ret
= ISC_R_SUCCESS
;
3349 len
= sizeof(addressp
->type
);
3350 if (getsockname(sock
->fd
, &addressp
->type
.sa
, (void *)&len
) < 0) {
3351 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3352 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "getsockname: %s",
3354 ret
= ISC_R_UNEXPECTED
;
3357 addressp
->length
= (unsigned int)len
;
3360 UNLOCK(&sock
->lock
);
3366 * Run through the list of events on this socket, and cancel the ones
3367 * queued for task "task" of type "how". "how" is a bitmask.
3370 isc_socket_cancel(isc_socket_t
*sock
, isc_task_t
*task
, unsigned int how
) {
3372 REQUIRE(VALID_SOCKET(sock
));
3375 * Quick exit if there is nothing to do. Don't even bother locking
3384 * All of these do the same thing, more or less.
3386 * o If the internal event is marked as "posted" try to
3387 * remove it from the task's queue. If this fails, mark it
3388 * as canceled instead, and let the task clean it up later.
3389 * o For each I/O request for that task of that type, post
3390 * its done event with status of "ISC_R_CANCELED".
3391 * o Reset any state needed.
3393 if (((how
& ISC_SOCKCANCEL_RECV
) == ISC_SOCKCANCEL_RECV
)
3394 && !ISC_LIST_EMPTY(sock
->recv_list
)) {
3395 isc_socketevent_t
*dev
;
3396 isc_socketevent_t
*next
;
3397 isc_task_t
*current_task
;
3399 dev
= ISC_LIST_HEAD(sock
->recv_list
);
3401 while (dev
!= NULL
) {
3402 current_task
= dev
->ev_sender
;
3403 next
= ISC_LIST_NEXT(dev
, ev_link
);
3405 if ((task
== NULL
) || (task
== current_task
)) {
3406 dev
->result
= ISC_R_CANCELED
;
3407 send_recvdone_event(sock
, &dev
);
3413 if (((how
& ISC_SOCKCANCEL_SEND
) == ISC_SOCKCANCEL_SEND
)
3414 && !ISC_LIST_EMPTY(sock
->send_list
)) {
3415 isc_socketevent_t
*dev
;
3416 isc_socketevent_t
*next
;
3417 isc_task_t
*current_task
;
3419 dev
= ISC_LIST_HEAD(sock
->send_list
);
3421 while (dev
!= NULL
) {
3422 current_task
= dev
->ev_sender
;
3423 next
= ISC_LIST_NEXT(dev
, ev_link
);
3425 if ((task
== NULL
) || (task
== current_task
)) {
3426 dev
->result
= ISC_R_CANCELED
;
3427 send_senddone_event(sock
, &dev
);
3433 if (((how
& ISC_SOCKCANCEL_ACCEPT
) == ISC_SOCKCANCEL_ACCEPT
)
3434 && !ISC_LIST_EMPTY(sock
->accept_list
)) {
3435 isc_socket_newconnev_t
*dev
;
3436 isc_socket_newconnev_t
*next
;
3437 isc_task_t
*current_task
;
3439 dev
= ISC_LIST_HEAD(sock
->accept_list
);
3440 while (dev
!= NULL
) {
3441 current_task
= dev
->ev_sender
;
3442 next
= ISC_LIST_NEXT(dev
, ev_link
);
3444 if ((task
== NULL
) || (task
== current_task
)) {
3446 ISC_LIST_UNLINK(sock
->accept_list
, dev
,
3449 dev
->newsocket
->references
--;
3450 free_socket(&dev
->newsocket
);
3452 dev
->result
= ISC_R_CANCELED
;
3453 dev
->ev_sender
= sock
;
3454 isc_task_sendanddetach(¤t_task
,
3455 ISC_EVENT_PTR(&dev
));
3463 * Connecting is not a list.
3465 if (((how
& ISC_SOCKCANCEL_CONNECT
) == ISC_SOCKCANCEL_CONNECT
)
3466 && sock
->connect_ev
!= NULL
) {
3467 isc_socket_connev_t
*dev
;
3468 isc_task_t
*current_task
;
3470 INSIST(sock
->connecting
);
3471 sock
->connecting
= 0;
3473 dev
= sock
->connect_ev
;
3474 current_task
= dev
->ev_sender
;
3476 if ((task
== NULL
) || (task
== current_task
)) {
3477 sock
->connect_ev
= NULL
;
3479 dev
->result
= ISC_R_CANCELED
;
3480 dev
->ev_sender
= sock
;
3481 isc_task_sendanddetach(¤t_task
,
3482 ISC_EVENT_PTR(&dev
));
3486 UNLOCK(&sock
->lock
);
3490 isc_socket_gettype(isc_socket_t
*sock
) {
3491 REQUIRE(VALID_SOCKET(sock
));
3493 return (sock
->type
);
3497 isc_socket_isbound(isc_socket_t
*sock
) {
3501 val
= ((sock
->bound
) ? ISC_TRUE
: ISC_FALSE
);
3502 UNLOCK(&sock
->lock
);
3508 isc_socket_ipv6only(isc_socket_t
*sock
, isc_boolean_t yes
) {
3509 #if defined(IPV6_V6ONLY)
3510 int onoff
= yes
? 1 : 0;
3516 REQUIRE(VALID_SOCKET(sock
));
3519 if (sock
->pf
== AF_INET6
) {
3520 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_V6ONLY
,
3521 (void *)&onoff
, sizeof(onoff
));
3526 #ifndef ISC_PLATFORM_USETHREADS
3528 isc__socketmgr_getfdsets(fd_set
*readset
, fd_set
*writeset
, int *maxfd
) {
3529 if (socketmgr
== NULL
)
3532 *readset
= socketmgr
->read_fds
;
3533 *writeset
= socketmgr
->write_fds
;
3534 *maxfd
= socketmgr
->maxfd
+ 1;
3539 isc__socketmgr_dispatch(fd_set
*readset
, fd_set
*writeset
, int maxfd
) {
3540 isc_socketmgr_t
*manager
= socketmgr
;
3542 if (manager
== NULL
)
3543 return (ISC_R_NOTFOUND
);
3545 process_fds(manager
, maxfd
, readset
, writeset
);
3546 return (ISC_R_SUCCESS
);
3548 #endif /* ISC_PLATFORM_USETHREADS */