2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: socket.c,v 1.207.2.29 2004/07/01 04:51:44 marka Exp $ */
22 #include <sys/param.h>
23 #include <sys/types.h>
24 #include <sys/socket.h>
35 #include <isc/buffer.h>
36 #include <isc/bufferlist.h>
37 #include <isc/condition.h>
38 #include <isc/formatcheck.h>
43 #include <isc/mutex.h>
45 #include <isc/platform.h>
46 #include <isc/print.h>
47 #include <isc/region.h>
48 #include <isc/socket.h>
49 #include <isc/strerror.h>
51 #include <isc/thread.h>
54 #include "errno2result.h"
56 #ifndef ISC_PLATFORM_USETHREADS
58 #endif /* ISC_PLATFORM_USETHREADS */
61 * Some systems define the socket length argument as an int, some as size_t,
62 * some as socklen_t. This is here so it can be easily changed if needed.
64 #ifndef ISC_SOCKADDR_LEN_T
65 #ifdef _BSD_SOCKLEN_T_
66 #define ISC_SOCKADDR_LEN_T _BSD_SOCKLEN_T_
68 #define ISC_SOCKADDR_LEN_T unsigned int
73 * Define what the possible "soft" errors can be. These are non-fatal returns
74 * of various network related functions, like recv() and so on.
76 * For some reason, BSDI (and perhaps others) will sometimes return <0
77 * from recv() but will have errno==0. This is broken, but we have to
78 * work around it here.
80 #define SOFT_ERROR(e) ((e) == EAGAIN || \
81 (e) == EWOULDBLOCK || \
85 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
88 * DLVL(90) -- Function entry/exit and other tracing.
89 * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
90 * DLVL(60) -- Socket data send/receive
91 * DLVL(50) -- Event tracing, including receiving/sending completion events.
92 * DLVL(20) -- Socket creation/destruction.
94 #define TRACE_LEVEL 90
95 #define CORRECTNESS_LEVEL 70
96 #define IOEVENT_LEVEL 60
97 #define EVENT_LEVEL 50
98 #define CREATION_LEVEL 20
100 #define TRACE DLVL(TRACE_LEVEL)
101 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
102 #define IOEVENT DLVL(IOEVENT_LEVEL)
103 #define EVENT DLVL(EVENT_LEVEL)
104 #define CREATION DLVL(CREATION_LEVEL)
106 typedef isc_event_t intev_t
;
108 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
109 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
112 * IPv6 control information. If the socket is an IPv6 socket we want
113 * to collect the destination address and interface so the client can
114 * set them on outgoing packets.
116 #ifdef ISC_PLATFORM_HAVEIPV6
123 * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
124 * a setsockopt() like interface to request timestamps, and if the OS
125 * doesn't do it for us, call gettimeofday() on every UDP receive?
134 * The number of times a send operation is repeated if the result is EINTR.
141 isc_socketmgr_t
*manager
;
143 isc_sockettype_t type
;
145 /* Locked by socket lock. */
146 ISC_LINK(isc_socket_t
) link
;
147 unsigned int references
;
151 ISC_LIST(isc_socketevent_t
) send_list
;
152 ISC_LIST(isc_socketevent_t
) recv_list
;
153 ISC_LIST(isc_socket_newconnev_t
) accept_list
;
154 isc_socket_connev_t
*connect_ev
;
157 * Internal events. Posted when a descriptor is readable or
158 * writable. These are statically allocated and never freed.
159 * They will be set to non-purgeable before use.
164 isc_sockaddr_t address
; /* remote address */
166 unsigned int pending_recv
: 1,
169 listener
: 1, /* listener socket */
171 connecting
: 1, /* connect pending */
172 bound
: 1; /* bound to local addr */
174 #ifdef ISC_NET_RECVOVERFLOW
175 unsigned char overflow
; /* used for MSG_TRUNC fake */
179 ISC_SOCKADDR_LEN_T recvcmsgbuflen
;
181 ISC_SOCKADDR_LEN_T sendcmsgbuflen
;
184 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
185 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
187 struct isc_socketmgr
{
192 /* Locked by manager lock. */
193 ISC_LIST(isc_socket_t
) socklist
;
196 isc_socket_t
*fds
[FD_SETSIZE
];
197 int fdstate
[FD_SETSIZE
];
199 #ifdef ISC_PLATFORM_USETHREADS
200 isc_thread_t watcher
;
201 isc_condition_t shutdown_ok
;
203 #else /* ISC_PLATFORM_USETHREADS */
205 #endif /* ISC_PLATFORM_USETHREADS */
208 #ifndef ISC_PLATFORM_USETHREADS
209 static isc_socketmgr_t
*socketmgr
= NULL
;
210 #endif /* ISC_PLATFORM_USETHREADS */
212 #define CLOSED 0 /* this one must be zero */
214 #define CLOSE_PENDING 2
217 * send() and recv() iovec counts
219 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
220 #ifdef ISC_NET_RECVOVERFLOW
221 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)
223 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
226 static void send_recvdone_event(isc_socket_t
*, isc_socketevent_t
**);
227 static void send_senddone_event(isc_socket_t
*, isc_socketevent_t
**);
228 static void free_socket(isc_socket_t
**);
229 static isc_result_t
allocate_socket(isc_socketmgr_t
*, isc_sockettype_t
,
231 static void destroy(isc_socket_t
**);
232 static void internal_accept(isc_task_t
*, isc_event_t
*);
233 static void internal_connect(isc_task_t
*, isc_event_t
*);
234 static void internal_recv(isc_task_t
*, isc_event_t
*);
235 static void internal_send(isc_task_t
*, isc_event_t
*);
236 static void process_cmsg(isc_socket_t
*, struct msghdr
*, isc_socketevent_t
*);
237 static void build_msghdr_send(isc_socket_t
*, isc_socketevent_t
*,
238 struct msghdr
*, struct iovec
*, size_t *);
239 static void build_msghdr_recv(isc_socket_t
*, isc_socketevent_t
*,
240 struct msghdr
*, struct iovec
*, size_t *);
242 #define SELECT_POKE_SHUTDOWN (-1)
243 #define SELECT_POKE_NOTHING (-2)
244 #define SELECT_POKE_READ (-3)
245 #define SELECT_POKE_ACCEPT (-3) /* Same as _READ */
246 #define SELECT_POKE_WRITE (-4)
247 #define SELECT_POKE_CONNECT (-4) /* Same as _WRITE */
248 #define SELECT_POKE_CLOSE (-5)
250 #define SOCK_DEAD(s) ((s)->references == 0)
253 manager_log(isc_socketmgr_t
*sockmgr
,
254 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
255 const char *fmt
, ...) ISC_FORMAT_PRINTF(5, 6);
257 manager_log(isc_socketmgr_t
*sockmgr
,
258 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
259 const char *fmt
, ...)
264 if (! isc_log_wouldlog(isc_lctx
, level
))
268 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
271 isc_log_write(isc_lctx
, category
, module
, level
,
272 "sockmgr %p: %s", sockmgr
, msgbuf
);
276 socket_log(isc_socket_t
*sock
, isc_sockaddr_t
*address
,
277 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
278 isc_msgcat_t
*msgcat
, int msgset
, int message
,
279 const char *fmt
, ...) ISC_FORMAT_PRINTF(9, 10);
281 socket_log(isc_socket_t
*sock
, isc_sockaddr_t
*address
,
282 isc_logcategory_t
*category
, isc_logmodule_t
*module
, int level
,
283 isc_msgcat_t
*msgcat
, int msgset
, int message
,
284 const char *fmt
, ...)
290 if (! isc_log_wouldlog(isc_lctx
, level
))
294 vsnprintf(msgbuf
, sizeof(msgbuf
), fmt
, ap
);
297 if (address
== NULL
) {
298 isc_log_iwrite(isc_lctx
, category
, module
, level
,
299 msgcat
, msgset
, message
,
300 "socket %p: %s", sock
, msgbuf
);
302 isc_sockaddr_format(address
, peerbuf
, sizeof(peerbuf
));
303 isc_log_iwrite(isc_lctx
, category
, module
, level
,
304 msgcat
, msgset
, message
,
305 "socket %p %s: %s", sock
, peerbuf
, msgbuf
);
/*
 * wakeup_socket: apply the poke message msg for descriptor fd to the
 * manager fd sets.  It reads and writes manager->fdstate[fd], reads
 * manager->fds[fd], and updates manager->read_fds and manager->write_fds.
 * NOTE(review): this listing omits some original lines; the return type,
 * the local declarations and the closing braces are not visible here.
 */
310 wakeup_socket(isc_socketmgr_t
*manager
, int fd
, int msg
) {
314 * This is a wakeup on a socket. If the socket is not in the
315 * process of being closed, start watching it for either reads
/* fd indexes fdstate[] and fds[], so it must lie in [0, FD_SETSIZE). */
319 INSIST(fd
>= 0 && fd
< (int)FD_SETSIZE
);
/*
 * A descriptor marked CLOSE_PENDING is finished off here: its state
 * becomes CLOSED and it is removed from both the read and write sets.
 * What follows the two FD_CLR calls inside this branch is not visible
 * in this listing.
 */
321 if (manager
->fdstate
[fd
] == CLOSE_PENDING
) {
322 manager
->fdstate
[fd
] = CLOSED
;
323 FD_CLR(fd
, &manager
->read_fds
);
324 FD_CLR(fd
, &manager
->write_fds
);
/*
 * Test for descriptors that are not in the MANAGED state.  The statement
 * controlled by this test is not visible in this listing (presumably an
 * early return -- TODO confirm).
 */
328 if (manager
->fdstate
[fd
] != MANAGED
)
/* Look up the socket object registered for this descriptor. */
331 sock
= manager
->fds
[fd
];
/*
 * Arm the requested direction.  SELECT_POKE_ACCEPT is defined with the
 * same value as SELECT_POKE_READ, and SELECT_POKE_CONNECT with the same
 * value as SELECT_POKE_WRITE, so those messages take these branches too.
 */
336 if (msg
== SELECT_POKE_READ
)
337 FD_SET(sock
->fd
, &manager
->read_fds
);
338 if (msg
== SELECT_POKE_WRITE
)
339 FD_SET(sock
->fd
, &manager
->write_fds
);
342 #ifdef ISC_PLATFORM_USETHREADS
344 * Poke the select loop when there is something for us to do.
345 * The write is required (by POSIX) to complete. That is, we
346 * will not get partial writes.
/*
 * select_poke (ISC_PLATFORM_USETHREADS build): send a message to the
 * watcher by writing buf to the write end of the manager pipe,
 * mgr->pipe_fds[1].
 * NOTE(review): the declaration of buf and cc, the code that fills buf
 * from fd and msg, and the opening of the do loop are not visible in this
 * listing.
 */
349 select_poke(isc_socketmgr_t
*mgr
, int fd
, int msg
) {
/* Scratch buffer for the text form of errno used in the fatal report. */
352 char strbuf
[ISC_STRERRORSIZE
];
/* One write of the whole buffer; cc receives the write() result. */
358 cc
= write(mgr
->pipe_fds
[1], buf
, sizeof(buf
));
361 * Treat ENOSR as EAGAIN but loop slowly as it is
362 * unlikely to clear fast.
/* The body of the ENOSR branch is not visible in this listing. */
364 if (cc
< 0 && errno
== ENOSR
) {
/* Retry for as long as the write fails with a soft error. */
369 } while (cc
< 0 && SOFT_ERROR(errno
));
/*
 * Failure report: errno is rendered into strbuf and passed to
 * FATAL_ERROR.  The condition guarding this report is not visible here
 * (presumably cc < 0 -- TODO confirm).
 */
372 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
373 FATAL_ERROR(__FILE__
, __LINE__
,
374 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
377 "during watcher poke: %s"),
/* A short write is treated as an internal error. */
381 INSIST(cc
== sizeof(buf
));
385 * Read a message on the internal fd.
/*
 * select_readmsg: read one message from the read end of the manager pipe,
 * mgr->pipe_fds[0], and hand the result back through *fd and *msg.
 * NOTE(review): the declaration of buf and cc, the test that selects the
 * error path, and the stores that decode buf into *fd and *msg on success
 * are not visible in this listing.
 */
388 select_readmsg(isc_socketmgr_t
*mgr
, int *fd
, int *msg
) {
/* Scratch buffer for the text form of errno used in the fatal report. */
391 char strbuf
[ISC_STRERRORSIZE
];
/* One read of the whole buffer; cc receives the read() result. */
393 cc
= read(mgr
->pipe_fds
[0], buf
, sizeof(buf
));
/*
 * Error path: report SELECT_POKE_NOTHING to the caller.  A soft error is
 * tested next; the statement it controls is not visible here (presumably
 * a plain return -- TODO confirm).
 */
395 *msg
= SELECT_POKE_NOTHING
;
396 if (SOFT_ERROR(errno
))
/* Any other failure is rendered into strbuf and passed to FATAL_ERROR. */
399 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
400 FATAL_ERROR(__FILE__
, __LINE__
,
401 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
404 "during watcher poke: %s"),
/* A short read is treated as an internal error. */
409 INSIST(cc
== sizeof(buf
));
414 #else /* ISC_PLATFORM_USETHREADS */
416 * Update the state of the socketmgr when something changes.
/*
 * select_poke (build without ISC_PLATFORM_USETHREADS): this variant calls
 * wakeup_socket directly with the same manager, fd and msg.
 * NOTE(review): the statement controlled by the SELECT_POKE_SHUTDOWN test
 * and the branch structure around the wakeup_socket call are not visible
 * in this listing.
 */
419 select_poke(isc_socketmgr_t
*manager
, int fd
, int msg
) {
420 if (msg
== SELECT_POKE_SHUTDOWN
)
/* Apply the message to the manager fd sets. */
423 wakeup_socket(manager
, fd
, msg
);
426 #endif /* ISC_PLATFORM_USETHREADS */
429 * Make a fd non-blocking.
/*
 * make_nonblock: fetch the file status flags of fd with F_GETFL and store
 * flags back with F_SETFL.
 * Returns ISC_R_SUCCESS on the normal path and ISC_R_UNEXPECTED on the
 * error path, after reporting through UNEXPECTED_ERROR.
 * NOTE(review): the statement that changes flags between the two fcntl
 * calls and the test that selects the error path are not visible in this
 * listing (presumably the non-blocking flag is ORed in and ret is checked
 * for -1 -- TODO confirm).
 */
432 make_nonblock(int fd
) {
/* Scratch buffer for the text form of errno used in the error report. */
435 char strbuf
[ISC_STRERRORSIZE
];
/* Current file status flags of the descriptor. */
437 flags
= fcntl(fd
, F_GETFL
, 0);
/* Store the flags back on the descriptor. */
439 ret
= fcntl(fd
, F_SETFL
, flags
);
/* Error path: report the fcntl failure with the rendered errno text. */
442 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
443 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
444 "fcntl(%d, F_SETFL, %d): %s",
447 return (ISC_R_UNEXPECTED
);
450 return (ISC_R_SUCCESS
);
455 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
456 * In order to ensure as much portability as possible, we provide wrapper
457 * functions of these macros.
458 * Note that cmsg_space() could run slow on OSes that do not have
461 static inline ISC_SOCKADDR_LEN_T
462 cmsg_len(ISC_SOCKADDR_LEN_T len
) {
464 return (CMSG_LEN(len
));
466 ISC_SOCKADDR_LEN_T hdrlen
;
468 hdrlen
= (ISC_SOCKADDR_LEN_T
)CMSG_DATA(NULL
); /* XXX */
469 return (hdrlen
+ len
);
473 static inline ISC_SOCKADDR_LEN_T
474 cmsg_space(ISC_SOCKADDR_LEN_T len
) {
476 return (CMSG_SPACE(len
));
479 struct cmsghdr
*cmsgp
;
481 * XXX: The buffer length is an ad-hoc value, but should be enough
482 * in a practical sense.
484 char dummybuf
[sizeof(struct cmsghdr
) + 1024];
486 memset(&msg
, 0, sizeof(msg
));
487 msg
.msg_control
= dummybuf
;
488 msg
.msg_controllen
= sizeof(dummybuf
);
490 cmsgp
= (struct cmsghdr
*)dummybuf
;
491 cmsgp
->cmsg_len
= cmsg_len(len
);
493 cmsgp
= CMSG_NXTHDR(&msg
, cmsgp
);
495 return ((char *)cmsgp
- (char *)msg
.msg_control
);
500 #endif /* USE_CMSG */
503 * Process control messages received on a socket.
506 process_cmsg(isc_socket_t
*sock
, struct msghdr
*msg
, isc_socketevent_t
*dev
) {
508 struct cmsghdr
*cmsgp
;
509 #ifdef ISC_PLATFORM_HAVEIPV6
510 struct in6_pktinfo
*pktinfop
;
513 struct timeval
*timevalp
;
518 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
519 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
520 * They are all here, outside of the CPP tests, because it is
521 * more consistent with the usual ISC coding style.
527 #ifdef ISC_NET_BSD44MSGHDR
530 if ((msg
->msg_flags
& MSG_TRUNC
) == MSG_TRUNC
)
531 dev
->attributes
|= ISC_SOCKEVENTATTR_TRUNC
;
535 if ((msg
->msg_flags
& MSG_CTRUNC
) == MSG_CTRUNC
)
536 dev
->attributes
|= ISC_SOCKEVENTATTR_CTRUNC
;
542 if (msg
->msg_controllen
== 0U || msg
->msg_control
== NULL
)
548 #ifdef ISC_PLATFORM_HAVEIPV6
552 cmsgp
= CMSG_FIRSTHDR(msg
);
553 while (cmsgp
!= NULL
) {
554 socket_log(sock
, NULL
, TRACE
,
555 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_PROCESSCMSG
,
556 "processing cmsg %p", cmsgp
);
558 #ifdef ISC_PLATFORM_HAVEIPV6
559 if (cmsgp
->cmsg_level
== IPPROTO_IPV6
560 && cmsgp
->cmsg_type
== IPV6_PKTINFO
) {
562 pktinfop
= (struct in6_pktinfo
*)CMSG_DATA(cmsgp
);
563 memcpy(&dev
->pktinfo
, pktinfop
,
564 sizeof(struct in6_pktinfo
));
565 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
566 socket_log(sock
, NULL
, TRACE
,
567 isc_msgcat
, ISC_MSGSET_SOCKET
,
569 "interface received on ifindex %u",
570 dev
->pktinfo
.ipi6_ifindex
);
571 if (IN6_IS_ADDR_MULTICAST(&pktinfop
->ipi6_addr
))
572 dev
->attributes
|= ISC_SOCKEVENTATTR_MULTICAST
;
578 if (cmsgp
->cmsg_level
== SOL_SOCKET
579 && cmsgp
->cmsg_type
== SCM_TIMESTAMP
) {
580 timevalp
= (struct timeval
*)CMSG_DATA(cmsgp
);
581 dev
->timestamp
.seconds
= timevalp
->tv_sec
;
582 dev
->timestamp
.nanoseconds
= timevalp
->tv_usec
* 1000;
583 dev
->attributes
|= ISC_SOCKEVENTATTR_TIMESTAMP
;
589 cmsgp
= CMSG_NXTHDR(msg
, cmsgp
);
591 #endif /* USE_CMSG */
593 #endif /* ISC_NET_BSD44MSGHDR */
597 * Construct an iov array and attach it to the msghdr passed in. This is
598 * the SEND constructor, which will use the used region of the buffer
599 * (if using a buffer list) or will use the internal region (if a single
600 * buffer I/O is requested).
602 * Nothing can be NULL, and the done event must list at least one buffer
603 * on the buffer linked list for this function to be meaningful.
605 * If write_countp != NULL, *write_countp will hold the number of bytes
606 * this transaction can send.
609 build_msghdr_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
610 struct msghdr
*msg
, struct iovec
*iov
, size_t *write_countp
)
612 unsigned int iovcount
;
613 isc_buffer_t
*buffer
;
618 memset(msg
, 0, sizeof(*msg
));
620 if (sock
->type
== isc_sockettype_udp
) {
621 msg
->msg_name
= (void *)&dev
->address
.type
.sa
;
622 msg
->msg_namelen
= dev
->address
.length
;
624 msg
->msg_name
= NULL
;
625 msg
->msg_namelen
= 0;
628 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
633 * Single buffer I/O? Skip what we've done so far in this region.
635 if (buffer
== NULL
) {
636 write_count
= dev
->region
.length
- dev
->n
;
637 iov
[0].iov_base
= (void *)(dev
->region
.base
+ dev
->n
);
638 iov
[0].iov_len
= write_count
;
646 * Skip the data in the buffer list that we have already written.
649 while (buffer
!= NULL
) {
650 REQUIRE(ISC_BUFFER_VALID(buffer
));
651 if (skip_count
< isc_buffer_usedlength(buffer
))
653 skip_count
-= isc_buffer_usedlength(buffer
);
654 buffer
= ISC_LIST_NEXT(buffer
, link
);
657 while (buffer
!= NULL
) {
658 INSIST(iovcount
< MAXSCATTERGATHER_SEND
);
660 isc_buffer_usedregion(buffer
, &used
);
662 if (used
.length
> 0) {
663 iov
[iovcount
].iov_base
= (void *)(used
.base
665 iov
[iovcount
].iov_len
= used
.length
- skip_count
;
666 write_count
+= (used
.length
- skip_count
);
670 buffer
= ISC_LIST_NEXT(buffer
, link
);
673 INSIST(skip_count
== 0U);
677 msg
->msg_iovlen
= iovcount
;
679 #ifdef ISC_NET_BSD44MSGHDR
680 msg
->msg_control
= NULL
;
681 msg
->msg_controllen
= 0;
683 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIPV6)
684 if ((sock
->type
== isc_sockettype_udp
)
685 && ((dev
->attributes
& ISC_SOCKEVENTATTR_PKTINFO
) != 0)) {
686 struct cmsghdr
*cmsgp
;
687 struct in6_pktinfo
*pktinfop
;
689 socket_log(sock
, NULL
, TRACE
,
690 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_SENDTODATA
,
691 "sendto pktinfo data, ifindex %u",
692 dev
->pktinfo
.ipi6_ifindex
);
694 msg
->msg_controllen
= cmsg_space(sizeof(struct in6_pktinfo
));
695 INSIST(msg
->msg_controllen
<= sock
->sendcmsgbuflen
);
696 msg
->msg_control
= (void *)sock
->sendcmsgbuf
;
698 cmsgp
= (struct cmsghdr
*)sock
->sendcmsgbuf
;
699 cmsgp
->cmsg_level
= IPPROTO_IPV6
;
700 cmsgp
->cmsg_type
= IPV6_PKTINFO
;
701 cmsgp
->cmsg_len
= cmsg_len(sizeof(struct in6_pktinfo
));
702 pktinfop
= (struct in6_pktinfo
*)CMSG_DATA(cmsgp
);
703 memcpy(pktinfop
, &dev
->pktinfo
, sizeof(struct in6_pktinfo
));
705 #endif /* USE_CMSG && ISC_PLATFORM_HAVEIPV6 */
706 #else /* ISC_NET_BSD44MSGHDR */
707 msg
->msg_accrights
= NULL
;
708 msg
->msg_accrightslen
= 0;
709 #endif /* ISC_NET_BSD44MSGHDR */
711 if (write_countp
!= NULL
)
712 *write_countp
= write_count
;
716 * Construct an iov array and attach it to the msghdr passed in. This is
717 * the RECV constructor, which will use the available region of the buffer
718 * (if using a buffer list) or will use the internal region (if a single
719 * buffer I/O is requested).
721 * Nothing can be NULL, and the done event must list at least one buffer
722 * on the buffer linked list for this function to be meaningful.
724 * If read_countp != NULL, *read_countp will hold the number of bytes
725 * this transaction can receive.
728 build_msghdr_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
,
729 struct msghdr
*msg
, struct iovec
*iov
, size_t *read_countp
)
731 unsigned int iovcount
;
732 isc_buffer_t
*buffer
;
733 isc_region_t available
;
736 memset(msg
, 0, sizeof(struct msghdr
));
738 if (sock
->type
== isc_sockettype_udp
) {
739 memset(&dev
->address
, 0, sizeof(dev
->address
));
740 msg
->msg_name
= (void *)&dev
->address
.type
.sa
;
741 msg
->msg_namelen
= sizeof(dev
->address
.type
);
742 #ifdef ISC_NET_RECVOVERFLOW
743 /* If needed, steal one iovec for overflow detection. */
747 msg
->msg_name
= NULL
;
748 msg
->msg_namelen
= 0;
749 dev
->address
= sock
->address
;
752 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
756 * Single buffer I/O? Skip what we've done so far in this region.
758 if (buffer
== NULL
) {
759 read_count
= dev
->region
.length
- dev
->n
;
760 iov
[0].iov_base
= (void *)(dev
->region
.base
+ dev
->n
);
761 iov
[0].iov_len
= read_count
;
769 * Skip empty buffers.
771 while (buffer
!= NULL
) {
772 REQUIRE(ISC_BUFFER_VALID(buffer
));
773 if (isc_buffer_availablelength(buffer
) != 0)
775 buffer
= ISC_LIST_NEXT(buffer
, link
);
779 while (buffer
!= NULL
) {
780 INSIST(iovcount
< MAXSCATTERGATHER_RECV
);
782 isc_buffer_availableregion(buffer
, &available
);
784 if (available
.length
> 0) {
785 iov
[iovcount
].iov_base
= (void *)(available
.base
);
786 iov
[iovcount
].iov_len
= available
.length
;
787 read_count
+= available
.length
;
790 buffer
= ISC_LIST_NEXT(buffer
, link
);
796 * If needed, set up to receive that one extra byte. Note that
797 * we know there is at least one iov left, since we stole it
798 * at the top of this function.
800 #ifdef ISC_NET_RECVOVERFLOW
801 if (sock
->type
== isc_sockettype_udp
) {
802 iov
[iovcount
].iov_base
= (void *)(&sock
->overflow
);
803 iov
[iovcount
].iov_len
= 1;
809 msg
->msg_iovlen
= iovcount
;
811 #ifdef ISC_NET_BSD44MSGHDR
812 msg
->msg_control
= NULL
;
813 msg
->msg_controllen
= 0;
815 #if defined(USE_CMSG)
816 if (sock
->type
== isc_sockettype_udp
) {
817 msg
->msg_control
= sock
->recvcmsgbuf
;
818 msg
->msg_controllen
= sock
->recvcmsgbuflen
;
820 #endif /* USE_CMSG */
821 #else /* ISC_NET_BSD44MSGHDR */
822 msg
->msg_accrights
= NULL
;
823 msg
->msg_accrightslen
= 0;
824 #endif /* ISC_NET_BSD44MSGHDR */
826 if (read_countp
!= NULL
)
827 *read_countp
= read_count
;
/*
 * set_dev_address: fill in dev->address for an I/O event on sock.
 *   address  address supplied by the caller; dereferenced in the UDP
 *            branch and required to be NULL in the TCP branch
 *   sock     socket whose type selects the branch and whose stored address
 *            is the alternative source
 *   dev      event whose address field is written
 * NOTE(review): the return type and the test that chooses between the two
 * UDP assignments are not visible in this listing (presumably
 * address != NULL -- TODO confirm).
 */
831 set_dev_address(isc_sockaddr_t
*address
, isc_socket_t
*sock
,
832 isc_socketevent_t
*dev
)
/* UDP: take the caller address, or else the address stored on sock. */
834 if (sock
->type
== isc_sockettype_udp
) {
836 dev
->address
= *address
;
838 dev
->address
= sock
->address
;
/* TCP: the caller must pass NULL; the address stored on sock is used. */
839 } else if (sock
->type
== isc_sockettype_tcp
) {
840 INSIST(address
== NULL
);
841 dev
->address
= sock
->address
;
845 static isc_socketevent_t
*
846 allocate_socketevent(isc_socket_t
*sock
, isc_eventtype_t eventtype
,
847 isc_taskaction_t action
, const void *arg
)
849 isc_socketevent_t
*ev
;
851 ev
= (isc_socketevent_t
*)isc_event_allocate(sock
->manager
->mctx
,
859 ev
->result
= ISC_R_UNEXPECTED
;
860 ISC_LINK_INIT(ev
, ev_link
);
861 ISC_LIST_INIT(ev
->bufferlist
);
862 ev
->region
.base
= NULL
;
870 #if defined(ISC_SOCKET_DEBUG)
872 dump_msg(struct msghdr
*msg
) {
875 printf("MSGHDR %p\n", msg
);
876 printf("\tname %p, namelen %d\n", msg
->msg_name
, msg
->msg_namelen
);
877 printf("\tiov %p, iovlen %d\n", msg
->msg_iov
, msg
->msg_iovlen
);
878 for (i
= 0 ; i
< (unsigned int)msg
->msg_iovlen
; i
++)
879 printf("\t\t%d\tbase %p, len %d\n", i
,
880 msg
->msg_iov
[i
].iov_base
,
881 msg
->msg_iov
[i
].iov_len
);
882 #ifdef ISC_NET_BSD44MSGHDR
883 printf("\tcontrol %p, controllen %d\n", msg
->msg_control
,
884 msg
->msg_controllen
);
889 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
890 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
891 #define DOIO_HARD 2 /* i/o error, event sent */
892 #define DOIO_EOF 3 /* EOF, no event sent */
895 doio_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
897 struct iovec iov
[MAXSCATTERGATHER_RECV
];
900 struct msghdr msghdr
;
901 isc_buffer_t
*buffer
;
903 char strbuf
[ISC_STRERRORSIZE
];
905 build_msghdr_recv(sock
, dev
, &msghdr
, iov
, &read_count
);
907 #if defined(ISC_SOCKET_DEBUG)
911 cc
= recvmsg(sock
->fd
, &msghdr
, 0);
915 if (SOFT_ERROR(recv_errno
))
918 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
919 isc__strerror(recv_errno
, strbuf
, sizeof(strbuf
));
920 socket_log(sock
, NULL
, IOEVENT
,
921 isc_msgcat
, ISC_MSGSET_SOCKET
,
923 "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
924 sock
->fd
, cc
, recv_errno
, strbuf
);
927 #define SOFT_OR_HARD(_system, _isc) \
928 if (recv_errno == _system) { \
929 if (sock->connected) { \
930 dev->result = _isc; \
931 return (DOIO_HARD); \
933 return (DOIO_SOFT); \
935 #define ALWAYS_HARD(_system, _isc) \
936 if (recv_errno == _system) { \
937 dev->result = _isc; \
938 return (DOIO_HARD); \
941 SOFT_OR_HARD(ECONNREFUSED
, ISC_R_CONNREFUSED
);
942 SOFT_OR_HARD(ENETUNREACH
, ISC_R_NETUNREACH
);
943 SOFT_OR_HARD(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
944 SOFT_OR_HARD(EHOSTDOWN
, ISC_R_HOSTDOWN
);
945 /* HPUX 11.11 can return EADDRNOTAVAIL. */
946 SOFT_OR_HARD(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
947 ALWAYS_HARD(ENOBUFS
, ISC_R_NORESOURCES
);
952 dev
->result
= isc__errno2result(recv_errno
);
957 * On TCP, zero length reads indicate EOF, while on
958 * UDP, zero length reads are perfectly valid, although
961 if ((sock
->type
== isc_sockettype_tcp
) && (cc
== 0))
964 if (sock
->type
== isc_sockettype_udp
) {
965 dev
->address
.length
= msghdr
.msg_namelen
;
966 if (isc_sockaddr_getport(&dev
->address
) == 0) {
967 if (isc_log_wouldlog(isc_lctx
, IOEVENT_LEVEL
)) {
968 socket_log(sock
, &dev
->address
, IOEVENT
,
969 isc_msgcat
, ISC_MSGSET_SOCKET
,
971 "dropping source port zero packet");
977 socket_log(sock
, &dev
->address
, IOEVENT
,
978 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_PKTRECV
,
979 "packet received correctly");
982 * Overflow bit detection. If we received MORE bytes than we should,
983 * this indicates an overflow situation. Set the flag in the
984 * dev entry and adjust how much we read by one.
986 #ifdef ISC_NET_RECVOVERFLOW
987 if ((sock
->type
== isc_sockettype_udp
) && ((size_t)cc
> read_count
)) {
988 dev
->attributes
|= ISC_SOCKEVENTATTR_TRUNC
;
994 * If there are control messages attached, run through them and pull
995 * out the interesting bits.
997 if (sock
->type
== isc_sockettype_udp
)
998 process_cmsg(sock
, &msghdr
, dev
);
1001 * update the buffers (if any) and the i/o count
1005 buffer
= ISC_LIST_HEAD(dev
->bufferlist
);
1006 while (buffer
!= NULL
&& actual_count
> 0U) {
1007 REQUIRE(ISC_BUFFER_VALID(buffer
));
1008 if (isc_buffer_availablelength(buffer
) <= actual_count
) {
1009 actual_count
-= isc_buffer_availablelength(buffer
);
1010 isc_buffer_add(buffer
,
1011 isc_buffer_availablelength(buffer
));
1013 isc_buffer_add(buffer
, actual_count
);
1017 buffer
= ISC_LIST_NEXT(buffer
, link
);
1018 if (buffer
== NULL
) {
1019 INSIST(actual_count
== 0U);
1024 * If we read less than we expected, update counters,
1025 * and let the upper layer poke the descriptor.
1027 if (((size_t)cc
!= read_count
) && (dev
->n
< dev
->minimum
))
1031 * Full reads are posted, or partials if partials are ok.
1033 dev
->result
= ISC_R_SUCCESS
;
1034 return (DOIO_SUCCESS
);
1039 * DOIO_SUCCESS The operation succeeded. dev->result contains
1042 * DOIO_HARD A hard or unexpected I/O error was encountered.
1043 * dev->result contains the appropriate error.
1045 * DOIO_SOFT A soft I/O error was encountered. No senddone
1046 * event was sent. The operation should be retried.
1048 * No other return values are possible.
1051 doio_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
) {
1053 struct iovec iov
[MAXSCATTERGATHER_SEND
];
1055 struct msghdr msghdr
;
1056 char addrbuf
[ISC_SOCKADDR_FORMATSIZE
];
1059 char strbuf
[ISC_STRERRORSIZE
];
1061 build_msghdr_send(sock
, dev
, &msghdr
, iov
, &write_count
);
1064 cc
= sendmsg(sock
->fd
, &msghdr
, 0);
1068 * Check for error or block condition.
1071 if (send_errno
== EINTR
&& ++attempts
< NRETRIES
)
1074 if (SOFT_ERROR(send_errno
))
1077 #define SOFT_OR_HARD(_system, _isc) \
1078 if (send_errno == _system) { \
1079 if (sock->connected) { \
1080 dev->result = _isc; \
1081 return (DOIO_HARD); \
1083 return (DOIO_SOFT); \
1085 #define ALWAYS_HARD(_system, _isc) \
1086 if (send_errno == _system) { \
1087 dev->result = _isc; \
1088 return (DOIO_HARD); \
1091 SOFT_OR_HARD(ECONNREFUSED
, ISC_R_CONNREFUSED
);
1092 ALWAYS_HARD(EACCES
, ISC_R_NOPERM
);
1093 ALWAYS_HARD(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
1094 ALWAYS_HARD(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
1095 ALWAYS_HARD(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
1097 ALWAYS_HARD(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
1099 ALWAYS_HARD(ENETUNREACH
, ISC_R_NETUNREACH
);
1100 ALWAYS_HARD(ENOBUFS
, ISC_R_NORESOURCES
);
1101 ALWAYS_HARD(EPERM
, ISC_R_HOSTUNREACH
);
1102 ALWAYS_HARD(EPIPE
, ISC_R_NOTCONNECTED
);
1103 ALWAYS_HARD(ECONNRESET
, ISC_R_CONNECTIONRESET
);
1109 * The other error types depend on whether or not the
1110 * socket is UDP or TCP. If it is UDP, some errors
1111 * that we expect to be fatal under TCP are merely
1112 * annoying, and are really soft errors.
1114 * However, these soft errors are still returned as
1117 isc_sockaddr_format(&dev
->address
, addrbuf
, sizeof(addrbuf
));
1118 isc__strerror(send_errno
, strbuf
, sizeof(strbuf
));
1119 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "internal_send: %s: %s",
1121 dev
->result
= isc__errno2result(send_errno
);
1126 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1127 "internal_send: send() %s 0",
1128 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1129 ISC_MSG_RETURNED
, "returned"));
1132 * If we write less than we expected, update counters, poke.
1135 if ((size_t)cc
!= write_count
)
1139 * Exactly what we wanted to write. We're done with this
1140 * entry. Post its completion event.
1142 dev
->result
= ISC_R_SUCCESS
;
1143 return (DOIO_SUCCESS
);
1149 * Caller must ensure that the socket is not locked and no external
/*
 * destroy: detach the socket *sockp from its manager.  Under the manager
 * lock it clears the fds[] slot, marks the descriptor CLOSE_PENDING, pokes
 * the watcher with SELECT_POKE_CLOSE and unlinks the socket from the
 * manager socklist.
 * NOTE(review): lines after the UNLOCK are not visible in this listing, so
 * whatever releases the socket object itself cannot be documented here.
 */
1153 destroy(isc_socket_t
**sockp
) {
1154 isc_socket_t
*sock
= *sockp
;
1155 isc_socketmgr_t
*manager
= sock
->manager
;
1157 socket_log(sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1158 ISC_MSG_DESTROYING
, "destroying");
/*
 * Preconditions: no queued accept, recv or send events, no pending
 * connect event, and a descriptor that is a valid index into the manager
 * arrays.
 */
1160 INSIST(ISC_LIST_EMPTY(sock
->accept_list
));
1161 INSIST(ISC_LIST_EMPTY(sock
->recv_list
));
1162 INSIST(ISC_LIST_EMPTY(sock
->send_list
));
1163 INSIST(sock
->connect_ev
== NULL
);
1164 REQUIRE(sock
->fd
>= 0 && sock
->fd
< (int)FD_SETSIZE
);
/* Everything up to the UNLOCK below runs with the manager lock held. */
1166 LOCK(&manager
->lock
);
1169 * No one has this socket open, so the watcher doesn't have to be
1170 * poked, and the socket doesn't have to be locked.
/*
 * NOTE(review): the remark above says the watcher need not be poked, yet
 * select_poke is called below with SELECT_POKE_CLOSE -- confirm which one
 * is intended and update the remark.
 */
1172 manager
->fds
[sock
->fd
] = NULL
;
1173 manager
->fdstate
[sock
->fd
] = CLOSE_PENDING
;
1174 select_poke(manager
, sock
->fd
, SELECT_POKE_CLOSE
);
1175 ISC_LIST_UNLINK(manager
->socklist
, sock
, link
);
/*
 * Threaded build only: once the last socket has been unlinked, signal the
 * shutdown_ok condition.
 */
1177 #ifdef ISC_PLATFORM_USETHREADS
1178 if (ISC_LIST_EMPTY(manager
->socklist
))
1179 SIGNAL(&manager
->shutdown_ok
);
1180 #endif /* ISC_PLATFORM_USETHREADS */
1183 * XXX should reset manager->maxfd here
1186 UNLOCK(&manager
->lock
);
1192 allocate_socket(isc_socketmgr_t
*manager
, isc_sockettype_t type
,
1193 isc_socket_t
**socketp
)
1197 ISC_SOCKADDR_LEN_T cmsgbuflen
;
1199 sock
= isc_mem_get(manager
->mctx
, sizeof(*sock
));
1202 return (ISC_R_NOMEMORY
);
1204 ret
= ISC_R_UNEXPECTED
;
1207 sock
->references
= 0;
1209 sock
->manager
= manager
;
1213 ISC_LINK_INIT(sock
, link
);
1215 sock
->recvcmsgbuf
= NULL
;
1216 sock
->sendcmsgbuf
= NULL
;
1219 * set up cmsg buffers
1222 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIPV6)
1223 cmsgbuflen
= cmsg_space(sizeof(struct in6_pktinfo
));
1225 #if defined(USE_CMSG) && defined(SO_TIMESTAMP)
1226 cmsgbuflen
+= cmsg_space(sizeof(struct timeval
));
1228 sock
->recvcmsgbuflen
= cmsgbuflen
;
1229 if (sock
->recvcmsgbuflen
!= 0) {
1230 sock
->recvcmsgbuf
= isc_mem_get(manager
->mctx
, cmsgbuflen
);
1231 if (sock
->recvcmsgbuf
== NULL
)
1236 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIPV6)
1237 cmsgbuflen
= cmsg_space(sizeof(struct in6_pktinfo
));
1239 sock
->sendcmsgbuflen
= cmsgbuflen
;
1240 if (sock
->sendcmsgbuflen
!= 0) {
1241 sock
->sendcmsgbuf
= isc_mem_get(manager
->mctx
, cmsgbuflen
);
1242 if (sock
->sendcmsgbuf
== NULL
)
1247 * set up list of readers and writers to be initially empty
1249 ISC_LIST_INIT(sock
->recv_list
);
1250 ISC_LIST_INIT(sock
->send_list
);
1251 ISC_LIST_INIT(sock
->accept_list
);
1252 sock
->connect_ev
= NULL
;
1253 sock
->pending_recv
= 0;
1254 sock
->pending_send
= 0;
1255 sock
->pending_accept
= 0;
1257 sock
->connected
= 0;
1258 sock
->connecting
= 0;
1262 * initialize the lock
1264 if (isc_mutex_init(&sock
->lock
) != ISC_R_SUCCESS
) {
1266 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1267 "isc_mutex_init() %s",
1268 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1269 ISC_MSG_FAILED
, "failed"));
1270 ret
= ISC_R_UNEXPECTED
;
1275 * Initialize readable and writable events
1277 ISC_EVENT_INIT(&sock
->readable_ev
, sizeof(intev_t
),
1278 ISC_EVENTATTR_NOPURGE
, NULL
, ISC_SOCKEVENT_INTR
,
1279 NULL
, sock
, sock
, NULL
, NULL
);
1280 ISC_EVENT_INIT(&sock
->writable_ev
, sizeof(intev_t
),
1281 ISC_EVENTATTR_NOPURGE
, NULL
, ISC_SOCKEVENT_INTW
,
1282 NULL
, sock
, sock
, NULL
, NULL
);
1284 sock
->magic
= SOCKET_MAGIC
;
1287 return (ISC_R_SUCCESS
);
1290 if (sock
->recvcmsgbuf
!= NULL
)
1291 isc_mem_put(manager
->mctx
, sock
->recvcmsgbuf
,
1292 sock
->recvcmsgbuflen
);
1293 if (sock
->sendcmsgbuf
!= NULL
)
1294 isc_mem_put(manager
->mctx
, sock
->sendcmsgbuf
,
1295 sock
->sendcmsgbuflen
);
1296 isc_mem_put(manager
->mctx
, sock
, sizeof(*sock
));
1302 * This routine requires that the various lists be empty, that the reference
1303 * count be 0, and that the magic number is valid. The other socket bits,
1304 * like the lock, must be initialized as well. The fd associated must be
1305 * marked as closed, by setting it to -1 on close, or this routine will
1306 * also close the socket.
/*
 * free_socket: release the memory held by the socket *socketp.
 * After checking its preconditions it returns the receive and send
 * control-message buffers (when present) to the manager memory context,
 * destroys the socket lock, and then returns the socket structure itself
 * to the same memory context.
 * NOTE(review): some original lines between the buffer release and
 * DESTROYLOCK, and after the final isc_mem_put, are not visible in this
 * listing.
 */
1309 free_socket(isc_socket_t
**socketp
) {
1310 isc_socket_t
*sock
= *socketp
;
/*
 * Preconditions: no references remain, the magic number is valid, no
 * connect or I/O is pending, every event list is empty, and the socket is
 * no longer linked into a list.
 */
1312 INSIST(sock
->references
== 0);
1313 INSIST(VALID_SOCKET(sock
));
1314 INSIST(!sock
->connecting
);
1315 INSIST(!sock
->pending_recv
);
1316 INSIST(!sock
->pending_send
);
1317 INSIST(!sock
->pending_accept
);
1318 INSIST(ISC_LIST_EMPTY(sock
->recv_list
));
1319 INSIST(ISC_LIST_EMPTY(sock
->send_list
));
1320 INSIST(ISC_LIST_EMPTY(sock
->accept_list
));
1321 INSIST(!ISC_LINK_LINKED(sock
, link
));
/* Each buffer is returned with the length recorded for it on the socket. */
1323 if (sock
->recvcmsgbuf
!= NULL
)
1324 isc_mem_put(sock
->manager
->mctx
, sock
->recvcmsgbuf
,
1325 sock
->recvcmsgbuflen
);
1326 if (sock
->sendcmsgbuf
!= NULL
)
1327 isc_mem_put(sock
->manager
->mctx
, sock
->sendcmsgbuf
,
1328 sock
->sendcmsgbuflen
);
/* The lock is destroyed before the structure that contains it is freed. */
1332 DESTROYLOCK(&sock
->lock
);
1334 isc_mem_put(sock
->manager
->mctx
, sock
, sizeof(*sock
));
1340 * Create a new 'type' socket managed by 'manager'. Events
1341 * will be posted to 'task' and when dispatched 'action' will be
1342 * called with 'arg' as the arg value. The new socket is returned
1346 isc_socket_create(isc_socketmgr_t
*manager
, int pf
, isc_sockettype_t type
,
1347 isc_socket_t
**socketp
)
1349 isc_socket_t
*sock
= NULL
;
1351 #if defined(USE_CMSG) || defined(SO_BSDCOMPAT)
1354 char strbuf
[ISC_STRERRORSIZE
];
1356 REQUIRE(VALID_MANAGER(manager
));
1357 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1359 ret
= allocate_socket(manager
, type
, &sock
);
1360 if (ret
!= ISC_R_SUCCESS
)
1365 case isc_sockettype_udp
:
1366 sock
->fd
= socket(pf
, SOCK_DGRAM
, IPPROTO_UDP
);
1368 case isc_sockettype_tcp
:
1369 sock
->fd
= socket(pf
, SOCK_STREAM
, IPPROTO_TCP
);
1375 * Leave a space for stdio to work in.
1377 if (sock
->fd
>= 0 && sock
->fd
< 20) {
1379 new = fcntl(sock
->fd
, F_DUPFD
, 20);
1381 (void)close(sock
->fd
);
1387 if (sock
->fd
>= (int)FD_SETSIZE
) {
1388 (void)close(sock
->fd
);
1389 isc_log_iwrite(isc_lctx
, ISC_LOGCATEGORY_GENERAL
,
1390 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
1391 isc_msgcat
, ISC_MSGSET_SOCKET
,
1393 "%s: too many open file descriptors", "socket");
1395 return (ISC_R_NORESOURCES
);
1405 return (ISC_R_NORESOURCES
);
1407 case EPROTONOSUPPORT
:
1411 * Linux 2.2 (and maybe others) return EINVAL instead of
1415 return (ISC_R_FAMILYNOSUPPORT
);
1418 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1419 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1421 isc_msgcat_get(isc_msgcat
,
1426 return (ISC_R_UNEXPECTED
);
1430 if (make_nonblock(sock
->fd
) != ISC_R_SUCCESS
) {
1431 (void)close(sock
->fd
);
1433 return (ISC_R_UNEXPECTED
);
1437 if (setsockopt(sock
->fd
, SOL_SOCKET
, SO_BSDCOMPAT
,
1438 (void *)&on
, sizeof(on
)) < 0) {
1439 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1440 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1441 "setsockopt(%d, SO_BSDCOMPAT) %s: %s",
1443 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
1444 ISC_MSG_FAILED
, "failed"),
1450 #if defined(USE_CMSG)
1451 if (type
== isc_sockettype_udp
) {
1453 #if defined(SO_TIMESTAMP)
1454 if (setsockopt(sock
->fd
, SOL_SOCKET
, SO_TIMESTAMP
,
1455 (void *)&on
, sizeof(on
)) < 0
1456 && errno
!= ENOPROTOOPT
) {
1457 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1458 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1459 "setsockopt(%d, SO_TIMESTAMP) %s: %s",
1461 isc_msgcat_get(isc_msgcat
,
1468 #endif /* SO_TIMESTAMP */
1470 #if defined(ISC_PLATFORM_HAVEIPV6)
1471 if (pf
== AF_INET6
&& sock
->recvcmsgbuflen
== 0) {
1473 * Warn explicitly because this anomaly can be hidden
1474 * in usual operation (and unexpectedly appear later).
1476 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1477 "No buffer available to receive "
1478 "IPv6 destination");
1480 #ifdef IPV6_RECVPKTINFO
1482 if ((pf
== AF_INET6
)
1483 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_RECVPKTINFO
,
1484 (void *)&on
, sizeof(on
)) < 0)) {
1485 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1486 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1487 "setsockopt(%d, IPV6_RECVPKTINFO) "
1489 isc_msgcat_get(isc_msgcat
,
1497 if ((pf
== AF_INET6
)
1498 && (setsockopt(sock
->fd
, IPPROTO_IPV6
, IPV6_PKTINFO
,
1499 (void *)&on
, sizeof(on
)) < 0)) {
1500 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1501 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1502 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1504 isc_msgcat_get(isc_msgcat
,
1510 #endif /* IPV6_RECVPKTINFO */
1511 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1512 /* use minimum MTU */
1513 if (pf
== AF_INET6
) {
1514 (void)setsockopt(sock
->fd
, IPPROTO_IPV6
,
1516 (void *)&on
, sizeof(on
));
1519 #endif /* ISC_PLATFORM_HAVEIPV6 */
1522 #endif /* USE_CMSG */
1524 sock
->references
= 1;
1527 LOCK(&manager
->lock
);
1530 * Note we don't have to lock the socket like we normally would because
1531 * there are no external references to it yet.
1534 manager
->fds
[sock
->fd
] = sock
;
1535 manager
->fdstate
[sock
->fd
] = MANAGED
;
1536 ISC_LIST_APPEND(manager
->socklist
, sock
, link
);
1537 if (manager
->maxfd
< sock
->fd
)
1538 manager
->maxfd
= sock
->fd
;
1540 UNLOCK(&manager
->lock
);
1542 socket_log(sock
, NULL
, CREATION
, isc_msgcat
, ISC_MSGSET_SOCKET
,
1543 ISC_MSG_CREATED
, "created");
1545 return (ISC_R_SUCCESS
);
1549 * Attach to a socket. Caller must explicitly detach when it is done.
1552 isc_socket_attach(isc_socket_t
*sock
, isc_socket_t
**socketp
) {
1553 REQUIRE(VALID_SOCKET(sock
));
1554 REQUIRE(socketp
!= NULL
&& *socketp
== NULL
);
1558 UNLOCK(&sock
->lock
);
1564 * Dereference a socket. If this is the last reference to it, clean things
1565 * up by destroying the socket.
1568 isc_socket_detach(isc_socket_t
**socketp
) {
1570 isc_boolean_t kill_socket
= ISC_FALSE
;
1572 REQUIRE(socketp
!= NULL
);
1574 REQUIRE(VALID_SOCKET(sock
));
1577 REQUIRE(sock
->references
> 0);
1579 if (sock
->references
== 0)
1580 kill_socket
= ISC_TRUE
;
1581 UNLOCK(&sock
->lock
);
1590 * I/O is possible on a given socket. Schedule an event to this task that
1591 * will call an internal function to do the I/O. This will charge the
1592 * task with the I/O operation and let our select loop handler get back
1593 * to doing something real as fast as possible.
1595 * The socket and manager must be locked before calling this function.
1598 dispatch_recv(isc_socket_t
*sock
) {
1600 isc_socketevent_t
*ev
;
1602 INSIST(!sock
->pending_recv
);
1604 ev
= ISC_LIST_HEAD(sock
->recv_list
);
1608 sock
->pending_recv
= 1;
1609 iev
= &sock
->readable_ev
;
1611 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
1612 "dispatch_recv: event %p -> task %p", ev
, ev
->ev_sender
);
1615 iev
->ev_sender
= sock
;
1616 iev
->ev_action
= internal_recv
;
1619 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1623 dispatch_send(isc_socket_t
*sock
) {
1625 isc_socketevent_t
*ev
;
1627 INSIST(!sock
->pending_send
);
1629 ev
= ISC_LIST_HEAD(sock
->send_list
);
1633 sock
->pending_send
= 1;
1634 iev
= &sock
->writable_ev
;
1636 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
1637 "dispatch_send: event %p -> task %p", ev
, ev
->ev_sender
);
1640 iev
->ev_sender
= sock
;
1641 iev
->ev_action
= internal_send
;
1644 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1648 * Dispatch an internal accept event.
1651 dispatch_accept(isc_socket_t
*sock
) {
1653 isc_socket_newconnev_t
*ev
;
1655 INSIST(!sock
->pending_accept
);
1658 * Are there any done events left, or were they all canceled
1659 * before the manager got the socket lock?
1661 ev
= ISC_LIST_HEAD(sock
->accept_list
);
1665 sock
->pending_accept
= 1;
1666 iev
= &sock
->readable_ev
;
1668 sock
->references
++; /* keep socket around for this internal event */
1669 iev
->ev_sender
= sock
;
1670 iev
->ev_action
= internal_accept
;
1673 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1677 dispatch_connect(isc_socket_t
*sock
) {
1679 isc_socket_connev_t
*ev
;
1681 iev
= &sock
->writable_ev
;
1683 ev
= sock
->connect_ev
;
1684 INSIST(ev
!= NULL
); /* XXX */
1686 INSIST(sock
->connecting
);
1688 sock
->references
++; /* keep socket around for this internal event */
1689 iev
->ev_sender
= sock
;
1690 iev
->ev_action
= internal_connect
;
1693 isc_task_send(ev
->ev_sender
, (isc_event_t
**)&iev
);
1697 * Dequeue an item off the given socket's read queue, set the result code
1698 * in the done event to the one provided, and send it to the task it was
1701 * If the event to be sent is on a list, remove it before sending. If
1702 * asked to, send and detach from the socket as well.
1704 * Caller must have the socket locked if the event is attached to the socket.
1707 send_recvdone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1710 task
= (*dev
)->ev_sender
;
1712 (*dev
)->ev_sender
= sock
;
1714 if (ISC_LINK_LINKED(*dev
, ev_link
))
1715 ISC_LIST_DEQUEUE(sock
->recv_list
, *dev
, ev_link
);
1717 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1718 == ISC_SOCKEVENTATTR_ATTACHED
)
1719 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1721 isc_task_send(task
, (isc_event_t
**)dev
);
1725 * See comments for send_recvdone_event() above.
1727 * Caller must have the socket locked if the event is attached to the socket.
1730 send_senddone_event(isc_socket_t
*sock
, isc_socketevent_t
**dev
) {
1733 INSIST(dev
!= NULL
&& *dev
!= NULL
);
1735 task
= (*dev
)->ev_sender
;
1736 (*dev
)->ev_sender
= sock
;
1738 if (ISC_LINK_LINKED(*dev
, ev_link
))
1739 ISC_LIST_DEQUEUE(sock
->send_list
, *dev
, ev_link
);
1741 if (((*dev
)->attributes
& ISC_SOCKEVENTATTR_ATTACHED
)
1742 == ISC_SOCKEVENTATTR_ATTACHED
)
1743 isc_task_sendanddetach(&task
, (isc_event_t
**)dev
);
1745 isc_task_send(task
, (isc_event_t
**)dev
);
1749 * Call accept() on a socket, to get the new file descriptor. The listen
1750 * socket is used as a prototype to create a new isc_socket_t. The new
1751 * socket has one outstanding reference. The task receiving the event
1752 * will be detached from just after the event is delivered.
1754 * On entry to this function, the event delivered is the internal
1755 * readable event, and the first item on the accept_list should be
1756 * the done event we want to send. If the list is empty, this is a no-op,
1757 * so just unlock and return.
1760 internal_accept(isc_task_t
*me
, isc_event_t
*ev
) {
1762 isc_socketmgr_t
*manager
;
1763 isc_socket_newconnev_t
*dev
;
1765 ISC_SOCKADDR_LEN_T addrlen
;
1767 isc_result_t result
= ISC_R_SUCCESS
;
1768 char strbuf
[ISC_STRERRORSIZE
];
1772 sock
= ev
->ev_sender
;
1773 INSIST(VALID_SOCKET(sock
));
1776 socket_log(sock
, NULL
, TRACE
,
1777 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTLOCK
,
1778 "internal_accept called, locked socket");
1780 manager
= sock
->manager
;
1781 INSIST(VALID_MANAGER(manager
));
1783 INSIST(sock
->listener
);
1784 INSIST(sock
->pending_accept
== 1);
1785 sock
->pending_accept
= 0;
1787 INSIST(sock
->references
> 0);
1788 sock
->references
--; /* the internal event is done with this socket */
1789 if (sock
->references
== 0) {
1790 UNLOCK(&sock
->lock
);
1796 * Get the first item off the accept list.
1797 * If it is empty, unlock the socket and return.
1799 dev
= ISC_LIST_HEAD(sock
->accept_list
);
1801 UNLOCK(&sock
->lock
);
1806 * Try to accept the new connection. If the accept fails with
1807 * EAGAIN or EINTR, simply poke the watcher to watch this socket
1808 * again. Also ignore ECONNRESET, which has been reported to
1809 * be spuriously returned on Linux 2.2.19 although it is not
1810 * a documented error for accept(). ECONNABORTED has been
1811 * reported for Solaris 8. The rest are thrown in not because
1812 * we have seen them but because they are ignored by other
1813 * daemons such as BIND 8 and Apache.
1816 addrlen
= sizeof(dev
->newsocket
->address
.type
);
1817 memset(&dev
->newsocket
->address
.type
.sa
, 0, addrlen
);
1818 fd
= accept(sock
->fd
, &dev
->newsocket
->address
.type
.sa
,
1823 * Leave a space for stdio to work in.
1825 if (fd
>= 0 && fd
< 20) {
1827 new = fcntl(fd
, F_DUPFD
, 20);
1836 if (SOFT_ERROR(errno
))
1859 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
1860 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1861 "internal_accept: accept() %s: %s",
1862 isc_msgcat_get(isc_msgcat
,
1868 result
= ISC_R_UNEXPECTED
;
1871 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1872 "internal_accept(): "
1873 "accept() failed to return "
1878 } else if (dev
->newsocket
->address
.type
.sa
.sa_family
!=
1881 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
1882 "internal_accept(): "
1883 "accept() returned peer address "
1884 "family %u (expected %u)",
1885 dev
->newsocket
->address
.
1890 } else if (fd
>= (int)FD_SETSIZE
) {
1891 isc_log_iwrite(isc_lctx
, ISC_LOGCATEGORY_GENERAL
,
1892 ISC_LOGMODULE_SOCKET
, ISC_LOG_ERROR
,
1893 isc_msgcat
, ISC_MSGSET_SOCKET
,
1895 "%s: too many open file descriptors",
1903 dev
->newsocket
->address
.length
= addrlen
;
1904 dev
->newsocket
->pf
= sock
->pf
;
1908 * Pull off the done event.
1910 ISC_LIST_UNLINK(sock
->accept_list
, dev
, ev_link
);
1913 * Poke watcher if there are more pending accepts.
1915 if (!ISC_LIST_EMPTY(sock
->accept_list
))
1916 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
1918 UNLOCK(&sock
->lock
);
1920 if (fd
!= -1 && (make_nonblock(fd
) != ISC_R_SUCCESS
)) {
1923 result
= ISC_R_UNEXPECTED
;
1927 * -1 means the new socket didn't happen.
1930 LOCK(&manager
->lock
);
1931 ISC_LIST_APPEND(manager
->socklist
, dev
->newsocket
, link
);
1933 dev
->newsocket
->fd
= fd
;
1934 dev
->newsocket
->bound
= 1;
1935 dev
->newsocket
->connected
= 1;
1938 * Save away the remote address
1940 dev
->address
= dev
->newsocket
->address
;
1942 manager
->fds
[fd
] = dev
->newsocket
;
1943 manager
->fdstate
[fd
] = MANAGED
;
1944 if (manager
->maxfd
< fd
)
1945 manager
->maxfd
= fd
;
1947 socket_log(sock
, &dev
->newsocket
->address
, CREATION
,
1948 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_ACCEPTEDCXN
,
1949 "accepted connection, new socket %p",
1952 UNLOCK(&manager
->lock
);
1954 dev
->newsocket
->references
--;
1955 free_socket(&dev
->newsocket
);
1959 * Fill in the done event details and send it off.
1961 dev
->result
= result
;
1962 task
= dev
->ev_sender
;
1963 dev
->ev_sender
= sock
;
1965 isc_task_sendanddetach(&task
, ISC_EVENT_PTR(&dev
));
1969 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
1970 UNLOCK(&sock
->lock
);
1975 internal_recv(isc_task_t
*me
, isc_event_t
*ev
) {
1976 isc_socketevent_t
*dev
;
1979 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTR
);
1981 sock
= ev
->ev_sender
;
1982 INSIST(VALID_SOCKET(sock
));
1985 socket_log(sock
, NULL
, IOEVENT
,
1986 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALRECV
,
1987 "internal_recv: task %p got event %p", me
, ev
);
1989 INSIST(sock
->pending_recv
== 1);
1990 sock
->pending_recv
= 0;
1992 INSIST(sock
->references
> 0);
1993 sock
->references
--; /* the internal event is done with this socket */
1994 if (sock
->references
== 0) {
1995 UNLOCK(&sock
->lock
);
2001 * Try to do as much I/O as possible on this socket. There are no
2002 * limits here, currently.
2004 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2005 while (dev
!= NULL
) {
2006 switch (doio_recv(sock
, dev
)) {
2012 * read of 0 means the remote end was closed.
2013 * Run through the event queue and dispatch all
2014 * the events with an EOF result code.
2017 dev
->result
= ISC_R_EOF
;
2018 send_recvdone_event(sock
, &dev
);
2019 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2020 } while (dev
!= NULL
);
2025 send_recvdone_event(sock
, &dev
);
2029 dev
= ISC_LIST_HEAD(sock
->recv_list
);
2033 if (!ISC_LIST_EMPTY(sock
->recv_list
))
2034 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_READ
);
2036 UNLOCK(&sock
->lock
);
2040 internal_send(isc_task_t
*me
, isc_event_t
*ev
) {
2041 isc_socketevent_t
*dev
;
2044 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTW
);
2047 * Find out what socket this is and lock it.
2049 sock
= (isc_socket_t
*)ev
->ev_sender
;
2050 INSIST(VALID_SOCKET(sock
));
2053 socket_log(sock
, NULL
, IOEVENT
,
2054 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_INTERNALSEND
,
2055 "internal_send: task %p got event %p", me
, ev
);
2057 INSIST(sock
->pending_send
== 1);
2058 sock
->pending_send
= 0;
2060 INSIST(sock
->references
> 0);
2061 sock
->references
--; /* the internal event is done with this socket */
2062 if (sock
->references
== 0) {
2063 UNLOCK(&sock
->lock
);
2069 * Try to do as much I/O as possible on this socket. There are no
2070 * limits here, currently.
2072 dev
= ISC_LIST_HEAD(sock
->send_list
);
2073 while (dev
!= NULL
) {
2074 switch (doio_send(sock
, dev
)) {
2080 send_senddone_event(sock
, &dev
);
2084 dev
= ISC_LIST_HEAD(sock
->send_list
);
2088 if (!ISC_LIST_EMPTY(sock
->send_list
))
2089 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_WRITE
);
2091 UNLOCK(&sock
->lock
);
2095 process_fds(isc_socketmgr_t
*manager
, int maxfd
,
2096 fd_set
*readfds
, fd_set
*writefds
)
2100 isc_boolean_t unlock_sock
;
2102 REQUIRE(maxfd
<= (int)FD_SETSIZE
);
2105 * Process read/writes on other fds here. Avoid locking
2106 * and unlocking twice if both reads and writes are possible.
2108 for (i
= 0 ; i
< maxfd
; i
++) {
2109 #ifdef ISC_PLATFORM_USETHREADS
2110 if (i
== manager
->pipe_fds
[0] || i
== manager
->pipe_fds
[1])
2112 #endif /* ISC_PLATFORM_USETHREADS */
2114 if (manager
->fdstate
[i
] == CLOSE_PENDING
) {
2115 manager
->fdstate
[i
] = CLOSED
;
2116 FD_CLR(i
, &manager
->read_fds
);
2117 FD_CLR(i
, &manager
->write_fds
);
2124 sock
= manager
->fds
[i
];
2125 unlock_sock
= ISC_FALSE
;
2126 if (FD_ISSET(i
, readfds
)) {
2128 FD_CLR(i
, &manager
->read_fds
);
2131 unlock_sock
= ISC_TRUE
;
2133 if (!SOCK_DEAD(sock
)) {
2135 dispatch_accept(sock
);
2137 dispatch_recv(sock
);
2139 FD_CLR(i
, &manager
->read_fds
);
2142 if (FD_ISSET(i
, writefds
)) {
2144 FD_CLR(i
, &manager
->write_fds
);
2148 unlock_sock
= ISC_TRUE
;
2151 if (!SOCK_DEAD(sock
)) {
2152 if (sock
->connecting
)
2153 dispatch_connect(sock
);
2155 dispatch_send(sock
);
2157 FD_CLR(i
, &manager
->write_fds
);
2160 UNLOCK(&sock
->lock
);
2164 #ifdef ISC_PLATFORM_USETHREADS
2166 * This is the thread that will loop forever, always in a select or poll
2169 * When select returns something to do, track down what thread gets to do
2170 * this I/O and post the event to it.
2172 static isc_threadresult_t
2173 watcher(void *uap
) {
2174 isc_socketmgr_t
*manager
= uap
;
2182 char strbuf
[ISC_STRERRORSIZE
];
2185 * Get the control fd here. This will never change.
2187 LOCK(&manager
->lock
);
2188 ctlfd
= manager
->pipe_fds
[0];
2193 readfds
= manager
->read_fds
;
2194 writefds
= manager
->write_fds
;
2195 maxfd
= manager
->maxfd
+ 1;
2197 UNLOCK(&manager
->lock
);
2199 cc
= select(maxfd
, &readfds
, &writefds
, NULL
, NULL
);
2201 if (!SOFT_ERROR(errno
)) {
2202 isc__strerror(errno
, strbuf
,
2204 FATAL_ERROR(__FILE__
, __LINE__
,
2206 isc_msgcat_get(isc_msgcat
,
2214 LOCK(&manager
->lock
);
2219 * Process reads on internal, control fd.
2221 if (FD_ISSET(ctlfd
, &readfds
)) {
2223 select_readmsg(manager
, &fd
, &msg
);
2225 manager_log(manager
, IOEVENT
,
2226 isc_msgcat_get(isc_msgcat
,
2229 "watcher got message %d"),
2235 if (msg
== SELECT_POKE_NOTHING
)
2239 * Handle shutdown message. We really should
2240 * jump out of this loop right away, but
2241 * it doesn't matter if we have to do a little
2244 if (msg
== SELECT_POKE_SHUTDOWN
) {
2251 * This is a wakeup on a socket. Look
2252 * at the event queue for both read and write,
2253 * and decide if we need to watch on it now
2256 wakeup_socket(manager
, fd
, msg
);
2260 process_fds(manager
, maxfd
, &readfds
, &writefds
);
2263 manager_log(manager
, TRACE
,
2264 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2265 ISC_MSG_EXITING
, "watcher exiting"));
2267 UNLOCK(&manager
->lock
);
2268 return ((isc_threadresult_t
)0);
2270 #endif /* ISC_PLATFORM_USETHREADS */
2273 * Create a new socket manager.
2276 isc_socketmgr_create(isc_mem_t
*mctx
, isc_socketmgr_t
**managerp
) {
2277 isc_socketmgr_t
*manager
;
2278 #ifdef ISC_PLATFORM_USETHREADS
2279 char strbuf
[ISC_STRERRORSIZE
];
2282 REQUIRE(managerp
!= NULL
&& *managerp
== NULL
);
2284 #ifndef ISC_PLATFORM_USETHREADS
2285 if (socketmgr
!= NULL
) {
2287 *managerp
= socketmgr
;
2288 return (ISC_R_SUCCESS
);
2290 #endif /* ISC_PLATFORM_USETHREADS */
2292 manager
= isc_mem_get(mctx
, sizeof(*manager
));
2293 if (manager
== NULL
)
2294 return (ISC_R_NOMEMORY
);
2296 manager
->magic
= SOCKET_MANAGER_MAGIC
;
2297 manager
->mctx
= NULL
;
2298 memset(manager
->fds
, 0, sizeof(manager
->fds
));
2299 ISC_LIST_INIT(manager
->socklist
);
2300 if (isc_mutex_init(&manager
->lock
) != ISC_R_SUCCESS
) {
2301 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2302 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2303 "isc_mutex_init() %s",
2304 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2305 ISC_MSG_FAILED
, "failed"));
2306 return (ISC_R_UNEXPECTED
);
2308 #ifdef ISC_PLATFORM_USETHREADS
2309 if (isc_condition_init(&manager
->shutdown_ok
) != ISC_R_SUCCESS
) {
2310 DESTROYLOCK(&manager
->lock
);
2311 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2312 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2313 "isc_condition_init() %s",
2314 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2315 ISC_MSG_FAILED
, "failed"));
2316 return (ISC_R_UNEXPECTED
);
2320 * Create the special fds that will be used to wake up the
2321 * select/poll loop when something internal needs to be done.
2323 if (pipe(manager
->pipe_fds
) != 0) {
2324 DESTROYLOCK(&manager
->lock
);
2325 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2326 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2327 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2329 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2330 ISC_MSG_FAILED
, "failed"),
2333 return (ISC_R_UNEXPECTED
);
2336 RUNTIME_CHECK(make_nonblock(manager
->pipe_fds
[0]) == ISC_R_SUCCESS
);
2338 RUNTIME_CHECK(make_nonblock(manager
->pipe_fds
[1]) == ISC_R_SUCCESS
);
2340 #else /* ISC_PLATFORM_USETHREADS */
2342 #endif /* ISC_PLATFORM_USETHREADS */
2345 * Set up initial state for the select loop
2347 FD_ZERO(&manager
->read_fds
);
2348 FD_ZERO(&manager
->write_fds
);
2349 #ifdef ISC_PLATFORM_USETHREADS
2350 FD_SET(manager
->pipe_fds
[0], &manager
->read_fds
);
2351 manager
->maxfd
= manager
->pipe_fds
[0];
2352 #else /* ISC_PLATFORM_USETHREADS */
2354 #endif /* ISC_PLATFORM_USETHREADS */
2355 memset(manager
->fdstate
, 0, sizeof(manager
->fdstate
));
2357 #ifdef ISC_PLATFORM_USETHREADS
2359 * Start up the select/poll thread.
2361 if (isc_thread_create(watcher
, manager
, &manager
->watcher
) !=
2363 close(manager
->pipe_fds
[0]);
2364 close(manager
->pipe_fds
[1]);
2365 DESTROYLOCK(&manager
->lock
);
2366 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2367 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2368 "isc_thread_create() %s",
2369 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2370 ISC_MSG_FAILED
, "failed"));
2371 return (ISC_R_UNEXPECTED
);
2373 #endif /* ISC_PLATFORM_USETHREADS */
2374 isc_mem_attach(mctx
, &manager
->mctx
);
2376 #ifndef ISC_PLATFORM_USETHREADS
2377 socketmgr
= manager
;
2378 #endif /* ISC_PLATFORM_USETHREADS */
2379 *managerp
= manager
;
2381 return (ISC_R_SUCCESS
);
2385 isc_socketmgr_destroy(isc_socketmgr_t
**managerp
) {
2386 isc_socketmgr_t
*manager
;
2391 * Destroy a socket manager.
2394 REQUIRE(managerp
!= NULL
);
2395 manager
= *managerp
;
2396 REQUIRE(VALID_MANAGER(manager
));
2398 #ifndef ISC_PLATFORM_USETHREADS
2399 if (manager
->refs
> 1) {
2404 #endif /* ISC_PLATFORM_USETHREADS */
2406 LOCK(&manager
->lock
);
2408 #ifdef ISC_PLATFORM_USETHREADS
2410 * Wait for all sockets to be destroyed.
2412 while (!ISC_LIST_EMPTY(manager
->socklist
)) {
2413 manager_log(manager
, CREATION
,
2414 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2415 ISC_MSG_SOCKETSREMAIN
,
2417 WAIT(&manager
->shutdown_ok
, &manager
->lock
);
2419 #else /* ISC_PLATFORM_USETHREADS */
2421 * Hope all sockets have been destroyed.
2423 if (!ISC_LIST_EMPTY(manager
->socklist
)) {
2424 manager_log(manager
, CREATION
,
2425 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_SOCKET
,
2426 ISC_MSG_SOCKETSREMAIN
,
2430 #endif /* ISC_PLATFORM_USETHREADS */
2432 UNLOCK(&manager
->lock
);
2435 * Here, poke our select/poll thread. Do this by sending it a
2436 * SELECT_POKE_SHUTDOWN message via select_poke().
2437 * This is currently a no-op in the non-threaded case.
2439 select_poke(manager
, 0, SELECT_POKE_SHUTDOWN
);
2441 #ifdef ISC_PLATFORM_USETHREADS
2443 * Wait for thread to exit.
2445 if (isc_thread_join(manager
->watcher
, NULL
) != ISC_R_SUCCESS
)
2446 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2447 "isc_thread_join() %s",
2448 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2449 ISC_MSG_FAILED
, "failed"));
2450 #endif /* ISC_PLATFORM_USETHREADS */
2455 #ifdef ISC_PLATFORM_USETHREADS
2456 close(manager
->pipe_fds
[0]);
2457 close(manager
->pipe_fds
[1]);
2458 (void)isc_condition_destroy(&manager
->shutdown_ok
);
2459 #endif /* ISC_PLATFORM_USETHREADS */
2461 for (i
= 0 ; i
< (int)FD_SETSIZE
; i
++)
2462 if (manager
->fdstate
[i
] == CLOSE_PENDING
)
2465 DESTROYLOCK(&manager
->lock
);
2467 mctx
= manager
->mctx
;
2468 isc_mem_put(mctx
, manager
, sizeof(*manager
));
2470 isc_mem_detach(&mctx
);
2476 socket_recv(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2480 isc_boolean_t have_lock
= ISC_FALSE
;
2481 isc_task_t
*ntask
= NULL
;
2482 isc_result_t result
= ISC_R_SUCCESS
;
2484 dev
->ev_sender
= task
;
2486 if (sock
->type
== isc_sockettype_udp
) {
2487 io_state
= doio_recv(sock
, dev
);
2490 have_lock
= ISC_TRUE
;
2492 if (ISC_LIST_EMPTY(sock
->recv_list
))
2493 io_state
= doio_recv(sock
, dev
);
2495 io_state
= DOIO_SOFT
;
2501 * We couldn't read all or part of the request right now, so
2504 * Attach to socket and to task
2506 isc_task_attach(task
, &ntask
);
2507 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2511 have_lock
= ISC_TRUE
;
2515 * Enqueue the request. If the socket was previously not being
2516 * watched, poke the watcher to start paying attention to it.
2518 if (ISC_LIST_EMPTY(sock
->recv_list
))
2519 select_poke(sock
->manager
, sock
->fd
, SELECT_POKE_READ
);
2520 ISC_LIST_ENQUEUE(sock
->recv_list
, dev
, ev_link
);
2522 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
2523 "socket_recv: event %p -> task %p",
2526 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2527 result
= ISC_R_INPROGRESS
;
2531 dev
->result
= ISC_R_EOF
;
2536 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) == 0)
2537 send_recvdone_event(sock
, &dev
);
2542 UNLOCK(&sock
->lock
);
2548 isc_socket_recvv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2549 unsigned int minimum
, isc_task_t
*task
,
2550 isc_taskaction_t action
, const void *arg
)
2552 isc_socketevent_t
*dev
;
2553 isc_socketmgr_t
*manager
;
2554 unsigned int iocount
;
2555 isc_buffer_t
*buffer
;
2557 REQUIRE(VALID_SOCKET(sock
));
2558 REQUIRE(buflist
!= NULL
);
2559 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2560 REQUIRE(task
!= NULL
);
2561 REQUIRE(action
!= NULL
);
2563 manager
= sock
->manager
;
2564 REQUIRE(VALID_MANAGER(manager
));
2566 iocount
= isc_bufferlist_availablecount(buflist
);
2567 REQUIRE(iocount
> 0);
2569 INSIST(sock
->bound
);
2571 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2573 return (ISC_R_NOMEMORY
);
2577 * UDP sockets are always partial read
2579 if (sock
->type
== isc_sockettype_udp
)
2583 dev
->minimum
= iocount
;
2585 dev
->minimum
= minimum
;
2589 * Move each buffer from the passed in list to our internal one.
2591 buffer
= ISC_LIST_HEAD(*buflist
);
2592 while (buffer
!= NULL
) {
2593 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2594 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2595 buffer
= ISC_LIST_HEAD(*buflist
);
2598 return (socket_recv(sock
, dev
, task
, 0));
2602 isc_socket_recv(isc_socket_t
*sock
, isc_region_t
*region
, unsigned int minimum
,
2603 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2605 isc_socketevent_t
*dev
;
2606 isc_socketmgr_t
*manager
;
2608 REQUIRE(VALID_SOCKET(sock
));
2609 REQUIRE(action
!= NULL
);
2611 manager
= sock
->manager
;
2612 REQUIRE(VALID_MANAGER(manager
));
2614 INSIST(sock
->bound
);
2616 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_RECVDONE
, action
, arg
);
2618 return (ISC_R_NOMEMORY
);
2620 return (isc_socket_recv2(sock
, region
, minimum
, task
, dev
, 0));
2624 isc_socket_recv2(isc_socket_t
*sock
, isc_region_t
*region
,
2625 unsigned int minimum
, isc_task_t
*task
,
2626 isc_socketevent_t
*event
, unsigned int flags
)
2628 event
->ev_sender
= sock
;
2629 event
->result
= ISC_R_UNEXPECTED
;
2630 ISC_LIST_INIT(event
->bufferlist
);
2631 event
->region
= *region
;
2634 event
->attributes
= 0;
2637 * UDP sockets are always partial read.
2639 if (sock
->type
== isc_sockettype_udp
)
2643 event
->minimum
= region
->length
;
2645 event
->minimum
= minimum
;
2648 return (socket_recv(sock
, event
, task
, flags
));
2652 socket_send(isc_socket_t
*sock
, isc_socketevent_t
*dev
, isc_task_t
*task
,
2653 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2657 isc_boolean_t have_lock
= ISC_FALSE
;
2658 isc_task_t
*ntask
= NULL
;
2659 isc_result_t result
= ISC_R_SUCCESS
;
2661 dev
->ev_sender
= task
;
2663 set_dev_address(address
, sock
, dev
);
2664 if (pktinfo
!= NULL
) {
2665 socket_log(sock
, NULL
, TRACE
, isc_msgcat
, ISC_MSGSET_SOCKET
,
2666 ISC_MSG_PKTINFOPROVIDED
,
2667 "pktinfo structure provided, ifindex %u (set to 0)",
2668 pktinfo
->ipi6_ifindex
);
2670 dev
->attributes
|= ISC_SOCKEVENTATTR_PKTINFO
;
2671 dev
->pktinfo
= *pktinfo
;
2673 * Set the pktinfo index to 0 here, to let the kernel decide
2674 * what interface it should send on.
2676 dev
->pktinfo
.ipi6_ifindex
= 0;
2679 if (sock
->type
== isc_sockettype_udp
)
2680 io_state
= doio_send(sock
, dev
);
2683 have_lock
= ISC_TRUE
;
2685 if (ISC_LIST_EMPTY(sock
->send_list
))
2686 io_state
= doio_send(sock
, dev
);
2688 io_state
= DOIO_SOFT
;
2694 * We couldn't send all or part of the request right now, so
2695 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2697 if ((flags
& ISC_SOCKFLAG_NORETRY
) == 0) {
2698 isc_task_attach(task
, &ntask
);
2699 dev
->attributes
|= ISC_SOCKEVENTATTR_ATTACHED
;
2703 have_lock
= ISC_TRUE
;
2707 * Enqueue the request. If the socket was previously
2708 * not being watched, poke the watcher to start
2709 * paying attention to it.
2711 if (ISC_LIST_EMPTY(sock
->send_list
))
2712 select_poke(sock
->manager
, sock
->fd
,
2714 ISC_LIST_ENQUEUE(sock
->send_list
, dev
, ev_link
);
2716 socket_log(sock
, NULL
, EVENT
, NULL
, 0, 0,
2717 "socket_send: event %p -> task %p",
2720 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) != 0)
2721 result
= ISC_R_INPROGRESS
;
2727 if ((flags
& ISC_SOCKFLAG_IMMEDIATE
) == 0)
2728 send_senddone_event(sock
, &dev
);
2733 UNLOCK(&sock
->lock
);
2739 isc_socket_send(isc_socket_t
*sock
, isc_region_t
*region
,
2740 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2743 * REQUIRE() checking is performed in isc_socket_sendto().
2745 return (isc_socket_sendto(sock
, region
, task
, action
, arg
, NULL
,
2750 isc_socket_sendto(isc_socket_t
*sock
, isc_region_t
*region
,
2751 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2752 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2754 isc_socketevent_t
*dev
;
2755 isc_socketmgr_t
*manager
;
2757 REQUIRE(VALID_SOCKET(sock
));
2758 REQUIRE(region
!= NULL
);
2759 REQUIRE(task
!= NULL
);
2760 REQUIRE(action
!= NULL
);
2762 manager
= sock
->manager
;
2763 REQUIRE(VALID_MANAGER(manager
));
2765 INSIST(sock
->bound
);
2767 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2769 return (ISC_R_NOMEMORY
);
2772 dev
->region
= *region
;
2774 return (socket_send(sock
, dev
, task
, address
, pktinfo
, 0));
2778 isc_socket_sendv(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2779 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2781 return (isc_socket_sendtov(sock
, buflist
, task
, action
, arg
, NULL
,
2786 isc_socket_sendtov(isc_socket_t
*sock
, isc_bufferlist_t
*buflist
,
2787 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
,
2788 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
)
2790 isc_socketevent_t
*dev
;
2791 isc_socketmgr_t
*manager
;
2792 unsigned int iocount
;
2793 isc_buffer_t
*buffer
;
2795 REQUIRE(VALID_SOCKET(sock
));
2796 REQUIRE(buflist
!= NULL
);
2797 REQUIRE(!ISC_LIST_EMPTY(*buflist
));
2798 REQUIRE(task
!= NULL
);
2799 REQUIRE(action
!= NULL
);
2801 manager
= sock
->manager
;
2802 REQUIRE(VALID_MANAGER(manager
));
2804 iocount
= isc_bufferlist_usedcount(buflist
);
2805 REQUIRE(iocount
> 0);
2807 dev
= allocate_socketevent(sock
, ISC_SOCKEVENT_SENDDONE
, action
, arg
);
2809 return (ISC_R_NOMEMORY
);
2813 * Move each buffer from the passed in list to our internal one.
2815 buffer
= ISC_LIST_HEAD(*buflist
);
2816 while (buffer
!= NULL
) {
2817 ISC_LIST_DEQUEUE(*buflist
, buffer
, link
);
2818 ISC_LIST_ENQUEUE(dev
->bufferlist
, buffer
, link
);
2819 buffer
= ISC_LIST_HEAD(*buflist
);
2822 return (socket_send(sock
, dev
, task
, address
, pktinfo
, 0));
2826 isc_socket_sendto2(isc_socket_t
*sock
, isc_region_t
*region
,
2828 isc_sockaddr_t
*address
, struct in6_pktinfo
*pktinfo
,
2829 isc_socketevent_t
*event
, unsigned int flags
)
2831 REQUIRE((flags
& ~(ISC_SOCKFLAG_IMMEDIATE
|ISC_SOCKFLAG_NORETRY
)) == 0);
2832 if ((flags
& ISC_SOCKFLAG_NORETRY
) != 0)
2833 REQUIRE(sock
->type
== isc_sockettype_udp
);
2834 event
->ev_sender
= sock
;
2835 event
->result
= ISC_R_UNEXPECTED
;
2836 ISC_LIST_INIT(event
->bufferlist
);
2837 event
->region
= *region
;
2840 event
->attributes
= 0;
2842 return (socket_send(sock
, event
, task
, address
, pktinfo
, flags
));
2846 isc_socket_bind(isc_socket_t
*sock
, isc_sockaddr_t
*sockaddr
) {
2847 char strbuf
[ISC_STRERRORSIZE
];
2852 INSIST(!sock
->bound
);
2854 if (sock
->pf
!= sockaddr
->type
.sa
.sa_family
) {
2855 UNLOCK(&sock
->lock
);
2856 return (ISC_R_FAMILYMISMATCH
);
2859 * Only set SO_REUSEADDR when we want a specific port.
2861 if (isc_sockaddr_getport(sockaddr
) != (in_port_t
)0 &&
2862 setsockopt(sock
->fd
, SOL_SOCKET
, SO_REUSEADDR
, (void *)&on
,
2864 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
2865 "setsockopt(%d) %s", sock
->fd
,
2866 isc_msgcat_get(isc_msgcat
, ISC_MSGSET_GENERAL
,
2867 ISC_MSG_FAILED
, "failed"));
2870 if (bind(sock
->fd
, &sockaddr
->type
.sa
, sockaddr
->length
) < 0) {
2871 UNLOCK(&sock
->lock
);
2874 return (ISC_R_NOPERM
);
2876 return (ISC_R_ADDRNOTAVAIL
);
2878 return (ISC_R_ADDRINUSE
);
2880 return (ISC_R_BOUND
);
2882 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2883 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "bind: %s",
2885 return (ISC_R_UNEXPECTED
);
2889 socket_log(sock
, sockaddr
, TRACE
,
2890 isc_msgcat
, ISC_MSGSET_SOCKET
, ISC_MSG_BOUND
, "bound");
2893 UNLOCK(&sock
->lock
);
2894 return (ISC_R_SUCCESS
);
2898 * Set up to listen on a given socket. We do this by creating an internal
2899 * event that will be dispatched when the socket has read activity. The
2900 * watcher will send the internal event to the task when there is a new
2903 * Unlike in read, we don't preallocate a done event here. Every time there
2904 * is a new connection we'll have to allocate a new one anyway, so we might
2905 * as well keep things simple rather than having to track them.
2908 isc_socket_listen(isc_socket_t
*sock
, unsigned int backlog
) {
2909 char strbuf
[ISC_STRERRORSIZE
];
2911 REQUIRE(VALID_SOCKET(sock
));
2915 REQUIRE(!sock
->listener
);
2916 REQUIRE(sock
->bound
);
2917 REQUIRE(sock
->type
== isc_sockettype_tcp
);
2920 backlog
= SOMAXCONN
;
2922 if (listen(sock
->fd
, (int)backlog
) < 0) {
2923 UNLOCK(&sock
->lock
);
2924 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
2926 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "listen: %s", strbuf
);
2928 return (ISC_R_UNEXPECTED
);
2933 UNLOCK(&sock
->lock
);
2934 return (ISC_R_SUCCESS
);
2938 * This should try to do agressive accept() XXXMLG
2941 isc_socket_accept(isc_socket_t
*sock
,
2942 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
2944 isc_socket_newconnev_t
*dev
;
2945 isc_socketmgr_t
*manager
;
2946 isc_task_t
*ntask
= NULL
;
2947 isc_socket_t
*nsock
;
2949 isc_boolean_t do_poke
= ISC_FALSE
;
2951 REQUIRE(VALID_SOCKET(sock
));
2952 manager
= sock
->manager
;
2953 REQUIRE(VALID_MANAGER(manager
));
2957 REQUIRE(sock
->listener
);
2960 * Sender field is overloaded here with the task we will be sending
2961 * this event to. Just before the actual event is delivered the
2962 * actual ev_sender will be touched up to be the socket.
2964 dev
= (isc_socket_newconnev_t
*)
2965 isc_event_allocate(manager
->mctx
, task
, ISC_SOCKEVENT_NEWCONN
,
2966 action
, arg
, sizeof(*dev
));
2968 UNLOCK(&sock
->lock
);
2969 return (ISC_R_NOMEMORY
);
2971 ISC_LINK_INIT(dev
, ev_link
);
2973 ret
= allocate_socket(manager
, sock
->type
, &nsock
);
2974 if (ret
!= ISC_R_SUCCESS
) {
2975 isc_event_free(ISC_EVENT_PTR(&dev
));
2976 UNLOCK(&sock
->lock
);
2981 * Attach to socket and to task.
2983 isc_task_attach(task
, &ntask
);
2984 nsock
->references
++;
2986 dev
->ev_sender
= ntask
;
2987 dev
->newsocket
= nsock
;
2990 * Poke watcher here. We still have the socket locked, so there
2991 * is no race condition. We will keep the lock for such a short
2992 * bit of time waking it up now or later won't matter all that much.
2994 if (ISC_LIST_EMPTY(sock
->accept_list
))
2997 ISC_LIST_ENQUEUE(sock
->accept_list
, dev
, ev_link
);
3000 select_poke(manager
, sock
->fd
, SELECT_POKE_ACCEPT
);
3002 UNLOCK(&sock
->lock
);
3003 return (ISC_R_SUCCESS
);
3007 isc_socket_connect(isc_socket_t
*sock
, isc_sockaddr_t
*addr
,
3008 isc_task_t
*task
, isc_taskaction_t action
, const void *arg
)
3010 isc_socket_connev_t
*dev
;
3011 isc_task_t
*ntask
= NULL
;
3012 isc_socketmgr_t
*manager
;
3014 char strbuf
[ISC_STRERRORSIZE
];
3016 REQUIRE(VALID_SOCKET(sock
));
3017 REQUIRE(addr
!= NULL
);
3018 REQUIRE(task
!= NULL
);
3019 REQUIRE(action
!= NULL
);
3021 manager
= sock
->manager
;
3022 REQUIRE(VALID_MANAGER(manager
));
3023 REQUIRE(addr
!= NULL
);
3025 if (isc_sockaddr_ismulticast(addr
))
3026 return (ISC_R_MULTICAST
);
3030 REQUIRE(!sock
->connecting
);
3032 dev
= (isc_socket_connev_t
*)isc_event_allocate(manager
->mctx
, sock
,
3033 ISC_SOCKEVENT_CONNECT
,
3037 UNLOCK(&sock
->lock
);
3038 return (ISC_R_NOMEMORY
);
3040 ISC_LINK_INIT(dev
, ev_link
);
3043 * Try to do the connect right away, as there can be only one
3044 * outstanding, and it might happen to complete.
3046 sock
->address
= *addr
;
3047 cc
= connect(sock
->fd
, &addr
->type
.sa
, addr
->length
);
3049 if (SOFT_ERROR(errno
) || errno
== EINPROGRESS
)
3053 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
3054 ERROR_MATCH(EACCES
, ISC_R_NOPERM
);
3055 ERROR_MATCH(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
3056 ERROR_MATCH(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
3057 ERROR_MATCH(ECONNREFUSED
, ISC_R_CONNREFUSED
);
3058 ERROR_MATCH(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
3060 ERROR_MATCH(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
3062 ERROR_MATCH(ENETUNREACH
, ISC_R_NETUNREACH
);
3063 ERROR_MATCH(ENOBUFS
, ISC_R_NORESOURCES
);
3064 ERROR_MATCH(EPERM
, ISC_R_HOSTUNREACH
);
3065 ERROR_MATCH(EPIPE
, ISC_R_NOTCONNECTED
);
3069 sock
->connected
= 0;
3071 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3072 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "%d/%s", errno
, strbuf
);
3074 UNLOCK(&sock
->lock
);
3075 isc_event_free(ISC_EVENT_PTR(&dev
));
3076 return (ISC_R_UNEXPECTED
);
3079 sock
->connected
= 0;
3080 isc_task_send(task
, ISC_EVENT_PTR(&dev
));
3082 UNLOCK(&sock
->lock
);
3083 return (ISC_R_SUCCESS
);
3087 * If connect completed, fire off the done event.
3090 sock
->connected
= 1;
3092 dev
->result
= ISC_R_SUCCESS
;
3093 isc_task_send(task
, ISC_EVENT_PTR(&dev
));
3095 UNLOCK(&sock
->lock
);
3096 return (ISC_R_SUCCESS
);
3104 isc_task_attach(task
, &ntask
);
3106 sock
->connecting
= 1;
3108 dev
->ev_sender
= ntask
;
3111 * Poke watcher here. We still have the socket locked, so there
3112 * is no race condition. We will keep the lock for such a short
3113 * bit of time waking it up now or later won't matter all that much.
3115 if (sock
->connect_ev
== NULL
)
3116 select_poke(manager
, sock
->fd
, SELECT_POKE_CONNECT
);
3118 sock
->connect_ev
= dev
;
3120 UNLOCK(&sock
->lock
);
3121 return (ISC_R_SUCCESS
);
3125 * Called when a socket with a pending connect() finishes.
3128 internal_connect(isc_task_t
*me
, isc_event_t
*ev
) {
3130 isc_socket_connev_t
*dev
;
3133 ISC_SOCKADDR_LEN_T optlen
;
3134 char strbuf
[ISC_STRERRORSIZE
];
3137 INSIST(ev
->ev_type
== ISC_SOCKEVENT_INTW
);
3139 sock
= ev
->ev_sender
;
3140 INSIST(VALID_SOCKET(sock
));
3145 * When the internal event was sent the reference count was bumped
3146 * to keep the socket around for us. Decrement the count here.
3148 INSIST(sock
->references
> 0);
3150 if (sock
->references
== 0) {
3151 UNLOCK(&sock
->lock
);
3157 * Has this event been canceled?
3159 dev
= sock
->connect_ev
;
3161 INSIST(!sock
->connecting
);
3162 UNLOCK(&sock
->lock
);
3166 INSIST(sock
->connecting
);
3167 sock
->connecting
= 0;
3170 * Get any possible error status here.
3172 optlen
= sizeof(cc
);
3173 if (getsockopt(sock
->fd
, SOL_SOCKET
, SO_ERROR
,
3174 (void *)&cc
, (void *)&optlen
) < 0)
3181 * If the error is EAGAIN, just re-select on this
3182 * fd and pretend nothing strange happened.
3184 if (SOFT_ERROR(errno
) || errno
== EINPROGRESS
) {
3185 sock
->connecting
= 1;
3186 select_poke(sock
->manager
, sock
->fd
,
3187 SELECT_POKE_CONNECT
);
3188 UNLOCK(&sock
->lock
);
3194 * Translate other errors into ISC_R_* flavors.
3197 #define ERROR_MATCH(a, b) case a: dev->result = b; break;
3198 ERROR_MATCH(EACCES
, ISC_R_NOPERM
);
3199 ERROR_MATCH(EADDRNOTAVAIL
, ISC_R_ADDRNOTAVAIL
);
3200 ERROR_MATCH(EAFNOSUPPORT
, ISC_R_ADDRNOTAVAIL
);
3201 ERROR_MATCH(ECONNREFUSED
, ISC_R_CONNREFUSED
);
3202 ERROR_MATCH(EHOSTUNREACH
, ISC_R_HOSTUNREACH
);
3204 ERROR_MATCH(EHOSTDOWN
, ISC_R_HOSTUNREACH
);
3206 ERROR_MATCH(ENETUNREACH
, ISC_R_NETUNREACH
);
3207 ERROR_MATCH(ENOBUFS
, ISC_R_NORESOURCES
);
3208 ERROR_MATCH(EPERM
, ISC_R_HOSTUNREACH
);
3209 ERROR_MATCH(EPIPE
, ISC_R_NOTCONNECTED
);
3210 ERROR_MATCH(ETIMEDOUT
, ISC_R_TIMEDOUT
);
3213 dev
->result
= ISC_R_UNEXPECTED
;
3214 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3215 UNEXPECTED_ERROR(__FILE__
, __LINE__
,
3216 "internal_connect: connect() %s",
3220 dev
->result
= ISC_R_SUCCESS
;
3221 sock
->connected
= 1;
3225 sock
->connect_ev
= NULL
;
3227 UNLOCK(&sock
->lock
);
3229 task
= dev
->ev_sender
;
3230 dev
->ev_sender
= sock
;
3231 isc_task_sendanddetach(&task
, ISC_EVENT_PTR(&dev
));
3235 isc_socket_getpeername(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3238 REQUIRE(VALID_SOCKET(sock
));
3239 REQUIRE(addressp
!= NULL
);
3243 if (sock
->connected
) {
3244 *addressp
= sock
->address
;
3245 ret
= ISC_R_SUCCESS
;
3247 ret
= ISC_R_NOTCONNECTED
;
3250 UNLOCK(&sock
->lock
);
3256 isc_socket_getsockname(isc_socket_t
*sock
, isc_sockaddr_t
*addressp
) {
3257 ISC_SOCKADDR_LEN_T len
;
3259 char strbuf
[ISC_STRERRORSIZE
];
3261 REQUIRE(VALID_SOCKET(sock
));
3262 REQUIRE(addressp
!= NULL
);
3267 ret
= ISC_R_NOTBOUND
;
3271 ret
= ISC_R_SUCCESS
;
3273 len
= sizeof(addressp
->type
);
3274 if (getsockname(sock
->fd
, &addressp
->type
.sa
, (void *)&len
) < 0) {
3275 isc__strerror(errno
, strbuf
, sizeof(strbuf
));
3276 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "getsockname: %s",
3278 ret
= ISC_R_UNEXPECTED
;
3281 addressp
->length
= (unsigned int)len
;
3284 UNLOCK(&sock
->lock
);
3290 * Run through the list of events on this socket, and cancel the ones
3291 * queued for task "task" of type "how". "how" is a bitmask.
3294 isc_socket_cancel(isc_socket_t
*sock
, isc_task_t
*task
, unsigned int how
) {
3296 REQUIRE(VALID_SOCKET(sock
));
3299 * Quick exit if there is nothing to do. Don't even bother locking
3308 * All of these do the same thing, more or less.
3310 * o If the internal event is marked as "posted" try to
3311 * remove it from the task's queue. If this fails, mark it
3312 * as canceled instead, and let the task clean it up later.
3313 * o For each I/O request for that task of that type, post
3314 * its done event with status of "ISC_R_CANCELED".
3315 * o Reset any state needed.
3317 if (((how
& ISC_SOCKCANCEL_RECV
) == ISC_SOCKCANCEL_RECV
)
3318 && !ISC_LIST_EMPTY(sock
->recv_list
)) {
3319 isc_socketevent_t
*dev
;
3320 isc_socketevent_t
*next
;
3321 isc_task_t
*current_task
;
3323 dev
= ISC_LIST_HEAD(sock
->recv_list
);
3325 while (dev
!= NULL
) {
3326 current_task
= dev
->ev_sender
;
3327 next
= ISC_LIST_NEXT(dev
, ev_link
);
3329 if ((task
== NULL
) || (task
== current_task
)) {
3330 dev
->result
= ISC_R_CANCELED
;
3331 send_recvdone_event(sock
, &dev
);
3337 if (((how
& ISC_SOCKCANCEL_SEND
) == ISC_SOCKCANCEL_SEND
)
3338 && !ISC_LIST_EMPTY(sock
->send_list
)) {
3339 isc_socketevent_t
*dev
;
3340 isc_socketevent_t
*next
;
3341 isc_task_t
*current_task
;
3343 dev
= ISC_LIST_HEAD(sock
->send_list
);
3345 while (dev
!= NULL
) {
3346 current_task
= dev
->ev_sender
;
3347 next
= ISC_LIST_NEXT(dev
, ev_link
);
3349 if ((task
== NULL
) || (task
== current_task
)) {
3350 dev
->result
= ISC_R_CANCELED
;
3351 send_senddone_event(sock
, &dev
);
3357 if (((how
& ISC_SOCKCANCEL_ACCEPT
) == ISC_SOCKCANCEL_ACCEPT
)
3358 && !ISC_LIST_EMPTY(sock
->accept_list
)) {
3359 isc_socket_newconnev_t
*dev
;
3360 isc_socket_newconnev_t
*next
;
3361 isc_task_t
*current_task
;
3363 dev
= ISC_LIST_HEAD(sock
->accept_list
);
3364 while (dev
!= NULL
) {
3365 current_task
= dev
->ev_sender
;
3366 next
= ISC_LIST_NEXT(dev
, ev_link
);
3368 if ((task
== NULL
) || (task
== current_task
)) {
3370 ISC_LIST_UNLINK(sock
->accept_list
, dev
,
3373 dev
->newsocket
->references
--;
3374 free_socket(&dev
->newsocket
);
3376 dev
->result
= ISC_R_CANCELED
;
3377 dev
->ev_sender
= sock
;
3378 isc_task_sendanddetach(¤t_task
,
3379 ISC_EVENT_PTR(&dev
));
3387 * Connecting is not a list.
3389 if (((how
& ISC_SOCKCANCEL_CONNECT
) == ISC_SOCKCANCEL_CONNECT
)
3390 && sock
->connect_ev
!= NULL
) {
3391 isc_socket_connev_t
*dev
;
3392 isc_task_t
*current_task
;
3394 INSIST(sock
->connecting
);
3395 sock
->connecting
= 0;
3397 dev
= sock
->connect_ev
;
3398 current_task
= dev
->ev_sender
;
3400 if ((task
== NULL
) || (task
== current_task
)) {
3401 sock
->connect_ev
= NULL
;
3403 dev
->result
= ISC_R_CANCELED
;
3404 dev
->ev_sender
= sock
;
3405 isc_task_sendanddetach(¤t_task
,
3406 ISC_EVENT_PTR(&dev
));
3410 UNLOCK(&sock
->lock
);
3414 isc_socket_gettype(isc_socket_t
*sock
) {
3415 REQUIRE(VALID_SOCKET(sock
));
3417 return (sock
->type
);
3421 isc_socket_isbound(isc_socket_t
*sock
) {
3425 val
= ((sock
->bound
) ? ISC_TRUE
: ISC_FALSE
);
3426 UNLOCK(&sock
->lock
);
3431 #ifndef ISC_PLATFORM_USETHREADS
3433 isc__socketmgr_getfdsets(fd_set
*readset
, fd_set
*writeset
, int *maxfd
) {
3434 if (socketmgr
== NULL
)
3437 *readset
= socketmgr
->read_fds
;
3438 *writeset
= socketmgr
->write_fds
;
3439 *maxfd
= socketmgr
->maxfd
+ 1;
3444 isc__socketmgr_dispatch(fd_set
*readset
, fd_set
*writeset
, int maxfd
) {
3445 isc_socketmgr_t
*manager
= socketmgr
;
3447 if (manager
== NULL
)
3448 return (ISC_R_NOTFOUND
);
3450 process_fds(manager
, maxfd
, readset
, writeset
);
3451 return (ISC_R_SUCCESS
);
3453 #endif /* ISC_PLATFORM_USETHREADS */