Import bind-9.3.4
[dragonfly.git] / contrib / bind-9.3 / lib / isc / unix / socket.c
blobf95e3c8f75d4104a4e87bca38a472a1ab0249b4e
1 /*
2 * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: socket.c,v 1.207.2.19.2.26 2006/05/19 02:53:36 marka Exp $ */
20 #include <config.h>
22 #include <sys/param.h>
23 #include <sys/types.h>
24 #include <sys/socket.h>
25 #include <sys/time.h>
26 #include <sys/uio.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <stddef.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
35 #include <isc/buffer.h>
36 #include <isc/bufferlist.h>
37 #include <isc/condition.h>
38 #include <isc/formatcheck.h>
39 #include <isc/list.h>
40 #include <isc/log.h>
41 #include <isc/mem.h>
42 #include <isc/msgs.h>
43 #include <isc/mutex.h>
44 #include <isc/net.h>
45 #include <isc/platform.h>
46 #include <isc/print.h>
47 #include <isc/region.h>
48 #include <isc/socket.h>
49 #include <isc/strerror.h>
50 #include <isc/task.h>
51 #include <isc/thread.h>
52 #include <isc/util.h>
54 #include "errno2result.h"
56 #ifndef ISC_PLATFORM_USETHREADS
57 #include "socket_p.h"
58 #endif /* ISC_PLATFORM_USETHREADS */
/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 */
64 #ifndef ISC_SOCKADDR_LEN_T
65 #define ISC_SOCKADDR_LEN_T unsigned int
66 #endif
/*
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0.  This is broken, but we have to
 * work around it here.
 */
76 #define SOFT_ERROR(e) ((e) == EAGAIN || \
77 (e) == EWOULDBLOCK || \
78 (e) == EINTR || \
79 (e) == 0)
81 #define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
/*
 * DLVL(90)  --  Function entry/exit and other tracing.
 * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60)  --  Socket data send/receive
 * DLVL(50)  --  Event tracing, including receiving/sending completion events.
 * DLVL(20)  --  Socket creation/destruction.
 */
90 #define TRACE_LEVEL 90
91 #define CORRECTNESS_LEVEL 70
92 #define IOEVENT_LEVEL 60
93 #define EVENT_LEVEL 50
94 #define CREATION_LEVEL 20
96 #define TRACE DLVL(TRACE_LEVEL)
97 #define CORRECTNESS DLVL(CORRECTNESS_LEVEL)
98 #define IOEVENT DLVL(IOEVENT_LEVEL)
99 #define EVENT DLVL(EVENT_LEVEL)
100 #define CREATION DLVL(CREATION_LEVEL)
102 typedef isc_event_t intev_t;
104 #define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o')
105 #define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC)
/*
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
112 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
113 #ifndef USE_CMSG
114 #define USE_CMSG 1
115 #endif
116 #endif
/*
 * NetBSD and FreeBSD can timestamp packets.  XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */
123 #ifdef SO_TIMESTAMP
124 #ifndef USE_CMSG
125 #define USE_CMSG 1
126 #endif
127 #endif
/*
 * The number of times a send operation is repeated if the result is EINTR.
 */
132 #define NRETRIES 10
134 struct isc_socket {
135 /* Not locked. */
136 unsigned int magic;
137 isc_socketmgr_t *manager;
138 isc_mutex_t lock;
139 isc_sockettype_t type;
141 /* Locked by socket lock. */
142 ISC_LINK(isc_socket_t) link;
143 unsigned int references;
144 int fd;
145 int pf;
147 ISC_LIST(isc_socketevent_t) send_list;
148 ISC_LIST(isc_socketevent_t) recv_list;
149 ISC_LIST(isc_socket_newconnev_t) accept_list;
150 isc_socket_connev_t *connect_ev;
153 * Internal events. Posted when a descriptor is readable or
154 * writable. These are statically allocated and never freed.
155 * They will be set to non-purgable before use.
157 intev_t readable_ev;
158 intev_t writable_ev;
160 isc_sockaddr_t address; /* remote address */
162 unsigned int pending_recv : 1,
163 pending_send : 1,
164 pending_accept : 1,
165 listener : 1, /* listener socket */
166 connected : 1,
167 connecting : 1, /* connect pending */
168 bound : 1; /* bound to local addr */
170 #ifdef ISC_NET_RECVOVERFLOW
171 unsigned char overflow; /* used for MSG_TRUNC fake */
172 #endif
174 char *recvcmsgbuf;
175 ISC_SOCKADDR_LEN_T recvcmsgbuflen;
176 char *sendcmsgbuf;
177 ISC_SOCKADDR_LEN_T sendcmsgbuflen;
180 #define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g')
181 #define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
183 struct isc_socketmgr {
184 /* Not locked. */
185 unsigned int magic;
186 isc_mem_t *mctx;
187 isc_mutex_t lock;
188 /* Locked by manager lock. */
189 ISC_LIST(isc_socket_t) socklist;
190 fd_set read_fds;
191 fd_set write_fds;
192 isc_socket_t *fds[FD_SETSIZE];
193 int fdstate[FD_SETSIZE];
194 int maxfd;
195 #ifdef ISC_PLATFORM_USETHREADS
196 isc_thread_t watcher;
197 isc_condition_t shutdown_ok;
198 int pipe_fds[2];
199 #else /* ISC_PLATFORM_USETHREADS */
200 unsigned int refs;
201 #endif /* ISC_PLATFORM_USETHREADS */
204 #ifndef ISC_PLATFORM_USETHREADS
205 static isc_socketmgr_t *socketmgr = NULL;
206 #endif /* ISC_PLATFORM_USETHREADS */
208 #define CLOSED 0 /* this one must be zero */
209 #define MANAGED 1
210 #define CLOSE_PENDING 2
/*
 * send() and recv() iovec counts
 */
215 #define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
216 #ifdef ISC_NET_RECVOVERFLOW
217 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)
218 #else
219 # define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
220 #endif
222 static void send_recvdone_event(isc_socket_t *, isc_socketevent_t **);
223 static void send_senddone_event(isc_socket_t *, isc_socketevent_t **);
224 static void free_socket(isc_socket_t **);
225 static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
226 isc_socket_t **);
227 static void destroy(isc_socket_t **);
228 static void internal_accept(isc_task_t *, isc_event_t *);
229 static void internal_connect(isc_task_t *, isc_event_t *);
230 static void internal_recv(isc_task_t *, isc_event_t *);
231 static void internal_send(isc_task_t *, isc_event_t *);
232 static void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);
233 static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
234 struct msghdr *, struct iovec *, size_t *);
235 static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
236 struct msghdr *, struct iovec *, size_t *);
238 #define SELECT_POKE_SHUTDOWN (-1)
239 #define SELECT_POKE_NOTHING (-2)
240 #define SELECT_POKE_READ (-3)
241 #define SELECT_POKE_ACCEPT (-3) /* Same as _READ */
242 #define SELECT_POKE_WRITE (-4)
243 #define SELECT_POKE_CONNECT (-4) /* Same as _WRITE */
244 #define SELECT_POKE_CLOSE (-5)
246 #define SOCK_DEAD(s) ((s)->references == 0)
248 static void
249 manager_log(isc_socketmgr_t *sockmgr,
250 isc_logcategory_t *category, isc_logmodule_t *module, int level,
251 const char *fmt, ...) ISC_FORMAT_PRINTF(5, 6);
252 static void
253 manager_log(isc_socketmgr_t *sockmgr,
254 isc_logcategory_t *category, isc_logmodule_t *module, int level,
255 const char *fmt, ...)
257 char msgbuf[2048];
258 va_list ap;
260 if (! isc_log_wouldlog(isc_lctx, level))
261 return;
263 va_start(ap, fmt);
264 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
265 va_end(ap);
267 isc_log_write(isc_lctx, category, module, level,
268 "sockmgr %p: %s", sockmgr, msgbuf);
271 static void
272 socket_log(isc_socket_t *sock, isc_sockaddr_t *address,
273 isc_logcategory_t *category, isc_logmodule_t *module, int level,
274 isc_msgcat_t *msgcat, int msgset, int message,
275 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
276 static void
277 socket_log(isc_socket_t *sock, isc_sockaddr_t *address,
278 isc_logcategory_t *category, isc_logmodule_t *module, int level,
279 isc_msgcat_t *msgcat, int msgset, int message,
280 const char *fmt, ...)
282 char msgbuf[2048];
283 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
284 va_list ap;
286 if (! isc_log_wouldlog(isc_lctx, level))
287 return;
289 va_start(ap, fmt);
290 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
291 va_end(ap);
293 if (address == NULL) {
294 isc_log_iwrite(isc_lctx, category, module, level,
295 msgcat, msgset, message,
296 "socket %p: %s", sock, msgbuf);
297 } else {
298 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
299 isc_log_iwrite(isc_lctx, category, module, level,
300 msgcat, msgset, message,
301 "socket %p %s: %s", sock, peerbuf, msgbuf);
305 static void
306 wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) {
307 isc_socket_t *sock;
310 * This is a wakeup on a socket. If the socket is not in the
311 * process of being closed, start watching it for either reads
312 * or writes.
315 INSIST(fd >= 0 && fd < (int)FD_SETSIZE);
317 if (manager->fdstate[fd] == CLOSE_PENDING) {
318 manager->fdstate[fd] = CLOSED;
319 FD_CLR(fd, &manager->read_fds);
320 FD_CLR(fd, &manager->write_fds);
321 (void)close(fd);
322 return;
324 if (manager->fdstate[fd] != MANAGED)
325 return;
327 sock = manager->fds[fd];
330 * Set requested bit.
332 if (msg == SELECT_POKE_READ)
333 FD_SET(sock->fd, &manager->read_fds);
334 if (msg == SELECT_POKE_WRITE)
335 FD_SET(sock->fd, &manager->write_fds);
338 #ifdef ISC_PLATFORM_USETHREADS
340 * Poke the select loop when there is something for us to do.
341 * The write is required (by POSIX) to complete. That is, we
342 * will not get partial writes.
344 static void
345 select_poke(isc_socketmgr_t *mgr, int fd, int msg) {
346 int cc;
347 int buf[2];
348 char strbuf[ISC_STRERRORSIZE];
350 buf[0] = fd;
351 buf[1] = msg;
353 do {
354 cc = write(mgr->pipe_fds[1], buf, sizeof(buf));
355 #ifdef ENOSR
357 * Treat ENOSR as EAGAIN but loop slowly as it is
358 * unlikely to clear fast.
360 if (cc < 0 && errno == ENOSR) {
361 sleep(1);
362 errno = EAGAIN;
364 #endif
365 } while (cc < 0 && SOFT_ERROR(errno));
367 if (cc < 0) {
368 isc__strerror(errno, strbuf, sizeof(strbuf));
369 FATAL_ERROR(__FILE__, __LINE__,
370 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
371 ISC_MSG_WRITEFAILED,
372 "write() failed "
373 "during watcher poke: %s"),
374 strbuf);
377 INSIST(cc == sizeof(buf));
381 * Read a message on the internal fd.
383 static void
384 select_readmsg(isc_socketmgr_t *mgr, int *fd, int *msg) {
385 int buf[2];
386 int cc;
387 char strbuf[ISC_STRERRORSIZE];
389 cc = read(mgr->pipe_fds[0], buf, sizeof(buf));
390 if (cc < 0) {
391 *msg = SELECT_POKE_NOTHING;
392 *fd = -1; /* Silence compiler. */
393 if (SOFT_ERROR(errno))
394 return;
396 isc__strerror(errno, strbuf, sizeof(strbuf));
397 FATAL_ERROR(__FILE__, __LINE__,
398 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
399 ISC_MSG_READFAILED,
400 "read() failed "
401 "during watcher poke: %s"),
402 strbuf);
404 return;
406 INSIST(cc == sizeof(buf));
408 *fd = buf[0];
409 *msg = buf[1];
411 #else /* ISC_PLATFORM_USETHREADS */
413 * Update the state of the socketmgr when something changes.
415 static void
416 select_poke(isc_socketmgr_t *manager, int fd, int msg) {
417 if (msg == SELECT_POKE_SHUTDOWN)
418 return;
419 else if (fd >= 0)
420 wakeup_socket(manager, fd, msg);
421 return;
423 #endif /* ISC_PLATFORM_USETHREADS */
426 * Make a fd non-blocking.
428 static isc_result_t
429 make_nonblock(int fd) {
430 int ret;
431 int flags;
432 char strbuf[ISC_STRERRORSIZE];
433 #ifdef USE_FIONBIO_IOCTL
434 int on = 1;
436 ret = ioctl(fd, FIONBIO, (char *)&on);
437 #else
438 flags = fcntl(fd, F_GETFL, 0);
439 flags |= PORT_NONBLOCK;
440 ret = fcntl(fd, F_SETFL, flags);
441 #endif
443 if (ret == -1) {
444 isc__strerror(errno, strbuf, sizeof(strbuf));
445 UNEXPECTED_ERROR(__FILE__, __LINE__,
446 #ifdef USE_FIONBIO_IOCTL
447 "ioctl(%d, FIONBIO, &on): %s", fd,
448 #else
449 "fcntl(%d, F_SETFL, %d): %s", fd, flags,
450 #endif
451 strbuf);
453 return (ISC_R_UNEXPECTED);
456 return (ISC_R_SUCCESS);
459 #ifdef USE_CMSG
461 * Not all OSes support advanced CMSG macros: CMSG_LEN and CMSG_SPACE.
462 * In order to ensure as much portability as possible, we provide wrapper
463 * functions of these macros.
464 * Note that cmsg_space() could run slow on OSes that do not have
465 * CMSG_SPACE.
467 static inline ISC_SOCKADDR_LEN_T
468 cmsg_len(ISC_SOCKADDR_LEN_T len) {
469 #ifdef CMSG_LEN
470 return (CMSG_LEN(len));
471 #else
472 ISC_SOCKADDR_LEN_T hdrlen;
475 * Cast NULL so that any pointer arithmetic performed by CMSG_DATA
476 * is correct.
478 hdrlen = (ISC_SOCKADDR_LEN_T)CMSG_DATA(((struct cmsghdr *)NULL));
479 return (hdrlen + len);
480 #endif
483 static inline ISC_SOCKADDR_LEN_T
484 cmsg_space(ISC_SOCKADDR_LEN_T len) {
485 #ifdef CMSG_SPACE
486 return (CMSG_SPACE(len));
487 #else
488 struct msghdr msg;
489 struct cmsghdr *cmsgp;
491 * XXX: The buffer length is an ad-hoc value, but should be enough
492 * in a practical sense.
494 char dummybuf[sizeof(struct cmsghdr) + 1024];
496 memset(&msg, 0, sizeof(msg));
497 msg.msg_control = dummybuf;
498 msg.msg_controllen = sizeof(dummybuf);
500 cmsgp = (struct cmsghdr *)dummybuf;
501 cmsgp->cmsg_len = cmsg_len(len);
503 cmsgp = CMSG_NXTHDR(&msg, cmsgp);
504 if (cmsgp != NULL)
505 return ((char *)cmsgp - (char *)msg.msg_control);
506 else
507 return (0);
508 #endif
510 #endif /* USE_CMSG */
513 * Process control messages received on a socket.
515 static void
516 process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {
517 #ifdef USE_CMSG
518 struct cmsghdr *cmsgp;
519 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
520 struct in6_pktinfo *pktinfop;
521 #endif
522 #ifdef SO_TIMESTAMP
523 struct timeval *timevalp;
524 #endif
525 #endif
528 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
529 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
530 * They are all here, outside of the CPP tests, because it is
531 * more consistent with the usual ISC coding style.
533 UNUSED(sock);
534 UNUSED(msg);
535 UNUSED(dev);
537 #ifdef ISC_NET_BSD44MSGHDR
539 #ifdef MSG_TRUNC
540 if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC)
541 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
542 #endif
544 #ifdef MSG_CTRUNC
545 if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
546 dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC;
547 #endif
549 #ifndef USE_CMSG
550 return;
551 #else
552 if (msg->msg_controllen == 0U || msg->msg_control == NULL)
553 return;
555 #ifdef SO_TIMESTAMP
556 timevalp = NULL;
557 #endif
558 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
559 pktinfop = NULL;
560 #endif
562 cmsgp = CMSG_FIRSTHDR(msg);
563 while (cmsgp != NULL) {
564 socket_log(sock, NULL, TRACE,
565 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PROCESSCMSG,
566 "processing cmsg %p", cmsgp);
568 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
569 if (cmsgp->cmsg_level == IPPROTO_IPV6
570 && cmsgp->cmsg_type == IPV6_PKTINFO) {
572 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
573 memcpy(&dev->pktinfo, pktinfop,
574 sizeof(struct in6_pktinfo));
575 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
576 socket_log(sock, NULL, TRACE,
577 isc_msgcat, ISC_MSGSET_SOCKET,
578 ISC_MSG_IFRECEIVED,
579 "interface received on ifindex %u",
580 dev->pktinfo.ipi6_ifindex);
581 if (IN6_IS_ADDR_MULTICAST(&pktinfop->ipi6_addr))
582 dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST;
583 goto next;
585 #endif
587 #ifdef SO_TIMESTAMP
588 if (cmsgp->cmsg_level == SOL_SOCKET
589 && cmsgp->cmsg_type == SCM_TIMESTAMP) {
590 timevalp = (struct timeval *)CMSG_DATA(cmsgp);
591 dev->timestamp.seconds = timevalp->tv_sec;
592 dev->timestamp.nanoseconds = timevalp->tv_usec * 1000;
593 dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP;
594 goto next;
596 #endif
598 next:
599 cmsgp = CMSG_NXTHDR(msg, cmsgp);
601 #endif /* USE_CMSG */
603 #endif /* ISC_NET_BSD44MSGHDR */
607 * Construct an iov array and attach it to the msghdr passed in. This is
608 * the SEND constructor, which will use the used region of the buffer
609 * (if using a buffer list) or will use the internal region (if a single
610 * buffer I/O is requested).
612 * Nothing can be NULL, and the done event must list at least one buffer
613 * on the buffer linked list for this function to be meaningful.
615 * If write_countp != NULL, *write_countp will hold the number of bytes
616 * this transaction can send.
618 static void
619 build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
620 struct msghdr *msg, struct iovec *iov, size_t *write_countp)
622 unsigned int iovcount;
623 isc_buffer_t *buffer;
624 isc_region_t used;
625 size_t write_count;
626 size_t skip_count;
628 memset(msg, 0, sizeof(*msg));
630 if (sock->type == isc_sockettype_udp) {
631 msg->msg_name = (void *)&dev->address.type.sa;
632 msg->msg_namelen = dev->address.length;
633 } else {
634 msg->msg_name = NULL;
635 msg->msg_namelen = 0;
638 buffer = ISC_LIST_HEAD(dev->bufferlist);
639 write_count = 0;
640 iovcount = 0;
643 * Single buffer I/O? Skip what we've done so far in this region.
645 if (buffer == NULL) {
646 write_count = dev->region.length - dev->n;
647 iov[0].iov_base = (void *)(dev->region.base + dev->n);
648 iov[0].iov_len = write_count;
649 iovcount = 1;
651 goto config;
655 * Multibuffer I/O.
656 * Skip the data in the buffer list that we have already written.
658 skip_count = dev->n;
659 while (buffer != NULL) {
660 REQUIRE(ISC_BUFFER_VALID(buffer));
661 if (skip_count < isc_buffer_usedlength(buffer))
662 break;
663 skip_count -= isc_buffer_usedlength(buffer);
664 buffer = ISC_LIST_NEXT(buffer, link);
667 while (buffer != NULL) {
668 INSIST(iovcount < MAXSCATTERGATHER_SEND);
670 isc_buffer_usedregion(buffer, &used);
672 if (used.length > 0) {
673 iov[iovcount].iov_base = (void *)(used.base
674 + skip_count);
675 iov[iovcount].iov_len = used.length - skip_count;
676 write_count += (used.length - skip_count);
677 skip_count = 0;
678 iovcount++;
680 buffer = ISC_LIST_NEXT(buffer, link);
683 INSIST(skip_count == 0U);
685 config:
686 msg->msg_iov = iov;
687 msg->msg_iovlen = iovcount;
689 #ifdef ISC_NET_BSD44MSGHDR
690 msg->msg_control = NULL;
691 msg->msg_controllen = 0;
692 msg->msg_flags = 0;
693 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
694 if ((sock->type == isc_sockettype_udp)
695 && ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) {
696 struct cmsghdr *cmsgp;
697 struct in6_pktinfo *pktinfop;
699 socket_log(sock, NULL, TRACE,
700 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_SENDTODATA,
701 "sendto pktinfo data, ifindex %u",
702 dev->pktinfo.ipi6_ifindex);
704 msg->msg_controllen = cmsg_space(sizeof(struct in6_pktinfo));
705 INSIST(msg->msg_controllen <= sock->sendcmsgbuflen);
706 msg->msg_control = (void *)sock->sendcmsgbuf;
708 cmsgp = (struct cmsghdr *)sock->sendcmsgbuf;
709 cmsgp->cmsg_level = IPPROTO_IPV6;
710 cmsgp->cmsg_type = IPV6_PKTINFO;
711 cmsgp->cmsg_len = cmsg_len(sizeof(struct in6_pktinfo));
712 pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
713 memcpy(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo));
715 #endif /* USE_CMSG && ISC_PLATFORM_HAVEIPV6 */
716 #else /* ISC_NET_BSD44MSGHDR */
717 msg->msg_accrights = NULL;
718 msg->msg_accrightslen = 0;
719 #endif /* ISC_NET_BSD44MSGHDR */
721 if (write_countp != NULL)
722 *write_countp = write_count;
726 * Construct an iov array and attach it to the msghdr passed in. This is
727 * the RECV constructor, which will use the avialable region of the buffer
728 * (if using a buffer list) or will use the internal region (if a single
729 * buffer I/O is requested).
731 * Nothing can be NULL, and the done event must list at least one buffer
732 * on the buffer linked list for this function to be meaningful.
734 * If read_countp != NULL, *read_countp will hold the number of bytes
735 * this transaction can receive.
737 static void
738 build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev,
739 struct msghdr *msg, struct iovec *iov, size_t *read_countp)
741 unsigned int iovcount;
742 isc_buffer_t *buffer;
743 isc_region_t available;
744 size_t read_count;
746 memset(msg, 0, sizeof(struct msghdr));
748 if (sock->type == isc_sockettype_udp) {
749 memset(&dev->address, 0, sizeof(dev->address));
750 #ifdef BROKEN_RECVMSG
751 if (sock->pf == AF_INET) {
752 msg->msg_name = (void *)&dev->address.type.sin;
753 msg->msg_namelen = sizeof(dev->address.type.sin6);
754 } else if (sock->pf == AF_INET6) {
755 msg->msg_name = (void *)&dev->address.type.sin6;
756 msg->msg_namelen = sizeof(dev->address.type.sin6);
757 #ifdef ISC_PLATFORM_HAVESYSUNH
758 } else if (sock->pf == AF_UNIX) {
759 msg->msg_name = (void *)&dev->address.type.sunix;
760 msg->msg_namelen = sizeof(dev->address.type.sunix);
761 #endif
762 } else {
763 msg->msg_name = (void *)&dev->address.type.sa;
764 msg->msg_namelen = sizeof(dev->address.type);
766 #else
767 msg->msg_name = (void *)&dev->address.type.sa;
768 msg->msg_namelen = sizeof(dev->address.type);
769 #endif
770 #ifdef ISC_NET_RECVOVERFLOW
771 /* If needed, steal one iovec for overflow detection. */
772 maxiov--;
773 #endif
774 } else { /* TCP */
775 msg->msg_name = NULL;
776 msg->msg_namelen = 0;
777 dev->address = sock->address;
780 buffer = ISC_LIST_HEAD(dev->bufferlist);
781 read_count = 0;
784 * Single buffer I/O? Skip what we've done so far in this region.
786 if (buffer == NULL) {
787 read_count = dev->region.length - dev->n;
788 iov[0].iov_base = (void *)(dev->region.base + dev->n);
789 iov[0].iov_len = read_count;
790 iovcount = 1;
792 goto config;
796 * Multibuffer I/O.
797 * Skip empty buffers.
799 while (buffer != NULL) {
800 REQUIRE(ISC_BUFFER_VALID(buffer));
801 if (isc_buffer_availablelength(buffer) != 0)
802 break;
803 buffer = ISC_LIST_NEXT(buffer, link);
806 iovcount = 0;
807 while (buffer != NULL) {
808 INSIST(iovcount < MAXSCATTERGATHER_RECV);
810 isc_buffer_availableregion(buffer, &available);
812 if (available.length > 0) {
813 iov[iovcount].iov_base = (void *)(available.base);
814 iov[iovcount].iov_len = available.length;
815 read_count += available.length;
816 iovcount++;
818 buffer = ISC_LIST_NEXT(buffer, link);
821 config:
824 * If needed, set up to receive that one extra byte. Note that
825 * we know there is at least one iov left, since we stole it
826 * at the top of this function.
828 #ifdef ISC_NET_RECVOVERFLOW
829 if (sock->type == isc_sockettype_udp) {
830 iov[iovcount].iov_base = (void *)(&sock->overflow);
831 iov[iovcount].iov_len = 1;
832 iovcount++;
834 #endif
836 msg->msg_iov = iov;
837 msg->msg_iovlen = iovcount;
839 #ifdef ISC_NET_BSD44MSGHDR
840 msg->msg_control = NULL;
841 msg->msg_controllen = 0;
842 msg->msg_flags = 0;
843 #if defined(USE_CMSG)
844 if (sock->type == isc_sockettype_udp) {
845 msg->msg_control = sock->recvcmsgbuf;
846 msg->msg_controllen = sock->recvcmsgbuflen;
848 #endif /* USE_CMSG */
849 #else /* ISC_NET_BSD44MSGHDR */
850 msg->msg_accrights = NULL;
851 msg->msg_accrightslen = 0;
852 #endif /* ISC_NET_BSD44MSGHDR */
854 if (read_countp != NULL)
855 *read_countp = read_count;
858 static void
859 set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
860 isc_socketevent_t *dev)
862 if (sock->type == isc_sockettype_udp) {
863 if (address != NULL)
864 dev->address = *address;
865 else
866 dev->address = sock->address;
867 } else if (sock->type == isc_sockettype_tcp) {
868 INSIST(address == NULL);
869 dev->address = sock->address;
873 static isc_socketevent_t *
874 allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
875 isc_taskaction_t action, const void *arg)
877 isc_socketevent_t *ev;
879 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
880 sock, eventtype,
881 action, arg,
882 sizeof(*ev));
884 if (ev == NULL)
885 return (NULL);
887 ev->result = ISC_R_UNEXPECTED;
888 ISC_LINK_INIT(ev, ev_link);
889 ISC_LIST_INIT(ev->bufferlist);
890 ev->region.base = NULL;
891 ev->n = 0;
892 ev->offset = 0;
893 ev->attributes = 0;
895 return (ev);
898 #if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the contents of a msghdr (name, iovec array and,
 * with ISC_NET_BSD44MSGHDR, the control buffer) to stdout.
 *
 * Length fields have platform dependent types (socklen_t, size_t, int),
 * so each one is converted to unsigned long and printed with %lu, and
 * pointers are passed to %p as void *, so that every conversion matches
 * its argument.
 */
static void
dump_msg(struct msghdr *msg) {
	unsigned int i;

	printf("MSGHDR %p\n", (void *)msg);
	printf("\tname %p, namelen %lu\n", (void *)msg->msg_name,
	       (unsigned long)msg->msg_namelen);
	printf("\tiov %p, iovlen %lu\n", (void *)msg->msg_iov,
	       (unsigned long)msg->msg_iovlen);
	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
		printf("\t\t%u\tbase %p, len %lu\n", i,
		       (void *)msg->msg_iov[i].iov_base,
		       (unsigned long)msg->msg_iov[i].iov_len);
#ifdef ISC_NET_BSD44MSGHDR
	printf("\tcontrol %p, controllen %lu\n", (void *)msg->msg_control,
	       (unsigned long)msg->msg_controllen);
#endif
}
915 #endif
917 #define DOIO_SUCCESS 0 /* i/o ok, event sent */
918 #define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */
919 #define DOIO_HARD 2 /* i/o error, event sent */
920 #define DOIO_EOF 3 /* EOF, no event sent */
922 static int
923 doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
924 int cc;
925 struct iovec iov[MAXSCATTERGATHER_RECV];
926 size_t read_count;
927 size_t actual_count;
928 struct msghdr msghdr;
929 isc_buffer_t *buffer;
930 int recv_errno;
931 char strbuf[ISC_STRERRORSIZE];
933 build_msghdr_recv(sock, dev, &msghdr, iov, &read_count);
935 #if defined(ISC_SOCKET_DEBUG)
936 dump_msg(&msghdr);
937 #endif
939 cc = recvmsg(sock->fd, &msghdr, 0);
940 recv_errno = errno;
942 #if defined(ISC_SOCKET_DEBUG)
943 dump_msg(&msghdr);
944 #endif
946 if (cc < 0) {
947 if (SOFT_ERROR(recv_errno))
948 return (DOIO_SOFT);
950 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
951 isc__strerror(recv_errno, strbuf, sizeof(strbuf));
952 socket_log(sock, NULL, IOEVENT,
953 isc_msgcat, ISC_MSGSET_SOCKET,
954 ISC_MSG_DOIORECV,
955 "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
956 sock->fd, cc, recv_errno, strbuf);
959 #define SOFT_OR_HARD(_system, _isc) \
960 if (recv_errno == _system) { \
961 if (sock->connected) { \
962 dev->result = _isc; \
963 return (DOIO_HARD); \
965 return (DOIO_SOFT); \
967 #define ALWAYS_HARD(_system, _isc) \
968 if (recv_errno == _system) { \
969 dev->result = _isc; \
970 return (DOIO_HARD); \
973 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
974 SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
975 SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
976 SOFT_OR_HARD(EHOSTDOWN, ISC_R_HOSTDOWN);
977 /* HPUX 11.11 can return EADDRNOTAVAIL. */
978 SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
979 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
981 #undef SOFT_OR_HARD
982 #undef ALWAYS_HARD
984 dev->result = isc__errno2result(recv_errno);
985 return (DOIO_HARD);
989 * On TCP, zero length reads indicate EOF, while on
990 * UDP, zero length reads are perfectly valid, although
991 * strange.
993 if ((sock->type == isc_sockettype_tcp) && (cc == 0))
994 return (DOIO_EOF);
996 if (sock->type == isc_sockettype_udp) {
997 dev->address.length = msghdr.msg_namelen;
998 if (isc_sockaddr_getport(&dev->address) == 0) {
999 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1000 socket_log(sock, &dev->address, IOEVENT,
1001 isc_msgcat, ISC_MSGSET_SOCKET,
1002 ISC_MSG_ZEROPORT,
1003 "dropping source port zero packet");
1005 return (DOIO_SOFT);
1009 socket_log(sock, &dev->address, IOEVENT,
1010 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_PKTRECV,
1011 "packet received correctly");
1014 * Overflow bit detection. If we received MORE bytes than we should,
1015 * this indicates an overflow situation. Set the flag in the
1016 * dev entry and adjust how much we read by one.
1018 #ifdef ISC_NET_RECVOVERFLOW
1019 if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) {
1020 dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
1021 cc--;
1023 #endif
1026 * If there are control messages attached, run through them and pull
1027 * out the interesting bits.
1029 if (sock->type == isc_sockettype_udp)
1030 process_cmsg(sock, &msghdr, dev);
1033 * update the buffers (if any) and the i/o count
1035 dev->n += cc;
1036 actual_count = cc;
1037 buffer = ISC_LIST_HEAD(dev->bufferlist);
1038 while (buffer != NULL && actual_count > 0U) {
1039 REQUIRE(ISC_BUFFER_VALID(buffer));
1040 if (isc_buffer_availablelength(buffer) <= actual_count) {
1041 actual_count -= isc_buffer_availablelength(buffer);
1042 isc_buffer_add(buffer,
1043 isc_buffer_availablelength(buffer));
1044 } else {
1045 isc_buffer_add(buffer, actual_count);
1046 actual_count = 0;
1047 break;
1049 buffer = ISC_LIST_NEXT(buffer, link);
1050 if (buffer == NULL) {
1051 INSIST(actual_count == 0U);
1056 * If we read less than we expected, update counters,
1057 * and let the upper layer poke the descriptor.
1059 if (((size_t)cc != read_count) && (dev->n < dev->minimum))
1060 return (DOIO_SOFT);
1063 * Full reads are posted, or partials if partials are ok.
1065 dev->result = ISC_R_SUCCESS;
1066 return (DOIO_SUCCESS);
1070 * Returns:
1071 * DOIO_SUCCESS The operation succeeded. dev->result contains
1072 * ISC_R_SUCCESS.
1074 * DOIO_HARD A hard or unexpected I/O error was encountered.
1075 * dev->result contains the appropriate error.
1077 * DOIO_SOFT A soft I/O error was encountered. No senddone
1078 * event was sent. The operation should be retried.
1080 * No other return values are possible.
1082 static int
1083 doio_send(isc_socket_t *sock, isc_socketevent_t *dev) {
1084 int cc;
1085 struct iovec iov[MAXSCATTERGATHER_SEND];
1086 size_t write_count;
1087 struct msghdr msghdr;
1088 char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1089 int attempts = 0;
1090 int send_errno;
1091 char strbuf[ISC_STRERRORSIZE];
1093 build_msghdr_send(sock, dev, &msghdr, iov, &write_count);
1095 resend:
1096 cc = sendmsg(sock->fd, &msghdr, 0);
1097 send_errno = errno;
1100 * Check for error or block condition.
1102 if (cc < 0) {
1103 if (send_errno == EINTR && ++attempts < NRETRIES)
1104 goto resend;
1106 if (SOFT_ERROR(send_errno))
1107 return (DOIO_SOFT);
1109 #define SOFT_OR_HARD(_system, _isc) \
1110 if (send_errno == _system) { \
1111 if (sock->connected) { \
1112 dev->result = _isc; \
1113 return (DOIO_HARD); \
1115 return (DOIO_SOFT); \
1117 #define ALWAYS_HARD(_system, _isc) \
1118 if (send_errno == _system) { \
1119 dev->result = _isc; \
1120 return (DOIO_HARD); \
1123 SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
1124 ALWAYS_HARD(EACCES, ISC_R_NOPERM);
1125 ALWAYS_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
1126 ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
1127 ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
1128 #ifdef EHOSTDOWN
1129 ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH);
1130 #endif
1131 ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH);
1132 ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
1133 ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH);
1134 ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED);
1135 ALWAYS_HARD(ECONNRESET, ISC_R_CONNECTIONRESET);
1137 #undef SOFT_OR_HARD
1138 #undef ALWAYS_HARD
1141 * The other error types depend on whether or not the
1142 * socket is UDP or TCP. If it is UDP, some errors
1143 * that we expect to be fatal under TCP are merely
1144 * annoying, and are really soft errors.
1146 * However, these soft errors are still returned as
1147 * a status.
1149 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1150 isc__strerror(send_errno, strbuf, sizeof(strbuf));
1151 UNEXPECTED_ERROR(__FILE__, __LINE__, "internal_send: %s: %s",
1152 addrbuf, strbuf);
1153 dev->result = isc__errno2result(send_errno);
1154 return (DOIO_HARD);
1157 if (cc == 0)
1158 UNEXPECTED_ERROR(__FILE__, __LINE__,
1159 "internal_send: send() %s 0",
1160 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1161 ISC_MSG_RETURNED, "returned"));
1164 * If we write less than we expected, update counters, poke.
1166 dev->n += cc;
1167 if ((size_t)cc != write_count)
1168 return (DOIO_SOFT);
1171 * Exactly what we wanted to write. We're done with this
1172 * entry. Post its completion event.
1174 dev->result = ISC_R_SUCCESS;
1175 return (DOIO_SUCCESS);
1179 * Kill.
1181 * Caller must ensure that the socket is not locked and no external
1182 * references exist.
1184 static void
1185 destroy(isc_socket_t **sockp) {
1186 isc_socket_t *sock = *sockp;
1187 isc_socketmgr_t *manager = sock->manager;
1189 socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1190 ISC_MSG_DESTROYING, "destroying");
1192 INSIST(ISC_LIST_EMPTY(sock->accept_list));
1193 INSIST(ISC_LIST_EMPTY(sock->recv_list));
1194 INSIST(ISC_LIST_EMPTY(sock->send_list));
1195 INSIST(sock->connect_ev == NULL);
1196 REQUIRE(sock->fd >= 0 && sock->fd < (int)FD_SETSIZE);
1198 LOCK(&manager->lock);
1201 * No one has this socket open, so the watcher doesn't have to be
1202 * poked, and the socket doesn't have to be locked.
1204 manager->fds[sock->fd] = NULL;
1205 manager->fdstate[sock->fd] = CLOSE_PENDING;
1206 select_poke(manager, sock->fd, SELECT_POKE_CLOSE);
1207 ISC_LIST_UNLINK(manager->socklist, sock, link);
1209 #ifdef ISC_PLATFORM_USETHREADS
1210 if (ISC_LIST_EMPTY(manager->socklist))
1211 SIGNAL(&manager->shutdown_ok);
1212 #endif /* ISC_PLATFORM_USETHREADS */
1215 * XXX should reset manager->maxfd here
1218 UNLOCK(&manager->lock);
1220 free_socket(sockp);
1223 static isc_result_t
1224 allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1225 isc_socket_t **socketp)
1227 isc_socket_t *sock;
1228 isc_result_t ret;
1229 ISC_SOCKADDR_LEN_T cmsgbuflen;
1231 sock = isc_mem_get(manager->mctx, sizeof(*sock));
1233 if (sock == NULL)
1234 return (ISC_R_NOMEMORY);
1236 ret = ISC_R_UNEXPECTED;
1238 sock->magic = 0;
1239 sock->references = 0;
1241 sock->manager = manager;
1242 sock->type = type;
1243 sock->fd = -1;
1245 ISC_LINK_INIT(sock, link);
1247 sock->recvcmsgbuf = NULL;
1248 sock->sendcmsgbuf = NULL;
1251 * set up cmsg buffers
1253 cmsgbuflen = 0;
1254 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1255 cmsgbuflen = cmsg_space(sizeof(struct in6_pktinfo));
1256 #endif
1257 #if defined(USE_CMSG) && defined(SO_TIMESTAMP)
1258 cmsgbuflen += cmsg_space(sizeof(struct timeval));
1259 #endif
1260 sock->recvcmsgbuflen = cmsgbuflen;
1261 if (sock->recvcmsgbuflen != 0U) {
1262 sock->recvcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen);
1263 if (sock->recvcmsgbuf == NULL)
1264 goto error;
1267 cmsgbuflen = 0;
1268 #if defined(USE_CMSG) && defined(ISC_PLATFORM_HAVEIN6PKTINFO)
1269 cmsgbuflen = cmsg_space(sizeof(struct in6_pktinfo));
1270 #endif
1271 sock->sendcmsgbuflen = cmsgbuflen;
1272 if (sock->sendcmsgbuflen != 0U) {
1273 sock->sendcmsgbuf = isc_mem_get(manager->mctx, cmsgbuflen);
1274 if (sock->sendcmsgbuf == NULL)
1275 goto error;
1279 * set up list of readers and writers to be initially empty
1281 ISC_LIST_INIT(sock->recv_list);
1282 ISC_LIST_INIT(sock->send_list);
1283 ISC_LIST_INIT(sock->accept_list);
1284 sock->connect_ev = NULL;
1285 sock->pending_recv = 0;
1286 sock->pending_send = 0;
1287 sock->pending_accept = 0;
1288 sock->listener = 0;
1289 sock->connected = 0;
1290 sock->connecting = 0;
1291 sock->bound = 0;
1294 * initialize the lock
1296 if (isc_mutex_init(&sock->lock) != ISC_R_SUCCESS) {
1297 sock->magic = 0;
1298 UNEXPECTED_ERROR(__FILE__, __LINE__,
1299 "isc_mutex_init() %s",
1300 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1301 ISC_MSG_FAILED, "failed"));
1302 ret = ISC_R_UNEXPECTED;
1303 goto error;
1307 * Initialize readable and writable events
1309 ISC_EVENT_INIT(&sock->readable_ev, sizeof(intev_t),
1310 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTR,
1311 NULL, sock, sock, NULL, NULL);
1312 ISC_EVENT_INIT(&sock->writable_ev, sizeof(intev_t),
1313 ISC_EVENTATTR_NOPURGE, NULL, ISC_SOCKEVENT_INTW,
1314 NULL, sock, sock, NULL, NULL);
1316 sock->magic = SOCKET_MAGIC;
1317 *socketp = sock;
1319 return (ISC_R_SUCCESS);
1321 error:
1322 if (sock->recvcmsgbuf != NULL)
1323 isc_mem_put(manager->mctx, sock->recvcmsgbuf,
1324 sock->recvcmsgbuflen);
1325 if (sock->sendcmsgbuf != NULL)
1326 isc_mem_put(manager->mctx, sock->sendcmsgbuf,
1327 sock->sendcmsgbuflen);
1328 isc_mem_put(manager->mctx, sock, sizeof(*sock));
1330 return (ret);
1334 * This event requires that the various lists be empty, that the reference
1335 * count be 1, and that the magic number is valid. The other socket bits,
1336 * like the lock, must be initialized as well. The fd associated must be
1337 * marked as closed, by setting it to -1 on close, or this routine will
1338 * also close the socket.
1340 static void
1341 free_socket(isc_socket_t **socketp) {
1342 isc_socket_t *sock = *socketp;
1344 INSIST(sock->references == 0);
1345 INSIST(VALID_SOCKET(sock));
1346 INSIST(!sock->connecting);
1347 INSIST(!sock->pending_recv);
1348 INSIST(!sock->pending_send);
1349 INSIST(!sock->pending_accept);
1350 INSIST(ISC_LIST_EMPTY(sock->recv_list));
1351 INSIST(ISC_LIST_EMPTY(sock->send_list));
1352 INSIST(ISC_LIST_EMPTY(sock->accept_list));
1353 INSIST(!ISC_LINK_LINKED(sock, link));
1355 if (sock->recvcmsgbuf != NULL)
1356 isc_mem_put(sock->manager->mctx, sock->recvcmsgbuf,
1357 sock->recvcmsgbuflen);
1358 if (sock->sendcmsgbuf != NULL)
1359 isc_mem_put(sock->manager->mctx, sock->sendcmsgbuf,
1360 sock->sendcmsgbuflen);
1362 sock->magic = 0;
1364 DESTROYLOCK(&sock->lock);
1366 isc_mem_put(sock->manager->mctx, sock, sizeof(*sock));
1368 *socketp = NULL;
1372 * Create a new 'type' socket managed by 'manager'. Events
1373 * will be posted to 'task' and when dispatched 'action' will be
1374 * called with 'arg' as the arg value. The new socket is returned
1375 * in 'socketp'.
1377 isc_result_t
1378 isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1379 isc_socket_t **socketp)
1381 isc_socket_t *sock = NULL;
1382 isc_result_t ret;
1383 #if defined(USE_CMSG) || defined(SO_BSDCOMPAT)
1384 int on = 1;
1385 #endif
1386 char strbuf[ISC_STRERRORSIZE];
1387 const char *err = "socket";
1389 REQUIRE(VALID_MANAGER(manager));
1390 REQUIRE(socketp != NULL && *socketp == NULL);
1392 ret = allocate_socket(manager, type, &sock);
1393 if (ret != ISC_R_SUCCESS)
1394 return (ret);
1396 sock->pf = pf;
1397 switch (type) {
1398 case isc_sockettype_udp:
1399 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1400 break;
1401 case isc_sockettype_tcp:
1402 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1403 break;
1406 #ifdef F_DUPFD
1408 * Leave a space for stdio to work in.
1410 if (sock->fd >= 0 && sock->fd < 20) {
1411 int new, tmp;
1412 new = fcntl(sock->fd, F_DUPFD, 20);
1413 tmp = errno;
1414 (void)close(sock->fd);
1415 errno = tmp;
1416 sock->fd = new;
1417 err = "isc_socket_create: fcntl";
1419 #endif
1421 if (sock->fd >= (int)FD_SETSIZE) {
1422 (void)close(sock->fd);
1423 isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1424 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
1425 isc_msgcat, ISC_MSGSET_SOCKET,
1426 ISC_MSG_TOOMANYFDS,
1427 "%s: too many open file descriptors", "socket");
1428 free_socket(&sock);
1429 return (ISC_R_NORESOURCES);
1432 if (sock->fd < 0) {
1433 free_socket(&sock);
1435 switch (errno) {
1436 case EMFILE:
1437 case ENFILE:
1438 case ENOBUFS:
1439 return (ISC_R_NORESOURCES);
1441 case EPROTONOSUPPORT:
1442 case EPFNOSUPPORT:
1443 case EAFNOSUPPORT:
1445 * Linux 2.2 (and maybe others) return EINVAL instead of
1446 * EAFNOSUPPORT.
1448 case EINVAL:
1449 return (ISC_R_FAMILYNOSUPPORT);
1451 default:
1452 isc__strerror(errno, strbuf, sizeof(strbuf));
1453 UNEXPECTED_ERROR(__FILE__, __LINE__,
1454 "%s() %s: %s", err,
1455 isc_msgcat_get(isc_msgcat,
1456 ISC_MSGSET_GENERAL,
1457 ISC_MSG_FAILED,
1458 "failed"),
1459 strbuf);
1460 return (ISC_R_UNEXPECTED);
1464 if (make_nonblock(sock->fd) != ISC_R_SUCCESS) {
1465 (void)close(sock->fd);
1466 free_socket(&sock);
1467 return (ISC_R_UNEXPECTED);
1470 #ifdef SO_BSDCOMPAT
1471 if (setsockopt(sock->fd, SOL_SOCKET, SO_BSDCOMPAT,
1472 (void *)&on, sizeof(on)) < 0) {
1473 isc__strerror(errno, strbuf, sizeof(strbuf));
1474 UNEXPECTED_ERROR(__FILE__, __LINE__,
1475 "setsockopt(%d, SO_BSDCOMPAT) %s: %s",
1476 sock->fd,
1477 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
1478 ISC_MSG_FAILED, "failed"),
1479 strbuf);
1480 /* Press on... */
1482 #endif
1484 #if defined(USE_CMSG)
1485 if (type == isc_sockettype_udp) {
1487 #if defined(SO_TIMESTAMP)
1488 if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP,
1489 (void *)&on, sizeof(on)) < 0
1490 && errno != ENOPROTOOPT) {
1491 isc__strerror(errno, strbuf, sizeof(strbuf));
1492 UNEXPECTED_ERROR(__FILE__, __LINE__,
1493 "setsockopt(%d, SO_TIMESTAMP) %s: %s",
1494 sock->fd,
1495 isc_msgcat_get(isc_msgcat,
1496 ISC_MSGSET_GENERAL,
1497 ISC_MSG_FAILED,
1498 "failed"),
1499 strbuf);
1500 /* Press on... */
1502 #endif /* SO_TIMESTAMP */
1504 #if defined(ISC_PLATFORM_HAVEIPV6)
1505 if (pf == AF_INET6 && sock->recvcmsgbuflen == 0U) {
1507 * Warn explicitly because this anomaly can be hidden
1508 * in usual operation (and unexpectedly appear later).
1510 UNEXPECTED_ERROR(__FILE__, __LINE__,
1511 "No buffer available to receive "
1512 "IPv6 destination");
1514 #ifdef ISC_PLATFORM_HAVEIN6PKTINFO
1515 #ifdef IPV6_RECVPKTINFO
1516 /* 2292bis */
1517 if ((pf == AF_INET6)
1518 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1519 (void *)&on, sizeof(on)) < 0)) {
1520 isc__strerror(errno, strbuf, sizeof(strbuf));
1521 UNEXPECTED_ERROR(__FILE__, __LINE__,
1522 "setsockopt(%d, IPV6_RECVPKTINFO) "
1523 "%s: %s", sock->fd,
1524 isc_msgcat_get(isc_msgcat,
1525 ISC_MSGSET_GENERAL,
1526 ISC_MSG_FAILED,
1527 "failed"),
1528 strbuf);
1530 #else
1531 /* 2292 */
1532 if ((pf == AF_INET6)
1533 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1534 (void *)&on, sizeof(on)) < 0)) {
1535 isc__strerror(errno, strbuf, sizeof(strbuf));
1536 UNEXPECTED_ERROR(__FILE__, __LINE__,
1537 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1538 sock->fd,
1539 isc_msgcat_get(isc_msgcat,
1540 ISC_MSGSET_GENERAL,
1541 ISC_MSG_FAILED,
1542 "failed"),
1543 strbuf);
1545 #endif /* IPV6_RECVPKTINFO */
1546 #endif /* ISC_PLATFORM_HAVEIN6PKTINFO */
1547 #ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/
1548 /* use minimum MTU */
1549 if (pf == AF_INET6) {
1550 (void)setsockopt(sock->fd, IPPROTO_IPV6,
1551 IPV6_USE_MIN_MTU,
1552 (void *)&on, sizeof(on));
1554 #endif
1555 #endif /* ISC_PLATFORM_HAVEIPV6 */
1558 #endif /* USE_CMSG */
1560 sock->references = 1;
1561 *socketp = sock;
1563 LOCK(&manager->lock);
1566 * Note we don't have to lock the socket like we normally would because
1567 * there are no external references to it yet.
1570 manager->fds[sock->fd] = sock;
1571 manager->fdstate[sock->fd] = MANAGED;
1572 ISC_LIST_APPEND(manager->socklist, sock, link);
1573 if (manager->maxfd < sock->fd)
1574 manager->maxfd = sock->fd;
1576 UNLOCK(&manager->lock);
1578 socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1579 ISC_MSG_CREATED, "created");
1581 return (ISC_R_SUCCESS);
1585 * Attach to a socket. Caller must explicitly detach when it is done.
1587 void
1588 isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1589 REQUIRE(VALID_SOCKET(sock));
1590 REQUIRE(socketp != NULL && *socketp == NULL);
1592 LOCK(&sock->lock);
1593 sock->references++;
1594 UNLOCK(&sock->lock);
1596 *socketp = sock;
1600 * Dereference a socket. If this is the last reference to it, clean things
1601 * up by destroying the socket.
1603 void
1604 isc_socket_detach(isc_socket_t **socketp) {
1605 isc_socket_t *sock;
1606 isc_boolean_t kill_socket = ISC_FALSE;
1608 REQUIRE(socketp != NULL);
1609 sock = *socketp;
1610 REQUIRE(VALID_SOCKET(sock));
1612 LOCK(&sock->lock);
1613 REQUIRE(sock->references > 0);
1614 sock->references--;
1615 if (sock->references == 0)
1616 kill_socket = ISC_TRUE;
1617 UNLOCK(&sock->lock);
1619 if (kill_socket)
1620 destroy(&sock);
1622 *socketp = NULL;
1626 * I/O is possible on a given socket. Schedule an event to this task that
1627 * will call an internal function to do the I/O. This will charge the
1628 * task with the I/O operation and let our select loop handler get back
1629 * to doing something real as fast as possible.
1631 * The socket and manager must be locked before calling this function.
1633 static void
1634 dispatch_recv(isc_socket_t *sock) {
1635 intev_t *iev;
1636 isc_socketevent_t *ev;
1638 INSIST(!sock->pending_recv);
1640 ev = ISC_LIST_HEAD(sock->recv_list);
1641 if (ev == NULL)
1642 return;
1644 sock->pending_recv = 1;
1645 iev = &sock->readable_ev;
1647 socket_log(sock, NULL, EVENT, NULL, 0, 0,
1648 "dispatch_recv: event %p -> task %p", ev, ev->ev_sender);
1650 sock->references++;
1651 iev->ev_sender = sock;
1652 iev->ev_action = internal_recv;
1653 iev->ev_arg = sock;
1655 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1658 static void
1659 dispatch_send(isc_socket_t *sock) {
1660 intev_t *iev;
1661 isc_socketevent_t *ev;
1663 INSIST(!sock->pending_send);
1665 ev = ISC_LIST_HEAD(sock->send_list);
1666 if (ev == NULL)
1667 return;
1669 sock->pending_send = 1;
1670 iev = &sock->writable_ev;
1672 socket_log(sock, NULL, EVENT, NULL, 0, 0,
1673 "dispatch_send: event %p -> task %p", ev, ev->ev_sender);
1675 sock->references++;
1676 iev->ev_sender = sock;
1677 iev->ev_action = internal_send;
1678 iev->ev_arg = sock;
1680 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1684 * Dispatch an internal accept event.
1686 static void
1687 dispatch_accept(isc_socket_t *sock) {
1688 intev_t *iev;
1689 isc_socket_newconnev_t *ev;
1691 INSIST(!sock->pending_accept);
1694 * Are there any done events left, or were they all canceled
1695 * before the manager got the socket lock?
1697 ev = ISC_LIST_HEAD(sock->accept_list);
1698 if (ev == NULL)
1699 return;
1701 sock->pending_accept = 1;
1702 iev = &sock->readable_ev;
1704 sock->references++; /* keep socket around for this internal event */
1705 iev->ev_sender = sock;
1706 iev->ev_action = internal_accept;
1707 iev->ev_arg = sock;
1709 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1712 static void
1713 dispatch_connect(isc_socket_t *sock) {
1714 intev_t *iev;
1715 isc_socket_connev_t *ev;
1717 iev = &sock->writable_ev;
1719 ev = sock->connect_ev;
1720 INSIST(ev != NULL); /* XXX */
1722 INSIST(sock->connecting);
1724 sock->references++; /* keep socket around for this internal event */
1725 iev->ev_sender = sock;
1726 iev->ev_action = internal_connect;
1727 iev->ev_arg = sock;
1729 isc_task_send(ev->ev_sender, (isc_event_t **)&iev);
1733 * Dequeue an item off the given socket's read queue, set the result code
1734 * in the done event to the one provided, and send it to the task it was
1735 * destined for.
1737 * If the event to be sent is on a list, remove it before sending. If
1738 * asked to, send and detach from the socket as well.
1740 * Caller must have the socket locked if the event is attached to the socket.
1742 static void
1743 send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1744 isc_task_t *task;
1746 task = (*dev)->ev_sender;
1748 (*dev)->ev_sender = sock;
1750 if (ISC_LINK_LINKED(*dev, ev_link))
1751 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1753 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1754 == ISC_SOCKEVENTATTR_ATTACHED)
1755 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1756 else
1757 isc_task_send(task, (isc_event_t **)dev);
1761 * See comments for send_recvdone_event() above.
1763 * Caller must have the socket locked if the event is attached to the socket.
1765 static void
1766 send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1767 isc_task_t *task;
1769 INSIST(dev != NULL && *dev != NULL);
1771 task = (*dev)->ev_sender;
1772 (*dev)->ev_sender = sock;
1774 if (ISC_LINK_LINKED(*dev, ev_link))
1775 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1777 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1778 == ISC_SOCKEVENTATTR_ATTACHED)
1779 isc_task_sendanddetach(&task, (isc_event_t **)dev);
1780 else
1781 isc_task_send(task, (isc_event_t **)dev);
1785 * Call accept() on a socket, to get the new file descriptor. The listen
1786 * socket is used as a prototype to create a new isc_socket_t. The new
1787 * socket has one outstanding reference. The task receiving the event
1788 * will be detached from just after the event is delivered.
1790 * On entry to this function, the event delivered is the internal
1791 * readable event, and the first item on the accept_list should be
1792 * the done event we want to send. If the list is empty, this is a no-op,
1793 * so just unlock and return.
1795 static void
1796 internal_accept(isc_task_t *me, isc_event_t *ev) {
1797 isc_socket_t *sock;
1798 isc_socketmgr_t *manager;
1799 isc_socket_newconnev_t *dev;
1800 isc_task_t *task;
1801 ISC_SOCKADDR_LEN_T addrlen;
1802 int fd;
1803 isc_result_t result = ISC_R_SUCCESS;
1804 char strbuf[ISC_STRERRORSIZE];
1805 const char *err = "accept";
1807 UNUSED(me);
1809 sock = ev->ev_sender;
1810 INSIST(VALID_SOCKET(sock));
1812 LOCK(&sock->lock);
1813 socket_log(sock, NULL, TRACE,
1814 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1815 "internal_accept called, locked socket");
1817 manager = sock->manager;
1818 INSIST(VALID_MANAGER(manager));
1820 INSIST(sock->listener);
1821 INSIST(sock->pending_accept == 1);
1822 sock->pending_accept = 0;
1824 INSIST(sock->references > 0);
1825 sock->references--; /* the internal event is done with this socket */
1826 if (sock->references == 0) {
1827 UNLOCK(&sock->lock);
1828 destroy(&sock);
1829 return;
1833 * Get the first item off the accept list.
1834 * If it is empty, unlock the socket and return.
1836 dev = ISC_LIST_HEAD(sock->accept_list);
1837 if (dev == NULL) {
1838 UNLOCK(&sock->lock);
1839 return;
1843 * Try to accept the new connection. If the accept fails with
1844 * EAGAIN or EINTR, simply poke the watcher to watch this socket
1845 * again. Also ignore ECONNRESET, which has been reported to
1846 * be spuriously returned on Linux 2.2.19 although it is not
1847 * a documented error for accept(). ECONNABORTED has been
1848 * reported for Solaris 8. The rest are thrown in not because
1849 * we have seen them but because they are ignored by other
1850 * deamons such as BIND 8 and Apache.
1853 addrlen = sizeof(dev->newsocket->address.type);
1854 memset(&dev->newsocket->address.type.sa, 0, addrlen);
1855 fd = accept(sock->fd, &dev->newsocket->address.type.sa,
1856 (void *)&addrlen);
1858 #ifdef F_DUPFD
1860 * Leave a space for stdio to work in.
1862 if (fd >= 0 && fd < 20) {
1863 int new, tmp;
1864 new = fcntl(fd, F_DUPFD, 20);
1865 tmp = errno;
1866 (void)close(fd);
1867 errno = tmp;
1868 fd = new;
1869 err = "fcntl";
1871 #endif
1873 if (fd < 0) {
1874 if (SOFT_ERROR(errno))
1875 goto soft_error;
1876 switch (errno) {
1877 case ENOBUFS:
1878 case ENFILE:
1879 case ENOMEM:
1880 case ECONNRESET:
1881 case ECONNABORTED:
1882 case EHOSTUNREACH:
1883 case EHOSTDOWN:
1884 case ENETUNREACH:
1885 case ENETDOWN:
1886 case ECONNREFUSED:
1887 #ifdef EPROTO
1888 case EPROTO:
1889 #endif
1890 #ifdef ENONET
1891 case ENONET:
1892 #endif
1893 goto soft_error;
1894 default:
1895 break;
1897 isc__strerror(errno, strbuf, sizeof(strbuf));
1898 UNEXPECTED_ERROR(__FILE__, __LINE__,
1899 "internal_accept: %s() %s: %s", err,
1900 isc_msgcat_get(isc_msgcat,
1901 ISC_MSGSET_GENERAL,
1902 ISC_MSG_FAILED,
1903 "failed"),
1904 strbuf);
1905 fd = -1;
1906 result = ISC_R_UNEXPECTED;
1907 } else {
1908 if (addrlen == 0U) {
1909 UNEXPECTED_ERROR(__FILE__, __LINE__,
1910 "internal_accept(): "
1911 "accept() failed to return "
1912 "remote address");
1914 (void)close(fd);
1915 goto soft_error;
1916 } else if (dev->newsocket->address.type.sa.sa_family !=
1917 sock->pf)
1919 UNEXPECTED_ERROR(__FILE__, __LINE__,
1920 "internal_accept(): "
1921 "accept() returned peer address "
1922 "family %u (expected %u)",
1923 dev->newsocket->address.
1924 type.sa.sa_family,
1925 sock->pf);
1926 (void)close(fd);
1927 goto soft_error;
1928 } else if (fd >= (int)FD_SETSIZE) {
1929 isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL,
1930 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
1931 isc_msgcat, ISC_MSGSET_SOCKET,
1932 ISC_MSG_TOOMANYFDS,
1933 "%s: too many open file descriptors",
1934 "accept");
1935 (void)close(fd);
1936 goto soft_error;
1940 if (fd != -1) {
1941 dev->newsocket->address.length = addrlen;
1942 dev->newsocket->pf = sock->pf;
1946 * Pull off the done event.
1948 ISC_LIST_UNLINK(sock->accept_list, dev, ev_link);
1951 * Poke watcher if there are more pending accepts.
1953 if (!ISC_LIST_EMPTY(sock->accept_list))
1954 select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT);
1956 UNLOCK(&sock->lock);
1958 if (fd != -1 && (make_nonblock(fd) != ISC_R_SUCCESS)) {
1959 (void)close(fd);
1960 fd = -1;
1961 result = ISC_R_UNEXPECTED;
1965 * -1 means the new socket didn't happen.
1967 if (fd != -1) {
1968 LOCK(&manager->lock);
1969 ISC_LIST_APPEND(manager->socklist, dev->newsocket, link);
1971 dev->newsocket->fd = fd;
1972 dev->newsocket->bound = 1;
1973 dev->newsocket->connected = 1;
1976 * Save away the remote address
1978 dev->address = dev->newsocket->address;
1980 manager->fds[fd] = dev->newsocket;
1981 manager->fdstate[fd] = MANAGED;
1982 if (manager->maxfd < fd)
1983 manager->maxfd = fd;
1985 socket_log(sock, &dev->newsocket->address, CREATION,
1986 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
1987 "accepted connection, new socket %p",
1988 dev->newsocket);
1990 UNLOCK(&manager->lock);
1991 } else {
1992 dev->newsocket->references--;
1993 free_socket(&dev->newsocket);
1997 * Fill in the done event details and send it off.
1999 dev->result = result;
2000 task = dev->ev_sender;
2001 dev->ev_sender = sock;
2003 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
2004 return;
2006 soft_error:
2007 select_poke(sock->manager, sock->fd, SELECT_POKE_ACCEPT);
2008 UNLOCK(&sock->lock);
2009 return;
2012 static void
2013 internal_recv(isc_task_t *me, isc_event_t *ev) {
2014 isc_socketevent_t *dev;
2015 isc_socket_t *sock;
2017 INSIST(ev->ev_type == ISC_SOCKEVENT_INTR);
2019 sock = ev->ev_sender;
2020 INSIST(VALID_SOCKET(sock));
2022 LOCK(&sock->lock);
2023 socket_log(sock, NULL, IOEVENT,
2024 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2025 "internal_recv: task %p got event %p", me, ev);
2027 INSIST(sock->pending_recv == 1);
2028 sock->pending_recv = 0;
2030 INSIST(sock->references > 0);
2031 sock->references--; /* the internal event is done with this socket */
2032 if (sock->references == 0) {
2033 UNLOCK(&sock->lock);
2034 destroy(&sock);
2035 return;
2039 * Try to do as much I/O as possible on this socket. There are no
2040 * limits here, currently.
2042 dev = ISC_LIST_HEAD(sock->recv_list);
2043 while (dev != NULL) {
2044 switch (doio_recv(sock, dev)) {
2045 case DOIO_SOFT:
2046 goto poke;
2048 case DOIO_EOF:
2050 * read of 0 means the remote end was closed.
2051 * Run through the event queue and dispatch all
2052 * the events with an EOF result code.
2054 do {
2055 dev->result = ISC_R_EOF;
2056 send_recvdone_event(sock, &dev);
2057 dev = ISC_LIST_HEAD(sock->recv_list);
2058 } while (dev != NULL);
2059 goto poke;
2061 case DOIO_SUCCESS:
2062 case DOIO_HARD:
2063 send_recvdone_event(sock, &dev);
2064 break;
2067 dev = ISC_LIST_HEAD(sock->recv_list);
2070 poke:
2071 if (!ISC_LIST_EMPTY(sock->recv_list))
2072 select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
2074 UNLOCK(&sock->lock);
2077 static void
2078 internal_send(isc_task_t *me, isc_event_t *ev) {
2079 isc_socketevent_t *dev;
2080 isc_socket_t *sock;
2082 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
2085 * Find out what socket this is and lock it.
2087 sock = (isc_socket_t *)ev->ev_sender;
2088 INSIST(VALID_SOCKET(sock));
2090 LOCK(&sock->lock);
2091 socket_log(sock, NULL, IOEVENT,
2092 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2093 "internal_send: task %p got event %p", me, ev);
2095 INSIST(sock->pending_send == 1);
2096 sock->pending_send = 0;
2098 INSIST(sock->references > 0);
2099 sock->references--; /* the internal event is done with this socket */
2100 if (sock->references == 0) {
2101 UNLOCK(&sock->lock);
2102 destroy(&sock);
2103 return;
2107 * Try to do as much I/O as possible on this socket. There are no
2108 * limits here, currently.
2110 dev = ISC_LIST_HEAD(sock->send_list);
2111 while (dev != NULL) {
2112 switch (doio_send(sock, dev)) {
2113 case DOIO_SOFT:
2114 goto poke;
2116 case DOIO_HARD:
2117 case DOIO_SUCCESS:
2118 send_senddone_event(sock, &dev);
2119 break;
2122 dev = ISC_LIST_HEAD(sock->send_list);
2125 poke:
2126 if (!ISC_LIST_EMPTY(sock->send_list))
2127 select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE);
2129 UNLOCK(&sock->lock);
2132 static void
2133 process_fds(isc_socketmgr_t *manager, int maxfd,
2134 fd_set *readfds, fd_set *writefds)
2136 int i;
2137 isc_socket_t *sock;
2138 isc_boolean_t unlock_sock;
2140 REQUIRE(maxfd <= (int)FD_SETSIZE);
2143 * Process read/writes on other fds here. Avoid locking
2144 * and unlocking twice if both reads and writes are possible.
2146 for (i = 0; i < maxfd; i++) {
2147 #ifdef ISC_PLATFORM_USETHREADS
2148 if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1])
2149 continue;
2150 #endif /* ISC_PLATFORM_USETHREADS */
2152 if (manager->fdstate[i] == CLOSE_PENDING) {
2153 manager->fdstate[i] = CLOSED;
2154 FD_CLR(i, &manager->read_fds);
2155 FD_CLR(i, &manager->write_fds);
2157 (void)close(i);
2159 continue;
2162 sock = manager->fds[i];
2163 unlock_sock = ISC_FALSE;
2164 if (FD_ISSET(i, readfds)) {
2165 if (sock == NULL) {
2166 FD_CLR(i, &manager->read_fds);
2167 goto check_write;
2169 unlock_sock = ISC_TRUE;
2170 LOCK(&sock->lock);
2171 if (!SOCK_DEAD(sock)) {
2172 if (sock->listener)
2173 dispatch_accept(sock);
2174 else
2175 dispatch_recv(sock);
2177 FD_CLR(i, &manager->read_fds);
2179 check_write:
2180 if (FD_ISSET(i, writefds)) {
2181 if (sock == NULL) {
2182 FD_CLR(i, &manager->write_fds);
2183 continue;
2185 if (!unlock_sock) {
2186 unlock_sock = ISC_TRUE;
2187 LOCK(&sock->lock);
2189 if (!SOCK_DEAD(sock)) {
2190 if (sock->connecting)
2191 dispatch_connect(sock);
2192 else
2193 dispatch_send(sock);
2195 FD_CLR(i, &manager->write_fds);
2197 if (unlock_sock)
2198 UNLOCK(&sock->lock);
#ifdef ISC_PLATFORM_USETHREADS
/*
 * This is the thread that will loop forever, always in a select or poll
 * call.
 *
 * When select returns something to do, track down what thread gets to do
 * this I/O and post the event to it.
 *
 * The manager lock is held throughout, except while the thread is inside
 * select().  The loop ends once a SELECT_POKE_SHUTDOWN message has been
 * read from the control pipe.
 */
static isc_threadresult_t
watcher(void *uap) {
	isc_socketmgr_t *manager = uap;
	isc_boolean_t done = ISC_FALSE;
	int ctlfd;
	int cc;
	fd_set readfds;
	fd_set writefds;
	int msg, fd;
	int maxfd;
	char strbuf[ISC_STRERRORSIZE];

	/*
	 * Get the control fd here.  This will never change.
	 */
	LOCK(&manager->lock);
	ctlfd = manager->pipe_fds[0];

	while (!done) {
		/*
		 * Wait until select() succeeds.  Each attempt works on a
		 * fresh snapshot of the manager's watch sets, taken while
		 * the lock is still held.
		 */
		for (;;) {
			readfds = manager->read_fds;
			writefds = manager->write_fds;
			maxfd = manager->maxfd + 1;

			UNLOCK(&manager->lock);

			cc = select(maxfd, &readfds, &writefds, NULL, NULL);
			if (cc < 0 && !SOFT_ERROR(errno)) {
				isc__strerror(errno, strbuf, sizeof(strbuf));
				FATAL_ERROR(__FILE__, __LINE__,
					    "select() %s: %s",
					    isc_msgcat_get(isc_msgcat,
							   ISC_MSGSET_GENERAL,
							   ISC_MSG_FAILED,
							   "failed"),
					    strbuf);
			}

			LOCK(&manager->lock);

			if (cc >= 0)
				break;
		}

		/*
		 * Process reads on internal, control fd.
		 */
		if (FD_ISSET(ctlfd, &readfds)) {
			do {
				select_readmsg(manager, &fd, &msg);

				manager_log(manager, IOEVENT,
					    isc_msgcat_get(isc_msgcat,
							   ISC_MSGSET_SOCKET,
							   ISC_MSG_WATCHERMSG,
							   "watcher got message %d"),
					    msg);

				if (msg == SELECT_POKE_SHUTDOWN) {
					/*
					 * Handle shutdown message.  We
					 * really should jump out of the outer
					 * loop right away, but it doesn't
					 * matter if we have to do a little
					 * more work first.
					 */
					done = ISC_TRUE;
				} else if (msg != SELECT_POKE_NOTHING) {
					/*
					 * This is a wakeup on a socket.  Look
					 * at the event queue for both read and
					 * write, and decide if we need to
					 * watch on it now or not.
					 */
					wakeup_socket(manager, fd, msg);
				}

				/*
				 * Stop on shutdown or when there is nothing
				 * left to read.
				 */
			} while (msg != SELECT_POKE_NOTHING &&
				 msg != SELECT_POKE_SHUTDOWN);
		}

		process_fds(manager, maxfd, &readfds, &writefds);
	}

	manager_log(manager, TRACE,
		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
				   ISC_MSG_EXITING, "watcher exiting"));

	UNLOCK(&manager->lock);
	return ((isc_threadresult_t)0);
}
#endif /* ISC_PLATFORM_USETHREADS */
2311 * Create a new socket manager.
2313 isc_result_t
2314 isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2315 isc_socketmgr_t *manager;
2316 #ifdef ISC_PLATFORM_USETHREADS
2317 char strbuf[ISC_STRERRORSIZE];
2318 #endif
2320 REQUIRE(managerp != NULL && *managerp == NULL);
2322 #ifndef ISC_PLATFORM_USETHREADS
2323 if (socketmgr != NULL) {
2324 socketmgr->refs++;
2325 *managerp = socketmgr;
2326 return (ISC_R_SUCCESS);
2328 #endif /* ISC_PLATFORM_USETHREADS */
2330 manager = isc_mem_get(mctx, sizeof(*manager));
2331 if (manager == NULL)
2332 return (ISC_R_NOMEMORY);
2334 manager->magic = SOCKET_MANAGER_MAGIC;
2335 manager->mctx = NULL;
2336 memset(manager->fds, 0, sizeof(manager->fds));
2337 ISC_LIST_INIT(manager->socklist);
2338 if (isc_mutex_init(&manager->lock) != ISC_R_SUCCESS) {
2339 isc_mem_put(mctx, manager, sizeof(*manager));
2340 UNEXPECTED_ERROR(__FILE__, __LINE__,
2341 "isc_mutex_init() %s",
2342 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2343 ISC_MSG_FAILED, "failed"));
2344 return (ISC_R_UNEXPECTED);
2346 #ifdef ISC_PLATFORM_USETHREADS
2347 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2348 DESTROYLOCK(&manager->lock);
2349 isc_mem_put(mctx, manager, sizeof(*manager));
2350 UNEXPECTED_ERROR(__FILE__, __LINE__,
2351 "isc_condition_init() %s",
2352 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2353 ISC_MSG_FAILED, "failed"));
2354 return (ISC_R_UNEXPECTED);
2358 * Create the special fds that will be used to wake up the
2359 * select/poll loop when something internal needs to be done.
2361 if (pipe(manager->pipe_fds) != 0) {
2362 DESTROYLOCK(&manager->lock);
2363 isc_mem_put(mctx, manager, sizeof(*manager));
2364 isc__strerror(errno, strbuf, sizeof(strbuf));
2365 UNEXPECTED_ERROR(__FILE__, __LINE__,
2366 "pipe() %s: %s",
2367 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2368 ISC_MSG_FAILED, "failed"),
2369 strbuf);
2371 return (ISC_R_UNEXPECTED);
2374 RUNTIME_CHECK(make_nonblock(manager->pipe_fds[0]) == ISC_R_SUCCESS);
2375 #if 0
2376 RUNTIME_CHECK(make_nonblock(manager->pipe_fds[1]) == ISC_R_SUCCESS);
2377 #endif
2378 #else /* ISC_PLATFORM_USETHREADS */
2379 manager->refs = 1;
2380 #endif /* ISC_PLATFORM_USETHREADS */
2383 * Set up initial state for the select loop
2385 FD_ZERO(&manager->read_fds);
2386 FD_ZERO(&manager->write_fds);
2387 #ifdef ISC_PLATFORM_USETHREADS
2388 FD_SET(manager->pipe_fds[0], &manager->read_fds);
2389 manager->maxfd = manager->pipe_fds[0];
2390 #else /* ISC_PLATFORM_USETHREADS */
2391 manager->maxfd = 0;
2392 #endif /* ISC_PLATFORM_USETHREADS */
2393 memset(manager->fdstate, 0, sizeof(manager->fdstate));
2395 #ifdef ISC_PLATFORM_USETHREADS
2397 * Start up the select/poll thread.
2399 if (isc_thread_create(watcher, manager, &manager->watcher) !=
2400 ISC_R_SUCCESS) {
2401 (void)close(manager->pipe_fds[0]);
2402 (void)close(manager->pipe_fds[1]);
2403 DESTROYLOCK(&manager->lock);
2404 isc_mem_put(mctx, manager, sizeof(*manager));
2405 UNEXPECTED_ERROR(__FILE__, __LINE__,
2406 "isc_thread_create() %s",
2407 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2408 ISC_MSG_FAILED, "failed"));
2409 return (ISC_R_UNEXPECTED);
2411 #endif /* ISC_PLATFORM_USETHREADS */
2412 isc_mem_attach(mctx, &manager->mctx);
2414 #ifndef ISC_PLATFORM_USETHREADS
2415 socketmgr = manager;
2416 #endif /* ISC_PLATFORM_USETHREADS */
2417 *managerp = manager;
2419 return (ISC_R_SUCCESS);
2422 void
2423 isc_socketmgr_destroy(isc_socketmgr_t **managerp) {
2424 isc_socketmgr_t *manager;
2425 int i;
2426 isc_mem_t *mctx;
2429 * Destroy a socket manager.
2432 REQUIRE(managerp != NULL);
2433 manager = *managerp;
2434 REQUIRE(VALID_MANAGER(manager));
2436 #ifndef ISC_PLATFORM_USETHREADS
2437 if (manager->refs > 1) {
2438 manager->refs--;
2439 *managerp = NULL;
2440 return;
2442 #endif /* ISC_PLATFORM_USETHREADS */
2444 LOCK(&manager->lock);
2446 #ifdef ISC_PLATFORM_USETHREADS
2448 * Wait for all sockets to be destroyed.
2450 while (!ISC_LIST_EMPTY(manager->socklist)) {
2451 manager_log(manager, CREATION,
2452 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2453 ISC_MSG_SOCKETSREMAIN,
2454 "sockets exist"));
2455 WAIT(&manager->shutdown_ok, &manager->lock);
2457 #else /* ISC_PLATFORM_USETHREADS */
2459 * Hope all sockets have been destroyed.
2461 if (!ISC_LIST_EMPTY(manager->socklist)) {
2462 manager_log(manager, CREATION,
2463 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2464 ISC_MSG_SOCKETSREMAIN,
2465 "sockets exist"));
2466 INSIST(0);
2468 #endif /* ISC_PLATFORM_USETHREADS */
2470 UNLOCK(&manager->lock);
2473 * Here, poke our select/poll thread. Do this by closing the write
2474 * half of the pipe, which will send EOF to the read half.
2475 * This is currently a no-op in the non-threaded case.
2477 select_poke(manager, 0, SELECT_POKE_SHUTDOWN);
2479 #ifdef ISC_PLATFORM_USETHREADS
2481 * Wait for thread to exit.
2483 if (isc_thread_join(manager->watcher, NULL) != ISC_R_SUCCESS)
2484 UNEXPECTED_ERROR(__FILE__, __LINE__,
2485 "isc_thread_join() %s",
2486 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2487 ISC_MSG_FAILED, "failed"));
2488 #endif /* ISC_PLATFORM_USETHREADS */
2491 * Clean up.
2493 #ifdef ISC_PLATFORM_USETHREADS
2494 (void)close(manager->pipe_fds[0]);
2495 (void)close(manager->pipe_fds[1]);
2496 (void)isc_condition_destroy(&manager->shutdown_ok);
2497 #endif /* ISC_PLATFORM_USETHREADS */
2499 for (i = 0; i < (int)FD_SETSIZE; i++)
2500 if (manager->fdstate[i] == CLOSE_PENDING)
2501 (void)close(i);
2503 DESTROYLOCK(&manager->lock);
2504 manager->magic = 0;
2505 mctx= manager->mctx;
2506 isc_mem_put(mctx, manager, sizeof(*manager));
2508 isc_mem_detach(&mctx);
2510 *managerp = NULL;
2513 static isc_result_t
2514 socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2515 unsigned int flags)
2517 int io_state;
2518 isc_boolean_t have_lock = ISC_FALSE;
2519 isc_task_t *ntask = NULL;
2520 isc_result_t result = ISC_R_SUCCESS;
2522 dev->ev_sender = task;
2524 if (sock->type == isc_sockettype_udp) {
2525 io_state = doio_recv(sock, dev);
2526 } else {
2527 LOCK(&sock->lock);
2528 have_lock = ISC_TRUE;
2530 if (ISC_LIST_EMPTY(sock->recv_list))
2531 io_state = doio_recv(sock, dev);
2532 else
2533 io_state = DOIO_SOFT;
2536 switch (io_state) {
2537 case DOIO_SOFT:
2539 * We couldn't read all or part of the request right now, so
2540 * queue it.
2542 * Attach to socket and to task
2544 isc_task_attach(task, &ntask);
2545 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2547 if (!have_lock) {
2548 LOCK(&sock->lock);
2549 have_lock = ISC_TRUE;
2553 * Enqueue the request. If the socket was previously not being
2554 * watched, poke the watcher to start paying attention to it.
2556 if (ISC_LIST_EMPTY(sock->recv_list))
2557 select_poke(sock->manager, sock->fd, SELECT_POKE_READ);
2558 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2560 socket_log(sock, NULL, EVENT, NULL, 0, 0,
2561 "socket_recv: event %p -> task %p",
2562 dev, ntask);
2564 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2565 result = ISC_R_INPROGRESS;
2566 break;
2568 case DOIO_EOF:
2569 dev->result = ISC_R_EOF;
2570 /* fallthrough */
2572 case DOIO_HARD:
2573 case DOIO_SUCCESS:
2574 if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
2575 send_recvdone_event(sock, &dev);
2576 break;
2579 if (have_lock)
2580 UNLOCK(&sock->lock);
2582 return (result);
2585 isc_result_t
2586 isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2587 unsigned int minimum, isc_task_t *task,
2588 isc_taskaction_t action, const void *arg)
2590 isc_socketevent_t *dev;
2591 isc_socketmgr_t *manager;
2592 unsigned int iocount;
2593 isc_buffer_t *buffer;
2595 REQUIRE(VALID_SOCKET(sock));
2596 REQUIRE(buflist != NULL);
2597 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2598 REQUIRE(task != NULL);
2599 REQUIRE(action != NULL);
2601 manager = sock->manager;
2602 REQUIRE(VALID_MANAGER(manager));
2604 iocount = isc_bufferlist_availablecount(buflist);
2605 REQUIRE(iocount > 0);
2607 INSIST(sock->bound);
2609 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2610 if (dev == NULL) {
2611 return (ISC_R_NOMEMORY);
2615 * UDP sockets are always partial read
2617 if (sock->type == isc_sockettype_udp)
2618 dev->minimum = 1;
2619 else {
2620 if (minimum == 0)
2621 dev->minimum = iocount;
2622 else
2623 dev->minimum = minimum;
2627 * Move each buffer from the passed in list to our internal one.
2629 buffer = ISC_LIST_HEAD(*buflist);
2630 while (buffer != NULL) {
2631 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2632 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2633 buffer = ISC_LIST_HEAD(*buflist);
2636 return (socket_recv(sock, dev, task, 0));
2639 isc_result_t
2640 isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum,
2641 isc_task_t *task, isc_taskaction_t action, const void *arg)
2643 isc_socketevent_t *dev;
2644 isc_socketmgr_t *manager;
2646 REQUIRE(VALID_SOCKET(sock));
2647 REQUIRE(action != NULL);
2649 manager = sock->manager;
2650 REQUIRE(VALID_MANAGER(manager));
2652 INSIST(sock->bound);
2654 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2655 if (dev == NULL)
2656 return (ISC_R_NOMEMORY);
2658 return (isc_socket_recv2(sock, region, minimum, task, dev, 0));
2661 isc_result_t
2662 isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
2663 unsigned int minimum, isc_task_t *task,
2664 isc_socketevent_t *event, unsigned int flags)
2666 event->ev_sender = sock;
2667 event->result = ISC_R_UNEXPECTED;
2668 ISC_LIST_INIT(event->bufferlist);
2669 event->region = *region;
2670 event->n = 0;
2671 event->offset = 0;
2672 event->attributes = 0;
2675 * UDP sockets are always partial read.
2677 if (sock->type == isc_sockettype_udp)
2678 event->minimum = 1;
2679 else {
2680 if (minimum == 0)
2681 event->minimum = region->length;
2682 else
2683 event->minimum = minimum;
2686 return (socket_recv(sock, event, task, flags));
2689 static isc_result_t
2690 socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2691 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2692 unsigned int flags)
2694 int io_state;
2695 isc_boolean_t have_lock = ISC_FALSE;
2696 isc_task_t *ntask = NULL;
2697 isc_result_t result = ISC_R_SUCCESS;
2699 dev->ev_sender = task;
2701 set_dev_address(address, sock, dev);
2702 if (pktinfo != NULL) {
2703 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2704 dev->pktinfo = *pktinfo;
2706 if (!isc_sockaddr_issitelocal(&dev->address) &&
2707 !isc_sockaddr_islinklocal(&dev->address)) {
2708 socket_log(sock, NULL, TRACE, isc_msgcat,
2709 ISC_MSGSET_SOCKET, ISC_MSG_PKTINFOPROVIDED,
2710 "pktinfo structure provided, ifindex %u "
2711 "(set to 0)", pktinfo->ipi6_ifindex);
2714 * Set the pktinfo index to 0 here, to let the
2715 * kernel decide what interface it should send on.
2717 dev->pktinfo.ipi6_ifindex = 0;
2721 if (sock->type == isc_sockettype_udp)
2722 io_state = doio_send(sock, dev);
2723 else {
2724 LOCK(&sock->lock);
2725 have_lock = ISC_TRUE;
2727 if (ISC_LIST_EMPTY(sock->send_list))
2728 io_state = doio_send(sock, dev);
2729 else
2730 io_state = DOIO_SOFT;
2733 switch (io_state) {
2734 case DOIO_SOFT:
2736 * We couldn't send all or part of the request right now, so
2737 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2739 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2740 isc_task_attach(task, &ntask);
2741 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2743 if (!have_lock) {
2744 LOCK(&sock->lock);
2745 have_lock = ISC_TRUE;
2749 * Enqueue the request. If the socket was previously
2750 * not being watched, poke the watcher to start
2751 * paying attention to it.
2753 if (ISC_LIST_EMPTY(sock->send_list))
2754 select_poke(sock->manager, sock->fd,
2755 SELECT_POKE_WRITE);
2756 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
2758 socket_log(sock, NULL, EVENT, NULL, 0, 0,
2759 "socket_send: event %p -> task %p",
2760 dev, ntask);
2762 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2763 result = ISC_R_INPROGRESS;
2764 break;
2767 case DOIO_HARD:
2768 case DOIO_SUCCESS:
2769 if ((flags & ISC_SOCKFLAG_IMMEDIATE) == 0)
2770 send_senddone_event(sock, &dev);
2771 break;
2774 if (have_lock)
2775 UNLOCK(&sock->lock);
2777 return (result);
2780 isc_result_t
2781 isc_socket_send(isc_socket_t *sock, isc_region_t *region,
2782 isc_task_t *task, isc_taskaction_t action, const void *arg)
2785 * REQUIRE() checking is performed in isc_socket_sendto().
2787 return (isc_socket_sendto(sock, region, task, action, arg, NULL,
2788 NULL));
2791 isc_result_t
2792 isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
2793 isc_task_t *task, isc_taskaction_t action, const void *arg,
2794 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2796 isc_socketevent_t *dev;
2797 isc_socketmgr_t *manager;
2799 REQUIRE(VALID_SOCKET(sock));
2800 REQUIRE(region != NULL);
2801 REQUIRE(task != NULL);
2802 REQUIRE(action != NULL);
2804 manager = sock->manager;
2805 REQUIRE(VALID_MANAGER(manager));
2807 INSIST(sock->bound);
2809 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2810 if (dev == NULL) {
2811 return (ISC_R_NOMEMORY);
2814 dev->region = *region;
2816 return (socket_send(sock, dev, task, address, pktinfo, 0));
2819 isc_result_t
2820 isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2821 isc_task_t *task, isc_taskaction_t action, const void *arg)
2823 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
2824 NULL));
2827 isc_result_t
2828 isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
2829 isc_task_t *task, isc_taskaction_t action, const void *arg,
2830 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
2832 isc_socketevent_t *dev;
2833 isc_socketmgr_t *manager;
2834 unsigned int iocount;
2835 isc_buffer_t *buffer;
2837 REQUIRE(VALID_SOCKET(sock));
2838 REQUIRE(buflist != NULL);
2839 REQUIRE(!ISC_LIST_EMPTY(*buflist));
2840 REQUIRE(task != NULL);
2841 REQUIRE(action != NULL);
2843 manager = sock->manager;
2844 REQUIRE(VALID_MANAGER(manager));
2846 iocount = isc_bufferlist_usedcount(buflist);
2847 REQUIRE(iocount > 0);
2849 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
2850 if (dev == NULL) {
2851 return (ISC_R_NOMEMORY);
2855 * Move each buffer from the passed in list to our internal one.
2857 buffer = ISC_LIST_HEAD(*buflist);
2858 while (buffer != NULL) {
2859 ISC_LIST_DEQUEUE(*buflist, buffer, link);
2860 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2861 buffer = ISC_LIST_HEAD(*buflist);
2864 return (socket_send(sock, dev, task, address, pktinfo, 0));
2867 isc_result_t
2868 isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
2869 isc_task_t *task,
2870 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2871 isc_socketevent_t *event, unsigned int flags)
2873 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
2874 if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
2875 REQUIRE(sock->type == isc_sockettype_udp);
2876 event->ev_sender = sock;
2877 event->result = ISC_R_UNEXPECTED;
2878 ISC_LIST_INIT(event->bufferlist);
2879 event->region = *region;
2880 event->n = 0;
2881 event->offset = 0;
2882 event->attributes = 0;
2884 return (socket_send(sock, event, task, address, pktinfo, flags));
2887 isc_result_t
2888 isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr) {
2889 char strbuf[ISC_STRERRORSIZE];
2890 int on = 1;
2892 LOCK(&sock->lock);
2894 INSIST(!sock->bound);
2896 if (sock->pf != sockaddr->type.sa.sa_family) {
2897 UNLOCK(&sock->lock);
2898 return (ISC_R_FAMILYMISMATCH);
2901 * Only set SO_REUSEADDR when we want a specific port.
2903 if (isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
2904 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on,
2905 sizeof(on)) < 0) {
2906 UNEXPECTED_ERROR(__FILE__, __LINE__,
2907 "setsockopt(%d) %s", sock->fd,
2908 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2909 ISC_MSG_FAILED, "failed"));
2910 /* Press on... */
2912 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
2913 UNLOCK(&sock->lock);
2914 switch (errno) {
2915 case EACCES:
2916 return (ISC_R_NOPERM);
2917 case EADDRNOTAVAIL:
2918 return (ISC_R_ADDRNOTAVAIL);
2919 case EADDRINUSE:
2920 return (ISC_R_ADDRINUSE);
2921 case EINVAL:
2922 return (ISC_R_BOUND);
2923 default:
2924 isc__strerror(errno, strbuf, sizeof(strbuf));
2925 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
2926 strbuf);
2927 return (ISC_R_UNEXPECTED);
2931 socket_log(sock, sockaddr, TRACE,
2932 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
2933 sock->bound = 1;
2935 UNLOCK(&sock->lock);
2936 return (ISC_R_SUCCESS);
2939 isc_result_t
2940 isc_socket_filter(isc_socket_t *sock, const char *filter) {
2941 #ifdef SO_ACCEPTFILTER
2942 char strbuf[ISC_STRERRORSIZE];
2943 struct accept_filter_arg afa;
2944 #else
2945 UNUSED(sock);
2946 UNUSED(filter);
2947 #endif
2949 REQUIRE(VALID_SOCKET(sock));
2951 #ifdef SO_ACCEPTFILTER
2952 bzero(&afa, sizeof(afa));
2953 strncpy(afa.af_name, filter, sizeof(afa.af_name));
2954 if (setsockopt(sock->fd, SOL_SOCKET, SO_ACCEPTFILTER,
2955 &afa, sizeof(afa)) == -1) {
2956 isc__strerror(errno, strbuf, sizeof(strbuf));
2957 socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
2958 ISC_MSG_FILTER, "setsockopt(SO_ACCEPTFILTER): %s",
2959 strbuf);
2960 return (ISC_R_FAILURE);
2962 return (ISC_R_SUCCESS);
2963 #else
2964 return (ISC_R_NOTIMPLEMENTED);
2965 #endif
2969 * Set up to listen on a given socket. We do this by creating an internal
2970 * event that will be dispatched when the socket has read activity. The
2971 * watcher will send the internal event to the task when there is a new
2972 * connection.
2974 * Unlike in read, we don't preallocate a done event here. Every time there
2975 * is a new connection we'll have to allocate a new one anyway, so we might
2976 * as well keep things simple rather than having to track them.
2978 isc_result_t
2979 isc_socket_listen(isc_socket_t *sock, unsigned int backlog) {
2980 char strbuf[ISC_STRERRORSIZE];
2982 REQUIRE(VALID_SOCKET(sock));
2984 LOCK(&sock->lock);
2986 REQUIRE(!sock->listener);
2987 REQUIRE(sock->bound);
2988 REQUIRE(sock->type == isc_sockettype_tcp);
2990 if (backlog == 0)
2991 backlog = SOMAXCONN;
2993 if (listen(sock->fd, (int)backlog) < 0) {
2994 UNLOCK(&sock->lock);
2995 isc__strerror(errno, strbuf, sizeof(strbuf));
2997 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
2999 return (ISC_R_UNEXPECTED);
3002 sock->listener = 1;
3004 UNLOCK(&sock->lock);
3005 return (ISC_R_SUCCESS);
3009 * This should try to do aggressive accept() XXXMLG
3011 isc_result_t
3012 isc_socket_accept(isc_socket_t *sock,
3013 isc_task_t *task, isc_taskaction_t action, const void *arg)
3015 isc_socket_newconnev_t *dev;
3016 isc_socketmgr_t *manager;
3017 isc_task_t *ntask = NULL;
3018 isc_socket_t *nsock;
3019 isc_result_t ret;
3020 isc_boolean_t do_poke = ISC_FALSE;
3022 REQUIRE(VALID_SOCKET(sock));
3023 manager = sock->manager;
3024 REQUIRE(VALID_MANAGER(manager));
3026 LOCK(&sock->lock);
3028 REQUIRE(sock->listener);
3031 * Sender field is overloaded here with the task we will be sending
3032 * this event to. Just before the actual event is delivered the
3033 * actual ev_sender will be touched up to be the socket.
3035 dev = (isc_socket_newconnev_t *)
3036 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3037 action, arg, sizeof(*dev));
3038 if (dev == NULL) {
3039 UNLOCK(&sock->lock);
3040 return (ISC_R_NOMEMORY);
3042 ISC_LINK_INIT(dev, ev_link);
3044 ret = allocate_socket(manager, sock->type, &nsock);
3045 if (ret != ISC_R_SUCCESS) {
3046 isc_event_free(ISC_EVENT_PTR(&dev));
3047 UNLOCK(&sock->lock);
3048 return (ret);
3052 * Attach to socket and to task.
3054 isc_task_attach(task, &ntask);
3055 nsock->references++;
3057 dev->ev_sender = ntask;
3058 dev->newsocket = nsock;
3061 * Poke watcher here. We still have the socket locked, so there
3062 * is no race condition. We will keep the lock for such a short
3063 * bit of time waking it up now or later won't matter all that much.
3065 if (ISC_LIST_EMPTY(sock->accept_list))
3066 do_poke = ISC_TRUE;
3068 ISC_LIST_ENQUEUE(sock->accept_list, dev, ev_link);
3070 if (do_poke)
3071 select_poke(manager, sock->fd, SELECT_POKE_ACCEPT);
3073 UNLOCK(&sock->lock);
3074 return (ISC_R_SUCCESS);
3077 isc_result_t
3078 isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3079 isc_task_t *task, isc_taskaction_t action, const void *arg)
3081 isc_socket_connev_t *dev;
3082 isc_task_t *ntask = NULL;
3083 isc_socketmgr_t *manager;
3084 int cc;
3085 char strbuf[ISC_STRERRORSIZE];
3087 REQUIRE(VALID_SOCKET(sock));
3088 REQUIRE(addr != NULL);
3089 REQUIRE(task != NULL);
3090 REQUIRE(action != NULL);
3092 manager = sock->manager;
3093 REQUIRE(VALID_MANAGER(manager));
3094 REQUIRE(addr != NULL);
3096 if (isc_sockaddr_ismulticast(addr))
3097 return (ISC_R_MULTICAST);
3099 LOCK(&sock->lock);
3101 REQUIRE(!sock->connecting);
3103 dev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3104 ISC_SOCKEVENT_CONNECT,
3105 action, arg,
3106 sizeof(*dev));
3107 if (dev == NULL) {
3108 UNLOCK(&sock->lock);
3109 return (ISC_R_NOMEMORY);
3111 ISC_LINK_INIT(dev, ev_link);
3114 * Try to do the connect right away, as there can be only one
3115 * outstanding, and it might happen to complete.
3117 sock->address = *addr;
3118 cc = connect(sock->fd, &addr->type.sa, addr->length);
3119 if (cc < 0) {
3120 if (SOFT_ERROR(errno) || errno == EINPROGRESS)
3121 goto queue;
3123 switch (errno) {
3124 #define ERROR_MATCH(a, b) case a: dev->result = b; goto err_exit;
3125 ERROR_MATCH(EACCES, ISC_R_NOPERM);
3126 ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
3127 ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
3128 ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
3129 ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
3130 #ifdef EHOSTDOWN
3131 ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
3132 #endif
3133 ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
3134 ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
3135 ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
3136 ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
3137 ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
3138 #undef ERROR_MATCH
3141 sock->connected = 0;
3143 isc__strerror(errno, strbuf, sizeof(strbuf));
3144 UNEXPECTED_ERROR(__FILE__, __LINE__, "%d/%s", errno, strbuf);
3146 UNLOCK(&sock->lock);
3147 isc_event_free(ISC_EVENT_PTR(&dev));
3148 return (ISC_R_UNEXPECTED);
3150 err_exit:
3151 sock->connected = 0;
3152 isc_task_send(task, ISC_EVENT_PTR(&dev));
3154 UNLOCK(&sock->lock);
3155 return (ISC_R_SUCCESS);
3159 * If connect completed, fire off the done event.
3161 if (cc == 0) {
3162 sock->connected = 1;
3163 sock->bound = 1;
3164 dev->result = ISC_R_SUCCESS;
3165 isc_task_send(task, ISC_EVENT_PTR(&dev));
3167 UNLOCK(&sock->lock);
3168 return (ISC_R_SUCCESS);
3171 queue:
3174 * Attach to task.
3176 isc_task_attach(task, &ntask);
3178 sock->connecting = 1;
3180 dev->ev_sender = ntask;
3183 * Poke watcher here. We still have the socket locked, so there
3184 * is no race condition. We will keep the lock for such a short
3185 * bit of time waking it up now or later won't matter all that much.
3187 if (sock->connect_ev == NULL)
3188 select_poke(manager, sock->fd, SELECT_POKE_CONNECT);
3190 sock->connect_ev = dev;
3192 UNLOCK(&sock->lock);
3193 return (ISC_R_SUCCESS);
3197 * Called when a socket with a pending connect() finishes.
3199 static void
3200 internal_connect(isc_task_t *me, isc_event_t *ev) {
3201 isc_socket_t *sock;
3202 isc_socket_connev_t *dev;
3203 isc_task_t *task;
3204 int cc;
3205 ISC_SOCKADDR_LEN_T optlen;
3206 char strbuf[ISC_STRERRORSIZE];
3207 char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3209 UNUSED(me);
3210 INSIST(ev->ev_type == ISC_SOCKEVENT_INTW);
3212 sock = ev->ev_sender;
3213 INSIST(VALID_SOCKET(sock));
3215 LOCK(&sock->lock);
3218 * When the internal event was sent the reference count was bumped
3219 * to keep the socket around for us. Decrement the count here.
3221 INSIST(sock->references > 0);
3222 sock->references--;
3223 if (sock->references == 0) {
3224 UNLOCK(&sock->lock);
3225 destroy(&sock);
3226 return;
3230 * Has this event been canceled?
3232 dev = sock->connect_ev;
3233 if (dev == NULL) {
3234 INSIST(!sock->connecting);
3235 UNLOCK(&sock->lock);
3236 return;
3239 INSIST(sock->connecting);
3240 sock->connecting = 0;
3243 * Get any possible error status here.
3245 optlen = sizeof(cc);
3246 if (getsockopt(sock->fd, SOL_SOCKET, SO_ERROR,
3247 (void *)&cc, (void *)&optlen) < 0)
3248 cc = errno;
3249 else
3250 errno = cc;
3252 if (errno != 0) {
3254 * If the error is EAGAIN, just re-select on this
3255 * fd and pretend nothing strange happened.
3257 if (SOFT_ERROR(errno) || errno == EINPROGRESS) {
3258 sock->connecting = 1;
3259 select_poke(sock->manager, sock->fd,
3260 SELECT_POKE_CONNECT);
3261 UNLOCK(&sock->lock);
3263 return;
3267 * Translate other errors into ISC_R_* flavors.
3269 switch (errno) {
3270 #define ERROR_MATCH(a, b) case a: dev->result = b; break;
3271 ERROR_MATCH(EACCES, ISC_R_NOPERM);
3272 ERROR_MATCH(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
3273 ERROR_MATCH(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
3274 ERROR_MATCH(ECONNREFUSED, ISC_R_CONNREFUSED);
3275 ERROR_MATCH(EHOSTUNREACH, ISC_R_HOSTUNREACH);
3276 #ifdef EHOSTDOWN
3277 ERROR_MATCH(EHOSTDOWN, ISC_R_HOSTUNREACH);
3278 #endif
3279 ERROR_MATCH(ENETUNREACH, ISC_R_NETUNREACH);
3280 ERROR_MATCH(ENOBUFS, ISC_R_NORESOURCES);
3281 ERROR_MATCH(EPERM, ISC_R_HOSTUNREACH);
3282 ERROR_MATCH(EPIPE, ISC_R_NOTCONNECTED);
3283 ERROR_MATCH(ETIMEDOUT, ISC_R_TIMEDOUT);
3284 ERROR_MATCH(ECONNRESET, ISC_R_CONNECTIONRESET);
3285 #undef ERROR_MATCH
3286 default:
3287 dev->result = ISC_R_UNEXPECTED;
3288 isc_sockaddr_format(&sock->address, peerbuf,
3289 sizeof(peerbuf));
3290 isc__strerror(errno, strbuf, sizeof(strbuf));
3291 UNEXPECTED_ERROR(__FILE__, __LINE__,
3292 "internal_connect: connect(%s) %s",
3293 peerbuf, strbuf);
3295 } else {
3296 dev->result = ISC_R_SUCCESS;
3297 sock->connected = 1;
3298 sock->bound = 1;
3301 sock->connect_ev = NULL;
3303 UNLOCK(&sock->lock);
3305 task = dev->ev_sender;
3306 dev->ev_sender = sock;
3307 isc_task_sendanddetach(&task, ISC_EVENT_PTR(&dev));
3310 isc_result_t
3311 isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3312 isc_result_t ret;
3314 REQUIRE(VALID_SOCKET(sock));
3315 REQUIRE(addressp != NULL);
3317 LOCK(&sock->lock);
3319 if (sock->connected) {
3320 *addressp = sock->address;
3321 ret = ISC_R_SUCCESS;
3322 } else {
3323 ret = ISC_R_NOTCONNECTED;
3326 UNLOCK(&sock->lock);
3328 return (ret);
3331 isc_result_t
3332 isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3333 ISC_SOCKADDR_LEN_T len;
3334 isc_result_t ret;
3335 char strbuf[ISC_STRERRORSIZE];
3337 REQUIRE(VALID_SOCKET(sock));
3338 REQUIRE(addressp != NULL);
3340 LOCK(&sock->lock);
3342 if (!sock->bound) {
3343 ret = ISC_R_NOTBOUND;
3344 goto out;
3347 ret = ISC_R_SUCCESS;
3349 len = sizeof(addressp->type);
3350 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3351 isc__strerror(errno, strbuf, sizeof(strbuf));
3352 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3353 strbuf);
3354 ret = ISC_R_UNEXPECTED;
3355 goto out;
3357 addressp->length = (unsigned int)len;
3359 out:
3360 UNLOCK(&sock->lock);
3362 return (ret);
3366 * Run through the list of events on this socket, and cancel the ones
3367 * queued for task "task" of type "how". "how" is a bitmask.
3369 void
3370 isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3372 REQUIRE(VALID_SOCKET(sock));
3375 * Quick exit if there is nothing to do. Don't even bother locking
3376 * in this case.
3378 if (how == 0)
3379 return;
3381 LOCK(&sock->lock);
3384 * All of these do the same thing, more or less.
3385 * Each will:
3386 * o If the internal event is marked as "posted" try to
3387 * remove it from the task's queue. If this fails, mark it
3388 * as canceled instead, and let the task clean it up later.
3389 * o For each I/O request for that task of that type, post
3390 * its done event with status of "ISC_R_CANCELED".
3391 * o Reset any state needed.
3393 if (((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV)
3394 && !ISC_LIST_EMPTY(sock->recv_list)) {
3395 isc_socketevent_t *dev;
3396 isc_socketevent_t *next;
3397 isc_task_t *current_task;
3399 dev = ISC_LIST_HEAD(sock->recv_list);
3401 while (dev != NULL) {
3402 current_task = dev->ev_sender;
3403 next = ISC_LIST_NEXT(dev, ev_link);
3405 if ((task == NULL) || (task == current_task)) {
3406 dev->result = ISC_R_CANCELED;
3407 send_recvdone_event(sock, &dev);
3409 dev = next;
3413 if (((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND)
3414 && !ISC_LIST_EMPTY(sock->send_list)) {
3415 isc_socketevent_t *dev;
3416 isc_socketevent_t *next;
3417 isc_task_t *current_task;
3419 dev = ISC_LIST_HEAD(sock->send_list);
3421 while (dev != NULL) {
3422 current_task = dev->ev_sender;
3423 next = ISC_LIST_NEXT(dev, ev_link);
3425 if ((task == NULL) || (task == current_task)) {
3426 dev->result = ISC_R_CANCELED;
3427 send_senddone_event(sock, &dev);
3429 dev = next;
3433 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3434 && !ISC_LIST_EMPTY(sock->accept_list)) {
3435 isc_socket_newconnev_t *dev;
3436 isc_socket_newconnev_t *next;
3437 isc_task_t *current_task;
3439 dev = ISC_LIST_HEAD(sock->accept_list);
3440 while (dev != NULL) {
3441 current_task = dev->ev_sender;
3442 next = ISC_LIST_NEXT(dev, ev_link);
3444 if ((task == NULL) || (task == current_task)) {
3446 ISC_LIST_UNLINK(sock->accept_list, dev,
3447 ev_link);
3449 dev->newsocket->references--;
3450 free_socket(&dev->newsocket);
3452 dev->result = ISC_R_CANCELED;
3453 dev->ev_sender = sock;
3454 isc_task_sendanddetach(&current_task,
3455 ISC_EVENT_PTR(&dev));
3458 dev = next;
3463 * Connecting is not a list.
3465 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3466 && sock->connect_ev != NULL) {
3467 isc_socket_connev_t *dev;
3468 isc_task_t *current_task;
3470 INSIST(sock->connecting);
3471 sock->connecting = 0;
3473 dev = sock->connect_ev;
3474 current_task = dev->ev_sender;
3476 if ((task == NULL) || (task == current_task)) {
3477 sock->connect_ev = NULL;
3479 dev->result = ISC_R_CANCELED;
3480 dev->ev_sender = sock;
3481 isc_task_sendanddetach(&current_task,
3482 ISC_EVENT_PTR(&dev));
3486 UNLOCK(&sock->lock);
3489 isc_sockettype_t
3490 isc_socket_gettype(isc_socket_t *sock) {
3491 REQUIRE(VALID_SOCKET(sock));
3493 return (sock->type);
3496 isc_boolean_t
3497 isc_socket_isbound(isc_socket_t *sock) {
3498 isc_boolean_t val;
3500 LOCK(&sock->lock);
3501 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3502 UNLOCK(&sock->lock);
3504 return (val);
3507 void
3508 isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3509 #if defined(IPV6_V6ONLY)
3510 int onoff = yes ? 1 : 0;
3511 #else
3512 UNUSED(yes);
3513 UNUSED(sock);
3514 #endif
3516 REQUIRE(VALID_SOCKET(sock));
3518 #ifdef IPV6_V6ONLY
3519 if (sock->pf == AF_INET6) {
3520 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3521 (void *)&onoff, sizeof(onoff));
3523 #endif
3526 #ifndef ISC_PLATFORM_USETHREADS
3527 void
3528 isc__socketmgr_getfdsets(fd_set *readset, fd_set *writeset, int *maxfd) {
3529 if (socketmgr == NULL)
3530 *maxfd = 0;
3531 else {
3532 *readset = socketmgr->read_fds;
3533 *writeset = socketmgr->write_fds;
3534 *maxfd = socketmgr->maxfd + 1;
3538 isc_result_t
3539 isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd) {
3540 isc_socketmgr_t *manager = socketmgr;
3542 if (manager == NULL)
3543 return (ISC_R_NOTFOUND);
3545 process_fds(manager, maxfd, readset, writeset);
3546 return (ISC_R_SUCCESS);
3548 #endif /* ISC_PLATFORM_USETHREADS */