2 * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (c) 1995-1999 by Internet Software Consortium
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
15 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 /* eventlib.c - implement glue for the eventlib
19 * vix 09sep95 [initial]
22 #include "port_before.h"
23 #include "fd_setsize.h"
25 #include <sys/types.h>
38 #include <isc/eventlib.h>
39 #include <isc/assertions.h>
40 #include "eventlib_p.h"
42 #include "port_after.h"
47 #define pselect Pselect
52 #if defined(NEED_PSELECT) || defined(USE_POLL)
53 static int pselect(int, void *, void *, void *,
63 evCreate(evContext
*opaqueCtx
) {
66 /* Make sure the memory heap is initialized. */
67 if (meminit(0, 0) < 0 && errno
!= EEXIST
)
81 INIT_LIST(ctx
->accepts
);
89 emulMaskInit(ctx
, rdLast
, EV_READ
, 1);
90 emulMaskInit(ctx
, rdNext
, EV_READ
, 0);
91 emulMaskInit(ctx
, wrLast
, EV_WRITE
, 1);
92 emulMaskInit(ctx
, wrNext
, EV_WRITE
, 0);
93 emulMaskInit(ctx
, exLast
, EV_EXCEPT
, 1);
94 emulMaskInit(ctx
, exNext
, EV_EXCEPT
, 0);
95 emulMaskInit(ctx
, nonblockBefore
, EV_WASNONBLOCKING
, 0);
97 FD_ZERO(&ctx
->rdNext
);
98 FD_ZERO(&ctx
->wrNext
);
99 FD_ZERO(&ctx
->exNext
);
100 FD_ZERO(&ctx
->nonblockBefore
);
103 ctx
->fdCount
= 0; /*%< Invalidate {rd,wr,ex}Last. */
105 ctx
->highestFD
= FD_SETSIZE
- 1;
106 memset(ctx
->fdTable
, 0, sizeof ctx
->fdTable
);
108 ctx
->highestFD
= INT_MAX
/ sizeof(struct pollfd
);
110 #endif /* USE_POLL */
111 #ifdef EVENTLIB_TIME_CHECKS
112 ctx
->lastFdCount
= 0;
121 ctx
->lastEventTime
= evNowTime();
122 #ifdef EVENTLIB_TIME_CHECKS
123 ctx
->lastSelectTime
= ctx
->lastEventTime
;
125 ctx
->timers
= evCreateTimers(ctx
);
126 if (ctx
->timers
== NULL
)
130 ctx
->waitLists
= NULL
;
131 ctx
->waitDone
.first
= ctx
->waitDone
.last
= NULL
;
132 ctx
->waitDone
.prev
= ctx
->waitDone
.next
= NULL
;
134 opaqueCtx
->opaque
= ctx
;
139 evSetDebug(evContext opaqueCtx
, int level
, FILE *output
) {
140 evContext_p
*ctx
= opaqueCtx
.opaque
;
143 ctx
->output
= output
;
147 evDestroy(evContext opaqueCtx
) {
148 evContext_p
*ctx
= opaqueCtx
.opaque
;
149 int revs
= 424242; /*%< Doug Adams. */
150 evWaitList
*this_wl
, *next_wl
;
151 evWait
*this_wait
, *next_wait
;
154 while (revs
-- > 0 && ctx
->conns
!= NULL
) {
157 id
.opaque
= ctx
->conns
;
158 (void) evCancelConn(opaqueCtx
, id
);
163 while (revs
-- > 0 && ctx
->streams
!= NULL
) {
166 id
.opaque
= ctx
->streams
;
167 (void) evCancelRW(opaqueCtx
, id
);
171 while (revs
-- > 0 && ctx
->files
!= NULL
) {
174 id
.opaque
= ctx
->files
;
175 (void) evDeselectFD(opaqueCtx
, id
);
180 evDestroyTimers(ctx
);
183 for (this_wl
= ctx
->waitLists
;
184 revs
-- > 0 && this_wl
!= NULL
;
186 next_wl
= this_wl
->next
;
187 for (this_wait
= this_wl
->first
;
188 revs
-- > 0 && this_wait
!= NULL
;
189 this_wait
= next_wait
) {
190 next_wait
= this_wait
->next
;
195 for (this_wait
= ctx
->waitDone
.first
;
196 revs
-- > 0 && this_wait
!= NULL
;
197 this_wait
= next_wait
) {
198 next_wait
= this_wait
->next
;
207 evGetNext(evContext opaqueCtx
, evEvent
*opaqueEv
, int options
) {
208 evContext_p
*ctx
= opaqueCtx
.opaque
;
209 struct timespec nextTime
;
212 int x
, pselect_errno
, timerPast
;
213 #ifdef EVENTLIB_TIME_CHECKS
214 struct timespec interval
;
217 /* Ensure that exactly one of EV_POLL or EV_WAIT was specified. */
218 x
= ((options
& EV_POLL
) != 0) + ((options
& EV_WAIT
) != 0);
222 /* Get the time of day. We'll do this again after select() blocks. */
223 ctx
->lastEventTime
= evNowTime();
226 /* Finished accept()'s do not require a select(). */
227 if (!EMPTY(ctx
->accepts
)) {
230 new->u
.accept
.this = HEAD(ctx
->accepts
);
231 UNLINK(ctx
->accepts
, HEAD(ctx
->accepts
), link
);
232 opaqueEv
->opaque
= new;
236 /* Stream IO does not require a select(). */
237 if (ctx
->strDone
!= NULL
) {
240 new->u
.stream
.this = ctx
->strDone
;
241 ctx
->strDone
= ctx
->strDone
->nextDone
;
242 if (ctx
->strDone
== NULL
)
244 opaqueEv
->opaque
= new;
248 /* Waits do not require a select(). */
249 if (ctx
->waitDone
.first
!= NULL
) {
252 new->u
.wait
.this = ctx
->waitDone
.first
;
253 ctx
->waitDone
.first
= ctx
->waitDone
.first
->next
;
254 if (ctx
->waitDone
.first
== NULL
)
255 ctx
->waitDone
.last
= NULL
;
256 opaqueEv
->opaque
= new;
260 /* Get the status and content of the next timer. */
261 if ((nextTimer
= heap_element(ctx
->timers
, 1)) != NULL
) {
262 nextTime
= nextTimer
->due
;
263 timerPast
= (evCmpTime(nextTime
, ctx
->lastEventTime
) <= 0);
265 timerPast
= 0; /*%< Make gcc happy. */
266 evPrintf(ctx
, 9, "evGetNext: fdCount %d\n", ctx
->fdCount
);
267 if (ctx
->fdCount
== 0) {
268 static const struct timespec NoTime
= {0, 0L};
269 enum { JustPoll
, Block
, Timer
} m
;
270 struct timespec t
, *tp
;
272 /* Are there any events at all? */
273 if ((options
& EV_WAIT
) != 0 && !nextTimer
&& ctx
->fdMax
== -1)
276 /* Figure out what select()'s timeout parameter should be. */
277 if ((options
& EV_POLL
) != 0) {
281 } else if (nextTimer
== NULL
) {
285 } else if (timerPast
) {
291 /* ``t'' filled in later. */
294 #ifdef EVENTLIB_TIME_CHECKS
295 if (ctx
->debug
> 0) {
296 interval
= evSubTime(ctx
->lastEventTime
,
297 ctx
->lastSelectTime
);
298 if (interval
.tv_sec
> 0 || interval
.tv_nsec
> 0)
300 "time between pselect() %u.%09u count %d\n",
301 interval
.tv_sec
, interval
.tv_nsec
,
307 /* XXX need to copy only the bits we are using. */
308 ctx
->rdLast
= ctx
->rdNext
;
309 ctx
->wrLast
= ctx
->wrNext
;
310 ctx
->exLast
= ctx
->exNext
;
313 * The pollfd structure uses separate fields for
314 * the input and output events (corresponding to
315 * the ??Next and ??Last fd sets), so there's no
316 * need to copy one to the other.
318 #endif /* USE_POLL */
321 t
= evSubTime(nextTime
, ctx
->lastEventTime
);
324 /* XXX should predict system's earliness and adjust. */
325 x
= pselect(ctx
->fdMax
+1,
326 &ctx
->rdLast
, &ctx
->wrLast
, &ctx
->exLast
,
328 pselect_errno
= errno
;
331 evPrintf(ctx
, 4, "select() returns %d (err: %s)\n",
332 x
, (x
== -1) ? strerror(errno
) : "none");
334 evPrintf(ctx
, 4, "poll() returns %d (err: %s)\n",
335 x
, (x
== -1) ? strerror(errno
) : "none");
336 #endif /* USE_POLL */
337 /* Anything but a poll can change the time. */
339 ctx
->lastEventTime
= evNowTime();
341 /* Select() likes to finish about 10ms early. */
342 } while (x
== 0 && m
== Timer
&&
343 evCmpTime(ctx
->lastEventTime
, nextTime
) < 0);
344 #ifdef EVENTLIB_TIME_CHECKS
345 ctx
->lastSelectTime
= ctx
->lastEventTime
;
348 if (pselect_errno
== EINTR
) {
349 if ((options
& EV_NULL
) != 0)
354 opaqueEv
->opaque
= new;
357 if (pselect_errno
== EBADF
) {
358 for (x
= 0; x
<= ctx
->fdMax
; x
++) {
361 if (FD_ISSET(x
, &ctx
->rdNext
) == 0 &&
362 FD_ISSET(x
, &ctx
->wrNext
) == 0 &&
363 FD_ISSET(x
, &ctx
->exNext
) == 0)
365 if (fstat(x
, &sb
) == -1 &&
367 evPrintf(ctx
, 1, "EBADF: %d\n",
372 EV_ERR(pselect_errno
);
374 if (x
== 0 && (nextTimer
== NULL
|| !timerPast
) &&
378 #ifdef EVENTLIB_TIME_CHECKS
379 ctx
->lastFdCount
= x
;
382 INSIST(nextTimer
|| ctx
->fdCount
);
384 /* Timers go first since we'd like them to be accurate. */
385 if (nextTimer
&& !timerPast
) {
386 /* Has anything happened since we blocked? */
387 timerPast
= (evCmpTime(nextTime
, ctx
->lastEventTime
) <= 0);
389 if (nextTimer
&& timerPast
) {
392 new->u
.timer
.this = nextTimer
;
393 opaqueEv
->opaque
= new;
397 /* No timers, so there should be a ready file descriptor. */
399 while (ctx
->fdCount
> 0) {
403 if (ctx
->fdNext
== NULL
) {
406 * Hitting the end twice means that the last
407 * select() found some FD's which have since
410 * On some systems, the count returned by
411 * selects is the total number of bits in
412 * all masks that are set, and on others it's
413 * the number of fd's that have some bit set,
414 * and on others, it's just broken. We
415 * always assume that it's the number of
416 * bits set in all masks, because that's what
417 * the man page says it should do, and
418 * the worst that can happen is we do an
424 ctx
->fdNext
= ctx
->files
;
427 ctx
->fdNext
= fid
->next
;
431 if (FD_ISSET(fd
, &ctx
->rdLast
))
432 eventmask
|= EV_READ
;
433 if (FD_ISSET(fd
, &ctx
->wrLast
))
434 eventmask
|= EV_WRITE
;
435 if (FD_ISSET(fd
, &ctx
->exLast
))
436 eventmask
|= EV_EXCEPT
;
437 eventmask
&= fid
->eventmask
;
438 if (eventmask
!= 0) {
439 if ((eventmask
& EV_READ
) != 0) {
440 FD_CLR(fd
, &ctx
->rdLast
);
443 if ((eventmask
& EV_WRITE
) != 0) {
444 FD_CLR(fd
, &ctx
->wrLast
);
447 if ((eventmask
& EV_EXCEPT
) != 0) {
448 FD_CLR(fd
, &ctx
->exLast
);
453 new->u
.file
.this = fid
;
454 new->u
.file
.eventmask
= eventmask
;
455 opaqueEv
->opaque
= new;
459 if (ctx
->fdCount
< 0) {
461 * select()'s count is off on a number of systems, and
462 * can result in fdCount < 0.
464 evPrintf(ctx
, 4, "fdCount < 0 (%d)\n", ctx
->fdCount
);
468 /* We get here if the caller deselect()'s an FD. Gag me with a goto. */
473 evDispatch(evContext opaqueCtx
, evEvent opaqueEv
) {
474 evContext_p
*ctx
= opaqueCtx
.opaque
;
475 evEvent_p
*ev
= opaqueEv
.opaque
;
476 #ifdef EVENTLIB_TIME_CHECKS
478 struct timespec start_time
;
479 struct timespec interval
;
482 #ifdef EVENTLIB_TIME_CHECKS
484 start_time
= evNowTime();
489 evAccept
*this = ev
->u
.accept
.this;
492 "Dispatch.Accept: fd %d -> %d, func %p, uap %p\n",
493 this->conn
->fd
, this->fd
,
494 this->conn
->func
, this->conn
->uap
);
495 errno
= this->ioErrno
;
496 (this->conn
->func
)(opaqueCtx
, this->conn
->uap
, this->fd
,
497 &this->la
, this->lalen
,
498 &this->ra
, this->ralen
);
499 #ifdef EVENTLIB_TIME_CHECKS
500 func
= this->conn
->func
;
505 evFile
*this = ev
->u
.file
.this;
506 int eventmask
= ev
->u
.file
.eventmask
;
509 "Dispatch.File: fd %d, mask 0x%x, func %p, uap %p\n",
510 this->fd
, this->eventmask
, this->func
, this->uap
);
511 (this->func
)(opaqueCtx
, this->uap
, this->fd
, eventmask
);
512 #ifdef EVENTLIB_TIME_CHECKS
518 evStream
*this = ev
->u
.stream
.this;
521 "Dispatch.Stream: fd %d, func %p, uap %p\n",
522 this->fd
, this->func
, this->uap
);
523 errno
= this->ioErrno
;
524 (this->func
)(opaqueCtx
, this->uap
, this->fd
, this->ioDone
);
525 #ifdef EVENTLIB_TIME_CHECKS
531 evTimer
*this = ev
->u
.timer
.this;
533 evPrintf(ctx
, 5, "Dispatch.Timer: func %p, uap %p\n",
534 this->func
, this->uap
);
535 (this->func
)(opaqueCtx
, this->uap
, this->due
, this->inter
);
536 #ifdef EVENTLIB_TIME_CHECKS
542 evWait
*this = ev
->u
.wait
.this;
545 "Dispatch.Wait: tag %p, func %p, uap %p\n",
546 this->tag
, this->func
, this->uap
);
547 (this->func
)(opaqueCtx
, this->uap
, this->tag
);
548 #ifdef EVENTLIB_TIME_CHECKS
555 #ifdef EVENTLIB_TIME_CHECKS
564 #ifdef EVENTLIB_TIME_CHECKS
565 if (ctx
->debug
> 0) {
566 interval
= evSubTime(evNowTime(), start_time
);
568 * Complain if it took longer than 50 milliseconds.
570 * We call getuid() to make an easy-to-find mark in a kernel
573 if (interval
.tv_sec
> 0 || interval
.tv_nsec
> 50000000)
575 "dispatch interval %u.%09u uid %d type %d func %p\n",
576 interval
.tv_sec
, interval
.tv_nsec
,
577 getuid(), ev
->type
, func
);
581 evDrop(opaqueCtx
, opaqueEv
);
586 evDrop(evContext opaqueCtx
, evEvent opaqueEv
) {
587 evContext_p
*ctx
= opaqueCtx
.opaque
;
588 evEvent_p
*ev
= opaqueEv
.opaque
;
592 FREE(ev
->u
.accept
.this);
602 id
.opaque
= ev
->u
.stream
.this;
603 (void) evCancelRW(opaqueCtx
, id
);
607 evTimer
*this = ev
->u
.timer
.this;
610 /* Check to see whether the user func cleared the timer. */
611 if (heap_element(ctx
->timers
, this->index
) != this) {
612 evPrintf(ctx
, 5, "Dispatch.Timer: timer rm'd?\n");
616 * Timer is still there. Delete it if it has expired,
617 * otherwise set it according to its next interval.
619 if (this->inter
.tv_sec
== (time_t)0 &&
620 this->inter
.tv_nsec
== 0L) {
621 opaque
.opaque
= this;
622 (void) evClearTimer(opaqueCtx
, opaque
);
624 opaque
.opaque
= this;
625 (void) evResetTimer(opaqueCtx
, opaque
, this->func
,
627 evAddTime((this->mode
& EV_TMR_RATE
) ?
636 FREE(ev
->u
.wait
.this);
651 evMainLoop(evContext opaqueCtx
) {
655 while ((x
= evGetNext(opaqueCtx
, &event
, EV_WAIT
)) == 0)
656 if ((x
= evDispatch(opaqueCtx
, event
)) < 0)
662 evHighestFD(evContext opaqueCtx
) {
663 evContext_p
*ctx
= opaqueCtx
.opaque
;
665 return (ctx
->highestFD
);
669 evPrintf(const evContext_p
*ctx
, int level
, const char *fmt
, ...) {
673 if (ctx
->output
!= NULL
&& ctx
->debug
>= level
) {
674 vfprintf(ctx
->output
, fmt
, ap
);
681 evSetOption(evContext
*opaqueCtx
, const char *option
, int value
) {
682 /* evContext_p *ctx = opaqueCtx->opaque; */
686 #ifndef CLOCK_MONOTONIC
690 #ifdef CLOCK_MONOTONIC
691 if (strcmp(option
, "monotime") == 0) {
692 if (opaqueCtx
!= NULL
)
694 if (value
== 0 || value
== 1) {
695 __evOptMonoTime
= value
;
708 evGetOption(evContext
*opaqueCtx
, const char *option
, int *value
) {
709 /* evContext_p *ctx = opaqueCtx->opaque; */
712 #ifndef CLOCK_MONOTONIC
717 #ifdef CLOCK_MONOTONIC
718 if (strcmp(option
, "monotime") == 0) {
719 if (opaqueCtx
!= NULL
)
721 *value
= __evOptMonoTime
;
729 #if defined(NEED_PSELECT) || defined(USE_POLL)
730 /* XXX needs to move to the porting library. */
732 pselect(int nfds
, void *rfds
, void *wfds
, void *efds
,
733 struct timespec
*tsp
,
734 const sigset_t
*sigmask
)
740 int polltimeout
= INFTIM
;
747 struct timeval
*tvp
= NULL
;
748 #endif /* USE_POLL */
751 tv
= evTimeVal(*tsp
);
753 polltimeout
= 1000 * tv
.tv_sec
+ tv
.tv_usec
/ 1000;
756 #endif /* USE_POLL */
759 sigprocmask(SIG_SETMASK
, sigmask
, &sigs
);
761 n
= select(nfds
, rfds
, wfds
, efds
, tvp
);
764 * rfds, wfds, and efds should all be from the same evContext_p,
765 * so any of them will do. If they're all NULL, the caller is
766 * presumably calling us to block.
769 ctx
= ((__evEmulMask
*)rfds
)->ctx
;
770 else if (wfds
!= NULL
)
771 ctx
= ((__evEmulMask
*)wfds
)->ctx
;
772 else if (efds
!= NULL
)
773 ctx
= ((__evEmulMask
*)efds
)->ctx
;
776 if (ctx
!= NULL
&& ctx
->fdMax
!= -1) {
777 fds
= &(ctx
->pollfds
[ctx
->firstfd
]);
778 pnfds
= ctx
->fdMax
- ctx
->firstfd
+ 1;
783 n
= poll(fds
, pnfds
, polltimeout
);
788 for (e
= 0, i
= ctx
->firstfd
; i
<= ctx
->fdMax
; i
++) {
789 if (ctx
->pollfds
[i
].fd
< 0)
791 if (FD_ISSET(i
, &ctx
->rdLast
))
793 if (FD_ISSET(i
, &ctx
->wrLast
))
795 if (FD_ISSET(i
, &ctx
->exLast
))
800 #endif /* USE_POLL */
802 sigprocmask(SIG_SETMASK
, &sigs
, NULL
);
804 *tsp
= evTimeSpec(tv
);
811 evPollfdRealloc(evContext_p
*ctx
, int pollfd_chunk_size
, int fd
) {
814 void *pollfds
, *fdTable
;
816 if (fd
< ctx
->maxnfds
)
819 /* Don't allow ridiculously small values for pollfd_chunk_size */
820 if (pollfd_chunk_size
< 20)
821 pollfd_chunk_size
= 20;
823 maxnfds
= (1 + (fd
/pollfd_chunk_size
)) * pollfd_chunk_size
;
825 pollfds
= realloc(ctx
->pollfds
, maxnfds
* sizeof(*ctx
->pollfds
));
827 ctx
->pollfds
= pollfds
;
828 fdTable
= realloc(ctx
->fdTable
, maxnfds
* sizeof(*ctx
->fdTable
));
830 ctx
->fdTable
= fdTable
;
832 if (pollfds
== NULL
|| fdTable
== NULL
) {
833 evPrintf(ctx
, 2, "pollfd() realloc (%ld) failed\n",
834 (long)maxnfds
*sizeof(struct pollfd
));
838 for (i
= ctx
->maxnfds
; i
< maxnfds
; i
++) {
839 ctx
->pollfds
[i
].fd
= -1;
840 ctx
->pollfds
[i
].events
= 0;
844 ctx
->maxnfds
= maxnfds
;
849 /* Find the appropriate 'events' or 'revents' field in the pollfds array */
851 __fd_eventfield(int fd
, __evEmulMask
*maskp
) {
853 evContext_p
*ctx
= (evContext_p
*)maskp
->ctx
;
855 if (!maskp
->result
|| maskp
->type
== EV_WASNONBLOCKING
)
856 return (&(ctx
->pollfds
[fd
].events
));
858 return (&(ctx
->pollfds
[fd
].revents
));
861 /* Translate to poll(2) event */
863 __poll_event(__evEmulMask
*maskp
) {
865 switch ((maskp
)->type
) {
871 return (POLLRDBAND
| POLLPRI
| POLLWRBAND
);
872 case EV_WASNONBLOCKING
:
880 * Clear the events corresponding to the specified mask. If this leaves
881 * the events mask empty (apart from the POLLHUP bit), set the fd field
882 * to -1 so that poll(2) will ignore this fd.
885 __fd_clr(int fd
, __evEmulMask
*maskp
) {
887 evContext_p
*ctx
= maskp
->ctx
;
889 *__fd_eventfield(fd
, maskp
) &= ~__poll_event(maskp
);
890 if ((ctx
->pollfds
[fd
].events
& ~POLLHUP
) == 0) {
891 ctx
->pollfds
[fd
].fd
= -1;
892 if (fd
== ctx
->fdMax
)
893 while (ctx
->fdMax
> ctx
->firstfd
&&
894 ctx
->pollfds
[ctx
->fdMax
].fd
< 0)
896 if (fd
== ctx
->firstfd
)
897 while (ctx
->firstfd
<= ctx
->fdMax
&&
898 ctx
->pollfds
[ctx
->firstfd
].fd
< 0)
901 * Do we have an empty set of descriptors?
903 if (ctx
->firstfd
> ctx
->fdMax
) {
911 * Set the events bit(s) corresponding to the specified mask. If the events
912 * field has any other bits than POLLHUP set, also set the fd field so that
913 * poll(2) will watch this fd.
916 __fd_set(int fd
, __evEmulMask
*maskp
) {
918 evContext_p
*ctx
= maskp
->ctx
;
920 *__fd_eventfield(fd
, maskp
) |= __poll_event(maskp
);
921 if ((ctx
->pollfds
[fd
].events
& ~POLLHUP
) != 0) {
922 ctx
->pollfds
[fd
].fd
= fd
;
923 if (fd
< ctx
->firstfd
|| ctx
->fdMax
== -1)
929 #endif /* USE_POLL */