tevent: In epoll_event_loop() ensure we trigger the right handler for a multiplexed...
[Samba/gebeck_regimport.git] / lib / tevent / tevent_epoll.c
blob41415b06e9aaf520ea90258a80b77946ef72cbc5
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2013
8 Copyright (C) Jeremy Allison 2013
10 ** NOTE! The following LGPL license applies to the tevent
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/select.h"
31 #include "tevent.h"
32 #include "tevent_internal.h"
33 #include "tevent_util.h"
/*
  per-tevent_context state of the epoll backend; stored in
  tevent_context->additional_data
*/
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/*
	 * pid recorded when epoll_fd was created; epoll_check_reopen()
	 * compares it against getpid() to detect a fork
	 */
	pid_t pid;

	/* when true, epoll_panic() forces its replay argument to true */
	bool panic_force_replay;

	/* when non-NULL, epoll_panic() stores true through this pointer */
	bool *panic_state;

	/* optional handler invoked by epoll_panic() instead of abort() */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
/* bits kept in tevent_fd->additional_flags by this backend */

/* the fde is currently registered with the epoll handle */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* the fde asked for TEVENT_FD_READ, so errors are passed to its handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* EPOLLHUP or EPOLLERR was seen for the fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* the fde is paired with a second fde on the same fd via additional_data */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
#ifdef TEST_PANIC_FALLBACK

/*
  decide whether a wrapped epoll call should fail artificially:
  never when no panic fallback is installed, otherwise 50% of the time
*/
static bool epoll_panic_fallback_inject(struct epoll_event_context *epoll_ev)
{
	return (epoll_ev->panic_fallback != NULL) && ((random() % 2) == 0);
}

/* epoll_create() that randomly fails with EINVAL */
static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
				       int size)
{
	if (epoll_panic_fallback_inject(epoll_ev)) {
		errno = EINVAL;
		return -1;
	}

	return epoll_create(size);
}

/* epoll_ctl() that randomly fails with EINVAL */
static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
				    int epfd, int op, int fd,
				    struct epoll_event *event)
{
	if (epoll_panic_fallback_inject(epoll_ev)) {
		errno = EINVAL;
		return -1;
	}

	return epoll_ctl(epfd, op, fd, event);
}

/* epoll_wait() that randomly fails with EINVAL */
static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
				     int epfd,
				     struct epoll_event *events,
				     int maxevents,
				     int timeout)
{
	if (epoll_panic_fallback_inject(epoll_ev)) {
		errno = EINVAL;
		return -1;
	}

	return epoll_wait(epfd, events, maxevents, timeout);
}

/*
 * From here on every epoll call in this file goes through the wrappers
 * above; the macros expect a variable named epoll_ev to be in scope.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
#endif
117 called to set the panic fallback function.
119 _PRIVATE_ bool tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 bool (*panic_fallback)(struct tevent_context *ev,
121 bool replay))
123 struct epoll_event_context *epoll_ev;
125 if (ev->additional_data == NULL) {
126 return false;
129 epoll_ev = talloc_get_type(ev->additional_data,
130 struct epoll_event_context);
131 if (epoll_ev == NULL) {
132 return false;
134 epoll_ev->panic_fallback = panic_fallback;
135 return true;
139 called when a epoll call fails
141 static void epoll_panic(struct epoll_event_context *epoll_ev,
142 const char *reason, bool replay)
144 struct tevent_context *ev = epoll_ev->ev;
145 bool (*panic_fallback)(struct tevent_context *ev, bool replay);
147 panic_fallback = epoll_ev->panic_fallback;
149 if (epoll_ev->panic_state != NULL) {
150 *epoll_ev->panic_state = true;
153 if (epoll_ev->panic_force_replay) {
154 replay = true;
157 TALLOC_FREE(ev->additional_data);
159 if (panic_fallback == NULL) {
160 tevent_debug(ev, TEVENT_DEBUG_FATAL,
161 "%s (%s) replay[%u] - calling abort()\n",
162 reason, strerror(errno), (unsigned)replay);
163 abort();
166 tevent_debug(ev, TEVENT_DEBUG_WARNING,
167 "%s (%s) replay[%u] - calling panic_fallback\n",
168 reason, strerror(errno), (unsigned)replay);
170 if (!panic_fallback(ev, replay)) {
171 /* Fallback failed. */
172 tevent_debug(ev, TEVENT_DEBUG_FATAL,
173 "%s (%s) replay[%u] - calling abort()\n",
174 reason, strerror(errno), (unsigned)replay);
175 abort();
180 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
182 static uint32_t epoll_map_flags(uint16_t flags)
184 uint32_t ret = 0;
185 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
186 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
187 return ret;
191 free the epoll fd
193 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
195 close(epoll_ev->epoll_fd);
196 epoll_ev->epoll_fd = -1;
197 return 0;
201 init the epoll fd
203 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
205 epoll_ev->epoll_fd = epoll_create(64);
206 if (epoll_ev->epoll_fd == -1) {
207 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
208 "Failed to create epoll handle.\n");
209 return -1;
212 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
213 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
214 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
217 epoll_ev->pid = getpid();
218 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
220 return 0;
223 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
226 reopen the epoll handle when our pid changes
227 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
228 demonstration of why this is needed
230 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
232 struct tevent_fd *fde;
233 bool *caller_panic_state = epoll_ev->panic_state;
234 bool panic_triggered = false;
236 if (epoll_ev->pid == getpid()) {
237 return;
240 close(epoll_ev->epoll_fd);
241 epoll_ev->epoll_fd = epoll_create(64);
242 if (epoll_ev->epoll_fd == -1) {
243 epoll_panic(epoll_ev, "epoll_create() failed", false);
244 return;
247 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
248 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
249 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
252 epoll_ev->pid = getpid();
253 epoll_ev->panic_state = &panic_triggered;
254 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
255 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
256 epoll_update_event(epoll_ev, fde);
258 if (panic_triggered) {
259 if (caller_panic_state != NULL) {
260 *caller_panic_state = true;
262 return;
265 epoll_ev->panic_state = NULL;
269 epoll cannot add the same file descriptor twice, once
270 with read, once with write which is allowed by the
271 tevent backend. Multiplex the existing fde, flag it
272 as such so we can search for the correct fde on
273 event triggering.
276 static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
277 struct tevent_fd *add_fde)
279 struct epoll_event event;
280 struct tevent_fd *mpx_fde;
281 int ret;
283 /* Find the existing fde that caused the EEXIST error. */
284 for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
285 if (mpx_fde->fd != add_fde->fd) {
286 continue;
289 if (mpx_fde == add_fde) {
290 continue;
293 break;
295 if (mpx_fde == NULL) {
296 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
297 "can't find multiplex fde for fd[%d]",
298 add_fde->fd);
299 return -1;
302 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
303 /* Logic error. Can't have more than 2 multiplexed fde's. */
304 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
305 "multiplex fde for fd[%d] is already multiplexed\n",
306 mpx_fde->fd);
307 return -1;
311 * The multiplex fde must have the same fd, and also
312 * already have an epoll event attached.
314 if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
315 /* Logic error. Can't have more than 2 multiplexed fde's. */
316 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
317 "multiplex fde for fd[%d] has no event\n",
318 mpx_fde->fd);
319 return -1;
322 /* Modify the mpx_fde to add in the new flags. */
323 ZERO_STRUCT(event);
324 event.events = epoll_map_flags(mpx_fde->flags);
325 event.events |= epoll_map_flags(add_fde->flags);
326 event.data.ptr = mpx_fde;
327 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
328 if (ret != 0 && errno == EBADF) {
329 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
330 "EPOLL_CTL_MOD EBADF for "
331 "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
332 add_fde, mpx_fde, add_fde->fd);
333 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
334 mpx_fde->event_ctx = NULL;
335 DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
336 add_fde->event_ctx = NULL;
337 return 0;
338 } else if (ret != 0) {
339 return ret;
343 * Make each fde->additional_data pointers point at each other
344 * so we can look them up from each other. They are now paired.
346 mpx_fde->additional_data = (struct tevent_fd *)add_fde;
347 add_fde->additional_data = (struct tevent_fd *)mpx_fde;
349 /* Now flag both fde's as being multiplexed. */
350 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
351 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
353 /* we need to keep the GOT_ERROR flag */
354 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
355 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
358 return 0;
362 add the epoll event to the given fd_event
364 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
366 struct epoll_event event;
367 int ret;
368 struct tevent_fd *mpx_fde = NULL;
370 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
371 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
373 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
375 * This is a multiplexed fde, we need to include both
376 * flags in the modified event.
378 mpx_fde = talloc_get_type_abort(fde->additional_data,
379 struct tevent_fd);
381 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
382 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
385 ZERO_STRUCT(event);
386 event.events = epoll_map_flags(fde->flags);
387 if (mpx_fde != NULL) {
388 event.events |= epoll_map_flags(mpx_fde->flags);
390 event.data.ptr = fde;
391 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
392 if (ret != 0 && errno == EBADF) {
393 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
394 "EPOLL_CTL_ADD EBADF for "
395 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
396 fde, mpx_fde, fde->fd);
397 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
398 fde->event_ctx = NULL;
399 if (mpx_fde != NULL) {
400 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
401 mpx_fde->event_ctx = NULL;
403 return;
404 } else if (ret != 0) {
405 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
406 return;
409 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
410 /* only if we want to read we want to tell the event handler about errors */
411 if (fde->flags & TEVENT_FD_READ) {
412 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
415 if (mpx_fde == NULL) {
416 return;
419 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
420 /* only if we want to read we want to tell the event handler about errors */
421 if (mpx_fde->flags & TEVENT_FD_READ) {
422 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
427 delete the epoll event for given fd_event
429 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
431 struct epoll_event event;
432 int ret;
433 struct tevent_fd *mpx_fde = NULL;
435 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
436 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
438 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
440 * This is a multiplexed fde, we need to modify both events.
442 mpx_fde = talloc_get_type_abort(fde->additional_data,
443 struct tevent_fd);
445 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
446 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
449 ZERO_STRUCT(event);
450 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
451 if (ret != 0 && errno == ENOENT) {
453 * This can happen after a epoll_check_reopen
454 * within epoll_event_fd_destructor.
456 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
457 "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
458 fde->fd);
459 return;
460 } else if (ret != 0 && errno == EBADF) {
461 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
462 "EPOLL_CTL_DEL EBADF for "
463 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
464 fde, mpx_fde, fde->fd);
465 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
466 fde->event_ctx = NULL;
467 if (mpx_fde != NULL) {
468 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
469 mpx_fde->event_ctx = NULL;
471 return;
472 } else if (ret != 0) {
473 epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
474 return;
479 change the epoll event to the given fd_event
481 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
483 struct tevent_fd *mpx_fde = NULL;
484 struct epoll_event event;
485 int ret;
487 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
488 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
490 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
492 * This is a multiplexed fde, we need to include both
493 * flags in the modified event.
495 mpx_fde = talloc_get_type_abort(fde->additional_data,
496 struct tevent_fd);
498 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
499 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
502 ZERO_STRUCT(event);
503 event.events = epoll_map_flags(fde->flags);
504 if (mpx_fde != NULL) {
505 event.events |= epoll_map_flags(mpx_fde->flags);
507 event.data.ptr = fde;
508 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
509 if (ret != 0 && errno == EBADF) {
510 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
511 "EPOLL_CTL_MOD EBADF for "
512 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
513 fde, mpx_fde, fde->fd);
514 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
515 fde->event_ctx = NULL;
516 if (mpx_fde != NULL) {
517 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
518 mpx_fde->event_ctx = NULL;
520 return;
521 } else if (ret != 0) {
522 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
523 return;
526 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
527 /* only if we want to read we want to tell the event handler about errors */
528 if (fde->flags & TEVENT_FD_READ) {
529 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
532 if (mpx_fde == NULL) {
533 return;
536 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
537 /* only if we want to read we want to tell the event handler about errors */
538 if (mpx_fde->flags & TEVENT_FD_READ) {
539 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
543 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
545 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
546 bool want_read = (fde->flags & TEVENT_FD_READ);
547 bool want_write= (fde->flags & TEVENT_FD_WRITE);
548 struct tevent_fd *mpx_fde = NULL;
550 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
552 * work out what the multiplexed fde wants.
554 mpx_fde = talloc_get_type_abort(fde->additional_data,
555 struct tevent_fd);
557 if (mpx_fde->flags & TEVENT_FD_READ) {
558 want_read = true;
561 if (mpx_fde->flags & TEVENT_FD_WRITE) {
562 want_write = true;
566 /* there's already an event */
567 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
568 if (want_read || (want_write && !got_error)) {
569 epoll_mod_event(epoll_ev, fde);
570 return;
573 * if we want to match the select behavior, we need to remove the epoll_event
574 * when the caller isn't interested in events.
576 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
578 epoll_del_event(epoll_ev, fde);
579 return;
582 /* there's no epoll_event attached to the fde */
583 if (want_read || (want_write && !got_error)) {
584 epoll_add_event(epoll_ev, fde);
585 return;
590 Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
591 Return true if there's nothing else to do, false if
592 this event needs further handling.
594 static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
595 struct tevent_fd *fde)
597 if (fde == NULL) {
598 /* Nothing to do if no event. */
599 return true;
602 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
604 * if we only wait for TEVENT_FD_WRITE, we should not tell the
605 * event handler about it, and remove the epoll_event,
606 * as we only report errors when waiting for read events,
607 * to match the select() behavior
609 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
611 * Do the same as the poll backend and
612 * remove the writeable flag.
614 fde->flags &= ~TEVENT_FD_WRITE;
615 return true;
617 /* This has TEVENT_FD_READ set, we're not finished. */
618 return false;
622 event loop handling using epoll
624 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
626 int ret, i;
627 #define MAXEVENTS 1
628 struct epoll_event events[MAXEVENTS];
629 int timeout = -1;
630 int wait_errno;
632 if (tvalp) {
633 /* it's better to trigger timed events a bit later than too early */
634 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
637 if (epoll_ev->ev->signal_events &&
638 tevent_common_check_signal(epoll_ev->ev)) {
639 return 0;
642 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
643 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
644 wait_errno = errno;
645 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
647 if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
648 if (tevent_common_check_signal(epoll_ev->ev)) {
649 return 0;
653 if (ret == -1 && wait_errno != EINTR) {
654 epoll_panic(epoll_ev, "epoll_wait() failed", true);
655 return -1;
658 if (ret == 0 && tvalp) {
659 /* we don't care about a possible delay here */
660 tevent_common_loop_timer_delay(epoll_ev->ev);
661 return 0;
664 for (i=0;i<ret;i++) {
665 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
666 struct tevent_fd);
667 uint16_t flags = 0;
668 struct tevent_fd *mpx_fde = NULL;
670 if (fde == NULL) {
671 epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
672 return -1;
674 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
676 * Save off the multiplexed event in case we need
677 * to use it to call the handler function.
679 mpx_fde = talloc_get_type_abort(fde->additional_data,
680 struct tevent_fd);
682 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
683 bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
684 bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
686 if (handled_fde && handled_mpx) {
687 epoll_update_event(epoll_ev, fde);
688 continue;
691 if (!handled_mpx) {
693 * If the mpx event was the one that needs
694 * further handling, it's the TEVENT_FD_READ
695 * event so switch over and call that handler.
697 fde = mpx_fde;
698 mpx_fde = NULL;
700 flags |= TEVENT_FD_READ;
702 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
703 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
705 if (mpx_fde) {
706 /* Ensure we got the right fde. */
707 if ((flags & fde->flags) == 0) {
708 fde = mpx_fde;
709 mpx_fde = NULL;
714 * make sure we only pass the flags
715 * the handler is expecting.
717 flags &= fde->flags;
718 if (flags) {
719 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
720 break;
724 return 0;
728 create a epoll_event_context structure.
730 static int epoll_event_context_init(struct tevent_context *ev)
732 int ret;
733 struct epoll_event_context *epoll_ev;
736 * We might be called during tevent_re_initialise()
737 * which means we need to free our old additional_data.
739 TALLOC_FREE(ev->additional_data);
741 epoll_ev = talloc_zero(ev, struct epoll_event_context);
742 if (!epoll_ev) return -1;
743 epoll_ev->ev = ev;
744 epoll_ev->epoll_fd = -1;
746 ret = epoll_init_ctx(epoll_ev);
747 if (ret != 0) {
748 talloc_free(epoll_ev);
749 return ret;
752 ev->additional_data = epoll_ev;
753 return 0;
757 destroy an fd_event
759 static int epoll_event_fd_destructor(struct tevent_fd *fde)
761 struct tevent_context *ev = fde->event_ctx;
762 struct epoll_event_context *epoll_ev = NULL;
763 bool panic_triggered = false;
764 struct tevent_fd *mpx_fde = NULL;
765 int flags = fde->flags;
767 if (ev == NULL) {
768 return tevent_common_fd_destructor(fde);
771 epoll_ev = talloc_get_type_abort(ev->additional_data,
772 struct epoll_event_context);
775 * we must remove the event from the list
776 * otherwise a panic fallback handler may
777 * reuse invalid memory
779 DLIST_REMOVE(ev->fd_events, fde);
781 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
782 mpx_fde = talloc_get_type_abort(fde->additional_data,
783 struct tevent_fd);
785 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
786 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
788 fde->additional_data = NULL;
789 mpx_fde->additional_data = NULL;
791 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
794 epoll_ev->panic_state = &panic_triggered;
795 epoll_check_reopen(epoll_ev);
796 if (panic_triggered) {
797 return tevent_common_fd_destructor(fde);
800 if (mpx_fde != NULL) {
801 epoll_update_event(epoll_ev, mpx_fde);
802 if (panic_triggered) {
803 return tevent_common_fd_destructor(fde);
807 fde->flags = 0;
808 epoll_update_event(epoll_ev, fde);
809 fde->flags = flags;
810 if (panic_triggered) {
811 return tevent_common_fd_destructor(fde);
813 epoll_ev->panic_state = NULL;
815 return tevent_common_fd_destructor(fde);
819 add a fd based event
820 return NULL on failure (memory allocation error)
822 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
823 int fd, uint16_t flags,
824 tevent_fd_handler_t handler,
825 void *private_data,
826 const char *handler_name,
827 const char *location)
829 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
830 struct epoll_event_context);
831 struct tevent_fd *fde;
832 bool panic_triggered = false;
834 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
835 handler, private_data,
836 handler_name, location);
837 if (!fde) return NULL;
839 talloc_set_destructor(fde, epoll_event_fd_destructor);
841 epoll_ev->panic_state = &panic_triggered;
842 epoll_check_reopen(epoll_ev);
843 if (panic_triggered) {
844 return fde;
846 epoll_ev->panic_state = NULL;
848 epoll_update_event(epoll_ev, fde);
850 return fde;
854 set the fd event flags
856 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
858 struct tevent_context *ev;
859 struct epoll_event_context *epoll_ev;
860 bool panic_triggered = false;
862 if (fde->flags == flags) return;
864 ev = fde->event_ctx;
865 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
867 fde->flags = flags;
869 epoll_ev->panic_state = &panic_triggered;
870 epoll_check_reopen(epoll_ev);
871 if (panic_triggered) {
872 return;
874 epoll_ev->panic_state = NULL;
876 epoll_update_event(epoll_ev, fde);
880 do a single event loop using the events defined in ev
882 static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
884 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
885 struct epoll_event_context);
886 struct timeval tval;
887 bool panic_triggered = false;
889 if (ev->signal_events &&
890 tevent_common_check_signal(ev)) {
891 return 0;
894 if (ev->immediate_events &&
895 tevent_common_loop_immediate(ev)) {
896 return 0;
899 tval = tevent_common_loop_timer_delay(ev);
900 if (tevent_timeval_is_zero(&tval)) {
901 return 0;
904 epoll_ev->panic_state = &panic_triggered;
905 epoll_ev->panic_force_replay = true;
906 epoll_check_reopen(epoll_ev);
907 if (panic_triggered) {
908 errno = EINVAL;
909 return -1;
911 epoll_ev->panic_force_replay = false;
912 epoll_ev->panic_state = NULL;
914 return epoll_event_loop(epoll_ev, &tval);
917 static const struct tevent_ops epoll_event_ops = {
918 .context_init = epoll_event_context_init,
919 .add_fd = epoll_event_add_fd,
920 .set_fd_close_fn = tevent_common_fd_set_close_fn,
921 .get_fd_flags = tevent_common_fd_get_flags,
922 .set_fd_flags = epoll_event_set_fd_flags,
923 .add_timer = tevent_common_add_timer,
924 .schedule_immediate = tevent_common_schedule_immediate,
925 .add_signal = tevent_common_add_signal,
926 .loop_once = epoll_event_loop_once,
927 .loop_wait = tevent_common_loop_wait,
930 _PRIVATE_ bool tevent_epoll_init(void)
932 return tevent_register_backend("epoll", &epoll_event_ops);