tevent: prefer the write handler if there are two possible handlers registered with...
[Samba/gebeck_regimport.git] / lib / tevent / tevent_epoll.c
blob19f6809f2721731a8885d68aef8c9f57b2780438
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2013
8 Copyright (C) Jeremy Allison 2013
10 ** NOTE! The following LGPL license applies to the tevent
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/select.h"
31 #include "tevent.h"
32 #include "tevent_internal.h"
33 #include "tevent_util.h"
/*
  per tevent_context state of the epoll backend
*/
struct epoll_event_context {
	/* the generic event context this backend state belongs to */
	struct tevent_context *ev;

	/* descriptor returned by epoll_create(), -1 once it is closed */
	int epoll_fd;

	/*
	 * pid that created epoll_fd; epoll_check_reopen() compares it
	 * with getpid() to decide whether the handle must be recreated
	 */
	pid_t pid;

	/* when true, epoll_panic() forces its replay argument to true */
	bool panic_force_replay;

	/* when not NULL, epoll_panic() stores true through this pointer */
	bool *panic_state;

	/* optional callback invoked by epoll_panic() instead of abort() */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
/* bits stored in tevent_fd->additional_flags by this backend */

/* an epoll registration currently covers this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* the fde waits for TEVENT_FD_READ, so errors are passed to its handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll reported EPOLLHUP or EPOLLERR for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* the fde is paired with a second fde on the same file descriptor */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
54 #ifdef TEST_PANIC_FALLBACK
56 static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
57 int size)
59 if (epoll_ev->panic_fallback == NULL) {
60 return epoll_create(size);
63 /* 50% of the time, fail... */
64 if ((random() % 2) == 0) {
65 errno = EINVAL;
66 return -1;
69 return epoll_create(size);
72 static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
73 int epfd, int op, int fd,
74 struct epoll_event *event)
76 if (epoll_ev->panic_fallback == NULL) {
77 return epoll_ctl(epfd, op, fd, event);
80 /* 50% of the time, fail... */
81 if ((random() % 2) == 0) {
82 errno = EINVAL;
83 return -1;
86 return epoll_ctl(epfd, op, fd, event);
89 static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
90 int epfd,
91 struct epoll_event *events,
92 int maxevents,
93 int timeout)
95 if (epoll_ev->panic_fallback == NULL) {
96 return epoll_wait(epfd, events, maxevents, timeout);
99 /* 50% of the time, fail... */
100 if ((random() % 2) == 0) {
101 errno = EINVAL;
102 return -1;
105 return epoll_wait(epfd, events, maxevents, timeout);
/*
 * From here on, send every epoll_create/epoll_ctl/epoll_wait call
 * through the failure injecting wrappers. The expansions use a
 * variable called epoll_ev, which has to be in scope at the call site.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
114 #endif
117 called to set the panic fallback function.
119 _PRIVATE_ bool tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 bool (*panic_fallback)(struct tevent_context *ev,
121 bool replay))
123 struct epoll_event_context *epoll_ev;
125 if (ev->additional_data == NULL) {
126 return false;
129 epoll_ev = talloc_get_type(ev->additional_data,
130 struct epoll_event_context);
131 if (epoll_ev == NULL) {
132 return false;
134 epoll_ev->panic_fallback = panic_fallback;
135 return true;
139 called when a epoll call fails
141 static void epoll_panic(struct epoll_event_context *epoll_ev,
142 const char *reason, bool replay)
144 struct tevent_context *ev = epoll_ev->ev;
145 bool (*panic_fallback)(struct tevent_context *ev, bool replay);
147 panic_fallback = epoll_ev->panic_fallback;
149 if (epoll_ev->panic_state != NULL) {
150 *epoll_ev->panic_state = true;
153 if (epoll_ev->panic_force_replay) {
154 replay = true;
157 TALLOC_FREE(ev->additional_data);
159 if (panic_fallback == NULL) {
160 tevent_debug(ev, TEVENT_DEBUG_FATAL,
161 "%s (%s) replay[%u] - calling abort()\n",
162 reason, strerror(errno), (unsigned)replay);
163 abort();
166 tevent_debug(ev, TEVENT_DEBUG_WARNING,
167 "%s (%s) replay[%u] - calling panic_fallback\n",
168 reason, strerror(errno), (unsigned)replay);
170 if (!panic_fallback(ev, replay)) {
171 /* Fallback failed. */
172 tevent_debug(ev, TEVENT_DEBUG_FATAL,
173 "%s (%s) replay[%u] - calling abort()\n",
174 reason, strerror(errno), (unsigned)replay);
175 abort();
180 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
182 static uint32_t epoll_map_flags(uint16_t flags)
184 uint32_t ret = 0;
185 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
186 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
187 return ret;
191 free the epoll fd
193 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
195 close(epoll_ev->epoll_fd);
196 epoll_ev->epoll_fd = -1;
197 return 0;
201 init the epoll fd
203 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
205 epoll_ev->epoll_fd = epoll_create(64);
206 if (epoll_ev->epoll_fd == -1) {
207 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
208 "Failed to create epoll handle.\n");
209 return -1;
212 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
213 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
214 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
217 epoll_ev->pid = getpid();
218 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
220 return 0;
223 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
226 reopen the epoll handle when our pid changes
227 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
228 demonstration of why this is needed
230 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
232 struct tevent_fd *fde;
233 bool *caller_panic_state = epoll_ev->panic_state;
234 bool panic_triggered = false;
236 if (epoll_ev->pid == getpid()) {
237 return;
240 close(epoll_ev->epoll_fd);
241 epoll_ev->epoll_fd = epoll_create(64);
242 if (epoll_ev->epoll_fd == -1) {
243 epoll_panic(epoll_ev, "epoll_create() failed", false);
244 return;
247 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
248 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
249 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
252 epoll_ev->pid = getpid();
253 epoll_ev->panic_state = &panic_triggered;
254 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
255 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
256 epoll_update_event(epoll_ev, fde);
258 if (panic_triggered) {
259 if (caller_panic_state != NULL) {
260 *caller_panic_state = true;
262 return;
265 epoll_ev->panic_state = NULL;
269 epoll cannot add the same file descriptor twice, once
270 with read, once with write which is allowed by the
271 tevent backend. Multiplex the existing fde, flag it
272 as such so we can search for the correct fde on
273 event triggering.
276 static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
277 struct tevent_fd *add_fde)
279 struct epoll_event event;
280 struct tevent_fd *mpx_fde;
281 int ret;
283 /* Find the existing fde that caused the EEXIST error. */
284 for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
285 if (mpx_fde->fd != add_fde->fd) {
286 continue;
289 if (mpx_fde == add_fde) {
290 continue;
293 break;
295 if (mpx_fde == NULL) {
296 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
297 "can't find multiplex fde for fd[%d]",
298 add_fde->fd);
299 return -1;
302 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
303 /* Logic error. Can't have more than 2 multiplexed fde's. */
304 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
305 "multiplex fde for fd[%d] is already multiplexed\n",
306 mpx_fde->fd);
307 return -1;
311 * The multiplex fde must have the same fd, and also
312 * already have an epoll event attached.
314 if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
315 /* Logic error. Can't have more than 2 multiplexed fde's. */
316 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
317 "multiplex fde for fd[%d] has no event\n",
318 mpx_fde->fd);
319 return -1;
322 /* Modify the mpx_fde to add in the new flags. */
323 ZERO_STRUCT(event);
324 event.events = epoll_map_flags(mpx_fde->flags);
325 event.events |= epoll_map_flags(add_fde->flags);
326 event.data.ptr = mpx_fde;
327 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
328 if (ret != 0 && errno == EBADF) {
329 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
330 "EPOLL_CTL_MOD EBADF for "
331 "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
332 add_fde, mpx_fde, add_fde->fd);
333 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
334 mpx_fde->event_ctx = NULL;
335 DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
336 add_fde->event_ctx = NULL;
337 return 0;
338 } else if (ret != 0) {
339 return ret;
343 * Make each fde->additional_data pointers point at each other
344 * so we can look them up from each other. They are now paired.
346 mpx_fde->additional_data = (struct tevent_fd *)add_fde;
347 add_fde->additional_data = (struct tevent_fd *)mpx_fde;
349 /* Now flag both fde's as being multiplexed. */
350 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
351 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
353 /* we need to keep the GOT_ERROR flag */
354 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
355 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
358 return 0;
362 add the epoll event to the given fd_event
364 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
366 struct epoll_event event;
367 int ret;
368 struct tevent_fd *mpx_fde = NULL;
370 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
371 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
373 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
375 * This is a multiplexed fde, we need to include both
376 * flags in the modified event.
378 mpx_fde = talloc_get_type_abort(fde->additional_data,
379 struct tevent_fd);
381 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
382 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
385 ZERO_STRUCT(event);
386 event.events = epoll_map_flags(fde->flags);
387 if (mpx_fde != NULL) {
388 event.events |= epoll_map_flags(mpx_fde->flags);
390 event.data.ptr = fde;
391 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
392 if (ret != 0 && errno == EBADF) {
393 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
394 "EPOLL_CTL_ADD EBADF for "
395 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
396 fde, mpx_fde, fde->fd);
397 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
398 fde->event_ctx = NULL;
399 if (mpx_fde != NULL) {
400 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
401 mpx_fde->event_ctx = NULL;
403 return;
404 } else if (ret != 0) {
405 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
406 return;
409 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
410 /* only if we want to read we want to tell the event handler about errors */
411 if (fde->flags & TEVENT_FD_READ) {
412 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
415 if (mpx_fde == NULL) {
416 return;
419 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
420 /* only if we want to read we want to tell the event handler about errors */
421 if (mpx_fde->flags & TEVENT_FD_READ) {
422 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
427 delete the epoll event for given fd_event
429 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
431 struct epoll_event event;
432 int ret;
433 struct tevent_fd *mpx_fde = NULL;
435 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
436 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
438 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
440 * This is a multiplexed fde, we need to modify both events.
442 mpx_fde = talloc_get_type_abort(fde->additional_data,
443 struct tevent_fd);
445 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
446 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
449 ZERO_STRUCT(event);
450 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
451 if (ret != 0 && errno == ENOENT) {
453 * This can happen after a epoll_check_reopen
454 * within epoll_event_fd_destructor.
456 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
457 "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
458 fde->fd);
459 return;
460 } else if (ret != 0 && errno == EBADF) {
461 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
462 "EPOLL_CTL_DEL EBADF for "
463 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
464 fde, mpx_fde, fde->fd);
465 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
466 fde->event_ctx = NULL;
467 if (mpx_fde != NULL) {
468 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
469 mpx_fde->event_ctx = NULL;
471 return;
472 } else if (ret != 0) {
473 epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
474 return;
479 change the epoll event to the given fd_event
481 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
483 struct tevent_fd *mpx_fde = NULL;
484 struct epoll_event event;
485 int ret;
487 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
488 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
490 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
492 * This is a multiplexed fde, we need to include both
493 * flags in the modified event.
495 mpx_fde = talloc_get_type_abort(fde->additional_data,
496 struct tevent_fd);
498 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
499 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
502 ZERO_STRUCT(event);
503 event.events = epoll_map_flags(fde->flags);
504 if (mpx_fde != NULL) {
505 event.events |= epoll_map_flags(mpx_fde->flags);
507 event.data.ptr = fde;
508 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
509 if (ret != 0 && errno == EBADF) {
510 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
511 "EPOLL_CTL_MOD EBADF for "
512 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
513 fde, mpx_fde, fde->fd);
514 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
515 fde->event_ctx = NULL;
516 if (mpx_fde != NULL) {
517 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
518 mpx_fde->event_ctx = NULL;
520 return;
521 } else if (ret != 0) {
522 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
523 return;
526 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
527 /* only if we want to read we want to tell the event handler about errors */
528 if (fde->flags & TEVENT_FD_READ) {
529 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
532 if (mpx_fde == NULL) {
533 return;
536 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
537 /* only if we want to read we want to tell the event handler about errors */
538 if (mpx_fde->flags & TEVENT_FD_READ) {
539 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
543 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
545 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
546 bool want_read = (fde->flags & TEVENT_FD_READ);
547 bool want_write= (fde->flags & TEVENT_FD_WRITE);
548 struct tevent_fd *mpx_fde = NULL;
550 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
552 * work out what the multiplexed fde wants.
554 mpx_fde = talloc_get_type_abort(fde->additional_data,
555 struct tevent_fd);
557 if (mpx_fde->flags & TEVENT_FD_READ) {
558 want_read = true;
561 if (mpx_fde->flags & TEVENT_FD_WRITE) {
562 want_write = true;
566 /* there's already an event */
567 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
568 if (want_read || (want_write && !got_error)) {
569 epoll_mod_event(epoll_ev, fde);
570 return;
573 * if we want to match the select behavior, we need to remove the epoll_event
574 * when the caller isn't interested in events.
576 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
578 epoll_del_event(epoll_ev, fde);
579 return;
582 /* there's no epoll_event attached to the fde */
583 if (want_read || (want_write && !got_error)) {
584 epoll_add_event(epoll_ev, fde);
585 return;
590 Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
591 Return true if there's nothing else to do, false if
592 this event needs further handling.
594 static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
595 struct tevent_fd *fde)
597 if (fde == NULL) {
598 /* Nothing to do if no event. */
599 return true;
602 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
604 * if we only wait for TEVENT_FD_WRITE, we should not tell the
605 * event handler about it, and remove the epoll_event,
606 * as we only report errors when waiting for read events,
607 * to match the select() behavior
609 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
611 * Do the same as the poll backend and
612 * remove the writeable flag.
614 fde->flags &= ~TEVENT_FD_WRITE;
615 return true;
617 /* This has TEVENT_FD_READ set, we're not finished. */
618 return false;
622 event loop handling using epoll
624 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
626 int ret, i;
627 #define MAXEVENTS 1
628 struct epoll_event events[MAXEVENTS];
629 int timeout = -1;
630 int wait_errno;
632 if (tvalp) {
633 /* it's better to trigger timed events a bit later than too early */
634 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
637 if (epoll_ev->ev->signal_events &&
638 tevent_common_check_signal(epoll_ev->ev)) {
639 return 0;
642 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
643 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
644 wait_errno = errno;
645 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
647 if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
648 if (tevent_common_check_signal(epoll_ev->ev)) {
649 return 0;
653 if (ret == -1 && wait_errno != EINTR) {
654 epoll_panic(epoll_ev, "epoll_wait() failed", true);
655 return -1;
658 if (ret == 0 && tvalp) {
659 /* we don't care about a possible delay here */
660 tevent_common_loop_timer_delay(epoll_ev->ev);
661 return 0;
664 for (i=0;i<ret;i++) {
665 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
666 struct tevent_fd);
667 uint16_t flags = 0;
668 struct tevent_fd *mpx_fde = NULL;
670 if (fde == NULL) {
671 epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
672 return -1;
674 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
676 * Save off the multiplexed event in case we need
677 * to use it to call the handler function.
679 mpx_fde = talloc_get_type_abort(fde->additional_data,
680 struct tevent_fd);
682 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
683 bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
684 bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
686 if (handled_fde && handled_mpx) {
687 epoll_update_event(epoll_ev, fde);
688 continue;
691 if (!handled_mpx) {
693 * If the mpx event was the one that needs
694 * further handling, it's the TEVENT_FD_READ
695 * event so switch over and call that handler.
697 fde = mpx_fde;
698 mpx_fde = NULL;
700 flags |= TEVENT_FD_READ;
702 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
703 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
705 if (flags & TEVENT_FD_WRITE) {
706 if (fde->flags & TEVENT_FD_WRITE) {
707 mpx_fde = NULL;
709 if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
710 fde = mpx_fde;
711 mpx_fde = NULL;
715 if (mpx_fde) {
716 /* Ensure we got the right fde. */
717 if ((flags & fde->flags) == 0) {
718 fde = mpx_fde;
719 mpx_fde = NULL;
724 * make sure we only pass the flags
725 * the handler is expecting.
727 flags &= fde->flags;
728 if (flags) {
729 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
730 break;
734 return 0;
738 create a epoll_event_context structure.
740 static int epoll_event_context_init(struct tevent_context *ev)
742 int ret;
743 struct epoll_event_context *epoll_ev;
746 * We might be called during tevent_re_initialise()
747 * which means we need to free our old additional_data.
749 TALLOC_FREE(ev->additional_data);
751 epoll_ev = talloc_zero(ev, struct epoll_event_context);
752 if (!epoll_ev) return -1;
753 epoll_ev->ev = ev;
754 epoll_ev->epoll_fd = -1;
756 ret = epoll_init_ctx(epoll_ev);
757 if (ret != 0) {
758 talloc_free(epoll_ev);
759 return ret;
762 ev->additional_data = epoll_ev;
763 return 0;
767 destroy an fd_event
769 static int epoll_event_fd_destructor(struct tevent_fd *fde)
771 struct tevent_context *ev = fde->event_ctx;
772 struct epoll_event_context *epoll_ev = NULL;
773 bool panic_triggered = false;
774 struct tevent_fd *mpx_fde = NULL;
775 int flags = fde->flags;
777 if (ev == NULL) {
778 return tevent_common_fd_destructor(fde);
781 epoll_ev = talloc_get_type_abort(ev->additional_data,
782 struct epoll_event_context);
785 * we must remove the event from the list
786 * otherwise a panic fallback handler may
787 * reuse invalid memory
789 DLIST_REMOVE(ev->fd_events, fde);
791 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
792 mpx_fde = talloc_get_type_abort(fde->additional_data,
793 struct tevent_fd);
795 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
796 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
798 fde->additional_data = NULL;
799 mpx_fde->additional_data = NULL;
801 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
804 epoll_ev->panic_state = &panic_triggered;
805 epoll_check_reopen(epoll_ev);
806 if (panic_triggered) {
807 return tevent_common_fd_destructor(fde);
810 if (mpx_fde != NULL) {
811 epoll_update_event(epoll_ev, mpx_fde);
812 if (panic_triggered) {
813 return tevent_common_fd_destructor(fde);
817 fde->flags = 0;
818 epoll_update_event(epoll_ev, fde);
819 fde->flags = flags;
820 if (panic_triggered) {
821 return tevent_common_fd_destructor(fde);
823 epoll_ev->panic_state = NULL;
825 return tevent_common_fd_destructor(fde);
829 add a fd based event
830 return NULL on failure (memory allocation error)
832 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
833 int fd, uint16_t flags,
834 tevent_fd_handler_t handler,
835 void *private_data,
836 const char *handler_name,
837 const char *location)
839 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
840 struct epoll_event_context);
841 struct tevent_fd *fde;
842 bool panic_triggered = false;
844 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
845 handler, private_data,
846 handler_name, location);
847 if (!fde) return NULL;
849 talloc_set_destructor(fde, epoll_event_fd_destructor);
851 epoll_ev->panic_state = &panic_triggered;
852 epoll_check_reopen(epoll_ev);
853 if (panic_triggered) {
854 return fde;
856 epoll_ev->panic_state = NULL;
858 epoll_update_event(epoll_ev, fde);
860 return fde;
864 set the fd event flags
866 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
868 struct tevent_context *ev;
869 struct epoll_event_context *epoll_ev;
870 bool panic_triggered = false;
872 if (fde->flags == flags) return;
874 ev = fde->event_ctx;
875 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
877 fde->flags = flags;
879 epoll_ev->panic_state = &panic_triggered;
880 epoll_check_reopen(epoll_ev);
881 if (panic_triggered) {
882 return;
884 epoll_ev->panic_state = NULL;
886 epoll_update_event(epoll_ev, fde);
890 do a single event loop using the events defined in ev
892 static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
894 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
895 struct epoll_event_context);
896 struct timeval tval;
897 bool panic_triggered = false;
899 if (ev->signal_events &&
900 tevent_common_check_signal(ev)) {
901 return 0;
904 if (ev->immediate_events &&
905 tevent_common_loop_immediate(ev)) {
906 return 0;
909 tval = tevent_common_loop_timer_delay(ev);
910 if (tevent_timeval_is_zero(&tval)) {
911 return 0;
914 epoll_ev->panic_state = &panic_triggered;
915 epoll_ev->panic_force_replay = true;
916 epoll_check_reopen(epoll_ev);
917 if (panic_triggered) {
918 errno = EINVAL;
919 return -1;
921 epoll_ev->panic_force_replay = false;
922 epoll_ev->panic_state = NULL;
924 return epoll_event_loop(epoll_ev, &tval);
927 static const struct tevent_ops epoll_event_ops = {
928 .context_init = epoll_event_context_init,
929 .add_fd = epoll_event_add_fd,
930 .set_fd_close_fn = tevent_common_fd_set_close_fn,
931 .get_fd_flags = tevent_common_fd_get_flags,
932 .set_fd_flags = epoll_event_set_fd_flags,
933 .add_timer = tevent_common_add_timer,
934 .schedule_immediate = tevent_common_schedule_immediate,
935 .add_signal = tevent_common_add_signal,
936 .loop_once = epoll_event_loop_once,
937 .loop_wait = tevent_common_loop_wait,
940 _PRIVATE_ bool tevent_epoll_init(void)
942 return tevent_register_backend("epoll", &epoll_event_ops);