vfs_glusterfs: Implement AIO support
[Samba.git] / lib / tevent / tevent_epoll.c
blob599c190658cc5bd846f4f5d4fe82cc3cdc2f0bd6
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2013
8 Copyright (C) Jeremy Allison 2013
10 ** NOTE! The following LGPL license applies to the tevent
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/select.h"
31 #include "tevent.h"
32 #include "tevent_internal.h"
33 #include "tevent_util.h"
/*
  per-tevent_context state of the epoll backend
*/
struct epoll_event_context {
	/* back-reference to the generic tevent context using this backend */
	struct tevent_context *ev;

	/* descriptor returned by epoll_create(), -1 while no handle is open */
	int epoll_fd;

	/* pid recorded when epoll_fd was created, compared with getpid() later */
	pid_t pid;

	/* when true, epoll_panic() forces its replay argument to true */
	bool panic_force_replay;
	/* optional caller-owned flag, set to true by epoll_panic() */
	bool *panic_state;
	/* optional handler called by epoll_panic(); abort() is used if NULL */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
/*
 * Backend private bits stored in tevent_fd->additional_flags.
 */
/* an epoll registration exists for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* the fde waits for TEVENT_FD_READ, so errors are passed to its handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* EPOLLHUP or EPOLLERR has been seen for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* the fde is paired with a second fde on the same file descriptor */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
54 #ifdef TEST_PANIC_FALLBACK
56 static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
57 int size)
59 if (epoll_ev->panic_fallback == NULL) {
60 return epoll_create(size);
63 /* 50% of the time, fail... */
64 if ((random() % 2) == 0) {
65 errno = EINVAL;
66 return -1;
69 return epoll_create(size);
72 static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
73 int epfd, int op, int fd,
74 struct epoll_event *event)
76 if (epoll_ev->panic_fallback == NULL) {
77 return epoll_ctl(epfd, op, fd, event);
80 /* 50% of the time, fail... */
81 if ((random() % 2) == 0) {
82 errno = EINVAL;
83 return -1;
86 return epoll_ctl(epfd, op, fd, event);
89 static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
90 int epfd,
91 struct epoll_event *events,
92 int maxevents,
93 int timeout)
95 if (epoll_ev->panic_fallback == NULL) {
96 return epoll_wait(epfd, events, maxevents, timeout);
99 /* 50% of the time, fail... */
100 if ((random() % 2) == 0) {
101 errno = EINVAL;
102 return -1;
105 return epoll_wait(epfd, events, maxevents, timeout);
/*
 * Route the epoll calls that follow in this file through the
 * failure injecting wrappers. The expansions refer to a variable
 * called epoll_ev, which has to be in scope at every call site.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
114 #endif
117 called to set the panic fallback function.
119 _PRIVATE_ bool tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 bool (*panic_fallback)(struct tevent_context *ev,
121 bool replay))
123 struct epoll_event_context *epoll_ev;
125 if (ev->additional_data == NULL) {
126 return false;
129 epoll_ev = talloc_get_type(ev->additional_data,
130 struct epoll_event_context);
131 if (epoll_ev == NULL) {
132 return false;
134 epoll_ev->panic_fallback = panic_fallback;
135 return true;
139 called when a epoll call fails
141 static void epoll_panic(struct epoll_event_context *epoll_ev,
142 const char *reason, bool replay)
144 struct tevent_context *ev = epoll_ev->ev;
145 bool (*panic_fallback)(struct tevent_context *ev, bool replay);
147 panic_fallback = epoll_ev->panic_fallback;
149 if (epoll_ev->panic_state != NULL) {
150 *epoll_ev->panic_state = true;
153 if (epoll_ev->panic_force_replay) {
154 replay = true;
157 TALLOC_FREE(ev->additional_data);
159 if (panic_fallback == NULL) {
160 tevent_debug(ev, TEVENT_DEBUG_FATAL,
161 "%s (%s) replay[%u] - calling abort()\n",
162 reason, strerror(errno), (unsigned)replay);
163 abort();
166 tevent_debug(ev, TEVENT_DEBUG_ERROR,
167 "%s (%s) replay[%u] - calling panic_fallback\n",
168 reason, strerror(errno), (unsigned)replay);
170 if (!panic_fallback(ev, replay)) {
171 /* Fallback failed. */
172 tevent_debug(ev, TEVENT_DEBUG_FATAL,
173 "%s (%s) replay[%u] - calling abort()\n",
174 reason, strerror(errno), (unsigned)replay);
175 abort();
180 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
182 static uint32_t epoll_map_flags(uint16_t flags)
184 uint32_t ret = 0;
185 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
186 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
187 return ret;
191 free the epoll fd
193 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
195 close(epoll_ev->epoll_fd);
196 epoll_ev->epoll_fd = -1;
197 return 0;
201 init the epoll fd
203 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
205 epoll_ev->epoll_fd = epoll_create(64);
206 if (epoll_ev->epoll_fd == -1) {
207 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
208 "Failed to create epoll handle.\n");
209 return -1;
212 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
213 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
214 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
217 epoll_ev->pid = getpid();
218 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
220 return 0;
223 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
226 reopen the epoll handle when our pid changes
227 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
228 demonstration of why this is needed
230 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
232 struct tevent_fd *fde;
233 bool *caller_panic_state = epoll_ev->panic_state;
234 bool panic_triggered = false;
236 if (epoll_ev->pid == getpid()) {
237 return;
240 close(epoll_ev->epoll_fd);
241 epoll_ev->epoll_fd = epoll_create(64);
242 if (epoll_ev->epoll_fd == -1) {
243 epoll_panic(epoll_ev, "epoll_create() failed", false);
244 return;
247 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
248 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
249 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
252 epoll_ev->pid = getpid();
253 epoll_ev->panic_state = &panic_triggered;
254 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
255 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
256 epoll_update_event(epoll_ev, fde);
258 if (panic_triggered) {
259 if (caller_panic_state != NULL) {
260 *caller_panic_state = true;
262 return;
265 epoll_ev->panic_state = NULL;
269 epoll cannot add the same file descriptor twice, once
270 with read, once with write which is allowed by the
271 tevent backend. Multiplex the existing fde, flag it
272 as such so we can search for the correct fde on
273 event triggering.
276 static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
277 struct tevent_fd *add_fde)
279 struct epoll_event event;
280 struct tevent_fd *mpx_fde;
281 int ret;
283 /* Find the existing fde that caused the EEXIST error. */
284 for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
285 if (mpx_fde->fd != add_fde->fd) {
286 continue;
289 if (mpx_fde == add_fde) {
290 continue;
293 break;
295 if (mpx_fde == NULL) {
296 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
297 "can't find multiplex fde for fd[%d]",
298 add_fde->fd);
299 return -1;
302 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
303 /* Logic error. Can't have more than 2 multiplexed fde's. */
304 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
305 "multiplex fde for fd[%d] is already multiplexed\n",
306 mpx_fde->fd);
307 return -1;
311 * The multiplex fde must have the same fd, and also
312 * already have an epoll event attached.
314 if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
315 /* Logic error. Can't have more than 2 multiplexed fde's. */
316 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
317 "multiplex fde for fd[%d] has no event\n",
318 mpx_fde->fd);
319 return -1;
322 /* Modify the mpx_fde to add in the new flags. */
323 ZERO_STRUCT(event);
324 event.events = epoll_map_flags(mpx_fde->flags);
325 event.events |= epoll_map_flags(add_fde->flags);
326 event.data.ptr = mpx_fde;
327 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
328 if (ret != 0 && errno == EBADF) {
329 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
330 "EPOLL_CTL_MOD EBADF for "
331 "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
332 add_fde, mpx_fde, add_fde->fd);
333 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
334 mpx_fde->event_ctx = NULL;
335 DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
336 add_fde->event_ctx = NULL;
337 return 0;
338 } else if (ret != 0) {
339 return ret;
343 * Make each fde->additional_data pointers point at each other
344 * so we can look them up from each other. They are now paired.
346 mpx_fde->additional_data = (struct tevent_fd *)add_fde;
347 add_fde->additional_data = (struct tevent_fd *)mpx_fde;
349 /* Now flag both fde's as being multiplexed. */
350 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
351 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
353 /* we need to keep the GOT_ERROR flag */
354 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
355 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
358 return 0;
362 add the epoll event to the given fd_event
364 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
366 struct epoll_event event;
367 int ret;
368 struct tevent_fd *mpx_fde = NULL;
370 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
371 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
373 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
375 * This is a multiplexed fde, we need to include both
376 * flags in the modified event.
378 mpx_fde = talloc_get_type_abort(fde->additional_data,
379 struct tevent_fd);
381 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
382 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
385 ZERO_STRUCT(event);
386 event.events = epoll_map_flags(fde->flags);
387 if (mpx_fde != NULL) {
388 event.events |= epoll_map_flags(mpx_fde->flags);
390 event.data.ptr = fde;
391 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
392 if (ret != 0 && errno == EBADF) {
393 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
394 "EPOLL_CTL_ADD EBADF for "
395 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
396 fde, mpx_fde, fde->fd);
397 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
398 fde->event_ctx = NULL;
399 if (mpx_fde != NULL) {
400 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
401 mpx_fde->event_ctx = NULL;
403 return;
404 } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
405 ret = epoll_add_multiplex_fd(epoll_ev, fde);
406 if (ret != 0) {
407 epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
408 false);
409 return;
411 } else if (ret != 0) {
412 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
413 return;
416 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
417 /* only if we want to read we want to tell the event handler about errors */
418 if (fde->flags & TEVENT_FD_READ) {
419 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
422 if (mpx_fde == NULL) {
423 return;
426 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
427 /* only if we want to read we want to tell the event handler about errors */
428 if (mpx_fde->flags & TEVENT_FD_READ) {
429 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
434 delete the epoll event for given fd_event
436 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
438 struct epoll_event event;
439 int ret;
440 struct tevent_fd *mpx_fde = NULL;
442 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
443 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
445 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
447 * This is a multiplexed fde, we need to modify both events.
449 mpx_fde = talloc_get_type_abort(fde->additional_data,
450 struct tevent_fd);
452 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
453 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
456 ZERO_STRUCT(event);
457 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
458 if (ret != 0 && errno == ENOENT) {
460 * This can happen after a epoll_check_reopen
461 * within epoll_event_fd_destructor.
463 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
464 "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
465 fde->fd);
466 return;
467 } else if (ret != 0 && errno == EBADF) {
468 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
469 "EPOLL_CTL_DEL EBADF for "
470 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
471 fde, mpx_fde, fde->fd);
472 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
473 fde->event_ctx = NULL;
474 if (mpx_fde != NULL) {
475 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
476 mpx_fde->event_ctx = NULL;
478 return;
479 } else if (ret != 0) {
480 epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
481 return;
486 change the epoll event to the given fd_event
488 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
490 struct tevent_fd *mpx_fde = NULL;
491 struct epoll_event event;
492 int ret;
494 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
495 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
497 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
499 * This is a multiplexed fde, we need to include both
500 * flags in the modified event.
502 mpx_fde = talloc_get_type_abort(fde->additional_data,
503 struct tevent_fd);
505 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
506 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
509 ZERO_STRUCT(event);
510 event.events = epoll_map_flags(fde->flags);
511 if (mpx_fde != NULL) {
512 event.events |= epoll_map_flags(mpx_fde->flags);
514 event.data.ptr = fde;
515 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
516 if (ret != 0 && errno == EBADF) {
517 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
518 "EPOLL_CTL_MOD EBADF for "
519 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
520 fde, mpx_fde, fde->fd);
521 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
522 fde->event_ctx = NULL;
523 if (mpx_fde != NULL) {
524 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
525 mpx_fde->event_ctx = NULL;
527 return;
528 } else if (ret != 0) {
529 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
530 return;
533 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
534 /* only if we want to read we want to tell the event handler about errors */
535 if (fde->flags & TEVENT_FD_READ) {
536 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
539 if (mpx_fde == NULL) {
540 return;
543 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
544 /* only if we want to read we want to tell the event handler about errors */
545 if (mpx_fde->flags & TEVENT_FD_READ) {
546 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
550 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
552 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
553 bool want_read = (fde->flags & TEVENT_FD_READ);
554 bool want_write= (fde->flags & TEVENT_FD_WRITE);
555 struct tevent_fd *mpx_fde = NULL;
557 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
559 * work out what the multiplexed fde wants.
561 mpx_fde = talloc_get_type_abort(fde->additional_data,
562 struct tevent_fd);
564 if (mpx_fde->flags & TEVENT_FD_READ) {
565 want_read = true;
568 if (mpx_fde->flags & TEVENT_FD_WRITE) {
569 want_write = true;
573 /* there's already an event */
574 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
575 if (want_read || (want_write && !got_error)) {
576 epoll_mod_event(epoll_ev, fde);
577 return;
580 * if we want to match the select behavior, we need to remove the epoll_event
581 * when the caller isn't interested in events.
583 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
585 epoll_del_event(epoll_ev, fde);
586 return;
589 /* there's no epoll_event attached to the fde */
590 if (want_read || (want_write && !got_error)) {
591 epoll_add_event(epoll_ev, fde);
592 return;
597 Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
598 Return true if there's nothing else to do, false if
599 this event needs further handling.
601 static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
602 struct tevent_fd *fde)
604 if (fde == NULL) {
605 /* Nothing to do if no event. */
606 return true;
609 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
611 * if we only wait for TEVENT_FD_WRITE, we should not tell the
612 * event handler about it, and remove the epoll_event,
613 * as we only report errors when waiting for read events,
614 * to match the select() behavior
616 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
618 * Do the same as the poll backend and
619 * remove the writeable flag.
621 fde->flags &= ~TEVENT_FD_WRITE;
622 return true;
624 /* This has TEVENT_FD_READ set, we're not finished. */
625 return false;
629 event loop handling using epoll
631 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
633 int ret, i;
634 #define MAXEVENTS 1
635 struct epoll_event events[MAXEVENTS];
636 int timeout = -1;
637 int wait_errno;
639 if (tvalp) {
640 /* it's better to trigger timed events a bit later than too early */
641 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
644 if (epoll_ev->ev->signal_events &&
645 tevent_common_check_signal(epoll_ev->ev)) {
646 return 0;
649 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
650 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
651 wait_errno = errno;
652 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
654 if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
655 if (tevent_common_check_signal(epoll_ev->ev)) {
656 return 0;
660 if (ret == -1 && wait_errno != EINTR) {
661 epoll_panic(epoll_ev, "epoll_wait() failed", true);
662 return -1;
665 if (ret == 0 && tvalp) {
666 /* we don't care about a possible delay here */
667 tevent_common_loop_timer_delay(epoll_ev->ev);
668 return 0;
671 for (i=0;i<ret;i++) {
672 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
673 struct tevent_fd);
674 uint16_t flags = 0;
675 struct tevent_fd *mpx_fde = NULL;
677 if (fde == NULL) {
678 epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
679 return -1;
681 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
683 * Save off the multiplexed event in case we need
684 * to use it to call the handler function.
686 mpx_fde = talloc_get_type_abort(fde->additional_data,
687 struct tevent_fd);
689 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
690 bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
691 bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
693 if (handled_fde && handled_mpx) {
694 epoll_update_event(epoll_ev, fde);
695 continue;
698 if (!handled_mpx) {
700 * If the mpx event was the one that needs
701 * further handling, it's the TEVENT_FD_READ
702 * event so switch over and call that handler.
704 fde = mpx_fde;
705 mpx_fde = NULL;
707 flags |= TEVENT_FD_READ;
709 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
710 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
712 if (flags & TEVENT_FD_WRITE) {
713 if (fde->flags & TEVENT_FD_WRITE) {
714 mpx_fde = NULL;
716 if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
717 fde = mpx_fde;
718 mpx_fde = NULL;
722 if (mpx_fde) {
723 /* Ensure we got the right fde. */
724 if ((flags & fde->flags) == 0) {
725 fde = mpx_fde;
726 mpx_fde = NULL;
731 * make sure we only pass the flags
732 * the handler is expecting.
734 flags &= fde->flags;
735 if (flags) {
736 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
737 break;
741 return 0;
745 create a epoll_event_context structure.
747 static int epoll_event_context_init(struct tevent_context *ev)
749 int ret;
750 struct epoll_event_context *epoll_ev;
753 * We might be called during tevent_re_initialise()
754 * which means we need to free our old additional_data.
756 TALLOC_FREE(ev->additional_data);
758 epoll_ev = talloc_zero(ev, struct epoll_event_context);
759 if (!epoll_ev) return -1;
760 epoll_ev->ev = ev;
761 epoll_ev->epoll_fd = -1;
763 ret = epoll_init_ctx(epoll_ev);
764 if (ret != 0) {
765 talloc_free(epoll_ev);
766 return ret;
769 ev->additional_data = epoll_ev;
770 return 0;
774 destroy an fd_event
776 static int epoll_event_fd_destructor(struct tevent_fd *fde)
778 struct tevent_context *ev = fde->event_ctx;
779 struct epoll_event_context *epoll_ev = NULL;
780 bool panic_triggered = false;
781 struct tevent_fd *mpx_fde = NULL;
782 int flags = fde->flags;
784 if (ev == NULL) {
785 return tevent_common_fd_destructor(fde);
788 epoll_ev = talloc_get_type_abort(ev->additional_data,
789 struct epoll_event_context);
792 * we must remove the event from the list
793 * otherwise a panic fallback handler may
794 * reuse invalid memory
796 DLIST_REMOVE(ev->fd_events, fde);
798 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
799 mpx_fde = talloc_get_type_abort(fde->additional_data,
800 struct tevent_fd);
802 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
803 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
805 fde->additional_data = NULL;
806 mpx_fde->additional_data = NULL;
808 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
811 epoll_ev->panic_state = &panic_triggered;
812 epoll_check_reopen(epoll_ev);
813 if (panic_triggered) {
814 return tevent_common_fd_destructor(fde);
817 if (mpx_fde != NULL) {
818 epoll_update_event(epoll_ev, mpx_fde);
819 if (panic_triggered) {
820 return tevent_common_fd_destructor(fde);
824 fde->flags = 0;
825 epoll_update_event(epoll_ev, fde);
826 fde->flags = flags;
827 if (panic_triggered) {
828 return tevent_common_fd_destructor(fde);
830 epoll_ev->panic_state = NULL;
832 return tevent_common_fd_destructor(fde);
836 add a fd based event
837 return NULL on failure (memory allocation error)
839 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
840 int fd, uint16_t flags,
841 tevent_fd_handler_t handler,
842 void *private_data,
843 const char *handler_name,
844 const char *location)
846 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
847 struct epoll_event_context);
848 struct tevent_fd *fde;
849 bool panic_triggered = false;
851 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
852 handler, private_data,
853 handler_name, location);
854 if (!fde) return NULL;
856 talloc_set_destructor(fde, epoll_event_fd_destructor);
858 epoll_ev->panic_state = &panic_triggered;
859 epoll_check_reopen(epoll_ev);
860 if (panic_triggered) {
861 return fde;
863 epoll_ev->panic_state = NULL;
865 epoll_update_event(epoll_ev, fde);
867 return fde;
871 set the fd event flags
873 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
875 struct tevent_context *ev;
876 struct epoll_event_context *epoll_ev;
877 bool panic_triggered = false;
879 if (fde->flags == flags) return;
881 ev = fde->event_ctx;
882 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
884 fde->flags = flags;
886 epoll_ev->panic_state = &panic_triggered;
887 epoll_check_reopen(epoll_ev);
888 if (panic_triggered) {
889 return;
891 epoll_ev->panic_state = NULL;
893 epoll_update_event(epoll_ev, fde);
897 do a single event loop using the events defined in ev
899 static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
901 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
902 struct epoll_event_context);
903 struct timeval tval;
904 bool panic_triggered = false;
906 if (ev->signal_events &&
907 tevent_common_check_signal(ev)) {
908 return 0;
911 if (ev->immediate_events &&
912 tevent_common_loop_immediate(ev)) {
913 return 0;
916 tval = tevent_common_loop_timer_delay(ev);
917 if (tevent_timeval_is_zero(&tval)) {
918 return 0;
921 epoll_ev->panic_state = &panic_triggered;
922 epoll_ev->panic_force_replay = true;
923 epoll_check_reopen(epoll_ev);
924 if (panic_triggered) {
925 errno = EINVAL;
926 return -1;
928 epoll_ev->panic_force_replay = false;
929 epoll_ev->panic_state = NULL;
931 return epoll_event_loop(epoll_ev, &tval);
934 static const struct tevent_ops epoll_event_ops = {
935 .context_init = epoll_event_context_init,
936 .add_fd = epoll_event_add_fd,
937 .set_fd_close_fn = tevent_common_fd_set_close_fn,
938 .get_fd_flags = tevent_common_fd_get_flags,
939 .set_fd_flags = epoll_event_set_fd_flags,
940 .add_timer = tevent_common_add_timer_v2,
941 .schedule_immediate = tevent_common_schedule_immediate,
942 .add_signal = tevent_common_add_signal,
943 .loop_once = epoll_event_loop_once,
944 .loop_wait = tevent_common_loop_wait,
947 _PRIVATE_ bool tevent_epoll_init(void)
949 return tevent_register_backend("epoll", &epoll_event_ops);