s4: torture: Add an async SMB2_OP_FLUSH + SMB2_OP_CLOSE test to smb2.compound_async.
[Samba.git] / lib / tevent / tevent_epoll.c
blob1f1f47b2b21189d124997f950c2db11bb74ba310
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2013
8 Copyright (C) Jeremy Allison 2013
10 ** NOTE! The following LGPL license applies to the tevent
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/select.h"
31 #include "tevent.h"
32 #include "tevent_internal.h"
33 #include "tevent_util.h"
/* Per-tevent-context private state for the epoll backend. */
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* pid that created epoll_fd; used to detect fork() (see epoll_check_reopen) */
	pid_t pid;

	/* force replay=true in epoll_panic() while inside the wait loop */
	bool panic_force_replay;
	/* if non-NULL, set to true by epoll_panic() so callers can detect a panic */
	bool *panic_state;
	/* optional recovery hook; when NULL, epoll_panic() aborts */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
/*
 * Bits kept in tevent_fd.additional_flags by this backend:
 */
/* fde currently has an epoll_event registered with the kernel */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* EPOLLHUP/EPOLLERR should be reported to the handler (set when reading) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* an EPOLLHUP/EPOLLERR has been seen on this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* fde is paired with a second fde on the same fd (see epoll_add_multiplex_fd) */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
#ifdef TEST_PANIC_FALLBACK

/*
 * Test-only wrappers: once a panic_fallback is registered, each epoll
 * syscall randomly fails 50% of the time with EINVAL, to exercise the
 * panic/fallback code paths.
 */

static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
				       int size)
{
	if (epoll_ev->panic_fallback == NULL) {
		return epoll_create(size);
	}

	/* 50% of the time, fail... */
	if ((random() % 2) == 0) {
		errno = EINVAL;
		return -1;
	}

	return epoll_create(size);
}

static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
				    int epfd, int op, int fd,
				    struct epoll_event *event)
{
	if (epoll_ev->panic_fallback == NULL) {
		return epoll_ctl(epfd, op, fd, event);
	}

	/* 50% of the time, fail... */
	if ((random() % 2) == 0) {
		errno = EINVAL;
		return -1;
	}

	return epoll_ctl(epfd, op, fd, event);
}

static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
				     int epfd,
				     struct epoll_event *events,
				     int maxevents,
				     int timeout)
{
	if (epoll_ev->panic_fallback == NULL) {
		return epoll_wait(epfd, events, maxevents, timeout);
	}

	/* 50% of the time, fail... */
	if ((random() % 2) == 0) {
		errno = EINVAL;
		return -1;
	}

	return epoll_wait(epfd, events, maxevents, timeout);
}

/*
 * NOTE: these macros expand a variable literally named 'epoll_ev' in the
 * caller's scope, so every caller of epoll_create/epoll_ctl/epoll_wait
 * in this file must keep that identifier in scope.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
#endif
/*
  called to set the panic fallback function.
  Registered on ev's backend state; invoked by epoll_panic() instead of
  abort() when an epoll syscall fails.
*/
_PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
				bool (*panic_fallback)(struct tevent_context *ev,
						       bool replay))
{
	struct epoll_event_context *epoll_ev =
		talloc_get_type_abort(ev->additional_data,
		struct epoll_event_context);

	epoll_ev->panic_fallback = panic_fallback;
}
/*
  called when a epoll call fails.
  Notifies *panic_state (if set), frees the backend state and either
  aborts or hands control to the registered panic_fallback.
*/
static void epoll_panic(struct epoll_event_context *epoll_ev,
			const char *reason, bool replay)
{
	struct tevent_context *ev = epoll_ev->ev;
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);

	/* copy before TALLOC_FREE below invalidates epoll_ev */
	panic_fallback = epoll_ev->panic_fallback;

	if (epoll_ev->panic_state != NULL) {
		*epoll_ev->panic_state = true;
	}

	if (epoll_ev->panic_force_replay) {
		replay = true;
	}

	/* this frees epoll_ev itself (it is ev->additional_data) */
	TALLOC_FREE(ev->additional_data);

	if (panic_fallback == NULL) {
		tevent_debug(ev, TEVENT_DEBUG_FATAL,
			     "%s (%s) replay[%u] - calling abort()\n",
			     reason, strerror(errno), (unsigned)replay);
		abort();
	}

	tevent_debug(ev, TEVENT_DEBUG_ERROR,
		     "%s (%s) replay[%u] - calling panic_fallback\n",
		     reason, strerror(errno), (unsigned)replay);

	if (!panic_fallback(ev, replay)) {
		/* Fallback failed. */
		tevent_debug(ev, TEVENT_DEBUG_FATAL,
			     "%s (%s) replay[%u] - calling abort()\n",
			     reason, strerror(errno), (unsigned)replay);
		abort();
	}
}
172 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
174 static uint32_t epoll_map_flags(uint16_t flags)
176 uint32_t ret = 0;
177 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
178 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
179 return ret;
183 free the epoll fd
185 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
187 close(epoll_ev->epoll_fd);
188 epoll_ev->epoll_fd = -1;
189 return 0;
/*
  init the epoll fd.
  Creates the epoll handle, records the creating pid (for fork
  detection) and installs the destructor. Returns 0 on success,
  -1 on failure.
*/
static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
{
	/* the size argument (64) is only a historical hint, ignored by modern kernels */
	epoll_ev->epoll_fd = epoll_create(64);
	if (epoll_ev->epoll_fd == -1) {
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
			     "Failed to create epoll handle.\n");
		return -1;
	}

	if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
		/* non-fatal: we keep going with a potentially leaked fd */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
			     "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
	}

	epoll_ev->pid = tevent_cached_getpid();
	talloc_set_destructor(epoll_ev, epoll_ctx_destructor);

	return 0;
}
215 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
/*
  reopen the epoll handle when our pid changes
  see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
  demonstration of why this is needed

  An epoll fd is shared with the parent after fork(), so the child must
  create its own and re-register every fde. May trigger epoll_panic();
  the caller's panic_state is propagated if that happens.
*/
static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
{
	struct tevent_fd *fde;
	bool *caller_panic_state = epoll_ev->panic_state;
	bool panic_triggered = false;
	pid_t pid = tevent_cached_getpid();

	if (epoll_ev->pid == pid) {
		/* no fork() happened, nothing to do */
		return;
	}

	close(epoll_ev->epoll_fd);
	epoll_ev->epoll_fd = epoll_create(64);
	if (epoll_ev->epoll_fd == -1) {
		epoll_panic(epoll_ev, "epoll_create() failed", false);
		return;
	}

	if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
			     "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
	}

	epoll_ev->pid = pid;
	/* watch for panics while re-adding all events to the new epoll fd */
	epoll_ev->panic_state = &panic_triggered;
	for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
		/* the old registration died with the old epoll fd */
		fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
		epoll_update_event(epoll_ev, fde);
	}

	if (panic_triggered) {
		/* epoll_ev may have been freed by epoll_panic(); tell the caller */
		if (caller_panic_state != NULL) {
			*caller_panic_state = true;
		}
		return;
	}
	epoll_ev->panic_state = NULL;
}
/*
  epoll cannot add the same file descriptor twice, once
  with read, once with write which is allowed by the
  tevent backend. Multiplex the existing fde, flag it
  as such so we can search for the correct fde on
  event triggering.

  Returns 0 on success (including the EBADF disable path),
  -1 or the epoll_ctl() result on failure.
*/
static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
				  struct tevent_fd *add_fde)
{
	struct epoll_event event;
	struct tevent_fd *mpx_fde;
	int ret;

	/* Find the existing fde that caused the EEXIST error. */
	for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
		if (mpx_fde->fd != add_fde->fd) {
			continue;
		}

		if (mpx_fde == add_fde) {
			continue;
		}

		break;
	}

	if (mpx_fde == NULL) {
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
			     "can't find multiplex fde for fd[%d]",
			     add_fde->fd);
		return -1;
	}

	if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/* Logic error. Can't have more than 2 multiplexed fde's. */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
			     "multiplex fde for fd[%d] is already multiplexed\n",
			     mpx_fde->fd);
		return -1;
	}

	/*
	 * The multiplex fde must have the same fd, and also
	 * already have an epoll event attached.
	 */
	if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
		/* Logic error. Can't have more than 2 multiplexed fde's. */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
			     "multiplex fde for fd[%d] has no event\n",
			     mpx_fde->fd);
		return -1;
	}

	/* Modify the mpx_fde to add in the new flags. */
	ZERO_STRUCT(event);
	event.events = epoll_map_flags(mpx_fde->flags);
	event.events |= epoll_map_flags(add_fde->flags);
	/* the kernel hands back mpx_fde; epoll_event_loop resolves the pair */
	event.data.ptr = mpx_fde;
	ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
	if (ret != 0 && errno == EBADF) {
		/* fd is dead: detach both fdes from the event context instead of panicking */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
			     "EPOLL_CTL_MOD EBADF for "
			     "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
			     add_fde, mpx_fde, add_fde->fd);
		DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
		mpx_fde->wrapper = NULL;
		mpx_fde->event_ctx = NULL;
		DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
		add_fde->wrapper = NULL;
		add_fde->event_ctx = NULL;
		return 0;
	} else if (ret != 0) {
		return ret;
	}

	/*
	 * Make each fde->additional_data pointers point at each other
	 * so we can look them up from each other. They are now paired.
	 */
	mpx_fde->additional_data = (struct tevent_fd *)add_fde;
	add_fde->additional_data = (struct tevent_fd *)mpx_fde;

	/* Now flag both fde's as being multiplexed. */
	mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
	add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;

	/* we need to keep the GOT_ERROR flag */
	if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
		add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
	}

	return 0;
}
/*
  add the epoll event to the given fd_event.
  Handles EEXIST by multiplexing with an existing fde on the same fd,
  and EBADF by detaching the fde(s). Other failures panic.
*/
static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
	struct epoll_event event;
	int ret;
	struct tevent_fd *mpx_fde = NULL;

	/* start from a clean state; set again below on success */
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/*
		 * This is a multiplexed fde, we need to include both
		 * flags in the modified event.
		 */
		mpx_fde = talloc_get_type_abort(fde->additional_data,
						struct tevent_fd);

		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}

	ZERO_STRUCT(event);
	event.events = epoll_map_flags(fde->flags);
	if (mpx_fde != NULL) {
		event.events |= epoll_map_flags(mpx_fde->flags);
	}
	event.data.ptr = fde;
	ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
	if (ret != 0 && errno == EBADF) {
		/* fd is dead: detach instead of panicking */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
			     "EPOLL_CTL_ADD EBADF for "
			     "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
			     fde, mpx_fde, fde->fd);
		DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
		fde->wrapper = NULL;
		fde->event_ctx = NULL;
		if (mpx_fde != NULL) {
			DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
			mpx_fde->wrapper = NULL;
			mpx_fde->event_ctx = NULL;
		}
		return;
	} else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
		/* the fd is already registered via another fde: pair them up */
		ret = epoll_add_multiplex_fd(epoll_ev, fde);
		if (ret != 0) {
			epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
				    false);
			return;
		}
	} else if (ret != 0) {
		epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
		return;
	}

	fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	/* only if we want to read we want to tell the event handler about errors */
	if (fde->flags & TEVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}

	if (mpx_fde == NULL) {
		return;
	}

	mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	/* only if we want to read we want to tell the event handler about errors */
	if (mpx_fde->flags & TEVENT_FD_READ) {
		mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
/*
  delete the epoll event for given fd_event.
  ENOENT and EBADF are tolerated (post-fork reopen / dead fd); any other
  epoll_ctl failure panics.
*/
static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
	struct epoll_event event;
	int ret;
	struct tevent_fd *mpx_fde = NULL;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/*
		 * This is a multiplexed fde, we need to modify both events.
		 */
		mpx_fde = talloc_get_type_abort(fde->additional_data,
						struct tevent_fd);

		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}

	/* event arg is ignored for EPOLL_CTL_DEL but needed pre-2.6.9 kernels */
	ZERO_STRUCT(event);
	ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
	if (ret != 0 && errno == ENOENT) {
		/*
		 * This can happen after a epoll_check_reopen
		 * within epoll_event_fd_destructor.
		 */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
			     "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
			     fde->fd);
		return;
	} else if (ret != 0 && errno == EBADF) {
		/* fd is dead: detach instead of panicking */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
			     "EPOLL_CTL_DEL EBADF for "
			     "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
			     fde, mpx_fde, fde->fd);
		DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
		fde->wrapper = NULL;
		fde->event_ctx = NULL;
		if (mpx_fde != NULL) {
			DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
			mpx_fde->wrapper = NULL;
			mpx_fde->event_ctx = NULL;
		}
		return;
	} else if (ret != 0) {
		epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
		return;
	}
}
/*
  change the epoll event to the given fd_event.
  Rebuilds the kernel event mask from fde (and its multiplex partner, if
  any); EBADF detaches the fde(s), other failures panic.
*/
static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
	struct tevent_fd *mpx_fde = NULL;
	struct epoll_event event;
	int ret;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/*
		 * This is a multiplexed fde, we need to include both
		 * flags in the modified event.
		 */
		mpx_fde = talloc_get_type_abort(fde->additional_data,
						struct tevent_fd);

		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}

	ZERO_STRUCT(event);
	event.events = epoll_map_flags(fde->flags);
	if (mpx_fde != NULL) {
		event.events |= epoll_map_flags(mpx_fde->flags);
	}
	event.data.ptr = fde;
	ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
	if (ret != 0 && errno == EBADF) {
		/* fd is dead: detach instead of panicking */
		tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
			     "EPOLL_CTL_MOD EBADF for "
			     "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
			     fde, mpx_fde, fde->fd);
		DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
		fde->wrapper = NULL;
		fde->event_ctx = NULL;
		if (mpx_fde != NULL) {
			DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
			mpx_fde->wrapper = NULL;
			mpx_fde->event_ctx = NULL;
		}
		return;
	} else if (ret != 0) {
		epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
		return;
	}

	fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	/* only if we want to read we want to tell the event handler about errors */
	if (fde->flags & TEVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}

	if (mpx_fde == NULL) {
		return;
	}

	mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	/* only if we want to read we want to tell the event handler about errors */
	if (mpx_fde->flags & TEVENT_FD_READ) {
		mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
/*
  Decide whether the kernel registration for fde must be added, modified
  or removed, based on the tevent flags of fde and its multiplex partner.
*/
static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
{
	bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
	bool want_read = (fde->flags & TEVENT_FD_READ);
	bool want_write= (fde->flags & TEVENT_FD_WRITE);
	struct tevent_fd *mpx_fde = NULL;

	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/*
		 * work out what the multiplexed fde wants.
		 */
		mpx_fde = talloc_get_type_abort(fde->additional_data,
						struct tevent_fd);

		if (mpx_fde->flags & TEVENT_FD_READ) {
			want_read = true;
		}

		if (mpx_fde->flags & TEVENT_FD_WRITE) {
			want_write = true;
		}
	}

	/* there's already an event */
	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
		if (want_read || (want_write && !got_error)) {
			epoll_mod_event(epoll_ev, fde);
			return;
		}
		/*
		 * if we want to match the select behavior, we need to remove the epoll_event
		 * when the caller isn't interested in events.
		 *
		 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
		 */
		epoll_del_event(epoll_ev, fde);
		return;
	}

	/* there's no epoll_event attached to the fde */
	if (want_read || (want_write && !got_error)) {
		epoll_add_event(epoll_ev, fde);
		return;
	}
}
598 Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
599 Return true if there's nothing else to do, false if
600 this event needs further handling.
602 static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
603 struct tevent_fd *fde)
605 if (fde == NULL) {
606 /* Nothing to do if no event. */
607 return true;
610 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
612 * if we only wait for TEVENT_FD_WRITE, we should not tell the
613 * event handler about it, and remove the epoll_event,
614 * as we only report errors when waiting for read events,
615 * to match the select() behavior
617 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
619 * Do the same as the poll backend and
620 * remove the writeable flag.
622 fde->flags &= ~TEVENT_FD_WRITE;
623 return true;
625 /* This has TEVENT_FD_READ set, we're not finished. */
626 return false;
/*
  event loop handling using epoll.
  Waits for one epoll event (or the given timeout), dispatches signal
  handlers, timers and at most one fd handler. Returns 0 on success,
  -1 on panic or handler failure.
*/
static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
{
	int ret, i;
#define MAXEVENTS 1
	struct epoll_event events[MAXEVENTS];
	int timeout = -1;
	int wait_errno;

	if (tvalp) {
		/* it's better to trigger timed events a bit later than too early */
		timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
	}

	if (epoll_ev->ev->signal_events &&
	    tevent_common_check_signal(epoll_ev->ev)) {
		return 0;
	}

	tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
	ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
	/* save errno: the trace callback below may clobber it */
	wait_errno = errno;
	tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);

	if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
		if (tevent_common_check_signal(epoll_ev->ev)) {
			return 0;
		}
	}

	if (ret == -1 && wait_errno != EINTR) {
		epoll_panic(epoll_ev, "epoll_wait() failed", true);
		return -1;
	}

	if (ret == 0 && tvalp) {
		/* we don't care about a possible delay here */
		tevent_common_loop_timer_delay(epoll_ev->ev);
		return 0;
	}

	for (i=0;i<ret;i++) {
		struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
						       struct tevent_fd);
		uint16_t flags = 0;
		struct tevent_fd *mpx_fde = NULL;

		if (fde == NULL) {
			epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
			return -1;
		}
		if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
			/*
			 * Save off the multiplexed event in case we need
			 * to use it to call the handler function.
			 */
			mpx_fde = talloc_get_type_abort(fde->additional_data,
							struct tevent_fd);
		}
		if (events[i].events & (EPOLLHUP|EPOLLERR)) {
			bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
			bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);

			if (handled_fde && handled_mpx) {
				/* nothing to report; re-sync kernel state and move on */
				epoll_update_event(epoll_ev, fde);
				continue;
			}

			if (!handled_mpx) {
				/*
				 * If the mpx event was the one that needs
				 * further handling, it's the TEVENT_FD_READ
				 * event so switch over and call that handler.
				 */
				fde = mpx_fde;
				mpx_fde = NULL;
			}
			flags |= TEVENT_FD_READ;
		}
		if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
		if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;

		if (flags & TEVENT_FD_WRITE) {
			/* route the write event to whichever paired fde wants writes */
			if (fde->flags & TEVENT_FD_WRITE) {
				mpx_fde = NULL;
			}
			if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
				fde = mpx_fde;
				mpx_fde = NULL;
			}
		}
		if (mpx_fde) {
			/* Ensure we got the right fde. */
			if ((flags & fde->flags) == 0) {
				fde = mpx_fde;
				mpx_fde = NULL;
			}
		}

		/*
		 * make sure we only pass the flags
		 * the handler is expecting.
		 */
		flags &= fde->flags;
		if (flags) {
			return tevent_common_invoke_fd_handler(fde, flags, NULL);
		}
	}

	return 0;
}
745 create a epoll_event_context structure.
747 static int epoll_event_context_init(struct tevent_context *ev)
749 int ret;
750 struct epoll_event_context *epoll_ev;
753 * We might be called during tevent_re_initialise()
754 * which means we need to free our old additional_data.
756 TALLOC_FREE(ev->additional_data);
758 epoll_ev = talloc_zero(ev, struct epoll_event_context);
759 if (!epoll_ev) return -1;
760 epoll_ev->ev = ev;
761 epoll_ev->epoll_fd = -1;
763 ret = epoll_init_ctx(epoll_ev);
764 if (ret != 0) {
765 talloc_free(epoll_ev);
766 return ret;
769 ev->additional_data = epoll_ev;
770 return 0;
/*
  destroy an fd_event.
  talloc destructor for tevent_fd created by this backend: unlinks the
  fde, unpairs a multiplexed partner, and re-syncs the kernel epoll
  state, bailing out early if a panic fallback fires.
*/
static int epoll_event_fd_destructor(struct tevent_fd *fde)
{
	struct tevent_context *ev = fde->event_ctx;
	struct epoll_event_context *epoll_ev = NULL;
	bool panic_triggered = false;
	struct tevent_fd *mpx_fde = NULL;
	int flags = fde->flags;

	if (ev == NULL) {
		/* already detached from an event context */
		return tevent_common_fd_destructor(fde);
	}

	epoll_ev = talloc_get_type_abort(ev->additional_data,
					 struct epoll_event_context);

	/*
	 * we must remove the event from the list
	 * otherwise a panic fallback handler may
	 * reuse invalid memory
	 */
	DLIST_REMOVE(ev->fd_events, fde);

	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
		/* break the pairing before touching kernel state */
		mpx_fde = talloc_get_type_abort(fde->additional_data,
						struct tevent_fd);

		fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
		mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;

		fde->additional_data = NULL;
		mpx_fde->additional_data = NULL;

		fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
	}

	epoll_ev->panic_state = &panic_triggered;
	epoll_check_reopen(epoll_ev);
	if (panic_triggered) {
		return tevent_common_fd_destructor(fde);
	}

	if (mpx_fde != NULL) {
		/* re-register the surviving partner on its own */
		epoll_update_event(epoll_ev, mpx_fde);
		if (panic_triggered) {
			return tevent_common_fd_destructor(fde);
		}
	}

	/* temporarily zero the flags so update removes the kernel event */
	fde->flags = 0;
	epoll_update_event(epoll_ev, fde);
	fde->flags = flags;
	if (panic_triggered) {
		return tevent_common_fd_destructor(fde);
	}
	epoll_ev->panic_state = NULL;

	return tevent_common_fd_destructor(fde);
}
/*
  add a fd based event
  return NULL on failure (memory allocation error)
*/
static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
					    int fd, uint16_t flags,
					    tevent_fd_handler_t handler,
					    void *private_data,
					    const char *handler_name,
					    const char *location)
{
	struct epoll_event_context *epoll_ev =
		talloc_get_type_abort(ev->additional_data,
		struct epoll_event_context);
	struct tevent_fd *fde;
	bool panic_triggered = false;

	fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
				   handler, private_data,
				   handler_name, location);
	if (!fde) return NULL;

	talloc_set_destructor(fde, epoll_event_fd_destructor);

	/* detect a fork() before touching the epoll fd */
	epoll_ev->panic_state = &panic_triggered;
	epoll_check_reopen(epoll_ev);
	if (panic_triggered) {
		/* epoll_ev may be gone; return the fde without registering it */
		return fde;
	}
	epoll_ev->panic_state = NULL;

	epoll_update_event(epoll_ev, fde);

	return fde;
}
/*
  set the fd event flags.
  Updates fde->flags and re-syncs the kernel epoll registration;
  no-op when the flags are unchanged.
*/
static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
{
	struct tevent_context *ev;
	struct epoll_event_context *epoll_ev;
	bool panic_triggered = false;

	if (fde->flags == flags) return;

	ev = fde->event_ctx;
	epoll_ev = talloc_get_type_abort(ev->additional_data,
					 struct epoll_event_context);

	fde->flags = flags;

	/* detect a fork() before touching the epoll fd */
	epoll_ev->panic_state = &panic_triggered;
	epoll_check_reopen(epoll_ev);
	if (panic_triggered) {
		/* epoll_ev may have been freed by the panic fallback */
		return;
	}
	epoll_ev->panic_state = NULL;

	epoll_update_event(epoll_ev, fde);
}
/*
  do a single event loop using the events defined in ev.
  Dispatches pending signals, thread-activated and normal immediates,
  and timers before blocking in epoll_event_loop(). Returns 0 on
  success, -1 on error (errno set on panic).
*/
static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
{
	struct epoll_event_context *epoll_ev =
		talloc_get_type_abort(ev->additional_data,
		struct epoll_event_context);
	struct timeval tval;
	bool panic_triggered = false;

	if (ev->signal_events &&
	    tevent_common_check_signal(ev)) {
		return 0;
	}

	if (ev->threaded_contexts != NULL) {
		tevent_common_threaded_activate_immediate(ev);
	}

	if (ev->immediate_events &&
	    tevent_common_loop_immediate(ev)) {
		return 0;
	}

	tval = tevent_common_loop_timer_delay(ev);
	if (tevent_timeval_is_zero(&tval)) {
		/* a timer fired, that counts as the one event for this iteration */
		return 0;
	}

	/*
	 * detect fork() and force replay semantics for any panic raised
	 * while blocked in the wait below
	 */
	epoll_ev->panic_state = &panic_triggered;
	epoll_ev->panic_force_replay = true;
	epoll_check_reopen(epoll_ev);
	if (panic_triggered) {
		errno = EINVAL;
		return -1;
	}
	epoll_ev->panic_force_replay = false;
	epoll_ev->panic_state = NULL;

	return epoll_event_loop(epoll_ev, &tval);
}
/* backend ops vtable: epoll-specific entry points plus common fallbacks */
static const struct tevent_ops epoll_event_ops = {
	.context_init		= epoll_event_context_init,
	.add_fd			= epoll_event_add_fd,
	.set_fd_close_fn	= tevent_common_fd_set_close_fn,
	.get_fd_flags		= tevent_common_fd_get_flags,
	.set_fd_flags		= epoll_event_set_fd_flags,
	.add_timer		= tevent_common_add_timer_v2,
	.schedule_immediate	= tevent_common_schedule_immediate,
	.add_signal		= tevent_common_add_signal,
	.loop_once		= epoll_event_loop_once,
	.loop_wait		= tevent_common_loop_wait,
};
/* register the "epoll" backend with the tevent core; returns false on failure */
_PRIVATE_ bool tevent_epoll_init(void)
{
	return tevent_register_backend("epoll", &epoll_event_ops);
}