s3/torture: migrate SMB_VFS_FLISTXATTR calls to SMB_VFS_FLISTXATTR
[Samba.git] / lib / tevent / tevent_epoll.c
blob9cbe505c98a4d817f43ec45091ad215add6b239a
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005-2013
8 Copyright (C) Jeremy Allison 2013
10 ** NOTE! The following LGPL license applies to the tevent
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
28 #include "replace.h"
29 #include "system/filesys.h"
30 #include "system/select.h"
31 #include "tevent.h"
32 #include "tevent_internal.h"
33 #include "tevent_util.h"
/*
  private state of the epoll backend, stored in ev->additional_data
*/
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct tevent_context *ev;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* pid that created epoll_fd, compared with getpid() to detect a fork */
	pid_t pid;

	/* when set, epoll_panic() always passes replay=true to the fallback */
	bool panic_force_replay;
	/* when not NULL, epoll_panic() sets *panic_state to true */
	bool *panic_state;
	/* called by epoll_panic(); when NULL epoll_panic() calls abort() */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
/*
  backend private bits stored in tevent_fd->additional_flags
*/
/* an epoll_event is currently registered for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1 << 0)
/* EPOLLHUP/EPOLLERR is passed on to the handler (set when reading) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1 << 1)
/* EPOLLHUP/EPOLLERR was already seen for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1 << 2)
/* additional_data points at a second fde sharing the same fd */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1 << 3)
#ifdef TEST_PANIC_FALLBACK

/*
  decide if a wrapped epoll call should fail artificially.

  Never fails while no panic fallback is installed, otherwise
  fails 50% of the time with errno set to EINVAL.
*/
static bool epoll_test_inject_failure(struct epoll_event_context *epoll_ev)
{
	if (epoll_ev->panic_fallback == NULL) {
		return false;
	}

	/* 50% of the time, fail... */
	if ((random() % 2) != 0) {
		return false;
	}

	errno = EINVAL;
	return true;
}

/* epoll_create() with random failure injection */
static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
				       int size)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_create(size);
}

/* epoll_ctl() with random failure injection */
static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
				    int epfd, int op, int fd,
				    struct epoll_event *event)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_ctl(epfd, op, fd, event);
}

/* epoll_wait() with random failure injection */
static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
				     int epfd,
				     struct epoll_event *events,
				     int maxevents,
				     int timeout)
{
	if (epoll_test_inject_failure(epoll_ev)) {
		return -1;
	}

	return epoll_wait(epfd, events, maxevents, timeout);
}

/*
 * From here on every epoll call in this file goes through the
 * wrappers above. The macros rely on a variable named 'epoll_ev'
 * being in scope at the call site.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
#endif
117 called to set the panic fallback function.
119 _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
120 bool (*panic_fallback)(struct tevent_context *ev,
121 bool replay))
123 struct epoll_event_context *epoll_ev =
124 talloc_get_type_abort(ev->additional_data,
125 struct epoll_event_context);
127 epoll_ev->panic_fallback = panic_fallback;
131 called when a epoll call fails
133 static void epoll_panic(struct epoll_event_context *epoll_ev,
134 const char *reason, bool replay)
136 struct tevent_context *ev = epoll_ev->ev;
137 bool (*panic_fallback)(struct tevent_context *ev, bool replay);
139 panic_fallback = epoll_ev->panic_fallback;
141 if (epoll_ev->panic_state != NULL) {
142 *epoll_ev->panic_state = true;
145 if (epoll_ev->panic_force_replay) {
146 replay = true;
149 TALLOC_FREE(ev->additional_data);
151 if (panic_fallback == NULL) {
152 tevent_debug(ev, TEVENT_DEBUG_FATAL,
153 "%s (%s) replay[%u] - calling abort()\n",
154 reason, strerror(errno), (unsigned)replay);
155 abort();
158 tevent_debug(ev, TEVENT_DEBUG_ERROR,
159 "%s (%s) replay[%u] - calling panic_fallback\n",
160 reason, strerror(errno), (unsigned)replay);
162 if (!panic_fallback(ev, replay)) {
163 /* Fallback failed. */
164 tevent_debug(ev, TEVENT_DEBUG_FATAL,
165 "%s (%s) replay[%u] - calling abort()\n",
166 reason, strerror(errno), (unsigned)replay);
167 abort();
172 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
174 static uint32_t epoll_map_flags(uint16_t flags)
176 uint32_t ret = 0;
177 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
178 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
179 return ret;
183 free the epoll fd
185 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
187 close(epoll_ev->epoll_fd);
188 epoll_ev->epoll_fd = -1;
189 return 0;
193 init the epoll fd
195 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
197 epoll_ev->epoll_fd = epoll_create(64);
198 if (epoll_ev->epoll_fd == -1) {
199 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
200 "Failed to create epoll handle.\n");
201 return -1;
204 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
205 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
206 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
209 epoll_ev->pid = getpid();
210 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
212 return 0;
215 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
218 reopen the epoll handle when our pid changes
219 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
220 demonstration of why this is needed
222 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
224 struct tevent_fd *fde;
225 bool *caller_panic_state = epoll_ev->panic_state;
226 bool panic_triggered = false;
228 if (epoll_ev->pid == getpid()) {
229 return;
232 close(epoll_ev->epoll_fd);
233 epoll_ev->epoll_fd = epoll_create(64);
234 if (epoll_ev->epoll_fd == -1) {
235 epoll_panic(epoll_ev, "epoll_create() failed", false);
236 return;
239 if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
240 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
241 "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
244 epoll_ev->pid = getpid();
245 epoll_ev->panic_state = &panic_triggered;
246 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
247 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
248 epoll_update_event(epoll_ev, fde);
250 if (panic_triggered) {
251 if (caller_panic_state != NULL) {
252 *caller_panic_state = true;
254 return;
257 epoll_ev->panic_state = NULL;
261 epoll cannot add the same file descriptor twice, once
262 with read, once with write which is allowed by the
263 tevent backend. Multiplex the existing fde, flag it
264 as such so we can search for the correct fde on
265 event triggering.
268 static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
269 struct tevent_fd *add_fde)
271 struct epoll_event event;
272 struct tevent_fd *mpx_fde;
273 int ret;
275 /* Find the existing fde that caused the EEXIST error. */
276 for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
277 if (mpx_fde->fd != add_fde->fd) {
278 continue;
281 if (mpx_fde == add_fde) {
282 continue;
285 break;
287 if (mpx_fde == NULL) {
288 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
289 "can't find multiplex fde for fd[%d]",
290 add_fde->fd);
291 return -1;
294 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
295 /* Logic error. Can't have more than 2 multiplexed fde's. */
296 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
297 "multiplex fde for fd[%d] is already multiplexed\n",
298 mpx_fde->fd);
299 return -1;
303 * The multiplex fde must have the same fd, and also
304 * already have an epoll event attached.
306 if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
307 /* Logic error. Can't have more than 2 multiplexed fde's. */
308 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
309 "multiplex fde for fd[%d] has no event\n",
310 mpx_fde->fd);
311 return -1;
314 /* Modify the mpx_fde to add in the new flags. */
315 ZERO_STRUCT(event);
316 event.events = epoll_map_flags(mpx_fde->flags);
317 event.events |= epoll_map_flags(add_fde->flags);
318 event.data.ptr = mpx_fde;
319 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
320 if (ret != 0 && errno == EBADF) {
321 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
322 "EPOLL_CTL_MOD EBADF for "
323 "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
324 add_fde, mpx_fde, add_fde->fd);
325 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
326 mpx_fde->wrapper = NULL;
327 mpx_fde->event_ctx = NULL;
328 DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
329 add_fde->wrapper = NULL;
330 add_fde->event_ctx = NULL;
331 return 0;
332 } else if (ret != 0) {
333 return ret;
337 * Make each fde->additional_data pointers point at each other
338 * so we can look them up from each other. They are now paired.
340 mpx_fde->additional_data = (struct tevent_fd *)add_fde;
341 add_fde->additional_data = (struct tevent_fd *)mpx_fde;
343 /* Now flag both fde's as being multiplexed. */
344 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
345 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
347 /* we need to keep the GOT_ERROR flag */
348 if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
349 add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
352 return 0;
356 add the epoll event to the given fd_event
358 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
360 struct epoll_event event;
361 int ret;
362 struct tevent_fd *mpx_fde = NULL;
364 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
365 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
367 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
369 * This is a multiplexed fde, we need to include both
370 * flags in the modified event.
372 mpx_fde = talloc_get_type_abort(fde->additional_data,
373 struct tevent_fd);
375 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
376 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
379 ZERO_STRUCT(event);
380 event.events = epoll_map_flags(fde->flags);
381 if (mpx_fde != NULL) {
382 event.events |= epoll_map_flags(mpx_fde->flags);
384 event.data.ptr = fde;
385 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
386 if (ret != 0 && errno == EBADF) {
387 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
388 "EPOLL_CTL_ADD EBADF for "
389 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
390 fde, mpx_fde, fde->fd);
391 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
392 fde->wrapper = NULL;
393 fde->event_ctx = NULL;
394 if (mpx_fde != NULL) {
395 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
396 mpx_fde->wrapper = NULL;
397 mpx_fde->event_ctx = NULL;
399 return;
400 } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
401 ret = epoll_add_multiplex_fd(epoll_ev, fde);
402 if (ret != 0) {
403 epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
404 false);
405 return;
407 } else if (ret != 0) {
408 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
409 return;
412 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
413 /* only if we want to read we want to tell the event handler about errors */
414 if (fde->flags & TEVENT_FD_READ) {
415 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
418 if (mpx_fde == NULL) {
419 return;
422 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
423 /* only if we want to read we want to tell the event handler about errors */
424 if (mpx_fde->flags & TEVENT_FD_READ) {
425 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
430 delete the epoll event for given fd_event
432 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
434 struct epoll_event event;
435 int ret;
436 struct tevent_fd *mpx_fde = NULL;
438 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
439 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
441 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
443 * This is a multiplexed fde, we need to modify both events.
445 mpx_fde = talloc_get_type_abort(fde->additional_data,
446 struct tevent_fd);
448 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
449 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
452 ZERO_STRUCT(event);
453 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
454 if (ret != 0 && errno == ENOENT) {
456 * This can happen after a epoll_check_reopen
457 * within epoll_event_fd_destructor.
459 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
460 "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
461 fde->fd);
462 return;
463 } else if (ret != 0 && errno == EBADF) {
464 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
465 "EPOLL_CTL_DEL EBADF for "
466 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
467 fde, mpx_fde, fde->fd);
468 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
469 fde->wrapper = NULL;
470 fde->event_ctx = NULL;
471 if (mpx_fde != NULL) {
472 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
473 mpx_fde->wrapper = NULL;
474 mpx_fde->event_ctx = NULL;
476 return;
477 } else if (ret != 0) {
478 epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
479 return;
484 change the epoll event to the given fd_event
486 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
488 struct tevent_fd *mpx_fde = NULL;
489 struct epoll_event event;
490 int ret;
492 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
493 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
495 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
497 * This is a multiplexed fde, we need to include both
498 * flags in the modified event.
500 mpx_fde = talloc_get_type_abort(fde->additional_data,
501 struct tevent_fd);
503 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
504 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
507 ZERO_STRUCT(event);
508 event.events = epoll_map_flags(fde->flags);
509 if (mpx_fde != NULL) {
510 event.events |= epoll_map_flags(mpx_fde->flags);
512 event.data.ptr = fde;
513 ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
514 if (ret != 0 && errno == EBADF) {
515 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
516 "EPOLL_CTL_MOD EBADF for "
517 "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
518 fde, mpx_fde, fde->fd);
519 DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
520 fde->wrapper = NULL;
521 fde->event_ctx = NULL;
522 if (mpx_fde != NULL) {
523 DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
524 mpx_fde->wrapper = NULL;
525 mpx_fde->event_ctx = NULL;
527 return;
528 } else if (ret != 0) {
529 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
530 return;
533 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
534 /* only if we want to read we want to tell the event handler about errors */
535 if (fde->flags & TEVENT_FD_READ) {
536 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
539 if (mpx_fde == NULL) {
540 return;
543 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
544 /* only if we want to read we want to tell the event handler about errors */
545 if (mpx_fde->flags & TEVENT_FD_READ) {
546 mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
550 static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
552 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
553 bool want_read = (fde->flags & TEVENT_FD_READ);
554 bool want_write= (fde->flags & TEVENT_FD_WRITE);
555 struct tevent_fd *mpx_fde = NULL;
557 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
559 * work out what the multiplexed fde wants.
561 mpx_fde = talloc_get_type_abort(fde->additional_data,
562 struct tevent_fd);
564 if (mpx_fde->flags & TEVENT_FD_READ) {
565 want_read = true;
568 if (mpx_fde->flags & TEVENT_FD_WRITE) {
569 want_write = true;
573 /* there's already an event */
574 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
575 if (want_read || (want_write && !got_error)) {
576 epoll_mod_event(epoll_ev, fde);
577 return;
580 * if we want to match the select behavior, we need to remove the epoll_event
581 * when the caller isn't interested in events.
583 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
585 epoll_del_event(epoll_ev, fde);
586 return;
589 /* there's no epoll_event attached to the fde */
590 if (want_read || (want_write && !got_error)) {
591 epoll_add_event(epoll_ev, fde);
592 return;
597 Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
598 Return true if there's nothing else to do, false if
599 this event needs further handling.
601 static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
602 struct tevent_fd *fde)
604 if (fde == NULL) {
605 /* Nothing to do if no event. */
606 return true;
609 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
611 * if we only wait for TEVENT_FD_WRITE, we should not tell the
612 * event handler about it, and remove the epoll_event,
613 * as we only report errors when waiting for read events,
614 * to match the select() behavior
616 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
618 * Do the same as the poll backend and
619 * remove the writeable flag.
621 fde->flags &= ~TEVENT_FD_WRITE;
622 return true;
624 /* This has TEVENT_FD_READ set, we're not finished. */
625 return false;
629 event loop handling using epoll
631 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
633 int ret, i;
634 #define MAXEVENTS 1
635 struct epoll_event events[MAXEVENTS];
636 int timeout = -1;
637 int wait_errno;
639 if (tvalp) {
640 /* it's better to trigger timed events a bit later than too early */
641 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
644 if (epoll_ev->ev->signal_events &&
645 tevent_common_check_signal(epoll_ev->ev)) {
646 return 0;
649 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
650 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
651 wait_errno = errno;
652 tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
654 if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
655 if (tevent_common_check_signal(epoll_ev->ev)) {
656 return 0;
660 if (ret == -1 && wait_errno != EINTR) {
661 epoll_panic(epoll_ev, "epoll_wait() failed", true);
662 return -1;
665 if (ret == 0 && tvalp) {
666 /* we don't care about a possible delay here */
667 tevent_common_loop_timer_delay(epoll_ev->ev);
668 return 0;
671 for (i=0;i<ret;i++) {
672 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
673 struct tevent_fd);
674 uint16_t flags = 0;
675 struct tevent_fd *mpx_fde = NULL;
677 if (fde == NULL) {
678 epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
679 return -1;
681 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
683 * Save off the multiplexed event in case we need
684 * to use it to call the handler function.
686 mpx_fde = talloc_get_type_abort(fde->additional_data,
687 struct tevent_fd);
689 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
690 bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
691 bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
693 if (handled_fde && handled_mpx) {
694 epoll_update_event(epoll_ev, fde);
695 continue;
698 if (!handled_mpx) {
700 * If the mpx event was the one that needs
701 * further handling, it's the TEVENT_FD_READ
702 * event so switch over and call that handler.
704 fde = mpx_fde;
705 mpx_fde = NULL;
707 flags |= TEVENT_FD_READ;
709 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
710 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
712 if (flags & TEVENT_FD_WRITE) {
713 if (fde->flags & TEVENT_FD_WRITE) {
714 mpx_fde = NULL;
716 if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
717 fde = mpx_fde;
718 mpx_fde = NULL;
722 if (mpx_fde) {
723 /* Ensure we got the right fde. */
724 if ((flags & fde->flags) == 0) {
725 fde = mpx_fde;
726 mpx_fde = NULL;
731 * make sure we only pass the flags
732 * the handler is expecting.
734 flags &= fde->flags;
735 if (flags) {
736 return tevent_common_invoke_fd_handler(fde, flags, NULL);
740 return 0;
744 create a epoll_event_context structure.
746 static int epoll_event_context_init(struct tevent_context *ev)
748 int ret;
749 struct epoll_event_context *epoll_ev;
752 * We might be called during tevent_re_initialise()
753 * which means we need to free our old additional_data.
755 TALLOC_FREE(ev->additional_data);
757 epoll_ev = talloc_zero(ev, struct epoll_event_context);
758 if (!epoll_ev) return -1;
759 epoll_ev->ev = ev;
760 epoll_ev->epoll_fd = -1;
762 ret = epoll_init_ctx(epoll_ev);
763 if (ret != 0) {
764 talloc_free(epoll_ev);
765 return ret;
768 ev->additional_data = epoll_ev;
769 return 0;
773 destroy an fd_event
775 static int epoll_event_fd_destructor(struct tevent_fd *fde)
777 struct tevent_context *ev = fde->event_ctx;
778 struct epoll_event_context *epoll_ev = NULL;
779 bool panic_triggered = false;
780 struct tevent_fd *mpx_fde = NULL;
781 int flags = fde->flags;
783 if (ev == NULL) {
784 return tevent_common_fd_destructor(fde);
787 epoll_ev = talloc_get_type_abort(ev->additional_data,
788 struct epoll_event_context);
791 * we must remove the event from the list
792 * otherwise a panic fallback handler may
793 * reuse invalid memory
795 DLIST_REMOVE(ev->fd_events, fde);
797 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
798 mpx_fde = talloc_get_type_abort(fde->additional_data,
799 struct tevent_fd);
801 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
802 mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
804 fde->additional_data = NULL;
805 mpx_fde->additional_data = NULL;
807 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
810 epoll_ev->panic_state = &panic_triggered;
811 epoll_check_reopen(epoll_ev);
812 if (panic_triggered) {
813 return tevent_common_fd_destructor(fde);
816 if (mpx_fde != NULL) {
817 epoll_update_event(epoll_ev, mpx_fde);
818 if (panic_triggered) {
819 return tevent_common_fd_destructor(fde);
823 fde->flags = 0;
824 epoll_update_event(epoll_ev, fde);
825 fde->flags = flags;
826 if (panic_triggered) {
827 return tevent_common_fd_destructor(fde);
829 epoll_ev->panic_state = NULL;
831 return tevent_common_fd_destructor(fde);
835 add a fd based event
836 return NULL on failure (memory allocation error)
838 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
839 int fd, uint16_t flags,
840 tevent_fd_handler_t handler,
841 void *private_data,
842 const char *handler_name,
843 const char *location)
845 struct epoll_event_context *epoll_ev =
846 talloc_get_type_abort(ev->additional_data,
847 struct epoll_event_context);
848 struct tevent_fd *fde;
849 bool panic_triggered = false;
851 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
852 handler, private_data,
853 handler_name, location);
854 if (!fde) return NULL;
856 talloc_set_destructor(fde, epoll_event_fd_destructor);
858 epoll_ev->panic_state = &panic_triggered;
859 epoll_check_reopen(epoll_ev);
860 if (panic_triggered) {
861 return fde;
863 epoll_ev->panic_state = NULL;
865 epoll_update_event(epoll_ev, fde);
867 return fde;
871 set the fd event flags
873 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
875 struct tevent_context *ev;
876 struct epoll_event_context *epoll_ev;
877 bool panic_triggered = false;
879 if (fde->flags == flags) return;
881 ev = fde->event_ctx;
882 epoll_ev = talloc_get_type_abort(ev->additional_data,
883 struct epoll_event_context);
885 fde->flags = flags;
887 epoll_ev->panic_state = &panic_triggered;
888 epoll_check_reopen(epoll_ev);
889 if (panic_triggered) {
890 return;
892 epoll_ev->panic_state = NULL;
894 epoll_update_event(epoll_ev, fde);
898 do a single event loop using the events defined in ev
900 static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
902 struct epoll_event_context *epoll_ev =
903 talloc_get_type_abort(ev->additional_data,
904 struct epoll_event_context);
905 struct timeval tval;
906 bool panic_triggered = false;
908 if (ev->signal_events &&
909 tevent_common_check_signal(ev)) {
910 return 0;
913 if (ev->threaded_contexts != NULL) {
914 tevent_common_threaded_activate_immediate(ev);
917 if (ev->immediate_events &&
918 tevent_common_loop_immediate(ev)) {
919 return 0;
922 tval = tevent_common_loop_timer_delay(ev);
923 if (tevent_timeval_is_zero(&tval)) {
924 return 0;
927 epoll_ev->panic_state = &panic_triggered;
928 epoll_ev->panic_force_replay = true;
929 epoll_check_reopen(epoll_ev);
930 if (panic_triggered) {
931 errno = EINVAL;
932 return -1;
934 epoll_ev->panic_force_replay = false;
935 epoll_ev->panic_state = NULL;
937 return epoll_event_loop(epoll_ev, &tval);
940 static const struct tevent_ops epoll_event_ops = {
941 .context_init = epoll_event_context_init,
942 .add_fd = epoll_event_add_fd,
943 .set_fd_close_fn = tevent_common_fd_set_close_fn,
944 .get_fd_flags = tevent_common_fd_get_flags,
945 .set_fd_flags = epoll_event_set_fd_flags,
946 .add_timer = tevent_common_add_timer_v2,
947 .schedule_immediate = tevent_common_schedule_immediate,
948 .add_signal = tevent_common_add_signal,
949 .loop_once = epoll_event_loop_once,
950 .loop_wait = tevent_common_loop_wait,
953 _PRIVATE_ bool tevent_epoll_init(void)
955 return tevent_register_backend("epoll", &epoll_event_ops);