tevent: keep a linked list of fd_events
[Samba/bb.git] / lib / tevent / tevent_epoll.c
blobb90e4c757e604834ca73c16875edab20348c0a52
1 /*
2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "replace.h"
#include <limits.h>
#include "system/filesys.h"
#include "system/select.h"
#include "tevent.h"
#include "tevent_internal.h"
#include "tevent_util.h"
/* private state of the epoll backend, hung off tevent_context->additional_data */
struct epoll_event_context {
	/* back-pointer to the generic event context this state belongs to */
	struct tevent_context *ev;

	/*
	 * Incremented by the fd event destructor and at the start of
	 * every epoll_event_loop() pass. The dispatch loop compares it
	 * with a saved copy after each handler call; a mismatch means
	 * an fd event was destroyed by the handler, so the rest of the
	 * batch returned by epoll_wait() must not be processed.
	 */
	uint32_t destruction_count;

	/* handle returned by epoll_create(), or -1 when there is none */
	int epoll_fd;

	/* pid that owns epoll_fd; compared with getpid() to detect a fork */
	pid_t pid;
};
49 called when a epoll call fails, and we should fallback
50 to using select
52 static void epoll_panic(struct epoll_event_context *epoll_ev, const char *reason)
54 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
55 "%s (%s) - calling abort()\n", reason, strerror(errno));
56 abort();
60 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
62 static uint32_t epoll_map_flags(uint16_t flags)
64 uint32_t ret = 0;
65 if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
66 if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
67 return ret;
71 free the epoll fd
73 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
75 close(epoll_ev->epoll_fd);
76 epoll_ev->epoll_fd = -1;
77 return 0;
81 init the epoll fd
83 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
85 epoll_ev->epoll_fd = epoll_create(64);
86 epoll_ev->pid = getpid();
87 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
88 if (epoll_ev->epoll_fd == -1) {
89 return -1;
91 return 0;
94 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde);
97 reopen the epoll handle when our pid changes
98 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
99 demonstration of why this is needed
101 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
103 struct tevent_fd *fde;
105 if (epoll_ev->pid == getpid()) {
106 return;
109 close(epoll_ev->epoll_fd);
110 epoll_ev->epoll_fd = epoll_create(64);
111 if (epoll_ev->epoll_fd == -1) {
112 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
113 "Failed to recreate epoll handle after fork\n");
114 return;
116 epoll_ev->pid = getpid();
117 for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
118 epoll_add_event(epoll_ev, fde);
122 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
123 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
124 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
127 add the epoll event to the given fd_event
129 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
131 struct epoll_event event;
133 if (epoll_ev->epoll_fd == -1) return;
135 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
137 /* if we don't want events yet, don't add an epoll_event */
138 if (fde->flags == 0) return;
140 ZERO_STRUCT(event);
141 event.events = epoll_map_flags(fde->flags);
142 event.data.ptr = fde;
143 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
144 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed");
146 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
148 /* only if we want to read we want to tell the event handler about errors */
149 if (fde->flags & TEVENT_FD_READ) {
150 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
155 delete the epoll event for given fd_event
157 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
159 struct epoll_event event;
161 if (epoll_ev->epoll_fd == -1) return;
163 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
165 /* if there's no epoll_event, we don't need to delete it */
166 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
168 ZERO_STRUCT(event);
169 event.events = epoll_map_flags(fde->flags);
170 event.data.ptr = fde;
171 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
172 tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
173 "epoll_del_event failed! probable early close bug (%s)\n",
174 strerror(errno));
176 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
180 change the epoll event to the given fd_event
182 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
184 struct epoll_event event;
185 if (epoll_ev->epoll_fd == -1) return;
187 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
189 ZERO_STRUCT(event);
190 event.events = epoll_map_flags(fde->flags);
191 event.data.ptr = fde;
192 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
193 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed");
196 /* only if we want to read we want to tell the event handler about errors */
197 if (fde->flags & TEVENT_FD_READ) {
198 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
202 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
204 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
205 bool want_read = (fde->flags & TEVENT_FD_READ);
206 bool want_write= (fde->flags & TEVENT_FD_WRITE);
208 if (epoll_ev->epoll_fd == -1) return;
210 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
212 /* there's already an event */
213 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
214 if (want_read || (want_write && !got_error)) {
215 epoll_mod_event(epoll_ev, fde);
216 return;
219 * if we want to match the select behavior, we need to remove the epoll_event
220 * when the caller isn't interested in events.
222 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
224 epoll_del_event(epoll_ev, fde);
225 return;
228 /* there's no epoll_event attached to the fde */
229 if (want_read || (want_write && !got_error)) {
230 epoll_add_event(epoll_ev, fde);
231 return;
236 event loop handling using epoll
238 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
240 int ret, i;
241 #define MAXEVENTS 32
242 struct epoll_event events[MAXEVENTS];
243 uint32_t destruction_count = ++epoll_ev->destruction_count;
244 int timeout = -1;
246 if (epoll_ev->epoll_fd == -1) return -1;
248 if (tvalp) {
249 /* it's better to trigger timed events a bit later than to early */
250 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
253 if (epoll_ev->ev->signal_events &&
254 tevent_common_check_signal(epoll_ev->ev)) {
255 return 0;
258 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
260 if (ret == -1 && errno == EINTR && epoll_ev->ev->signal_events) {
261 if (tevent_common_check_signal(epoll_ev->ev)) {
262 return 0;
266 if (ret == -1 && errno != EINTR) {
267 epoll_panic(epoll_ev, "epoll_wait() failed");
268 return -1;
271 if (ret == 0 && tvalp) {
272 /* we don't care about a possible delay here */
273 tevent_common_loop_timer_delay(epoll_ev->ev);
274 return 0;
277 for (i=0;i<ret;i++) {
278 struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
279 struct tevent_fd);
280 uint16_t flags = 0;
282 if (fde == NULL) {
283 epoll_panic(epoll_ev, "epoll_wait() gave bad data");
284 return -1;
286 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
287 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
289 * if we only wait for TEVENT_FD_WRITE, we should not tell the
290 * event handler about it, and remove the epoll_event,
291 * as we only report errors when waiting for read events,
292 * to match the select() behavior
294 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
295 epoll_del_event(epoll_ev, fde);
296 continue;
298 flags |= TEVENT_FD_READ;
300 if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
301 if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
302 if (flags) {
303 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
304 if (destruction_count != epoll_ev->destruction_count) {
305 break;
310 return 0;
314 create a epoll_event_context structure.
316 static int epoll_event_context_init(struct tevent_context *ev)
318 int ret;
319 struct epoll_event_context *epoll_ev;
321 epoll_ev = talloc_zero(ev, struct epoll_event_context);
322 if (!epoll_ev) return -1;
323 epoll_ev->ev = ev;
324 epoll_ev->epoll_fd = -1;
326 ret = epoll_init_ctx(epoll_ev);
327 if (ret != 0) {
328 talloc_free(epoll_ev);
329 return ret;
332 ev->additional_data = epoll_ev;
333 return 0;
337 destroy an fd_event
339 static int epoll_event_fd_destructor(struct tevent_fd *fde)
341 struct tevent_context *ev = fde->event_ctx;
342 struct epoll_event_context *epoll_ev = NULL;
344 if (ev) {
345 epoll_ev = talloc_get_type(ev->additional_data,
346 struct epoll_event_context);
348 epoll_check_reopen(epoll_ev);
350 epoll_ev->destruction_count++;
352 epoll_del_event(epoll_ev, fde);
355 return tevent_common_fd_destructor(fde);
359 add a fd based event
360 return NULL on failure (memory allocation error)
362 static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
363 int fd, uint16_t flags,
364 tevent_fd_handler_t handler,
365 void *private_data,
366 const char *handler_name,
367 const char *location)
369 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
370 struct epoll_event_context);
371 struct tevent_fd *fde;
373 epoll_check_reopen(epoll_ev);
375 fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
376 handler, private_data,
377 handler_name, location);
378 if (!fde) return NULL;
380 talloc_set_destructor(fde, epoll_event_fd_destructor);
382 epoll_add_event(epoll_ev, fde);
384 return fde;
388 set the fd event flags
390 static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
392 struct tevent_context *ev;
393 struct epoll_event_context *epoll_ev;
395 if (fde->flags == flags) return;
397 ev = fde->event_ctx;
398 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
400 fde->flags = flags;
402 epoll_check_reopen(epoll_ev);
404 epoll_change_event(epoll_ev, fde);
408 do a single event loop using the events defined in ev
410 static int epoll_event_loop_once(struct tevent_context *ev)
412 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
413 struct epoll_event_context);
414 struct timeval tval;
416 tval = tevent_common_loop_timer_delay(ev);
417 if (ev_timeval_is_zero(&tval)) {
418 return 0;
421 epoll_check_reopen(epoll_ev);
423 return epoll_event_loop(epoll_ev, &tval);
427 return on failure or (with 0) if all fd events are removed
429 static int epoll_event_loop_wait(struct tevent_context *ev)
431 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
432 struct epoll_event_context);
433 while (epoll_ev->ev->fd_events) {
434 if (epoll_event_loop_once(ev) != 0) {
435 break;
439 return 0;
442 static const struct tevent_ops epoll_event_ops = {
443 .context_init = epoll_event_context_init,
444 .add_fd = epoll_event_add_fd,
445 .set_fd_close_fn= tevent_common_fd_set_close_fn,
446 .get_fd_flags = tevent_common_fd_get_flags,
447 .set_fd_flags = epoll_event_set_fd_flags,
448 .add_timer = tevent_common_add_timer,
449 .add_signal = tevent_common_add_signal,
450 .loop_once = epoll_event_loop_once,
451 .loop_wait = epoll_event_loop_wait,
454 bool tevent_epoll_init(void)
456 return tevent_register_backend("epoll", &epoll_event_ops);