/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - epoll implementation

   Copyright (C) Andrew Tridgell	2003-2005
   Copyright (C) Stefan Metzmacher	2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "system/filesys.h"
#include "system/network.h"
#include "tevent_internal.h"
#include "tevent_util.h"
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
/*
  private state of the epoll backend; one instance is attached to the
  generic event_context through ev->additional_data.

  NOTE(review): the members num_fd_events, epoll_fd and pid were on lines
  missing from the damaged listing. Their names come from the uses in this
  file; their types are inferred from those uses (++/--, epoll_create(),
  getpid()) - confirm against upstream.
*/
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* list of filedescriptor events */
	struct fd_event *fd_events;

	/* number of registered fd event handlers */
	int num_fd_events;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* pid that owns epoll_fd; compared with getpid() to detect a fork */
	pid_t pid;
};
56 called when a epoll call fails, and we should fallback
59 static void epoll_panic(struct epoll_event_context
*epoll_ev
, const char *reason
)
61 ev_debug(epoll_ev
->ev
, EV_DEBUG_FATAL
,
62 "%s (%s) - calling abort()\n", reason
, strerror(errno
));
67 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
69 static uint32_t epoll_map_flags(uint16_t flags
)
72 if (flags
& EVENT_FD_READ
) ret
|= (EPOLLIN
| EPOLLERR
| EPOLLHUP
);
73 if (flags
& EVENT_FD_WRITE
) ret
|= (EPOLLOUT
| EPOLLERR
| EPOLLHUP
);
80 static int epoll_ctx_destructor(struct epoll_event_context
*epoll_ev
)
82 close(epoll_ev
->epoll_fd
);
83 epoll_ev
->epoll_fd
= -1;
90 static int epoll_init_ctx(struct epoll_event_context
*epoll_ev
)
92 epoll_ev
->epoll_fd
= epoll_create(64);
93 epoll_ev
->pid
= getpid();
94 talloc_set_destructor(epoll_ev
, epoll_ctx_destructor
);
95 if (epoll_ev
->epoll_fd
== -1) {
101 static void epoll_add_event(struct epoll_event_context
*epoll_ev
, struct fd_event
*fde
);
104 reopen the epoll handle when our pid changes
105 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
106 demonstration of why this is needed
108 static void epoll_check_reopen(struct epoll_event_context
*epoll_ev
)
110 struct fd_event
*fde
;
112 if (epoll_ev
->pid
== getpid()) {
116 close(epoll_ev
->epoll_fd
);
117 epoll_ev
->epoll_fd
= epoll_create(64);
118 if (epoll_ev
->epoll_fd
== -1) {
119 ev_debug(epoll_ev
->ev
, EV_DEBUG_FATAL
,
120 "Failed to recreate epoll handle after fork\n");
123 epoll_ev
->pid
= getpid();
124 for (fde
=epoll_ev
->fd_events
;fde
;fde
=fde
->next
) {
125 epoll_add_event(epoll_ev
, fde
);
/* bits kept in fd_event->additional_flags by this backend */

/* the fd is currently registered with the epoll handle */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* EPOLLERR/EPOLLHUP are passed on to the handler (set when EVENT_FD_READ is wanted) */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* epoll has reported EPOLLERR or EPOLLHUP for this fd */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
134 add the epoll event to the given fd_event
136 static void epoll_add_event(struct epoll_event_context
*epoll_ev
, struct fd_event
*fde
)
138 struct epoll_event event
;
140 if (epoll_ev
->epoll_fd
== -1) return;
142 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
144 /* if we don't want events yet, don't add an epoll_event */
145 if (fde
->flags
== 0) return;
148 event
.events
= epoll_map_flags(fde
->flags
);
149 event
.data
.ptr
= fde
;
150 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_ADD
, fde
->fd
, &event
) != 0) {
151 epoll_panic(epoll_ev
, "EPOLL_CTL_ADD failed");
153 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
;
155 /* only if we want to read we want to tell the event handler about errors */
156 if (fde
->flags
& EVENT_FD_READ
) {
157 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
162 delete the epoll event for given fd_event
164 static void epoll_del_event(struct epoll_event_context
*epoll_ev
, struct fd_event
*fde
)
166 struct epoll_event event
;
168 if (epoll_ev
->epoll_fd
== -1) return;
170 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
172 /* if there's no epoll_event, we don't need to delete it */
173 if (!(fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
)) return;
176 event
.events
= epoll_map_flags(fde
->flags
);
177 event
.data
.ptr
= fde
;
178 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_DEL
, fde
->fd
, &event
) != 0) {
179 ev_debug(epoll_ev
->ev
, EV_DEBUG_FATAL
,
180 "epoll_del_event failed! probable early close bug (%s)\n",
183 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
;
187 change the epoll event to the given fd_event
189 static void epoll_mod_event(struct epoll_event_context
*epoll_ev
, struct fd_event
*fde
)
191 struct epoll_event event
;
192 if (epoll_ev
->epoll_fd
== -1) return;
194 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
197 event
.events
= epoll_map_flags(fde
->flags
);
198 event
.data
.ptr
= fde
;
199 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_MOD
, fde
->fd
, &event
) != 0) {
200 epoll_panic(epoll_ev
, "EPOLL_CTL_MOD failed");
203 /* only if we want to read we want to tell the event handler about errors */
204 if (fde
->flags
& EVENT_FD_READ
) {
205 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
209 static void epoll_change_event(struct epoll_event_context
*epoll_ev
, struct fd_event
*fde
)
211 bool got_error
= (fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR
);
212 bool want_read
= (fde
->flags
& EVENT_FD_READ
);
213 bool want_write
= (fde
->flags
& EVENT_FD_WRITE
);
215 if (epoll_ev
->epoll_fd
== -1) return;
217 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
219 /* there's already an event */
220 if (fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
) {
221 if (want_read
|| (want_write
&& !got_error
)) {
222 epoll_mod_event(epoll_ev
, fde
);
226 * if we want to match the select behavior, we need to remove the epoll_event
227 * when the caller isn't interested in events.
229 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
231 epoll_del_event(epoll_ev
, fde
);
235 /* there's no epoll_event attached to the fde */
236 if (want_read
|| (want_write
&& !got_error
)) {
237 epoll_add_event(epoll_ev
, fde
);
243 event loop handling using epoll
245 static int epoll_event_loop(struct epoll_event_context
*epoll_ev
, struct timeval
*tvalp
)
249 struct epoll_event events
[MAXEVENTS
];
250 uint32_t destruction_count
= ++epoll_ev
->destruction_count
;
253 if (epoll_ev
->epoll_fd
== -1) return -1;
256 /* it's better to trigger timed events a bit later than to early */
257 timeout
= ((tvalp
->tv_usec
+999) / 1000) + (tvalp
->tv_sec
*1000);
260 if (epoll_ev
->ev
->num_signal_handlers
&&
261 common_event_check_signal(epoll_ev
->ev
)) {
265 ret
= epoll_wait(epoll_ev
->epoll_fd
, events
, MAXEVENTS
, timeout
);
267 if (ret
== -1 && errno
== EINTR
&& epoll_ev
->ev
->num_signal_handlers
) {
268 if (common_event_check_signal(epoll_ev
->ev
)) {
273 if (ret
== -1 && errno
!= EINTR
) {
274 epoll_panic(epoll_ev
, "epoll_wait() failed");
278 if (ret
== 0 && tvalp
) {
279 /* we don't care about a possible delay here */
280 common_event_loop_timer_delay(epoll_ev
->ev
);
284 for (i
=0;i
<ret
;i
++) {
285 struct fd_event
*fde
= talloc_get_type(events
[i
].data
.ptr
,
290 epoll_panic(epoll_ev
, "epoll_wait() gave bad data");
293 if (events
[i
].events
& (EPOLLHUP
|EPOLLERR
)) {
294 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR
;
296 * if we only wait for EVENT_FD_WRITE, we should not tell the
297 * event handler about it, and remove the epoll_event,
298 * as we only report errors when waiting for read events,
299 * to match the select() behavior
301 if (!(fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
)) {
302 epoll_del_event(epoll_ev
, fde
);
305 flags
|= EVENT_FD_READ
;
307 if (events
[i
].events
& EPOLLIN
) flags
|= EVENT_FD_READ
;
308 if (events
[i
].events
& EPOLLOUT
) flags
|= EVENT_FD_WRITE
;
310 fde
->handler(epoll_ev
->ev
, fde
, flags
, fde
->private_data
);
311 if (destruction_count
!= epoll_ev
->destruction_count
) {
321 create a epoll_event_context structure.
323 static int epoll_event_context_init(struct event_context
*ev
)
326 struct epoll_event_context
*epoll_ev
;
328 epoll_ev
= talloc_zero(ev
, struct epoll_event_context
);
329 if (!epoll_ev
) return -1;
331 epoll_ev
->epoll_fd
= -1;
333 ret
= epoll_init_ctx(epoll_ev
);
335 talloc_free(epoll_ev
);
339 ev
->additional_data
= epoll_ev
;
346 static int epoll_event_fd_destructor(struct fd_event
*fde
)
348 struct event_context
*ev
= fde
->event_ctx
;
349 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
350 struct epoll_event_context
);
352 epoll_check_reopen(epoll_ev
);
354 epoll_ev
->num_fd_events
--;
355 epoll_ev
->destruction_count
++;
357 DLIST_REMOVE(epoll_ev
->fd_events
, fde
);
359 epoll_del_event(epoll_ev
, fde
);
361 if (fde
->flags
& EVENT_FD_AUTOCLOSE
) {
371 return NULL on failure (memory allocation error)
373 static struct fd_event
*epoll_event_add_fd(struct event_context
*ev
, TALLOC_CTX
*mem_ctx
,
374 int fd
, uint16_t flags
,
375 event_fd_handler_t handler
,
378 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
379 struct epoll_event_context
);
380 struct fd_event
*fde
;
382 epoll_check_reopen(epoll_ev
);
384 fde
= talloc(mem_ctx
?mem_ctx
:ev
, struct fd_event
);
385 if (!fde
) return NULL
;
390 fde
->handler
= handler
;
391 fde
->private_data
= private_data
;
392 fde
->additional_flags
= 0;
393 fde
->additional_data
= NULL
;
395 epoll_ev
->num_fd_events
++;
396 talloc_set_destructor(fde
, epoll_event_fd_destructor
);
398 DLIST_ADD(epoll_ev
->fd_events
, fde
);
399 epoll_add_event(epoll_ev
, fde
);
406 return the fd event flags
408 static uint16_t epoll_event_get_fd_flags(struct fd_event
*fde
)
414 set the fd event flags
416 static void epoll_event_set_fd_flags(struct fd_event
*fde
, uint16_t flags
)
418 struct event_context
*ev
;
419 struct epoll_event_context
*epoll_ev
;
421 if (fde
->flags
== flags
) return;
424 epoll_ev
= talloc_get_type(ev
->additional_data
, struct epoll_event_context
);
428 epoll_check_reopen(epoll_ev
);
430 epoll_change_event(epoll_ev
, fde
);
434 do a single event loop using the events defined in ev
436 static int epoll_event_loop_once(struct event_context
*ev
)
438 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
439 struct epoll_event_context
);
442 tval
= common_event_loop_timer_delay(ev
);
443 if (ev_timeval_is_zero(&tval
)) {
447 epoll_check_reopen(epoll_ev
);
449 return epoll_event_loop(epoll_ev
, &tval
);
453 return on failure or (with 0) if all fd events are removed
455 static int epoll_event_loop_wait(struct event_context
*ev
)
457 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
458 struct epoll_event_context
);
459 while (epoll_ev
->num_fd_events
) {
460 if (epoll_event_loop_once(ev
) != 0) {
468 static const struct event_ops epoll_event_ops
= {
469 .context_init
= epoll_event_context_init
,
470 .add_fd
= epoll_event_add_fd
,
471 .get_fd_flags
= epoll_event_get_fd_flags
,
472 .set_fd_flags
= epoll_event_set_fd_flags
,
473 .add_timed
= common_event_add_timed
,
474 .add_signal
= common_event_add_signal
,
475 .loop_once
= epoll_event_loop_once
,
476 .loop_wait
= epoll_event_loop_wait
,
479 bool events_epoll_init(void)
481 return event_register_backend("epoll", &epoll_event_ops
);