2 Unix SMB/CIFS implementation.
4 main select loop and event handling - epoll implementation
6 Copyright (C) Andrew Tridgell 2003-2005
7 Copyright (C) Stefan Metzmacher 2005
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "system/filesys.h"
25 #include "system/select.h"
27 #include "tevent_internal.h"
28 #include "tevent_util.h"
/*
  per-context private state of the epoll backend
*/
struct epoll_event_context {
	/* back reference to the generic tevent context that owns us */
	struct tevent_context *ev;

	/*
	 * bumped by the fd event destructor and at the start of every
	 * loop iteration. An event handler may free fd events; the
	 * dispatch code compares this counter before and after calling
	 * a handler and stops walking its results when it has changed,
	 * because the fd event list can no longer be trusted.
	 */
	uint32_t destruction_count;

	/* handle returned by epoll_create(), or -1 when there is none */
	int epoll_fd;

	/* pid that created epoll_fd; compared against getpid() to detect a fork */
	pid_t pid;
};
49 called when a epoll call fails, and we should fallback
52 static void epoll_panic(struct epoll_event_context
*epoll_ev
, const char *reason
)
54 tevent_debug(epoll_ev
->ev
, TEVENT_DEBUG_FATAL
,
55 "%s (%s) - calling abort()\n", reason
, strerror(errno
));
60 map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
62 static uint32_t epoll_map_flags(uint16_t flags
)
65 if (flags
& TEVENT_FD_READ
) ret
|= (EPOLLIN
| EPOLLERR
| EPOLLHUP
);
66 if (flags
& TEVENT_FD_WRITE
) ret
|= (EPOLLOUT
| EPOLLERR
| EPOLLHUP
);
73 static int epoll_ctx_destructor(struct epoll_event_context
*epoll_ev
)
75 close(epoll_ev
->epoll_fd
);
76 epoll_ev
->epoll_fd
= -1;
83 static int epoll_init_ctx(struct epoll_event_context
*epoll_ev
)
85 epoll_ev
->epoll_fd
= epoll_create(64);
86 epoll_ev
->pid
= getpid();
87 talloc_set_destructor(epoll_ev
, epoll_ctx_destructor
);
88 if (epoll_ev
->epoll_fd
== -1) {
94 static void epoll_add_event(struct epoll_event_context
*epoll_ev
, struct tevent_fd
*fde
);
97 reopen the epoll handle when our pid changes
98 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
99 demonstration of why this is needed
101 static void epoll_check_reopen(struct epoll_event_context
*epoll_ev
)
103 struct tevent_fd
*fde
;
105 if (epoll_ev
->pid
== getpid()) {
109 close(epoll_ev
->epoll_fd
);
110 epoll_ev
->epoll_fd
= epoll_create(64);
111 if (epoll_ev
->epoll_fd
== -1) {
112 tevent_debug(epoll_ev
->ev
, TEVENT_DEBUG_FATAL
,
113 "Failed to recreate epoll handle after fork\n");
116 epoll_ev
->pid
= getpid();
117 for (fde
=epoll_ev
->ev
->fd_events
;fde
;fde
=fde
->next
) {
118 epoll_add_event(epoll_ev
, fde
);
/* backend private bits stored in tevent_fd->additional_flags */

/* an epoll_event is registered for the fd: set after EPOLL_CTL_ADD, cleared after EPOLL_CTL_DEL */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
/* EPOLLERR/EPOLLHUP should be passed on to the handler: set while the fde wants TEVENT_FD_READ */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
/* epoll_wait() has reported EPOLLERR or EPOLLHUP for this fd */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
127 add the epoll event to the given fd_event
129 static void epoll_add_event(struct epoll_event_context
*epoll_ev
, struct tevent_fd
*fde
)
131 struct epoll_event event
;
133 if (epoll_ev
->epoll_fd
== -1) return;
135 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
137 /* if we don't want events yet, don't add an epoll_event */
138 if (fde
->flags
== 0) return;
141 event
.events
= epoll_map_flags(fde
->flags
);
142 event
.data
.ptr
= fde
;
143 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_ADD
, fde
->fd
, &event
) != 0) {
144 epoll_panic(epoll_ev
, "EPOLL_CTL_ADD failed");
146 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
;
148 /* only if we want to read we want to tell the event handler about errors */
149 if (fde
->flags
& TEVENT_FD_READ
) {
150 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
155 delete the epoll event for given fd_event
157 static void epoll_del_event(struct epoll_event_context
*epoll_ev
, struct tevent_fd
*fde
)
159 struct epoll_event event
;
161 if (epoll_ev
->epoll_fd
== -1) return;
163 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
165 /* if there's no epoll_event, we don't need to delete it */
166 if (!(fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
)) return;
169 event
.events
= epoll_map_flags(fde
->flags
);
170 event
.data
.ptr
= fde
;
171 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_DEL
, fde
->fd
, &event
) != 0) {
172 tevent_debug(epoll_ev
->ev
, TEVENT_DEBUG_FATAL
,
173 "epoll_del_event failed! probable early close bug (%s)\n",
176 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
;
180 change the epoll event to the given fd_event
182 static void epoll_mod_event(struct epoll_event_context
*epoll_ev
, struct tevent_fd
*fde
)
184 struct epoll_event event
;
185 if (epoll_ev
->epoll_fd
== -1) return;
187 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
190 event
.events
= epoll_map_flags(fde
->flags
);
191 event
.data
.ptr
= fde
;
192 if (epoll_ctl(epoll_ev
->epoll_fd
, EPOLL_CTL_MOD
, fde
->fd
, &event
) != 0) {
193 epoll_panic(epoll_ev
, "EPOLL_CTL_MOD failed");
196 /* only if we want to read we want to tell the event handler about errors */
197 if (fde
->flags
& TEVENT_FD_READ
) {
198 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
202 static void epoll_change_event(struct epoll_event_context
*epoll_ev
, struct tevent_fd
*fde
)
204 bool got_error
= (fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR
);
205 bool want_read
= (fde
->flags
& TEVENT_FD_READ
);
206 bool want_write
= (fde
->flags
& TEVENT_FD_WRITE
);
208 if (epoll_ev
->epoll_fd
== -1) return;
210 fde
->additional_flags
&= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
;
212 /* there's already an event */
213 if (fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT
) {
214 if (want_read
|| (want_write
&& !got_error
)) {
215 epoll_mod_event(epoll_ev
, fde
);
219 * if we want to match the select behavior, we need to remove the epoll_event
220 * when the caller isn't interested in events.
222 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
224 epoll_del_event(epoll_ev
, fde
);
228 /* there's no epoll_event attached to the fde */
229 if (want_read
|| (want_write
&& !got_error
)) {
230 epoll_add_event(epoll_ev
, fde
);
236 event loop handling using epoll
238 static int epoll_event_loop(struct epoll_event_context
*epoll_ev
, struct timeval
*tvalp
)
242 struct epoll_event events
[MAXEVENTS
];
243 uint32_t destruction_count
= ++epoll_ev
->destruction_count
;
246 if (epoll_ev
->epoll_fd
== -1) return -1;
249 /* it's better to trigger timed events a bit later than to early */
250 timeout
= ((tvalp
->tv_usec
+999) / 1000) + (tvalp
->tv_sec
*1000);
253 if (epoll_ev
->ev
->signal_events
&&
254 tevent_common_check_signal(epoll_ev
->ev
)) {
258 ret
= epoll_wait(epoll_ev
->epoll_fd
, events
, MAXEVENTS
, timeout
);
260 if (ret
== -1 && errno
== EINTR
&& epoll_ev
->ev
->signal_events
) {
261 if (tevent_common_check_signal(epoll_ev
->ev
)) {
266 if (ret
== -1 && errno
!= EINTR
) {
267 epoll_panic(epoll_ev
, "epoll_wait() failed");
271 if (ret
== 0 && tvalp
) {
272 /* we don't care about a possible delay here */
273 tevent_common_loop_timer_delay(epoll_ev
->ev
);
277 for (i
=0;i
<ret
;i
++) {
278 struct tevent_fd
*fde
= talloc_get_type(events
[i
].data
.ptr
,
283 epoll_panic(epoll_ev
, "epoll_wait() gave bad data");
286 if (events
[i
].events
& (EPOLLHUP
|EPOLLERR
)) {
287 fde
->additional_flags
|= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR
;
289 * if we only wait for TEVENT_FD_WRITE, we should not tell the
290 * event handler about it, and remove the epoll_event,
291 * as we only report errors when waiting for read events,
292 * to match the select() behavior
294 if (!(fde
->additional_flags
& EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR
)) {
295 epoll_del_event(epoll_ev
, fde
);
298 flags
|= TEVENT_FD_READ
;
300 if (events
[i
].events
& EPOLLIN
) flags
|= TEVENT_FD_READ
;
301 if (events
[i
].events
& EPOLLOUT
) flags
|= TEVENT_FD_WRITE
;
303 fde
->handler(epoll_ev
->ev
, fde
, flags
, fde
->private_data
);
304 if (destruction_count
!= epoll_ev
->destruction_count
) {
314 create a epoll_event_context structure.
316 static int epoll_event_context_init(struct tevent_context
*ev
)
319 struct epoll_event_context
*epoll_ev
;
321 epoll_ev
= talloc_zero(ev
, struct epoll_event_context
);
322 if (!epoll_ev
) return -1;
324 epoll_ev
->epoll_fd
= -1;
326 ret
= epoll_init_ctx(epoll_ev
);
328 talloc_free(epoll_ev
);
332 ev
->additional_data
= epoll_ev
;
339 static int epoll_event_fd_destructor(struct tevent_fd
*fde
)
341 struct tevent_context
*ev
= fde
->event_ctx
;
342 struct epoll_event_context
*epoll_ev
= NULL
;
345 epoll_ev
= talloc_get_type(ev
->additional_data
,
346 struct epoll_event_context
);
348 epoll_check_reopen(epoll_ev
);
350 epoll_ev
->destruction_count
++;
352 epoll_del_event(epoll_ev
, fde
);
355 return tevent_common_fd_destructor(fde
);
360 return NULL on failure (memory allocation error)
362 static struct tevent_fd
*epoll_event_add_fd(struct tevent_context
*ev
, TALLOC_CTX
*mem_ctx
,
363 int fd
, uint16_t flags
,
364 tevent_fd_handler_t handler
,
366 const char *handler_name
,
367 const char *location
)
369 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
370 struct epoll_event_context
);
371 struct tevent_fd
*fde
;
373 epoll_check_reopen(epoll_ev
);
375 fde
= tevent_common_add_fd(ev
, mem_ctx
, fd
, flags
,
376 handler
, private_data
,
377 handler_name
, location
);
378 if (!fde
) return NULL
;
380 talloc_set_destructor(fde
, epoll_event_fd_destructor
);
382 epoll_add_event(epoll_ev
, fde
);
388 set the fd event flags
390 static void epoll_event_set_fd_flags(struct tevent_fd
*fde
, uint16_t flags
)
392 struct tevent_context
*ev
;
393 struct epoll_event_context
*epoll_ev
;
395 if (fde
->flags
== flags
) return;
398 epoll_ev
= talloc_get_type(ev
->additional_data
, struct epoll_event_context
);
402 epoll_check_reopen(epoll_ev
);
404 epoll_change_event(epoll_ev
, fde
);
408 do a single event loop using the events defined in ev
410 static int epoll_event_loop_once(struct tevent_context
*ev
)
412 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
413 struct epoll_event_context
);
416 tval
= tevent_common_loop_timer_delay(ev
);
417 if (ev_timeval_is_zero(&tval
)) {
421 epoll_check_reopen(epoll_ev
);
423 return epoll_event_loop(epoll_ev
, &tval
);
427 return on failure or (with 0) if all fd events are removed
429 static int epoll_event_loop_wait(struct tevent_context
*ev
)
431 struct epoll_event_context
*epoll_ev
= talloc_get_type(ev
->additional_data
,
432 struct epoll_event_context
);
433 while (epoll_ev
->ev
->fd_events
) {
434 if (epoll_event_loop_once(ev
) != 0) {
/*
  operations table of the epoll backend, registered under the name
  "epoll" by tevent_epoll_init(). fd handling and the loop functions
  are epoll specific; the remaining entries use the tevent_common_*
  implementations.
*/
static const struct tevent_ops epoll_event_ops = {
	.context_init = epoll_event_context_init,
	.add_fd = epoll_event_add_fd,
	.set_fd_close_fn = tevent_common_fd_set_close_fn,
	.get_fd_flags = tevent_common_fd_get_flags,
	.set_fd_flags = epoll_event_set_fd_flags,
	.add_timer = tevent_common_add_timer,
	.add_signal = tevent_common_add_signal,
	.loop_once = epoll_event_loop_once,
	.loop_wait = epoll_event_loop_wait,
};
454 bool tevent_epoll_init(void)
456 return tevent_register_backend("epoll", &epoll_event_ops
);