/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - aio/epoll hybrid implementation

   Copyright (C) Andrew Tridgell 2006

   based on events_standard.c

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
  this is a very strange beast. The Linux AIO implementation doesn't
  yet integrate properly with epoll, but there is a kernel patch that
  allows the aio wait primitives to be used to wait for epoll events,
  and this can be used to give us a unified event system incorporating
  both aio events and epoll events

  this is _very_ experimental code
*/
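/*
  a sketch of how the hybrid behaves once the kernel patch is applied
  (this is an illustration, not code used by this file): the epoll fd
  is watched through a special iocb (see setup_epoll_wait() below), so
  a single io_getevents() call delivers both kinds of events:

    struct io_event events[8];
    int n = io_getevents(ioctx, 1, 8, events, &timeout);
    for (i=0; i<n; i++) {
        // events[i].obj is either a completed disk iocb
        // (IO_CMD_PREAD/IO_CMD_PWRITE) or the IOCB_CMD_EPOLL_WAIT
        // iocb, whose buffer then holds struct epoll_event entries
    }
*/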
#include "includes.h"
#include "system/filesys.h"
#include "lib/util/dlinklist.h"
#include "lib/events/events.h"
#include "lib/events/events_internal.h"
#include <sys/epoll.h>
#include <libaio.h>

#define MAX_AIO_QUEUE_DEPTH	100

#ifndef IOCB_CMD_EPOLL_WAIT
#define IOCB_CMD_EPOLL_WAIT	9
#endif
struct aio_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* number of registered fd event handlers */
	int num_fd_events;

	uint32_t destruction_count;

	io_context_t ioctx;

	struct epoll_event epevent[MAX_AIO_QUEUE_DEPTH];

	struct iocb *epoll_iocb;

	int epoll_fd;
	int is_epoll_set;
};

struct aio_event {
	struct event_context *event_ctx;
	struct iocb iocb;
	void *private_data;
	event_aio_handler_t handler;
};
/*
  map from EVENT_FD_* to EPOLLIN/EPOLLOUT
*/
static uint32_t epoll_map_flags(uint16_t flags)
{
	uint32_t ret = 0;
	if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
	if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
	return ret;
}
/*
  free the epoll fd and release the aio context
*/
static int aio_ctx_destructor(struct aio_event_context *aio_ev)
{
	io_queue_release(aio_ev->ioctx);
	close(aio_ev->epoll_fd);
	aio_ev->epoll_fd = -1;
	return 0;
}
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/*
  add the epoll event to the given fd_event
*/
static void epoll_add_event(struct aio_event_context *aio_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (aio_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* if we don't want events yet, don't add an aio_event */
	if (fde->flags == 0) return;

	ZERO_STRUCT(event);
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
	fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;

	/* only if we want to read do we tell the event handler about errors */
	if (fde->flags & EVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
/*
  delete the epoll event for the given fd_event
*/
static void epoll_del_event(struct aio_event_context *aio_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (aio_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* if there's no aio_event, we don't need to delete it */
	if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;

	ZERO_STRUCT(event);
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
}
/*
  change the epoll event for the given fd_event
*/
static void epoll_mod_event(struct aio_event_context *aio_ev, struct fd_event *fde)
{
	struct epoll_event event;
	if (aio_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	ZERO_STRUCT(event);
	event.events = epoll_map_flags(fde->flags);
	event.data.ptr = fde;
	epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);

	/* only if we want to read do we tell the event handler about errors */
	if (fde->flags & EVENT_FD_READ) {
		fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
	}
}
/*
  decide whether to add, modify or delete the epoll event for the
  given fd_event, based on its current flags
*/
static void epoll_change_event(struct aio_event_context *aio_ev, struct fd_event *fde)
{
	BOOL got_error  = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
	BOOL want_read  = (fde->flags & EVENT_FD_READ);
	BOOL want_write = (fde->flags & EVENT_FD_WRITE);

	if (aio_ev->epoll_fd == -1) return;

	fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;

	/* there's already an event */
	if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
		if (want_read || (want_write && !got_error)) {
			epoll_mod_event(aio_ev, fde);
			return;
		}
		epoll_del_event(aio_ev, fde);
		return;
	}

	/* there's no aio_event attached to the fde */
	if (want_read || (want_write && !got_error)) {
		epoll_add_event(aio_ev, fde);
		return;
	}
}
/*
  submit the (long-lived) iocb that waits for epoll events via the
  IOCB_CMD_EPOLL_WAIT aio opcode
*/
static int setup_epoll_wait(struct aio_event_context *aio_ev)
{
	if (aio_ev->is_epoll_set) {
		return 0;
	}
	memset(aio_ev->epoll_iocb, 0, sizeof(*aio_ev->epoll_iocb));
	aio_ev->epoll_iocb->aio_fildes = aio_ev->epoll_fd;
	aio_ev->epoll_iocb->aio_lio_opcode = IOCB_CMD_EPOLL_WAIT;
	aio_ev->epoll_iocb->aio_reqprio = 0;

	aio_ev->epoll_iocb->u.c.nbytes = MAX_AIO_QUEUE_DEPTH;
	aio_ev->epoll_iocb->u.c.offset = -1;
	aio_ev->epoll_iocb->u.c.buf = aio_ev->epevent;

	if (io_submit(aio_ev->ioctx, 1, &aio_ev->epoll_iocb) != 1) {
		return -1;
	}
	aio_ev->is_epoll_set = 1;

	return 0;
}
/*
  event loop handling using aio/epoll hybrid
*/
static int aio_event_loop(struct aio_event_context *aio_ev, struct timeval *tvalp)
{
	int ret, i;
	uint32_t destruction_count = aio_ev->destruction_count;
	struct timespec timeout;
	struct io_event events[8];

	if (aio_ev->epoll_fd == -1) return -1;

	if (aio_ev->ev->num_signal_handlers &&
	    common_event_check_signal(aio_ev->ev)) {
		return 0;
	}

	if (tvalp) {
		timeout.tv_sec = tvalp->tv_sec;
		timeout.tv_nsec = tvalp->tv_usec;
		timeout.tv_nsec *= 1000;
	}

	if (setup_epoll_wait(aio_ev) < 0) {
		return -1;
	}

	ret = io_getevents(aio_ev->ioctx, 1, 8,
			   events, tvalp?&timeout:NULL);
	if (ret == -EINTR) {
		if (aio_ev->ev->num_signal_handlers) {
			common_event_check_signal(aio_ev->ev);
		}
		return 0;
	}

	if (ret == 0 && tvalp) {
		common_event_loop_timer(aio_ev->ev);
		return 0;
	}

	for (i=0;i<ret;i++) {
		struct io_event *event = &events[i];
		struct iocb *finished = event->obj;

		switch (finished->aio_lio_opcode) {
		case IO_CMD_PWRITE:
		case IO_CMD_PREAD: {
			/* a disk IO request completed - hand the result
			   to the aio handler and free the aio_event */
			struct aio_event *ae = talloc_get_type(finished->data,
							       struct aio_event);
			if (ae) {
				talloc_set_destructor(ae, NULL);
				ae->handler(ae->event_ctx, ae,
					    event->res, ae->private_data);
				talloc_free(ae);
			}
			break;
		}
		case IOCB_CMD_EPOLL_WAIT: {
			/* the epoll-wait iocb fired - its buffer now holds
			   an array of epoll events, one per ready fd */
			struct epoll_event *ep = (struct epoll_event *)finished->u.c.buf;
			struct fd_event *fde;
			uint16_t flags;
			int j;

			aio_ev->is_epoll_set = 0;

			for (j=0; j<event->res; j++, ep++) {
				flags = 0;
				fde = talloc_get_type(ep->data.ptr,
						      struct fd_event);
				if (fde == NULL) {
					return -1;
				}
				if (ep->events & (EPOLLHUP|EPOLLERR)) {
					fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
					if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
						epoll_del_event(aio_ev, fde);
						continue;
					}
					flags |= EVENT_FD_READ;
				}
				if (ep->events & EPOLLIN) flags |= EVENT_FD_READ;
				if (ep->events & EPOLLOUT) flags |= EVENT_FD_WRITE;
				if (flags) {
					fde->handler(aio_ev->ev, fde, flags, fde->private_data);
				}
			}
			break;
		}
		}

		/* if destruction_count changed, a handler has destroyed
		   events we may still be iterating over - stop now */
		if (destruction_count != aio_ev->destruction_count) {
			return 0;
		}
	}

	return 0;
}
/*
  create an aio_event_context structure.
*/
static int aio_event_context_init(struct event_context *ev)
{
	struct aio_event_context *aio_ev;

	aio_ev = talloc_zero(ev, struct aio_event_context);
	if (!aio_ev) return -1;

	aio_ev->ev = ev;
	aio_ev->epoll_iocb = talloc(aio_ev, struct iocb);
	if (!aio_ev->epoll_iocb) {
		talloc_free(aio_ev);
		return -1;
	}

	if (io_queue_init(MAX_AIO_QUEUE_DEPTH, &aio_ev->ioctx) != 0) {
		talloc_free(aio_ev);
		return -1;
	}

	aio_ev->epoll_fd = epoll_create(MAX_AIO_QUEUE_DEPTH);
	if (aio_ev->epoll_fd == -1) return -1;

	talloc_set_destructor(aio_ev, aio_ctx_destructor);

	ev->additional_data = aio_ev;
	return 0;
}
/*
  destroy an fd_event
*/
static int aio_event_fd_destructor(struct fd_event *fde)
{
	struct event_context *ev = fde->event_ctx;
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);

	aio_ev->num_fd_events--;
	aio_ev->destruction_count++;

	epoll_del_event(aio_ev, fde);

	return 0;
}
/*
  add a fd based event
  return NULL on failure (memory allocation error)
*/
static struct fd_event *aio_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
					 int fd, uint16_t flags,
					 event_fd_handler_t handler,
					 void *private_data)
{
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);
	struct fd_event *fde;

	fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
	if (!fde) return NULL;

	fde->event_ctx		= ev;
	fde->fd			= fd;
	fde->flags		= flags;
	fde->handler		= handler;
	fde->private_data	= private_data;
	fde->additional_flags	= 0;
	fde->additional_data	= NULL;

	aio_ev->num_fd_events++;
	talloc_set_destructor(fde, aio_event_fd_destructor);

	epoll_add_event(aio_ev, fde);

	return fde;
}
/*
  return the fd event flags
*/
static uint16_t aio_event_get_fd_flags(struct fd_event *fde)
{
	return fde->flags;
}
/*
  set the fd event flags
*/
static void aio_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
{
	struct event_context *ev;
	struct aio_event_context *aio_ev;

	if (fde->flags == flags) return;

	ev = fde->event_ctx;
	aio_ev = talloc_get_type(ev->additional_data, struct aio_event_context);

	fde->flags = flags;

	epoll_change_event(aio_ev, fde);
}
/*
  do a single event loop using the events defined in ev
*/
static int aio_event_loop_once(struct event_context *ev)
{
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);
	struct timeval tval;

	tval = common_event_loop_delay(ev);

	if (timeval_is_zero(&tval)) {
		common_event_loop_timer(ev);
		return 0;
	}

	return aio_event_loop(aio_ev, &tval);
}
/*
  return on failure, or with 0 once all fd events are removed
*/
static int aio_event_loop_wait(struct event_context *ev)
{
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);
	while (aio_ev->num_fd_events) {
		if (aio_event_loop_once(ev) != 0) {
			break;
		}
	}

	return 0;
}
/*
  called when a disk IO event needs to be cancelled
*/
static int aio_destructor(struct aio_event *ae)
{
	struct event_context *ev = ae->event_ctx;
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);
	struct io_event result;
	io_cancel(aio_ev->ioctx, &ae->iocb, &result);
	/* TODO: handle errors from io_cancel()! */
	return 0;
}
/* submit an aio disk IO event */
static struct aio_event *aio_event_add_aio(struct event_context *ev,
					   TALLOC_CTX *mem_ctx,
					   struct iocb *iocb,
					   event_aio_handler_t handler,
					   void *private_data)
{
	struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
							   struct aio_event_context);
	struct iocb *iocbp;
	struct aio_event *ae = talloc(mem_ctx?mem_ctx:ev, struct aio_event);
	if (ae == NULL) return NULL;

	ae->event_ctx    = ev;
	ae->iocb         = *iocb;
	ae->handler      = handler;
	ae->private_data = private_data;
	iocbp = &ae->iocb;

	if (io_submit(aio_ev->ioctx, 1, &iocbp) != 1) {
		talloc_free(ae);
		return NULL;
	}
	ae->iocb.data = ae;
	talloc_set_destructor(ae, aio_destructor);

	return ae;
}
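/*
  usage sketch (illustrative only - "my_read_done" and the fd, buf,
  count and offset names are hypothetical, and this assumes the
  generic event_add_aio() wrapper in events.h): a caller prepares a
  libaio iocb and hands it to the events API, which routes it through
  aio_event_add_aio() above; the handler fires from aio_event_loop()
  when the request completes:

    static void my_read_done(struct event_context *ev, struct aio_event *ae,
                             int res, void *private_data)
    {
        // res is the byte count (or negative errno) of the finished pread
    }

    struct iocb iocb;
    io_prep_pread(&iocb, fd, buf, count, offset);
    event_add_aio(ev, ev, &iocb, my_read_done, NULL);
*/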
static const struct event_ops aio_event_ops = {
	.context_init	= aio_event_context_init,
	.add_fd		= aio_event_add_fd,
	.add_aio	= aio_event_add_aio,
	.get_fd_flags	= aio_event_get_fd_flags,
	.set_fd_flags	= aio_event_set_fd_flags,
	.add_timed	= common_event_add_timed,
	.add_signal	= common_event_add_signal,
	.loop_once	= aio_event_loop_once,
	.loop_wait	= aio_event_loop_wait,
};

NTSTATUS events_aio_init(void)
{
	return event_register_backend("aio", &aio_event_ops);
}