/*
 * Simulate Posix AIO using Linux kernel AIO.
 *
 * Copyright (C) Jeremy Allison 2012
 * Copyright (C) Volker Lendecke 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "includes.h"
#include "system/filesys.h"
#include "smbd/smbd.h"
#include "smbd/globals.h"
#include "lib/util/tevent_unix.h"
#include "lib/util/sys_rw.h"
#include <sys/eventfd.h>
#include <libaio.h>
#include "smbprofile.h"

32 static int event_fd
= -1;
33 static io_context_t io_ctx
;
34 static struct tevent_fd
*aio_read_event
;
36 static unsigned num_busy
;
/* Forward declaration: completion handler registered on event_fd. */
static void aio_linux_done(struct tevent_context *event_ctx,
			   struct tevent_fd *event,
			   uint16_t flags, void *private_data);

42 /************************************************************************
43 Housekeeping. Cleanup if no activity for 30 seconds.
44 ***********************************************************************/
46 static void aio_linux_housekeeping(struct tevent_context
*event_ctx
,
47 struct tevent_timer
*te
,
51 /* Remove this timed event handler. */
54 if ((num_busy
!= 0) || used
) {
57 /* Still busy. Look again in 30 seconds. */
58 (void)tevent_add_timer(event_ctx
,
60 timeval_current_ofs(30, 0),
61 aio_linux_housekeeping
,
66 /* No activity for 30 seconds. Close out kernel resources. */
67 io_queue_release(io_ctx
);
68 memset(&io_ctx
, '\0', sizeof(io_ctx
));
75 TALLOC_FREE(aio_read_event
);
78 /************************************************************************
79 Ensure event fd and aio context are initialized.
80 ***********************************************************************/
82 static bool init_aio_linux(struct vfs_handle_struct
*handle
)
84 struct tevent_timer
*te
= NULL
;
87 /* Already initialized. */
91 /* Schedule a shutdown event for 30 seconds from now. */
92 te
= tevent_add_timer(handle
->conn
->sconn
->ev_ctx
,
94 timeval_current_ofs(30, 0),
95 aio_linux_housekeeping
,
102 event_fd
= eventfd(0, EFD_NONBLOCK
| EFD_CLOEXEC
);
103 if (event_fd
== -1) {
107 aio_read_event
= tevent_add_fd(server_event_context(),
113 if (aio_read_event
== NULL
) {
117 if (io_queue_init(lp_aio_max_threads(), &io_ctx
)) {
121 DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
122 (int)lp_aio_max_threads()));
128 DEBUG(10,("init_aio_linux: initialization failed\n"));
131 TALLOC_FREE(aio_read_event
);
132 if (event_fd
!= -1) {
136 memset(&io_ctx
, '\0', sizeof(io_ctx
));
140 struct aio_linux_state
{
141 struct iocb event_iocb
;
143 struct vfs_aio_state vfs_aio_state
;
144 struct timespec start
;
147 static struct tevent_req
*aio_linux_pread_send(
148 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
149 struct tevent_context
*ev
, struct files_struct
*fsp
,
150 void *data
, size_t n
, off_t offset
)
152 struct tevent_req
*req
;
153 struct aio_linux_state
*state
;
157 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
161 if (!init_aio_linux(handle
)) {
162 tevent_req_error(req
, EIO
);
163 return tevent_req_post(req
, ev
);
166 io_prep_pread(&state
->event_iocb
, fsp
->fh
->fd
, data
, n
, offset
);
167 io_set_eventfd(&state
->event_iocb
, event_fd
);
168 state
->event_iocb
.data
= req
;
170 piocb
= &state
->event_iocb
;
172 PROFILE_TIMESTAMP(&state
->start
);
173 ret
= io_submit(io_ctx
, 1, &piocb
);
175 tevent_req_error(req
, -ret
);
176 return tevent_req_post(req
, ev
);
183 static struct tevent_req
*aio_linux_pwrite_send(
184 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
185 struct tevent_context
*ev
, struct files_struct
*fsp
,
186 const void *data
, size_t n
, off_t offset
)
188 struct tevent_req
*req
;
189 struct aio_linux_state
*state
;
193 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
197 if (!init_aio_linux(handle
)) {
198 tevent_req_error(req
, EIO
);
199 return tevent_req_post(req
, ev
);
202 io_prep_pwrite(&state
->event_iocb
, fsp
->fh
->fd
, discard_const(data
),
204 io_set_eventfd(&state
->event_iocb
, event_fd
);
205 state
->event_iocb
.data
= req
;
207 piocb
= &state
->event_iocb
;
209 PROFILE_TIMESTAMP(&state
->start
);
210 ret
= io_submit(io_ctx
, 1, &piocb
);
212 tevent_req_error(req
, -ret
);
213 return tevent_req_post(req
, ev
);
220 static struct tevent_req
*aio_linux_fsync_send(
221 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
222 struct tevent_context
*ev
, struct files_struct
*fsp
)
224 struct tevent_req
*req
;
225 struct aio_linux_state
*state
;
229 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
233 if (!init_aio_linux(handle
)) {
234 tevent_req_error(req
, EIO
);
235 return tevent_req_post(req
, ev
);
238 io_prep_fsync(&state
->event_iocb
, fsp
->fh
->fd
);
239 io_set_eventfd(&state
->event_iocb
, event_fd
);
240 state
->event_iocb
.data
= req
;
242 piocb
= &state
->event_iocb
;
244 PROFILE_TIMESTAMP(&state
->start
);
245 ret
= io_submit(io_ctx
, 1, &piocb
);
247 tevent_req_error(req
, -ret
);
248 return tevent_req_post(req
, ev
);
255 static void aio_linux_done(struct tevent_context
*event_ctx
,
256 struct tevent_fd
*event
,
257 uint16_t flags
, void *private_data
)
259 uint64_t num_events
= 0;
262 DEBUG(10, ("aio_linux_done called with flags=%d\n",
265 PROFILE_TIMESTAMP(&end
);
267 /* Read the number of events available. */
268 if (sys_read(event_fd
, &num_events
, sizeof(num_events
)) !=
269 sizeof(num_events
)) {
270 smb_panic("aio_linux_handle_completion: invalid read");
273 while (num_events
> 0) {
274 struct timespec ts
= { 0, };
275 struct io_event finished
;
276 struct tevent_req
*req
;
277 struct aio_linux_state
*state
;
280 ret
= io_getevents(io_ctx
, 1, 1, &finished
, &ts
);
282 DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
287 DEBUG(10, ("aio_linux_done: io_getvents returned "
294 req
= talloc_get_type_abort(finished
.data
,
296 state
= tevent_req_data(req
, struct aio_linux_state
);
298 if (finished
.res
< 0) {
300 state
->vfs_aio_state
.error
= -finished
.res
;
302 state
->ret
= finished
.res
;
304 state
->vfs_aio_state
.duration
= nsec_time_diff(&end
, &state
->start
);
305 tevent_req_done(req
);
310 static ssize_t
aio_linux_recv(struct tevent_req
*req
,
311 struct vfs_aio_state
*vfs_aio_state
)
313 struct aio_linux_state
*state
= tevent_req_data(
314 req
, struct aio_linux_state
);
316 if (tevent_req_is_unix_error(req
, &vfs_aio_state
->error
)) {
319 *vfs_aio_state
= state
->vfs_aio_state
;
/*
 * Variant of aio_linux_recv() with an int return type, used as the
 * fsync receive function.
 */
static int aio_linux_int_recv(struct tevent_req *req,
			      struct vfs_aio_state *vfs_aio_state)
{
	/*
	 * Use implicit conversion ssize_t->int
	 */
	return aio_linux_recv(req, vfs_aio_state);
}

332 static struct vfs_fn_pointers vfs_aio_linux_fns
= {
333 .pread_send_fn
= aio_linux_pread_send
,
334 .pread_recv_fn
= aio_linux_recv
,
335 .pwrite_send_fn
= aio_linux_pwrite_send
,
336 .pwrite_recv_fn
= aio_linux_recv
,
337 .fsync_send_fn
= aio_linux_fsync_send
,
338 .fsync_recv_fn
= aio_linux_int_recv
,
342 NTSTATUS
vfs_aio_linux_init(TALLOC_CTX
*ctx
)
344 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION
,
345 "aio_linux", &vfs_aio_linux_fns
);