 * Simulate Posix AIO using Linux kernel AIO.
 *
 * Copyright (C) Jeremy Allison 2012
 * Copyright (C) Volker Lendecke 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#include "system/filesys.h"
#include "smbd/smbd.h"
#include "smbd/globals.h"
#include "lib/util/tevent_unix.h"
#include "lib/sys_rw.h"
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/eventfd.h>
31 static int event_fd
= -1;
32 static io_context_t io_ctx
;
33 static struct tevent_fd
*aio_read_event
;
35 static unsigned num_busy
;
/* Forward declaration: fd handler run when event_fd becomes readable. */
static void aio_linux_done(struct tevent_context *event_ctx,
			   struct tevent_fd *event,
			   uint16_t flags, void *private_data);
/************************************************************************
 Housekeeping. Cleanup if no activity for 30 seconds.
***********************************************************************/
45 static void aio_linux_housekeeping(struct tevent_context
*event_ctx
,
46 struct tevent_timer
*te
,
50 /* Remove this timed event handler. */
53 if ((num_busy
!= 0) || used
) {
56 /* Still busy. Look again in 30 seconds. */
57 (void)tevent_add_timer(event_ctx
,
59 timeval_current_ofs(30, 0),
60 aio_linux_housekeeping
,
65 /* No activity for 30 seconds. Close out kernel resources. */
66 io_queue_release(io_ctx
);
67 memset(&io_ctx
, '\0', sizeof(io_ctx
));
74 TALLOC_FREE(aio_read_event
);
/************************************************************************
 Ensure event fd and aio context are initialized.
***********************************************************************/
81 static bool init_aio_linux(struct vfs_handle_struct
*handle
)
83 struct tevent_timer
*te
= NULL
;
86 /* Already initialized. */
90 /* Schedule a shutdown event for 30 seconds from now. */
91 te
= tevent_add_timer(handle
->conn
->sconn
->ev_ctx
,
93 timeval_current_ofs(30, 0),
94 aio_linux_housekeeping
,
101 event_fd
= eventfd(0, EFD_NONBLOCK
| EFD_CLOEXEC
);
102 if (event_fd
== -1) {
106 aio_read_event
= tevent_add_fd(server_event_context(),
112 if (aio_read_event
== NULL
) {
116 if (io_queue_init(aio_pending_size
, &io_ctx
)) {
120 DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
127 DEBUG(10,("init_aio_linux: initialization failed\n"));
130 TALLOC_FREE(aio_read_event
);
131 if (event_fd
!= -1) {
135 memset(&io_ctx
, '\0', sizeof(io_ctx
));
139 struct aio_linux_state
{
140 struct iocb event_iocb
;
145 static struct tevent_req
*aio_linux_pread_send(
146 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
147 struct tevent_context
*ev
, struct files_struct
*fsp
,
148 void *data
, size_t n
, off_t offset
)
150 struct tevent_req
*req
;
151 struct aio_linux_state
*state
;
155 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
159 if (!init_aio_linux(handle
)) {
160 tevent_req_error(req
, EIO
);
161 return tevent_req_post(req
, ev
);
164 io_prep_pread(&state
->event_iocb
, fsp
->fh
->fd
, data
, n
, offset
);
165 io_set_eventfd(&state
->event_iocb
, event_fd
);
166 state
->event_iocb
.data
= req
;
168 piocb
= &state
->event_iocb
;
170 ret
= io_submit(io_ctx
, 1, &piocb
);
172 tevent_req_error(req
, -ret
);
173 return tevent_req_post(req
, ev
);
180 static struct tevent_req
*aio_linux_pwrite_send(
181 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
182 struct tevent_context
*ev
, struct files_struct
*fsp
,
183 const void *data
, size_t n
, off_t offset
)
185 struct tevent_req
*req
;
186 struct aio_linux_state
*state
;
190 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
194 if (!init_aio_linux(handle
)) {
195 tevent_req_error(req
, EIO
);
196 return tevent_req_post(req
, ev
);
199 io_prep_pwrite(&state
->event_iocb
, fsp
->fh
->fd
, discard_const(data
),
201 io_set_eventfd(&state
->event_iocb
, event_fd
);
202 state
->event_iocb
.data
= req
;
204 piocb
= &state
->event_iocb
;
206 ret
= io_submit(io_ctx
, 1, &piocb
);
208 tevent_req_error(req
, -ret
);
209 return tevent_req_post(req
, ev
);
216 static struct tevent_req
*aio_linux_fsync_send(
217 struct vfs_handle_struct
*handle
, TALLOC_CTX
*mem_ctx
,
218 struct tevent_context
*ev
, struct files_struct
*fsp
)
220 struct tevent_req
*req
;
221 struct aio_linux_state
*state
;
225 req
= tevent_req_create(mem_ctx
, &state
, struct aio_linux_state
);
229 if (!init_aio_linux(handle
)) {
230 tevent_req_error(req
, EIO
);
231 return tevent_req_post(req
, ev
);
234 io_prep_fsync(&state
->event_iocb
, fsp
->fh
->fd
);
235 io_set_eventfd(&state
->event_iocb
, event_fd
);
236 state
->event_iocb
.data
= req
;
238 piocb
= &state
->event_iocb
;
240 ret
= io_submit(io_ctx
, 1, &piocb
);
242 tevent_req_error(req
, -ret
);
243 return tevent_req_post(req
, ev
);
250 static void aio_linux_done(struct tevent_context
*event_ctx
,
251 struct tevent_fd
*event
,
252 uint16_t flags
, void *private_data
)
254 uint64_t num_events
= 0;
256 DEBUG(10, ("aio_linux_done called with flags=%d\n",
259 /* Read the number of events available. */
260 if (sys_read(event_fd
, &num_events
, sizeof(num_events
)) !=
261 sizeof(num_events
)) {
262 smb_panic("aio_linux_handle_completion: invalid read");
265 while (num_events
> 0) {
266 struct timespec ts
= { 0, };
267 struct io_event finished
;
268 struct tevent_req
*req
;
269 struct aio_linux_state
*state
;
272 ret
= io_getevents(io_ctx
, 1, 1, &finished
, &ts
);
274 DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
279 DEBUG(10, ("aio_linux_done: io_getvents returned "
286 req
= talloc_get_type_abort(finished
.data
,
288 state
= tevent_req_data(req
, struct aio_linux_state
);
290 if (finished
.res
< 0) {
292 state
->err
= -finished
.res
;
294 state
->ret
= finished
.res
;
297 tevent_req_done(req
);
302 static ssize_t
aio_linux_recv(struct tevent_req
*req
, int *err
)
304 struct aio_linux_state
*state
= tevent_req_data(
305 req
, struct aio_linux_state
);
307 if (tevent_req_is_unix_error(req
, err
)) {
310 if (state
->ret
== -1) {
/*
 * int-returning receiver for fsync requests; delegates to
 * aio_linux_recv().
 */
static int aio_linux_int_recv(struct tevent_req *req, int *err)
{
	/*
	 * Use implicit conversion ssize_t->int
	 */
	return aio_linux_recv(req, err);
}
324 static int aio_linux_connect(vfs_handle_struct
*handle
, const char *service
,
327 /*********************************************************************
328 * How many io_events to initialize ?
329 * 128 per process seems insane as a default until you realize that
330 * (a) Throttling is done in SMB2 via the crediting algorithm.
331 * (b) SMB1 clients are limited to max_mux (50) outstanding
332 * requests and Windows clients don't use this anyway.
333 * Essentially we want this to be unlimited unless smb.conf
335 *********************************************************************/
336 aio_pending_size
= lp_parm_int(
337 SNUM(handle
->conn
), "aio_linux", "aio num events", 128);
338 return SMB_VFS_NEXT_CONNECT(handle
, service
, user
);
341 static struct vfs_fn_pointers vfs_aio_linux_fns
= {
342 .connect_fn
= aio_linux_connect
,
343 .pread_send_fn
= aio_linux_pread_send
,
344 .pread_recv_fn
= aio_linux_recv
,
345 .pwrite_send_fn
= aio_linux_pwrite_send
,
346 .pwrite_recv_fn
= aio_linux_recv
,
347 .fsync_send_fn
= aio_linux_fsync_send
,
348 .fsync_recv_fn
= aio_linux_int_recv
,
351 NTSTATUS
vfs_aio_linux_init(void)
353 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION
,
354 "aio_linux", &vfs_aio_linux_fns
);