docs: man regpatch: Add missing meta data.
[Samba/gbeck.git] / source3 / modules / vfs_aio_linux.c
blob4ccdd8fb7ef2372e5f36d732d220cb49600b3da4
1 /*
2 * Simulate Posix AIO using Linux kernel AIO.
4 * Copyright (C) Jeremy Allison 2012
5 * Copyright (C) Volker Lendecke 2012
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 #include "includes.h"
23 #include "system/filesys.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26 #include "lib/util/tevent_unix.h"
27 #include <sys/eventfd.h>
28 #include <libaio.h>
30 static int event_fd = -1;
31 static io_context_t io_ctx;
32 static struct fd_event *aio_read_event;
33 static bool used;
34 static unsigned num_busy;
36 static void aio_linux_done(struct event_context *event_ctx,
37 struct fd_event *event,
38 uint16 flags, void *private_data);
40 /************************************************************************
41 Housekeeping. Cleanup if no activity for 30 seconds.
42 ***********************************************************************/
44 static void aio_linux_housekeeping(struct tevent_context *event_ctx,
45 struct tevent_timer *te,
46 struct timeval now,
47 void *private_data)
49 /* Remove this timed event handler. */
50 TALLOC_FREE(te);
52 if ((num_busy != 0) || used) {
53 used = false;
55 /* Still busy. Look again in 30 seconds. */
56 (void)tevent_add_timer(event_ctx,
57 NULL,
58 timeval_current_ofs(30, 0),
59 aio_linux_housekeeping,
60 NULL);
61 return;
64 /* No activity for 30 seconds. Close out kernel resources. */
65 io_queue_release(io_ctx);
66 memset(&io_ctx, '\0', sizeof(io_ctx));
68 if (event_fd != -1) {
69 close(event_fd);
70 event_fd = -1;
73 TALLOC_FREE(aio_read_event);
76 /************************************************************************
77 Ensure event fd and aio context are initialized.
78 ***********************************************************************/
80 static bool init_aio_linux(struct vfs_handle_struct *handle)
82 struct tevent_timer *te = NULL;
84 if (event_fd != -1) {
85 /* Already initialized. */
86 return true;
89 /* Schedule a shutdown event for 30 seconds from now. */
90 te = tevent_add_timer(handle->conn->sconn->ev_ctx,
91 NULL,
92 timeval_current_ofs(30, 0),
93 aio_linux_housekeeping,
94 NULL);
96 if (te == NULL) {
97 goto fail;
100 event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
101 if (event_fd == -1) {
102 goto fail;
105 aio_read_event = tevent_add_fd(server_event_context(),
106 NULL,
107 event_fd,
108 TEVENT_FD_READ,
109 aio_linux_done,
110 NULL);
111 if (aio_read_event == NULL) {
112 goto fail;
115 if (io_queue_init(aio_pending_size, &io_ctx)) {
116 goto fail;
119 DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
120 aio_pending_size));
122 return true;
124 fail:
126 DEBUG(10,("init_aio_linux: initialization failed\n"));
128 TALLOC_FREE(te);
129 TALLOC_FREE(aio_read_event);
130 if (event_fd != -1) {
131 close(event_fd);
132 event_fd = -1;
134 memset(&io_ctx, '\0', sizeof(io_ctx));
135 return false;
138 struct aio_linux_state {
139 struct iocb event_iocb;
140 ssize_t ret;
141 int err;
144 static struct tevent_req *aio_linux_pread_send(
145 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
146 struct tevent_context *ev, struct files_struct *fsp,
147 void *data, size_t n, off_t offset)
149 struct tevent_req *req;
150 struct aio_linux_state *state;
151 struct iocb *piocb;
152 int ret;
154 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
155 if (req == NULL) {
156 return NULL;
158 if (!init_aio_linux(handle)) {
159 tevent_req_error(req, EIO);
160 return tevent_req_post(req, ev);
163 io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset);
164 io_set_eventfd(&state->event_iocb, event_fd);
165 state->event_iocb.data = req;
167 piocb = &state->event_iocb;
169 ret = io_submit(io_ctx, 1, &piocb);
170 if (ret < 0) {
171 tevent_req_error(req, -ret);
172 return tevent_req_post(req, ev);
174 num_busy += 1;
175 used = true;
176 return req;
179 static struct tevent_req *aio_linux_pwrite_send(
180 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
181 struct tevent_context *ev, struct files_struct *fsp,
182 const void *data, size_t n, off_t offset)
184 struct tevent_req *req;
185 struct aio_linux_state *state;
186 struct iocb *piocb;
187 int ret;
189 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
190 if (req == NULL) {
191 return NULL;
193 if (!init_aio_linux(handle)) {
194 tevent_req_error(req, EIO);
195 return tevent_req_post(req, ev);
198 io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data),
199 n, offset);
200 io_set_eventfd(&state->event_iocb, event_fd);
201 state->event_iocb.data = req;
203 piocb = &state->event_iocb;
205 ret = io_submit(io_ctx, 1, &piocb);
206 if (ret < 0) {
207 tevent_req_error(req, -ret);
208 return tevent_req_post(req, ev);
210 num_busy += 1;
211 used = true;
212 return req;
215 static struct tevent_req *aio_linux_fsync_send(
216 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
217 struct tevent_context *ev, struct files_struct *fsp)
219 struct tevent_req *req;
220 struct aio_linux_state *state;
221 struct iocb *piocb;
222 int ret;
224 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
225 if (req == NULL) {
226 return NULL;
228 if (!init_aio_linux(handle)) {
229 tevent_req_error(req, EIO);
230 return tevent_req_post(req, ev);
233 io_prep_fsync(&state->event_iocb, fsp->fh->fd);
234 io_set_eventfd(&state->event_iocb, event_fd);
235 state->event_iocb.data = req;
237 piocb = &state->event_iocb;
239 ret = io_submit(io_ctx, 1, &piocb);
240 if (ret < 0) {
241 tevent_req_error(req, -ret);
242 return tevent_req_post(req, ev);
244 num_busy += 1;
245 used = true;
246 return req;
249 static void aio_linux_done(struct event_context *event_ctx,
250 struct fd_event *event,
251 uint16 flags, void *private_data)
253 uint64_t num_events = 0;
255 DEBUG(10, ("aio_linux_done called with flags=%d\n",
256 (int)flags));
258 /* Read the number of events available. */
259 if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
260 sizeof(num_events)) {
261 smb_panic("aio_linux_handle_completion: invalid read");
264 while (num_events > 0) {
265 struct timespec ts = { 0, };
266 struct io_event finished;
267 struct tevent_req *req;
268 struct aio_linux_state *state;
269 int ret;
271 ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
272 if (ret < 0) {
273 DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
274 strerror(-ret)));
275 return;
277 if (ret == 0) {
278 DEBUG(10, ("aio_linux_done: io_getvents returned "
279 "0\n"));
280 continue;
283 num_busy -= 1;
285 req = talloc_get_type_abort(finished.data,
286 struct tevent_req);
287 state = tevent_req_data(req, struct aio_linux_state);
289 if (finished.res < 0) {
290 state->ret = -1;
291 state->err = -finished.res;
292 } else {
293 state->ret = finished.res;
294 state->err = 0;
296 tevent_req_done(req);
297 num_events -= 1;
301 static ssize_t aio_linux_recv(struct tevent_req *req, int *err)
303 struct aio_linux_state *state = tevent_req_data(
304 req, struct aio_linux_state);
306 if (tevent_req_is_unix_error(req, err)) {
307 return -1;
309 if (state->ret == -1) {
310 *err = state->err;
312 return state->ret;
/*
 * int-returning variant of aio_linux_recv(), used for fsync.
 */
static int aio_linux_int_recv(struct tevent_req *req, int *err)
{
	ssize_t result = aio_linux_recv(req, err);

	/* Narrow ssize_t to int. */
	return (int)result;
}
323 static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
324 const char *user)
326 /*********************************************************************
327 * How many io_events to initialize ?
328 * 128 per process seems insane as a default until you realize that
329 * (a) Throttling is done in SMB2 via the crediting algorithm.
330 * (b) SMB1 clients are limited to max_mux (50) outstanding
331 * requests and Windows clients don't use this anyway.
332 * Essentially we want this to be unlimited unless smb.conf
333 * says different.
334 *********************************************************************/
335 aio_pending_size = lp_parm_int(
336 SNUM(handle->conn), "aio_linux", "aio num events", 128);
337 return SMB_VFS_NEXT_CONNECT(handle, service, user);
340 static struct vfs_fn_pointers vfs_aio_linux_fns = {
341 .connect_fn = aio_linux_connect,
342 .pread_send_fn = aio_linux_pread_send,
343 .pread_recv_fn = aio_linux_recv,
344 .pwrite_send_fn = aio_linux_pwrite_send,
345 .pwrite_recv_fn = aio_linux_recv,
346 .fsync_send_fn = aio_linux_fsync_send,
347 .fsync_recv_fn = aio_linux_int_recv,
350 NTSTATUS vfs_aio_linux_init(void)
352 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
353 "aio_linux", &vfs_aio_linux_fns);