ctdb: Accept the key in hex format for the pstore command
[Samba.git] / source3 / modules / vfs_aio_linux.c
blob5b515d8a52d9660431e3531d585240948aee3208
/*
 * Simulate Posix AIO using Linux kernel AIO.
 *
 * Copyright (C) Jeremy Allison 2012
 * Copyright (C) Volker Lendecke 2012
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
22 #include "includes.h"
23 #include "system/filesys.h"
24 #include "smbd/smbd.h"
25 #include "smbd/globals.h"
26 #include "lib/util/tevent_unix.h"
27 #include "lib/sys_rw.h"
28 #include <sys/eventfd.h>
29 #include <libaio.h>
31 static int event_fd = -1;
32 static io_context_t io_ctx;
33 static struct tevent_fd *aio_read_event;
34 static bool used;
35 static unsigned num_busy;
/* Completion handler, run when event_fd becomes readable. */
static void aio_linux_done(struct tevent_context *event_ctx,
			   struct tevent_fd *event,
			   uint16_t flags, void *private_data);
41 /************************************************************************
42 Housekeeping. Cleanup if no activity for 30 seconds.
43 ***********************************************************************/
45 static void aio_linux_housekeeping(struct tevent_context *event_ctx,
46 struct tevent_timer *te,
47 struct timeval now,
48 void *private_data)
50 /* Remove this timed event handler. */
51 TALLOC_FREE(te);
53 if ((num_busy != 0) || used) {
54 used = false;
56 /* Still busy. Look again in 30 seconds. */
57 (void)tevent_add_timer(event_ctx,
58 NULL,
59 timeval_current_ofs(30, 0),
60 aio_linux_housekeeping,
61 NULL);
62 return;
65 /* No activity for 30 seconds. Close out kernel resources. */
66 io_queue_release(io_ctx);
67 memset(&io_ctx, '\0', sizeof(io_ctx));
69 if (event_fd != -1) {
70 close(event_fd);
71 event_fd = -1;
74 TALLOC_FREE(aio_read_event);
77 /************************************************************************
78 Ensure event fd and aio context are initialized.
79 ***********************************************************************/
81 static bool init_aio_linux(struct vfs_handle_struct *handle)
83 struct tevent_timer *te = NULL;
85 if (event_fd != -1) {
86 /* Already initialized. */
87 return true;
90 /* Schedule a shutdown event for 30 seconds from now. */
91 te = tevent_add_timer(handle->conn->sconn->ev_ctx,
92 NULL,
93 timeval_current_ofs(30, 0),
94 aio_linux_housekeeping,
95 NULL);
97 if (te == NULL) {
98 goto fail;
101 event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
102 if (event_fd == -1) {
103 goto fail;
106 aio_read_event = tevent_add_fd(server_event_context(),
107 NULL,
108 event_fd,
109 TEVENT_FD_READ,
110 aio_linux_done,
111 NULL);
112 if (aio_read_event == NULL) {
113 goto fail;
116 if (io_queue_init(aio_pending_size, &io_ctx)) {
117 goto fail;
120 DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
121 aio_pending_size));
123 return true;
125 fail:
127 DEBUG(10,("init_aio_linux: initialization failed\n"));
129 TALLOC_FREE(te);
130 TALLOC_FREE(aio_read_event);
131 if (event_fd != -1) {
132 close(event_fd);
133 event_fd = -1;
135 memset(&io_ctx, '\0', sizeof(io_ctx));
136 return false;
139 struct aio_linux_state {
140 struct iocb event_iocb;
141 ssize_t ret;
142 int err;
145 static struct tevent_req *aio_linux_pread_send(
146 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
147 struct tevent_context *ev, struct files_struct *fsp,
148 void *data, size_t n, off_t offset)
150 struct tevent_req *req;
151 struct aio_linux_state *state;
152 struct iocb *piocb;
153 int ret;
155 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
156 if (req == NULL) {
157 return NULL;
159 if (!init_aio_linux(handle)) {
160 tevent_req_error(req, EIO);
161 return tevent_req_post(req, ev);
164 io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset);
165 io_set_eventfd(&state->event_iocb, event_fd);
166 state->event_iocb.data = req;
168 piocb = &state->event_iocb;
170 ret = io_submit(io_ctx, 1, &piocb);
171 if (ret < 0) {
172 tevent_req_error(req, -ret);
173 return tevent_req_post(req, ev);
175 num_busy += 1;
176 used = true;
177 return req;
180 static struct tevent_req *aio_linux_pwrite_send(
181 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
182 struct tevent_context *ev, struct files_struct *fsp,
183 const void *data, size_t n, off_t offset)
185 struct tevent_req *req;
186 struct aio_linux_state *state;
187 struct iocb *piocb;
188 int ret;
190 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
191 if (req == NULL) {
192 return NULL;
194 if (!init_aio_linux(handle)) {
195 tevent_req_error(req, EIO);
196 return tevent_req_post(req, ev);
199 io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data),
200 n, offset);
201 io_set_eventfd(&state->event_iocb, event_fd);
202 state->event_iocb.data = req;
204 piocb = &state->event_iocb;
206 ret = io_submit(io_ctx, 1, &piocb);
207 if (ret < 0) {
208 tevent_req_error(req, -ret);
209 return tevent_req_post(req, ev);
211 num_busy += 1;
212 used = true;
213 return req;
216 static struct tevent_req *aio_linux_fsync_send(
217 struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
218 struct tevent_context *ev, struct files_struct *fsp)
220 struct tevent_req *req;
221 struct aio_linux_state *state;
222 struct iocb *piocb;
223 int ret;
225 req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
226 if (req == NULL) {
227 return NULL;
229 if (!init_aio_linux(handle)) {
230 tevent_req_error(req, EIO);
231 return tevent_req_post(req, ev);
234 io_prep_fsync(&state->event_iocb, fsp->fh->fd);
235 io_set_eventfd(&state->event_iocb, event_fd);
236 state->event_iocb.data = req;
238 piocb = &state->event_iocb;
240 ret = io_submit(io_ctx, 1, &piocb);
241 if (ret < 0) {
242 tevent_req_error(req, -ret);
243 return tevent_req_post(req, ev);
245 num_busy += 1;
246 used = true;
247 return req;
250 static void aio_linux_done(struct tevent_context *event_ctx,
251 struct tevent_fd *event,
252 uint16_t flags, void *private_data)
254 uint64_t num_events = 0;
256 DEBUG(10, ("aio_linux_done called with flags=%d\n",
257 (int)flags));
259 /* Read the number of events available. */
260 if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
261 sizeof(num_events)) {
262 smb_panic("aio_linux_handle_completion: invalid read");
265 while (num_events > 0) {
266 struct timespec ts = { 0, };
267 struct io_event finished;
268 struct tevent_req *req;
269 struct aio_linux_state *state;
270 int ret;
272 ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
273 if (ret < 0) {
274 DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
275 strerror(-ret)));
276 return;
278 if (ret == 0) {
279 DEBUG(10, ("aio_linux_done: io_getvents returned "
280 "0\n"));
281 continue;
284 num_busy -= 1;
286 req = talloc_get_type_abort(finished.data,
287 struct tevent_req);
288 state = tevent_req_data(req, struct aio_linux_state);
290 if (finished.res < 0) {
291 state->ret = -1;
292 state->err = -finished.res;
293 } else {
294 state->ret = finished.res;
295 state->err = 0;
297 tevent_req_done(req);
298 num_events -= 1;
302 static ssize_t aio_linux_recv(struct tevent_req *req, int *err)
304 struct aio_linux_state *state = tevent_req_data(
305 req, struct aio_linux_state);
307 if (tevent_req_is_unix_error(req, err)) {
308 return -1;
310 if (state->ret == -1) {
311 *err = state->err;
313 return state->ret;
/* fsync variant of the recv function: result fits in an int. */
static int aio_linux_int_recv(struct tevent_req *req, int *err)
{
	/*
	 * Use implicit conversion ssize_t->int
	 */
	return aio_linux_recv(req, err);
}
324 static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
325 const char *user)
327 /*********************************************************************
328 * How many io_events to initialize ?
329 * 128 per process seems insane as a default until you realize that
330 * (a) Throttling is done in SMB2 via the crediting algorithm.
331 * (b) SMB1 clients are limited to max_mux (50) outstanding
332 * requests and Windows clients don't use this anyway.
333 * Essentially we want this to be unlimited unless smb.conf
334 * says different.
335 *********************************************************************/
336 aio_pending_size = lp_parm_int(
337 SNUM(handle->conn), "aio_linux", "aio num events", 128);
338 return SMB_VFS_NEXT_CONNECT(handle, service, user);
341 static struct vfs_fn_pointers vfs_aio_linux_fns = {
342 .connect_fn = aio_linux_connect,
343 .pread_send_fn = aio_linux_pread_send,
344 .pread_recv_fn = aio_linux_recv,
345 .pwrite_send_fn = aio_linux_pwrite_send,
346 .pwrite_recv_fn = aio_linux_recv,
347 .fsync_send_fn = aio_linux_fsync_send,
348 .fsync_recv_fn = aio_linux_int_recv,
351 NTSTATUS vfs_aio_linux_init(void)
353 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
354 "aio_linux", &vfs_aio_linux_fns);