selftest:Samba4: report when samba is started and ready
[Samba.git] / source3 / modules / vfs_aio_pthread.c
blob 577180b6b01e9f02b2d4fddaa55b5557ef5a1f28
1 /*
2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "smbd/globals.h"
29 #include "../lib/pthreadpool/pthreadpool_tevent.h"
30 #ifdef HAVE_LINUX_FALLOC_H
31 #include <linux/falloc.h>
32 #endif
34 #if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
37 * We must have openat() to do any thread-based
38 * asynchronous opens. We also must be using
39 * thread-specific credentials (Linux-only
40 * for now).
/*
 * Per-request state for one asynchronous open, linked into the
 * file-global open_pd_list and keyed by the SMB2 message-id (mid).
 * Freed via TALLOC_FREE(); opd_destructor() closes dir_fd and
 * unlinks the entry from the list.
 */
struct aio_open_private_data {
	struct aio_open_private_data *prev, *next;
	/* Inputs. */
	int dir_fd;		/* fd of the parent directory, for openat(). */
	int flags;		/* open(2) flags requested by the client. */
	mode_t mode;		/* create mode. */
	uint64_t mid;		/* SMB2 message-id used to find this request. */
	bool in_progress;	/* true until the worker thread completes. */
	const char *fname;	/* filename relative to dir_fd. */
	char *dname;		/* parent directory path. */
	connection_struct *conn;	/* owning connection (for credentials). */
	const struct security_unix_token *ux_tok; /* creds for the worker thread. */
	uint64_t initial_allocation_size;	/* hint for fallocate(). */
	/* Returns. */
	int ret_fd;		/* resulting fd, or -1. */
	int ret_errno;		/* errno on failure; EINPROGRESS while pending. */
};
61 /* List of outstanding requests we have. */
62 static struct aio_open_private_data *open_pd_list;
64 static void aio_open_do(struct aio_open_private_data *opd);
66 /************************************************************************
67 Find the open private data by mid.
68 ***********************************************************************/
70 static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid)
72 struct aio_open_private_data *opd;
74 for (opd = open_pd_list; opd != NULL; opd = opd->next) {
75 if (opd->mid == mid) {
76 return opd;
80 return NULL;
83 /************************************************************************
84 Callback when an open completes.
85 ***********************************************************************/
static void aio_open_handle_completion(struct tevent_req *subreq)
{
	struct aio_open_private_data *opd =
		tevent_req_callback_data(subreq,
		struct aio_open_private_data);
	int ret;
	struct smbXsrv_connection *xconn;

	ret = pthreadpool_tevent_job_recv(subreq);
	TALLOC_FREE(subreq);
	if (ret != 0) {
		bool ok;

		/* Any error other than "no thread available" is fatal. */
		if (ret != EAGAIN) {
			smb_panic("aio_open_handle_completion");
			/* notreached. */
			return;
		}
		/*
		 * Make sure we run as the user again
		 */
		ok = change_to_user(opd->conn, opd->conn->vuid);
		if (!ok) {
			smb_panic("Can't change to user");
			return;
		}
		/*
		 * If we get EAGAIN from pthreadpool_tevent_job_recv() this
		 * means the lower level pthreadpool failed to create a new
		 * thread. Fallback to sync processing in that case to allow
		 * some progress for the client.
		 */
		aio_open_do(opd);
	}

	DEBUG(10,("aio_open_handle_completion: mid %llu "
		"for file %s/%s completed\n",
		(unsigned long long)opd->mid,
		opd->dname,
		opd->fname));

	/* The worker is done; find_completed_open() may now consume opd. */
	opd->in_progress = false;

	/*
	 * TODO: In future we need a proper algorithm
	 * to find the correct connection for a fsp.
	 * For now we only have one connection, so this is correct...
	 */
	xconn = opd->conn->sconn->client->connections;

	/* Find outstanding event and reschedule. */
	if (!schedule_deferred_open_message_smb(xconn, opd->mid)) {
		/*
		 * Outstanding event didn't exist or was
		 * cancelled. Free up the fd and throw
		 * away the result.
		 */
		if (opd->ret_fd != -1) {
			close(opd->ret_fd);
			opd->ret_fd = -1;
		}
		/* Destructor closes dir_fd and unlinks from open_pd_list. */
		TALLOC_FREE(opd);
	}
}
152 /*****************************************************************
153 The core of the async open code - the worker function. Note we
154 use the new openat() system call to avoid any problems with
155 current working directory changes plus we change credentials
156 on the thread to prevent any security race conditions.
157 *****************************************************************/
159 static void aio_open_worker(void *private_data)
161 struct aio_open_private_data *opd =
162 (struct aio_open_private_data *)private_data;
164 /* Become the correct credential on this thread. */
165 if (set_thread_credentials(opd->ux_tok->uid,
166 opd->ux_tok->gid,
167 (size_t)opd->ux_tok->ngroups,
168 opd->ux_tok->groups) != 0) {
169 opd->ret_fd = -1;
170 opd->ret_errno = errno;
171 return;
174 aio_open_do(opd);
/*
 * Perform the actual (blocking) open. Runs either on a worker thread
 * (via aio_open_worker) or synchronously on the main thread as the
 * EAGAIN fallback in aio_open_handle_completion().
 */
static void aio_open_do(struct aio_open_private_data *opd)
{
	/* openat() relative to the cached parent-directory fd avoids
	 * any dependence on the process's current working directory. */
	opd->ret_fd = openat(opd->dir_fd,
			opd->fname,
			opd->flags,
			opd->mode);

	if (opd->ret_fd == -1) {
		/* Capture errno immediately; later calls may clobber it. */
		opd->ret_errno = errno;
	} else {
		/* Create was successful. */
		opd->ret_errno = 0;

#if defined(HAVE_LINUX_FALLOCATE)
		/*
		 * See if we can set the initial
		 * allocation size. We don't record
		 * the return for this as it's an
		 * optimization - the upper layer
		 * will also do this for us once
		 * the open returns.
		 */
		if (opd->initial_allocation_size) {
			(void)fallocate(opd->ret_fd,
					FALLOC_FL_KEEP_SIZE,
					0,
					(off_t)opd->initial_allocation_size);
		}
#endif
	}
}
209 /************************************************************************
210 Open private data destructor.
211 ***********************************************************************/
213 static int opd_destructor(struct aio_open_private_data *opd)
215 if (opd->dir_fd != -1) {
216 close(opd->dir_fd);
218 DLIST_REMOVE(open_pd_list, opd);
219 return 0;
222 /************************************************************************
223 Create and initialize a private data struct for async open.
224 ***********************************************************************/
/*
 * Allocate and initialize the per-request state for an async open:
 * snapshot the current credentials, split the path into parent dir +
 * relative name, open the parent directory fd, and link the result
 * onto open_pd_list. Returns NULL on any failure (everything already
 * allocated is freed via the talloc hierarchy).
 */
static struct aio_open_private_data *create_private_open_data(const files_struct *fsp,
							int flags,
							mode_t mode)
{
	struct aio_open_private_data *opd = talloc_zero(NULL,
					struct aio_open_private_data);
	const char *fname = NULL;

	if (!opd) {
		return NULL;
	}

	*opd = (struct aio_open_private_data) {
		.dir_fd = -1,
		.ret_fd = -1,
		.ret_errno = EINPROGRESS,	/* "still running" marker. */
		.flags = flags,
		.mode = mode,
		.mid = fsp->mid,
		.in_progress = true,
		.conn = fsp->conn,
		.initial_allocation_size = fsp->initial_allocation_size,
	};

	/* Copy our current credentials. */
	opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn));
	if (opd->ux_tok == NULL) {
		TALLOC_FREE(opd);
		return NULL;
	}

	/*
	 * Copy the parent directory name and the
	 * relative path within it.
	 */
	if (parent_dirname(opd,
			fsp->fsp_name->base_name,
			&opd->dname,
			&fname) == false) {
		TALLOC_FREE(opd);
		return NULL;
	}
	opd->fname = talloc_strdup(opd, fname);
	if (opd->fname == NULL) {
		TALLOC_FREE(opd);
		return NULL;
	}

#if defined(O_DIRECTORY)
	opd->dir_fd = open(opd->dname, O_RDONLY|O_DIRECTORY);
#else
	opd->dir_fd = open(opd->dname, O_RDONLY);
#endif
	if (opd->dir_fd == -1) {
		TALLOC_FREE(opd);
		return NULL;
	}

	/* From here on the destructor owns dir_fd and list membership. */
	talloc_set_destructor(opd, opd_destructor);
	DLIST_ADD_END(open_pd_list, opd);
	return opd;
}
289 /*****************************************************************
290 Setup an async open.
291 *****************************************************************/
293 static int open_async(const files_struct *fsp,
294 int flags,
295 mode_t mode)
297 struct aio_open_private_data *opd = NULL;
298 struct tevent_req *subreq = NULL;
300 opd = create_private_open_data(fsp, flags, mode);
301 if (opd == NULL) {
302 DEBUG(10, ("open_async: Could not create private data.\n"));
303 return -1;
306 subreq = pthreadpool_tevent_job_send(opd,
307 fsp->conn->sconn->ev_ctx,
308 fsp->conn->sconn->pool,
309 aio_open_worker, opd);
310 if (subreq == NULL) {
311 return -1;
313 tevent_req_set_callback(subreq, aio_open_handle_completion, opd);
315 DEBUG(5,("open_async: mid %llu created for file %s/%s\n",
316 (unsigned long long)opd->mid,
317 opd->dname,
318 opd->fname));
320 /* Cause the calling code to reschedule us. */
321 errno = EINTR; /* Maps to NT_STATUS_RETRY. */
322 return -1;
325 /*****************************************************************
326 Look for a matching SMB2 mid. If we find it we're rescheduled,
327 just return the completed open.
328 *****************************************************************/
/*
 * Called on the rescheduled (reentrant) open: look up the async open
 * state by the fsp's mid. Returns false if none exists (first-time
 * call). On success hands back the fd/errno pair and frees the state.
 * Panics if the request is somehow still in flight (open timeout).
 */
static bool find_completed_open(files_struct *fsp,
				int *p_fd,
				int *p_errno)
{
	struct aio_open_private_data *opd;

	opd = find_open_private_data_by_mid(fsp->mid);
	if (!opd) {
		return false;
	}

	if (opd->in_progress) {
		DEBUG(0,("find_completed_open: mid %llu "
			"still in progress for "
			"file %s/%s. PANIC !\n",
			(unsigned long long)opd->mid,
			opd->dname,
			opd->fname));
		/* Disaster ! This is an open timeout. Just panic. */
		smb_panic("find_completed_open - in_progress\n");
		/* notreached. */
		return false;
	}

	/* Hand ownership of the fd result to the caller. */
	*p_fd = opd->ret_fd;
	*p_errno = opd->ret_errno;

	DEBUG(5,("find_completed_open: mid %llu returning "
		"fd = %d, errno = %d (%s) "
		"for file %s\n",
		(unsigned long long)opd->mid,
		opd->ret_fd,
		opd->ret_errno,
		strerror(opd->ret_errno),
		smb_fname_str_dbg(fsp->fsp_name)));

	/* Now we can free the opd. */
	TALLOC_FREE(opd);
	return true;
}
371 /*****************************************************************
372 The core open function. Only go async on O_CREAT|O_EXCL
373 opens to prevent any race conditions.
374 *****************************************************************/
376 static int aio_pthread_open_fn(vfs_handle_struct *handle,
377 struct smb_filename *smb_fname,
378 files_struct *fsp,
379 int flags,
380 mode_t mode)
382 int my_errno = 0;
383 int fd = -1;
384 bool aio_allow_open = lp_parm_bool(
385 SNUM(handle->conn), "aio_pthread", "aio open", false);
387 if (smb_fname->stream_name) {
388 /* Don't handle stream opens. */
389 errno = ENOENT;
390 return -1;
393 if (!aio_allow_open) {
394 /* aio opens turned off. */
395 return open(smb_fname->base_name, flags, mode);
398 if (!(flags & O_CREAT)) {
399 /* Only creates matter. */
400 return open(smb_fname->base_name, flags, mode);
403 if (!(flags & O_EXCL)) {
404 /* Only creates with O_EXCL matter. */
405 return open(smb_fname->base_name, flags, mode);
409 * See if this is a reentrant call - i.e. is this a
410 * restart of an existing open that just completed.
413 if (find_completed_open(fsp,
414 &fd,
415 &my_errno)) {
416 errno = my_errno;
417 return fd;
420 /* Ok, it's a create exclusive call - pass it to a thread helper. */
421 return open_async(fsp, flags, mode);
423 #endif
/*
 * VFS operation table: only the open hook is overridden, and only
 * when the platform supports openat() and per-thread credentials.
 */
static struct vfs_fn_pointers vfs_aio_pthread_fns = {
#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS)
	.open_fn = aio_pthread_open_fn,
#endif
};
static_decl_vfs;
/* Module entry point: register the "aio_pthread" VFS module. */
NTSTATUS vfs_aio_pthread_init(TALLOC_CTX *ctx)
{
	return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
				"aio_pthread", &vfs_aio_pthread_fns);
}