Add a sys_get_number_of_cores() function that calls sysconf or sysctl
[Samba/gebeck_regimport.git] / source3 / modules / vfs_aio_pthread.c
blob9217b69e80f03ffbe385e4ae83bb06ff5a668b0d
1 /*
2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "pthreadpool.h"
/* Defined outside this file; only handled here through pointers. */
struct aio_extra;

/* The thread pool used for all requests; NULL until init_aio_threadpool()
   has created it. */
static struct pthreadpool *pool;

/* Source of job ids: each new private data struct takes the current value
   and increments it. */
static int aio_pthread_jobid;
34 struct aio_private_data {
35 struct aio_private_data *prev, *next;
36 int jobid;
37 SMB_STRUCT_AIOCB *aiocb;
38 ssize_t ret_size;
39 int ret_errno;
40 bool cancelled;
41 bool write_command;
/*
 * List of outstanding requests we have.
 * Kept file-local so the symbol cannot collide (or be merged as a common
 * symbol) with a same-named global in another module linked into the
 * same binary.
 */
static struct aio_private_data *pd_list;
47 static void aio_pthread_handle_completion(struct event_context *event_ctx,
48 struct fd_event *event,
49 uint16 flags,
50 void *p);
/************************************************************************
 How many threads to initialize ?
 Twice the number of cores reported by the system, but never fewer
 than 1 and never more than 4.
***********************************************************************/

static int aio_get_num_threads(void)
{
	int cores = sys_get_number_of_cores();
	int wanted;

	DEBUG(10,("aio_get_num_threads: sys_get_number_of_cores "
		"returned %d\n",
		cores));

	/* Doubling means even a single processor box gets a little
	   concurrency. */
	wanted = cores * 2;
	if (wanted < 1) {
		/* Covers a zero or negative core count. */
		wanted = 1;
	}

	return MIN(4, wanted);
}
#if 0
/************************************************************************
 Called every 30 seconds to destroy pool if it's idle.
 Currently compiled out.
***********************************************************************/

static void idle_pool_destroy_timer(struct tevent_context *ev,
			struct tevent_timer *te,
			struct timeval current_time,
			void *private_data)
{
	struct timeval next_check;

	/* One-shot timer: release it, a new one is added below if needed. */
	TALLOC_FREE(te);

	if (pool && pd_list == NULL) {
		/* No outstanding requests: try to tear the pool down. */
		if (pthreadpool_destroy(pool) == 0) {
			pool = NULL;
		}
		DEBUG(10,("idle_pool_destroy_timer: destroyed AIO pool.\n"));
		return;
	}

	/* Here, the IO is still active. */

	/* Set an event up for 30 seconds time - if we have
	   no outstanding IO at this time shut the threadpool
	   down. */
	next_check = tevent_timeval_current_ofs(30, 0);
	tevent_add_timer(server_event_context(),
			NULL,
			next_check,
			idle_pool_destroy_timer,
			NULL);
}
#endif
107 /************************************************************************
108 Ensure thread pool is initialized.
109 ***********************************************************************/
111 static bool init_aio_threadpool(void)
113 struct fd_event *sock_event = NULL;
114 int ret = 0;
115 int num_threads;
116 #if 0
117 struct timeval ne;
118 #endif
120 if (pool) {
121 return true;
124 num_threads = aio_get_num_threads();
125 ret = pthreadpool_init(num_threads, &pool);
126 if (ret) {
127 errno = ret;
128 return false;
130 sock_event = tevent_add_fd(server_event_context(),
131 NULL,
132 pthreadpool_signal_fd(pool),
133 TEVENT_FD_READ,
134 aio_pthread_handle_completion,
135 NULL);
136 if (sock_event == NULL) {
137 pthreadpool_destroy(pool);
138 pool = NULL;
139 return false;
142 #if 0
143 /* Set an event up for 30 seconds time - if we have
144 no outstanding IO at this time shut the threadpool
145 down. */
146 ne = tevent_timeval_current_ofs(30, 0);
147 tevent_add_timer(server_event_context(),
148 NULL,
150 idle_pool_destroy_timer,
151 NULL);
152 #endif
154 DEBUG(10,("init_aio_threadpool: initialized with %d threads\n",
155 num_threads));
157 return true;
161 /************************************************************************
162 Worker function - core of the pthread aio engine.
163 This is the function that actually does the IO.
164 ***********************************************************************/
166 static void aio_worker(void *private_data)
168 struct aio_private_data *pd =
169 (struct aio_private_data *)private_data;
171 if (pd->write_command) {
172 pd->ret_size = pwrite(pd->aiocb->aio_fildes,
173 (const void *)pd->aiocb->aio_buf,
174 pd->aiocb->aio_nbytes,
175 pd->aiocb->aio_offset);
176 } else {
177 pd->ret_size = pread(pd->aiocb->aio_fildes,
178 (void *)pd->aiocb->aio_buf,
179 pd->aiocb->aio_nbytes,
180 pd->aiocb->aio_offset);
182 if (pd->ret_size == -1) {
183 pd->ret_errno = errno;
184 } else {
185 pd->ret_errno = 0;
189 /************************************************************************
190 Private data destructor.
191 ***********************************************************************/
193 static int pd_destructor(struct aio_private_data *pd)
195 DLIST_REMOVE(pd_list, pd);
196 return 0;
199 /************************************************************************
200 Create and initialize a private data struct.
201 ***********************************************************************/
203 static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
204 SMB_STRUCT_AIOCB *aiocb)
206 struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
207 if (!pd) {
208 return NULL;
210 pd->jobid = aio_pthread_jobid++;
211 pd->aiocb = aiocb;
212 pd->ret_size = -1;
213 pd->ret_errno = EINPROGRESS;
214 talloc_set_destructor(pd, pd_destructor);
215 DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
216 return pd;
219 /************************************************************************
220 Spin off a threadpool (if needed) and initiate a pread call.
221 ***********************************************************************/
223 static int aio_pthread_read(struct vfs_handle_struct *handle,
224 struct files_struct *fsp,
225 SMB_STRUCT_AIOCB *aiocb)
227 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
228 struct aio_private_data *pd = NULL;
229 int ret;
231 if (!init_aio_threadpool()) {
232 return -1;
235 pd = create_private_data(aio_ex, aiocb);
236 if (pd == NULL) {
237 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
238 return -1;
241 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
242 if (ret) {
243 errno = ret;
244 return -1;
247 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
248 "of %llu bytes at offset %llu\n",
249 pd->jobid,
250 (unsigned long long)pd->aiocb->aio_nbytes,
251 (unsigned long long)pd->aiocb->aio_offset));
253 return 0;
256 /************************************************************************
257 Spin off a threadpool (if needed) and initiate a pwrite call.
258 ***********************************************************************/
260 static int aio_pthread_write(struct vfs_handle_struct *handle,
261 struct files_struct *fsp,
262 SMB_STRUCT_AIOCB *aiocb)
264 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
265 struct aio_private_data *pd = NULL;
266 int ret;
268 if (!init_aio_threadpool()) {
269 return -1;
272 pd = create_private_data(aio_ex, aiocb);
273 if (pd == NULL) {
274 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
275 return -1;
278 pd->write_command = true;
280 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
281 if (ret) {
282 errno = ret;
283 return -1;
286 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
287 "of %llu bytes at offset %llu\n",
288 pd->jobid,
289 (unsigned long long)pd->aiocb->aio_nbytes,
290 (unsigned long long)pd->aiocb->aio_offset));
292 return 0;
295 /************************************************************************
296 Find the private data by jobid.
297 ***********************************************************************/
299 static struct aio_private_data *find_private_data_by_jobid(int jobid)
301 struct aio_private_data *pd;
303 for (pd = pd_list; pd != NULL; pd = pd->next) {
304 if (pd->jobid == jobid) {
305 return pd;
309 return NULL;
312 /************************************************************************
313 Callback when an IO completes.
314 ***********************************************************************/
316 static void aio_pthread_handle_completion(struct event_context *event_ctx,
317 struct fd_event *event,
318 uint16 flags,
319 void *p)
321 struct aio_extra *aio_ex = NULL;
322 struct aio_private_data *pd = NULL;
323 int jobid = 0;
324 int ret;
326 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
327 (int)flags));
329 if ((flags & EVENT_FD_READ) == 0) {
330 return;
333 ret = pthreadpool_finished_job(pool, &jobid);
334 if (ret) {
335 smb_panic("aio_pthread_handle_completion");
336 return;
339 pd = find_private_data_by_jobid(jobid);
340 if (pd == NULL) {
341 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
342 jobid));
343 return;
346 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
347 smbd_aio_complete_aio_ex(aio_ex);
349 DEBUG(10,("aio_pthread_handle_completion: jobid %d completed\n",
350 jobid ));
354 /************************************************************************
355 Find the private data by aiocb.
356 ***********************************************************************/
358 static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
360 struct aio_private_data *pd;
362 for (pd = pd_list; pd != NULL; pd = pd->next) {
363 if (pd->aiocb == aiocb) {
364 return pd;
368 return NULL;
371 /************************************************************************
372 Called to return the result of a completed AIO.
373 Should only be called if aio_error returns something other than EINPROGRESS.
374 Returns:
375 Any other value - return from IO operation.
376 ***********************************************************************/
378 static ssize_t aio_pthread_return_fn(struct vfs_handle_struct *handle,
379 struct files_struct *fsp,
380 SMB_STRUCT_AIOCB *aiocb)
382 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
384 if (pd == NULL) {
385 errno = EINVAL;
386 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
387 return -1;
390 pd->aiocb = NULL;
392 if (pd->ret_size == -1) {
393 errno = pd->ret_errno;
396 return pd->ret_size;
399 /************************************************************************
400 Called to check the result of an AIO.
401 Returns:
402 EINPROGRESS - still in progress.
403 EINVAL - invalid aiocb.
404 ECANCELED - request was cancelled.
405 0 - request completed successfully.
406 Any other value - errno from IO operation.
407 ***********************************************************************/
409 static int aio_pthread_error_fn(struct vfs_handle_struct *handle,
410 struct files_struct *fsp,
411 SMB_STRUCT_AIOCB *aiocb)
413 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
415 if (pd == NULL) {
416 return EINVAL;
418 if (pd->cancelled) {
419 return ECANCELED;
421 return pd->ret_errno;
424 /************************************************************************
425 Called to request the cancel of an AIO, or all of them on a specific
426 fsp if aiocb == NULL.
427 ***********************************************************************/
429 static int aio_pthread_cancel(struct vfs_handle_struct *handle,
430 struct files_struct *fsp,
431 SMB_STRUCT_AIOCB *aiocb)
433 struct aio_private_data *pd = NULL;
435 for (pd = pd_list; pd != NULL; pd = pd->next) {
436 if (pd->aiocb == NULL) {
437 continue;
439 if (pd->aiocb->aio_fildes != fsp->fh->fd) {
440 continue;
442 if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
443 continue;
447 * We let the child do its job, but we discard the result when
448 * it's finished.
451 pd->cancelled = true;
454 return AIO_CANCELED;
457 /************************************************************************
458 Callback for a previously detected job completion.
459 ***********************************************************************/
461 static void aio_pthread_handle_immediate(struct tevent_context *ctx,
462 struct tevent_immediate *im,
463 void *private_data)
465 struct aio_extra *aio_ex = NULL;
466 int *pjobid = (int *)private_data;
467 struct aio_private_data *pd = find_private_data_by_jobid(*pjobid);
469 if (pd == NULL) {
470 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
471 *pjobid));
472 TALLOC_FREE(pjobid);
473 return;
476 TALLOC_FREE(pjobid);
477 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
478 smbd_aio_complete_aio_ex(aio_ex);
481 /************************************************************************
482 Private data struct used in suspend completion code.
483 ***********************************************************************/
485 struct suspend_private {
486 int num_entries;
487 int num_finished;
488 const SMB_STRUCT_AIOCB * const *aiocb_array;
491 /************************************************************************
492 Callback when an IO completes from a suspend call.
493 ***********************************************************************/
495 static void aio_pthread_handle_suspend_completion(struct event_context *event_ctx,
496 struct fd_event *event,
497 uint16 flags,
498 void *p)
500 struct suspend_private *sp = (struct suspend_private *)p;
501 struct aio_private_data *pd = NULL;
502 struct tevent_immediate *im = NULL;
503 int *pjobid = NULL;
504 int i;
506 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
507 (int)flags));
509 if ((flags & EVENT_FD_READ) == 0) {
510 return;
513 pjobid = talloc_array(NULL, int, 1);
514 if (pjobid) {
515 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
518 if (pthreadpool_finished_job(pool, pjobid)) {
519 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
520 return;
523 pd = find_private_data_by_jobid(*pjobid);
524 if (pd == NULL) {
525 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
526 *pjobid));
527 TALLOC_FREE(pjobid);
528 return;
531 /* Is this a jobid with an aiocb we're interested in ? */
532 for (i = 0; i < sp->num_entries; i++) {
533 if (sp->aiocb_array[i] == pd->aiocb) {
534 sp->num_finished++;
535 TALLOC_FREE(pjobid);
536 return;
540 /* Jobid completed we weren't waiting for.
541 We must reshedule this as an immediate event
542 on the main event context. */
543 im = tevent_create_immediate(NULL);
544 if (!im) {
545 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
548 DEBUG(10,("aio_pthread_handle_suspend_completion: "
549 "re-scheduling job id %d\n",
550 *pjobid));
552 tevent_schedule_immediate(im,
553 server_event_context(),
554 aio_pthread_handle_immediate,
555 (void *)pjobid);
559 static void aio_pthread_suspend_timed_out(struct tevent_context *event_ctx,
560 struct tevent_timer *te,
561 struct timeval now,
562 void *private_data)
564 bool *timed_out = (bool *)private_data;
565 /* Remove this timed event handler. */
566 TALLOC_FREE(te);
567 *timed_out = true;
570 /************************************************************************
571 Called to request everything to stop until all IO is completed.
572 ***********************************************************************/
574 static int aio_pthread_suspend(struct vfs_handle_struct *handle,
575 struct files_struct *fsp,
576 const SMB_STRUCT_AIOCB * const aiocb_array[],
577 int n,
578 const struct timespec *timeout)
580 struct event_context *ev = NULL;
581 struct fd_event *sock_event = NULL;
582 int ret = -1;
583 struct suspend_private sp;
584 bool timed_out = false;
585 TALLOC_CTX *frame = talloc_stackframe();
587 /* This is a blocking call, and has to use a sub-event loop. */
588 ev = event_context_init(frame);
589 if (ev == NULL) {
590 errno = ENOMEM;
591 goto out;
594 if (timeout) {
595 struct timeval tv = convert_timespec_to_timeval(*timeout);
596 struct tevent_timer *te = tevent_add_timer(ev,
597 frame,
598 timeval_current_ofs(tv.tv_sec,
599 tv.tv_usec),
600 aio_pthread_suspend_timed_out,
601 &timed_out);
602 if (!te) {
603 errno = ENOMEM;
604 goto out;
608 ZERO_STRUCT(sp);
609 sp.num_entries = n;
610 sp.aiocb_array = aiocb_array;
611 sp.num_finished = 0;
613 sock_event = tevent_add_fd(ev,
614 frame,
615 pthreadpool_signal_fd(pool),
616 TEVENT_FD_READ,
617 aio_pthread_handle_suspend_completion,
618 (void *)&sp);
619 if (sock_event == NULL) {
620 pthreadpool_destroy(pool);
621 pool = NULL;
622 goto out;
625 * We're going to cheat here. We know that smbd/aio.c
626 * only calls this when it's waiting for every single
627 * outstanding call to finish on a close, so just wait
628 * individually for each IO to complete. We don't care
629 * what order they finish - only that they all do. JRA.
631 while (sp.num_entries != sp.num_finished) {
632 if (tevent_loop_once(ev) == -1) {
633 goto out;
636 if (timed_out) {
637 errno = EAGAIN;
638 goto out;
642 ret = 0;
644 out:
646 TALLOC_FREE(frame);
647 return ret;
650 static struct vfs_fn_pointers vfs_aio_pthread_fns = {
651 .aio_read_fn = aio_pthread_read,
652 .aio_write_fn = aio_pthread_write,
653 .aio_return_fn = aio_pthread_return_fn,
654 .aio_cancel_fn = aio_pthread_cancel,
655 .aio_error_fn = aio_pthread_error_fn,
656 .aio_suspend_fn = aio_pthread_suspend,
659 NTSTATUS vfs_aio_pthread_init(void);
660 NTSTATUS vfs_aio_pthread_init(void)
662 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
663 "aio_pthread", &vfs_aio_pthread_fns);