2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "pthreadpool.h"
/*
 * Module-wide state: the worker thread pool handle that is passed to the
 * pthreadpool_* calls in this file, and the counter from which
 * create_private_data() takes the next job id.
 */
31 static struct pthreadpool
*pool
;
32 static int aio_pthread_jobid
;
/*
 * Per-request bookkeeping record. prev and next link the record into
 * pd_list (maintained with DLIST_ADD_END and DLIST_REMOVE); aiocb points
 * at the control block of the request being tracked.
 * NOTE(review): other members used elsewhere in this file (jobid,
 * ret_size, ret_errno, write_command, cancelled) are not visible in this
 * listing.
 */
34 struct aio_private_data
{
35 struct aio_private_data
*prev
, *next
;
37 SMB_STRUCT_AIOCB
*aiocb
;
44 /* List of outstanding requests we have. Records are appended by
   create_private_data() and unlinked again by pd_destructor(). */
45 struct aio_private_data
*pd_list
;
/*
 * Forward declaration: init_aio_threadpool() registers this handler
 * before the definition appears further down in the file.
 * NOTE(review): the trailing parameters are not visible in this listing.
 */
47 static void aio_pthread_handle_completion(struct event_context
*event_ctx
,
48 struct fd_event
*event
,
52 /************************************************************************
53 How many threads to initialize?
54 ***********************************************************************/
/*
 * Choose the thread count that init_aio_threadpool() hands to
 * pthreadpool_init(). Starts from sys_get_number_of_cores(), logs it at
 * debug level 10, and the visible return statement yields
 * MIN(4, num_cores), i.e. never more than 4.
 * NOTE(review): any adjustment of num_cores between the DEBUG call and
 * the return is not visible in this listing.
 */
56 static int aio_get_num_threads(void)
58 int num_cores
= sys_get_number_of_cores();
59 DEBUG(10,("aio_get_num_threads: sys_get_number_of_cores "
66 /* Even on a single processor box give a little
68 return MIN(4,num_cores
);
72 /************************************************************************
73 Called every 30 seconds to destroy pool if it's idle.
74 ***********************************************************************/
/*
 * Timer callback. When a pool exists and pd_list is empty it calls
 * pthreadpool_destroy() and logs success at debug level 10. The code
 * after the IO-still-active comment schedules this same callback again
 * on server_event_context(), 30 seconds from now.
 * NOTE(review): the trailing parameter, the handling of a non-zero
 * pthreadpool_destroy() result and the exact branch structure are not
 * visible in this listing - confirm.
 */
76 static void idle_pool_destroy_timer(struct tevent_context
*ev
,
77 struct tevent_timer
*te
,
78 struct timeval current_time
,
85 if (pool
&& pd_list
== NULL
) {
86 if (pthreadpool_destroy(pool
) == 0) {
89 DEBUG(10,("idle_pool_destroy_timer: destroyed AIO pool.\n"));
93 /* Here, the IO is still active. */
95 /* Set an event up for 30 seconds time - if we have
96 no outstanding IO at this time shut the threadpool
98 ne
= tevent_timeval_current_ofs(30, 0);
99 tevent_add_timer(server_event_context(),
102 idle_pool_destroy_timer
,
107 /************************************************************************
108 Ensure thread pool is initialized.
109 ***********************************************************************/
/*
 * Set up the thread pool: size it with aio_get_num_threads(), create it
 * with pthreadpool_init(), watch pthreadpool_signal_fd(pool) on
 * server_event_context() with aio_pthread_handle_completion as the
 * handler (the pool is destroyed again when that registration returns
 * NULL), and arm idle_pool_destroy_timer for 30 seconds from now.
 * Callers treat a false return value as failure.
 * NOTE(review): the path taken when a pool already exists and the
 * handling of a pthreadpool_init() error are presumably in lines missing
 * from this listing - confirm.
 */
111 static bool init_aio_threadpool(void)
113 struct fd_event
*sock_event
= NULL
;
124 num_threads
= aio_get_num_threads();
125 ret
= pthreadpool_init(num_threads
, &pool
);
130 sock_event
= tevent_add_fd(server_event_context(),
132 pthreadpool_signal_fd(pool
),
134 aio_pthread_handle_completion
,
136 if (sock_event
== NULL
) {
137 pthreadpool_destroy(pool
);
143 /* Set an event up for 30 seconds time - if we have
144 no outstanding IO at this time shut the threadpool
146 ne
= tevent_timeval_current_ofs(30, 0);
147 tevent_add_timer(server_event_context(),
150 idle_pool_destroy_timer
,
154 DEBUG(10,("init_aio_threadpool: initialized with %d threads\n",
161 /************************************************************************
162 Worker function - core of the pthread aio engine.
163 This is the function that actually does the IO.
164 ***********************************************************************/
/*
 * Job function handed to pthreadpool_add_job(). private_data is the
 * struct aio_private_data of the request. A pwrite() is issued when
 * pd->write_command is set; the pread() call is presumably the else
 * branch (the else line itself is not visible - TODO confirm). Both
 * calls take fd, buffer, length and offset from pd->aiocb and store the
 * result in pd->ret_size. When ret_size is -1, errno is saved in
 * pd->ret_errno.
 * NOTE(review): what ret_errno is set to on success is not visible in
 * this listing.
 */
166 static void aio_worker(void *private_data
)
168 struct aio_private_data
*pd
=
169 (struct aio_private_data
*)private_data
;
171 if (pd
->write_command
) {
172 pd
->ret_size
= pwrite(pd
->aiocb
->aio_fildes
,
173 (const void *)pd
->aiocb
->aio_buf
,
174 pd
->aiocb
->aio_nbytes
,
175 pd
->aiocb
->aio_offset
);
177 pd
->ret_size
= pread(pd
->aiocb
->aio_fildes
,
178 (void *)pd
->aiocb
->aio_buf
,
179 pd
->aiocb
->aio_nbytes
,
180 pd
->aiocb
->aio_offset
);
182 if (pd
->ret_size
== -1) {
183 pd
->ret_errno
= errno
;
189 /************************************************************************
190 Private data destructor.
191 ***********************************************************************/
/*
 * Destructor installed on each record by create_private_data() through
 * talloc_set_destructor(): unlinks pd from pd_list.
 * NOTE(review): the return statement is not visible in this listing.
 */
193 static int pd_destructor(struct aio_private_data
*pd
)
195 DLIST_REMOVE(pd_list
, pd
);
199 /************************************************************************
200 Create and initialize a private data struct.
201 ***********************************************************************/
/*
 * Allocate a zeroed struct aio_private_data with ctx as talloc parent,
 * give it the next job id (post-incrementing aio_pthread_jobid), mark it
 * as in progress by setting ret_errno to EINPROGRESS, install
 * pd_destructor and append the record to the end of pd_list.
 * NOTE(review): the allocation-failure check, the assignment of
 * pd->aiocb and the return statement are not visible in this listing;
 * callers log a could-not-create message, so a NULL return is presumably
 * possible - confirm.
 */
203 static struct aio_private_data
*create_private_data(TALLOC_CTX
*ctx
,
204 SMB_STRUCT_AIOCB
*aiocb
)
206 struct aio_private_data
*pd
= talloc_zero(ctx
, struct aio_private_data
);
210 pd
->jobid
= aio_pthread_jobid
++;
213 pd
->ret_errno
= EINPROGRESS
;
214 talloc_set_destructor(pd
, pd_destructor
);
215 DLIST_ADD_END(pd_list
, pd
, struct aio_private_data
*);
219 /************************************************************************
220 Spin off a threadpool (if needed) and initiate a pread call.
221 ***********************************************************************/
/*
 * VFS aio_read hook: queue an asynchronous pread. Takes the aio_extra
 * from aiocb->aio_sigevent.sigev_value.sival_ptr, makes sure the pool
 * exists via init_aio_threadpool(), creates the private record with
 * aio_ex as its talloc parent and submits aio_worker under pd->jobid
 * with pthreadpool_add_job(). The request size and offset are logged at
 * debug level 10.
 * NOTE(review): the error checks around each call and the values
 * returned on each path are not visible in this listing.
 */
223 static int aio_pthread_read(struct vfs_handle_struct
*handle
,
224 struct files_struct
*fsp
,
225 SMB_STRUCT_AIOCB
*aiocb
)
227 struct aio_extra
*aio_ex
= (struct aio_extra
*)aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
228 struct aio_private_data
*pd
= NULL
;
231 if (!init_aio_threadpool()) {
235 pd
= create_private_data(aio_ex
, aiocb
);
237 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
241 ret
= pthreadpool_add_job(pool
, pd
->jobid
, aio_worker
, (void *)pd
);
247 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
248 "of %llu bytes at offset %llu\n",
250 (unsigned long long)pd
->aiocb
->aio_nbytes
,
251 (unsigned long long)pd
->aiocb
->aio_offset
));
256 /************************************************************************
257 Spin off a threadpool (if needed) and initiate a pwrite call.
258 ***********************************************************************/
/*
 * VFS aio_write hook: queue an asynchronous pwrite. Same sequence as
 * aio_pthread_read() - fetch aio_extra from the aiocb, ensure the pool
 * exists, create the private record, submit aio_worker with
 * pthreadpool_add_job() - except that pd->write_command is set to true
 * before submission so that aio_worker() issues pwrite().
 * NOTE(review): the error checks around each call and the values
 * returned on each path are not visible in this listing.
 */
260 static int aio_pthread_write(struct vfs_handle_struct
*handle
,
261 struct files_struct
*fsp
,
262 SMB_STRUCT_AIOCB
*aiocb
)
264 struct aio_extra
*aio_ex
= (struct aio_extra
*)aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
265 struct aio_private_data
*pd
= NULL
;
268 if (!init_aio_threadpool()) {
272 pd
= create_private_data(aio_ex
, aiocb
);
274 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
278 pd
->write_command
= true;
280 ret
= pthreadpool_add_job(pool
, pd
->jobid
, aio_worker
, (void *)pd
);
286 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
287 "of %llu bytes at offset %llu\n",
289 (unsigned long long)pd
->aiocb
->aio_nbytes
,
290 (unsigned long long)pd
->aiocb
->aio_offset
));
295 /************************************************************************
296 Find the private data by jobid.
297 ***********************************************************************/
/*
 * Linear scan of pd_list for the record whose jobid equals the argument.
 * NOTE(review): the return statements are not visible in this listing;
 * callers log a cannot-find message, so NULL is presumably returned when
 * nothing matches - confirm.
 */
299 static struct aio_private_data
*find_private_data_by_jobid(int jobid
)
301 struct aio_private_data
*pd
;
303 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
304 if (pd
->jobid
== jobid
) {
312 /************************************************************************
313 Callback when an IO completes.
314 ***********************************************************************/
/*
 * fd event handler that init_aio_threadpool() registers on the pool
 * signal fd. Tests flags for EVENT_FD_READ, fetches the id of a finished
 * job with pthreadpool_finished_job() (the smb_panic call that follows is
 * presumably its failure path - TODO confirm), maps the id to its record
 * with find_private_data_by_jobid() and passes the aio_extra stored in
 * aiocb->aio_sigevent.sigev_value.sival_ptr to
 * smbd_aio_complete_aio_ex().
 * NOTE(review): the trailing parameters and the bodies of the error
 * branches are not visible in this listing.
 */
316 static void aio_pthread_handle_completion(struct event_context
*event_ctx
,
317 struct fd_event
*event
,
321 struct aio_extra
*aio_ex
= NULL
;
322 struct aio_private_data
*pd
= NULL
;
326 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
329 if ((flags
& EVENT_FD_READ
) == 0) {
333 ret
= pthreadpool_finished_job(pool
, &jobid
);
335 smb_panic("aio_pthread_handle_completion");
339 pd
= find_private_data_by_jobid(jobid
);
341 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
346 aio_ex
= (struct aio_extra
*)pd
->aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
347 smbd_aio_complete_aio_ex(aio_ex
);
349 DEBUG(10,("aio_pthread_handle_completion: jobid %d completed\n",
354 /************************************************************************
355 Find the private data by aiocb.
356 ***********************************************************************/
/*
 * Linear scan of pd_list for the record whose aiocb pointer equals the
 * argument (pointer identity, not content).
 * NOTE(review): the return statements are not visible in this listing;
 * NULL is presumably returned when nothing matches - confirm.
 */
358 static struct aio_private_data
*find_private_data_by_aiocb(SMB_STRUCT_AIOCB
*aiocb
)
360 struct aio_private_data
*pd
;
362 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
363 if (pd
->aiocb
== aiocb
) {
371 /************************************************************************
372 Called to return the result of a completed AIO.
373 Should only be called if aio_error returns something other than EINPROGRESS.
375 Any other value - return from IO operation.
376 ***********************************************************************/
/*
 * VFS aio_return hook. Looks the request up with
 * find_private_data_by_aiocb(). The visible code logs a level 0 message
 * about returning EINVAL (presumably when the lookup fails - TODO
 * confirm) and copies pd->ret_errno into errno when pd->ret_size is -1.
 * NOTE(review): the returned value and any cleanup of the record are not
 * visible in this listing.
 */
378 static ssize_t
aio_pthread_return_fn(struct vfs_handle_struct
*handle
,
379 struct files_struct
*fsp
,
380 SMB_STRUCT_AIOCB
*aiocb
)
382 struct aio_private_data
*pd
= find_private_data_by_aiocb(aiocb
);
386 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
392 if (pd
->ret_size
== -1) {
393 errno
= pd
->ret_errno
;
399 /************************************************************************
400 Called to check the result of an AIO.
402 EINPROGRESS - still in progress.
403 EINVAL - invalid aiocb.
404 ECANCELED - request was cancelled.
405 0 - request completed successfully.
406 Any other value - errno from IO operation.
407 ***********************************************************************/
/*
 * VFS aio_error hook. Looks the request up with
 * find_private_data_by_aiocb() and returns its ret_errno, which
 * create_private_data() initialises to EINPROGRESS and aio_worker()
 * overwrites with errno when the IO call returns -1.
 * NOTE(review): the handling of a failed lookup and of cancelled requests
 * lies in lines missing from this listing.
 */
409 static int aio_pthread_error_fn(struct vfs_handle_struct
*handle
,
410 struct files_struct
*fsp
,
411 SMB_STRUCT_AIOCB
*aiocb
)
413 struct aio_private_data
*pd
= find_private_data_by_aiocb(aiocb
);
421 return pd
->ret_errno
;
424 /************************************************************************
425 Called to request the cancel of an AIO, or all of them on a specific
426 fsp if aiocb == NULL.
427 ***********************************************************************/
/*
 * VFS aio_cancel hook. Walks pd_list and tests each record for a NULL
 * aiocb, for a file descriptor different from fsp->fh->fd and, when the
 * aiocb argument is non-NULL, for a different aiocb; presumably each of
 * these tests skips the record (the branch bodies are not visible - TODO
 * confirm). A record that gets through is flagged with cancelled = true.
 * Per the inner comment the worker is left to finish its job and the
 * result is discarded.
 * NOTE(review): the return value is not visible in this listing.
 */
429 static int aio_pthread_cancel(struct vfs_handle_struct
*handle
,
430 struct files_struct
*fsp
,
431 SMB_STRUCT_AIOCB
*aiocb
)
433 struct aio_private_data
*pd
= NULL
;
435 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
436 if (pd
->aiocb
== NULL
) {
439 if (pd
->aiocb
->aio_fildes
!= fsp
->fh
->fd
) {
442 if ((aiocb
!= NULL
) && (pd
->aiocb
!= aiocb
)) {
447 * We let the child do its job, but we discard the result when
451 pd
->cancelled
= true;
457 /************************************************************************
458 Callback for a previously detected job completion.
459 ***********************************************************************/
/*
 * Immediate-event callback scheduled by
 * aio_pthread_handle_suspend_completion(). private_data points at an int
 * holding the job id; the matching record is looked up with
 * find_private_data_by_jobid() and its aio_extra is passed to
 * smbd_aio_complete_aio_ex(). A level 1 DEBUG message exists for a job id
 * that cannot be found.
 * NOTE(review): the trailing parameter and the release of im and of the
 * job id allocation are not visible in this listing.
 */
461 static void aio_pthread_handle_immediate(struct tevent_context
*ctx
,
462 struct tevent_immediate
*im
,
465 struct aio_extra
*aio_ex
= NULL
;
466 int *pjobid
= (int *)private_data
;
467 struct aio_private_data
*pd
= find_private_data_by_jobid(*pjobid
);
470 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
477 aio_ex
= (struct aio_extra
*)pd
->aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
478 smbd_aio_complete_aio_ex(aio_ex
);
481 /************************************************************************
482 Private data struct used in suspend completion code.
483 ***********************************************************************/
/*
 * State shared between aio_pthread_suspend() and its completion handler.
 * aiocb_array is the caller-supplied array of requests being waited for.
 * NOTE(review): the num_entries and num_finished members used by those
 * functions are not visible in this listing.
 */
485 struct suspend_private
{
488 const SMB_STRUCT_AIOCB
* const *aiocb_array
;
491 /************************************************************************
492 Callback when an IO completes from a suspend call.
493 ***********************************************************************/
/*
 * fd event handler used inside the aio_pthread_suspend() sub-event loop;
 * p is the struct suspend_private of that call. Tests flags for
 * EVENT_FD_READ, allocates a one-element int array for the job id,
 * fetches a finished job id with pthreadpool_finished_job() (smb_panic on
 * a non-zero return) and looks up its record. The record aiocb is then
 * compared against every entry of sp->aiocb_array. Per the comment
 * below, a job that was not being waited for is handed to
 * aio_pthread_handle_immediate through an immediate event on
 * server_event_context().
 * NOTE(review): the trailing parameters, the update of the finished
 * counter on a match and the early returns are not visible in this
 * listing.
 */
495 static void aio_pthread_handle_suspend_completion(struct event_context
*event_ctx
,
496 struct fd_event
*event
,
500 struct suspend_private
*sp
= (struct suspend_private
*)p
;
501 struct aio_private_data
*pd
= NULL
;
502 struct tevent_immediate
*im
= NULL
;
506 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
509 if ((flags
& EVENT_FD_READ
) == 0) {
513 pjobid
= talloc_array(NULL
, int, 1);
515 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
518 if (pthreadpool_finished_job(pool
, pjobid
)) {
519 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
523 pd
= find_private_data_by_jobid(*pjobid
);
525 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
531 /* Is this a jobid with an aiocb we're interested in ? */
532 for (i
= 0; i
< sp
->num_entries
; i
++) {
533 if (sp
->aiocb_array
[i
] == pd
->aiocb
) {
540 /* Jobid completed we weren't waiting for.
541 We must reschedule this as an immediate event
542 on the main event context. */
543 im
= tevent_create_immediate(NULL
);
545 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
548 DEBUG(10,("aio_pthread_handle_suspend_completion: "
549 "re-scheduling job id %d\n",
552 tevent_schedule_immediate(im
,
553 server_event_context(),
554 aio_pthread_handle_immediate
,
/*
 * Timer callback that aio_pthread_suspend() arms for its timeout.
 * private_data is cast to a bool pointer named timed_out.
 * NOTE(review): the statements that use the pointer and remove the timer
 * are not visible in this listing - presumably the flag is set to true,
 * confirm.
 */
559 static void aio_pthread_suspend_timed_out(struct tevent_context
*event_ctx
,
560 struct tevent_timer
*te
,
564 bool *timed_out
= (bool *)private_data
;
565 /* Remove this timed event handler. */
570 /************************************************************************
571 Called to request everything to stop until all IO is completed.
572 ***********************************************************************/
/*
 * VFS aio_suspend hook. Blocks in a private event loop: creates an event
 * context on a talloc stackframe, arms aio_pthread_suspend_timed_out as a
 * timer derived from *timeout (converted with
 * convert_timespec_to_timeval), registers
 * aio_pthread_handle_suspend_completion on pthreadpool_signal_fd(pool)
 * and calls tevent_loop_once() until sp.num_finished equals
 * sp.num_entries.
 * NOTE(review): pthreadpool_destroy(pool) is called when the fd
 * registration fails; destroying the shared pool here while requests may
 * still be outstanding looks questionable - confirm intent.
 * NOTE(review): the remaining parameter, the NULL check on timeout, the
 * use of timed_out, the cleanup and the return value are not visible in
 * this listing.
 */
574 static int aio_pthread_suspend(struct vfs_handle_struct
*handle
,
575 struct files_struct
*fsp
,
576 const SMB_STRUCT_AIOCB
* const aiocb_array
[],
578 const struct timespec
*timeout
)
580 struct event_context
*ev
= NULL
;
581 struct fd_event
*sock_event
= NULL
;
583 struct suspend_private sp
;
584 bool timed_out
= false;
585 TALLOC_CTX
*frame
= talloc_stackframe();
587 /* This is a blocking call, and has to use a sub-event loop. */
588 ev
= event_context_init(frame
);
595 struct timeval tv
= convert_timespec_to_timeval(*timeout
);
596 struct tevent_timer
*te
= tevent_add_timer(ev
,
598 timeval_current_ofs(tv
.tv_sec
,
600 aio_pthread_suspend_timed_out
,
610 sp
.aiocb_array
= aiocb_array
;
613 sock_event
= tevent_add_fd(ev
,
615 pthreadpool_signal_fd(pool
),
617 aio_pthread_handle_suspend_completion
,
619 if (sock_event
== NULL
) {
620 pthreadpool_destroy(pool
);
625 * We're going to cheat here. We know that smbd/aio.c
626 * only calls this when it's waiting for every single
627 * outstanding call to finish on a close, so just wait
628 * individually for each IO to complete. We don't care
629 * what order they finish - only that they all do. JRA.
631 while (sp
.num_entries
!= sp
.num_finished
) {
632 if (tevent_loop_once(ev
) == -1) {
/*
 * VFS operations table for this module: maps the aio read, write, return,
 * cancel, error and suspend hooks to the implementations in this file.
 */
650 static struct vfs_fn_pointers vfs_aio_pthread_fns
= {
651 .aio_read_fn
= aio_pthread_read
,
652 .aio_write_fn
= aio_pthread_write
,
653 .aio_return_fn
= aio_pthread_return_fn
,
654 .aio_cancel_fn
= aio_pthread_cancel
,
655 .aio_error_fn
= aio_pthread_error_fn
,
656 .aio_suspend_fn
= aio_pthread_suspend
,
/*
 * Module entry point (prototype followed by definition): registers the
 * vfs_aio_pthread_fns table under the name aio_pthread with
 * smb_register_vfs(), passing SMB_VFS_INTERFACE_VERSION, and returns the
 * resulting NTSTATUS.
 */
659 NTSTATUS
vfs_aio_pthread_init(void);
660 NTSTATUS
vfs_aio_pthread_init(void)
662 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION
,
663 "aio_pthread", &vfs_aio_pthread_fns
);