2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "lib/pthreadpool/pthreadpool.h"
/*
 * File-scope state shared by every function below:
 *  - pool is the thread pool handle that is filled in by pthreadpool_init()
 *    and passed to all other pthreadpool_* calls in this file.
 *  - aio_pthread_jobid is the counter create_private_data() post-increments
 *    to hand out a job id for each new request.
 */
31 static struct pthreadpool
*pool
;
32 static int aio_pthread_jobid
;
/*
 * Per-request bookkeeping record.
 * prev/next link the record into pd_list through the DLIST_* macros and
 * aiocb points at the request the record tracks.
 * NOTE(review): code elsewhere in this file also uses the members jobid,
 * ret_size, ret_errno, write_command and cancelled; their declarations and
 * the closing brace are not visible in this view - confirm against the
 * full file.
 */
34 struct aio_private_data
{
35 struct aio_private_data
*prev
, *next
;
37 SMB_STRUCT_AIOCB
*aiocb
;
44 /* List of outstanding requests we have. */
/*
 * Records are appended by create_private_data() (DLIST_ADD_END) and
 * unlinked by pd_destructor() (DLIST_REMOVE); the find_private_data_by_*()
 * helpers and aio_pthread_cancel() walk it linearly.
 */
45 static struct aio_private_data
*pd_list
;
/*
 * Forward declaration: init_aio_threadpool() passes this handler to
 * tevent_add_fd() before the definition appears further down.
 * NOTE(review): the remaining parameters of the prototype are not visible
 * in this view.
 */
47 static void aio_pthread_handle_completion(struct event_context
*event_ctx
,
48 struct fd_event
*event
,
52 /************************************************************************
53 How many threads to initialize ?
54 100 per process seems insane as a default until you realize that
55 (a) Threads terminate after 1 second when idle.
56 (b) Throttling is done in SMB2 via the crediting algorithm.
57 (c) SMB1 clients are limited to max_mux (50) outstanding requests and
58 Windows clients don't use this anyway.
59 Essentially we want this to be unlimited unless smb.conf says different.
60 ***********************************************************************/
62 static int aio_get_num_threads(struct vfs_handle_struct
*handle
)
64 return lp_parm_int(SNUM(handle
->conn
),
65 "aio_pthread", "aio num threads", 100);
68 /************************************************************************
69 Ensure thread pool is initialized.
70 ***********************************************************************/
/*
 * Visible steps: size the pool with aio_get_num_threads(), create it with
 * pthreadpool_init(), fetch the pool's signal fd, call
 * set_blocking(fd, false) on it, and register
 * aio_pthread_handle_completion for that fd on server_event_context().
 * If tevent_add_fd() returns NULL the pool is destroyed again.
 * Returns bool; aio_pthread_read()/aio_pthread_write() treat false as
 * failure.
 * NOTE(review): the local declarations, any "already initialized" early
 * exit, the check of pthreadpool_init()'s result, the remaining
 * tevent_add_fd() arguments and the return statements are not visible in
 * this view - confirm against the full file.
 */
72 static bool init_aio_threadpool(struct vfs_handle_struct
*handle
)
74 struct fd_event
*sock_event
= NULL
;
83 num_threads
= aio_get_num_threads(handle
);
84 ret
= pthreadpool_init(num_threads
, &pool
);
90 fd
= pthreadpool_signal_fd(pool
);
92 set_blocking(fd
, false);
94 sock_event
= tevent_add_fd(server_event_context(),
98 aio_pthread_handle_completion
,
100 if (sock_event
== NULL
) {
101 pthreadpool_destroy(pool
);
106 DEBUG(10,("init_aio_threadpool: initialized with up to %d threads\n",
113 /************************************************************************
114 Worker function - core of the pthread aio engine.
115 This is the function that actually does the IO.
116 ***********************************************************************/
/*
 * Job body handed to pthreadpool_add_job() by aio_pthread_read() and
 * aio_pthread_write(); private_data is the request's
 * struct aio_private_data.
 *
 * When pd->write_command is set the data is written with sys_pwrite() at
 * aiocb->aio_offset; if that fails with ESPIPE the write is retried with
 * sys_write(), which takes no offset.  The read path mirrors this with
 * sys_pread()/sys_read().  The transfer result is stored in pd->ret_size,
 * and when it is -1 the current errno is saved in pd->ret_errno.
 *
 * NOTE(review): the lines joining the write and read paths (presumably an
 * else branch) and whatever follows the errno capture are not visible in
 * this view - confirm how pd->ret_errno is set on success.
 */
118 static void aio_worker(void *private_data
)
120 struct aio_private_data
*pd
=
121 (struct aio_private_data
*)private_data
;
123 if (pd
->write_command
) {
124 pd
->ret_size
= sys_pwrite(pd
->aiocb
->aio_fildes
,
125 (const void *)pd
->aiocb
->aio_buf
,
126 pd
->aiocb
->aio_nbytes
,
127 pd
->aiocb
->aio_offset
);
128 if (pd
->ret_size
== -1 && errno
== ESPIPE
) {
129 /* Maintain the fiction that pipes can
130 be seeked (sought?) on. */
131 pd
->ret_size
= sys_write(pd
->aiocb
->aio_fildes
,
132 (const void *)pd
->aiocb
->aio_buf
,
133 pd
->aiocb
->aio_nbytes
);
136 pd
->ret_size
= sys_pread(pd
->aiocb
->aio_fildes
,
137 (void *)pd
->aiocb
->aio_buf
,
138 pd
->aiocb
->aio_nbytes
,
139 pd
->aiocb
->aio_offset
);
140 if (pd
->ret_size
== -1 && errno
== ESPIPE
) {
141 /* Maintain the fiction that pipes can
142 be seeked (sought?) on. */
143 pd
->ret_size
= sys_read(pd
->aiocb
->aio_fildes
,
144 (void *)pd
->aiocb
->aio_buf
,
145 pd
->aiocb
->aio_nbytes
);
148 if (pd
->ret_size
== -1) {
149 pd
->ret_errno
= errno
;
155 /************************************************************************
156 Private data destructor.
157 ***********************************************************************/
/*
 * Installed on every record by create_private_data() via
 * talloc_set_destructor(); unlinks the record from pd_list so the list
 * never holds a pointer to freed memory.
 * NOTE(review): the return statement is not visible in this view.
 */
159 static int pd_destructor(struct aio_private_data
*pd
)
161 DLIST_REMOVE(pd_list
, pd
);
165 /************************************************************************
166 Create and initialize a private data struct.
167 ***********************************************************************/
/*
 * Allocates a zeroed struct aio_private_data as a talloc child of ctx,
 * assigns it the next job id (post-incrementing aio_pthread_jobid), marks
 * it EINPROGRESS, installs pd_destructor and appends it to pd_list.
 * Callers log "Could not create private data" on failure, so a failure
 * result is expected to be possible.
 * NOTE(review): the allocation-failure check, the store of the aiocb
 * argument into the record and the return statement are not visible in
 * this view - confirm against the full file.
 */
169 static struct aio_private_data
*create_private_data(TALLOC_CTX
*ctx
,
170 SMB_STRUCT_AIOCB
*aiocb
)
172 struct aio_private_data
*pd
= talloc_zero(ctx
, struct aio_private_data
);
176 pd
->jobid
= aio_pthread_jobid
++;
179 pd
->ret_errno
= EINPROGRESS
;
180 talloc_set_destructor(pd
, pd_destructor
);
181 DLIST_ADD_END(pd_list
, pd
, struct aio_private_data
*);
185 /************************************************************************
186 Spin off a threadpool (if needed) and initiate a pread call.
187 ***********************************************************************/
/*
 * Takes the struct aio_extra stored in
 * aiocb->aio_sigevent.sigev_value.sival_ptr, makes sure the pool exists,
 * creates the bookkeeping record as a talloc child of that aio_extra and
 * queues aio_worker for it under the record's job id.  write_command is
 * left untouched here (the record is zero-allocated), which is what
 * distinguishes this from aio_pthread_write().
 * NOTE(review): the declaration of ret, the error paths after each call
 * and the return statements are not visible in this view.
 */
189 static int aio_pthread_read(struct vfs_handle_struct
*handle
,
190 struct files_struct
*fsp
,
191 SMB_STRUCT_AIOCB
*aiocb
)
193 struct aio_extra
*aio_ex
= (struct aio_extra
*)aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
194 struct aio_private_data
*pd
= NULL
;
197 if (!init_aio_threadpool(handle
)) {
201 pd
= create_private_data(aio_ex
, aiocb
);
203 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
207 ret
= pthreadpool_add_job(pool
, pd
->jobid
, aio_worker
, (void *)pd
);
213 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
214 "of %llu bytes at offset %llu\n",
216 (unsigned long long)pd
->aiocb
->aio_nbytes
,
217 (unsigned long long)pd
->aiocb
->aio_offset
));
222 /************************************************************************
223 Spin off a threadpool (if needed) and initiate a pwrite call.
224 ***********************************************************************/
/*
 * Same sequence as aio_pthread_read(): fetch the struct aio_extra from
 * aiocb->aio_sigevent.sigev_value.sival_ptr, ensure the pool exists,
 * create the bookkeeping record as a talloc child of the aio_extra and
 * queue aio_worker under the record's job id.  The only visible
 * difference is that pd->write_command is set to true before the job is
 * queued, which sends aio_worker down its sys_pwrite() path.
 * NOTE(review): the declaration of ret, the error paths after each call
 * and the return statements are not visible in this view.
 */
226 static int aio_pthread_write(struct vfs_handle_struct
*handle
,
227 struct files_struct
*fsp
,
228 SMB_STRUCT_AIOCB
*aiocb
)
230 struct aio_extra
*aio_ex
= (struct aio_extra
*)aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
231 struct aio_private_data
*pd
= NULL
;
234 if (!init_aio_threadpool(handle
)) {
238 pd
= create_private_data(aio_ex
, aiocb
);
240 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
244 pd
->write_command
= true;
246 ret
= pthreadpool_add_job(pool
, pd
->jobid
, aio_worker
, (void *)pd
);
252 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
253 "of %llu bytes at offset %llu\n",
255 (unsigned long long)pd
->aiocb
->aio_nbytes
,
256 (unsigned long long)pd
->aiocb
->aio_offset
));
261 /************************************************************************
262 Find the private data by jobid.
263 ***********************************************************************/
/*
 * Linear walk of pd_list comparing each record's jobid with the argument.
 * Callers log a "cannot find jobid" message after calling this, so a
 * not-found result is expected to be possible.
 * NOTE(review): the return statements (match and no-match) are not
 * visible in this view.
 */
265 static struct aio_private_data
*find_private_data_by_jobid(int jobid
)
267 struct aio_private_data
*pd
;
269 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
270 if (pd
->jobid
== jobid
) {
278 /************************************************************************
279 Callback when an IO completes.
280 ***********************************************************************/
/*
 * fd-event handler registered by init_aio_threadpool() on the pool's
 * signal fd.  Visible flow: check flags for EVENT_FD_READ, fetch one
 * finished job id with pthreadpool_finished_job(), test its return value
 * against EINTR/EAGAIN, EWOULDBLOCK and ECANCELED, look the record up by
 * job id, and pass the struct aio_extra stored in
 * aiocb->aio_sigevent.sigev_value.sival_ptr to
 * smbd_aio_complete_aio_ex().
 * NOTE(review): the remaining parameters, the local declarations, the
 * bodies of the individual error branches (including which case reaches
 * smb_panic()) and the not-found guard are not visible in this view.
 */
282 static void aio_pthread_handle_completion(struct event_context
*event_ctx
,
283 struct fd_event
*event
,
287 struct aio_extra
*aio_ex
= NULL
;
288 struct aio_private_data
*pd
= NULL
;
292 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
295 if ((flags
& EVENT_FD_READ
) == 0) {
300 ret
= pthreadpool_finished_job(pool
, &jobid
);
302 if (ret
== EINTR
|| ret
== EAGAIN
) {
306 if (ret
== EWOULDBLOCK
) {
311 if (ret
== ECANCELED
) {
316 smb_panic("aio_pthread_handle_completion");
320 pd
= find_private_data_by_jobid(jobid
);
322 DEBUG(1, ("aio_pthread_handle_completion cannot find "
323 "jobid %d\n", jobid
));
327 aio_ex
= (struct aio_extra
*)
328 pd
->aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
330 smbd_aio_complete_aio_ex(aio_ex
);
332 DEBUG(10,("aio_pthread_handle_completion: jobid %d "
333 "completed\n", jobid
));
338 /************************************************************************
339 Find the private data by aiocb.
340 ***********************************************************************/
/*
 * Linear walk of pd_list comparing each record's aiocb pointer with the
 * argument (pointer identity, not contents).
 * NOTE(review): the return statements (match and no-match) are not
 * visible in this view.
 */
342 static struct aio_private_data
*find_private_data_by_aiocb(SMB_STRUCT_AIOCB
*aiocb
)
344 struct aio_private_data
*pd
;
346 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
347 if (pd
->aiocb
== aiocb
) {
355 /************************************************************************
356 Called to return the result of a completed AIO.
357 Should only be called if aio_error returns something other than EINPROGRESS.
359 Any other value - return from IO operation.
360 ***********************************************************************/
/*
 * Looks the record up by aiocb.  The visible code logs
 * "returning EINVAL" at debug level 0 on one path, and when the recorded
 * pd->ret_size is -1 it restores errno from pd->ret_errno so the caller
 * sees the worker thread's error.
 * NOTE(review): the guard around the EINVAL log, any handling of
 * pd->cancelled, any cleanup of the record and the return statements are
 * not visible in this view.
 */
362 static ssize_t
aio_pthread_return_fn(struct vfs_handle_struct
*handle
,
363 struct files_struct
*fsp
,
364 SMB_STRUCT_AIOCB
*aiocb
)
366 struct aio_private_data
*pd
= find_private_data_by_aiocb(aiocb
);
370 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
376 if (pd
->ret_size
== -1) {
377 errno
= pd
->ret_errno
;
383 /************************************************************************
384 Called to check the result of an AIO.
386 EINPROGRESS - still in progress.
387 EINVAL - invalid aiocb.
388 ECANCELED - request was cancelled.
389 0 - request completed successfully.
390 Any other value - errno from IO operation.
391 ***********************************************************************/
/*
 * Looks the record up by aiocb and, on the visible path, returns its
 * ret_errno field (initialised to EINPROGRESS by create_private_data()
 * and overwritten by aio_worker()).
 * NOTE(review): the branches that would produce EINVAL and ECANCELED are
 * not visible in this view.
 */
393 static int aio_pthread_error_fn(struct vfs_handle_struct
*handle
,
394 struct files_struct
*fsp
,
395 SMB_STRUCT_AIOCB
*aiocb
)
397 struct aio_private_data
*pd
= find_private_data_by_aiocb(aiocb
);
405 return pd
->ret_errno
;
408 /************************************************************************
409 Called to request the cancel of an AIO, or all of them on a specific
410 fsp if aiocb == NULL.
411 ***********************************************************************/
/*
 * Walks pd_list and applies three tests to each record: its aiocb is
 * NULL, its aio_fildes differs from fsp->fh->fd, or a specific aiocb was
 * requested and this record is for a different one.  The bodies of those
 * tests are not visible; presumably each skips the record.  The visible
 * effect on a record is only pd->cancelled = true - the queued job itself
 * is not touched here.
 * NOTE(review): the bodies of the three tests, the rest of the comment
 * about discarding the result, and the return value are not visible in
 * this view.
 */
413 static int aio_pthread_cancel(struct vfs_handle_struct
*handle
,
414 struct files_struct
*fsp
,
415 SMB_STRUCT_AIOCB
*aiocb
)
417 struct aio_private_data
*pd
= NULL
;
419 for (pd
= pd_list
; pd
!= NULL
; pd
= pd
->next
) {
420 if (pd
->aiocb
== NULL
) {
423 if (pd
->aiocb
->aio_fildes
!= fsp
->fh
->fd
) {
426 if ((aiocb
!= NULL
) && (pd
->aiocb
!= aiocb
)) {
431 * We let the child do its job, but we discard the result when
435 pd
->cancelled
= true;
441 /************************************************************************
442 Callback for a previously detected job completion.
443 ***********************************************************************/
/*
 * Immediate-event handler scheduled on server_event_context() by
 * aio_pthread_handle_suspend_completion() for jobs that finished while a
 * suspend was waiting on other requests.  private_data is a pointer to
 * the int job id; the record is looked up by that id and the struct
 * aio_extra stored in aiocb->aio_sigevent.sigev_value.sival_ptr is passed
 * to smbd_aio_complete_aio_ex().
 * NOTE(review): the private_data parameter declaration, the not-found
 * guard and any freeing of the job-id allocation or the immediate event
 * are not visible in this view - confirm ownership against the full file.
 */
445 static void aio_pthread_handle_immediate(struct tevent_context
*ctx
,
446 struct tevent_immediate
*im
,
449 struct aio_extra
*aio_ex
= NULL
;
450 int *pjobid
= (int *)private_data
;
451 struct aio_private_data
*pd
= find_private_data_by_jobid(*pjobid
);
454 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
461 aio_ex
= (struct aio_extra
*)pd
->aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
462 smbd_aio_complete_aio_ex(aio_ex
);
466 /************************************************************************
467 Private data struct used in suspend completion code.
468 ***********************************************************************/
/*
 * aiocb_array is the caller's array of requests being waited on; it is
 * copied from the aio_pthread_suspend() argument of the same name.
 * NOTE(review): aio_pthread_suspend() and its completion handler also use
 * the members num_entries and num_finished; their declarations and the
 * closing brace are not visible in this view.
 */
470 struct suspend_private
{
473 const SMB_STRUCT_AIOCB
* const *aiocb_array
;
476 /************************************************************************
477 Callback when an IO completes from a suspend call.
478 ***********************************************************************/
/*
 * fd-event handler that aio_pthread_suspend() registers on its private
 * event context for the pool's signal fd; p is the struct
 * suspend_private describing the wait.
 *
 * Visible flow: allocate a one-element int array on the NULL talloc
 * context for the job id, fetch a finished job with
 * pthreadpool_finished_job() (any non-zero result panics), look up the
 * record, and compare its aiocb against every entry of sp->aiocb_array.
 * A job that is not being waited for is handed to
 * aio_pthread_handle_immediate through an immediate event on
 * server_event_context().
 *
 * NOTE(review): the remaining parameters, the declarations of i and
 * pjobid, the action taken on a match, the not-found guard and any
 * freeing of pjobid/im are not visible in this view.
 * NOTE(review): the "cannot find jobid" log string below names
 * aio_pthread_handle_completion rather than this function - looks like a
 * copy/paste slip; left unchanged because it is runtime text.
 */
480 static void aio_pthread_handle_suspend_completion(struct event_context
*event_ctx
,
481 struct fd_event
*event
,
485 struct suspend_private
*sp
= (struct suspend_private
*)p
;
486 struct aio_private_data
*pd
= NULL
;
487 struct tevent_immediate
*im
= NULL
;
491 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
494 if ((flags
& EVENT_FD_READ
) == 0) {
498 pjobid
= talloc_array(NULL
, int, 1);
499 if (pjobid
== NULL
) {
500 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
503 if (pthreadpool_finished_job(pool
, pjobid
)) {
504 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
508 pd
= find_private_data_by_jobid(*pjobid
);
510 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
516 /* Is this a jobid with an aiocb we're interested in ? */
517 for (i
= 0; i
< sp
->num_entries
; i
++) {
518 if (sp
->aiocb_array
[i
] == pd
->aiocb
) {
525 /* Jobid completed we weren't waiting for.
526 We must reschedule this as an immediate event
527 on the main event context. */
528 im
= tevent_create_immediate(NULL
);
530 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
533 DEBUG(10,("aio_pthread_handle_suspend_completion: "
534 "re-scheduling job id %d\n",
537 tevent_schedule_immediate(im
,
538 server_event_context(),
539 aio_pthread_handle_immediate
,
/*
 * Timer handler registered by aio_pthread_suspend() with
 * tevent_add_timer(); private_data is interpreted as a pointer to a bool
 * timed-out flag.
 * NOTE(review): the remaining parameters and the statements that remove
 * the timer and set the flag are not visible in this view.
 */
544 static void aio_pthread_suspend_timed_out(struct tevent_context
*event_ctx
,
545 struct tevent_timer
*te
,
549 bool *timed_out
= (bool *)private_data
;
550 /* Remove this timed event handler. */
555 /************************************************************************
556 Called to request everything to stop until all IO is completed.
557 ***********************************************************************/
/*
 * Blocking wait implemented with a private event context allocated on a
 * talloc stackframe.  Visible flow: create the sub-event context, convert
 * *timeout to a timeval and arm aio_pthread_suspend_timed_out as a timer
 * on it, record aiocb_array in the local struct suspend_private, register
 * aio_pthread_handle_suspend_completion for the pool's signal fd on the
 * sub-event context, then run tevent_loop_once() until sp.num_entries
 * equals sp.num_finished.
 *
 * NOTE(review): the parameter between aiocb_array and timeout, the
 * timeout NULL check, initialisation of sp.num_entries/num_finished, the
 * timed_out handling, cleanup of frame and the return value are not
 * visible in this view.
 * NOTE(review): when tevent_add_fd() fails the shared pool is destroyed;
 * whether the file-scope pool pointer is reset afterwards is not visible
 * here - confirm, since init_aio_threadpool() and the main-loop fd event
 * also depend on that pool.
 */
559 static int aio_pthread_suspend(struct vfs_handle_struct
*handle
,
560 struct files_struct
*fsp
,
561 const SMB_STRUCT_AIOCB
* const aiocb_array
[],
563 const struct timespec
*timeout
)
565 struct event_context
*ev
= NULL
;
566 struct fd_event
*sock_event
= NULL
;
568 struct suspend_private sp
;
569 bool timed_out
= false;
570 TALLOC_CTX
*frame
= talloc_stackframe();
572 /* This is a blocking call, and has to use a sub-event loop. */
573 ev
= event_context_init(frame
);
580 struct timeval tv
= convert_timespec_to_timeval(*timeout
);
581 struct tevent_timer
*te
= tevent_add_timer(ev
,
583 timeval_current_ofs(tv
.tv_sec
,
585 aio_pthread_suspend_timed_out
,
595 sp
.aiocb_array
= aiocb_array
;
598 sock_event
= tevent_add_fd(ev
,
600 pthreadpool_signal_fd(pool
),
602 aio_pthread_handle_suspend_completion
,
604 if (sock_event
== NULL
) {
605 pthreadpool_destroy(pool
);
610 * We're going to cheat here. We know that smbd/aio.c
611 * only calls this when it's waiting for every single
612 * outstanding call to finish on a close, so just wait
613 * individually for each IO to complete. We don't care
614 * what order they finish - only that they all do. JRA.
616 while (sp
.num_entries
!= sp
.num_finished
) {
617 if (tevent_loop_once(ev
) == -1) {
635 static struct vfs_fn_pointers vfs_aio_pthread_fns
= {
636 .aio_read
= aio_pthread_read
,
637 .aio_write
= aio_pthread_write
,
638 .aio_return_fn
= aio_pthread_return_fn
,
639 .aio_cancel
= aio_pthread_cancel
,
640 .aio_error_fn
= aio_pthread_error_fn
,
641 .aio_suspend
= aio_pthread_suspend
,
644 NTSTATUS
vfs_aio_pthread_init(void);
645 NTSTATUS
vfs_aio_pthread_init(void)
647 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION
,
648 "aio_pthread", &vfs_aio_pthread_fns
);