/*	$NetBSD: sys_aio.c,v 1.22 2009/01/11 02:45:52 christos Exp $	*/

/*
 * Copyright (c) 2007, Mindaugas Rasiukevicius <rmind at NetBSD org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Implementation of POSIX asynchronous I/O.
 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
 */
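/*
 * Illustrative sketch (not part of the kernel build) of how a user program
 * drives this subsystem through the POSIX API; "fd", the buffer and its
 * size are example values:
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *
 *	char buf[512];
 *	struct aiocb cb = { 0 };
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *
 *	aio_read(&cb);				// enqueues via sys_aio_read()
 *	while (aio_error(&cb) == EINPROGRESS)	// polls _errno set below
 *		;
 *	ssize_t n = aio_return(&cb);		// fetches _retval, resets state
 */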
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.22 2009/01/11 02:45:52 christos Exp $");

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/syscallargs.h>
#include <sys/syscallvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/module.h>

#include <uvm/uvm_extern.h>
MODULE(MODULE_CLASS_MISC, aio, NULL);
/*
 * System-wide limits and counter of AIO operations.
 */
u_int			aio_listio_max = AIO_LISTIO_MAX;
static u_int		aio_max = AIO_MAX;
static u_int		aio_jobs_count;

static struct pool	aio_job_pool;
static struct pool	aio_lio_pool;
static void *		aio_ehook;
void		aio_worker(void *);
static void	aio_process(struct aio_job *);
static void	aio_sendsig(struct proc *, struct sigevent *);
static int	aio_enqueue_job(int, void *, struct lio_req *);
static void	aio_exit(proc_t *, void *);
static const struct syscall_package aio_syscalls[] = {
	{ SYS_aio_cancel, 0, (sy_call_t *)sys_aio_cancel },
	{ SYS_aio_error, 0, (sy_call_t *)sys_aio_error },
	{ SYS_aio_fsync, 0, (sy_call_t *)sys_aio_fsync },
	{ SYS_aio_read, 0, (sy_call_t *)sys_aio_read },
	{ SYS_aio_return, 0, (sy_call_t *)sys_aio_return },
	{ SYS___aio_suspend50, 0, (sy_call_t *)sys___aio_suspend50 },
	{ SYS_aio_write, 0, (sy_call_t *)sys_aio_write },
	{ SYS_lio_listio, 0, (sy_call_t *)sys_lio_listio },
	{ 0, 0, NULL },
};
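/*
 * The table above is registered with syscall_establish() in aio_init()
 * and removed with syscall_disestablish() in aio_fini(), so the AIO
 * system calls exist only while this module is loaded.
 */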
/*
 * Tear down all AIO state.
 */
static int
aio_fini(bool interface)
{
	int error;
	proc_t *p;

	if (interface) {
		/* Stop syscall activity. */
		error = syscall_disestablish(NULL, aio_syscalls);
		if (error != 0)
			return error;
		/* Abort if any processes are using AIO. */
		mutex_enter(proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if (p->p_aio != NULL)
				break;
		}
		mutex_exit(proc_lock);
		if (p != NULL) {
			error = syscall_establish(NULL, aio_syscalls);
			KASSERT(error == 0);
			return EBUSY;
		}
	}

	KASSERT(aio_jobs_count == 0);
	exithook_disestablish(aio_ehook);
	pool_destroy(&aio_job_pool);
	pool_destroy(&aio_lio_pool);
	return 0;
}
/*
 * Initialize global AIO state.
 */
static int
aio_init(void)
{
	int error;

	pool_init(&aio_job_pool, sizeof(struct aio_job), 0, 0, 0,
	    "aio_jobs_pool", &pool_allocator_nointr, IPL_NONE);
	pool_init(&aio_lio_pool, sizeof(struct lio_req), 0, 0, 0,
	    "aio_lio_pool", &pool_allocator_nointr, IPL_NONE);
	aio_ehook = exithook_establish(aio_exit, NULL);
	error = syscall_establish(NULL, aio_syscalls);
	if (error != 0)
		(void)aio_fini(false);
	return error;
}
/*
 * Module interface.
 */
static int
aio_modcmd(modcmd_t cmd, void *arg)
{
	switch (cmd) {
	case MODULE_CMD_INIT:
		return aio_init();
	case MODULE_CMD_FINI:
		return aio_fini(true);
	default:
		return ENOTTY;
	}
}
/*
 * Initialize Asynchronous I/O data structures for the process.
 */
int
aio_procinit(struct proc *p)
{
	struct aioproc *aio;
	struct lwp *l;
	int error;
	bool inmem;
	vaddr_t uaddr;

	/* Allocate and initialize the AIO structure */
	aio = kmem_zalloc(sizeof(struct aioproc), KM_SLEEP);

	/* Initialize the queue and its synchronization structures */
	mutex_init(&aio->aio_mtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&aio->aio_worker_cv, "aiowork");
	cv_init(&aio->done_cv, "aiodone");
	TAILQ_INIT(&aio->jobs_queue);

	/*
	 * Create an AIO worker thread.
	 * XXX: Currently, AIO thread is not protected against user's actions.
	 */
	inmem = uvm_uarea_alloc(&uaddr);
	if (uaddr == 0) {
		aio_exit(p, aio);
		return EAGAIN;
	}
	error = lwp_create(curlwp, p, uaddr, inmem, 0, NULL, 0, aio_worker,
	    NULL, &l, curlwp->l_class);
	if (error != 0) {
		uvm_uarea_free(uaddr, curcpu());
		aio_exit(p, aio);
		return error;
	}

	/* Recheck if we are really first */
	mutex_enter(p->p_lock);
	if (p->p_aio) {
		mutex_exit(p->p_lock);
		aio_exit(p, aio);
		lwp_exit(l);
		return 0;
	}
	p->p_aio = aio;

	/* Complete the initialization of the thread, and run it */
	lwp_lock(l);
	l->l_stat = LSRUN;
	l->l_priority = MAXPRI_USER;
	sched_enqueue(l, false);
	lwp_unlock(l);
	mutex_exit(p->p_lock);

	return 0;
}
/*
 * Tear down the Asynchronous I/O state of a process on exit.
 */
static void
aio_exit(struct proc *p, void *cookie)
{
	struct aio_job *a_job;
	struct aioproc *aio;

	if (cookie != NULL)
		aio = cookie;
	else if ((aio = p->p_aio) == NULL)
		return;

	/* Free the AIO queue */
	while (!TAILQ_EMPTY(&aio->jobs_queue)) {
		a_job = TAILQ_FIRST(&aio->jobs_queue);
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		pool_put(&aio_job_pool, a_job);
		atomic_dec_uint(&aio_jobs_count);
	}

	/* Destroy and free the entire AIO data structure */
	cv_destroy(&aio->aio_worker_cv);
	cv_destroy(&aio->done_cv);
	mutex_destroy(&aio->aio_mtx);
	kmem_free(aio, sizeof(struct aioproc));
}
/*
 * AIO worker thread and processor.
 */
void
aio_worker(void *arg)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aio_job *a_job;
	struct lio_req *lio;
	sigset_t oss, nss;
	int error, refcnt;

	/*
	 * Block all signals, so the thread
	 * handles only SIGKILL and SIGSTOP.
	 */
	sigfillset(&nss);
	mutex_enter(p->p_lock);
	error = sigprocmask1(curlwp, SIG_SETMASK, &nss, &oss);
	mutex_exit(p->p_lock);
	KASSERT(error == 0);

	for (;;) {
		/*
		 * Loop for each job in the queue.  If there
		 * are no jobs then sleep.
		 */
		mutex_enter(&aio->aio_mtx);
		while ((a_job = TAILQ_FIRST(&aio->jobs_queue)) == NULL) {
			if (cv_wait_sig(&aio->aio_worker_cv, &aio->aio_mtx)) {
				/*
				 * Thread was interrupted - check for
				 * pending exit or suspend.
				 */
				mutex_exit(&aio->aio_mtx);
				lwp_userret(curlwp);
				mutex_enter(&aio->aio_mtx);
			}
		}

		/* Take the job from the queue */
		aio->curjob = a_job;
		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);

		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;

		mutex_exit(&aio->aio_mtx);

		/* Process an AIO operation */
		aio_process(a_job);

		/* Copy the data structure back to the user-space */
		(void)copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb));

		mutex_enter(&aio->aio_mtx);
		aio->curjob = NULL;

		/* Decrease a reference counter, if there is a LIO structure */
		lio = a_job->lio;
		refcnt = (lio != NULL ? --lio->refcnt : -1);

		/* Notify all suspenders */
		cv_broadcast(&aio->done_cv);
		mutex_exit(&aio->aio_mtx);

		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);

		/* Destroy the LIO structure */
		if (refcnt == 0) {
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}

		/* Destroy the job */
		pool_put(&aio_job_pool, a_job);
	}

	/* NOTREACHED */
}
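/*
 * Job lifecycle, as implemented above: aio_enqueue_job() copies the user's
 * aiocb in and appends an aio_job to jobs_queue; the worker dequeues it,
 * runs aio_process(), copies the updated aiocb back to user-space, wakes
 * any aio_suspend() callers via done_cv, and finally delivers the optional
 * completion signal.  Only _errno, _state and _retval are ever written back.
 */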
static void
aio_process(struct aio_job *a_job)
{
	struct proc *p = curlwp->l_proc;
	struct aiocb *aiocbp = &a_job->aiocbp;
	struct file *fp;
	int fd = aiocbp->aio_fildes;
	int error = 0;

	KASSERT(a_job->aio_op != 0);

	if ((a_job->aio_op & (AIO_READ | AIO_WRITE)) != 0) {
		struct iovec aiov;
		struct uio auio;

		if (aiocbp->aio_nbytes > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}

		fp = fd_getfile(fd);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}

		aiov.iov_base = (void *)(uintptr_t)aiocbp->aio_buf;
		aiov.iov_len = aiocbp->aio_nbytes;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = aiocbp->aio_nbytes;
		auio.uio_vmspace = p->p_vmspace;

		if (a_job->aio_op & AIO_READ) {
			/*
			 * Perform a Read operation
			 */
			KASSERT((a_job->aio_op & AIO_WRITE) == 0);

			if ((fp->f_flag & FREAD) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_READ;
			error = (*fp->f_ops->fo_read)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		} else {
			/*
			 * Perform a Write operation
			 */
			KASSERT(a_job->aio_op & AIO_WRITE);

			if ((fp->f_flag & FWRITE) == 0) {
				fd_putfile(fd);
				error = EBADF;
				goto done;
			}
			auio.uio_rw = UIO_WRITE;
			error = (*fp->f_ops->fo_write)(fp, &aiocbp->aio_offset,
			    &auio, fp->f_cred, FOF_UPDATE_OFFSET);
		}
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp.aio_nbytes -= auio.uio_resid;
		a_job->aiocbp._retval = (error == 0) ?
		    a_job->aiocbp.aio_nbytes : -1;

	} else if ((a_job->aio_op & (AIO_SYNC | AIO_DSYNC)) != 0) {
		/*
		 * Perform a file Sync operation
		 */
		struct vnode *vp;

		if ((error = fd_getvnode(fd, &fp)) != 0)
			goto done;

		if ((fp->f_flag & FWRITE) == 0) {
			fd_putfile(fd);
			error = EBADF;
			goto done;
		}

		vp = (struct vnode *)fp->f_data;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (a_job->aio_op & AIO_DSYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT | FSYNC_DATAONLY, 0, 0);
		} else if (a_job->aio_op & AIO_SYNC) {
			error = VOP_FSYNC(vp, fp->f_cred,
			    FSYNC_WAIT, 0, 0);
		}
		VOP_UNLOCK(vp, 0);
		fd_putfile(fd);

		/* Store the result value */
		a_job->aiocbp._retval = (error == 0) ? 0 : -1;

	} else
		panic("aio_process: invalid operation code\n");

done:
	/* Job is done, set the error, if any */
	a_job->aiocbp._errno = error;
	a_job->aiocbp._state = JOB_DONE;
}
/*
 * Send an AIO completion signal.
 */
static void
aio_sendsig(struct proc *p, struct sigevent *sig)
{
	ksiginfo_t ksi;

	if (sig->sigev_signo == 0 || sig->sigev_notify == SIGEV_NONE)
		return;

	KSI_INIT(&ksi);
	ksi.ksi_signo = sig->sigev_signo;
	ksi.ksi_code = SI_ASYNCIO;
	ksi.ksi_value = sig->sigev_value;
	mutex_enter(proc_lock);
	kpsignal(p, &ksi, NULL);
	mutex_exit(proc_lock);
}
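/*
 * Illustrative sketch of the sigevent a user program would pass in order
 * to receive this completion signal (field values are example choices):
 *
 *	struct sigevent sev = { 0 };
 *	sev.sigev_notify = SIGEV_SIGNAL;
 *	sev.sigev_signo = SIGUSR1;
 *	sev.sigev_value.sival_ptr = &cb;	// delivered as ksi_value above
 *	cb.aio_sigevent = sev;
 */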
/*
 * Enqueue the job.
 */
static int
aio_enqueue_job(int op, void *aiocb_uptr, struct lio_req *lio)
{
	struct proc *p = curlwp->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb aiocbp;
	struct sigevent *sig;
	int error;

	/* Non-accurate check for the limit */
	if (aio_jobs_count + 1 > aio_max)
		return EAGAIN;

	/* Get the data structure from user-space */
	error = copyin(aiocb_uptr, &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	/* Check if a signal is set, and validate it */
	sig = &aiocbp.aio_sigevent;
	if (sig->sigev_signo < 0 || sig->sigev_signo >= NSIG ||
	    sig->sigev_notify < SIGEV_NONE || sig->sigev_notify > SIGEV_SA)
		return EINVAL;

	/* Buffer and byte count */
	if (((AIO_SYNC | AIO_DSYNC) & op) == 0)
		if (aiocbp.aio_buf == NULL || aiocbp.aio_nbytes > SSIZE_MAX)
			return EINVAL;

	/* Check the opcode; if LIO_NOP - simply ignore */
	if (op == AIO_LIO) {
		KASSERT(lio != NULL);
		if (aiocbp.aio_lio_opcode == LIO_WRITE)
			op = AIO_WRITE;
		else if (aiocbp.aio_lio_opcode == LIO_READ)
			op = AIO_READ;
		else
			return (aiocbp.aio_lio_opcode == LIO_NOP) ? 0 : EINVAL;
	} else {
		KASSERT(lio == NULL);
	}

	/*
	 * Look for an already existing job.  If found - the job is
	 * in progress.  According to POSIX this is invalid, so return
	 * the error.
	 */
	aio = p->p_aio;
	if (aio) {
		mutex_enter(&aio->aio_mtx);
		if (aio->curjob) {
			a_job = aio->curjob;
			if (a_job->aiocb_uptr == aiocb_uptr) {
				mutex_exit(&aio->aio_mtx);
				return EINVAL;
			}
		}
		TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
			if (a_job->aiocb_uptr != aiocb_uptr)
				continue;
			mutex_exit(&aio->aio_mtx);
			return EINVAL;
		}
		mutex_exit(&aio->aio_mtx);
	}

	/*
	 * Check if the AIO structure is initialized, if not - initialize it.
	 * In the LIO case, we did that already.  We will recheck this with
	 * the lock in aio_procinit().
	 */
	if (lio == NULL && p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/*
	 * Set the state with errno, and copy the data
	 * structure back to the user-space.
	 */
	aiocbp._state = JOB_WIP;
	aiocbp._errno = EINPROGRESS;
	aiocbp._retval = -1;
	error = copyout(&aiocbp, aiocb_uptr, sizeof(struct aiocb));
	if (error)
		return error;

	/* Allocate and initialize a new AIO job */
	a_job = pool_get(&aio_job_pool, PR_WAITOK);
	memset(a_job, 0, sizeof(struct aio_job));

	/*
	 * Store the user-space pointer for searching.  Since we
	 * are storing only per-process pointers - it is safe.
	 */
	memcpy(&a_job->aiocbp, &aiocbp, sizeof(struct aiocb));
	a_job->aiocb_uptr = aiocb_uptr;
	a_job->aio_op |= op;
	a_job->lio = lio;

	/*
	 * Add the job to the queue, update the counters, and
	 * notify the AIO worker thread to handle the job.
	 */
	mutex_enter(&aio->aio_mtx);

	/* Fail, if the limit was reached */
	if (atomic_inc_uint_nv(&aio_jobs_count) > aio_max ||
	    aio->jobs_count >= aio_listio_max) {
		atomic_dec_uint(&aio_jobs_count);
		mutex_exit(&aio->aio_mtx);
		pool_put(&aio_job_pool, a_job);
		return EAGAIN;
	}

	TAILQ_INSERT_TAIL(&aio->jobs_queue, a_job, list);
	aio->jobs_count++;
	if (lio)
		lio->refcnt++;
	cv_signal(&aio->aio_worker_cv);

	mutex_exit(&aio->aio_mtx);

	/*
	 * Errors of the queued operation are reported only via the
	 * aio_error() function, as POSIX prescribes.
	 */
	return 0;
}
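/*
 * Note the two-level accounting above: aio_jobs_count is a system-wide
 * atomic counter checked against aio_max, while aio->jobs_count is a
 * per-process counter, protected by aio_mtx, checked against
 * aio_listio_max.  The early "non-accurate" check is only an optimistic
 * shortcut; the authoritative check is the one made under the lock.
 */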
/*
 * aio_cancel: cancel an asynchronous I/O request.
 */
int
sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fildes;
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp_ptr;
	struct lio_req *lio;
	struct filedesc *fdp = p->p_fd;
	unsigned int cn, errcnt, fildes;

	TAILQ_HEAD(, aio_job) tmp_jobs_list;

	/* Check for an invalid file descriptor */
	fildes = (unsigned int)SCARG(uap, fildes);
	if (fildes >= fdp->fd_nfiles)
		return EBADF;
	if (fdp->fd_ofiles[fildes] == NULL ||
	    fdp->fd_ofiles[fildes]->ff_file == NULL)
		return EBADF;

	/* Check if the AIO structure is initialized */
	if (p->p_aio == NULL) {
		*retval = AIO_NOTCANCELED;
		return 0;
	}

	aio = p->p_aio;
	aiocbp_ptr = (struct aiocb *)SCARG(uap, aiocbp);

	mutex_enter(&aio->aio_mtx);

	/* Cancel the jobs, and remove them from the queue */
	cn = 0;
	TAILQ_INIT(&tmp_jobs_list);
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		if (aiocbp_ptr) {
			if (aiocbp_ptr != a_job->aiocb_uptr)
				continue;
			if (fildes != a_job->aiocbp.aio_fildes) {
				mutex_exit(&aio->aio_mtx);
				return EBADF;
			}
		} else if (a_job->aiocbp.aio_fildes != fildes)
			continue;

		TAILQ_REMOVE(&aio->jobs_queue, a_job, list);
		TAILQ_INSERT_TAIL(&tmp_jobs_list, a_job, list);

		/* Decrease the counters */
		atomic_dec_uint(&aio_jobs_count);
		aio->jobs_count--;
		lio = a_job->lio;
		if (lio != NULL && --lio->refcnt != 0)
			a_job->lio = NULL;

		cn++;
		if (aiocbp_ptr)
			break;
	}

	/* There are canceled jobs */
	if (cn)
		*retval = AIO_CANCELED;

	/* We cannot cancel the current job */
	a_job = aio->curjob;
	if (a_job && ((a_job->aiocbp.aio_fildes == fildes) ||
	    (a_job->aiocb_uptr == aiocbp_ptr)))
		*retval = AIO_NOTCANCELED;

	mutex_exit(&aio->aio_mtx);

	/* Free the jobs after the lock */
	errcnt = 0;
	while (!TAILQ_EMPTY(&tmp_jobs_list)) {
		a_job = TAILQ_FIRST(&tmp_jobs_list);
		TAILQ_REMOVE(&tmp_jobs_list, a_job, list);
		/* Set the errno and copy structures back to the user-space */
		a_job->aiocbp._errno = ECANCELED;
		a_job->aiocbp._state = JOB_DONE;
		if (copyout(&a_job->aiocbp, a_job->aiocb_uptr,
		    sizeof(struct aiocb)))
			errcnt++;
		/* Send a signal, if any */
		aio_sendsig(p, &a_job->aiocbp.aio_sigevent);
		lio = a_job->lio;
		if (lio != NULL) {
			aio_sendsig(p, &lio->sig);
			pool_put(&aio_lio_pool, lio);
		}
		pool_put(&aio_job_pool, a_job);
	}

	if (errcnt)
		return EFAULT;

	/* Set a correct return value */
	if (*retval == 0)
		*retval = AIO_ALLDONE;

	return 0;
}
/*
 * aio_error: return the error status of an asynchronous I/O request.
 */
int
sys_aio_error(struct lwp *l, const struct sys_aio_error_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._state == JOB_NONE)
		return EINVAL;

	*retval = aiocbp._errno;

	return 0;
}
/*
 * aio_fsync: asynchronously force all I/O operations to complete.
 */
int
sys_aio_fsync(struct lwp *l, const struct sys_aio_fsync_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(struct aiocb *) aiocbp;
	} */
	int op = SCARG(uap, op);

	if ((op != O_DSYNC) && (op != O_SYNC))
		return EINVAL;

	op = (op == O_DSYNC) ? AIO_DSYNC : AIO_SYNC;

	return aio_enqueue_job(op, SCARG(uap, aiocbp), NULL);
}
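/*
 * Illustrative user-space sketch (example values, not part of this file):
 *
 *	struct aiocb cb = { 0 };
 *	cb.aio_fildes = fd;
 *	aio_fsync(O_DSYNC, &cb);	// maps to AIO_DSYNC above: data only
 *	aio_fsync(O_SYNC, &cb);		// maps to AIO_SYNC: data and metadata
 */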
/*
 * aio_read: asynchronously read from a file.
 */
int
sys_aio_read(struct lwp *l, const struct sys_aio_read_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_READ, SCARG(uap, aiocbp), NULL);
}
/*
 * aio_return: return the result of an asynchronous I/O request.
 */
int
sys_aio_return(struct lwp *l, const struct sys_aio_return_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio = p->p_aio;
	struct aiocb aiocbp;
	int error;

	if (aio == NULL)
		return EINVAL;

	error = copyin(SCARG(uap, aiocbp), &aiocbp, sizeof(struct aiocb));
	if (error)
		return error;

	if (aiocbp._errno == EINPROGRESS || aiocbp._state != JOB_DONE)
		return EINVAL;

	*retval = aiocbp._retval;

	/* Reset the internal variables */
	aiocbp._errno = 0;
	aiocbp._retval = -1;
	aiocbp._state = JOB_NONE;
	error = copyout(&aiocbp, SCARG(uap, aiocbp), sizeof(struct aiocb));

	return error;
}
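/*
 * Once aio_return() has fetched _retval, the control block is reset to
 * JOB_NONE above, so POSIX's rule that the return status of a request
 * may be retrieved only once is enforced by a subsequent aio_error()
 * or aio_return() on the same aiocb failing with EINVAL.
 */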
/*
 * aio_suspend: suspend until asynchronous I/O operations complete.
 */
int
sys___aio_suspend50(struct lwp *l, const struct sys___aio_suspend50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(const struct timespec *) timeout;
	} */
	struct aiocb **list;
	struct timespec ts;
	int error, nent;

	nent = SCARG(uap, nent);
	if (nent <= 0 || nent > aio_listio_max)
		return EAGAIN;

	if (SCARG(uap, timeout)) {
		/* Convert timespec to ticks */
		error = copyin(SCARG(uap, timeout), &ts,
		    sizeof(struct timespec));
		if (error)
			return error;
	}

	/* Get the array of user-space aiocb pointers */
	list = kmem_zalloc(nent * sizeof(struct aiocb *), KM_SLEEP);
	error = copyin(SCARG(uap, list), list, nent * sizeof(struct aiocb *));
	if (error)
		goto out;
	error = aio_suspend1(l, list, nent, SCARG(uap, timeout) ? &ts : NULL);
out:
	kmem_free(list, nent * sizeof(struct aiocb *));
	return error;
}
int
aio_suspend1(struct lwp *l, struct aiocb **aiocbp_list, int nent,
    struct timespec *ts)
{
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aio_job *a_job;
	int i, error, timo;

	if (p->p_aio == NULL)
		return EAGAIN;
	aio = p->p_aio;

	if (ts) {
		timo = mstohz((ts->tv_sec * 1000) + (ts->tv_nsec / 1000000));
		if (timo == 0 && ts->tv_sec == 0 && ts->tv_nsec > 0)
			timo = 1;
		if (timo <= 0)
			return EAGAIN;
	} else
		timo = 0;

	/* The caller has already copied in aiocbp_list and owns it. */
	mutex_enter(&aio->aio_mtx);
	for (;;) {
		for (i = 0; i < nent; i++) {

			/* Skip NULL entries */
			if (aiocbp_list[i] == NULL)
				continue;

			/* Skip the current job */
			if (aio->curjob) {
				a_job = aio->curjob;
				if (a_job->aiocb_uptr == aiocbp_list[i])
					continue;
			}

			/* Look for a job in the queue */
			TAILQ_FOREACH(a_job, &aio->jobs_queue, list)
				if (a_job->aiocb_uptr == aiocbp_list[i])
					break;

			if (a_job == NULL) {
				struct aiocb aiocbp;

				mutex_exit(&aio->aio_mtx);

				/* Check if the job is done */
				error = copyin(aiocbp_list[i], &aiocbp,
				    sizeof(struct aiocb));
				if (error == 0 && aiocbp._state != JOB_DONE) {
					mutex_enter(&aio->aio_mtx);
					continue;
				}
				return error;
			}
		}

		/* Wait for a signal or when timeout occurs */
		error = cv_timedwait_sig(&aio->done_cv, &aio->aio_mtx, timo);
		if (error) {
			if (error == EWOULDBLOCK)
				error = EAGAIN;
			break;
		}
	}
	mutex_exit(&aio->aio_mtx);
	return error;
}
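/*
 * Worked example of the timeout conversion above: for ts = { 0, 500000000 }
 * (half a second), the expression yields mstohz(500), which with the
 * default HZ=100 is 50 ticks.  For a non-zero timeout shorter than one
 * tick, e.g. ts = { 0, 1000 }, mstohz() returns 0 and the code rounds
 * timo up to 1 so that cv_timedwait_sig() still times out instead of
 * sleeping forever.
 */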
/*
 * aio_write: asynchronously write to a file.
 */
int
sys_aio_write(struct lwp *l, const struct sys_aio_write_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(struct aiocb *) aiocbp;
	} */

	return aio_enqueue_job(AIO_WRITE, SCARG(uap, aiocbp), NULL);
}
/*
 * lio_listio: initiate a list of asynchronous I/O requests.
 */
int
sys_lio_listio(struct lwp *l, const struct sys_lio_listio_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) mode;
		syscallarg(struct aiocb *const[]) list;
		syscallarg(int) nent;
		syscallarg(struct sigevent *) sig;
	} */
	struct proc *p = l->l_proc;
	struct aioproc *aio;
	struct aiocb **aiocbp_list;
	struct lio_req *lio;
	int i, error, errcnt, mode, nent;

	mode = SCARG(uap, mode);
	nent = SCARG(uap, nent);

	/* Non-accurate checks for the limit and invalid values */
	if (nent < 1 || nent > aio_listio_max)
		return EINVAL;
	if (aio_jobs_count + nent > aio_max)
		return EAGAIN;

	/* Check if the AIO structure is initialized, if not - initialize it */
	if (p->p_aio == NULL)
		if (aio_procinit(p))
			return EAGAIN;
	aio = p->p_aio;

	/* Create a LIO structure */
	lio = pool_get(&aio_lio_pool, PR_WAITOK);
	lio->refcnt = 1;
	error = 0;

	switch (mode) {
	case LIO_WAIT:
		memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	case LIO_NOWAIT:
		/* Check for a signal, validate it */
		if (SCARG(uap, sig)) {
			struct sigevent *sig = &lio->sig;

			error = copyin(SCARG(uap, sig), &lio->sig,
			    sizeof(struct sigevent));
			if (error == 0 &&
			    (sig->sigev_signo < 0 ||
			    sig->sigev_signo >= NSIG ||
			    sig->sigev_notify < SIGEV_NONE ||
			    sig->sigev_notify > SIGEV_SA))
				error = EINVAL;
		} else
			memset(&lio->sig, 0, sizeof(struct sigevent));
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error != 0) {
		pool_put(&aio_lio_pool, lio);
		return error;
	}

	/* Get the array of user-space aiocb pointers */
	aiocbp_list = kmem_zalloc(nent * sizeof(struct aiocb *), KM_SLEEP);
	error = copyin(SCARG(uap, list), aiocbp_list,
	    nent * sizeof(struct aiocb *));
	if (error) {
		mutex_enter(&aio->aio_mtx);
		goto err;
	}

	/* Enqueue all jobs */
	errcnt = 0;
	for (i = 0; i < nent; i++) {
		error = aio_enqueue_job(AIO_LIO, aiocbp_list[i], lio);
		/*
		 * According to POSIX, the call may fail in this case
		 * even though some of the other I/O operations were
		 * already initiated.
		 */
		if (error != 0)
			errcnt++;
	}

	mutex_enter(&aio->aio_mtx);

	/* Return an error, if any */
	if (errcnt) {
		error = EIO;
		goto err;
	}

	if (mode == LIO_WAIT) {
		/*
		 * Wait for AIO completion.  In such case,
		 * the LIO structure will be freed here.
		 */
		while (lio->refcnt > 1 && error == 0)
			error = cv_wait_sig(&aio->done_cv, &aio->aio_mtx);
		if (error)
			error = EINTR;
	}

err:
	if (--lio->refcnt != 0)
		lio = NULL;
	mutex_exit(&aio->aio_mtx);
	if (lio != NULL) {
		aio_sendsig(p, &lio->sig);
		pool_put(&aio_lio_pool, lio);
	}
	kmem_free(aiocbp_list, nent * sizeof(struct aiocb *));
	return error;
}
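/*
 * Illustrative user-space sketch (example values, not part of this file):
 *
 *	struct aiocb cb1 = { 0 }, cb2 = { 0 };
 *	struct aiocb *list[2] = { &cb1, &cb2 };
 *
 *	cb1.aio_fildes = fd;  cb1.aio_buf = buf1;
 *	cb1.aio_nbytes = sizeof(buf1);  cb1.aio_lio_opcode = LIO_READ;
 *	cb2.aio_fildes = fd;  cb2.aio_buf = buf2;
 *	cb2.aio_nbytes = sizeof(buf2);  cb2.aio_lio_opcode = LIO_WRITE;
 *
 *	lio_listio(LIO_WAIT, list, 2, NULL);	// blocks on done_cv above
 */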
/*
 * SysCtl
 */
static int
sysctl_aio_listio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_listio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize > aio_max)
		return EINVAL;
	aio_listio_max = newsize;

	return 0;
}
static int
sysctl_aio_max(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, newsize;

	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = aio_max;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1 || newsize < aio_listio_max)
		return EINVAL;
	aio_max = newsize;

	return 0;
}
SYSCTL_SETUP(sysctl_aio_setup, "sysctl aio setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
		CTLTYPE_INT, "posix_aio",
		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
			     "Asynchronous I/O option to which the "
			     "system attempts to conform"),
		NULL, _POSIX_ASYNCHRONOUS_IO, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "aio_listio_max",
		SYSCTL_DESCR("Maximum number of asynchronous I/O "
			     "operations in a single list I/O call"),
		sysctl_aio_listio_max, 0, &aio_listio_max, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "aio_max",
		SYSCTL_DESCR("Maximum number of asynchronous I/O "
			     "operations"),
		sysctl_aio_max, 0, &aio_max, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);
}
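/*
 * Illustrative sketch of reading one of these nodes from user-space with
 * sysctlbyname(3) (error handling omitted):
 *
 *	int max;
 *	size_t len = sizeof(max);
 *	sysctlbyname("kern.aio_max", &max, &len, NULL, 0);
 */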
/*
 * Debugging routines.
 */
#if defined(DDB)
void
aio_print_jobs(void (*pr)(const char *, ...))
{
	struct proc *p = (curlwp == NULL ? NULL : curlwp->l_proc);
	struct aioproc *aio;
	struct aio_job *a_job;
	struct aiocb *aiocbp;

	if (p == NULL) {
		(*pr)("AIO: not called in a process context.\n");
		return;
	}

	aio = p->p_aio;
	if (aio == NULL) {
		(*pr)("AIO data is not initialized (PID = %d).\n", p->p_pid);
		return;
	}

	(*pr)("AIO: PID = %d\n", p->p_pid);
	(*pr)("AIO: Global count of the jobs = %u\n", aio_jobs_count);
	(*pr)("AIO: Count of the jobs = %u\n", aio->jobs_count);

	if (aio->curjob) {
		a_job = aio->curjob;
		(*pr)("\nAIO current job:\n");
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %u, buf = %p, nbytes = %u\n",
		    aiocbp->aio_fildes, aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}

	(*pr)("\nAIO queue:\n");
	TAILQ_FOREACH(a_job, &aio->jobs_queue, list) {
		(*pr)(" opcode = %d, errno = %d, state = %d, aiocb_ptr = %p\n",
		    a_job->aio_op, a_job->aiocbp._errno,
		    a_job->aiocbp._state, a_job->aiocb_uptr);
		aiocbp = &a_job->aiocbp;
		(*pr)("   fd = %d, offset = %u, buf = %p, nbytes = %u\n",
		    aiocbp->aio_fildes, aiocbp->aio_offset,
		    aiocbp->aio_buf, aiocbp->aio_nbytes);
	}
}
#endif /* defined(DDB) */