kernel/os/aio.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/contract/process_impl.h>
/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);
#define	AIO_64	0
#define	AIO_32	1
#define	AIO_LARGEFILE	2
/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */
/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);
/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);
/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>
#ifdef _LP64

static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */
/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
#ifdef _LP64
static int64_t
kaioc(
	long	a0,
	long	a1,
	long	a2,
	long	a3,
	long	a4,
	long	a5)
{
	int	error;
	long	rval = 0;

	switch ((int)a0 & ~AIO_POLL_BIT) {
	case AIOREAD:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FREAD);
		break;
	case AIOWRITE:
		error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
		    (offset_t)a4, (aio_result_t *)a5, FWRITE);
		break;
	case AIOWAIT:
		error = aiowait((struct timeval *)a1, (int)a2, &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
		    (timespec_t *)a4);
		break;
	case AIONOTIFY:
		error = aionotify();
		break;
	case AIOINIT:
		error = aioinit();
		break;
	case AIOSTART:
		error = aiostart();
		break;
	case AIOLIO:
		error = alio((int)a1, (aiocb_t **)a2, (int)a3,
		    (struct sigevent *)a4);
		break;
	case AIOLIOWAIT:
		error = aliowait((int)a1, (void *)a2, (int)a3,
		    (struct sigevent *)a4, AIO_64);
		break;
	case AIOSUSPEND:
		error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
		    (int)a4, &rval, AIO_64);
		break;
	case AIOERROR:
		error = aioerror((void *)a1, AIO_64);
		break;
	case AIOAREAD:
		error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
		break;
	case AIOAWRITE:
		error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
		break;
	case AIOCANCEL:
		error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
		break;

	/*
	 * The large file related stuff is valid only for
	 * 32 bit kernel and not for 64 bit kernel
	 * On 64 bit kernel we convert large file calls
	 * to regular 64bit calls.
	 */

	default:
		error = EINVAL;
	}

	if (error)
		return ((int64_t)set_errno(error));
	return (rval);
}
#endif
static int
kaio(
	ulong_t *uap,
	rval_t *rvp)
{
	long rval = 0;
	int error = 0;
	offset_t	off;

	rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
	off = ((uoff_t)uap[5] << 32) | (uoff_t)uap[4];
#else
	off = ((uoff_t)uap[4] << 32) | (uoff_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	/*
	 * It must be the 32 bit system call on 64 bit kernel
	 */
	case AIOREAD:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
	case AIOWAIT:
		error = aiowait((struct timeval *)uap[1], (int)uap[2],
		    &rval);
		break;
	case AIOWAITN:
		error = aiowaitn((void *)uap[1], (uint_t)uap[2],
		    (uint_t *)uap[3], (timespec_t *)uap[4]);
		break;
	case AIONOTIFY:
		return (aionotify());
	case AIOINIT:
		return (aioinit());
	case AIOSTART:
		return (aiostart());
	case AIOLIO:
		return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
		    (void *)uap[4]));
	case AIOLIOWAIT:
		return (aliowait((int)uap[1], (void *)uap[2],
		    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
	case AIOSUSPEND:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4],
		    &rval, AIO_32);
		break;
	case AIOERROR:
		return (aioerror((void *)uap[1], AIO_32));
	case AIOAREAD:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FREAD, AIO_32));
	case AIOAWRITE:
		return (aiorw((int)uap[0], (void *)uap[1],
		    FWRITE, AIO_32));
	case AIOCANCEL:
		error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
		    AIO_32));
		break;
	case AIOLIO64:
		return (alioLF((int)uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4]));
	case AIOLIOWAIT64:
		return (aliowait(uap[1], (void *)uap[2],
		    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
	case AIOSUSPEND64:
		error = aiosuspend((void *)uap[1], (int)uap[2],
		    (timespec_t *)uap[3], (int)uap[4], &rval,
		    AIO_LARGEFILE);
		break;
	case AIOERROR64:
		return (aioerror((void *)uap[1], AIO_LARGEFILE));
	case AIOAREAD64:
		return (aiorw((int)uap[0], (void *)uap[1], FREAD,
		    AIO_LARGEFILE));
	case AIOAWRITE64:
		return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
		    AIO_LARGEFILE));
	case AIOCANCEL64:
		error = (aio_cancel((int)uap[1], (void *)uap[2],
		    &rval, AIO_LARGEFILE));
		break;
	default:
		return (EINVAL);
	}

	rvp->r_val1 = rval;
	return (error);
}
/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
	aio_t *aiop;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (0);

	mutex_enter(&aiop->aio_mutex);
	aiop->aio_notifycnt++;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	return (0);
}
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {		/* wait indefinitely */
		*blocking = 1;
		return (0);
	}

	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32 bit and 64 bit apps.
	 */
	if (model == DATAMODEL_NATIVE) {
		if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time, sizeof (wait_time)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		/*
		 * -1 from a 32bit app. It will not get sign extended.
		 * don't wait if -1.
		 */
		if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
			*blocking = 0;
			return (0);
		}

		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */
		*blocking = 0;
		return (0);
	}

	if (wait_time.tv_sec < 0 ||
	    wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
		return (EINVAL);

	rqtime->tv_sec = wait_time.tv_sec;
	rqtime->tv_nsec = wait_time.tv_usec * 1000;
	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
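/*
 * Illustrative note (not part of the original source): timeval2reltime()
 * only converts microseconds to nanoseconds, e.g. a timeval of
 * { .tv_sec = 2, .tv_usec = 500000 } becomes a relative timestruc_t of
 * { 2, 500000000 }.  Callers such as aiowait() then add gethrestime()
 * to turn that relative value into an absolute deadline suitable for
 * cv_waituntil_sig().
 */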
static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
{
#ifdef	_SYSCALL32_IMPL
	timespec32_t wait_time_32;
#endif
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timout == NULL) {
		*blocking = 1;
		return (0);
	}

	if (model == DATAMODEL_NATIVE) {
		if (copyin(timout, rqtime, sizeof (*rqtime)))
			return (EFAULT);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
			return (EFAULT);
		TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
	}
#endif  /* _SYSCALL32_IMPL */

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;

	return (0);
}
/*ARGSUSED*/
static int
aiowait(
	struct timeval	*timout,
	int	dontblockflg,
	long	*rval)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp;
	clock_t		status;
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* process requests on poll queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}
		if ((reqp = aio_req_remove(NULL)) != NULL) {
			*rval = (long)reqp->aio_req_resultp;
			break;
		}
		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			*rval = 1;
			break;
		}
		/* don't block if no outstanding aio */
		if (aiop->aio_outstanding == 0 && dontblockflg) {
			error = EINVAL;
			break;
		}
		if (blocking) {
			status = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);

			if (status > 0)		/* check done queue again */
				continue;
			if (status == 0) {	/* interrupted by a signal */
				error = EINTR;
				*rval = -1;
			} else {		/* timer expired */
				error = ETIME;
			}
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	if (reqp) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
	return (error);
}
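/*
 * Usage sketch (illustrative, not part of this file): the AIOWAIT entry
 * point above backs the old Solaris aiowait(3AIO) library call, which
 * reaps results submitted with aioread(3AIO)/aiowrite(3AIO):
 *
 *	aio_result_t res;
 *	char buf[512];
 *	struct timeval tv = { 5, 0 };		(wait at most 5 seconds)
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) == 0) {
 *		aio_result_t *rp = aiowait(&tv);
 *		if (rp != NULL && rp != (aio_result_t *)-1)
 *			process(rp->aio_return, rp->aio_errno);
 *	}
 *
 * process() is a placeholder; the library wrappers themselves live in
 * libaio/libc, not in this file.
 */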
/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */
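/*
 * Usage sketch (illustrative, not part of this file): from user level this
 * code is reached through aio_waitn(3RT), e.g.
 *
 *	struct aiocb *done[16];
 *	uint_t nwait = 4;			(reap at least 4)
 *
 *	if (aio_waitn(done, 16, &nwait, NULL) == 0) {
 *		for (uint_t i = 0; i < nwait; i++)
 *			handle(aio_return(done[i]));
 *	}
 *
 * handle() is a placeholder.  nwait is both an input (minimum number to
 * wait for) and an output (number actually returned), mirroring the
 * waitcnt/nwait handling below.
 */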
/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
	int		error = 0;
	aio_t		*aiop;
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* users iocb size */
	size_t		riocbsz;		/* returned iocb size */
	int		iocb_index = 0;
	model_t		model = get_udatamodel();
	int		blocking = 1;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (aiop->aio_outstanding == 0)
		return (EAGAIN);

	if (copyin(nwait, &waitcnt, sizeof (uint_t)))
		return (EFAULT);

	/* set *nwait to zero, if we must return prematurely */
	if (copyout(&cnt, nwait, sizeof (uint_t)))
		return (EFAULT);

	if (waitcnt == 0) {
		blocking = 0;
		rqtp = NULL;
		waitcnt = nent;
	} else {
		error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
		if (error)
			return (error);
	}

	if (model == DATAMODEL_NATIVE)
		iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and if necessary it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * A second or further aio_waitn calls will sleep here
	 * until the active aio_waitn finishes and leaves the kernel
	 * If the second call does not block (poll), then return
	 * immediately with the error code : EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout. The timeout starts when this
	 * aio_waitn-call becomes active.
	 */

	mutex_enter(&aiop->aio_mutex);

	while (aiop->aio_flags & AIO_WAITN) {
		if (blocking == 0) {
			mutex_exit(&aiop->aio_mutex);
			return (EAGAIN);
		}

		/* block, no timeout */
		aiop->aio_flags |= AIO_WAITN_PENDING;
		if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			return (EINTR);
		}
	}

	/*
	 * Establish the absolute future time for the timeout.
	 */
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
		aiop->aio_iocb = NULL;
	}

	if (aiop->aio_iocb == NULL) {
		iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
		if (iocblist == NULL) {
			mutex_exit(&aiop->aio_mutex);
			return (ENOMEM);
		}
		aiop->aio_iocb = (aiocb_t **)iocblist;
		aiop->aio_iocbsz = iocbsz;
	} else {
		iocblist = (char *)aiop->aio_iocb;
	}

	aiop->aio_waitncnt = waitcnt;
	aiop->aio_flags |= AIO_WAITN;

	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_mutex);
		}

		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
			aiop->aio_waitncnt = waitcnt - cnt;
		}

		/* user-level done queue might not be empty */
		if (aiop->aio_notifycnt > 0) {
			aiop->aio_notifycnt--;
			error = 0;
			break;
		}

		/*
		 * if we are here second time as a result of timer
		 * expiration, we reset error if there are enough
		 * aiocb's to satisfy request.
		 * We return also if all requests are already done
		 * and we picked up the whole done queue.
		 */

		if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
		    aiop->aio_doneq == NULL)) {
			error = 0;
			break;
		}

		if ((cnt < waitcnt) && blocking) {
			int rval = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			if (rval > 0)
				continue;
			if (rval < 0) {
				error = ETIME;
				blocking = 0;
				continue;
			}
			error = EINTR;
		}
		break;
	}

	mutex_exit(&aiop->aio_mutex);

	if (cnt > 0) {

		iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
		    aiop, model);

		if (model == DATAMODEL_NATIVE)
			riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef	_SYSCALL32_IMPL
		else
			riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

		if (copyout(iocblist, uiocb, riocbsz) ||
		    copyout(&cnt, nwait, sizeof (uint_t)))
			error = EFAULT;
	}

	/* check if there is another thread waiting for execution */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags &= ~AIO_WAITN;
	if (aiop->aio_flags & AIO_WAITN_PENDING) {
		aiop->aio_flags &= ~AIO_WAITN_PENDING;
		cv_signal(&aiop->aio_waitncv);
	}
	mutex_exit(&aiop->aio_mutex);

	return (error);
}
/*
 * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */
static int
aio_unlock_requests(
	caddr_t	iocblist,
	int	iocb_index,
	aio_req_t *reqlist,
	aio_t	*aiop,
	model_t	model)
{
	aio_req_t	*reqp, *nreqp;

	if (model == DATAMODEL_NATIVE) {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			(((caddr_t *)iocblist)[iocb_index++]) =
			    reqp->aio_req_iocb.iocb;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
			((caddr32_t *)iocblist)[iocb_index++] =
			    reqp->aio_req_iocb.iocb32;
			nreqp = reqp->aio_req_next;
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
#endif	/* _SYSCALL32_IMPL */
	return (iocb_index);
}
/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a double linked list
 */
static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
	aio_req_t *q2, *q2work, *list;
	int count = 0;

	list = *reqlist;
	q2 = aiop->aio_doneq;
	q2work = q2;
	while (max-- > 0) {
		q2work->aio_req_flags &= ~AIO_DONEQ;
		q2work = q2work->aio_req_next;
		count++;
		if (q2work == q2)
			break;
	}

	if (q2work == q2) {
		/* all elements revised */
		q2->aio_req_prev->aio_req_next = list;
		list = q2;
		aiop->aio_doneq = NULL;
	} else {
		/*
		 * max < elements in the doneq
		 * detach only the required amount of elements
		 * out of the doneq
		 */
		q2work->aio_req_prev->aio_req_next = list;
		list = q2;

		aiop->aio_doneq = q2work;
		q2work->aio_req_prev = q2->aio_req_prev;
		q2->aio_req_prev->aio_req_next = q2work;
	}
	*reqlist = list;
	return (count);
}
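/*
 * Worked example (illustrative, not part of the original source): with a
 * circular doneq A<->B<->C and max == 2, the loop clears AIO_DONEQ on A
 * and B and stops with q2work == C.  The else-branch sets B->aio_req_next
 * to the old *reqlist head and makes A the new head, so the singly linked
 * reqlist is now A, B, ...; it then re-closes the remaining ring so that
 * aio_doneq is the one-element ring C.  The function returns 2.
 */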
/*ARGSUSED*/
static int
aiosuspend(
	void	*aiocb,
	int	nent,
	struct	timespec	*timout,
	int	flag,
	long	*rval,
	int	run_mode)
{
	int		error;
	aio_t		*aiop;
	aio_req_t	*reqp, *found, *next;
	caddr_t		cbplist = NULL;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
#endif  /* _SYSCALL32_IMPL */
	aiocb64_32_t	*cbp64;
	int		rv;
	int		i;
	size_t		ssize;
	model_t		model = get_udatamodel();
	int		blocking;
	int		timecheck;
	timestruc_t	rqtime;
	timestruc_t	*rqtp;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	/*
	 * Establish the absolute future time for the timeout.
	 */
	error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
	if (error)
		return (error);
	if (rqtp) {
		timestruc_t now;
		timecheck = timechanged;
		gethrestime(&now);
		timespecadd(rqtp, &now);
	}

	/*
	 * If we are not blocking and there's no IO complete
	 * skip aiocb copyin.
	 */
	if (!blocking && (aiop->aio_pollq == NULL) &&
	    (aiop->aio_doneq == NULL)) {
		return (EAGAIN);
	}

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	cbplist = kmem_alloc(ssize, KM_NOSLEEP);
	if (cbplist == NULL)
		return (ENOMEM);

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	found = NULL;
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_done().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	for (;;) {
		/* push requests on poll queue to done queue */
		if (aiop->aio_pollq) {
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_cleanupq_mutex);
			aio_cleanup(0);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
		}
		/* check for requests on done queue */
		if (aiop->aio_doneq) {
			if (model == DATAMODEL_NATIVE)
				ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
			else
				ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
			for (i = 0; i < nent; i++) {
				if (model == DATAMODEL_NATIVE) {
					if ((cbp = *ucbp++) == NULL)
						continue;
					if (run_mode != AIO_LARGEFILE)
						reqp = aio_req_done(
						    &cbp->aio_resultp);
					else {
						cbp64 = (aiocb64_32_t *)cbp;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}
				}
#ifdef	_SYSCALL32_IMPL
				else {
					if (run_mode == AIO_32) {
						if ((cbp32 =
						    (aiocb32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp32->aio_resultp);
					} else if (run_mode == AIO_LARGEFILE) {
						if ((cbp64 =
						    (aiocb64_32_t *)(uintptr_t)
						    *ucbp32++) == NULL)
							continue;
						reqp = aio_req_done(
						    &cbp64->aio_resultp);
					}

				}
#endif  /* _SYSCALL32_IMPL */
				if (reqp) {
					reqp->aio_req_next = found;
					found = reqp;
				}
				if (aiop->aio_doneq == NULL)
					break;
			}
			if (found)
				break;
		}
		if (aiop->aio_notifycnt > 0) {
			/*
			 * nothing on the kernel's queue. the user
			 * has notified the kernel that it has items
			 * on a user-level queue.
			 */
			aiop->aio_notifycnt--;
			*rval = 1;
			error = 0;
			break;
		}
		/* don't block if nothing is outstanding */
		if (aiop->aio_outstanding == 0) {
			error = EAGAIN;
			break;
		}
		if (blocking) {
			/*
			 * drop the aio_cleanupq_mutex as we are
			 * going to block.
			 */
			mutex_exit(&aiop->aio_cleanupq_mutex);
			rv = cv_waituntil_sig(&aiop->aio_waitcv,
			    &aiop->aio_mutex, rqtp, timecheck);
			/*
			 * we have to drop aio_mutex and
			 * grab it in the right order.
			 */
			mutex_exit(&aiop->aio_mutex);
			mutex_enter(&aiop->aio_cleanupq_mutex);
			mutex_enter(&aiop->aio_mutex);
			if (rv > 0)	/* check done queue again */
				continue;
			if (rv == 0)	/* interrupted by a signal */
				error = EINTR;
			else		/* timer expired */
				error = ETIME;
		} else {
			error = EAGAIN;
		}
		break;
	}
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	for (reqp = found; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}
done:
	kmem_free(cbplist, ssize);
	return (error);
}
/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL) {
		aiop = aio_aiop_alloc();
		p->p_aio = aiop;
	}
	mutex_exit(&p->p_lock);
	if (aiop == NULL)
		return (ENOMEM);
	return (0);
}
/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
	proc_t *p = curproc;
	aio_t *aiop;
	int first, error = 0;

	if (p->p_lwpcnt == 1)
		return (EDEADLK);
	mutex_enter(&p->p_lock);
	if ((aiop = p->p_aio) == NULL)
		error = EINVAL;
	else {
		first = aiop->aio_ok;
		if (aiop->aio_ok == 0)
			aiop->aio_ok = 1;
	}
	mutex_exit(&p->p_lock);
	if (error == 0 && first == 0) {
		return (aio_cleanup_thread(aiop));
		/* should return only to exit */
	}
	return (error);
}
/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
	aio_req_t *reqp, int event)
{
	port_kevent_t	*pkevp = NULL;
	int		error;

	error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
	    PORT_SOURCE_AIO, &pkevp);
	if (error) {
		if ((error == ENOMEM) || (error == EAGAIN))
			error = EAGAIN;
		else
			error = EINVAL;
	} else {
		port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
		    aio_port_callback, reqp);
		pkevp->portkev_events = event;
		reqp->aio_req_portkev = pkevp;
		reqp->aio_req_port = pntfy->portnfy_port;
	}
	return (error);
}
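/*
 * Usage sketch (illustrative, not part of this file): a user program asks
 * for port delivery by setting SIGEV_PORT in the aiocb and pointing
 * sigev_value at a port_notify_t, then reaps completions with port_get():
 *
 *	int port = port_create();
 *	port_notify_t pn = { port, &my_cookie };
 *	struct aiocb cb = { 0 };
 *
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *
 *	port_event_t pe;
 *	(void) port_get(port, &pe, NULL);	(pe.portev_user == &my_cookie)
 *
 * my_cookie is a placeholder; the kernel side of this handshake is
 * aio_req_assoc_port_rw() above.
 */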
#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocb's are copied in
 * one at a time. If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
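/*
 * Usage sketch (illustrative, not part of this file): alio() is reached
 * from lio_listio(3RT) on a 64-bit process, e.g.
 *
 *	struct aiocb cb0 = { 0 }, cb1 = { 0 };
 *	struct aiocb *list[2] = { &cb0, &cb1 };
 *
 *	cb0.aio_fildes = fd;  cb0.aio_lio_opcode = LIO_READ;
 *	cb0.aio_buf = buf0;   cb0.aio_nbytes = sizeof (buf0);
 *	cb1.aio_fildes = fd;  cb1.aio_lio_opcode = LIO_WRITE;
 *	cb1.aio_buf = buf1;   cb1.aio_nbytes = sizeof (buf1);
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *		perror("lio_listio");
 *
 * With LIO_WAIT the call returns only after both requests finish, which
 * corresponds to the head->lio_refcnt wait loop at the end of alio().
 */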
static int
alio(
	int		mode_arg,
	aiocb_t		**aiocb_arg,
	int		nent,
	struct sigevent	*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
	aiocb_t		*cbp;
	aiocb_t		**ucbp;
	struct sigevent sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ssize = (sizeof (aiocb_t *) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (aiocb_t **)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value = sigevk.sigev_value;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = *ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation.
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    &cbp->aio_resultp, vp, 0);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb = (caddr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}

#endif /* _LP64 */
/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list includes UFS I/O requests. If this happens,
 * libaio will call aliowait() to wait for the I/O's to
 * complete.
 */
/*ARGSUSED*/
static int
aliowait(
	int	mode,
	void	*aiocb,
	int	nent,
	void	*sigev,
	int	run_mode)
{
	aio_lio_t	*head;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		*cbp, **ucbp;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp32;
	caddr32_t	*ucbp32;
	aiocb64_32_t	*cbp64;
#endif
	int		error = 0;
	int		i;
	size_t		ssize = 0;
	model_t		model = get_udatamodel();

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE)
		ssize = (sizeof (aiocb_t *) * nent);
#ifdef	_SYSCALL32_IMPL
	else
		ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

	if (ssize == 0)
		return (EINVAL);

	cbplist = kmem_alloc(ssize, KM_SLEEP);

	if (model == DATAMODEL_NATIVE)
		ucbp = (aiocb_t **)cbplist;
#ifdef	_SYSCALL32_IMPL
	else
		ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */

	if (copyin(aiocb, cbplist, ssize)) {
		error = EFAULT;
		goto done;
	}

	/*
	 * To find the list head, we go through the
	 * list of aiocb structs, find the request
	 * its for, then get the list head that reqp
	 * points to
	 */
	head = NULL;

	for (i = 0; i < nent; i++) {
		if (model == DATAMODEL_NATIVE) {
			/*
			 * Since we are only checking for a NULL pointer
			 * Following should work on both native data sizes
			 * as well as for largefile aiocb.
			 */
			if ((cbp = *ucbp++) == NULL)
				continue;
			if (run_mode != AIO_LARGEFILE)
				if (head = aio_list_get(&cbp->aio_resultp))
					break;
			else {
				/*
				 * This is a case when largefile call is
				 * made on 32 bit kernel.
				 * Treat each pointer as pointer to
				 * aiocb64_32
				 */
				if (head = aio_list_get((aio_result_t *)
				    &(((aiocb64_32_t *)cbp)->aio_resultp)))
					break;
			}
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE) {
				if ((cbp64 = (aiocb64_32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp64->aio_resultp))
					break;
			} else if (run_mode == AIO_32) {
				if ((cbp32 = (aiocb32_t *)
				    (uintptr_t)*ucbp32++) == NULL)
					continue;
				if (head = aio_list_get((aio_result_t *)
				    &cbp32->aio_resultp))
					break;
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	if (head == NULL) {
		error = EINVAL;
		goto done;
	}

	mutex_enter(&aiop->aio_mutex);
	while (head->lio_refcnt > 0) {
		if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
			mutex_exit(&aiop->aio_mutex);
			error = EINTR;
			goto done;
		}
	}
	mutex_exit(&aiop->aio_mutex);
	alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
done:
	kmem_free(cbplist, ssize);
	return (error);
}
aio_lio_t *
aio_list_get(aio_result_t *resultp)
{
	aio_lio_t	*head = NULL;
	aio_t		*aiop;
	aio_req_t	**bucket;
	aio_req_t	*reqp;
	long		index;

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (NULL);

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (reqp = *bucket; reqp != NULL;
		    reqp = reqp->aio_hash_next) {
			if (reqp->aio_req_resultp == resultp) {
				head = reqp->aio_req_lio;
				return (head);
			}
		}
	}
	return (NULL);
}
static void
lio_set_uerror(void *resultp, int error)
{
	/*
	 * the resultp field is a pointer to where the
	 * error should be written out to the user's
	 * aiocb.
	 *
	 */
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return,
		    (ssize_t)-1);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	}
#ifdef	_SYSCALL32_IMPL
	else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (uint_t)-1);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#endif  /* _SYSCALL32_IMPL */
}
/*
 * do cleanup completion for all requests in list. memory for
 * each request is also freed.
 */
static void
alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
{
	int i;
	aio_req_t *reqp;
	aio_result_t *resultp;
	aiocb64_32_t *aiocb_64;

	for (i = 0; i < nent; i++) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (cbp[i] == NULL)
				continue;
			if (run_mode == AIO_LARGEFILE) {
				aiocb_64 = (aiocb64_32_t *)cbp[i];
				resultp = (aio_result_t *)
				    &aiocb_64->aio_resultp;
			} else
				resultp = &cbp[i]->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			aiocb32_t *aiocb_32;
			caddr32_t *cbp32;

			cbp32 = (caddr32_t *)cbp;
			if (cbp32[i] == (uintptr_t)NULL)
				continue;
			if (run_mode == AIO_32) {
				aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_32->
				    aio_resultp;
			} else if (run_mode == AIO_LARGEFILE) {
				aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
				resultp = (aio_result_t *)&aiocb_64->
				    aio_resultp;
			}
		}
#endif  /* _SYSCALL32_IMPL */
		/*
		 * we need to get the aio_cleanupq_mutex since we call
		 * aio_req_done().
		 */
		mutex_enter(&aiop->aio_cleanupq_mutex);
		mutex_enter(&aiop->aio_mutex);
		reqp = aio_req_done(resultp);
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_cleanupq_mutex);
		if (reqp != NULL) {
			aphysio_unlock(reqp);
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		}
	}
}
/*
 * Write out the results for an aio request that is done.
 */
static int
aioerror(void *cb, int run_mode)
{
	aio_result_t *resultp;
	aio_t *aiop;
	aio_req_t *reqp;
	int retval;

	aiop = curproc->p_aio;
	if (aiop == NULL || cb == NULL)
		return (EINVAL);

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else
			resultp = &((aiocb_t *)cb)->aio_resultp;
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_LARGEFILE)
			resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
			    aio_resultp;
		else if (run_mode == AIO_32)
			resultp = (aio_result_t *)&((aiocb32_t *)cb)->
			    aio_resultp;
	}
#endif  /* _SYSCALL32_IMPL */
	/*
	 * we need to get the aio_cleanupq_mutex since we call
	 * aio_req_find().
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	mutex_enter(&aiop->aio_mutex);
	retval = aio_req_find(resultp, &reqp);
	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (retval == 0) {
		aphysio_unlock(reqp);
		aio_copyout_result(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
		return (0);
	} else if (retval == 1)
		return (EINPROGRESS);
	else if (retval == 2)
		return (EINVAL);
	return (0);
}
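/*
 * Usage sketch (illustrative, not part of this file): aioerror() is what
 * ultimately answers aio_error(3RT), so a user-level completion poll looks
 * like
 *
 *	const struct aiocb *list[1] = { &cb };
 *
 *	while (aio_error(&cb) == EINPROGRESS)
 *		(void) aio_suspend(list, 1, NULL);
 *	ssize_t n = aio_return(&cb);
 *
 * The 0/1/2 values from aio_req_find() map to done (copy out results),
 * EINPROGRESS, and EINVAL respectively.
 */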
/*
 * aio_cancel - if no requests outstanding,
 *		return AIO_ALLDONE
 *		else
 *		return AIO_NOTCANCELED
 */
static int
aio_cancel(
	int	fildes,
	void	*cb,
	long	*rval,
	int	run_mode)
{
	aio_t *aiop;
	void *resultp;
	int index;
	aio_req_t **bucket;
	aio_req_t *ent;

	/*
	 * Verify valid file descriptor
	 */
	if ((getf(fildes)) == NULL) {
		return (EBADF);
	}
	releasef(fildes);

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (aiop->aio_outstanding == 0) {
		*rval = AIO_ALLDONE;
		return (0);
	}

	mutex_enter(&aiop->aio_mutex);
	if (cb != NULL) {
		if (get_udatamodel() == DATAMODEL_NATIVE) {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else
				resultp = &((aiocb_t *)cb)->aio_resultp;
		}
#ifdef	_SYSCALL32_IMPL
		else {
			if (run_mode == AIO_LARGEFILE)
				resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
				    ->aio_resultp;
			else if (run_mode == AIO_32)
				resultp = (aio_result_t *)&((aiocb32_t *)cb)
				    ->aio_resultp;
		}
#endif  /* _SYSCALL32_IMPL */
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == resultp) {
				if ((ent->aio_req_flags & AIO_PENDING) == 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_ALLDONE;
					return (0);
				}
				mutex_exit(&aiop->aio_mutex);
				*rval = AIO_NOTCANCELED;
				return (0);
			}
		}
		mutex_exit(&aiop->aio_mutex);
		*rval = AIO_ALLDONE;
		return (0);
	}

	for (index = 0; index < AIO_HASHSZ; index++) {
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_fd == fildes) {
				if ((ent->aio_req_flags & AIO_PENDING) != 0) {
					mutex_exit(&aiop->aio_mutex);
					*rval = AIO_NOTCANCELED;
					return (0);
				}
			}
		}
	}
	mutex_exit(&aiop->aio_mutex);
	*rval = AIO_ALLDONE;
	return (0);
}
/*
 * solaris version of asynchronous read and write
 */
static int
arw(
	int	opcode,
	int	fdes,
	char	*bufp,
	int	bufsize,
	offset_t	offset,
	aio_result_t	*resultp,
	int		mode)
{
	file_t		*fp;
	int		error;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
#ifdef _LP64
	aiocb_t		aiocb;
#else
	aiocb64_32_t	aiocb64;
#endif

	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if ((fp = getf(fdes)) == NULL) {
		return (EBADF);
	}

	/*
	 * check the permission of the partition
	 */
	if ((fp->f_flag & mode) == 0) {
		releasef(fdes);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fdes);
		return (EBADFD);
	}
#ifdef _LP64
	aiocb.aio_fildes = fdes;
	aiocb.aio_buf = bufp;
	aiocb.aio_nbytes = bufsize;
	aiocb.aio_offset = offset;
	aiocb.aio_sigevent.sigev_notify = 0;
	error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
#else
	aiocb64.aio_fildes = fdes;
	aiocb64.aio_buf = (caddr32_t)bufp;
	aiocb64.aio_nbytes = bufsize;
	aiocb64.aio_offset = offset;
	aiocb64.aio_sigevent.sigev_notify = 0;
	error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
#endif
	if (error) {
		releasef(fdes);
		return (error);
	}

	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (bufsize == 0) {
		clear_active_fd(fdes);
		aio_zerolen(reqp);
		return (0);
	}
	/*
	 * send the request to driver.
	 */
	error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fdes);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fdes);
	return (0);
}
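/*
 * Usage sketch (illustrative, not part of this file): arw() backs the
 * pre-POSIX aioread(3AIO)/aiowrite(3AIO) interfaces, where the caller owns
 * an aio_result_t instead of a full aiocb, e.g.
 *
 *	aio_result_t res;
 *
 *	if (aiowrite(fd, buf, len, off, SEEK_SET, &res) == -1)
 *		perror("aiowrite");
 *
 * Completion is then reaped with aiowait(3AIO) (or, if armed, SIGIO), and
 * the status is written back into res by aio_copyout_result().
 */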
/*
 * posix version of asynchronous read and write
 */
static int
aiorw(
	int		opcode,
	void		*aiocb_arg,
	int		mode,
	int		run_mode)
{
#ifdef _SYSCALL32_IMPL
	aiocb32_t	aiocb32;
	struct	sigevent32 *sigev32;
	port_notify32_t	pntfy32;
#endif
	aiocb64_32_t	aiocb64;
	aiocb_t		aiocb;
	file_t		*fp;
	int		error, fd;
	size_t		bufsize;
	struct vnode	*vp;
	aio_req_t	*reqp;
	aio_t		*aiop;
	int		(*aio_func)();
	aio_result_t	*resultp;
	struct	sigevent *sigev;
	model_t		model;
	int		aio_use_port = 0;
	port_notify_t	pntfy;

	model = get_udatamodel();
	aiop = curproc->p_aio;
	if (aiop == NULL)
		return (EINVAL);

	if (model == DATAMODEL_NATIVE) {
		if (run_mode != AIO_LARGEFILE) {
			if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
				return (EFAULT);
			bufsize = aiocb.aio_nbytes;
			resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
			if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev = &aiocb.aio_sigevent;
		} else {
			/*
			 * We come here only when we make largefile
			 * call on 32 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev = (struct sigevent *)&aiocb64.aio_sigevent;
		}

		if (sigev->sigev_notify == SIGEV_PORT) {
			if (copyin((void *)sigev->sigev_value.sival_ptr,
			    &pntfy, sizeof (port_notify_t))) {
				releasef(fd);
				return (EFAULT);
			}
			aio_use_port = 1;
		} else if (sigev->sigev_notify == SIGEV_THREAD) {
			pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
			pntfy.portnfy_user =
			    aiocb.aio_sigevent.sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#ifdef	_SYSCALL32_IMPL
	else {
		if (run_mode == AIO_32) {
			/* 32 bit system call is being made on 64 bit kernel */
			if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
				return (EFAULT);

			bufsize = aiocb32.aio_nbytes;
			aiocb_32ton(&aiocb32, &aiocb);
			resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
			    aio_resultp);
			if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
				return (EBADF);
			}
			sigev32 = &aiocb32.aio_sigevent;
		} else if (run_mode == AIO_LARGEFILE) {
			/*
			 * We come here only when we make largefile
			 * call on 64 bit kernel using 32 bit library.
			 */
			if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
				return (EFAULT);
			bufsize = aiocb64.aio_nbytes;
			aiocb_LFton(&aiocb64, &aiocb);
			resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
			    ->aio_resultp);
			if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
				return (EBADF);
			sigev32 = &aiocb64.aio_sigevent;
		}

		if (sigev32->sigev_notify == SIGEV_PORT) {
			if (copyin(
			    (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
			    &pntfy32, sizeof (port_notify32_t))) {
				releasef(fd);
				return (EFAULT);
			}
			pntfy.portnfy_port = pntfy32.portnfy_port;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    pntfy32.portnfy_user;
			aio_use_port = 1;
		} else if (sigev32->sigev_notify == SIGEV_THREAD) {
			pntfy.portnfy_port = sigev32->sigev_signo;
			pntfy.portnfy_user = (void *)(uintptr_t)
			    sigev32->sigev_value.sival_ptr;
			aio_use_port = 1;
		}
	}
#endif  /* _SYSCALL32_IMPL */

	/*
	 * check the permission of the partition
	 */

	if ((fp->f_flag & mode) == 0) {
		releasef(fd);
		return (EBADF);
	}

	vp = fp->f_vnode;
	aio_func = check_vp(vp, mode);
	if (aio_func == NULL) {
		releasef(fd);
		return (EBADFD);
	}
	if (run_mode == AIO_LARGEFILE)
		error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
	else
		error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);

	if (error) {
		releasef(fd);
		return (error);
	}
	/*
	 * enable polling on this request if the opcode has
	 * the AIO poll bit set
	 */
	if (opcode & AIO_POLL_BIT)
		reqp->aio_req_flags |= AIO_POLL;

	if (model == DATAMODEL_NATIVE)
		reqp->aio_req_iocb.iocb = aiocb_arg;
#ifdef  _SYSCALL32_IMPL
	else
		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
#endif

	if (aio_use_port) {
		int event = (run_mode == AIO_LARGEFILE)?
		    ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
		    ((mode == FREAD)? AIOAREAD : AIOAWRITE);
		error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
	}

	/*
	 * send the request to driver.
	 */
	if (error == 0) {
		if (bufsize == 0) {
			clear_active_fd(fd);
			aio_zerolen(reqp);
			return (0);
		}
		error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
	}

	/*
	 * the fd is stored in the aio_req_t by aio_req_setup(), and
	 * is released by the aio_cleanup_thread() when the IO has
	 * completed.
	 */
	if (error) {
		releasef(fd);
		mutex_enter(&aiop->aio_mutex);
		if (aio_use_port)
			aio_deq(&aiop->aio_portpending, reqp);
		aio_req_free(aiop, reqp);
		aiop->aio_pending--;
		if (aiop->aio_flags & AIO_REQ_BLOCK)
			cv_signal(&aiop->aio_cleanupcv);
		mutex_exit(&aiop->aio_mutex);
		return (error);
	}
	clear_active_fd(fd);
	return (0);
}
/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
	aio_t *aiop = curproc->p_aio;

	if (aiop == NULL)
		return;

	mutex_enter(&aiop->aio_mutex);
	if (portused)
		aio_deq(&aiop->aio_portpending, reqp);
	aiop->aio_pending--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
	reqp->aio_req_flags |= AIO_PHYSIODONE;
	/*
	 * Need to free the request now as it's never
	 * going to get on the done queue
	 *
	 * Note: aio_outstanding is decremented in
	 *	 aio_req_free()
	 */
	aio_req_free(aiop, reqp);
	if (aiop->aio_flags & AIO_REQ_BLOCK)
		cv_signal(&aiop->aio_cleanupcv);
	mutex_exit(&aiop->aio_mutex);
}
/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove anybody from the done queue
 * if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
	aio_req_t **bucket;
	aio_req_t *ent;
	aio_t *aiop = curproc->p_aio;
	long index;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (resultp) {
		index = AIO_HASH(resultp);
		bucket = &aiop->aio_hash[index];
		for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
			if (ent->aio_req_resultp == (aio_result_t *)resultp) {
				if (ent->aio_req_flags & AIO_DONEQ) {
					return (aio_req_remove(ent));
				}
				return (NULL);
			}
		}
		/* no match, resultp is invalid */
		return (NULL);
	}
	return (aio_req_remove(NULL));
}
2301 * determine if a user-level resultp pointer is associated with an
2302 * active IO request. Zero is returned when the request is done,
2303 * and the request is removed from the done queue. Only when the
2304 * return value is zero, is the "reqp" pointer valid. One is returned
2305 * when the request is inprogress. Two is returned when the request
2306 * is invalid.
2308 static int
2309 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2311 aio_req_t **bucket;
2312 aio_req_t *ent;
2313 aio_t *aiop = curproc->p_aio;
2314 long index;
2316 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2317 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2319 index = AIO_HASH(resultp);
2320 bucket = &aiop->aio_hash[index];
2321 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2322 if (ent->aio_req_resultp == resultp) {
2323 if (ent->aio_req_flags & AIO_DONEQ) {
2324 *reqp = aio_req_remove(ent);
2325 return (0);
2327 return (1);
2330 /* no match, resultp is invalid */
2331 return (2);
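/*
 * Illustrative sketch (not part of the original source): a caller that
 * already holds aio_cleanupq_mutex and aio_mutex, as the ASSERTs above
 * require, can dispatch on the three-way return value of aio_req_find().
 * In the 0 case reqp is valid and the request has already been removed
 * from the done queue; the non-zero cases map to the errors that
 * aioerror() reports back to the application.
 *
 *	aio_req_t *reqp;
 *	int error;
 *
 *	switch (aio_req_find(resultp, &reqp)) {
 *	case 0:
 *		error = 0;
 *		break;
 *	case 1:
 *		error = EINPROGRESS;
 *		break;
 *	default:
 *		error = EINVAL;
 *		break;
 *	}
 */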
2335 * remove a request from the done queue.
2337 static aio_req_t *
2338 aio_req_remove(aio_req_t *reqp)
2340 aio_t *aiop = curproc->p_aio;
2342 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2344 if (reqp != NULL) {
2345 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2346 if (reqp->aio_req_next == reqp) {
2347 /* only one request on queue */
2348 if (reqp == aiop->aio_doneq) {
2349 aiop->aio_doneq = NULL;
2350 } else {
2351 ASSERT(reqp == aiop->aio_cleanupq);
2352 aiop->aio_cleanupq = NULL;
2354 } else {
2355 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2356 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2358 * The request can be either on the aio_doneq or the
2359 * aio_cleanupq
2361 if (reqp == aiop->aio_doneq)
2362 aiop->aio_doneq = reqp->aio_req_next;
2364 if (reqp == aiop->aio_cleanupq)
2365 aiop->aio_cleanupq = reqp->aio_req_next;
2367 reqp->aio_req_flags &= ~AIO_DONEQ;
2368 reqp->aio_req_next = NULL;
2369 reqp->aio_req_prev = NULL;
2370 } else if ((reqp = aiop->aio_doneq) != NULL) {
2371 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2372 if (reqp == reqp->aio_req_next) {
2373 /* only one request on queue */
2374 aiop->aio_doneq = NULL;
2375 } else {
2376 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2377 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2378 aiop->aio_doneq = reqp->aio_req_next;
2380 reqp->aio_req_flags &= ~AIO_DONEQ;
2381 reqp->aio_req_next = NULL;
2382 reqp->aio_req_prev = NULL;
2384 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2385 cv_broadcast(&aiop->aio_waitcv);
2386 return (reqp);
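/*
 * Note on the queues manipulated by aio_req_remove() above: aio_doneq
 * and aio_cleanupq are circular, doubly linked lists threaded through
 * aio_req_next and aio_req_prev. A queue holding a single request
 * points back at itself (reqp->aio_req_next == reqp), which is the
 * special case tested first in both branches above.
 */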
2389 static int
2390 aio_req_setup(
2391 aio_req_t **reqpp,
2392 aio_t *aiop,
2393 aiocb_t *arg,
2394 aio_result_t *resultp,
2395 vnode_t *vp,
2396 int old_solaris_req)
2398 sigqueue_t *sqp = NULL;
2399 aio_req_t *reqp;
2400 struct uio *uio;
2401 struct sigevent *sigev;
2402 int error;
2404 sigev = &arg->aio_sigevent;
2405 if (sigev->sigev_notify == SIGEV_SIGNAL &&
2406 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2407 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2408 if (sqp == NULL)
2409 return (EAGAIN);
2410 sqp->sq_func = NULL;
2411 sqp->sq_next = NULL;
2412 sqp->sq_info.si_code = SI_ASYNCIO;
2413 sqp->sq_info.si_pid = curproc->p_pid;
2414 sqp->sq_info.si_ctid = PRCTID(curproc);
2415 sqp->sq_info.si_zoneid = getzoneid();
2416 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2417 sqp->sq_info.si_signo = sigev->sigev_signo;
2418 sqp->sq_info.si_value = sigev->sigev_value;
2421 mutex_enter(&aiop->aio_mutex);
2423 if (aiop->aio_flags & AIO_REQ_BLOCK) {
2424 mutex_exit(&aiop->aio_mutex);
2425 if (sqp)
2426 kmem_free(sqp, sizeof (sigqueue_t));
2427 return (EIO);
2430 * get an aio_reqp from the free list or allocate one
2431 * from dynamic memory.
2433 if (error = aio_req_alloc(&reqp, resultp)) {
2434 mutex_exit(&aiop->aio_mutex);
2435 if (sqp)
2436 kmem_free(sqp, sizeof (sigqueue_t));
2437 return (error);
2439 aiop->aio_pending++;
2440 aiop->aio_outstanding++;
2441 reqp->aio_req_flags = AIO_PENDING;
2442 if (old_solaris_req) {
2443 /* this is an old solaris aio request */
2444 reqp->aio_req_flags |= AIO_SOLARIS;
2445 aiop->aio_flags |= AIO_SOLARIS_REQ;
2447 if (sigev->sigev_notify == SIGEV_THREAD ||
2448 sigev->sigev_notify == SIGEV_PORT)
2449 aio_enq(&aiop->aio_portpending, reqp, 0);
2450 mutex_exit(&aiop->aio_mutex);
2452 * initialize aio request.
2454 reqp->aio_req_fd = arg->aio_fildes;
2455 reqp->aio_req_sigqp = sqp;
2456 reqp->aio_req_iocb.iocb = NULL;
2457 reqp->aio_req_lio = NULL;
2458 reqp->aio_req_buf.b_file = vp;
2459 uio = reqp->aio_req.aio_uio;
2460 uio->uio_iovcnt = 1;
2461 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2462 uio->uio_iov->iov_len = arg->aio_nbytes;
2463 uio->uio_loffset = arg->aio_offset;
2464 *reqpp = reqp;
2465 return (0);
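/*
 * For reference (not part of the original source): aio_req_setup()
 * consumes the aio_fildes, aio_buf, aio_nbytes, aio_offset and
 * aio_sigevent fields of the user's aiocb. A minimal user-level
 * request that exercises the SIGEV_SIGNAL path handled above might
 * look like the following sketch; fd and buf are assumed to exist and
 * the signal number is arbitrary.
 *
 *	#include <aio.h>
 *	#include <signal.h>
 *	#include <string.h>
 *	#include <stdio.h>
 *
 *	struct aiocb cb;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 *	if (aio_read(&cb) != 0)
 *		perror("aio_read");
 */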
2469 * Allocate p_aio struct.
2471 static aio_t *
2472 aio_aiop_alloc(void)
2474 aio_t *aiop;
2476 ASSERT(MUTEX_HELD(&curproc->p_lock));
2478 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2479 if (aiop) {
2480 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2481 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2482 NULL);
2483 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2485 return (aiop);
2489 * Allocate an aio_req struct.
2491 static int
2492 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2494 aio_req_t *reqp;
2495 aio_t *aiop = curproc->p_aio;
2497 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2499 if ((reqp = aiop->aio_free) != NULL) {
2500 aiop->aio_free = reqp->aio_req_next;
2501 bzero(reqp, sizeof (*reqp));
2502 } else {
2504 * Check whether memory is getting tight.
2505 * This is a temporary mechanism to avoid memory
2506 * exhaustion by a single process until we come up
2507 * with a per process solution such as setrlimit().
2509 if (freemem < desfree)
2510 return (EAGAIN);
2511 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2512 if (reqp == NULL)
2513 return (EAGAIN);
2515 reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2516 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2517 reqp->aio_req.aio_private = reqp;
2518 reqp->aio_req_buf.b_offset = -1;
2519 reqp->aio_req_resultp = resultp;
2520 if (aio_hash_insert(reqp, aiop)) {
2521 reqp->aio_req_next = aiop->aio_free;
2522 aiop->aio_free = reqp;
2523 return (EBUSY);
2525 *nreqp = reqp;
2526 return (0);
2530 * Allocate an aio_lio_t struct.
2532 static int
2533 aio_lio_alloc(aio_lio_t **head)
2535 aio_lio_t *liop;
2536 aio_t *aiop = curproc->p_aio;
2538 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2540 if ((liop = aiop->aio_lio_free) != NULL) {
2541 aiop->aio_lio_free = liop->lio_next;
2542 } else {
2544 * Check whether memory is getting tight.
2545 * This is a temporary mechanism to avoid memory
2546 * exhaustion by a single process until we come up
2547 * with a per process solution such as setrlimit().
2549 if (freemem < desfree)
2550 return (EAGAIN);
2552 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2553 if (liop == NULL)
2554 return (EAGAIN);
2556 *head = liop;
2557 return (0);
2561 * This is a special per-process thread that is only activated if
2562 * the process is unmapping a segment with outstanding aio. Normally,
2563 * the process will have completed the aio before unmapping the
2564 * segment. If the process does unmap a segment with outstanding aio,
2565 * this special thread will guarantee that the locked pages due to
2566 * aphysio() are released, thereby permitting the segment to be
2567 * unmapped. In addition to this, the cleanup thread is woken up
2568 * during DR operations to release the locked pages.
2571 static int
2572 aio_cleanup_thread(aio_t *aiop)
2574 proc_t *p = curproc;
2575 struct as *as = p->p_as;
2576 int poked = 0;
2577 kcondvar_t *cvp;
2578 int exit_flag = 0;
2579 int rqclnup = 0;
2581 sigfillset(&curthread->t_hold);
2582 sigdiffset(&curthread->t_hold, &cantmask);
2583 for (;;) {
2585 * if a segment is being unmapped, and the current
2586 * process's done queue is not empty, then every request
2587 * on the doneq with locked resources should be forced
2588 * to release its locks. By moving the doneq requests
2589 * to the cleanupq, aio_cleanup() will process the cleanupq,
2590 * and place requests back onto the doneq. All requests
2591 * processed by aio_cleanup() will have their physical
2592 * resources unlocked.
2594 mutex_enter(&aiop->aio_mutex);
2595 if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2596 aiop->aio_flags |= AIO_CLEANUP;
2597 mutex_enter(&as->a_contents);
2598 if (aiop->aio_rqclnup) {
2599 aiop->aio_rqclnup = 0;
2600 rqclnup = 1;
2602 mutex_exit(&as->a_contents);
2603 if (aiop->aio_doneq) {
2604 aio_req_t *doneqhead = aiop->aio_doneq;
2605 aiop->aio_doneq = NULL;
2606 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2609 mutex_exit(&aiop->aio_mutex);
2610 aio_cleanup(AIO_CLEANUP_THREAD);
2612 * thread should block on the cleanupcv while
2613 * AIO_CLEANUP is set.
2615 cvp = &aiop->aio_cleanupcv;
2616 mutex_enter(&aiop->aio_mutex);
2618 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2619 aiop->aio_notifyq != NULL ||
2620 aiop->aio_portcleanupq != NULL) {
2621 mutex_exit(&aiop->aio_mutex);
2622 continue;
2624 mutex_enter(&as->a_contents);
2627 * AIO_CLEANUP determines when the cleanup thread
2628 * should be active. This flag is set when
2629 * the cleanup thread is awakened by as_unmap() or
2630 * due to DR operations.
2631 * The flag is cleared when the blocking as_unmap()
2632 * that originally awakened us is allowed to
2633 * complete. as_unmap() blocks when trying to
2634 * unmap a segment that has SOFTLOCKed pages. When
2635 * the segment's pages are all SOFTUNLOCKed,
2636 * as->a_flags & AS_UNMAPWAIT should be zero.
2638 * In case of cleanup request by DR, the flag is cleared
2639 * once all the pending aio requests have been processed.
2641 * The flag shouldn't be cleared right away if the
2642 * cleanup thread was interrupted because the process
2643 * is doing forkall(). This happens when cv_wait_sig()
2644 * returns zero, because it was awakened by a pokelwps().
2645 * If the process is not exiting, it must be doing forkall().
2647 if ((poked == 0) &&
2648 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2649 (aiop->aio_pending == 0))) {
2650 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2651 cvp = &as->a_cv;
2652 rqclnup = 0;
2654 mutex_exit(&aiop->aio_mutex);
2655 if (poked) {
2657 * If the process is exiting/killed, don't return
2658 * immediately without waiting for pending I/O's
2659 * and releasing the page locks.
2661 if (p->p_flag & (SEXITLWPS|SKILLED)) {
2663 * If exit_flag is set, then it is
2664 * safe to exit because we have released
2665 * page locks of completed I/O's.
2667 if (exit_flag)
2668 break;
2670 mutex_exit(&as->a_contents);
2673 * Wait for all the pending aio to complete.
2675 mutex_enter(&aiop->aio_mutex);
2676 aiop->aio_flags |= AIO_REQ_BLOCK;
2677 while (aiop->aio_pending != 0)
2678 cv_wait(&aiop->aio_cleanupcv,
2679 &aiop->aio_mutex);
2680 mutex_exit(&aiop->aio_mutex);
2681 exit_flag = 1;
2682 continue;
2683 } else if (p->p_flag &
2684 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2686 * hold LWP until it
2687 * is continued.
2689 mutex_exit(&as->a_contents);
2690 mutex_enter(&p->p_lock);
2691 stop(PR_SUSPENDED, SUSPEND_NORMAL);
2692 mutex_exit(&p->p_lock);
2693 poked = 0;
2694 continue;
2696 } else {
2698 * When started, this thread will sleep on as->a_cv.
2699 * as_unmap() will wake this thread if the
2700 * segment has SOFTLOCKed pages (poked = 0).
2701 * 1. pokelwps() awakes this thread =>
2702 * break the loop to check SEXITLWPS, SHOLDFORK, etc
2703 * 2. as_unmap awakes this thread =>
2704 * to break the loop it is necessary that
2705 * - AS_UNMAPWAIT is set (as_unmap is waiting for
2706 * memory to be unlocked)
2707 * - AIO_CLEANUP is not set
2708 * (if AIO_CLEANUP is set we have to wait for
2709 * pending requests. aio_done will send a signal
2710 * for every request which completes to continue
2711 * unmapping the corresponding address range)
2712 * 3. A cleanup request will wake this thread up, ex.
2713 * by the DR operations. The aio_rqclnup flag will
2714 * be set.
2716 while (poked == 0) {
2718 * Cleanup requests that came in after we had
2719 * just cleaned up cannot be what is blocking
2720 * the unmap thread, because the unmap event
2721 * happened first.
2722 * Let aio_done() wake us up if it sees a need.
2724 if (aiop->aio_rqclnup &&
2725 (aiop->aio_flags & AIO_CLEANUP) == 0)
2726 break;
2727 poked = !cv_wait_sig(cvp, &as->a_contents);
2728 if (AS_ISUNMAPWAIT(as) == 0)
2729 cv_signal(cvp);
2730 if (aiop->aio_outstanding != 0)
2731 break;
2734 mutex_exit(&as->a_contents);
2736 exit:
2737 mutex_exit(&as->a_contents);
2738 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2739 aston(curthread); /* make thread do post_syscall */
2740 return (0);
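/*
 * Note: aio_cleanup_thread() loops forever servicing unmap- and
 * DR-driven cleanup requests; it only returns after being poked while
 * the process is exiting (SEXITLWPS|SKILLED) and exit_flag records
 * that pending I/O has drained and the page locks of completed
 * requests have been released, which is why the ASSERT after the exit
 * label can insist that the process is exiting.
 */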
2744 * save a reference to a user's outstanding aio in a hash list.
2746 static int
2747 aio_hash_insert(
2748 aio_req_t *aio_reqp,
2749 aio_t *aiop)
2751 long index;
2752 aio_result_t *resultp = aio_reqp->aio_req_resultp;
2753 aio_req_t *current;
2754 aio_req_t **nextp;
2756 index = AIO_HASH(resultp);
2757 nextp = &aiop->aio_hash[index];
2758 while ((current = *nextp) != NULL) {
2759 if (current->aio_req_resultp == resultp)
2760 return (DUPLICATE);
2761 nextp = &current->aio_hash_next;
2763 *nextp = aio_reqp;
2764 aio_reqp->aio_hash_next = NULL;
2765 return (0);
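/*
 * Illustrative sketch (not part of the original source): the matching
 * lookup is open-coded in aio_req_done() and aio_req_find() above; on
 * its own it is just a walk of the chain selected by AIO_HASH():
 *
 *	aio_req_t *ent;
 *
 *	for (ent = aiop->aio_hash[AIO_HASH(resultp)]; ent != NULL;
 *	    ent = ent->aio_hash_next) {
 *		if (ent->aio_req_resultp == resultp)
 *			return (ent);
 *	}
 *	return (NULL);
 */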
2768 static int
2769 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2770 cred_t *)
2772 struct snode *sp;
2773 dev_t dev;
2774 struct cb_ops *cb;
2775 major_t major;
2776 int (*aio_func)();
2778 dev = vp->v_rdev;
2779 major = getmajor(dev);
2782 * return NULL for requests to files and STREAMs so
2783 * that libaio takes care of them.
2785 if (vp->v_type == VCHR) {
2786 /* no stream device for kaio */
2787 if (STREAMSTAB(major)) {
2788 return (NULL);
2790 } else {
2791 return (NULL);
2795 * Check old drivers which do not have async I/O entry points.
2797 if (devopsp[major]->devo_rev < 3)
2798 return (NULL);
2800 cb = devopsp[major]->devo_cb_ops;
2802 if (cb->cb_rev < 1)
2803 return (NULL);
2806 * Check whether this device has a strategy routine.
2807 * Kaio is not supported for devices like ttys.
2809 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2810 return (NULL);
2812 if (mode & FREAD)
2813 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2814 else
2815 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2818 * Do we need this?
2819 * nodev returns ENXIO anyway.
2821 if (aio_func == nodev)
2822 return (NULL);
2824 sp = VTOS(vp);
2825 smark(sp, SACC);
2826 return (aio_func);
2830 * Clustering: We want check_vp to return a function prototyped
2831 * correctly that will be common to both PXFS and the regular case.
2832 * We define this intermediate function that will do the right
2833 * thing for driver cases.
2836 static int
2837 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2839 dev_t dev;
2840 struct cb_ops *cb;
2842 ASSERT(vp->v_type == VCHR);
2843 dev = VTOS(vp)->s_dev;
2844 ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2846 cb = devopsp[getmajor(dev)]->devo_cb_ops;
2848 ASSERT(cb->cb_awrite != nodev);
2849 return ((*cb->cb_awrite)(dev, aio, cred_p));
2853 * Clustering: We want check_vp to return a function prototyped
2854 * correctly that will be common to both PXFS and the regular case.
2855 * We define this intermediate function that will do the right
2856 * thing for driver cases.
2859 static int
2860 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2862 dev_t dev;
2863 struct cb_ops *cb;
2865 ASSERT(vp->v_type == VCHR);
2866 dev = VTOS(vp)->s_dev;
2867 ASSERT(!STREAMSTAB(getmajor(dev)));
2869 cb = devopsp[getmajor(dev)]->devo_cb_ops;
2871 ASSERT(cb->cb_aread != nodev);
2872 return ((*cb->cb_aread)(dev, aio, cred_p));
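/*
 * For reference (not part of the original source): the cb_aread and
 * cb_awrite entry points invoked by the two wrappers above are
 * typically implemented by character drivers in terms of aphysio(9F),
 * roughly as in the following sketch; the xx_ names are hypothetical
 * driver routines.
 *
 *	static int
 *	xx_aread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
 *	{
 *		return (aphysio(xx_strategy, anocancel, dev, B_READ,
 *		    minphys, aio));
 *	}
 *
 *	static int
 *	xx_awrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
 *	{
 *		return (aphysio(xx_strategy, anocancel, dev, B_WRITE,
 *		    minphys, aio));
 *	}
 */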
2876 * This routine is called when a largefile call is made by a 32-bit
2877 * process on an ILP32 or LP64 kernel. All 64-bit processes are
2878 * largefile by definition and will call alio() instead.
2880 static int
2881 alioLF(
2882 int mode_arg,
2883 void *aiocb_arg,
2884 int nent,
2885 void *sigev)
2887 file_t *fp;
2888 file_t *prev_fp = NULL;
2889 int prev_mode = -1;
2890 struct vnode *vp;
2891 aio_lio_t *head;
2892 aio_req_t *reqp;
2893 aio_t *aiop;
2894 caddr_t cbplist;
2895 aiocb64_32_t cb64;
2896 aiocb64_32_t *aiocb = &cb64;
2897 aiocb64_32_t *cbp;
2898 caddr32_t *ucbp;
2899 #ifdef _LP64
2900 aiocb_t aiocb_n;
2901 #endif
2902 struct sigevent32 sigevk;
2903 sigqueue_t *sqp;
2904 int (*aio_func)();
2905 int mode;
2906 int error = 0;
2907 int aio_errors = 0;
2908 int i;
2909 size_t ssize;
2910 int deadhead = 0;
2911 int aio_notsupported = 0;
2912 int lio_head_port;
2913 int aio_port;
2914 int aio_thread;
2915 port_kevent_t *pkevtp = NULL;
2916 int portused = 0;
2917 port_notify32_t pnotify;
2918 int event;
2920 aiop = curproc->p_aio;
2921 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2922 return (EINVAL);
2924 ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2926 ssize = (sizeof (caddr32_t) * nent);
2927 cbplist = kmem_alloc(ssize, KM_SLEEP);
2928 ucbp = (caddr32_t *)cbplist;
2930 if (copyin(aiocb_arg, cbplist, ssize) ||
2931 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2932 kmem_free(cbplist, ssize);
2933 return (EFAULT);
2936 /* Event Ports */
2937 if (sigev &&
2938 (sigevk.sigev_notify == SIGEV_THREAD ||
2939 sigevk.sigev_notify == SIGEV_PORT)) {
2940 if (sigevk.sigev_notify == SIGEV_THREAD) {
2941 pnotify.portnfy_port = sigevk.sigev_signo;
2942 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2943 } else if (copyin(
2944 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2945 &pnotify, sizeof (pnotify))) {
2946 kmem_free(cbplist, ssize);
2947 return (EFAULT);
2949 error = port_alloc_event(pnotify.portnfy_port,
2950 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2951 if (error) {
2952 if (error == ENOMEM || error == EAGAIN)
2953 error = EAGAIN;
2954 else
2955 error = EINVAL;
2956 kmem_free(cbplist, ssize);
2957 return (error);
2959 lio_head_port = pnotify.portnfy_port;
2960 portused = 1;
2964 * a list head should be allocated if notification is
2965 * enabled for this list.
2967 head = NULL;
2969 if (mode_arg == LIO_WAIT || sigev) {
2970 mutex_enter(&aiop->aio_mutex);
2971 error = aio_lio_alloc(&head);
2972 mutex_exit(&aiop->aio_mutex);
2973 if (error)
2974 goto done;
2975 deadhead = 1;
2976 head->lio_nent = nent;
2977 head->lio_refcnt = nent;
2978 head->lio_port = -1;
2979 head->lio_portkev = NULL;
2980 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2981 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2982 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2983 if (sqp == NULL) {
2984 error = EAGAIN;
2985 goto done;
2987 sqp->sq_func = NULL;
2988 sqp->sq_next = NULL;
2989 sqp->sq_info.si_code = SI_ASYNCIO;
2990 sqp->sq_info.si_pid = curproc->p_pid;
2991 sqp->sq_info.si_ctid = PRCTID(curproc);
2992 sqp->sq_info.si_zoneid = getzoneid();
2993 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2994 sqp->sq_info.si_signo = sigevk.sigev_signo;
2995 sqp->sq_info.si_value.sival_int =
2996 sigevk.sigev_value.sival_int;
2997 head->lio_sigqp = sqp;
2998 } else {
2999 head->lio_sigqp = NULL;
3001 if (pkevtp) {
3003 * Prepare data to send when list of aiocb's
3004 * has completed.
3006 port_init_event(pkevtp, (uintptr_t)sigev,
3007 (void *)(uintptr_t)pnotify.portnfy_user,
3008 NULL, head);
3009 pkevtp->portkev_events = AIOLIO64;
3010 head->lio_portkev = pkevtp;
3011 head->lio_port = pnotify.portnfy_port;
3015 for (i = 0; i < nent; i++, ucbp++) {
3017 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3018 /* skip entry if it can't be copied. */
3019 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3020 if (head) {
3021 mutex_enter(&aiop->aio_mutex);
3022 head->lio_nent--;
3023 head->lio_refcnt--;
3024 mutex_exit(&aiop->aio_mutex);
3026 continue;
3029 /* skip if opcode for aiocb is LIO_NOP */
3030 mode = aiocb->aio_lio_opcode;
3031 if (mode == LIO_NOP) {
3032 cbp = NULL;
3033 if (head) {
3034 mutex_enter(&aiop->aio_mutex);
3035 head->lio_nent--;
3036 head->lio_refcnt--;
3037 mutex_exit(&aiop->aio_mutex);
3039 continue;
3042 /* increment file descriptor's ref count. */
3043 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3044 lio_set_uerror(&cbp->aio_resultp, EBADF);
3045 if (head) {
3046 mutex_enter(&aiop->aio_mutex);
3047 head->lio_nent--;
3048 head->lio_refcnt--;
3049 mutex_exit(&aiop->aio_mutex);
3051 aio_errors++;
3052 continue;
3056 * check that the file was opened with the requested access mode
3058 if ((fp->f_flag & mode) == 0) {
3059 releasef(aiocb->aio_fildes);
3060 lio_set_uerror(&cbp->aio_resultp, EBADF);
3061 if (head) {
3062 mutex_enter(&aiop->aio_mutex);
3063 head->lio_nent--;
3064 head->lio_refcnt--;
3065 mutex_exit(&aiop->aio_mutex);
3067 aio_errors++;
3068 continue;
3072 * common case where requests are to the same fd
3073 * for the same r/w operation
3074 * for UFS, need to set EBADFD
3076 vp = fp->f_vnode;
3077 if (fp != prev_fp || mode != prev_mode) {
3078 aio_func = check_vp(vp, mode);
3079 if (aio_func == NULL) {
3080 prev_fp = NULL;
3081 releasef(aiocb->aio_fildes);
3082 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3083 aio_notsupported++;
3084 if (head) {
3085 mutex_enter(&aiop->aio_mutex);
3086 head->lio_nent--;
3087 head->lio_refcnt--;
3088 mutex_exit(&aiop->aio_mutex);
3090 continue;
3091 } else {
3092 prev_fp = fp;
3093 prev_mode = mode;
3097 #ifdef _LP64
3098 aiocb_LFton(aiocb, &aiocb_n);
3099 error = aio_req_setup(&reqp, aiop, &aiocb_n,
3100 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3101 #else
3102 error = aio_req_setupLF(&reqp, aiop, aiocb,
3103 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3104 #endif /* _LP64 */
3105 if (error) {
3106 releasef(aiocb->aio_fildes);
3107 lio_set_uerror(&cbp->aio_resultp, error);
3108 if (head) {
3109 mutex_enter(&aiop->aio_mutex);
3110 head->lio_nent--;
3111 head->lio_refcnt--;
3112 mutex_exit(&aiop->aio_mutex);
3114 aio_errors++;
3115 continue;
3118 reqp->aio_req_lio = head;
3119 deadhead = 0;
3122 * Set the errno field now before sending the request to
3123 * the driver to avoid a race condition
3125 (void) suword32(&cbp->aio_resultp.aio_errno,
3126 EINPROGRESS);
3128 reqp->aio_req_iocb.iocb32 = *ucbp;
3130 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3131 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3132 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3133 if (aio_port | aio_thread) {
3134 port_kevent_t *lpkevp;
3136 * Prepare data to send with each aiocb completed.
3138 if (aio_port) {
3139 void *paddr = (void *)(uintptr_t)
3140 aiocb->aio_sigevent.sigev_value.sival_ptr;
3141 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3142 error = EFAULT;
3143 } else { /* aio_thread */
3144 pnotify.portnfy_port =
3145 aiocb->aio_sigevent.sigev_signo;
3146 pnotify.portnfy_user =
3147 aiocb->aio_sigevent.sigev_value.sival_ptr;
3149 if (error)
3150 /* EMPTY */;
3151 else if (pkevtp != NULL &&
3152 pnotify.portnfy_port == lio_head_port)
3153 error = port_dup_event(pkevtp, &lpkevp,
3154 PORT_ALLOC_DEFAULT);
3155 else
3156 error = port_alloc_event(pnotify.portnfy_port,
3157 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3158 &lpkevp);
3159 if (error == 0) {
3160 port_init_event(lpkevp, (uintptr_t)*ucbp,
3161 (void *)(uintptr_t)pnotify.portnfy_user,
3162 aio_port_callback, reqp);
3163 lpkevp->portkev_events = event;
3164 reqp->aio_req_portkev = lpkevp;
3165 reqp->aio_req_port = pnotify.portnfy_port;
3170 * send the request to the driver.
3172 if (error == 0) {
3173 if (aiocb->aio_nbytes == 0) {
3174 clear_active_fd(aiocb->aio_fildes);
3175 aio_zerolen(reqp);
3176 continue;
3178 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3179 CRED());
3183 * the fd's ref count is not decremented until the IO has
3184 * completed unless there was an error.
3186 if (error) {
3187 releasef(aiocb->aio_fildes);
3188 lio_set_uerror(&cbp->aio_resultp, error);
3189 if (head) {
3190 mutex_enter(&aiop->aio_mutex);
3191 head->lio_nent--;
3192 head->lio_refcnt--;
3193 mutex_exit(&aiop->aio_mutex);
3195 if (error == ENOTSUP)
3196 aio_notsupported++;
3197 else
3198 aio_errors++;
3199 lio_set_error(reqp, portused);
3200 } else {
3201 clear_active_fd(aiocb->aio_fildes);
3205 if (aio_notsupported) {
3206 error = ENOTSUP;
3207 } else if (aio_errors) {
3209 * return EIO if any request failed
3211 error = EIO;
3214 if (mode_arg == LIO_WAIT) {
3215 mutex_enter(&aiop->aio_mutex);
3216 while (head->lio_refcnt > 0) {
3217 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3218 mutex_exit(&aiop->aio_mutex);
3219 error = EINTR;
3220 goto done;
3223 mutex_exit(&aiop->aio_mutex);
3224 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3227 done:
3228 kmem_free(cbplist, ssize);
3229 if (deadhead) {
3230 if (head->lio_sigqp)
3231 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3232 if (head->lio_portkev)
3233 port_free_event(head->lio_portkev);
3234 kmem_free(head, sizeof (aio_lio_t));
3236 return (error);
3239 #ifdef _SYSCALL32_IMPL
3240 static void
3241 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3243 dest->aio_fildes = src->aio_fildes;
3244 dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3245 dest->aio_nbytes = (size_t)src->aio_nbytes;
3246 dest->aio_offset = (off_t)src->aio_offset;
3247 dest->aio_reqprio = src->aio_reqprio;
3248 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3249 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3252 * See comment in sigqueue32() on handling of 32-bit
3253 * sigvals in a 64-bit kernel.
3255 dest->aio_sigevent.sigev_value.sival_int =
3256 (int)src->aio_sigevent.sigev_value.sival_int;
3257 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3258 (uintptr_t)src->aio_sigevent.sigev_notify_function;
3259 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3260 (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3261 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3262 dest->aio_lio_opcode = src->aio_lio_opcode;
3263 dest->aio_state = src->aio_state;
3264 dest->aio__pad[0] = src->aio__pad[0];
3266 #endif
3269 * This function is used only for largefile calls made by
3270 * 32-bit applications.
3272 static int
3273 aio_req_setupLF(
3274 aio_req_t **reqpp,
3275 aio_t *aiop,
3276 aiocb64_32_t *arg,
3277 aio_result_t *resultp,
3278 vnode_t *vp,
3279 int old_solaris_req)
3281 sigqueue_t *sqp = NULL;
3282 aio_req_t *reqp;
3283 struct uio *uio;
3284 struct sigevent32 *sigev;
3285 int error;
3287 sigev = &arg->aio_sigevent;
3288 if (sigev->sigev_notify == SIGEV_SIGNAL &&
3289 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3290 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3291 if (sqp == NULL)
3292 return (EAGAIN);
3293 sqp->sq_func = NULL;
3294 sqp->sq_next = NULL;
3295 sqp->sq_info.si_code = SI_ASYNCIO;
3296 sqp->sq_info.si_pid = curproc->p_pid;
3297 sqp->sq_info.si_ctid = PRCTID(curproc);
3298 sqp->sq_info.si_zoneid = getzoneid();
3299 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3300 sqp->sq_info.si_signo = sigev->sigev_signo;
3301 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3304 mutex_enter(&aiop->aio_mutex);
3306 if (aiop->aio_flags & AIO_REQ_BLOCK) {
3307 mutex_exit(&aiop->aio_mutex);
3308 if (sqp)
3309 kmem_free(sqp, sizeof (sigqueue_t));
3310 return (EIO);
3313 * get an aio_reqp from the free list or allocate one
3314 * from dynamic memory.
3316 if (error = aio_req_alloc(&reqp, resultp)) {
3317 mutex_exit(&aiop->aio_mutex);
3318 if (sqp)
3319 kmem_free(sqp, sizeof (sigqueue_t));
3320 return (error);
3322 aiop->aio_pending++;
3323 aiop->aio_outstanding++;
3324 reqp->aio_req_flags = AIO_PENDING;
3325 if (old_solaris_req) {
3326 /* this is an old solaris aio request */
3327 reqp->aio_req_flags |= AIO_SOLARIS;
3328 aiop->aio_flags |= AIO_SOLARIS_REQ;
3330 if (sigev->sigev_notify == SIGEV_THREAD ||
3331 sigev->sigev_notify == SIGEV_PORT)
3332 aio_enq(&aiop->aio_portpending, reqp, 0);
3333 mutex_exit(&aiop->aio_mutex);
3335 * initialize aio request.
3337 reqp->aio_req_fd = arg->aio_fildes;
3338 reqp->aio_req_sigqp = sqp;
3339 reqp->aio_req_iocb.iocb = NULL;
3340 reqp->aio_req_lio = NULL;
3341 reqp->aio_req_buf.b_file = vp;
3342 uio = reqp->aio_req.aio_uio;
3343 uio->uio_iovcnt = 1;
3344 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3345 uio->uio_iov->iov_len = arg->aio_nbytes;
3346 uio->uio_loffset = arg->aio_offset;
3347 *reqpp = reqp;
3348 return (0);
3352 * This routine is called when a non-largefile call is made by a 32-bit
3353 * process on an ILP32 or LP64 kernel.
3355 static int
3356 alio32(
3357 int mode_arg,
3358 void *aiocb_arg,
3359 int nent,
3360 void *sigev)
3362 file_t *fp;
3363 file_t *prev_fp = NULL;
3364 int prev_mode = -1;
3365 struct vnode *vp;
3366 aio_lio_t *head;
3367 aio_req_t *reqp;
3368 aio_t *aiop;
3369 caddr_t cbplist;
3370 aiocb_t cb;
3371 aiocb_t *aiocb = &cb;
3372 #ifdef _LP64
3373 aiocb32_t *cbp;
3374 caddr32_t *ucbp;
3375 aiocb32_t cb32;
3376 aiocb32_t *aiocb32 = &cb32;
3377 struct sigevent32 sigevk;
3378 #else
3379 aiocb_t *cbp, **ucbp;
3380 struct sigevent sigevk;
3381 #endif
3382 sigqueue_t *sqp;
3383 int (*aio_func)();
3384 int mode;
3385 int error = 0;
3386 int aio_errors = 0;
3387 int i;
3388 size_t ssize;
3389 int deadhead = 0;
3390 int aio_notsupported = 0;
3391 int lio_head_port;
3392 int aio_port;
3393 int aio_thread;
3394 port_kevent_t *pkevtp = NULL;
3395 int portused = 0;
3396 #ifdef _LP64
3397 port_notify32_t pnotify;
3398 #else
3399 port_notify_t pnotify;
3400 #endif
3401 int event;
3403 aiop = curproc->p_aio;
3404 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3405 return (EINVAL);
3407 #ifdef _LP64
3408 ssize = (sizeof (caddr32_t) * nent);
3409 #else
3410 ssize = (sizeof (aiocb_t *) * nent);
3411 #endif
3412 cbplist = kmem_alloc(ssize, KM_SLEEP);
3413 ucbp = (void *)cbplist;
3415 if (copyin(aiocb_arg, cbplist, ssize) ||
3416 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3417 kmem_free(cbplist, ssize);
3418 return (EFAULT);
3421 /* Event Ports */
3422 if (sigev &&
3423 (sigevk.sigev_notify == SIGEV_THREAD ||
3424 sigevk.sigev_notify == SIGEV_PORT)) {
3425 if (sigevk.sigev_notify == SIGEV_THREAD) {
3426 pnotify.portnfy_port = sigevk.sigev_signo;
3427 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3428 } else if (copyin(
3429 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3430 &pnotify, sizeof (pnotify))) {
3431 kmem_free(cbplist, ssize);
3432 return (EFAULT);
3434 error = port_alloc_event(pnotify.portnfy_port,
3435 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3436 if (error) {
3437 if (error == ENOMEM || error == EAGAIN)
3438 error = EAGAIN;
3439 else
3440 error = EINVAL;
3441 kmem_free(cbplist, ssize);
3442 return (error);
3444 lio_head_port = pnotify.portnfy_port;
3445 portused = 1;
3449 * a list head should be allocated if notification is
3450 * enabled for this list.
3452 head = NULL;
3454 if (mode_arg == LIO_WAIT || sigev) {
3455 mutex_enter(&aiop->aio_mutex);
3456 error = aio_lio_alloc(&head);
3457 mutex_exit(&aiop->aio_mutex);
3458 if (error)
3459 goto done;
3460 deadhead = 1;
3461 head->lio_nent = nent;
3462 head->lio_refcnt = nent;
3463 head->lio_port = -1;
3464 head->lio_portkev = NULL;
3465 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3466 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3467 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3468 if (sqp == NULL) {
3469 error = EAGAIN;
3470 goto done;
3472 sqp->sq_func = NULL;
3473 sqp->sq_next = NULL;
3474 sqp->sq_info.si_code = SI_ASYNCIO;
3475 sqp->sq_info.si_pid = curproc->p_pid;
3476 sqp->sq_info.si_ctid = PRCTID(curproc);
3477 sqp->sq_info.si_zoneid = getzoneid();
3478 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3479 sqp->sq_info.si_signo = sigevk.sigev_signo;
3480 sqp->sq_info.si_value.sival_int =
3481 sigevk.sigev_value.sival_int;
3482 head->lio_sigqp = sqp;
3483 } else {
3484 head->lio_sigqp = NULL;
3486 if (pkevtp) {
3488 * Prepare data to send when list of aiocb's has
3489 * completed.
3491 port_init_event(pkevtp, (uintptr_t)sigev,
3492 (void *)(uintptr_t)pnotify.portnfy_user,
3493 NULL, head);
3494 pkevtp->portkev_events = AIOLIO;
3495 head->lio_portkev = pkevtp;
3496 head->lio_port = pnotify.portnfy_port;
3500 for (i = 0; i < nent; i++, ucbp++) {
3502 /* skip entry if it can't be copied. */
3503 #ifdef _LP64
3504 cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3505 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3506 #else
3507 cbp = (aiocb_t *)*ucbp;
3508 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3509 #endif
3511 if (head) {
3512 mutex_enter(&aiop->aio_mutex);
3513 head->lio_nent--;
3514 head->lio_refcnt--;
3515 mutex_exit(&aiop->aio_mutex);
3517 continue;
3519 #ifdef _LP64
3521 * copy 32 bit structure into 64 bit structure
3523 aiocb_32ton(aiocb32, aiocb);
3524 #endif /* _LP64 */
3526 /* skip if opcode for aiocb is LIO_NOP */
3527 mode = aiocb->aio_lio_opcode;
3528 if (mode == LIO_NOP) {
3529 cbp = NULL;
3530 if (head) {
3531 mutex_enter(&aiop->aio_mutex);
3532 head->lio_nent--;
3533 head->lio_refcnt--;
3534 mutex_exit(&aiop->aio_mutex);
3536 continue;
3539 /* increment file descriptor's ref count. */
3540 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3541 lio_set_uerror(&cbp->aio_resultp, EBADF);
3542 if (head) {
3543 mutex_enter(&aiop->aio_mutex);
3544 head->lio_nent--;
3545 head->lio_refcnt--;
3546 mutex_exit(&aiop->aio_mutex);
3548 aio_errors++;
3549 continue;
3553 * check that the file was opened with the requested access mode
3555 if ((fp->f_flag & mode) == 0) {
3556 releasef(aiocb->aio_fildes);
3557 lio_set_uerror(&cbp->aio_resultp, EBADF);
3558 if (head) {
3559 mutex_enter(&aiop->aio_mutex);
3560 head->lio_nent--;
3561 head->lio_refcnt--;
3562 mutex_exit(&aiop->aio_mutex);
3564 aio_errors++;
3565 continue;
3569 * common case where requests are to the same fd
3570 * for the same r/w operation
3571 * for UFS, need to set EBADFD
3573 vp = fp->f_vnode;
3574 if (fp != prev_fp || mode != prev_mode) {
3575 aio_func = check_vp(vp, mode);
3576 if (aio_func == NULL) {
3577 prev_fp = NULL;
3578 releasef(aiocb->aio_fildes);
3579 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3580 aio_notsupported++;
3581 if (head) {
3582 mutex_enter(&aiop->aio_mutex);
3583 head->lio_nent--;
3584 head->lio_refcnt--;
3585 mutex_exit(&aiop->aio_mutex);
3587 continue;
3588 } else {
3589 prev_fp = fp;
3590 prev_mode = mode;
3594 error = aio_req_setup(&reqp, aiop, aiocb,
3595 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3596 if (error) {
3597 releasef(aiocb->aio_fildes);
3598 lio_set_uerror(&cbp->aio_resultp, error);
3599 if (head) {
3600 mutex_enter(&aiop->aio_mutex);
3601 head->lio_nent--;
3602 head->lio_refcnt--;
3603 mutex_exit(&aiop->aio_mutex);
3605 aio_errors++;
3606 continue;
3609 reqp->aio_req_lio = head;
3610 deadhead = 0;
3613 * Set the errno field now before sending the request to
3614 * the driver to avoid a race condition
3616 (void) suword32(&cbp->aio_resultp.aio_errno,
3617 EINPROGRESS);
3619 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3621 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3622 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3623 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3624 if (aio_port | aio_thread) {
3625 port_kevent_t *lpkevp;
3627 * Prepare data to send with each aiocb completed.
3629 #ifdef _LP64
3630 if (aio_port) {
3631 void *paddr = (void *)(uintptr_t)
3632 aiocb32->aio_sigevent.sigev_value.sival_ptr;
3633 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3634 error = EFAULT;
3635 } else { /* aio_thread */
3636 pnotify.portnfy_port =
3637 aiocb32->aio_sigevent.sigev_signo;
3638 pnotify.portnfy_user =
3639 aiocb32->aio_sigevent.sigev_value.sival_ptr;
3641 #else
3642 if (aio_port) {
3643 void *paddr =
3644 aiocb->aio_sigevent.sigev_value.sival_ptr;
3645 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3646 error = EFAULT;
3647 } else { /* aio_thread */
3648 pnotify.portnfy_port =
3649 aiocb->aio_sigevent.sigev_signo;
3650 pnotify.portnfy_user =
3651 aiocb->aio_sigevent.sigev_value.sival_ptr;
3653 #endif
3654 if (error)
3655 /* EMPTY */;
3656 else if (pkevtp != NULL &&
3657 pnotify.portnfy_port == lio_head_port)
3658 error = port_dup_event(pkevtp, &lpkevp,
3659 PORT_ALLOC_DEFAULT);
3660 else
3661 error = port_alloc_event(pnotify.portnfy_port,
3662 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3663 &lpkevp);
3664 if (error == 0) {
3665 port_init_event(lpkevp, (uintptr_t)cbp,
3666 (void *)(uintptr_t)pnotify.portnfy_user,
3667 aio_port_callback, reqp);
3668 lpkevp->portkev_events = event;
3669 reqp->aio_req_portkev = lpkevp;
3670 reqp->aio_req_port = pnotify.portnfy_port;
3675 * send the request to the driver.
3677 if (error == 0) {
3678 if (aiocb->aio_nbytes == 0) {
3679 clear_active_fd(aiocb->aio_fildes);
3680 aio_zerolen(reqp);
3681 continue;
3683 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3684 CRED());
3688 * the fd's ref count is not decremented until the IO has
3689 * completed unless there was an error.
3691 if (error) {
3692 releasef(aiocb->aio_fildes);
3693 lio_set_uerror(&cbp->aio_resultp, error);
3694 if (head) {
3695 mutex_enter(&aiop->aio_mutex);
3696 head->lio_nent--;
3697 head->lio_refcnt--;
3698 mutex_exit(&aiop->aio_mutex);
3700 if (error == ENOTSUP)
3701 aio_notsupported++;
3702 else
3703 aio_errors++;
3704 lio_set_error(reqp, portused);
3705 } else {
3706 clear_active_fd(aiocb->aio_fildes);
3710 if (aio_notsupported) {
3711 error = ENOTSUP;
3712 } else if (aio_errors) {
3714 * return EIO if any request failed
3716 error = EIO;
3719 if (mode_arg == LIO_WAIT) {
3720 mutex_enter(&aiop->aio_mutex);
3721 while (head->lio_refcnt > 0) {
3722 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3723 mutex_exit(&aiop->aio_mutex);
3724 error = EINTR;
3725 goto done;
3728 mutex_exit(&aiop->aio_mutex);
3729 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3732 done:
3733 kmem_free(cbplist, ssize);
3734 if (deadhead) {
3735 if (head->lio_sigqp)
3736 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3737 if (head->lio_portkev)
3738 port_free_event(head->lio_portkev);
3739 kmem_free(head, sizeof (aio_lio_t));
3741 return (error);
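/*
 * For reference (not part of the original source): alio32() above and
 * its largefile sibling alioLF() back the lio_listio(3C) call made by
 * 32-bit processes. A minimal caller that can reach this code looks
 * roughly like the following sketch; fd and buf are assumed to exist
 * and the signal number is arbitrary.
 *
 *	#include <aio.h>
 *	#include <signal.h>
 *	#include <string.h>
 *	#include <stdio.h>
 *
 *	struct aiocb cb;
 *	struct aiocb *list[1];
 *	struct sigevent sev;
 *
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof (buf);
 *	cb.aio_lio_opcode = LIO_READ;
 *	list[0] = &cb;
 *
 *	(void) memset(&sev, 0, sizeof (sev));
 *	sev.sigev_notify = SIGEV_SIGNAL;
 *	sev.sigev_signo = SIGUSR1;
 *
 *	if (lio_listio(LIO_NOWAIT, list, 1, &sev) != 0)
 *		perror("lio_listio");
 */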
3745 #ifdef _SYSCALL32_IMPL
3746 void
3747 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3749 dest->aio_fildes = src->aio_fildes;
3750 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3751 dest->aio_nbytes = (size_t)src->aio_nbytes;
3752 dest->aio_offset = (off_t)src->aio_offset;
3753 dest->aio_reqprio = src->aio_reqprio;
3754 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3755 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3758 * See comment in sigqueue32() on handling of 32-bit
3759 * sigvals in a 64-bit kernel.
3761 dest->aio_sigevent.sigev_value.sival_int =
3762 (int)src->aio_sigevent.sigev_value.sival_int;
3763 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3764 (uintptr_t)src->aio_sigevent.sigev_notify_function;
3765 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3766 (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3767 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3768 dest->aio_lio_opcode = src->aio_lio_opcode;
3769 dest->aio_state = src->aio_state;
3770 dest->aio__pad[0] = src->aio__pad[0];
3772 #endif /* _SYSCALL32_IMPL */
3775 * aio_port_callback() is called just before the event is retrieved from the
3776 * port. The task of this callback function is to finish the work of the
3777 * transaction for the application; that is:
3778 * - copy out transaction data to the application
3779 * (this thread is running in the right process context)
3780 * - keep track of the transaction (update counters)
3781 * - free allocated buffers
3782 * The aiocb pointer is the object element of the port_kevent_t structure.
3784 * flag:
3785 * PORT_CALLBACK_DEFAULT : do the copyout and free resources
3786 * PORT_CALLBACK_CLOSE : don't do the copyout, just free resources
3789 /*ARGSUSED*/
3791 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3793 aio_t *aiop = curproc->p_aio;
3794 aio_req_t *reqp = arg;
3795 struct iovec *iov;
3796 struct buf *bp;
3797 void *resultp;
3799 if (pid != curproc->p_pid) {
3800 /* wrong process; cannot deliver the data here */
3801 return (EACCES);
3804 mutex_enter(&aiop->aio_portq_mutex);
3805 reqp->aio_req_portkev = NULL;
3806 aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3807 mutex_exit(&aiop->aio_portq_mutex);
3808 aphysio_unlock(reqp); /* unlock used pages */
3809 mutex_enter(&aiop->aio_mutex);
3810 if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3811 aio_req_free_port(aiop, reqp); /* back to free list */
3812 mutex_exit(&aiop->aio_mutex);
3813 return (0);
3816 iov = reqp->aio_req_uio.uio_iov;
3817 bp = &reqp->aio_req_buf;
3818 resultp = (void *)reqp->aio_req_resultp;
3819 if (flag == PORT_CALLBACK_DEFAULT)
3820 aio_copyout_result_port(iov, bp, resultp);
3821 aio_req_free_port(aiop, reqp); /* request struct back to free list */
3822 mutex_exit(&aiop->aio_mutex);
3823 return (0);
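/*
 * For reference (not part of the original source): on the user side the
 * event prepared by this callback is retrieved with port_get(3C); the
 * aiocb address travels in portev_object and the portnfy_user cookie in
 * portev_user, as set up by port_init_event() above. A minimal consumer
 * looks roughly like this sketch, assuming the port was created earlier
 * with port_create() and passed in the sigevent.
 *
 *	#include <port.h>
 *	#include <aio.h>
 *
 *	port_event_t pe;
 *
 *	if (port_get(port, &pe, NULL) == 0 &&
 *	    pe.portev_source == PORT_SOURCE_AIO) {
 *		struct aiocb *cbp = (struct aiocb *)pe.portev_object;
 *		int err = aio_error(cbp);
 *		ssize_t ret = aio_return(cbp);
 *	}
 */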