/* kernel/os/aio_subr.c */

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/epm.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */
int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int error;
	size_t c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}

/*ARGSUSED*/
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
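
/*
 * Illustrative sketch (not part of the original file): a driver's
 * aread(9E)/awrite(9E) entry points are the usual callers of aphysio().
 * Assuming hypothetical driver routines xxstrategy() and xxminphys(),
 * a read entry point might look roughly like:
 *
 *	static int
 *	xxaread(dev_t dev, struct aio_req *aio, cred_t *credp)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_READ,
 *		    xxminphys, aio));
 *	}
 *
 * anocancel() is passed because, as the panic above enforces, request
 * cancellation is not supported.
 */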

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */
void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;
	int send_signal = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST by setting an AST on all threads in
				 * this process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return;
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	} /* portkevent */

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN is set, wake the waiters only when we have reached
	 * the required number of finished IOs or when all IOs are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	/*
	 * No need to set this flag for pollq, portq, lio requests.
	 * If this is an old Solaris aio request, and the process has
	 * a SIGIO signal handler enabled, then send a SIGIO signal.
	 */
	if (!sigev && !use_port && head == NULL &&
	    (reqp->aio_req_flags & AIO_SOLARIS) &&
	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    (func != SIG_IGN)) {
		send_signal = 1;
		reqp->aio_req_flags |= AIO_SIGNALLED;
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally in that case the cleanup thread should block on cleanupcv,
	 * but there is a window where it could miss seeing a new aio
	 * request that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (send_signal)
		psignal(p, SIGIO);

	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);
}
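
/*
 * Note (added for clarity): aio_done() is called from biodone() and can run
 * in interrupt context, which is why it limits itself to mutexes, condition
 * variable wakeups and event-port posts here, and defers the actual page
 * unlocking (aphysio_unlock()) and copyout of results to the cleanup paths
 * that run in process context.
 */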

/*
 * Send a queued signal to the specified process.  The sigev argument
 * must be non-NULL; the sigqueue entry is handed off to sigaddqa().
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{

	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}
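
/*
 * Note (added for clarity): by routing a zero-length request through
 * aio_done() with b_resid and b_flags cleared, the request still receives
 * the normal completion notification (doneq/poll/port/signal handling)
 * without ever touching the driver or locking any user pages.
 */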

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

/*
 * deletes a request's id from the hash table of outstanding io.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}
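
/*
 * Note (added for clarity): requests are hashed by their user-level
 * aio_result_t pointer (the value AIO_HASH() is applied to above); a
 * request that is not found in its bucket is silently ignored.
 */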

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}
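
/*
 * Note (added for clarity): freeing a request also drops its listio head's
 * lio_nent count and returns the head to the lio freelist once the last
 * member request is gone; any unused sigqueue and port event attached to
 * the request are released here as well.
 */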

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}

/*
 * Verify the integrity of a queue.
 */
#if defined(DEBUG)
static void
aio_verify_queue(aio_req_t *head,
    aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}

	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif

/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}

/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}
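
/*
 * Note (added for clarity): these queues are circular, doubly linked lists
 * threaded through aio_req_next/aio_req_prev.  The queue head pointer names
 * the first element and head->aio_req_prev is the tail, so aio_enq() appends
 * in O(1) and a one-element queue points at itself in both directions.
 */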

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue should have their aio_req_flags field cleared.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}

/*
 * cleanup aio requests that are on the per-process poll queue.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	/*
	 * If there has never been an old solaris aio request
	 * issued by this process, then do not send a SIGIO signal.
	 */
	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
		signalled = 1;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}
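
/*
 * Note (added for clarity): the "flag" argument distinguishes the exit path
 * from the normal cleanup paths.  Only when flag is non-zero are the
 * event-port queues (aio_portq/aio_portcleanupq) drained, and only
 * AIO_CLEANUP_EXIT causes requests to be freed instead of being moved to
 * the done queue.
 */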

/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	aio_lio_t *liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static int
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	int signalled = 0;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		if (!exitflg) {
			if (reqp->aio_req_flags & AIO_SIGNALLED)
				signalled++;
			else
				reqp->aio_req_flags |= AIO_SIGNALLED;
		}
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
	return (signalled);
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	void *resultp;
	int error;
	size_t retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}
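
/*
 * Note (added for clarity): the result is written directly into the caller's
 * user-level aio_result_t with suword32()/sulword(), so no intermediate
 * copyout buffer is needed; under _SYSCALL32_IMPL a 32-bit caller gets the
 * aio_result32_t layout and the return value is truncated to int.
 */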

void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}

/*
 * This function is used to remove a request from the done queue.
 */

void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t *aiop;
	aio_req_t *reqp;
	aio_req_t *next;
	aio_req_t *headp;
	int counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every newly
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process-oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}
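
/*
 * Note (added for clarity): port close handling is two-phased.  First, every
 * pending request bound to this port is tagged AIO_CLOSE_PORT and the thread
 * waits until aio_done() has drained them all (aio_portpendcnt reaches zero);
 * then the completed-events queue is swept and every event destined for the
 * closing port is freed and its request discarded.
 */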

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation;
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}