/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"

struct AioHandler {
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    int deleted;
    void *opaque;
    QLIST_ENTRY(AioHandler) node;
};

static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd) {
            if (!node->deleted) {
                return node;
            }
        }
    }

    return NULL;
}

void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        void *opaque)
{
    AioHandler *node;

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write) {
        if (node) {
            g_source_remove_poll(&ctx->source, &node->pfd);

            /* If the lock is held, just mark the node as deleted */
            if (ctx->walking_handlers) {
                node->deleted = 1;
                node->pfd.revents = 0;
            } else {
                /* Otherwise, delete it for real. We can't just mark it as
                 * deleted because deleted nodes are only cleaned up after
                 * releasing the walking_handlers lock.
                 */
                QLIST_REMOVE(node, node);
                g_free(node);
            }
        }
    } else {
        if (node == NULL) {
            /* Alloc and insert if it's not already there */
            node = g_new0(AioHandler, 1);
            node->pfd.fd = fd;
            QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);

            g_source_add_poll(&ctx->source, &node->pfd);
        }
        /* Update handler with latest information */
        node->io_read = io_read;
        node->io_write = io_write;
        node->opaque = opaque;

        node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
    }

    aio_notify(ctx);
}

void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            EventNotifierHandler *io_read)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
                       (IOHandler *)io_read, NULL, notifier);
}

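/*
 * Illustrative sketch (not part of the original file): how a caller might
 * use aio_set_fd_handler() above to watch a socket for readability and
 * later stop watching it.  The handler and helper names are hypothetical.
 */
#if 0
static void example_sock_read(void *opaque)
{
    int fd = *(int *)opaque;
    /* called from aio_dispatch() when the fd reports G_IO_IN/HUP/ERR */
    (void)fd;
}

static void example_watch_socket(AioContext *ctx, int *fd)
{
    /* io_read != NULL, so G_IO_IN | G_IO_HUP | G_IO_ERR are requested */
    aio_set_fd_handler(ctx, *fd, example_sock_read, NULL, fd);
}

static void example_unwatch_socket(AioContext *ctx, int *fd)
{
    /* NULL for both handlers removes the node (or defers removal while
     * walking_handlers is held, as described above) */
    aio_set_fd_handler(ctx, *fd, NULL, NULL, NULL);
}
#endif
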
bool aio_prepare(AioContext *ctx)
{
    return false;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
            return true;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
            return true;
        }
    }

    return false;
}

bool aio_dispatch(AioContext *ctx)
{
    AioHandler *node;
    bool progress = false;

    /*
     * If there are callbacks left that have been queued, we need to call them.
     * Do not call select in this case, because it is possible that the caller
     * does not need a complete flush (as is the case for aio_poll loops).
     */
    if (aio_bh_poll(ctx)) {
        progress = true;
    }

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    node = QLIST_FIRST(&ctx->aio_handlers);
    while (node) {
        AioHandler *tmp;
        int revents;

        ctx->walking_handlers++;

        revents = node->pfd.revents & node->pfd.events;
        node->pfd.revents = 0;

        if (!node->deleted &&
            (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
            node->io_read) {
            node->io_read(node->opaque);

            /* aio_notify() does not count as progress */
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }
        if (!node->deleted &&
            (revents & (G_IO_OUT | G_IO_ERR)) &&
            node->io_write) {
            node->io_write(node->opaque);
            progress = true;
        }

        tmp = node;
        node = QLIST_NEXT(node, node);

        ctx->walking_handlers--;

        if (!ctx->walking_handlers && tmp->deleted) {
            QLIST_REMOVE(tmp, node);
            g_free(tmp);
        }
    }

    /* Run our timers */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call. In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll(). And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext. Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandler *node;
    int i, ret;
    bool progress;
    int64_t timeout;

    aio_context_acquire(ctx);
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll(). This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    ctx->walking_handlers++;

    assert(npfd == 0);

    /* fill pollfds */
    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (!node->deleted && node->pfd.events) {
            add_pollfd(node);
        }
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;

    /* wait until next event */
    if (timeout) {
        aio_context_release(ctx);
    }
    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
    }
    if (timeout) {
        aio_context_acquire(ctx);
    }

    aio_notify_accept(ctx);

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            nodes[i]->pfd.revents = pollfds[i].revents;
        }
    }

    npfd = 0;
    ctx->walking_handlers--;

    /* Run dispatch even if there were no readable fds to run timers */
    if (aio_dispatch(ctx)) {
        progress = true;
    }

    aio_context_release(ctx);

    return progress;
}