include/block/aio.h

   1 /*
   2  * QEMU aio implementation
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 #ifndef QEMU_AIO_H
  15 #define QEMU_AIO_H
  16
  17 #include "qemu-common.h"
  18 #include "qemu/queue.h"
  19 #include "qemu/event_notifier.h"
  20 #include "qemu/thread.h"
  21 #include "qemu/rfifolock.h"
  22 #include "qemu/timer.h"
  23
  24 typedef struct BlockAIOCB BlockAIOCB;
  25 typedef void BlockCompletionFunc(void *opaque, int ret);
  26
  27 typedef struct AIOCBInfo {
  28     void (*cancel_async)(BlockAIOCB *acb);
  29     AioContext *(*get_aio_context)(BlockAIOCB *acb);
  30     size_t aiocb_size;
  31 } AIOCBInfo;
  32
  33 struct BlockAIOCB {
  34     const AIOCBInfo *aiocb_info;
  35     BlockDriverState *bs;
  36     BlockCompletionFunc *cb;
  37     void *opaque;
  38     int refcnt;
  39 };
  40
  41 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
  42                    BlockCompletionFunc *cb, void *opaque);
  43 void qemu_aio_unref(void *p);
  44 void qemu_aio_ref(void *p);
  45
  46 typedef struct AioHandler AioHandler;
  47 typedef void QEMUBHFunc(void *opaque);
  48 typedef void IOHandler(void *opaque);
  49
  50 struct ThreadPool;
  51 struct LinuxAioState;
  52
  53 struct AioContext {
  54     GSource source;
  55
  56     /* Protects all fields from multi-threaded access */
  57     RFifoLock lock;
  58
  59     /* The list of registered AIO handlers */
  60     QLIST_HEAD(, AioHandler) aio_handlers;
  61
  62     /* This is a simple lock used to protect the aio_handlers list.
  63      * Specifically, it's used to ensure that no callbacks are removed while
  64      * we're walking and dispatching callbacks.
  65      */
  66     int walking_handlers;
  67
  68     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
  69      * accessed with atomic primitives.  If this field is 0, everything
  70      * (file descriptors, bottom halves, timers) will be re-evaluated
  71      * before the next blocking poll(), thus the event_notifier_set call
  72      * can be skipped.  If it is non-zero, you may need to wake up a
  73      * concurrent aio_poll or the glib main event loop, making
  74      * event_notifier_set necessary.
  75      *
  76      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
  77      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
  78      * Bits 1-31 simply count the number of active calls to aio_poll
  79      * that are in the prepare or poll phase.
  80      *
  81      * The GSource and aio_poll must use a different mechanism because
  82      * there is no certainty that a call to GSource's prepare callback
  83      * (via g_main_context_prepare) is indeed followed by check and
  84      * dispatch.  It's not clear whether this would be a bug, but let's
  85      * play safe and allow it---it will just cause extra calls to
  86      * event_notifier_set until the next call to dispatch.
  87      *
  88      * Instead, the aio_poll calls include both the prepare and the
  89      * dispatch phase, hence a simple counter is enough for them.
  90      */
  91     uint32_t notify_me;
  92
  93     /* lock to protect between bh's adders and deleter */
  94     QemuMutex bh_lock;
  95
  96     /* Anchor of the list of Bottom Halves belonging to the context */
  97     struct QEMUBH *first_bh;
  98
  99     /* A simple lock used to protect the first_bh list, and ensure that
 100      * no callbacks are removed while we're walking and dispatching callbacks.
 101      */
 102     int walking_bh;
 103
 104     /* Used by aio_notify.
 105      *
 106      * "notified" is used to avoid expensive event_notifier_test_and_clear
 107      * calls.  When it is clear, the EventNotifier is clear, or one thread
 108      * is going to clear "notified" before processing more events.  False
 109      * positives are possible, i.e. "notified" could be set even though the
 110      * EventNotifier is clear.
 111      *
 112      * Note that event_notifier_set *cannot* be optimized the same way.  For
 113      * more information on the problem that would result, see "#ifdef BUG2"
 114      * in the docs/aio_notify_accept.promela formal model.
 115      */
 116     bool notified;
 117     EventNotifier notifier;
 118
 119     /* Scheduling this BH forces the event loop it iterate */
 120     QEMUBH *notify_dummy_bh;
 121
 122     /* Thread pool for performing work and receiving completion callbacks */
 123     struct ThreadPool *thread_pool;
 124
 125 #ifdef CONFIG_LINUX_AIO
 126     /* State for native Linux AIO.  Uses aio_context_acquire/release for
 127      * locking.
 128      */
 129     struct LinuxAioState *linux_aio;
 130 #endif
 131
 132     /* TimerLists for calling timers - one per clock type */
 133     QEMUTimerListGroup tlg;
 134
 135     int external_disable_cnt;
 136
 137     /* epoll(7) state used when built with CONFIG_EPOLL */
 138     int epollfd;
 139     bool epoll_enabled;
 140     bool epoll_available;
 141 };
 142
 143 /**
 144  * aio_context_new: Allocate a new AioContext.
 145  *
  146  * An AioContext provides a mini event loop that can be waited on
  147  * synchronously.  It also provides bottom halves, a service to execute
  148  * a piece of code as soon as possible.
 149  */
 150 AioContext *aio_context_new(Error **errp);
 151
 152 /**
 153  * aio_context_ref:
 154  * @ctx: The AioContext to operate on.
 155  *
 156  * Add a reference to an AioContext.
 157  */
 158 void aio_context_ref(AioContext *ctx);
 159
 160 /**
 161  * aio_context_unref:
 162  * @ctx: The AioContext to operate on.
 163  *
 164  * Drop a reference to an AioContext.
 165  */
 166 void aio_context_unref(AioContext *ctx);
 167
 168 /* Take ownership of the AioContext.  If the AioContext will be shared between
 169  * threads, and a thread does not want to be interrupted, it will have to
 170  * take ownership around calls to aio_poll().  Otherwise, aio_poll()
 171  * automatically takes care of calling aio_context_acquire and
 172  * aio_context_release.
 173  *
 174  * Access to timers and BHs from a thread that has not acquired AioContext
 175  * is possible.  Access to callbacks for now must be done while the AioContext
 176  * is owned by the thread (FIXME).
 177  */
 178 void aio_context_acquire(AioContext *ctx);
 179
 180 /* Relinquish ownership of the AioContext. */
 181 void aio_context_release(AioContext *ctx);
 182
 183 /**
 184  * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
 185  * only once and as soon as possible.
 186  */
 187 void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
 188
 189 /**
 190  * aio_bh_new: Allocate a new bottom half structure.
 191  *
 192  * Bottom halves are lightweight callbacks whose invocation is guaranteed
 193  * to be wait-free, thread-safe and signal-safe.  The #QEMUBH structure
 194  * is opaque and must be allocated prior to its use.
 195  */
 196 QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
 197
 198 /**
 199  * aio_notify: Force processing of pending events.
 200  *
  201  * Similar to signaling a condition variable, aio_notify forces
  202  * aio_poll to exit, so that the next call will re-examine pending events.
  203  * The caller of aio_notify will usually call aio_poll again very soon,
 204  * or go through another iteration of the GLib main loop.  Hence, aio_notify
 205  * also has the side effect of recalculating the sets of file descriptors
 206  * that the main loop waits for.
 207  *
 208  * Calling aio_notify is rarely necessary, because for example scheduling
 209  * a bottom half calls it already.
 210  */
 211 void aio_notify(AioContext *ctx);
 212
 213 /**
 214  * aio_notify_accept: Acknowledge receiving an aio_notify.
 215  *
 216  * aio_notify() uses an EventNotifier in order to wake up a sleeping
 217  * aio_poll() or g_main_context_iteration().  Calls to aio_notify() are
 218  * usually rare, but the AioContext has to clear the EventNotifier on
 219  * every aio_poll() or g_main_context_iteration() in order to avoid
 220  * busy waiting.  This event_notifier_test_and_clear() cannot be done
 221  * using the usual aio_context_set_event_notifier(), because it must
 222  * be done before processing all events (file descriptors, bottom halves,
 223  * timers).
 224  *
 225  * aio_notify_accept() is an optimized event_notifier_test_and_clear()
 226  * that is specific to an AioContext's notifier; it is used internally
 227  * to clear the EventNotifier only if aio_notify() had been called.
 228  */
 229 void aio_notify_accept(AioContext *ctx);
 230
 231 /**
 232  * aio_bh_call: Executes callback function of the specified BH.
 233  */
 234 void aio_bh_call(QEMUBH *bh);
 235
 236 /**
 237  * aio_bh_poll: Poll bottom halves for an AioContext.
 238  *
 239  * These are internal functions used by the QEMU main loop.
  240  * Note that multiple invocations of aio_bh_poll cannot run
  241  * concurrently.
 242  */
 243 int aio_bh_poll(AioContext *ctx);
 244
 245 /**
 246  * qemu_bh_schedule: Schedule a bottom half.
 247  *
 248  * Scheduling a bottom half interrupts the main loop and causes the
 249  * execution of the callback that was passed to qemu_bh_new.
 250  *
 251  * Bottom halves that are scheduled from a bottom half handler are instantly
 252  * invoked.  This can create an infinite loop if a bottom half handler
 253  * schedules itself.
 254  *
 255  * @bh: The bottom half to be scheduled.
 256  */
 257 void qemu_bh_schedule(QEMUBH *bh);
 258
 259 /**
 260  * qemu_bh_cancel: Cancel execution of a bottom half.
 261  *
 262  * Canceling execution of a bottom half undoes the effect of calls to
 263  * qemu_bh_schedule without freeing its resources yet.  While cancellation
 264  * itself is also wait-free and thread-safe, it can of course race with the
 265  * loop that executes bottom halves unless you are holding the iothread
 266  * mutex.  This makes it mostly useless if you are not holding the mutex.
 267  *
 268  * @bh: The bottom half to be canceled.
 269  */
 270 void qemu_bh_cancel(QEMUBH *bh);
 271
 272 /**
  273  * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
 274  *
 275  * Deleting a bottom half frees the memory that was allocated for it by
 276  * qemu_bh_new.  It also implies canceling the bottom half if it was
 277  * scheduled.
  278  * This function is asynchronous: the actual deletion of the bottom
  279  * half is deferred to a later point.
 280  *
 281  * @bh: The bottom half to be deleted.
 282  */
 283 void qemu_bh_delete(QEMUBH *bh);
 284
 285 /* Return whether there are any pending callbacks from the GSource
 286  * attached to the AioContext, before g_poll is invoked.
 287  *
 288  * This is used internally in the implementation of the GSource.
 289  */
 290 bool aio_prepare(AioContext *ctx);
 291
 292 /* Return whether there are any pending callbacks from the GSource
 293  * attached to the AioContext, after g_poll is invoked.
 294  *
 295  * This is used internally in the implementation of the GSource.
 296  */
 297 bool aio_pending(AioContext *ctx);
 298
 299 /* Dispatch any pending callbacks from the GSource attached to the AioContext.
 300  *
 301  * This is used internally in the implementation of the GSource.
 302  */
 303 bool aio_dispatch(AioContext *ctx);
 304
  305 /* Make progress in completing AIO work.  This can issue new pending
 306  * aio as a result of executing I/O completion or bh callbacks.
 307  *
 308  * Return whether any progress was made by executing AIO or bottom half
 309  * handlers.  If @blocking == true, this should always be true except
 310  * if someone called aio_notify.
 311  *
 312  * If there are no pending bottom halves, but there are pending AIO
 313  * operations, it may not be possible to make any progress without
 314  * blocking.  If @blocking is true, this function will wait until one
 315  * or more AIO events have completed, to ensure something has moved
 316  * before returning.
 317  */
 318 bool aio_poll(AioContext *ctx, bool blocking);
 319
 320 /* Register a file descriptor and associated callbacks.  Behaves very similarly
 321  * to qemu_set_fd_handler.  Unlike qemu_set_fd_handler, these callbacks will
 322  * be invoked when using aio_poll().
 323  *
 324  * Code that invokes AIO completion functions should rely on this function
 325  * instead of qemu_set_fd_handler[2].
 326  */
 327 void aio_set_fd_handler(AioContext *ctx,
 328                         int fd,
 329                         bool is_external,
 330                         IOHandler *io_read,
 331                         IOHandler *io_write,
 332                         void *opaque);
 333
 334 /* Register an event notifier and associated callbacks.  Behaves very similarly
 335  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
 336  * will be invoked when using aio_poll().
 337  *
 338  * Code that invokes AIO completion functions should rely on this function
 339  * instead of event_notifier_set_handler.
 340  */
 341 void aio_set_event_notifier(AioContext *ctx,
 342                             EventNotifier *notifier,
 343                             bool is_external,
 344                             EventNotifierHandler *io_read);
 345
 346 /* Return a GSource that lets the main loop poll the file descriptors attached
 347  * to this AioContext.
 348  */
 349 GSource *aio_get_g_source(AioContext *ctx);
 350
 351 /* Return the ThreadPool bound to this AioContext */
 352 struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
 353
 354 /* Return the LinuxAioState bound to this AioContext */
 355 struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
 356
 357 /**
 358  * aio_timer_new:
 359  * @ctx: the aio context
 360  * @type: the clock type
 361  * @scale: the scale
 362  * @cb: the callback to call on timer expiry
 363  * @opaque: the opaque pointer to pass to the callback
 364  *
 365  * Allocate a new timer attached to the context @ctx.
 366  * The function is responsible for memory allocation.
 367  *
 368  * The preferred interface is aio_timer_init. Use that
 369  * unless you really need dynamic memory allocation.
 370  *
 371  * Returns: a pointer to the new timer
 372  */
 373 static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
 374                                        int scale,
 375                                        QEMUTimerCB *cb, void *opaque)
 376 {
 377     return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
 378 }
 379
 380 /**
 381  * aio_timer_init:
 382  * @ctx: the aio context
 383  * @ts: the timer
 384  * @type: the clock type
 385  * @scale: the scale
 386  * @cb: the callback to call on timer expiry
 387  * @opaque: the opaque pointer to pass to the callback
 388  *
 389  * Initialise a new timer attached to the context @ctx.
 390  * The caller is responsible for memory allocation.
 391  */
 392 static inline void aio_timer_init(AioContext *ctx,
 393                                   QEMUTimer *ts, QEMUClockType type,
 394                                   int scale,
 395                                   QEMUTimerCB *cb, void *opaque)
 396 {
 397     timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
 398 }
 399
 400 /**
 401  * aio_compute_timeout:
 402  * @ctx: the aio context
 403  *
 404  * Compute the timeout that a blocking aio_poll should use.
 405  */
 406 int64_t aio_compute_timeout(AioContext *ctx);
 407
 408 /**
 409  * aio_disable_external:
 410  * @ctx: the aio context
 411  *
 412  * Disable the further processing of external clients.
 413  */
 414 static inline void aio_disable_external(AioContext *ctx)
 415 {
 416     atomic_inc(&ctx->external_disable_cnt);
 417 }
 418
 419 /**
 420  * aio_enable_external:
 421  * @ctx: the aio context
 422  *
 423  * Enable the processing of external clients.
 424  */
 425 static inline void aio_enable_external(AioContext *ctx)
 426 {
 427     assert(ctx->external_disable_cnt > 0);
 428     atomic_dec(&ctx->external_disable_cnt);
 429 }
 430
 431 /**
 432  * aio_external_disabled:
 433  * @ctx: the aio context
 434  *
 435  * Return true if the external clients are disabled.
 436  */
 437 static inline bool aio_external_disabled(AioContext *ctx)
 438 {
 439     return atomic_read(&ctx->external_disable_cnt);
 440 }
 441
 442 /**
 443  * aio_node_check:
 444  * @ctx: the aio context
 445  * @is_external: Whether or not the checked node is an external event source.
 446  *
 447  * Check if the node's is_external flag is okay to be polled by the ctx at this
 448  * moment. True means green light.
 449  */
 450 static inline bool aio_node_check(AioContext *ctx, bool is_external)
 451 {
 452     return !is_external || !atomic_read(&ctx->external_disable_cnt);
 453 }
 454
 455 /**
 456  * aio_context_setup:
 457  * @ctx: the aio context
 458  *
 459  * Initialize the aio context.
 460  */
 461 void aio_context_setup(AioContext *ctx);
 462
 463 #endif