util/aio-posix.c

   1 /*
   2  * QEMU aio implementation
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *
   6  * Authors:
   7  *  Anthony Liguori   <aliguori@us.ibm.com>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2.  See
  10  * the COPYING file in the top-level directory.
  11  *
  12  * Contributions after 2012-01-13 are licensed under the terms of the
  13  * GNU GPL, version 2 or (at your option) any later version.
  14  */
  15
  16 #include "qemu/osdep.h"
  17 #include "block/block.h"
  18 #include "block/thread-pool.h"
  19 #include "qemu/main-loop.h"
  20 #include "qemu/rcu.h"
  21 #include "qemu/rcu_queue.h"
  22 #include "qemu/sockets.h"
  23 #include "qemu/cutils.h"
  24 #include "trace.h"
  25 #include "aio-posix.h"
  26
/*
 * Stop userspace polling on a handler if it isn't active for some time.
 *
 * A successful ->io_poll() pushes the handler's poll_idle_timeout this far
 * into the future (see run_poll_handlers_once()).  Once that deadline has
 * passed, remove_idle_poll_handlers() takes the handler off
 * ctx->poll_aio_handlers.
 */
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
  29
  30 bool aio_poll_disabled(AioContext *ctx)
  31 {
  32     return qatomic_read(&ctx->poll_disable_cnt);
  33 }
  34
  35 void aio_add_ready_handler(AioHandlerList *ready_list,
  36                            AioHandler *node,
  37                            int revents)
  38 {
  39     QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
  40     node->pfd.revents = revents;
  41     QLIST_INSERT_HEAD(ready_list, node, node_ready);
  42 }
  43
  44 static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
  45                                        AioHandler *node)
  46 {
  47     QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
  48     node->poll_ready = true;
  49     QLIST_INSERT_HEAD(ready_list, node, node_ready);
  50 }
  51
  52 static AioHandler *find_aio_handler(AioContext *ctx, int fd)
  53 {
  54     AioHandler *node;
  55
  56     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
  57         if (node->pfd.fd == fd) {
  58             if (!QLIST_IS_INSERTED(node, node_deleted)) {
  59                 return node;
  60             }
  61         }
  62     }
  63
  64     return NULL;
  65 }
  66
  67 static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
  68 {
  69     /* If the GSource is in the process of being destroyed then
  70      * g_source_remove_poll() causes an assertion failure.  Skip
  71      * removal in that case, because glib cleans up its state during
  72      * destruction anyway.
  73      */
  74     if (!g_source_is_destroyed(&ctx->source)) {
  75         g_source_remove_poll(&ctx->source, &node->pfd);
  76     }
  77
  78     node->pfd.revents = 0;
  79     node->poll_ready = false;
  80
  81     /* If the fd monitor has already marked it deleted, leave it alone */
  82     if (QLIST_IS_INSERTED(node, node_deleted)) {
  83         return false;
  84     }
  85
  86     /* If a read is in progress, just mark the node as deleted */
  87     if (qemu_lockcnt_count(&ctx->list_lock)) {
  88         QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
  89         return false;
  90     }
  91     /* Otherwise, delete it for real.  We can't just mark it as
  92      * deleted because deleted nodes are only cleaned up while
  93      * no one is walking the handlers list.
  94      */
  95     QLIST_SAFE_REMOVE(node, node_poll);
  96     QLIST_REMOVE(node, node);
  97     return true;
  98 }
  99
/*
 * aio_set_fd_handler:
 * @ctx: the AioContext to register the handler with
 * @fd: the file descriptor to monitor
 * @io_read: callback for G_IO_IN/G_IO_HUP/G_IO_ERR on @fd, or NULL
 * @io_write: callback for G_IO_OUT/G_IO_ERR on @fd, or NULL
 * @io_poll: userspace polling callback, or NULL; dropped when @io_poll_ready
 *     is NULL
 * @io_poll_ready: callback run by aio_dispatch_handler() for a poll-ready
 *     handler, or NULL
 * @opaque: argument passed to all of the callbacks
 *
 * Register, replace or remove the handler for @fd.
 *
 * If @io_read, @io_write and the effective @io_poll are all NULL, the
 * existing handler (if any) is removed.  Otherwise a new AioHandler is always
 * allocated and inserted, and an existing handler for the same @fd is retired
 * through aio_remove_fd_handler().
 */
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque)
{
    AioHandler *node;               /* existing live handler for @fd, if any */
    AioHandler *new_node = NULL;    /* replacement; stays NULL on removal */
    bool is_new = false;
    bool deleted = false;           /* true if @node must be freed here */
    int poll_disable_change;        /* delta to apply to poll_disable_cnt */

    if (io_poll && !io_poll_ready) {
        io_poll = NULL; /* polling only makes sense if there is a handler */
    }

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            /* Nothing registered for @fd, nothing to remove */
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        /* The old handler stops counting if it had no ->io_poll() */
        poll_disable_change = -!node->io_poll;
    } else {
        /*
         * +1 if the new handler has no ->io_poll(), -1 if the handler it
         * replaces had none.
         */
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert if it's not already there */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->io_poll_ready = io_poll_ready;
        new_node->opaque = opaque;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            /* Start from the old handler's pfd; events is overwritten below */
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    qatomic_set(&ctx->poll_disable_cnt,
               qatomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    /* Tell the fd monitor about the old/new pair; either one may be NULL */
    ctx->fdmon_ops->update(ctx, node, new_node);
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    /* Only free the old handler if it was unlinked rather than deferred */
    if (deleted) {
        g_free(node);
    }
}
 180
 181 static void aio_set_fd_poll(AioContext *ctx, int fd,
 182                             IOHandler *io_poll_begin,
 183                             IOHandler *io_poll_end)
 184 {
 185     AioHandler *node = find_aio_handler(ctx, fd);
 186
 187     if (!node) {
 188         return;
 189     }
 190
 191     node->io_poll_begin = io_poll_begin;
 192     node->io_poll_end = io_poll_end;
 193 }
 194
 195 void aio_set_event_notifier(AioContext *ctx,
 196                             EventNotifier *notifier,
 197                             EventNotifierHandler *io_read,
 198                             AioPollFn *io_poll,
 199                             EventNotifierHandler *io_poll_ready)
 200 {
 201     aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
 202                        (IOHandler *)io_read, NULL, io_poll,
 203                        (IOHandler *)io_poll_ready, notifier);
 204 }
 205
 206 void aio_set_event_notifier_poll(AioContext *ctx,
 207                                  EventNotifier *notifier,
 208                                  EventNotifierHandler *io_poll_begin,
 209                                  EventNotifierHandler *io_poll_end)
 210 {
 211     aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
 212                     (IOHandler *)io_poll_begin,
 213                     (IOHandler *)io_poll_end);
 214 }
 215
 216 static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
 217                              bool started)
 218 {
 219     AioHandler *node;
 220     bool progress = false;
 221
 222     if (started == ctx->poll_started) {
 223         return false;
 224     }
 225
 226     ctx->poll_started = started;
 227
 228     qemu_lockcnt_inc(&ctx->list_lock);
 229     QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
 230         IOHandler *fn;
 231
 232         if (QLIST_IS_INSERTED(node, node_deleted)) {
 233             continue;
 234         }
 235
 236         if (started) {
 237             fn = node->io_poll_begin;
 238         } else {
 239             fn = node->io_poll_end;
 240         }
 241
 242         if (fn) {
 243             fn(node->opaque);
 244         }
 245
 246         /* Poll one last time in case ->io_poll_end() raced with the event */
 247         if (!started && node->io_poll(node->opaque)) {
 248             aio_add_poll_ready_handler(ready_list, node);
 249             progress = true;
 250         }
 251     }
 252     qemu_lockcnt_dec(&ctx->list_lock);
 253
 254     return progress;
 255 }
 256
 257
 258 bool aio_prepare(AioContext *ctx)
 259 {
 260     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
 261
 262     /* Poll mode cannot be used with glib's event loop, disable it. */
 263     poll_set_started(ctx, &ready_list, false);
 264     /* TODO what to do with this list? */
 265
 266     return false;
 267 }
 268
 269 bool aio_pending(AioContext *ctx)
 270 {
 271     AioHandler *node;
 272     bool result = false;
 273
 274     /*
 275      * We have to walk very carefully in case aio_set_fd_handler is
 276      * called while we're walking.
 277      */
 278     qemu_lockcnt_inc(&ctx->list_lock);
 279
 280     QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
 281         int revents;
 282
 283         /* TODO should this check poll ready? */
 284         revents = node->pfd.revents & node->pfd.events;
 285         if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
 286             result = true;
 287             break;
 288         }
 289         if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
 290             result = true;
 291             break;
 292         }
 293     }
 294     qemu_lockcnt_dec(&ctx->list_lock);
 295
 296     return result;
 297 }
 298
 299 static void aio_free_deleted_handlers(AioContext *ctx)
 300 {
 301     AioHandler *node;
 302
 303     if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
 304         return;
 305     }
 306     if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
 307         return; /* we are nested, let the parent do the freeing */
 308     }
 309
 310     while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
 311         QLIST_REMOVE(node, node);
 312         QLIST_REMOVE(node, node_deleted);
 313         QLIST_SAFE_REMOVE(node, node_poll);
 314         g_free(node);
 315     }
 316
 317     qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
 318 }
 319
/*
 * aio_dispatch_handler:
 * @ctx: the AioContext that owns @node
 * @node: the handler to dispatch
 *
 * Consume @node's pending state (pfd.revents masked by pfd.events, and
 * poll_ready) and run the matching callbacks.  A handler that has an
 * ->io_poll() callback is also added to ctx->poll_aio_handlers if it is not
 * on that list yet.
 *
 * The deleted check is repeated before every callback because an earlier
 * callback may have removed the handler.
 *
 * Returns: true if progress was made.  ->io_poll_ready() and ->io_read()
 * calls whose opaque is &ctx->notifier (aio_notify()) do not count.
 */
static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
{
    bool progress = false;
    bool poll_ready;
    int revents;

    /* Latch and clear the pending state before any callback runs */
    revents = node->pfd.revents & node->pfd.events;
    node->pfd.revents = 0;

    poll_ready = node->poll_ready;
    node->poll_ready = false;

    /*
     * Start polling AioHandlers when they become ready because activity is
     * likely to continue.  Note that starvation is theoretically possible when
     * fdmon_supports_polling(), but only until the fd fires for the first
     * time.
     */
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        !QLIST_IS_INSERTED(node, node_poll) &&
        node->io_poll) {
        trace_poll_add(ctx, node, node->pfd.fd, revents);
        /* Poll mode is already active: this handler missed the begin call */
        if (ctx->poll_started && node->io_poll_begin) {
            node->io_poll_begin(node->opaque);
        }
        QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
    }
    /* Ready through polling only: no fd events were recorded */
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        poll_ready && revents == 0 && node->io_poll_ready) {
        /*
         * Remove temporarily to avoid infinite loops when ->io_poll_ready()
         * calls aio_poll() before clearing the condition that made the poll
         * handler become ready.
         */
        QLIST_SAFE_REMOVE(node, node_poll);

        node->io_poll_ready(node->opaque);

        /* Put it back unless something re-inserted it in the meantime */
        if (!QLIST_IS_INSERTED(node, node_poll)) {
            QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
        }

        /*
         * Return early since revents was zero. aio_notify() does not count as
         * progress.
         */
        return node->opaque != &ctx->notifier;
    }

    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
        node->io_read) {
        node->io_read(node->opaque);

        /* aio_notify() does not count as progress */
        if (node->opaque != &ctx->notifier) {
            progress = true;
        }
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_OUT | G_IO_ERR)) &&
        node->io_write) {
        node->io_write(node->opaque);
        progress = true;
    }

    return progress;
}
 388
 389 /*
 390  * If we have a list of ready handlers then this is more efficient than
 391  * scanning all handlers with aio_dispatch_handlers().
 392  */
 393 static bool aio_dispatch_ready_handlers(AioContext *ctx,
 394                                         AioHandlerList *ready_list)
 395 {
 396     bool progress = false;
 397     AioHandler *node;
 398
 399     while ((node = QLIST_FIRST(ready_list))) {
 400         QLIST_REMOVE(node, node_ready);
 401         progress = aio_dispatch_handler(ctx, node) || progress;
 402     }
 403
 404     return progress;
 405 }
 406
 407 /* Slower than aio_dispatch_ready_handlers() but only used via glib */
 408 static bool aio_dispatch_handlers(AioContext *ctx)
 409 {
 410     AioHandler *node, *tmp;
 411     bool progress = false;
 412
 413     QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
 414         progress = aio_dispatch_handler(ctx, node) || progress;
 415     }
 416
 417     return progress;
 418 }
 419
/*
 * aio_dispatch:
 * @ctx: the AioContext
 *
 * Run bottom halves, dispatch all fd handlers with aio_dispatch_handlers()
 * and free handlers that were marked deleted, all with ctx->list_lock
 * incremented.  Timers are run afterwards, once the count has been dropped
 * again.
 */
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    aio_free_deleted_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}
 430
 431 static bool run_poll_handlers_once(AioContext *ctx,
 432                                    AioHandlerList *ready_list,
 433                                    int64_t now,
 434                                    int64_t *timeout)
 435 {
 436     bool progress = false;
 437     AioHandler *node;
 438     AioHandler *tmp;
 439
 440     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
 441         if (node->io_poll(node->opaque)) {
 442             aio_add_poll_ready_handler(ready_list, node);
 443
 444             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
 445
 446             /*
 447              * Polling was successful, exit try_poll_mode immediately
 448              * to adjust the next polling time.
 449              */
 450             *timeout = 0;
 451             if (node->opaque != &ctx->notifier) {
 452                 progress = true;
 453             }
 454         }
 455
 456         /* Caller handles freeing deleted nodes.  Don't do it here. */
 457     }
 458
 459     return progress;
 460 }
 461
 462 static bool fdmon_supports_polling(AioContext *ctx)
 463 {
 464     return ctx->fdmon_ops->need_wait != aio_poll_disabled;
 465 }
 466
 467 static bool remove_idle_poll_handlers(AioContext *ctx,
 468                                       AioHandlerList *ready_list,
 469                                       int64_t now)
 470 {
 471     AioHandler *node;
 472     AioHandler *tmp;
 473     bool progress = false;
 474
 475     /*
 476      * File descriptor monitoring implementations without userspace polling
 477      * support suffer from starvation when a subset of handlers is polled
 478      * because fds will not be processed in a timely fashion.  Don't remove
 479      * idle poll handlers.
 480      */
 481     if (!fdmon_supports_polling(ctx)) {
 482         return false;
 483     }
 484
 485     QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
 486         if (node->poll_idle_timeout == 0LL) {
 487             node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
 488         } else if (now >= node->poll_idle_timeout) {
 489             trace_poll_remove(ctx, node, node->pfd.fd);
 490             node->poll_idle_timeout = 0LL;
 491             QLIST_SAFE_REMOVE(node, node_poll);
 492             if (ctx->poll_started && node->io_poll_end) {
 493                 node->io_poll_end(node->opaque);
 494
 495                 /*
 496                  * Final poll in case ->io_poll_end() races with an event.
 497                  * Nevermind about re-adding the handler in the rare case where
 498                  * this causes progress.
 499                  */
 500                 if (node->io_poll(node->opaque)) {
 501                     aio_add_poll_ready_handler(ready_list, node);
 502                     progress = true;
 503                 }
 504             }
 505         }
 506     }
 507
 508     return progress;
 509 }
 510
/* run_poll_handlers:
 * @ctx: the AioContext
 * @ready_list: the list to place ready handlers on
 * @max_ns: maximum time to poll for, in nanoseconds
 * @timeout: in/out blocking timeout in nanoseconds; set to 0 when a poll
 *    handler becomes ready, otherwise reduced by the time spent polling
 *    (a value of -1 is left untouched)
 *
 * Polls for a given time.
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
                              int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    /*
     * Optimization: ->io_poll() handlers often contain RCU read critical
     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
     * -> rcu_read_lock() -> ... sequences with expensive memory
     * synchronization primitives.  Make the entire polling loop an RCU
     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
     * are cheap.
     */
    RCU_READ_LOCK_GUARD();

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        /* start_time, not the current time, is passed as "now" */
        progress = run_poll_handlers_once(ctx, ready_list,
                                          start_time, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
        /* A successful poll zeroes *timeout, which ends the loop via max_ns */
        max_ns = qemu_soonest_timeout(*timeout, max_ns);
        assert(!(max_ns && progress));
    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));

    /* Drop handlers that have been idle for too long */
    if (remove_idle_poll_handlers(ctx, ready_list,
                                  start_time + elapsed_time)) {
        *timeout = 0;
        progress = true;
    }

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);
    return progress;
}
 567
 568 /* try_poll_mode:
 569  * @ctx: the AioContext
 570  * @ready_list: list to add handlers that need to be run
 571  * @timeout: timeout for blocking wait, computed by the caller and updated if
 572  *    polling succeeds.
 573  *
 574  * Note that the caller must have incremented ctx->list_lock.
 575  *
 576  * Returns: true if progress was made, false otherwise
 577  */
 578 static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
 579                           int64_t *timeout)
 580 {
 581     int64_t max_ns;
 582
 583     if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
 584         return false;
 585     }
 586
 587     max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
 588     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
 589         /*
 590          * Enable poll mode. It pairs with the poll_set_started() in
 591          * aio_poll() which disables poll mode.
 592          */
 593         poll_set_started(ctx, ready_list, true);
 594
 595         if (run_poll_handlers(ctx, ready_list, max_ns, timeout)) {
 596             return true;
 597         }
 598     }
 599     return false;
 600 }
 601
/*
 * aio_poll:
 * @ctx: the AioContext to run one event loop iteration for
 * @blocking: if true the wait timeout comes from aio_compute_timeout(),
 *     if false the timeout is 0
 *
 * One iteration consists of: trying userspace polling, waiting in
 * ctx->fdmon_ops->wait() if that is still needed, adapting ctx->poll_ns to
 * the time the iteration took, and finally running bottom halves, the ready
 * fd handlers and expired timers.
 *
 * Returns: true if progress was made, false otherwise
 */
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
    bool progress;
    bool use_notify_me;
    int64_t timeout;
    int64_t start = 0;

    /*
     * There cannot be two concurrent aio_poll calls for the same AioContext (or
     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
     *
     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
     * is special in that it runs in the main thread, but that thread's context
     * is qemu_aio_context.
     */
    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
                                      qemu_get_aio_context() : ctx));

    qemu_lockcnt_inc(&ctx->list_lock);

    /* The start time is only needed when adaptive polling is enabled */
    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &ready_list, &timeout);
    /* Successful polling always zeroes the timeout */
    assert(!(timeout && progress));

    /*
     * aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    use_notify_me = timeout != 0;
    if (use_notify_me) {
        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
        /*
         * Write ctx->notify_me before reading ctx->notified.  Pairs with
         * smp_mb in aio_notify().
         */
        smp_mb();

        /* Don't block if aio_notify() was called */
        if (qatomic_read(&ctx->notified)) {
            timeout = 0;
        }
    }

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
        /*
         * Disable poll mode. poll mode should be disabled before the call
         * of ctx->fdmon_ops->wait() so that guest's notification can wake
         * up IO threads when some work becomes pending. It is essential to
         * avoid hangs or unnecessary latency.
         */
        if (poll_set_started(ctx, &ready_list, false)) {
            timeout = 0;
            progress = true;
        }

        ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
    }

    if (use_notify_me) {
        /* Finish the poll before clearing the flag.  */
        qatomic_store_release(&ctx->notify_me,
                             qatomic_read(&ctx->notify_me) - 2);
    }

    aio_notify_accept(ctx);

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        /* Time spent in this iteration so far, polling and waiting */
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            /* A shrink factor of 0 turns polling off instead of dividing */
            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            /* A grow factor of 0 means "use the default", which is 2 */
            if (grow == 0) {
                grow = 2;
            }

            /*
             * NOTE(review): the product is only clamped after the
             * multiplication; presumably poll_grow and poll_max_ns are kept
             * small enough that it cannot overflow int64_t -- confirm
             * against the callers of aio_context_set_poll_params().
             */
            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    progress |= aio_bh_poll(ctx);
    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);

    aio_free_deleted_handlers(ctx);

    qemu_lockcnt_dec(&ctx->list_lock);

    /* Timers run after the list_lock count has been dropped */
    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}
 733
 734 void aio_context_setup(AioContext *ctx)
 735 {
 736     ctx->fdmon_ops = &fdmon_poll_ops;
 737     ctx->epollfd = -1;
 738
 739     /* Use the fastest fd monitoring implementation if available */
 740     if (fdmon_io_uring_setup(ctx)) {
 741         return;
 742     }
 743
 744     fdmon_epoll_setup(ctx);
 745 }
 746
/*
 * aio_context_destroy:
 * @ctx: the AioContext being torn down
 *
 * Shut down the io_uring and epoll fd monitors, then free any handlers that
 * are still waiting on ctx->deleted_aio_handlers.
 */
void aio_context_destroy(AioContext *ctx)
{
    fdmon_io_uring_destroy(ctx);
    fdmon_epoll_disable(ctx);
    aio_free_deleted_handlers(ctx);
}
 753
/*
 * aio_context_use_g_source:
 * @ctx: the AioContext that will be driven by the glib main loop
 *
 * Tear down io_uring fd monitoring for @ctx and free any handlers that are
 * waiting on ctx->deleted_aio_handlers.
 */
void aio_context_use_g_source(AioContext *ctx)
{
    /*
     * Disable io_uring when the glib main loop is used because it doesn't
     * support mixed glib/aio_poll() usage. It relies on aio_poll() being
     * called regularly so that changes to the monitored file descriptors are
     * submitted, otherwise a list of pending fd handlers builds up.
     */
    fdmon_io_uring_destroy(ctx);
    aio_free_deleted_handlers(ctx);
}
 765
 766 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
 767                                  int64_t grow, int64_t shrink, Error **errp)
 768 {
 769     /* No thread synchronization here, it doesn't matter if an incorrect value
 770      * is used once.
 771      */
 772     ctx->poll_max_ns = max_ns;
 773     ctx->poll_ns = 0;
 774     ctx->poll_grow = grow;
 775     ctx->poll_shrink = shrink;
 776
 777     aio_notify(ctx);
 778 }
 779
/*
 * aio_context_set_aio_params:
 * @ctx: the AioContext
 * @max_batch: new value for ctx->aio_max_batch
 * @errp: not written by this implementation
 *
 * Store @max_batch in @ctx and notify @ctx.
 */
void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
                                Error **errp)
{
    /*
     * No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->aio_max_batch = max_batch;

    aio_notify(ctx);
}