/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
#include "qemu/osdep.h"
#include "block/block.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#include "aio-posix.h"
/* Stop userspace polling on a handler if it isn't active for some time */
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
bool aio_poll_disabled(AioContext *ctx)
{
    return qatomic_read(&ctx->poll_disable_cnt);
}
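
/*
 * Note: fdmon_supports_polling() below identifies fd monitoring
 * implementations by comparing their ->need_wait() callback against this
 * function's address.
 */
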
/* Add a handler to a ready list */
void aio_add_ready_handler(AioHandlerList *ready_list,
                           AioHandler *node,
                           int revents)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->pfd.revents = revents;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}
static void aio_add_poll_ready_handler(AioHandlerList *ready_list,
                                       AioHandler *node)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->poll_ready = true;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd) {
            if (!QLIST_IS_INSERTED(node, node_deleted)) {
                return node;
            }
        }
    }

    return NULL;
}
static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    node->pfd.revents = 0;
    node->poll_ready = false;

    /* If the fd monitor has already marked it deleted, leave it alone */
    if (QLIST_IS_INSERTED(node, node_deleted)) {
        return false;
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
        return false;
    }
    /* Otherwise, delete it for real.  We can't just mark it as
     * deleted because deleted nodes are only cleaned up while
     * no one is walking the handlers list.
     */
    QLIST_SAFE_REMOVE(node, node_poll);
    QLIST_REMOVE(node, node);
    return true;
}
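
/*
 * Register or update the handler for @fd.  Passing NULL for all of io_read,
 * io_write and io_poll unregisters the fd.  The change is published to the
 * fd monitoring implementation via ->update() and a concurrent aio_poll()
 * is kicked with aio_notify().
 */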
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        IOHandler *io_poll_ready,
                        void *opaque)
{
    AioHandler *node;
    AioHandler *new_node = NULL;
    bool is_new = false;
    bool deleted = false;
    int poll_disable_change;

    if (io_poll && !io_poll_ready) {
        io_poll = NULL; /* polling only makes sense if there is a handler */
    }

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        poll_disable_change = -!node->io_poll;
    } else {
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert if it's not already there */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->io_poll_ready = io_poll_ready;
        new_node->opaque = opaque;
        new_node->is_external = is_external;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    qatomic_set(&ctx->poll_disable_cnt,
                qatomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    ctx->fdmon_ops->update(ctx, node, new_node);
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    if (deleted) {
        g_free(node);
    }
}
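
/*
 * Usage sketch (illustrative, not part of the original file): a hypothetical
 * caller could register a read handler for a socket and later unregister it
 * by passing NULL callbacks.  sockfd, my_read_cb and my_state are made-up
 * names:
 *
 *     aio_set_fd_handler(ctx, sockfd, true, my_read_cb, NULL, NULL, NULL,
 *                        my_state);
 *     ...
 *     aio_set_fd_handler(ctx, sockfd, true, NULL, NULL, NULL, NULL, NULL);
 */
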
void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    AioHandler *node = find_aio_handler(ctx, fd);

    if (!node) {
        return;
    }

    node->io_poll_begin = io_poll_begin;
    node->io_poll_end = io_poll_end;
}
void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll,
                            EventNotifierHandler *io_poll_ready)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
                       (IOHandler *)io_read, NULL, io_poll,
                       (IOHandler *)io_poll_ready, notifier);
}
void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
                    (IOHandler *)io_poll_begin,
                    (IOHandler *)io_poll_end);
}
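
/*
 * Enter or leave polling mode: invoke each polling handler's
 * io_poll_begin()/io_poll_end() callback.  When leaving polling mode, each
 * handler is polled one final time in case an event raced with
 * io_poll_end(); any that fire are placed on @ready_list.  Returns true if
 * that final poll made progress.
 */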
static bool poll_set_started(AioContext *ctx, AioHandlerList *ready_list,
                             bool started)
{
    AioHandler *node;
    bool progress = false;

    if (started == ctx->poll_started) {
        return false;
    }

    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
        IOHandler *fn;

        if (QLIST_IS_INSERTED(node, node_deleted)) {
            continue;
        }

        if (started) {
            fn = node->io_poll_begin;
        } else {
            fn = node->io_poll_end;
        }

        if (fn) {
            fn(node->opaque);
        }

        /* Poll one last time in case ->io_poll_end() raced with the event */
        if (!started && node->io_poll(node->opaque)) {
            aio_add_poll_ready_handler(ready_list, node);
            progress = true;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return progress;
}
bool aio_prepare(AioContext *ctx)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);

    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, &ready_list, false);
    /* TODO what to do with this list? */

    return false;
}
bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

        /* TODO should this check poll ready? */
        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return result;
}
static void aio_free_deleted_handlers(AioContext *ctx)
{
    AioHandler *node;

    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
        return;
    }
    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        return; /* we are nested, let the parent do the freeing */
    }

    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
        QLIST_REMOVE(node, node);
        QLIST_REMOVE(node, node_deleted);
        QLIST_SAFE_REMOVE(node, node_poll);
        g_free(node);
    }

    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
}
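
/*
 * Dispatch a single handler: snapshot and clear its revents/poll_ready
 * state, move it onto the polling set if it just became ready, then invoke
 * io_poll_ready(), io_read() or io_write() as appropriate.  Returns true if
 * progress was made; the ctx->notifier event does not count as progress.
 */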
static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
{
    bool progress = false;
    bool poll_ready;
    int revents;

    revents = node->pfd.revents & node->pfd.events;
    node->pfd.revents = 0;

    poll_ready = node->poll_ready;
    node->poll_ready = false;

    /*
     * Start polling AioHandlers when they become ready because activity is
     * likely to continue.  Note that starvation is theoretically possible when
     * fdmon_supports_polling(), but only until the fd fires for the first
     * time.
     */
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        !QLIST_IS_INSERTED(node, node_poll) &&
        node->io_poll) {
        trace_poll_add(ctx, node, node->pfd.fd, revents);
        if (ctx->poll_started && node->io_poll_begin) {
            node->io_poll_begin(node->opaque);
        }
        QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        poll_ready && revents == 0 &&
        aio_node_check(ctx, node->is_external) &&
        node->io_poll_ready) {
        node->io_poll_ready(node->opaque);

        /*
         * Return early since revents was zero. aio_notify() does not count as
         * progress.
         */
        return node->opaque != &ctx->notifier;
    }

    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_read) {
        node->io_read(node->opaque);

        /* aio_notify() does not count as progress */
        if (node->opaque != &ctx->notifier) {
            progress = true;
        }
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_OUT | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_write) {
        node->io_write(node->opaque);
        progress = true;
    }

    return progress;
}
/*
 * If we have a list of ready handlers then this is more efficient than
 * scanning all handlers with aio_dispatch_handlers().
 */
static bool aio_dispatch_ready_handlers(AioContext *ctx,
                                        AioHandlerList *ready_list)
{
    bool progress = false;
    AioHandler *node;

    while ((node = QLIST_FIRST(ready_list))) {
        QLIST_REMOVE(node, node_ready);
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}
/* Slower than aio_dispatch_ready_handlers() but only used via glib */
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}
void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    aio_free_deleted_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}
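
/*
 * Poll every handler on ctx->poll_aio_handlers once.  Handlers whose
 * ->io_poll() reports readiness are moved to @ready_list and get a fresh
 * idle timeout.  The caller must have incremented ctx->list_lock.
 */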
static bool run_poll_handlers_once(AioContext *ctx,
                                   AioHandlerList *ready_list,
                                   int64_t now,
                                   int64_t *timeout)
{
    bool progress = false;
    AioHandler *node;
    AioHandler *tmp;

    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
        if (aio_node_check(ctx, node->is_external) &&
            node->io_poll(node->opaque)) {
            aio_add_poll_ready_handler(ready_list, node);

            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;

            /*
             * Polling was successful, exit try_poll_mode immediately
             * to adjust the next polling time.
             */
            *timeout = 0;
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }

        /* Caller handles freeing deleted nodes.  Don't do it here. */
    }

    return progress;
}
static bool fdmon_supports_polling(AioContext *ctx)
{
    return ctx->fdmon_ops->need_wait != aio_poll_disabled;
}
static bool remove_idle_poll_handlers(AioContext *ctx,
                                      AioHandlerList *ready_list,
                                      int64_t now)
{
    AioHandler *node;
    AioHandler *tmp;
    bool progress = false;

    /*
     * File descriptor monitoring implementations without userspace polling
     * support suffer from starvation when a subset of handlers is polled
     * because fds will not be processed in a timely fashion.  Don't remove
     * idle poll handlers.
     */
    if (!fdmon_supports_polling(ctx)) {
        return false;
    }

    QLIST_FOREACH_SAFE(node, &ctx->poll_aio_handlers, node_poll, tmp) {
        if (node->poll_idle_timeout == 0LL) {
            node->poll_idle_timeout = now + POLL_IDLE_INTERVAL_NS;
        } else if (now >= node->poll_idle_timeout) {
            trace_poll_remove(ctx, node, node->pfd.fd);
            node->poll_idle_timeout = 0LL;
            QLIST_SAFE_REMOVE(node, node_poll);
            if (ctx->poll_started && node->io_poll_end) {
                node->io_poll_end(node->opaque);

                /*
                 * Final poll in case ->io_poll_end() races with an event.
                 * Nevermind about re-adding the handler in the rare case where
                 * this causes progress.
                 */
                if (node->io_poll(node->opaque)) {
                    aio_add_poll_ready_handler(ready_list, node);
                    progress = true;
                }
            }
        }
    }

    return progress;
}
/* run_poll_handlers:
 * @ctx: the AioContext
 * @ready_list: the list to place ready handlers on
 * @max_ns: maximum time to poll for, in nanoseconds
 *
 * Polls for a given time.
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
                              int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    /*
     * Optimization: ->io_poll() handlers often contain RCU read critical
     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
     * -> rcu_read_lock() -> ... sequences with expensive memory
     * synchronization primitives.  Make the entire polling loop an RCU
     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
     * are cheap.
     */
    RCU_READ_LOCK_GUARD();

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        progress = run_poll_handlers_once(ctx, ready_list,
                                          start_time, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
        max_ns = qemu_soonest_timeout(*timeout, max_ns);
        assert(!(max_ns && progress));
    } while (elapsed_time < max_ns && !ctx->fdmon_ops->need_wait(ctx));

    if (remove_idle_poll_handlers(ctx, ready_list,
                                  start_time + elapsed_time)) {
        *timeout = 0;
        progress = true;
    }

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);

    return progress;
}
/* try_poll_mode:
 * @ctx: the AioContext
 * @ready_list: list to add handlers that need to be run
 * @timeout: timeout for blocking wait, computed by the caller and updated if
 *           polling succeeds
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
                          int64_t *timeout)
{
    int64_t max_ns;

    if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
        return false;
    }

    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
    if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
        poll_set_started(ctx, ready_list, true);

        if (run_poll_handlers(ctx, ready_list, max_ns, timeout)) {
            return true;
        }
    }

    if (poll_set_started(ctx, ready_list, false)) {
        *timeout = 0;
        return true;
    }

    return false;
}
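
/*
 * aio_poll() runs one iteration of the event loop: it optionally busy-polls
 * via try_poll_mode(), then waits on the fd monitoring implementation, and
 * finally dispatches bottom halves, ready fd handlers and timers, adjusting
 * the adaptive polling window (ctx->poll_ns) along the way.
 */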
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
    bool progress;
    bool use_notify_me;
    int64_t timeout;
    int64_t start = 0;

    /*
     * There cannot be two concurrent aio_poll calls for the same AioContext (or
     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
     *
     * aio_poll() may only be called in the AioContext's thread. iohandler_ctx
     * is special in that it runs in the main thread, but that thread's context
     * is qemu_aio_context.
     */
    assert(in_aio_context_home_thread(ctx == iohandler_get_aio_context() ?
                                      qemu_get_aio_context() : ctx));

    qemu_lockcnt_inc(&ctx->list_lock);

    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &ready_list, &timeout);
    assert(!(timeout && progress));

    /*
     * aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    use_notify_me = timeout != 0;
    if (use_notify_me) {
        qatomic_set(&ctx->notify_me, qatomic_read(&ctx->notify_me) + 2);
        /*
         * Write ctx->notify_me before reading ctx->notified.  Pairs with
         * smp_mb in aio_notify().
         */
        smp_mb();

        /* Don't block if aio_notify() was called */
        if (qatomic_read(&ctx->notified)) {
            timeout = 0;
        }
    }

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
        ctx->fdmon_ops->wait(ctx, &ready_list, timeout);
    }

    if (use_notify_me) {
        /* Finish the poll before clearing the flag.  */
        qatomic_store_release(&ctx->notify_me,
                              qatomic_read(&ctx->notify_me) - 2);
    }

    aio_notify_accept(ctx);

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    progress |= aio_bh_poll(ctx);
    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);

    aio_free_deleted_handlers(ctx);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}
void aio_context_setup(AioContext *ctx)
{
    ctx->fdmon_ops = &fdmon_poll_ops;
    ctx->epollfd = -1;

    /* Use the fastest fd monitoring implementation if available */
    if (fdmon_io_uring_setup(ctx)) {
        return;
    }

    fdmon_epoll_setup(ctx);
}
void aio_context_destroy(AioContext *ctx)
{
    fdmon_io_uring_destroy(ctx);
    fdmon_epoll_disable(ctx);
    aio_free_deleted_handlers(ctx);
}
void aio_context_use_g_source(AioContext *ctx)
{
    /*
     * Disable io_uring when the glib main loop is used because it doesn't
     * support mixed glib/aio_poll() usage. It relies on aio_poll() being
     * called regularly so that changes to the monitored file descriptors are
     * submitted, otherwise a list of pending fd handlers builds up.
     */
    fdmon_io_uring_destroy(ctx);
    aio_free_deleted_handlers(ctx);
}
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}
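
/*
 * Example (illustrative, values are hypothetical): an IOThread's
 * poll-max-ns/poll-grow/poll-shrink properties are applied through this
 * function, e.g.
 *
 *     aio_context_set_poll_params(ctx, 32 * 1000, 0, 0, &error_abort);
 *
 * enables adaptive polling for up to 32 microseconds per iteration, with
 * the default grow/shrink factors.
 */
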
void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
                                Error **errp)
{
    /*
     * No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->aio_max_batch = max_batch;

    aio_notify(ctx);
}