cpus.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to deal
   8  * in the Software without restriction, including without limitation the rights
   9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  * copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  * THE SOFTWARE.
  23  */
  24
  25 /* Needed early for CONFIG_BSD etc. */
  26 #include "qemu/osdep.h"
  27 #include "qemu-common.h"
  28 #include "qemu/config-file.h"
  29 #include "cpu.h"
  30 #include "monitor/monitor.h"
  31 #include "qapi/qmp/qerror.h"
  32 #include "qemu/error-report.h"
  33 #include "sysemu/sysemu.h"
  34 #include "sysemu/block-backend.h"
  35 #include "exec/gdbstub.h"
  36 #include "sysemu/dma.h"
  37 #include "sysemu/hw_accel.h"
  38 #include "sysemu/kvm.h"
  39 #include "sysemu/hax.h"
  40 #include "qmp-commands.h"
  41 #include "exec/exec-all.h"
  42
  43 #include "qemu/thread.h"
  44 #include "sysemu/cpus.h"
  45 #include "sysemu/qtest.h"
  46 #include "qemu/main-loop.h"
  47 #include "qemu/bitmap.h"
  48 #include "qemu/seqlock.h"
  49 #include "tcg.h"
  50 #include "qapi-event.h"
  51 #include "hw/nmi.h"
  52 #include "sysemu/replay.h"
  53 #include "hw/boards.h"
  54
  55 #ifdef CONFIG_LINUX
  56
  57 #include <sys/prctl.h>
  58
  59 #ifndef PR_MCE_KILL
  60 #define PR_MCE_KILL 33
  61 #endif
  62
  63 #ifndef PR_MCE_KILL_SET
  64 #define PR_MCE_KILL_SET 1
  65 #endif
  66
  67 #ifndef PR_MCE_KILL_EARLY
  68 #define PR_MCE_KILL_EARLY 1
  69 #endif
  70
  71 #endif /* CONFIG_LINUX */
  72
/* NOTE(review): neither variable is referenced in this part of the file;
 * presumably they hold icount alignment statistics consumed elsewhere --
 * confirm against their users.
 */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer created in cpu_ticks_init(); its callback is cpu_throttle_timer_tick(). */
static QEMUTimer *throttle_timer;
/* Current throttle percentage, accessed with atomic_read()/atomic_set().
 * Zero means throttling is off (see cpu_throttle_stop()/cpu_throttle_active()).
 */
static unsigned int throttle_percentage;

/* cpu_throttle_set() clamps the requested percentage into [MIN, MAX]. */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base time slice, in nanoseconds, from which the per-vCPU sleep time and
 * the timer re-arm interval are derived.
 */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
  83
  84 bool cpu_is_stopped(CPUState *cpu)
  85 {
  86     return cpu->stopped || !runstate_is_running();
  87 }
  88
  89 static bool cpu_thread_is_idle(CPUState *cpu)
  90 {
  91     if (cpu->stop || cpu->queued_work_first) {
  92         return false;
  93     }
  94     if (cpu_is_stopped(cpu)) {
  95         return true;
  96     }
  97     if (!cpu->halted || cpu_has_work(cpu) ||
  98         kvm_halt_in_kernel()) {
  99         return false;
 100     }
 101     return true;
 102 }
 103
 104 static bool all_cpu_threads_idle(void)
 105 {
 106     CPUState *cpu;
 107
 108     CPU_FOREACH(cpu) {
 109         if (!cpu_thread_is_idle(cpu)) {
 110             return false;
 111         }
 112     }
 113     return true;
 114 }
 115
 116 /***********************************************************/
 117 /* guest cycle counter */
 118
/* Protected by TimersState seqlock */

/* Value of the "sleep" option read in configure_icount(); when false the
 * virtual clock is advanced straight to the next deadline instead of
 * waiting for real time to pass (see qemu_start_warp_timer()).
 */
static bool icount_sleep = true;
/* QEMU_CLOCK_VIRTUAL_RT time at which the current warp started, or -1
 * when no warp is pending (icount_warp_rt() returns early on -1).
 */
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

/* Timers created in configure_icount(): the first two drive
 * icount_adjust() in adaptive mode, the third runs icount_warp_rt().
 */
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
 131
/* Bookkeeping for the guest-visible tick counter, the VM clock and the
 * instruction counter.  A single instance, timers_state, is registered
 * for migration through vmstate_timers.
 */
typedef struct TimersState {
    /* Protected by BQL.  */
    /* Last value returned by cpu_get_ticks(); keeps the result monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter by cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    /* Offset added to get_clock() by cpu_get_clock_locked(). */
    int64_t cpu_clock_offset;
    /* Non-zero between cpu_enable_ticks() and cpu_disable_ticks(). */
    int32_t cpu_ticks_enabled;
    /* Only referenced by vmstate_timers in this file.
     * NOTE(review): presumably a placeholder kept for migration stream
     * compatibility -- confirm.
     */
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;
 150
/* The single TimersState instance used by every clock helper in this file. */
static TimersState timers_state;
/* Whether multi-threaded TCG is in use; set by qemu_tcg_configure(). */
bool mttcg_enabled;
 153
 154 /*
 155  * We default to false if we know other options have been enabled
 156  * which are currently incompatible with MTTCG. Otherwise when each
 157  * guest (target) has been updated to support:
 158  *   - atomic instructions
 159  *   - memory ordering primitives (barriers)
 160  * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 161  *
 162  * Once a guest architecture has been converted to the new primitives
 163  * there are two remaining limitations to check.
 164  *
 165  * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 166  * - The host must have a stronger memory order than the guest
 167  *
 168  * It may be possible in future to support strong guests on weak hosts
 169  * but that will require tagging all load/stores in a guest with their
 170  * implicit memory order requirements which would likely slow things
 171  * down a lot.
 172  */
 173
 174 static bool check_tcg_memory_orders_compatible(void)
 175 {
 176 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
 177     return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
 178 #else
 179     return false;
 180 #endif
 181 }
 182
 183 static bool default_mttcg_enabled(void)
 184 {
 185     if (use_icount || TCG_OVERSIZED_GUEST) {
 186         return false;
 187     } else {
 188 #ifdef TARGET_SUPPORTS_MTTCG
 189         return check_tcg_memory_orders_compatible();
 190 #else
 191         return false;
 192 #endif
 193     }
 194 }
 195
 196 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
 197 {
 198     const char *t = qemu_opt_get(opts, "thread");
 199     if (t) {
 200         if (strcmp(t, "multi") == 0) {
 201             if (TCG_OVERSIZED_GUEST) {
 202                 error_setg(errp, "No MTTCG when guest word size > hosts");
 203             } else if (use_icount) {
 204                 error_setg(errp, "No MTTCG when icount is enabled");
 205             } else {
 206 #ifndef TARGET_SUPPORTS_MTTCG
 207                 error_report("Guest not yet converted to MTTCG - "
 208                              "you may get unexpected results");
 209 #endif
 210                 if (!check_tcg_memory_orders_compatible()) {
 211                     error_report("Guest expects a stronger memory ordering "
 212                                  "than the host provides");
 213                     error_printf("This may cause strange/hard to debug errors\n");
 214                 }
 215                 mttcg_enabled = true;
 216             }
 217         } else if (strcmp(t, "single") == 0) {
 218             mttcg_enabled = false;
 219         } else {
 220             error_setg(errp, "Invalid 'thread' setting %s", t);
 221         }
 222     } else {
 223         mttcg_enabled = default_mttcg_enabled();
 224     }
 225 }
 226
 227 /* The current number of executed instructions is based on what we
 228  * originally budgeted minus the current state of the decrementing
 229  * icount counters in extra/u16.low.
 230  */
 231 static int64_t cpu_get_icount_executed(CPUState *cpu)
 232 {
 233     return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
 234 }
 235
 236 /*
 237  * Update the global shared timer_state.qemu_icount to take into
 238  * account executed instructions. This is done by the TCG vCPU
 239  * thread so the main-loop can see time has moved forward.
 240  */
 241 void cpu_update_icount(CPUState *cpu)
 242 {
 243     int64_t executed = cpu_get_icount_executed(cpu);
 244     cpu->icount_budget -= executed;
 245
 246 #ifdef CONFIG_ATOMIC64
 247     atomic_set__nocheck(&timers_state.qemu_icount,
 248                         atomic_read__nocheck(&timers_state.qemu_icount) +
 249                         executed);
 250 #else /* FIXME: we need 64bit atomics to do this safely */
 251     timers_state.qemu_icount += executed;
 252 #endif
 253 }
 254
 255 int64_t cpu_get_icount_raw(void)
 256 {
 257     CPUState *cpu = current_cpu;
 258
 259     if (cpu && cpu->running) {
 260         if (!cpu->can_do_io) {
 261             fprintf(stderr, "Bad icount read\n");
 262             exit(1);
 263         }
 264         /* Take into account what has run */
 265         cpu_update_icount(cpu);
 266     }
 267 #ifdef CONFIG_ATOMIC64
 268     return atomic_read__nocheck(&timers_state.qemu_icount);
 269 #else /* FIXME: we need 64bit atomics to do this safely */
 270     return timers_state.qemu_icount;
 271 #endif
 272 }
 273
 274 /* Return the virtual CPU time, based on the instruction counter.  */
 275 static int64_t cpu_get_icount_locked(void)
 276 {
 277     int64_t icount = cpu_get_icount_raw();
 278     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
 279 }
 280
 281 int64_t cpu_get_icount(void)
 282 {
 283     int64_t icount;
 284     unsigned start;
 285
 286     do {
 287         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 288         icount = cpu_get_icount_locked();
 289     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 290
 291     return icount;
 292 }
 293
/* Convert an instruction count to virtual-clock nanoseconds: each
 * instruction accounts for 2^icount_time_shift ns.
 */
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
 298
 299 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
 300  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 301  * counter.
 302  *
 303  * Caller must hold the BQL
 304  */
 305 int64_t cpu_get_ticks(void)
 306 {
 307     int64_t ticks;
 308
 309     if (use_icount) {
 310         return cpu_get_icount();
 311     }
 312
 313     ticks = timers_state.cpu_ticks_offset;
 314     if (timers_state.cpu_ticks_enabled) {
 315         ticks += cpu_get_host_ticks();
 316     }
 317
 318     if (timers_state.cpu_ticks_prev > ticks) {
 319         /* Note: non increasing ticks may happen if the host uses
 320            software suspend */
 321         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
 322         ticks = timers_state.cpu_ticks_prev;
 323     }
 324
 325     timers_state.cpu_ticks_prev = ticks;
 326     return ticks;
 327 }
 328
 329 static int64_t cpu_get_clock_locked(void)
 330 {
 331     int64_t time;
 332
 333     time = timers_state.cpu_clock_offset;
 334     if (timers_state.cpu_ticks_enabled) {
 335         time += get_clock();
 336     }
 337
 338     return time;
 339 }
 340
 341 /* Return the monotonic time elapsed in VM, i.e.,
 342  * the time between vm_start and vm_stop
 343  */
 344 int64_t cpu_get_clock(void)
 345 {
 346     int64_t ti;
 347     unsigned start;
 348
 349     do {
 350         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 351         ti = cpu_get_clock_locked();
 352     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 353
 354     return ti;
 355 }
 356
 357 /* enable cpu_get_ticks()
 358  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 359  */
 360 void cpu_enable_ticks(void)
 361 {
 362     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
 363     seqlock_write_begin(&timers_state.vm_clock_seqlock);
 364     if (!timers_state.cpu_ticks_enabled) {
 365         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
 366         timers_state.cpu_clock_offset -= get_clock();
 367         timers_state.cpu_ticks_enabled = 1;
 368     }
 369     seqlock_write_end(&timers_state.vm_clock_seqlock);
 370 }
 371
 372 /* disable cpu_get_ticks() : the clock is stopped. You must not call
 373  * cpu_get_ticks() after that.
 374  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 375  */
 376 void cpu_disable_ticks(void)
 377 {
 378     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
 379     seqlock_write_begin(&timers_state.vm_clock_seqlock);
 380     if (timers_state.cpu_ticks_enabled) {
 381         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
 382         timers_state.cpu_clock_offset = cpu_get_clock_locked();
 383         timers_state.cpu_ticks_enabled = 0;
 384     }
 385     seqlock_write_end(&timers_state.vm_clock_seqlock);
 386 }
 387
 388 /* Correlation between real and virtual time is always going to be
 389    fairly approximate, so ignore small variation.
 390    When the guest is idle real and virtual time will be aligned in
 391    the IO wait loop.  */
 392 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
 393
/* Re-tune icount_time_shift so the icount-based virtual time tracks the
 * VM clock, then recompute qemu_icount_bias so that the virtual time
 * stays continuous across the shift change.
 *
 * Does nothing while the VM is not running.  All updates are made
 * inside the vm_clock_seqlock write section.
 */
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Only read and written inside the seqlock write section below.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    /* Positive: virtual (icount) time is ahead of the VM clock. */
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    /* Pick the bias that makes the new shift yield cur_icount right now. */
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
 431
 432 static void icount_adjust_rt(void *opaque)
 433 {
 434     timer_mod(icount_rt_timer,
 435               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 436     icount_adjust();
 437 }
 438
 439 static void icount_adjust_vm(void *opaque)
 440 {
 441     timer_mod(icount_vm_timer,
 442                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 443                    NANOSECONDS_PER_SECOND / 10);
 444     icount_adjust();
 445 }
 446
 447 static int64_t qemu_icount_round(int64_t count)
 448 {
 449     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
 450 }
 451
/* Account a pending clock warp: add the real time that elapsed since
 * vm_clock_warp_start to qemu_icount_bias, clear the pending warp, and
 * notify QEMU_CLOCK_VIRTUAL if that made a timer expire.
 *
 * Returns immediately when no warp is pending (vm_clock_warp_start is -1).
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    /* The bias only advances while the VM runs; the pending warp is
     * cleared below in either case.
     */
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
 494
 495 static void icount_timer_cb(void *opaque)
 496 {
 497     /* No need for a checkpoint because the timer already synchronizes
 498      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 499      */
 500     icount_warp_rt();
 501 }
 502
 503 void qtest_clock_warp(int64_t dest)
 504 {
 505     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 506     AioContext *aio_context;
 507     assert(qtest_enabled());
 508     aio_context = qemu_get_aio_context();
 509     while (clock < dest) {
 510         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 511         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 512
 513         seqlock_write_begin(&timers_state.vm_clock_seqlock);
 514         timers_state.qemu_icount_bias += warp;
 515         seqlock_write_end(&timers_state.vm_clock_seqlock);
 516
 517         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 518         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
 519         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 520     }
 521     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 522 }
 523
/* Arrange for QEMU_CLOCK_VIRTUAL to keep advancing while every vCPU is
 * idle in icount mode.
 *
 * Depending on icount_sleep, the clock is either jumped straight to the
 * next QEMU_CLOCK_VIRTUAL deadline, or a warp is recorded and
 * icount_warp_timer is armed so the jump is accounted after the
 * corresponding real time has passed.  Nothing happens when icount is
 * off, the VM is stopped, the replay checkpoint refuses, some vCPU is
 * busy, or qtest drives the clock.
 */
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* Warn only once per process. */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            /* Keep the earliest start time if a warp is already pending. */
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        /* A timer is already due: just wake up the clock's users. */
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
 606
 607 static void qemu_account_warp_timer(void)
 608 {
 609     if (!use_icount || !icount_sleep) {
 610         return;
 611     }
 612
 613     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 614      * do not fire, so computing the deadline does not make sense.
 615      */
 616     if (!runstate_is_running()) {
 617         return;
 618     }
 619
 620     /* warp clock deterministically in record/replay mode */
 621     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
 622         return;
 623     }
 624
 625     timer_del(icount_warp_timer);
 626     icount_warp_rt();
 627 }
 628
 629 static bool icount_state_needed(void *opaque)
 630 {
 631     return use_icount;
 632 }
 633
/*
 * This is a subsection for icount migration.
 * It carries the two icount fields of TimersState and is included only
 * when icount_state_needed() returns true.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
 648
/* Migration description of TimersState, registered against timers_state
 * in cpu_ticks_init().  cpu_clock_offset is tagged as a version 2 field;
 * the icount state travels in an optional subsection.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
 664
 665 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 666 {
 667     double pct;
 668     double throttle_ratio;
 669     long sleeptime_ns;
 670
 671     if (!cpu_throttle_get_percentage()) {
 672         return;
 673     }
 674
 675     pct = (double)cpu_throttle_get_percentage()/100;
 676     throttle_ratio = pct / (1 - pct);
 677     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
 678
 679     qemu_mutex_unlock_iothread();
 680     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
 681     qemu_mutex_lock_iothread();
 682     atomic_set(&cpu->throttle_thread_scheduled, 0);
 683 }
 684
 685 static void cpu_throttle_timer_tick(void *opaque)
 686 {
 687     CPUState *cpu;
 688     double pct;
 689
 690     /* Stop the timer if needed */
 691     if (!cpu_throttle_get_percentage()) {
 692         return;
 693     }
 694     CPU_FOREACH(cpu) {
 695         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
 696             async_run_on_cpu(cpu, cpu_throttle_thread,
 697                              RUN_ON_CPU_NULL);
 698         }
 699     }
 700
 701     pct = (double)cpu_throttle_get_percentage()/100;
 702     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 703                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
 704 }
 705
 706 void cpu_throttle_set(int new_throttle_pct)
 707 {
 708     /* Ensure throttle percentage is within valid range */
 709     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
 710     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
 711
 712     atomic_set(&throttle_percentage, new_throttle_pct);
 713
 714     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 715                                        CPU_THROTTLE_TIMESLICE_NS);
 716 }
 717
/* Turn throttling off by zeroing the percentage.  The timer is not
 * cancelled here; cpu_throttle_timer_tick() stops re-arming it once it
 * sees a zero percentage.
 */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
 722
 723 bool cpu_throttle_active(void)
 724 {
 725     return (cpu_throttle_get_percentage() != 0);
 726 }
 727
 728 int cpu_throttle_get_percentage(void)
 729 {
 730     return atomic_read(&throttle_percentage);
 731 }
 732
 733 void cpu_ticks_init(void)
 734 {
 735     seqlock_init(&timers_state.vm_clock_seqlock);
 736     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
 737     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 738                                            cpu_throttle_timer_tick, NULL);
 739 }
 740
 741 void configure_icount(QemuOpts *opts, Error **errp)
 742 {
 743     const char *option;
 744     char *rem_str = NULL;
 745
 746     option = qemu_opt_get(opts, "shift");
 747     if (!option) {
 748         if (qemu_opt_get(opts, "align") != NULL) {
 749             error_setg(errp, "Please specify shift option when using align");
 750         }
 751         return;
 752     }
 753
 754     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
 755     if (icount_sleep) {
 756         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 757                                          icount_timer_cb, NULL);
 758     }
 759
 760     icount_align_option = qemu_opt_get_bool(opts, "align", false);
 761
 762     if (icount_align_option && !icount_sleep) {
 763         error_setg(errp, "align=on and sleep=off are incompatible");
 764     }
 765     if (strcmp(option, "auto") != 0) {
 766         errno = 0;
 767         icount_time_shift = strtol(option, &rem_str, 0);
 768         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
 769             error_setg(errp, "icount: Invalid shift value");
 770         }
 771         use_icount = 1;
 772         return;
 773     } else if (icount_align_option) {
 774         error_setg(errp, "shift=auto and align=on are incompatible");
 775     } else if (!icount_sleep) {
 776         error_setg(errp, "shift=auto and sleep=off are incompatible");
 777     }
 778
 779     use_icount = 2;
 780
 781     /* 125MIPS seems a reasonable initial guess at the guest speed.
 782        It will be corrected fairly quickly anyway.  */
 783     icount_time_shift = 3;
 784
 785     /* Have both realtime and virtual time triggers for speed adjustment.
 786        The realtime trigger catches emulated time passing too slowly,
 787        the virtual time trigger catches emulated time passing too fast.
 788        Realtime triggers occur even when idle, so use them less frequently
 789        than VM triggers.  */
 790     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
 791                                    icount_adjust_rt, NULL);
 792     timer_mod(icount_rt_timer,
 793                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 794     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 795                                         icount_adjust_vm, NULL);
 796     timer_mod(icount_vm_timer,
 797                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 798                    NANOSECONDS_PER_SECOND / 10);
 799 }
 800
 801 /***********************************************************/
 802 /* TCG vCPU kick timer
 803  *
 804  * The kick timer is responsible for moving single threaded vCPU
 805  * emulation on to the next vCPU. If more than one vCPU is running a
  806  * timer event will force a cpu->exit so the next vCPU can get
 807  * scheduled.
 808  *
 809  * The timer is removed if all vCPUs are idle and restarted again once
 810  * idleness is complete.
 811  */
 812
/* Kick timer; created by start_tcg_kick_timer(), NULL while stopped. */
static QEMUTimer *tcg_kick_vcpu_timer;
/* vCPU that qemu_cpu_kick_rr_cpu() forces out of execution; read with
 * atomic_mb_read().  NOTE(review): presumably updated by the round-robin
 * TCG thread outside this chunk -- confirm.
 */
static CPUState *tcg_current_rr_cpu;

/* Interval between kicks: a tenth of a second of QEMU_CLOCK_VIRTUAL. */
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 817
 818 static inline int64_t qemu_tcg_next_kick(void)
 819 {
 820     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 821 }
 822
 823 /* Kick the currently round-robin scheduled vCPU */
 824 static void qemu_cpu_kick_rr_cpu(void)
 825 {
 826     CPUState *cpu;
 827     do {
 828         cpu = atomic_mb_read(&tcg_current_rr_cpu);
 829         if (cpu) {
 830             cpu_exit(cpu);
 831         }
 832     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 833 }
 834
/* Intentionally empty work item: qemu_timer_notify_cb() queues it with
 * async_run_on_cpu() purely for the wake-up side effect of having queued
 * work.
 */
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
 838
 839 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 840 {
 841     if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
 842         qemu_notify_event();
 843         return;
 844     }
 845
 846     if (!qemu_in_vcpu_thread() && first_cpu) {
 847         /* qemu_cpu_kick is not enough to kick a halted CPU out of
 848          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
 849          * causes cpu_thread_is_idle to return false.  This way,
 850          * handle_icount_deadline can run.
 851          */
 852         async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
 853     }
 854 }
 855
 856 static void kick_tcg_thread(void *opaque)
 857 {
 858     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 859     qemu_cpu_kick_rr_cpu();
 860 }
 861
 862 static void start_tcg_kick_timer(void)
 863 {
 864     if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 865         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 866                                            kick_tcg_thread, NULL);
 867         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 868     }
 869 }
 870
 871 static void stop_tcg_kick_timer(void)
 872 {
 873     if (tcg_kick_vcpu_timer) {
 874         timer_del(tcg_kick_vcpu_timer);
 875         tcg_kick_vcpu_timer = NULL;
 876     }
 877 }
 878
 879 /***********************************************************/
 880 void hw_error(const char *fmt, ...)
 881 {
 882     va_list ap;
 883     CPUState *cpu;
 884
 885     va_start(ap, fmt);
 886     fprintf(stderr, "qemu: hardware error: ");
 887     vfprintf(stderr, fmt, ap);
 888     fprintf(stderr, "\n");
 889     CPU_FOREACH(cpu) {
 890         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
 891         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
 892     }
 893     va_end(ap);
 894     abort();
 895 }
 896
 897 void cpu_synchronize_all_states(void)
 898 {
 899     CPUState *cpu;
 900
 901     CPU_FOREACH(cpu) {
 902         cpu_synchronize_state(cpu);
 903     }
 904 }
 905
 906 void cpu_synchronize_all_post_reset(void)
 907 {
 908     CPUState *cpu;
 909
 910     CPU_FOREACH(cpu) {
 911         cpu_synchronize_post_reset(cpu);
 912     }
 913 }
 914
 915 void cpu_synchronize_all_post_init(void)
 916 {
 917     CPUState *cpu;
 918
 919     CPU_FOREACH(cpu) {
 920         cpu_synchronize_post_init(cpu);
 921     }
 922 }
 923
 924 void cpu_synchronize_all_pre_loadvm(void)
 925 {
 926     CPUState *cpu;
 927
 928     CPU_FOREACH(cpu) {
 929         cpu_synchronize_pre_loadvm(cpu);
 930     }
 931 }
 932
 933 static int do_vm_stop(RunState state)
 934 {
 935     int ret = 0;
 936
 937     if (runstate_is_running()) {
 938         cpu_disable_ticks();
 939         pause_all_vcpus();
 940         runstate_set(state);
 941         vm_state_notify(0, state);
 942         qapi_event_send_stop(&error_abort);
 943     }
 944
 945     bdrv_drain_all();
 946     replay_disable_events();
 947     ret = bdrv_flush_all();
 948
 949     return ret;
 950 }
 951
 952 static bool cpu_can_run(CPUState *cpu)
 953 {
 954     if (cpu->stop) {
 955         return false;
 956     }
 957     if (cpu_is_stopped(cpu)) {
 958         return false;
 959     }
 960     return true;
 961 }
 962
 963 static void cpu_handle_guest_debug(CPUState *cpu)
 964 {
 965     gdb_set_stop_cpu(cpu);
 966     qemu_system_debug_request();
 967     cpu->stopped = true;
 968 }
 969
 970 #ifdef CONFIG_LINUX
 971 static void sigbus_reraise(void)
 972 {
 973     sigset_t set;
 974     struct sigaction action;
 975
 976     memset(&action, 0, sizeof(action));
 977     action.sa_handler = SIG_DFL;
 978     if (!sigaction(SIGBUS, &action, NULL)) {
 979         raise(SIGBUS);
 980         sigemptyset(&set);
 981         sigaddset(&set, SIGBUS);
 982         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 983     }
 984     perror("Failed to re-raise SIGBUS!\n");
 985     abort();
 986 }
 987
 988 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
 989 {
 990     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
 991         sigbus_reraise();
 992     }
 993
 994     if (current_cpu) {
 995         /* Called asynchronously in VCPU thread.  */
 996         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
 997             sigbus_reraise();
 998         }
 999     } else {
1000         /* Called synchronously (via signalfd) in main thread.  */
1001         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1002             sigbus_reraise();
1003         }
1004     }
1005 }
1006
1007 static void qemu_init_sigbus(void)
1008 {
1009     struct sigaction action;
1010
1011     memset(&action, 0, sizeof(action));
1012     action.sa_flags = SA_SIGINFO;
1013     action.sa_sigaction = sigbus_handler;
1014     sigaction(SIGBUS, &action, NULL);
1015
1016     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1017 }
1018 #else /* !CONFIG_LINUX */
/* Without CONFIG_LINUX there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1022 #endif /* !CONFIG_LINUX */
1023
/* Global mutex taken by qemu_mutex_lock_iothread(); it is also the mutex
 * passed to every qemu_cond_wait() on the condition variables below.
 */
static QemuMutex qemu_global_mutex;

/* Identity of the thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* cpu creation: signalled by vCPU threads when they change cpu->created */
static QemuCond qemu_cpu_cond;
/* vCPU pause: broadcast by qemu_cpu_stop(), waited on by pause_all_vcpus() */
static QemuCond qemu_pause_cond;
1032
1033 void qemu_init_cpu_loop(void)
1034 {
1035     qemu_init_sigbus();
1036     qemu_cond_init(&qemu_cpu_cond);
1037     qemu_cond_init(&qemu_pause_cond);
1038     qemu_mutex_init(&qemu_global_mutex);
1039
1040     qemu_thread_get_self(&io_thread);
1041 }
1042
1043 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1044 {
1045     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1046 }
1047
1048 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1049 {
1050     if (kvm_destroy_vcpu(cpu) < 0) {
1051         error_report("kvm_destroy_vcpu failed");
1052         exit(EXIT_FAILURE);
1053     }
1054 }
1055
1056 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1057 {
1058 }
1059
1060 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1061 {
1062     g_assert(qemu_cpu_is_self(cpu));
1063     cpu->stop = false;
1064     cpu->stopped = true;
1065     if (exit) {
1066         cpu_exit(cpu);
1067     }
1068     qemu_cond_broadcast(&qemu_pause_cond);
1069 }
1070
1071 static void qemu_wait_io_event_common(CPUState *cpu)
1072 {
1073     atomic_mb_set(&cpu->thread_kicked, false);
1074     if (cpu->stop) {
1075         qemu_cpu_stop(cpu, false);
1076     }
1077     process_queued_cpu_work(cpu);
1078 }
1079
1080 static bool qemu_tcg_should_sleep(CPUState *cpu)
1081 {
1082     if (mttcg_enabled) {
1083         return cpu_thread_is_idle(cpu);
1084     } else {
1085         return all_cpu_threads_idle();
1086     }
1087 }
1088
1089 static void qemu_tcg_wait_io_event(CPUState *cpu)
1090 {
1091     while (qemu_tcg_should_sleep(cpu)) {
1092         stop_tcg_kick_timer();
1093         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1094     }
1095
1096     start_tcg_kick_timer();
1097
1098     qemu_wait_io_event_common(cpu);
1099 }
1100
1101 static void qemu_kvm_wait_io_event(CPUState *cpu)
1102 {
1103     while (cpu_thread_is_idle(cpu)) {
1104         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1105     }
1106
1107     qemu_wait_io_event_common(cpu);
1108 }
1109
1110 static void *qemu_kvm_cpu_thread_fn(void *arg)
1111 {
1112     CPUState *cpu = arg;
1113     int r;
1114
1115     rcu_register_thread();
1116
1117     qemu_mutex_lock_iothread();
1118     qemu_thread_get_self(cpu->thread);
1119     cpu->thread_id = qemu_get_thread_id();
1120     cpu->can_do_io = 1;
1121     current_cpu = cpu;
1122
1123     r = kvm_init_vcpu(cpu);
1124     if (r < 0) {
1125         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1126         exit(1);
1127     }
1128
1129     kvm_init_cpu_signals(cpu);
1130
1131     /* signal CPU creation */
1132     cpu->created = true;
1133     qemu_cond_signal(&qemu_cpu_cond);
1134
1135     do {
1136         if (cpu_can_run(cpu)) {
1137             r = kvm_cpu_exec(cpu);
1138             if (r == EXCP_DEBUG) {
1139                 cpu_handle_guest_debug(cpu);
1140             }
1141         }
1142         qemu_kvm_wait_io_event(cpu);
1143     } while (!cpu->unplug || cpu_can_run(cpu));
1144
1145     qemu_kvm_destroy_vcpu(cpu);
1146     cpu->created = false;
1147     qemu_cond_signal(&qemu_cpu_cond);
1148     qemu_mutex_unlock_iothread();
1149     return NULL;
1150 }
1151
1152 static void *qemu_dummy_cpu_thread_fn(void *arg)
1153 {
1154 #ifdef _WIN32
1155     fprintf(stderr, "qtest is not supported under Windows\n");
1156     exit(1);
1157 #else
1158     CPUState *cpu = arg;
1159     sigset_t waitset;
1160     int r;
1161
1162     rcu_register_thread();
1163
1164     qemu_mutex_lock_iothread();
1165     qemu_thread_get_self(cpu->thread);
1166     cpu->thread_id = qemu_get_thread_id();
1167     cpu->can_do_io = 1;
1168     current_cpu = cpu;
1169
1170     sigemptyset(&waitset);
1171     sigaddset(&waitset, SIG_IPI);
1172
1173     /* signal CPU creation */
1174     cpu->created = true;
1175     qemu_cond_signal(&qemu_cpu_cond);
1176
1177     while (1) {
1178         qemu_mutex_unlock_iothread();
1179         do {
1180             int sig;
1181             r = sigwait(&waitset, &sig);
1182         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1183         if (r == -1) {
1184             perror("sigwait");
1185             exit(1);
1186         }
1187         qemu_mutex_lock_iothread();
1188         qemu_wait_io_event_common(cpu);
1189     }
1190
1191     return NULL;
1192 #endif
1193 }
1194
1195 static int64_t tcg_get_icount_limit(void)
1196 {
1197     int64_t deadline;
1198
1199     if (replay_mode != REPLAY_MODE_PLAY) {
1200         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1201
1202         /* Maintain prior (possibly buggy) behaviour where if no deadline
1203          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1204          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1205          * nanoseconds.
1206          */
1207         if ((deadline < 0) || (deadline > INT32_MAX)) {
1208             deadline = INT32_MAX;
1209         }
1210
1211         return qemu_icount_round(deadline);
1212     } else {
1213         return replay_get_instructions();
1214     }
1215 }
1216
1217 static void handle_icount_deadline(void)
1218 {
1219     assert(qemu_in_vcpu_thread());
1220     if (use_icount) {
1221         int64_t deadline =
1222             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1223
1224         if (deadline == 0) {
1225             /* Wake up other AioContexts.  */
1226             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1227             qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1228         }
1229     }
1230 }
1231
1232 static void prepare_icount_for_run(CPUState *cpu)
1233 {
1234     if (use_icount) {
1235         int insns_left;
1236
1237         /* These should always be cleared by process_icount_data after
1238          * each vCPU execution. However u16.high can be raised
1239          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1240          */
1241         g_assert(cpu->icount_decr.u16.low == 0);
1242         g_assert(cpu->icount_extra == 0);
1243
1244         cpu->icount_budget = tcg_get_icount_limit();
1245         insns_left = MIN(0xffff, cpu->icount_budget);
1246         cpu->icount_decr.u16.low = insns_left;
1247         cpu->icount_extra = cpu->icount_budget - insns_left;
1248     }
1249 }
1250
1251 static void process_icount_data(CPUState *cpu)
1252 {
1253     if (use_icount) {
1254         /* Account for executed instructions */
1255         cpu_update_icount(cpu);
1256
1257         /* Reset the counters */
1258         cpu->icount_decr.u16.low = 0;
1259         cpu->icount_extra = 0;
1260         cpu->icount_budget = 0;
1261
1262         replay_account_executed_instructions();
1263     }
1264 }
1265
1266
1267 static int tcg_cpu_exec(CPUState *cpu)
1268 {
1269     int ret;
1270 #ifdef CONFIG_PROFILER
1271     int64_t ti;
1272 #endif
1273
1274 #ifdef CONFIG_PROFILER
1275     ti = profile_getclock();
1276 #endif
1277     qemu_mutex_unlock_iothread();
1278     cpu_exec_start(cpu);
1279     ret = cpu_exec(cpu);
1280     cpu_exec_end(cpu);
1281     qemu_mutex_lock_iothread();
1282 #ifdef CONFIG_PROFILER
1283     tcg_time += profile_getclock() - ti;
1284 #endif
1285     return ret;
1286 }
1287
1288 /* Destroy any remaining vCPUs which have been unplugged and have
1289  * finished running
1290  */
1291 static void deal_with_unplugged_cpus(void)
1292 {
1293     CPUState *cpu;
1294
1295     CPU_FOREACH(cpu) {
1296         if (cpu->unplug && !cpu_can_run(cpu)) {
1297             qemu_tcg_destroy_vcpu(cpu);
1298             cpu->created = false;
1299             qemu_cond_signal(&qemu_cpu_cond);
1300             break;
1301         }
1302     }
1303 }
1304
1305 /* Single-threaded TCG
1306  *
1307  * In the single-threaded case each vCPU is simulated in turn. If
1308  * there is more than a single vCPU we create a simple timer to kick
1309  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1310  * This is done explicitly rather than relying on side-effects
1311  * elsewhere.
1312  */
1313
1314 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1315 {
1316     CPUState *cpu = arg;
1317
1318     rcu_register_thread();
1319     tcg_register_thread();
1320
1321     qemu_mutex_lock_iothread();
1322     qemu_thread_get_self(cpu->thread);
1323
1324     CPU_FOREACH(cpu) {
1325         cpu->thread_id = qemu_get_thread_id();
1326         cpu->created = true;
1327         cpu->can_do_io = 1;
1328     }
1329     qemu_cond_signal(&qemu_cpu_cond);
1330
1331     /* wait for initial kick-off after machine start */
1332     while (first_cpu->stopped) {
1333         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1334
1335         /* process any pending work */
1336         CPU_FOREACH(cpu) {
1337             current_cpu = cpu;
1338             qemu_wait_io_event_common(cpu);
1339         }
1340     }
1341
1342     start_tcg_kick_timer();
1343
1344     cpu = first_cpu;
1345
1346     /* process any pending work */
1347     cpu->exit_request = 1;
1348
1349     while (1) {
1350         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1351         qemu_account_warp_timer();
1352
1353         /* Run the timers here.  This is much more efficient than
1354          * waking up the I/O thread and waiting for completion.
1355          */
1356         handle_icount_deadline();
1357
1358         if (!cpu) {
1359             cpu = first_cpu;
1360         }
1361
1362         while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1363
1364             atomic_mb_set(&tcg_current_rr_cpu, cpu);
1365             current_cpu = cpu;
1366
1367             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1368                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1369
1370             if (cpu_can_run(cpu)) {
1371                 int r;
1372
1373                 prepare_icount_for_run(cpu);
1374
1375                 r = tcg_cpu_exec(cpu);
1376
1377                 process_icount_data(cpu);
1378
1379                 if (r == EXCP_DEBUG) {
1380                     cpu_handle_guest_debug(cpu);
1381                     break;
1382                 } else if (r == EXCP_ATOMIC) {
1383                     qemu_mutex_unlock_iothread();
1384                     cpu_exec_step_atomic(cpu);
1385                     qemu_mutex_lock_iothread();
1386                     break;
1387                 }
1388             } else if (cpu->stop) {
1389                 if (cpu->unplug) {
1390                     cpu = CPU_NEXT(cpu);
1391                 }
1392                 break;
1393             }
1394
1395             cpu = CPU_NEXT(cpu);
1396         } /* while (cpu && !cpu->exit_request).. */
1397
1398         /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1399         atomic_set(&tcg_current_rr_cpu, NULL);
1400
1401         if (cpu && cpu->exit_request) {
1402             atomic_mb_set(&cpu->exit_request, 0);
1403         }
1404
1405         qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1406         deal_with_unplugged_cpus();
1407     }
1408
1409     return NULL;
1410 }
1411
1412 static void *qemu_hax_cpu_thread_fn(void *arg)
1413 {
1414     CPUState *cpu = arg;
1415     int r;
1416
1417     qemu_mutex_lock_iothread();
1418     qemu_thread_get_self(cpu->thread);
1419
1420     cpu->thread_id = qemu_get_thread_id();
1421     cpu->created = true;
1422     cpu->halted = 0;
1423     current_cpu = cpu;
1424
1425     hax_init_vcpu(cpu);
1426     qemu_cond_signal(&qemu_cpu_cond);
1427
1428     while (1) {
1429         if (cpu_can_run(cpu)) {
1430             r = hax_smp_cpu_exec(cpu);
1431             if (r == EXCP_DEBUG) {
1432                 cpu_handle_guest_debug(cpu);
1433             }
1434         }
1435
1436         while (cpu_thread_is_idle(cpu)) {
1437             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1438         }
1439 #ifdef _WIN32
1440         SleepEx(0, TRUE);
1441 #endif
1442         qemu_wait_io_event_common(cpu);
1443     }
1444     return NULL;
1445 }
1446
1447 #ifdef _WIN32
1448 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1449 {
1450 }
1451 #endif
1452
1453 /* Multi-threaded TCG
1454  *
1455  * In the multi-threaded case each vCPU has its own thread. The TLS
1456  * variable current_cpu can be used deep in the code to find the
1457  * current CPUState for a given thread.
1458  */
1459
1460 static void *qemu_tcg_cpu_thread_fn(void *arg)
1461 {
1462     CPUState *cpu = arg;
1463
1464     g_assert(!use_icount);
1465
1466     rcu_register_thread();
1467     tcg_register_thread();
1468
1469     qemu_mutex_lock_iothread();
1470     qemu_thread_get_self(cpu->thread);
1471
1472     cpu->thread_id = qemu_get_thread_id();
1473     cpu->created = true;
1474     cpu->can_do_io = 1;
1475     current_cpu = cpu;
1476     qemu_cond_signal(&qemu_cpu_cond);
1477
1478     /* process any pending work */
1479     cpu->exit_request = 1;
1480
1481     while (1) {
1482         if (cpu_can_run(cpu)) {
1483             int r;
1484             r = tcg_cpu_exec(cpu);
1485             switch (r) {
1486             case EXCP_DEBUG:
1487                 cpu_handle_guest_debug(cpu);
1488                 break;
1489             case EXCP_HALTED:
1490                 /* during start-up the vCPU is reset and the thread is
1491                  * kicked several times. If we don't ensure we go back
1492                  * to sleep in the halted state we won't cleanly
1493                  * start-up when the vCPU is enabled.
1494                  *
1495                  * cpu->halted should ensure we sleep in wait_io_event
1496                  */
1497                 g_assert(cpu->halted);
1498                 break;
1499             case EXCP_ATOMIC:
1500                 qemu_mutex_unlock_iothread();
1501                 cpu_exec_step_atomic(cpu);
1502                 qemu_mutex_lock_iothread();
1503             default:
1504                 /* Ignore everything else? */
1505                 break;
1506             }
1507         } else if (cpu->unplug) {
1508             qemu_tcg_destroy_vcpu(cpu);
1509             cpu->created = false;
1510             qemu_cond_signal(&qemu_cpu_cond);
1511             qemu_mutex_unlock_iothread();
1512             return NULL;
1513         }
1514
1515         atomic_mb_set(&cpu->exit_request, 0);
1516         qemu_tcg_wait_io_event(cpu);
1517     }
1518
1519     return NULL;
1520 }
1521
1522 static void qemu_cpu_kick_thread(CPUState *cpu)
1523 {
1524 #ifndef _WIN32
1525     int err;
1526
1527     if (cpu->thread_kicked) {
1528         return;
1529     }
1530     cpu->thread_kicked = true;
1531     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1532     if (err) {
1533         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1534         exit(1);
1535     }
1536 #else /* _WIN32 */
1537     if (!qemu_cpu_is_self(cpu)) {
1538         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1539             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1540                     __func__, GetLastError());
1541             exit(1);
1542         }
1543     }
1544 #endif
1545 }
1546
1547 void qemu_cpu_kick(CPUState *cpu)
1548 {
1549     qemu_cond_broadcast(cpu->halt_cond);
1550     if (tcg_enabled()) {
1551         cpu_exit(cpu);
1552         /* NOP unless doing single-thread RR */
1553         qemu_cpu_kick_rr_cpu();
1554     } else {
1555         if (hax_enabled()) {
1556             /*
1557              * FIXME: race condition with the exit_request check in
1558              * hax_vcpu_hax_exec
1559              */
1560             cpu->exit_request = 1;
1561         }
1562         qemu_cpu_kick_thread(cpu);
1563     }
1564 }
1565
1566 void qemu_cpu_kick_self(void)
1567 {
1568     assert(current_cpu);
1569     qemu_cpu_kick_thread(current_cpu);
1570 }
1571
1572 bool qemu_cpu_is_self(CPUState *cpu)
1573 {
1574     return qemu_thread_is_self(cpu->thread);
1575 }
1576
1577 bool qemu_in_vcpu_thread(void)
1578 {
1579     return current_cpu && qemu_cpu_is_self(current_cpu);
1580 }
1581
/* Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread(); starts out false in every thread.
 */
static __thread bool iothread_locked;

/* Return true if the calling thread has taken the global mutex through
 * qemu_mutex_lock_iothread().
 */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1588
1589 void qemu_mutex_lock_iothread(void)
1590 {
1591     g_assert(!qemu_mutex_iothread_locked());
1592     qemu_mutex_lock(&qemu_global_mutex);
1593     iothread_locked = true;
1594 }
1595
1596 void qemu_mutex_unlock_iothread(void)
1597 {
1598     g_assert(qemu_mutex_iothread_locked());
1599     iothread_locked = false;
1600     qemu_mutex_unlock(&qemu_global_mutex);
1601 }
1602
1603 static bool all_vcpus_paused(void)
1604 {
1605     CPUState *cpu;
1606
1607     CPU_FOREACH(cpu) {
1608         if (!cpu->stopped) {
1609             return false;
1610         }
1611     }
1612
1613     return true;
1614 }
1615
1616 void pause_all_vcpus(void)
1617 {
1618     CPUState *cpu;
1619
1620     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1621     CPU_FOREACH(cpu) {
1622         if (qemu_cpu_is_self(cpu)) {
1623             qemu_cpu_stop(cpu, true);
1624         } else {
1625             cpu->stop = true;
1626             qemu_cpu_kick(cpu);
1627         }
1628     }
1629
1630     while (!all_vcpus_paused()) {
1631         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1632         CPU_FOREACH(cpu) {
1633             qemu_cpu_kick(cpu);
1634         }
1635     }
1636 }
1637
1638 void cpu_resume(CPUState *cpu)
1639 {
1640     cpu->stop = false;
1641     cpu->stopped = false;
1642     qemu_cpu_kick(cpu);
1643 }
1644
1645 void resume_all_vcpus(void)
1646 {
1647     CPUState *cpu;
1648
1649     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1650     CPU_FOREACH(cpu) {
1651         cpu_resume(cpu);
1652     }
1653 }
1654
1655 void cpu_remove(CPUState *cpu)
1656 {
1657     cpu->stop = true;
1658     cpu->unplug = true;
1659     qemu_cpu_kick(cpu);
1660 }
1661
1662 void cpu_remove_sync(CPUState *cpu)
1663 {
1664     cpu_remove(cpu);
1665     while (cpu->created) {
1666         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1667     }
1668 }
1669
/* Size in bytes of the temporary buffers used to form vCPU thread names;
 * it is the bound passed to snprintf(), so longer names are truncated.
 */
#define VCPU_THREAD_NAME_SIZE 16
1672
1673 static void qemu_tcg_init_vcpu(CPUState *cpu)
1674 {
1675     char thread_name[VCPU_THREAD_NAME_SIZE];
1676     static QemuCond *single_tcg_halt_cond;
1677     static QemuThread *single_tcg_cpu_thread;
1678     static int tcg_region_inited;
1679
1680     /*
1681      * Initialize TCG regions--once. Now is a good time, because:
1682      * (1) TCG's init context, prologue and target globals have been set up.
1683      * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1684      *     -accel flag is processed, so the check doesn't work then).
1685      */
1686     if (!tcg_region_inited) {
1687         tcg_region_inited = 1;
1688         tcg_region_init();
1689     }
1690
1691     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1692         cpu->thread = g_malloc0(sizeof(QemuThread));
1693         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1694         qemu_cond_init(cpu->halt_cond);
1695
1696         if (qemu_tcg_mttcg_enabled()) {
1697             /* create a thread per vCPU with TCG (MTTCG) */
1698             parallel_cpus = true;
1699             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1700                  cpu->cpu_index);
1701
1702             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1703                                cpu, QEMU_THREAD_JOINABLE);
1704
1705         } else {
1706             /* share a single thread for all cpus with TCG */
1707             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1708             qemu_thread_create(cpu->thread, thread_name,
1709                                qemu_tcg_rr_cpu_thread_fn,
1710                                cpu, QEMU_THREAD_JOINABLE);
1711
1712             single_tcg_halt_cond = cpu->halt_cond;
1713             single_tcg_cpu_thread = cpu->thread;
1714         }
1715 #ifdef _WIN32
1716         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1717 #endif
1718         while (!cpu->created) {
1719             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1720         }
1721     } else {
1722         /* For non-MTTCG cases we share the thread */
1723         cpu->thread = single_tcg_cpu_thread;
1724         cpu->halt_cond = single_tcg_halt_cond;
1725     }
1726 }
1727
1728 static void qemu_hax_start_vcpu(CPUState *cpu)
1729 {
1730     char thread_name[VCPU_THREAD_NAME_SIZE];
1731
1732     cpu->thread = g_malloc0(sizeof(QemuThread));
1733     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1734     qemu_cond_init(cpu->halt_cond);
1735
1736     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1737              cpu->cpu_index);
1738     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1739                        cpu, QEMU_THREAD_JOINABLE);
1740 #ifdef _WIN32
1741     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1742 #endif
1743     while (!cpu->created) {
1744         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1745     }
1746 }
1747
1748 static void qemu_kvm_start_vcpu(CPUState *cpu)
1749 {
1750     char thread_name[VCPU_THREAD_NAME_SIZE];
1751
1752     cpu->thread = g_malloc0(sizeof(QemuThread));
1753     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1754     qemu_cond_init(cpu->halt_cond);
1755     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1756              cpu->cpu_index);
1757     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1758                        cpu, QEMU_THREAD_JOINABLE);
1759     while (!cpu->created) {
1760         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1761     }
1762 }
1763
1764 static void qemu_dummy_start_vcpu(CPUState *cpu)
1765 {
1766     char thread_name[VCPU_THREAD_NAME_SIZE];
1767
1768     cpu->thread = g_malloc0(sizeof(QemuThread));
1769     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1770     qemu_cond_init(cpu->halt_cond);
1771     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1772              cpu->cpu_index);
1773     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1774                        QEMU_THREAD_JOINABLE);
1775     while (!cpu->created) {
1776         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1777     }
1778 }
1779
1780 void qemu_init_vcpu(CPUState *cpu)
1781 {
1782     cpu->nr_cores = smp_cores;
1783     cpu->nr_threads = smp_threads;
1784     cpu->stopped = true;
1785
1786     if (!cpu->as) {
1787         /* If the target cpu hasn't set up any address spaces itself,
1788          * give it the default one.
1789          */
1790         cpu->num_ases = 1;
1791         cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1792     }
1793
1794     if (kvm_enabled()) {
1795         qemu_kvm_start_vcpu(cpu);
1796     } else if (hax_enabled()) {
1797         qemu_hax_start_vcpu(cpu);
1798     } else if (tcg_enabled()) {
1799         qemu_tcg_init_vcpu(cpu);
1800     } else {
1801         qemu_dummy_start_vcpu(cpu);
1802     }
1803 }
1804
1805 void cpu_stop_current(void)
1806 {
1807     if (current_cpu) {
1808         qemu_cpu_stop(current_cpu, true);
1809     }
1810 }
1811
1812 int vm_stop(RunState state)
1813 {
1814     if (qemu_in_vcpu_thread()) {
1815         qemu_system_vmstop_request_prepare();
1816         qemu_system_vmstop_request(state);
1817         /*
1818          * FIXME: should not return to device code in case
1819          * vm_stop() has been requested.
1820          */
1821         cpu_stop_current();
1822         return 0;
1823     }
1824
1825     return do_vm_stop(state);
1826 }
1827
1828 /**
1829  * Prepare for (re)starting the VM.
1830  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1831  * running or in case of an error condition), 0 otherwise.
1832  */
1833 int vm_prepare_start(void)
1834 {
1835     RunState requested;
1836     int res = 0;
1837
1838     qemu_vmstop_requested(&requested);
1839     if (runstate_is_running() && requested == RUN_STATE__MAX) {
1840         return -1;
1841     }
1842
1843     /* Ensure that a STOP/RESUME pair of events is emitted if a
1844      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1845      * example, according to documentation is always followed by
1846      * the STOP event.
1847      */
1848     if (runstate_is_running()) {
1849         qapi_event_send_stop(&error_abort);
1850         res = -1;
1851     } else {
1852         replay_enable_events();
1853         cpu_enable_ticks();
1854         runstate_set(RUN_STATE_RUNNING);
1855         vm_state_notify(1, RUN_STATE_RUNNING);
1856     }
1857
1858     /* We are sending this now, but the CPUs will be resumed shortly later */
1859     qapi_event_send_resume(&error_abort);
1860     return res;
1861 }
1862
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
1869
1870 /* does a state transition even if the VM is already stopped,
1871    current state is forgotten forever */
1872 int vm_stop_force_state(RunState state)
1873 {
1874     if (runstate_is_running()) {
1875         return vm_stop(state);
1876     } else {
1877         runstate_set(state);
1878
1879         bdrv_drain_all();
1880         /* Make sure to return an error if the flush in a previous vm_stop()
1881          * failed. */
1882         return bdrv_flush_all();
1883     }
1884 }
1885
1886 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1887 {
1888     /* XXX: implement xxx_cpu_list for targets that still miss it */
1889 #if defined(cpu_list)
1890     cpu_list(f, cpu_fprintf);
1891 #endif
1892 }
1893
1894 CpuInfoList *qmp_query_cpus(Error **errp)
1895 {
1896     MachineState *ms = MACHINE(qdev_get_machine());
1897     MachineClass *mc = MACHINE_GET_CLASS(ms);
1898     CpuInfoList *head = NULL, *cur_item = NULL;
1899     CPUState *cpu;
1900
1901     CPU_FOREACH(cpu) {
1902         CpuInfoList *info;
1903 #if defined(TARGET_I386)
1904         X86CPU *x86_cpu = X86_CPU(cpu);
1905         CPUX86State *env = &x86_cpu->env;
1906 #elif defined(TARGET_PPC)
1907         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1908         CPUPPCState *env = &ppc_cpu->env;
1909 #elif defined(TARGET_SPARC)
1910         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1911         CPUSPARCState *env = &sparc_cpu->env;
1912 #elif defined(TARGET_MIPS)
1913         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1914         CPUMIPSState *env = &mips_cpu->env;
1915 #elif defined(TARGET_TRICORE)
1916         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1917         CPUTriCoreState *env = &tricore_cpu->env;
1918 #endif
1919
1920         cpu_synchronize_state(cpu);
1921
1922         info = g_malloc0(sizeof(*info));
1923         info->value = g_malloc0(sizeof(*info->value));
1924         info->value->CPU = cpu->cpu_index;
1925         info->value->current = (cpu == first_cpu);
1926         info->value->halted = cpu->halted;
1927         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1928         info->value->thread_id = cpu->thread_id;
1929 #if defined(TARGET_I386)
1930         info->value->arch = CPU_INFO_ARCH_X86;
1931         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1932 #elif defined(TARGET_PPC)
1933         info->value->arch = CPU_INFO_ARCH_PPC;
1934         info->value->u.ppc.nip = env->nip;
1935 #elif defined(TARGET_SPARC)
1936         info->value->arch = CPU_INFO_ARCH_SPARC;
1937         info->value->u.q_sparc.pc = env->pc;
1938         info->value->u.q_sparc.npc = env->npc;
1939 #elif defined(TARGET_MIPS)
1940         info->value->arch = CPU_INFO_ARCH_MIPS;
1941         info->value->u.q_mips.PC = env->active_tc.PC;
1942 #elif defined(TARGET_TRICORE)
1943         info->value->arch = CPU_INFO_ARCH_TRICORE;
1944         info->value->u.tricore.PC = env->PC;
1945 #else
1946         info->value->arch = CPU_INFO_ARCH_OTHER;
1947 #endif
1948         info->value->has_props = !!mc->cpu_index_to_instance_props;
1949         if (info->value->has_props) {
1950             CpuInstanceProperties *props;
1951             props = g_malloc0(sizeof(*props));
1952             *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
1953             info->value->props = props;
1954         }
1955
1956         /* XXX: waiting for the qapi to support GSList */
1957         if (!cur_item) {
1958             head = cur_item = info;
1959         } else {
1960             cur_item->next = info;
1961             cur_item = info;
1962         }
1963     }
1964
1965     return head;
1966 }
1967
1968 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1969                  bool has_cpu, int64_t cpu_index, Error **errp)
1970 {
1971     FILE *f;
1972     uint32_t l;
1973     CPUState *cpu;
1974     uint8_t buf[1024];
1975     int64_t orig_addr = addr, orig_size = size;
1976
1977     if (!has_cpu) {
1978         cpu_index = 0;
1979     }
1980
1981     cpu = qemu_get_cpu(cpu_index);
1982     if (cpu == NULL) {
1983         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1984                    "a CPU number");
1985         return;
1986     }
1987
1988     f = fopen(filename, "wb");
1989     if (!f) {
1990         error_setg_file_open(errp, errno, filename);
1991         return;
1992     }
1993
1994     while (size != 0) {
1995         l = sizeof(buf);
1996         if (l > size)
1997             l = size;
1998         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1999             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2000                              " specified", orig_addr, orig_size);
2001             goto exit;
2002         }
2003         if (fwrite(buf, 1, l, f) != l) {
2004             error_setg(errp, QERR_IO_ERROR);
2005             goto exit;
2006         }
2007         addr += l;
2008         size -= l;
2009     }
2010
2011 exit:
2012     fclose(f);
2013 }
2014
2015 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2016                   Error **errp)
2017 {
2018     FILE *f;
2019     uint32_t l;
2020     uint8_t buf[1024];
2021
2022     f = fopen(filename, "wb");
2023     if (!f) {
2024         error_setg_file_open(errp, errno, filename);
2025         return;
2026     }
2027
2028     while (size != 0) {
2029         l = sizeof(buf);
2030         if (l > size)
2031             l = size;
2032         cpu_physical_memory_read(addr, buf, l);
2033         if (fwrite(buf, 1, l, f) != l) {
2034             error_setg(errp, QERR_IO_ERROR);
2035             goto exit;
2036         }
2037         addr += l;
2038         size -= l;
2039     }
2040
2041 exit:
2042     fclose(f);
2043 }
2044
2045 void qmp_inject_nmi(Error **errp)
2046 {
2047     nmi_monitor_handle(monitor_get_cpu_index(), errp);
2048 }
2049
2050 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2051 {
2052     if (!use_icount) {
2053         return;
2054     }
2055
2056     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2057                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2058     if (icount_align_option) {
2059         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2060         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2061     } else {
2062         cpu_fprintf(f, "Max guest delay     NA\n");
2063         cpu_fprintf(f, "Max guest advance   NA\n");
2064     }
2065 }