cpus.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to deal
   8  * in the Software without restriction, including without limitation the rights
   9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  * copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  * THE SOFTWARE.
  23  */
  24
  25 /* Needed early for CONFIG_BSD etc. */
  26 #include "qemu/osdep.h"
  27 #include "qemu-common.h"
  28 #include "qemu/config-file.h"
  29 #include "cpu.h"
  30 #include "monitor/monitor.h"
  31 #include "qapi/qmp/qerror.h"
  32 #include "qemu/error-report.h"
  33 #include "sysemu/sysemu.h"
  34 #include "sysemu/block-backend.h"
  35 #include "exec/gdbstub.h"
  36 #include "sysemu/dma.h"
  37 #include "sysemu/hw_accel.h"
  38 #include "sysemu/kvm.h"
  39 #include "sysemu/hax.h"
  40 #include "sysemu/hvf.h"
  41 #include "qmp-commands.h"
  42 #include "exec/exec-all.h"
  43
  44 #include "qemu/thread.h"
  45 #include "sysemu/cpus.h"
  46 #include "sysemu/qtest.h"
  47 #include "qemu/main-loop.h"
  48 #include "qemu/bitmap.h"
  49 #include "qemu/seqlock.h"
  50 #include "tcg.h"
  51 #include "qapi-event.h"
  52 #include "hw/nmi.h"
  53 #include "sysemu/replay.h"
  54 #include "hw/boards.h"
  55
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

/* Fallback definitions for the PR_MCE_KILL prctl constants; each one is
 * only defined here when <sys/prctl.h> did not already provide it.
 */
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
  73
/* NOTE(review): neither variable is referenced in this part of the file;
 * presumably consumed by other translation units - confirm.
 */
int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
/* Timer whose callback is cpu_throttle_timer_tick (see cpu_ticks_init). */
static QEMUTimer *throttle_timer;
/* Current throttle setting; 0 means throttling is off.  Accessed with
 * atomic_read/atomic_set.
 */
static unsigned int throttle_percentage;

/* cpu_throttle_set() clamps requested percentages to [MIN, MAX]. */
#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
/* Base length of one throttle period, in nanoseconds (10 ms). */
#define CPU_THROTTLE_TIMESLICE_NS 10000000
  84
  85 bool cpu_is_stopped(CPUState *cpu)
  86 {
  87     return cpu->stopped || !runstate_is_running();
  88 }
  89
  90 static bool cpu_thread_is_idle(CPUState *cpu)
  91 {
  92     if (cpu->stop || cpu->queued_work_first) {
  93         return false;
  94     }
  95     if (cpu_is_stopped(cpu)) {
  96         return true;
  97     }
  98     if (!cpu->halted || cpu_has_work(cpu) ||
  99         kvm_halt_in_kernel()) {
 100         return false;
 101     }
 102     return true;
 103 }
 104
 105 static bool all_cpu_threads_idle(void)
 106 {
 107     CPUState *cpu;
 108
 109     CPU_FOREACH(cpu) {
 110         if (!cpu_thread_is_idle(cpu)) {
 111             return false;
 112         }
 113     }
 114     return true;
 115 }
 116
 117 /***********************************************************/
 118 /* guest cycle counter */
 119
 120 /* Protected by TimersState seqlock */
 121
/* Set from the "sleep" option in configure_icount(); defaults to true. */
static bool icount_sleep = true;
/* Conversion factor from emulated instructions to virtual clock ticks.
 * Used as a shift count: nanoseconds = instructions << icount_time_shift.
 */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
/* Upper bound for icount_time_shift when icount_adjust() raises it. */
#define MAX_ICOUNT_SHIFT 10
 127
/* All state behind the guest-visible tick counter, the VM clock and the
 * instruction counter (icount).  A single instance, timers_state, exists.
 */
typedef struct TimersState {
    /* Protected by BQL.  */
    /* Last value returned by cpu_get_ticks(); used to keep it monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter by cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    /* Offset added to get_clock() while ticks are enabled. */
    int64_t cpu_clock_offset;
    /* Non-zero between cpu_enable_ticks() and cpu_disable_ticks(). */
    int32_t cpu_ticks_enabled;
    /* Only referenced by vmstate_timers; presumably kept so the migration
     * stream layout stays unchanged - confirm before removing.
     */
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
    /* for adjusting icount */
    /* VIRTUAL_RT time at which the current warp began, or -1 if none. */
    int64_t vm_clock_warp_start;
    /* Periodic adjustment timers, created only for shift=auto. */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    /* Created only when icount sleep is enabled; NULL otherwise. */
    QEMUTimer *icount_warp_timer;
} TimersState;
 151
/* The single TimersState instance used by every function in this file. */
static TimersState timers_state;
/* Whether multi-threaded TCG is in use; set by qemu_tcg_configure(). */
bool mttcg_enabled;
 154
 155 /*
 156  * We default to false if we know other options have been enabled
 157  * which are currently incompatible with MTTCG. Otherwise when each
 158  * guest (target) has been updated to support:
 159  *   - atomic instructions
 160  *   - memory ordering primitives (barriers)
 161  * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 162  *
 163  * Once a guest architecture has been converted to the new primitives
 164  * there are two remaining limitations to check.
 165  *
 166  * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 167  * - The host must have a stronger memory order than the guest
 168  *
 169  * It may be possible in future to support strong guests on weak hosts
 170  * but that will require tagging all load/stores in a guest with their
 171  * implicit memory order requirements which would likely slow things
 172  * down a lot.
 173  */
 174
 175 static bool check_tcg_memory_orders_compatible(void)
 176 {
 177 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
 178     return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
 179 #else
 180     return false;
 181 #endif
 182 }
 183
 184 static bool default_mttcg_enabled(void)
 185 {
 186     if (use_icount || TCG_OVERSIZED_GUEST) {
 187         return false;
 188     } else {
 189 #ifdef TARGET_SUPPORTS_MTTCG
 190         return check_tcg_memory_orders_compatible();
 191 #else
 192         return false;
 193 #endif
 194     }
 195 }
 196
 197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
 198 {
 199     const char *t = qemu_opt_get(opts, "thread");
 200     if (t) {
 201         if (strcmp(t, "multi") == 0) {
 202             if (TCG_OVERSIZED_GUEST) {
 203                 error_setg(errp, "No MTTCG when guest word size > hosts");
 204             } else if (use_icount) {
 205                 error_setg(errp, "No MTTCG when icount is enabled");
 206             } else {
 207 #ifndef TARGET_SUPPORTS_MTTCG
 208                 error_report("Guest not yet converted to MTTCG - "
 209                              "you may get unexpected results");
 210 #endif
 211                 if (!check_tcg_memory_orders_compatible()) {
 212                     error_report("Guest expects a stronger memory ordering "
 213                                  "than the host provides");
 214                     error_printf("This may cause strange/hard to debug errors\n");
 215                 }
 216                 mttcg_enabled = true;
 217             }
 218         } else if (strcmp(t, "single") == 0) {
 219             mttcg_enabled = false;
 220         } else {
 221             error_setg(errp, "Invalid 'thread' setting %s", t);
 222         }
 223     } else {
 224         mttcg_enabled = default_mttcg_enabled();
 225     }
 226 }
 227
 228 /* The current number of executed instructions is based on what we
 229  * originally budgeted minus the current state of the decrementing
 230  * icount counters in extra/u16.low.
 231  */
 232 static int64_t cpu_get_icount_executed(CPUState *cpu)
 233 {
 234     return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
 235 }
 236
 237 /*
 238  * Update the global shared timer_state.qemu_icount to take into
 239  * account executed instructions. This is done by the TCG vCPU
 240  * thread so the main-loop can see time has moved forward.
 241  */
 242 void cpu_update_icount(CPUState *cpu)
 243 {
 244     int64_t executed = cpu_get_icount_executed(cpu);
 245     cpu->icount_budget -= executed;
 246
 247 #ifdef CONFIG_ATOMIC64
 248     atomic_set__nocheck(&timers_state.qemu_icount,
 249                         atomic_read__nocheck(&timers_state.qemu_icount) +
 250                         executed);
 251 #else /* FIXME: we need 64bit atomics to do this safely */
 252     timers_state.qemu_icount += executed;
 253 #endif
 254 }
 255
 256 int64_t cpu_get_icount_raw(void)
 257 {
 258     CPUState *cpu = current_cpu;
 259
 260     if (cpu && cpu->running) {
 261         if (!cpu->can_do_io) {
 262             fprintf(stderr, "Bad icount read\n");
 263             exit(1);
 264         }
 265         /* Take into account what has run */
 266         cpu_update_icount(cpu);
 267     }
 268 #ifdef CONFIG_ATOMIC64
 269     return atomic_read__nocheck(&timers_state.qemu_icount);
 270 #else /* FIXME: we need 64bit atomics to do this safely */
 271     return timers_state.qemu_icount;
 272 #endif
 273 }
 274
 275 /* Return the virtual CPU time, based on the instruction counter.  */
 276 static int64_t cpu_get_icount_locked(void)
 277 {
 278     int64_t icount = cpu_get_icount_raw();
 279     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
 280 }
 281
 282 int64_t cpu_get_icount(void)
 283 {
 284     int64_t icount;
 285     unsigned start;
 286
 287     do {
 288         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 289         icount = cpu_get_icount_locked();
 290     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 291
 292     return icount;
 293 }
 294
 295 int64_t cpu_icount_to_ns(int64_t icount)
 296 {
 297     return icount << icount_time_shift;
 298 }
 299
 300 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
 301  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 302  * counter.
 303  *
 304  * Caller must hold the BQL
 305  */
 306 int64_t cpu_get_ticks(void)
 307 {
 308     int64_t ticks;
 309
 310     if (use_icount) {
 311         return cpu_get_icount();
 312     }
 313
 314     ticks = timers_state.cpu_ticks_offset;
 315     if (timers_state.cpu_ticks_enabled) {
 316         ticks += cpu_get_host_ticks();
 317     }
 318
 319     if (timers_state.cpu_ticks_prev > ticks) {
 320         /* Note: non increasing ticks may happen if the host uses
 321            software suspend */
 322         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
 323         ticks = timers_state.cpu_ticks_prev;
 324     }
 325
 326     timers_state.cpu_ticks_prev = ticks;
 327     return ticks;
 328 }
 329
 330 static int64_t cpu_get_clock_locked(void)
 331 {
 332     int64_t time;
 333
 334     time = timers_state.cpu_clock_offset;
 335     if (timers_state.cpu_ticks_enabled) {
 336         time += get_clock();
 337     }
 338
 339     return time;
 340 }
 341
 342 /* Return the monotonic time elapsed in VM, i.e.,
 343  * the time between vm_start and vm_stop
 344  */
 345 int64_t cpu_get_clock(void)
 346 {
 347     int64_t ti;
 348     unsigned start;
 349
 350     do {
 351         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 352         ti = cpu_get_clock_locked();
 353     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 354
 355     return ti;
 356 }
 357
 358 /* enable cpu_get_ticks()
 359  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 360  */
 361 void cpu_enable_ticks(void)
 362 {
 363     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
 364     seqlock_write_begin(&timers_state.vm_clock_seqlock);
 365     if (!timers_state.cpu_ticks_enabled) {
 366         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
 367         timers_state.cpu_clock_offset -= get_clock();
 368         timers_state.cpu_ticks_enabled = 1;
 369     }
 370     seqlock_write_end(&timers_state.vm_clock_seqlock);
 371 }
 372
 373 /* disable cpu_get_ticks() : the clock is stopped. You must not call
 374  * cpu_get_ticks() after that.
 375  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 376  */
 377 void cpu_disable_ticks(void)
 378 {
 379     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
 380     seqlock_write_begin(&timers_state.vm_clock_seqlock);
 381     if (timers_state.cpu_ticks_enabled) {
 382         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
 383         timers_state.cpu_clock_offset = cpu_get_clock_locked();
 384         timers_state.cpu_ticks_enabled = 0;
 385     }
 386     seqlock_write_end(&timers_state.vm_clock_seqlock);
 387 }
 388
 389 /* Correlation between real and virtual time is always going to be
 390    fairly approximate, so ignore small variation.
 391    When the guest is idle real and virtual time will be aligned in
 392    the IO wait loop.  */
 393 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
 394
 395 static void icount_adjust(void)
 396 {
 397     int64_t cur_time;
 398     int64_t cur_icount;
 399     int64_t delta;
 400
 401     /* Protected by TimersState mutex.  */
 402     static int64_t last_delta;
 403
 404     /* If the VM is not running, then do nothing.  */
 405     if (!runstate_is_running()) {
 406         return;
 407     }
 408
 409     seqlock_write_begin(&timers_state.vm_clock_seqlock);
 410     cur_time = cpu_get_clock_locked();
 411     cur_icount = cpu_get_icount_locked();
 412
 413     delta = cur_icount - cur_time;
 414     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
 415     if (delta > 0
 416         && last_delta + ICOUNT_WOBBLE < delta * 2
 417         && icount_time_shift > 0) {
 418         /* The guest is getting too far ahead.  Slow time down.  */
 419         icount_time_shift--;
 420     }
 421     if (delta < 0
 422         && last_delta - ICOUNT_WOBBLE > delta * 2
 423         && icount_time_shift < MAX_ICOUNT_SHIFT) {
 424         /* The guest is getting too far behind.  Speed time up.  */
 425         icount_time_shift++;
 426     }
 427     last_delta = delta;
 428     timers_state.qemu_icount_bias = cur_icount
 429                               - (timers_state.qemu_icount << icount_time_shift);
 430     seqlock_write_end(&timers_state.vm_clock_seqlock);
 431 }
 432
 433 static void icount_adjust_rt(void *opaque)
 434 {
 435     timer_mod(timers_state.icount_rt_timer,
 436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 437     icount_adjust();
 438 }
 439
 440 static void icount_adjust_vm(void *opaque)
 441 {
 442     timer_mod(timers_state.icount_vm_timer,
 443                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 444                    NANOSECONDS_PER_SECOND / 10);
 445     icount_adjust();
 446 }
 447
 448 static int64_t qemu_icount_round(int64_t count)
 449 {
 450     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
 451 }
 452
/* Account for a pending clock warp: add the real time that has passed since
 * vm_clock_warp_start to qemu_icount_bias, mark the warp as finished (-1),
 * and notify QEMU_CLOCK_VIRTUAL if that made timers expire.
 *
 * Returns immediately if no warp is pending.  If the VM is not running the
 * warp is cancelled without advancing the bias.
 */
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    /* -1 means no warp is in progress. */
    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        /* Uses the field, not the snapshot taken above, now that the
         * write section is held.
         */
        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
 495
 496 static void icount_timer_cb(void *opaque)
 497 {
 498     /* No need for a checkpoint because the timer already synchronizes
 499      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 500      */
 501     icount_warp_rt();
 502 }
 503
 504 void qtest_clock_warp(int64_t dest)
 505 {
 506     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 507     AioContext *aio_context;
 508     assert(qtest_enabled());
 509     aio_context = qemu_get_aio_context();
 510     while (clock < dest) {
 511         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 512         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 513
 514         seqlock_write_begin(&timers_state.vm_clock_seqlock);
 515         timers_state.qemu_icount_bias += warp;
 516         seqlock_write_end(&timers_state.vm_clock_seqlock);
 517
 518         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 519         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
 520         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 521     }
 522     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 523 }
 524
/* In icount mode, make sure QEMU_CLOCK_VIRTUAL keeps moving while every
 * vCPU thread is idle.
 *
 * Nothing is done unless icount is enabled, the VM is running, the replay
 * checkpoint allows it, all vCPU threads are idle and qtest is not driving
 * the clock.  Then, depending on the next QEMU_CLOCK_VIRTUAL deadline:
 *   - no deadline (< 0): return (warning once if icount sleep is off);
 *   - deadline == 0: just notify the clock;
 *   - deadline > 0: either jump the clock straight to the deadline (sleep
 *     off) or record the warp start and arm icount_warp_timer (sleep on).
 */
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        /* Ensures the warning below is printed at most once. */
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            /* Keep the earliest warp start if one is already recorded. */
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
 609
 610 static void qemu_account_warp_timer(void)
 611 {
 612     if (!use_icount || !icount_sleep) {
 613         return;
 614     }
 615
 616     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
 617      * do not fire, so computing the deadline does not make sense.
 618      */
 619     if (!runstate_is_running()) {
 620         return;
 621     }
 622
 623     /* warp clock deterministically in record/replay mode */
 624     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
 625         return;
 626     }
 627
 628     timer_del(timers_state.icount_warp_timer);
 629     icount_warp_rt();
 630 }
 631
 632 static bool icount_state_needed(void *opaque)
 633 {
 634     return use_icount;
 635 }
 636
 637 static bool warp_timer_state_needed(void *opaque)
 638 {
 639     TimersState *s = opaque;
 640     return s->icount_warp_timer != NULL;
 641 }
 642
 643 static bool adjust_timers_state_needed(void *opaque)
 644 {
 645     TimersState *s = opaque;
 646     return s->icount_rt_timer != NULL;
 647 }
 648
/*
 * The subsection for warp timer migration is optional, because the warp
 * timer may not have been created.
 */
/* Migrates the warp start time and the warp timer; included only when
 * warp_timer_state_needed() reports that the timer exists.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
 663
/* Migrates the two icount adjustment timers; included only when
 * adjust_timers_state_needed() reports that icount_rt_timer exists.
 */
static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
 675
/*
 * This is a subsection for icount migration.  It carries the icount bias
 * and instruction counter, is sent only when icount_state_needed() is
 * true, and nests the two optional timer subsections.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};
 695
/* Top-level migration description for TimersState, registered by
 * cpu_ticks_init().  cpu_clock_offset is a version-2 field.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
 711
 712 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 713 {
 714     double pct;
 715     double throttle_ratio;
 716     long sleeptime_ns;
 717
 718     if (!cpu_throttle_get_percentage()) {
 719         return;
 720     }
 721
 722     pct = (double)cpu_throttle_get_percentage()/100;
 723     throttle_ratio = pct / (1 - pct);
 724     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
 725
 726     qemu_mutex_unlock_iothread();
 727     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
 728     qemu_mutex_lock_iothread();
 729     atomic_set(&cpu->throttle_thread_scheduled, 0);
 730 }
 731
 732 static void cpu_throttle_timer_tick(void *opaque)
 733 {
 734     CPUState *cpu;
 735     double pct;
 736
 737     /* Stop the timer if needed */
 738     if (!cpu_throttle_get_percentage()) {
 739         return;
 740     }
 741     CPU_FOREACH(cpu) {
 742         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
 743             async_run_on_cpu(cpu, cpu_throttle_thread,
 744                              RUN_ON_CPU_NULL);
 745         }
 746     }
 747
 748     pct = (double)cpu_throttle_get_percentage()/100;
 749     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 750                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
 751 }
 752
 753 void cpu_throttle_set(int new_throttle_pct)
 754 {
 755     /* Ensure throttle percentage is within valid range */
 756     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
 757     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
 758
 759     atomic_set(&throttle_percentage, new_throttle_pct);
 760
 761     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
 762                                        CPU_THROTTLE_TIMESLICE_NS);
 763 }
 764
/* Turn throttling off by setting the percentage to 0.  The timer is not
 * touched here; cpu_throttle_timer_tick() stops re-arming itself once it
 * sees a zero percentage.
 */
void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}
 769
 770 bool cpu_throttle_active(void)
 771 {
 772     return (cpu_throttle_get_percentage() != 0);
 773 }
 774
/* Return the current throttle percentage (atomic read); 0 means off. */
int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
 779
 780 void cpu_ticks_init(void)
 781 {
 782     seqlock_init(&timers_state.vm_clock_seqlock);
 783     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
 784     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 785                                            cpu_throttle_timer_tick, NULL);
 786 }
 787
 788 void configure_icount(QemuOpts *opts, Error **errp)
 789 {
 790     const char *option;
 791     char *rem_str = NULL;
 792
 793     option = qemu_opt_get(opts, "shift");
 794     if (!option) {
 795         if (qemu_opt_get(opts, "align") != NULL) {
 796             error_setg(errp, "Please specify shift option when using align");
 797         }
 798         return;
 799     }
 800
 801     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
 802     if (icount_sleep) {
 803         timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
 804                                          icount_timer_cb, NULL);
 805     }
 806
 807     icount_align_option = qemu_opt_get_bool(opts, "align", false);
 808
 809     if (icount_align_option && !icount_sleep) {
 810         error_setg(errp, "align=on and sleep=off are incompatible");
 811     }
 812     if (strcmp(option, "auto") != 0) {
 813         errno = 0;
 814         icount_time_shift = strtol(option, &rem_str, 0);
 815         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
 816             error_setg(errp, "icount: Invalid shift value");
 817         }
 818         use_icount = 1;
 819         return;
 820     } else if (icount_align_option) {
 821         error_setg(errp, "shift=auto and align=on are incompatible");
 822     } else if (!icount_sleep) {
 823         error_setg(errp, "shift=auto and sleep=off are incompatible");
 824     }
 825
 826     use_icount = 2;
 827
 828     /* 125MIPS seems a reasonable initial guess at the guest speed.
 829        It will be corrected fairly quickly anyway.  */
 830     icount_time_shift = 3;
 831
 832     /* Have both realtime and virtual time triggers for speed adjustment.
 833        The realtime trigger catches emulated time passing too slowly,
 834        the virtual time trigger catches emulated time passing too fast.
 835        Realtime triggers occur even when idle, so use them less frequently
 836        than VM triggers.  */
 837     timers_state.vm_clock_warp_start = -1;
 838     timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
 839                                    icount_adjust_rt, NULL);
 840     timer_mod(timers_state.icount_rt_timer,
 841                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
 842     timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 843                                         icount_adjust_vm, NULL);
 844     timer_mod(timers_state.icount_vm_timer,
 845                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
 846                    NANOSECONDS_PER_SECOND / 10);
 847 }
 848
 849 /***********************************************************/
 850 /* TCG vCPU kick timer
 851  *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running, a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 856  *
 857  * The timer is removed if all vCPUs are idle and restarted again once
 858  * idleness is complete.
 859  */
 860
/* Kick timer; NULL while no timer exists (see start/stop_tcg_kick_timer). */
static QEMUTimer *tcg_kick_vcpu_timer;
/* vCPU targeted by qemu_cpu_kick_rr_cpu(); read with atomic_mb_read. */
static CPUState *tcg_current_rr_cpu;

/* Interval between kicks: a tenth of a second, in nanoseconds. */
#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
 865
 866 static inline int64_t qemu_tcg_next_kick(void)
 867 {
 868     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
 869 }
 870
 871 /* Kick the currently round-robin scheduled vCPU */
 872 static void qemu_cpu_kick_rr_cpu(void)
 873 {
 874     CPUState *cpu;
 875     do {
 876         cpu = atomic_mb_read(&tcg_current_rr_cpu);
 877         if (cpu) {
 878             cpu_exit(cpu);
 879         }
 880     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
 881 }
 882
/* Empty work item.  qemu_timer_notify_cb() queues it on a vCPU purely for
 * the side effect of having queued work; both parameters are unused.
 */
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}
 886
 887 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
 888 {
 889     if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
 890         qemu_notify_event();
 891         return;
 892     }
 893
 894     if (!qemu_in_vcpu_thread() && first_cpu) {
 895         /* qemu_cpu_kick is not enough to kick a halted CPU out of
 896          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
 897          * causes cpu_thread_is_idle to return false.  This way,
 898          * handle_icount_deadline can run.
 899          */
 900         async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
 901     }
 902 }
 903
 904 static void kick_tcg_thread(void *opaque)
 905 {
 906     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 907     qemu_cpu_kick_rr_cpu();
 908 }
 909
 910 static void start_tcg_kick_timer(void)
 911 {
 912     if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
 913         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
 914                                            kick_tcg_thread, NULL);
 915         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
 916     }
 917 }
 918
 919 static void stop_tcg_kick_timer(void)
 920 {
 921     if (tcg_kick_vcpu_timer) {
 922         timer_del(tcg_kick_vcpu_timer);
 923         tcg_kick_vcpu_timer = NULL;
 924     }
 925 }
 926
 927 /***********************************************************/
 928 void hw_error(const char *fmt, ...)
 929 {
 930     va_list ap;
 931     CPUState *cpu;
 932
 933     va_start(ap, fmt);
 934     fprintf(stderr, "qemu: hardware error: ");
 935     vfprintf(stderr, fmt, ap);
 936     fprintf(stderr, "\n");
 937     CPU_FOREACH(cpu) {
 938         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
 939         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
 940     }
 941     va_end(ap);
 942     abort();
 943 }
 944
 945 void cpu_synchronize_all_states(void)
 946 {
 947     CPUState *cpu;
 948
 949     CPU_FOREACH(cpu) {
 950         cpu_synchronize_state(cpu);
 951         /* TODO: move to cpu_synchronize_state() */
 952         if (hvf_enabled()) {
 953             hvf_cpu_synchronize_state(cpu);
 954         }
 955     }
 956 }
 957
 958 void cpu_synchronize_all_post_reset(void)
 959 {
 960     CPUState *cpu;
 961
 962     CPU_FOREACH(cpu) {
 963         cpu_synchronize_post_reset(cpu);
 964         /* TODO: move to cpu_synchronize_post_reset() */
 965         if (hvf_enabled()) {
 966             hvf_cpu_synchronize_post_reset(cpu);
 967         }
 968     }
 969 }
 970
 971 void cpu_synchronize_all_post_init(void)
 972 {
 973     CPUState *cpu;
 974
 975     CPU_FOREACH(cpu) {
 976         cpu_synchronize_post_init(cpu);
 977         /* TODO: move to cpu_synchronize_post_init() */
 978         if (hvf_enabled()) {
 979             hvf_cpu_synchronize_post_init(cpu);
 980         }
 981     }
 982 }
 983
 984 void cpu_synchronize_all_pre_loadvm(void)
 985 {
 986     CPUState *cpu;
 987
 988     CPU_FOREACH(cpu) {
 989         cpu_synchronize_pre_loadvm(cpu);
 990     }
 991 }
 992
 993 static int do_vm_stop(RunState state)
 994 {
 995     int ret = 0;
 996
 997     if (runstate_is_running()) {
 998         cpu_disable_ticks();
 999         pause_all_vcpus();
1000         runstate_set(state);
1001         vm_state_notify(0, state);
1002         qapi_event_send_stop(&error_abort);
1003     }
1004
1005     bdrv_drain_all();
1006     replay_disable_events();
1007     ret = bdrv_flush_all();
1008
1009     return ret;
1010 }
1011
1012 static bool cpu_can_run(CPUState *cpu)
1013 {
1014     if (cpu->stop) {
1015         return false;
1016     }
1017     if (cpu_is_stopped(cpu)) {
1018         return false;
1019     }
1020     return true;
1021 }
1022
/* Handle an EXCP_DEBUG exit from a vCPU: record @cpu as the CPU that
 * stopped for the gdbstub, raise a system debug request and mark the
 * vCPU as stopped.
 */
static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
1029
1030 #ifdef CONFIG_LINUX
1031 static void sigbus_reraise(void)
1032 {
1033     sigset_t set;
1034     struct sigaction action;
1035
1036     memset(&action, 0, sizeof(action));
1037     action.sa_handler = SIG_DFL;
1038     if (!sigaction(SIGBUS, &action, NULL)) {
1039         raise(SIGBUS);
1040         sigemptyset(&set);
1041         sigaddset(&set, SIGBUS);
1042         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1043     }
1044     perror("Failed to re-raise SIGBUS!\n");
1045     abort();
1046 }
1047
1048 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1049 {
1050     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1051         sigbus_reraise();
1052     }
1053
1054     if (current_cpu) {
1055         /* Called asynchronously in VCPU thread.  */
1056         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1057             sigbus_reraise();
1058         }
1059     } else {
1060         /* Called synchronously (via signalfd) in main thread.  */
1061         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1062             sigbus_reraise();
1063         }
1064     }
1065 }
1066
1067 static void qemu_init_sigbus(void)
1068 {
1069     struct sigaction action;
1070
1071     memset(&action, 0, sizeof(action));
1072     action.sa_flags = SA_SIGINFO;
1073     action.sa_sigaction = sigbus_handler;
1074     sigaction(SIGBUS, &action, NULL);
1075
1076     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1077 }
1078 #else /* !CONFIG_LINUX */
/* No SIGBUS handling is set up when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
}
1082 #endif /* !CONFIG_LINUX */
1083
/* Global mutex taken by qemu_mutex_lock_iothread(); also the mutex every
 * condition variable in this file is waited on with. */
static QemuMutex qemu_global_mutex;

/* Thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* cpu creation: signalled whenever a vCPU's "created" flag changes */
static QemuCond qemu_cpu_cond;
/* vCPU pause: broadcast by qemu_cpu_stop() once a vCPU has stopped */
static QemuCond qemu_pause_cond;
1092
1093 void qemu_init_cpu_loop(void)
1094 {
1095     qemu_init_sigbus();
1096     qemu_cond_init(&qemu_cpu_cond);
1097     qemu_cond_init(&qemu_pause_cond);
1098     qemu_mutex_init(&qemu_global_mutex);
1099
1100     qemu_thread_get_self(&io_thread);
1101 }
1102
/* Run @func with @data on @cpu by forwarding to do_run_on_cpu(), passing
 * the global mutex as the lock that call operates with.
 */
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
1107
1108 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1109 {
1110     if (kvm_destroy_vcpu(cpu) < 0) {
1111         error_report("kvm_destroy_vcpu failed");
1112         exit(EXIT_FAILURE);
1113     }
1114 }
1115
/* TCG counterpart of qemu_kvm_destroy_vcpu(); currently nothing needs to
 * be torn down for a TCG vCPU.
 */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
1119
/* Mark @cpu as stopped and wake everybody waiting on qemu_pause_cond.
 *
 * @cpu:  the vCPU to stop; must be the vCPU of the calling thread
 * @exit: when true, additionally call cpu_exit() on @cpu
 *
 * Clears the pending stop request (cpu->stop) and sets cpu->stopped.
 */
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}
1130
/* Common tail of the per-accelerator wait_io_event functions.
 *
 * Clears the vCPU's thread_kicked flag, honours a pending stop request
 * (without calling cpu_exit()) and then runs the work queued for @cpu.
 */
static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}
1139
1140 static bool qemu_tcg_should_sleep(CPUState *cpu)
1141 {
1142     if (mttcg_enabled) {
1143         return cpu_thread_is_idle(cpu);
1144     } else {
1145         return all_cpu_threads_idle();
1146     }
1147 }
1148
1149 static void qemu_tcg_wait_io_event(CPUState *cpu)
1150 {
1151     while (qemu_tcg_should_sleep(cpu)) {
1152         stop_tcg_kick_timer();
1153         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1154     }
1155
1156     start_tcg_kick_timer();
1157
1158     qemu_wait_io_event_common(cpu);
1159 }
1160
1161 static void qemu_kvm_wait_io_event(CPUState *cpu)
1162 {
1163     while (cpu_thread_is_idle(cpu)) {
1164         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1165     }
1166
1167     qemu_wait_io_event_common(cpu);
1168 }
1169
1170 static void qemu_hvf_wait_io_event(CPUState *cpu)
1171 {
1172     while (cpu_thread_is_idle(cpu)) {
1173         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1174     }
1175     qemu_wait_io_event_common(cpu);
1176 }
1177
1178 static void *qemu_kvm_cpu_thread_fn(void *arg)
1179 {
1180     CPUState *cpu = arg;
1181     int r;
1182
1183     rcu_register_thread();
1184
1185     qemu_mutex_lock_iothread();
1186     qemu_thread_get_self(cpu->thread);
1187     cpu->thread_id = qemu_get_thread_id();
1188     cpu->can_do_io = 1;
1189     current_cpu = cpu;
1190
1191     r = kvm_init_vcpu(cpu);
1192     if (r < 0) {
1193         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1194         exit(1);
1195     }
1196
1197     kvm_init_cpu_signals(cpu);
1198
1199     /* signal CPU creation */
1200     cpu->created = true;
1201     qemu_cond_signal(&qemu_cpu_cond);
1202
1203     do {
1204         if (cpu_can_run(cpu)) {
1205             r = kvm_cpu_exec(cpu);
1206             if (r == EXCP_DEBUG) {
1207                 cpu_handle_guest_debug(cpu);
1208             }
1209         }
1210         qemu_kvm_wait_io_event(cpu);
1211     } while (!cpu->unplug || cpu_can_run(cpu));
1212
1213     qemu_kvm_destroy_vcpu(cpu);
1214     cpu->created = false;
1215     qemu_cond_signal(&qemu_cpu_cond);
1216     qemu_mutex_unlock_iothread();
1217     return NULL;
1218 }
1219
1220 static void *qemu_dummy_cpu_thread_fn(void *arg)
1221 {
1222 #ifdef _WIN32
1223     fprintf(stderr, "qtest is not supported under Windows\n");
1224     exit(1);
1225 #else
1226     CPUState *cpu = arg;
1227     sigset_t waitset;
1228     int r;
1229
1230     rcu_register_thread();
1231
1232     qemu_mutex_lock_iothread();
1233     qemu_thread_get_self(cpu->thread);
1234     cpu->thread_id = qemu_get_thread_id();
1235     cpu->can_do_io = 1;
1236     current_cpu = cpu;
1237
1238     sigemptyset(&waitset);
1239     sigaddset(&waitset, SIG_IPI);
1240
1241     /* signal CPU creation */
1242     cpu->created = true;
1243     qemu_cond_signal(&qemu_cpu_cond);
1244
1245     while (1) {
1246         qemu_mutex_unlock_iothread();
1247         do {
1248             int sig;
1249             r = sigwait(&waitset, &sig);
1250         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1251         if (r == -1) {
1252             perror("sigwait");
1253             exit(1);
1254         }
1255         qemu_mutex_lock_iothread();
1256         qemu_wait_io_event_common(cpu);
1257     }
1258
1259     return NULL;
1260 #endif
1261 }
1262
1263 static int64_t tcg_get_icount_limit(void)
1264 {
1265     int64_t deadline;
1266
1267     if (replay_mode != REPLAY_MODE_PLAY) {
1268         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1269
1270         /* Maintain prior (possibly buggy) behaviour where if no deadline
1271          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1272          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1273          * nanoseconds.
1274          */
1275         if ((deadline < 0) || (deadline > INT32_MAX)) {
1276             deadline = INT32_MAX;
1277         }
1278
1279         return qemu_icount_round(deadline);
1280     } else {
1281         return replay_get_instructions();
1282     }
1283 }
1284
1285 static void handle_icount_deadline(void)
1286 {
1287     assert(qemu_in_vcpu_thread());
1288     if (use_icount) {
1289         int64_t deadline =
1290             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1291
1292         if (deadline == 0) {
1293             /* Wake up other AioContexts.  */
1294             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1295             qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1296         }
1297     }
1298 }
1299
1300 static void prepare_icount_for_run(CPUState *cpu)
1301 {
1302     if (use_icount) {
1303         int insns_left;
1304
1305         /* These should always be cleared by process_icount_data after
1306          * each vCPU execution. However u16.high can be raised
1307          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1308          */
1309         g_assert(cpu->icount_decr.u16.low == 0);
1310         g_assert(cpu->icount_extra == 0);
1311
1312         cpu->icount_budget = tcg_get_icount_limit();
1313         insns_left = MIN(0xffff, cpu->icount_budget);
1314         cpu->icount_decr.u16.low = insns_left;
1315         cpu->icount_extra = cpu->icount_budget - insns_left;
1316     }
1317 }
1318
1319 static void process_icount_data(CPUState *cpu)
1320 {
1321     if (use_icount) {
1322         /* Account for executed instructions */
1323         cpu_update_icount(cpu);
1324
1325         /* Reset the counters */
1326         cpu->icount_decr.u16.low = 0;
1327         cpu->icount_extra = 0;
1328         cpu->icount_budget = 0;
1329
1330         replay_account_executed_instructions();
1331     }
1332 }
1333
1334
1335 static int tcg_cpu_exec(CPUState *cpu)
1336 {
1337     int ret;
1338 #ifdef CONFIG_PROFILER
1339     int64_t ti;
1340 #endif
1341
1342 #ifdef CONFIG_PROFILER
1343     ti = profile_getclock();
1344 #endif
1345     qemu_mutex_unlock_iothread();
1346     cpu_exec_start(cpu);
1347     ret = cpu_exec(cpu);
1348     cpu_exec_end(cpu);
1349     qemu_mutex_lock_iothread();
1350 #ifdef CONFIG_PROFILER
1351     tcg_time += profile_getclock() - ti;
1352 #endif
1353     return ret;
1354 }
1355
1356 /* Destroy any remaining vCPUs which have been unplugged and have
1357  * finished running
1358  */
1359 static void deal_with_unplugged_cpus(void)
1360 {
1361     CPUState *cpu;
1362
1363     CPU_FOREACH(cpu) {
1364         if (cpu->unplug && !cpu_can_run(cpu)) {
1365             qemu_tcg_destroy_vcpu(cpu);
1366             cpu->created = false;
1367             qemu_cond_signal(&qemu_cpu_cond);
1368             break;
1369         }
1370     }
1371 }
1372
1373 /* Single-threaded TCG
1374  *
1375  * In the single-threaded case each vCPU is simulated in turn. If
1376  * there is more than a single vCPU we create a simple timer to kick
1377  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1378  * This is done explicitly rather than relying on side-effects
1379  * elsewhere.
1380  */
1381
/* Thread function for single-threaded (round-robin) TCG.
 *
 * @arg: the CPUState the thread was created for; after start-up the
 *       local variable is reused as the round-robin cursor.
 *
 * The one thread marks every vCPU as created, waits until first_cpu is
 * no longer stopped and then loops forever: it runs each vCPU in turn
 * until one of them has queued work or an exit request, then waits for
 * I/O events and reaps unplugged vCPUs.  The loop has no exit, so the
 * final return is never reached.
 */
static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    /* All vCPUs share this thread, so all of them get its id. */
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        /* The previous pass ran off the end of the list: start over. */
        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            /* Publish the running vCPU so that kicks can reach it. */
            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    /* Run the step without the iothread lock held. */
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                /* An unplugged vCPU is skipped before leaving the loop. */
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        /* Fall back to the head of the CPU list when the cursor is NULL. */
        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}
1479
1480 static void *qemu_hax_cpu_thread_fn(void *arg)
1481 {
1482     CPUState *cpu = arg;
1483     int r;
1484
1485     qemu_mutex_lock_iothread();
1486     qemu_thread_get_self(cpu->thread);
1487
1488     cpu->thread_id = qemu_get_thread_id();
1489     cpu->created = true;
1490     cpu->halted = 0;
1491     current_cpu = cpu;
1492
1493     hax_init_vcpu(cpu);
1494     qemu_cond_signal(&qemu_cpu_cond);
1495
1496     while (1) {
1497         if (cpu_can_run(cpu)) {
1498             r = hax_smp_cpu_exec(cpu);
1499             if (r == EXCP_DEBUG) {
1500                 cpu_handle_guest_debug(cpu);
1501             }
1502         }
1503
1504         while (cpu_thread_is_idle(cpu)) {
1505             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1506         }
1507 #ifdef _WIN32
1508         SleepEx(0, TRUE);
1509 #endif
1510         qemu_wait_io_event_common(cpu);
1511     }
1512     return NULL;
1513 }
1514
1515 /* The HVF-specific vCPU thread function. This one should only run when the host
1516  * CPU supports the VMX "unrestricted guest" feature. */
1517 static void *qemu_hvf_cpu_thread_fn(void *arg)
1518 {
1519     CPUState *cpu = arg;
1520
1521     int r;
1522
1523     assert(hvf_enabled());
1524
1525     rcu_register_thread();
1526
1527     qemu_mutex_lock_iothread();
1528     qemu_thread_get_self(cpu->thread);
1529
1530     cpu->thread_id = qemu_get_thread_id();
1531     cpu->can_do_io = 1;
1532     current_cpu = cpu;
1533
1534     hvf_init_vcpu(cpu);
1535
1536     /* signal CPU creation */
1537     cpu->created = true;
1538     qemu_cond_signal(&qemu_cpu_cond);
1539
1540     do {
1541         if (cpu_can_run(cpu)) {
1542             r = hvf_vcpu_exec(cpu);
1543             if (r == EXCP_DEBUG) {
1544                 cpu_handle_guest_debug(cpu);
1545             }
1546         }
1547         qemu_hvf_wait_io_event(cpu);
1548     } while (!cpu->unplug || cpu_can_run(cpu));
1549
1550     hvf_vcpu_destroy(cpu);
1551     cpu->created = false;
1552     qemu_cond_signal(&qemu_cpu_cond);
1553     qemu_mutex_unlock_iothread();
1554     return NULL;
1555 }
1556
1557 #ifdef _WIN32
/* Empty APC routine; qemu_cpu_kick_thread() queues it on a vCPU thread
 * with QueueUserAPC() on Windows.  The routine itself does nothing.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
1561 #endif
1562
1563 /* Multi-threaded TCG
1564  *
1565  * In the multi-threaded case each vCPU has its own thread. The TLS
1566  * variable current_cpu can be used deep in the code to find the
1567  * current CPUState for a given thread.
1568  */
1569
1570 static void *qemu_tcg_cpu_thread_fn(void *arg)
1571 {
1572     CPUState *cpu = arg;
1573
1574     g_assert(!use_icount);
1575
1576     rcu_register_thread();
1577     tcg_register_thread();
1578
1579     qemu_mutex_lock_iothread();
1580     qemu_thread_get_self(cpu->thread);
1581
1582     cpu->thread_id = qemu_get_thread_id();
1583     cpu->created = true;
1584     cpu->can_do_io = 1;
1585     current_cpu = cpu;
1586     qemu_cond_signal(&qemu_cpu_cond);
1587
1588     /* process any pending work */
1589     cpu->exit_request = 1;
1590
1591     while (1) {
1592         if (cpu_can_run(cpu)) {
1593             int r;
1594             r = tcg_cpu_exec(cpu);
1595             switch (r) {
1596             case EXCP_DEBUG:
1597                 cpu_handle_guest_debug(cpu);
1598                 break;
1599             case EXCP_HALTED:
1600                 /* during start-up the vCPU is reset and the thread is
1601                  * kicked several times. If we don't ensure we go back
1602                  * to sleep in the halted state we won't cleanly
1603                  * start-up when the vCPU is enabled.
1604                  *
1605                  * cpu->halted should ensure we sleep in wait_io_event
1606                  */
1607                 g_assert(cpu->halted);
1608                 break;
1609             case EXCP_ATOMIC:
1610                 qemu_mutex_unlock_iothread();
1611                 cpu_exec_step_atomic(cpu);
1612                 qemu_mutex_lock_iothread();
1613             default:
1614                 /* Ignore everything else? */
1615                 break;
1616             }
1617         } else if (cpu->unplug) {
1618             qemu_tcg_destroy_vcpu(cpu);
1619             cpu->created = false;
1620             qemu_cond_signal(&qemu_cpu_cond);
1621             qemu_mutex_unlock_iothread();
1622             return NULL;
1623         }
1624
1625         atomic_mb_set(&cpu->exit_request, 0);
1626         qemu_tcg_wait_io_event(cpu);
1627     }
1628
1629     return NULL;
1630 }
1631
1632 static void qemu_cpu_kick_thread(CPUState *cpu)
1633 {
1634 #ifndef _WIN32
1635     int err;
1636
1637     if (cpu->thread_kicked) {
1638         return;
1639     }
1640     cpu->thread_kicked = true;
1641     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1642     if (err) {
1643         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1644         exit(1);
1645     }
1646 #else /* _WIN32 */
1647     if (!qemu_cpu_is_self(cpu)) {
1648         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1649             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1650                     __func__, GetLastError());
1651             exit(1);
1652         }
1653     }
1654 #endif
1655 }
1656
1657 void qemu_cpu_kick(CPUState *cpu)
1658 {
1659     qemu_cond_broadcast(cpu->halt_cond);
1660     if (tcg_enabled()) {
1661         cpu_exit(cpu);
1662         /* NOP unless doing single-thread RR */
1663         qemu_cpu_kick_rr_cpu();
1664     } else {
1665         if (hax_enabled()) {
1666             /*
1667              * FIXME: race condition with the exit_request check in
1668              * hax_vcpu_hax_exec
1669              */
1670             cpu->exit_request = 1;
1671         }
1672         qemu_cpu_kick_thread(cpu);
1673     }
1674 }
1675
/* Kick the host thread of the calling thread's own vCPU.  Must be called
 * from a thread that has current_cpu set.
 */
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}
1681
/* Return true when the calling thread is the thread recorded for @cpu. */
bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}
1686
1687 bool qemu_in_vcpu_thread(void)
1688 {
1689     return current_cpu && qemu_cpu_is_self(current_cpu);
1690 }
1691
/* Per-thread flag: true while this thread holds the iothread lock via
 * qemu_mutex_lock_iothread().  Starts out false (static storage).
 */
static __thread bool iothread_locked;

/* Return whether the calling thread currently holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1698
1699 void qemu_mutex_lock_iothread(void)
1700 {
1701     g_assert(!qemu_mutex_iothread_locked());
1702     qemu_mutex_lock(&qemu_global_mutex);
1703     iothread_locked = true;
1704 }
1705
1706 void qemu_mutex_unlock_iothread(void)
1707 {
1708     g_assert(qemu_mutex_iothread_locked());
1709     iothread_locked = false;
1710     qemu_mutex_unlock(&qemu_global_mutex);
1711 }
1712
1713 static bool all_vcpus_paused(void)
1714 {
1715     CPUState *cpu;
1716
1717     CPU_FOREACH(cpu) {
1718         if (!cpu->stopped) {
1719             return false;
1720         }
1721     }
1722
1723     return true;
1724 }
1725
1726 void pause_all_vcpus(void)
1727 {
1728     CPUState *cpu;
1729
1730     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1731     CPU_FOREACH(cpu) {
1732         if (qemu_cpu_is_self(cpu)) {
1733             qemu_cpu_stop(cpu, true);
1734         } else {
1735             cpu->stop = true;
1736             qemu_cpu_kick(cpu);
1737         }
1738     }
1739
1740     while (!all_vcpus_paused()) {
1741         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1742         CPU_FOREACH(cpu) {
1743             qemu_cpu_kick(cpu);
1744         }
1745     }
1746 }
1747
/* Let @cpu run again: clear both its pending stop request and its
 * stopped state, then kick it so that it notices the change.
 */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
1754
1755 void resume_all_vcpus(void)
1756 {
1757     CPUState *cpu;
1758
1759     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1760     CPU_FOREACH(cpu) {
1761         cpu_resume(cpu);
1762     }
1763 }
1764
/* Request removal of @cpu: ask it to stop, flag it as unplugged and kick
 * it.  Does not wait for the vCPU to be torn down.
 */
void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}
1771
1772 void cpu_remove_sync(CPUState *cpu)
1773 {
1774     cpu_remove(cpu);
1775     while (cpu->created) {
1776         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1777     }
1778 }
1779
/* Size in bytes, including the terminating NUL, of the temporary buffers
 * in which the vCPU thread names are formatted. */
#define VCPU_THREAD_NAME_SIZE 16
1782
1783 static void qemu_tcg_init_vcpu(CPUState *cpu)
1784 {
1785     char thread_name[VCPU_THREAD_NAME_SIZE];
1786     static QemuCond *single_tcg_halt_cond;
1787     static QemuThread *single_tcg_cpu_thread;
1788     static int tcg_region_inited;
1789
1790     /*
1791      * Initialize TCG regions--once. Now is a good time, because:
1792      * (1) TCG's init context, prologue and target globals have been set up.
1793      * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1794      *     -accel flag is processed, so the check doesn't work then).
1795      */
1796     if (!tcg_region_inited) {
1797         tcg_region_inited = 1;
1798         tcg_region_init();
1799     }
1800
1801     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1802         cpu->thread = g_malloc0(sizeof(QemuThread));
1803         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1804         qemu_cond_init(cpu->halt_cond);
1805
1806         if (qemu_tcg_mttcg_enabled()) {
1807             /* create a thread per vCPU with TCG (MTTCG) */
1808             parallel_cpus = true;
1809             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1810                  cpu->cpu_index);
1811
1812             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1813                                cpu, QEMU_THREAD_JOINABLE);
1814
1815         } else {
1816             /* share a single thread for all cpus with TCG */
1817             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1818             qemu_thread_create(cpu->thread, thread_name,
1819                                qemu_tcg_rr_cpu_thread_fn,
1820                                cpu, QEMU_THREAD_JOINABLE);
1821
1822             single_tcg_halt_cond = cpu->halt_cond;
1823             single_tcg_cpu_thread = cpu->thread;
1824         }
1825 #ifdef _WIN32
1826         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1827 #endif
1828         while (!cpu->created) {
1829             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1830         }
1831     } else {
1832         /* For non-MTTCG cases we share the thread */
1833         cpu->thread = single_tcg_cpu_thread;
1834         cpu->halt_cond = single_tcg_halt_cond;
1835     }
1836 }
1837
1838 static void qemu_hax_start_vcpu(CPUState *cpu)
1839 {
1840     char thread_name[VCPU_THREAD_NAME_SIZE];
1841
1842     cpu->thread = g_malloc0(sizeof(QemuThread));
1843     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1844     qemu_cond_init(cpu->halt_cond);
1845
1846     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1847              cpu->cpu_index);
1848     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1849                        cpu, QEMU_THREAD_JOINABLE);
1850 #ifdef _WIN32
1851     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1852 #endif
1853     while (!cpu->created) {
1854         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1855     }
1856 }
1857
1858 static void qemu_kvm_start_vcpu(CPUState *cpu)
1859 {
1860     char thread_name[VCPU_THREAD_NAME_SIZE];
1861
1862     cpu->thread = g_malloc0(sizeof(QemuThread));
1863     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1864     qemu_cond_init(cpu->halt_cond);
1865     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1866              cpu->cpu_index);
1867     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1868                        cpu, QEMU_THREAD_JOINABLE);
1869     while (!cpu->created) {
1870         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1871     }
1872 }
1873
1874 static void qemu_hvf_start_vcpu(CPUState *cpu)
1875 {
1876     char thread_name[VCPU_THREAD_NAME_SIZE];
1877
1878     /* HVF currently does not support TCG, and only runs in
1879      * unrestricted-guest mode. */
1880     assert(hvf_enabled());
1881
1882     cpu->thread = g_malloc0(sizeof(QemuThread));
1883     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1884     qemu_cond_init(cpu->halt_cond);
1885
1886     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1887              cpu->cpu_index);
1888     qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1889                        cpu, QEMU_THREAD_JOINABLE);
1890     while (!cpu->created) {
1891         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1892     }
1893 }
1894
1895 static void qemu_dummy_start_vcpu(CPUState *cpu)
1896 {
1897     char thread_name[VCPU_THREAD_NAME_SIZE];
1898
1899     cpu->thread = g_malloc0(sizeof(QemuThread));
1900     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1901     qemu_cond_init(cpu->halt_cond);
1902     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1903              cpu->cpu_index);
1904     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1905                        QEMU_THREAD_JOINABLE);
1906     while (!cpu->created) {
1907         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1908     }
1909 }
1910
1911 void qemu_init_vcpu(CPUState *cpu)
1912 {
1913     cpu->nr_cores = smp_cores;
1914     cpu->nr_threads = smp_threads;
1915     cpu->stopped = true;
1916
1917     if (!cpu->as) {
1918         /* If the target cpu hasn't set up any address spaces itself,
1919          * give it the default one.
1920          */
1921         cpu->num_ases = 1;
1922         cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1923     }
1924
1925     if (kvm_enabled()) {
1926         qemu_kvm_start_vcpu(cpu);
1927     } else if (hax_enabled()) {
1928         qemu_hax_start_vcpu(cpu);
1929     } else if (hvf_enabled()) {
1930         qemu_hvf_start_vcpu(cpu);
1931     } else if (tcg_enabled()) {
1932         qemu_tcg_init_vcpu(cpu);
1933     } else {
1934         qemu_dummy_start_vcpu(cpu);
1935     }
1936 }
1937
1938 void cpu_stop_current(void)
1939 {
1940     if (current_cpu) {
1941         qemu_cpu_stop(current_cpu, true);
1942     }
1943 }
1944
1945 int vm_stop(RunState state)
1946 {
1947     if (qemu_in_vcpu_thread()) {
1948         qemu_system_vmstop_request_prepare();
1949         qemu_system_vmstop_request(state);
1950         /*
1951          * FIXME: should not return to device code in case
1952          * vm_stop() has been requested.
1953          */
1954         cpu_stop_current();
1955         return 0;
1956     }
1957
1958     return do_vm_stop(state);
1959 }
1960
1961 /**
1962  * Prepare for (re)starting the VM.
1963  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1964  * running or in case of an error condition), 0 otherwise.
1965  */
1966 int vm_prepare_start(void)
1967 {
1968     RunState requested;
1969     int res = 0;
1970
1971     qemu_vmstop_requested(&requested);
1972     if (runstate_is_running() && requested == RUN_STATE__MAX) {
1973         return -1;
1974     }
1975
1976     /* Ensure that a STOP/RESUME pair of events is emitted if a
1977      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1978      * example, according to documentation is always followed by
1979      * the STOP event.
1980      */
1981     if (runstate_is_running()) {
1982         qapi_event_send_stop(&error_abort);
1983         res = -1;
1984     } else {
1985         replay_enable_events();
1986         cpu_enable_ticks();
1987         runstate_set(RUN_STATE_RUNNING);
1988         vm_state_notify(1, RUN_STATE_RUNNING);
1989     }
1990
1991     /* We are sending this now, but the CPUs will be resumed shortly later */
1992     qapi_event_send_resume(&error_abort);
1993     return res;
1994 }
1995
/* Start (or restart) the VM: resume all vCPUs if vm_prepare_start()
 * returns 0, otherwise leave them alone.
 */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
2002
2003 /* does a state transition even if the VM is already stopped,
2004    current state is forgotten forever */
2005 int vm_stop_force_state(RunState state)
2006 {
2007     if (runstate_is_running()) {
2008         return vm_stop(state);
2009     } else {
2010         runstate_set(state);
2011
2012         bdrv_drain_all();
2013         /* Make sure to return an error if the flush in a previous vm_stop()
2014          * failed. */
2015         return bdrv_flush_all();
2016     }
2017 }
2018
2019 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2020 {
2021     /* XXX: implement xxx_cpu_list for targets that still miss it */
2022 #if defined(cpu_list)
2023     cpu_list(f, cpu_fprintf);
2024 #endif
2025 }
2026
2027 CpuInfoList *qmp_query_cpus(Error **errp)
2028 {
2029     MachineState *ms = MACHINE(qdev_get_machine());
2030     MachineClass *mc = MACHINE_GET_CLASS(ms);
2031     CpuInfoList *head = NULL, *cur_item = NULL;
2032     CPUState *cpu;
2033
2034     CPU_FOREACH(cpu) {
2035         CpuInfoList *info;
2036 #if defined(TARGET_I386)
2037         X86CPU *x86_cpu = X86_CPU(cpu);
2038         CPUX86State *env = &x86_cpu->env;
2039 #elif defined(TARGET_PPC)
2040         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2041         CPUPPCState *env = &ppc_cpu->env;
2042 #elif defined(TARGET_SPARC)
2043         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2044         CPUSPARCState *env = &sparc_cpu->env;
2045 #elif defined(TARGET_MIPS)
2046         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2047         CPUMIPSState *env = &mips_cpu->env;
2048 #elif defined(TARGET_TRICORE)
2049         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2050         CPUTriCoreState *env = &tricore_cpu->env;
2051 #endif
2052
2053         cpu_synchronize_state(cpu);
2054
2055         info = g_malloc0(sizeof(*info));
2056         info->value = g_malloc0(sizeof(*info->value));
2057         info->value->CPU = cpu->cpu_index;
2058         info->value->current = (cpu == first_cpu);
2059         info->value->halted = cpu->halted;
2060         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2061         info->value->thread_id = cpu->thread_id;
2062 #if defined(TARGET_I386)
2063         info->value->arch = CPU_INFO_ARCH_X86;
2064         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2065 #elif defined(TARGET_PPC)
2066         info->value->arch = CPU_INFO_ARCH_PPC;
2067         info->value->u.ppc.nip = env->nip;
2068 #elif defined(TARGET_SPARC)
2069         info->value->arch = CPU_INFO_ARCH_SPARC;
2070         info->value->u.q_sparc.pc = env->pc;
2071         info->value->u.q_sparc.npc = env->npc;
2072 #elif defined(TARGET_MIPS)
2073         info->value->arch = CPU_INFO_ARCH_MIPS;
2074         info->value->u.q_mips.PC = env->active_tc.PC;
2075 #elif defined(TARGET_TRICORE)
2076         info->value->arch = CPU_INFO_ARCH_TRICORE;
2077         info->value->u.tricore.PC = env->PC;
2078 #else
2079         info->value->arch = CPU_INFO_ARCH_OTHER;
2080 #endif
2081         info->value->has_props = !!mc->cpu_index_to_instance_props;
2082         if (info->value->has_props) {
2083             CpuInstanceProperties *props;
2084             props = g_malloc0(sizeof(*props));
2085             *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2086             info->value->props = props;
2087         }
2088
2089         /* XXX: waiting for the qapi to support GSList */
2090         if (!cur_item) {
2091             head = cur_item = info;
2092         } else {
2093             cur_item->next = info;
2094             cur_item = info;
2095         }
2096     }
2097
2098     return head;
2099 }
2100
2101 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2102                  bool has_cpu, int64_t cpu_index, Error **errp)
2103 {
2104     FILE *f;
2105     uint32_t l;
2106     CPUState *cpu;
2107     uint8_t buf[1024];
2108     int64_t orig_addr = addr, orig_size = size;
2109
2110     if (!has_cpu) {
2111         cpu_index = 0;
2112     }
2113
2114     cpu = qemu_get_cpu(cpu_index);
2115     if (cpu == NULL) {
2116         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2117                    "a CPU number");
2118         return;
2119     }
2120
2121     f = fopen(filename, "wb");
2122     if (!f) {
2123         error_setg_file_open(errp, errno, filename);
2124         return;
2125     }
2126
2127     while (size != 0) {
2128         l = sizeof(buf);
2129         if (l > size)
2130             l = size;
2131         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2132             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2133                              " specified", orig_addr, orig_size);
2134             goto exit;
2135         }
2136         if (fwrite(buf, 1, l, f) != l) {
2137             error_setg(errp, QERR_IO_ERROR);
2138             goto exit;
2139         }
2140         addr += l;
2141         size -= l;
2142     }
2143
2144 exit:
2145     fclose(f);
2146 }
2147
2148 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2149                   Error **errp)
2150 {
2151     FILE *f;
2152     uint32_t l;
2153     uint8_t buf[1024];
2154
2155     f = fopen(filename, "wb");
2156     if (!f) {
2157         error_setg_file_open(errp, errno, filename);
2158         return;
2159     }
2160
2161     while (size != 0) {
2162         l = sizeof(buf);
2163         if (l > size)
2164             l = size;
2165         cpu_physical_memory_read(addr, buf, l);
2166         if (fwrite(buf, 1, l, f) != l) {
2167             error_setg(errp, QERR_IO_ERROR);
2168             goto exit;
2169         }
2170         addr += l;
2171         size -= l;
2172     }
2173
2174 exit:
2175     fclose(f);
2176 }
2177
2178 void qmp_inject_nmi(Error **errp)
2179 {
2180     nmi_monitor_handle(monitor_get_cpu_index(), errp);
2181 }
2182
2183 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2184 {
2185     if (!use_icount) {
2186         return;
2187     }
2188
2189     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2190                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2191     if (icount_align_option) {
2192         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2193         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2194     } else {
2195         cpu_fprintf(f, "Max guest delay     NA\n");
2196         cpu_fprintf(f, "Max guest advance   NA\n");
2197     }
2198 }