cpus.c (from qemu/ar7.git)
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
53 #include "hw/boards.h"
55 #ifdef CONFIG_LINUX
57 #include <sys/prctl.h>
59 #ifndef PR_MCE_KILL
60 #define PR_MCE_KILL 33
61 #endif
63 #ifndef PR_MCE_KILL_SET
64 #define PR_MCE_KILL_SET 1
65 #endif
67 #ifndef PR_MCE_KILL_EARLY
68 #define PR_MCE_KILL_EARLY 1
69 #endif
71 #endif /* CONFIG_LINUX */
73 int64_t max_delay;
74 int64_t max_advance;
76 /* vcpu throttling controls */
77 static QEMUTimer *throttle_timer;
78 static unsigned int throttle_percentage;
80 #define CPU_THROTTLE_PCT_MIN 1
81 #define CPU_THROTTLE_PCT_MAX 99
82 #define CPU_THROTTLE_TIMESLICE_NS 10000000
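/* The timeslice is 10 ms of QEMU_CLOCK_VIRTUAL_RT; cpu_throttle_set()
 * below clamps requests into the [1, 99] percent range, so throttling
 * never completely stops nor completely frees the vCPUs.
 */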
84 bool cpu_is_stopped(CPUState *cpu)
86 return cpu->stopped || !runstate_is_running();
89 static bool cpu_thread_is_idle(CPUState *cpu)
91 if (cpu->stop || cpu->queued_work_first) {
92 return false;
94 if (cpu_is_stopped(cpu)) {
95 return true;
97 if (!cpu->halted || cpu_has_work(cpu) ||
98 kvm_halt_in_kernel()) {
99 return false;
101 return true;
104 static bool all_cpu_threads_idle(void)
106 CPUState *cpu;
108 CPU_FOREACH(cpu) {
109 if (!cpu_thread_is_idle(cpu)) {
110 return false;
113 return true;
116 /***********************************************************/
117 /* guest cycle counter */
119 /* Protected by TimersState seqlock */
121 static bool icount_sleep = true;
122 static int64_t vm_clock_warp_start = -1;
123 /* Conversion factor from emulated instructions to virtual clock ticks. */
124 static int icount_time_shift;
125 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126 #define MAX_ICOUNT_SHIFT 10
128 static QEMUTimer *icount_rt_timer;
129 static QEMUTimer *icount_vm_timer;
130 static QEMUTimer *icount_warp_timer;
132 typedef struct TimersState {
133 /* Protected by BQL. */
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
140 QemuSeqLock vm_clock_seqlock;
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
149 } TimersState;
151 static TimersState timers_state;
152 bool mttcg_enabled;
155 * We default to false if we know other options have been enabled
156 * which are currently incompatible with MTTCG. Otherwise when each
157 * guest (target) has been updated to support:
158 * - atomic instructions
159 * - memory ordering primitives (barriers)
160 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
162 * Once a guest architecture has been converted to the new primitives
163 * there are two remaining limitations to check.
165 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
166 * - The host must have a stronger memory order than the guest
168 * It may be possible in future to support strong guests on weak hosts
169 * but that will require tagging all load/stores in a guest with their
170 * implicit memory order requirements which would likely slow things
171 * down a lot.
174 static bool check_tcg_memory_orders_compatible(void)
176 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
177 return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
178 #else
179 return false;
180 #endif
183 static bool default_mttcg_enabled(void)
185 if (use_icount || TCG_OVERSIZED_GUEST) {
186 return false;
187 } else {
188 #ifdef TARGET_SUPPORTS_MTTCG
189 return check_tcg_memory_orders_compatible();
190 #else
191 return false;
192 #endif
196 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198 const char *t = qemu_opt_get(opts, "thread");
199 if (t) {
200 if (strcmp(t, "multi") == 0) {
201 if (TCG_OVERSIZED_GUEST) {
202 error_setg(errp, "No MTTCG when guest word size > host's");
203 } else if (use_icount) {
204 error_setg(errp, "No MTTCG when icount is enabled");
205 } else {
206 #ifndef TARGET_SUPPORTS_MTTCG
207 error_report("Guest not yet converted to MTTCG - "
208 "you may get unexpected results");
209 #endif
210 if (!check_tcg_memory_orders_compatible()) {
211 error_report("Guest expects a stronger memory ordering "
212 "than the host provides");
213 error_printf("This may cause strange/hard to debug errors\n");
215 mttcg_enabled = true;
217 } else if (strcmp(t, "single") == 0) {
218 mttcg_enabled = false;
219 } else {
220 error_setg(errp, "Invalid 'thread' setting %s", t);
222 } else {
223 mttcg_enabled = default_mttcg_enabled();
227 /* The current number of executed instructions is based on what we
228 * originally budgeted minus the current state of the decrementing
229 * icount counters in extra/u16.low.
231 static int64_t cpu_get_icount_executed(CPUState *cpu)
233 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
237 * Update the global shared timers_state.qemu_icount to take into
238 * account executed instructions. This is done by the TCG vCPU
239 * thread so the main-loop can see time has moved forward.
241 void cpu_update_icount(CPUState *cpu)
243 int64_t executed = cpu_get_icount_executed(cpu);
244 cpu->icount_budget -= executed;
246 #ifdef CONFIG_ATOMIC64
247 atomic_set__nocheck(&timers_state.qemu_icount,
248 atomic_read__nocheck(&timers_state.qemu_icount) +
249 executed);
250 #else /* FIXME: we need 64bit atomics to do this safely */
251 timers_state.qemu_icount += executed;
252 #endif
255 int64_t cpu_get_icount_raw(void)
257 CPUState *cpu = current_cpu;
259 if (cpu && cpu->running) {
260 if (!cpu->can_do_io) {
261 fprintf(stderr, "Bad icount read\n");
262 exit(1);
264 /* Take into account what has run */
265 cpu_update_icount(cpu);
267 #ifdef CONFIG_ATOMIC64
268 return atomic_read__nocheck(&timers_state.qemu_icount);
269 #else /* FIXME: we need 64bit atomics to do this safely */
270 return timers_state.qemu_icount;
271 #endif
274 /* Return the virtual CPU time, based on the instruction counter. */
275 static int64_t cpu_get_icount_locked(void)
277 int64_t icount = cpu_get_icount_raw();
278 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
281 int64_t cpu_get_icount(void)
283 int64_t icount;
284 unsigned start;
286 do {
287 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
288 icount = cpu_get_icount_locked();
289 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291 return icount;
294 int64_t cpu_icount_to_ns(int64_t icount)
296 return icount << icount_time_shift;
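/* For example, with icount_time_shift == 3 each instruction accounts for
 * 2^3 = 8 ns of virtual time (a nominal 125 MIPS guest); shift 0 models
 * 1000 MIPS and MAX_ICOUNT_SHIFT (10) roughly the 1 MIPS floor above.
 */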
299 /* Return the time elapsed in the VM between vm_start and vm_stop. Unless
300 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
301 * counter.
303 * Caller must hold the BQL
305 int64_t cpu_get_ticks(void)
307 int64_t ticks;
309 if (use_icount) {
310 return cpu_get_icount();
313 ticks = timers_state.cpu_ticks_offset;
314 if (timers_state.cpu_ticks_enabled) {
315 ticks += cpu_get_host_ticks();
318 if (timers_state.cpu_ticks_prev > ticks) {
319 /* Note: non-increasing ticks may happen if the host uses
320 software suspend */
321 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
322 ticks = timers_state.cpu_ticks_prev;
325 timers_state.cpu_ticks_prev = ticks;
326 return ticks;
329 static int64_t cpu_get_clock_locked(void)
331 int64_t time;
333 time = timers_state.cpu_clock_offset;
334 if (timers_state.cpu_ticks_enabled) {
335 time += get_clock();
338 return time;
341 /* Return the monotonic time elapsed in VM, i.e.,
342 * the time between vm_start and vm_stop
344 int64_t cpu_get_clock(void)
346 int64_t ti;
347 unsigned start;
349 do {
350 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
351 ti = cpu_get_clock_locked();
352 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354 return ti;
357 /* enable cpu_get_ticks()
358 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
360 void cpu_enable_ticks(void)
362 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
363 seqlock_write_begin(&timers_state.vm_clock_seqlock);
364 if (!timers_state.cpu_ticks_enabled) {
365 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
366 timers_state.cpu_clock_offset -= get_clock();
367 timers_state.cpu_ticks_enabled = 1;
369 seqlock_write_end(&timers_state.vm_clock_seqlock);
372 /* disable cpu_get_ticks() : the clock is stopped. You must not call
373 * cpu_get_ticks() after that.
374 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
376 void cpu_disable_ticks(void)
378 /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
379 seqlock_write_begin(&timers_state.vm_clock_seqlock);
380 if (timers_state.cpu_ticks_enabled) {
381 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
382 timers_state.cpu_clock_offset = cpu_get_clock_locked();
383 timers_state.cpu_ticks_enabled = 0;
385 seqlock_write_end(&timers_state.vm_clock_seqlock);
388 /* Correlation between real and virtual time is always going to be
389 fairly approximate, so ignore small variation.
390 When the guest is idle real and virtual time will be aligned in
391 the IO wait loop. */
392 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
394 static void icount_adjust(void)
396 int64_t cur_time;
397 int64_t cur_icount;
398 int64_t delta;
400 /* Protected by TimersState mutex. */
401 static int64_t last_delta;
403 /* If the VM is not running, then do nothing. */
404 if (!runstate_is_running()) {
405 return;
408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
409 cur_time = cpu_get_clock_locked();
410 cur_icount = cpu_get_icount_locked();
412 delta = cur_icount - cur_time;
413 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
414 if (delta > 0
415 && last_delta + ICOUNT_WOBBLE < delta * 2
416 && icount_time_shift > 0) {
417 /* The guest is getting too far ahead. Slow time down. */
418 icount_time_shift--;
420 if (delta < 0
421 && last_delta - ICOUNT_WOBBLE > delta * 2
422 && icount_time_shift < MAX_ICOUNT_SHIFT) {
423 /* The guest is getting too far behind. Speed time up. */
424 icount_time_shift++;
426 last_delta = delta;
427 timers_state.qemu_icount_bias = cur_icount
428 - (timers_state.qemu_icount << icount_time_shift);
429 seqlock_write_end(&timers_state.vm_clock_seqlock);
432 static void icount_adjust_rt(void *opaque)
434 timer_mod(icount_rt_timer,
435 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
436 icount_adjust();
439 static void icount_adjust_vm(void *opaque)
441 timer_mod(icount_vm_timer,
442 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
443 NANOSECONDS_PER_SECOND / 10);
444 icount_adjust();
447 static int64_t qemu_icount_round(int64_t count)
449 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
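/* Rounds a nanosecond deadline up to whole instructions at the current
 * shift, e.g. with icount_time_shift == 3 a 20 ns deadline becomes
 * (20 + 7) >> 3 = 3 instructions.
 */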
452 static void icount_warp_rt(void)
454 unsigned seq;
455 int64_t warp_start;
457 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
458 * changes from -1 to another value, so the race here is okay.
460 do {
461 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
462 warp_start = vm_clock_warp_start;
463 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465 if (warp_start == -1) {
466 return;
469 seqlock_write_begin(&timers_state.vm_clock_seqlock);
470 if (runstate_is_running()) {
471 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
472 cpu_get_clock_locked());
473 int64_t warp_delta;
475 warp_delta = clock - vm_clock_warp_start;
476 if (use_icount == 2) {
478 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
479 * far ahead of real time.
481 int64_t cur_icount = cpu_get_icount_locked();
482 int64_t delta = clock - cur_icount;
483 warp_delta = MIN(warp_delta, delta);
485 timers_state.qemu_icount_bias += warp_delta;
487 vm_clock_warp_start = -1;
488 seqlock_write_end(&timers_state.vm_clock_seqlock);
490 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
491 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
495 static void icount_timer_cb(void *opaque)
497 /* No need for a checkpoint because the timer already synchronizes
498 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
500 icount_warp_rt();
503 void qtest_clock_warp(int64_t dest)
505 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
506 AioContext *aio_context;
507 assert(qtest_enabled());
508 aio_context = qemu_get_aio_context();
509 while (clock < dest) {
510 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
511 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
513 seqlock_write_begin(&timers_state.vm_clock_seqlock);
514 timers_state.qemu_icount_bias += warp;
515 seqlock_write_end(&timers_state.vm_clock_seqlock);
517 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
518 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
519 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
521 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
524 void qemu_start_warp_timer(void)
526 int64_t clock;
527 int64_t deadline;
529 if (!use_icount) {
530 return;
533 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
534 * do not fire, so computing the deadline does not make sense.
536 if (!runstate_is_running()) {
537 return;
540 /* warp clock deterministically in record/replay mode */
541 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
542 return;
545 if (!all_cpu_threads_idle()) {
546 return;
549 if (qtest_enabled()) {
550 /* When testing, qtest commands advance icount. */
551 return;
554 /* We want to use the earliest deadline from ALL vm_clocks */
555 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
556 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
557 if (deadline < 0) {
558 static bool notified;
559 if (!icount_sleep && !notified) {
560 warn_report("icount sleep disabled and no active timers");
561 notified = true;
563 return;
566 if (deadline > 0) {
568 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
569 * sleep. Otherwise, the CPU might be waiting for a future timer
570 * interrupt to wake it up, but the interrupt never comes because
571 * the vCPU isn't running any insns and thus doesn't advance the
572 * QEMU_CLOCK_VIRTUAL.
574 if (!icount_sleep) {
576 * We never let VCPUs sleep in no sleep icount mode.
577 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
578 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
579 * It is useful when we want a deterministic execution time,
580 * isolated from host latencies.
582 seqlock_write_begin(&timers_state.vm_clock_seqlock);
583 timers_state.qemu_icount_bias += deadline;
584 seqlock_write_end(&timers_state.vm_clock_seqlock);
585 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
586 } else {
588 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
589 * "real" time, (related to the time left until the next event) has
590 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
591 * This keeps the warps from being visible externally; for example,
592 * you will not send network packets continuously instead of
593 * every 100ms.
595 seqlock_write_begin(&timers_state.vm_clock_seqlock);
596 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
597 vm_clock_warp_start = clock;
599 seqlock_write_end(&timers_state.vm_clock_seqlock);
600 timer_mod_anticipate(icount_warp_timer, clock + deadline);
602 } else if (deadline == 0) {
603 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
607 static void qemu_account_warp_timer(void)
609 if (!use_icount || !icount_sleep) {
610 return;
613 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
614 * do not fire, so computing the deadline does not make sense.
616 if (!runstate_is_running()) {
617 return;
620 /* warp clock deterministically in record/replay mode */
621 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
622 return;
625 timer_del(icount_warp_timer);
626 icount_warp_rt();
629 static bool icount_state_needed(void *opaque)
631 return use_icount;
635 * This is a subsection for icount migration.
637 static const VMStateDescription icount_vmstate_timers = {
638 .name = "timer/icount",
639 .version_id = 1,
640 .minimum_version_id = 1,
641 .needed = icount_state_needed,
642 .fields = (VMStateField[]) {
643 VMSTATE_INT64(qemu_icount_bias, TimersState),
644 VMSTATE_INT64(qemu_icount, TimersState),
645 VMSTATE_END_OF_LIST()
649 static const VMStateDescription vmstate_timers = {
650 .name = "timer",
651 .version_id = 2,
652 .minimum_version_id = 1,
653 .fields = (VMStateField[]) {
654 VMSTATE_INT64(cpu_ticks_offset, TimersState),
655 VMSTATE_INT64(dummy, TimersState),
656 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
657 VMSTATE_END_OF_LIST()
659 .subsections = (const VMStateDescription*[]) {
660 &icount_vmstate_timers,
661 NULL
665 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
667 double pct;
668 double throttle_ratio;
669 long sleeptime_ns;
671 if (!cpu_throttle_get_percentage()) {
672 return;
675 pct = (double)cpu_throttle_get_percentage()/100;
676 throttle_ratio = pct / (1 - pct);
677 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
679 qemu_mutex_unlock_iothread();
680 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
681 qemu_mutex_lock_iothread();
682 atomic_set(&cpu->throttle_thread_scheduled, 0);
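/* Illustration: at 50% throttle pct == 0.5 and throttle_ratio == 1, so the
 * vCPU sleeps 10 ms for every 10 ms timeslice it runs; at the 99% maximum
 * it sleeps about 990 ms per timeslice.
 */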
685 static void cpu_throttle_timer_tick(void *opaque)
687 CPUState *cpu;
688 double pct;
690 /* Stop the timer if needed */
691 if (!cpu_throttle_get_percentage()) {
692 return;
694 CPU_FOREACH(cpu) {
695 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
696 async_run_on_cpu(cpu, cpu_throttle_thread,
697 RUN_ON_CPU_NULL);
701 pct = (double)cpu_throttle_get_percentage()/100;
702 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
703 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
706 void cpu_throttle_set(int new_throttle_pct)
708 /* Ensure throttle percentage is within valid range */
709 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
710 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
712 atomic_set(&throttle_percentage, new_throttle_pct);
714 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
715 CPU_THROTTLE_TIMESLICE_NS);
718 void cpu_throttle_stop(void)
720 atomic_set(&throttle_percentage, 0);
723 bool cpu_throttle_active(void)
725 return (cpu_throttle_get_percentage() != 0);
728 int cpu_throttle_get_percentage(void)
730 return atomic_read(&throttle_percentage);
733 void cpu_ticks_init(void)
735 seqlock_init(&timers_state.vm_clock_seqlock);
736 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
737 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
738 cpu_throttle_timer_tick, NULL);
741 void configure_icount(QemuOpts *opts, Error **errp)
743 const char *option;
744 char *rem_str = NULL;
746 option = qemu_opt_get(opts, "shift");
747 if (!option) {
748 if (qemu_opt_get(opts, "align") != NULL) {
749 error_setg(errp, "Please specify shift option when using align");
751 return;
754 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
755 if (icount_sleep) {
756 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
757 icount_timer_cb, NULL);
760 icount_align_option = qemu_opt_get_bool(opts, "align", false);
762 if (icount_align_option && !icount_sleep) {
763 error_setg(errp, "align=on and sleep=off are incompatible");
765 if (strcmp(option, "auto") != 0) {
766 errno = 0;
767 icount_time_shift = strtol(option, &rem_str, 0);
768 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
769 error_setg(errp, "icount: Invalid shift value");
771 use_icount = 1;
772 return;
773 } else if (icount_align_option) {
774 error_setg(errp, "shift=auto and align=on are incompatible");
775 } else if (!icount_sleep) {
776 error_setg(errp, "shift=auto and sleep=off are incompatible");
779 use_icount = 2;
781 /* 125MIPS seems a reasonable initial guess at the guest speed.
782 It will be corrected fairly quickly anyway. */
783 icount_time_shift = 3;
785 /* Have both realtime and virtual time triggers for speed adjustment.
786 The realtime trigger catches emulated time passing too slowly,
787 the virtual time trigger catches emulated time passing too fast.
788 Realtime triggers occur even when idle, so use them less frequently
789 than VM triggers. */
790 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
791 icount_adjust_rt, NULL);
792 timer_mod(icount_rt_timer,
793 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
794 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
795 icount_adjust_vm, NULL);
796 timer_mod(icount_vm_timer,
797 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
798 NANOSECONDS_PER_SECOND / 10);
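/* The options parsed here belong to the -icount command line group, e.g.
 * "-icount shift=auto,sleep=on" or "-icount shift=7,align=on" (illustrative
 * combinations; align requires an explicit shift and sleep enabled).
 */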
801 /***********************************************************/
802 /* TCG vCPU kick timer
804 * The kick timer is responsible for moving single threaded vCPU
805 * emulation on to the next vCPU. If more than one vCPU is running a
806 timer event will force a cpu->exit so the next vCPU can get
807 * scheduled.
809 * The timer is removed if all vCPUs are idle and restarted again once
810 * there is work for a vCPU to do.
813 static QEMUTimer *tcg_kick_vcpu_timer;
814 static CPUState *tcg_current_rr_cpu;
816 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
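/* i.e. the round-robin scheduler forcibly rotates to the next vCPU at
 * least every 100 ms of QEMU_CLOCK_VIRTUAL time when more than one vCPU
 * shares the single TCG thread.
 */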
818 static inline int64_t qemu_tcg_next_kick(void)
820 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
823 /* Kick the currently round-robin scheduled vCPU */
824 static void qemu_cpu_kick_rr_cpu(void)
826 CPUState *cpu;
827 do {
828 cpu = atomic_mb_read(&tcg_current_rr_cpu);
829 if (cpu) {
830 cpu_exit(cpu);
832 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
835 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
839 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
841 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
842 qemu_notify_event();
843 return;
846 if (!qemu_in_vcpu_thread() && first_cpu) {
847 /* qemu_cpu_kick is not enough to kick a halted CPU out of
848 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
849 * causes cpu_thread_is_idle to return false. This way,
850 * handle_icount_deadline can run.
852 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
856 static void kick_tcg_thread(void *opaque)
858 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
859 qemu_cpu_kick_rr_cpu();
862 static void start_tcg_kick_timer(void)
864 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
865 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
866 kick_tcg_thread, NULL);
867 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
871 static void stop_tcg_kick_timer(void)
873 if (tcg_kick_vcpu_timer) {
874 timer_del(tcg_kick_vcpu_timer);
875 tcg_kick_vcpu_timer = NULL;
879 /***********************************************************/
880 void hw_error(const char *fmt, ...)
882 va_list ap;
883 CPUState *cpu;
885 va_start(ap, fmt);
886 fprintf(stderr, "qemu: hardware error: ");
887 vfprintf(stderr, fmt, ap);
888 fprintf(stderr, "\n");
889 CPU_FOREACH(cpu) {
890 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
891 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
893 va_end(ap);
894 abort();
897 void cpu_synchronize_all_states(void)
899 CPUState *cpu;
901 CPU_FOREACH(cpu) {
902 cpu_synchronize_state(cpu);
906 void cpu_synchronize_all_post_reset(void)
908 CPUState *cpu;
910 CPU_FOREACH(cpu) {
911 cpu_synchronize_post_reset(cpu);
915 void cpu_synchronize_all_post_init(void)
917 CPUState *cpu;
919 CPU_FOREACH(cpu) {
920 cpu_synchronize_post_init(cpu);
924 void cpu_synchronize_all_pre_loadvm(void)
926 CPUState *cpu;
928 CPU_FOREACH(cpu) {
929 cpu_synchronize_pre_loadvm(cpu);
933 static int do_vm_stop(RunState state)
935 int ret = 0;
937 if (runstate_is_running()) {
938 cpu_disable_ticks();
939 pause_all_vcpus();
940 runstate_set(state);
941 vm_state_notify(0, state);
942 qapi_event_send_stop(&error_abort);
945 bdrv_drain_all();
946 replay_disable_events();
947 ret = bdrv_flush_all();
949 return ret;
952 static bool cpu_can_run(CPUState *cpu)
954 if (cpu->stop) {
955 return false;
957 if (cpu_is_stopped(cpu)) {
958 return false;
960 return true;
963 static void cpu_handle_guest_debug(CPUState *cpu)
965 gdb_set_stop_cpu(cpu);
966 qemu_system_debug_request();
967 cpu->stopped = true;
970 #ifdef CONFIG_LINUX
971 static void sigbus_reraise(void)
973 sigset_t set;
974 struct sigaction action;
976 memset(&action, 0, sizeof(action));
977 action.sa_handler = SIG_DFL;
978 if (!sigaction(SIGBUS, &action, NULL)) {
979 raise(SIGBUS);
980 sigemptyset(&set);
981 sigaddset(&set, SIGBUS);
982 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
984 perror("Failed to re-raise SIGBUS!\n");
985 abort();
988 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
990 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
991 sigbus_reraise();
994 if (current_cpu) {
995 /* Called asynchronously in VCPU thread. */
996 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
997 sigbus_reraise();
999 } else {
1000 /* Called synchronously (via signalfd) in main thread. */
1001 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1002 sigbus_reraise();
1007 static void qemu_init_sigbus(void)
1009 struct sigaction action;
1011 memset(&action, 0, sizeof(action));
1012 action.sa_flags = SA_SIGINFO;
1013 action.sa_sigaction = sigbus_handler;
1014 sigaction(SIGBUS, &action, NULL);
1016 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1018 #else /* !CONFIG_LINUX */
1019 static void qemu_init_sigbus(void)
1022 #endif /* !CONFIG_LINUX */
1024 static QemuMutex qemu_global_mutex;
1026 static QemuThread io_thread;
1028 /* cpu creation */
1029 static QemuCond qemu_cpu_cond;
1030 /* system init */
1031 static QemuCond qemu_pause_cond;
1033 void qemu_init_cpu_loop(void)
1035 qemu_init_sigbus();
1036 qemu_cond_init(&qemu_cpu_cond);
1037 qemu_cond_init(&qemu_pause_cond);
1038 qemu_mutex_init(&qemu_global_mutex);
1040 qemu_thread_get_self(&io_thread);
1043 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1045 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1048 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1050 if (kvm_destroy_vcpu(cpu) < 0) {
1051 error_report("kvm_destroy_vcpu failed");
1052 exit(EXIT_FAILURE);
1056 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1060 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1062 g_assert(qemu_cpu_is_self(cpu));
1063 cpu->stop = false;
1064 cpu->stopped = true;
1065 if (exit) {
1066 cpu_exit(cpu);
1068 qemu_cond_broadcast(&qemu_pause_cond);
1071 static void qemu_wait_io_event_common(CPUState *cpu)
1073 atomic_mb_set(&cpu->thread_kicked, false);
1074 if (cpu->stop) {
1075 qemu_cpu_stop(cpu, false);
1077 process_queued_cpu_work(cpu);
1080 static bool qemu_tcg_should_sleep(CPUState *cpu)
1082 if (mttcg_enabled) {
1083 return cpu_thread_is_idle(cpu);
1084 } else {
1085 return all_cpu_threads_idle();
1089 static void qemu_tcg_wait_io_event(CPUState *cpu)
1091 while (qemu_tcg_should_sleep(cpu)) {
1092 stop_tcg_kick_timer();
1093 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1096 start_tcg_kick_timer();
1098 qemu_wait_io_event_common(cpu);
1101 static void qemu_kvm_wait_io_event(CPUState *cpu)
1103 while (cpu_thread_is_idle(cpu)) {
1104 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1107 qemu_wait_io_event_common(cpu);
1110 static void *qemu_kvm_cpu_thread_fn(void *arg)
1112 CPUState *cpu = arg;
1113 int r;
1115 rcu_register_thread();
1117 qemu_mutex_lock_iothread();
1118 qemu_thread_get_self(cpu->thread);
1119 cpu->thread_id = qemu_get_thread_id();
1120 cpu->can_do_io = 1;
1121 current_cpu = cpu;
1123 r = kvm_init_vcpu(cpu);
1124 if (r < 0) {
1125 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1126 exit(1);
1129 kvm_init_cpu_signals(cpu);
1131 /* signal CPU creation */
1132 cpu->created = true;
1133 qemu_cond_signal(&qemu_cpu_cond);
1135 do {
1136 if (cpu_can_run(cpu)) {
1137 r = kvm_cpu_exec(cpu);
1138 if (r == EXCP_DEBUG) {
1139 cpu_handle_guest_debug(cpu);
1142 qemu_kvm_wait_io_event(cpu);
1143 } while (!cpu->unplug || cpu_can_run(cpu));
1145 qemu_kvm_destroy_vcpu(cpu);
1146 cpu->created = false;
1147 qemu_cond_signal(&qemu_cpu_cond);
1148 qemu_mutex_unlock_iothread();
1149 return NULL;
1152 static void *qemu_dummy_cpu_thread_fn(void *arg)
1154 #ifdef _WIN32
1155 fprintf(stderr, "qtest is not supported under Windows\n");
1156 exit(1);
1157 #else
1158 CPUState *cpu = arg;
1159 sigset_t waitset;
1160 int r;
1162 rcu_register_thread();
1164 qemu_mutex_lock_iothread();
1165 qemu_thread_get_self(cpu->thread);
1166 cpu->thread_id = qemu_get_thread_id();
1167 cpu->can_do_io = 1;
1168 current_cpu = cpu;
1170 sigemptyset(&waitset);
1171 sigaddset(&waitset, SIG_IPI);
1173 /* signal CPU creation */
1174 cpu->created = true;
1175 qemu_cond_signal(&qemu_cpu_cond);
1177 while (1) {
1178 qemu_mutex_unlock_iothread();
1179 do {
1180 int sig;
1181 r = sigwait(&waitset, &sig);
1182 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1183 if (r == -1) {
1184 perror("sigwait");
1185 exit(1);
1187 qemu_mutex_lock_iothread();
1188 qemu_wait_io_event_common(cpu);
1191 return NULL;
1192 #endif
1195 static int64_t tcg_get_icount_limit(void)
1197 int64_t deadline;
1199 if (replay_mode != REPLAY_MODE_PLAY) {
1200 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1202 /* Maintain prior (possibly buggy) behaviour where if no deadline
1203 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1204 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1205 * nanoseconds.
1207 if ((deadline < 0) || (deadline > INT32_MAX)) {
1208 deadline = INT32_MAX;
1211 return qemu_icount_round(deadline);
1212 } else {
1213 return replay_get_instructions();
1217 static void handle_icount_deadline(void)
1219 assert(qemu_in_vcpu_thread());
1220 if (use_icount) {
1221 int64_t deadline =
1222 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1224 if (deadline == 0) {
1225 /* Wake up other AioContexts. */
1226 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1227 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1232 static void prepare_icount_for_run(CPUState *cpu)
1234 if (use_icount) {
1235 int insns_left;
1237 /* These should always be cleared by process_icount_data after
1238 * each vCPU execution. However u16.high can be raised
1239 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1241 g_assert(cpu->icount_decr.u16.low == 0);
1242 g_assert(cpu->icount_extra == 0);
1244 cpu->icount_budget = tcg_get_icount_limit();
1245 insns_left = MIN(0xffff, cpu->icount_budget);
1246 cpu->icount_decr.u16.low = insns_left;
1247 cpu->icount_extra = cpu->icount_budget - insns_left;
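/* e.g. an icount_budget of 100000 instructions is split into
 * icount_decr.u16.low = 65535 and icount_extra = 34465, since the
 * in-TB decrementer is only 16 bits wide.
 */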
1251 static void process_icount_data(CPUState *cpu)
1253 if (use_icount) {
1254 /* Account for executed instructions */
1255 cpu_update_icount(cpu);
1257 /* Reset the counters */
1258 cpu->icount_decr.u16.low = 0;
1259 cpu->icount_extra = 0;
1260 cpu->icount_budget = 0;
1262 replay_account_executed_instructions();
1267 static int tcg_cpu_exec(CPUState *cpu)
1269 int ret;
1270 #ifdef CONFIG_PROFILER
1271 int64_t ti;
1272 #endif
1274 #ifdef CONFIG_PROFILER
1275 ti = profile_getclock();
1276 #endif
1277 qemu_mutex_unlock_iothread();
1278 cpu_exec_start(cpu);
1279 ret = cpu_exec(cpu);
1280 cpu_exec_end(cpu);
1281 qemu_mutex_lock_iothread();
1282 #ifdef CONFIG_PROFILER
1283 tcg_time += profile_getclock() - ti;
1284 #endif
1285 return ret;
1288 /* Destroy any remaining vCPUs which have been unplugged and have
1289 * finished running
1291 static void deal_with_unplugged_cpus(void)
1293 CPUState *cpu;
1295 CPU_FOREACH(cpu) {
1296 if (cpu->unplug && !cpu_can_run(cpu)) {
1297 qemu_tcg_destroy_vcpu(cpu);
1298 cpu->created = false;
1299 qemu_cond_signal(&qemu_cpu_cond);
1300 break;
1305 /* Single-threaded TCG
1307 * In the single-threaded case each vCPU is simulated in turn. If
1308 * there is more than a single vCPU we create a simple timer to kick
1309 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1310 * This is done explicitly rather than relying on side-effects
1311 * elsewhere.
1314 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1316 CPUState *cpu = arg;
1318 rcu_register_thread();
1319 tcg_register_thread();
1321 qemu_mutex_lock_iothread();
1322 qemu_thread_get_self(cpu->thread);
1324 CPU_FOREACH(cpu) {
1325 cpu->thread_id = qemu_get_thread_id();
1326 cpu->created = true;
1327 cpu->can_do_io = 1;
1329 qemu_cond_signal(&qemu_cpu_cond);
1331 /* wait for initial kick-off after machine start */
1332 while (first_cpu->stopped) {
1333 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1335 /* process any pending work */
1336 CPU_FOREACH(cpu) {
1337 current_cpu = cpu;
1338 qemu_wait_io_event_common(cpu);
1342 start_tcg_kick_timer();
1344 cpu = first_cpu;
1346 /* process any pending work */
1347 cpu->exit_request = 1;
1349 while (1) {
1350 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1351 qemu_account_warp_timer();
1353 /* Run the timers here. This is much more efficient than
1354 * waking up the I/O thread and waiting for completion.
1356 handle_icount_deadline();
1358 if (!cpu) {
1359 cpu = first_cpu;
1362 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1364 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1365 current_cpu = cpu;
1367 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1368 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1370 if (cpu_can_run(cpu)) {
1371 int r;
1373 prepare_icount_for_run(cpu);
1375 r = tcg_cpu_exec(cpu);
1377 process_icount_data(cpu);
1379 if (r == EXCP_DEBUG) {
1380 cpu_handle_guest_debug(cpu);
1381 break;
1382 } else if (r == EXCP_ATOMIC) {
1383 qemu_mutex_unlock_iothread();
1384 cpu_exec_step_atomic(cpu);
1385 qemu_mutex_lock_iothread();
1386 break;
1388 } else if (cpu->stop) {
1389 if (cpu->unplug) {
1390 cpu = CPU_NEXT(cpu);
1392 break;
1395 cpu = CPU_NEXT(cpu);
1396 } /* while (cpu && !cpu->exit_request).. */
1398 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1399 atomic_set(&tcg_current_rr_cpu, NULL);
1401 if (cpu && cpu->exit_request) {
1402 atomic_mb_set(&cpu->exit_request, 0);
1405 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1406 deal_with_unplugged_cpus();
1409 return NULL;
1412 static void *qemu_hax_cpu_thread_fn(void *arg)
1414 CPUState *cpu = arg;
1415 int r;
1417 qemu_mutex_lock_iothread();
1418 qemu_thread_get_self(cpu->thread);
1420 cpu->thread_id = qemu_get_thread_id();
1421 cpu->created = true;
1422 cpu->halted = 0;
1423 current_cpu = cpu;
1425 hax_init_vcpu(cpu);
1426 qemu_cond_signal(&qemu_cpu_cond);
1428 while (1) {
1429 if (cpu_can_run(cpu)) {
1430 r = hax_smp_cpu_exec(cpu);
1431 if (r == EXCP_DEBUG) {
1432 cpu_handle_guest_debug(cpu);
1436 while (cpu_thread_is_idle(cpu)) {
1437 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1439 #ifdef _WIN32
1440 SleepEx(0, TRUE);
1441 #endif
1442 qemu_wait_io_event_common(cpu);
1444 return NULL;
1447 #ifdef _WIN32
1448 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1451 #endif
1453 /* Multi-threaded TCG
1455 * In the multi-threaded case each vCPU has its own thread. The TLS
1456 * variable current_cpu can be used deep in the code to find the
1457 * current CPUState for a given thread.
1460 static void *qemu_tcg_cpu_thread_fn(void *arg)
1462 CPUState *cpu = arg;
1464 g_assert(!use_icount);
1466 rcu_register_thread();
1467 tcg_register_thread();
1469 qemu_mutex_lock_iothread();
1470 qemu_thread_get_self(cpu->thread);
1472 cpu->thread_id = qemu_get_thread_id();
1473 cpu->created = true;
1474 cpu->can_do_io = 1;
1475 current_cpu = cpu;
1476 qemu_cond_signal(&qemu_cpu_cond);
1478 /* process any pending work */
1479 cpu->exit_request = 1;
1481 while (1) {
1482 if (cpu_can_run(cpu)) {
1483 int r;
1484 r = tcg_cpu_exec(cpu);
1485 switch (r) {
1486 case EXCP_DEBUG:
1487 cpu_handle_guest_debug(cpu);
1488 break;
1489 case EXCP_HALTED:
1490 /* during start-up the vCPU is reset and the thread is
1491 * kicked several times. If we don't ensure we go back
1492 * to sleep in the halted state we won't cleanly
1493 * start-up when the vCPU is enabled.
1495 * cpu->halted should ensure we sleep in wait_io_event
1497 g_assert(cpu->halted);
1498 break;
1499 case EXCP_ATOMIC:
1500 qemu_mutex_unlock_iothread();
1501 cpu_exec_step_atomic(cpu);
1502 qemu_mutex_lock_iothread();
1503 default:
1504 /* Ignore everything else? */
1505 break;
1507 } else if (cpu->unplug) {
1508 qemu_tcg_destroy_vcpu(cpu);
1509 cpu->created = false;
1510 qemu_cond_signal(&qemu_cpu_cond);
1511 qemu_mutex_unlock_iothread();
1512 return NULL;
1515 atomic_mb_set(&cpu->exit_request, 0);
1516 qemu_tcg_wait_io_event(cpu);
1519 return NULL;
1522 static void qemu_cpu_kick_thread(CPUState *cpu)
1524 #ifndef _WIN32
1525 int err;
1527 if (cpu->thread_kicked) {
1528 return;
1530 cpu->thread_kicked = true;
1531 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1532 if (err) {
1533 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1534 exit(1);
1536 #else /* _WIN32 */
1537 if (!qemu_cpu_is_self(cpu)) {
1538 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1539 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1540 __func__, GetLastError());
1541 exit(1);
1544 #endif
1547 void qemu_cpu_kick(CPUState *cpu)
1549 qemu_cond_broadcast(cpu->halt_cond);
1550 if (tcg_enabled()) {
1551 cpu_exit(cpu);
1552 /* NOP unless doing single-thread RR */
1553 qemu_cpu_kick_rr_cpu();
1554 } else {
1555 if (hax_enabled()) {
1557 * FIXME: race condition with the exit_request check in
1558 * hax_vcpu_hax_exec
1560 cpu->exit_request = 1;
1562 qemu_cpu_kick_thread(cpu);
1566 void qemu_cpu_kick_self(void)
1568 assert(current_cpu);
1569 qemu_cpu_kick_thread(current_cpu);
1572 bool qemu_cpu_is_self(CPUState *cpu)
1574 return qemu_thread_is_self(cpu->thread);
1577 bool qemu_in_vcpu_thread(void)
1579 return current_cpu && qemu_cpu_is_self(current_cpu);
1582 static __thread bool iothread_locked = false;
1584 bool qemu_mutex_iothread_locked(void)
1586 return iothread_locked;
1589 void qemu_mutex_lock_iothread(void)
1591 g_assert(!qemu_mutex_iothread_locked());
1592 qemu_mutex_lock(&qemu_global_mutex);
1593 iothread_locked = true;
1596 void qemu_mutex_unlock_iothread(void)
1598 g_assert(qemu_mutex_iothread_locked());
1599 iothread_locked = false;
1600 qemu_mutex_unlock(&qemu_global_mutex);
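/* Typical usage from an I/O or worker thread (illustrative sketch):
 *
 *     qemu_mutex_lock_iothread();
 *     ... access device or timer state protected by the BQL ...
 *     qemu_mutex_unlock_iothread();
 *
 * vCPU threads instead drop the lock around cpu_exec() as in
 * tcg_cpu_exec() above.
 */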
1603 static bool all_vcpus_paused(void)
1605 CPUState *cpu;
1607 CPU_FOREACH(cpu) {
1608 if (!cpu->stopped) {
1609 return false;
1613 return true;
1616 void pause_all_vcpus(void)
1618 CPUState *cpu;
1620 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1621 CPU_FOREACH(cpu) {
1622 if (qemu_cpu_is_self(cpu)) {
1623 qemu_cpu_stop(cpu, true);
1624 } else {
1625 cpu->stop = true;
1626 qemu_cpu_kick(cpu);
1630 while (!all_vcpus_paused()) {
1631 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1632 CPU_FOREACH(cpu) {
1633 qemu_cpu_kick(cpu);
1638 void cpu_resume(CPUState *cpu)
1640 cpu->stop = false;
1641 cpu->stopped = false;
1642 qemu_cpu_kick(cpu);
1645 void resume_all_vcpus(void)
1647 CPUState *cpu;
1649 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1650 CPU_FOREACH(cpu) {
1651 cpu_resume(cpu);
1655 void cpu_remove(CPUState *cpu)
1657 cpu->stop = true;
1658 cpu->unplug = true;
1659 qemu_cpu_kick(cpu);
1662 void cpu_remove_sync(CPUState *cpu)
1664 cpu_remove(cpu);
1665 while (cpu->created) {
1666 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1670 /* For temporary buffers for forming a name */
1671 #define VCPU_THREAD_NAME_SIZE 16
1673 static void qemu_tcg_init_vcpu(CPUState *cpu)
1675 char thread_name[VCPU_THREAD_NAME_SIZE];
1676 static QemuCond *single_tcg_halt_cond;
1677 static QemuThread *single_tcg_cpu_thread;
1678 static int tcg_region_inited;
1681 * Initialize TCG regions--once. Now is a good time, because:
1682 * (1) TCG's init context, prologue and target globals have been set up.
1683 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1684 * -accel flag is processed, so the check doesn't work then).
1686 if (!tcg_region_inited) {
1687 tcg_region_inited = 1;
1688 tcg_region_init();
1691 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1692 cpu->thread = g_malloc0(sizeof(QemuThread));
1693 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1694 qemu_cond_init(cpu->halt_cond);
1696 if (qemu_tcg_mttcg_enabled()) {
1697 /* create a thread per vCPU with TCG (MTTCG) */
1698 parallel_cpus = true;
1699 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1700 cpu->cpu_index);
1702 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1703 cpu, QEMU_THREAD_JOINABLE);
1705 } else {
1706 /* share a single thread for all cpus with TCG */
1707 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1708 qemu_thread_create(cpu->thread, thread_name,
1709 qemu_tcg_rr_cpu_thread_fn,
1710 cpu, QEMU_THREAD_JOINABLE);
1712 single_tcg_halt_cond = cpu->halt_cond;
1713 single_tcg_cpu_thread = cpu->thread;
1715 #ifdef _WIN32
1716 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1717 #endif
1718 while (!cpu->created) {
1719 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1721 } else {
1722 /* For non-MTTCG cases we share the thread */
1723 cpu->thread = single_tcg_cpu_thread;
1724 cpu->halt_cond = single_tcg_halt_cond;
1728 static void qemu_hax_start_vcpu(CPUState *cpu)
1730 char thread_name[VCPU_THREAD_NAME_SIZE];
1732 cpu->thread = g_malloc0(sizeof(QemuThread));
1733 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1734 qemu_cond_init(cpu->halt_cond);
1736 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1737 cpu->cpu_index);
1738 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1739 cpu, QEMU_THREAD_JOINABLE);
1740 #ifdef _WIN32
1741 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1742 #endif
1743 while (!cpu->created) {
1744 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1748 static void qemu_kvm_start_vcpu(CPUState *cpu)
1750 char thread_name[VCPU_THREAD_NAME_SIZE];
1752 cpu->thread = g_malloc0(sizeof(QemuThread));
1753 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1754 qemu_cond_init(cpu->halt_cond);
1755 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1756 cpu->cpu_index);
1757 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1758 cpu, QEMU_THREAD_JOINABLE);
1759 while (!cpu->created) {
1760 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1764 static void qemu_dummy_start_vcpu(CPUState *cpu)
1766 char thread_name[VCPU_THREAD_NAME_SIZE];
1768 cpu->thread = g_malloc0(sizeof(QemuThread));
1769 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1770 qemu_cond_init(cpu->halt_cond);
1771 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1772 cpu->cpu_index);
1773 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1774 QEMU_THREAD_JOINABLE);
1775 while (!cpu->created) {
1776 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1780 void qemu_init_vcpu(CPUState *cpu)
1782 cpu->nr_cores = smp_cores;
1783 cpu->nr_threads = smp_threads;
1784 cpu->stopped = true;
1786 if (!cpu->as) {
1787 /* If the target cpu hasn't set up any address spaces itself,
1788 * give it the default one.
1790 cpu->num_ases = 1;
1791 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1794 if (kvm_enabled()) {
1795 qemu_kvm_start_vcpu(cpu);
1796 } else if (hax_enabled()) {
1797 qemu_hax_start_vcpu(cpu);
1798 } else if (tcg_enabled()) {
1799 qemu_tcg_init_vcpu(cpu);
1800 } else {
1801 qemu_dummy_start_vcpu(cpu);
1805 void cpu_stop_current(void)
1807 if (current_cpu) {
1808 qemu_cpu_stop(current_cpu, true);
1812 int vm_stop(RunState state)
1814 if (qemu_in_vcpu_thread()) {
1815 qemu_system_vmstop_request_prepare();
1816 qemu_system_vmstop_request(state);
1818 * FIXME: should not return to device code in case
1819 * vm_stop() has been requested.
1821 cpu_stop_current();
1822 return 0;
1825 return do_vm_stop(state);
1829 * Prepare for (re)starting the VM.
1830 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1831 * running or in case of an error condition), 0 otherwise.
1833 int vm_prepare_start(void)
1835 RunState requested;
1836 int res = 0;
1838 qemu_vmstop_requested(&requested);
1839 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1840 return -1;
1843 /* Ensure that a STOP/RESUME pair of events is emitted if a
1844 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1845 * example, according to documentation is always followed by
1846 * the STOP event.
1848 if (runstate_is_running()) {
1849 qapi_event_send_stop(&error_abort);
1850 res = -1;
1851 } else {
1852 replay_enable_events();
1853 cpu_enable_ticks();
1854 runstate_set(RUN_STATE_RUNNING);
1855 vm_state_notify(1, RUN_STATE_RUNNING);
1858 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1859 qapi_event_send_resume(&error_abort);
1860 return res;
1863 void vm_start(void)
1865 if (!vm_prepare_start()) {
1866 resume_all_vcpus();
1870 /* Does a state transition even if the VM is already stopped;
1871 the current state is forgotten forever. */
1872 int vm_stop_force_state(RunState state)
1874 if (runstate_is_running()) {
1875 return vm_stop(state);
1876 } else {
1877 runstate_set(state);
1879 bdrv_drain_all();
1880 /* Make sure to return an error if the flush in a previous vm_stop()
1881 * failed. */
1882 return bdrv_flush_all();
1886 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1888 /* XXX: implement xxx_cpu_list for targets that still miss it */
1889 #if defined(cpu_list)
1890 cpu_list(f, cpu_fprintf);
1891 #endif
1894 CpuInfoList *qmp_query_cpus(Error **errp)
1896 MachineState *ms = MACHINE(qdev_get_machine());
1897 MachineClass *mc = MACHINE_GET_CLASS(ms);
1898 CpuInfoList *head = NULL, *cur_item = NULL;
1899 CPUState *cpu;
1901 CPU_FOREACH(cpu) {
1902 CpuInfoList *info;
1903 #if defined(TARGET_I386)
1904 X86CPU *x86_cpu = X86_CPU(cpu);
1905 CPUX86State *env = &x86_cpu->env;
1906 #elif defined(TARGET_PPC)
1907 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1908 CPUPPCState *env = &ppc_cpu->env;
1909 #elif defined(TARGET_SPARC)
1910 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1911 CPUSPARCState *env = &sparc_cpu->env;
1912 #elif defined(TARGET_MIPS)
1913 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1914 CPUMIPSState *env = &mips_cpu->env;
1915 #elif defined(TARGET_TRICORE)
1916 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1917 CPUTriCoreState *env = &tricore_cpu->env;
1918 #endif
1920 cpu_synchronize_state(cpu);
1922 info = g_malloc0(sizeof(*info));
1923 info->value = g_malloc0(sizeof(*info->value));
1924 info->value->CPU = cpu->cpu_index;
1925 info->value->current = (cpu == first_cpu);
1926 info->value->halted = cpu->halted;
1927 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1928 info->value->thread_id = cpu->thread_id;
1929 #if defined(TARGET_I386)
1930 info->value->arch = CPU_INFO_ARCH_X86;
1931 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1932 #elif defined(TARGET_PPC)
1933 info->value->arch = CPU_INFO_ARCH_PPC;
1934 info->value->u.ppc.nip = env->nip;
1935 #elif defined(TARGET_SPARC)
1936 info->value->arch = CPU_INFO_ARCH_SPARC;
1937 info->value->u.q_sparc.pc = env->pc;
1938 info->value->u.q_sparc.npc = env->npc;
1939 #elif defined(TARGET_MIPS)
1940 info->value->arch = CPU_INFO_ARCH_MIPS;
1941 info->value->u.q_mips.PC = env->active_tc.PC;
1942 #elif defined(TARGET_TRICORE)
1943 info->value->arch = CPU_INFO_ARCH_TRICORE;
1944 info->value->u.tricore.PC = env->PC;
1945 #else
1946 info->value->arch = CPU_INFO_ARCH_OTHER;
1947 #endif
1948 info->value->has_props = !!mc->cpu_index_to_instance_props;
1949 if (info->value->has_props) {
1950 CpuInstanceProperties *props;
1951 props = g_malloc0(sizeof(*props));
1952 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
1953 info->value->props = props;
1956 /* XXX: waiting for the qapi to support GSList */
1957 if (!cur_item) {
1958 head = cur_item = info;
1959 } else {
1960 cur_item->next = info;
1961 cur_item = info;
1965 return head;
1968 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1969 bool has_cpu, int64_t cpu_index, Error **errp)
1971 FILE *f;
1972 uint32_t l;
1973 CPUState *cpu;
1974 uint8_t buf[1024];
1975 int64_t orig_addr = addr, orig_size = size;
1977 if (!has_cpu) {
1978 cpu_index = 0;
1981 cpu = qemu_get_cpu(cpu_index);
1982 if (cpu == NULL) {
1983 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1984 "a CPU number");
1985 return;
1988 f = fopen(filename, "wb");
1989 if (!f) {
1990 error_setg_file_open(errp, errno, filename);
1991 return;
1994 while (size != 0) {
1995 l = sizeof(buf);
1996 if (l > size)
1997 l = size;
1998 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1999 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2000 " specified", orig_addr, orig_size);
2001 goto exit;
2003 if (fwrite(buf, 1, l, f) != l) {
2004 error_setg(errp, QERR_IO_ERROR);
2005 goto exit;
2007 addr += l;
2008 size -= l;
2011 exit:
2012 fclose(f);
2015 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2016 Error **errp)
2018 FILE *f;
2019 uint32_t l;
2020 uint8_t buf[1024];
2022 f = fopen(filename, "wb");
2023 if (!f) {
2024 error_setg_file_open(errp, errno, filename);
2025 return;
2028 while (size != 0) {
2029 l = sizeof(buf);
2030 if (l > size)
2031 l = size;
2032 cpu_physical_memory_read(addr, buf, l);
2033 if (fwrite(buf, 1, l, f) != l) {
2034 error_setg(errp, QERR_IO_ERROR);
2035 goto exit;
2037 addr += l;
2038 size -= l;
2041 exit:
2042 fclose(f);
2045 void qmp_inject_nmi(Error **errp)
2047 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2050 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2052 if (!use_icount) {
2053 return;
2056 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2057 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2058 if (icount_align_option) {
2059 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2060 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2061 } else {
2062 cpu_fprintf(f, "Max guest delay NA\n");
2063 cpu_fprintf(f, "Max guest advance NA\n");