/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/config-file.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "sysemu/replay.h"
#include "hw/boards.h"
#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
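/* How throttling works: while a throttle percentage is set, each vCPU is
 * periodically forced to sleep.  CPU_THROTTLE_TIMESLICE_NS (10 ms) is the run
 * slice between forced sleeps; the sleep length itself is derived from the
 * requested percentage in cpu_throttle_thread() below.
 */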
bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}
/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
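/* A shift of MAX_ICOUNT_SHIFT (10) means each instruction accounts for
 * 2^10 = 1024 ns of virtual time, i.e. roughly one million instructions per
 * virtual second -- the 1 MIPS floor mentioned above.
 */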
typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
    /* for adjusting icount */
    int64_t vm_clock_warp_start;
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
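/* Example: the bitmask test above succeeds when every memory-ordering bit the
 * guest requires by default (TCG_GUEST_DEFAULT_MO) is also provided by the
 * TCG backend for the host (TCG_TARGET_DEFAULT_MO).  Any guest requirement
 * left over after masking means the host ordering is weaker, so MTTCG is not
 * enabled by default.
 */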
static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}
/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}
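/* Example: with an icount_budget of 10000, u16.low at 200 and icount_extra at
 * 3000, 10000 - (200 + 3000) = 6800 instructions have been executed so far.
 */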
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}
int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}
/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
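/* With the default icount_time_shift of 3 (see configure_icount), each
 * instruction accounts for 1 << 3 = 8 ns of virtual time, i.e. the guest
 * appears to execute at 125 MIPS.
 */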
/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
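/* ICOUNT_WOBBLE is 100 ms: icount_adjust() below leaves icount_time_shift
 * alone while the drift between virtual and real time stays within roughly
 * this margin of the previously observed drift.
 */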
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
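/* Example: with icount_time_shift == 3, a 100 ns deadline rounds up to
 * (100 + 7) >> 3 = 13 instructions.
 */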
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}
static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}
/*
 * Subsection for warp timer migration is optional, because may not be created
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
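/* Worked example: at a 75% throttle, pct = 0.75, so cpu_throttle_thread()
 * sleeps for 0.75 / 0.25 * 10 ms = 30 ms and the timer above re-arms after
 * 10 ms / 0.25 = 40 ms; the vCPU therefore runs for only 10 ms out of every
 * 40 ms, i.e. about 25% of the time.
 */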
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
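/* TCG_KICK_PERIOD is 100 ms: in round-robin mode the currently scheduled vCPU
 * is kicked at least that often so that the other vCPUs get a chance to run.
 */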
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}
static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}
static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */
static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}
static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
{
    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    rcu_unregister_thread();
    return NULL;
#endif
}
static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}
static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}
static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}
static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}
/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}
/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}
static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}
/* The HVF-specific vCPU thread function. This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (whpx_enabled()) {
            whpx_vcpu_kick(cpu);
        } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}
void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}
static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_hvf_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    /* HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode. */
    assert(hvf_enabled());

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
void cpu_stop_current(void)
{
    if (current_cpu) {
        qemu_cpu_stop(current_cpu, true);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);

    return res;
}
void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#elif defined(TARGET_S390X)
        S390CPU *s390_cpu = S390_CPU(cpu);
        CPUS390XState *env = &s390_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#elif defined(TARGET_S390X)
        info->value->arch = CPU_INFO_ARCH_S390;
        info->value->u.s390.cpu_state = env->cpu_state;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
/*
 * fast means: we NEVER interrupt vCPU threads to retrieve
 * information from KVM.
 */
CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoFastList *head = NULL, *cur_item = NULL;
    CPUState *cpu;
#if defined(TARGET_S390X)
    S390CPU *s390_cpu;
    CPUS390XState *env;
#endif

    CPU_FOREACH(cpu) {
        CpuInfoFastList *info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));

        info->value->cpu_index = cpu->cpu_index;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;

        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

#if defined(TARGET_S390X)
        s390_cpu = S390_CPU(cpu);
        env = &s390_cpu->env;
        info->value->arch = CPU_INFO_ARCH_S390;
        info->value->u.s390.cpu_state = env->cpu_state;
#endif

        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}