4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "migration/vmstate.h"
29 #include "monitor/monitor.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-commands-misc.h"
32 #include "qapi/qapi-events-run-state.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qemu/error-report.h"
35 #include "qemu/qemu-print.h"
36 #include "sysemu/tcg.h"
37 #include "sysemu/block-backend.h"
38 #include "exec/gdbstub.h"
39 #include "sysemu/dma.h"
40 #include "sysemu/hw_accel.h"
41 #include "sysemu/kvm.h"
42 #include "sysemu/hax.h"
43 #include "sysemu/hvf.h"
44 #include "sysemu/whpx.h"
45 #include "exec/exec-all.h"
47 #include "qemu/thread.h"
48 #include "sysemu/cpus.h"
49 #include "sysemu/qtest.h"
50 #include "qemu/main-loop.h"
51 #include "qemu/option.h"
52 #include "qemu/bitmap.h"
53 #include "qemu/seqlock.h"
54 #include "qemu/guest-random.h"
57 #include "sysemu/replay.h"
58 #include "sysemu/runstate.h"
59 #include "hw/boards.h"
64 #include <sys/prctl.h>
67 #define PR_MCE_KILL 33
70 #ifndef PR_MCE_KILL_SET
71 #define PR_MCE_KILL_SET 1
74 #ifndef PR_MCE_KILL_EARLY
75 #define PR_MCE_KILL_EARLY 1
78 #endif /* CONFIG_LINUX */
/* Global mutex used as the BQL for vCPU threads (see qemu_init_cpu_loop). */
80 static QemuMutex qemu_global_mutex
;
85 /* vcpu throttling controls */
/* Timer on QEMU_CLOCK_VIRTUAL_RT that periodically schedules throttle sleeps. */
86 static QEMUTimer
*throttle_timer
;
/* Current throttle percentage; accessed with atomic_read/atomic_set. */
87 static unsigned int throttle_percentage
;
/* Valid throttle range (percent) and the 10 ms throttling timeslice. */
89 #define CPU_THROTTLE_PCT_MIN 1
90 #define CPU_THROTTLE_PCT_MAX 99
91 #define CPU_THROTTLE_TIMESLICE_NS 10000000
93 bool cpu_is_stopped(CPUState
*cpu
)
95 return cpu
->stopped
|| !runstate_is_running();
/* Whether the vCPU thread for @cpu has nothing to do and may block waiting
 * for I/O events.  NOTE(review): the extraction lost the return statements
 * between the conditions below — verify against the complete file. */
98 static bool cpu_thread_is_idle(CPUState
*cpu
)
/* A pending stop request or queued cross-CPU work means "not idle". */
100 if (cpu
->stop
|| cpu
->queued_work_first
) {
103 if (cpu_is_stopped(cpu
)) {
/* A halted CPU counts as busy if it has work, or if KVM emulates halt
 * in-kernel (the thread must keep running in that case). */
106 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
107 kvm_halt_in_kernel()) {
/* True only when every vCPU thread is idle (iterates over all CPUs;
 * the loop header and return statements were lost in extraction). */
113 static bool all_cpu_threads_idle(void)
118 if (!cpu_thread_is_idle(cpu
)) {
125 /***********************************************************/
126 /* guest cycle counter */
128 /* Protected by TimersState seqlock */
/* When true, vCPUs are allowed to sleep in icount mode and QEMU_CLOCK_VIRTUAL
 * is warped forward by the warp timer instead of advancing instantly. */
130 static bool icount_sleep
= true;
131 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
132 #define MAX_ICOUNT_SHIFT 10
/* All mutable timekeeping state for the VM: host-tick and host-clock offsets,
 * plus icount bookkeeping.  Readers outside the BQL use vm_clock_seqlock;
 * writers additionally take vm_clock_lock. */
134 typedef struct TimersState
{
135 /* Protected by BQL. */
136 int64_t cpu_ticks_prev
;
137 int64_t cpu_ticks_offset
;
139 /* Protect fields that can be respectively read outside the
140 * BQL, and written from multiple threads.
142 QemuSeqLock vm_clock_seqlock
;
143 QemuSpin vm_clock_lock
;
145 int16_t cpu_ticks_enabled
;
147 /* Conversion factor from emulated instructions to virtual clock ticks. */
148 int16_t icount_time_shift
;
150 /* Compensate for varying guest execution speed. */
151 int64_t qemu_icount_bias
;
153 int64_t vm_clock_warp_start
;
154 int64_t cpu_clock_offset
;
156 /* Only written by TCG thread */
/* NOTE(review): the qemu_icount field (referenced by atomic_set_i64 callers
 * below) appears to have been lost here in extraction — confirm. */
159 /* for adjusting icount */
160 QEMUTimer
*icount_rt_timer
;
161 QEMUTimer
*icount_vm_timer
;
162 QEMUTimer
*icount_warp_timer
;
/* Single global instance; zero-initialized at startup. */
165 static TimersState timers_state
;
169 * We default to false if we know other options have been enabled
170 * which are currently incompatible with MTTCG. Otherwise when each
171 * guest (target) has been updated to support:
172 * - atomic instructions
173 * - memory ordering primitives (barriers)
174 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
176 * Once a guest architecture has been converted to the new primitives
177 * there are two remaining limitations to check.
179 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
180 * - The host must have a stronger memory order than the guest
182 * It may be possible in future to support strong guests on weak hosts
183 * but that will require tagging all load/stores in a guest with their
184 * implicit memory order requirements which would likely slow things
/* MTTCG is only safe when the host's memory model is at least as strong as
 * the guest's: guest-required ordering bits must be a subset of the host's.
 * (The #else/false fallback was lost in extraction.) */
188 static bool check_tcg_memory_orders_compatible(void)
190 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
191 return (TCG_GUEST_DEFAULT_MO
& ~TCG_TARGET_DEFAULT_MO
) == 0;
/* Default MTTCG on/off decision when the user gave no explicit "thread"
 * option: never with icount or oversized guests; otherwise only for targets
 * that declare MTTCG support and a compatible memory order. */
197 static bool default_mttcg_enabled(void)
199 if (use_icount
|| TCG_OVERSIZED_GUEST
) {
202 #ifdef TARGET_SUPPORTS_MTTCG
203 return check_tcg_memory_orders_compatible();
/* Parse the -accel tcg,thread=single|multi option and set mttcg_enabled.
 * "multi" is rejected for oversized guests and icount; targets without
 * TARGET_SUPPORTS_MTTCG or with weaker host ordering only get warnings. */
210 void qemu_tcg_configure(QemuOpts
*opts
, Error
**errp
)
212 const char *t
= qemu_opt_get(opts
, "thread");
214 if (strcmp(t
, "multi") == 0) {
215 if (TCG_OVERSIZED_GUEST
) {
216 error_setg(errp
, "No MTTCG when guest word size > hosts");
217 } else if (use_icount
) {
218 error_setg(errp
, "No MTTCG when icount is enabled");
220 #ifndef TARGET_SUPPORTS_MTTCG
221 warn_report("Guest not yet converted to MTTCG - "
222 "you may get unexpected results");
224 if (!check_tcg_memory_orders_compatible()) {
225 warn_report("Guest expects a stronger memory ordering "
226 "than the host provides");
227 error_printf("This may cause strange/hard to debug errors\n");
229 mttcg_enabled
= true;
231 } else if (strcmp(t
, "single") == 0) {
232 mttcg_enabled
= false;
234 error_setg(errp
, "Invalid 'thread' setting %s", t
);
/* No "thread" option given: fall back to the per-target default. */
237 mttcg_enabled
= default_mttcg_enabled();
241 /* The current number of executed instructions is based on what we
242 * originally budgeted minus the current state of the decrementing
243 * icount counters in extra/u16.low.
245 static int64_t cpu_get_icount_executed(CPUState
*cpu
)
247 return (cpu
->icount_budget
-
248 (cpu_neg(cpu
)->icount_decr
.u16
.low
+ cpu
->icount_extra
));
252 * Update the global shared timer_state.qemu_icount to take into
253 * account executed instructions. This is done by the TCG vCPU
254 * thread so the main-loop can see time has moved forward.
/* Caller must hold the vm_clock seqlock write side (see cpu_update_icount). */
256 static void cpu_update_icount_locked(CPUState
*cpu
)
258 int64_t executed
= cpu_get_icount_executed(cpu
)
259 cpu
->icount_budget
-= executed
;
/* atomic64 store so lock-free readers never see a torn value. */
261 atomic_set_i64(&timers_state
.qemu_icount
,
262 timers_state
.qemu_icount
+ executed
);
266 * Update the global shared timer_state.qemu_icount to take into
267 * account executed instructions. This is done by the TCG vCPU
268 * thread so the main-loop can see time has moved forward.
/* Locked wrapper: take the seqlock write side, delegate, release. */
270 void cpu_update_icount(CPUState
*cpu
)
272 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
273 &timers_state
.vm_clock_lock
);
274 cpu_update_icount_locked(cpu
);
275 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
276 &timers_state
.vm_clock_lock
);
/* Raw instruction count, folding in whatever the currently running vCPU has
 * executed.  Caller holds the seqlock.  NOTE(review): the error path after
 * "Bad icount read" (presumably an abort) was lost in extraction. */
279 static int64_t cpu_get_icount_raw_locked(void)
281 CPUState
*cpu
= current_cpu
;
283 if (cpu
&& cpu
->running
) {
/* Reading icount mid-instruction (can_do_io clear) is invalid. */
284 if (!cpu
->can_do_io
) {
285 error_report("Bad icount read");
288 /* Take into account what has run */
289 cpu_update_icount_locked(cpu
);
291 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
292 return atomic_read_i64(&timers_state
.qemu_icount
);
295 static int64_t cpu_get_icount_locked(void)
297 int64_t icount
= cpu_get_icount_raw_locked();
298 return atomic_read_i64(&timers_state
.qemu_icount_bias
) +
299 cpu_icount_to_ns(icount
);
/* Lock-free read of the raw instruction count using the seqlock retry loop.
 * (Local declarations, the do { opener and the final return were lost in
 * extraction.) */
302 int64_t cpu_get_icount_raw(void)
308 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
309 icount
= cpu_get_icount_raw_locked();
310 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
315 /* Return the virtual CPU time, based on the instruction counter. */
/* Same seqlock retry pattern as cpu_get_icount_raw(), but returns the
 * bias-adjusted ns value from cpu_get_icount_locked(). */
316 int64_t cpu_get_icount(void)
322 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
323 icount
= cpu_get_icount_locked();
324 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
329 int64_t cpu_icount_to_ns(int64_t icount
)
331 return icount
<< atomic_read(&timers_state
.icount_time_shift
);
/* Host-TSC-based tick counter with monotonicity repair; caller holds
 * vm_clock_lock.  (The final "return ticks;" was lost in extraction.) */
334 static int64_t cpu_get_ticks_locked(void)
336 int64_t ticks
= timers_state
.cpu_ticks_offset
;
337 if (timers_state
.cpu_ticks_enabled
) {
338 ticks
+= cpu_get_host_ticks();
/* Clamp against going backwards, folding the regression into the offset. */
341 if (timers_state
.cpu_ticks_prev
> ticks
) {
342 /* Non increasing ticks may happen if the host uses software suspend. */
343 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
344 ticks
= timers_state
.cpu_ticks_prev
;
347 timers_state
.cpu_ticks_prev
= ticks
;
351 /* return the time elapsed in VM between vm_start and vm_stop. Unless
352 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
/* With icount the result is virtual time; otherwise take the spin lock and
 * read the host-tick based counter.  (The icount condition and final return
 * were lost in extraction.) */
355 int64_t cpu_get_ticks(void)
360 return cpu_get_icount();
363 qemu_spin_lock(&timers_state
.vm_clock_lock
);
364 ticks
= cpu_get_ticks_locked();
365 qemu_spin_unlock(&timers_state
.vm_clock_lock
);
/* Monotonic VM clock (ns) under the lock: the stored offset, plus the live
 * host clock while ticks are enabled.  (The "+= get_clock()" body and return
 * were lost in extraction.) */
369 static int64_t cpu_get_clock_locked(void)
373 time
= timers_state
.cpu_clock_offset
;
374 if (timers_state
.cpu_ticks_enabled
) {
381 /* Return the monotonic time elapsed in VM, i.e.,
382 * the time between vm_start and vm_stop
/* Lock-free reader using the seqlock retry loop. */
384 int64_t cpu_get_clock(void)
390 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
391 ti
= cpu_get_clock_locked();
392 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
397 /* enable cpu_get_ticks()
398 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
/* Start counting: record negative offsets so subsequent reads of host
 * ticks/clock yield elapsed time from this point. */
400 void cpu_enable_ticks(void)
402 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
403 &timers_state
.vm_clock_lock
);
404 if (!timers_state
.cpu_ticks_enabled
) {
405 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
406 timers_state
.cpu_clock_offset
-= get_clock();
407 timers_state
.cpu_ticks_enabled
= 1;
409 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
410 &timers_state
.vm_clock_lock
);
413 /* disable cpu_get_ticks() : the clock is stopped. You must not call
414 * cpu_get_ticks() after that.
415 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
/* Freeze the counters: fold the current host values into the offsets so
 * the stored state represents total elapsed time at stop. */
417 void cpu_disable_ticks(void)
419 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
420 &timers_state
.vm_clock_lock
);
421 if (timers_state
.cpu_ticks_enabled
) {
422 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
423 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
424 timers_state
.cpu_ticks_enabled
= 0;
426 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
427 &timers_state
.vm_clock_lock
);
430 /* Correlation between real and virtual time is always going to be
431 fairly approximate, so ignore small variation.
432 When the guest is idle real and virtual time will be aligned in
434 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
/* Periodic feedback controller: compare virtual time (icount) against real
 * time and nudge icount_time_shift up/down, then recompute the bias so the
 * virtual clock stays continuous.  (Several condition fragments were lost
 * in extraction.) */
436 static void icount_adjust(void)
442 /* Protected by TimersState mutex. */
443 static int64_t last_delta
;
445 /* If the VM is not running, then do nothing. */
446 if (!runstate_is_running()) {
450 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
451 &timers_state
.vm_clock_lock
);
452 cur_time
= cpu_get_clock_locked();
453 cur_icount
= cpu_get_icount_locked();
455 delta
= cur_icount
- cur_time
;
456 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
458 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
459 && timers_state
.icount_time_shift
> 0) {
460 /* The guest is getting too far ahead. Slow time down. */
461 atomic_set(&timers_state
.icount_time_shift
,
462 timers_state
.icount_time_shift
- 1);
465 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
466 && timers_state
.icount_time_shift
< MAX_ICOUNT_SHIFT
) {
467 /* The guest is getting too far behind. Speed time up. */
468 atomic_set(&timers_state
.icount_time_shift
,
469 timers_state
.icount_time_shift
+ 1);
/* Rebase the bias so cur_icount is preserved under the new shift. */
472 atomic_set_i64(&timers_state
.qemu_icount_bias
,
473 cur_icount
- (timers_state
.qemu_icount
474 << timers_state
.icount_time_shift
));
475 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
476 &timers_state
.vm_clock_lock
);
/* Real-time trigger: re-arm for one second ahead, then adjust icount.
 * (The icount_adjust() call line was lost in extraction.) */
479 static void icount_adjust_rt(void *opaque
)
481 timer_mod(timers_state
.icount_rt_timer
,
482 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
/* Virtual-time trigger: re-arm 100 ms of virtual time ahead, then adjust.
 * (The icount_adjust() call line was lost in extraction.) */
486 static void icount_adjust_vm(void *opaque
)
488 timer_mod(timers_state
.icount_vm_timer
,
489 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
490 NANOSECONDS_PER_SECOND
/ 10);
494 static int64_t qemu_icount_round(int64_t count
)
496 int shift
= atomic_read(&timers_state
.icount_time_shift
);
497 return (count
+ (1 << shift
) - 1) >> shift
;
/* Apply a pending clock warp: advance qemu_icount_bias by however much
 * real (VIRTUAL_RT) time passed since vm_clock_warp_start, capped in
 * adaptive mode so virtual time never overtakes real time. */
500 static void icount_warp_rt(void)
505 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
506 * changes from -1 to another value, so the race here is okay.
509 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
510 warp_start
= timers_state
.vm_clock_warp_start
;
511 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
/* -1 means no warp is pending; nothing to do. */
513 if (warp_start
== -1) {
517 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
518 &timers_state
.vm_clock_lock
);
519 if (runstate_is_running()) {
520 int64_t clock
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
521 cpu_get_clock_locked());
524 warp_delta
= clock
- timers_state
.vm_clock_warp_start
;
525 if (use_icount
== 2) {
527 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
528 * far ahead of real time.
530 int64_t cur_icount
= cpu_get_icount_locked();
531 int64_t delta
= clock
- cur_icount
;
532 warp_delta
= MIN(warp_delta
, delta
);
534 atomic_set_i64(&timers_state
.qemu_icount_bias
,
535 timers_state
.qemu_icount_bias
+ warp_delta
);
/* Mark the warp as consumed. */
537 timers_state
.vm_clock_warp_start
= -1;
538 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
539 &timers_state
.vm_clock_lock
);
541 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
542 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/* Warp-timer callback; delegates to icount_warp_rt() (call line lost in
 * extraction). */
546 static void icount_timer_cb(void *opaque
)
548 /* No need for a checkpoint because the timer already synchronizes
549 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
/* Deterministically advance QEMU_CLOCK_VIRTUAL to @dest for qtest:
 * repeatedly warp the icount bias to the next timer deadline and run
 * the timers, until the target time is reached. */
554 void qtest_clock_warp(int64_t dest
)
556 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
557 AioContext
*aio_context
;
558 assert(qtest_enabled());
559 aio_context
= qemu_get_aio_context();
560 while (clock
< dest
) {
561 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
562 QEMU_TIMER_ATTR_ALL
);
/* Warp by the smaller of "time remaining" and "next deadline". */
563 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
565 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
566 &timers_state
.vm_clock_lock
);
567 atomic_set_i64(&timers_state
.qemu_icount_bias
,
568 timers_state
.qemu_icount_bias
+ warp
);
569 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
570 &timers_state
.vm_clock_lock
);
572 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
573 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
574 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
576 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/* Arrange for QEMU_CLOCK_VIRTUAL to advance while all vCPUs sleep in icount
 * mode: either warp immediately (sleep=off), or schedule icount_warp_timer
 * to fire after the real-time equivalent of the next virtual deadline.
 * (Early returns and several closing braces were lost in extraction.) */
579 void qemu_start_warp_timer(void)
588 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
589 * do not fire, so computing the deadline does not make sense.
591 if (!runstate_is_running()) {
595 if (replay_mode
!= REPLAY_MODE_PLAY
) {
596 if (!all_cpu_threads_idle()) {
600 if (qtest_enabled()) {
601 /* When testing, qtest commands advance icount. */
605 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
);
607 /* warp clock deterministically in record/replay mode */
608 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
609 /* vCPU is sleeping and warp can't be started.
610 It is probably a race condition: notification sent
611 to vCPU was processed in advance and vCPU went to sleep.
612 Therefore we have to wake it up for doing something. */
613 if (replay_has_checkpoint()) {
614 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
620 /* We want to use the earliest deadline from ALL vm_clocks */
621 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
622 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
623 ~QEMU_TIMER_ATTR_EXTERNAL
);
/* One-shot warning when sleep is disabled but no timer is pending. */
625 static bool notified
;
626 if (!icount_sleep
&& !notified
) {
627 warn_report("icount sleep disabled and no active timers");
635 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
636 * sleep. Otherwise, the CPU might be waiting for a future timer
637 * interrupt to wake it up, but the interrupt never comes because
638 * the vCPU isn't running any insns and thus doesn't advance the
639 * QEMU_CLOCK_VIRTUAL.
643 * We never let VCPUs sleep in no sleep icount mode.
644 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
645 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
646 * It is useful when we want a deterministic execution time,
647 * isolated from host latencies.
649 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
650 &timers_state
.vm_clock_lock
);
651 atomic_set_i64(&timers_state
.qemu_icount_bias
,
652 timers_state
.qemu_icount_bias
+ deadline
);
653 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
654 &timers_state
.vm_clock_lock
);
655 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
658 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
659 * "real" time, (related to the time left until the next event) has
660 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
661 * This avoids that the warps are visible externally; for example,
662 * you will not be sending network packets continuously instead of
665 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
666 &timers_state
.vm_clock_lock
);
/* Only move warp_start backwards; earliest sleeper wins. */
667 if (timers_state
.vm_clock_warp_start
== -1
668 || timers_state
.vm_clock_warp_start
> clock
) {
669 timers_state
.vm_clock_warp_start
= clock
;
671 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
672 &timers_state
.vm_clock_lock
);
673 timer_mod_anticipate(timers_state
.icount_warp_timer
,
676 } else if (deadline
== 0) {
677 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/* Cancel the warp timer and account any pending warp before vCPUs resume
 * execution.  (The icount_warp_rt() call and early returns were lost in
 * extraction.) */
681 static void qemu_account_warp_timer(void)
683 if (!use_icount
|| !icount_sleep
) {
687 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
688 * do not fire, so computing the deadline does not make sense.
690 if (!runstate_is_running()) {
694 /* warp clock deterministically in record/replay mode */
695 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
699 timer_del(timers_state
.icount_warp_timer
);
/* Migration predicate: icount subsection is needed only in icount mode.
 * NOTE(review): the body (presumably "return use_icount;") was lost in
 * extraction — confirm against the full file. */
703 static bool icount_state_needed(void *opaque
)
708 static bool warp_timer_state_needed(void *opaque
)
710 TimersState
*s
= opaque
;
711 return s
->icount_warp_timer
!= NULL
;
714 static bool adjust_timers_state_needed(void *opaque
)
716 TimersState
*s
= opaque
;
717 return s
->icount_rt_timer
!= NULL
;
721 * Subsection for warp timer migration is optional, because may not be created
723 static const VMStateDescription icount_vmstate_warp_timer
= {
724 .name
= "timer/icount/warp_timer",
726 .minimum_version_id
= 1,
727 .needed
= warp_timer_state_needed
,
728 .fields
= (VMStateField
[]) {
729 VMSTATE_INT64(vm_clock_warp_start
, TimersState
),
730 VMSTATE_TIMER_PTR(icount_warp_timer
, TimersState
),
731 VMSTATE_END_OF_LIST()
/* Optional subsection for the rt/vm icount adjustment timers. */
735 static const VMStateDescription icount_vmstate_adjust_timers
= {
736 .name
= "timer/icount/timers",
738 .minimum_version_id
= 1,
739 .needed
= adjust_timers_state_needed
,
740 .fields
= (VMStateField
[]) {
741 VMSTATE_TIMER_PTR(icount_rt_timer
, TimersState
),
742 VMSTATE_TIMER_PTR(icount_vm_timer
, TimersState
),
743 VMSTATE_END_OF_LIST()
748 * This is a subsection for icount migration.
750 static const VMStateDescription icount_vmstate_timers
= {
751 .name
= "timer/icount",
753 .minimum_version_id
= 1,
754 .needed
= icount_state_needed
,
755 .fields
= (VMStateField
[]) {
756 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
757 VMSTATE_INT64(qemu_icount
, TimersState
),
758 VMSTATE_END_OF_LIST()
760 .subsections
= (const VMStateDescription
*[]) {
761 &icount_vmstate_warp_timer
,
762 &icount_vmstate_adjust_timers
,
/* Top-level migration description for TimersState; the icount subsection is
 * attached below and only sent when icount_state_needed() is true. */
767 static const VMStateDescription vmstate_timers
= {
770 .minimum_version_id
= 1,
771 .fields
= (VMStateField
[]) {
772 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
774 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
775 VMSTATE_END_OF_LIST()
777 .subsections
= (const VMStateDescription
*[]) {
778 &icount_vmstate_timers
,
/* Runs on the vCPU thread (via async_run_on_cpu): sleep the vCPU for a
 * fraction of the timeslice proportional to the throttle percentage.
 * Short sleeps use qemu_cond_timedwait (BQL held, interruptible by kick);
 * long sleeps drop the iothread lock around g_usleep. */
783 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
786 double throttle_ratio
;
787 int64_t sleeptime_ns
, endtime_ns
;
789 if (!cpu_throttle_get_percentage()) {
793 pct
= (double)cpu_throttle_get_percentage()/100;
794 throttle_ratio
= pct
/ (1 - pct
);
795 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
796 sleeptime_ns
= (int64_t)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
+ 1);
797 endtime_ns
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) + sleeptime_ns
;
798 while (sleeptime_ns
> 0 && !cpu
->stop
) {
799 if (sleeptime_ns
> SCALE_MS
) {
800 qemu_cond_timedwait(cpu
->halt_cond
, &qemu_global_mutex
,
801 sleeptime_ns
/ SCALE_MS
);
803 qemu_mutex_unlock_iothread();
804 g_usleep(sleeptime_ns
/ SCALE_US
);
805 qemu_mutex_lock_iothread();
807 sleeptime_ns
= endtime_ns
- qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
/* Allow cpu_throttle_timer_tick() to schedule us again. */
809 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
/* Periodic tick: schedule cpu_throttle_thread on every CPU that does not
 * already have one pending, then re-arm for the next active interval.
 * (The CPU_FOREACH loop header was lost in extraction.) */
812 static void cpu_throttle_timer_tick(void *opaque
)
817 /* Stop the timer if needed */
818 if (!cpu_throttle_get_percentage()) {
/* xchg guarantees at most one queued throttle work item per CPU. */
822 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
823 async_run_on_cpu(cpu
, cpu_throttle_thread
,
/* Next tick after the *active* (non-sleeping) share of the timeslice. */
828 pct
= (double)cpu_throttle_get_percentage()/100;
829 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
830 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
833 void cpu_throttle_set(int new_throttle_pct
)
835 /* Ensure throttle percentage is within valid range */
836 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
837 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
839 atomic_set(&throttle_percentage
, new_throttle_pct
);
841 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
842 CPU_THROTTLE_TIMESLICE_NS
);
/* Disable throttling; the tick callback sees 0% and lets the timer lapse. */
845 void cpu_throttle_stop(void)
847 atomic_set(&throttle_percentage
, 0);
850 bool cpu_throttle_active(void)
852 return (cpu_throttle_get_percentage() != 0);
/* Current throttle percentage (0 when throttling is off); atomic read so
 * it is safe from any thread. */
855 int cpu_throttle_get_percentage(void)
857 return atomic_read(&throttle_percentage
);
/* One-time init of timekeeping: locks, migration registration, and the
 * throttle timer. */
860 void cpu_ticks_init(void)
862 seqlock_init(&timers_state
.vm_clock_seqlock
);
863 qemu_spin_init(&timers_state
.vm_clock_lock
);
864 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
865 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
866 cpu_throttle_timer_tick
, NULL
);
/* Parse -icount options (shift, align, sleep, rr) and set up icount mode:
 * fixed shift from the option, or shift=auto with the adjustment timers.
 * (Early returns and the use_icount assignments were lost in extraction.) */
869 void configure_icount(QemuOpts
*opts
, Error
**errp
)
872 char *rem_str
= NULL
;
874 option
= qemu_opt_get(opts
, "shift");
/* align without shift is an error: alignment needs a fixed conversion. */
876 if (qemu_opt_get(opts
, "align") != NULL
) {
877 error_setg(errp
, "Please specify shift option when using align");
882 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
884 timers_state
.icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
885 icount_timer_cb
, NULL
);
888 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
890 if (icount_align_option
&& !icount_sleep
) {
891 error_setg(errp
, "align=on and sleep=off are incompatible");
893 if (strcmp(option
, "auto") != 0) {
/* Fixed shift given by the user; strtol + rem_str validates the number. */
895 timers_state
.icount_time_shift
= strtol(option
, &rem_str
, 0);
896 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
897 error_setg(errp
, "icount: Invalid shift value");
901 } else if (icount_align_option
) {
902 error_setg(errp
, "shift=auto and align=on are incompatible");
903 } else if (!icount_sleep
) {
904 error_setg(errp
, "shift=auto and sleep=off are incompatible");
909 /* 125MIPS seems a reasonable initial guess at the guest speed.
910 It will be corrected fairly quickly anyway. */
911 timers_state
.icount_time_shift
= 3;
913 /* Have both realtime and virtual time triggers for speed adjustment.
914 The realtime trigger catches emulated time passing too slowly,
915 the virtual time trigger catches emulated time passing too fast.
916 Realtime triggers occur even when idle, so use them less frequently
918 timers_state
.vm_clock_warp_start
= -1;
919 timers_state
.icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
920 icount_adjust_rt
, NULL
);
921 timer_mod(timers_state
.icount_rt_timer
,
922 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
923 timers_state
.icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
924 icount_adjust_vm
, NULL
);
925 timer_mod(timers_state
.icount_vm_timer
,
926 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
927 NANOSECONDS_PER_SECOND
/ 10);
930 /***********************************************************/
931 /* TCG vCPU kick timer
933 * The kick timer is responsible for moving single threaded vCPU
934 * emulation on to the next vCPU. If more than one vCPU is running a
935 * timer event will force a cpu->exit so the next vCPU can get
938 * The timer is removed if all vCPUs are idle and restarted again once
939 * idleness is complete.
/* Timer driving round-robin scheduling in single-threaded TCG mode. */
942 static QEMUTimer
*tcg_kick_vcpu_timer
;
/* The vCPU currently holding the round-robin execution slot. */
943 static CPUState
*tcg_current_rr_cpu
;
945 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
/* Absolute virtual-clock time of the next round-robin kick. */
947 static inline int64_t qemu_tcg_next_kick(void)
949 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
952 /* Kick the currently round-robin scheduled vCPU */
/* Retry loop: the RR slot may change hands while we kick, so loop until
 * the observed current CPU is stable.  (The loop body — null check and
 * cpu_exit call — was lost in extraction.) */
953 static void qemu_cpu_kick_rr_cpu(void)
957 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
961 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
/* Empty work item: queued solely to wake a vCPU out of its idle wait
 * (see qemu_timer_notify_cb). */
964 static void do_nothing(CPUState
*cpu
, run_on_cpu_data unused
)
/* Clock-notify hook: in icount mode, make sure a vCPU re-evaluates its
 * icount deadline when QEMU_CLOCK_VIRTUAL timers change. */
968 void qemu_timer_notify_cb(void *opaque
, QEMUClockType type
)
970 if (!use_icount
|| type
!= QEMU_CLOCK_VIRTUAL
) {
975 if (qemu_in_vcpu_thread()) {
976 /* A CPU is currently running; kick it back out to the
977 * tcg_cpu_exec() loop so it will recalculate its
978 * icount deadline immediately.
980 qemu_cpu_kick(current_cpu
);
981 } else if (first_cpu
) {
982 /* qemu_cpu_kick is not enough to kick a halted CPU out of
983 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
984 * causes cpu_thread_is_idle to return false. This way,
985 * handle_icount_deadline can run.
986 * If we have no CPUs at all for some reason, we don't
987 * need to do anything.
989 async_run_on_cpu(first_cpu
, do_nothing
, RUN_ON_CPU_NULL
);
/* Kick-timer callback: re-arm for the next period, then kick the current
 * round-robin vCPU so the next one gets a turn. */
993 static void kick_tcg_thread(void *opaque
)
995 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
996 qemu_cpu_kick_rr_cpu();
/* (Re)start the round-robin kick timer; only meaningful with more than one
 * vCPU, and only in single-threaded TCG mode. */
999 static void start_tcg_kick_timer(void)
1001 assert(!mttcg_enabled
);
1002 if (!tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
1003 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
1004 kick_tcg_thread
, NULL
);
1006 if (tcg_kick_vcpu_timer
&& !timer_pending(tcg_kick_vcpu_timer
)) {
1007 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
/* Pause the kick timer while all vCPUs are idle (restarted on wakeup). */
1011 static void stop_tcg_kick_timer(void)
1013 assert(!mttcg_enabled
);
1014 if (tcg_kick_vcpu_timer
&& timer_pending(tcg_kick_vcpu_timer
)) {
1015 timer_del(tcg_kick_vcpu_timer
);
1019 /***********************************************************/
/* Fatal hardware-emulation error: print the message plus the state of every
 * CPU, then terminate.  (The va_start/CPU_FOREACH/abort lines were lost in
 * extraction.) */
1020 void hw_error(const char *fmt
, ...)
1026 fprintf(stderr
, "qemu: hardware error: ");
1027 vfprintf(stderr
, fmt
, ap
);
1028 fprintf(stderr
, "\n");
1030 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
1031 cpu_dump_state(cpu
, stderr
, CPU_DUMP_FPU
);
/* Sync register state from the accelerator for every CPU (the CPU_FOREACH
 * loop headers were lost in extraction); HVF is special-cased pending its
 * integration into cpu_synchronize_state(). */
1037 void cpu_synchronize_all_states(void)
1042 cpu_synchronize_state(cpu
);
1043 /* TODO: move to cpu_synchronize_state() */
1044 if (hvf_enabled()) {
1045 hvf_cpu_synchronize_state(cpu
);
/* Push state back to the accelerator after a system reset. */
1050 void cpu_synchronize_all_post_reset(void)
1055 cpu_synchronize_post_reset(cpu
);
1056 /* TODO: move to cpu_synchronize_post_reset() */
1057 if (hvf_enabled()) {
1058 hvf_cpu_synchronize_post_reset(cpu
);
/* Push state back to the accelerator after machine init / loadvm. */
1063 void cpu_synchronize_all_post_init(void)
1068 cpu_synchronize_post_init(cpu
);
1069 /* TODO: move to cpu_synchronize_post_init() */
1070 if (hvf_enabled()) {
1071 hvf_cpu_synchronize_post_init(cpu
);
/* Notify accelerators that a loadvm is about to replace CPU state. */
1076 void cpu_synchronize_all_pre_loadvm(void)
1081 cpu_synchronize_pre_loadvm(cpu
);
/* Common VM-stop path: pause vCPUs/ticks, switch run state, optionally emit
 * the QMP STOP event, then flush block devices.  (The pause_all_vcpus call
 * and bdrv_drain_all were lost in extraction.) */
1085 static int do_vm_stop(RunState state
, bool send_stop
)
1089 if (runstate_is_running()) {
1090 cpu_disable_ticks();
1092 runstate_set(state
);
1093 vm_state_notify(0, state
);
1095 qapi_event_send_stop();
1100 replay_disable_events();
/* Flush result is the function's return value. */
1101 ret
= bdrv_flush_all();
1106 /* Special vm_stop() variant for terminating the process. Historically clients
1107 * did not expect a QMP STOP event and so we need to retain compatibility.
1109 int vm_shutdown(void)
1111 return do_vm_stop(RUN_STATE_SHUTDOWN
, false);
/* Whether @cpu may enter its execution loop.  (The stop-request check and
 * return statements were lost in extraction.) */
1114 static bool cpu_can_run(CPUState
*cpu
)
1119 if (cpu_is_stopped(cpu
)) {
/* A vCPU hit a debug event: hand it to the gdbstub, request a debug stop of
 * the whole VM, and park this CPU. */
1125 static void cpu_handle_guest_debug(CPUState
*cpu
)
1127 gdb_set_stop_cpu(cpu
);
1128 qemu_system_debug_request();
1129 cpu
->stopped
= true;
/* Re-deliver SIGBUS with the default (fatal) disposition when the MCE
 * cannot be handled; falls through to perror+abort-style failure if
 * sigaction fails.  (raise(SIGBUS) and abort() lines were lost in
 * extraction.) */
1133 static void sigbus_reraise(void)
1136 struct sigaction action
;
1138 memset(&action
, 0, sizeof(action
));
1139 action
.sa_handler
= SIG_DFL
;
1140 if (!sigaction(SIGBUS
, &action
, NULL
)) {
1143 sigaddset(&set
, SIGBUS
);
/* Unblock SIGBUS so the re-raised signal is delivered immediately. */
1144 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
1146 perror("Failed to re-raise SIGBUS!\n");
/* SIGBUS handler for host memory errors (MCE): forward the fault address to
 * KVM, re-raising fatally if KVM cannot handle it.  (The sigbus_reraise()
 * calls were lost in extraction.) */
1150 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
/* Only machine-check action-optional/required faults are handled here. */
1152 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
1157 /* Called asynchronously in VCPU thread. */
1158 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
1162 /* Called synchronously (via signalfd) in main thread. */
1163 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
/* Linux: install the SIGBUS handler and opt in to early machine-check
 * notification via prctl.  The !CONFIG_LINUX variant is a no-op stub. */
1169 static void qemu_init_sigbus(void)
1171 struct sigaction action
;
1173 memset(&action
, 0, sizeof(action
));
1174 action
.sa_flags
= SA_SIGINFO
;
1175 action
.sa_sigaction
= sigbus_handler
;
1176 sigaction(SIGBUS
, &action
, NULL
);
1178 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
1180 #else /* !CONFIG_LINUX */
1181 static void qemu_init_sigbus(void)
1184 #endif /* !CONFIG_LINUX */
/* Identity of the main loop (iothread) thread. */
1186 static QemuThread io_thread
;
/* Signalled when a vCPU thread finishes creation or teardown. */
1189 static QemuCond qemu_cpu_cond
;
/* Signalled when a vCPU stops (see qemu_cpu_stop). */
1191 static QemuCond qemu_pause_cond
;
/* One-time init of the vCPU-loop machinery: SIGBUS handling, the shared
 * condition variables, the BQL, and the iothread identity. */
1193 void qemu_init_cpu_loop(void)
1196 qemu_cond_init(&qemu_cpu_cond
);
1197 qemu_cond_init(&qemu_pause_cond
);
1198 qemu_mutex_init(&qemu_global_mutex
);
1200 qemu_thread_get_self(&io_thread
);
/* Run @func synchronously on @cpu's thread, dropping/retaking the BQL
 * around the wait (delegates to do_run_on_cpu). */
1203 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1205 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
/* Tear down a KVM vCPU on unplug; failure is fatal enough to report
 * (the exit path after the report was lost in extraction). */
1208 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1210 if (kvm_destroy_vcpu(cpu
) < 0) {
1211 error_report("kvm_destroy_vcpu failed");
/* TCG counterpart; body not visible here (appears to be trivial). */
1216 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
/* Mark @cpu stopped (must run on its own thread) and wake anyone waiting in
 * pause_all_vcpus().  (The stop-flag clear and optional cpu_exit were lost
 * in extraction.) */
1220 static void qemu_cpu_stop(CPUState
*cpu
, bool exit
)
1222 g_assert(qemu_cpu_is_self(cpu
));
1224 cpu
->stopped
= true;
1228 qemu_cond_broadcast(&qemu_pause_cond
);
/* Post-wakeup housekeeping shared by all vCPU loops: clear the kick flag,
 * honor stop requests, and drain queued cross-CPU work. */
1231 static void qemu_wait_io_event_common(CPUState
*cpu
)
1233 atomic_mb_set(&cpu
->thread_kicked
, false);
1235 qemu_cpu_stop(cpu
, false);
1237 process_queued_cpu_work(cpu
);
/* Single-threaded TCG idle wait: sleep on first_cpu's halt condition while
 * every vCPU is idle (kick timer paused), then restart the kick timer and
 * run per-CPU housekeeping (the CPU_FOREACH header was lost in extraction). */
1240 static void qemu_tcg_rr_wait_io_event(void)
1244 while (all_cpu_threads_idle()) {
1245 stop_tcg_kick_timer();
1246 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1249 start_tcg_kick_timer();
1252 qemu_wait_io_event_common(cpu
);
/* Per-vCPU idle wait (KVM/dummy/MTTCG): block on the CPU's halt condition
 * under the BQL until there is something to do, then run common
 * housekeeping.  The Windows-only branch eats the dummy APC used to kick
 * non-TCG threads. */
1256 static void qemu_wait_io_event(CPUState
*cpu
)
1258 while (cpu_thread_is_idle(cpu
)) {
1259 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1263 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1264 if (!tcg_enabled()) {
1268 qemu_wait_io_event_common(cpu
);
/* Thread function for a KVM vCPU: init the vCPU, signal creation, then loop
 * running kvm_cpu_exec()/waiting for events until unplug, and tear down.
 * (Error-path exits and the final return were lost in extraction.) */
1271 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1273 CPUState
*cpu
= arg
;
1276 rcu_register_thread();
1278 qemu_mutex_lock_iothread();
1279 qemu_thread_get_self(cpu
->thread
);
1280 cpu
->thread_id
= qemu_get_thread_id();
1284 r
= kvm_init_vcpu(cpu
);
1286 error_report("kvm_init_vcpu failed: %s", strerror(-r
));
1290 kvm_init_cpu_signals(cpu
);
1292 /* signal CPU creation */
1293 cpu
->created
= true;
1294 qemu_cond_signal(&qemu_cpu_cond
);
1295 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
/* Main execution loop: run while runnable, idle-wait otherwise. */
1298 if (cpu_can_run(cpu
)) {
1299 r
= kvm_cpu_exec(cpu
);
1300 if (r
== EXCP_DEBUG
) {
1301 cpu_handle_guest_debug(cpu
);
1304 qemu_wait_io_event(cpu
);
1305 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1307 qemu_kvm_destroy_vcpu(cpu
);
1308 cpu
->created
= false;
1309 qemu_cond_signal(&qemu_cpu_cond
);
1310 qemu_mutex_unlock_iothread();
1311 rcu_unregister_thread();
1315 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1318 error_report("qtest is not supported under Windows");
1321 CPUState
*cpu
= arg
;
1325 rcu_register_thread();
1327 qemu_mutex_lock_iothread();
1328 qemu_thread_get_self(cpu
->thread
);
1329 cpu
->thread_id
= qemu_get_thread_id();
1333 sigemptyset(&waitset
);
1334 sigaddset(&waitset
, SIG_IPI
);
1336 /* signal CPU creation */
1337 cpu
->created
= true;
1338 qemu_cond_signal(&qemu_cpu_cond
);
1339 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1342 qemu_mutex_unlock_iothread();
1345 r
= sigwait(&waitset
, &sig
);
1346 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1351 qemu_mutex_lock_iothread();
1352 qemu_wait_io_event(cpu
);
1353 } while (!cpu
->unplug
);
1355 qemu_mutex_unlock_iothread();
1356 rcu_unregister_thread();
1361 static int64_t tcg_get_icount_limit(void)
1365 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1367 * Include all the timers, because they may need an attention.
1368 * Too long CPU execution may create unnecessary delay in UI.
1370 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1371 QEMU_TIMER_ATTR_ALL
);
1373 /* Maintain prior (possibly buggy) behaviour where if no deadline
1374 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1375 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1378 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1379 deadline
= INT32_MAX
;
1382 return qemu_icount_round(deadline
);
1384 return replay_get_instructions();
1388 static void handle_icount_deadline(void)
1390 assert(qemu_in_vcpu_thread());
1392 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1393 QEMU_TIMER_ATTR_ALL
);
1395 if (deadline
== 0) {
1396 /* Wake up other AioContexts. */
1397 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1398 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
1403 static void prepare_icount_for_run(CPUState
*cpu
)
1408 /* These should always be cleared by process_icount_data after
1409 * each vCPU execution. However u16.high can be raised
1410 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1412 g_assert(cpu_neg(cpu
)->icount_decr
.u16
.low
== 0);
1413 g_assert(cpu
->icount_extra
== 0);
1415 cpu
->icount_budget
= tcg_get_icount_limit();
1416 insns_left
= MIN(0xffff, cpu
->icount_budget
);
1417 cpu_neg(cpu
)->icount_decr
.u16
.low
= insns_left
;
1418 cpu
->icount_extra
= cpu
->icount_budget
- insns_left
;
1420 replay_mutex_lock();
1424 static void process_icount_data(CPUState
*cpu
)
1427 /* Account for executed instructions */
1428 cpu_update_icount(cpu
);
1430 /* Reset the counters */
1431 cpu_neg(cpu
)->icount_decr
.u16
.low
= 0;
1432 cpu
->icount_extra
= 0;
1433 cpu
->icount_budget
= 0;
1435 replay_account_executed_instructions();
1437 replay_mutex_unlock();
1442 static int tcg_cpu_exec(CPUState
*cpu
)
1445 #ifdef CONFIG_PROFILER
1449 assert(tcg_enabled());
1450 #ifdef CONFIG_PROFILER
1451 ti
= profile_getclock();
1453 cpu_exec_start(cpu
);
1454 ret
= cpu_exec(cpu
);
1456 #ifdef CONFIG_PROFILER
1457 atomic_set(&tcg_ctx
->prof
.cpu_exec_time
,
1458 tcg_ctx
->prof
.cpu_exec_time
+ profile_getclock() - ti
);
1463 /* Destroy any remaining vCPUs which have been unplugged and have
1466 static void deal_with_unplugged_cpus(void)
1471 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1472 qemu_tcg_destroy_vcpu(cpu
);
1473 cpu
->created
= false;
1474 qemu_cond_signal(&qemu_cpu_cond
);
1480 /* Single-threaded TCG
1482 * In the single-threaded case each vCPU is simulated in turn. If
1483 * there is more than a single vCPU we create a simple timer to kick
1484 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1485 * This is done explicitly rather than relying on side-effects
1489 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1491 CPUState
*cpu
= arg
;
1493 assert(tcg_enabled());
1494 rcu_register_thread();
1495 tcg_register_thread();
1497 qemu_mutex_lock_iothread();
1498 qemu_thread_get_self(cpu
->thread
);
1500 cpu
->thread_id
= qemu_get_thread_id();
1501 cpu
->created
= true;
1503 qemu_cond_signal(&qemu_cpu_cond
);
1504 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1506 /* wait for initial kick-off after machine start */
1507 while (first_cpu
->stopped
) {
1508 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1510 /* process any pending work */
1513 qemu_wait_io_event_common(cpu
);
1517 start_tcg_kick_timer();
1521 /* process any pending work */
1522 cpu
->exit_request
= 1;
1525 qemu_mutex_unlock_iothread();
1526 replay_mutex_lock();
1527 qemu_mutex_lock_iothread();
1528 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1529 qemu_account_warp_timer();
1531 /* Run the timers here. This is much more efficient than
1532 * waking up the I/O thread and waiting for completion.
1534 handle_icount_deadline();
1536 replay_mutex_unlock();
1542 while (cpu
&& !cpu
->queued_work_first
&& !cpu
->exit_request
) {
1544 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1547 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1548 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1550 if (cpu_can_run(cpu
)) {
1553 qemu_mutex_unlock_iothread();
1554 prepare_icount_for_run(cpu
);
1556 r
= tcg_cpu_exec(cpu
);
1558 process_icount_data(cpu
);
1559 qemu_mutex_lock_iothread();
1561 if (r
== EXCP_DEBUG
) {
1562 cpu_handle_guest_debug(cpu
);
1564 } else if (r
== EXCP_ATOMIC
) {
1565 qemu_mutex_unlock_iothread();
1566 cpu_exec_step_atomic(cpu
);
1567 qemu_mutex_lock_iothread();
1570 } else if (cpu
->stop
) {
1572 cpu
= CPU_NEXT(cpu
);
1577 cpu
= CPU_NEXT(cpu
);
1578 } /* while (cpu && !cpu->exit_request).. */
1580 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1581 atomic_set(&tcg_current_rr_cpu
, NULL
);
1583 if (cpu
&& cpu
->exit_request
) {
1584 atomic_mb_set(&cpu
->exit_request
, 0);
1587 if (use_icount
&& all_cpu_threads_idle()) {
1589 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1590 * in the main_loop, wake it up in order to start the warp timer.
1592 qemu_notify_event();
1595 qemu_tcg_rr_wait_io_event();
1596 deal_with_unplugged_cpus();
1599 rcu_unregister_thread();
1603 static void *qemu_hax_cpu_thread_fn(void *arg
)
1605 CPUState
*cpu
= arg
;
1608 rcu_register_thread();
1609 qemu_mutex_lock_iothread();
1610 qemu_thread_get_self(cpu
->thread
);
1612 cpu
->thread_id
= qemu_get_thread_id();
1613 cpu
->created
= true;
1617 qemu_cond_signal(&qemu_cpu_cond
);
1618 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1621 if (cpu_can_run(cpu
)) {
1622 r
= hax_smp_cpu_exec(cpu
);
1623 if (r
== EXCP_DEBUG
) {
1624 cpu_handle_guest_debug(cpu
);
1628 qemu_wait_io_event(cpu
);
1629 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1630 rcu_unregister_thread();
1634 /* The HVF-specific vCPU thread function. This one should only run when the host
1635 * CPU supports the VMX "unrestricted guest" feature. */
1636 static void *qemu_hvf_cpu_thread_fn(void *arg
)
1638 CPUState
*cpu
= arg
;
1642 assert(hvf_enabled());
1644 rcu_register_thread();
1646 qemu_mutex_lock_iothread();
1647 qemu_thread_get_self(cpu
->thread
);
1649 cpu
->thread_id
= qemu_get_thread_id();
1655 /* signal CPU creation */
1656 cpu
->created
= true;
1657 qemu_cond_signal(&qemu_cpu_cond
);
1658 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1661 if (cpu_can_run(cpu
)) {
1662 r
= hvf_vcpu_exec(cpu
);
1663 if (r
== EXCP_DEBUG
) {
1664 cpu_handle_guest_debug(cpu
);
1667 qemu_wait_io_event(cpu
);
1668 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1670 hvf_vcpu_destroy(cpu
);
1671 cpu
->created
= false;
1672 qemu_cond_signal(&qemu_cpu_cond
);
1673 qemu_mutex_unlock_iothread();
1674 rcu_unregister_thread();
1678 static void *qemu_whpx_cpu_thread_fn(void *arg
)
1680 CPUState
*cpu
= arg
;
1683 rcu_register_thread();
1685 qemu_mutex_lock_iothread();
1686 qemu_thread_get_self(cpu
->thread
);
1687 cpu
->thread_id
= qemu_get_thread_id();
1690 r
= whpx_init_vcpu(cpu
);
1692 fprintf(stderr
, "whpx_init_vcpu failed: %s\n", strerror(-r
));
1696 /* signal CPU creation */
1697 cpu
->created
= true;
1698 qemu_cond_signal(&qemu_cpu_cond
);
1699 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1702 if (cpu_can_run(cpu
)) {
1703 r
= whpx_vcpu_exec(cpu
);
1704 if (r
== EXCP_DEBUG
) {
1705 cpu_handle_guest_debug(cpu
);
1708 while (cpu_thread_is_idle(cpu
)) {
1709 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1711 qemu_wait_io_event_common(cpu
);
1712 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1714 whpx_destroy_vcpu(cpu
);
1715 cpu
->created
= false;
1716 qemu_cond_signal(&qemu_cpu_cond
);
1717 qemu_mutex_unlock_iothread();
1718 rcu_unregister_thread();
#ifdef _WIN32
/* No-op APC routine; queued only to interrupt a vCPU thread's wait. */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1728 /* Multi-threaded TCG
1730 * In the multi-threaded case each vCPU has its own thread. The TLS
1731 * variable current_cpu can be used deep in the code to find the
1732 * current CPUState for a given thread.
1735 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1737 CPUState
*cpu
= arg
;
1739 assert(tcg_enabled());
1740 g_assert(!use_icount
);
1742 rcu_register_thread();
1743 tcg_register_thread();
1745 qemu_mutex_lock_iothread();
1746 qemu_thread_get_self(cpu
->thread
);
1748 cpu
->thread_id
= qemu_get_thread_id();
1749 cpu
->created
= true;
1752 qemu_cond_signal(&qemu_cpu_cond
);
1753 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1755 /* process any pending work */
1756 cpu
->exit_request
= 1;
1759 if (cpu_can_run(cpu
)) {
1761 qemu_mutex_unlock_iothread();
1762 r
= tcg_cpu_exec(cpu
);
1763 qemu_mutex_lock_iothread();
1766 cpu_handle_guest_debug(cpu
);
1769 /* during start-up the vCPU is reset and the thread is
1770 * kicked several times. If we don't ensure we go back
1771 * to sleep in the halted state we won't cleanly
1772 * start-up when the vCPU is enabled.
1774 * cpu->halted should ensure we sleep in wait_io_event
1776 g_assert(cpu
->halted
);
1779 qemu_mutex_unlock_iothread();
1780 cpu_exec_step_atomic(cpu
);
1781 qemu_mutex_lock_iothread();
1783 /* Ignore everything else? */
1788 atomic_mb_set(&cpu
->exit_request
, 0);
1789 qemu_wait_io_event(cpu
);
1790 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1792 qemu_tcg_destroy_vcpu(cpu
);
1793 cpu
->created
= false;
1794 qemu_cond_signal(&qemu_cpu_cond
);
1795 qemu_mutex_unlock_iothread();
1796 rcu_unregister_thread();
1800 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1805 if (cpu
->thread_kicked
) {
1808 cpu
->thread_kicked
= true;
1809 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1810 if (err
&& err
!= ESRCH
) {
1811 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1815 if (!qemu_cpu_is_self(cpu
)) {
1816 if (whpx_enabled()) {
1817 whpx_vcpu_kick(cpu
);
1818 } else if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1819 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1820 __func__
, GetLastError());
1827 void qemu_cpu_kick(CPUState
*cpu
)
1829 qemu_cond_broadcast(cpu
->halt_cond
);
1830 if (tcg_enabled()) {
1832 /* NOP unless doing single-thread RR */
1833 qemu_cpu_kick_rr_cpu();
1835 if (hax_enabled()) {
1837 * FIXME: race condition with the exit_request check in
1840 cpu
->exit_request
= 1;
1842 qemu_cpu_kick_thread(cpu
);
1846 void qemu_cpu_kick_self(void)
1848 assert(current_cpu
);
1849 qemu_cpu_kick_thread(current_cpu
);
1852 bool qemu_cpu_is_self(CPUState
*cpu
)
1854 return qemu_thread_is_self(cpu
->thread
);
1857 bool qemu_in_vcpu_thread(void)
1859 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1862 static __thread
bool iothread_locked
= false;
1864 bool qemu_mutex_iothread_locked(void)
1866 return iothread_locked
;
1870 * The BQL is taken from so many places that it is worth profiling the
1871 * callers directly, instead of funneling them all through a single function.
1873 void qemu_mutex_lock_iothread_impl(const char *file
, int line
)
1875 QemuMutexLockFunc bql_lock
= atomic_read(&qemu_bql_mutex_lock_func
);
1877 g_assert(!qemu_mutex_iothread_locked());
1878 bql_lock(&qemu_global_mutex
, file
, line
);
1879 iothread_locked
= true;
1882 void qemu_mutex_unlock_iothread(void)
1884 g_assert(qemu_mutex_iothread_locked());
1885 iothread_locked
= false;
1886 qemu_mutex_unlock(&qemu_global_mutex
);
1889 static bool all_vcpus_paused(void)
1894 if (!cpu
->stopped
) {
1902 void pause_all_vcpus(void)
1906 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1908 if (qemu_cpu_is_self(cpu
)) {
1909 qemu_cpu_stop(cpu
, true);
1916 /* We need to drop the replay_lock so any vCPU threads woken up
1917 * can finish their replay tasks
1919 replay_mutex_unlock();
1921 while (!all_vcpus_paused()) {
1922 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1928 qemu_mutex_unlock_iothread();
1929 replay_mutex_lock();
1930 qemu_mutex_lock_iothread();
1933 void cpu_resume(CPUState
*cpu
)
1936 cpu
->stopped
= false;
1940 void resume_all_vcpus(void)
1944 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1950 void cpu_remove_sync(CPUState
*cpu
)
1955 qemu_mutex_unlock_iothread();
1956 qemu_thread_join(cpu
->thread
);
1957 qemu_mutex_lock_iothread();
1960 /* For temporary buffers for forming a name */
1961 #define VCPU_THREAD_NAME_SIZE 16
1963 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1965 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1966 static QemuCond
*single_tcg_halt_cond
;
1967 static QemuThread
*single_tcg_cpu_thread
;
1968 static int tcg_region_inited
;
1970 assert(tcg_enabled());
1972 * Initialize TCG regions--once. Now is a good time, because:
1973 * (1) TCG's init context, prologue and target globals have been set up.
1974 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1975 * -accel flag is processed, so the check doesn't work then).
1977 if (!tcg_region_inited
) {
1978 tcg_region_inited
= 1;
1982 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread
) {
1983 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1984 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1985 qemu_cond_init(cpu
->halt_cond
);
1987 if (qemu_tcg_mttcg_enabled()) {
1988 /* create a thread per vCPU with TCG (MTTCG) */
1989 parallel_cpus
= true;
1990 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1993 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1994 cpu
, QEMU_THREAD_JOINABLE
);
1997 /* share a single thread for all cpus with TCG */
1998 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "ALL CPUs/TCG");
1999 qemu_thread_create(cpu
->thread
, thread_name
,
2000 qemu_tcg_rr_cpu_thread_fn
,
2001 cpu
, QEMU_THREAD_JOINABLE
);
2003 single_tcg_halt_cond
= cpu
->halt_cond
;
2004 single_tcg_cpu_thread
= cpu
->thread
;
2007 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2010 /* For non-MTTCG cases we share the thread */
2011 cpu
->thread
= single_tcg_cpu_thread
;
2012 cpu
->halt_cond
= single_tcg_halt_cond
;
2013 cpu
->thread_id
= first_cpu
->thread_id
;
2015 cpu
->created
= true;
2019 static void qemu_hax_start_vcpu(CPUState
*cpu
)
2021 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2023 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2024 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2025 qemu_cond_init(cpu
->halt_cond
);
2027 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HAX",
2029 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hax_cpu_thread_fn
,
2030 cpu
, QEMU_THREAD_JOINABLE
);
2032 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2036 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
2038 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2040 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2041 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2042 qemu_cond_init(cpu
->halt_cond
);
2043 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
2045 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
2046 cpu
, QEMU_THREAD_JOINABLE
);
2049 static void qemu_hvf_start_vcpu(CPUState
*cpu
)
2051 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2053 /* HVF currently does not support TCG, and only runs in
2054 * unrestricted-guest mode. */
2055 assert(hvf_enabled());
2057 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2058 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2059 qemu_cond_init(cpu
->halt_cond
);
2061 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HVF",
2063 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hvf_cpu_thread_fn
,
2064 cpu
, QEMU_THREAD_JOINABLE
);
2067 static void qemu_whpx_start_vcpu(CPUState
*cpu
)
2069 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2071 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2072 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2073 qemu_cond_init(cpu
->halt_cond
);
2074 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/WHPX",
2076 qemu_thread_create(cpu
->thread
, thread_name
, qemu_whpx_cpu_thread_fn
,
2077 cpu
, QEMU_THREAD_JOINABLE
);
2079 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2083 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
2085 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2087 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2088 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2089 qemu_cond_init(cpu
->halt_cond
);
2090 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
2092 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
2093 QEMU_THREAD_JOINABLE
);
2096 void qemu_init_vcpu(CPUState
*cpu
)
2098 MachineState
*ms
= MACHINE(qdev_get_machine());
2100 cpu
->nr_cores
= ms
->smp
.cores
;
2101 cpu
->nr_threads
= ms
->smp
.threads
;
2102 cpu
->stopped
= true;
2103 cpu
->random_seed
= qemu_guest_random_seed_thread_part1();
2106 /* If the target cpu hasn't set up any address spaces itself,
2107 * give it the default one.
2110 cpu_address_space_init(cpu
, 0, "cpu-memory", cpu
->memory
);
2113 if (kvm_enabled()) {
2114 qemu_kvm_start_vcpu(cpu
);
2115 } else if (hax_enabled()) {
2116 qemu_hax_start_vcpu(cpu
);
2117 } else if (hvf_enabled()) {
2118 qemu_hvf_start_vcpu(cpu
);
2119 } else if (tcg_enabled()) {
2120 qemu_tcg_init_vcpu(cpu
);
2121 } else if (whpx_enabled()) {
2122 qemu_whpx_start_vcpu(cpu
);
2124 qemu_dummy_start_vcpu(cpu
);
2127 while (!cpu
->created
) {
2128 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
2132 void cpu_stop_current(void)
2135 current_cpu
->stop
= true;
2136 cpu_exit(current_cpu
);
2140 int vm_stop(RunState state
)
2142 if (qemu_in_vcpu_thread()) {
2143 qemu_system_vmstop_request_prepare();
2144 qemu_system_vmstop_request(state
);
2146 * FIXME: should not return to device code in case
2147 * vm_stop() has been requested.
2153 return do_vm_stop(state
, true);
2157 * Prepare for (re)starting the VM.
2158 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2159 * running or in case of an error condition), 0 otherwise.
2161 int vm_prepare_start(void)
2165 qemu_vmstop_requested(&requested
);
2166 if (runstate_is_running() && requested
== RUN_STATE__MAX
) {
2170 /* Ensure that a STOP/RESUME pair of events is emitted if a
2171 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2172 * example, according to documentation is always followed by
2175 if (runstate_is_running()) {
2176 qapi_event_send_stop();
2177 qapi_event_send_resume();
2181 /* We are sending this now, but the CPUs will be resumed shortly later */
2182 qapi_event_send_resume();
2184 replay_enable_events();
2186 runstate_set(RUN_STATE_RUNNING
);
2187 vm_state_notify(1, RUN_STATE_RUNNING
);
/* Start the VM: resume all vCPUs if vm_prepare_start() allows it.
 * NOTE(review): function header reconstructed — the `void vm_start(void)`
 * line was lost from this chunk; confirm against upstream. */
void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
2198 /* does a state transition even if the VM is already stopped,
2199 current state is forgotten forever */
2200 int vm_stop_force_state(RunState state
)
2202 if (runstate_is_running()) {
2203 return vm_stop(state
);
2205 runstate_set(state
);
2208 /* Make sure to return an error if the flush in a previous vm_stop()
2210 return bdrv_flush_all();
/* Print the list of supported CPU models for this target, when the
 * target defines cpu_list. */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}
2222 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
2223 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
2229 int64_t orig_addr
= addr
, orig_size
= size
;
2235 cpu
= qemu_get_cpu(cpu_index
);
2237 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
2242 f
= fopen(filename
, "wb");
2244 error_setg_file_open(errp
, errno
, filename
);
2252 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
2253 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
2254 " specified", orig_addr
, orig_size
);
2257 if (fwrite(buf
, 1, l
, f
) != l
) {
2258 error_setg(errp
, QERR_IO_ERROR
);
2269 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
2276 f
= fopen(filename
, "wb");
2278 error_setg_file_open(errp
, errno
, filename
);
2286 cpu_physical_memory_read(addr
, buf
, l
);
2287 if (fwrite(buf
, 1, l
, f
) != l
) {
2288 error_setg(errp
, QERR_IO_ERROR
);
2299 void qmp_inject_nmi(Error
**errp
)
2301 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
2304 void dump_drift_info(void)
2310 qemu_printf("Host - Guest clock %"PRIi64
" ms\n",
2311 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
2312 if (icount_align_option
) {
2313 qemu_printf("Max guest delay %"PRIi64
" ms\n",
2314 -max_delay
/ SCALE_MS
);
2315 qemu_printf("Max guest advance %"PRIi64
" ms\n",
2316 max_advance
/ SCALE_MS
);
2318 qemu_printf("Max guest delay NA\n");
2319 qemu_printf("Max guest advance NA\n");