4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "migration/vmstate.h"
29 #include "monitor/monitor.h"
30 #include "qapi/error.h"
31 #include "qapi/qapi-commands-misc.h"
32 #include "qapi/qapi-events-run-state.h"
33 #include "qapi/qmp/qerror.h"
34 #include "qemu/error-report.h"
35 #include "qemu/qemu-print.h"
36 #include "sysemu/tcg.h"
37 #include "sysemu/block-backend.h"
38 #include "exec/gdbstub.h"
39 #include "sysemu/dma.h"
40 #include "sysemu/hw_accel.h"
41 #include "sysemu/kvm.h"
42 #include "sysemu/hax.h"
43 #include "sysemu/hvf.h"
44 #include "sysemu/whpx.h"
45 #include "exec/exec-all.h"
47 #include "qemu/thread.h"
48 #include "qemu/plugin.h"
49 #include "sysemu/cpus.h"
50 #include "sysemu/qtest.h"
51 #include "qemu/main-loop.h"
52 #include "qemu/option.h"
53 #include "qemu/bitmap.h"
54 #include "qemu/seqlock.h"
55 #include "qemu/guest-random.h"
58 #include "sysemu/replay.h"
59 #include "sysemu/runstate.h"
60 #include "hw/boards.h"
65 #include <sys/prctl.h>
68 #define PR_MCE_KILL 33
71 #ifndef PR_MCE_KILL_SET
72 #define PR_MCE_KILL_SET 1
75 #ifndef PR_MCE_KILL_EARLY
76 #define PR_MCE_KILL_EARLY 1
79 #endif /* CONFIG_LINUX */
81 static QemuMutex qemu_global_mutex
;
86 /* vcpu throttling controls */
87 static QEMUTimer
*throttle_timer
;
88 static unsigned int throttle_percentage
;
90 #define CPU_THROTTLE_PCT_MIN 1
91 #define CPU_THROTTLE_PCT_MAX 99
92 #define CPU_THROTTLE_TIMESLICE_NS 10000000
94 bool cpu_is_stopped(CPUState
*cpu
)
96 return cpu
->stopped
|| !runstate_is_running();
99 static bool cpu_thread_is_idle(CPUState
*cpu
)
101 if (cpu
->stop
|| cpu
->queued_work_first
) {
104 if (cpu_is_stopped(cpu
)) {
107 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
108 kvm_halt_in_kernel()) {
114 static bool all_cpu_threads_idle(void)
119 if (!cpu_thread_is_idle(cpu
)) {
126 /***********************************************************/
127 /* guest cycle counter */
129 /* Protected by TimersState seqlock */
131 static bool icount_sleep
= true;
132 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
133 #define MAX_ICOUNT_SHIFT 10
135 typedef struct TimersState
{
136 /* Protected by BQL. */
137 int64_t cpu_ticks_prev
;
138 int64_t cpu_ticks_offset
;
140 /* Protect fields that can be respectively read outside the
141 * BQL, and written from multiple threads.
143 QemuSeqLock vm_clock_seqlock
;
144 QemuSpin vm_clock_lock
;
146 int16_t cpu_ticks_enabled
;
148 /* Conversion factor from emulated instructions to virtual clock ticks. */
149 int16_t icount_time_shift
;
151 /* Compensate for varying guest execution speed. */
152 int64_t qemu_icount_bias
;
154 int64_t vm_clock_warp_start
;
155 int64_t cpu_clock_offset
;
157 /* Only written by TCG thread */
160 /* for adjusting icount */
161 QEMUTimer
*icount_rt_timer
;
162 QEMUTimer
*icount_vm_timer
;
163 QEMUTimer
*icount_warp_timer
;
166 static TimersState timers_state
;
170 * We default to false if we know other options have been enabled
171 * which are currently incompatible with MTTCG. Otherwise when each
172 * guest (target) has been updated to support:
173 * - atomic instructions
174 * - memory ordering primitives (barriers)
175 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
177 * Once a guest architecture has been converted to the new primitives
178 * there are two remaining limitations to check.
180 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
181 * - The host must have a stronger memory order than the guest
183 * It may be possible in future to support strong guests on weak hosts
184 * but that will require tagging all load/stores in a guest with their
185 * implicit memory order requirements which would likely slow things
189 static bool check_tcg_memory_orders_compatible(void)
191 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
192 return (TCG_GUEST_DEFAULT_MO
& ~TCG_TARGET_DEFAULT_MO
) == 0;
198 static bool default_mttcg_enabled(void)
200 if (use_icount
|| TCG_OVERSIZED_GUEST
) {
203 #ifdef TARGET_SUPPORTS_MTTCG
204 return check_tcg_memory_orders_compatible();
211 void qemu_tcg_configure(QemuOpts
*opts
, Error
**errp
)
213 const char *t
= qemu_opt_get(opts
, "thread");
215 if (strcmp(t
, "multi") == 0) {
216 if (TCG_OVERSIZED_GUEST
) {
217 error_setg(errp
, "No MTTCG when guest word size > hosts");
218 } else if (use_icount
) {
219 error_setg(errp
, "No MTTCG when icount is enabled");
221 #ifndef TARGET_SUPPORTS_MTTCG
222 warn_report("Guest not yet converted to MTTCG - "
223 "you may get unexpected results");
225 if (!check_tcg_memory_orders_compatible()) {
226 warn_report("Guest expects a stronger memory ordering "
227 "than the host provides");
228 error_printf("This may cause strange/hard to debug errors\n");
230 mttcg_enabled
= true;
232 } else if (strcmp(t
, "single") == 0) {
233 mttcg_enabled
= false;
235 error_setg(errp
, "Invalid 'thread' setting %s", t
);
238 mttcg_enabled
= default_mttcg_enabled();
242 /* The current number of executed instructions is based on what we
243 * originally budgeted minus the current state of the decrementing
244 * icount counters in extra/u16.low.
246 static int64_t cpu_get_icount_executed(CPUState
*cpu
)
248 return (cpu
->icount_budget
-
249 (cpu_neg(cpu
)->icount_decr
.u16
.low
+ cpu
->icount_extra
));
253 * Update the global shared timer_state.qemu_icount to take into
254 * account executed instructions. This is done by the TCG vCPU
255 * thread so the main-loop can see time has moved forward.
257 static void cpu_update_icount_locked(CPUState
*cpu
)
259 int64_t executed
= cpu_get_icount_executed(cpu
);
260 cpu
->icount_budget
-= executed
;
262 atomic_set_i64(&timers_state
.qemu_icount
,
263 timers_state
.qemu_icount
+ executed
);
267 * Update the global shared timer_state.qemu_icount to take into
268 * account executed instructions. This is done by the TCG vCPU
269 * thread so the main-loop can see time has moved forward.
271 void cpu_update_icount(CPUState
*cpu
)
273 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
274 &timers_state
.vm_clock_lock
);
275 cpu_update_icount_locked(cpu
);
276 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
277 &timers_state
.vm_clock_lock
);
280 static int64_t cpu_get_icount_raw_locked(void)
282 CPUState
*cpu
= current_cpu
;
284 if (cpu
&& cpu
->running
) {
285 if (!cpu
->can_do_io
) {
286 error_report("Bad icount read");
289 /* Take into account what has run */
290 cpu_update_icount_locked(cpu
);
292 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
293 return atomic_read_i64(&timers_state
.qemu_icount
);
296 static int64_t cpu_get_icount_locked(void)
298 int64_t icount
= cpu_get_icount_raw_locked();
299 return atomic_read_i64(&timers_state
.qemu_icount_bias
) +
300 cpu_icount_to_ns(icount
);
303 int64_t cpu_get_icount_raw(void)
309 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
310 icount
= cpu_get_icount_raw_locked();
311 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
316 /* Return the virtual CPU time, based on the instruction counter. */
317 int64_t cpu_get_icount(void)
323 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
324 icount
= cpu_get_icount_locked();
325 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
330 int64_t cpu_icount_to_ns(int64_t icount
)
332 return icount
<< atomic_read(&timers_state
.icount_time_shift
);
335 static int64_t cpu_get_ticks_locked(void)
337 int64_t ticks
= timers_state
.cpu_ticks_offset
;
338 if (timers_state
.cpu_ticks_enabled
) {
339 ticks
+= cpu_get_host_ticks();
342 if (timers_state
.cpu_ticks_prev
> ticks
) {
343 /* Non increasing ticks may happen if the host uses software suspend. */
344 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
345 ticks
= timers_state
.cpu_ticks_prev
;
348 timers_state
.cpu_ticks_prev
= ticks
;
352 /* return the time elapsed in VM between vm_start and vm_stop. Unless
353 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
356 int64_t cpu_get_ticks(void)
361 return cpu_get_icount();
364 qemu_spin_lock(&timers_state
.vm_clock_lock
);
365 ticks
= cpu_get_ticks_locked();
366 qemu_spin_unlock(&timers_state
.vm_clock_lock
);
370 static int64_t cpu_get_clock_locked(void)
374 time
= timers_state
.cpu_clock_offset
;
375 if (timers_state
.cpu_ticks_enabled
) {
382 /* Return the monotonic time elapsed in VM, i.e.,
383 * the time between vm_start and vm_stop
385 int64_t cpu_get_clock(void)
391 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
392 ti
= cpu_get_clock_locked();
393 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
398 /* enable cpu_get_ticks()
399 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
401 void cpu_enable_ticks(void)
403 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
404 &timers_state
.vm_clock_lock
);
405 if (!timers_state
.cpu_ticks_enabled
) {
406 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
407 timers_state
.cpu_clock_offset
-= get_clock();
408 timers_state
.cpu_ticks_enabled
= 1;
410 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
411 &timers_state
.vm_clock_lock
);
414 /* disable cpu_get_ticks() : the clock is stopped. You must not call
415 * cpu_get_ticks() after that.
416 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
418 void cpu_disable_ticks(void)
420 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
421 &timers_state
.vm_clock_lock
);
422 if (timers_state
.cpu_ticks_enabled
) {
423 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
424 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
425 timers_state
.cpu_ticks_enabled
= 0;
427 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
428 &timers_state
.vm_clock_lock
);
431 /* Correlation between real and virtual time is always going to be
432 fairly approximate, so ignore small variation.
433 When the guest is idle real and virtual time will be aligned in
435 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
437 static void icount_adjust(void)
443 /* Protected by TimersState mutex. */
444 static int64_t last_delta
;
446 /* If the VM is not running, then do nothing. */
447 if (!runstate_is_running()) {
451 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
452 &timers_state
.vm_clock_lock
);
453 cur_time
= cpu_get_clock_locked();
454 cur_icount
= cpu_get_icount_locked();
456 delta
= cur_icount
- cur_time
;
457 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
459 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
460 && timers_state
.icount_time_shift
> 0) {
461 /* The guest is getting too far ahead. Slow time down. */
462 atomic_set(&timers_state
.icount_time_shift
,
463 timers_state
.icount_time_shift
- 1);
466 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
467 && timers_state
.icount_time_shift
< MAX_ICOUNT_SHIFT
) {
468 /* The guest is getting too far behind. Speed time up. */
469 atomic_set(&timers_state
.icount_time_shift
,
470 timers_state
.icount_time_shift
+ 1);
473 atomic_set_i64(&timers_state
.qemu_icount_bias
,
474 cur_icount
- (timers_state
.qemu_icount
475 << timers_state
.icount_time_shift
));
476 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
477 &timers_state
.vm_clock_lock
);
480 static void icount_adjust_rt(void *opaque
)
482 timer_mod(timers_state
.icount_rt_timer
,
483 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
487 static void icount_adjust_vm(void *opaque
)
489 timer_mod(timers_state
.icount_vm_timer
,
490 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
491 NANOSECONDS_PER_SECOND
/ 10);
495 static int64_t qemu_icount_round(int64_t count
)
497 int shift
= atomic_read(&timers_state
.icount_time_shift
);
498 return (count
+ (1 << shift
) - 1) >> shift
;
501 static void icount_warp_rt(void)
506 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
507 * changes from -1 to another value, so the race here is okay.
510 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
511 warp_start
= timers_state
.vm_clock_warp_start
;
512 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
514 if (warp_start
== -1) {
518 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
519 &timers_state
.vm_clock_lock
);
520 if (runstate_is_running()) {
521 int64_t clock
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
522 cpu_get_clock_locked());
525 warp_delta
= clock
- timers_state
.vm_clock_warp_start
;
526 if (use_icount
== 2) {
528 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
529 * far ahead of real time.
531 int64_t cur_icount
= cpu_get_icount_locked();
532 int64_t delta
= clock
- cur_icount
;
533 warp_delta
= MIN(warp_delta
, delta
);
535 atomic_set_i64(&timers_state
.qemu_icount_bias
,
536 timers_state
.qemu_icount_bias
+ warp_delta
);
538 timers_state
.vm_clock_warp_start
= -1;
539 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
540 &timers_state
.vm_clock_lock
);
542 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
543 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
547 static void icount_timer_cb(void *opaque
)
549 /* No need for a checkpoint because the timer already synchronizes
550 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
555 void qtest_clock_warp(int64_t dest
)
557 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
558 AioContext
*aio_context
;
559 assert(qtest_enabled());
560 aio_context
= qemu_get_aio_context();
561 while (clock
< dest
) {
562 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
563 QEMU_TIMER_ATTR_ALL
);
564 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
566 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
567 &timers_state
.vm_clock_lock
);
568 atomic_set_i64(&timers_state
.qemu_icount_bias
,
569 timers_state
.qemu_icount_bias
+ warp
);
570 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
571 &timers_state
.vm_clock_lock
);
573 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
574 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
575 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
577 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
580 void qemu_start_warp_timer(void)
589 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
590 * do not fire, so computing the deadline does not make sense.
592 if (!runstate_is_running()) {
596 if (replay_mode
!= REPLAY_MODE_PLAY
) {
597 if (!all_cpu_threads_idle()) {
601 if (qtest_enabled()) {
602 /* When testing, qtest commands advance icount. */
606 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
);
608 /* warp clock deterministically in record/replay mode */
609 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
610 /* vCPU is sleeping and warp can't be started.
611 It is probably a race condition: notification sent
612 to vCPU was processed in advance and vCPU went to sleep.
613 Therefore we have to wake it up for doing something. */
614 if (replay_has_checkpoint()) {
615 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
621 /* We want to use the earliest deadline from ALL vm_clocks */
622 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
623 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
624 ~QEMU_TIMER_ATTR_EXTERNAL
);
626 static bool notified
;
627 if (!icount_sleep
&& !notified
) {
628 warn_report("icount sleep disabled and no active timers");
636 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
637 * sleep. Otherwise, the CPU might be waiting for a future timer
638 * interrupt to wake it up, but the interrupt never comes because
639 * the vCPU isn't running any insns and thus doesn't advance the
640 * QEMU_CLOCK_VIRTUAL.
644 * We never let VCPUs sleep in no sleep icount mode.
645 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
646 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
647 * It is useful when we want a deterministic execution time,
648 * isolated from host latencies.
650 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
651 &timers_state
.vm_clock_lock
);
652 atomic_set_i64(&timers_state
.qemu_icount_bias
,
653 timers_state
.qemu_icount_bias
+ deadline
);
654 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
655 &timers_state
.vm_clock_lock
);
656 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
659 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
660 * "real" time, (related to the time left until the next event) has
661 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
662 * This avoids that the warps are visible externally; for example,
663 * you will not be sending network packets continuously instead of
666 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
667 &timers_state
.vm_clock_lock
);
668 if (timers_state
.vm_clock_warp_start
== -1
669 || timers_state
.vm_clock_warp_start
> clock
) {
670 timers_state
.vm_clock_warp_start
= clock
;
672 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
673 &timers_state
.vm_clock_lock
);
674 timer_mod_anticipate(timers_state
.icount_warp_timer
,
677 } else if (deadline
== 0) {
678 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
682 static void qemu_account_warp_timer(void)
684 if (!use_icount
|| !icount_sleep
) {
688 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
689 * do not fire, so computing the deadline does not make sense.
691 if (!runstate_is_running()) {
695 /* warp clock deterministically in record/replay mode */
696 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
700 timer_del(timers_state
.icount_warp_timer
);
704 static bool icount_state_needed(void *opaque
)
709 static bool warp_timer_state_needed(void *opaque
)
711 TimersState
*s
= opaque
;
712 return s
->icount_warp_timer
!= NULL
;
715 static bool adjust_timers_state_needed(void *opaque
)
717 TimersState
*s
= opaque
;
718 return s
->icount_rt_timer
!= NULL
;
722 * Subsection for warp timer migration is optional, because the timer may not be created
724 static const VMStateDescription icount_vmstate_warp_timer
= {
725 .name
= "timer/icount/warp_timer",
727 .minimum_version_id
= 1,
728 .needed
= warp_timer_state_needed
,
729 .fields
= (VMStateField
[]) {
730 VMSTATE_INT64(vm_clock_warp_start
, TimersState
),
731 VMSTATE_TIMER_PTR(icount_warp_timer
, TimersState
),
732 VMSTATE_END_OF_LIST()
736 static const VMStateDescription icount_vmstate_adjust_timers
= {
737 .name
= "timer/icount/timers",
739 .minimum_version_id
= 1,
740 .needed
= adjust_timers_state_needed
,
741 .fields
= (VMStateField
[]) {
742 VMSTATE_TIMER_PTR(icount_rt_timer
, TimersState
),
743 VMSTATE_TIMER_PTR(icount_vm_timer
, TimersState
),
744 VMSTATE_END_OF_LIST()
749 * This is a subsection for icount migration.
751 static const VMStateDescription icount_vmstate_timers
= {
752 .name
= "timer/icount",
754 .minimum_version_id
= 1,
755 .needed
= icount_state_needed
,
756 .fields
= (VMStateField
[]) {
757 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
758 VMSTATE_INT64(qemu_icount
, TimersState
),
759 VMSTATE_END_OF_LIST()
761 .subsections
= (const VMStateDescription
*[]) {
762 &icount_vmstate_warp_timer
,
763 &icount_vmstate_adjust_timers
,
768 static const VMStateDescription vmstate_timers
= {
771 .minimum_version_id
= 1,
772 .fields
= (VMStateField
[]) {
773 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
775 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
776 VMSTATE_END_OF_LIST()
778 .subsections
= (const VMStateDescription
*[]) {
779 &icount_vmstate_timers
,
784 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
787 double throttle_ratio
;
788 int64_t sleeptime_ns
, endtime_ns
;
790 if (!cpu_throttle_get_percentage()) {
794 pct
= (double)cpu_throttle_get_percentage()/100;
795 throttle_ratio
= pct
/ (1 - pct
);
796 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
797 sleeptime_ns
= (int64_t)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
+ 1);
798 endtime_ns
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) + sleeptime_ns
;
799 while (sleeptime_ns
> 0 && !cpu
->stop
) {
800 if (sleeptime_ns
> SCALE_MS
) {
801 qemu_cond_timedwait(cpu
->halt_cond
, &qemu_global_mutex
,
802 sleeptime_ns
/ SCALE_MS
);
804 qemu_mutex_unlock_iothread();
805 g_usleep(sleeptime_ns
/ SCALE_US
);
806 qemu_mutex_lock_iothread();
808 sleeptime_ns
= endtime_ns
- qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
810 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
813 static void cpu_throttle_timer_tick(void *opaque
)
818 /* Stop the timer if needed */
819 if (!cpu_throttle_get_percentage()) {
823 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
824 async_run_on_cpu(cpu
, cpu_throttle_thread
,
829 pct
= (double)cpu_throttle_get_percentage()/100;
830 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
831 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
834 void cpu_throttle_set(int new_throttle_pct
)
836 /* Ensure throttle percentage is within valid range */
837 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
838 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
840 atomic_set(&throttle_percentage
, new_throttle_pct
);
842 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
843 CPU_THROTTLE_TIMESLICE_NS
);
846 void cpu_throttle_stop(void)
848 atomic_set(&throttle_percentage
, 0);
/* Return true when the current throttle percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
856 int cpu_throttle_get_percentage(void)
858 return atomic_read(&throttle_percentage
);
861 void cpu_ticks_init(void)
863 seqlock_init(&timers_state
.vm_clock_seqlock
);
864 qemu_spin_init(&timers_state
.vm_clock_lock
);
865 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
866 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
867 cpu_throttle_timer_tick
, NULL
);
870 void configure_icount(QemuOpts
*opts
, Error
**errp
)
873 char *rem_str
= NULL
;
875 option
= qemu_opt_get(opts
, "shift");
877 if (qemu_opt_get(opts
, "align") != NULL
) {
878 error_setg(errp
, "Please specify shift option when using align");
883 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
885 timers_state
.icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
886 icount_timer_cb
, NULL
);
889 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
891 if (icount_align_option
&& !icount_sleep
) {
892 error_setg(errp
, "align=on and sleep=off are incompatible");
894 if (strcmp(option
, "auto") != 0) {
896 timers_state
.icount_time_shift
= strtol(option
, &rem_str
, 0);
897 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
898 error_setg(errp
, "icount: Invalid shift value");
902 } else if (icount_align_option
) {
903 error_setg(errp
, "shift=auto and align=on are incompatible");
904 } else if (!icount_sleep
) {
905 error_setg(errp
, "shift=auto and sleep=off are incompatible");
910 /* 125MIPS seems a reasonable initial guess at the guest speed.
911 It will be corrected fairly quickly anyway. */
912 timers_state
.icount_time_shift
= 3;
914 /* Have both realtime and virtual time triggers for speed adjustment.
915 The realtime trigger catches emulated time passing too slowly,
916 the virtual time trigger catches emulated time passing too fast.
917 Realtime triggers occur even when idle, so use them less frequently
919 timers_state
.vm_clock_warp_start
= -1;
920 timers_state
.icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
921 icount_adjust_rt
, NULL
);
922 timer_mod(timers_state
.icount_rt_timer
,
923 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
924 timers_state
.icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
925 icount_adjust_vm
, NULL
);
926 timer_mod(timers_state
.icount_vm_timer
,
927 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
928 NANOSECONDS_PER_SECOND
/ 10);
931 /***********************************************************/
932 /* TCG vCPU kick timer
934 * The kick timer is responsible for moving single threaded vCPU
935 * emulation on to the next vCPU. If more than one vCPU is running a
936 * timer event will force a cpu->exit so the next vCPU can get
939 * The timer is removed if all vCPUs are idle and restarted again once
940 * idleness is complete.
943 static QEMUTimer
*tcg_kick_vcpu_timer
;
944 static CPUState
*tcg_current_rr_cpu
;
946 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
948 static inline int64_t qemu_tcg_next_kick(void)
950 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
953 /* Kick the currently round-robin scheduled vCPU to next */
954 static void qemu_cpu_kick_rr_next_cpu(void)
958 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
962 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
965 /* Kick all RR vCPUs */
966 static void qemu_cpu_kick_rr_cpus(void)
975 static void do_nothing(CPUState
*cpu
, run_on_cpu_data unused
)
979 void qemu_timer_notify_cb(void *opaque
, QEMUClockType type
)
981 if (!use_icount
|| type
!= QEMU_CLOCK_VIRTUAL
) {
986 if (qemu_in_vcpu_thread()) {
987 /* A CPU is currently running; kick it back out to the
988 * tcg_cpu_exec() loop so it will recalculate its
989 * icount deadline immediately.
991 qemu_cpu_kick(current_cpu
);
992 } else if (first_cpu
) {
993 /* qemu_cpu_kick is not enough to kick a halted CPU out of
994 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
995 * causes cpu_thread_is_idle to return false. This way,
996 * handle_icount_deadline can run.
997 * If we have no CPUs at all for some reason, we don't
998 * need to do anything.
1000 async_run_on_cpu(first_cpu
, do_nothing
, RUN_ON_CPU_NULL
);
1004 static void kick_tcg_thread(void *opaque
)
1006 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
1007 qemu_cpu_kick_rr_next_cpu();
1010 static void start_tcg_kick_timer(void)
1012 assert(!mttcg_enabled
);
1013 if (!tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
1014 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
1015 kick_tcg_thread
, NULL
);
1017 if (tcg_kick_vcpu_timer
&& !timer_pending(tcg_kick_vcpu_timer
)) {
1018 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
1022 static void stop_tcg_kick_timer(void)
1024 assert(!mttcg_enabled
);
1025 if (tcg_kick_vcpu_timer
&& timer_pending(tcg_kick_vcpu_timer
)) {
1026 timer_del(tcg_kick_vcpu_timer
);
1030 /***********************************************************/
1031 void hw_error(const char *fmt
, ...)
1037 fprintf(stderr
, "qemu: hardware error: ");
1038 vfprintf(stderr
, fmt
, ap
);
1039 fprintf(stderr
, "\n");
1041 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
1042 cpu_dump_state(cpu
, stderr
, CPU_DUMP_FPU
);
1048 void cpu_synchronize_all_states(void)
1053 cpu_synchronize_state(cpu
);
1054 /* TODO: move to cpu_synchronize_state() */
1055 if (hvf_enabled()) {
1056 hvf_cpu_synchronize_state(cpu
);
1061 void cpu_synchronize_all_post_reset(void)
1066 cpu_synchronize_post_reset(cpu
);
1067 /* TODO: move to cpu_synchronize_post_reset() */
1068 if (hvf_enabled()) {
1069 hvf_cpu_synchronize_post_reset(cpu
);
1074 void cpu_synchronize_all_post_init(void)
1079 cpu_synchronize_post_init(cpu
);
1080 /* TODO: move to cpu_synchronize_post_init() */
1081 if (hvf_enabled()) {
1082 hvf_cpu_synchronize_post_init(cpu
);
1087 void cpu_synchronize_all_pre_loadvm(void)
1092 cpu_synchronize_pre_loadvm(cpu
);
1096 static int do_vm_stop(RunState state
, bool send_stop
)
1100 if (runstate_is_running()) {
1101 cpu_disable_ticks();
1103 runstate_set(state
);
1104 vm_state_notify(0, state
);
1106 qapi_event_send_stop();
1111 ret
= bdrv_flush_all();
1116 /* Special vm_stop() variant for terminating the process. Historically clients
1117 * did not expect a QMP STOP event and so we need to retain compatibility.
1119 int vm_shutdown(void)
1121 return do_vm_stop(RUN_STATE_SHUTDOWN
, false);
1124 static bool cpu_can_run(CPUState
*cpu
)
1129 if (cpu_is_stopped(cpu
)) {
1135 static void cpu_handle_guest_debug(CPUState
*cpu
)
1137 gdb_set_stop_cpu(cpu
);
1138 qemu_system_debug_request();
1139 cpu
->stopped
= true;
1143 static void sigbus_reraise(void)
1146 struct sigaction action
;
1148 memset(&action
, 0, sizeof(action
));
1149 action
.sa_handler
= SIG_DFL
;
1150 if (!sigaction(SIGBUS
, &action
, NULL
)) {
1153 sigaddset(&set
, SIGBUS
);
1154 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
1156 perror("Failed to re-raise SIGBUS!\n");
1160 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
1162 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
1167 /* Called asynchronously in VCPU thread. */
1168 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
1172 /* Called synchronously (via signalfd) in main thread. */
1173 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
1179 static void qemu_init_sigbus(void)
1181 struct sigaction action
;
1183 memset(&action
, 0, sizeof(action
));
1184 action
.sa_flags
= SA_SIGINFO
;
1185 action
.sa_sigaction
= sigbus_handler
;
1186 sigaction(SIGBUS
, &action
, NULL
);
1188 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
1190 #else /* !CONFIG_LINUX */
1191 static void qemu_init_sigbus(void)
1194 #endif /* !CONFIG_LINUX */
1196 static QemuThread io_thread
;
1199 static QemuCond qemu_cpu_cond
;
1201 static QemuCond qemu_pause_cond
;
1203 void qemu_init_cpu_loop(void)
1206 qemu_cond_init(&qemu_cpu_cond
);
1207 qemu_cond_init(&qemu_pause_cond
);
1208 qemu_mutex_init(&qemu_global_mutex
);
1210 qemu_thread_get_self(&io_thread
);
1213 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1215 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
1218 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1220 if (kvm_destroy_vcpu(cpu
) < 0) {
1221 error_report("kvm_destroy_vcpu failed");
1226 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
1230 static void qemu_cpu_stop(CPUState
*cpu
, bool exit
)
1232 g_assert(qemu_cpu_is_self(cpu
));
1234 cpu
->stopped
= true;
1238 qemu_cond_broadcast(&qemu_pause_cond
);
1241 static void qemu_wait_io_event_common(CPUState
*cpu
)
1243 atomic_mb_set(&cpu
->thread_kicked
, false);
1245 qemu_cpu_stop(cpu
, false);
1247 process_queued_cpu_work(cpu
);
1250 static void qemu_tcg_rr_wait_io_event(void)
1254 while (all_cpu_threads_idle()) {
1255 stop_tcg_kick_timer();
1256 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1259 start_tcg_kick_timer();
1262 qemu_wait_io_event_common(cpu
);
1266 static void qemu_wait_io_event(CPUState
*cpu
)
1270 while (cpu_thread_is_idle(cpu
)) {
1273 qemu_plugin_vcpu_idle_cb(cpu
);
1275 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1278 qemu_plugin_vcpu_resume_cb(cpu
);
1282 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1283 if (!tcg_enabled()) {
1287 qemu_wait_io_event_common(cpu
);
1290 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1292 CPUState
*cpu
= arg
;
1295 rcu_register_thread();
1297 qemu_mutex_lock_iothread();
1298 qemu_thread_get_self(cpu
->thread
);
1299 cpu
->thread_id
= qemu_get_thread_id();
1303 r
= kvm_init_vcpu(cpu
);
1305 error_report("kvm_init_vcpu failed: %s", strerror(-r
));
1309 kvm_init_cpu_signals(cpu
);
1311 /* signal CPU creation */
1312 cpu
->created
= true;
1313 qemu_cond_signal(&qemu_cpu_cond
);
1314 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1317 if (cpu_can_run(cpu
)) {
1318 r
= kvm_cpu_exec(cpu
);
1319 if (r
== EXCP_DEBUG
) {
1320 cpu_handle_guest_debug(cpu
);
1323 qemu_wait_io_event(cpu
);
1324 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1326 qemu_kvm_destroy_vcpu(cpu
);
1327 cpu
->created
= false;
1328 qemu_cond_signal(&qemu_cpu_cond
);
1329 qemu_mutex_unlock_iothread();
1330 rcu_unregister_thread();
1334 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1337 error_report("qtest is not supported under Windows");
1340 CPUState
*cpu
= arg
;
1344 rcu_register_thread();
1346 qemu_mutex_lock_iothread();
1347 qemu_thread_get_self(cpu
->thread
);
1348 cpu
->thread_id
= qemu_get_thread_id();
1352 sigemptyset(&waitset
);
1353 sigaddset(&waitset
, SIG_IPI
);
1355 /* signal CPU creation */
1356 cpu
->created
= true;
1357 qemu_cond_signal(&qemu_cpu_cond
);
1358 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1361 qemu_mutex_unlock_iothread();
1364 r
= sigwait(&waitset
, &sig
);
1365 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1370 qemu_mutex_lock_iothread();
1371 qemu_wait_io_event(cpu
);
1372 } while (!cpu
->unplug
);
1374 qemu_mutex_unlock_iothread();
1375 rcu_unregister_thread();
1380 static int64_t tcg_get_icount_limit(void)
1384 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1386 * Include all the timers, because they may need an attention.
1387 * Too long CPU execution may create unnecessary delay in UI.
1389 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1390 QEMU_TIMER_ATTR_ALL
);
1392 /* Maintain prior (possibly buggy) behaviour where if no deadline
1393 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1394 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1397 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1398 deadline
= INT32_MAX
;
1401 return qemu_icount_round(deadline
);
1403 return replay_get_instructions();
1407 static void handle_icount_deadline(void)
1409 assert(qemu_in_vcpu_thread());
1411 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1412 QEMU_TIMER_ATTR_ALL
);
1414 if (deadline
== 0) {
1415 /* Wake up other AioContexts. */
1416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1417 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
1422 static void prepare_icount_for_run(CPUState
*cpu
)
1427 /* These should always be cleared by process_icount_data after
1428 * each vCPU execution. However u16.high can be raised
1429 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1431 g_assert(cpu_neg(cpu
)->icount_decr
.u16
.low
== 0);
1432 g_assert(cpu
->icount_extra
== 0);
1434 cpu
->icount_budget
= tcg_get_icount_limit();
1435 insns_left
= MIN(0xffff, cpu
->icount_budget
);
1436 cpu_neg(cpu
)->icount_decr
.u16
.low
= insns_left
;
1437 cpu
->icount_extra
= cpu
->icount_budget
- insns_left
;
1439 replay_mutex_lock();
1443 static void process_icount_data(CPUState
*cpu
)
1446 /* Account for executed instructions */
1447 cpu_update_icount(cpu
);
1449 /* Reset the counters */
1450 cpu_neg(cpu
)->icount_decr
.u16
.low
= 0;
1451 cpu
->icount_extra
= 0;
1452 cpu
->icount_budget
= 0;
1454 replay_account_executed_instructions();
1456 replay_mutex_unlock();
1461 static int tcg_cpu_exec(CPUState
*cpu
)
1464 #ifdef CONFIG_PROFILER
1468 assert(tcg_enabled());
1469 #ifdef CONFIG_PROFILER
1470 ti
= profile_getclock();
1472 cpu_exec_start(cpu
);
1473 ret
= cpu_exec(cpu
);
1475 #ifdef CONFIG_PROFILER
1476 atomic_set(&tcg_ctx
->prof
.cpu_exec_time
,
1477 tcg_ctx
->prof
.cpu_exec_time
+ profile_getclock() - ti
);
1482 /* Destroy any remaining vCPUs which have been unplugged and have
1485 static void deal_with_unplugged_cpus(void)
1490 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1491 qemu_tcg_destroy_vcpu(cpu
);
1492 cpu
->created
= false;
1493 qemu_cond_signal(&qemu_cpu_cond
);
1499 /* Single-threaded TCG
1501 * In the single-threaded case each vCPU is simulated in turn. If
1502 * there is more than a single vCPU we create a simple timer to kick
1503 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1504 * This is done explicitly rather than relying on side-effects
1508 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1510 CPUState
*cpu
= arg
;
1512 assert(tcg_enabled());
1513 rcu_register_thread();
1514 tcg_register_thread();
1516 qemu_mutex_lock_iothread();
1517 qemu_thread_get_self(cpu
->thread
);
1519 cpu
->thread_id
= qemu_get_thread_id();
1520 cpu
->created
= true;
1522 qemu_cond_signal(&qemu_cpu_cond
);
1523 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1525 /* wait for initial kick-off after machine start */
1526 while (first_cpu
->stopped
) {
1527 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1529 /* process any pending work */
1532 qemu_wait_io_event_common(cpu
);
1536 start_tcg_kick_timer();
1540 /* process any pending work */
1541 cpu
->exit_request
= 1;
1544 qemu_mutex_unlock_iothread();
1545 replay_mutex_lock();
1546 qemu_mutex_lock_iothread();
1547 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1548 qemu_account_warp_timer();
1550 /* Run the timers here. This is much more efficient than
1551 * waking up the I/O thread and waiting for completion.
1553 handle_icount_deadline();
1555 replay_mutex_unlock();
1561 while (cpu
&& !cpu
->queued_work_first
&& !cpu
->exit_request
) {
1563 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1566 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1567 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1569 if (cpu_can_run(cpu
)) {
1572 qemu_mutex_unlock_iothread();
1573 prepare_icount_for_run(cpu
);
1575 r
= tcg_cpu_exec(cpu
);
1577 process_icount_data(cpu
);
1578 qemu_mutex_lock_iothread();
1580 if (r
== EXCP_DEBUG
) {
1581 cpu_handle_guest_debug(cpu
);
1583 } else if (r
== EXCP_ATOMIC
) {
1584 qemu_mutex_unlock_iothread();
1585 cpu_exec_step_atomic(cpu
);
1586 qemu_mutex_lock_iothread();
1589 } else if (cpu
->stop
) {
1591 cpu
= CPU_NEXT(cpu
);
1596 cpu
= CPU_NEXT(cpu
);
1597 } /* while (cpu && !cpu->exit_request).. */
1599 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1600 atomic_set(&tcg_current_rr_cpu
, NULL
);
1602 if (cpu
&& cpu
->exit_request
) {
1603 atomic_mb_set(&cpu
->exit_request
, 0);
1606 if (use_icount
&& all_cpu_threads_idle()) {
1608 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1609 * in the main_loop, wake it up in order to start the warp timer.
1611 qemu_notify_event();
1614 qemu_tcg_rr_wait_io_event();
1615 deal_with_unplugged_cpus();
1618 rcu_unregister_thread();
1622 static void *qemu_hax_cpu_thread_fn(void *arg
)
1624 CPUState
*cpu
= arg
;
1627 rcu_register_thread();
1628 qemu_mutex_lock_iothread();
1629 qemu_thread_get_self(cpu
->thread
);
1631 cpu
->thread_id
= qemu_get_thread_id();
1632 cpu
->created
= true;
1636 qemu_cond_signal(&qemu_cpu_cond
);
1637 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1640 if (cpu_can_run(cpu
)) {
1641 r
= hax_smp_cpu_exec(cpu
);
1642 if (r
== EXCP_DEBUG
) {
1643 cpu_handle_guest_debug(cpu
);
1647 qemu_wait_io_event(cpu
);
1648 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1649 rcu_unregister_thread();
1653 /* The HVF-specific vCPU thread function. This one should only run when the host
1654 * CPU supports the VMX "unrestricted guest" feature. */
1655 static void *qemu_hvf_cpu_thread_fn(void *arg
)
1657 CPUState
*cpu
= arg
;
1661 assert(hvf_enabled());
1663 rcu_register_thread();
1665 qemu_mutex_lock_iothread();
1666 qemu_thread_get_self(cpu
->thread
);
1668 cpu
->thread_id
= qemu_get_thread_id();
1674 /* signal CPU creation */
1675 cpu
->created
= true;
1676 qemu_cond_signal(&qemu_cpu_cond
);
1677 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1680 if (cpu_can_run(cpu
)) {
1681 r
= hvf_vcpu_exec(cpu
);
1682 if (r
== EXCP_DEBUG
) {
1683 cpu_handle_guest_debug(cpu
);
1686 qemu_wait_io_event(cpu
);
1687 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1689 hvf_vcpu_destroy(cpu
);
1690 cpu
->created
= false;
1691 qemu_cond_signal(&qemu_cpu_cond
);
1692 qemu_mutex_unlock_iothread();
1693 rcu_unregister_thread();
1697 static void *qemu_whpx_cpu_thread_fn(void *arg
)
1699 CPUState
*cpu
= arg
;
1702 rcu_register_thread();
1704 qemu_mutex_lock_iothread();
1705 qemu_thread_get_self(cpu
->thread
);
1706 cpu
->thread_id
= qemu_get_thread_id();
1709 r
= whpx_init_vcpu(cpu
);
1711 fprintf(stderr
, "whpx_init_vcpu failed: %s\n", strerror(-r
));
1715 /* signal CPU creation */
1716 cpu
->created
= true;
1717 qemu_cond_signal(&qemu_cpu_cond
);
1718 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1721 if (cpu_can_run(cpu
)) {
1722 r
= whpx_vcpu_exec(cpu
);
1723 if (r
== EXCP_DEBUG
) {
1724 cpu_handle_guest_debug(cpu
);
1727 while (cpu_thread_is_idle(cpu
)) {
1728 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1730 qemu_wait_io_event_common(cpu
);
1731 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1733 whpx_destroy_vcpu(cpu
);
1734 cpu
->created
= false;
1735 qemu_cond_signal(&qemu_cpu_cond
);
1736 qemu_mutex_unlock_iothread();
1737 rcu_unregister_thread();
1742 static void CALLBACK
dummy_apc_func(ULONG_PTR unused
)
1747 /* Multi-threaded TCG
1749 * In the multi-threaded case each vCPU has its own thread. The TLS
1750 * variable current_cpu can be used deep in the code to find the
1751 * current CPUState for a given thread.
1754 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1756 CPUState
*cpu
= arg
;
1758 assert(tcg_enabled());
1759 g_assert(!use_icount
);
1761 rcu_register_thread();
1762 tcg_register_thread();
1764 qemu_mutex_lock_iothread();
1765 qemu_thread_get_self(cpu
->thread
);
1767 cpu
->thread_id
= qemu_get_thread_id();
1768 cpu
->created
= true;
1771 qemu_cond_signal(&qemu_cpu_cond
);
1772 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1774 /* process any pending work */
1775 cpu
->exit_request
= 1;
1778 if (cpu_can_run(cpu
)) {
1780 qemu_mutex_unlock_iothread();
1781 r
= tcg_cpu_exec(cpu
);
1782 qemu_mutex_lock_iothread();
1785 cpu_handle_guest_debug(cpu
);
1788 /* during start-up the vCPU is reset and the thread is
1789 * kicked several times. If we don't ensure we go back
1790 * to sleep in the halted state we won't cleanly
1791 * start-up when the vCPU is enabled.
1793 * cpu->halted should ensure we sleep in wait_io_event
1795 g_assert(cpu
->halted
);
1798 qemu_mutex_unlock_iothread();
1799 cpu_exec_step_atomic(cpu
);
1800 qemu_mutex_lock_iothread();
1802 /* Ignore everything else? */
1807 atomic_mb_set(&cpu
->exit_request
, 0);
1808 qemu_wait_io_event(cpu
);
1809 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1811 qemu_tcg_destroy_vcpu(cpu
);
1812 cpu
->created
= false;
1813 qemu_cond_signal(&qemu_cpu_cond
);
1814 qemu_mutex_unlock_iothread();
1815 rcu_unregister_thread();
1819 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1824 if (cpu
->thread_kicked
) {
1827 cpu
->thread_kicked
= true;
1828 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1829 if (err
&& err
!= ESRCH
) {
1830 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1834 if (!qemu_cpu_is_self(cpu
)) {
1835 if (whpx_enabled()) {
1836 whpx_vcpu_kick(cpu
);
1837 } else if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1838 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1839 __func__
, GetLastError());
1846 void qemu_cpu_kick(CPUState
*cpu
)
1848 qemu_cond_broadcast(cpu
->halt_cond
);
1849 if (tcg_enabled()) {
1850 if (qemu_tcg_mttcg_enabled()) {
1853 qemu_cpu_kick_rr_cpus();
1856 if (hax_enabled()) {
1858 * FIXME: race condition with the exit_request check in
1861 cpu
->exit_request
= 1;
1863 qemu_cpu_kick_thread(cpu
);
1867 void qemu_cpu_kick_self(void)
1869 assert(current_cpu
);
1870 qemu_cpu_kick_thread(current_cpu
);
1873 bool qemu_cpu_is_self(CPUState
*cpu
)
1875 return qemu_thread_is_self(cpu
->thread
);
1878 bool qemu_in_vcpu_thread(void)
1880 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Per-thread flag: true while this thread holds the global mutex through
 * qemu_mutex_lock_iothread_impl(); cleared by qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Return whether the calling thread currently has iothread_locked set. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1891 * The BQL is taken from so many places that it is worth profiling the
1892 * callers directly, instead of funneling them all through a single function.
1894 void qemu_mutex_lock_iothread_impl(const char *file
, int line
)
1896 QemuMutexLockFunc bql_lock
= atomic_read(&qemu_bql_mutex_lock_func
);
1898 g_assert(!qemu_mutex_iothread_locked());
1899 bql_lock(&qemu_global_mutex
, file
, line
);
1900 iothread_locked
= true;
1903 void qemu_mutex_unlock_iothread(void)
1905 g_assert(qemu_mutex_iothread_locked());
1906 iothread_locked
= false;
1907 qemu_mutex_unlock(&qemu_global_mutex
);
1910 static bool all_vcpus_paused(void)
1915 if (!cpu
->stopped
) {
1923 void pause_all_vcpus(void)
1927 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1929 if (qemu_cpu_is_self(cpu
)) {
1930 qemu_cpu_stop(cpu
, true);
1937 /* We need to drop the replay_lock so any vCPU threads woken up
1938 * can finish their replay tasks
1940 replay_mutex_unlock();
1942 while (!all_vcpus_paused()) {
1943 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1949 qemu_mutex_unlock_iothread();
1950 replay_mutex_lock();
1951 qemu_mutex_lock_iothread();
1954 void cpu_resume(CPUState
*cpu
)
1957 cpu
->stopped
= false;
1961 void resume_all_vcpus(void)
1965 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1971 void cpu_remove_sync(CPUState
*cpu
)
1976 qemu_mutex_unlock_iothread();
1977 qemu_thread_join(cpu
->thread
);
1978 qemu_mutex_lock_iothread();
1981 /* For temporary buffers for forming a name */
1982 #define VCPU_THREAD_NAME_SIZE 16
1984 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1986 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1987 static QemuCond
*single_tcg_halt_cond
;
1988 static QemuThread
*single_tcg_cpu_thread
;
1989 static int tcg_region_inited
;
1991 assert(tcg_enabled());
1993 * Initialize TCG regions--once. Now is a good time, because:
1994 * (1) TCG's init context, prologue and target globals have been set up.
1995 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1996 * -accel flag is processed, so the check doesn't work then).
1998 if (!tcg_region_inited
) {
1999 tcg_region_inited
= 1;
2003 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread
) {
2004 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2005 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2006 qemu_cond_init(cpu
->halt_cond
);
2008 if (qemu_tcg_mttcg_enabled()) {
2009 /* create a thread per vCPU with TCG (MTTCG) */
2010 parallel_cpus
= true;
2011 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
2014 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
2015 cpu
, QEMU_THREAD_JOINABLE
);
2018 /* share a single thread for all cpus with TCG */
2019 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "ALL CPUs/TCG");
2020 qemu_thread_create(cpu
->thread
, thread_name
,
2021 qemu_tcg_rr_cpu_thread_fn
,
2022 cpu
, QEMU_THREAD_JOINABLE
);
2024 single_tcg_halt_cond
= cpu
->halt_cond
;
2025 single_tcg_cpu_thread
= cpu
->thread
;
2028 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2031 /* For non-MTTCG cases we share the thread */
2032 cpu
->thread
= single_tcg_cpu_thread
;
2033 cpu
->halt_cond
= single_tcg_halt_cond
;
2034 cpu
->thread_id
= first_cpu
->thread_id
;
2036 cpu
->created
= true;
2040 static void qemu_hax_start_vcpu(CPUState
*cpu
)
2042 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2044 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2045 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2046 qemu_cond_init(cpu
->halt_cond
);
2048 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HAX",
2050 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hax_cpu_thread_fn
,
2051 cpu
, QEMU_THREAD_JOINABLE
);
2053 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2057 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
2059 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2061 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2062 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2063 qemu_cond_init(cpu
->halt_cond
);
2064 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
2066 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
2067 cpu
, QEMU_THREAD_JOINABLE
);
2070 static void qemu_hvf_start_vcpu(CPUState
*cpu
)
2072 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2074 /* HVF currently does not support TCG, and only runs in
2075 * unrestricted-guest mode. */
2076 assert(hvf_enabled());
2078 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2079 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2080 qemu_cond_init(cpu
->halt_cond
);
2082 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HVF",
2084 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hvf_cpu_thread_fn
,
2085 cpu
, QEMU_THREAD_JOINABLE
);
2088 static void qemu_whpx_start_vcpu(CPUState
*cpu
)
2090 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2092 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2093 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2094 qemu_cond_init(cpu
->halt_cond
);
2095 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/WHPX",
2097 qemu_thread_create(cpu
->thread
, thread_name
, qemu_whpx_cpu_thread_fn
,
2098 cpu
, QEMU_THREAD_JOINABLE
);
2100 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2104 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
2106 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2108 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2109 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2110 qemu_cond_init(cpu
->halt_cond
);
2111 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
2113 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
2114 QEMU_THREAD_JOINABLE
);
2117 void qemu_init_vcpu(CPUState
*cpu
)
2119 MachineState
*ms
= MACHINE(qdev_get_machine());
2121 cpu
->nr_cores
= ms
->smp
.cores
;
2122 cpu
->nr_threads
= ms
->smp
.threads
;
2123 cpu
->stopped
= true;
2124 cpu
->random_seed
= qemu_guest_random_seed_thread_part1();
2127 /* If the target cpu hasn't set up any address spaces itself,
2128 * give it the default one.
2131 cpu_address_space_init(cpu
, 0, "cpu-memory", cpu
->memory
);
2134 if (kvm_enabled()) {
2135 qemu_kvm_start_vcpu(cpu
);
2136 } else if (hax_enabled()) {
2137 qemu_hax_start_vcpu(cpu
);
2138 } else if (hvf_enabled()) {
2139 qemu_hvf_start_vcpu(cpu
);
2140 } else if (tcg_enabled()) {
2141 qemu_tcg_init_vcpu(cpu
);
2142 } else if (whpx_enabled()) {
2143 qemu_whpx_start_vcpu(cpu
);
2145 qemu_dummy_start_vcpu(cpu
);
2148 while (!cpu
->created
) {
2149 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
2153 void cpu_stop_current(void)
2156 current_cpu
->stop
= true;
2157 cpu_exit(current_cpu
);
2161 int vm_stop(RunState state
)
2163 if (qemu_in_vcpu_thread()) {
2164 qemu_system_vmstop_request_prepare();
2165 qemu_system_vmstop_request(state
);
2167 * FIXME: should not return to device code in case
2168 * vm_stop() has been requested.
2174 return do_vm_stop(state
, true);
2178 * Prepare for (re)starting the VM.
2179 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2180 * running or in case of an error condition), 0 otherwise.
2182 int vm_prepare_start(void)
2186 qemu_vmstop_requested(&requested
);
2187 if (runstate_is_running() && requested
== RUN_STATE__MAX
) {
2191 /* Ensure that a STOP/RESUME pair of events is emitted if a
2192 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2193 * example, according to documentation is always followed by
2196 if (runstate_is_running()) {
2197 qapi_event_send_stop();
2198 qapi_event_send_resume();
2202 /* We are sending this now, but the CPUs will be resumed shortly later */
2203 qapi_event_send_resume();
2206 runstate_set(RUN_STATE_RUNNING
);
2207 vm_state_notify(1, RUN_STATE_RUNNING
);
2213 if (!vm_prepare_start()) {
2218 /* does a state transition even if the VM is already stopped,
2219 current state is forgotten forever */
2220 int vm_stop_force_state(RunState state
)
2222 if (runstate_is_running()) {
2223 return vm_stop(state
);
2225 runstate_set(state
);
2228 /* Make sure to return an error if the flush in a previous vm_stop()
2230 return bdrv_flush_all();
2234 void list_cpus(const char *optarg
)
2236 /* XXX: implement xxx_cpu_list for targets that still miss it */
2237 #if defined(cpu_list)
2242 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
2243 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
2249 int64_t orig_addr
= addr
, orig_size
= size
;
2255 cpu
= qemu_get_cpu(cpu_index
);
2257 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
2262 f
= fopen(filename
, "wb");
2264 error_setg_file_open(errp
, errno
, filename
);
2272 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
2273 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
2274 " specified", orig_addr
, orig_size
);
2277 if (fwrite(buf
, 1, l
, f
) != l
) {
2278 error_setg(errp
, QERR_IO_ERROR
);
2289 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
2296 f
= fopen(filename
, "wb");
2298 error_setg_file_open(errp
, errno
, filename
);
2306 cpu_physical_memory_read(addr
, buf
, l
);
2307 if (fwrite(buf
, 1, l
, f
) != l
) {
2308 error_setg(errp
, QERR_IO_ERROR
);
2319 void qmp_inject_nmi(Error
**errp
)
2321 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
2324 void dump_drift_info(void)
2330 qemu_printf("Host - Guest clock %"PRIi64
" ms\n",
2331 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
2332 if (icount_align_option
) {
2333 qemu_printf("Max guest delay %"PRIi64
" ms\n",
2334 -max_delay
/ SCALE_MS
);
2335 qemu_printf("Max guest advance %"PRIi64
" ms\n",
2336 max_advance
/ SCALE_MS
);
2338 qemu_printf("Max guest delay NA\n");
2339 qemu_printf("Max guest advance NA\n");