4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "qemu/cutils.h"
29 #include "migration/vmstate.h"
30 #include "monitor/monitor.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-misc.h"
33 #include "qapi/qapi-events-run-state.h"
34 #include "qapi/qmp/qerror.h"
35 #include "qemu/error-report.h"
36 #include "qemu/qemu-print.h"
37 #include "sysemu/tcg.h"
38 #include "sysemu/block-backend.h"
39 #include "exec/gdbstub.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/hw_accel.h"
42 #include "sysemu/kvm.h"
43 #include "sysemu/hax.h"
44 #include "sysemu/hvf.h"
45 #include "sysemu/whpx.h"
46 #include "exec/exec-all.h"
48 #include "qemu/thread.h"
49 #include "qemu/plugin.h"
50 #include "sysemu/cpus.h"
51 #include "sysemu/qtest.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/option.h"
54 #include "qemu/bitmap.h"
55 #include "qemu/seqlock.h"
56 #include "qemu/guest-random.h"
59 #include "sysemu/replay.h"
60 #include "sysemu/runstate.h"
61 #include "hw/boards.h"
66 #include <sys/prctl.h>
69 #define PR_MCE_KILL 33
72 #ifndef PR_MCE_KILL_SET
73 #define PR_MCE_KILL_SET 1
76 #ifndef PR_MCE_KILL_EARLY
77 #define PR_MCE_KILL_EARLY 1
80 #endif /* CONFIG_LINUX */
82 static QemuMutex qemu_global_mutex
;
87 /* vcpu throttling controls */
88 static QEMUTimer
*throttle_timer
;
89 static unsigned int throttle_percentage
;
91 #define CPU_THROTTLE_PCT_MIN 1
92 #define CPU_THROTTLE_PCT_MAX 99
93 #define CPU_THROTTLE_TIMESLICE_NS 10000000
95 bool cpu_is_stopped(CPUState
*cpu
)
97 return cpu
->stopped
|| !runstate_is_running();
100 static inline bool cpu_work_list_empty(CPUState
*cpu
)
104 qemu_mutex_lock(&cpu
->work_mutex
);
105 ret
= QSIMPLEQ_EMPTY(&cpu
->work_list
);
106 qemu_mutex_unlock(&cpu
->work_mutex
);
110 static bool cpu_thread_is_idle(CPUState
*cpu
)
112 if (cpu
->stop
|| !cpu_work_list_empty(cpu
)) {
115 if (cpu_is_stopped(cpu
)) {
118 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
119 kvm_halt_in_kernel()) {
125 static bool all_cpu_threads_idle(void)
130 if (!cpu_thread_is_idle(cpu
)) {
137 /***********************************************************/
138 /* guest cycle counter */
140 /* Protected by TimersState seqlock */
142 static bool icount_sleep
= true;
143 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
144 #define MAX_ICOUNT_SHIFT 10
146 typedef struct TimersState
{
147 /* Protected by BQL. */
148 int64_t cpu_ticks_prev
;
149 int64_t cpu_ticks_offset
;
151 /* Protect fields that can be respectively read outside the
152 * BQL, and written from multiple threads.
154 QemuSeqLock vm_clock_seqlock
;
155 QemuSpin vm_clock_lock
;
157 int16_t cpu_ticks_enabled
;
159 /* Conversion factor from emulated instructions to virtual clock ticks. */
160 int16_t icount_time_shift
;
162 /* Compensate for varying guest execution speed. */
163 int64_t qemu_icount_bias
;
165 int64_t vm_clock_warp_start
;
166 int64_t cpu_clock_offset
;
168 /* Only written by TCG thread */
171 /* for adjusting icount */
172 QEMUTimer
*icount_rt_timer
;
173 QEMUTimer
*icount_vm_timer
;
174 QEMUTimer
*icount_warp_timer
;
177 static TimersState timers_state
;
181 /* The current number of executed instructions is based on what we
182 * originally budgeted minus the current state of the decrementing
183 * icount counters in extra/u16.low.
185 static int64_t cpu_get_icount_executed(CPUState
*cpu
)
187 return (cpu
->icount_budget
-
188 (cpu_neg(cpu
)->icount_decr
.u16
.low
+ cpu
->icount_extra
));
192 * Update the global shared timer_state.qemu_icount to take into
193 * account executed instructions. This is done by the TCG vCPU
194 * thread so the main-loop can see time has moved forward.
196 static void cpu_update_icount_locked(CPUState
*cpu
)
198 int64_t executed
= cpu_get_icount_executed(cpu
);
199 cpu
->icount_budget
-= executed
;
201 atomic_set_i64(&timers_state
.qemu_icount
,
202 timers_state
.qemu_icount
+ executed
);
206 * Update the global shared timer_state.qemu_icount to take into
207 * account executed instructions. This is done by the TCG vCPU
208 * thread so the main-loop can see time has moved forward.
210 void cpu_update_icount(CPUState
*cpu
)
212 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
213 &timers_state
.vm_clock_lock
);
214 cpu_update_icount_locked(cpu
);
215 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
216 &timers_state
.vm_clock_lock
);
219 static int64_t cpu_get_icount_raw_locked(void)
221 CPUState
*cpu
= current_cpu
;
223 if (cpu
&& cpu
->running
) {
224 if (!cpu
->can_do_io
) {
225 error_report("Bad icount read");
228 /* Take into account what has run */
229 cpu_update_icount_locked(cpu
);
231 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
232 return atomic_read_i64(&timers_state
.qemu_icount
);
235 static int64_t cpu_get_icount_locked(void)
237 int64_t icount
= cpu_get_icount_raw_locked();
238 return atomic_read_i64(&timers_state
.qemu_icount_bias
) +
239 cpu_icount_to_ns(icount
);
242 int64_t cpu_get_icount_raw(void)
248 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
249 icount
= cpu_get_icount_raw_locked();
250 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
255 /* Return the virtual CPU time, based on the instruction counter. */
256 int64_t cpu_get_icount(void)
262 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
263 icount
= cpu_get_icount_locked();
264 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
269 int64_t cpu_icount_to_ns(int64_t icount
)
271 return icount
<< atomic_read(&timers_state
.icount_time_shift
);
274 static int64_t cpu_get_ticks_locked(void)
276 int64_t ticks
= timers_state
.cpu_ticks_offset
;
277 if (timers_state
.cpu_ticks_enabled
) {
278 ticks
+= cpu_get_host_ticks();
281 if (timers_state
.cpu_ticks_prev
> ticks
) {
282 /* Non increasing ticks may happen if the host uses software suspend. */
283 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
284 ticks
= timers_state
.cpu_ticks_prev
;
287 timers_state
.cpu_ticks_prev
= ticks
;
291 /* return the time elapsed in VM between vm_start and vm_stop. Unless
292 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
295 int64_t cpu_get_ticks(void)
300 return cpu_get_icount();
303 qemu_spin_lock(&timers_state
.vm_clock_lock
);
304 ticks
= cpu_get_ticks_locked();
305 qemu_spin_unlock(&timers_state
.vm_clock_lock
);
309 static int64_t cpu_get_clock_locked(void)
313 time
= timers_state
.cpu_clock_offset
;
314 if (timers_state
.cpu_ticks_enabled
) {
321 /* Return the monotonic time elapsed in VM, i.e.,
322 * the time between vm_start and vm_stop
324 int64_t cpu_get_clock(void)
330 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
331 ti
= cpu_get_clock_locked();
332 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
337 /* enable cpu_get_ticks()
338 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
340 void cpu_enable_ticks(void)
342 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
343 &timers_state
.vm_clock_lock
);
344 if (!timers_state
.cpu_ticks_enabled
) {
345 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
346 timers_state
.cpu_clock_offset
-= get_clock();
347 timers_state
.cpu_ticks_enabled
= 1;
349 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
350 &timers_state
.vm_clock_lock
);
353 /* disable cpu_get_ticks() : the clock is stopped. You must not call
354 * cpu_get_ticks() after that.
355 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
357 void cpu_disable_ticks(void)
359 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
360 &timers_state
.vm_clock_lock
);
361 if (timers_state
.cpu_ticks_enabled
) {
362 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
363 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
364 timers_state
.cpu_ticks_enabled
= 0;
366 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
367 &timers_state
.vm_clock_lock
);
370 /* Correlation between real and virtual time is always going to be
371 fairly approximate, so ignore small variation.
372 When the guest is idle real and virtual time will be aligned in
374 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
376 static void icount_adjust(void)
382 /* Protected by TimersState mutex. */
383 static int64_t last_delta
;
385 /* If the VM is not running, then do nothing. */
386 if (!runstate_is_running()) {
390 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
391 &timers_state
.vm_clock_lock
);
392 cur_time
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
393 cpu_get_clock_locked());
394 cur_icount
= cpu_get_icount_locked();
396 delta
= cur_icount
- cur_time
;
397 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
399 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
400 && timers_state
.icount_time_shift
> 0) {
401 /* The guest is getting too far ahead. Slow time down. */
402 atomic_set(&timers_state
.icount_time_shift
,
403 timers_state
.icount_time_shift
- 1);
406 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
407 && timers_state
.icount_time_shift
< MAX_ICOUNT_SHIFT
) {
408 /* The guest is getting too far behind. Speed time up. */
409 atomic_set(&timers_state
.icount_time_shift
,
410 timers_state
.icount_time_shift
+ 1);
413 atomic_set_i64(&timers_state
.qemu_icount_bias
,
414 cur_icount
- (timers_state
.qemu_icount
415 << timers_state
.icount_time_shift
));
416 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
417 &timers_state
.vm_clock_lock
);
420 static void icount_adjust_rt(void *opaque
)
422 timer_mod(timers_state
.icount_rt_timer
,
423 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
427 static void icount_adjust_vm(void *opaque
)
429 timer_mod(timers_state
.icount_vm_timer
,
430 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
431 NANOSECONDS_PER_SECOND
/ 10);
435 static int64_t qemu_icount_round(int64_t count
)
437 int shift
= atomic_read(&timers_state
.icount_time_shift
);
438 return (count
+ (1 << shift
) - 1) >> shift
;
441 static void icount_warp_rt(void)
446 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
447 * changes from -1 to another value, so the race here is okay.
450 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
451 warp_start
= timers_state
.vm_clock_warp_start
;
452 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
454 if (warp_start
== -1) {
458 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
459 &timers_state
.vm_clock_lock
);
460 if (runstate_is_running()) {
461 int64_t clock
= REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT
,
462 cpu_get_clock_locked());
465 warp_delta
= clock
- timers_state
.vm_clock_warp_start
;
466 if (use_icount
== 2) {
468 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
469 * far ahead of real time.
471 int64_t cur_icount
= cpu_get_icount_locked();
472 int64_t delta
= clock
- cur_icount
;
473 warp_delta
= MIN(warp_delta
, delta
);
475 atomic_set_i64(&timers_state
.qemu_icount_bias
,
476 timers_state
.qemu_icount_bias
+ warp_delta
);
478 timers_state
.vm_clock_warp_start
= -1;
479 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
480 &timers_state
.vm_clock_lock
);
482 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
483 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
487 static void icount_timer_cb(void *opaque
)
489 /* No need for a checkpoint because the timer already synchronizes
490 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
495 void qtest_clock_warp(int64_t dest
)
497 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
498 AioContext
*aio_context
;
499 assert(qtest_enabled());
500 aio_context
= qemu_get_aio_context();
501 while (clock
< dest
) {
502 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
503 QEMU_TIMER_ATTR_ALL
);
504 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
506 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
507 &timers_state
.vm_clock_lock
);
508 atomic_set_i64(&timers_state
.qemu_icount_bias
,
509 timers_state
.qemu_icount_bias
+ warp
);
510 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
511 &timers_state
.vm_clock_lock
);
513 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
514 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
515 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
517 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
520 void qemu_start_warp_timer(void)
529 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
530 * do not fire, so computing the deadline does not make sense.
532 if (!runstate_is_running()) {
536 if (replay_mode
!= REPLAY_MODE_PLAY
) {
537 if (!all_cpu_threads_idle()) {
541 if (qtest_enabled()) {
542 /* When testing, qtest commands advance icount. */
546 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
);
548 /* warp clock deterministically in record/replay mode */
549 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
550 /* vCPU is sleeping and warp can't be started.
551 It is probably a race condition: notification sent
552 to vCPU was processed in advance and vCPU went to sleep.
553 Therefore we have to wake it up for doing something. */
554 if (replay_has_checkpoint()) {
555 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
561 /* We want to use the earliest deadline from ALL vm_clocks */
562 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
563 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
564 ~QEMU_TIMER_ATTR_EXTERNAL
);
566 static bool notified
;
567 if (!icount_sleep
&& !notified
) {
568 warn_report("icount sleep disabled and no active timers");
576 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
577 * sleep. Otherwise, the CPU might be waiting for a future timer
578 * interrupt to wake it up, but the interrupt never comes because
579 * the vCPU isn't running any insns and thus doesn't advance the
580 * QEMU_CLOCK_VIRTUAL.
584 * We never let VCPUs sleep in no sleep icount mode.
585 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
586 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
587 * It is useful when we want a deterministic execution time,
588 * isolated from host latencies.
590 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
591 &timers_state
.vm_clock_lock
);
592 atomic_set_i64(&timers_state
.qemu_icount_bias
,
593 timers_state
.qemu_icount_bias
+ deadline
);
594 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
595 &timers_state
.vm_clock_lock
);
596 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
599 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
600 * "real" time, (related to the time left until the next event) has
601 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
602 * This avoids that the warps are visible externally; for example,
603 * you will not be sending network packets continuously instead of
606 seqlock_write_lock(&timers_state
.vm_clock_seqlock
,
607 &timers_state
.vm_clock_lock
);
608 if (timers_state
.vm_clock_warp_start
== -1
609 || timers_state
.vm_clock_warp_start
> clock
) {
610 timers_state
.vm_clock_warp_start
= clock
;
612 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
,
613 &timers_state
.vm_clock_lock
);
614 timer_mod_anticipate(timers_state
.icount_warp_timer
,
617 } else if (deadline
== 0) {
618 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
622 static void qemu_account_warp_timer(void)
624 if (!use_icount
|| !icount_sleep
) {
628 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
629 * do not fire, so computing the deadline does not make sense.
631 if (!runstate_is_running()) {
635 /* warp clock deterministically in record/replay mode */
636 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
640 timer_del(timers_state
.icount_warp_timer
);
644 static bool icount_state_needed(void *opaque
)
649 static bool warp_timer_state_needed(void *opaque
)
651 TimersState
*s
= opaque
;
652 return s
->icount_warp_timer
!= NULL
;
655 static bool adjust_timers_state_needed(void *opaque
)
657 TimersState
*s
= opaque
;
658 return s
->icount_rt_timer
!= NULL
;
661 static bool shift_state_needed(void *opaque
)
663 return use_icount
== 2;
667 * Subsection for warp timer migration is optional, because may not be created
669 static const VMStateDescription icount_vmstate_warp_timer
= {
670 .name
= "timer/icount/warp_timer",
672 .minimum_version_id
= 1,
673 .needed
= warp_timer_state_needed
,
674 .fields
= (VMStateField
[]) {
675 VMSTATE_INT64(vm_clock_warp_start
, TimersState
),
676 VMSTATE_TIMER_PTR(icount_warp_timer
, TimersState
),
677 VMSTATE_END_OF_LIST()
681 static const VMStateDescription icount_vmstate_adjust_timers
= {
682 .name
= "timer/icount/timers",
684 .minimum_version_id
= 1,
685 .needed
= adjust_timers_state_needed
,
686 .fields
= (VMStateField
[]) {
687 VMSTATE_TIMER_PTR(icount_rt_timer
, TimersState
),
688 VMSTATE_TIMER_PTR(icount_vm_timer
, TimersState
),
689 VMSTATE_END_OF_LIST()
693 static const VMStateDescription icount_vmstate_shift
= {
694 .name
= "timer/icount/shift",
696 .minimum_version_id
= 1,
697 .needed
= shift_state_needed
,
698 .fields
= (VMStateField
[]) {
699 VMSTATE_INT16(icount_time_shift
, TimersState
),
700 VMSTATE_END_OF_LIST()
705 * This is a subsection for icount migration.
707 static const VMStateDescription icount_vmstate_timers
= {
708 .name
= "timer/icount",
710 .minimum_version_id
= 1,
711 .needed
= icount_state_needed
,
712 .fields
= (VMStateField
[]) {
713 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
714 VMSTATE_INT64(qemu_icount
, TimersState
),
715 VMSTATE_END_OF_LIST()
717 .subsections
= (const VMStateDescription
*[]) {
718 &icount_vmstate_warp_timer
,
719 &icount_vmstate_adjust_timers
,
720 &icount_vmstate_shift
,
725 static const VMStateDescription vmstate_timers
= {
728 .minimum_version_id
= 1,
729 .fields
= (VMStateField
[]) {
730 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
732 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
733 VMSTATE_END_OF_LIST()
735 .subsections
= (const VMStateDescription
*[]) {
736 &icount_vmstate_timers
,
741 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
744 double throttle_ratio
;
745 int64_t sleeptime_ns
, endtime_ns
;
747 if (!cpu_throttle_get_percentage()) {
751 pct
= (double)cpu_throttle_get_percentage()/100;
752 throttle_ratio
= pct
/ (1 - pct
);
753 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
754 sleeptime_ns
= (int64_t)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
+ 1);
755 endtime_ns
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) + sleeptime_ns
;
756 while (sleeptime_ns
> 0 && !cpu
->stop
) {
757 if (sleeptime_ns
> SCALE_MS
) {
758 qemu_cond_timedwait(cpu
->halt_cond
, &qemu_global_mutex
,
759 sleeptime_ns
/ SCALE_MS
);
761 qemu_mutex_unlock_iothread();
762 g_usleep(sleeptime_ns
/ SCALE_US
);
763 qemu_mutex_lock_iothread();
765 sleeptime_ns
= endtime_ns
- qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
767 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
770 static void cpu_throttle_timer_tick(void *opaque
)
775 /* Stop the timer if needed */
776 if (!cpu_throttle_get_percentage()) {
780 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
781 async_run_on_cpu(cpu
, cpu_throttle_thread
,
786 pct
= (double)cpu_throttle_get_percentage()/100;
787 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
788 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
791 void cpu_throttle_set(int new_throttle_pct
)
793 /* Ensure throttle percentage is within valid range */
794 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
795 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
797 atomic_set(&throttle_percentage
, new_throttle_pct
);
799 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
800 CPU_THROTTLE_TIMESLICE_NS
);
803 void cpu_throttle_stop(void)
805 atomic_set(&throttle_percentage
, 0);
/* Report whether throttling is in effect, i.e. the percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
813 int cpu_throttle_get_percentage(void)
815 return atomic_read(&throttle_percentage
);
818 void cpu_ticks_init(void)
820 seqlock_init(&timers_state
.vm_clock_seqlock
);
821 qemu_spin_init(&timers_state
.vm_clock_lock
);
822 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
823 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
824 cpu_throttle_timer_tick
, NULL
);
827 void configure_icount(QemuOpts
*opts
, Error
**errp
)
829 const char *option
= qemu_opt_get(opts
, "shift");
830 bool sleep
= qemu_opt_get_bool(opts
, "sleep", true);
831 bool align
= qemu_opt_get_bool(opts
, "align", false);
832 long time_shift
= -1;
835 if (qemu_opt_get(opts
, "align") != NULL
) {
836 error_setg(errp
, "Please specify shift option when using align");
841 if (align
&& !sleep
) {
842 error_setg(errp
, "align=on and sleep=off are incompatible");
846 if (strcmp(option
, "auto") != 0) {
847 if (qemu_strtol(option
, NULL
, 0, &time_shift
) < 0
848 || time_shift
< 0 || time_shift
> MAX_ICOUNT_SHIFT
) {
849 error_setg(errp
, "icount: Invalid shift value");
852 } else if (icount_align_option
) {
853 error_setg(errp
, "shift=auto and align=on are incompatible");
855 } else if (!icount_sleep
) {
856 error_setg(errp
, "shift=auto and sleep=off are incompatible");
860 icount_sleep
= sleep
;
862 timers_state
.icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
863 icount_timer_cb
, NULL
);
866 icount_align_option
= align
;
868 if (time_shift
>= 0) {
869 timers_state
.icount_time_shift
= time_shift
;
876 /* 125MIPS seems a reasonable initial guess at the guest speed.
877 It will be corrected fairly quickly anyway. */
878 timers_state
.icount_time_shift
= 3;
880 /* Have both realtime and virtual time triggers for speed adjustment.
881 The realtime trigger catches emulated time passing too slowly,
882 the virtual time trigger catches emulated time passing too fast.
883 Realtime triggers occur even when idle, so use them less frequently
885 timers_state
.vm_clock_warp_start
= -1;
886 timers_state
.icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
887 icount_adjust_rt
, NULL
);
888 timer_mod(timers_state
.icount_rt_timer
,
889 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
890 timers_state
.icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
891 icount_adjust_vm
, NULL
);
892 timer_mod(timers_state
.icount_vm_timer
,
893 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
894 NANOSECONDS_PER_SECOND
/ 10);
897 /***********************************************************/
898 /* TCG vCPU kick timer
900 * The kick timer is responsible for moving single threaded vCPU
901 * emulation on to the next vCPU. If more than one vCPU is running a
902 * timer event will force a cpu->exit so the next vCPU can get
905 * The timer is removed if all vCPUs are idle and restarted again once
906 * idleness is complete.
909 static QEMUTimer
*tcg_kick_vcpu_timer
;
910 static CPUState
*tcg_current_rr_cpu
;
912 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
914 static inline int64_t qemu_tcg_next_kick(void)
916 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
919 /* Kick the currently round-robin scheduled vCPU to next */
920 static void qemu_cpu_kick_rr_next_cpu(void)
924 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
928 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
931 /* Kick all RR vCPUs */
932 static void qemu_cpu_kick_rr_cpus(void)
941 static void do_nothing(CPUState
*cpu
, run_on_cpu_data unused
)
945 void qemu_timer_notify_cb(void *opaque
, QEMUClockType type
)
947 if (!use_icount
|| type
!= QEMU_CLOCK_VIRTUAL
) {
952 if (qemu_in_vcpu_thread()) {
953 /* A CPU is currently running; kick it back out to the
954 * tcg_cpu_exec() loop so it will recalculate its
955 * icount deadline immediately.
957 qemu_cpu_kick(current_cpu
);
958 } else if (first_cpu
) {
959 /* qemu_cpu_kick is not enough to kick a halted CPU out of
960 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
961 * causes cpu_thread_is_idle to return false. This way,
962 * handle_icount_deadline can run.
963 * If we have no CPUs at all for some reason, we don't
964 * need to do anything.
966 async_run_on_cpu(first_cpu
, do_nothing
, RUN_ON_CPU_NULL
);
970 static void kick_tcg_thread(void *opaque
)
972 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
973 qemu_cpu_kick_rr_next_cpu();
976 static void start_tcg_kick_timer(void)
978 assert(!mttcg_enabled
);
979 if (!tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
980 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
981 kick_tcg_thread
, NULL
);
983 if (tcg_kick_vcpu_timer
&& !timer_pending(tcg_kick_vcpu_timer
)) {
984 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
988 static void stop_tcg_kick_timer(void)
990 assert(!mttcg_enabled
);
991 if (tcg_kick_vcpu_timer
&& timer_pending(tcg_kick_vcpu_timer
)) {
992 timer_del(tcg_kick_vcpu_timer
);
996 /***********************************************************/
997 void hw_error(const char *fmt
, ...)
1003 fprintf(stderr
, "qemu: hardware error: ");
1004 vfprintf(stderr
, fmt
, ap
);
1005 fprintf(stderr
, "\n");
1007 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
1008 cpu_dump_state(cpu
, stderr
, CPU_DUMP_FPU
);
1014 void cpu_synchronize_all_states(void)
1019 cpu_synchronize_state(cpu
);
1020 /* TODO: move to cpu_synchronize_state() */
1021 if (hvf_enabled()) {
1022 hvf_cpu_synchronize_state(cpu
);
1027 void cpu_synchronize_all_post_reset(void)
1032 cpu_synchronize_post_reset(cpu
);
1033 /* TODO: move to cpu_synchronize_post_reset() */
1034 if (hvf_enabled()) {
1035 hvf_cpu_synchronize_post_reset(cpu
);
1040 void cpu_synchronize_all_post_init(void)
1045 cpu_synchronize_post_init(cpu
);
1046 /* TODO: move to cpu_synchronize_post_init() */
1047 if (hvf_enabled()) {
1048 hvf_cpu_synchronize_post_init(cpu
);
1053 void cpu_synchronize_all_pre_loadvm(void)
1058 cpu_synchronize_pre_loadvm(cpu
);
1062 static int do_vm_stop(RunState state
, bool send_stop
)
1066 if (runstate_is_running()) {
1067 runstate_set(state
);
1068 cpu_disable_ticks();
1070 vm_state_notify(0, state
);
1072 qapi_event_send_stop();
1077 ret
= bdrv_flush_all();
1082 /* Special vm_stop() variant for terminating the process. Historically clients
1083 * did not expect a QMP STOP event and so we need to retain compatibility.
1085 int vm_shutdown(void)
1087 return do_vm_stop(RUN_STATE_SHUTDOWN
, false);
1090 static bool cpu_can_run(CPUState
*cpu
)
1095 if (cpu_is_stopped(cpu
)) {
1101 static void cpu_handle_guest_debug(CPUState
*cpu
)
1103 gdb_set_stop_cpu(cpu
);
1104 qemu_system_debug_request();
1105 cpu
->stopped
= true;
1109 static void sigbus_reraise(void)
1112 struct sigaction action
;
1114 memset(&action
, 0, sizeof(action
));
1115 action
.sa_handler
= SIG_DFL
;
1116 if (!sigaction(SIGBUS
, &action
, NULL
)) {
1119 sigaddset(&set
, SIGBUS
);
1120 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
1122 perror("Failed to re-raise SIGBUS!\n");
1126 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
1128 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
1133 /* Called asynchronously in VCPU thread. */
1134 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
1138 /* Called synchronously (via signalfd) in main thread. */
1139 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
1145 static void qemu_init_sigbus(void)
1147 struct sigaction action
;
1149 memset(&action
, 0, sizeof(action
));
1150 action
.sa_flags
= SA_SIGINFO
;
1151 action
.sa_sigaction
= sigbus_handler
;
1152 sigaction(SIGBUS
, &action
, NULL
);
1154 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
1156 #else /* !CONFIG_LINUX */
1157 static void qemu_init_sigbus(void)
1160 #endif /* !CONFIG_LINUX */
1162 static QemuThread io_thread
;
1165 static QemuCond qemu_cpu_cond
;
1167 static QemuCond qemu_pause_cond
;
1169 void qemu_init_cpu_loop(void)
1172 qemu_cond_init(&qemu_cpu_cond
);
1173 qemu_cond_init(&qemu_pause_cond
);
1174 qemu_mutex_init(&qemu_global_mutex
);
1176 qemu_thread_get_self(&io_thread
);
1179 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1181 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
1184 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1186 if (kvm_destroy_vcpu(cpu
) < 0) {
1187 error_report("kvm_destroy_vcpu failed");
1192 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
1196 static void qemu_cpu_stop(CPUState
*cpu
, bool exit
)
1198 g_assert(qemu_cpu_is_self(cpu
));
1200 cpu
->stopped
= true;
1204 qemu_cond_broadcast(&qemu_pause_cond
);
1207 static void qemu_wait_io_event_common(CPUState
*cpu
)
1209 atomic_mb_set(&cpu
->thread_kicked
, false);
1211 qemu_cpu_stop(cpu
, false);
1213 process_queued_cpu_work(cpu
);
1216 static void qemu_tcg_rr_wait_io_event(void)
1220 while (all_cpu_threads_idle()) {
1221 stop_tcg_kick_timer();
1222 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1225 start_tcg_kick_timer();
1228 qemu_wait_io_event_common(cpu
);
1232 static void qemu_wait_io_event(CPUState
*cpu
)
1236 while (cpu_thread_is_idle(cpu
)) {
1239 qemu_plugin_vcpu_idle_cb(cpu
);
1241 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1244 qemu_plugin_vcpu_resume_cb(cpu
);
1248 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1249 if (!tcg_enabled()) {
1253 qemu_wait_io_event_common(cpu
);
1256 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1258 CPUState
*cpu
= arg
;
1261 rcu_register_thread();
1263 qemu_mutex_lock_iothread();
1264 qemu_thread_get_self(cpu
->thread
);
1265 cpu
->thread_id
= qemu_get_thread_id();
1269 r
= kvm_init_vcpu(cpu
);
1271 error_report("kvm_init_vcpu failed: %s", strerror(-r
));
1275 kvm_init_cpu_signals(cpu
);
1277 /* signal CPU creation */
1278 cpu
->created
= true;
1279 qemu_cond_signal(&qemu_cpu_cond
);
1280 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1283 if (cpu_can_run(cpu
)) {
1284 r
= kvm_cpu_exec(cpu
);
1285 if (r
== EXCP_DEBUG
) {
1286 cpu_handle_guest_debug(cpu
);
1289 qemu_wait_io_event(cpu
);
1290 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1292 qemu_kvm_destroy_vcpu(cpu
);
1293 cpu
->created
= false;
1294 qemu_cond_signal(&qemu_cpu_cond
);
1295 qemu_mutex_unlock_iothread();
1296 rcu_unregister_thread();
1300 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1303 error_report("qtest is not supported under Windows");
1306 CPUState
*cpu
= arg
;
1310 rcu_register_thread();
1312 qemu_mutex_lock_iothread();
1313 qemu_thread_get_self(cpu
->thread
);
1314 cpu
->thread_id
= qemu_get_thread_id();
1318 sigemptyset(&waitset
);
1319 sigaddset(&waitset
, SIG_IPI
);
1321 /* signal CPU creation */
1322 cpu
->created
= true;
1323 qemu_cond_signal(&qemu_cpu_cond
);
1324 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1327 qemu_mutex_unlock_iothread();
1330 r
= sigwait(&waitset
, &sig
);
1331 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1336 qemu_mutex_lock_iothread();
1337 qemu_wait_io_event(cpu
);
1338 } while (!cpu
->unplug
);
1340 qemu_mutex_unlock_iothread();
1341 rcu_unregister_thread();
1346 static int64_t tcg_get_icount_limit(void)
1350 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1352 * Include all the timers, because they may need an attention.
1353 * Too long CPU execution may create unnecessary delay in UI.
1355 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1356 QEMU_TIMER_ATTR_ALL
);
1357 /* Check realtime timers, because they help with input processing */
1358 deadline
= qemu_soonest_timeout(deadline
,
1359 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME
,
1360 QEMU_TIMER_ATTR_ALL
));
1362 /* Maintain prior (possibly buggy) behaviour where if no deadline
1363 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1364 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1367 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1368 deadline
= INT32_MAX
;
1371 return qemu_icount_round(deadline
);
1373 return replay_get_instructions();
1377 static void handle_icount_deadline(void)
1379 assert(qemu_in_vcpu_thread());
1381 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
,
1382 QEMU_TIMER_ATTR_ALL
);
1384 if (deadline
== 0) {
1385 /* Wake up other AioContexts. */
1386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1387 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
1392 static void prepare_icount_for_run(CPUState
*cpu
)
1397 /* These should always be cleared by process_icount_data after
1398 * each vCPU execution. However u16.high can be raised
1399 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1401 g_assert(cpu_neg(cpu
)->icount_decr
.u16
.low
== 0);
1402 g_assert(cpu
->icount_extra
== 0);
1404 cpu
->icount_budget
= tcg_get_icount_limit();
1405 insns_left
= MIN(0xffff, cpu
->icount_budget
);
1406 cpu_neg(cpu
)->icount_decr
.u16
.low
= insns_left
;
1407 cpu
->icount_extra
= cpu
->icount_budget
- insns_left
;
1409 replay_mutex_lock();
1413 static void process_icount_data(CPUState
*cpu
)
1416 /* Account for executed instructions */
1417 cpu_update_icount(cpu
);
1419 /* Reset the counters */
1420 cpu_neg(cpu
)->icount_decr
.u16
.low
= 0;
1421 cpu
->icount_extra
= 0;
1422 cpu
->icount_budget
= 0;
1424 replay_account_executed_instructions();
1426 replay_mutex_unlock();
1431 static int tcg_cpu_exec(CPUState
*cpu
)
1434 #ifdef CONFIG_PROFILER
1438 assert(tcg_enabled());
1439 #ifdef CONFIG_PROFILER
1440 ti
= profile_getclock();
1442 cpu_exec_start(cpu
);
1443 ret
= cpu_exec(cpu
);
1445 #ifdef CONFIG_PROFILER
1446 atomic_set(&tcg_ctx
->prof
.cpu_exec_time
,
1447 tcg_ctx
->prof
.cpu_exec_time
+ profile_getclock() - ti
);
1452 /* Destroy any remaining vCPUs which have been unplugged and have
1455 static void deal_with_unplugged_cpus(void)
1460 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1461 qemu_tcg_destroy_vcpu(cpu
);
1462 cpu
->created
= false;
1463 qemu_cond_signal(&qemu_cpu_cond
);
1469 /* Single-threaded TCG
1471 * In the single-threaded case each vCPU is simulated in turn. If
1472 * there is more than a single vCPU we create a simple timer to kick
1473 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1474 * This is done explicitly rather than relying on side-effects
1478 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1480 CPUState
*cpu
= arg
;
1482 assert(tcg_enabled());
1483 rcu_register_thread();
1484 tcg_register_thread();
1486 qemu_mutex_lock_iothread();
1487 qemu_thread_get_self(cpu
->thread
);
1489 cpu
->thread_id
= qemu_get_thread_id();
1490 cpu
->created
= true;
1492 qemu_cond_signal(&qemu_cpu_cond
);
1493 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1495 /* wait for initial kick-off after machine start */
1496 while (first_cpu
->stopped
) {
1497 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1499 /* process any pending work */
1502 qemu_wait_io_event_common(cpu
);
1506 start_tcg_kick_timer();
1510 /* process any pending work */
1511 cpu
->exit_request
= 1;
1514 qemu_mutex_unlock_iothread();
1515 replay_mutex_lock();
1516 qemu_mutex_lock_iothread();
1517 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1518 qemu_account_warp_timer();
1520 /* Run the timers here. This is much more efficient than
1521 * waking up the I/O thread and waiting for completion.
1523 handle_icount_deadline();
1525 replay_mutex_unlock();
1531 while (cpu
&& cpu_work_list_empty(cpu
) && !cpu
->exit_request
) {
1533 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1536 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1537 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1539 if (cpu_can_run(cpu
)) {
1542 qemu_mutex_unlock_iothread();
1543 prepare_icount_for_run(cpu
);
1545 r
= tcg_cpu_exec(cpu
);
1547 process_icount_data(cpu
);
1548 qemu_mutex_lock_iothread();
1550 if (r
== EXCP_DEBUG
) {
1551 cpu_handle_guest_debug(cpu
);
1553 } else if (r
== EXCP_ATOMIC
) {
1554 qemu_mutex_unlock_iothread();
1555 cpu_exec_step_atomic(cpu
);
1556 qemu_mutex_lock_iothread();
1559 } else if (cpu
->stop
) {
1561 cpu
= CPU_NEXT(cpu
);
1566 cpu
= CPU_NEXT(cpu
);
1567 } /* while (cpu && !cpu->exit_request).. */
1569 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1570 atomic_set(&tcg_current_rr_cpu
, NULL
);
1572 if (cpu
&& cpu
->exit_request
) {
1573 atomic_mb_set(&cpu
->exit_request
, 0);
1576 if (use_icount
&& all_cpu_threads_idle()) {
1578 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1579 * in the main_loop, wake it up in order to start the warp timer.
1581 qemu_notify_event();
1584 qemu_tcg_rr_wait_io_event();
1585 deal_with_unplugged_cpus();
1588 rcu_unregister_thread();
1592 static void *qemu_hax_cpu_thread_fn(void *arg
)
1594 CPUState
*cpu
= arg
;
1597 rcu_register_thread();
1598 qemu_mutex_lock_iothread();
1599 qemu_thread_get_self(cpu
->thread
);
1601 cpu
->thread_id
= qemu_get_thread_id();
1602 cpu
->created
= true;
1606 qemu_cond_signal(&qemu_cpu_cond
);
1607 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1610 if (cpu_can_run(cpu
)) {
1611 r
= hax_smp_cpu_exec(cpu
);
1612 if (r
== EXCP_DEBUG
) {
1613 cpu_handle_guest_debug(cpu
);
1617 qemu_wait_io_event(cpu
);
1618 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1619 rcu_unregister_thread();
1623 /* The HVF-specific vCPU thread function. This one should only run when the host
1624 * CPU supports the VMX "unrestricted guest" feature. */
1625 static void *qemu_hvf_cpu_thread_fn(void *arg
)
1627 CPUState
*cpu
= arg
;
1631 assert(hvf_enabled());
1633 rcu_register_thread();
1635 qemu_mutex_lock_iothread();
1636 qemu_thread_get_self(cpu
->thread
);
1638 cpu
->thread_id
= qemu_get_thread_id();
1644 /* signal CPU creation */
1645 cpu
->created
= true;
1646 qemu_cond_signal(&qemu_cpu_cond
);
1647 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1650 if (cpu_can_run(cpu
)) {
1651 r
= hvf_vcpu_exec(cpu
);
1652 if (r
== EXCP_DEBUG
) {
1653 cpu_handle_guest_debug(cpu
);
1656 qemu_wait_io_event(cpu
);
1657 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1659 hvf_vcpu_destroy(cpu
);
1660 cpu
->created
= false;
1661 qemu_cond_signal(&qemu_cpu_cond
);
1662 qemu_mutex_unlock_iothread();
1663 rcu_unregister_thread();
1667 static void *qemu_whpx_cpu_thread_fn(void *arg
)
1669 CPUState
*cpu
= arg
;
1672 rcu_register_thread();
1674 qemu_mutex_lock_iothread();
1675 qemu_thread_get_self(cpu
->thread
);
1676 cpu
->thread_id
= qemu_get_thread_id();
1679 r
= whpx_init_vcpu(cpu
);
1681 fprintf(stderr
, "whpx_init_vcpu failed: %s\n", strerror(-r
));
1685 /* signal CPU creation */
1686 cpu
->created
= true;
1687 qemu_cond_signal(&qemu_cpu_cond
);
1688 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1691 if (cpu_can_run(cpu
)) {
1692 r
= whpx_vcpu_exec(cpu
);
1693 if (r
== EXCP_DEBUG
) {
1694 cpu_handle_guest_debug(cpu
);
1697 while (cpu_thread_is_idle(cpu
)) {
1698 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1700 qemu_wait_io_event_common(cpu
);
1701 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1703 whpx_destroy_vcpu(cpu
);
1704 cpu
->created
= false;
1705 qemu_cond_signal(&qemu_cpu_cond
);
1706 qemu_mutex_unlock_iothread();
1707 rcu_unregister_thread();
#ifdef _WIN32
/*
 * Empty APC routine.  qemu_cpu_kick_thread() queues it with QueueUserAPC()
 * on the target vCPU thread; the argument is unused.
 */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1717 /* Multi-threaded TCG
1719 * In the multi-threaded case each vCPU has its own thread. The TLS
1720 * variable current_cpu can be used deep in the code to find the
1721 * current CPUState for a given thread.
1724 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1726 CPUState
*cpu
= arg
;
1728 assert(tcg_enabled());
1729 g_assert(!use_icount
);
1731 rcu_register_thread();
1732 tcg_register_thread();
1734 qemu_mutex_lock_iothread();
1735 qemu_thread_get_self(cpu
->thread
);
1737 cpu
->thread_id
= qemu_get_thread_id();
1738 cpu
->created
= true;
1741 qemu_cond_signal(&qemu_cpu_cond
);
1742 qemu_guest_random_seed_thread_part2(cpu
->random_seed
);
1744 /* process any pending work */
1745 cpu
->exit_request
= 1;
1748 if (cpu_can_run(cpu
)) {
1750 qemu_mutex_unlock_iothread();
1751 r
= tcg_cpu_exec(cpu
);
1752 qemu_mutex_lock_iothread();
1755 cpu_handle_guest_debug(cpu
);
1758 /* during start-up the vCPU is reset and the thread is
1759 * kicked several times. If we don't ensure we go back
1760 * to sleep in the halted state we won't cleanly
1761 * start-up when the vCPU is enabled.
1763 * cpu->halted should ensure we sleep in wait_io_event
1765 g_assert(cpu
->halted
);
1768 qemu_mutex_unlock_iothread();
1769 cpu_exec_step_atomic(cpu
);
1770 qemu_mutex_lock_iothread();
1772 /* Ignore everything else? */
1777 atomic_mb_set(&cpu
->exit_request
, 0);
1778 qemu_wait_io_event(cpu
);
1779 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1781 qemu_tcg_destroy_vcpu(cpu
);
1782 cpu
->created
= false;
1783 qemu_cond_signal(&qemu_cpu_cond
);
1784 qemu_mutex_unlock_iothread();
1785 rcu_unregister_thread();
1789 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1794 if (cpu
->thread_kicked
) {
1797 cpu
->thread_kicked
= true;
1798 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1799 if (err
&& err
!= ESRCH
) {
1800 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1804 if (!qemu_cpu_is_self(cpu
)) {
1805 if (whpx_enabled()) {
1806 whpx_vcpu_kick(cpu
);
1807 } else if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1808 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1809 __func__
, GetLastError());
1816 void qemu_cpu_kick(CPUState
*cpu
)
1818 qemu_cond_broadcast(cpu
->halt_cond
);
1819 if (tcg_enabled()) {
1820 if (qemu_tcg_mttcg_enabled()) {
1823 qemu_cpu_kick_rr_cpus();
1826 if (hax_enabled()) {
1828 * FIXME: race condition with the exit_request check in
1831 cpu
->exit_request
= 1;
1833 qemu_cpu_kick_thread(cpu
);
1837 void qemu_cpu_kick_self(void)
1839 assert(current_cpu
);
1840 qemu_cpu_kick_thread(current_cpu
);
1843 bool qemu_cpu_is_self(CPUState
*cpu
)
1845 return qemu_thread_is_self(cpu
->thread
);
1848 bool qemu_in_vcpu_thread(void)
1850 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Per-thread flag maintained by the iothread lock/unlock helpers: set after
 * the lock is taken, cleared just before it is released.
 */
static __thread bool iothread_locked = false;

/* Return true if the calling thread holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1861 * The BQL is taken from so many places that it is worth profiling the
1862 * callers directly, instead of funneling them all through a single function.
1864 void qemu_mutex_lock_iothread_impl(const char *file
, int line
)
1866 QemuMutexLockFunc bql_lock
= atomic_read(&qemu_bql_mutex_lock_func
);
1868 g_assert(!qemu_mutex_iothread_locked());
1869 bql_lock(&qemu_global_mutex
, file
, line
);
1870 iothread_locked
= true;
1873 void qemu_mutex_unlock_iothread(void)
1875 g_assert(qemu_mutex_iothread_locked());
1876 iothread_locked
= false;
1877 qemu_mutex_unlock(&qemu_global_mutex
);
1880 void qemu_cond_wait_iothread(QemuCond
*cond
)
1882 qemu_cond_wait(cond
, &qemu_global_mutex
);
1885 static bool all_vcpus_paused(void)
1890 if (!cpu
->stopped
) {
1898 void pause_all_vcpus(void)
1902 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1904 if (qemu_cpu_is_self(cpu
)) {
1905 qemu_cpu_stop(cpu
, true);
1912 /* We need to drop the replay_lock so any vCPU threads woken up
1913 * can finish their replay tasks
1915 replay_mutex_unlock();
1917 while (!all_vcpus_paused()) {
1918 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1924 qemu_mutex_unlock_iothread();
1925 replay_mutex_lock();
1926 qemu_mutex_lock_iothread();
1929 void cpu_resume(CPUState
*cpu
)
1932 cpu
->stopped
= false;
1936 void resume_all_vcpus(void)
1940 if (!runstate_is_running()) {
1944 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1950 void cpu_remove_sync(CPUState
*cpu
)
1955 qemu_mutex_unlock_iothread();
1956 qemu_thread_join(cpu
->thread
);
1957 qemu_mutex_lock_iothread();
/* Size, including the terminating NUL, of buffers used to form vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
1963 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1965 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1966 static QemuCond
*single_tcg_halt_cond
;
1967 static QemuThread
*single_tcg_cpu_thread
;
1968 static int tcg_region_inited
;
1970 assert(tcg_enabled());
1972 * Initialize TCG regions--once. Now is a good time, because:
1973 * (1) TCG's init context, prologue and target globals have been set up.
1974 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1975 * -accel flag is processed, so the check doesn't work then).
1977 if (!tcg_region_inited
) {
1978 tcg_region_inited
= 1;
1982 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread
) {
1983 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1984 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1985 qemu_cond_init(cpu
->halt_cond
);
1987 if (qemu_tcg_mttcg_enabled()) {
1988 /* create a thread per vCPU with TCG (MTTCG) */
1989 parallel_cpus
= true;
1990 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1993 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1994 cpu
, QEMU_THREAD_JOINABLE
);
1997 /* share a single thread for all cpus with TCG */
1998 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "ALL CPUs/TCG");
1999 qemu_thread_create(cpu
->thread
, thread_name
,
2000 qemu_tcg_rr_cpu_thread_fn
,
2001 cpu
, QEMU_THREAD_JOINABLE
);
2003 single_tcg_halt_cond
= cpu
->halt_cond
;
2004 single_tcg_cpu_thread
= cpu
->thread
;
2007 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2010 /* For non-MTTCG cases we share the thread */
2011 cpu
->thread
= single_tcg_cpu_thread
;
2012 cpu
->halt_cond
= single_tcg_halt_cond
;
2013 cpu
->thread_id
= first_cpu
->thread_id
;
2015 cpu
->created
= true;
2019 static void qemu_hax_start_vcpu(CPUState
*cpu
)
2021 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2023 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2024 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2025 qemu_cond_init(cpu
->halt_cond
);
2027 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HAX",
2029 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hax_cpu_thread_fn
,
2030 cpu
, QEMU_THREAD_JOINABLE
);
2032 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2036 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
2038 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2040 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2041 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2042 qemu_cond_init(cpu
->halt_cond
);
2043 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
2045 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
2046 cpu
, QEMU_THREAD_JOINABLE
);
2049 static void qemu_hvf_start_vcpu(CPUState
*cpu
)
2051 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2053 /* HVF currently does not support TCG, and only runs in
2054 * unrestricted-guest mode. */
2055 assert(hvf_enabled());
2057 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2058 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2059 qemu_cond_init(cpu
->halt_cond
);
2061 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HVF",
2063 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hvf_cpu_thread_fn
,
2064 cpu
, QEMU_THREAD_JOINABLE
);
2067 static void qemu_whpx_start_vcpu(CPUState
*cpu
)
2069 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2071 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2072 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2073 qemu_cond_init(cpu
->halt_cond
);
2074 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/WHPX",
2076 qemu_thread_create(cpu
->thread
, thread_name
, qemu_whpx_cpu_thread_fn
,
2077 cpu
, QEMU_THREAD_JOINABLE
);
2079 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
2083 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
2085 char thread_name
[VCPU_THREAD_NAME_SIZE
];
2087 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
2088 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
2089 qemu_cond_init(cpu
->halt_cond
);
2090 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
2092 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
2093 QEMU_THREAD_JOINABLE
);
2096 void qemu_init_vcpu(CPUState
*cpu
)
2098 MachineState
*ms
= MACHINE(qdev_get_machine());
2100 cpu
->nr_cores
= ms
->smp
.cores
;
2101 cpu
->nr_threads
= ms
->smp
.threads
;
2102 cpu
->stopped
= true;
2103 cpu
->random_seed
= qemu_guest_random_seed_thread_part1();
2106 /* If the target cpu hasn't set up any address spaces itself,
2107 * give it the default one.
2110 cpu_address_space_init(cpu
, 0, "cpu-memory", cpu
->memory
);
2113 if (kvm_enabled()) {
2114 qemu_kvm_start_vcpu(cpu
);
2115 } else if (hax_enabled()) {
2116 qemu_hax_start_vcpu(cpu
);
2117 } else if (hvf_enabled()) {
2118 qemu_hvf_start_vcpu(cpu
);
2119 } else if (tcg_enabled()) {
2120 qemu_tcg_init_vcpu(cpu
);
2121 } else if (whpx_enabled()) {
2122 qemu_whpx_start_vcpu(cpu
);
2124 qemu_dummy_start_vcpu(cpu
);
2127 while (!cpu
->created
) {
2128 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
2132 void cpu_stop_current(void)
2135 current_cpu
->stop
= true;
2136 cpu_exit(current_cpu
);
2140 int vm_stop(RunState state
)
2142 if (qemu_in_vcpu_thread()) {
2143 qemu_system_vmstop_request_prepare();
2144 qemu_system_vmstop_request(state
);
2146 * FIXME: should not return to device code in case
2147 * vm_stop() has been requested.
2153 return do_vm_stop(state
, true);
2157 * Prepare for (re)starting the VM.
2158 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2159 * running or in case of an error condition), 0 otherwise.
2161 int vm_prepare_start(void)
2165 qemu_vmstop_requested(&requested
);
2166 if (runstate_is_running() && requested
== RUN_STATE__MAX
) {
2170 /* Ensure that a STOP/RESUME pair of events is emitted if a
2171 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2172 * example, according to documentation is always followed by
2175 if (runstate_is_running()) {
2176 qapi_event_send_stop();
2177 qapi_event_send_resume();
2181 /* We are sending this now, but the CPUs will be resumed shortly later */
2182 qapi_event_send_resume();
2185 runstate_set(RUN_STATE_RUNNING
);
2186 vm_state_notify(1, RUN_STATE_RUNNING
);
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
2197 /* does a state transition even if the VM is already stopped,
2198 current state is forgotten forever */
2199 int vm_stop_force_state(RunState state
)
2201 if (runstate_is_running()) {
2202 return vm_stop(state
);
2204 runstate_set(state
);
2207 /* Make sure to return an error if the flush in a previous vm_stop()
2209 return bdrv_flush_all();
/*
 * List the available CPU models by calling the target's cpu_list macro,
 * when the target defines one.  @optarg is unused here.
 */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}
2221 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
2222 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
2228 int64_t orig_addr
= addr
, orig_size
= size
;
2234 cpu
= qemu_get_cpu(cpu_index
);
2236 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
2241 f
= fopen(filename
, "wb");
2243 error_setg_file_open(errp
, errno
, filename
);
2251 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
2252 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
2253 " specified", orig_addr
, orig_size
);
2256 if (fwrite(buf
, 1, l
, f
) != l
) {
2257 error_setg(errp
, QERR_IO_ERROR
);
2268 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
2275 f
= fopen(filename
, "wb");
2277 error_setg_file_open(errp
, errno
, filename
);
2285 cpu_physical_memory_read(addr
, buf
, l
);
2286 if (fwrite(buf
, 1, l
, f
) != l
) {
2287 error_setg(errp
, QERR_IO_ERROR
);
2298 void qmp_inject_nmi(Error
**errp
)
2300 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
2303 void dump_drift_info(void)
2309 qemu_printf("Host - Guest clock %"PRIi64
" ms\n",
2310 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
2311 if (icount_align_option
) {
2312 qemu_printf("Max guest delay %"PRIi64
" ms\n",
2313 -max_delay
/ SCALE_MS
);
2314 qemu_printf("Max guest advance %"PRIi64
" ms\n",
2315 max_advance
/ SCALE_MS
);
2317 qemu_printf("Max guest delay NA\n");
2318 qemu_printf("Max guest advance NA\n");