/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

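/* Illustration: under icount each executed instruction advances
 * QEMU_CLOCK_VIRTUAL by 2^icount_time_shift ns (see cpu_icount_to_ns
 * below), so the MAX_ICOUNT_SHIFT cap of 10 corresponds to 1024 ns per
 * instruction, roughly the 1 MIPS floor mentioned above.
 */
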
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

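/* Illustrative example (assumed values, not definitions from this file):
 * a strongly ordered guest such as x86 sets most TCG_MO_* barrier bits
 * in TCG_GUEST_DEFAULT_MO. On a weakly ordered host backend whose
 * TCG_TARGET_DEFAULT_MO leaves those bits clear, GUEST_MO & ~TARGET_MO
 * is non-zero, the check fails, and MTTCG stays off by default.
 */
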
static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}

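/* Worked example: with an original budget of 10000 instructions,
 * icount_decr.u16.low at 100 and icount_extra at 400, the vCPU has
 * executed 10000 - (100 + 400) = 9500 instructions so far.
 */
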
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

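/* Example: with icount_time_shift == 3 (the initial value chosen in
 * configure_icount below), each instruction accounts for 8 ns of
 * virtual time, i.e. a nominal guest speed of 125 MIPS.
 */
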
/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

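/* Numerical sketch of the rebias above (illustrative values): with
 * qemu_icount == 1000 and icount_time_shift == 3 the instruction clock
 * stands at 8000 ns; if cur_icount is 9000 ns the bias becomes 1000 ns,
 * keeping cpu_get_icount_locked() continuous across the shift change.
 */
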
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

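/* This is a ceiling division by 2^icount_time_shift: for a 20 ns
 * deadline and shift 3, (20 + 7) >> 3 = 3 instructions, so a non-zero
 * deadline never rounds down to a zero instruction budget.
 */
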
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This prevents the warps from being visible externally; for
             * example, you will not be sending network packets continuously
             * instead of every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

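/* Worked example: at a 75% throttle, pct = 0.75 and throttle_ratio = 3,
 * so each 10 ms CPU_THROTTLE_TIMESLICE_NS of run time is followed by a
 * 30 ms sleep; the vCPU runs for roughly a quarter of wall-clock time.
 */
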
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

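/* The re-arm period scales with the throttle: at pct = 0.75 the timer
 * fires every 10 ms / (1 - 0.75) = 40 ms, i.e. one 10 ms run slice plus
 * the 30 ms sleep injected by cpu_throttle_thread() above.
 */
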
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

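/* Command-line sketches exercising this function (illustrative only):
 *
 *   -icount shift=3              fixed rate, 8 ns of virtual time/insn
 *   -icount shift=auto,sleep=on  adaptive rate tuned by icount_adjust()
 *
 * A numeric shift selects use_icount = 1; "auto" selects the adaptive
 * use_icount = 2 mode driven by the two timers armed above.
 */
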
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

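/* tcg_current_rr_cpu may be retargeted between the read and cpu_exit()
 * above; looping until a re-read returns the same vCPU guarantees the
 * one actually running has been kicked.
 */
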
static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}

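/* Worked example: a budget of 70000 instructions does not fit in the
 * 16-bit decrementer, so u16.low receives 0xffff (65535) and
 * icount_extra carries the remaining 4465; cpu_get_icount_executed()
 * later sums both pieces.
 */
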
static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        } else if (cpu->unplug) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            qemu_mutex_unlock_iothread();
            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

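/* A QMP exchange for this handler looks roughly as follows (sketch with
 * invented values; the arch-specific member depends on the target):
 *
 *   -> { "execute": "query-cpus" }
 *   <- { "return": [ { "CPU": 0, "current": true, "halted": false,
 *                      "qom_path": "/machine/unattached/device[0]",
 *                      "thread_id": 3134, "arch": "x86", "pc": 1094123 } ] }
 */
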
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

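/* Illustrative QMP usage (the QAPI schema maps the "val" argument to
 * addr here; values invented):
 *
 *   -> { "execute": "memsave",
 *        "arguments": { "val": 4096, "size": 4096,
 *                       "filename": "/tmp/guest-mem.bin" } }
 *   <- { "return": {} }
 */
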
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}