4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
50 #include "qapi-event.h"
52 #include "sysemu/replay.h"
56 #include <sys/prctl.h>
59 #define PR_MCE_KILL 33
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
70 #endif /* CONFIG_LINUX */
75 /* vcpu throttling controls */
76 static QEMUTimer
*throttle_timer
;
77 static unsigned int throttle_percentage
;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState
*cpu
)
85 return cpu
->stopped
|| !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState
*cpu
)
90 if (cpu
->stop
|| cpu
->queued_work_first
) {
93 if (cpu_is_stopped(cpu
)) {
96 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
97 kvm_halt_in_kernel()) {
103 static bool all_cpu_threads_idle(void)
108 if (!cpu_thread_is_idle(cpu
)) {
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep
= true;
121 static int64_t vm_clock_warp_start
= -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift
;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer
*icount_rt_timer
;
128 static QEMUTimer
*icount_vm_timer
;
129 static QEMUTimer
*icount_warp_timer
;
131 typedef struct TimersState
{
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev
;
134 int64_t cpu_ticks_offset
;
136 /* cpu_clock_offset can be read out of BQL, so protect it with
139 QemuSeqLock vm_clock_seqlock
;
140 int64_t cpu_clock_offset
;
141 int32_t cpu_ticks_enabled
;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias
;
146 /* Only written by TCG thread */
150 static TimersState timers_state
;
154 * We default to false if we know other options have been enabled
155 * which are currently incompatible with MTTCG. Otherwise when each
156 * guest (target) has been updated to support:
157 * - atomic instructions
158 * - memory ordering primitives (barriers)
159 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
/* True when the guest's required memory ordering is no stronger than
 * what the TCG host backend provides; conservatively false when either
 * default memory-order macro is not defined for this build.
 */
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* Any ordering bit the guest needs but the host lacks is fatal. */
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
182 static bool default_mttcg_enabled(void)
184 if (use_icount
|| TCG_OVERSIZED_GUEST
) {
187 #ifdef TARGET_SUPPORTS_MTTCG
188 return check_tcg_memory_orders_compatible();
195 void qemu_tcg_configure(QemuOpts
*opts
, Error
**errp
)
197 const char *t
= qemu_opt_get(opts
, "thread");
199 if (strcmp(t
, "multi") == 0) {
200 if (TCG_OVERSIZED_GUEST
) {
201 error_setg(errp
, "No MTTCG when guest word size > hosts");
202 } else if (use_icount
) {
203 error_setg(errp
, "No MTTCG when icount is enabled");
205 #ifndef TARGET_SUPPORTS_MTTCG
206 error_report("Guest not yet converted to MTTCG - "
207 "you may get unexpected results");
209 if (!check_tcg_memory_orders_compatible()) {
210 error_report("Guest expects a stronger memory ordering "
211 "than the host provides");
212 error_printf("This may cause strange/hard to debug errors\n");
214 mttcg_enabled
= true;
216 } else if (strcmp(t
, "single") == 0) {
217 mttcg_enabled
= false;
219 error_setg(errp
, "Invalid 'thread' setting %s", t
);
222 mttcg_enabled
= default_mttcg_enabled();
226 /* The current number of executed instructions is based on what we
227 * originally budgeted minus the current state of the decrementing
228 * icount counters in extra/u16.low.
230 static int64_t cpu_get_icount_executed(CPUState
*cpu
)
232 return cpu
->icount_budget
- (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
236 * Update the global shared timer_state.qemu_icount to take into
237 * account executed instructions. This is done by the TCG vCPU
238 * thread so the main-loop can see time has moved forward.
240 void cpu_update_icount(CPUState
*cpu
)
242 int64_t executed
= cpu_get_icount_executed(cpu
);
243 cpu
->icount_budget
-= executed
;
245 #ifdef CONFIG_ATOMIC64
246 atomic_set__nocheck(&timers_state
.qemu_icount
,
247 atomic_read__nocheck(&timers_state
.qemu_icount
) +
249 #else /* FIXME: we need 64bit atomics to do this safely */
250 timers_state
.qemu_icount
+= executed
;
254 int64_t cpu_get_icount_raw(void)
256 CPUState
*cpu
= current_cpu
;
258 if (cpu
&& cpu
->running
) {
259 if (!cpu
->can_do_io
) {
260 fprintf(stderr
, "Bad icount read\n");
263 /* Take into account what has run */
264 cpu_update_icount(cpu
);
266 #ifdef CONFIG_ATOMIC64
267 return atomic_read__nocheck(&timers_state
.qemu_icount
);
268 #else /* FIXME: we need 64bit atomics to do this safely */
269 return timers_state
.qemu_icount
;
273 /* Return the virtual CPU time, based on the instruction counter. */
274 static int64_t cpu_get_icount_locked(void)
276 int64_t icount
= cpu_get_icount_raw();
277 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
280 int64_t cpu_get_icount(void)
286 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
287 icount
= cpu_get_icount_locked();
288 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
293 int64_t cpu_icount_to_ns(int64_t icount
)
295 return icount
<< icount_time_shift
;
298 /* return the time elapsed in VM between vm_start and vm_stop. Unless
299 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * Caller must hold the BQL
304 int64_t cpu_get_ticks(void)
309 return cpu_get_icount();
312 ticks
= timers_state
.cpu_ticks_offset
;
313 if (timers_state
.cpu_ticks_enabled
) {
314 ticks
+= cpu_get_host_ticks();
317 if (timers_state
.cpu_ticks_prev
> ticks
) {
318 /* Note: non increasing ticks may happen if the host uses
320 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
321 ticks
= timers_state
.cpu_ticks_prev
;
324 timers_state
.cpu_ticks_prev
= ticks
;
328 static int64_t cpu_get_clock_locked(void)
332 time
= timers_state
.cpu_clock_offset
;
333 if (timers_state
.cpu_ticks_enabled
) {
340 /* Return the monotonic time elapsed in VM, i.e.,
341 * the time between vm_start and vm_stop
343 int64_t cpu_get_clock(void)
349 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
350 ti
= cpu_get_clock_locked();
351 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
356 /* enable cpu_get_ticks()
357 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
359 void cpu_enable_ticks(void)
361 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
362 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
363 if (!timers_state
.cpu_ticks_enabled
) {
364 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
365 timers_state
.cpu_clock_offset
-= get_clock();
366 timers_state
.cpu_ticks_enabled
= 1;
368 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
371 /* disable cpu_get_ticks() : the clock is stopped. You must not call
372 * cpu_get_ticks() after that.
373 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
375 void cpu_disable_ticks(void)
377 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
378 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
379 if (timers_state
.cpu_ticks_enabled
) {
380 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
381 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
382 timers_state
.cpu_ticks_enabled
= 0;
384 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
387 /* Correlation between real and virtual time is always going to be
388 fairly approximate, so ignore small variation.
389 When the guest is idle real and virtual time will be aligned in
391 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
393 static void icount_adjust(void)
399 /* Protected by TimersState mutex. */
400 static int64_t last_delta
;
402 /* If the VM is not running, then do nothing. */
403 if (!runstate_is_running()) {
407 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
408 cur_time
= cpu_get_clock_locked();
409 cur_icount
= cpu_get_icount_locked();
411 delta
= cur_icount
- cur_time
;
412 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
414 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
415 && icount_time_shift
> 0) {
416 /* The guest is getting too far ahead. Slow time down. */
420 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
421 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
422 /* The guest is getting too far behind. Speed time up. */
426 timers_state
.qemu_icount_bias
= cur_icount
427 - (timers_state
.qemu_icount
<< icount_time_shift
);
428 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
431 static void icount_adjust_rt(void *opaque
)
433 timer_mod(icount_rt_timer
,
434 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
438 static void icount_adjust_vm(void *opaque
)
440 timer_mod(icount_vm_timer
,
441 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
442 NANOSECONDS_PER_SECOND
/ 10);
446 static int64_t qemu_icount_round(int64_t count
)
448 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
451 static void icount_warp_rt(void)
456 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
457 * changes from -1 to another value, so the race here is okay.
460 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
461 warp_start
= vm_clock_warp_start
;
462 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
464 if (warp_start
== -1) {
468 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
469 if (runstate_is_running()) {
470 int64_t clock
= REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT
,
471 cpu_get_clock_locked());
474 warp_delta
= clock
- vm_clock_warp_start
;
475 if (use_icount
== 2) {
477 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
478 * far ahead of real time.
480 int64_t cur_icount
= cpu_get_icount_locked();
481 int64_t delta
= clock
- cur_icount
;
482 warp_delta
= MIN(warp_delta
, delta
);
484 timers_state
.qemu_icount_bias
+= warp_delta
;
486 vm_clock_warp_start
= -1;
487 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
489 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
490 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
494 static void icount_timer_cb(void *opaque
)
496 /* No need for a checkpoint because the timer already synchronizes
497 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
502 void qtest_clock_warp(int64_t dest
)
504 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
505 AioContext
*aio_context
;
506 assert(qtest_enabled());
507 aio_context
= qemu_get_aio_context();
508 while (clock
< dest
) {
509 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
510 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
512 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
513 timers_state
.qemu_icount_bias
+= warp
;
514 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
516 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
517 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
518 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
520 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
523 void qemu_start_warp_timer(void)
532 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
533 * do not fire, so computing the deadline does not make sense.
535 if (!runstate_is_running()) {
539 /* warp clock deterministically in record/replay mode */
540 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
544 if (!all_cpu_threads_idle()) {
548 if (qtest_enabled()) {
549 /* When testing, qtest commands advance icount. */
553 /* We want to use the earliest deadline from ALL vm_clocks */
554 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
555 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
557 static bool notified
;
558 if (!icount_sleep
&& !notified
) {
559 error_report("WARNING: icount sleep disabled and no active timers");
567 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
568 * sleep. Otherwise, the CPU might be waiting for a future timer
569 * interrupt to wake it up, but the interrupt never comes because
570 * the vCPU isn't running any insns and thus doesn't advance the
571 * QEMU_CLOCK_VIRTUAL.
575 * We never let VCPUs sleep in no sleep icount mode.
576 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
577 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
578 * It is useful when we want a deterministic execution time,
579 * isolated from host latencies.
581 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
582 timers_state
.qemu_icount_bias
+= deadline
;
583 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
584 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
587 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
588 * "real" time, (related to the time left until the next event) has
589 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
590 * This avoids that the warps are visible externally; for example,
591 * you will not be sending network packets continuously instead of
594 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
595 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
596 vm_clock_warp_start
= clock
;
598 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
599 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
601 } else if (deadline
== 0) {
602 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
606 static void qemu_account_warp_timer(void)
608 if (!use_icount
|| !icount_sleep
) {
612 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
613 * do not fire, so computing the deadline does not make sense.
615 if (!runstate_is_running()) {
619 /* warp clock deterministically in record/replay mode */
620 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
624 timer_del(icount_warp_timer
);
628 static bool icount_state_needed(void *opaque
)
634 * This is a subsection for icount migration.
636 static const VMStateDescription icount_vmstate_timers
= {
637 .name
= "timer/icount",
639 .minimum_version_id
= 1,
640 .needed
= icount_state_needed
,
641 .fields
= (VMStateField
[]) {
642 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
643 VMSTATE_INT64(qemu_icount
, TimersState
),
644 VMSTATE_END_OF_LIST()
648 static const VMStateDescription vmstate_timers
= {
651 .minimum_version_id
= 1,
652 .fields
= (VMStateField
[]) {
653 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
654 VMSTATE_INT64(dummy
, TimersState
),
655 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
656 VMSTATE_END_OF_LIST()
658 .subsections
= (const VMStateDescription
*[]) {
659 &icount_vmstate_timers
,
664 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
667 double throttle_ratio
;
670 if (!cpu_throttle_get_percentage()) {
674 pct
= (double)cpu_throttle_get_percentage()/100;
675 throttle_ratio
= pct
/ (1 - pct
);
676 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
678 qemu_mutex_unlock_iothread();
679 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
680 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
681 qemu_mutex_lock_iothread();
684 static void cpu_throttle_timer_tick(void *opaque
)
689 /* Stop the timer if needed */
690 if (!cpu_throttle_get_percentage()) {
694 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
695 async_run_on_cpu(cpu
, cpu_throttle_thread
,
700 pct
= (double)cpu_throttle_get_percentage()/100;
701 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
702 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
705 void cpu_throttle_set(int new_throttle_pct
)
707 /* Ensure throttle percentage is within valid range */
708 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
709 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
711 atomic_set(&throttle_percentage
, new_throttle_pct
);
713 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
714 CPU_THROTTLE_TIMESLICE_NS
);
717 void cpu_throttle_stop(void)
719 atomic_set(&throttle_percentage
, 0);
/* True while a non-zero throttle percentage is configured. */
bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}
727 int cpu_throttle_get_percentage(void)
729 return atomic_read(&throttle_percentage
);
732 void cpu_ticks_init(void)
734 seqlock_init(&timers_state
.vm_clock_seqlock
);
735 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
736 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
737 cpu_throttle_timer_tick
, NULL
);
740 void configure_icount(QemuOpts
*opts
, Error
**errp
)
743 char *rem_str
= NULL
;
745 option
= qemu_opt_get(opts
, "shift");
747 if (qemu_opt_get(opts
, "align") != NULL
) {
748 error_setg(errp
, "Please specify shift option when using align");
753 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
755 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
756 icount_timer_cb
, NULL
);
759 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
761 if (icount_align_option
&& !icount_sleep
) {
762 error_setg(errp
, "align=on and sleep=off are incompatible");
764 if (strcmp(option
, "auto") != 0) {
766 icount_time_shift
= strtol(option
, &rem_str
, 0);
767 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
768 error_setg(errp
, "icount: Invalid shift value");
772 } else if (icount_align_option
) {
773 error_setg(errp
, "shift=auto and align=on are incompatible");
774 } else if (!icount_sleep
) {
775 error_setg(errp
, "shift=auto and sleep=off are incompatible");
780 /* 125MIPS seems a reasonable initial guess at the guest speed.
781 It will be corrected fairly quickly anyway. */
782 icount_time_shift
= 3;
784 /* Have both realtime and virtual time triggers for speed adjustment.
785 The realtime trigger catches emulated time passing too slowly,
786 the virtual time trigger catches emulated time passing too fast.
787 Realtime triggers occur even when idle, so use them less frequently
789 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
790 icount_adjust_rt
, NULL
);
791 timer_mod(icount_rt_timer
,
792 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
793 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
794 icount_adjust_vm
, NULL
);
795 timer_mod(icount_vm_timer
,
796 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
797 NANOSECONDS_PER_SECOND
/ 10);
800 /***********************************************************/
801 /* TCG vCPU kick timer
803 * The kick timer is responsible for moving single threaded vCPU
804 * emulation on to the next vCPU. If more than one vCPU is running a
805 * timer event with force a cpu->exit so the next vCPU can get
808 * The timer is removed if all vCPUs are idle and restarted again once
809 * idleness is complete.
812 static QEMUTimer
*tcg_kick_vcpu_timer
;
813 static CPUState
*tcg_current_rr_cpu
;
815 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
817 static inline int64_t qemu_tcg_next_kick(void)
819 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
822 /* Kick the currently round-robin scheduled vCPU */
823 static void qemu_cpu_kick_rr_cpu(void)
827 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
831 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
834 static void do_nothing(CPUState
*cpu
, run_on_cpu_data unused
)
838 void qemu_timer_notify_cb(void *opaque
, QEMUClockType type
)
840 if (!use_icount
|| type
!= QEMU_CLOCK_VIRTUAL
) {
845 if (!qemu_in_vcpu_thread() && first_cpu
) {
846 /* qemu_cpu_kick is not enough to kick a halted CPU out of
847 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
848 * causes cpu_thread_is_idle to return false. This way,
849 * handle_icount_deadline can run.
851 async_run_on_cpu(first_cpu
, do_nothing
, RUN_ON_CPU_NULL
);
855 static void kick_tcg_thread(void *opaque
)
857 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
858 qemu_cpu_kick_rr_cpu();
861 static void start_tcg_kick_timer(void)
863 if (!mttcg_enabled
&& !tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
864 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
865 kick_tcg_thread
, NULL
);
866 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
870 static void stop_tcg_kick_timer(void)
872 if (tcg_kick_vcpu_timer
) {
873 timer_del(tcg_kick_vcpu_timer
);
874 tcg_kick_vcpu_timer
= NULL
;
878 /***********************************************************/
879 void hw_error(const char *fmt
, ...)
885 fprintf(stderr
, "qemu: hardware error: ");
886 vfprintf(stderr
, fmt
, ap
);
887 fprintf(stderr
, "\n");
889 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
890 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
896 void cpu_synchronize_all_states(void)
901 cpu_synchronize_state(cpu
);
905 void cpu_synchronize_all_post_reset(void)
910 cpu_synchronize_post_reset(cpu
);
914 void cpu_synchronize_all_post_init(void)
919 cpu_synchronize_post_init(cpu
);
923 static int do_vm_stop(RunState state
)
927 if (runstate_is_running()) {
931 vm_state_notify(0, state
);
932 qapi_event_send_stop(&error_abort
);
936 replay_disable_events();
937 ret
= bdrv_flush_all();
942 static bool cpu_can_run(CPUState
*cpu
)
947 if (cpu_is_stopped(cpu
)) {
953 static void cpu_handle_guest_debug(CPUState
*cpu
)
955 gdb_set_stop_cpu(cpu
);
956 qemu_system_debug_request();
961 static void sigbus_reraise(void)
964 struct sigaction action
;
966 memset(&action
, 0, sizeof(action
));
967 action
.sa_handler
= SIG_DFL
;
968 if (!sigaction(SIGBUS
, &action
, NULL
)) {
971 sigaddset(&set
, SIGBUS
);
972 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
974 perror("Failed to re-raise SIGBUS!\n");
978 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
980 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
985 /* Called asynchronously in VCPU thread. */
986 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
990 /* Called synchronously (via signalfd) in main thread. */
991 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
997 static void qemu_init_sigbus(void)
999 struct sigaction action
;
1001 memset(&action
, 0, sizeof(action
));
1002 action
.sa_flags
= SA_SIGINFO
;
1003 action
.sa_sigaction
= sigbus_handler
;
1004 sigaction(SIGBUS
, &action
, NULL
);
1006 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
1008 #else /* !CONFIG_LINUX */
1009 static void qemu_init_sigbus(void)
1012 #endif /* !CONFIG_LINUX */
1014 static QemuMutex qemu_global_mutex
;
1016 static QemuThread io_thread
;
1019 static QemuCond qemu_cpu_cond
;
1021 static QemuCond qemu_pause_cond
;
1023 void qemu_init_cpu_loop(void)
1026 qemu_cond_init(&qemu_cpu_cond
);
1027 qemu_cond_init(&qemu_pause_cond
);
1028 qemu_mutex_init(&qemu_global_mutex
);
1030 qemu_thread_get_self(&io_thread
);
1033 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1035 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
1038 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1040 if (kvm_destroy_vcpu(cpu
) < 0) {
1041 error_report("kvm_destroy_vcpu failed");
1046 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
1050 static void qemu_wait_io_event_common(CPUState
*cpu
)
1052 atomic_mb_set(&cpu
->thread_kicked
, false);
1055 cpu
->stopped
= true;
1056 qemu_cond_broadcast(&qemu_pause_cond
);
1058 process_queued_cpu_work(cpu
);
1061 static bool qemu_tcg_should_sleep(CPUState
*cpu
)
1063 if (mttcg_enabled
) {
1064 return cpu_thread_is_idle(cpu
);
1066 return all_cpu_threads_idle();
1070 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
1072 while (qemu_tcg_should_sleep(cpu
)) {
1073 stop_tcg_kick_timer();
1074 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1077 start_tcg_kick_timer();
1079 qemu_wait_io_event_common(cpu
);
1082 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
1084 while (cpu_thread_is_idle(cpu
)) {
1085 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1088 qemu_wait_io_event_common(cpu
);
1091 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1093 CPUState
*cpu
= arg
;
1096 rcu_register_thread();
1098 qemu_mutex_lock_iothread();
1099 qemu_thread_get_self(cpu
->thread
);
1100 cpu
->thread_id
= qemu_get_thread_id();
1104 r
= kvm_init_vcpu(cpu
);
1106 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1110 kvm_init_cpu_signals(cpu
);
1112 /* signal CPU creation */
1113 cpu
->created
= true;
1114 qemu_cond_signal(&qemu_cpu_cond
);
1117 if (cpu_can_run(cpu
)) {
1118 r
= kvm_cpu_exec(cpu
);
1119 if (r
== EXCP_DEBUG
) {
1120 cpu_handle_guest_debug(cpu
);
1123 qemu_kvm_wait_io_event(cpu
);
1124 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1126 qemu_kvm_destroy_vcpu(cpu
);
1127 cpu
->created
= false;
1128 qemu_cond_signal(&qemu_cpu_cond
);
1129 qemu_mutex_unlock_iothread();
1133 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1136 fprintf(stderr
, "qtest is not supported under Windows\n");
1139 CPUState
*cpu
= arg
;
1143 rcu_register_thread();
1145 qemu_mutex_lock_iothread();
1146 qemu_thread_get_self(cpu
->thread
);
1147 cpu
->thread_id
= qemu_get_thread_id();
1151 sigemptyset(&waitset
);
1152 sigaddset(&waitset
, SIG_IPI
);
1154 /* signal CPU creation */
1155 cpu
->created
= true;
1156 qemu_cond_signal(&qemu_cpu_cond
);
1159 qemu_mutex_unlock_iothread();
1162 r
= sigwait(&waitset
, &sig
);
1163 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1168 qemu_mutex_lock_iothread();
1169 qemu_wait_io_event_common(cpu
);
1176 static int64_t tcg_get_icount_limit(void)
1180 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1181 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1183 /* Maintain prior (possibly buggy) behaviour where if no deadline
1184 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1185 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1188 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1189 deadline
= INT32_MAX
;
1192 return qemu_icount_round(deadline
);
1194 return replay_get_instructions();
1198 static void handle_icount_deadline(void)
1200 assert(qemu_in_vcpu_thread());
1203 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1205 if (deadline
== 0) {
1206 /* Wake up other AioContexts. */
1207 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1208 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
1213 static void prepare_icount_for_run(CPUState
*cpu
)
1218 /* These should always be cleared by process_icount_data after
1219 * each vCPU execution. However u16.high can be raised
1220 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1222 g_assert(cpu
->icount_decr
.u16
.low
== 0);
1223 g_assert(cpu
->icount_extra
== 0);
1225 cpu
->icount_budget
= tcg_get_icount_limit();
1226 insns_left
= MIN(0xffff, cpu
->icount_budget
);
1227 cpu
->icount_decr
.u16
.low
= insns_left
;
1228 cpu
->icount_extra
= cpu
->icount_budget
- insns_left
;
1232 static void process_icount_data(CPUState
*cpu
)
1235 /* Account for executed instructions */
1236 cpu_update_icount(cpu
);
1238 /* Reset the counters */
1239 cpu
->icount_decr
.u16
.low
= 0;
1240 cpu
->icount_extra
= 0;
1241 cpu
->icount_budget
= 0;
1243 replay_account_executed_instructions();
1248 static int tcg_cpu_exec(CPUState
*cpu
)
1251 #ifdef CONFIG_PROFILER
1255 #ifdef CONFIG_PROFILER
1256 ti
= profile_getclock();
1258 qemu_mutex_unlock_iothread();
1259 cpu_exec_start(cpu
);
1260 ret
= cpu_exec(cpu
);
1262 qemu_mutex_lock_iothread();
1263 #ifdef CONFIG_PROFILER
1264 tcg_time
+= profile_getclock() - ti
;
1269 /* Destroy any remaining vCPUs which have been unplugged and have
1272 static void deal_with_unplugged_cpus(void)
1277 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1278 qemu_tcg_destroy_vcpu(cpu
);
1279 cpu
->created
= false;
1280 qemu_cond_signal(&qemu_cpu_cond
);
1286 /* Single-threaded TCG
1288 * In the single-threaded case each vCPU is simulated in turn. If
1289 * there is more than a single vCPU we create a simple timer to kick
1290 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1291 * This is done explicitly rather than relying on side-effects
1295 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1297 CPUState
*cpu
= arg
;
1299 rcu_register_thread();
1301 qemu_mutex_lock_iothread();
1302 qemu_thread_get_self(cpu
->thread
);
1305 cpu
->thread_id
= qemu_get_thread_id();
1306 cpu
->created
= true;
1309 qemu_cond_signal(&qemu_cpu_cond
);
1311 /* wait for initial kick-off after machine start */
1312 while (first_cpu
->stopped
) {
1313 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1315 /* process any pending work */
1318 qemu_wait_io_event_common(cpu
);
1322 start_tcg_kick_timer();
1326 /* process any pending work */
1327 cpu
->exit_request
= 1;
1330 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1331 qemu_account_warp_timer();
1333 /* Run the timers here. This is much more efficient than
1334 * waking up the I/O thread and waiting for completion.
1336 handle_icount_deadline();
1342 while (cpu
&& !cpu
->queued_work_first
&& !cpu
->exit_request
) {
1344 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1347 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1348 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1350 if (cpu_can_run(cpu
)) {
1353 prepare_icount_for_run(cpu
);
1355 r
= tcg_cpu_exec(cpu
);
1357 process_icount_data(cpu
);
1359 if (r
== EXCP_DEBUG
) {
1360 cpu_handle_guest_debug(cpu
);
1362 } else if (r
== EXCP_ATOMIC
) {
1363 qemu_mutex_unlock_iothread();
1364 cpu_exec_step_atomic(cpu
);
1365 qemu_mutex_lock_iothread();
1368 } else if (cpu
->stop
) {
1370 cpu
= CPU_NEXT(cpu
);
1375 cpu
= CPU_NEXT(cpu
);
1376 } /* while (cpu && !cpu->exit_request).. */
1378 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1379 atomic_set(&tcg_current_rr_cpu
, NULL
);
1381 if (cpu
&& cpu
->exit_request
) {
1382 atomic_mb_set(&cpu
->exit_request
, 0);
1385 qemu_tcg_wait_io_event(cpu
? cpu
: QTAILQ_FIRST(&cpus
));
1386 deal_with_unplugged_cpus();
1392 static void *qemu_hax_cpu_thread_fn(void *arg
)
1394 CPUState
*cpu
= arg
;
1397 qemu_mutex_lock_iothread();
1398 qemu_thread_get_self(cpu
->thread
);
1400 cpu
->thread_id
= qemu_get_thread_id();
1401 cpu
->created
= true;
1406 qemu_cond_signal(&qemu_cpu_cond
);
1409 if (cpu_can_run(cpu
)) {
1410 r
= hax_smp_cpu_exec(cpu
);
1411 if (r
== EXCP_DEBUG
) {
1412 cpu_handle_guest_debug(cpu
);
1416 while (cpu_thread_is_idle(cpu
)) {
1417 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1422 qemu_wait_io_event_common(cpu
);
1428 static void CALLBACK
dummy_apc_func(ULONG_PTR unused
)
1433 /* Multi-threaded TCG
1435 * In the multi-threaded case each vCPU has its own thread. The TLS
1436 * variable current_cpu can be used deep in the code to find the
1437 * current CPUState for a given thread.
1440 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1442 CPUState
*cpu
= arg
;
1444 g_assert(!use_icount
);
1446 rcu_register_thread();
1448 qemu_mutex_lock_iothread();
1449 qemu_thread_get_self(cpu
->thread
);
1451 cpu
->thread_id
= qemu_get_thread_id();
1452 cpu
->created
= true;
1455 qemu_cond_signal(&qemu_cpu_cond
);
1457 /* process any pending work */
1458 cpu
->exit_request
= 1;
1461 if (cpu_can_run(cpu
)) {
1463 r
= tcg_cpu_exec(cpu
);
1466 cpu_handle_guest_debug(cpu
);
1469 /* during start-up the vCPU is reset and the thread is
1470 * kicked several times. If we don't ensure we go back
1471 * to sleep in the halted state we won't cleanly
1472 * start-up when the vCPU is enabled.
1474 * cpu->halted should ensure we sleep in wait_io_event
1476 g_assert(cpu
->halted
);
1479 qemu_mutex_unlock_iothread();
1480 cpu_exec_step_atomic(cpu
);
1481 qemu_mutex_lock_iothread();
1483 /* Ignore everything else? */
1488 atomic_mb_set(&cpu
->exit_request
, 0);
1489 qemu_tcg_wait_io_event(cpu
);
1495 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1500 if (cpu
->thread_kicked
) {
1503 cpu
->thread_kicked
= true;
1504 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1506 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1510 if (!qemu_cpu_is_self(cpu
)) {
1511 if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1512 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1513 __func__
, GetLastError());
1520 void qemu_cpu_kick(CPUState
*cpu
)
1522 qemu_cond_broadcast(cpu
->halt_cond
);
1523 if (tcg_enabled()) {
1525 /* NOP unless doing single-thread RR */
1526 qemu_cpu_kick_rr_cpu();
1528 if (hax_enabled()) {
1530 * FIXME: race condition with the exit_request check in
1533 cpu
->exit_request
= 1;
1535 qemu_cpu_kick_thread(cpu
);
1539 void qemu_cpu_kick_self(void)
1541 assert(current_cpu
);
1542 qemu_cpu_kick_thread(current_cpu
);
1545 bool qemu_cpu_is_self(CPUState
*cpu
)
1547 return qemu_thread_is_self(cpu
->thread
);
1550 bool qemu_in_vcpu_thread(void)
1552 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/* Per-thread flag: does the calling thread hold the iothread (BQL)? */
static __thread bool iothread_locked = false;

/* Return true if the calling thread currently holds the iothread lock. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
/* Acquire the global iothread lock (BQL). Not recursive. */
void qemu_mutex_lock_iothread(void)
{
    /* Recursive locking would deadlock: assert we do not hold it yet. */
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    /* Set the flag only after the lock is actually held. */
    iothread_locked = true;
}
/* Release the global iothread lock (BQL); caller must hold it. */
void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    /* Clear the flag before releasing so it is never true without the lock. */
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
/* Return true once every vCPU has acknowledged a stop request. */
static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}
/*
 * Request every vCPU to stop and block until all have acknowledged.
 * Called with the iothread lock held; qemu_cond_wait() drops it while
 * sleeping, so vCPU threads can make progress toward stopping.
 */
void pause_all_vcpus(void)
{
    CPUState *cpu;

    /* Freeze guest time while the vCPUs are paused. */
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    /* A vCPU thread cannot wait for itself: stop it directly. */
    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        /* Re-kick in case a wakeup was consumed before ->stop was seen. */
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
/* Clear any pending/acknowledged stop on @cpu and let it run again. */
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}
/* Resume every vCPU after a pause_all_vcpus(). */
void resume_all_vcpus(void)
{
    CPUState *cpu;

    /* Re-enable guest time before letting guest code run again. */
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
/*
 * Request asynchronous removal (hot-unplug) of @cpu; returns immediately.
 * NOTE(review): body reconstructed from context — only the signature is
 * visible in this extraction; confirm the stop/unplug flags against
 * the CPUState definition.
 */
void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}
/* Remove @cpu and wait until its thread has actually torn down. */
void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    /* cpu->created is cleared by the exiting vCPU thread, which also
     * signals qemu_cpu_cond; the wait drops the iothread lock. */
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
1643 /* For temporary buffers for forming a name */
1644 #define VCPU_THREAD_NAME_SIZE 16
/*
 * Start (or attach to) the TCG execution thread(s) for @cpu.
 *
 * MTTCG: one host thread per vCPU.
 * Single-threaded round-robin: all vCPUs share one thread; the first
 * call creates it and later calls just reuse the cached thread/cond
 * (kept in function-local statics).
 */
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            /* Remember the shared thread/cond for subsequent vCPUs. */
            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        /* Wait until the new thread reports it is up (sets cpu->created). */
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}
/* Create the per-vCPU host thread for HAX and wait until it is running. */
static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    /* Native handle is needed to queue APC kicks on Windows. */
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    /* Wait until the thread signals it has started (cpu->created). */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
/* Create the per-vCPU host thread for KVM and wait until it is running. */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    /* Wait until the thread signals it has started (cpu->created). */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
/*
 * Create the fallback vCPU thread used when no accelerator (KVM, HAX,
 * TCG) is active, e.g. qtest; it only services events, never runs guest
 * code.
 */
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    /* Wait until the thread signals it has started (cpu->created). */
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
/*
 * Common vCPU bring-up: fill in topology fields, give the CPU a default
 * address space if the target did not set one up, then dispatch to the
 * accelerator-specific thread starter. The vCPU starts in the stopped
 * state and is released by resume_all_vcpus()/vm_start().
 */
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    /* Accelerator priority: KVM, then HAX, then TCG, else dummy. */
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
/*
 * Stop the vCPU bound to the calling thread (vCPU context only):
 * acknowledge the stop immediately (stop=false, stopped=true), force
 * the execution loop to exit, and wake anyone in pause_all_vcpus().
 */
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}
/*
 * Stop the VM, recording @state as the reason. From a vCPU thread this
 * can only *request* the stop (the main loop completes it); from the
 * main loop it performs the stop synchronously via do_vm_stop().
 * Returns 0 on request, otherwise do_vm_stop()'s result.
 */
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}
/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    /* Already running with no stop request pending: nothing to do. */
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}
/*
 * Start (or restart) the VM: resume the vCPUs only when
 * vm_prepare_start() reports the runstate transition succeeded (0).
 * NOTE(review): the signature line is missing from this extraction;
 * reconstructed as the conventional void vm_start(void) — confirm.
 */
void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}
/*
 * Print the list of CPU models supported by the current target to @f.
 * A no-op for targets that do not define cpu_list().
 */
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
/*
 * QMP 'query-cpus': build a linked list with one CpuInfo entry per vCPU
 * (index, current flag, halted state, QOM path, thread id, and a
 * target-specific program counter). Caller owns the returned list.
 */
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
        /* Per-target env pointer, used below for the PC fields. */
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        /* Pull the latest register state from the accelerator. */
        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        /* Linear address: EIP plus the CS segment base. */
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
/*
 * QMP 'memsave': dump @size bytes of a vCPU's virtual memory starting
 * at @addr into @filename. @cpu_index defaults to 0 when has_cpu is
 * false. Errors are reported through @errp; on partial failure the
 * file may contain the bytes written so far.
 */
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    /* Keep the originals for the error message: addr/size are mutated. */
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    /* Copy in buffer-sized chunks via the debug memory accessor. */
    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
/*
 * QMP 'pmemsave': dump @size bytes of guest *physical* memory starting
 * at @addr into @filename. Errors are reported through @errp.
 */
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    /* Copy in buffer-sized chunks straight from physical memory. */
    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
/* QMP 'inject-nmi': deliver an NMI via the monitor's current CPU. */
void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
2007 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
2013 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
2014 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
2015 if (icount_align_option
) {
2016 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
2017 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
2019 cpu_fprintf(f
, "Max guest delay NA\n");
2020 cpu_fprintf(f
, "Max guest advance NA\n");