4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/kvm.h"
37 #include "qmp-commands.h"
38 #include "exec/exec-all.h"
40 #include "qemu/thread.h"
41 #include "sysemu/cpus.h"
42 #include "sysemu/qtest.h"
43 #include "qemu/main-loop.h"
44 #include "qemu/bitmap.h"
45 #include "qemu/seqlock.h"
46 #include "qapi-event.h"
48 #include "sysemu/replay.h"
51 #include "qemu/compatfd.h"
56 #include <sys/prctl.h>
59 #define PR_MCE_KILL 33
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
70 #endif /* CONFIG_LINUX */
72 static CPUState
*next_cpu
;
76 /* vcpu throttling controls */
77 static QEMUTimer
*throttle_timer
;
78 static unsigned int throttle_percentage
;
80 #define CPU_THROTTLE_PCT_MIN 1
81 #define CPU_THROTTLE_PCT_MAX 99
82 #define CPU_THROTTLE_TIMESLICE_NS 10000000
84 bool cpu_is_stopped(CPUState
*cpu
)
86 return cpu
->stopped
|| !runstate_is_running();
/*
 * Decide whether @cpu's thread currently has nothing to do.
 *
 * The checks visible here are, in order:
 *   1. a pending stop request (cpu->stop) or a queued work item;
 *   2. cpu_is_stopped(cpu);
 *   3. the CPU is not halted, or cpu_has_work(cpu), or
 *      kvm_halt_in_kernel() is true.
 *
 * NOTE(review): the return statement attached to each branch is not
 * visible in this excerpt -- confirm which outcomes mean "idle".
 */
89 static bool cpu_thread_is_idle(CPUState
*cpu
)
91 if (cpu
->stop
|| cpu
->queued_work_first
) {
94 if (cpu_is_stopped(cpu
)) {
97 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
98 kvm_halt_in_kernel()) {
104 static bool all_cpu_threads_idle(void)
109 if (!cpu_thread_is_idle(cpu
)) {
116 /***********************************************************/
117 /* guest cycle counter */
119 /* Protected by TimersState seqlock */
121 static bool icount_sleep
= true;
122 static int64_t vm_clock_warp_start
= -1;
123 /* Conversion factor from emulated instructions to virtual clock ticks. */
124 static int icount_time_shift
;
125 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126 #define MAX_ICOUNT_SHIFT 10
128 static QEMUTimer
*icount_rt_timer
;
129 static QEMUTimer
*icount_vm_timer
;
130 static QEMUTimer
*icount_warp_timer
;
132 typedef struct TimersState
{
133 /* Protected by BQL. */
134 int64_t cpu_ticks_prev
;
135 int64_t cpu_ticks_offset
;
137 /* cpu_clock_offset can be read out of BQL, so protect it with
140 QemuSeqLock vm_clock_seqlock
;
141 int64_t cpu_clock_offset
;
142 int32_t cpu_ticks_enabled
;
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias
;
147 /* Only written by TCG thread */
151 static TimersState timers_state
;
153 int64_t cpu_get_icount_raw(void)
156 CPUState
*cpu
= current_cpu
;
158 icount
= timers_state
.qemu_icount
;
160 if (!cpu
->can_do_io
) {
161 fprintf(stderr
, "Bad icount read\n");
164 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
169 /* Return the virtual CPU time, based on the instruction counter. */
170 static int64_t cpu_get_icount_locked(void)
172 int64_t icount
= cpu_get_icount_raw();
173 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
176 int64_t cpu_get_icount(void)
182 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
183 icount
= cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
189 int64_t cpu_icount_to_ns(int64_t icount
)
191 return icount
<< icount_time_shift
;
194 /* return the host CPU cycle counter and handle stop/restart */
195 /* Caller must hold the BQL */
196 int64_t cpu_get_ticks(void)
201 return cpu_get_icount();
204 ticks
= timers_state
.cpu_ticks_offset
;
205 if (timers_state
.cpu_ticks_enabled
) {
206 ticks
+= cpu_get_host_ticks();
209 if (timers_state
.cpu_ticks_prev
> ticks
) {
210 /* Note: non increasing ticks may happen if the host uses
212 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
213 ticks
= timers_state
.cpu_ticks_prev
;
216 timers_state
.cpu_ticks_prev
= ticks
;
/*
 * Compute the clock value from timers_state.cpu_clock_offset, adding the
 * current get_clock() reading only while cpu_ticks_enabled is set.
 *
 * No locking is performed in the visible lines.
 * NOTE(review): the declaration and the return of `ticks` are not visible
 * in this excerpt.
 */
220 static int64_t cpu_get_clock_locked(void)
224 ticks
= timers_state
.cpu_clock_offset
;
/* While ticks are disabled the stored offset alone is the clock value. */
225 if (timers_state
.cpu_ticks_enabled
) {
226 ticks
+= get_clock();
232 /* return the host CPU monotonic timer and handle stop/restart */
233 int64_t cpu_get_clock(void)
239 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
240 ti
= cpu_get_clock_locked();
241 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
246 /* enable cpu_get_ticks()
247 * Caller must hold BQL, which serves as the mutex for vm_clock_seqlock.
249 void cpu_enable_ticks(void)
251 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
252 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
253 if (!timers_state
.cpu_ticks_enabled
) {
254 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
255 timers_state
.cpu_clock_offset
-= get_clock();
256 timers_state
.cpu_ticks_enabled
= 1;
258 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
261 /* disable cpu_get_ticks() : the clock is stopped. You must not call
262 * cpu_get_ticks() after that.
263 * Caller must hold BQL, which serves as the mutex for vm_clock_seqlock.
265 void cpu_disable_ticks(void)
267 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
268 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
269 if (timers_state
.cpu_ticks_enabled
) {
270 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
271 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
272 timers_state
.cpu_ticks_enabled
= 0;
274 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
277 /* Correlation between real and virtual time is always going to be
278 fairly approximate, so ignore small variation.
279 When the guest is idle real and virtual time will be aligned in
281 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
283 static void icount_adjust(void)
289 /* Protected by TimersState mutex. */
290 static int64_t last_delta
;
292 /* If the VM is not running, then do nothing. */
293 if (!runstate_is_running()) {
297 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
298 cur_time
= cpu_get_clock_locked();
299 cur_icount
= cpu_get_icount_locked();
301 delta
= cur_icount
- cur_time
;
302 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
304 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
305 && icount_time_shift
> 0) {
306 /* The guest is getting too far ahead. Slow time down. */
310 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
311 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
312 /* The guest is getting too far behind. Speed time up. */
316 timers_state
.qemu_icount_bias
= cur_icount
317 - (timers_state
.qemu_icount
<< icount_time_shift
);
318 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
321 static void icount_adjust_rt(void *opaque
)
323 timer_mod(icount_rt_timer
,
324 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
328 static void icount_adjust_vm(void *opaque
)
330 timer_mod(icount_vm_timer
,
331 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
332 NANOSECONDS_PER_SECOND
/ 10);
336 static int64_t qemu_icount_round(int64_t count
)
338 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
341 static void icount_warp_rt(void)
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
350 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
351 warp_start
= vm_clock_warp_start
;
352 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
354 if (warp_start
== -1) {
358 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
359 if (runstate_is_running()) {
360 int64_t clock
= REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT
,
361 cpu_get_clock_locked());
364 warp_delta
= clock
- vm_clock_warp_start
;
365 if (use_icount
== 2) {
367 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
368 * far ahead of real time.
370 int64_t cur_icount
= cpu_get_icount_locked();
371 int64_t delta
= clock
- cur_icount
;
372 warp_delta
= MIN(warp_delta
, delta
);
374 timers_state
.qemu_icount_bias
+= warp_delta
;
376 vm_clock_warp_start
= -1;
377 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
379 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
380 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
384 static void icount_timer_cb(void *opaque
)
386 /* No need for a checkpoint because the timer already synchronizes
387 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
/*
 * Advance QEMU_CLOCK_VIRTUAL until it reaches @dest.  Asserts that qtest
 * is enabled.
 *
 * Each loop iteration adds to timers_state.qemu_icount_bias (under the
 * vm_clock seqlock write lock) the value chosen by qemu_soonest_timeout()
 * from the remaining distance (dest - clock) and the next
 * QEMU_CLOCK_VIRTUAL deadline.  It then runs the QEMU_CLOCK_VIRTUAL timers,
 * including the timer list of the AioContext returned by
 * qemu_get_aio_context(), and re-reads the clock.
 */
392 void qtest_clock_warp(int64_t dest
)
394 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
395 AioContext
*aio_context
;
396 assert(qtest_enabled());
397 aio_context
= qemu_get_aio_context();
398 while (clock
< dest
) {
399 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
400 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
/* The bias is modified only inside the seqlock write section. */
402 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
403 timers_state
.qemu_icount_bias
+= warp
;
404 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
406 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
407 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
408 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
410 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
413 void qemu_start_warp_timer(void)
422 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
423 * do not fire, so computing the deadline does not make sense.
425 if (!runstate_is_running()) {
429 /* warp clock deterministically in record/replay mode */
430 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
434 if (!all_cpu_threads_idle()) {
438 if (qtest_enabled()) {
439 /* When testing, qtest commands advance icount. */
443 /* We want to use the earliest deadline from ALL vm_clocks */
444 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
445 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
447 static bool notified
;
448 if (!icount_sleep
&& !notified
) {
449 error_report("WARNING: icount sleep disabled and no active timers");
457 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
458 * sleep. Otherwise, the CPU might be waiting for a future timer
459 * interrupt to wake it up, but the interrupt never comes because
460 * the vCPU isn't running any insns and thus doesn't advance the
461 * QEMU_CLOCK_VIRTUAL.
465 * We never let VCPUs sleep in no sleep icount mode.
466 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
467 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
468 * It is useful when we want a deterministic execution time,
469 * isolated from host latencies.
471 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
472 timers_state
.qemu_icount_bias
+= deadline
;
473 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
474 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
477 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
478 * "real" time, (related to the time left until the next event) has
479 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
480 * This avoids that the warps are visible externally; for example,
481 * you will not be sending network packets continuously instead of
484 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
485 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
486 vm_clock_warp_start
= clock
;
488 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
489 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
491 } else if (deadline
== 0) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
496 static void qemu_account_warp_timer(void)
498 if (!use_icount
|| !icount_sleep
) {
502 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
503 * do not fire, so computing the deadline does not make sense.
505 if (!runstate_is_running()) {
509 /* warp clock deterministically in record/replay mode */
510 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
514 timer_del(icount_warp_timer
);
518 static bool icount_state_needed(void *opaque
)
524 * This is a subsection for icount migration.
526 static const VMStateDescription icount_vmstate_timers
= {
527 .name
= "timer/icount",
529 .minimum_version_id
= 1,
530 .needed
= icount_state_needed
,
531 .fields
= (VMStateField
[]) {
532 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
533 VMSTATE_INT64(qemu_icount
, TimersState
),
534 VMSTATE_END_OF_LIST()
538 static const VMStateDescription vmstate_timers
= {
541 .minimum_version_id
= 1,
542 .fields
= (VMStateField
[]) {
543 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
544 VMSTATE_INT64(dummy
, TimersState
),
545 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
546 VMSTATE_END_OF_LIST()
548 .subsections
= (const VMStateDescription
*[]) {
549 &icount_vmstate_timers
,
/*
 * Work item that throttles one vcpu by sleeping.  cpu_throttle_timer_tick()
 * queues it with async_run_on_cpu(), passing the CPUState as @opaque.
 *
 * The sleep length is pct / (1 - pct) timeslices
 * (CPU_THROTTLE_TIMESLICE_NS each), where pct is the throttle percentage
 * expressed as a fraction.  The iothread mutex is released around the sleep.
 */
554 static void cpu_throttle_thread(void *opaque
)
556 CPUState
*cpu
= opaque
;
558 double throttle_ratio
;
561 if (!cpu_throttle_get_percentage()) {
565 pct
= (double)cpu_throttle_get_percentage()/100;
566 throttle_ratio
= pct
/ (1 - pct
);
567 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
569 qemu_mutex_unlock_iothread();
/* Clear the flag that cpu_throttle_timer_tick() tests before queueing. */
570 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
571 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
572 qemu_mutex_lock_iothread();
/*
 * Callback of throttle_timer.  It queues cpu_throttle_thread() on a vcpu
 * unless one is already scheduled there, then re-arms the timer
 * CPU_THROTTLE_TIMESLICE_NS / (1 - pct) nanoseconds after the current
 * QEMU_CLOCK_VIRTUAL_RT time.
 *
 * NOTE(review): the loop header that supplies `cpu` and the body of the
 * zero-percentage branch are not visible in this excerpt.
 */
575 static void cpu_throttle_timer_tick(void *opaque
)
580 /* Stop the timer if needed */
581 if (!cpu_throttle_get_percentage()) {
/* atomic_xchg() returns the old flag value: queue only if it was clear. */
585 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
586 async_run_on_cpu(cpu
, cpu_throttle_thread
, cpu
);
590 pct
= (double)cpu_throttle_get_percentage()/100;
591 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
592 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
595 void cpu_throttle_set(int new_throttle_pct
)
597 /* Ensure throttle percentage is within valid range */
598 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
599 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
601 atomic_set(&throttle_percentage
, new_throttle_pct
);
603 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
604 CPU_THROTTLE_TIMESLICE_NS
);
607 void cpu_throttle_stop(void)
609 atomic_set(&throttle_percentage
, 0);
/* Throttling is active whenever the stored percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
617 int cpu_throttle_get_percentage(void)
619 return atomic_read(&throttle_percentage
);
622 void cpu_ticks_init(void)
624 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
625 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
626 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
627 cpu_throttle_timer_tick
, NULL
);
630 void configure_icount(QemuOpts
*opts
, Error
**errp
)
633 char *rem_str
= NULL
;
635 option
= qemu_opt_get(opts
, "shift");
637 if (qemu_opt_get(opts
, "align") != NULL
) {
638 error_setg(errp
, "Please specify shift option when using align");
643 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
645 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
646 icount_timer_cb
, NULL
);
649 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
651 if (icount_align_option
&& !icount_sleep
) {
652 error_setg(errp
, "align=on and sleep=off are incompatible");
654 if (strcmp(option
, "auto") != 0) {
656 icount_time_shift
= strtol(option
, &rem_str
, 0);
657 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
658 error_setg(errp
, "icount: Invalid shift value");
662 } else if (icount_align_option
) {
663 error_setg(errp
, "shift=auto and align=on are incompatible");
664 } else if (!icount_sleep
) {
665 error_setg(errp
, "shift=auto and sleep=off are incompatible");
670 /* 125MIPS seems a reasonable initial guess at the guest speed.
671 It will be corrected fairly quickly anyway. */
672 icount_time_shift
= 3;
674 /* Have both realtime and virtual time triggers for speed adjustment.
675 The realtime trigger catches emulated time passing too slowly,
676 the virtual time trigger catches emulated time passing too fast.
677 Realtime triggers occur even when idle, so use them less frequently
679 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
680 icount_adjust_rt
, NULL
);
681 timer_mod(icount_rt_timer
,
682 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
683 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
684 icount_adjust_vm
, NULL
);
685 timer_mod(icount_vm_timer
,
686 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
687 NANOSECONDS_PER_SECOND
/ 10);
690 /***********************************************************/
691 void hw_error(const char *fmt
, ...)
697 fprintf(stderr
, "qemu: hardware error: ");
698 vfprintf(stderr
, fmt
, ap
);
699 fprintf(stderr
, "\n");
701 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
702 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
708 void cpu_synchronize_all_states(void)
713 cpu_synchronize_state(cpu
);
717 void cpu_synchronize_all_post_reset(void)
722 cpu_synchronize_post_reset(cpu
);
726 void cpu_synchronize_all_post_init(void)
731 cpu_synchronize_post_init(cpu
);
735 static int do_vm_stop(RunState state
)
739 if (runstate_is_running()) {
743 vm_state_notify(0, state
);
744 qapi_event_send_stop(&error_abort
);
748 ret
= blk_flush_all();
753 static bool cpu_can_run(CPUState
*cpu
)
758 if (cpu_is_stopped(cpu
)) {
764 static void cpu_handle_guest_debug(CPUState
*cpu
)
766 gdb_set_stop_cpu(cpu
);
767 qemu_system_debug_request();
772 static void sigbus_reraise(void)
775 struct sigaction action
;
777 memset(&action
, 0, sizeof(action
));
778 action
.sa_handler
= SIG_DFL
;
779 if (!sigaction(SIGBUS
, &action
, NULL
)) {
782 sigaddset(&set
, SIGBUS
);
783 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
785 perror("Failed to re-raise SIGBUS!\n");
789 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
792 if (kvm_on_sigbus(siginfo
->ssi_code
,
793 (void *)(intptr_t)siginfo
->ssi_addr
)) {
798 static void qemu_init_sigbus(void)
800 struct sigaction action
;
802 memset(&action
, 0, sizeof(action
));
803 action
.sa_flags
= SA_SIGINFO
;
804 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
805 sigaction(SIGBUS
, &action
, NULL
);
807 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
810 static void qemu_kvm_eat_signals(CPUState
*cpu
)
812 struct timespec ts
= { 0, 0 };
818 sigemptyset(&waitset
);
819 sigaddset(&waitset
, SIG_IPI
);
820 sigaddset(&waitset
, SIGBUS
);
823 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
824 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
825 perror("sigtimedwait");
831 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
839 r
= sigpending(&chkset
);
841 perror("sigpending");
844 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
847 #else /* !CONFIG_LINUX */
849 static void qemu_init_sigbus(void)
853 static void qemu_kvm_eat_signals(CPUState
*cpu
)
856 #endif /* !CONFIG_LINUX */
859 static void dummy_signal(int sig
)
863 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
867 struct sigaction sigact
;
869 memset(&sigact
, 0, sizeof(sigact
));
870 sigact
.sa_handler
= dummy_signal
;
871 sigaction(SIG_IPI
, &sigact
, NULL
);
873 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
874 sigdelset(&set
, SIG_IPI
);
875 sigdelset(&set
, SIGBUS
);
876 r
= kvm_set_signal_mask(cpu
, &set
);
878 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
884 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
890 static QemuMutex qemu_global_mutex
;
891 static QemuCond qemu_io_proceeded_cond
;
892 static unsigned iothread_requesting_mutex
;
894 static QemuThread io_thread
;
897 static QemuCond qemu_cpu_cond
;
899 static QemuCond qemu_pause_cond
;
900 static QemuCond qemu_work_cond
;
902 void qemu_init_cpu_loop(void)
905 qemu_cond_init(&qemu_cpu_cond
);
906 qemu_cond_init(&qemu_pause_cond
);
907 qemu_cond_init(&qemu_work_cond
);
908 qemu_cond_init(&qemu_io_proceeded_cond
);
909 qemu_mutex_init(&qemu_global_mutex
);
911 qemu_thread_get_self(&io_thread
);
/*
 * Queue a work item for @cpu and wait until it has been marked done.
 *
 * The work item lives on the caller's stack and is appended to @cpu's
 * work queue under cpu->work_mutex.  The caller then waits on
 * qemu_work_cond (with qemu_global_mutex) until wi.done is set.
 *
 * NOTE(review): the body of the qemu_cpu_is_self() branch and the lines
 * that fill in the work item are not visible in this excerpt.
 */
914 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
916 struct qemu_work_item wi
;
918 if (qemu_cpu_is_self(cpu
)) {
/* Append &wi to the singly linked work queue. */
927 qemu_mutex_lock(&cpu
->work_mutex
);
928 if (cpu
->queued_work_first
== NULL
) {
929 cpu
->queued_work_first
= &wi
;
931 cpu
->queued_work_last
->next
= &wi
;
933 cpu
->queued_work_last
= &wi
;
936 qemu_mutex_unlock(&cpu
->work_mutex
);
/* current_cpu is saved before the wait and restored after it. */
939 while (!atomic_mb_read(&wi
.done
)) {
940 CPUState
*self_cpu
= current_cpu
;
942 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
943 current_cpu
= self_cpu
;
/*
 * Queue a work item for @cpu.  No wait for completion appears in the
 * visible lines.
 *
 * The work item is allocated zeroed with g_malloc0() and appended to
 * @cpu's work queue under cpu->work_mutex.
 *
 * NOTE(review): the body of the qemu_cpu_is_self() branch, the lines that
 * fill in the work item, and whoever frees it are not visible in this
 * excerpt.
 */
947 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
949 struct qemu_work_item
*wi
;
951 if (qemu_cpu_is_self(cpu
)) {
956 wi
= g_malloc0(sizeof(struct qemu_work_item
));
/* Append wi to the singly linked work queue. */
961 qemu_mutex_lock(&cpu
->work_mutex
);
962 if (cpu
->queued_work_first
== NULL
) {
963 cpu
->queued_work_first
= wi
;
965 cpu
->queued_work_last
->next
= wi
;
967 cpu
->queued_work_last
= wi
;
970 qemu_mutex_unlock(&cpu
->work_mutex
);
/*
 * Drain @cpu's queue of work items.
 *
 * Items are popped from the head of the list under cpu->work_mutex.  The
 * mutex is released and re-taken once per item.  Each item is marked done,
 * and qemu_work_cond is broadcast once the queue is empty.
 *
 * NOTE(review): the statement executed while the mutex is released, and
 * any handling of the item around the `done` store, are not visible in
 * this excerpt.
 */
975 static void flush_queued_work(CPUState
*cpu
)
977 struct qemu_work_item
*wi
;
/* This emptiness check is made before work_mutex is taken. */
979 if (cpu
->queued_work_first
== NULL
) {
983 qemu_mutex_lock(&cpu
->work_mutex
);
984 while (cpu
->queued_work_first
!= NULL
) {
985 wi
= cpu
->queued_work_first
;
986 cpu
->queued_work_first
= wi
->next
;
/* Popping the last item also resets the tail pointer. */
987 if (!cpu
->queued_work_first
) {
988 cpu
->queued_work_last
= NULL
;
990 qemu_mutex_unlock(&cpu
->work_mutex
);
992 qemu_mutex_lock(&cpu
->work_mutex
);
996 atomic_mb_set(&wi
->done
, true);
999 qemu_mutex_unlock(&cpu
->work_mutex
);
1000 qemu_cond_broadcast(&qemu_work_cond
);
1003 static void qemu_wait_io_event_common(CPUState
*cpu
)
1007 cpu
->stopped
= true;
1008 qemu_cond_broadcast(&qemu_pause_cond
);
1010 flush_queued_work(cpu
);
1011 cpu
->thread_kicked
= false;
1014 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
1016 while (all_cpu_threads_idle()) {
1017 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1020 while (iothread_requesting_mutex
) {
1021 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
1025 qemu_wait_io_event_common(cpu
);
1029 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
1031 while (cpu_thread_is_idle(cpu
)) {
1032 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1035 qemu_kvm_eat_signals(cpu
);
1036 qemu_wait_io_event_common(cpu
);
1039 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1041 CPUState
*cpu
= arg
;
1044 rcu_register_thread();
1046 qemu_mutex_lock_iothread();
1047 qemu_thread_get_self(cpu
->thread
);
1048 cpu
->thread_id
= qemu_get_thread_id();
1052 r
= kvm_init_vcpu(cpu
);
1054 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1058 qemu_kvm_init_cpu_signals(cpu
);
1060 /* signal CPU creation */
1061 cpu
->created
= true;
1062 qemu_cond_signal(&qemu_cpu_cond
);
1065 if (cpu_can_run(cpu
)) {
1066 r
= kvm_cpu_exec(cpu
);
1067 if (r
== EXCP_DEBUG
) {
1068 cpu_handle_guest_debug(cpu
);
1071 qemu_kvm_wait_io_event(cpu
);
1077 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1080 fprintf(stderr
, "qtest is not supported under Windows\n");
1083 CPUState
*cpu
= arg
;
1087 rcu_register_thread();
1089 qemu_mutex_lock_iothread();
1090 qemu_thread_get_self(cpu
->thread
);
1091 cpu
->thread_id
= qemu_get_thread_id();
1094 sigemptyset(&waitset
);
1095 sigaddset(&waitset
, SIG_IPI
);
1097 /* signal CPU creation */
1098 cpu
->created
= true;
1099 qemu_cond_signal(&qemu_cpu_cond
);
1104 qemu_mutex_unlock_iothread();
1107 r
= sigwait(&waitset
, &sig
);
1108 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1113 qemu_mutex_lock_iothread();
1115 qemu_wait_io_event_common(cpu
);
1122 static void tcg_exec_all(void);
1124 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1126 CPUState
*cpu
= arg
;
1128 rcu_register_thread();
1130 qemu_mutex_lock_iothread();
1131 qemu_thread_get_self(cpu
->thread
);
1134 cpu
->thread_id
= qemu_get_thread_id();
1135 cpu
->created
= true;
1138 qemu_cond_signal(&qemu_cpu_cond
);
1140 /* wait for initial kick-off after machine start */
1141 while (first_cpu
->stopped
) {
1142 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1144 /* process any pending work */
1146 qemu_wait_io_event_common(cpu
);
1150 /* process any pending work */
1151 atomic_mb_set(&exit_request
, 1);
1157 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1159 if (deadline
== 0) {
1160 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1163 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus
));
1169 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1174 if (cpu
->thread_kicked
) {
1177 cpu
->thread_kicked
= true;
1178 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1180 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1188 static void qemu_cpu_kick_no_halt(void)
1191 /* Ensure whatever caused the exit has reached the CPU threads before
1192 * writing exit_request.
1194 atomic_mb_set(&exit_request
, 1);
1195 cpu
= atomic_mb_read(&tcg_current_cpu
);
/*
 * Wake @cpu.  The function first broadcasts on its halt condition.  It then
 * calls qemu_cpu_kick_no_halt() when TCG is enabled, and
 * qemu_cpu_kick_thread(cpu) otherwise.
 *
 * NOTE(review): the line between the two kick calls is not visible in this
 * excerpt; the "otherwise" reading assumes it is an `else` -- confirm.
 */
1201 void qemu_cpu_kick(CPUState
*cpu
)
1203 qemu_cond_broadcast(cpu
->halt_cond
);
1204 if (tcg_enabled()) {
1205 qemu_cpu_kick_no_halt();
1207 qemu_cpu_kick_thread(cpu
);
1211 void qemu_cpu_kick_self(void)
1213 assert(current_cpu
);
1214 qemu_cpu_kick_thread(current_cpu
);
1217 bool qemu_cpu_is_self(CPUState
*cpu
)
1219 return qemu_thread_is_self(cpu
->thread
);
1222 bool qemu_in_vcpu_thread(void)
1224 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Per-thread flag: set to true at the end of qemu_mutex_lock_iothread(),
 * cleared in qemu_mutex_unlock_iothread(), and reported by
 * qemu_mutex_iothread_locked().
 */
1227 static __thread
bool iothread_locked
= false;
1229 bool qemu_mutex_iothread_locked(void)
1231 return iothread_locked
;
1234 void qemu_mutex_lock_iothread(void)
1236 atomic_inc(&iothread_requesting_mutex
);
1237 /* In the simple case there is no need to bump the VCPU thread out of
1238 * TCG code execution.
1240 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1241 !first_cpu
|| !first_cpu
->created
) {
1242 qemu_mutex_lock(&qemu_global_mutex
);
1243 atomic_dec(&iothread_requesting_mutex
);
1245 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1246 qemu_cpu_kick_no_halt();
1247 qemu_mutex_lock(&qemu_global_mutex
);
1249 atomic_dec(&iothread_requesting_mutex
);
1250 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1252 iothread_locked
= true;
1255 void qemu_mutex_unlock_iothread(void)
1257 iothread_locked
= false;
1258 qemu_mutex_unlock(&qemu_global_mutex
);
/*
 * Report whether the vCPUs are paused, judged by each CPU's 'stopped' flag.
 * pause_all_vcpus() keeps waiting while this returns zero.
 *
 * NOTE(review): the CPU iteration and both return statements are missing
 * from this listing; only the per-CPU 'stopped' test is visible.
 */
1261 static int all_vcpus_paused(void)
1266 if (!cpu
->stopped
) {
/*
 * Pause the vCPUs and wait until all_vcpus_paused() reports success.
 *
 * Visible steps: QEMU_CLOCK_VIRTUAL is disabled; a special path exists for
 * callers running on a vCPU thread, which for non-KVM configurations sets
 * cpu->stopped directly; finally the caller sleeps on qemu_pause_cond
 * (with qemu_global_mutex) until every vCPU is paused.
 *
 * NOTE(review): the CPU loops and the statements that request the stop are
 * not visible in this listing; confirm against the complete file.
 */
1274 void pause_all_vcpus(void)
1278 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1284 if (qemu_in_vcpu_thread()) {
1286 if (!kvm_enabled()) {
1289 cpu
->stopped
= true;
1295 while (!all_vcpus_paused()) {
1296 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
/*
 * Resume @cpu: its 'stopped' flag is cleared.
 *
 * NOTE(review): neighbouring original lines are absent from this listing,
 * so any further steps (e.g. kicking the CPU) cannot be confirmed here.
 */
1303 void cpu_resume(CPUState
*cpu
)
1306 cpu
->stopped
= false;
/*
 * Resume the vCPUs.  The only step visible here re-enables
 * QEMU_CLOCK_VIRTUAL, which pause_all_vcpus() disables.
 *
 * NOTE(review): the per-CPU resume loop is not visible in this listing.
 */
1310 void resume_all_vcpus(void)
1314 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1320 /* Size of the temporary char buffers used for forming a vCPU thread name */
1321 #define VCPU_THREAD_NAME_SIZE 16
/*
 * Attach @cpu to the TCG execution thread.
 *
 * All TCG CPUs share one host thread and one halt condition, cached in the
 * function-static tcg_cpu_thread / tcg_halt_cond.  While tcg_cpu_thread is
 * still NULL the function allocates the QemuThread and QemuCond, starts
 * qemu_tcg_cpu_thread_fn() as a joinable thread named "CPU %d/TCG" and
 * waits on qemu_cpu_cond until cpu->created is set.  The trailing
 * assignments hand the cached thread and halt condition to @cpu.
 *
 * Presumably called with qemu_global_mutex held, since that mutex is passed
 * to qemu_cond_wait() -- confirm against callers.
 *
 * NOTE(review): the snprintf() argument line, the guard around the hThread
 * assignment and the else boundary are missing from this listing.
 */
1323 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1325 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1326 static QemuCond
*tcg_halt_cond
;
1327 static QemuThread
*tcg_cpu_thread
;
1329 /* share a single thread for all cpus with TCG */
1330 if (!tcg_cpu_thread
) {
1331 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1332 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1333 qemu_cond_init(cpu
->halt_cond
);
1334 tcg_halt_cond
= cpu
->halt_cond
;
1335 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1337 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1338 cpu
, QEMU_THREAD_JOINABLE
);
1340 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
/* block until the new thread has set cpu->created and signalled us */
1342 while (!cpu
->created
) {
1343 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1345 tcg_cpu_thread
= cpu
->thread
;
/* reuse the thread and halt condition created for the first TCG CPU */
1347 cpu
->thread
= tcg_cpu_thread
;
1348 cpu
->halt_cond
= tcg_halt_cond
;
/*
 * Create the host thread for a KVM vCPU.
 *
 * Allocates and initialises @cpu's QemuThread and halt condition, starts
 * qemu_kvm_cpu_thread_fn() as a joinable thread named "CPU %d/KVM", then
 * waits on qemu_cpu_cond (with qemu_global_mutex) until cpu->created is
 * set.
 *
 * NOTE(review): the snprintf() argument line is missing from this listing.
 */
1352 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1354 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1356 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1357 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1358 qemu_cond_init(cpu
->halt_cond
);
1359 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1361 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1362 cpu
, QEMU_THREAD_JOINABLE
);
/* block until the new thread reports the vCPU as created */
1363 while (!cpu
->created
) {
1364 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
/*
 * Create the host thread for a vCPU when neither KVM nor TCG is in use
 * (the fallback branch of qemu_init_vcpu()).
 *
 * Allocates and initialises @cpu's QemuThread and halt condition, starts
 * qemu_dummy_cpu_thread_fn() as a joinable thread named "CPU %d/DUMMY",
 * then waits on qemu_cpu_cond (with qemu_global_mutex) until cpu->created
 * is set.
 *
 * NOTE(review): the snprintf() argument line is missing from this listing.
 */
1368 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1370 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1372 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1373 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1374 qemu_cond_init(cpu
->halt_cond
);
1375 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1377 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1378 QEMU_THREAD_JOINABLE
);
/* block until the new thread reports the vCPU as created */
1379 while (!cpu
->created
) {
1380 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
/*
 * Initialise the execution side of @cpu and start its thread.
 *
 * Copies the SMP topology (smp_cores, smp_threads) into the CPU, marks it
 * stopped, installs a default address space built from cpu->memory at
 * index 0, and then dispatches on the accelerator: qemu_kvm_start_vcpu()
 * for KVM, qemu_tcg_init_vcpu() for TCG, qemu_dummy_start_vcpu() otherwise.
 *
 * NOTE(review): the condition guarding the default address-space setup and
 * the second argument of address_space_init_shareable() are missing from
 * this listing.
 */
1384 void qemu_init_vcpu(CPUState
*cpu
)
1386 cpu
->nr_cores
= smp_cores
;
1387 cpu
->nr_threads
= smp_threads
;
1388 cpu
->stopped
= true;
1391 /* If the target cpu hasn't set up any address spaces itself,
1392 * give it the default one.
1394 AddressSpace
*as
= address_space_init_shareable(cpu
->memory
,
1397 cpu_address_space_init(cpu
, as
, 0);
1400 if (kvm_enabled()) {
1401 qemu_kvm_start_vcpu(cpu
);
1402 } else if (tcg_enabled()) {
1403 qemu_tcg_init_vcpu(cpu
);
1405 qemu_dummy_start_vcpu(cpu
);
/*
 * Stop the vCPU bound to the calling thread: clear its pending 'stop'
 * request, mark it 'stopped', force it out of execution with cpu_exit()
 * and broadcast qemu_pause_cond (the condition pause_all_vcpus() waits on).
 *
 * NOTE(review): any guard on current_cpu being non-NULL is not visible in
 * this listing.
 */
1409 void cpu_stop_current(void)
1412 current_cpu
->stop
= false;
1413 current_cpu
->stopped
= true;
1414 cpu_exit(current_cpu
);
1415 qemu_cond_broadcast(&qemu_pause_cond
);
/*
 * Stop the VM and move it to run state @state.
 *
 * When called on a vCPU thread the stop is only requested
 * (qemu_system_vmstop_request_prepare() + qemu_system_vmstop_request());
 * otherwise do_vm_stop(@state) is called and its result returned.
 *
 * NOTE(review): the remaining statements and the return value of the vCPU
 * thread branch are not visible in this listing.
 */
1419 int vm_stop(RunState state
)
1421 if (qemu_in_vcpu_thread()) {
1422 qemu_system_vmstop_request_prepare();
1423 qemu_system_vmstop_request(state
);
1425 * FIXME: should not return to device code in case
1426 * vm_stop() has been requested.
1432 return do_vm_stop(state
);
/*
 * Like vm_stop(), but also applies @state when the VM is not running:
 * a running VM is stopped through vm_stop(@state); otherwise the run state
 * is overwritten with runstate_set(@state) and the result of
 * blk_flush_all() is returned.
 */
1435 /* does a state transition even if the VM is already stopped,
1436 current state is forgotten forever */
1437 int vm_stop_force_state(RunState state
)
1439 if (runstate_is_running()) {
1440 return vm_stop(state
);
1442 runstate_set(state
);
1445 /* Make sure to return an error if the flush in a previous vm_stop()
1447 return blk_flush_all();
/*
 * Compute the instruction budget for the next TCG execution slice.
 *
 * Outside replay-play mode the budget derives from the next
 * QEMU_CLOCK_VIRTUAL deadline: a negative deadline or one above INT32_MAX
 * is replaced by INT32_MAX, and the result is converted with
 * qemu_icount_round().  The other return path hands back
 * replay_get_instructions().
 *
 * NOTE(review): the declaration of deadline and the else boundary are
 * missing from this listing.
 */
1451 static int64_t tcg_get_icount_limit(void)
1455 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1456 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1458 /* Maintain prior (possibly buggy) behaviour where if no deadline
1459 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1460 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1463 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1464 deadline
= INT32_MAX
;
1467 return qemu_icount_round(deadline
);
1469 return replay_get_instructions();
/*
 * Run @cpu through cpu_exec() once, with instruction-count bookkeeping
 * around the call; the cpu_exec() result is stored in ret.
 *
 * Before execution: instructions still pending on the CPU (icount_decr low
 * half plus icount_extra) are removed from timers_state.qemu_icount, a new
 * budget is obtained from tcg_get_icount_limit() and added to qemu_icount,
 * and at most 0xffff of it is loaded into icount_decr.u16.low.
 * After execution: the unused part is folded back out of qemu_icount, the
 * counters are cleared and replay_account_executed_instructions() runs.
 * With CONFIG_PROFILER the time spent in cpu_exec() is added to tcg_time.
 *
 * NOTE(review): the local declarations, the conditions guarding the icount
 * sections, any adjustment of count between the two assignments below and
 * the return statement are missing from this listing.
 */
1473 static int tcg_cpu_exec(CPUState
*cpu
)
1476 #ifdef CONFIG_PROFILER
1480 #ifdef CONFIG_PROFILER
1481 ti
= profile_getclock();
/* drop what is still pending from the global counter, then reset it */
1486 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1487 + cpu
->icount_extra
);
1488 cpu
->icount_decr
.u16
.low
= 0;
1489 cpu
->icount_extra
= 0;
1490 count
= tcg_get_icount_limit();
1491 timers_state
.qemu_icount
+= count
;
/* the low decrementer is a 16-bit field, so cap it at 0xffff */
1492 decr
= (count
> 0xffff) ? 0xffff : count
;
1494 cpu
->icount_decr
.u16
.low
= decr
;
1495 cpu
->icount_extra
= count
;
1497 ret
= cpu_exec(cpu
);
1498 #ifdef CONFIG_PROFILER
1499 tcg_time
+= profile_getclock() - ti
;
1502 /* Fold pending instructions back into the
1503 instruction counter, and clear the interrupt flag. */
1504 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1505 + cpu
->icount_extra
);
1506 cpu
->icount_decr
.u32
= 0;
1507 cpu
->icount_extra
= 0;
1508 replay_account_executed_instructions();
/*
 * Run the TCG CPUs round-robin, starting at the file-level next_cpu cursor
 * (restarting from first_cpu when it is NULL) and stopping when the list
 * ends or exit_request becomes set.
 *
 * For each CPU, QEMU_CLOCK_VIRTUAL is enabled unless the CPU single-steps
 * with SSTEP_NOTIMER.  A runnable CPU goes through tcg_cpu_exec(), with
 * EXCP_DEBUG handed to cpu_handle_guest_debug(); a CPU with 'stop' or
 * 'stopped' set takes a separate branch.  exit_request is cleared on exit.
 *
 * NOTE(review): the declaration of r and the bodies that follow the
 * EXCP_DEBUG and stop/stopped tests are missing from this listing.
 */
1513 static void tcg_exec_all(void)
1517 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1518 qemu_account_warp_timer();
1520 if (next_cpu
== NULL
) {
1521 next_cpu
= first_cpu
;
1523 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1524 CPUState
*cpu
= next_cpu
;
1526 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1527 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1529 if (cpu_can_run(cpu
)) {
1530 r
= tcg_cpu_exec(cpu
);
1531 if (r
== EXCP_DEBUG
) {
1532 cpu_handle_guest_debug(cpu
);
1535 } else if (cpu
->stop
|| cpu
->stopped
) {
1540 /* Pairs with smp_wmb in qemu_cpu_kick. */
/* NOTE(review): qemu_cpu_kick() in this file shows no smp_wmb; exit_request
 * is written with atomic_mb_set() in qemu_cpu_kick_no_halt().  The comment
 * above may be stale -- confirm. */
1541 atomic_mb_set(&exit_request
, 0);
/*
 * Print the CPU models known to the target to @f through @cpu_fprintf.
 * The work is delegated to the target's cpu_list macro when it is defined;
 * otherwise the visible code prints nothing.  @optarg is not used in the
 * lines shown here.
 */
1544 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1546 /* XXX: implement xxx_cpu_list for targets that still miss it */
1547 #if defined(cpu_list)
1548 cpu_list(f
, cpu_fprintf
);
/*
 * QMP handler building a CpuInfoList with one entry per CPU.
 *
 * For each CPU the state is synchronised with cpu_synchronize_state(), a
 * list node and its value are allocated with g_malloc0(), and the generic
 * fields are filled in: CPU index, 'current' (true only for first_cpu),
 * halted flag, canonical QOM path and host thread id.  The architecture tag
 * and program-counter fields depend on the build target (x86, PPC, SPARC,
 * MIPS, TriCore, or CPU_INFO_ARCH_OTHER).  Nodes are appended through
 * cur_item; head is the first node.
 *
 * @errp is not used in the lines shown here.
 *
 * NOTE(review): the CPU loop header, the head/cur_item branching and the
 * return statement are missing from this listing.
 */
1552 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1554 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
/* per-target view of the CPU state, used below to read the PC */
1559 #if defined(TARGET_I386)
1560 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1561 CPUX86State
*env
= &x86_cpu
->env
;
1562 #elif defined(TARGET_PPC)
1563 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1564 CPUPPCState
*env
= &ppc_cpu
->env
;
1565 #elif defined(TARGET_SPARC)
1566 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1567 CPUSPARCState
*env
= &sparc_cpu
->env
;
1568 #elif defined(TARGET_MIPS)
1569 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1570 CPUMIPSState
*env
= &mips_cpu
->env
;
1571 #elif defined(TARGET_TRICORE)
1572 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1573 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1576 cpu_synchronize_state(cpu
);
1578 info
= g_malloc0(sizeof(*info
));
1579 info
->value
= g_malloc0(sizeof(*info
->value
));
1580 info
->value
->CPU
= cpu
->cpu_index
;
1581 info
->value
->current
= (cpu
== first_cpu
);
1582 info
->value
->halted
= cpu
->halted
;
1583 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1584 info
->value
->thread_id
= cpu
->thread_id
;
1585 #if defined(TARGET_I386)
1586 info
->value
->arch
= CPU_INFO_ARCH_X86
;
/* x86 reports the linear address: EIP plus the CS segment base */
1587 info
->value
->u
.x86
.pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1588 #elif defined(TARGET_PPC)
1589 info
->value
->arch
= CPU_INFO_ARCH_PPC
;
1590 info
->value
->u
.ppc
.nip
= env
->nip
;
1591 #elif defined(TARGET_SPARC)
1592 info
->value
->arch
= CPU_INFO_ARCH_SPARC
;
1593 info
->value
->u
.q_sparc
.pc
= env
->pc
;
1594 info
->value
->u
.q_sparc
.npc
= env
->npc
;
1595 #elif defined(TARGET_MIPS)
1596 info
->value
->arch
= CPU_INFO_ARCH_MIPS
;
1597 info
->value
->u
.q_mips
.PC
= env
->active_tc
.PC
;
1598 #elif defined(TARGET_TRICORE)
1599 info
->value
->arch
= CPU_INFO_ARCH_TRICORE
;
1600 info
->value
->u
.tricore
.PC
= env
->PC
;
1602 info
->value
->arch
= CPU_INFO_ARCH_OTHER
;
1605 /* XXX: waiting for the qapi to support GSList */
1607 head
= cur_item
= info
;
1609 cur_item
->next
= info
;
/*
 * QMP handler: dump @size bytes of guest memory starting at @addr, as seen
 * by the CPU selected through @cpu_index, into @filename.
 *
 * The CPU is looked up with qemu_get_cpu(), the file is opened with mode
 * "wb", memory is read with cpu_memory_rw_debug() and written with
 * fwrite().  Failures are reported through @errp: an invalid "cpu-index"
 * parameter, a file-open error, an invalid addr/size pair (quoting the
 * original @addr and @size), or QERR_IO_ERROR on a short write.
 *
 * NOTE(review): the local declarations, the handling of @has_cpu, the copy
 * loop header and the cleanup path are missing from this listing.
 */
1617 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1618 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
/* keep the caller's values for the error message */
1624 int64_t orig_addr
= addr
, orig_size
= size
;
1630 cpu
= qemu_get_cpu(cpu_index
);
1632 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1637 f
= fopen(filename
, "wb");
1639 error_setg_file_open(errp
, errno
, filename
);
1647 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1648 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1649 " specified", orig_addr
, orig_size
);
1652 if (fwrite(buf
, 1, l
, f
) != l
) {
1653 error_setg(errp
, QERR_IO_ERROR
);
/*
 * QMP handler: dump @size bytes of guest physical memory starting at @addr
 * into @filename.
 *
 * The file is opened with mode "wb", memory is read with
 * cpu_physical_memory_read() and written with fwrite().  A file-open
 * failure or a short write (QERR_IO_ERROR) is reported through errp.
 *
 * NOTE(review): the rest of the parameter list, the local declarations, the
 * copy loop header and the cleanup path are missing from this listing.
 */
1664 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1671 f
= fopen(filename
, "wb");
1673 error_setg_file_open(errp
, errno
, filename
);
1681 cpu_physical_memory_read(addr
, buf
, l
);
1682 if (fwrite(buf
, 1, l
, f
) != l
) {
1683 error_setg(errp
, QERR_IO_ERROR
);
/*
 * QMP handler injecting an NMI into the guest.
 *
 * On TARGET_I386 builds the NMI goes to an x86 CPU: raised directly with
 * cpu_interrupt(CPU_INTERRUPT_NMI) when the CPU has no APIC state,
 * delivered through apic_deliver_nmi() otherwise.  The other visible path
 * calls nmi_monitor_handle() for the monitor's current CPU index and
 * reports errors through @errp.
 *
 * NOTE(review): the CPU iteration on x86 and the preprocessor else/endif
 * lines are missing from this listing.
 */
1694 void qmp_inject_nmi(Error
**errp
)
1696 #if defined(TARGET_I386)
1700 X86CPU
*cpu
= X86_CPU(cs
);
1702 if (!cpu
->apic_state
) {
1703 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1705 apic_deliver_nmi(cpu
->apic_state
);
1709 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
/*
 * Print clock drift statistics to @f through @cpu_fprintf.
 *
 * The host/guest difference is (cpu_get_clock() - cpu_get_icount()) scaled
 * to milliseconds with SCALE_MS.  When icount_align_option is set, the
 * maximum guest delay (-max_delay) and advance (max_advance) are printed in
 * milliseconds as well; the "NA" lines are the alternative output.
 *
 * NOTE(review): any early-exit condition at the top, the else boundary and
 * the closing lines of the function are missing from this listing.
 */
1713 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1719 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1720 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1721 if (icount_align_option
) {
1722 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1723 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1725 cpu_fprintf(f
, "Max guest delay NA\n");
1726 cpu_fprintf(f
, "Max guest advance NA\n");