4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
45 #include "qemu/compatfd.h"
50 #include <sys/prctl.h>
53 #define PR_MCE_KILL 33
56 #ifndef PR_MCE_KILL_SET
57 #define PR_MCE_KILL_SET 1
60 #ifndef PR_MCE_KILL_EARLY
61 #define PR_MCE_KILL_EARLY 1
64 #endif /* CONFIG_LINUX */
66 static CPUState
*next_cpu
;
70 bool cpu_is_stopped(CPUState
*cpu
)
72 return cpu
->stopped
|| !runstate_is_running();
75 static bool cpu_thread_is_idle(CPUState
*cpu
)
77 if (cpu
->stop
|| cpu
->queued_work_first
) {
80 if (cpu_is_stopped(cpu
)) {
83 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
84 kvm_halt_in_kernel()) {
90 static bool all_cpu_threads_idle(void)
95 if (!cpu_thread_is_idle(cpu
)) {
102 /***********************************************************/
103 /* guest cycle counter */
105 /* Protected by TimersState seqlock */
107 static int64_t vm_clock_warp_start
= -1;
108 /* Conversion factor from emulated instructions to virtual clock ticks. */
109 static int icount_time_shift
;
110 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
111 #define MAX_ICOUNT_SHIFT 10
113 static QEMUTimer
*icount_rt_timer
;
114 static QEMUTimer
*icount_vm_timer
;
115 static QEMUTimer
*icount_warp_timer
;
117 typedef struct TimersState
{
118 /* Protected by BQL. */
119 int64_t cpu_ticks_prev
;
120 int64_t cpu_ticks_offset
;
122 /* cpu_clock_offset can be read out of BQL, so protect it with
125 QemuSeqLock vm_clock_seqlock
;
126 int64_t cpu_clock_offset
;
127 int32_t cpu_ticks_enabled
;
130 /* Compensate for varying guest execution speed. */
131 int64_t qemu_icount_bias
;
132 /* Only written by TCG thread */
136 static TimersState timers_state
;
138 /* Return the virtual CPU time, based on the instruction counter. */
139 static int64_t cpu_get_icount_locked(void)
142 CPUState
*cpu
= current_cpu
;
144 icount
= timers_state
.qemu_icount
;
146 if (!cpu_can_do_io(cpu
)) {
147 fprintf(stderr
, "Bad clock read\n");
149 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
151 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
154 int64_t cpu_get_icount(void)
160 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
161 icount
= cpu_get_icount_locked();
162 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
167 int64_t cpu_icount_to_ns(int64_t icount
)
169 return icount
<< icount_time_shift
;
172 /* return the host CPU cycle counter and handle stop/restart */
173 /* Caller must hold the BQL */
174 int64_t cpu_get_ticks(void)
179 return cpu_get_icount();
182 ticks
= timers_state
.cpu_ticks_offset
;
183 if (timers_state
.cpu_ticks_enabled
) {
184 ticks
+= cpu_get_real_ticks();
187 if (timers_state
.cpu_ticks_prev
> ticks
) {
188 /* Note: non increasing ticks may happen if the host uses
190 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
191 ticks
= timers_state
.cpu_ticks_prev
;
194 timers_state
.cpu_ticks_prev
= ticks
;
198 static int64_t cpu_get_clock_locked(void)
202 ticks
= timers_state
.cpu_clock_offset
;
203 if (timers_state
.cpu_ticks_enabled
) {
204 ticks
+= get_clock();
210 /* return the host CPU monotonic timer and handle stop/restart */
211 int64_t cpu_get_clock(void)
217 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
218 ti
= cpu_get_clock_locked();
219 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
224 /* return the offset between the host clock and virtual CPU clock */
225 int64_t cpu_get_clock_offset(void)
231 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
232 ti
= timers_state
.cpu_clock_offset
;
233 if (!timers_state
.cpu_ticks_enabled
) {
236 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
241 /* enable cpu_get_ticks()
242 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
244 void cpu_enable_ticks(void)
246 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
247 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
248 if (!timers_state
.cpu_ticks_enabled
) {
249 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
250 timers_state
.cpu_clock_offset
-= get_clock();
251 timers_state
.cpu_ticks_enabled
= 1;
253 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
256 /* disable cpu_get_ticks() : the clock is stopped. You must not call
257 * cpu_get_ticks() after that.
258 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
260 void cpu_disable_ticks(void)
262 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
263 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
264 if (timers_state
.cpu_ticks_enabled
) {
265 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
266 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
267 timers_state
.cpu_ticks_enabled
= 0;
269 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
272 /* Correlation between real and virtual time is always going to be
273 fairly approximate, so ignore small variation.
274 When the guest is idle real and virtual time will be aligned in
276 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
278 static void icount_adjust(void)
284 /* Protected by TimersState mutex. */
285 static int64_t last_delta
;
287 /* If the VM is not running, then do nothing. */
288 if (!runstate_is_running()) {
292 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
293 cur_time
= cpu_get_clock_locked();
294 cur_icount
= cpu_get_icount_locked();
296 delta
= cur_icount
- cur_time
;
297 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
299 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
300 && icount_time_shift
> 0) {
301 /* The guest is getting too far ahead. Slow time down. */
305 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
306 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
307 /* The guest is getting too far behind. Speed time up. */
311 timers_state
.qemu_icount_bias
= cur_icount
312 - (timers_state
.qemu_icount
<< icount_time_shift
);
313 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
316 static void icount_adjust_rt(void *opaque
)
318 timer_mod(icount_rt_timer
,
319 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
323 static void icount_adjust_vm(void *opaque
)
325 timer_mod(icount_vm_timer
,
326 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
327 get_ticks_per_sec() / 10);
331 static int64_t qemu_icount_round(int64_t count
)
333 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
336 static void icount_warp_rt(void *opaque
)
338 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
339 * changes from -1 to another value, so the race here is okay.
341 if (atomic_read(&vm_clock_warp_start
) == -1) {
345 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
346 if (runstate_is_running()) {
347 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
350 warp_delta
= clock
- vm_clock_warp_start
;
351 if (use_icount
== 2) {
353 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
354 * far ahead of real time.
356 int64_t cur_time
= cpu_get_clock_locked();
357 int64_t cur_icount
= cpu_get_icount_locked();
358 int64_t delta
= cur_time
- cur_icount
;
359 warp_delta
= MIN(warp_delta
, delta
);
361 timers_state
.qemu_icount_bias
+= warp_delta
;
363 vm_clock_warp_start
= -1;
364 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
366 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
367 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
371 void qtest_clock_warp(int64_t dest
)
373 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
374 assert(qtest_enabled());
375 while (clock
< dest
) {
376 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
377 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
378 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
379 timers_state
.qemu_icount_bias
+= warp
;
380 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
382 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
383 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
385 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
388 void qemu_clock_warp(QEMUClockType type
)
394 * There are too many global variables to make the "warp" behavior
395 * applicable to other clocks. But a clock argument removes the
396 * need for if statements all over the place.
398 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
403 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
404 * This ensures that the deadline for the timer is computed correctly below.
405 * This also makes sure that the insn counter is synchronized before the
406 * CPU starts running, in case the CPU is woken by an event other than
407 * the earliest QEMU_CLOCK_VIRTUAL timer.
409 icount_warp_rt(NULL
);
410 timer_del(icount_warp_timer
);
411 if (!all_cpu_threads_idle()) {
415 if (qtest_enabled()) {
416 /* When testing, qtest commands advance icount. */
420 /* We want to use the earliest deadline from ALL vm_clocks */
421 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
422 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
429 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
430 * sleep. Otherwise, the CPU might be waiting for a future timer
431 * interrupt to wake it up, but the interrupt never comes because
432 * the vCPU isn't running any insns and thus doesn't advance the
433 * QEMU_CLOCK_VIRTUAL.
435 * An extreme solution for this problem would be to never let VCPUs
436 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
437 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
438 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
439 * after some "real" time (related to the time left until the next
440 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
441 * This avoids that the warps are visible externally; for example,
442 * you will not be sending network packets continuously instead of
445 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
446 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
447 vm_clock_warp_start
= clock
;
449 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
450 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
451 } else if (deadline
== 0) {
452 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
456 static bool icount_state_needed(void *opaque
)
462 * This is a subsection for icount migration.
464 static const VMStateDescription icount_vmstate_timers
= {
465 .name
= "timer/icount",
467 .minimum_version_id
= 1,
468 .fields
= (VMStateField
[]) {
469 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
470 VMSTATE_INT64(qemu_icount
, TimersState
),
471 VMSTATE_END_OF_LIST()
475 static const VMStateDescription vmstate_timers
= {
478 .minimum_version_id
= 1,
479 .fields
= (VMStateField
[]) {
480 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
481 VMSTATE_INT64(dummy
, TimersState
),
482 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
483 VMSTATE_END_OF_LIST()
485 .subsections
= (VMStateSubsection
[]) {
487 .vmsd
= &icount_vmstate_timers
,
488 .needed
= icount_state_needed
,
495 void configure_icount(QemuOpts
*opts
, Error
**errp
)
498 char *rem_str
= NULL
;
500 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
501 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
502 option
= qemu_opt_get(opts
, "shift");
504 if (qemu_opt_get(opts
, "align") != NULL
) {
505 error_setg(errp
, "Please specify shift option when using align");
509 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
510 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
511 icount_warp_rt
, NULL
);
512 if (strcmp(option
, "auto") != 0) {
514 icount_time_shift
= strtol(option
, &rem_str
, 0);
515 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
516 error_setg(errp
, "icount: Invalid shift value");
520 } else if (icount_align_option
) {
521 error_setg(errp
, "shift=auto and align=on are incompatible");
526 /* 125MIPS seems a reasonable initial guess at the guest speed.
527 It will be corrected fairly quickly anyway. */
528 icount_time_shift
= 3;
530 /* Have both realtime and virtual time triggers for speed adjustment.
531 The realtime trigger catches emulated time passing too slowly,
532 the virtual time trigger catches emulated time passing too fast.
533 Realtime triggers occur even when idle, so use them less frequently
535 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
536 icount_adjust_rt
, NULL
);
537 timer_mod(icount_rt_timer
,
538 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
539 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
540 icount_adjust_vm
, NULL
);
541 timer_mod(icount_vm_timer
,
542 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
543 get_ticks_per_sec() / 10);
546 /***********************************************************/
547 void hw_error(const char *fmt
, ...)
553 fprintf(stderr
, "qemu: hardware error: ");
554 vfprintf(stderr
, fmt
, ap
);
555 fprintf(stderr
, "\n");
557 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
558 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
564 void cpu_synchronize_all_states(void)
569 cpu_synchronize_state(cpu
);
573 void cpu_synchronize_all_post_reset(void)
578 cpu_synchronize_post_reset(cpu
);
582 void cpu_synchronize_all_post_init(void)
587 cpu_synchronize_post_init(cpu
);
591 static int do_vm_stop(RunState state
)
595 if (runstate_is_running()) {
599 vm_state_notify(0, state
);
600 qapi_event_send_stop(&error_abort
);
604 ret
= bdrv_flush_all();
609 static bool cpu_can_run(CPUState
*cpu
)
614 if (cpu_is_stopped(cpu
)) {
620 static void cpu_handle_guest_debug(CPUState
*cpu
)
622 gdb_set_stop_cpu(cpu
);
623 qemu_system_debug_request();
627 static void cpu_signal(int sig
)
630 cpu_exit(current_cpu
);
636 static void sigbus_reraise(void)
639 struct sigaction action
;
641 memset(&action
, 0, sizeof(action
));
642 action
.sa_handler
= SIG_DFL
;
643 if (!sigaction(SIGBUS
, &action
, NULL
)) {
646 sigaddset(&set
, SIGBUS
);
647 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
649 perror("Failed to re-raise SIGBUS!\n");
653 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
656 if (kvm_on_sigbus(siginfo
->ssi_code
,
657 (void *)(intptr_t)siginfo
->ssi_addr
)) {
662 static void qemu_init_sigbus(void)
664 struct sigaction action
;
666 memset(&action
, 0, sizeof(action
));
667 action
.sa_flags
= SA_SIGINFO
;
668 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
669 sigaction(SIGBUS
, &action
, NULL
);
671 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
674 static void qemu_kvm_eat_signals(CPUState
*cpu
)
676 struct timespec ts
= { 0, 0 };
682 sigemptyset(&waitset
);
683 sigaddset(&waitset
, SIG_IPI
);
684 sigaddset(&waitset
, SIGBUS
);
687 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
688 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
689 perror("sigtimedwait");
695 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
703 r
= sigpending(&chkset
);
705 perror("sigpending");
708 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
711 #else /* !CONFIG_LINUX */
713 static void qemu_init_sigbus(void)
717 static void qemu_kvm_eat_signals(CPUState
*cpu
)
720 #endif /* !CONFIG_LINUX */
723 static void dummy_signal(int sig
)
727 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
731 struct sigaction sigact
;
733 memset(&sigact
, 0, sizeof(sigact
));
734 sigact
.sa_handler
= dummy_signal
;
735 sigaction(SIG_IPI
, &sigact
, NULL
);
737 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
738 sigdelset(&set
, SIG_IPI
);
739 sigdelset(&set
, SIGBUS
);
740 r
= kvm_set_signal_mask(cpu
, &set
);
742 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
747 static void qemu_tcg_init_cpu_signals(void)
750 struct sigaction sigact
;
752 memset(&sigact
, 0, sizeof(sigact
));
753 sigact
.sa_handler
= cpu_signal
;
754 sigaction(SIG_IPI
, &sigact
, NULL
);
757 sigaddset(&set
, SIG_IPI
);
758 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
762 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
767 static void qemu_tcg_init_cpu_signals(void)
772 static QemuMutex qemu_global_mutex
;
773 static QemuCond qemu_io_proceeded_cond
;
774 static bool iothread_requesting_mutex
;
776 static QemuThread io_thread
;
778 static QemuThread
*tcg_cpu_thread
;
779 static QemuCond
*tcg_halt_cond
;
782 static QemuCond qemu_cpu_cond
;
784 static QemuCond qemu_pause_cond
;
785 static QemuCond qemu_work_cond
;
787 void qemu_init_cpu_loop(void)
790 qemu_cond_init(&qemu_cpu_cond
);
791 qemu_cond_init(&qemu_pause_cond
);
792 qemu_cond_init(&qemu_work_cond
);
793 qemu_cond_init(&qemu_io_proceeded_cond
);
794 qemu_mutex_init(&qemu_global_mutex
);
796 qemu_thread_get_self(&io_thread
);
799 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
801 struct qemu_work_item wi
;
803 if (qemu_cpu_is_self(cpu
)) {
811 if (cpu
->queued_work_first
== NULL
) {
812 cpu
->queued_work_first
= &wi
;
814 cpu
->queued_work_last
->next
= &wi
;
816 cpu
->queued_work_last
= &wi
;
822 CPUState
*self_cpu
= current_cpu
;
824 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
825 current_cpu
= self_cpu
;
829 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
831 struct qemu_work_item
*wi
;
833 if (qemu_cpu_is_self(cpu
)) {
838 wi
= g_malloc0(sizeof(struct qemu_work_item
));
842 if (cpu
->queued_work_first
== NULL
) {
843 cpu
->queued_work_first
= wi
;
845 cpu
->queued_work_last
->next
= wi
;
847 cpu
->queued_work_last
= wi
;
854 static void flush_queued_work(CPUState
*cpu
)
856 struct qemu_work_item
*wi
;
858 if (cpu
->queued_work_first
== NULL
) {
862 while ((wi
= cpu
->queued_work_first
)) {
863 cpu
->queued_work_first
= wi
->next
;
870 cpu
->queued_work_last
= NULL
;
871 qemu_cond_broadcast(&qemu_work_cond
);
874 static void qemu_wait_io_event_common(CPUState
*cpu
)
879 qemu_cond_signal(&qemu_pause_cond
);
881 flush_queued_work(cpu
);
882 cpu
->thread_kicked
= false;
885 static void qemu_tcg_wait_io_event(void)
889 while (all_cpu_threads_idle()) {
890 /* Start accounting real time to the virtual clock if the CPUs
892 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
893 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
896 while (iothread_requesting_mutex
) {
897 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
901 qemu_wait_io_event_common(cpu
);
905 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
907 while (cpu_thread_is_idle(cpu
)) {
908 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
911 qemu_kvm_eat_signals(cpu
);
912 qemu_wait_io_event_common(cpu
);
915 static void *qemu_kvm_cpu_thread_fn(void *arg
)
920 qemu_mutex_lock(&qemu_global_mutex
);
921 qemu_thread_get_self(cpu
->thread
);
922 cpu
->thread_id
= qemu_get_thread_id();
925 r
= kvm_init_vcpu(cpu
);
927 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
931 qemu_kvm_init_cpu_signals(cpu
);
933 /* signal CPU creation */
935 qemu_cond_signal(&qemu_cpu_cond
);
938 if (cpu_can_run(cpu
)) {
939 r
= kvm_cpu_exec(cpu
);
940 if (r
== EXCP_DEBUG
) {
941 cpu_handle_guest_debug(cpu
);
944 qemu_kvm_wait_io_event(cpu
);
950 static void *qemu_dummy_cpu_thread_fn(void *arg
)
953 fprintf(stderr
, "qtest is not supported under Windows\n");
960 qemu_mutex_lock_iothread();
961 qemu_thread_get_self(cpu
->thread
);
962 cpu
->thread_id
= qemu_get_thread_id();
964 sigemptyset(&waitset
);
965 sigaddset(&waitset
, SIG_IPI
);
967 /* signal CPU creation */
969 qemu_cond_signal(&qemu_cpu_cond
);
974 qemu_mutex_unlock_iothread();
977 r
= sigwait(&waitset
, &sig
);
978 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
983 qemu_mutex_lock_iothread();
985 qemu_wait_io_event_common(cpu
);
992 static void tcg_exec_all(void);
994 static void *qemu_tcg_cpu_thread_fn(void *arg
)
998 qemu_tcg_init_cpu_signals();
999 qemu_thread_get_self(cpu
->thread
);
1001 qemu_mutex_lock(&qemu_global_mutex
);
1003 cpu
->thread_id
= qemu_get_thread_id();
1004 cpu
->created
= true;
1006 qemu_cond_signal(&qemu_cpu_cond
);
1008 /* wait for initial kick-off after machine start */
1009 while (QTAILQ_FIRST(&cpus
)->stopped
) {
1010 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1012 /* process any pending work */
1014 qemu_wait_io_event_common(cpu
);
1022 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1024 if (deadline
== 0) {
1025 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1028 qemu_tcg_wait_io_event();
1034 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1039 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1041 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1045 if (!qemu_cpu_is_self(cpu
)) {
1048 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1049 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1054 /* On multi-core systems, we are not sure that the thread is actually
1055 * suspended until we can get the context.
1057 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1058 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1064 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1065 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1073 void qemu_cpu_kick(CPUState
*cpu
)
1075 qemu_cond_broadcast(cpu
->halt_cond
);
1076 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1077 qemu_cpu_kick_thread(cpu
);
1078 cpu
->thread_kicked
= true;
1082 void qemu_cpu_kick_self(void)
1085 assert(current_cpu
);
1087 if (!current_cpu
->thread_kicked
) {
1088 qemu_cpu_kick_thread(current_cpu
);
1089 current_cpu
->thread_kicked
= true;
1096 bool qemu_cpu_is_self(CPUState
*cpu
)
1098 return qemu_thread_is_self(cpu
->thread
);
1101 static bool qemu_in_vcpu_thread(void)
1103 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1106 void qemu_mutex_lock_iothread(void)
1108 if (!tcg_enabled()) {
1109 qemu_mutex_lock(&qemu_global_mutex
);
1111 iothread_requesting_mutex
= true;
1112 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1113 qemu_cpu_kick_thread(first_cpu
);
1114 qemu_mutex_lock(&qemu_global_mutex
);
1116 iothread_requesting_mutex
= false;
1117 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1121 void qemu_mutex_unlock_iothread(void)
1123 qemu_mutex_unlock(&qemu_global_mutex
);
1126 static int all_vcpus_paused(void)
1131 if (!cpu
->stopped
) {
1139 void pause_all_vcpus(void)
1143 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1149 if (qemu_in_vcpu_thread()) {
1151 if (!kvm_enabled()) {
1154 cpu
->stopped
= true;
1160 while (!all_vcpus_paused()) {
1161 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1168 void cpu_resume(CPUState
*cpu
)
1171 cpu
->stopped
= false;
1175 void resume_all_vcpus(void)
1179 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1185 /* For temporary buffers for forming a name */
1186 #define VCPU_THREAD_NAME_SIZE 16
1188 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1190 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1192 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1194 /* share a single thread for all cpus with TCG */
1195 if (!tcg_cpu_thread
) {
1196 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1197 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1198 qemu_cond_init(cpu
->halt_cond
);
1199 tcg_halt_cond
= cpu
->halt_cond
;
1200 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1202 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1203 cpu
, QEMU_THREAD_JOINABLE
);
1205 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1207 while (!cpu
->created
) {
1208 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1210 tcg_cpu_thread
= cpu
->thread
;
1212 cpu
->thread
= tcg_cpu_thread
;
1213 cpu
->halt_cond
= tcg_halt_cond
;
1217 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1219 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1221 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1222 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1223 qemu_cond_init(cpu
->halt_cond
);
1224 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1226 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1227 cpu
, QEMU_THREAD_JOINABLE
);
1228 while (!cpu
->created
) {
1229 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1233 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1235 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1237 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1238 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1239 qemu_cond_init(cpu
->halt_cond
);
1240 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1242 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1243 QEMU_THREAD_JOINABLE
);
1244 while (!cpu
->created
) {
1245 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1249 void qemu_init_vcpu(CPUState
*cpu
)
1251 cpu
->nr_cores
= smp_cores
;
1252 cpu
->nr_threads
= smp_threads
;
1253 cpu
->stopped
= true;
1254 if (kvm_enabled()) {
1255 qemu_kvm_start_vcpu(cpu
);
1256 } else if (tcg_enabled()) {
1257 qemu_tcg_init_vcpu(cpu
);
1259 qemu_dummy_start_vcpu(cpu
);
1263 void cpu_stop_current(void)
1266 current_cpu
->stop
= false;
1267 current_cpu
->stopped
= true;
1268 cpu_exit(current_cpu
);
1269 qemu_cond_signal(&qemu_pause_cond
);
1273 int vm_stop(RunState state
)
1275 if (qemu_in_vcpu_thread()) {
1276 qemu_system_vmstop_request_prepare();
1277 qemu_system_vmstop_request(state
);
1279 * FIXME: should not return to device code in case
1280 * vm_stop() has been requested.
1286 return do_vm_stop(state
);
1289 /* does a state transition even if the VM is already stopped,
1290 current state is forgotten forever */
1291 int vm_stop_force_state(RunState state
)
1293 if (runstate_is_running()) {
1294 return vm_stop(state
);
1296 runstate_set(state
);
1297 /* Make sure to return an error if the flush in a previous vm_stop()
1299 return bdrv_flush_all();
1303 static int tcg_cpu_exec(CPUArchState
*env
)
1305 CPUState
*cpu
= ENV_GET_CPU(env
);
1307 #ifdef CONFIG_PROFILER
1311 #ifdef CONFIG_PROFILER
1312 ti
= profile_getclock();
1318 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1319 + cpu
->icount_extra
);
1320 cpu
->icount_decr
.u16
.low
= 0;
1321 cpu
->icount_extra
= 0;
1322 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1324 /* Maintain prior (possibly buggy) behaviour where if no deadline
1325 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1326 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1329 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1330 deadline
= INT32_MAX
;
1333 count
= qemu_icount_round(deadline
);
1334 timers_state
.qemu_icount
+= count
;
1335 decr
= (count
> 0xffff) ? 0xffff : count
;
1337 cpu
->icount_decr
.u16
.low
= decr
;
1338 cpu
->icount_extra
= count
;
1340 ret
= cpu_exec(env
);
1341 #ifdef CONFIG_PROFILER
1342 qemu_time
+= profile_getclock() - ti
;
1345 /* Fold pending instructions back into the
1346 instruction counter, and clear the interrupt flag. */
1347 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1348 + cpu
->icount_extra
);
1349 cpu
->icount_decr
.u32
= 0;
1350 cpu
->icount_extra
= 0;
1355 static void tcg_exec_all(void)
1359 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1360 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1362 if (next_cpu
== NULL
) {
1363 next_cpu
= first_cpu
;
1365 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1366 CPUState
*cpu
= next_cpu
;
1367 CPUArchState
*env
= cpu
->env_ptr
;
1369 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1370 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1372 if (cpu_can_run(cpu
)) {
1373 r
= tcg_cpu_exec(env
);
1374 if (r
== EXCP_DEBUG
) {
1375 cpu_handle_guest_debug(cpu
);
1378 } else if (cpu
->stop
|| cpu
->stopped
) {
1385 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1387 /* XXX: implement xxx_cpu_list for targets that still miss it */
1388 #if defined(cpu_list)
1389 cpu_list(f
, cpu_fprintf
);
1393 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1395 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1400 #if defined(TARGET_I386)
1401 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1402 CPUX86State
*env
= &x86_cpu
->env
;
1403 #elif defined(TARGET_PPC)
1404 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1405 CPUPPCState
*env
= &ppc_cpu
->env
;
1406 #elif defined(TARGET_SPARC)
1407 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1408 CPUSPARCState
*env
= &sparc_cpu
->env
;
1409 #elif defined(TARGET_MIPS)
1410 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1411 CPUMIPSState
*env
= &mips_cpu
->env
;
1414 cpu_synchronize_state(cpu
);
1416 info
= g_malloc0(sizeof(*info
));
1417 info
->value
= g_malloc0(sizeof(*info
->value
));
1418 info
->value
->CPU
= cpu
->cpu_index
;
1419 info
->value
->current
= (cpu
== first_cpu
);
1420 info
->value
->halted
= cpu
->halted
;
1421 info
->value
->thread_id
= cpu
->thread_id
;
1422 #if defined(TARGET_I386)
1423 info
->value
->has_pc
= true;
1424 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1425 #elif defined(TARGET_PPC)
1426 info
->value
->has_nip
= true;
1427 info
->value
->nip
= env
->nip
;
1428 #elif defined(TARGET_SPARC)
1429 info
->value
->has_pc
= true;
1430 info
->value
->pc
= env
->pc
;
1431 info
->value
->has_npc
= true;
1432 info
->value
->npc
= env
->npc
;
1433 #elif defined(TARGET_MIPS)
1434 info
->value
->has_PC
= true;
1435 info
->value
->PC
= env
->active_tc
.PC
;
1438 /* XXX: waiting for the qapi to support GSList */
1440 head
= cur_item
= info
;
1442 cur_item
->next
= info
;
1450 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1451 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1462 cpu
= qemu_get_cpu(cpu_index
);
1464 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1469 f
= fopen(filename
, "wb");
1471 error_setg_file_open(errp
, errno
, filename
);
1479 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1480 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1483 if (fwrite(buf
, 1, l
, f
) != l
) {
1484 error_set(errp
, QERR_IO_ERROR
);
1495 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1502 f
= fopen(filename
, "wb");
1504 error_setg_file_open(errp
, errno
, filename
);
1512 cpu_physical_memory_read(addr
, buf
, l
);
1513 if (fwrite(buf
, 1, l
, f
) != l
) {
1514 error_set(errp
, QERR_IO_ERROR
);
1525 void qmp_inject_nmi(Error
**errp
)
1527 #if defined(TARGET_I386)
1531 X86CPU
*cpu
= X86_CPU(cs
);
1533 if (!cpu
->apic_state
) {
1534 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1536 apic_deliver_nmi(cpu
->apic_state
);
1539 #elif defined(TARGET_S390X)
1545 if (cpu
->env
.cpu_num
== monitor_get_cpu_index()) {
1546 if (s390_cpu_restart(S390_CPU(cs
)) == -1) {
1547 error_set(errp
, QERR_UNSUPPORTED
);
1554 error_set(errp
, QERR_UNSUPPORTED
);
1558 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1564 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1565 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1566 if (icount_align_option
) {
1567 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1568 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1570 cpu_fprintf(f
, "Max guest delay NA\n");
1571 cpu_fprintf(f
, "Max guest advance NA\n");