4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
42 #include "qemu/compatfd.h"
47 #include <sys/prctl.h>
50 #define PR_MCE_KILL 33
53 #ifndef PR_MCE_KILL_SET
54 #define PR_MCE_KILL_SET 1
57 #ifndef PR_MCE_KILL_EARLY
58 #define PR_MCE_KILL_EARLY 1
61 #endif /* CONFIG_LINUX */
63 static CPUState
*next_cpu
;
65 bool cpu_is_stopped(CPUState
*cpu
)
67 return cpu
->stopped
|| !runstate_is_running();
70 static bool cpu_thread_is_idle(CPUState
*cpu
)
72 if (cpu
->stop
|| cpu
->queued_work_first
) {
75 if (cpu_is_stopped(cpu
)) {
78 if (!cpu
->halted
|| qemu_cpu_has_work(cpu
) ||
79 kvm_halt_in_kernel()) {
85 static bool all_cpu_threads_idle(void)
89 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
90 if (!cpu_thread_is_idle(cpu
)) {
97 /***********************************************************/
98 /* guest cycle counter */
100 /* Conversion factor from emulated instructions to virtual clock ticks. */
101 static int icount_time_shift
;
102 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
103 #define MAX_ICOUNT_SHIFT 10
104 /* Compensate for varying guest execution speed. */
105 static int64_t qemu_icount_bias
;
106 static QEMUTimer
*icount_rt_timer
;
107 static QEMUTimer
*icount_vm_timer
;
108 static QEMUTimer
*icount_warp_timer
;
109 static int64_t vm_clock_warp_start
;
110 static int64_t qemu_icount
;
112 typedef struct TimersState
{
113 int64_t cpu_ticks_prev
;
114 int64_t cpu_ticks_offset
;
115 int64_t cpu_clock_offset
;
116 int32_t cpu_ticks_enabled
;
120 static TimersState timers_state
;
122 /* Return the virtual CPU time, based on the instruction counter. */
123 int64_t cpu_get_icount(void)
126 CPUState
*cpu
= current_cpu
;
128 icount
= qemu_icount
;
130 CPUArchState
*env
= cpu
->env_ptr
;
131 if (!can_do_io(env
)) {
132 fprintf(stderr
, "Bad clock read\n");
134 icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
136 return qemu_icount_bias
+ (icount
<< icount_time_shift
);
139 /* return the host CPU cycle counter and handle stop/restart */
140 int64_t cpu_get_ticks(void)
143 return cpu_get_icount();
145 if (!timers_state
.cpu_ticks_enabled
) {
146 return timers_state
.cpu_ticks_offset
;
149 ticks
= cpu_get_real_ticks();
150 if (timers_state
.cpu_ticks_prev
> ticks
) {
151 /* Note: non increasing ticks may happen if the host uses
153 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
155 timers_state
.cpu_ticks_prev
= ticks
;
156 return ticks
+ timers_state
.cpu_ticks_offset
;
160 /* return the host CPU monotonic timer and handle stop/restart */
161 int64_t cpu_get_clock(void)
164 if (!timers_state
.cpu_ticks_enabled
) {
165 return timers_state
.cpu_clock_offset
;
168 return ti
+ timers_state
.cpu_clock_offset
;
172 /* enable cpu_get_ticks() */
173 void cpu_enable_ticks(void)
175 if (!timers_state
.cpu_ticks_enabled
) {
176 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
177 timers_state
.cpu_clock_offset
-= get_clock();
178 timers_state
.cpu_ticks_enabled
= 1;
182 /* disable cpu_get_ticks() : the clock is stopped. You must not call
183 cpu_get_ticks() after that. */
184 void cpu_disable_ticks(void)
186 if (timers_state
.cpu_ticks_enabled
) {
187 timers_state
.cpu_ticks_offset
= cpu_get_ticks();
188 timers_state
.cpu_clock_offset
= cpu_get_clock();
189 timers_state
.cpu_ticks_enabled
= 0;
193 /* Correlation between real and virtual time is always going to be
194 fairly approximate, so ignore small variation.
195 When the guest is idle real and virtual time will be aligned in
197 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
199 static void icount_adjust(void)
204 static int64_t last_delta
;
205 /* If the VM is not running, then do nothing. */
206 if (!runstate_is_running()) {
209 cur_time
= cpu_get_clock();
210 cur_icount
= qemu_get_clock_ns(vm_clock
);
211 delta
= cur_icount
- cur_time
;
212 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
214 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
215 && icount_time_shift
> 0) {
216 /* The guest is getting too far ahead. Slow time down. */
220 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
221 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
222 /* The guest is getting too far behind. Speed time up. */
226 qemu_icount_bias
= cur_icount
- (qemu_icount
<< icount_time_shift
);
229 static void icount_adjust_rt(void *opaque
)
231 qemu_mod_timer(icount_rt_timer
,
232 qemu_get_clock_ms(rt_clock
) + 1000);
236 static void icount_adjust_vm(void *opaque
)
238 qemu_mod_timer(icount_vm_timer
,
239 qemu_get_clock_ns(vm_clock
) + get_ticks_per_sec() / 10);
243 static int64_t qemu_icount_round(int64_t count
)
245 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
248 static void icount_warp_rt(void *opaque
)
250 if (vm_clock_warp_start
== -1) {
254 if (runstate_is_running()) {
255 int64_t clock
= qemu_get_clock_ns(rt_clock
);
256 int64_t warp_delta
= clock
- vm_clock_warp_start
;
257 if (use_icount
== 1) {
258 qemu_icount_bias
+= warp_delta
;
261 * In adaptive mode, do not let the vm_clock run too
262 * far ahead of real time.
264 int64_t cur_time
= cpu_get_clock();
265 int64_t cur_icount
= qemu_get_clock_ns(vm_clock
);
266 int64_t delta
= cur_time
- cur_icount
;
267 qemu_icount_bias
+= MIN(warp_delta
, delta
);
269 if (qemu_clock_expired(vm_clock
)) {
273 vm_clock_warp_start
= -1;
276 void qtest_clock_warp(int64_t dest
)
278 int64_t clock
= qemu_get_clock_ns(vm_clock
);
279 assert(qtest_enabled());
280 while (clock
< dest
) {
281 int64_t deadline
= qemu_clock_deadline(vm_clock
);
282 int64_t warp
= MIN(dest
- clock
, deadline
);
283 qemu_icount_bias
+= warp
;
284 qemu_run_timers(vm_clock
);
285 clock
= qemu_get_clock_ns(vm_clock
);
290 void qemu_clock_warp(QEMUClock
*clock
)
295 * There are too many global variables to make the "warp" behavior
296 * applicable to other clocks. But a clock argument removes the
297 * need for if statements all over the place.
299 if (clock
!= vm_clock
|| !use_icount
) {
304 * If the CPUs have been sleeping, advance the vm_clock timer now. This
305 * ensures that the deadline for the timer is computed correctly below.
306 * This also makes sure that the insn counter is synchronized before the
307 * CPU starts running, in case the CPU is woken by an event other than
308 * the earliest vm_clock timer.
310 icount_warp_rt(NULL
);
311 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock
)) {
312 qemu_del_timer(icount_warp_timer
);
316 if (qtest_enabled()) {
317 /* When testing, qtest commands advance icount. */
321 vm_clock_warp_start
= qemu_get_clock_ns(rt_clock
);
322 deadline
= qemu_clock_deadline(vm_clock
);
325 * Ensure the vm_clock proceeds even when the virtual CPU goes to
326 * sleep. Otherwise, the CPU might be waiting for a future timer
327 * interrupt to wake it up, but the interrupt never comes because
328 * the vCPU isn't running any insns and thus doesn't advance the
331 * An extreme solution for this problem would be to never let VCPUs
332 * sleep in icount mode if there is a pending vm_clock timer; rather
333 * time could just advance to the next vm_clock event. Instead, we
334 * do stop VCPUs and only advance vm_clock after some "real" time,
335 * (related to the time left until the next event) has passed. This
336 * rt_clock timer will do this. This avoids that the warps are too
337 * visible externally---for example, you will not be sending network
338 * packets continuously instead of every 100ms.
340 qemu_mod_timer(icount_warp_timer
, vm_clock_warp_start
+ deadline
);
346 static const VMStateDescription vmstate_timers
= {
349 .minimum_version_id
= 1,
350 .minimum_version_id_old
= 1,
351 .fields
= (VMStateField
[]) {
352 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
353 VMSTATE_INT64(dummy
, TimersState
),
354 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
355 VMSTATE_END_OF_LIST()
359 void configure_icount(const char *option
)
361 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
366 icount_warp_timer
= qemu_new_timer_ns(rt_clock
, icount_warp_rt
, NULL
);
367 if (strcmp(option
, "auto") != 0) {
368 icount_time_shift
= strtol(option
, NULL
, 0);
375 /* 125MIPS seems a reasonable initial guess at the guest speed.
376 It will be corrected fairly quickly anyway. */
377 icount_time_shift
= 3;
379 /* Have both realtime and virtual time triggers for speed adjustment.
380 The realtime trigger catches emulated time passing too slowly,
381 the virtual time trigger catches emulated time passing too fast.
382 Realtime triggers occur even when idle, so use them less frequently
384 icount_rt_timer
= qemu_new_timer_ms(rt_clock
, icount_adjust_rt
, NULL
);
385 qemu_mod_timer(icount_rt_timer
,
386 qemu_get_clock_ms(rt_clock
) + 1000);
387 icount_vm_timer
= qemu_new_timer_ns(vm_clock
, icount_adjust_vm
, NULL
);
388 qemu_mod_timer(icount_vm_timer
,
389 qemu_get_clock_ns(vm_clock
) + get_ticks_per_sec() / 10);
392 /***********************************************************/
393 void hw_error(const char *fmt
, ...)
399 fprintf(stderr
, "qemu: hardware error: ");
400 vfprintf(stderr
, fmt
, ap
);
401 fprintf(stderr
, "\n");
402 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
403 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
404 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
410 void cpu_synchronize_all_states(void)
414 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
415 cpu_synchronize_state(cpu
);
419 void cpu_synchronize_all_post_reset(void)
423 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
424 cpu_synchronize_post_reset(cpu
);
428 void cpu_synchronize_all_post_init(void)
432 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
433 cpu_synchronize_post_init(cpu
);
437 static int do_vm_stop(RunState state
)
441 if (runstate_is_running()) {
445 vm_state_notify(0, state
);
446 monitor_protocol_event(QEVENT_STOP
, NULL
);
450 ret
= bdrv_flush_all();
455 static bool cpu_can_run(CPUState
*cpu
)
460 if (cpu_is_stopped(cpu
)) {
466 static void cpu_handle_guest_debug(CPUState
*cpu
)
468 gdb_set_stop_cpu(cpu
);
469 qemu_system_debug_request();
473 static void cpu_signal(int sig
)
476 cpu_exit(current_cpu
);
482 static void sigbus_reraise(void)
485 struct sigaction action
;
487 memset(&action
, 0, sizeof(action
));
488 action
.sa_handler
= SIG_DFL
;
489 if (!sigaction(SIGBUS
, &action
, NULL
)) {
492 sigaddset(&set
, SIGBUS
);
493 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
495 perror("Failed to re-raise SIGBUS!\n");
499 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
502 if (kvm_on_sigbus(siginfo
->ssi_code
,
503 (void *)(intptr_t)siginfo
->ssi_addr
)) {
508 static void qemu_init_sigbus(void)
510 struct sigaction action
;
512 memset(&action
, 0, sizeof(action
));
513 action
.sa_flags
= SA_SIGINFO
;
514 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
515 sigaction(SIGBUS
, &action
, NULL
);
517 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
520 static void qemu_kvm_eat_signals(CPUState
*cpu
)
522 struct timespec ts
= { 0, 0 };
528 sigemptyset(&waitset
);
529 sigaddset(&waitset
, SIG_IPI
);
530 sigaddset(&waitset
, SIGBUS
);
533 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
534 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
535 perror("sigtimedwait");
541 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
549 r
= sigpending(&chkset
);
551 perror("sigpending");
554 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
557 #else /* !CONFIG_LINUX */
559 static void qemu_init_sigbus(void)
563 static void qemu_kvm_eat_signals(CPUState
*cpu
)
566 #endif /* !CONFIG_LINUX */
569 static void dummy_signal(int sig
)
573 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
577 struct sigaction sigact
;
579 memset(&sigact
, 0, sizeof(sigact
));
580 sigact
.sa_handler
= dummy_signal
;
581 sigaction(SIG_IPI
, &sigact
, NULL
);
583 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
584 sigdelset(&set
, SIG_IPI
);
585 sigdelset(&set
, SIGBUS
);
586 r
= kvm_set_signal_mask(cpu
, &set
);
588 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
593 static void qemu_tcg_init_cpu_signals(void)
596 struct sigaction sigact
;
598 memset(&sigact
, 0, sizeof(sigact
));
599 sigact
.sa_handler
= cpu_signal
;
600 sigaction(SIG_IPI
, &sigact
, NULL
);
603 sigaddset(&set
, SIG_IPI
);
604 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
608 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
613 static void qemu_tcg_init_cpu_signals(void)
618 static QemuMutex qemu_global_mutex
;
619 static QemuCond qemu_io_proceeded_cond
;
620 static bool iothread_requesting_mutex
;
622 static QemuThread io_thread
;
624 static QemuThread
*tcg_cpu_thread
;
625 static QemuCond
*tcg_halt_cond
;
628 static QemuCond qemu_cpu_cond
;
630 static QemuCond qemu_pause_cond
;
631 static QemuCond qemu_work_cond
;
633 void qemu_init_cpu_loop(void)
636 qemu_cond_init(&qemu_cpu_cond
);
637 qemu_cond_init(&qemu_pause_cond
);
638 qemu_cond_init(&qemu_work_cond
);
639 qemu_cond_init(&qemu_io_proceeded_cond
);
640 qemu_mutex_init(&qemu_global_mutex
);
642 qemu_thread_get_self(&io_thread
);
645 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
647 struct qemu_work_item wi
;
649 if (qemu_cpu_is_self(cpu
)) {
657 if (cpu
->queued_work_first
== NULL
) {
658 cpu
->queued_work_first
= &wi
;
660 cpu
->queued_work_last
->next
= &wi
;
662 cpu
->queued_work_last
= &wi
;
668 CPUState
*self_cpu
= current_cpu
;
670 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
671 current_cpu
= self_cpu
;
675 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
677 struct qemu_work_item
*wi
;
679 if (qemu_cpu_is_self(cpu
)) {
684 wi
= g_malloc0(sizeof(struct qemu_work_item
));
688 if (cpu
->queued_work_first
== NULL
) {
689 cpu
->queued_work_first
= wi
;
691 cpu
->queued_work_last
->next
= wi
;
693 cpu
->queued_work_last
= wi
;
700 static void flush_queued_work(CPUState
*cpu
)
702 struct qemu_work_item
*wi
;
704 if (cpu
->queued_work_first
== NULL
) {
708 while ((wi
= cpu
->queued_work_first
)) {
709 cpu
->queued_work_first
= wi
->next
;
716 cpu
->queued_work_last
= NULL
;
717 qemu_cond_broadcast(&qemu_work_cond
);
720 static void qemu_wait_io_event_common(CPUState
*cpu
)
725 qemu_cond_signal(&qemu_pause_cond
);
727 flush_queued_work(cpu
);
728 cpu
->thread_kicked
= false;
731 static void qemu_tcg_wait_io_event(void)
735 while (all_cpu_threads_idle()) {
736 /* Start accounting real time to the virtual clock if the CPUs
738 qemu_clock_warp(vm_clock
);
739 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
742 while (iothread_requesting_mutex
) {
743 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
746 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
747 qemu_wait_io_event_common(cpu
);
751 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
753 while (cpu_thread_is_idle(cpu
)) {
754 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
757 qemu_kvm_eat_signals(cpu
);
758 qemu_wait_io_event_common(cpu
);
761 static void *qemu_kvm_cpu_thread_fn(void *arg
)
766 qemu_mutex_lock(&qemu_global_mutex
);
767 qemu_thread_get_self(cpu
->thread
);
768 cpu
->thread_id
= qemu_get_thread_id();
771 r
= kvm_init_vcpu(cpu
);
773 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
777 qemu_kvm_init_cpu_signals(cpu
);
779 /* signal CPU creation */
781 qemu_cond_signal(&qemu_cpu_cond
);
784 if (cpu_can_run(cpu
)) {
785 r
= kvm_cpu_exec(cpu
);
786 if (r
== EXCP_DEBUG
) {
787 cpu_handle_guest_debug(cpu
);
790 qemu_kvm_wait_io_event(cpu
);
796 static void *qemu_dummy_cpu_thread_fn(void *arg
)
799 fprintf(stderr
, "qtest is not supported under Windows\n");
806 qemu_mutex_lock_iothread();
807 qemu_thread_get_self(cpu
->thread
);
808 cpu
->thread_id
= qemu_get_thread_id();
810 sigemptyset(&waitset
);
811 sigaddset(&waitset
, SIG_IPI
);
813 /* signal CPU creation */
815 qemu_cond_signal(&qemu_cpu_cond
);
820 qemu_mutex_unlock_iothread();
823 r
= sigwait(&waitset
, &sig
);
824 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
829 qemu_mutex_lock_iothread();
831 qemu_wait_io_event_common(cpu
);
838 static void tcg_exec_all(void);
840 static void tcg_signal_cpu_creation(CPUState
*cpu
, void *data
)
842 cpu
->thread_id
= qemu_get_thread_id();
846 static void *qemu_tcg_cpu_thread_fn(void *arg
)
850 qemu_tcg_init_cpu_signals();
851 qemu_thread_get_self(cpu
->thread
);
853 qemu_mutex_lock(&qemu_global_mutex
);
854 qemu_for_each_cpu(tcg_signal_cpu_creation
, NULL
);
855 qemu_cond_signal(&qemu_cpu_cond
);
857 /* wait for initial kick-off after machine start */
858 while (first_cpu
->stopped
) {
859 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
861 /* process any pending work */
862 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
863 qemu_wait_io_event_common(cpu
);
869 if (use_icount
&& qemu_clock_deadline(vm_clock
) <= 0) {
872 qemu_tcg_wait_io_event();
878 static void qemu_cpu_kick_thread(CPUState
*cpu
)
883 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
885 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
889 if (!qemu_cpu_is_self(cpu
)) {
892 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
893 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
898 /* On multi-core systems, we are not sure that the thread is actually
899 * suspended until we can get the context.
901 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
902 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
908 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
909 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
917 void qemu_cpu_kick(CPUState
*cpu
)
919 qemu_cond_broadcast(cpu
->halt_cond
);
920 if (!tcg_enabled() && !cpu
->thread_kicked
) {
921 qemu_cpu_kick_thread(cpu
);
922 cpu
->thread_kicked
= true;
926 void qemu_cpu_kick_self(void)
931 if (!current_cpu
->thread_kicked
) {
932 qemu_cpu_kick_thread(current_cpu
);
933 current_cpu
->thread_kicked
= true;
940 bool qemu_cpu_is_self(CPUState
*cpu
)
942 return qemu_thread_is_self(cpu
->thread
);
945 static bool qemu_in_vcpu_thread(void)
947 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
950 void qemu_mutex_lock_iothread(void)
952 if (!tcg_enabled()) {
953 qemu_mutex_lock(&qemu_global_mutex
);
955 iothread_requesting_mutex
= true;
956 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
957 qemu_cpu_kick_thread(first_cpu
);
958 qemu_mutex_lock(&qemu_global_mutex
);
960 iothread_requesting_mutex
= false;
961 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
965 void qemu_mutex_unlock_iothread(void)
967 qemu_mutex_unlock(&qemu_global_mutex
);
970 static int all_vcpus_paused(void)
972 CPUState
*cpu
= first_cpu
;
984 void pause_all_vcpus(void)
986 CPUState
*cpu
= first_cpu
;
988 qemu_clock_enable(vm_clock
, false);
995 if (qemu_in_vcpu_thread()) {
997 if (!kvm_enabled()) {
1001 cpu
->stopped
= true;
1002 cpu
= cpu
->next_cpu
;
1008 while (!all_vcpus_paused()) {
1009 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1013 cpu
= cpu
->next_cpu
;
1018 void cpu_resume(CPUState
*cpu
)
1021 cpu
->stopped
= false;
1025 void resume_all_vcpus(void)
1027 CPUState
*cpu
= first_cpu
;
1029 qemu_clock_enable(vm_clock
, true);
1032 cpu
= cpu
->next_cpu
;
1036 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1038 /* share a single thread for all cpus with TCG */
1039 if (!tcg_cpu_thread
) {
1040 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1041 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1042 qemu_cond_init(cpu
->halt_cond
);
1043 tcg_halt_cond
= cpu
->halt_cond
;
1044 qemu_thread_create(cpu
->thread
, qemu_tcg_cpu_thread_fn
, cpu
,
1045 QEMU_THREAD_JOINABLE
);
1047 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1049 while (!cpu
->created
) {
1050 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1052 tcg_cpu_thread
= cpu
->thread
;
1054 cpu
->thread
= tcg_cpu_thread
;
1055 cpu
->halt_cond
= tcg_halt_cond
;
1059 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1061 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1062 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1063 qemu_cond_init(cpu
->halt_cond
);
1064 qemu_thread_create(cpu
->thread
, qemu_kvm_cpu_thread_fn
, cpu
,
1065 QEMU_THREAD_JOINABLE
);
1066 while (!cpu
->created
) {
1067 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1071 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1073 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1074 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1075 qemu_cond_init(cpu
->halt_cond
);
1076 qemu_thread_create(cpu
->thread
, qemu_dummy_cpu_thread_fn
, cpu
,
1077 QEMU_THREAD_JOINABLE
);
1078 while (!cpu
->created
) {
1079 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1083 void qemu_init_vcpu(CPUState
*cpu
)
1085 cpu
->nr_cores
= smp_cores
;
1086 cpu
->nr_threads
= smp_threads
;
1087 cpu
->stopped
= true;
1088 if (kvm_enabled()) {
1089 qemu_kvm_start_vcpu(cpu
);
1090 } else if (tcg_enabled()) {
1091 qemu_tcg_init_vcpu(cpu
);
1093 qemu_dummy_start_vcpu(cpu
);
1097 void cpu_stop_current(void)
1100 current_cpu
->stop
= false;
1101 current_cpu
->stopped
= true;
1102 cpu_exit(current_cpu
);
1103 qemu_cond_signal(&qemu_pause_cond
);
1107 int vm_stop(RunState state
)
1109 if (qemu_in_vcpu_thread()) {
1110 qemu_system_vmstop_request(state
);
1112 * FIXME: should not return to device code in case
1113 * vm_stop() has been requested.
1119 return do_vm_stop(state
);
1122 /* does a state transition even if the VM is already stopped,
1123 current state is forgotten forever */
1124 int vm_stop_force_state(RunState state
)
1126 if (runstate_is_running()) {
1127 return vm_stop(state
);
1129 runstate_set(state
);
1130 /* Make sure to return an error if the flush in a previous vm_stop()
1132 return bdrv_flush_all();
1136 static int tcg_cpu_exec(CPUArchState
*env
)
1139 #ifdef CONFIG_PROFILER
1143 #ifdef CONFIG_PROFILER
1144 ti
= profile_getclock();
1149 qemu_icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
1150 env
->icount_decr
.u16
.low
= 0;
1151 env
->icount_extra
= 0;
1152 count
= qemu_icount_round(qemu_clock_deadline(vm_clock
));
1153 qemu_icount
+= count
;
1154 decr
= (count
> 0xffff) ? 0xffff : count
;
1156 env
->icount_decr
.u16
.low
= decr
;
1157 env
->icount_extra
= count
;
1159 ret
= cpu_exec(env
);
1160 #ifdef CONFIG_PROFILER
1161 qemu_time
+= profile_getclock() - ti
;
1164 /* Fold pending instructions back into the
1165 instruction counter, and clear the interrupt flag. */
1166 qemu_icount
-= (env
->icount_decr
.u16
.low
1167 + env
->icount_extra
);
1168 env
->icount_decr
.u32
= 0;
1169 env
->icount_extra
= 0;
1174 static void tcg_exec_all(void)
1178 /* Account partial waits to the vm_clock. */
1179 qemu_clock_warp(vm_clock
);
1181 if (next_cpu
== NULL
) {
1182 next_cpu
= first_cpu
;
1184 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= next_cpu
->next_cpu
) {
1185 CPUState
*cpu
= next_cpu
;
1186 CPUArchState
*env
= cpu
->env_ptr
;
1188 qemu_clock_enable(vm_clock
,
1189 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1191 if (cpu_can_run(cpu
)) {
1192 r
= tcg_cpu_exec(env
);
1193 if (r
== EXCP_DEBUG
) {
1194 cpu_handle_guest_debug(cpu
);
1197 } else if (cpu
->stop
|| cpu
->stopped
) {
1204 void set_numa_modes(void)
1209 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
1210 for (i
= 0; i
< nb_numa_nodes
; i
++) {
1211 if (test_bit(cpu
->cpu_index
, node_cpumask
[i
])) {
1218 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1220 /* XXX: implement xxx_cpu_list for targets that still miss it */
1221 #if defined(cpu_list)
1222 cpu_list(f
, cpu_fprintf
);
1226 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1228 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1231 for (cpu
= first_cpu
; cpu
!= NULL
; cpu
= cpu
->next_cpu
) {
1233 #if defined(TARGET_I386)
1234 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1235 CPUX86State
*env
= &x86_cpu
->env
;
1236 #elif defined(TARGET_PPC)
1237 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1238 CPUPPCState
*env
= &ppc_cpu
->env
;
1239 #elif defined(TARGET_SPARC)
1240 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1241 CPUSPARCState
*env
= &sparc_cpu
->env
;
1242 #elif defined(TARGET_MIPS)
1243 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1244 CPUMIPSState
*env
= &mips_cpu
->env
;
1247 cpu_synchronize_state(cpu
);
1249 info
= g_malloc0(sizeof(*info
));
1250 info
->value
= g_malloc0(sizeof(*info
->value
));
1251 info
->value
->CPU
= cpu
->cpu_index
;
1252 info
->value
->current
= (cpu
== first_cpu
);
1253 info
->value
->halted
= cpu
->halted
;
1254 info
->value
->thread_id
= cpu
->thread_id
;
1255 #if defined(TARGET_I386)
1256 info
->value
->has_pc
= true;
1257 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1258 #elif defined(TARGET_PPC)
1259 info
->value
->has_nip
= true;
1260 info
->value
->nip
= env
->nip
;
1261 #elif defined(TARGET_SPARC)
1262 info
->value
->has_pc
= true;
1263 info
->value
->pc
= env
->pc
;
1264 info
->value
->has_npc
= true;
1265 info
->value
->npc
= env
->npc
;
1266 #elif defined(TARGET_MIPS)
1267 info
->value
->has_PC
= true;
1268 info
->value
->PC
= env
->active_tc
.PC
;
1271 /* XXX: waiting for the qapi to support GSList */
1273 head
= cur_item
= info
;
1275 cur_item
->next
= info
;
1283 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1284 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1295 cpu
= qemu_get_cpu(cpu_index
);
1297 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1302 f
= fopen(filename
, "wb");
1304 error_setg_file_open(errp
, errno
, filename
);
1312 cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0);
1313 if (fwrite(buf
, 1, l
, f
) != l
) {
1314 error_set(errp
, QERR_IO_ERROR
);
1325 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1332 f
= fopen(filename
, "wb");
1334 error_setg_file_open(errp
, errno
, filename
);
1342 cpu_physical_memory_rw(addr
, buf
, l
, 0);
1343 if (fwrite(buf
, 1, l
, f
) != l
) {
1344 error_set(errp
, QERR_IO_ERROR
);
1355 void qmp_inject_nmi(Error
**errp
)
1357 #if defined(TARGET_I386)
1360 for (cs
= first_cpu
; cs
!= NULL
; cs
= cs
->next_cpu
) {
1361 X86CPU
*cpu
= X86_CPU(cs
);
1362 CPUX86State
*env
= &cpu
->env
;
1364 if (!env
->apic_state
) {
1365 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1367 apic_deliver_nmi(env
->apic_state
);
1371 error_set(errp
, QERR_UNSUPPORTED
);