4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40 #include "qemu/seqlock.h"
43 #include "qemu/compatfd.h"
48 #include <sys/prctl.h>
51 #define PR_MCE_KILL 33
54 #ifndef PR_MCE_KILL_SET
55 #define PR_MCE_KILL_SET 1
58 #ifndef PR_MCE_KILL_EARLY
59 #define PR_MCE_KILL_EARLY 1
62 #endif /* CONFIG_LINUX */
64 static CPUState
*next_cpu
;
66 bool cpu_is_stopped(CPUState
*cpu
)
68 return cpu
->stopped
|| !runstate_is_running();
71 static bool cpu_thread_is_idle(CPUState
*cpu
)
73 if (cpu
->stop
|| cpu
->queued_work_first
) {
76 if (cpu_is_stopped(cpu
)) {
79 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
80 kvm_halt_in_kernel()) {
86 static bool all_cpu_threads_idle(void)
91 if (!cpu_thread_is_idle(cpu
)) {
98 /***********************************************************/
99 /* guest cycle counter */
101 /* Protected by TimersState seqlock */
103 /* Compensate for varying guest execution speed. */
104 static int64_t qemu_icount_bias
;
105 static int64_t vm_clock_warp_start
;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift
;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
111 /* Only written by TCG thread */
112 static int64_t qemu_icount
;
114 static QEMUTimer
*icount_rt_timer
;
115 static QEMUTimer
*icount_vm_timer
;
116 static QEMUTimer
*icount_warp_timer
;
118 typedef struct TimersState
{
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev
;
121 int64_t cpu_ticks_offset
;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
126 QemuSeqLock vm_clock_seqlock
;
127 int64_t cpu_clock_offset
;
128 int32_t cpu_ticks_enabled
;
132 static TimersState timers_state
;
134 /* Return the virtual CPU time, based on the instruction counter. */
135 static int64_t cpu_get_icount_locked(void)
138 CPUState
*cpu
= current_cpu
;
140 icount
= qemu_icount
;
142 if (!cpu_can_do_io(cpu
)) {
143 fprintf(stderr
, "Bad clock read\n");
145 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
147 return qemu_icount_bias
+ (icount
<< icount_time_shift
);
150 int64_t cpu_get_icount(void)
156 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
157 icount
= cpu_get_icount_locked();
158 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
163 /* return the host CPU cycle counter and handle stop/restart */
164 /* Caller must hold the BQL */
165 int64_t cpu_get_ticks(void)
170 return cpu_get_icount();
173 ticks
= timers_state
.cpu_ticks_offset
;
174 if (timers_state
.cpu_ticks_enabled
) {
175 ticks
+= cpu_get_real_ticks();
178 if (timers_state
.cpu_ticks_prev
> ticks
) {
179 /* Note: non increasing ticks may happen if the host uses
181 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
182 ticks
= timers_state
.cpu_ticks_prev
;
185 timers_state
.cpu_ticks_prev
= ticks
;
189 static int64_t cpu_get_clock_locked(void)
193 ticks
= timers_state
.cpu_clock_offset
;
194 if (timers_state
.cpu_ticks_enabled
) {
195 ticks
+= get_clock();
201 /* return the host CPU monotonic timer and handle stop/restart */
202 int64_t cpu_get_clock(void)
208 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
209 ti
= cpu_get_clock_locked();
210 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
215 /* enable cpu_get_ticks()
216 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
218 void cpu_enable_ticks(void)
220 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
221 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
222 if (!timers_state
.cpu_ticks_enabled
) {
223 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
224 timers_state
.cpu_clock_offset
-= get_clock();
225 timers_state
.cpu_ticks_enabled
= 1;
227 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
230 /* disable cpu_get_ticks() : the clock is stopped. You must not call
231 * cpu_get_ticks() after that.
232 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
234 void cpu_disable_ticks(void)
236 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
237 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
238 if (timers_state
.cpu_ticks_enabled
) {
239 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
240 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
241 timers_state
.cpu_ticks_enabled
= 0;
243 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
246 /* Correlation between real and virtual time is always going to be
247 fairly approximate, so ignore small variation.
248 When the guest is idle real and virtual time will be aligned in
250 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
252 static void icount_adjust(void)
258 /* Protected by TimersState mutex. */
259 static int64_t last_delta
;
261 /* If the VM is not running, then do nothing. */
262 if (!runstate_is_running()) {
266 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
267 cur_time
= cpu_get_clock_locked();
268 cur_icount
= cpu_get_icount_locked();
270 delta
= cur_icount
- cur_time
;
271 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
273 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
274 && icount_time_shift
> 0) {
275 /* The guest is getting too far ahead. Slow time down. */
279 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
280 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
281 /* The guest is getting too far behind. Speed time up. */
285 qemu_icount_bias
= cur_icount
- (qemu_icount
<< icount_time_shift
);
286 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
289 static void icount_adjust_rt(void *opaque
)
291 timer_mod(icount_rt_timer
,
292 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
296 static void icount_adjust_vm(void *opaque
)
298 timer_mod(icount_vm_timer
,
299 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
300 get_ticks_per_sec() / 10);
304 static int64_t qemu_icount_round(int64_t count
)
306 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
309 static void icount_warp_rt(void *opaque
)
311 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
312 * changes from -1 to another value, so the race here is okay.
314 if (atomic_read(&vm_clock_warp_start
) == -1) {
318 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
319 if (runstate_is_running()) {
320 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
323 warp_delta
= clock
- vm_clock_warp_start
;
324 if (use_icount
== 2) {
326 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
327 * far ahead of real time.
329 int64_t cur_time
= cpu_get_clock_locked();
330 int64_t cur_icount
= cpu_get_icount_locked();
331 int64_t delta
= cur_time
- cur_icount
;
332 warp_delta
= MIN(warp_delta
, delta
);
334 qemu_icount_bias
+= warp_delta
;
336 vm_clock_warp_start
= -1;
337 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
339 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
340 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
344 void qtest_clock_warp(int64_t dest
)
346 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
347 assert(qtest_enabled());
348 while (clock
< dest
) {
349 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
350 int64_t warp
= MIN(dest
- clock
, deadline
);
351 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
352 qemu_icount_bias
+= warp
;
353 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
355 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
356 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
358 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
361 void qemu_clock_warp(QEMUClockType type
)
367 * There are too many global variables to make the "warp" behavior
368 * applicable to other clocks. But a clock argument removes the
369 * need for if statements all over the place.
371 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
376 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
377 * This ensures that the deadline for the timer is computed correctly below.
378 * This also makes sure that the insn counter is synchronized before the
379 * CPU starts running, in case the CPU is woken by an event other than
380 * the earliest QEMU_CLOCK_VIRTUAL timer.
382 icount_warp_rt(NULL
);
383 timer_del(icount_warp_timer
);
384 if (!all_cpu_threads_idle()) {
388 if (qtest_enabled()) {
389 /* When testing, qtest commands advance icount. */
393 /* We want to use the earliest deadline from ALL vm_clocks */
394 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
395 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
402 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
403 * sleep. Otherwise, the CPU might be waiting for a future timer
404 * interrupt to wake it up, but the interrupt never comes because
405 * the vCPU isn't running any insns and thus doesn't advance the
406 * QEMU_CLOCK_VIRTUAL.
408 * An extreme solution for this problem would be to never let VCPUs
409 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
410 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
411 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
412 * after some "real" time (related to the time left until the next
413 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
414 * This avoids that the warps are visible externally; for example,
415 * you will not be sending network packets continuously instead of
418 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
419 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
420 vm_clock_warp_start
= clock
;
422 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
423 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
424 } else if (deadline
== 0) {
425 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
429 static const VMStateDescription vmstate_timers
= {
432 .minimum_version_id
= 1,
433 .minimum_version_id_old
= 1,
434 .fields
= (VMStateField
[]) {
435 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
436 VMSTATE_INT64(dummy
, TimersState
),
437 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
438 VMSTATE_END_OF_LIST()
442 void configure_icount(const char *option
)
444 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
445 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
450 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
451 icount_warp_rt
, NULL
);
452 if (strcmp(option
, "auto") != 0) {
453 icount_time_shift
= strtol(option
, NULL
, 0);
460 /* 125MIPS seems a reasonable initial guess at the guest speed.
461 It will be corrected fairly quickly anyway. */
462 icount_time_shift
= 3;
464 /* Have both realtime and virtual time triggers for speed adjustment.
465 The realtime trigger catches emulated time passing too slowly,
466 the virtual time trigger catches emulated time passing too fast.
467 Realtime triggers occur even when idle, so use them less frequently
469 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
470 icount_adjust_rt
, NULL
);
471 timer_mod(icount_rt_timer
,
472 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
473 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
474 icount_adjust_vm
, NULL
);
475 timer_mod(icount_vm_timer
,
476 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
477 get_ticks_per_sec() / 10);
480 /***********************************************************/
481 void hw_error(const char *fmt
, ...)
487 fprintf(stderr
, "qemu: hardware error: ");
488 vfprintf(stderr
, fmt
, ap
);
489 fprintf(stderr
, "\n");
491 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
492 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
498 void cpu_synchronize_all_states(void)
503 cpu_synchronize_state(cpu
);
507 void cpu_synchronize_all_post_reset(void)
512 cpu_synchronize_post_reset(cpu
);
516 void cpu_synchronize_all_post_init(void)
521 cpu_synchronize_post_init(cpu
);
525 static int do_vm_stop(RunState state
)
529 if (runstate_is_running()) {
533 vm_state_notify(0, state
);
534 monitor_protocol_event(QEVENT_STOP
, NULL
);
538 ret
= bdrv_flush_all();
543 static bool cpu_can_run(CPUState
*cpu
)
548 if (cpu_is_stopped(cpu
)) {
554 static void cpu_handle_guest_debug(CPUState
*cpu
)
556 gdb_set_stop_cpu(cpu
);
557 qemu_system_debug_request();
561 static void cpu_signal(int sig
)
564 cpu_exit(current_cpu
);
570 static void sigbus_reraise(void)
573 struct sigaction action
;
575 memset(&action
, 0, sizeof(action
));
576 action
.sa_handler
= SIG_DFL
;
577 if (!sigaction(SIGBUS
, &action
, NULL
)) {
580 sigaddset(&set
, SIGBUS
);
581 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
583 perror("Failed to re-raise SIGBUS!\n");
587 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
590 if (kvm_on_sigbus(siginfo
->ssi_code
,
591 (void *)(intptr_t)siginfo
->ssi_addr
)) {
596 static void qemu_init_sigbus(void)
598 struct sigaction action
;
600 memset(&action
, 0, sizeof(action
));
601 action
.sa_flags
= SA_SIGINFO
;
602 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
603 sigaction(SIGBUS
, &action
, NULL
);
605 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
608 static void qemu_kvm_eat_signals(CPUState
*cpu
)
610 struct timespec ts
= { 0, 0 };
616 sigemptyset(&waitset
);
617 sigaddset(&waitset
, SIG_IPI
);
618 sigaddset(&waitset
, SIGBUS
);
621 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
622 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
623 perror("sigtimedwait");
629 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
637 r
= sigpending(&chkset
);
639 perror("sigpending");
642 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
645 #else /* !CONFIG_LINUX */
647 static void qemu_init_sigbus(void)
651 static void qemu_kvm_eat_signals(CPUState
*cpu
)
654 #endif /* !CONFIG_LINUX */
657 static void dummy_signal(int sig
)
661 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
665 struct sigaction sigact
;
667 memset(&sigact
, 0, sizeof(sigact
));
668 sigact
.sa_handler
= dummy_signal
;
669 sigaction(SIG_IPI
, &sigact
, NULL
);
671 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
672 sigdelset(&set
, SIG_IPI
);
673 sigdelset(&set
, SIGBUS
);
674 r
= kvm_set_signal_mask(cpu
, &set
);
676 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
681 static void qemu_tcg_init_cpu_signals(void)
684 struct sigaction sigact
;
686 memset(&sigact
, 0, sizeof(sigact
));
687 sigact
.sa_handler
= cpu_signal
;
688 sigaction(SIG_IPI
, &sigact
, NULL
);
691 sigaddset(&set
, SIG_IPI
);
692 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
696 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
701 static void qemu_tcg_init_cpu_signals(void)
706 static QemuMutex qemu_global_mutex
;
707 static QemuCond qemu_io_proceeded_cond
;
708 static bool iothread_requesting_mutex
;
710 static QemuThread io_thread
;
712 static QemuThread
*tcg_cpu_thread
;
713 static QemuCond
*tcg_halt_cond
;
716 static QemuCond qemu_cpu_cond
;
718 static QemuCond qemu_pause_cond
;
719 static QemuCond qemu_work_cond
;
721 void qemu_init_cpu_loop(void)
724 qemu_cond_init(&qemu_cpu_cond
);
725 qemu_cond_init(&qemu_pause_cond
);
726 qemu_cond_init(&qemu_work_cond
);
727 qemu_cond_init(&qemu_io_proceeded_cond
);
728 qemu_mutex_init(&qemu_global_mutex
);
730 qemu_thread_get_self(&io_thread
);
733 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
735 struct qemu_work_item wi
;
737 if (qemu_cpu_is_self(cpu
)) {
745 if (cpu
->queued_work_first
== NULL
) {
746 cpu
->queued_work_first
= &wi
;
748 cpu
->queued_work_last
->next
= &wi
;
750 cpu
->queued_work_last
= &wi
;
756 CPUState
*self_cpu
= current_cpu
;
758 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
759 current_cpu
= self_cpu
;
763 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
765 struct qemu_work_item
*wi
;
767 if (qemu_cpu_is_self(cpu
)) {
772 wi
= g_malloc0(sizeof(struct qemu_work_item
));
776 if (cpu
->queued_work_first
== NULL
) {
777 cpu
->queued_work_first
= wi
;
779 cpu
->queued_work_last
->next
= wi
;
781 cpu
->queued_work_last
= wi
;
788 static void flush_queued_work(CPUState
*cpu
)
790 struct qemu_work_item
*wi
;
792 if (cpu
->queued_work_first
== NULL
) {
796 while ((wi
= cpu
->queued_work_first
)) {
797 cpu
->queued_work_first
= wi
->next
;
804 cpu
->queued_work_last
= NULL
;
805 qemu_cond_broadcast(&qemu_work_cond
);
808 static void qemu_wait_io_event_common(CPUState
*cpu
)
813 qemu_cond_signal(&qemu_pause_cond
);
815 flush_queued_work(cpu
);
816 cpu
->thread_kicked
= false;
819 static void qemu_tcg_wait_io_event(void)
823 while (all_cpu_threads_idle()) {
824 /* Start accounting real time to the virtual clock if the CPUs
826 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
827 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
830 while (iothread_requesting_mutex
) {
831 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
835 qemu_wait_io_event_common(cpu
);
839 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
841 while (cpu_thread_is_idle(cpu
)) {
842 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
845 qemu_kvm_eat_signals(cpu
);
846 qemu_wait_io_event_common(cpu
);
849 static void *qemu_kvm_cpu_thread_fn(void *arg
)
854 qemu_mutex_lock(&qemu_global_mutex
);
855 qemu_thread_get_self(cpu
->thread
);
856 cpu
->thread_id
= qemu_get_thread_id();
859 r
= kvm_init_vcpu(cpu
);
861 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
865 qemu_kvm_init_cpu_signals(cpu
);
867 /* signal CPU creation */
869 qemu_cond_signal(&qemu_cpu_cond
);
872 if (cpu_can_run(cpu
)) {
873 r
= kvm_cpu_exec(cpu
);
874 if (r
== EXCP_DEBUG
) {
875 cpu_handle_guest_debug(cpu
);
878 qemu_kvm_wait_io_event(cpu
);
884 static void *qemu_dummy_cpu_thread_fn(void *arg
)
887 fprintf(stderr
, "qtest is not supported under Windows\n");
894 qemu_mutex_lock_iothread();
895 qemu_thread_get_self(cpu
->thread
);
896 cpu
->thread_id
= qemu_get_thread_id();
898 sigemptyset(&waitset
);
899 sigaddset(&waitset
, SIG_IPI
);
901 /* signal CPU creation */
903 qemu_cond_signal(&qemu_cpu_cond
);
908 qemu_mutex_unlock_iothread();
911 r
= sigwait(&waitset
, &sig
);
912 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
917 qemu_mutex_lock_iothread();
919 qemu_wait_io_event_common(cpu
);
926 static void tcg_exec_all(void);
928 static void *qemu_tcg_cpu_thread_fn(void *arg
)
932 qemu_tcg_init_cpu_signals();
933 qemu_thread_get_self(cpu
->thread
);
935 qemu_mutex_lock(&qemu_global_mutex
);
937 cpu
->thread_id
= qemu_get_thread_id();
940 qemu_cond_signal(&qemu_cpu_cond
);
942 /* wait for initial kick-off after machine start */
943 while (QTAILQ_FIRST(&cpus
)->stopped
) {
944 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
946 /* process any pending work */
948 qemu_wait_io_event_common(cpu
);
956 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
959 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
962 qemu_tcg_wait_io_event();
968 static void qemu_cpu_kick_thread(CPUState
*cpu
)
973 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
975 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
979 if (!qemu_cpu_is_self(cpu
)) {
982 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
983 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
988 /* On multi-core systems, we are not sure that the thread is actually
989 * suspended until we can get the context.
991 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
992 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
998 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
999 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1007 void qemu_cpu_kick(CPUState
*cpu
)
1009 qemu_cond_broadcast(cpu
->halt_cond
);
1010 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1011 qemu_cpu_kick_thread(cpu
);
1012 cpu
->thread_kicked
= true;
1016 void qemu_cpu_kick_self(void)
1019 assert(current_cpu
);
1021 if (!current_cpu
->thread_kicked
) {
1022 qemu_cpu_kick_thread(current_cpu
);
1023 current_cpu
->thread_kicked
= true;
1030 bool qemu_cpu_is_self(CPUState
*cpu
)
1032 return qemu_thread_is_self(cpu
->thread
);
1035 static bool qemu_in_vcpu_thread(void)
1037 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1040 void qemu_mutex_lock_iothread(void)
1042 if (!tcg_enabled()) {
1043 qemu_mutex_lock(&qemu_global_mutex
);
1045 iothread_requesting_mutex
= true;
1046 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1047 qemu_cpu_kick_thread(first_cpu
);
1048 qemu_mutex_lock(&qemu_global_mutex
);
1050 iothread_requesting_mutex
= false;
1051 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1055 void qemu_mutex_unlock_iothread(void)
1057 qemu_mutex_unlock(&qemu_global_mutex
);
1060 static int all_vcpus_paused(void)
1065 if (!cpu
->stopped
) {
1073 void pause_all_vcpus(void)
1077 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1083 if (qemu_in_vcpu_thread()) {
1085 if (!kvm_enabled()) {
1088 cpu
->stopped
= true;
1094 while (!all_vcpus_paused()) {
1095 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1102 void cpu_resume(CPUState
*cpu
)
1105 cpu
->stopped
= false;
1109 void resume_all_vcpus(void)
1113 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1119 /* For temporary buffers for forming a name */
1120 #define VCPU_THREAD_NAME_SIZE 16
1122 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1124 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1126 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1128 /* share a single thread for all cpus with TCG */
1129 if (!tcg_cpu_thread
) {
1130 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1131 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1132 qemu_cond_init(cpu
->halt_cond
);
1133 tcg_halt_cond
= cpu
->halt_cond
;
1134 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1136 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1137 cpu
, QEMU_THREAD_JOINABLE
);
1139 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1141 while (!cpu
->created
) {
1142 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1144 tcg_cpu_thread
= cpu
->thread
;
1146 cpu
->thread
= tcg_cpu_thread
;
1147 cpu
->halt_cond
= tcg_halt_cond
;
1151 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1153 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1155 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1156 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1157 qemu_cond_init(cpu
->halt_cond
);
1158 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1160 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1161 cpu
, QEMU_THREAD_JOINABLE
);
1162 while (!cpu
->created
) {
1163 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1167 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1169 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1171 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1172 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1173 qemu_cond_init(cpu
->halt_cond
);
1174 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1176 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1177 QEMU_THREAD_JOINABLE
);
1178 while (!cpu
->created
) {
1179 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1183 void qemu_init_vcpu(CPUState
*cpu
)
1185 cpu
->nr_cores
= smp_cores
;
1186 cpu
->nr_threads
= smp_threads
;
1187 cpu
->stopped
= true;
1188 if (kvm_enabled()) {
1189 qemu_kvm_start_vcpu(cpu
);
1190 } else if (tcg_enabled()) {
1191 qemu_tcg_init_vcpu(cpu
);
1193 qemu_dummy_start_vcpu(cpu
);
1197 void cpu_stop_current(void)
1200 current_cpu
->stop
= false;
1201 current_cpu
->stopped
= true;
1202 cpu_exit(current_cpu
);
1203 qemu_cond_signal(&qemu_pause_cond
);
1207 int vm_stop(RunState state
)
1209 if (qemu_in_vcpu_thread()) {
1210 qemu_system_vmstop_request(state
);
1212 * FIXME: should not return to device code in case
1213 * vm_stop() has been requested.
1219 return do_vm_stop(state
);
1222 /* does a state transition even if the VM is already stopped,
1223 current state is forgotten forever */
1224 int vm_stop_force_state(RunState state
)
1226 if (runstate_is_running()) {
1227 return vm_stop(state
);
1229 runstate_set(state
);
1230 /* Make sure to return an error if the flush in a previous vm_stop()
1232 return bdrv_flush_all();
1236 static int tcg_cpu_exec(CPUArchState
*env
)
1238 CPUState
*cpu
= ENV_GET_CPU(env
);
1240 #ifdef CONFIG_PROFILER
1244 #ifdef CONFIG_PROFILER
1245 ti
= profile_getclock();
1251 qemu_icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
1252 cpu
->icount_decr
.u16
.low
= 0;
1253 cpu
->icount_extra
= 0;
1254 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1256 /* Maintain prior (possibly buggy) behaviour where if no deadline
1257 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1258 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1261 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1262 deadline
= INT32_MAX
;
1265 count
= qemu_icount_round(deadline
);
1266 qemu_icount
+= count
;
1267 decr
= (count
> 0xffff) ? 0xffff : count
;
1269 cpu
->icount_decr
.u16
.low
= decr
;
1270 cpu
->icount_extra
= count
;
1272 ret
= cpu_exec(env
);
1273 #ifdef CONFIG_PROFILER
1274 qemu_time
+= profile_getclock() - ti
;
1277 /* Fold pending instructions back into the
1278 instruction counter, and clear the interrupt flag. */
1279 qemu_icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
1280 cpu
->icount_decr
.u32
= 0;
1281 cpu
->icount_extra
= 0;
1286 static void tcg_exec_all(void)
1290 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1291 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1293 if (next_cpu
== NULL
) {
1294 next_cpu
= first_cpu
;
1296 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1297 CPUState
*cpu
= next_cpu
;
1298 CPUArchState
*env
= cpu
->env_ptr
;
1300 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1301 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1303 if (cpu_can_run(cpu
)) {
1304 r
= tcg_cpu_exec(env
);
1305 if (r
== EXCP_DEBUG
) {
1306 cpu_handle_guest_debug(cpu
);
1309 } else if (cpu
->stop
|| cpu
->stopped
) {
1316 void set_numa_modes(void)
1322 for (i
= 0; i
< nb_numa_nodes
; i
++) {
1323 if (test_bit(cpu
->cpu_index
, node_cpumask
[i
])) {
1330 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1332 /* XXX: implement xxx_cpu_list for targets that still miss it */
1333 #if defined(cpu_list)
1334 cpu_list(f
, cpu_fprintf
);
1338 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1340 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1345 #if defined(TARGET_I386)
1346 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1347 CPUX86State
*env
= &x86_cpu
->env
;
1348 #elif defined(TARGET_PPC)
1349 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1350 CPUPPCState
*env
= &ppc_cpu
->env
;
1351 #elif defined(TARGET_SPARC)
1352 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1353 CPUSPARCState
*env
= &sparc_cpu
->env
;
1354 #elif defined(TARGET_MIPS)
1355 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1356 CPUMIPSState
*env
= &mips_cpu
->env
;
1359 cpu_synchronize_state(cpu
);
1361 info
= g_malloc0(sizeof(*info
));
1362 info
->value
= g_malloc0(sizeof(*info
->value
));
1363 info
->value
->CPU
= cpu
->cpu_index
;
1364 info
->value
->current
= (cpu
== first_cpu
);
1365 info
->value
->halted
= cpu
->halted
;
1366 info
->value
->thread_id
= cpu
->thread_id
;
1367 #if defined(TARGET_I386)
1368 info
->value
->has_pc
= true;
1369 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1370 #elif defined(TARGET_PPC)
1371 info
->value
->has_nip
= true;
1372 info
->value
->nip
= env
->nip
;
1373 #elif defined(TARGET_SPARC)
1374 info
->value
->has_pc
= true;
1375 info
->value
->pc
= env
->pc
;
1376 info
->value
->has_npc
= true;
1377 info
->value
->npc
= env
->npc
;
1378 #elif defined(TARGET_MIPS)
1379 info
->value
->has_PC
= true;
1380 info
->value
->PC
= env
->active_tc
.PC
;
1383 /* XXX: waiting for the qapi to support GSList */
1385 head
= cur_item
= info
;
1387 cur_item
->next
= info
;
1395 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1396 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1407 cpu
= qemu_get_cpu(cpu_index
);
1409 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1414 f
= fopen(filename
, "wb");
1416 error_setg_file_open(errp
, errno
, filename
);
1424 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1425 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1428 if (fwrite(buf
, 1, l
, f
) != l
) {
1429 error_set(errp
, QERR_IO_ERROR
);
1440 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1447 f
= fopen(filename
, "wb");
1449 error_setg_file_open(errp
, errno
, filename
);
1457 cpu_physical_memory_rw(addr
, buf
, l
, 0);
1458 if (fwrite(buf
, 1, l
, f
) != l
) {
1459 error_set(errp
, QERR_IO_ERROR
);
1470 void qmp_inject_nmi(Error
**errp
)
1472 #if defined(TARGET_I386)
1476 X86CPU
*cpu
= X86_CPU(cs
);
1478 if (!cpu
->apic_state
) {
1479 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1481 apic_deliver_nmi(cpu
->apic_state
);
1484 #elif defined(TARGET_S390X)
1490 if (cpu
->env
.cpu_num
== monitor_get_cpu_index()) {
1491 if (s390_cpu_restart(S390_CPU(cs
)) == -1) {
1492 error_set(errp
, QERR_UNSUPPORTED
);
1499 error_set(errp
, QERR_UNSUPPORTED
);