/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

/* Compensate for varying guest execution speed.  */
static int64_t qemu_icount_bias;
static int64_t vm_clock_warp_start;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

/* Only written by TCG thread */
static int64_t qemu_icount;

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

static TimersState timers_state;

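/* Locking discipline for the state above: cpu_ticks_prev and
 * cpu_ticks_offset are only touched with the BQL held, while
 * cpu_clock_offset may be read from any thread through
 * vm_clock_seqlock.  Writers take the seqlock while holding the BQL,
 * so the BQL doubles as the writer-side mutex of the seqlock.
 */
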
/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = qemu_icount;
    if (cpu) {
        CPUArchState *env = cpu->env_ptr;
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}

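/* Reader side of vm_clock_seqlock: retry until the read did not
 * overlap a writer, so a torn read of the 64-bit state above is
 * never returned.
 */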
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non increasing ticks may happen if the host uses
               software suspend */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ti;

    if (!timers_state.cpu_ticks_enabled) {
        ti = timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        ti += timers_state.cpu_clock_offset;
    }

    return ti;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
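
/* Feedback used by the adjustment below: virtual time is
 * (qemu_icount << icount_time_shift) + qemu_icount_bias, so
 * decrementing the shift halves the nanoseconds charged per emulated
 * instruction (virtual time slows down) and incrementing it doubles
 * them (virtual time speeds up).  At MAX_ICOUNT_SHIFT == 10 an
 * instruction costs 1024ns, roughly the 1MIPS floor noted above.
 */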
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

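/* Convert a nanosecond deadline into an instruction count at the
 * current shift, rounding up so the budget never undershoots the
 * deadline.
 */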
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

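/* Under qtest, warp in deadline-sized steps rather than jumping
 * straight to the destination, so every timer along the way fires in
 * order.
 */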
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = MIN(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time, (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids that the warps are visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

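/* Note that cpu_ticks_prev itself is not migrated; the "dummy" field
 * below appears to preserve the wire layout of a field that is no
 * longer migrated, keeping older streams compatible.
 */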
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        monitor_protocol_event(QEVENT_STOP, NULL);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

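/* The state below implements the hand-off of the Big QEMU Lock (BQL,
 * qemu_global_mutex) between the I/O thread and the vCPU threads.
 */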
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

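/* Run func synchronously on cpu's thread.  The work item can live on
 * this stack frame because the function does not return until wi.done
 * is set by flush_queued_work().
 */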
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

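/* Fire-and-forget variant: the work item is heap-allocated and
 * wi->free tells flush_queued_work() to g_free() it after running.
 */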
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
       /* Start accounting real time to the virtual clock if the CPUs
          are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

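/* TCG multiplexes every vCPU onto this single thread, so the
 * round-robin loop in tcg_exec_all() alone decides which guest CPU
 * makes progress.
 */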
static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                           QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

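/* In icount mode, each execution slice gets an instruction budget
 * derived from the next QEMU_CLOCK_VIRTUAL deadline: up to 0xffff
 * instructions go into icount_decr.u16.low (counted down by the
 * generated code) and the remainder is parked in icount_extra.
 */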
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}

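/* next_cpu persists across calls, so scheduling resumes with the CPU
 * after the one that last ran; that keeps the round-robin fair when
 * exit_request interrupts the loop partway through.
 */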
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void set_numa_modes(void)
{
    CPUState *cpu;
    int i;

    CPU_FOREACH(cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
                cpu->numa_node = i;
            }
        }
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(cpu, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);
        CPUX86State *env = &cpu->env;

        if (!env->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}