4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40 #include "qemu/seqlock.h"
43 #include "qemu/compatfd.h"
48 #include <sys/prctl.h>
51 #define PR_MCE_KILL 33
54 #ifndef PR_MCE_KILL_SET
55 #define PR_MCE_KILL_SET 1
58 #ifndef PR_MCE_KILL_EARLY
59 #define PR_MCE_KILL_EARLY 1
62 #endif /* CONFIG_LINUX */
64 static CPUState
*next_cpu
;
66 bool cpu_is_stopped(CPUState
*cpu
)
68 return cpu
->stopped
|| !runstate_is_running();
71 static bool cpu_thread_is_idle(CPUState
*cpu
)
73 if (cpu
->stop
|| cpu
->queued_work_first
) {
76 if (cpu_is_stopped(cpu
)) {
79 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
80 kvm_halt_in_kernel()) {
86 static bool all_cpu_threads_idle(void)
91 if (!cpu_thread_is_idle(cpu
)) {
98 /***********************************************************/
99 /* guest cycle counter */
101 /* Protected by TimersState seqlock */
103 /* Compensate for varying guest execution speed. */
104 static int64_t qemu_icount_bias
;
105 static int64_t vm_clock_warp_start
;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift
;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
111 /* Only written by TCG thread */
112 static int64_t qemu_icount
;
114 static QEMUTimer
*icount_rt_timer
;
115 static QEMUTimer
*icount_vm_timer
;
116 static QEMUTimer
*icount_warp_timer
;
118 typedef struct TimersState
{
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev
;
121 int64_t cpu_ticks_offset
;
123 /* cpu_clock_offset can be read outside the BQL, so protect it with
126 QemuSeqLock vm_clock_seqlock
;
127 int64_t cpu_clock_offset
;
128 int32_t cpu_ticks_enabled
;
132 static TimersState timers_state
;
134 /* Return the virtual CPU time, based on the instruction counter. */
135 static int64_t cpu_get_icount_locked(void)
138 CPUState
*cpu
= current_cpu
;
140 icount
= qemu_icount
;
142 if (!cpu_can_do_io(cpu
)) {
143 fprintf(stderr
, "Bad clock read\n");
145 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
147 return qemu_icount_bias
+ (icount
<< icount_time_shift
);
150 int64_t cpu_get_icount(void)
156 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
157 icount
= cpu_get_icount_locked();
158 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
163 /* return the host CPU cycle counter and handle stop/restart */
164 /* Caller must hold the BQL */
165 int64_t cpu_get_ticks(void)
170 return cpu_get_icount();
173 ticks
= timers_state
.cpu_ticks_offset
;
174 if (timers_state
.cpu_ticks_enabled
) {
175 ticks
+= cpu_get_real_ticks();
178 if (timers_state
.cpu_ticks_prev
> ticks
) {
179 /* Note: non increasing ticks may happen if the host uses
181 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
182 ticks
= timers_state
.cpu_ticks_prev
;
185 timers_state
.cpu_ticks_prev
= ticks
;
189 static int64_t cpu_get_clock_locked(void)
193 ticks
= timers_state
.cpu_clock_offset
;
194 if (timers_state
.cpu_ticks_enabled
) {
195 ticks
+= get_clock();
201 /* return the host CPU monotonic timer and handle stop/restart */
202 int64_t cpu_get_clock(void)
208 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
209 ti
= cpu_get_clock_locked();
210 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
215 /* enable cpu_get_ticks()
216 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
218 void cpu_enable_ticks(void)
220 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
221 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
222 if (!timers_state
.cpu_ticks_enabled
) {
223 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
224 timers_state
.cpu_clock_offset
-= get_clock();
225 timers_state
.cpu_ticks_enabled
= 1;
227 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
230 /* disable cpu_get_ticks() : the clock is stopped. You must not call
231 * cpu_get_ticks() after that.
232 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
234 void cpu_disable_ticks(void)
236 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
237 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
238 if (timers_state
.cpu_ticks_enabled
) {
239 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
240 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
241 timers_state
.cpu_ticks_enabled
= 0;
243 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
246 /* Correlation between real and virtual time is always going to be
247 fairly approximate, so ignore small variation.
248 When the guest is idle real and virtual time will be aligned in
250 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
252 static void icount_adjust(void)
258 /* Protected by TimersState mutex. */
259 static int64_t last_delta
;
261 /* If the VM is not running, then do nothing. */
262 if (!runstate_is_running()) {
266 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
267 cur_time
= cpu_get_clock_locked();
268 cur_icount
= cpu_get_icount_locked();
270 delta
= cur_icount
- cur_time
;
271 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
273 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
274 && icount_time_shift
> 0) {
275 /* The guest is getting too far ahead. Slow time down. */
279 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
280 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
281 /* The guest is getting too far behind. Speed time up. */
285 qemu_icount_bias
= cur_icount
- (qemu_icount
<< icount_time_shift
);
286 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
289 static void icount_adjust_rt(void *opaque
)
291 timer_mod(icount_rt_timer
,
292 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
296 static void icount_adjust_vm(void *opaque
)
298 timer_mod(icount_vm_timer
,
299 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
300 get_ticks_per_sec() / 10);
304 static int64_t qemu_icount_round(int64_t count
)
306 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
309 static void icount_warp_rt(void *opaque
)
311 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
312 * changes from -1 to another value, so the race here is okay.
314 if (atomic_read(&vm_clock_warp_start
) == -1) {
318 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
319 if (runstate_is_running()) {
320 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
323 warp_delta
= clock
- vm_clock_warp_start
;
324 if (use_icount
== 2) {
326 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
327 * far ahead of real time.
329 int64_t cur_time
= cpu_get_clock_locked();
330 int64_t cur_icount
= cpu_get_icount_locked();
331 int64_t delta
= cur_time
- cur_icount
;
332 warp_delta
= MIN(warp_delta
, delta
);
334 qemu_icount_bias
+= warp_delta
;
336 vm_clock_warp_start
= -1;
337 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
339 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
340 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
344 void qtest_clock_warp(int64_t dest
)
346 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
347 assert(qtest_enabled());
348 while (clock
< dest
) {
349 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
350 int64_t warp
= MIN(dest
- clock
, deadline
);
351 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
352 qemu_icount_bias
+= warp
;
353 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
355 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
356 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
358 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
361 void qemu_clock_warp(QEMUClockType type
)
367 * There are too many global variables to make the "warp" behavior
368 * applicable to other clocks. But a clock argument removes the
369 * need for if statements all over the place.
371 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
376 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
377 * This ensures that the deadline for the timer is computed correctly below.
378 * This also makes sure that the insn counter is synchronized before the
379 * CPU starts running, in case the CPU is woken by an event other than
380 * the earliest QEMU_CLOCK_VIRTUAL timer.
382 icount_warp_rt(NULL
);
383 timer_del(icount_warp_timer
);
384 if (!all_cpu_threads_idle()) {
388 if (qtest_enabled()) {
389 /* When testing, qtest commands advance icount. */
393 /* We want to use the earliest deadline from ALL vm_clocks */
394 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
395 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
402 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
403 * sleep. Otherwise, the CPU might be waiting for a future timer
404 * interrupt to wake it up, but the interrupt never comes because
405 * the vCPU isn't running any insns and thus doesn't advance the
406 * QEMU_CLOCK_VIRTUAL.
408 * An extreme solution for this problem would be to never let VCPUs
409 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
410 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
411 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
412 * after some "real" time (related to the time left until the next
413 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
414 * This keeps the warps from being visible externally; for example,
415 * you will not be sending network packets continuously instead of
418 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
419 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
420 vm_clock_warp_start
= clock
;
422 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
423 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
424 } else if (deadline
== 0) {
425 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
429 static const VMStateDescription vmstate_timers
= {
432 .minimum_version_id
= 1,
433 .fields
= (VMStateField
[]) {
434 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
435 VMSTATE_INT64(dummy
, TimersState
),
436 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
437 VMSTATE_END_OF_LIST()
441 void configure_icount(const char *option
)
443 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
444 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
449 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
450 icount_warp_rt
, NULL
);
451 if (strcmp(option
, "auto") != 0) {
452 icount_time_shift
= strtol(option
, NULL
, 0);
459 /* 125MIPS seems a reasonable initial guess at the guest speed.
460 It will be corrected fairly quickly anyway. */
461 icount_time_shift
= 3;
463 /* Have both realtime and virtual time triggers for speed adjustment.
464 The realtime trigger catches emulated time passing too slowly,
465 the virtual time trigger catches emulated time passing too fast.
466 Realtime triggers occur even when idle, so use them less frequently
468 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
469 icount_adjust_rt
, NULL
);
470 timer_mod(icount_rt_timer
,
471 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
472 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
473 icount_adjust_vm
, NULL
);
474 timer_mod(icount_vm_timer
,
475 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
476 get_ticks_per_sec() / 10);
479 /***********************************************************/
480 void hw_error(const char *fmt
, ...)
486 fprintf(stderr
, "qemu: hardware error: ");
487 vfprintf(stderr
, fmt
, ap
);
488 fprintf(stderr
, "\n");
490 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
491 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
497 void cpu_synchronize_all_states(void)
502 cpu_synchronize_state(cpu
);
506 void cpu_synchronize_all_post_reset(void)
511 cpu_synchronize_post_reset(cpu
);
515 void cpu_synchronize_all_post_init(void)
520 cpu_synchronize_post_init(cpu
);
524 static int do_vm_stop(RunState state
)
528 if (runstate_is_running()) {
532 vm_state_notify(0, state
);
533 monitor_protocol_event(QEVENT_STOP
, NULL
);
537 ret
= bdrv_flush_all();
542 static bool cpu_can_run(CPUState
*cpu
)
547 if (cpu_is_stopped(cpu
)) {
553 static void cpu_handle_guest_debug(CPUState
*cpu
)
555 gdb_set_stop_cpu(cpu
);
556 qemu_system_debug_request();
560 static void cpu_signal(int sig
)
563 cpu_exit(current_cpu
);
569 static void sigbus_reraise(void)
572 struct sigaction action
;
574 memset(&action
, 0, sizeof(action
));
575 action
.sa_handler
= SIG_DFL
;
576 if (!sigaction(SIGBUS
, &action
, NULL
)) {
579 sigaddset(&set
, SIGBUS
);
580 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
582 perror("Failed to re-raise SIGBUS!\n");
586 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
589 if (kvm_on_sigbus(siginfo
->ssi_code
,
590 (void *)(intptr_t)siginfo
->ssi_addr
)) {
595 static void qemu_init_sigbus(void)
597 struct sigaction action
;
599 memset(&action
, 0, sizeof(action
));
600 action
.sa_flags
= SA_SIGINFO
;
601 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
602 sigaction(SIGBUS
, &action
, NULL
);
604 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
607 static void qemu_kvm_eat_signals(CPUState
*cpu
)
609 struct timespec ts
= { 0, 0 };
615 sigemptyset(&waitset
);
616 sigaddset(&waitset
, SIG_IPI
);
617 sigaddset(&waitset
, SIGBUS
);
620 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
621 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
622 perror("sigtimedwait");
628 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
636 r
= sigpending(&chkset
);
638 perror("sigpending");
641 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
644 #else /* !CONFIG_LINUX */
646 static void qemu_init_sigbus(void)
650 static void qemu_kvm_eat_signals(CPUState
*cpu
)
653 #endif /* !CONFIG_LINUX */
656 static void dummy_signal(int sig
)
660 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
664 struct sigaction sigact
;
666 memset(&sigact
, 0, sizeof(sigact
));
667 sigact
.sa_handler
= dummy_signal
;
668 sigaction(SIG_IPI
, &sigact
, NULL
);
670 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
671 sigdelset(&set
, SIG_IPI
);
672 sigdelset(&set
, SIGBUS
);
673 r
= kvm_set_signal_mask(cpu
, &set
);
675 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
680 static void qemu_tcg_init_cpu_signals(void)
683 struct sigaction sigact
;
685 memset(&sigact
, 0, sizeof(sigact
));
686 sigact
.sa_handler
= cpu_signal
;
687 sigaction(SIG_IPI
, &sigact
, NULL
);
690 sigaddset(&set
, SIG_IPI
);
691 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
695 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
700 static void qemu_tcg_init_cpu_signals(void)
705 static QemuMutex qemu_global_mutex
;
706 static QemuCond qemu_io_proceeded_cond
;
707 static bool iothread_requesting_mutex
;
709 static QemuThread io_thread
;
711 static QemuThread
*tcg_cpu_thread
;
712 static QemuCond
*tcg_halt_cond
;
715 static QemuCond qemu_cpu_cond
;
717 static QemuCond qemu_pause_cond
;
718 static QemuCond qemu_work_cond
;
720 void qemu_init_cpu_loop(void)
723 qemu_cond_init(&qemu_cpu_cond
);
724 qemu_cond_init(&qemu_pause_cond
);
725 qemu_cond_init(&qemu_work_cond
);
726 qemu_cond_init(&qemu_io_proceeded_cond
);
727 qemu_mutex_init(&qemu_global_mutex
);
729 qemu_thread_get_self(&io_thread
);
732 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
734 struct qemu_work_item wi
;
736 if (qemu_cpu_is_self(cpu
)) {
744 if (cpu
->queued_work_first
== NULL
) {
745 cpu
->queued_work_first
= &wi
;
747 cpu
->queued_work_last
->next
= &wi
;
749 cpu
->queued_work_last
= &wi
;
755 CPUState
*self_cpu
= current_cpu
;
757 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
758 current_cpu
= self_cpu
;
762 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
764 struct qemu_work_item
*wi
;
766 if (qemu_cpu_is_self(cpu
)) {
771 wi
= g_malloc0(sizeof(struct qemu_work_item
));
775 if (cpu
->queued_work_first
== NULL
) {
776 cpu
->queued_work_first
= wi
;
778 cpu
->queued_work_last
->next
= wi
;
780 cpu
->queued_work_last
= wi
;
787 static void flush_queued_work(CPUState
*cpu
)
789 struct qemu_work_item
*wi
;
791 if (cpu
->queued_work_first
== NULL
) {
795 while ((wi
= cpu
->queued_work_first
)) {
796 cpu
->queued_work_first
= wi
->next
;
803 cpu
->queued_work_last
= NULL
;
804 qemu_cond_broadcast(&qemu_work_cond
);
807 static void qemu_wait_io_event_common(CPUState
*cpu
)
812 qemu_cond_signal(&qemu_pause_cond
);
814 flush_queued_work(cpu
);
815 cpu
->thread_kicked
= false;
818 static void qemu_tcg_wait_io_event(void)
822 while (all_cpu_threads_idle()) {
823 /* Start accounting real time to the virtual clock if the CPUs
825 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
826 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
829 while (iothread_requesting_mutex
) {
830 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
834 qemu_wait_io_event_common(cpu
);
838 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
840 while (cpu_thread_is_idle(cpu
)) {
841 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
844 qemu_kvm_eat_signals(cpu
);
845 qemu_wait_io_event_common(cpu
);
848 static void *qemu_kvm_cpu_thread_fn(void *arg
)
853 qemu_mutex_lock(&qemu_global_mutex
);
854 qemu_thread_get_self(cpu
->thread
);
855 cpu
->thread_id
= qemu_get_thread_id();
858 r
= kvm_init_vcpu(cpu
);
860 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
864 qemu_kvm_init_cpu_signals(cpu
);
866 /* signal CPU creation */
868 qemu_cond_signal(&qemu_cpu_cond
);
871 if (cpu_can_run(cpu
)) {
872 r
= kvm_cpu_exec(cpu
);
873 if (r
== EXCP_DEBUG
) {
874 cpu_handle_guest_debug(cpu
);
877 qemu_kvm_wait_io_event(cpu
);
883 static void *qemu_dummy_cpu_thread_fn(void *arg
)
886 fprintf(stderr
, "qtest is not supported under Windows\n");
893 qemu_mutex_lock_iothread();
894 qemu_thread_get_self(cpu
->thread
);
895 cpu
->thread_id
= qemu_get_thread_id();
897 sigemptyset(&waitset
);
898 sigaddset(&waitset
, SIG_IPI
);
900 /* signal CPU creation */
902 qemu_cond_signal(&qemu_cpu_cond
);
907 qemu_mutex_unlock_iothread();
910 r
= sigwait(&waitset
, &sig
);
911 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
916 qemu_mutex_lock_iothread();
918 qemu_wait_io_event_common(cpu
);
925 static void tcg_exec_all(void);
927 static void *qemu_tcg_cpu_thread_fn(void *arg
)
931 qemu_tcg_init_cpu_signals();
932 qemu_thread_get_self(cpu
->thread
);
934 qemu_mutex_lock(&qemu_global_mutex
);
936 cpu
->thread_id
= qemu_get_thread_id();
939 qemu_cond_signal(&qemu_cpu_cond
);
941 /* wait for initial kick-off after machine start */
942 while (QTAILQ_FIRST(&cpus
)->stopped
) {
943 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
945 /* process any pending work */
947 qemu_wait_io_event_common(cpu
);
955 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
958 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
961 qemu_tcg_wait_io_event();
967 static void qemu_cpu_kick_thread(CPUState
*cpu
)
972 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
974 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
978 if (!qemu_cpu_is_self(cpu
)) {
981 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
982 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
987 /* On multi-core systems, we are not sure that the thread is actually
988 * suspended until we can get the context.
990 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
991 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
997 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
998 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1006 void qemu_cpu_kick(CPUState
*cpu
)
1008 qemu_cond_broadcast(cpu
->halt_cond
);
1009 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1010 qemu_cpu_kick_thread(cpu
);
1011 cpu
->thread_kicked
= true;
1015 void qemu_cpu_kick_self(void)
1018 assert(current_cpu
);
1020 if (!current_cpu
->thread_kicked
) {
1021 qemu_cpu_kick_thread(current_cpu
);
1022 current_cpu
->thread_kicked
= true;
1029 bool qemu_cpu_is_self(CPUState
*cpu
)
1031 return qemu_thread_is_self(cpu
->thread
);
1034 static bool qemu_in_vcpu_thread(void)
1036 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1039 void qemu_mutex_lock_iothread(void)
1041 if (!tcg_enabled()) {
1042 qemu_mutex_lock(&qemu_global_mutex
);
1044 iothread_requesting_mutex
= true;
1045 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1046 qemu_cpu_kick_thread(first_cpu
);
1047 qemu_mutex_lock(&qemu_global_mutex
);
1049 iothread_requesting_mutex
= false;
1050 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1054 void qemu_mutex_unlock_iothread(void)
1056 qemu_mutex_unlock(&qemu_global_mutex
);
1059 static int all_vcpus_paused(void)
1064 if (!cpu
->stopped
) {
1072 void pause_all_vcpus(void)
1076 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1082 if (qemu_in_vcpu_thread()) {
1084 if (!kvm_enabled()) {
1087 cpu
->stopped
= true;
1093 while (!all_vcpus_paused()) {
1094 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1101 void cpu_resume(CPUState
*cpu
)
1104 cpu
->stopped
= false;
1108 void resume_all_vcpus(void)
1112 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1118 /* For temporary buffers for forming a name */
1119 #define VCPU_THREAD_NAME_SIZE 16
1121 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1123 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1125 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1127 /* share a single thread for all cpus with TCG */
1128 if (!tcg_cpu_thread
) {
1129 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1130 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1131 qemu_cond_init(cpu
->halt_cond
);
1132 tcg_halt_cond
= cpu
->halt_cond
;
1133 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1135 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1136 cpu
, QEMU_THREAD_JOINABLE
);
1138 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1140 while (!cpu
->created
) {
1141 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1143 tcg_cpu_thread
= cpu
->thread
;
1145 cpu
->thread
= tcg_cpu_thread
;
1146 cpu
->halt_cond
= tcg_halt_cond
;
1150 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1152 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1154 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1155 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1156 qemu_cond_init(cpu
->halt_cond
);
1157 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1159 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1160 cpu
, QEMU_THREAD_JOINABLE
);
1161 while (!cpu
->created
) {
1162 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1166 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1168 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1170 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1171 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1172 qemu_cond_init(cpu
->halt_cond
);
1173 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1175 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1176 QEMU_THREAD_JOINABLE
);
1177 while (!cpu
->created
) {
1178 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1182 void qemu_init_vcpu(CPUState
*cpu
)
1184 cpu
->nr_cores
= smp_cores
;
1185 cpu
->nr_threads
= smp_threads
;
1186 cpu
->stopped
= true;
1187 if (kvm_enabled()) {
1188 qemu_kvm_start_vcpu(cpu
);
1189 } else if (tcg_enabled()) {
1190 qemu_tcg_init_vcpu(cpu
);
1192 qemu_dummy_start_vcpu(cpu
);
1196 void cpu_stop_current(void)
1199 current_cpu
->stop
= false;
1200 current_cpu
->stopped
= true;
1201 cpu_exit(current_cpu
);
1202 qemu_cond_signal(&qemu_pause_cond
);
1206 int vm_stop(RunState state
)
1208 if (qemu_in_vcpu_thread()) {
1209 qemu_system_vmstop_request(state
);
1211 * FIXME: should not return to device code in case
1212 * vm_stop() has been requested.
1218 return do_vm_stop(state
);
1221 /* does a state transition even if the VM is already stopped,
1222 current state is forgotten forever */
1223 int vm_stop_force_state(RunState state
)
1225 if (runstate_is_running()) {
1226 return vm_stop(state
);
1228 runstate_set(state
);
1229 /* Make sure to return an error if the flush in a previous vm_stop()
1231 return bdrv_flush_all();
1235 static int tcg_cpu_exec(CPUArchState
*env
)
1237 CPUState
*cpu
= ENV_GET_CPU(env
);
1239 #ifdef CONFIG_PROFILER
1243 #ifdef CONFIG_PROFILER
1244 ti
= profile_getclock();
1250 qemu_icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
1251 cpu
->icount_decr
.u16
.low
= 0;
1252 cpu
->icount_extra
= 0;
1253 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1255 /* Maintain prior (possibly buggy) behaviour where if no deadline
1256 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1257 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1260 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1261 deadline
= INT32_MAX
;
1264 count
= qemu_icount_round(deadline
);
1265 qemu_icount
+= count
;
1266 decr
= (count
> 0xffff) ? 0xffff : count
;
1268 cpu
->icount_decr
.u16
.low
= decr
;
1269 cpu
->icount_extra
= count
;
1271 ret
= cpu_exec(env
);
1272 #ifdef CONFIG_PROFILER
1273 qemu_time
+= profile_getclock() - ti
;
1276 /* Fold pending instructions back into the
1277 instruction counter, and clear the interrupt flag. */
1278 qemu_icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
1279 cpu
->icount_decr
.u32
= 0;
1280 cpu
->icount_extra
= 0;
1285 static void tcg_exec_all(void)
1289 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1290 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1292 if (next_cpu
== NULL
) {
1293 next_cpu
= first_cpu
;
1295 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1296 CPUState
*cpu
= next_cpu
;
1297 CPUArchState
*env
= cpu
->env_ptr
;
1299 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1300 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1302 if (cpu_can_run(cpu
)) {
1303 r
= tcg_cpu_exec(env
);
1304 if (r
== EXCP_DEBUG
) {
1305 cpu_handle_guest_debug(cpu
);
1308 } else if (cpu
->stop
|| cpu
->stopped
) {
1315 void set_numa_modes(void)
1321 for (i
= 0; i
< nb_numa_nodes
; i
++) {
1322 if (test_bit(cpu
->cpu_index
, node_cpumask
[i
])) {
1329 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1331 /* XXX: implement xxx_cpu_list for targets that still miss it */
1332 #if defined(cpu_list)
1333 cpu_list(f
, cpu_fprintf
);
1337 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1339 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1344 #if defined(TARGET_I386)
1345 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1346 CPUX86State
*env
= &x86_cpu
->env
;
1347 #elif defined(TARGET_PPC)
1348 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1349 CPUPPCState
*env
= &ppc_cpu
->env
;
1350 #elif defined(TARGET_SPARC)
1351 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1352 CPUSPARCState
*env
= &sparc_cpu
->env
;
1353 #elif defined(TARGET_MIPS)
1354 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1355 CPUMIPSState
*env
= &mips_cpu
->env
;
1358 cpu_synchronize_state(cpu
);
1360 info
= g_malloc0(sizeof(*info
));
1361 info
->value
= g_malloc0(sizeof(*info
->value
));
1362 info
->value
->CPU
= cpu
->cpu_index
;
1363 info
->value
->current
= (cpu
== first_cpu
);
1364 info
->value
->halted
= cpu
->halted
;
1365 info
->value
->thread_id
= cpu
->thread_id
;
1366 #if defined(TARGET_I386)
1367 info
->value
->has_pc
= true;
1368 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1369 #elif defined(TARGET_PPC)
1370 info
->value
->has_nip
= true;
1371 info
->value
->nip
= env
->nip
;
1372 #elif defined(TARGET_SPARC)
1373 info
->value
->has_pc
= true;
1374 info
->value
->pc
= env
->pc
;
1375 info
->value
->has_npc
= true;
1376 info
->value
->npc
= env
->npc
;
1377 #elif defined(TARGET_MIPS)
1378 info
->value
->has_PC
= true;
1379 info
->value
->PC
= env
->active_tc
.PC
;
1382 /* XXX: waiting for the qapi to support GSList */
1384 head
= cur_item
= info
;
1386 cur_item
->next
= info
;
1394 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1395 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1406 cpu
= qemu_get_cpu(cpu_index
);
1408 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1413 f
= fopen(filename
, "wb");
1415 error_setg_file_open(errp
, errno
, filename
);
1423 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1424 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1427 if (fwrite(buf
, 1, l
, f
) != l
) {
1428 error_set(errp
, QERR_IO_ERROR
);
1439 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1446 f
= fopen(filename
, "wb");
1448 error_setg_file_open(errp
, errno
, filename
);
1456 cpu_physical_memory_read(addr
, buf
, l
);
1457 if (fwrite(buf
, 1, l
, f
) != l
) {
1458 error_set(errp
, QERR_IO_ERROR
);
1469 void qmp_inject_nmi(Error
**errp
)
1471 #if defined(TARGET_I386)
1475 X86CPU
*cpu
= X86_CPU(cs
);
1477 if (!cpu
->apic_state
) {
1478 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1480 apic_deliver_nmi(cpu
->apic_state
);
1483 #elif defined(TARGET_S390X)
1489 if (cpu
->env
.cpu_num
== monitor_get_cpu_index()) {
1490 if (s390_cpu_restart(S390_CPU(cs
)) == -1) {
1491 error_set(errp
, QERR_UNSUPPORTED
);
1498 error_set(errp
, QERR_UNSUPPORTED
);