4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
46 #include "qemu/compatfd.h"
51 #include <sys/prctl.h>
54 #define PR_MCE_KILL 33
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
65 #endif /* CONFIG_LINUX */
67 static CPUState
*next_cpu
;
71 bool cpu_is_stopped(CPUState
*cpu
)
73 return cpu
->stopped
|| !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState
*cpu
)
78 if (cpu
->stop
|| cpu
->queued_work_first
) {
81 if (cpu_is_stopped(cpu
)) {
84 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
85 kvm_halt_in_kernel()) {
91 static bool all_cpu_threads_idle(void)
96 if (!cpu_thread_is_idle(cpu
)) {
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static int64_t vm_clock_warp_start
= -1;
109 /* Conversion factor from emulated instructions to virtual clock ticks. */
110 static int icount_time_shift
;
111 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112 #define MAX_ICOUNT_SHIFT 10
114 static QEMUTimer
*icount_rt_timer
;
115 static QEMUTimer
*icount_vm_timer
;
116 static QEMUTimer
*icount_warp_timer
;
118 typedef struct TimersState
{
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev
;
121 int64_t cpu_ticks_offset
;
123 /* cpu_clock_offset can be read without holding the BQL, so protect it with
126 QemuSeqLock vm_clock_seqlock
;
127 int64_t cpu_clock_offset
;
128 int32_t cpu_ticks_enabled
;
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias
;
133 /* Only written by TCG thread */
137 static TimersState timers_state
;
139 /* Return the virtual CPU time, based on the instruction counter. */
140 static int64_t cpu_get_icount_locked(void)
143 CPUState
*cpu
= current_cpu
;
145 icount
= timers_state
.qemu_icount
;
147 if (!cpu_can_do_io(cpu
)) {
148 fprintf(stderr
, "Bad clock read\n");
150 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
152 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
155 int64_t cpu_get_icount(void)
161 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
162 icount
= cpu_get_icount_locked();
163 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
168 int64_t cpu_icount_to_ns(int64_t icount
)
170 return icount
<< icount_time_shift
;
173 /* return the host CPU cycle counter and handle stop/restart */
174 /* Caller must hold the BQL */
175 int64_t cpu_get_ticks(void)
180 return cpu_get_icount();
183 ticks
= timers_state
.cpu_ticks_offset
;
184 if (timers_state
.cpu_ticks_enabled
) {
185 ticks
+= cpu_get_real_ticks();
188 if (timers_state
.cpu_ticks_prev
> ticks
) {
189 /* Note: non-increasing ticks may happen if the host uses
191 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
192 ticks
= timers_state
.cpu_ticks_prev
;
195 timers_state
.cpu_ticks_prev
= ticks
;
199 static int64_t cpu_get_clock_locked(void)
203 ticks
= timers_state
.cpu_clock_offset
;
204 if (timers_state
.cpu_ticks_enabled
) {
205 ticks
+= get_clock();
211 /* return the host CPU monotonic timer and handle stop/restart */
212 int64_t cpu_get_clock(void)
218 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
219 ti
= cpu_get_clock_locked();
220 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
225 /* return the offset between the host clock and virtual CPU clock */
226 int64_t cpu_get_clock_offset(void)
232 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
233 ti
= timers_state
.cpu_clock_offset
;
234 if (!timers_state
.cpu_ticks_enabled
) {
237 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
242 /* enable cpu_get_ticks()
243 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
245 void cpu_enable_ticks(void)
247 /* Here, the only thing the seqlock really protects is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
249 if (!timers_state
.cpu_ticks_enabled
) {
250 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
251 timers_state
.cpu_clock_offset
-= get_clock();
252 timers_state
.cpu_ticks_enabled
= 1;
254 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
257 /* disable cpu_get_ticks() : the clock is stopped. You must not call
258 * cpu_get_ticks() after that.
259 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
261 void cpu_disable_ticks(void)
263 /* Here, the only thing the seqlock really protects is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
265 if (timers_state
.cpu_ticks_enabled
) {
266 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
267 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
268 timers_state
.cpu_ticks_enabled
= 0;
270 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
273 /* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle, real and virtual time will be aligned in
277 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279 static void icount_adjust(void)
285 /* Protected by TimersState mutex. */
286 static int64_t last_delta
;
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
293 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
294 cur_time
= cpu_get_clock_locked();
295 cur_icount
= cpu_get_icount_locked();
297 delta
= cur_icount
- cur_time
;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
300 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
301 && icount_time_shift
> 0) {
302 /* The guest is getting too far ahead. Slow time down. */
306 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
307 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
308 /* The guest is getting too far behind. Speed time up. */
312 timers_state
.qemu_icount_bias
= cur_icount
313 - (timers_state
.qemu_icount
<< icount_time_shift
);
314 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
317 static void icount_adjust_rt(void *opaque
)
319 timer_mod(icount_rt_timer
,
320 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
324 static void icount_adjust_vm(void *opaque
)
326 timer_mod(icount_vm_timer
,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
328 get_ticks_per_sec() / 10);
332 static int64_t qemu_icount_round(int64_t count
)
334 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
337 static void icount_warp_rt(void *opaque
)
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
342 if (atomic_read(&vm_clock_warp_start
) == -1) {
346 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
347 if (runstate_is_running()) {
348 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
351 warp_delta
= clock
- vm_clock_warp_start
;
352 if (use_icount
== 2) {
354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
355 * far ahead of real time.
357 int64_t cur_time
= cpu_get_clock_locked();
358 int64_t cur_icount
= cpu_get_icount_locked();
359 int64_t delta
= cur_time
- cur_icount
;
360 warp_delta
= MIN(warp_delta
, delta
);
362 timers_state
.qemu_icount_bias
+= warp_delta
;
364 vm_clock_warp_start
= -1;
365 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
372 void qtest_clock_warp(int64_t dest
)
374 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
375 assert(qtest_enabled());
376 while (clock
< dest
) {
377 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
378 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
379 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
380 timers_state
.qemu_icount_bias
+= warp
;
381 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
383 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
384 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
389 void qemu_clock_warp(QEMUClockType type
)
395 * There are too many global variables to make the "warp" behavior
396 * applicable to other clocks. But a clock argument removes the
397 * need for if statements all over the place.
399 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
404 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
405 * This ensures that the deadline for the timer is computed correctly below.
406 * This also makes sure that the insn counter is synchronized before the
407 * CPU starts running, in case the CPU is woken by an event other than
408 * the earliest QEMU_CLOCK_VIRTUAL timer.
410 icount_warp_rt(NULL
);
411 timer_del(icount_warp_timer
);
412 if (!all_cpu_threads_idle()) {
416 if (qtest_enabled()) {
417 /* When testing, qtest commands advance icount. */
421 /* We want to use the earliest deadline from ALL vm_clocks */
422 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
423 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
430 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
431 * sleep. Otherwise, the CPU might be waiting for a future timer
432 * interrupt to wake it up, but the interrupt never comes because
433 * the vCPU isn't running any insns and thus doesn't advance the
434 * QEMU_CLOCK_VIRTUAL.
436 * An extreme solution for this problem would be to never let VCPUs
437 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
438 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
439 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
440 * after some "real" time (related to the time left until the next
441 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
442 * This avoids that the warps are visible externally; for example,
443 * you will not be sending network packets continuously instead of
446 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
447 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
448 vm_clock_warp_start
= clock
;
450 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
451 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
452 } else if (deadline
== 0) {
453 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
457 static bool icount_state_needed(void *opaque
)
463 * This is a subsection for icount migration.
465 static const VMStateDescription icount_vmstate_timers
= {
466 .name
= "timer/icount",
468 .minimum_version_id
= 1,
469 .fields
= (VMStateField
[]) {
470 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
471 VMSTATE_INT64(qemu_icount
, TimersState
),
472 VMSTATE_END_OF_LIST()
476 static const VMStateDescription vmstate_timers
= {
479 .minimum_version_id
= 1,
480 .fields
= (VMStateField
[]) {
481 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
482 VMSTATE_INT64(dummy
, TimersState
),
483 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
484 VMSTATE_END_OF_LIST()
486 .subsections
= (VMStateSubsection
[]) {
488 .vmsd
= &icount_vmstate_timers
,
489 .needed
= icount_state_needed
,
496 void cpu_ticks_init(void)
498 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
499 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
502 void configure_icount(QemuOpts
*opts
, Error
**errp
)
505 char *rem_str
= NULL
;
507 option
= qemu_opt_get(opts
, "shift");
509 if (qemu_opt_get(opts
, "align") != NULL
) {
510 error_setg(errp
, "Please specify shift option when using align");
514 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
515 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
516 icount_warp_rt
, NULL
);
517 if (strcmp(option
, "auto") != 0) {
519 icount_time_shift
= strtol(option
, &rem_str
, 0);
520 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
521 error_setg(errp
, "icount: Invalid shift value");
525 } else if (icount_align_option
) {
526 error_setg(errp
, "shift=auto and align=on are incompatible");
531 /* 125MIPS seems a reasonable initial guess at the guest speed.
532 It will be corrected fairly quickly anyway. */
533 icount_time_shift
= 3;
535 /* Have both realtime and virtual time triggers for speed adjustment.
536 The realtime trigger catches emulated time passing too slowly,
537 the virtual time trigger catches emulated time passing too fast.
538 Realtime triggers occur even when idle, so use them less frequently
540 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
541 icount_adjust_rt
, NULL
);
542 timer_mod(icount_rt_timer
,
543 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
544 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
545 icount_adjust_vm
, NULL
);
546 timer_mod(icount_vm_timer
,
547 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
548 get_ticks_per_sec() / 10);
551 /***********************************************************/
552 void hw_error(const char *fmt
, ...)
558 fprintf(stderr
, "qemu: hardware error: ");
559 vfprintf(stderr
, fmt
, ap
);
560 fprintf(stderr
, "\n");
562 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
563 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
569 void cpu_synchronize_all_states(void)
574 cpu_synchronize_state(cpu
);
578 void cpu_synchronize_all_post_reset(void)
583 cpu_synchronize_post_reset(cpu
);
587 void cpu_synchronize_all_post_init(void)
592 cpu_synchronize_post_init(cpu
);
596 void cpu_clean_all_dirty(void)
601 cpu_clean_state(cpu
);
605 static int do_vm_stop(RunState state
)
609 if (runstate_is_running()) {
613 vm_state_notify(0, state
);
614 qapi_event_send_stop(&error_abort
);
618 ret
= bdrv_flush_all();
623 static bool cpu_can_run(CPUState
*cpu
)
628 if (cpu_is_stopped(cpu
)) {
634 static void cpu_handle_guest_debug(CPUState
*cpu
)
636 gdb_set_stop_cpu(cpu
);
637 qemu_system_debug_request();
641 static void cpu_signal(int sig
)
644 cpu_exit(current_cpu
);
650 static void sigbus_reraise(void)
653 struct sigaction action
;
655 memset(&action
, 0, sizeof(action
));
656 action
.sa_handler
= SIG_DFL
;
657 if (!sigaction(SIGBUS
, &action
, NULL
)) {
660 sigaddset(&set
, SIGBUS
);
661 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
663 perror("Failed to re-raise SIGBUS!\n");
667 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
670 if (kvm_on_sigbus(siginfo
->ssi_code
,
671 (void *)(intptr_t)siginfo
->ssi_addr
)) {
676 static void qemu_init_sigbus(void)
678 struct sigaction action
;
680 memset(&action
, 0, sizeof(action
));
681 action
.sa_flags
= SA_SIGINFO
;
682 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
683 sigaction(SIGBUS
, &action
, NULL
);
685 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
688 static void qemu_kvm_eat_signals(CPUState
*cpu
)
690 struct timespec ts
= { 0, 0 };
696 sigemptyset(&waitset
);
697 sigaddset(&waitset
, SIG_IPI
);
698 sigaddset(&waitset
, SIGBUS
);
701 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
702 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
703 perror("sigtimedwait");
709 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
717 r
= sigpending(&chkset
);
719 perror("sigpending");
722 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
725 #else /* !CONFIG_LINUX */
727 static void qemu_init_sigbus(void)
731 static void qemu_kvm_eat_signals(CPUState
*cpu
)
734 #endif /* !CONFIG_LINUX */
737 static void dummy_signal(int sig
)
741 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
745 struct sigaction sigact
;
747 memset(&sigact
, 0, sizeof(sigact
));
748 sigact
.sa_handler
= dummy_signal
;
749 sigaction(SIG_IPI
, &sigact
, NULL
);
751 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
752 sigdelset(&set
, SIG_IPI
);
753 sigdelset(&set
, SIGBUS
);
754 r
= kvm_set_signal_mask(cpu
, &set
);
756 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
761 static void qemu_tcg_init_cpu_signals(void)
764 struct sigaction sigact
;
766 memset(&sigact
, 0, sizeof(sigact
));
767 sigact
.sa_handler
= cpu_signal
;
768 sigaction(SIG_IPI
, &sigact
, NULL
);
771 sigaddset(&set
, SIG_IPI
);
772 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
776 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
781 static void qemu_tcg_init_cpu_signals(void)
786 static QemuMutex qemu_global_mutex
;
787 static QemuCond qemu_io_proceeded_cond
;
788 static bool iothread_requesting_mutex
;
790 static QemuThread io_thread
;
792 static QemuThread
*tcg_cpu_thread
;
793 static QemuCond
*tcg_halt_cond
;
796 static QemuCond qemu_cpu_cond
;
798 static QemuCond qemu_pause_cond
;
799 static QemuCond qemu_work_cond
;
801 void qemu_init_cpu_loop(void)
804 qemu_cond_init(&qemu_cpu_cond
);
805 qemu_cond_init(&qemu_pause_cond
);
806 qemu_cond_init(&qemu_work_cond
);
807 qemu_cond_init(&qemu_io_proceeded_cond
);
808 qemu_mutex_init(&qemu_global_mutex
);
810 qemu_thread_get_self(&io_thread
);
813 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
815 struct qemu_work_item wi
;
817 if (qemu_cpu_is_self(cpu
)) {
825 if (cpu
->queued_work_first
== NULL
) {
826 cpu
->queued_work_first
= &wi
;
828 cpu
->queued_work_last
->next
= &wi
;
830 cpu
->queued_work_last
= &wi
;
836 CPUState
*self_cpu
= current_cpu
;
838 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
839 current_cpu
= self_cpu
;
843 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
845 struct qemu_work_item
*wi
;
847 if (qemu_cpu_is_self(cpu
)) {
852 wi
= g_malloc0(sizeof(struct qemu_work_item
));
856 if (cpu
->queued_work_first
== NULL
) {
857 cpu
->queued_work_first
= wi
;
859 cpu
->queued_work_last
->next
= wi
;
861 cpu
->queued_work_last
= wi
;
868 static void flush_queued_work(CPUState
*cpu
)
870 struct qemu_work_item
*wi
;
872 if (cpu
->queued_work_first
== NULL
) {
876 while ((wi
= cpu
->queued_work_first
)) {
877 cpu
->queued_work_first
= wi
->next
;
884 cpu
->queued_work_last
= NULL
;
885 qemu_cond_broadcast(&qemu_work_cond
);
888 static void qemu_wait_io_event_common(CPUState
*cpu
)
893 qemu_cond_signal(&qemu_pause_cond
);
895 flush_queued_work(cpu
);
896 cpu
->thread_kicked
= false;
899 static void qemu_tcg_wait_io_event(void)
903 while (all_cpu_threads_idle()) {
904 /* Start accounting real time to the virtual clock if the CPUs
906 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
907 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
910 while (iothread_requesting_mutex
) {
911 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
915 qemu_wait_io_event_common(cpu
);
919 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
921 while (cpu_thread_is_idle(cpu
)) {
922 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
925 qemu_kvm_eat_signals(cpu
);
926 qemu_wait_io_event_common(cpu
);
929 static void *qemu_kvm_cpu_thread_fn(void *arg
)
934 qemu_mutex_lock(&qemu_global_mutex
);
935 qemu_thread_get_self(cpu
->thread
);
936 cpu
->thread_id
= qemu_get_thread_id();
937 cpu
->exception_index
= -1;
941 r
= kvm_init_vcpu(cpu
);
943 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
947 qemu_kvm_init_cpu_signals(cpu
);
949 /* signal CPU creation */
951 qemu_cond_signal(&qemu_cpu_cond
);
954 if (cpu_can_run(cpu
)) {
955 r
= kvm_cpu_exec(cpu
);
956 if (r
== EXCP_DEBUG
) {
957 cpu_handle_guest_debug(cpu
);
960 qemu_kvm_wait_io_event(cpu
);
966 static void *qemu_dummy_cpu_thread_fn(void *arg
)
969 fprintf(stderr
, "qtest is not supported under Windows\n");
976 qemu_mutex_lock_iothread();
977 qemu_thread_get_self(cpu
->thread
);
978 cpu
->thread_id
= qemu_get_thread_id();
979 cpu
->exception_index
= -1;
982 sigemptyset(&waitset
);
983 sigaddset(&waitset
, SIG_IPI
);
985 /* signal CPU creation */
987 qemu_cond_signal(&qemu_cpu_cond
);
992 qemu_mutex_unlock_iothread();
995 r
= sigwait(&waitset
, &sig
);
996 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1001 qemu_mutex_lock_iothread();
1003 qemu_wait_io_event_common(cpu
);
1010 static void tcg_exec_all(void);
1012 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1014 CPUState
*cpu
= arg
;
1016 qemu_tcg_init_cpu_signals();
1017 qemu_thread_get_self(cpu
->thread
);
1019 qemu_mutex_lock(&qemu_global_mutex
);
1021 cpu
->thread_id
= qemu_get_thread_id();
1022 cpu
->created
= true;
1023 cpu
->exception_index
= -1;
1026 qemu_cond_signal(&qemu_cpu_cond
);
1028 /* wait for initial kick-off after machine start */
1029 while (QTAILQ_FIRST(&cpus
)->stopped
) {
1030 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1032 /* process any pending work */
1034 qemu_wait_io_event_common(cpu
);
1042 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1044 if (deadline
== 0) {
1045 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1048 qemu_tcg_wait_io_event();
1054 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1059 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1061 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1065 if (!qemu_cpu_is_self(cpu
)) {
1068 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1069 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1074 /* On multi-core systems, we are not sure that the thread is actually
1075 * suspended until we can get the context.
1077 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1078 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1084 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1085 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1093 void qemu_cpu_kick(CPUState
*cpu
)
1095 qemu_cond_broadcast(cpu
->halt_cond
);
1096 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1097 qemu_cpu_kick_thread(cpu
);
1098 cpu
->thread_kicked
= true;
1102 void qemu_cpu_kick_self(void)
1105 assert(current_cpu
);
1107 if (!current_cpu
->thread_kicked
) {
1108 qemu_cpu_kick_thread(current_cpu
);
1109 current_cpu
->thread_kicked
= true;
1116 bool qemu_cpu_is_self(CPUState
*cpu
)
1118 return qemu_thread_is_self(cpu
->thread
);
1121 static bool qemu_in_vcpu_thread(void)
1123 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1126 void qemu_mutex_lock_iothread(void)
1128 if (!tcg_enabled()) {
1129 qemu_mutex_lock(&qemu_global_mutex
);
1131 iothread_requesting_mutex
= true;
1132 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1133 qemu_cpu_kick_thread(first_cpu
);
1134 qemu_mutex_lock(&qemu_global_mutex
);
1136 iothread_requesting_mutex
= false;
1137 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1141 void qemu_mutex_unlock_iothread(void)
1143 qemu_mutex_unlock(&qemu_global_mutex
);
1146 static int all_vcpus_paused(void)
1151 if (!cpu
->stopped
) {
1159 void pause_all_vcpus(void)
1163 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1169 if (qemu_in_vcpu_thread()) {
1171 if (!kvm_enabled()) {
1174 cpu
->stopped
= true;
1180 while (!all_vcpus_paused()) {
1181 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1188 void cpu_resume(CPUState
*cpu
)
1191 cpu
->stopped
= false;
1195 void resume_all_vcpus(void)
1199 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1205 /* For temporary buffers for forming a name */
1206 #define VCPU_THREAD_NAME_SIZE 16
1208 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1210 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1212 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1214 /* share a single thread for all cpus with TCG */
1215 if (!tcg_cpu_thread
) {
1216 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1217 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1218 qemu_cond_init(cpu
->halt_cond
);
1219 tcg_halt_cond
= cpu
->halt_cond
;
1220 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1222 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1223 cpu
, QEMU_THREAD_JOINABLE
);
1225 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1227 while (!cpu
->created
) {
1228 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1230 tcg_cpu_thread
= cpu
->thread
;
1232 cpu
->thread
= tcg_cpu_thread
;
1233 cpu
->halt_cond
= tcg_halt_cond
;
1237 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1239 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1241 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1242 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1243 qemu_cond_init(cpu
->halt_cond
);
1244 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1246 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1247 cpu
, QEMU_THREAD_JOINABLE
);
1248 while (!cpu
->created
) {
1249 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1253 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1255 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1257 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1258 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1259 qemu_cond_init(cpu
->halt_cond
);
1260 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1262 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1263 QEMU_THREAD_JOINABLE
);
1264 while (!cpu
->created
) {
1265 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1269 void qemu_init_vcpu(CPUState
*cpu
)
1271 cpu
->nr_cores
= smp_cores
;
1272 cpu
->nr_threads
= smp_threads
;
1273 cpu
->stopped
= true;
1274 if (kvm_enabled()) {
1275 qemu_kvm_start_vcpu(cpu
);
1276 } else if (tcg_enabled()) {
1277 qemu_tcg_init_vcpu(cpu
);
1279 qemu_dummy_start_vcpu(cpu
);
1283 void cpu_stop_current(void)
1286 current_cpu
->stop
= false;
1287 current_cpu
->stopped
= true;
1288 cpu_exit(current_cpu
);
1289 qemu_cond_signal(&qemu_pause_cond
);
1293 int vm_stop(RunState state
)
1295 if (qemu_in_vcpu_thread()) {
1296 qemu_system_vmstop_request_prepare();
1297 qemu_system_vmstop_request(state
);
1299 * FIXME: should not return to device code in case
1300 * vm_stop() has been requested.
1306 return do_vm_stop(state
);
1309 /* Does a state transition even if the VM is already stopped;
1310 the current state is forgotten forever. */
1311 int vm_stop_force_state(RunState state
)
1313 if (runstate_is_running()) {
1314 return vm_stop(state
);
1316 runstate_set(state
);
1317 /* Make sure to return an error if the flush in a previous vm_stop()
1319 return bdrv_flush_all();
1323 static int tcg_cpu_exec(CPUArchState
*env
)
1325 CPUState
*cpu
= ENV_GET_CPU(env
);
1327 #ifdef CONFIG_PROFILER
1331 #ifdef CONFIG_PROFILER
1332 ti
= profile_getclock();
1338 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1339 + cpu
->icount_extra
);
1340 cpu
->icount_decr
.u16
.low
= 0;
1341 cpu
->icount_extra
= 0;
1342 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1344 /* Maintain prior (possibly buggy) behaviour where if no deadline
1345 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1346 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1349 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1350 deadline
= INT32_MAX
;
1353 count
= qemu_icount_round(deadline
);
1354 timers_state
.qemu_icount
+= count
;
1355 decr
= (count
> 0xffff) ? 0xffff : count
;
1357 cpu
->icount_decr
.u16
.low
= decr
;
1358 cpu
->icount_extra
= count
;
1360 ret
= cpu_exec(env
);
1361 #ifdef CONFIG_PROFILER
1362 qemu_time
+= profile_getclock() - ti
;
1365 /* Fold pending instructions back into the
1366 instruction counter, and clear the interrupt flag. */
1367 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1368 + cpu
->icount_extra
);
1369 cpu
->icount_decr
.u32
= 0;
1370 cpu
->icount_extra
= 0;
1375 static void tcg_exec_all(void)
1379 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1380 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1382 if (next_cpu
== NULL
) {
1383 next_cpu
= first_cpu
;
1385 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1386 CPUState
*cpu
= next_cpu
;
1387 CPUArchState
*env
= cpu
->env_ptr
;
1389 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1390 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1392 if (cpu_can_run(cpu
)) {
1393 r
= tcg_cpu_exec(env
);
1394 if (r
== EXCP_DEBUG
) {
1395 cpu_handle_guest_debug(cpu
);
1398 } else if (cpu
->stop
|| cpu
->stopped
) {
1405 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1407 /* XXX: implement xxx_cpu_list for targets that still miss it */
1408 #if defined(cpu_list)
1409 cpu_list(f
, cpu_fprintf
);
1413 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1415 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1420 #if defined(TARGET_I386)
1421 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1422 CPUX86State
*env
= &x86_cpu
->env
;
1423 #elif defined(TARGET_PPC)
1424 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1425 CPUPPCState
*env
= &ppc_cpu
->env
;
1426 #elif defined(TARGET_SPARC)
1427 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1428 CPUSPARCState
*env
= &sparc_cpu
->env
;
1429 #elif defined(TARGET_MIPS)
1430 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1431 CPUMIPSState
*env
= &mips_cpu
->env
;
1432 #elif defined(TARGET_TRICORE)
1433 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1434 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1437 cpu_synchronize_state(cpu
);
1439 info
= g_malloc0(sizeof(*info
));
1440 info
->value
= g_malloc0(sizeof(*info
->value
));
1441 info
->value
->CPU
= cpu
->cpu_index
;
1442 info
->value
->current
= (cpu
== first_cpu
);
1443 info
->value
->halted
= cpu
->halted
;
1444 info
->value
->thread_id
= cpu
->thread_id
;
1445 #if defined(TARGET_I386)
1446 info
->value
->has_pc
= true;
1447 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1448 #elif defined(TARGET_PPC)
1449 info
->value
->has_nip
= true;
1450 info
->value
->nip
= env
->nip
;
1451 #elif defined(TARGET_SPARC)
1452 info
->value
->has_pc
= true;
1453 info
->value
->pc
= env
->pc
;
1454 info
->value
->has_npc
= true;
1455 info
->value
->npc
= env
->npc
;
1456 #elif defined(TARGET_MIPS)
1457 info
->value
->has_PC
= true;
1458 info
->value
->PC
= env
->active_tc
.PC
;
1459 #elif defined(TARGET_TRICORE)
1460 info
->value
->has_PC
= true;
1461 info
->value
->PC
= env
->PC
;
1464 /* XXX: waiting for the qapi to support GSList */
1466 head
= cur_item
= info
;
1468 cur_item
->next
= info
;
1476 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1477 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1488 cpu
= qemu_get_cpu(cpu_index
);
1490 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1495 f
= fopen(filename
, "wb");
1497 error_setg_file_open(errp
, errno
, filename
);
1505 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1506 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1509 if (fwrite(buf
, 1, l
, f
) != l
) {
1510 error_set(errp
, QERR_IO_ERROR
);
1521 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1528 f
= fopen(filename
, "wb");
1530 error_setg_file_open(errp
, errno
, filename
);
1538 cpu_physical_memory_read(addr
, buf
, l
);
1539 if (fwrite(buf
, 1, l
, f
) != l
) {
1540 error_set(errp
, QERR_IO_ERROR
);
1551 void qmp_inject_nmi(Error
**errp
)
1553 #if defined(TARGET_I386)
1557 X86CPU
*cpu
= X86_CPU(cs
);
1559 if (!cpu
->apic_state
) {
1560 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1562 apic_deliver_nmi(cpu
->apic_state
);
1566 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1570 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1576 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1577 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1578 if (icount_align_option
) {
1579 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1580 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1582 cpu_fprintf(f
, "Max guest delay NA\n");
1583 cpu_fprintf(f
, "Max guest advance NA\n");