4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
46 #include "qemu/compatfd.h"
51 #include <sys/prctl.h>
54 #define PR_MCE_KILL 33
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
65 #endif /* CONFIG_LINUX */
67 static CPUState
*next_cpu
;
71 bool cpu_is_stopped(CPUState
*cpu
)
73 return cpu
->stopped
|| !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState
*cpu
)
78 if (cpu
->stop
|| cpu
->queued_work_first
) {
81 if (cpu_is_stopped(cpu
)) {
84 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
85 kvm_halt_in_kernel()) {
91 static bool all_cpu_threads_idle(void)
96 if (!cpu_thread_is_idle(cpu
)) {
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static bool icount_sleep
= true;
109 static int64_t vm_clock_warp_start
= -1;
110 /* Conversion factor from emulated instructions to virtual clock ticks. */
111 static int icount_time_shift
;
112 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
113 #define MAX_ICOUNT_SHIFT 10
115 static QEMUTimer
*icount_rt_timer
;
116 static QEMUTimer
*icount_vm_timer
;
117 static QEMUTimer
*icount_warp_timer
;
119 typedef struct TimersState
{
120 /* Protected by BQL. */
121 int64_t cpu_ticks_prev
;
122 int64_t cpu_ticks_offset
;
124 /* cpu_clock_offset can be read out of BQL, so protect it with
127 QemuSeqLock vm_clock_seqlock
;
128 int64_t cpu_clock_offset
;
129 int32_t cpu_ticks_enabled
;
132 /* Compensate for varying guest execution speed. */
133 int64_t qemu_icount_bias
;
134 /* Only written by TCG thread */
138 static TimersState timers_state
;
140 int64_t cpu_get_icount_raw(void)
143 CPUState
*cpu
= current_cpu
;
145 icount
= timers_state
.qemu_icount
;
147 if (!cpu_can_do_io(cpu
)) {
148 fprintf(stderr
, "Bad icount read\n");
151 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
156 /* Return the virtual CPU time, based on the instruction counter. */
157 static int64_t cpu_get_icount_locked(void)
159 int64_t icount
= cpu_get_icount_raw();
160 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
163 int64_t cpu_get_icount(void)
169 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
170 icount
= cpu_get_icount_locked();
171 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
176 int64_t cpu_icount_to_ns(int64_t icount
)
178 return icount
<< icount_time_shift
;
181 /* return the host CPU cycle counter and handle stop/restart */
182 /* Caller must hold the BQL */
183 int64_t cpu_get_ticks(void)
188 return cpu_get_icount();
191 ticks
= timers_state
.cpu_ticks_offset
;
192 if (timers_state
.cpu_ticks_enabled
) {
193 ticks
+= cpu_get_real_ticks();
196 if (timers_state
.cpu_ticks_prev
> ticks
) {
197 /* Note: non increasing ticks may happen if the host uses
199 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
200 ticks
= timers_state
.cpu_ticks_prev
;
203 timers_state
.cpu_ticks_prev
= ticks
;
207 static int64_t cpu_get_clock_locked(void)
211 ticks
= timers_state
.cpu_clock_offset
;
212 if (timers_state
.cpu_ticks_enabled
) {
213 ticks
+= get_clock();
219 /* return the host CPU monotonic timer and handle stop/restart */
220 int64_t cpu_get_clock(void)
226 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
227 ti
= cpu_get_clock_locked();
228 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
233 /* enable cpu_get_ticks()
234 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
236 void cpu_enable_ticks(void)
238 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
239 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
240 if (!timers_state
.cpu_ticks_enabled
) {
241 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
242 timers_state
.cpu_clock_offset
-= get_clock();
243 timers_state
.cpu_ticks_enabled
= 1;
245 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
248 /* disable cpu_get_ticks() : the clock is stopped. You must not call
249 * cpu_get_ticks() after that.
250 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
252 void cpu_disable_ticks(void)
254 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
255 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
256 if (timers_state
.cpu_ticks_enabled
) {
257 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
258 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
259 timers_state
.cpu_ticks_enabled
= 0;
261 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
264 /* Correlation between real and virtual time is always going to be
265 fairly approximate, so ignore small variation.
266 When the guest is idle real and virtual time will be aligned in
268 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
270 static void icount_adjust(void)
276 /* Protected by TimersState mutex. */
277 static int64_t last_delta
;
279 /* If the VM is not running, then do nothing. */
280 if (!runstate_is_running()) {
284 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
285 cur_time
= cpu_get_clock_locked();
286 cur_icount
= cpu_get_icount_locked();
288 delta
= cur_icount
- cur_time
;
289 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
291 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
292 && icount_time_shift
> 0) {
293 /* The guest is getting too far ahead. Slow time down. */
297 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
298 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
299 /* The guest is getting too far behind. Speed time up. */
303 timers_state
.qemu_icount_bias
= cur_icount
304 - (timers_state
.qemu_icount
<< icount_time_shift
);
305 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
308 static void icount_adjust_rt(void *opaque
)
310 timer_mod(icount_rt_timer
,
311 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
315 static void icount_adjust_vm(void *opaque
)
317 timer_mod(icount_vm_timer
,
318 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
319 get_ticks_per_sec() / 10);
323 static int64_t qemu_icount_round(int64_t count
)
325 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
328 static void icount_warp_rt(void *opaque
)
330 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
331 * changes from -1 to another value, so the race here is okay.
333 if (atomic_read(&vm_clock_warp_start
) == -1) {
337 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
338 if (runstate_is_running()) {
339 int64_t clock
= cpu_get_clock_locked();
342 warp_delta
= clock
- vm_clock_warp_start
;
343 if (use_icount
== 2) {
345 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
346 * far ahead of real time.
348 int64_t cur_icount
= cpu_get_icount_locked();
349 int64_t delta
= clock
- cur_icount
;
350 warp_delta
= MIN(warp_delta
, delta
);
352 timers_state
.qemu_icount_bias
+= warp_delta
;
354 vm_clock_warp_start
= -1;
355 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
357 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
358 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
362 void qtest_clock_warp(int64_t dest
)
364 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
365 AioContext
*aio_context
;
366 assert(qtest_enabled());
367 aio_context
= qemu_get_aio_context();
368 while (clock
< dest
) {
369 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
370 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
372 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
373 timers_state
.qemu_icount_bias
+= warp
;
374 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
376 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
377 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
378 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
380 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
383 void qemu_clock_warp(QEMUClockType type
)
389 * There are too many global variables to make the "warp" behavior
390 * applicable to other clocks. But a clock argument removes the
391 * need for if statements all over the place.
393 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
399 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
400 * This ensures that the deadline for the timer is computed correctly
402 * This also makes sure that the insn counter is synchronized before
403 * the CPU starts running, in case the CPU is woken by an event other
404 * than the earliest QEMU_CLOCK_VIRTUAL timer.
406 icount_warp_rt(NULL
);
407 timer_del(icount_warp_timer
);
409 if (!all_cpu_threads_idle()) {
413 if (qtest_enabled()) {
414 /* When testing, qtest commands advance icount. */
418 /* We want to use the earliest deadline from ALL vm_clocks */
419 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
420 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
422 static bool notified
;
423 if (!icount_sleep
&& !notified
) {
424 error_report("WARNING: icount sleep disabled and no active timers");
432 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
433 * sleep. Otherwise, the CPU might be waiting for a future timer
434 * interrupt to wake it up, but the interrupt never comes because
435 * the vCPU isn't running any insns and thus doesn't advance the
436 * QEMU_CLOCK_VIRTUAL.
440 * We never let VCPUs sleep in no sleep icount mode.
441 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
442 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
443 * It is useful when we want a deterministic execution time,
444 * isolated from host latencies.
446 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
447 timers_state
.qemu_icount_bias
+= deadline
;
448 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
449 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
452 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
453 * "real" time, (related to the time left until the next event) has
454 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
455 * This keeps the warps from being visible externally; for example,
456 * you will not be sending network packets continuously instead of
459 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
460 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
461 vm_clock_warp_start
= clock
;
463 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
464 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
466 } else if (deadline
== 0) {
467 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
471 static bool icount_state_needed(void *opaque
)
477 * This is a subsection for icount migration.
479 static const VMStateDescription icount_vmstate_timers
= {
480 .name
= "timer/icount",
482 .minimum_version_id
= 1,
483 .needed
= icount_state_needed
,
484 .fields
= (VMStateField
[]) {
485 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
486 VMSTATE_INT64(qemu_icount
, TimersState
),
487 VMSTATE_END_OF_LIST()
491 static const VMStateDescription vmstate_timers
= {
494 .minimum_version_id
= 1,
495 .fields
= (VMStateField
[]) {
496 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
497 VMSTATE_INT64(dummy
, TimersState
),
498 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
499 VMSTATE_END_OF_LIST()
501 .subsections
= (const VMStateDescription
*[]) {
502 &icount_vmstate_timers
,
507 void cpu_ticks_init(void)
509 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
510 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
513 void configure_icount(QemuOpts
*opts
, Error
**errp
)
516 char *rem_str
= NULL
;
518 option
= qemu_opt_get(opts
, "shift");
520 if (qemu_opt_get(opts
, "align") != NULL
) {
521 error_setg(errp
, "Please specify shift option when using align");
526 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
528 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
529 icount_warp_rt
, NULL
);
532 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
534 if (icount_align_option
&& !icount_sleep
) {
535 error_setg(errp
, "align=on and sleep=no are incompatible");
537 if (strcmp(option
, "auto") != 0) {
539 icount_time_shift
= strtol(option
, &rem_str
, 0);
540 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
541 error_setg(errp
, "icount: Invalid shift value");
545 } else if (icount_align_option
) {
546 error_setg(errp
, "shift=auto and align=on are incompatible");
547 } else if (!icount_sleep
) {
548 error_setg(errp
, "shift=auto and sleep=no are incompatible");
553 /* 125MIPS seems a reasonable initial guess at the guest speed.
554 It will be corrected fairly quickly anyway. */
555 icount_time_shift
= 3;
557 /* Have both realtime and virtual time triggers for speed adjustment.
558 The realtime trigger catches emulated time passing too slowly,
559 the virtual time trigger catches emulated time passing too fast.
560 Realtime triggers occur even when idle, so use them less frequently
562 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
563 icount_adjust_rt
, NULL
);
564 timer_mod(icount_rt_timer
,
565 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
566 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
567 icount_adjust_vm
, NULL
);
568 timer_mod(icount_vm_timer
,
569 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
570 get_ticks_per_sec() / 10);
573 /***********************************************************/
574 void hw_error(const char *fmt
, ...)
580 fprintf(stderr
, "qemu: hardware error: ");
581 vfprintf(stderr
, fmt
, ap
);
582 fprintf(stderr
, "\n");
584 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
585 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
591 void cpu_synchronize_all_states(void)
596 cpu_synchronize_state(cpu
);
600 void cpu_synchronize_all_post_reset(void)
605 cpu_synchronize_post_reset(cpu
);
609 void cpu_synchronize_all_post_init(void)
614 cpu_synchronize_post_init(cpu
);
618 void cpu_clean_all_dirty(void)
623 cpu_clean_state(cpu
);
627 static int do_vm_stop(RunState state
)
631 if (runstate_is_running()) {
635 vm_state_notify(0, state
);
636 qapi_event_send_stop(&error_abort
);
640 ret
= bdrv_flush_all();
645 static bool cpu_can_run(CPUState
*cpu
)
650 if (cpu_is_stopped(cpu
)) {
656 static void cpu_handle_guest_debug(CPUState
*cpu
)
658 gdb_set_stop_cpu(cpu
);
659 qemu_system_debug_request();
663 static void cpu_signal(int sig
)
666 cpu_exit(current_cpu
);
672 static void sigbus_reraise(void)
675 struct sigaction action
;
677 memset(&action
, 0, sizeof(action
));
678 action
.sa_handler
= SIG_DFL
;
679 if (!sigaction(SIGBUS
, &action
, NULL
)) {
682 sigaddset(&set
, SIGBUS
);
683 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
685 perror("Failed to re-raise SIGBUS!\n");
689 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
692 if (kvm_on_sigbus(siginfo
->ssi_code
,
693 (void *)(intptr_t)siginfo
->ssi_addr
)) {
698 static void qemu_init_sigbus(void)
700 struct sigaction action
;
702 memset(&action
, 0, sizeof(action
));
703 action
.sa_flags
= SA_SIGINFO
;
704 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
705 sigaction(SIGBUS
, &action
, NULL
);
707 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
710 static void qemu_kvm_eat_signals(CPUState
*cpu
)
712 struct timespec ts
= { 0, 0 };
718 sigemptyset(&waitset
);
719 sigaddset(&waitset
, SIG_IPI
);
720 sigaddset(&waitset
, SIGBUS
);
723 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
724 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
725 perror("sigtimedwait");
731 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
739 r
= sigpending(&chkset
);
741 perror("sigpending");
744 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
747 #else /* !CONFIG_LINUX */
749 static void qemu_init_sigbus(void)
753 static void qemu_kvm_eat_signals(CPUState
*cpu
)
756 #endif /* !CONFIG_LINUX */
759 static void dummy_signal(int sig
)
763 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
767 struct sigaction sigact
;
769 memset(&sigact
, 0, sizeof(sigact
));
770 sigact
.sa_handler
= dummy_signal
;
771 sigaction(SIG_IPI
, &sigact
, NULL
);
773 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
774 sigdelset(&set
, SIG_IPI
);
775 sigdelset(&set
, SIGBUS
);
776 r
= kvm_set_signal_mask(cpu
, &set
);
778 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
783 static void qemu_tcg_init_cpu_signals(void)
786 struct sigaction sigact
;
788 memset(&sigact
, 0, sizeof(sigact
));
789 sigact
.sa_handler
= cpu_signal
;
790 sigaction(SIG_IPI
, &sigact
, NULL
);
793 sigaddset(&set
, SIG_IPI
);
794 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
798 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
803 static void qemu_tcg_init_cpu_signals(void)
808 static QemuMutex qemu_global_mutex
;
809 static QemuCond qemu_io_proceeded_cond
;
810 static unsigned iothread_requesting_mutex
;
812 static QemuThread io_thread
;
814 static QemuThread
*tcg_cpu_thread
;
815 static QemuCond
*tcg_halt_cond
;
818 static QemuCond qemu_cpu_cond
;
820 static QemuCond qemu_pause_cond
;
821 static QemuCond qemu_work_cond
;
823 void qemu_init_cpu_loop(void)
826 qemu_cond_init(&qemu_cpu_cond
);
827 qemu_cond_init(&qemu_pause_cond
);
828 qemu_cond_init(&qemu_work_cond
);
829 qemu_cond_init(&qemu_io_proceeded_cond
);
830 qemu_mutex_init(&qemu_global_mutex
);
832 qemu_thread_get_self(&io_thread
);
835 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
837 struct qemu_work_item wi
;
839 if (qemu_cpu_is_self(cpu
)) {
847 if (cpu
->queued_work_first
== NULL
) {
848 cpu
->queued_work_first
= &wi
;
850 cpu
->queued_work_last
->next
= &wi
;
852 cpu
->queued_work_last
= &wi
;
858 CPUState
*self_cpu
= current_cpu
;
860 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
861 current_cpu
= self_cpu
;
865 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
867 struct qemu_work_item
*wi
;
869 if (qemu_cpu_is_self(cpu
)) {
874 wi
= g_malloc0(sizeof(struct qemu_work_item
));
878 if (cpu
->queued_work_first
== NULL
) {
879 cpu
->queued_work_first
= wi
;
881 cpu
->queued_work_last
->next
= wi
;
883 cpu
->queued_work_last
= wi
;
890 static void flush_queued_work(CPUState
*cpu
)
892 struct qemu_work_item
*wi
;
894 if (cpu
->queued_work_first
== NULL
) {
898 while ((wi
= cpu
->queued_work_first
)) {
899 cpu
->queued_work_first
= wi
->next
;
906 cpu
->queued_work_last
= NULL
;
907 qemu_cond_broadcast(&qemu_work_cond
);
910 static void qemu_wait_io_event_common(CPUState
*cpu
)
915 qemu_cond_signal(&qemu_pause_cond
);
917 flush_queued_work(cpu
);
918 cpu
->thread_kicked
= false;
921 static void qemu_tcg_wait_io_event(void)
925 while (all_cpu_threads_idle()) {
926 /* Start accounting real time to the virtual clock if the CPUs
928 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
929 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
932 while (iothread_requesting_mutex
) {
933 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
937 qemu_wait_io_event_common(cpu
);
941 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
943 while (cpu_thread_is_idle(cpu
)) {
944 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
947 qemu_kvm_eat_signals(cpu
);
948 qemu_wait_io_event_common(cpu
);
951 static void *qemu_kvm_cpu_thread_fn(void *arg
)
956 qemu_mutex_lock(&qemu_global_mutex
);
957 qemu_thread_get_self(cpu
->thread
);
958 cpu
->thread_id
= qemu_get_thread_id();
962 r
= kvm_init_vcpu(cpu
);
964 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
968 qemu_kvm_init_cpu_signals(cpu
);
970 /* signal CPU creation */
972 qemu_cond_signal(&qemu_cpu_cond
);
975 if (cpu_can_run(cpu
)) {
976 r
= kvm_cpu_exec(cpu
);
977 if (r
== EXCP_DEBUG
) {
978 cpu_handle_guest_debug(cpu
);
981 qemu_kvm_wait_io_event(cpu
);
987 static void *qemu_dummy_cpu_thread_fn(void *arg
)
990 fprintf(stderr
, "qtest is not supported under Windows\n");
997 qemu_mutex_lock_iothread();
998 qemu_thread_get_self(cpu
->thread
);
999 cpu
->thread_id
= qemu_get_thread_id();
1002 sigemptyset(&waitset
);
1003 sigaddset(&waitset
, SIG_IPI
);
1005 /* signal CPU creation */
1006 cpu
->created
= true;
1007 qemu_cond_signal(&qemu_cpu_cond
);
1012 qemu_mutex_unlock_iothread();
1015 r
= sigwait(&waitset
, &sig
);
1016 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1021 qemu_mutex_lock_iothread();
1023 qemu_wait_io_event_common(cpu
);
1030 static void tcg_exec_all(void);
1032 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1034 CPUState
*cpu
= arg
;
1036 qemu_tcg_init_cpu_signals();
1037 qemu_thread_get_self(cpu
->thread
);
1039 qemu_mutex_lock(&qemu_global_mutex
);
1041 cpu
->thread_id
= qemu_get_thread_id();
1042 cpu
->created
= true;
1045 qemu_cond_signal(&qemu_cpu_cond
);
1047 /* wait for initial kick-off after machine start */
1048 while (first_cpu
->stopped
) {
1049 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1051 /* process any pending work */
1053 qemu_wait_io_event_common(cpu
);
1057 /* process any pending work */
1064 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1066 if (deadline
== 0) {
1067 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1070 qemu_tcg_wait_io_event();
1076 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1081 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1083 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1087 if (!qemu_cpu_is_self(cpu
)) {
1090 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1091 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1096 /* On multi-core systems, we are not sure that the thread is actually
1097 * suspended until we can get the context.
1099 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1100 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1106 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1107 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1115 void qemu_cpu_kick(CPUState
*cpu
)
1117 qemu_cond_broadcast(cpu
->halt_cond
);
1118 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1119 qemu_cpu_kick_thread(cpu
);
1120 cpu
->thread_kicked
= true;
1124 void qemu_cpu_kick_self(void)
1127 assert(current_cpu
);
1129 if (!current_cpu
->thread_kicked
) {
1130 qemu_cpu_kick_thread(current_cpu
);
1131 current_cpu
->thread_kicked
= true;
1138 bool qemu_cpu_is_self(CPUState
*cpu
)
1140 return qemu_thread_is_self(cpu
->thread
);
1143 bool qemu_in_vcpu_thread(void)
1145 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1148 void qemu_mutex_lock_iothread(void)
1150 atomic_inc(&iothread_requesting_mutex
);
1151 if (!tcg_enabled() || !first_cpu
|| !first_cpu
->thread
) {
1152 qemu_mutex_lock(&qemu_global_mutex
);
1153 atomic_dec(&iothread_requesting_mutex
);
1155 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1156 qemu_cpu_kick_thread(first_cpu
);
1157 qemu_mutex_lock(&qemu_global_mutex
);
1159 atomic_dec(&iothread_requesting_mutex
);
1160 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1164 void qemu_mutex_unlock_iothread(void)
1166 qemu_mutex_unlock(&qemu_global_mutex
);
1169 static int all_vcpus_paused(void)
1174 if (!cpu
->stopped
) {
1182 void pause_all_vcpus(void)
1186 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1192 if (qemu_in_vcpu_thread()) {
1194 if (!kvm_enabled()) {
1197 cpu
->stopped
= true;
1203 while (!all_vcpus_paused()) {
1204 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1211 void cpu_resume(CPUState
*cpu
)
1214 cpu
->stopped
= false;
1218 void resume_all_vcpus(void)
1222 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1228 /* For temporary buffers for forming a name */
1229 #define VCPU_THREAD_NAME_SIZE 16
1231 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1233 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1235 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1237 /* share a single thread for all cpus with TCG */
1238 if (!tcg_cpu_thread
) {
1239 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1240 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1241 qemu_cond_init(cpu
->halt_cond
);
1242 tcg_halt_cond
= cpu
->halt_cond
;
1243 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1245 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1246 cpu
, QEMU_THREAD_JOINABLE
);
1248 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1250 while (!cpu
->created
) {
1251 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1253 tcg_cpu_thread
= cpu
->thread
;
1255 cpu
->thread
= tcg_cpu_thread
;
1256 cpu
->halt_cond
= tcg_halt_cond
;
1260 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1262 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1264 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1265 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1266 qemu_cond_init(cpu
->halt_cond
);
1267 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1269 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1270 cpu
, QEMU_THREAD_JOINABLE
);
1271 while (!cpu
->created
) {
1272 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1276 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1278 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1280 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1281 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1282 qemu_cond_init(cpu
->halt_cond
);
1283 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1285 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1286 QEMU_THREAD_JOINABLE
);
1287 while (!cpu
->created
) {
1288 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1292 void qemu_init_vcpu(CPUState
*cpu
)
1294 cpu
->nr_cores
= smp_cores
;
1295 cpu
->nr_threads
= smp_threads
;
1296 cpu
->stopped
= true;
1297 if (kvm_enabled()) {
1298 qemu_kvm_start_vcpu(cpu
);
1299 } else if (tcg_enabled()) {
1300 qemu_tcg_init_vcpu(cpu
);
1302 qemu_dummy_start_vcpu(cpu
);
1306 void cpu_stop_current(void)
1309 current_cpu
->stop
= false;
1310 current_cpu
->stopped
= true;
1311 cpu_exit(current_cpu
);
1312 qemu_cond_signal(&qemu_pause_cond
);
1316 int vm_stop(RunState state
)
1318 if (qemu_in_vcpu_thread()) {
1319 qemu_system_vmstop_request_prepare();
1320 qemu_system_vmstop_request(state
);
1322 * FIXME: should not return to device code in case
1323 * vm_stop() has been requested.
1329 return do_vm_stop(state
);
1332 /* does a state transition even if the VM is already stopped,
1333 current state is forgotten forever */
1334 int vm_stop_force_state(RunState state
)
1336 if (runstate_is_running()) {
1337 return vm_stop(state
);
1339 runstate_set(state
);
1340 /* Make sure to return an error if the flush in a previous vm_stop()
1342 return bdrv_flush_all();
1346 static int tcg_cpu_exec(CPUArchState
*env
)
1348 CPUState
*cpu
= ENV_GET_CPU(env
);
1350 #ifdef CONFIG_PROFILER
1354 #ifdef CONFIG_PROFILER
1355 ti
= profile_getclock();
1361 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1362 + cpu
->icount_extra
);
1363 cpu
->icount_decr
.u16
.low
= 0;
1364 cpu
->icount_extra
= 0;
1365 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1367 /* Maintain prior (possibly buggy) behaviour where if no deadline
1368 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1369 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1372 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1373 deadline
= INT32_MAX
;
1376 count
= qemu_icount_round(deadline
);
1377 timers_state
.qemu_icount
+= count
;
1378 decr
= (count
> 0xffff) ? 0xffff : count
;
1380 cpu
->icount_decr
.u16
.low
= decr
;
1381 cpu
->icount_extra
= count
;
1383 ret
= cpu_exec(env
);
1384 #ifdef CONFIG_PROFILER
1385 tcg_time
+= profile_getclock() - ti
;
1388 /* Fold pending instructions back into the
1389 instruction counter, and clear the interrupt flag. */
1390 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1391 + cpu
->icount_extra
);
1392 cpu
->icount_decr
.u32
= 0;
1393 cpu
->icount_extra
= 0;
1398 static void tcg_exec_all(void)
1402 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1403 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1405 if (next_cpu
== NULL
) {
1406 next_cpu
= first_cpu
;
1408 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1409 CPUState
*cpu
= next_cpu
;
1410 CPUArchState
*env
= cpu
->env_ptr
;
1412 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1413 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1415 if (cpu_can_run(cpu
)) {
1416 r
= tcg_cpu_exec(env
);
1417 if (r
== EXCP_DEBUG
) {
1418 cpu_handle_guest_debug(cpu
);
1421 } else if (cpu
->stop
|| cpu
->stopped
) {
1428 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1430 /* XXX: implement xxx_cpu_list for targets that still miss it */
1431 #if defined(cpu_list)
1432 cpu_list(f
, cpu_fprintf
);
1436 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1438 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1443 #if defined(TARGET_I386)
1444 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1445 CPUX86State
*env
= &x86_cpu
->env
;
1446 #elif defined(TARGET_PPC)
1447 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1448 CPUPPCState
*env
= &ppc_cpu
->env
;
1449 #elif defined(TARGET_SPARC)
1450 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1451 CPUSPARCState
*env
= &sparc_cpu
->env
;
1452 #elif defined(TARGET_MIPS)
1453 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1454 CPUMIPSState
*env
= &mips_cpu
->env
;
1455 #elif defined(TARGET_TRICORE)
1456 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1457 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1460 cpu_synchronize_state(cpu
);
1462 info
= g_malloc0(sizeof(*info
));
1463 info
->value
= g_malloc0(sizeof(*info
->value
));
1464 info
->value
->CPU
= cpu
->cpu_index
;
1465 info
->value
->current
= (cpu
== first_cpu
);
1466 info
->value
->halted
= cpu
->halted
;
1467 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1468 info
->value
->thread_id
= cpu
->thread_id
;
1469 #if defined(TARGET_I386)
1470 info
->value
->has_pc
= true;
1471 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1472 #elif defined(TARGET_PPC)
1473 info
->value
->has_nip
= true;
1474 info
->value
->nip
= env
->nip
;
1475 #elif defined(TARGET_SPARC)
1476 info
->value
->has_pc
= true;
1477 info
->value
->pc
= env
->pc
;
1478 info
->value
->has_npc
= true;
1479 info
->value
->npc
= env
->npc
;
1480 #elif defined(TARGET_MIPS)
1481 info
->value
->has_PC
= true;
1482 info
->value
->PC
= env
->active_tc
.PC
;
1483 #elif defined(TARGET_TRICORE)
1484 info
->value
->has_PC
= true;
1485 info
->value
->PC
= env
->PC
;
1488 /* XXX: waiting for the qapi to support GSList */
1490 head
= cur_item
= info
;
1492 cur_item
->next
= info
;
1500 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1501 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1507 int64_t orig_addr
= addr
, orig_size
= size
;
1513 cpu
= qemu_get_cpu(cpu_index
);
1515 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1520 f
= fopen(filename
, "wb");
1522 error_setg_file_open(errp
, errno
, filename
);
1530 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1531 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1532 " specified", orig_addr
, orig_size
);
1535 if (fwrite(buf
, 1, l
, f
) != l
) {
1536 error_set(errp
, QERR_IO_ERROR
);
1547 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1554 f
= fopen(filename
, "wb");
1556 error_setg_file_open(errp
, errno
, filename
);
1564 cpu_physical_memory_read(addr
, buf
, l
);
1565 if (fwrite(buf
, 1, l
, f
) != l
) {
1566 error_set(errp
, QERR_IO_ERROR
);
1577 void qmp_inject_nmi(Error
**errp
)
1579 #if defined(TARGET_I386)
1583 X86CPU
*cpu
= X86_CPU(cs
);
1585 if (!cpu
->apic_state
) {
1586 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1588 apic_deliver_nmi(cpu
->apic_state
);
1592 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1596 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1602 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1603 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1604 if (icount_align_option
) {
1605 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1606 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1608 cpu_fprintf(f
, "Max guest delay NA\n");
1609 cpu_fprintf(f
, "Max guest advance NA\n");