4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "exec/gdbstub.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/kvm.h"
35 #include "qmp-commands.h"
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
45 #include "sysemu/replay.h"
48 #include "qemu/compatfd.h"
53 #include <sys/prctl.h>
56 #define PR_MCE_KILL 33
59 #ifndef PR_MCE_KILL_SET
60 #define PR_MCE_KILL_SET 1
63 #ifndef PR_MCE_KILL_EARLY
64 #define PR_MCE_KILL_EARLY 1
67 #endif /* CONFIG_LINUX */
69 static CPUState
*next_cpu
;
73 /* vcpu throttling controls */
74 static QEMUTimer
*throttle_timer
;
75 static unsigned int throttle_percentage
;
77 #define CPU_THROTTLE_PCT_MIN 1
78 #define CPU_THROTTLE_PCT_MAX 99
79 #define CPU_THROTTLE_TIMESLICE_NS 10000000
81 bool cpu_is_stopped(CPUState
*cpu
)
83 return cpu
->stopped
|| !runstate_is_running();
86 static bool cpu_thread_is_idle(CPUState
*cpu
)
88 if (cpu
->stop
|| cpu
->queued_work_first
) {
91 if (cpu_is_stopped(cpu
)) {
94 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
95 kvm_halt_in_kernel()) {
101 static bool all_cpu_threads_idle(void)
106 if (!cpu_thread_is_idle(cpu
)) {
113 /***********************************************************/
114 /* guest cycle counter */
116 /* Protected by TimersState seqlock */
118 static bool icount_sleep
= true;
119 static int64_t vm_clock_warp_start
= -1;
120 /* Conversion factor from emulated instructions to virtual clock ticks. */
121 static int icount_time_shift
;
122 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
123 #define MAX_ICOUNT_SHIFT 10
125 static QEMUTimer
*icount_rt_timer
;
126 static QEMUTimer
*icount_vm_timer
;
127 static QEMUTimer
*icount_warp_timer
;
129 typedef struct TimersState
{
130 /* Protected by BQL. */
131 int64_t cpu_ticks_prev
;
132 int64_t cpu_ticks_offset
;
134 /* cpu_clock_offset can be read out of BQL, so protect it with
137 QemuSeqLock vm_clock_seqlock
;
138 int64_t cpu_clock_offset
;
139 int32_t cpu_ticks_enabled
;
142 /* Compensate for varying guest execution speed. */
143 int64_t qemu_icount_bias
;
144 /* Only written by TCG thread */
148 static TimersState timers_state
;
150 int64_t cpu_get_icount_raw(void)
153 CPUState
*cpu
= current_cpu
;
155 icount
= timers_state
.qemu_icount
;
157 if (!cpu
->can_do_io
) {
158 fprintf(stderr
, "Bad icount read\n");
161 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
166 /* Return the virtual CPU time, based on the instruction counter. */
167 static int64_t cpu_get_icount_locked(void)
169 int64_t icount
= cpu_get_icount_raw();
170 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
173 int64_t cpu_get_icount(void)
179 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
180 icount
= cpu_get_icount_locked();
181 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
186 int64_t cpu_icount_to_ns(int64_t icount
)
188 return icount
<< icount_time_shift
;
191 /* return the host CPU cycle counter and handle stop/restart */
192 /* Caller must hold the BQL */
193 int64_t cpu_get_ticks(void)
198 return cpu_get_icount();
201 ticks
= timers_state
.cpu_ticks_offset
;
202 if (timers_state
.cpu_ticks_enabled
) {
203 ticks
+= cpu_get_host_ticks();
206 if (timers_state
.cpu_ticks_prev
> ticks
) {
207 /* Note: non increasing ticks may happen if the host uses
209 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
210 ticks
= timers_state
.cpu_ticks_prev
;
213 timers_state
.cpu_ticks_prev
= ticks
;
217 static int64_t cpu_get_clock_locked(void)
221 ticks
= timers_state
.cpu_clock_offset
;
222 if (timers_state
.cpu_ticks_enabled
) {
223 ticks
+= get_clock();
229 /* return the host CPU monotonic timer and handle stop/restart */
230 int64_t cpu_get_clock(void)
236 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
237 ti
= cpu_get_clock_locked();
238 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
243 /* enable cpu_get_ticks()
244 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
246 void cpu_enable_ticks(void)
248 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
249 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
250 if (!timers_state
.cpu_ticks_enabled
) {
251 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
252 timers_state
.cpu_clock_offset
-= get_clock();
253 timers_state
.cpu_ticks_enabled
= 1;
255 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
258 /* disable cpu_get_ticks() : the clock is stopped. You must not call
259 * cpu_get_ticks() after that.
260 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
262 void cpu_disable_ticks(void)
264 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
265 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
266 if (timers_state
.cpu_ticks_enabled
) {
267 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
268 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
269 timers_state
.cpu_ticks_enabled
= 0;
271 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
274 /* Correlation between real and virtual time is always going to be
275 fairly approximate, so ignore small variation.
276 When the guest is idle real and virtual time will be aligned in
278 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
280 static void icount_adjust(void)
286 /* Protected by TimersState mutex. */
287 static int64_t last_delta
;
289 /* If the VM is not running, then do nothing. */
290 if (!runstate_is_running()) {
294 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
295 cur_time
= cpu_get_clock_locked();
296 cur_icount
= cpu_get_icount_locked();
298 delta
= cur_icount
- cur_time
;
299 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
301 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
302 && icount_time_shift
> 0) {
303 /* The guest is getting too far ahead. Slow time down. */
307 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
308 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
309 /* The guest is getting too far behind. Speed time up. */
313 timers_state
.qemu_icount_bias
= cur_icount
314 - (timers_state
.qemu_icount
<< icount_time_shift
);
315 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
318 static void icount_adjust_rt(void *opaque
)
320 timer_mod(icount_rt_timer
,
321 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
325 static void icount_adjust_vm(void *opaque
)
327 timer_mod(icount_vm_timer
,
328 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
329 get_ticks_per_sec() / 10);
333 static int64_t qemu_icount_round(int64_t count
)
335 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
338 static void icount_warp_rt(void *opaque
)
340 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
341 * changes from -1 to another value, so the race here is okay.
343 if (atomic_read(&vm_clock_warp_start
) == -1) {
347 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
348 if (runstate_is_running()) {
349 int64_t clock
= cpu_get_clock_locked();
352 warp_delta
= clock
- vm_clock_warp_start
;
353 if (use_icount
== 2) {
355 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
356 * far ahead of real time.
358 int64_t cur_icount
= cpu_get_icount_locked();
359 int64_t delta
= clock
- cur_icount
;
360 warp_delta
= MIN(warp_delta
, delta
);
362 timers_state
.qemu_icount_bias
+= warp_delta
;
364 vm_clock_warp_start
= -1;
365 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
367 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
372 void qtest_clock_warp(int64_t dest
)
374 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
375 AioContext
*aio_context
;
376 assert(qtest_enabled());
377 aio_context
= qemu_get_aio_context();
378 while (clock
< dest
) {
379 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
380 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
382 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
383 timers_state
.qemu_icount_bias
+= warp
;
384 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
386 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
387 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
388 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
390 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
393 void qemu_clock_warp(QEMUClockType type
)
399 * There are too many global variables to make the "warp" behavior
400 * applicable to other clocks. But a clock argument removes the
401 * need for if statements all over the place.
403 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
409 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
410 * This ensures that the deadline for the timer is computed correctly
412 * This also makes sure that the insn counter is synchronized before
413 * the CPU starts running, in case the CPU is woken by an event other
414 * than the earliest QEMU_CLOCK_VIRTUAL timer.
416 icount_warp_rt(NULL
);
417 timer_del(icount_warp_timer
);
419 if (!all_cpu_threads_idle()) {
423 if (qtest_enabled()) {
424 /* When testing, qtest commands advance icount. */
428 /* We want to use the earliest deadline from ALL vm_clocks */
429 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
430 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
432 static bool notified
;
433 if (!icount_sleep
&& !notified
) {
434 error_report("WARNING: icount sleep disabled and no active timers");
442 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
443 * sleep. Otherwise, the CPU might be waiting for a future timer
444 * interrupt to wake it up, but the interrupt never comes because
445 * the vCPU isn't running any insns and thus doesn't advance the
446 * QEMU_CLOCK_VIRTUAL.
450 * We never let VCPUs sleep in no sleep icount mode.
451 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
452 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
453 * It is useful when we want a deterministic execution time,
454 * isolated from host latencies.
456 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
457 timers_state
.qemu_icount_bias
+= deadline
;
458 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
459 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
462 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
463 * "real" time, (related to the time left until the next event) has
464 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
465 * This avoids that the warps are visible externally; for example,
466 * you will not be sending network packets continuously instead of
469 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
470 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
471 vm_clock_warp_start
= clock
;
473 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
474 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
476 } else if (deadline
== 0) {
477 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
481 static bool icount_state_needed(void *opaque
)
487 * This is a subsection for icount migration.
489 static const VMStateDescription icount_vmstate_timers
= {
490 .name
= "timer/icount",
492 .minimum_version_id
= 1,
493 .needed
= icount_state_needed
,
494 .fields
= (VMStateField
[]) {
495 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
496 VMSTATE_INT64(qemu_icount
, TimersState
),
497 VMSTATE_END_OF_LIST()
501 static const VMStateDescription vmstate_timers
= {
504 .minimum_version_id
= 1,
505 .fields
= (VMStateField
[]) {
506 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
507 VMSTATE_INT64(dummy
, TimersState
),
508 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
509 VMSTATE_END_OF_LIST()
511 .subsections
= (const VMStateDescription
*[]) {
512 &icount_vmstate_timers
,
517 static void cpu_throttle_thread(void *opaque
)
519 CPUState
*cpu
= opaque
;
521 double throttle_ratio
;
524 if (!cpu_throttle_get_percentage()) {
528 pct
= (double)cpu_throttle_get_percentage()/100;
529 throttle_ratio
= pct
/ (1 - pct
);
530 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
532 qemu_mutex_unlock_iothread();
533 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
534 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
535 qemu_mutex_lock_iothread();
538 static void cpu_throttle_timer_tick(void *opaque
)
543 /* Stop the timer if needed */
544 if (!cpu_throttle_get_percentage()) {
548 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
549 async_run_on_cpu(cpu
, cpu_throttle_thread
, cpu
);
553 pct
= (double)cpu_throttle_get_percentage()/100;
554 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
555 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
558 void cpu_throttle_set(int new_throttle_pct
)
560 /* Ensure throttle percentage is within valid range */
561 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
562 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
564 atomic_set(&throttle_percentage
, new_throttle_pct
);
566 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
567 CPU_THROTTLE_TIMESLICE_NS
);
570 void cpu_throttle_stop(void)
572 atomic_set(&throttle_percentage
, 0);
/* Return true while the current throttle percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
580 int cpu_throttle_get_percentage(void)
582 return atomic_read(&throttle_percentage
);
585 void cpu_ticks_init(void)
587 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
588 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
589 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
590 cpu_throttle_timer_tick
, NULL
);
593 void configure_icount(QemuOpts
*opts
, Error
**errp
)
596 char *rem_str
= NULL
;
598 option
= qemu_opt_get(opts
, "shift");
600 if (qemu_opt_get(opts
, "align") != NULL
) {
601 error_setg(errp
, "Please specify shift option when using align");
606 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
608 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
609 icount_warp_rt
, NULL
);
612 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
614 if (icount_align_option
&& !icount_sleep
) {
615 error_setg(errp
, "align=on and sleep=no are incompatible");
617 if (strcmp(option
, "auto") != 0) {
619 icount_time_shift
= strtol(option
, &rem_str
, 0);
620 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
621 error_setg(errp
, "icount: Invalid shift value");
625 } else if (icount_align_option
) {
626 error_setg(errp
, "shift=auto and align=on are incompatible");
627 } else if (!icount_sleep
) {
628 error_setg(errp
, "shift=auto and sleep=no are incompatible");
633 /* 125MIPS seems a reasonable initial guess at the guest speed.
634 It will be corrected fairly quickly anyway. */
635 icount_time_shift
= 3;
637 /* Have both realtime and virtual time triggers for speed adjustment.
638 The realtime trigger catches emulated time passing too slowly,
639 the virtual time trigger catches emulated time passing too fast.
640 Realtime triggers occur even when idle, so use them less frequently
642 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
643 icount_adjust_rt
, NULL
);
644 timer_mod(icount_rt_timer
,
645 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
646 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
647 icount_adjust_vm
, NULL
);
648 timer_mod(icount_vm_timer
,
649 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
650 get_ticks_per_sec() / 10);
653 /***********************************************************/
654 void hw_error(const char *fmt
, ...)
660 fprintf(stderr
, "qemu: hardware error: ");
661 vfprintf(stderr
, fmt
, ap
);
662 fprintf(stderr
, "\n");
664 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
665 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
671 void cpu_synchronize_all_states(void)
676 cpu_synchronize_state(cpu
);
680 void cpu_synchronize_all_post_reset(void)
685 cpu_synchronize_post_reset(cpu
);
689 void cpu_synchronize_all_post_init(void)
694 cpu_synchronize_post_init(cpu
);
698 void cpu_clean_all_dirty(void)
703 cpu_clean_state(cpu
);
707 static int do_vm_stop(RunState state
)
711 if (runstate_is_running()) {
715 vm_state_notify(0, state
);
716 qapi_event_send_stop(&error_abort
);
720 ret
= bdrv_flush_all();
725 static bool cpu_can_run(CPUState
*cpu
)
730 if (cpu_is_stopped(cpu
)) {
736 static void cpu_handle_guest_debug(CPUState
*cpu
)
738 gdb_set_stop_cpu(cpu
);
739 qemu_system_debug_request();
744 static void sigbus_reraise(void)
747 struct sigaction action
;
749 memset(&action
, 0, sizeof(action
));
750 action
.sa_handler
= SIG_DFL
;
751 if (!sigaction(SIGBUS
, &action
, NULL
)) {
754 sigaddset(&set
, SIGBUS
);
755 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
757 perror("Failed to re-raise SIGBUS!\n");
761 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
764 if (kvm_on_sigbus(siginfo
->ssi_code
,
765 (void *)(intptr_t)siginfo
->ssi_addr
)) {
770 static void qemu_init_sigbus(void)
772 struct sigaction action
;
774 memset(&action
, 0, sizeof(action
));
775 action
.sa_flags
= SA_SIGINFO
;
776 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
777 sigaction(SIGBUS
, &action
, NULL
);
779 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
782 static void qemu_kvm_eat_signals(CPUState
*cpu
)
784 struct timespec ts
= { 0, 0 };
790 sigemptyset(&waitset
);
791 sigaddset(&waitset
, SIG_IPI
);
792 sigaddset(&waitset
, SIGBUS
);
795 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
796 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
797 perror("sigtimedwait");
803 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
811 r
= sigpending(&chkset
);
813 perror("sigpending");
816 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
819 #else /* !CONFIG_LINUX */
/* !CONFIG_LINUX variant: there is no SIGBUS setup to do, so this is a no-op. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
825 static void qemu_kvm_eat_signals(CPUState
*cpu
)
828 #endif /* !CONFIG_LINUX */
/*
 * Signal handler that deliberately does nothing; @sig is ignored.
 * It is installed for SIG_IPI by qemu_kvm_init_cpu_signals().
 */
static void dummy_signal(int sig)
{
    (void)sig;
}
835 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
839 struct sigaction sigact
;
841 memset(&sigact
, 0, sizeof(sigact
));
842 sigact
.sa_handler
= dummy_signal
;
843 sigaction(SIG_IPI
, &sigact
, NULL
);
845 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
846 sigdelset(&set
, SIG_IPI
);
847 sigdelset(&set
, SIGBUS
);
848 r
= kvm_set_signal_mask(cpu
, &set
);
850 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
856 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
862 static QemuMutex qemu_global_mutex
;
863 static QemuCond qemu_io_proceeded_cond
;
864 static unsigned iothread_requesting_mutex
;
866 static QemuThread io_thread
;
869 static QemuCond qemu_cpu_cond
;
871 static QemuCond qemu_pause_cond
;
872 static QemuCond qemu_work_cond
;
874 void qemu_init_cpu_loop(void)
877 qemu_cond_init(&qemu_cpu_cond
);
878 qemu_cond_init(&qemu_pause_cond
);
879 qemu_cond_init(&qemu_work_cond
);
880 qemu_cond_init(&qemu_io_proceeded_cond
);
881 qemu_mutex_init(&qemu_global_mutex
);
883 qemu_thread_get_self(&io_thread
);
886 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
888 struct qemu_work_item wi
;
890 if (qemu_cpu_is_self(cpu
)) {
899 qemu_mutex_lock(&cpu
->work_mutex
);
900 if (cpu
->queued_work_first
== NULL
) {
901 cpu
->queued_work_first
= &wi
;
903 cpu
->queued_work_last
->next
= &wi
;
905 cpu
->queued_work_last
= &wi
;
908 qemu_mutex_unlock(&cpu
->work_mutex
);
911 while (!atomic_mb_read(&wi
.done
)) {
912 CPUState
*self_cpu
= current_cpu
;
914 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
915 current_cpu
= self_cpu
;
919 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
921 struct qemu_work_item
*wi
;
923 if (qemu_cpu_is_self(cpu
)) {
928 wi
= g_malloc0(sizeof(struct qemu_work_item
));
933 qemu_mutex_lock(&cpu
->work_mutex
);
934 if (cpu
->queued_work_first
== NULL
) {
935 cpu
->queued_work_first
= wi
;
937 cpu
->queued_work_last
->next
= wi
;
939 cpu
->queued_work_last
= wi
;
942 qemu_mutex_unlock(&cpu
->work_mutex
);
947 static void flush_queued_work(CPUState
*cpu
)
949 struct qemu_work_item
*wi
;
951 if (cpu
->queued_work_first
== NULL
) {
955 qemu_mutex_lock(&cpu
->work_mutex
);
956 while (cpu
->queued_work_first
!= NULL
) {
957 wi
= cpu
->queued_work_first
;
958 cpu
->queued_work_first
= wi
->next
;
959 if (!cpu
->queued_work_first
) {
960 cpu
->queued_work_last
= NULL
;
962 qemu_mutex_unlock(&cpu
->work_mutex
);
964 qemu_mutex_lock(&cpu
->work_mutex
);
968 atomic_mb_set(&wi
->done
, true);
971 qemu_mutex_unlock(&cpu
->work_mutex
);
972 qemu_cond_broadcast(&qemu_work_cond
);
975 static void qemu_wait_io_event_common(CPUState
*cpu
)
980 qemu_cond_signal(&qemu_pause_cond
);
982 flush_queued_work(cpu
);
983 cpu
->thread_kicked
= false;
986 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
988 while (all_cpu_threads_idle()) {
989 /* Start accounting real time to the virtual clock if the CPUs
991 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
992 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
995 while (iothread_requesting_mutex
) {
996 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
1000 qemu_wait_io_event_common(cpu
);
1004 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
1006 while (cpu_thread_is_idle(cpu
)) {
1007 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1010 qemu_kvm_eat_signals(cpu
);
1011 qemu_wait_io_event_common(cpu
);
1014 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1016 CPUState
*cpu
= arg
;
1019 rcu_register_thread();
1021 qemu_mutex_lock_iothread();
1022 qemu_thread_get_self(cpu
->thread
);
1023 cpu
->thread_id
= qemu_get_thread_id();
1027 r
= kvm_init_vcpu(cpu
);
1029 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1033 qemu_kvm_init_cpu_signals(cpu
);
1035 /* signal CPU creation */
1036 cpu
->created
= true;
1037 qemu_cond_signal(&qemu_cpu_cond
);
1040 if (cpu_can_run(cpu
)) {
1041 r
= kvm_cpu_exec(cpu
);
1042 if (r
== EXCP_DEBUG
) {
1043 cpu_handle_guest_debug(cpu
);
1046 qemu_kvm_wait_io_event(cpu
);
1052 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1055 fprintf(stderr
, "qtest is not supported under Windows\n");
1058 CPUState
*cpu
= arg
;
1062 rcu_register_thread();
1064 qemu_mutex_lock_iothread();
1065 qemu_thread_get_self(cpu
->thread
);
1066 cpu
->thread_id
= qemu_get_thread_id();
1069 sigemptyset(&waitset
);
1070 sigaddset(&waitset
, SIG_IPI
);
1072 /* signal CPU creation */
1073 cpu
->created
= true;
1074 qemu_cond_signal(&qemu_cpu_cond
);
1079 qemu_mutex_unlock_iothread();
1082 r
= sigwait(&waitset
, &sig
);
1083 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1088 qemu_mutex_lock_iothread();
1090 qemu_wait_io_event_common(cpu
);
1097 static void tcg_exec_all(void);
1099 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1101 CPUState
*cpu
= arg
;
1103 rcu_register_thread();
1105 qemu_mutex_lock_iothread();
1106 qemu_thread_get_self(cpu
->thread
);
1109 cpu
->thread_id
= qemu_get_thread_id();
1110 cpu
->created
= true;
1113 qemu_cond_signal(&qemu_cpu_cond
);
1115 /* wait for initial kick-off after machine start */
1116 while (first_cpu
->stopped
) {
1117 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1119 /* process any pending work */
1121 qemu_wait_io_event_common(cpu
);
1125 /* process any pending work */
1126 atomic_mb_set(&exit_request
, 1);
1132 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1134 if (deadline
== 0) {
1135 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1138 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus
));
1144 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1149 if (cpu
->thread_kicked
) {
1152 cpu
->thread_kicked
= true;
1153 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1155 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1163 static void qemu_cpu_kick_no_halt(void)
1166 /* Ensure whatever caused the exit has reached the CPU threads before
1167 * writing exit_request.
1169 atomic_mb_set(&exit_request
, 1);
1170 cpu
= atomic_mb_read(&tcg_current_cpu
);
1176 void qemu_cpu_kick(CPUState
*cpu
)
1178 qemu_cond_broadcast(cpu
->halt_cond
);
1179 if (tcg_enabled()) {
1180 qemu_cpu_kick_no_halt();
1182 qemu_cpu_kick_thread(cpu
);
1186 void qemu_cpu_kick_self(void)
1188 assert(current_cpu
);
1189 qemu_cpu_kick_thread(current_cpu
);
1192 bool qemu_cpu_is_self(CPUState
*cpu
)
1194 return qemu_thread_is_self(cpu
->thread
);
1197 bool qemu_in_vcpu_thread(void)
1199 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1202 static __thread
bool iothread_locked
= false;
1204 bool qemu_mutex_iothread_locked(void)
1206 return iothread_locked
;
1209 void qemu_mutex_lock_iothread(void)
1211 atomic_inc(&iothread_requesting_mutex
);
1212 /* In the simple case there is no need to bump the VCPU thread out of
1213 * TCG code execution.
1215 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1216 !first_cpu
|| !first_cpu
->created
) {
1217 qemu_mutex_lock(&qemu_global_mutex
);
1218 atomic_dec(&iothread_requesting_mutex
);
1220 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1221 qemu_cpu_kick_no_halt();
1222 qemu_mutex_lock(&qemu_global_mutex
);
1224 atomic_dec(&iothread_requesting_mutex
);
1225 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1227 iothread_locked
= true;
1230 void qemu_mutex_unlock_iothread(void)
1232 iothread_locked
= false;
1233 qemu_mutex_unlock(&qemu_global_mutex
);
1236 static int all_vcpus_paused(void)
1241 if (!cpu
->stopped
) {
1249 void pause_all_vcpus(void)
1253 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1259 if (qemu_in_vcpu_thread()) {
1261 if (!kvm_enabled()) {
1264 cpu
->stopped
= true;
1270 while (!all_vcpus_paused()) {
1271 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1278 void cpu_resume(CPUState
*cpu
)
1281 cpu
->stopped
= false;
1285 void resume_all_vcpus(void)
1289 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1295 /* For temporary buffers for forming a name */
1296 #define VCPU_THREAD_NAME_SIZE 16
1298 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1300 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1301 static QemuCond
*tcg_halt_cond
;
1302 static QemuThread
*tcg_cpu_thread
;
1304 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1306 /* share a single thread for all cpus with TCG */
1307 if (!tcg_cpu_thread
) {
1308 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1309 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1310 qemu_cond_init(cpu
->halt_cond
);
1311 tcg_halt_cond
= cpu
->halt_cond
;
1312 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1314 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1315 cpu
, QEMU_THREAD_JOINABLE
);
1317 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1319 while (!cpu
->created
) {
1320 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1322 tcg_cpu_thread
= cpu
->thread
;
1324 cpu
->thread
= tcg_cpu_thread
;
1325 cpu
->halt_cond
= tcg_halt_cond
;
1329 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1331 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1333 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1334 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1335 qemu_cond_init(cpu
->halt_cond
);
1336 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1338 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1339 cpu
, QEMU_THREAD_JOINABLE
);
1340 while (!cpu
->created
) {
1341 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1345 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1347 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1349 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1350 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1351 qemu_cond_init(cpu
->halt_cond
);
1352 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1354 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1355 QEMU_THREAD_JOINABLE
);
1356 while (!cpu
->created
) {
1357 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1361 void qemu_init_vcpu(CPUState
*cpu
)
1363 cpu
->nr_cores
= smp_cores
;
1364 cpu
->nr_threads
= smp_threads
;
1365 cpu
->stopped
= true;
1366 if (kvm_enabled()) {
1367 qemu_kvm_start_vcpu(cpu
);
1368 } else if (tcg_enabled()) {
1369 qemu_tcg_init_vcpu(cpu
);
1371 qemu_dummy_start_vcpu(cpu
);
1375 void cpu_stop_current(void)
1378 current_cpu
->stop
= false;
1379 current_cpu
->stopped
= true;
1380 cpu_exit(current_cpu
);
1381 qemu_cond_signal(&qemu_pause_cond
);
1385 int vm_stop(RunState state
)
1387 if (qemu_in_vcpu_thread()) {
1388 qemu_system_vmstop_request_prepare();
1389 qemu_system_vmstop_request(state
);
1391 * FIXME: should not return to device code in case
1392 * vm_stop() has been requested.
1398 return do_vm_stop(state
);
1401 /* does a state transition even if the VM is already stopped,
1402 current state is forgotten forever */
1403 int vm_stop_force_state(RunState state
)
1405 if (runstate_is_running()) {
1406 return vm_stop(state
);
1408 runstate_set(state
);
1409 /* Make sure to return an error if the flush in a previous vm_stop()
1411 return bdrv_flush_all();
1415 static int64_t tcg_get_icount_limit(void)
1419 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1420 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1422 /* Maintain prior (possibly buggy) behaviour where if no deadline
1423 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1424 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1427 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1428 deadline
= INT32_MAX
;
1431 return qemu_icount_round(deadline
);
1433 return replay_get_instructions();
1437 static int tcg_cpu_exec(CPUState
*cpu
)
1440 #ifdef CONFIG_PROFILER
1444 #ifdef CONFIG_PROFILER
1445 ti
= profile_getclock();
1450 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1451 + cpu
->icount_extra
);
1452 cpu
->icount_decr
.u16
.low
= 0;
1453 cpu
->icount_extra
= 0;
1454 count
= tcg_get_icount_limit();
1455 timers_state
.qemu_icount
+= count
;
1456 decr
= (count
> 0xffff) ? 0xffff : count
;
1458 cpu
->icount_decr
.u16
.low
= decr
;
1459 cpu
->icount_extra
= count
;
1461 ret
= cpu_exec(cpu
);
1462 #ifdef CONFIG_PROFILER
1463 tcg_time
+= profile_getclock() - ti
;
1466 /* Fold pending instructions back into the
1467 instruction counter, and clear the interrupt flag. */
1468 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1469 + cpu
->icount_extra
);
1470 cpu
->icount_decr
.u32
= 0;
1471 cpu
->icount_extra
= 0;
1472 replay_account_executed_instructions();
/*
 * Give the vCPUs a turn at executing guest code.
 *
 * Iteration resumes at next_cpu (or at first_cpu when next_cpu is NULL)
 * and advances with CPU_NEXT() until the list ends or exit_request is
 * set.  For every vCPU that cpu_can_run() accepts, tcg_cpu_exec() is
 * called and an EXCP_DEBUG result is handed to cpu_handle_guest_debug().
 * QEMU_CLOCK_VIRTUAL is enabled for a vCPU unless its singlestep_enabled
 * flags contain SSTEP_NOTIMER.  exit_request is reset to 0 at the end.
 *
 * NOTE(review): the declaration of r and the remaining statements of the
 * EXCP_DEBUG and stop/stopped branches are not visible in this excerpt.
 */
1477 static void tcg_exec_all(void)
1481 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1482 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
/* Start over from the first vCPU once the previous pass reached the end. */
1484 if (next_cpu
== NULL
) {
1485 next_cpu
= first_cpu
;
1487 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1488 CPUState
*cpu
= next_cpu
;
/* The virtual clock is switched off while single-stepping with SSTEP_NOTIMER. */
1490 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1491 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1493 if (cpu_can_run(cpu
)) {
1494 r
= tcg_cpu_exec(cpu
);
1495 if (r
== EXCP_DEBUG
) {
1496 cpu_handle_guest_debug(cpu
);
1499 } else if (cpu
->stop
|| cpu
->stopped
) {
1504 /* Pairs with smp_wmb in qemu_cpu_kick. */
1505 atomic_mb_set(&exit_request
, 0);
1508 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1510 /* XXX: implement xxx_cpu_list for targets that still miss it */
1511 #if defined(cpu_list)
1512 cpu_list(f
, cpu_fprintf
);
/*
 * QMP handler that builds a linked list of CpuInfoList nodes, one per vCPU.
 *
 * For each vCPU the node and its value are allocated zero-filled with
 * g_malloc0() and filled with the CPU index, whether it is first_cpu
 * ("current"), the halted flag, the canonical QOM path and the thread id.
 * Depending on the target, the program counter is reported as pc, nip,
 * pc/npc or PC together with the matching has_* flag.  The first node
 * becomes the list head; later nodes are linked through cur_item->next.
 *
 * NOTE(review): the loop over the vCPUs, the update of the tail pointer
 * and the return statement are not visible in this excerpt, and @errp is
 * not referenced in the visible lines -- confirm against the full file.
 */
1516 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1518 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
/* Pick the architecture-specific register state for this target. */
1523 #if defined(TARGET_I386)
1524 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1525 CPUX86State
*env
= &x86_cpu
->env
;
1526 #elif defined(TARGET_PPC)
1527 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1528 CPUPPCState
*env
= &ppc_cpu
->env
;
1529 #elif defined(TARGET_SPARC)
1530 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1531 CPUSPARCState
*env
= &sparc_cpu
->env
;
1532 #elif defined(TARGET_MIPS)
1533 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1534 CPUMIPSState
*env
= &mips_cpu
->env
;
1535 #elif defined(TARGET_TRICORE)
1536 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1537 CPUTriCoreState
*env
= &tricore_cpu
->env
;
/* NOTE(review): presumably refreshes the register state read below -- confirm. */
1540 cpu_synchronize_state(cpu
);
/* Both the list node and its payload start out zero-filled. */
1542 info
= g_malloc0(sizeof(*info
));
1543 info
->value
= g_malloc0(sizeof(*info
->value
));
1544 info
->value
->CPU
= cpu
->cpu_index
;
1545 info
->value
->current
= (cpu
== first_cpu
);
1546 info
->value
->halted
= cpu
->halted
;
1547 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1548 info
->value
->thread_id
= cpu
->thread_id
;
/* Report the program counter under the field name used for this target. */
1549 #if defined(TARGET_I386)
1550 info
->value
->has_pc
= true;
1551 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1552 #elif defined(TARGET_PPC)
1553 info
->value
->has_nip
= true;
1554 info
->value
->nip
= env
->nip
;
1555 #elif defined(TARGET_SPARC)
1556 info
->value
->has_pc
= true;
1557 info
->value
->pc
= env
->pc
;
1558 info
->value
->has_npc
= true;
1559 info
->value
->npc
= env
->npc
;
1560 #elif defined(TARGET_MIPS)
1561 info
->value
->has_PC
= true;
1562 info
->value
->PC
= env
->active_tc
.PC
;
1563 #elif defined(TARGET_TRICORE)
1564 info
->value
->has_PC
= true;
1565 info
->value
->PC
= env
->PC
;
1568 /* XXX: waiting for the qapi to support GSList */
1570 head
= cur_item
= info
;
/* Later entries are chained behind the current tail. */
1572 cur_item
->next
= info
;
/*
 * QMP handler that writes guest memory, read through cpu_memory_rw_debug()
 * on a selected CPU, to the file @filename.
 *
 * The CPU is looked up with qemu_get_cpu(@cpu_index).  The file is opened
 * with mode "wb".  Errors are reported through @errp:
 *  - QERR_INVALID_PARAMETER_VALUE naming "cpu-index";
 *  - the error_setg_file_open() message when fopen() fails;
 *  - "Invalid addr .../size ... specified" (using the original @addr and
 *    @size) when cpu_memory_rw_debug() returns non-zero;
 *  - QERR_IO_ERROR when fwrite() writes fewer than l bytes.
 *
 * NOTE(review): the handling of @has_cpu, the chunking loop, the buffer
 * and the cleanup code are not visible in this excerpt -- confirm against
 * the full file.
 */
1580 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1581 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
/* Keep the requested range for the error message. */
1587 int64_t orig_addr
= addr
, orig_size
= size
;
1593 cpu
= qemu_get_cpu(cpu_index
);
1595 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1600 f
= fopen(filename
, "wb");
1602 error_setg_file_open(errp
, errno
, filename
);
/* NOTE(review): the final argument 0 presumably selects a read -- confirm. */
1610 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1611 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1612 " specified", orig_addr
, orig_size
);
/* A short write is reported as an I/O error. */
1615 if (fwrite(buf
, 1, l
, f
) != l
) {
1616 error_setg(errp
, QERR_IO_ERROR
);
/*
 * QMP handler that writes guest memory, read with
 * cpu_physical_memory_read(), to the file @filename.
 *
 * The file is opened with mode "wb"; a failing fopen() is reported through
 * error_setg_file_open(), and an fwrite() that writes fewer than l bytes is
 * reported as QERR_IO_ERROR.
 *
 * NOTE(review): the end of the parameter list, the chunking loop, the
 * buffer and the cleanup code are not visible in this excerpt -- confirm
 * against the full file.
 */
1627 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1634 f
= fopen(filename
, "wb");
1636 error_setg_file_open(errp
, errno
, filename
);
1644 cpu_physical_memory_read(addr
, buf
, l
);
/* A short write is reported as an I/O error. */
1645 if (fwrite(buf
, 1, l
, f
) != l
) {
1646 error_setg(errp
, QERR_IO_ERROR
);
/*
 * QMP handler that injects an NMI into the guest.
 *
 * On TARGET_I386 a CPU without apic_state gets CPU_INTERRUPT_NMI raised
 * directly through cpu_interrupt(); a CPU with apic_state gets the NMI
 * delivered through apic_deliver_nmi().  The visible code also calls
 * nmi_monitor_handle() with the monitor's CPU index and @errp.
 *
 * NOTE(review): how cs is obtained (presumably an iteration over all CPUs)
 * and the preprocessor branch that selects nmi_monitor_handle() are not
 * visible in this excerpt -- confirm against the full file.
 */
1657 void qmp_inject_nmi(Error
**errp
)
1659 #if defined(TARGET_I386)
1663 X86CPU
*cpu
= X86_CPU(cs
);
/* Without an APIC the NMI is raised on the CPU itself. */
1665 if (!cpu
->apic_state
) {
1666 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1668 apic_deliver_nmi(cpu
->apic_state
);
1672 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
/*
 * Print host/guest clock drift information to @f using @cpu_fprintf.
 *
 * The first line shows (cpu_get_clock() - cpu_get_icount()) / SCALE_MS in
 * milliseconds.  When icount_align_option is set, the maximum guest delay
 * (-max_delay / SCALE_MS) and the maximum guest advance
 * (max_advance / SCALE_MS) follow; the "NA" variants of those two lines
 * are presumably printed in the else branch.
 *
 * NOTE(review): the lines between the signature and the first print, the
 * else keyword and the closing braces are not visible in this excerpt --
 * confirm against the full file.
 */
1676 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1682 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1683 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1684 if (icount_align_option
) {
/* max_delay is negated for display. */
1685 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1686 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1688 cpu_fprintf(f
, "Max guest delay NA\n");
1689 cpu_fprintf(f
, "Max guest advance NA\n");