4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "exec/gdbstub.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/kvm.h"
35 #include "qmp-commands.h"
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
47 #include "qemu/compatfd.h"
52 #include <sys/prctl.h>
55 #define PR_MCE_KILL 33
58 #ifndef PR_MCE_KILL_SET
59 #define PR_MCE_KILL_SET 1
62 #ifndef PR_MCE_KILL_EARLY
63 #define PR_MCE_KILL_EARLY 1
66 #endif /* CONFIG_LINUX */
68 static CPUState
*next_cpu
;
72 bool cpu_is_stopped(CPUState
*cpu
)
74 return cpu
->stopped
|| !runstate_is_running();
77 static bool cpu_thread_is_idle(CPUState
*cpu
)
79 if (cpu
->stop
|| cpu
->queued_work_first
) {
82 if (cpu_is_stopped(cpu
)) {
85 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
86 kvm_halt_in_kernel()) {
92 static bool all_cpu_threads_idle(void)
97 if (!cpu_thread_is_idle(cpu
)) {
104 /***********************************************************/
105 /* guest cycle counter */
107 /* Protected by TimersState seqlock */
109 static bool icount_sleep
= true;
110 static int64_t vm_clock_warp_start
= -1;
111 /* Conversion factor from emulated instructions to virtual clock ticks. */
112 static int icount_time_shift
;
113 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
114 #define MAX_ICOUNT_SHIFT 10
116 static QEMUTimer
*icount_rt_timer
;
117 static QEMUTimer
*icount_vm_timer
;
118 static QEMUTimer
*icount_warp_timer
;
120 typedef struct TimersState
{
121 /* Protected by BQL. */
122 int64_t cpu_ticks_prev
;
123 int64_t cpu_ticks_offset
;
125 /* cpu_clock_offset can be read out of BQL, so protect it with
128 QemuSeqLock vm_clock_seqlock
;
129 int64_t cpu_clock_offset
;
130 int32_t cpu_ticks_enabled
;
133 /* Compensate for varying guest execution speed. */
134 int64_t qemu_icount_bias
;
135 /* Only written by TCG thread */
139 static TimersState timers_state
;
141 int64_t cpu_get_icount_raw(void)
144 CPUState
*cpu
= current_cpu
;
146 icount
= timers_state
.qemu_icount
;
148 if (!cpu_can_do_io(cpu
)) {
149 fprintf(stderr
, "Bad icount read\n");
152 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
157 /* Return the virtual CPU time, based on the instruction counter. */
158 static int64_t cpu_get_icount_locked(void)
160 int64_t icount
= cpu_get_icount_raw();
161 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
164 int64_t cpu_get_icount(void)
170 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
171 icount
= cpu_get_icount_locked();
172 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
177 int64_t cpu_icount_to_ns(int64_t icount
)
179 return icount
<< icount_time_shift
;
182 /* return the host CPU cycle counter and handle stop/restart */
183 /* Caller must hold the BQL */
184 int64_t cpu_get_ticks(void)
189 return cpu_get_icount();
192 ticks
= timers_state
.cpu_ticks_offset
;
193 if (timers_state
.cpu_ticks_enabled
) {
194 ticks
+= cpu_get_real_ticks();
197 if (timers_state
.cpu_ticks_prev
> ticks
) {
198 /* Note: non increasing ticks may happen if the host uses
200 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
201 ticks
= timers_state
.cpu_ticks_prev
;
204 timers_state
.cpu_ticks_prev
= ticks
;
208 static int64_t cpu_get_clock_locked(void)
212 ticks
= timers_state
.cpu_clock_offset
;
213 if (timers_state
.cpu_ticks_enabled
) {
214 ticks
+= get_clock();
220 /* return the host CPU monotonic timer and handle stop/restart */
221 int64_t cpu_get_clock(void)
227 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
228 ti
= cpu_get_clock_locked();
229 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
234 /* enable cpu_get_ticks()
235 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
237 void cpu_enable_ticks(void)
239 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
240 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
241 if (!timers_state
.cpu_ticks_enabled
) {
242 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
243 timers_state
.cpu_clock_offset
-= get_clock();
244 timers_state
.cpu_ticks_enabled
= 1;
246 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
249 /* disable cpu_get_ticks() : the clock is stopped. You must not call
250 * cpu_get_ticks() after that.
251 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
253 void cpu_disable_ticks(void)
255 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
256 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
257 if (timers_state
.cpu_ticks_enabled
) {
258 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
259 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
260 timers_state
.cpu_ticks_enabled
= 0;
262 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
265 /* Correlation between real and virtual time is always going to be
266 fairly approximate, so ignore small variation.
267 When the guest is idle real and virtual time will be aligned in
269 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
271 static void icount_adjust(void)
277 /* Protected by TimersState mutex. */
278 static int64_t last_delta
;
280 /* If the VM is not running, then do nothing. */
281 if (!runstate_is_running()) {
285 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
286 cur_time
= cpu_get_clock_locked();
287 cur_icount
= cpu_get_icount_locked();
289 delta
= cur_icount
- cur_time
;
290 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
292 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
293 && icount_time_shift
> 0) {
294 /* The guest is getting too far ahead. Slow time down. */
298 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
299 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
300 /* The guest is getting too far behind. Speed time up. */
304 timers_state
.qemu_icount_bias
= cur_icount
305 - (timers_state
.qemu_icount
<< icount_time_shift
);
306 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
309 static void icount_adjust_rt(void *opaque
)
311 timer_mod(icount_rt_timer
,
312 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
316 static void icount_adjust_vm(void *opaque
)
318 timer_mod(icount_vm_timer
,
319 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
320 get_ticks_per_sec() / 10);
324 static int64_t qemu_icount_round(int64_t count
)
326 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
329 static void icount_warp_rt(void *opaque
)
331 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
332 * changes from -1 to another value, so the race here is okay.
334 if (atomic_read(&vm_clock_warp_start
) == -1) {
338 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
339 if (runstate_is_running()) {
340 int64_t clock
= cpu_get_clock_locked();
343 warp_delta
= clock
- vm_clock_warp_start
;
344 if (use_icount
== 2) {
346 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
347 * far ahead of real time.
349 int64_t cur_icount
= cpu_get_icount_locked();
350 int64_t delta
= clock
- cur_icount
;
351 warp_delta
= MIN(warp_delta
, delta
);
353 timers_state
.qemu_icount_bias
+= warp_delta
;
355 vm_clock_warp_start
= -1;
356 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
358 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
359 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
363 void qtest_clock_warp(int64_t dest
)
365 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
366 AioContext
*aio_context
;
367 assert(qtest_enabled());
368 aio_context
= qemu_get_aio_context();
369 while (clock
< dest
) {
370 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
371 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
373 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
374 timers_state
.qemu_icount_bias
+= warp
;
375 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
377 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
378 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
379 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
381 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
384 void qemu_clock_warp(QEMUClockType type
)
390 * There are too many global variables to make the "warp" behavior
391 * applicable to other clocks. But a clock argument removes the
392 * need for if statements all over the place.
394 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
400 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
401 * This ensures that the deadline for the timer is computed correctly
403 * This also makes sure that the insn counter is synchronized before
404 * the CPU starts running, in case the CPU is woken by an event other
405 * than the earliest QEMU_CLOCK_VIRTUAL timer.
407 icount_warp_rt(NULL
);
408 timer_del(icount_warp_timer
);
410 if (!all_cpu_threads_idle()) {
414 if (qtest_enabled()) {
415 /* When testing, qtest commands advance icount. */
419 /* We want to use the earliest deadline from ALL vm_clocks */
420 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
421 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
423 static bool notified
;
424 if (!icount_sleep
&& !notified
) {
425 error_report("WARNING: icount sleep disabled and no active timers");
433 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
434 * sleep. Otherwise, the CPU might be waiting for a future timer
435 * interrupt to wake it up, but the interrupt never comes because
436 * the vCPU isn't running any insns and thus doesn't advance the
437 * QEMU_CLOCK_VIRTUAL.
441 * We never let VCPUs sleep in no sleep icount mode.
442 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
443 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
444 * It is useful when we want a deterministic execution time,
445 * isolated from host latencies.
447 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
448 timers_state
.qemu_icount_bias
+= deadline
;
449 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
450 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
453 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
454 * "real" time, (related to the time left until the next event) has
455 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
456 * This avoids that the warps are visible externally; for example,
457 * you will not be sending network packets continuously instead of
460 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
461 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
462 vm_clock_warp_start
= clock
;
464 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
465 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
467 } else if (deadline
== 0) {
468 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
472 static bool icount_state_needed(void *opaque
)
478 * This is a subsection for icount migration.
480 static const VMStateDescription icount_vmstate_timers
= {
481 .name
= "timer/icount",
483 .minimum_version_id
= 1,
484 .needed
= icount_state_needed
,
485 .fields
= (VMStateField
[]) {
486 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
487 VMSTATE_INT64(qemu_icount
, TimersState
),
488 VMSTATE_END_OF_LIST()
492 static const VMStateDescription vmstate_timers
= {
495 .minimum_version_id
= 1,
496 .fields
= (VMStateField
[]) {
497 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
498 VMSTATE_INT64(dummy
, TimersState
),
499 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
500 VMSTATE_END_OF_LIST()
502 .subsections
= (const VMStateDescription
*[]) {
503 &icount_vmstate_timers
,
508 void cpu_ticks_init(void)
510 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
511 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
514 void configure_icount(QemuOpts
*opts
, Error
**errp
)
517 char *rem_str
= NULL
;
519 option
= qemu_opt_get(opts
, "shift");
521 if (qemu_opt_get(opts
, "align") != NULL
) {
522 error_setg(errp
, "Please specify shift option when using align");
527 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
529 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
530 icount_warp_rt
, NULL
);
533 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
535 if (icount_align_option
&& !icount_sleep
) {
536 error_setg(errp
, "align=on and sleep=no are incompatible");
538 if (strcmp(option
, "auto") != 0) {
540 icount_time_shift
= strtol(option
, &rem_str
, 0);
541 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
542 error_setg(errp
, "icount: Invalid shift value");
546 } else if (icount_align_option
) {
547 error_setg(errp
, "shift=auto and align=on are incompatible");
548 } else if (!icount_sleep
) {
549 error_setg(errp
, "shift=auto and sleep=no are incompatible");
554 /* 125MIPS seems a reasonable initial guess at the guest speed.
555 It will be corrected fairly quickly anyway. */
556 icount_time_shift
= 3;
558 /* Have both realtime and virtual time triggers for speed adjustment.
559 The realtime trigger catches emulated time passing too slowly,
560 the virtual time trigger catches emulated time passing too fast.
561 Realtime triggers occur even when idle, so use them less frequently
563 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
564 icount_adjust_rt
, NULL
);
565 timer_mod(icount_rt_timer
,
566 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
567 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
568 icount_adjust_vm
, NULL
);
569 timer_mod(icount_vm_timer
,
570 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
571 get_ticks_per_sec() / 10);
574 /***********************************************************/
575 void hw_error(const char *fmt
, ...)
581 fprintf(stderr
, "qemu: hardware error: ");
582 vfprintf(stderr
, fmt
, ap
);
583 fprintf(stderr
, "\n");
585 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
586 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
592 void cpu_synchronize_all_states(void)
597 cpu_synchronize_state(cpu
);
601 void cpu_synchronize_all_post_reset(void)
606 cpu_synchronize_post_reset(cpu
);
610 void cpu_synchronize_all_post_init(void)
615 cpu_synchronize_post_init(cpu
);
619 void cpu_clean_all_dirty(void)
624 cpu_clean_state(cpu
);
628 static int do_vm_stop(RunState state
)
632 if (runstate_is_running()) {
636 vm_state_notify(0, state
);
637 qapi_event_send_stop(&error_abort
);
641 ret
= bdrv_flush_all();
646 static bool cpu_can_run(CPUState
*cpu
)
651 if (cpu_is_stopped(cpu
)) {
657 static void cpu_handle_guest_debug(CPUState
*cpu
)
659 gdb_set_stop_cpu(cpu
);
660 qemu_system_debug_request();
664 static void cpu_signal(int sig
)
667 cpu_exit(current_cpu
);
673 static void sigbus_reraise(void)
676 struct sigaction action
;
678 memset(&action
, 0, sizeof(action
));
679 action
.sa_handler
= SIG_DFL
;
680 if (!sigaction(SIGBUS
, &action
, NULL
)) {
683 sigaddset(&set
, SIGBUS
);
684 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
686 perror("Failed to re-raise SIGBUS!\n");
690 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
693 if (kvm_on_sigbus(siginfo
->ssi_code
,
694 (void *)(intptr_t)siginfo
->ssi_addr
)) {
699 static void qemu_init_sigbus(void)
701 struct sigaction action
;
703 memset(&action
, 0, sizeof(action
));
704 action
.sa_flags
= SA_SIGINFO
;
705 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
706 sigaction(SIGBUS
, &action
, NULL
);
708 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
711 static void qemu_kvm_eat_signals(CPUState
*cpu
)
713 struct timespec ts
= { 0, 0 };
719 sigemptyset(&waitset
);
720 sigaddset(&waitset
, SIG_IPI
);
721 sigaddset(&waitset
, SIGBUS
);
724 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
725 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
726 perror("sigtimedwait");
732 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
740 r
= sigpending(&chkset
);
742 perror("sigpending");
745 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
748 #else /* !CONFIG_LINUX */
750 static void qemu_init_sigbus(void)
754 static void qemu_kvm_eat_signals(CPUState
*cpu
)
757 #endif /* !CONFIG_LINUX */
/*
 * Signal handler with an empty body.  It is installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals() so that the signal has a handler at all.
 */
static void dummy_signal(int sig)
{
}
764 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
768 struct sigaction sigact
;
770 memset(&sigact
, 0, sizeof(sigact
));
771 sigact
.sa_handler
= dummy_signal
;
772 sigaction(SIG_IPI
, &sigact
, NULL
);
774 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
775 sigdelset(&set
, SIG_IPI
);
776 sigdelset(&set
, SIGBUS
);
777 r
= kvm_set_signal_mask(cpu
, &set
);
779 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
784 static void qemu_tcg_init_cpu_signals(void)
787 struct sigaction sigact
;
789 memset(&sigact
, 0, sizeof(sigact
));
790 sigact
.sa_handler
= cpu_signal
;
791 sigaction(SIG_IPI
, &sigact
, NULL
);
794 sigaddset(&set
, SIG_IPI
);
795 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
799 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
804 static void qemu_tcg_init_cpu_signals(void)
809 static QemuMutex qemu_global_mutex
;
810 static QemuCond qemu_io_proceeded_cond
;
811 static unsigned iothread_requesting_mutex
;
813 static QemuThread io_thread
;
815 static QemuThread
*tcg_cpu_thread
;
816 static QemuCond
*tcg_halt_cond
;
819 static QemuCond qemu_cpu_cond
;
821 static QemuCond qemu_pause_cond
;
822 static QemuCond qemu_work_cond
;
824 void qemu_init_cpu_loop(void)
827 qemu_cond_init(&qemu_cpu_cond
);
828 qemu_cond_init(&qemu_pause_cond
);
829 qemu_cond_init(&qemu_work_cond
);
830 qemu_cond_init(&qemu_io_proceeded_cond
);
831 qemu_mutex_init(&qemu_global_mutex
);
833 qemu_thread_get_self(&io_thread
);
836 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
838 struct qemu_work_item wi
;
840 if (qemu_cpu_is_self(cpu
)) {
848 if (cpu
->queued_work_first
== NULL
) {
849 cpu
->queued_work_first
= &wi
;
851 cpu
->queued_work_last
->next
= &wi
;
853 cpu
->queued_work_last
= &wi
;
859 CPUState
*self_cpu
= current_cpu
;
861 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
862 current_cpu
= self_cpu
;
866 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
868 struct qemu_work_item
*wi
;
870 if (qemu_cpu_is_self(cpu
)) {
875 wi
= g_malloc0(sizeof(struct qemu_work_item
));
879 if (cpu
->queued_work_first
== NULL
) {
880 cpu
->queued_work_first
= wi
;
882 cpu
->queued_work_last
->next
= wi
;
884 cpu
->queued_work_last
= wi
;
891 static void flush_queued_work(CPUState
*cpu
)
893 struct qemu_work_item
*wi
;
895 if (cpu
->queued_work_first
== NULL
) {
899 while ((wi
= cpu
->queued_work_first
)) {
900 cpu
->queued_work_first
= wi
->next
;
907 cpu
->queued_work_last
= NULL
;
908 qemu_cond_broadcast(&qemu_work_cond
);
911 static void qemu_wait_io_event_common(CPUState
*cpu
)
916 qemu_cond_signal(&qemu_pause_cond
);
918 flush_queued_work(cpu
);
919 cpu
->thread_kicked
= false;
922 static void qemu_tcg_wait_io_event(void)
926 while (all_cpu_threads_idle()) {
927 /* Start accounting real time to the virtual clock if the CPUs
929 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
930 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
933 while (iothread_requesting_mutex
) {
934 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
938 qemu_wait_io_event_common(cpu
);
942 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
944 while (cpu_thread_is_idle(cpu
)) {
945 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
948 qemu_kvm_eat_signals(cpu
);
949 qemu_wait_io_event_common(cpu
);
952 static void *qemu_kvm_cpu_thread_fn(void *arg
)
957 qemu_mutex_lock_iothread();
958 qemu_thread_get_self(cpu
->thread
);
959 cpu
->thread_id
= qemu_get_thread_id();
963 r
= kvm_init_vcpu(cpu
);
965 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
969 qemu_kvm_init_cpu_signals(cpu
);
971 /* signal CPU creation */
973 qemu_cond_signal(&qemu_cpu_cond
);
976 if (cpu_can_run(cpu
)) {
977 r
= kvm_cpu_exec(cpu
);
978 if (r
== EXCP_DEBUG
) {
979 cpu_handle_guest_debug(cpu
);
982 qemu_kvm_wait_io_event(cpu
);
988 static void *qemu_dummy_cpu_thread_fn(void *arg
)
991 fprintf(stderr
, "qtest is not supported under Windows\n");
998 qemu_mutex_lock_iothread();
999 qemu_thread_get_self(cpu
->thread
);
1000 cpu
->thread_id
= qemu_get_thread_id();
1003 sigemptyset(&waitset
);
1004 sigaddset(&waitset
, SIG_IPI
);
1006 /* signal CPU creation */
1007 cpu
->created
= true;
1008 qemu_cond_signal(&qemu_cpu_cond
);
1013 qemu_mutex_unlock_iothread();
1016 r
= sigwait(&waitset
, &sig
);
1017 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1022 qemu_mutex_lock_iothread();
1024 qemu_wait_io_event_common(cpu
);
1031 static void tcg_exec_all(void);
1033 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1035 CPUState
*cpu
= arg
;
1037 qemu_mutex_lock_iothread();
1038 qemu_tcg_init_cpu_signals();
1039 qemu_thread_get_self(cpu
->thread
);
1042 cpu
->thread_id
= qemu_get_thread_id();
1043 cpu
->created
= true;
1046 qemu_cond_signal(&qemu_cpu_cond
);
1048 /* wait for initial kick-off after machine start */
1049 while (first_cpu
->stopped
) {
1050 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1052 /* process any pending work */
1054 qemu_wait_io_event_common(cpu
);
1058 /* process any pending work */
1065 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1067 if (deadline
== 0) {
1068 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1071 qemu_tcg_wait_io_event();
1077 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1082 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1084 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1088 if (!qemu_cpu_is_self(cpu
)) {
1091 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1092 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1097 /* On multi-core systems, we are not sure that the thread is actually
1098 * suspended until we can get the context.
1100 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1101 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1107 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1108 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1116 void qemu_cpu_kick(CPUState
*cpu
)
1118 qemu_cond_broadcast(cpu
->halt_cond
);
1119 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1120 qemu_cpu_kick_thread(cpu
);
1121 cpu
->thread_kicked
= true;
1125 void qemu_cpu_kick_self(void)
1128 assert(current_cpu
);
1130 if (!current_cpu
->thread_kicked
) {
1131 qemu_cpu_kick_thread(current_cpu
);
1132 current_cpu
->thread_kicked
= true;
1139 bool qemu_cpu_is_self(CPUState
*cpu
)
1141 return qemu_thread_is_self(cpu
->thread
);
1144 bool qemu_in_vcpu_thread(void)
1146 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1149 static __thread
bool iothread_locked
= false;
1151 bool qemu_mutex_iothread_locked(void)
1153 return iothread_locked
;
1156 void qemu_mutex_lock_iothread(void)
1158 atomic_inc(&iothread_requesting_mutex
);
1159 /* In the simple case there is no need to bump the VCPU thread out of
1160 * TCG code execution.
1162 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1163 !first_cpu
|| !first_cpu
->thread
) {
1164 qemu_mutex_lock(&qemu_global_mutex
);
1165 atomic_dec(&iothread_requesting_mutex
);
1167 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1168 qemu_cpu_kick_thread(first_cpu
);
1169 qemu_mutex_lock(&qemu_global_mutex
);
1171 atomic_dec(&iothread_requesting_mutex
);
1172 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1174 iothread_locked
= true;
1177 void qemu_mutex_unlock_iothread(void)
1179 iothread_locked
= false;
1180 qemu_mutex_unlock(&qemu_global_mutex
);
1183 static int all_vcpus_paused(void)
1188 if (!cpu
->stopped
) {
1196 void pause_all_vcpus(void)
1200 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1206 if (qemu_in_vcpu_thread()) {
1208 if (!kvm_enabled()) {
1211 cpu
->stopped
= true;
1217 while (!all_vcpus_paused()) {
1218 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1225 void cpu_resume(CPUState
*cpu
)
1228 cpu
->stopped
= false;
1232 void resume_all_vcpus(void)
1236 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1242 /* For temporary buffers for forming a name */
1243 #define VCPU_THREAD_NAME_SIZE 16
1245 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1247 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1249 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1251 /* share a single thread for all cpus with TCG */
1252 if (!tcg_cpu_thread
) {
1253 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1254 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1255 qemu_cond_init(cpu
->halt_cond
);
1256 tcg_halt_cond
= cpu
->halt_cond
;
1257 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1259 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1260 cpu
, QEMU_THREAD_JOINABLE
);
1262 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1264 while (!cpu
->created
) {
1265 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1267 tcg_cpu_thread
= cpu
->thread
;
1269 cpu
->thread
= tcg_cpu_thread
;
1270 cpu
->halt_cond
= tcg_halt_cond
;
1274 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1276 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1278 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1279 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1280 qemu_cond_init(cpu
->halt_cond
);
1281 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1283 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1284 cpu
, QEMU_THREAD_JOINABLE
);
1285 while (!cpu
->created
) {
1286 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1290 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1292 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1294 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1295 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1296 qemu_cond_init(cpu
->halt_cond
);
1297 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1299 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1300 QEMU_THREAD_JOINABLE
);
1301 while (!cpu
->created
) {
1302 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1306 void qemu_init_vcpu(CPUState
*cpu
)
1308 cpu
->nr_cores
= smp_cores
;
1309 cpu
->nr_threads
= smp_threads
;
1310 cpu
->stopped
= true;
1311 if (kvm_enabled()) {
1312 qemu_kvm_start_vcpu(cpu
);
1313 } else if (tcg_enabled()) {
1314 qemu_tcg_init_vcpu(cpu
);
1316 qemu_dummy_start_vcpu(cpu
);
1320 void cpu_stop_current(void)
1323 current_cpu
->stop
= false;
1324 current_cpu
->stopped
= true;
1325 cpu_exit(current_cpu
);
1326 qemu_cond_signal(&qemu_pause_cond
);
1330 int vm_stop(RunState state
)
1332 if (qemu_in_vcpu_thread()) {
1333 qemu_system_vmstop_request_prepare();
1334 qemu_system_vmstop_request(state
);
1336 * FIXME: should not return to device code in case
1337 * vm_stop() has been requested.
1343 return do_vm_stop(state
);
1346 /* does a state transition even if the VM is already stopped,
1347 current state is forgotten forever */
1348 int vm_stop_force_state(RunState state
)
1350 if (runstate_is_running()) {
1351 return vm_stop(state
);
1353 runstate_set(state
);
1354 /* Make sure to return an error if the flush in a previous vm_stop()
1356 return bdrv_flush_all();
1360 static int tcg_cpu_exec(CPUArchState
*env
)
1362 CPUState
*cpu
= ENV_GET_CPU(env
);
1364 #ifdef CONFIG_PROFILER
1368 #ifdef CONFIG_PROFILER
1369 ti
= profile_getclock();
1375 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1376 + cpu
->icount_extra
);
1377 cpu
->icount_decr
.u16
.low
= 0;
1378 cpu
->icount_extra
= 0;
1379 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1381 /* Maintain prior (possibly buggy) behaviour where if no deadline
1382 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1383 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1386 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1387 deadline
= INT32_MAX
;
1390 count
= qemu_icount_round(deadline
);
1391 timers_state
.qemu_icount
+= count
;
1392 decr
= (count
> 0xffff) ? 0xffff : count
;
1394 cpu
->icount_decr
.u16
.low
= decr
;
1395 cpu
->icount_extra
= count
;
1397 ret
= cpu_exec(env
);
1398 #ifdef CONFIG_PROFILER
1399 tcg_time
+= profile_getclock() - ti
;
1402 /* Fold pending instructions back into the
1403 instruction counter, and clear the interrupt flag. */
1404 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1405 + cpu
->icount_extra
);
1406 cpu
->icount_decr
.u32
= 0;
1407 cpu
->icount_extra
= 0;
1412 static void tcg_exec_all(void)
1416 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1417 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1419 if (next_cpu
== NULL
) {
1420 next_cpu
= first_cpu
;
1422 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1423 CPUState
*cpu
= next_cpu
;
1424 CPUArchState
*env
= cpu
->env_ptr
;
1426 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1427 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1429 if (cpu_can_run(cpu
)) {
1430 r
= tcg_cpu_exec(env
);
1431 if (r
== EXCP_DEBUG
) {
1432 cpu_handle_guest_debug(cpu
);
1435 } else if (cpu
->stop
|| cpu
->stopped
) {
1442 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1444 /* XXX: implement xxx_cpu_list for targets that still miss it */
1445 #if defined(cpu_list)
1446 cpu_list(f
, cpu_fprintf
);
1450 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1452 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1457 #if defined(TARGET_I386)
1458 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1459 CPUX86State
*env
= &x86_cpu
->env
;
1460 #elif defined(TARGET_PPC)
1461 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1462 CPUPPCState
*env
= &ppc_cpu
->env
;
1463 #elif defined(TARGET_SPARC)
1464 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1465 CPUSPARCState
*env
= &sparc_cpu
->env
;
1466 #elif defined(TARGET_MIPS)
1467 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1468 CPUMIPSState
*env
= &mips_cpu
->env
;
1469 #elif defined(TARGET_TRICORE)
1470 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1471 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1474 cpu_synchronize_state(cpu
);
1476 info
= g_malloc0(sizeof(*info
));
1477 info
->value
= g_malloc0(sizeof(*info
->value
));
1478 info
->value
->CPU
= cpu
->cpu_index
;
1479 info
->value
->current
= (cpu
== first_cpu
);
1480 info
->value
->halted
= cpu
->halted
;
1481 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1482 info
->value
->thread_id
= cpu
->thread_id
;
1483 #if defined(TARGET_I386)
1484 info
->value
->has_pc
= true;
1485 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1486 #elif defined(TARGET_PPC)
1487 info
->value
->has_nip
= true;
1488 info
->value
->nip
= env
->nip
;
1489 #elif defined(TARGET_SPARC)
1490 info
->value
->has_pc
= true;
1491 info
->value
->pc
= env
->pc
;
1492 info
->value
->has_npc
= true;
1493 info
->value
->npc
= env
->npc
;
1494 #elif defined(TARGET_MIPS)
1495 info
->value
->has_PC
= true;
1496 info
->value
->PC
= env
->active_tc
.PC
;
1497 #elif defined(TARGET_TRICORE)
1498 info
->value
->has_PC
= true;
1499 info
->value
->PC
= env
->PC
;
1502 /* XXX: waiting for the qapi to support GSList */
1504 head
= cur_item
= info
;
1506 cur_item
->next
= info
;
1514 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1515 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1521 int64_t orig_addr
= addr
, orig_size
= size
;
1527 cpu
= qemu_get_cpu(cpu_index
);
1529 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1534 f
= fopen(filename
, "wb");
1536 error_setg_file_open(errp
, errno
, filename
);
1544 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1545 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1546 " specified", orig_addr
, orig_size
);
1549 if (fwrite(buf
, 1, l
, f
) != l
) {
1550 error_setg(errp
, QERR_IO_ERROR
);
1561 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1568 f
= fopen(filename
, "wb");
1570 error_setg_file_open(errp
, errno
, filename
);
1578 cpu_physical_memory_read(addr
, buf
, l
);
1579 if (fwrite(buf
, 1, l
, f
) != l
) {
1580 error_setg(errp
, QERR_IO_ERROR
);
1591 void qmp_inject_nmi(Error
**errp
)
1593 #if defined(TARGET_I386)
1597 X86CPU
*cpu
= X86_CPU(cs
);
1599 if (!cpu
->apic_state
) {
1600 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1602 apic_deliver_nmi(cpu
->apic_state
);
1606 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1610 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1616 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1617 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1618 if (icount_align_option
) {
1619 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1620 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1622 cpu_fprintf(f
, "Max guest delay NA\n");
1623 cpu_fprintf(f
, "Max guest advance NA\n");